From 72231250ed81e10d66bfe70701e64fa5fe50f712 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 15 Jun 2016 23:09:44 +0200 Subject: Publish --- searchlib/src/.gitignore | 7 + searchlib/src/Doxyfile | 1162 ++ searchlib/src/apps/docstore/.gitignore | 6 + searchlib/src/apps/docstore/CMakeLists.txt | 33 + searchlib/src/apps/docstore/benchmarkdatastore.cpp | 114 + .../src/apps/docstore/create-idx-from-dat.cpp | 157 + .../src/apps/docstore/documentstoreinspect.cpp | 114 + searchlib/src/apps/docstore/verifylogdatastore.cpp | 59 + searchlib/src/apps/expgolomb/.gitignore | 3 + searchlib/src/apps/expgolomb/CMakeLists.txt | 9 + searchlib/src/apps/expgolomb/expgolomb.cpp | 175 + searchlib/src/apps/fileheaderinspect/.gitignore | 3 + .../src/apps/fileheaderinspect/CMakeLists.txt | 9 + .../apps/fileheaderinspect/fileheaderinspect.cpp | 223 + searchlib/src/apps/loadattribute/.gitignore | 3 + searchlib/src/apps/loadattribute/CMakeLists.txt | 9 + searchlib/src/apps/loadattribute/loadattribute.cpp | 216 + searchlib/src/apps/loadattribute/loadattribute.rb | 43 + searchlib/src/apps/tests/.gitignore | 8 + searchlib/src/apps/tests/CMakeLists.txt | 22 + searchlib/src/apps/tests/biglogtest.cpp | 243 + searchlib/src/apps/tests/btreestress_test.cpp | 224 + .../src/apps/tests/memoryindexstress_test.cpp | 537 + searchlib/src/apps/uniform/.gitignore | 3 + searchlib/src/apps/uniform/CMakeLists.txt | 9 + searchlib/src/apps/uniform/uniform.cpp | 153 + searchlib/src/apps/vespa-index-inspect/.gitignore | 3 + .../src/apps/vespa-index-inspect/CMakeLists.txt | 9 + .../vespa-index-inspect/vespa-index-inspect.cpp | 965 ++ .../vespa-ranking-expression-analyzer/.gitignore | 3 + .../CMakeLists.txt | 9 + .../illegal.expression | 1 + .../vespa-ranking-expression-analyzer.cpp | 386 + searchlib/src/forcelink.sh | 45 + searchlib/src/main/OWNERS | 1 + .../searchlib/aggregation/AggregationResult.java | 161 + .../aggregation/AverageAggregationResult.java | 157 + .../aggregation/CountAggregationResult.java | 99 
+ .../ExpressionCountAggregationResult.java | 116 + .../com/yahoo/searchlib/aggregation/FS4Hit.java | 132 + .../com/yahoo/searchlib/aggregation/ForceLoad.java | 39 + .../com/yahoo/searchlib/aggregation/Group.java | 518 + .../com/yahoo/searchlib/aggregation/Grouping.java | 445 + .../yahoo/searchlib/aggregation/GroupingLevel.java | 184 + .../java/com/yahoo/searchlib/aggregation/Hit.java | 104 + .../aggregation/HitsAggregationResult.java | 218 + .../aggregation/MaxAggregationResult.java | 103 + .../aggregation/MinAggregationResult.java | 103 + .../com/yahoo/searchlib/aggregation/RawData.java | 130 + .../aggregation/SumAggregationResult.java | 103 + .../com/yahoo/searchlib/aggregation/VdsHit.java | 91 + .../aggregation/XorAggregationResult.java | 99 + .../searchlib/aggregation/hll/BiasEstimator.java | 131 + .../searchlib/aggregation/hll/HyperLogLog.java | 18 + .../aggregation/hll/HyperLogLogEstimator.java | 172 + .../searchlib/aggregation/hll/NormalSketch.java | 190 + .../yahoo/searchlib/aggregation/hll/Sketch.java | 32 + .../searchlib/aggregation/hll/SketchMerger.java | 60 + .../searchlib/aggregation/hll/SparseSketch.java | 105 + .../aggregation/hll/UniqueCountEstimator.java | 12 + .../yahoo/searchlib/aggregation/package-info.java | 4 + .../com/yahoo/searchlib/document/package-info.java | 5 + .../searchlib/expression/AddFunctionNode.java | 23 + .../searchlib/expression/AggregationRefNode.java | 115 + .../searchlib/expression/AndFunctionNode.java | 22 + .../expression/ArithmeticTypeConversion.java | 66 + .../searchlib/expression/ArrayAtLookupNode.java | 94 + .../yahoo/searchlib/expression/AttributeNode.java | 90 + .../searchlib/expression/BitFunctionNode.java | 36 + .../searchlib/expression/BucketResultNode.java | 47 + .../searchlib/expression/CatFunctionNode.java | 42 + .../yahoo/searchlib/expression/ConstantNode.java | 82 + .../expression/DebugWaitFunctionNode.java | 104 + .../searchlib/expression/DivideFunctionNode.java | 23 + 
.../searchlib/expression/DocumentAccessorNode.java | 19 + .../searchlib/expression/DocumentFieldNode.java | 116 + .../yahoo/searchlib/expression/ExpressionNode.java | 104 + .../expression/FixedWidthBucketFunctionNode.java | 82 + .../expression/FloatBucketResultNode.java | 118 + .../expression/FloatBucketResultNodeVector.java | 80 + .../searchlib/expression/FloatResultNode.java | 182 + .../expression/FloatResultNodeVector.java | 80 + .../com/yahoo/searchlib/expression/ForceLoad.java | 89 + .../yahoo/searchlib/expression/FunctionNode.java | 74 + .../GetDocIdNamespaceSpecificFunctionNode.java | 88 + .../expression/GetYMUMChecksumFunctionNode.java | 60 + .../searchlib/expression/Int16ResultNode.java | 149 + .../expression/Int16ResultNodeVector.java | 79 + .../searchlib/expression/Int32ResultNode.java | 149 + .../expression/Int32ResultNodeVector.java | 80 + .../yahoo/searchlib/expression/Int8ResultNode.java | 149 + .../searchlib/expression/Int8ResultNodeVector.java | 80 + .../expression/IntegerBucketResultNode.java | 102 + .../expression/IntegerBucketResultNodeVector.java | 80 + .../searchlib/expression/IntegerResultNode.java | 183 + .../expression/IntegerResultNodeVector.java | 80 + .../expression/InterpolatedLookupNode.java | 94 + .../searchlib/expression/MD5BitFunctionNode.java | 35 + .../searchlib/expression/MathFunctionNode.java | 185 + .../searchlib/expression/MaxFunctionNode.java | 23 + .../searchlib/expression/MinFunctionNode.java | 23 + .../searchlib/expression/ModuloFunctionNode.java | 23 + .../searchlib/expression/MultiArgFunctionNode.java | 176 + .../searchlib/expression/MultiplyFunctionNode.java | 23 + .../searchlib/expression/NegateFunctionNode.java | 52 + .../expression/NormalizeSubjectFunctionNode.java | 65 + .../yahoo/searchlib/expression/NullResultNode.java | 56 + .../searchlib/expression/NumElemFunctionNode.java | 50 + .../searchlib/expression/NumericFunctionNode.java | 31 + .../searchlib/expression/NumericResultNode.java | 52 + 
.../yahoo/searchlib/expression/OrFunctionNode.java | 22 + .../expression/PositiveInfinityResultNode.java | 44 + .../expression/RangeBucketPreDefFunctionNode.java | 82 + .../searchlib/expression/RawBucketResultNode.java | 101 + .../expression/RawBucketResultNodeVector.java | 75 + .../yahoo/searchlib/expression/RawResultNode.java | 184 + .../searchlib/expression/RawResultNodeVector.java | 80 + .../yahoo/searchlib/expression/RelevanceNode.java | 72 + .../com/yahoo/searchlib/expression/ResultNode.java | 82 + .../searchlib/expression/ResultNodeVector.java | 45 + .../searchlib/expression/ReverseFunctionNode.java | 39 + .../searchlib/expression/SingleResultNode.java | 38 + .../searchlib/expression/SortFunctionNode.java | 36 + .../searchlib/expression/StrCatFunctionNode.java | 42 + .../searchlib/expression/StrLenFunctionNode.java | 55 + .../expression/StringBucketResultNode.java | 114 + .../expression/StringBucketResultNodeVector.java | 80 + .../searchlib/expression/StringResultNode.java | 177 + .../expression/StringResultNodeVector.java | 80 + .../expression/TimeStampFunctionNode.java | 116 + .../searchlib/expression/ToFloatFunctionNode.java | 39 + .../searchlib/expression/ToIntFunctionNode.java | 44 + .../searchlib/expression/ToRawFunctionNode.java | 38 + .../searchlib/expression/ToStringFunctionNode.java | 51 + .../searchlib/expression/UcaFunctionNode.java | 84 + .../searchlib/expression/UnaryBitFunctionNode.java | 89 + .../searchlib/expression/UnaryFunctionNode.java | 44 + .../searchlib/expression/XorBitFunctionNode.java | 38 + .../searchlib/expression/XorFunctionNode.java | 22 + .../searchlib/expression/ZCurveFunctionNode.java | 92 + .../yahoo/searchlib/expression/package-info.java | 4 + .../yahoo/searchlib/gbdt/CategoryFeatureNode.java | 34 + .../java/com/yahoo/searchlib/gbdt/FeatureNode.java | 95 + .../com/yahoo/searchlib/gbdt/GbdtConverter.java | 34 + .../java/com/yahoo/searchlib/gbdt/GbdtModel.java | 92 + .../yahoo/searchlib/gbdt/NumericFeatureNode.java | 34 + 
.../com/yahoo/searchlib/gbdt/ResponseNode.java | 33 + .../java/com/yahoo/searchlib/gbdt/TreeNode.java | 43 + .../java/com/yahoo/searchlib/gbdt/XmlHelper.java | 110 + .../java/com/yahoo/searchlib/mlr/ga/CaseList.java | 15 + .../java/com/yahoo/searchlib/mlr/ga/Evolvable.java | 26 + .../com/yahoo/searchlib/mlr/ga/Individual.java | 69 + .../yahoo/searchlib/mlr/ga/KeyboardChecker.java | 50 + .../main/java/com/yahoo/searchlib/mlr/ga/Main.java | 73 + .../com/yahoo/searchlib/mlr/ga/Population.java | 60 + .../yahoo/searchlib/mlr/ga/PrintingTracker.java | 91 + .../mlr/ga/RankingExpressionCaseList.java | 33 + .../com/yahoo/searchlib/mlr/ga/Recombiner.java | 200 + .../java/com/yahoo/searchlib/mlr/ga/Species.java | 93 + .../com/yahoo/searchlib/mlr/ga/SpeciesName.java | 54 + .../java/com/yahoo/searchlib/mlr/ga/Tracker.java | 25 + .../java/com/yahoo/searchlib/mlr/ga/Trainer.java | 57 + .../searchlib/mlr/ga/TrainingEnvironment.java | 31 + .../yahoo/searchlib/mlr/ga/TrainingParameters.java | 100 + .../com/yahoo/searchlib/mlr/ga/TrainingSet.java | 122 + .../searchlib/mlr/ga/caselist/CsvFileCaseList.java | 56 + .../searchlib/mlr/ga/caselist/FileCaseList.java | 73 + .../searchlib/mlr/ga/caselist/FvFileCaseList.java | 59 + .../searchlib/mlr/gbdt/ExpressionAnalysis.java | 425 + .../java/com/yahoo/searchlib/package-info.java | 5 + .../ranking/features/ElementCompleteness.java | 96 + .../yahoo/searchlib/ranking/features/Features.java | 30 + .../searchlib/ranking/features/FieldTermMatch.java | 48 + .../ranking/features/fieldmatch/Field.java | 60 + .../features/fieldmatch/FieldMatchMetrics.java | 536 + .../fieldmatch/FieldMatchMetricsComputer.java | 433 + .../fieldmatch/FieldMatchMetricsParameters.java | 198 + .../ranking/features/fieldmatch/Main.java | 39 + .../ranking/features/fieldmatch/Query.java | 72 + .../ranking/features/fieldmatch/QueryTerm.java | 67 + .../features/fieldmatch/SegmentStartPoint.java | 145 + .../ranking/features/fieldmatch/Trace.java | 22 + 
.../ranking/features/fieldmatch/package-info.java | 12 + .../searchlib/ranking/features/package-info.java | 10 + .../rankingexpression/ExpressionFunction.java | 139 + .../searchlib/rankingexpression/FeatureList.java | 140 + .../rankingexpression/RankingExpression.java | 250 + .../evaluation/AbstractArrayContext.java | 131 + .../rankingexpression/evaluation/ArrayContext.java | 120 + .../rankingexpression/evaluation/BooleanValue.java | 61 + .../rankingexpression/evaluation/Context.java | 107 + .../evaluation/DoubleCompatibleValue.java | 51 + .../evaluation/DoubleOnlyArrayContext.java | 96 + .../rankingexpression/evaluation/DoubleValue.java | 158 + .../evaluation/ExpressionOptimizer.java | 55 + .../rankingexpression/evaluation/MapContext.java | 95 + .../evaluation/OptimizationReport.java | 63 + .../rankingexpression/evaluation/Optimizer.java | 23 + .../rankingexpression/evaluation/StringValue.java | 108 + .../rankingexpression/evaluation/TensorValue.java | 168 + .../rankingexpression/evaluation/Value.java | 96 + .../evaluation/gbdtoptimization/.gitignore | 0 .../gbdtoptimization/GBDTForestNode.java | 43 + .../gbdtoptimization/GBDTForestOptimizer.java | 124 + .../evaluation/gbdtoptimization/GBDTNode.java | 98 + .../evaluation/gbdtoptimization/GBDTOptimizer.java | 184 + .../evaluation/gbdtoptimization/test/.gitignore | 0 .../rankingexpression/evaluation/package-info.java | 10 + .../searchlib/rankingexpression/mlr/.gitignore | 0 .../searchlib/rankingexpression/package-info.java | 10 + .../rankingexpression/parser/package-info.java | 10 + .../rankingexpression/rule/Arguments.java | 81 + .../rankingexpression/rule/ArithmeticNode.java | 129 + .../rankingexpression/rule/ArithmeticOperator.java | 62 + .../rankingexpression/rule/BooleanNode.java | 11 + .../rankingexpression/rule/ComparisonNode.java | 62 + .../rankingexpression/rule/CompositeNode.java | 27 + .../rankingexpression/rule/ConstantNode.java | 54 + .../rankingexpression/rule/EmbracedNode.java | 57 + 
.../rankingexpression/rule/ExpressionNode.java | 51 + .../searchlib/rankingexpression/rule/Function.java | 55 + .../rankingexpression/rule/FunctionNode.java | 90 + .../searchlib/rankingexpression/rule/IfNode.java | 86 + .../searchlib/rankingexpression/rule/NameNode.java | 37 + .../rankingexpression/rule/NegativeNode.java | 49 + .../rankingexpression/rule/ReferenceNode.java | 119 + .../rule/SerializationContext.java | 116 + .../rankingexpression/rule/SetMembershipNode.java | 72 + .../rankingexpression/rule/TensorMatchNode.java | 59 + .../rankingexpression/rule/TensorSumNode.java | 65 + .../rankingexpression/rule/TruthOperator.java | 48 + .../rankingexpression/rule/package-info.java | 7 + .../transform/ConstantDereferencer.java | 62 + .../transform/ExpressionTransformer.java | 38 + .../rankingexpression/transform/Simplifier.java | 131 + .../rankingexpression/transform/package-info.java | 6 + .../yahoo/searchlib/treenet/TreeNetConverter.java | 35 + .../com/yahoo/searchlib/treenet/package-info.java | 5 + .../searchlib/treenet/parser/package-info.java | 5 + .../treenet/rule/ComparisonCondition.java | 39 + .../yahoo/searchlib/treenet/rule/Condition.java | 54 + .../com/yahoo/searchlib/treenet/rule/Response.java | 45 + .../treenet/rule/SetMembershipCondition.java | 57 + .../com/yahoo/searchlib/treenet/rule/Tree.java | 110 + .../com/yahoo/searchlib/treenet/rule/TreeNet.java | 63 + .../com/yahoo/searchlib/treenet/rule/TreeNode.java | 34 + .../yahoo/searchlib/treenet/rule/package-info.java | 5 + .../src/main/javacc/RankingExpressionParser.jj | 479 + searchlib/src/main/javacc/TreeNetParser.jj | 362 + searchlib/src/main/sh/evaluation-benchmark | 1 + searchlib/src/main/sh/ga | 69 + searchlib/src/main/sh/gbdt-analysis | 1 + searchlib/src/main/sh/vespa-gbdt-converter | 63 + searchlib/src/main/sh/vespa-treenet-converter | 63 + searchlib/src/test/OWNERS | 1 + searchlib/src/test/files/features01.expression | 1 + searchlib/src/test/files/features02.expression | 1 + 
searchlib/src/test/files/features03.expression | 4 + searchlib/src/test/files/features04.expression | 1 + searchlib/src/test/files/gbdt.expression | 10 + searchlib/src/test/files/gbdt.ext.xml | 284 + searchlib/src/test/files/gbdt.xml | 614 + searchlib/src/test/files/gbdt_empty_tree.xml | 46 + searchlib/src/test/files/gbdt_err.xml | 3 + .../src/test/files/gbdt_set_inclusion_test.xml | 119 + searchlib/src/test/files/gbdt_tree_response.xml | 35 + searchlib/src/test/files/mlr/cases-illegal1.csv | 5 + searchlib/src/test/files/mlr/cases-illegal2.csv | 2 + searchlib/src/test/files/mlr/cases-linear.csv | 7 + searchlib/src/test/files/mlr/cases.csv | 6 + searchlib/src/test/files/ranking01.expression | 10 + searchlib/src/test/files/ranking02.expression | 90 + searchlib/src/test/files/ranking03.expression | 97 + searchlib/src/test/files/ranking04.expression | 103 + searchlib/src/test/files/ranking05.expression | 77 + searchlib/src/test/files/ranking06.expression | 85 + searchlib/src/test/files/ranking07.expression | 200 + searchlib/src/test/files/ranking08.expression | 5 + searchlib/src/test/files/s-expression.vre | 1 + searchlib/src/test/files/simple.expression | 1 + searchlib/src/test/files/testAggregatorResults | Bin 0 -> 310 bytes searchlib/src/test/files/testFunctionNodes | Bin 0 -> 1025 bytes searchlib/src/test/files/testGroup | Bin 0 -> 427 bytes searchlib/src/test/files/testGrouping | Bin 0 -> 828 bytes searchlib/src/test/files/testGroupingLevel | Bin 0 -> 159 bytes searchlib/src/test/files/testHitCollection | Bin 0 -> 681 bytes searchlib/src/test/files/testResultTypes | Bin 0 -> 374 bytes searchlib/src/test/files/testSpecialNodes | Bin 0 -> 93 bytes searchlib/src/test/files/treenet01.model | 531 + searchlib/src/test/files/treenet02.model | 11784 ++++++++++++++++ searchlib/src/test/files/treenet03.model | 5880 ++++++++ searchlib/src/test/files/treenet04.model | 6247 +++++++++ searchlib/src/test/files/treenet05.model | 4684 +++++++ 
searchlib/src/test/files/treenet06.model | 3799 ++++++ searchlib/src/test/files/treenet07.model | 13275 +++++++++++++++++++ searchlib/src/test/files/treenet08.model | 227 + .../searchlib/aggregation/AggregationTestCase.java | 346 + .../ExpressionCountAggregationResultTest.java | 82 + .../searchlib/aggregation/ForceLoadTestCase.java | 19 + .../yahoo/searchlib/aggregation/GroupTestCase.java | 229 + .../aggregation/GroupingSerializationTest.java | 387 + .../searchlib/aggregation/GroupingTestCase.java | 227 + .../yahoo/searchlib/aggregation/MergeTestCase.java | 735 + .../aggregation/hll/BiasEstimatorTest.java | 70 + .../aggregation/hll/HyperLogLogEstimatorTest.java | 89 + .../hll/HyperLogLogPrecisionBenchmark.java | 70 + .../aggregation/hll/NormalSketchTest.java | 121 + .../aggregation/hll/SketchMergerTest.java | 69 + .../searchlib/aggregation/hll/SketchUtils.java | 46 + .../aggregation/hll/SparseSketchTest.java | 62 + .../searchlib/expression/ExpressionTestCase.java | 932 ++ .../FixedWidthBucketFunctionTestCase.java | 21 + .../expression/FloatBucketResultNodeTestCase.java | 44 + .../searchlib/expression/ForceLoadTestCase.java | 19 + .../IntegerBucketResultNodeTestCase.java | 35 + .../expression/IntegerResultNodeTestCase.java | 118 + .../expression/NullResultNodeTestCase.java | 36 + .../expression/ObjectVisitorTestCase.java | 61 + .../RangeBucketPreDefFunctionTestCase.java | 21 + .../expression/RawBucketResultNodeTestCase.java | 46 + .../yahoo/searchlib/expression/ResultNodeTest.java | 43 + .../expression/ResultNodeVectorTestCase.java | 167 + .../expression/StringBucketResultNodeTestCase.java | 57 + .../expression/TimeStampFunctionTestCase.java | 29 + .../expression/ZCurveFunctionTestCase.java | 25 + .../searchlib/gbdt/GbdtConverterTestCase.java | 169 + .../yahoo/searchlib/gbdt/GbdtModelTestCase.java | 65 + .../searchlib/gbdt/ReferenceNodeTestCase.java | 101 + .../yahoo/searchlib/gbdt/ResponseNodeTestCase.java | 40 + .../com/yahoo/searchlib/gbdt/TreeNodeTestCase.java | 
57 + .../yahoo/searchlib/gbdt/XmlHelperTestCase.java | 153 + .../mlr/ga/test/CsvFileCaseListTestCase.java | 81 + .../mlr/ga/test/ExampleLearningSessions.java | 110 + .../yahoo/searchlib/mlr/ga/test/MainTestCase.java | 57 + .../mlr/ga/test/MockTrainingSetTestCase.java | 46 + .../mlr/ga/test/TripAdvisorFileCaseList.java | 99 + .../mlr/gbdt/ExpressionAnalysisRunner.java | 19 + .../features/ElementCompletenessTestCase.java | 80 + .../ranking/features/FieldTermMatchTestCase.java | 30 + .../fieldmatch/SemanticDistanceTestCase.java | 140 + .../reference/OptimalStringAlignmentDistance.java | 201 + .../reference/TextbookLevenshteinDistance.java | 38 + .../test/OptimalStringAlignmentTestCase.java | 58 + .../fieldmatch/test/FieldMatchMetricsTestCase.java | 757 ++ .../rankingexpression/FeatureListTestCase.java | 77 + .../RankingExpressionTestCase.java | 281 + .../rankingexpression/evaluation/Benchmark.java | 144 + .../evaluation/EvaluationBenchmark.java | 474 + .../evaluation/EvaluationTestCase.java | 399 + .../evaluation/NeuralNetEvaluationTestCase.java | 49 + .../evaluation/StreamEvaluationBenchmark.java | 160 + .../gbdtoptimization/ContextReuseTestCase.java | 61 + .../GBDTForestOptimizerTestCase.java | 109 + .../gbdtoptimization/GBDTOptimizerTestCase.java | 105 + .../rankingexpression/rule/ArgumentsTestCase.java | 42 + .../rule/ReferenceNodeTestCase.java | 35 + .../transform/ConstantDereferencerTestCase.java | 30 + .../transform/SimplifierTestCase.java | 80 + .../searchlib/treenet/TreeNetParserTestCase.java | 79 + searchlib/src/testlist.txt | 137 + searchlib/src/tests/.gitignore | 3 + searchlib/src/tests/aggregator/.gitignore | 7 + searchlib/src/tests/aggregator/CMakeLists.txt | 15 + searchlib/src/tests/aggregator/DESC | 1 + searchlib/src/tests/aggregator/FILES | 1 + searchlib/src/tests/aggregator/attr_test.cpp | 285 + searchlib/src/tests/aggregator/perdocexpr.cpp | 1693 +++ searchlib/src/tests/alignment/.gitignore | 4 + searchlib/src/tests/alignment/CMakeLists.txt | 8 + 
searchlib/src/tests/alignment/DESC | 1 + searchlib/src/tests/alignment/FILES | 1 + searchlib/src/tests/alignment/alignment.cpp | 68 + searchlib/src/tests/attribute/.gitignore | 11 + searchlib/src/tests/attribute/CMakeLists.txt | 29 + searchlib/src/tests/attribute/DESC | 1 + searchlib/src/tests/attribute/FILES | 2 + searchlib/src/tests/attribute/attribute_test.cpp | 2200 +++ searchlib/src/tests/attribute/attribute_test.sh | 7 + .../src/tests/attribute/attributebenchmark.cpp | 678 + .../src/tests/attribute/attributebenchmark.rb | 22 + .../tests/attribute/attributefilewriter/.gitignore | 1 + .../attribute/attributefilewriter/CMakeLists.txt | 8 + .../attributefilewriter_test.cpp | 116 + searchlib/src/tests/attribute/attributeguard.cpp | 32 + .../src/tests/attribute/attributeguard_test.sh | 7 + .../tests/attribute/attributemanager/.gitignore | 4 + .../attribute/attributemanager/CMakeLists.txt | 8 + .../attributemanager/attributemanager_test.cpp | 422 + searchlib/src/tests/attribute/attributesearcher.h | 265 + searchlib/src/tests/attribute/attributeupdater.h | 299 + searchlib/src/tests/attribute/benchmarkplotter.rb | 134 + searchlib/src/tests/attribute/bitvector/.gitignore | 1 + .../src/tests/attribute/bitvector/CMakeLists.txt | 9 + .../tests/attribute/bitvector/bitvector_test.cpp | 632 + .../src/tests/attribute/changevector_test.cpp | 92 + searchlib/src/tests/attribute/changevector_test.sh | 7 + .../src/tests/attribute/comparator/.gitignore | 4 + .../src/tests/attribute/comparator/CMakeLists.txt | 8 + searchlib/src/tests/attribute/comparator/DESC | 1 + searchlib/src/tests/attribute/comparator/FILES | 1 + .../tests/attribute/comparator/comparator_test.cpp | 169 + .../attribute/document_weight_iterator/.gitignore | 1 + .../document_weight_iterator/CMakeLists.txt | 9 + .../tests/attribute/document_weight_iterator/FILES | 1 + .../document_weight_iterator_test.cpp | 189 + .../src/tests/attribute/enumeratedsave/.gitignore | 127 + 
.../tests/attribute/enumeratedsave/CMakeLists.txt | 8 + .../enumeratedsave/enumeratedsave_test.cpp | 944 ++ searchlib/src/tests/attribute/enumstore/.gitignore | 4 + .../src/tests/attribute/enumstore/CMakeLists.txt | 8 + searchlib/src/tests/attribute/enumstore/DESC | 1 + searchlib/src/tests/attribute/enumstore/FILES | 1 + .../tests/attribute/enumstore/enumstore_test.cpp | 879 ++ .../tests/attribute/extendattributes/.gitignore | 4 + .../attribute/extendattributes/CMakeLists.txt | 8 + .../src/tests/attribute/extendattributes/DESC | 1 + .../src/tests/attribute/extendattributes/FILES | 1 + .../attribute/extendattributes/extendattribute.cpp | 176 + .../extendattributes/extendattribute_test.sh | 3 + .../src/tests/attribute/gidmapattribute/.gitignore | 0 .../tests/attribute/multivaluemapping/.gitignore | 4 + .../attribute/multivaluemapping/CMakeLists.txt | 8 + .../src/tests/attribute/multivaluemapping/DESC | 1 + .../src/tests/attribute/multivaluemapping/FILES | 1 + .../multivaluemapping/multivaluemapping_test.cpp | 836 ++ .../src/tests/attribute/postinglist/.gitignore | 4 + .../src/tests/attribute/postinglist/CMakeLists.txt | 8 + searchlib/src/tests/attribute/postinglist/DESC | 1 + searchlib/src/tests/attribute/postinglist/FILES | 1 + .../tests/attribute/postinglist/postinglist.cpp | 707 + .../attribute/postinglistattribute/.gitignore | 4 + .../attribute/postinglistattribute/CMakeLists.txt | 8 + .../src/tests/attribute/postinglistattribute/DESC | 1 + .../src/tests/attribute/postinglistattribute/FILES | 1 + .../postinglistattribute_test.cpp | 1021 ++ .../postinglistattribute_test.sh | 5 + searchlib/src/tests/attribute/runnable.h | 43 + .../src/tests/attribute/searchable/.gitignore | 4 + .../src/tests/attribute/searchable/CMakeLists.txt | 22 + .../attribute_searchable_adapter_test.cpp | 689 + .../attribute_searchable_adapter_test.sh | 4 + .../attribute_weighted_set_blueprint_test.cpp | 231 + .../searchable/attributeblueprint_test.cpp | 240 + 
.../src/tests/attribute/searchcontext/.gitignore | 4 + .../tests/attribute/searchcontext/CMakeLists.txt | 9 + searchlib/src/tests/attribute/searchcontext/DESC | 1 + searchlib/src/tests/attribute/searchcontext/FILES | 1 + .../attribute/searchcontext/searchcontext.cpp | 1900 +++ .../attribute/searchcontext/searchcontext_test.sh | 5 + .../src/tests/attribute/sourceselector/.gitignore | 4 + .../tests/attribute/sourceselector/CMakeLists.txt | 8 + searchlib/src/tests/attribute/sourceselector/DESC | 1 + searchlib/src/tests/attribute/sourceselector/FILES | 1 + .../sourceselector/sourceselector_test.cpp | 216 + .../src/tests/attribute/stringattribute/.gitignore | 4 + .../tests/attribute/stringattribute/CMakeLists.txt | 8 + searchlib/src/tests/attribute/stringattribute/DESC | 1 + .../src/tests/attribute/stringattribute/FILES | 1 + .../stringattribute/stringattribute_test.cpp | 453 + .../stringattribute/stringattribute_test.sh | 3 + .../src/tests/attribute/tensorattribute/.gitignore | 1 + .../tests/attribute/tensorattribute/CMakeLists.txt | 8 + searchlib/src/tests/attribute/tensorattribute/DESC | 1 + .../src/tests/attribute/tensorattribute/FILES | 1 + .../tensorattribute/tensorattribute_test.cpp | 217 + .../tensorattribute/tensorattribute_test.sh | 3 + .../src/tests/bitcompression/expgolomb/.gitignore | 1 + .../tests/bitcompression/expgolomb/CMakeLists.txt | 8 + searchlib/src/tests/bitcompression/expgolomb/DESC | 1 + searchlib/src/tests/bitcompression/expgolomb/FILES | 1 + .../bitcompression/expgolomb/expgolomb_test.cpp | 621 + searchlib/src/tests/bitvector/.gitignore | 4 + searchlib/src/tests/bitvector/CMakeLists.txt | 8 + searchlib/src/tests/bitvector/DESC | 1 + searchlib/src/tests/bitvector/FILES | 1 + .../src/tests/bitvector/bitvectorbenchmark.cpp | 225 + searchlib/src/tests/btree/.gitignore | 3 + searchlib/src/tests/btree/CMakeLists.txt | 15 + searchlib/src/tests/btree/DESC | 1 + searchlib/src/tests/btree/FILES | 1 + .../src/tests/btree/btreeaggregation_test.cpp | 1146 
++ searchlib/src/tests/btree/iteratespeed.cpp | 213 + searchlib/src/tests/bytecomplens/.gitignore | 5 + searchlib/src/tests/bytecomplens/CMakeLists.txt | 8 + searchlib/src/tests/bytecomplens/DESC | 1 + searchlib/src/tests/bytecomplens/FILES | 1 + searchlib/src/tests/bytecomplens/bytecomp.cpp | 102 + searchlib/src/tests/bytecomplens/example.txt | 122 + searchlib/src/tests/bytecomplens/tblprint.cpp | 357 + searchlib/src/tests/common/bitvector/.gitignore | 8 + .../src/tests/common/bitvector/CMakeLists.txt | 22 + searchlib/src/tests/common/bitvector/DESC | 1 + searchlib/src/tests/common/bitvector/FILES | 1 + .../tests/common/bitvector/bitvector_benchmark.cpp | 37 + .../src/tests/common/bitvector/bitvector_test.cpp | 541 + .../common/bitvector/condensedbitvector_test.cpp | 49 + .../tests/common/foregroundtaskexecutor/.gitignore | 1 + .../common/foregroundtaskexecutor/CMakeLists.txt | 8 + .../src/tests/common/foregroundtaskexecutor/DESC | 1 + .../src/tests/common/foregroundtaskexecutor/FILES | 1 + .../foregroundtaskexecutor_test.cpp | 124 + searchlib/src/tests/common/location/.gitignore | 1 + searchlib/src/tests/common/location/CMakeLists.txt | 8 + searchlib/src/tests/common/location/FILES | 1 + .../src/tests/common/location/location_test.cpp | 119 + searchlib/src/tests/common/packets/.gitignore | 4 + searchlib/src/tests/common/packets/CMakeLists.txt | 8 + searchlib/src/tests/common/packets/DESC | 1 + searchlib/src/tests/common/packets/FILES | 1 + .../src/tests/common/packets/packets_test.cpp | 705 + searchlib/src/tests/common/rcuvector/.gitignore | 4 + .../src/tests/common/rcuvector/CMakeLists.txt | 8 + searchlib/src/tests/common/rcuvector/DESC | 1 + searchlib/src/tests/common/rcuvector/FILES | 1 + .../src/tests/common/rcuvector/rcuvector_test.cpp | 284 + searchlib/src/tests/common/resultset/.gitignore | 1 + .../src/tests/common/resultset/CMakeLists.txt | 8 + .../src/tests/common/resultset/resultset_test.cpp | 109 + .../tests/common/sequencedtaskexecutor/.gitignore | 1 
+ .../common/sequencedtaskexecutor/CMakeLists.txt | 8 + .../src/tests/common/sequencedtaskexecutor/DESC | 1 + .../src/tests/common/sequencedtaskexecutor/FILES | 1 + .../sequencedtaskexecutor_test.cpp | 194 + .../src/tests/common/summaryfeatures/.gitignore | 4 + .../tests/common/summaryfeatures/CMakeLists.txt | 8 + searchlib/src/tests/common/summaryfeatures/DESC | 1 + searchlib/src/tests/common/summaryfeatures/FILES | 1 + .../common/summaryfeatures/summaryfeatures.cpp | 152 + searchlib/src/tests/create-test.sh | 52 + searchlib/src/tests/datastore/.gitignore | 8 + searchlib/src/tests/datastore/CMakeLists.txt | 8 + searchlib/src/tests/datastore/DESC | 1 + searchlib/src/tests/datastore/FILES | 1 + searchlib/src/tests/datastore/bad.dat | Bin 0 -> 4096 bytes .../datastore/bug-7257706/1422358701368384000.dat | Bin 0 -> 94208 bytes .../datastore/bug-7257706/1422358701368384000.idx | Bin 0 -> 4384 bytes .../datastore/dangling/1425506005745465000.dat | Bin 0 -> 4096 bytes .../datastore/dangling/1425506005745465000.idx | Bin 0 -> 480 bytes .../datastore/dangling/2425506005745465000.dat | Bin 0 -> 4096 bytes .../datastore/dangling/2425506005745465000.idx | Bin 0 -> 480 bytes .../datastore/dangling/3425506005745465000.dat | Bin 0 -> 4096 bytes .../datastore/dangling/4425506005745465000.dat | Bin 0 -> 4096 bytes .../datastore/dangling/4425506005745465000.idx | 0 searchlib/src/tests/datastore/datastore.dat | Bin 0 -> 5120 bytes .../src/tests/datastore/logdatastore_test.cpp | 468 + searchlib/src/tests/datastore/logdatastore_test.sh | 10 + searchlib/src/tests/diskindex/bitvector/.gitignore | 6 + .../src/tests/diskindex/bitvector/CMakeLists.txt | 8 + searchlib/src/tests/diskindex/bitvector/DESC | 1 + searchlib/src/tests/diskindex/bitvector/FILES | 1 + .../tests/diskindex/bitvector/bitvector_test.cpp | 221 + searchlib/src/tests/diskindex/diskindex/.gitignore | 5 + .../src/tests/diskindex/diskindex/CMakeLists.txt | 9 + searchlib/src/tests/diskindex/diskindex/DESC | 1 + 
searchlib/src/tests/diskindex/diskindex/FILES | 1 + .../tests/diskindex/diskindex/diskindex_test.cpp | 330 + .../src/tests/diskindex/fieldwriter/.gitignore | 3 + .../src/tests/diskindex/fieldwriter/CMakeLists.txt | 9 + .../diskindex/fieldwriter/fieldwriter_test.cpp | 972 ++ .../src/tests/diskindex/fieldwriter/runtests.sh | 66 + searchlib/src/tests/diskindex/fusion/.gitignore | 37 + .../src/tests/diskindex/fusion/CMakeLists.txt | 8 + searchlib/src/tests/diskindex/fusion/DESC | 1 + searchlib/src/tests/diskindex/fusion/FILES | 1 + .../src/tests/diskindex/fusion/fusion_test.cpp | 506 + .../src/tests/diskindex/fusion/fusion_test.sh | 15 + searchlib/src/tests/diskindex/pagedict4/.gitignore | 5 + .../src/tests/diskindex/pagedict4/CMakeLists.txt | 9 + .../tests/diskindex/pagedict4/pagedict4test.cpp | 876 ++ searchlib/src/tests/document_store/.gitignore | 1 + searchlib/src/tests/document_store/CMakeLists.txt | 8 + searchlib/src/tests/document_store/FILES | 1 + .../tests/document_store/document_store_test.cpp | 58 + .../src/tests/document_store/visitor/.gitignore | 1 + .../tests/document_store/visitor/CMakeLists.txt | 8 + searchlib/src/tests/document_store/visitor/DESC | 1 + searchlib/src/tests/document_store/visitor/FILES | 1 + .../visitor/document_store_visitor_test.cpp | 466 + searchlib/src/tests/engine/docsumapi/.gitignore | 4 + .../src/tests/engine/docsumapi/CMakeLists.txt | 8 + searchlib/src/tests/engine/docsumapi/DESC | 1 + searchlib/src/tests/engine/docsumapi/FILES | 1 + .../src/tests/engine/docsumapi/docsumapi_test.cpp | 185 + searchlib/src/tests/engine/monitorapi/.gitignore | 4 + .../src/tests/engine/monitorapi/CMakeLists.txt | 8 + searchlib/src/tests/engine/monitorapi/DESC | 1 + searchlib/src/tests/engine/monitorapi/FILES | 1 + .../tests/engine/monitorapi/monitorapi_test.cpp | 126 + searchlib/src/tests/engine/searchapi/.gitignore | 4 + .../src/tests/engine/searchapi/CMakeLists.txt | 8 + searchlib/src/tests/engine/searchapi/DESC | 1 + 
searchlib/src/tests/engine/searchapi/FILES | 1 + .../src/tests/engine/searchapi/searchapi_test.cpp | 267 + .../src/tests/engine/transportserver/.gitignore | 5 + .../tests/engine/transportserver/CMakeLists.txt | 12 + searchlib/src/tests/engine/transportserver/DESC | 1 + searchlib/src/tests/engine/transportserver/FILES | 1 + .../transportserver/transportserver_test.cpp | 187 + searchlib/src/tests/features/.gitignore | 11 + searchlib/src/tests/features/CMakeLists.txt | 19 + searchlib/src/tests/features/DESC | 1 + searchlib/src/tests/features/FILES | 3 + .../dotproduct/c-100000-1000-array-double.txt | 7 + .../dotproduct/c-100000-1000-array-float.txt | 7 + .../dotproduct/c-100000-1000-array-int.txt | 7 + .../dotproduct/c-100000-1000-array-long.txt | 7 + .../benchmark/dotproduct/c-100000-1000-wset.txt | 7 + .../features/benchmark/fieldmatch/c-100-1.txt | 7 + .../features/benchmark/fieldmatch/c-100-10.txt | 7 + .../features/benchmark/fieldmatch/c-100-100.txt | 7 + .../features/benchmark/fieldmatch/c-100-1000.txt | 7 + .../features/benchmark/fieldmatch/c-100-10000.txt | 7 + .../features/benchmark/fieldmatch/c-100-5.txt | 7 + .../features/benchmark/fieldmatch/c-100-50.txt | 7 + .../features/benchmark/fieldmatch/c-100-500.txt | 7 + .../tests/features/benchmark/fieldmatch/c-100.txt | 6 + .../benchmark/fieldmatch/c-1000-1-callgrind.txt | 7 + .../features/benchmark/fieldmatch/c-1000-1.txt | 7 + .../features/benchmark/fieldmatch/c-1000-10.txt | 7 + .../benchmark/fieldmatch/c-1000-100-callgrind.txt | 7 + .../features/benchmark/fieldmatch/c-1000-100.txt | 7 + .../features/benchmark/fieldmatch/c-1000-1000.txt | 7 + .../features/benchmark/fieldmatch/c-1000-10000.txt | 7 + .../features/benchmark/fieldmatch/c-1000-5.txt | 7 + .../features/benchmark/fieldmatch/c-1000-50.txt | 7 + .../features/benchmark/fieldmatch/c-1000-500.txt | 7 + .../tests/features/benchmark/fieldmatch/c-1000.txt | 6 + .../features/benchmark/fieldmatch/c-10000-1.txt | 7 + 
.../features/benchmark/fieldmatch/c-10000-10.txt | 7 + .../features/benchmark/fieldmatch/c-10000-100.txt | 7 + .../features/benchmark/fieldmatch/c-10000-1000.txt | 7 + .../benchmark/fieldmatch/c-10000-10000.txt | 7 + .../features/benchmark/fieldmatch/c-10000-5.txt | 7 + .../features/benchmark/fieldmatch/c-10000-50.txt | 7 + .../features/benchmark/fieldmatch/c-10000-500.txt | 7 + .../features/benchmark/fieldmatch/c-10000.txt | 6 + .../tests/features/benchmark/fieldmatch/c-20-1.txt | 7 + .../features/benchmark/fieldmatch/c-20-10.txt | 7 + .../features/benchmark/fieldmatch/c-20-100.txt | 7 + .../features/benchmark/fieldmatch/c-20-1000.txt | 7 + .../features/benchmark/fieldmatch/c-20-10000.txt | 7 + .../tests/features/benchmark/fieldmatch/c-20-5.txt | 7 + .../features/benchmark/fieldmatch/c-20-50.txt | 7 + .../features/benchmark/fieldmatch/c-20-500.txt | 7 + .../tests/features/benchmark/fieldmatch/c-20.txt | 6 + .../features/benchmark/fieldmatch/phrase-02.txt | 7 + .../features/benchmark/fieldmatch/phrase-10.txt | 7 + .../features/benchmark/fieldmatch/phrase-50.txt | 7 + .../tests/features/benchmark/fieldmatch/plot.rb | 30 + .../tests/features/benchmark/fieldmatch/readme.txt | 22 + .../src/tests/features/benchmark/fieldmatch/run.rb | 17 + searchlib/src/tests/features/benchmark/plotlib.rb | 36 + .../features/benchmark/rankingexpression/c-1.txt | 4 + .../features/benchmark/rankingexpression/c-10.txt | 4 + .../features/benchmark/rankingexpression/c-100.txt | 4 + .../features/benchmark/rankingexpression/c-200.txt | 4 + .../features/benchmark/rankingexpression/c-400.txt | 4 + .../features/benchmark/rankingexpression/c-5.txt | 4 + .../features/benchmark/rankingexpression/c-50.txt | 4 + .../features/benchmark/rankingexpression/c-800.txt | 4 + .../features/benchmark/rankingexpression/plot.rb | 22 + .../features/benchmark/rankingexpression/run.rb | 14 + searchlib/src/tests/features/beta/.gitignore | 1 + searchlib/src/tests/features/beta/CMakeLists.txt | 12 + 
.../src/tests/features/beta/beta_features.cpp | 726 + .../tests/features/element_completeness/.gitignore | 1 + .../features/element_completeness/CMakeLists.txt | 8 + .../src/tests/features/element_completeness/FILES | 1 + .../element_completeness_test.cpp | 201 + .../features/element_similarity_feature/.gitignore | 1 + .../element_similarity_feature/CMakeLists.txt | 8 + .../element_similarity_feature_test.cpp | 371 + .../tests/features/euclidean_distance/.gitignore | 1 + .../features/euclidean_distance/CMakeLists.txt | 8 + .../src/tests/features/euclidean_distance/FILES | 1 + .../euclidean_distance/euclidean_distance_test.cpp | 115 + searchlib/src/tests/features/featurebenchmark.cpp | 657 + .../src/tests/features/item_raw_score/.gitignore | 1 + .../tests/features/item_raw_score/CMakeLists.txt | 8 + searchlib/src/tests/features/item_raw_score/FILES | 1 + .../item_raw_score/item_raw_score_test.cpp | 158 + .../tests/features/native_dot_product/.gitignore | 1 + .../features/native_dot_product/CMakeLists.txt | 8 + .../src/tests/features/native_dot_product/FILES | 1 + .../native_dot_product/native_dot_product_test.cpp | 191 + searchlib/src/tests/features/prod_features.cpp | 1937 +++ searchlib/src/tests/features/prod_features.h | 175 + .../features/prod_features_attributematch.cpp | 300 + .../tests/features/prod_features_fieldmatch.cpp | 1079 ++ .../features/prod_features_fieldtermmatch.cpp | 113 + .../src/tests/features/prod_features_framework.cpp | 174 + searchlib/src/tests/features/prod_features_test.sh | 3 + .../tests/features/ranking_expression/.gitignore | 1 + .../features/ranking_expression/CMakeLists.txt | 8 + .../ranking_expression/ranking_expression_test.cpp | 90 + searchlib/src/tests/features/raw_score/.gitignore | 1 + .../src/tests/features/raw_score/CMakeLists.txt | 8 + searchlib/src/tests/features/raw_score/FILES | 1 + .../tests/features/raw_score/raw_score_test.cpp | 151 + searchlib/src/tests/features/subqueries/.gitignore | 1 + 
.../src/tests/features/subqueries/CMakeLists.txt | 8 + .../tests/features/subqueries/subqueries_test.cpp | 162 + searchlib/src/tests/features/tensor/.gitignore | 1 + searchlib/src/tests/features/tensor/CMakeLists.txt | 8 + searchlib/src/tests/features/tensor/FILES | 1 + .../src/tests/features/tensor/tensor_test.cpp | 237 + .../tests/features/tensor_from_labels/.gitignore | 1 + .../features/tensor_from_labels/CMakeLists.txt | 8 + .../src/tests/features/tensor_from_labels/FILES | 1 + .../tensor_from_labels/tensor_from_labels_test.cpp | 211 + .../features/tensor_from_weighted_set/.gitignore | 1 + .../tensor_from_weighted_set/CMakeLists.txt | 8 + .../tests/features/tensor_from_weighted_set/FILES | 1 + .../tensor_from_weighted_set_test.cpp | 198 + .../features/text_similarity_feature/.gitignore | 1 + .../text_similarity_feature/CMakeLists.txt | 8 + .../tests/features/text_similarity_feature/FILES | 1 + .../text_similarity_feature_test.cpp | 245 + searchlib/src/tests/features/util/.gitignore | 1 + searchlib/src/tests/features/util/CMakeLists.txt | 8 + searchlib/src/tests/features/util/FILES | 1 + searchlib/src/tests/features/util/util_test.cpp | 40 + searchlib/src/tests/fef/.gitignore | 4 + searchlib/src/tests/fef/CMakeLists.txt | 8 + searchlib/src/tests/fef/DESC | 1 + searchlib/src/tests/fef/FILES | 1 + .../src/tests/fef/attributecontent/.gitignore | 4 + .../src/tests/fef/attributecontent/CMakeLists.txt | 8 + searchlib/src/tests/fef/attributecontent/DESC | 1 + searchlib/src/tests/fef/attributecontent/FILES | 1 + .../fef/attributecontent/attributecontent_test.cpp | 106 + .../src/tests/fef/featurenamebuilder/.gitignore | 4 + .../tests/fef/featurenamebuilder/CMakeLists.txt | 8 + searchlib/src/tests/fef/featurenamebuilder/DESC | 1 + searchlib/src/tests/fef/featurenamebuilder/FILES | 1 + .../featurenamebuilder/featurenamebuilder_test.cpp | 78 + .../src/tests/fef/featurenameparser/.gitignore | 4 + .../src/tests/fef/featurenameparser/CMakeLists.txt | 8 + 
searchlib/src/tests/fef/featurenameparser/DESC | 1 + searchlib/src/tests/fef/featurenameparser/FILES | 1 + .../featurenameparser/featurenameparser_test.cpp | 151 + .../src/tests/fef/featurenameparser/parsetest.txt | 55 + searchlib/src/tests/fef/featureoverride/.gitignore | 4 + .../src/tests/fef/featureoverride/CMakeLists.txt | 8 + searchlib/src/tests/fef/featureoverride/DESC | 1 + searchlib/src/tests/fef/featureoverride/FILES | 1 + .../tests/fef/featureoverride/featureoverride.cpp | 175 + searchlib/src/tests/fef/fef_test.cpp | 116 + searchlib/src/tests/fef/object_passing/.gitignore | 1 + .../src/tests/fef/object_passing/CMakeLists.txt | 8 + .../fef/object_passing/object_passing_test.cpp | 128 + searchlib/src/tests/fef/parameter/.gitignore | 4 + searchlib/src/tests/fef/parameter/CMakeLists.txt | 8 + searchlib/src/tests/fef/parameter/DESC | 1 + searchlib/src/tests/fef/parameter/FILES | 1 + .../src/tests/fef/parameter/parameter_test.cpp | 267 + searchlib/src/tests/fef/phrasesplitter/.gitignore | 6 + .../src/tests/fef/phrasesplitter/CMakeLists.txt | 15 + searchlib/src/tests/fef/phrasesplitter/DESC | 1 + searchlib/src/tests/fef/phrasesplitter/FILES | 1 + .../src/tests/fef/phrasesplitter/benchmark.cpp | 84 + .../fef/phrasesplitter/phrasesplitter_test.cpp | 242 + searchlib/src/tests/fef/properties/.gitignore | 4 + searchlib/src/tests/fef/properties/CMakeLists.txt | 8 + searchlib/src/tests/fef/properties/DESC | 1 + searchlib/src/tests/fef/properties/FILES | 1 + .../src/tests/fef/properties/properties_test.cpp | 425 + searchlib/src/tests/fef/rank_program/.gitignore | 1 + .../src/tests/fef/rank_program/CMakeLists.txt | 8 + searchlib/src/tests/fef/rank_program/FILES | 1 + .../tests/fef/rank_program/rank_program_test.cpp | 172 + searchlib/src/tests/fef/resolver/.gitignore | 4 + searchlib/src/tests/fef/resolver/CMakeLists.txt | 8 + searchlib/src/tests/fef/resolver/DESC | 1 + searchlib/src/tests/fef/resolver/FILES | 1 + searchlib/src/tests/fef/resolver/resolver_test.cpp | 93 + 
searchlib/src/tests/fef/table/.gitignore | 4 + searchlib/src/tests/fef/table/CMakeLists.txt | 8 + searchlib/src/tests/fef/table/DESC | 1 + searchlib/src/tests/fef/table/FILES | 1 + searchlib/src/tests/fef/table/table_test.cpp | 159 + searchlib/src/tests/fef/table/tables1/a | 3 + searchlib/src/tests/fef/table/tables2/a | 3 + searchlib/src/tests/fef/table/tables2/b | 3 + searchlib/src/tests/fef/termfieldmodel/.gitignore | 4 + .../src/tests/fef/termfieldmodel/CMakeLists.txt | 8 + searchlib/src/tests/fef/termfieldmodel/DESC | 1 + searchlib/src/tests/fef/termfieldmodel/FILES | 1 + .../fef/termfieldmodel/termfieldmodel_test.cpp | 209 + .../src/tests/fef/termmatchdatamerger/.gitignore | 4 + .../tests/fef/termmatchdatamerger/CMakeLists.txt | 8 + searchlib/src/tests/fef/termmatchdatamerger/DESC | 1 + searchlib/src/tests/fef/termmatchdatamerger/FILES | 1 + .../termmatchdatamerger_test.cpp | 281 + searchlib/src/tests/fileheaderinspect/.gitignore | 6 + .../src/tests/fileheaderinspect/CMakeLists.txt | 8 + searchlib/src/tests/fileheaderinspect/DESC | 1 + searchlib/src/tests/fileheaderinspect/FILES | 1 + .../tests/fileheaderinspect/fileheaderinspect.cpp | 131 + searchlib/src/tests/fileheadertk/.gitignore | 6 + searchlib/src/tests/fileheadertk/CMakeLists.txt | 8 + searchlib/src/tests/fileheadertk/DESC | 1 + searchlib/src/tests/fileheadertk/FILES | 1 + .../src/tests/fileheadertk/fileheadertk_test.cpp | 47 + searchlib/src/tests/forcelink/.gitignore | 4 + searchlib/src/tests/forcelink/CMakeLists.txt | 8 + searchlib/src/tests/forcelink/DESC | 1 + searchlib/src/tests/forcelink/FILES | 1 + searchlib/src/tests/forcelink/forcelink.cpp | 18 + searchlib/src/tests/grouping/.gitignore | 11 + searchlib/src/tests/grouping/CMakeLists.txt | 29 + searchlib/src/tests/grouping/DESC | 1 + searchlib/src/tests/grouping/FILES | 4 + .../tests/grouping/grouping_serialization_test.cpp | 339 + searchlib/src/tests/grouping/grouping_test.cpp | 1912 +++ searchlib/src/tests/grouping/hyperloglog_test.cpp | 92 + 
searchlib/src/tests/grouping/sketch_test.cpp | 151 + searchlib/src/tests/groupingengine/.gitignore | 7 + searchlib/src/tests/groupingengine/CMakeLists.txt | 15 + searchlib/src/tests/groupingengine/DESC | 1 + searchlib/src/tests/groupingengine/FILES | 4 + .../groupingengine/groupingengine_benchmark.cpp | 292 + .../tests/groupingengine/groupingengine_test.cpp | 1985 +++ searchlib/src/tests/hitcollector/.gitignore | 4 + searchlib/src/tests/hitcollector/CMakeLists.txt | 8 + searchlib/src/tests/hitcollector/DESC | 1 + searchlib/src/tests/hitcollector/FILES | 1 + .../src/tests/hitcollector/hitcollector_test.cpp | 493 + searchlib/src/tests/index/docbuilder/.gitignore | 5 + .../src/tests/index/docbuilder/CMakeLists.txt | 8 + searchlib/src/tests/index/docbuilder/DESC | 1 + searchlib/src/tests/index/docbuilder/FILES | 1 + .../src/tests/index/docbuilder/docbuilder_test.cpp | 531 + .../src/tests/index/doctypebuilder/.gitignore | 5 + .../src/tests/index/doctypebuilder/CMakeLists.txt | 8 + searchlib/src/tests/index/doctypebuilder/DESC | 1 + searchlib/src/tests/index/doctypebuilder/FILES | 1 + .../index/doctypebuilder/doctypebuilder_test.cpp | 88 + searchlib/src/tests/indexmetainfo/.gitignore | 5 + searchlib/src/tests/indexmetainfo/CMakeLists.txt | 8 + searchlib/src/tests/indexmetainfo/DESC | 2 + searchlib/src/tests/indexmetainfo/FILES | 1 + searchlib/src/tests/indexmetainfo/bogus1.txt | 1 + searchlib/src/tests/indexmetainfo/bogus10.txt | 4 + searchlib/src/tests/indexmetainfo/bogus2.txt | 1 + searchlib/src/tests/indexmetainfo/bogus3.txt | 1 + searchlib/src/tests/indexmetainfo/bogus4.txt | 1 + searchlib/src/tests/indexmetainfo/bogus5.txt | 7 + searchlib/src/tests/indexmetainfo/bogus6.txt | 7 + searchlib/src/tests/indexmetainfo/bogus7.txt | 4 + searchlib/src/tests/indexmetainfo/bogus8.txt | 4 + searchlib/src/tests/indexmetainfo/bogus9.txt | 4 + .../src/tests/indexmetainfo/indexmetainfo_test.cpp | 127 + searchlib/src/tests/indexmetainfo/meta-info.txt | 12 + 
searchlib/src/tests/ld-library-path/.gitignore | 4 + searchlib/src/tests/ld-library-path/CMakeLists.txt | 7 + .../src/tests/ld-library-path/ld-library-path.cpp | 12 + searchlib/src/tests/memoryindex/btree/.gitignore | 6 + .../src/tests/memoryindex/btree/CMakeLists.txt | 15 + searchlib/src/tests/memoryindex/btree/DESC | 1 + searchlib/src/tests/memoryindex/btree/FILES | 1 + .../src/tests/memoryindex/btree/btree_test.cpp | 1282 ++ .../tests/memoryindex/btree/frozenbtree_test.cpp | 513 + .../compact_document_words_store/.gitignore | 1 + .../compact_document_words_store/CMakeLists.txt | 8 + .../memoryindex/compact_document_words_store/DESC | 1 + .../memoryindex/compact_document_words_store/FILES | 1 + .../compact_document_words_store_test.cpp | 157 + .../src/tests/memoryindex/datastore/.gitignore | 8 + .../src/tests/memoryindex/datastore/CMakeLists.txt | 22 + searchlib/src/tests/memoryindex/datastore/DESC | 1 + searchlib/src/tests/memoryindex/datastore/FILES | 2 + .../tests/memoryindex/datastore/datastore_test.cpp | 432 + .../memoryindex/datastore/featurestore_test.cpp | 245 + .../tests/memoryindex/datastore/wordstore_test.cpp | 104 + .../src/tests/memoryindex/dictionary/.gitignore | 6 + .../tests/memoryindex/dictionary/CMakeLists.txt | 9 + searchlib/src/tests/memoryindex/dictionary/DESC | 1 + searchlib/src/tests/memoryindex/dictionary/FILES | 1 + .../memoryindex/dictionary/dictionary_test.cpp | 1528 +++ .../tests/memoryindex/document_remover/.gitignore | 1 + .../memoryindex/document_remover/CMakeLists.txt | 8 + .../src/tests/memoryindex/document_remover/DESC | 1 + .../src/tests/memoryindex/document_remover/FILES | 1 + .../document_remover/document_remover_test.cpp | 144 + .../tests/memoryindex/documentinverter/.gitignore | 1 + .../memoryindex/documentinverter/CMakeLists.txt | 9 + .../src/tests/memoryindex/documentinverter/DESC | 1 + .../src/tests/memoryindex/documentinverter/FILES | 1 + .../documentinverter/documentinverter_test.cpp | 294 + 
.../src/tests/memoryindex/fieldinverter/.gitignore | 1 + .../tests/memoryindex/fieldinverter/CMakeLists.txt | 9 + searchlib/src/tests/memoryindex/fieldinverter/DESC | 1 + .../src/tests/memoryindex/fieldinverter/FILES | 1 + .../fieldinverter/fieldinverter_test.cpp | 338 + .../src/tests/memoryindex/memoryindex/.gitignore | 5 + .../tests/memoryindex/memoryindex/CMakeLists.txt | 8 + searchlib/src/tests/memoryindex/memoryindex/DESC | 1 + searchlib/src/tests/memoryindex/memoryindex/FILES | 1 + .../memoryindex/memoryindex/memoryindex_test.cpp | 438 + .../tests/memoryindex/urlfieldinverter/.gitignore | 1 + .../memoryindex/urlfieldinverter/CMakeLists.txt | 9 + .../src/tests/memoryindex/urlfieldinverter/DESC | 1 + .../src/tests/memoryindex/urlfieldinverter/FILES | 1 + .../urlfieldinverter/urlfieldinverter_test.cpp | 579 + searchlib/src/tests/memorytub/.gitignore | 4 + searchlib/src/tests/memorytub/CMakeLists.txt | 8 + searchlib/src/tests/memorytub/memorytub_test.cpp | 205 + searchlib/src/tests/nativerank/.gitignore | 2 + searchlib/src/tests/nativerank/CMakeLists.txt | 12 + searchlib/src/tests/nativerank/nativerank.cpp | 828 ++ searchlib/src/tests/nearsearch/.gitignore | 4 + searchlib/src/tests/nearsearch/CMakeLists.txt | 8 + searchlib/src/tests/nearsearch/DESC | 1 + searchlib/src/tests/nearsearch/FILES | 1 + searchlib/src/tests/nearsearch/nearsearch_test.cpp | 247 + searchlib/src/tests/postinglistbm/.gitignore | 10 + searchlib/src/tests/postinglistbm/CMakeLists.txt | 10 + searchlib/src/tests/postinglistbm/andstress.cpp | 536 + searchlib/src/tests/postinglistbm/andstress.h | 43 + .../src/tests/postinglistbm/postinglistbm.cpp | 491 + searchlib/src/tests/postinglistbm/skip.txt | 75 + searchlib/src/tests/predicate/.gitignore | 13 + searchlib/src/tests/predicate/CMakeLists.txt | 92 + searchlib/src/tests/predicate/OWNERS | 1 + .../predicate/document_features_store_test.cpp | 225 + .../predicate_bounds_posting_list_test.cpp | 107 + .../src/tests/predicate/predicate_index_test.cpp | 
363 + .../predicate_interval_posting_list_test.cpp | 80 + .../predicate/predicate_interval_store_test.cpp | 152 + .../predicate_range_term_expander_test.cpp | 332 + .../tests/predicate/predicate_ref_cache_test.cpp | 106 + .../predicate/predicate_tree_analyzer_test.cpp | 157 + .../predicate/predicate_tree_annotator_test.cpp | 381 + ...predicate_zero_constraint_posting_list_test.cpp | 58 + ...redicate_zstar_compressed_posting_list_test.cpp | 95 + .../src/tests/predicate/simple_index_test.cpp | 333 + searchlib/src/tests/predicate/tree_crumbs_test.cpp | 65 + searchlib/src/tests/prettyfloat/.gitignore | 4 + searchlib/src/tests/prettyfloat/CMakeLists.txt | 8 + searchlib/src/tests/prettyfloat/DESC | 1 + searchlib/src/tests/prettyfloat/FILES | 1 + searchlib/src/tests/prettyfloat/prettyfloat.cpp | 32 + searchlib/src/tests/query/.gitignore | 10 + searchlib/src/tests/query/CMakeLists.txt | 50 + searchlib/src/tests/query/DESC | 1 + searchlib/src/tests/query/FILES | 2 + .../src/tests/query/customtypevisitor_test.cpp | 157 + searchlib/src/tests/query/query-old-large.cpp | 51 + searchlib/src/tests/query/query-old.cpp | 650 + searchlib/src/tests/query/query_visitor_test.cpp | 114 + searchlib/src/tests/query/querybuilder_test.cpp | 615 + .../src/tests/query/stackdumpquerycreator_test.cpp | 116 + .../src/tests/query/templatetermvisitor_test.cpp | 87 + searchlib/src/tests/queryeval/.gitignore | 5 + searchlib/src/tests/queryeval/CMakeLists.txt | 9 + searchlib/src/tests/queryeval/DESC | 1 + searchlib/src/tests/queryeval/FILES | 1 + searchlib/src/tests/queryeval/blueprint/.cvsignore | 3 + searchlib/src/tests/queryeval/blueprint/.gitignore | 8 + .../src/tests/queryeval/blueprint/CMakeLists.txt | 23 + searchlib/src/tests/queryeval/blueprint/DESC | 1 + searchlib/src/tests/queryeval/blueprint/FILES | 1 + .../tests/queryeval/blueprint/blueprint_test.cpp | 766 ++ .../blueprint/intermediate_blueprints_test.cpp | 1332 ++ .../queryeval/blueprint/leaf_blueprints_test.cpp | 125 + 
searchlib/src/tests/queryeval/blueprint/mysearch.h | 155 + .../booleanmatchiteratorwrapper/.cvsignore | 3 + .../booleanmatchiteratorwrapper/.gitignore | 4 + .../booleanmatchiteratorwrapper/CMakeLists.txt | 9 + .../queryeval/booleanmatchiteratorwrapper/DESC | 1 + .../queryeval/booleanmatchiteratorwrapper/FILES | 1 + .../booleanmatchiteratorwrapper_test.cpp | 133 + .../src/tests/queryeval/dot_product/.gitignore | 1 + .../src/tests/queryeval/dot_product/CMakeLists.txt | 9 + searchlib/src/tests/queryeval/dot_product/FILES | 1 + .../queryeval/dot_product/dot_product_test.cpp | 219 + searchlib/src/tests/queryeval/equiv/.cvsignore | 3 + searchlib/src/tests/queryeval/equiv/.gitignore | 4 + searchlib/src/tests/queryeval/equiv/CMakeLists.txt | 8 + searchlib/src/tests/queryeval/equiv/DESC | 1 + searchlib/src/tests/queryeval/equiv/FILES | 1 + searchlib/src/tests/queryeval/equiv/equiv_test.cpp | 130 + .../src/tests/queryeval/fake_searchable/.cvsignore | 3 + .../src/tests/queryeval/fake_searchable/.gitignore | 4 + .../tests/queryeval/fake_searchable/CMakeLists.txt | 8 + searchlib/src/tests/queryeval/fake_searchable/DESC | 1 + .../src/tests/queryeval/fake_searchable/FILES | 1 + .../fake_searchable/fake_searchable_test.cpp | 379 + .../src/tests/queryeval/getnodeweight/.gitignore | 1 + .../tests/queryeval/getnodeweight/CMakeLists.txt | 8 + .../queryeval/getnodeweight/getnodeweight_test.cpp | 49 + .../monitoring_search_iterator/.gitignore | 1 + .../monitoring_search_iterator/CMakeLists.txt | 9 + .../queryeval/monitoring_search_iterator/DESC | 1 + .../queryeval/monitoring_search_iterator/FILES | 1 + .../monitoring_search_iterator_test.cpp | 325 + .../queryeval/multibitvectoriterator/.gitignore | 2 + .../multibitvectoriterator/CMakeLists.txt | 15 + .../tests/queryeval/multibitvectoriterator/DESC | 1 + .../tests/queryeval/multibitvectoriterator/FILES | 2 + .../multibitvectoriterator_bench.cpp | 138 + .../multibitvectoriterator_test.cpp | 531 + 
.../tests/queryeval/parallel_weak_and/.gitignore | 1 + .../queryeval/parallel_weak_and/CMakeLists.txt | 9 + .../src/tests/queryeval/parallel_weak_and/DESC | 1 + .../src/tests/queryeval/parallel_weak_and/FILES | 2 + .../parallel_weak_and/parallel_weak_and_test.cpp | 681 + searchlib/src/tests/queryeval/predicate/.gitignore | 2 + .../src/tests/queryeval/predicate/CMakeLists.txt | 15 + .../predicate/predicate_blueprint_test.cpp | 241 + .../queryeval/predicate/predicate_search_test.cpp | 370 + searchlib/src/tests/queryeval/queryeval.cpp | 691 + .../src/tests/queryeval/simple_phrase/.cvsignore | 3 + .../src/tests/queryeval/simple_phrase/.gitignore | 4 + .../tests/queryeval/simple_phrase/CMakeLists.txt | 8 + searchlib/src/tests/queryeval/simple_phrase/DESC | 1 + searchlib/src/tests/queryeval/simple_phrase/FILES | 1 + .../queryeval/simple_phrase/simple_phrase_test.cpp | 341 + .../src/tests/queryeval/sourceblender/.gitignore | 4 + .../tests/queryeval/sourceblender/CMakeLists.txt | 9 + searchlib/src/tests/queryeval/sourceblender/DESC | 1 + searchlib/src/tests/queryeval/sourceblender/FILES | 1 + .../queryeval/sourceblender/sourceblender.cpp | 169 + .../queryeval/sparse_vector_benchmark/.gitignore | 6 + .../sparse_vector_benchmark/CMakeLists.txt | 8 + .../tests/queryeval/sparse_vector_benchmark/FILES | 1 + .../sparse_vector_benchmark_test.cpp | 429 + .../src/tests/queryeval/termwise_eval/.gitignore | 1 + .../tests/queryeval/termwise_eval/CMakeLists.txt | 9 + .../queryeval/termwise_eval/termwise_eval_test.cpp | 641 + searchlib/src/tests/queryeval/weak_and/.gitignore | 7 + .../src/tests/queryeval/weak_and/CMakeLists.txt | 30 + searchlib/src/tests/queryeval/weak_and/FILES | 2 + .../queryeval/weak_and/parallel_weak_and_bench.cpp | 19 + searchlib/src/tests/queryeval/weak_and/rise_wand.h | 132 + .../src/tests/queryeval/weak_and/rise_wand.hpp | 238 + .../tests/queryeval/weak_and/wand_bench_setup.hpp | 248 + .../tests/queryeval/weak_and/weak_and_bench.cpp | 19 + 
.../src/tests/queryeval/weak_and/weak_and_test.cpp | 128 + .../queryeval/weak_and/weak_and_test_expensive.cpp | 102 + .../src/tests/queryeval/weak_and_heap/.gitignore | 1 + .../tests/queryeval/weak_and_heap/CMakeLists.txt | 8 + searchlib/src/tests/queryeval/weak_and_heap/DESC | 1 + searchlib/src/tests/queryeval/weak_and_heap/FILES | 1 + .../queryeval/weak_and_heap/weak_and_heap_test.cpp | 101 + .../tests/queryeval/weak_and_scorers/.gitignore | 1 + .../queryeval/weak_and_scorers/CMakeLists.txt | 8 + .../src/tests/queryeval/weak_and_scorers/DESC | 1 + .../src/tests/queryeval/weak_and_scorers/FILES | 1 + .../weak_and_scorers/weak_and_scorers_test.cpp | 67 + .../tests/queryeval/weighted_set_term/.gitignore | 1 + .../queryeval/weighted_set_term/CMakeLists.txt | 9 + .../src/tests/queryeval/weighted_set_term/DESC | 1 + .../src/tests/queryeval/weighted_set_term/FILES | 1 + .../weighted_set_term/weighted_set_term_test.cpp | 240 + .../feature_name_extractor/.gitignore | 1 + .../feature_name_extractor/CMakeLists.txt | 8 + .../rankingexpression/feature_name_extractor/FILES | 1 + .../feature_name_extractor_test.cpp | 79 + .../tests/rankingexpression/rankingexpressionlist | 160 + searchlib/src/tests/ranksetup/.gitignore | 5 + searchlib/src/tests/ranksetup/CMakeLists.txt | 8 + searchlib/src/tests/ranksetup/DESC | 1 + searchlib/src/tests/ranksetup/FILES | 1 + searchlib/src/tests/ranksetup/ranksetup_test.cpp | 922 ++ .../src/tests/ranksetup/verify_feature/.gitignore | 1 + .../tests/ranksetup/verify_feature/CMakeLists.txt | 8 + searchlib/src/tests/ranksetup/verify_feature/FILES | 1 + .../verify_feature/verify_feature_test.cpp | 58 + searchlib/src/tests/sha1/.gitignore | 0 searchlib/src/tests/sort/.gitignore | 8 + searchlib/src/tests/sort/CMakeLists.txt | 22 + searchlib/src/tests/sort/DESC | 1 + searchlib/src/tests/sort/FILES | 1 + searchlib/src/tests/sort/javaorder.zh | 158 + searchlib/src/tests/sort/sort_test.cpp | 295 + searchlib/src/tests/sort/sortbenchmark.cpp | 115 + 
searchlib/src/tests/sort/uca.cpp | 121 + searchlib/src/tests/sortresults/.gitignore | 7 + searchlib/src/tests/sortresults/CMakeLists.txt | 8 + searchlib/src/tests/sortresults/sorttest.cpp | 99 + searchlib/src/tests/sortspec/.gitignore | 4 + searchlib/src/tests/sortspec/CMakeLists.txt | 8 + searchlib/src/tests/sortspec/multilevelsort.cpp | 413 + searchlib/src/tests/stackdumpiterator/.gitignore | 7 + .../src/tests/stackdumpiterator/CMakeLists.txt | 8 + .../stackdumpiterator/stackdumpiteratortest.cpp | 316 + .../stackdumpiterator/stackdumpiteratortest.h | 17 + .../src/tests/stackdumpiterator/testowner.ATS | 1 + searchlib/src/tests/stringenum/.gitignore | 8 + searchlib/src/tests/stringenum/CMakeLists.txt | 8 + searchlib/src/tests/stringenum/stringenum_test.cpp | 147 + searchlib/src/tests/transactionlog/.gitignore | 7 + searchlib/src/tests/transactionlog/CMakeLists.txt | 15 + searchlib/src/tests/transactionlog/DESC | 1 + searchlib/src/tests/transactionlog/FILES | 2 + .../tests/transactionlog/translogclient_test.cpp | 926 ++ .../tests/transactionlog/translogclient_test.sh | 4 + .../tests/transactionlog/translogserver_test.cpp | 19 + .../src/tests/transactionlogstress/.gitignore | 4 + .../src/tests/transactionlogstress/CMakeLists.txt | 8 + searchlib/src/tests/transactionlogstress/DESC | 1 + searchlib/src/tests/transactionlogstress/FILES | 1 + .../tests/transactionlogstress/translogstress.cpp | 875 ++ searchlib/src/tests/true/.gitignore | 4 + searchlib/src/tests/true/CMakeLists.txt | 7 + searchlib/src/tests/true/DESC | 1 + searchlib/src/tests/true/FILES | 1 + searchlib/src/tests/true/true.cpp | 15 + searchlib/src/tests/url/.gitignore | 7 + searchlib/src/tests/url/CMakeLists.txt | 8 + searchlib/src/tests/url/dotest.sh | 13 + searchlib/src/tests/url/testurl.cpp | 750 ++ searchlib/src/tests/util/.gitignore | 4 + searchlib/src/tests/util/CMakeLists.txt | 8 + searchlib/src/tests/util/bufferwriter/.gitignore | 3 + .../src/tests/util/bufferwriter/CMakeLists.txt | 16 + 
searchlib/src/tests/util/bufferwriter/bm.cpp | 95 + .../tests/util/bufferwriter/bufferwriter_test.cpp | 158 + searchlib/src/tests/util/bufferwriter/work.cpp | 93 + searchlib/src/tests/util/bufferwriter/work.h | 19 + searchlib/src/tests/util/ioerrorhandler/.gitignore | 1 + .../src/tests/util/ioerrorhandler/CMakeLists.txt | 9 + searchlib/src/tests/util/ioerrorhandler/DESC | 1 + searchlib/src/tests/util/ioerrorhandler/FILES | 1 + .../util/ioerrorhandler/ioerrorhandler_test.cpp | 358 + searchlib/src/tests/util/rawbuf_test.cpp | 198 + .../src/tests/util/searchable_stats/.gitignore | 4 + .../src/tests/util/searchable_stats/CMakeLists.txt | 8 + searchlib/src/tests/util/searchable_stats/DESC | 1 + searchlib/src/tests/util/searchable_stats/FILES | 1 + .../searchable_stats/searchable_stats_test.cpp | 42 + searchlib/src/tests/util/sigbushandler/.gitignore | 1 + .../src/tests/util/sigbushandler/CMakeLists.txt | 9 + searchlib/src/tests/util/sigbushandler/DESC | 1 + searchlib/src/tests/util/sigbushandler/FILES | 1 + .../util/sigbushandler/sigbushandler_test.cpp | 131 + .../util/slime_output_raw_buf_adapter/.gitignore | 1 + .../slime_output_raw_buf_adapter/CMakeLists.txt | 8 + .../tests/util/slime_output_raw_buf_adapter/FILES | 1 + .../slime_output_raw_buf_adapter_test.cpp | 25 + searchlib/src/tests/util/statebuf/.gitignore | 1 + searchlib/src/tests/util/statebuf/CMakeLists.txt | 8 + searchlib/src/tests/util/statebuf/DESC | 1 + searchlib/src/tests/util/statebuf/FILES | 1 + .../src/tests/util/statebuf/statebuf_test.cpp | 109 + searchlib/src/tests/util/statefile/.gitignore | 1 + searchlib/src/tests/util/statefile/CMakeLists.txt | 9 + searchlib/src/tests/util/statefile/DESC | 1 + searchlib/src/tests/util/statefile/FILES | 1 + .../src/tests/util/statefile/statefile_test.cpp | 294 + searchlib/src/vespa/searchlib/.gitignore | 4 + searchlib/src/vespa/searchlib/CMakeLists.txt | 31 + .../src/vespa/searchlib/aggregation/.gitignore | 6 + .../src/vespa/searchlib/aggregation/CMakeLists.txt | 
16 + searchlib/src/vespa/searchlib/aggregation/OWNERS | 1 + .../vespa/searchlib/aggregation/aggregation.cpp | 448 + .../src/vespa/searchlib/aggregation/aggregation.h | 22 + .../searchlib/aggregation/aggregationresult.h | 116 + .../aggregation/averageaggregationresult.h | 27 + .../searchlib/aggregation/countaggregationresult.h | 27 + .../aggregation/expressioncountaggregationresult.h | 36 + .../src/vespa/searchlib/aggregation/forcelink.hpp | 29 + .../src/vespa/searchlib/aggregation/fs4hit.cpp | 61 + searchlib/src/vespa/searchlib/aggregation/fs4hit.h | 39 + .../src/vespa/searchlib/aggregation/group.cpp | 671 + searchlib/src/vespa/searchlib/aggregation/group.h | 201 + .../src/vespa/searchlib/aggregation/grouping.cpp | 357 + .../src/vespa/searchlib/aggregation/grouping.h | 93 + .../vespa/searchlib/aggregation/groupinglevel.cpp | 109 + .../vespa/searchlib/aggregation/groupinglevel.h | 121 + searchlib/src/vespa/searchlib/aggregation/hit.cpp | 46 + searchlib/src/vespa/searchlib/aggregation/hit.h | 34 + .../src/vespa/searchlib/aggregation/hitlist.cpp | 152 + .../src/vespa/searchlib/aggregation/hitlist.h | 74 + .../aggregation/hitsaggregationresult.cpp | 119 + .../searchlib/aggregation/hitsaggregationresult.h | 76 + .../searchlib/aggregation/maxaggregationresult.h | 24 + .../searchlib/aggregation/minaggregationresult.h | 22 + .../src/vespa/searchlib/aggregation/modifiers.cpp | 54 + .../src/vespa/searchlib/aggregation/modifiers.h | 19 + .../vespa/searchlib/aggregation/perdocexpression.h | 46 + .../src/vespa/searchlib/aggregation/predicates.h | 47 + .../src/vespa/searchlib/aggregation/rawrank.cpp | 51 + .../src/vespa/searchlib/aggregation/rawrank.h | 35 + .../searchlib/aggregation/sumaggregationresult.h | 24 + .../src/vespa/searchlib/aggregation/vdshit.cpp | 45 + searchlib/src/vespa/searchlib/aggregation/vdshit.h | 40 + .../searchlib/aggregation/xoraggregationresult.h | 26 + searchlib/src/vespa/searchlib/attribute/.gitignore | 6 + 
.../src/vespa/searchlib/attribute/CMakeLists.txt | 88 + searchlib/src/vespa/searchlib/attribute/OWNERS | 3 + .../vespa/searchlib/attribute/address_space.cpp | 20 + .../src/vespa/searchlib/attribute/address_space.h | 36 + .../searchlib/attribute/address_space_usage.cpp | 32 + .../searchlib/attribute/address_space_usage.h | 29 + .../src/vespa/searchlib/attribute/attribute.cpp | 11 + .../src/vespa/searchlib/attribute/attribute.h | 8 + .../attribute/attribute_blueprint_factory.cpp | 636 + .../attribute/attribute_blueprint_factory.h | 21 + .../attribute/attribute_weighted_set_blueprint.cpp | 187 + .../attribute/attribute_weighted_set_blueprint.h | 36 + .../vespa/searchlib/attribute/attributecontext.cpp | 72 + .../vespa/searchlib/attribute/attributecontext.h | 43 + .../vespa/searchlib/attribute/attributefactory.cpp | 58 + .../vespa/searchlib/attribute/attributefactory.h | 33 + .../vespa/searchlib/attribute/attributefile.cpp | 457 + .../src/vespa/searchlib/attribute/attributefile.h | 113 + .../attribute/attributefilebufferwriter.cpp | 48 + .../attribute/attributefilebufferwriter.h | 39 + .../attribute/attributefilesavetarget.cpp | 105 + .../searchlib/attribute/attributefilesavetarget.h | 41 + .../searchlib/attribute/attributefilewriter.cpp | 213 + .../searchlib/attribute/attributefilewriter.h | 58 + .../vespa/searchlib/attribute/attributeguard.cpp | 41 + .../src/vespa/searchlib/attribute/attributeguard.h | 78 + .../searchlib/attribute/attributeiterators.cpp | 237 + .../vespa/searchlib/attribute/attributeiterators.h | 567 + .../searchlib/attribute/attributeiterators.hpp | 62 + .../vespa/searchlib/attribute/attributemanager.cpp | 279 + .../vespa/searchlib/attribute/attributemanager.h | 73 + .../attribute/attributememoryfilebufferwriter.cpp | 31 + .../attribute/attributememoryfilebufferwriter.h | 25 + .../attribute/attributememoryfilewriter.cpp | 60 + .../attribute/attributememoryfilewriter.h | 27 + .../attribute/attributememorysavetarget.cpp | 78 + 
.../attribute/attributememorysavetarget.h | 54 + .../vespa/searchlib/attribute/attributesaver.cpp | 40 + .../src/vespa/searchlib/attribute/attributesaver.h | 35 + .../vespa/searchlib/attribute/attributevector.cpp | 1110 ++ .../vespa/searchlib/attribute/attributevector.h | 845 ++ .../vespa/searchlib/attribute/attributevector.hpp | 169 + .../src/vespa/searchlib/attribute/attrvector.cpp | 188 + .../src/vespa/searchlib/attribute/attrvector.h | 235 + .../src/vespa/searchlib/attribute/attrvector.hpp | 185 + .../src/vespa/searchlib/attribute/changevector.cpp | 21 + .../src/vespa/searchlib/attribute/changevector.h | 230 + .../vespa/searchlib/attribute/configconverter.cpp | 84 + .../vespa/searchlib/attribute/configconverter.h | 21 + .../searchlib/attribute/createarrayfastsearch.cpp | 69 + .../vespa/searchlib/attribute/createarraystd.cpp | 63 + .../searchlib/attribute/createsetfastsearch.cpp | 71 + .../src/vespa/searchlib/attribute/createsetstd.cpp | 62 + .../searchlib/attribute/createsinglefastsearch.cpp | 65 + .../vespa/searchlib/attribute/createsinglestd.cpp | 68 + .../src/vespa/searchlib/attribute/defines.cpp | 11 + searchlib/src/vespa/searchlib/attribute/defines.h | 11 + .../src/vespa/searchlib/attribute/diversity.h | 226 + .../vespa/searchlib/attribute/dociditerator.cpp | 11 + .../src/vespa/searchlib/attribute/dociditerator.h | 105 + .../vespa/searchlib/attribute/enumattribute.cpp | 12 + .../src/vespa/searchlib/attribute/enumattribute.h | 98 + .../vespa/searchlib/attribute/enumattribute.hpp | 147 + .../searchlib/attribute/enumattributesaver.cpp | 51 + .../vespa/searchlib/attribute/enumattributesaver.h | 34 + .../vespa/searchlib/attribute/enumcomparator.cpp | 83 + .../src/vespa/searchlib/attribute/enumcomparator.h | 195 + .../searchlib/attribute/enumhintsearchcontext.cpp | 79 + .../searchlib/attribute/enumhintsearchcontext.h | 49 + .../src/vespa/searchlib/attribute/enumstore.cpp | 361 + .../src/vespa/searchlib/attribute/enumstore.h | 501 + 
.../src/vespa/searchlib/attribute/enumstore.hpp | 502 + .../vespa/searchlib/attribute/enumstorebase.cpp | 657 + .../src/vespa/searchlib/attribute/enumstorebase.h | 622 + .../searchlib/attribute/extendableattributes.cpp | 162 + .../searchlib/attribute/extendableattributes.h | 245 + .../searchlib/attribute/fixedsourceselector.cpp | 90 + .../searchlib/attribute/fixedsourceselector.h | 48 + .../vespa/searchlib/attribute/flagattribute.cpp | 283 + .../src/vespa/searchlib/attribute/flagattribute.h | 73 + .../src/vespa/searchlib/attribute/floatbase.cpp | 91 + .../src/vespa/searchlib/attribute/floatbase.h | 123 + .../attribute/i_document_weight_attribute.cpp | 4 + .../attribute/i_document_weight_attribute.h | 32 + .../searchlib/attribute/iattributefilewriter.h | 38 + .../searchlib/attribute/iattributemanager.cpp | 11 + .../vespa/searchlib/attribute/iattributemanager.h | 57 + .../searchlib/attribute/iattributesavetarget.cpp | 11 + .../searchlib/attribute/iattributesavetarget.h | 161 + .../src/vespa/searchlib/attribute/integerbase.cpp | 90 + .../src/vespa/searchlib/attribute/integerbase.h | 136 + .../src/vespa/searchlib/attribute/interlock.h | 65 + .../attribute/ipostinglistattributebase.h | 33 + .../attribute/ipostinglistsearchcontext.cpp | 11 + .../attribute/ipostinglistsearchcontext.h | 62 + .../vespa/searchlib/attribute/iterator_pack.cpp | 10 + .../src/vespa/searchlib/attribute/iterator_pack.h | 56 + .../vespa/searchlib/attribute/loadedenumvalue.cpp | 28 + .../vespa/searchlib/attribute/loadedenumvalue.h | 177 + .../searchlib/attribute/loadednumericvalue.cpp | 124 + .../vespa/searchlib/attribute/loadednumericvalue.h | 69 + .../searchlib/attribute/loadedstringvalue.cpp | 49 + .../vespa/searchlib/attribute/loadedstringvalue.h | 95 + .../src/vespa/searchlib/attribute/loadedvalue.cpp | 11 + .../src/vespa/searchlib/attribute/loadedvalue.h | 163 + .../searchlib/attribute/multienumattribute.cpp | 12 + .../vespa/searchlib/attribute/multienumattribute.h | 120 + 
.../searchlib/attribute/multienumattribute.hpp | 238 + .../attribute/multienumattributesaver.cpp | 122 + .../searchlib/attribute/multienumattributesaver.h | 40 + .../searchlib/attribute/multinumericattribute.cpp | 12 + .../searchlib/attribute/multinumericattribute.h | 333 + .../searchlib/attribute/multinumericattribute.hpp | 197 + .../attribute/multinumericattributesaver.cpp | 130 + .../attribute/multinumericattributesaver.h | 38 + .../attribute/multinumericenumattribute.cpp | 12 + .../attribute/multinumericenumattribute.h | 289 + .../attribute/multinumericenumattribute.hpp | 145 + .../attribute/multinumericpostattribute.cpp | 12 + .../attribute/multinumericpostattribute.h | 133 + .../attribute/multinumericpostattribute.hpp | 143 + .../searchlib/attribute/multistringattribute.cpp | 17 + .../searchlib/attribute/multistringattribute.h | 173 + .../searchlib/attribute/multistringattribute.hpp | 146 + .../attribute/multistringpostattribute.cpp | 18 + .../searchlib/attribute/multistringpostattribute.h | 123 + .../attribute/multistringpostattribute.hpp | 152 + .../src/vespa/searchlib/attribute/multivalue.h | 63 + .../searchlib/attribute/multivalueattribute.cpp | 12 + .../searchlib/attribute/multivalueattribute.h | 78 + .../searchlib/attribute/multivalueattribute.hpp | 203 + .../attribute/multivalueattributesaver.cpp | 32 + .../searchlib/attribute/multivalueattributesaver.h | 36 + .../attribute/multivalueattributesaverutils.h | 97 + .../searchlib/attribute/multivaluemapping.cpp | 858 ++ .../vespa/searchlib/attribute/multivaluemapping.h | 1498 +++ .../searchlib/attribute/multivaluemapping.hpp | 50 + .../attribute/not_implemented_attribute.h | 182 + .../src/vespa/searchlib/attribute/numericbase.cpp | 74 + .../src/vespa/searchlib/attribute/numericbase.h | 147 + .../vespa/searchlib/attribute/postingchange.cpp | 275 + .../src/vespa/searchlib/attribute/postingchange.h | 86 + .../searchlib/attribute/postinglistattribute.cpp | 451 + .../searchlib/attribute/postinglistattribute.h | 
165 + .../attribute/postinglistsearchcontext.cpp | 93 + .../searchlib/attribute/postinglistsearchcontext.h | 388 + .../attribute/postinglistsearchcontext.hpp | 388 + .../searchlib/attribute/postinglisttraits.cpp | 11 + .../vespa/searchlib/attribute/postinglisttraits.h | 56 + .../src/vespa/searchlib/attribute/postingstore.cpp | 638 + .../src/vespa/searchlib/attribute/postingstore.h | 361 + .../searchlib/attribute/predicate_attribute.cpp | 277 + .../searchlib/attribute/predicate_attribute.h | 104 + .../searchlib/attribute/singleenumattribute.cpp | 43 + .../searchlib/attribute/singleenumattribute.h | 152 + .../searchlib/attribute/singleenumattribute.hpp | 310 + .../attribute/singleenumattributesaver.cpp | 48 + .../searchlib/attribute/singleenumattributesaver.h | 33 + .../searchlib/attribute/singlenumericattribute.cpp | 12 + .../searchlib/attribute/singlenumericattribute.h | 235 + .../searchlib/attribute/singlenumericattribute.hpp | 188 + .../attribute/singlenumericattributesaver.cpp | 48 + .../attribute/singlenumericattributesaver.h | 31 + .../attribute/singlenumericenumattribute.cpp | 12 + .../attribute/singlenumericenumattribute.h | 191 + .../attribute/singlenumericenumattribute.hpp | 172 + .../attribute/singlenumericpostattribute.cpp | 12 + .../attribute/singlenumericpostattribute.h | 121 + .../attribute/singlenumericpostattribute.hpp | 153 + .../attribute/singlesmallnumericattribute.cpp | 242 + .../attribute/singlesmallnumericattribute.h | 313 + .../searchlib/attribute/singlestringattribute.cpp | 14 + .../searchlib/attribute/singlestringattribute.h | 115 + .../searchlib/attribute/singlestringattribute.hpp | 80 + .../attribute/singlestringpostattribute.cpp | 12 + .../attribute/singlestringpostattribute.h | 127 + .../attribute/singlestringpostattribute.hpp | 150 + .../vespa/searchlib/attribute/sourceselector.cpp | 136 + .../src/vespa/searchlib/attribute/sourceselector.h | 85 + .../vespa/searchlib/attribute/stringattribute.cpp | 11 + 
.../vespa/searchlib/attribute/stringattribute.h | 12 + .../src/vespa/searchlib/attribute/stringbase.cpp | 542 + .../src/vespa/searchlib/attribute/stringbase.h | 201 + .../vespa/searchlib/attribute/tensorattribute.cpp | 270 + .../vespa/searchlib/attribute/tensorattribute.h | 55 + .../searchlib/attribute/tensorattributesaver.cpp | 51 + .../searchlib/attribute/tensorattributesaver.h | 37 + .../src/vespa/searchlib/attribute/tensorstore.cpp | 133 + .../src/vespa/searchlib/attribute/tensorstore.h | 93 + .../src/vespa/searchlib/bitcompression/.gitignore | 3 + .../vespa/searchlib/bitcompression/CMakeLists.txt | 9 + .../src/vespa/searchlib/bitcompression/OWNERS | 1 + .../vespa/searchlib/bitcompression/compression.cpp | 450 + .../vespa/searchlib/bitcompression/compression.h | 1933 +++ .../searchlib/bitcompression/countcompression.cpp | 241 + .../searchlib/bitcompression/countcompression.h | 110 + .../vespa/searchlib/bitcompression/pagedict4.cpp | 2586 ++++ .../src/vespa/searchlib/bitcompression/pagedict4.h | 836 ++ .../searchlib/bitcompression/posocccompression.cpp | 1355 ++ .../searchlib/bitcompression/posocccompression.h | 616 + searchlib/src/vespa/searchlib/btree/CMakeLists.txt | 19 + searchlib/src/vespa/searchlib/btree/OWNERS | 2 + searchlib/src/vespa/searchlib/btree/btree.h | 170 + searchlib/src/vespa/searchlib/btree/btree.hpp | 30 + .../src/vespa/searchlib/btree/btreeaggregator.cpp | 25 + .../src/vespa/searchlib/btree/btreeaggregator.h | 65 + .../src/vespa/searchlib/btree/btreeaggregator.hpp | 84 + .../src/vespa/searchlib/btree/btreebuilder.cpp | 29 + searchlib/src/vespa/searchlib/btree/btreebuilder.h | 100 + .../src/vespa/searchlib/btree/btreebuilder.hpp | 459 + .../src/vespa/searchlib/btree/btreeinserter.cpp | 24 + .../src/vespa/searchlib/btree/btreeinserter.h | 62 + .../src/vespa/searchlib/btree/btreeinserter.hpp | 113 + .../src/vespa/searchlib/btree/btreeiterator.cpp | 26 + .../src/vespa/searchlib/btree/btreeiterator.h | 885 ++ 
.../src/vespa/searchlib/btree/btreeiterator.hpp | 1330 ++ searchlib/src/vespa/searchlib/btree/btreenode.cpp | 36 + searchlib/src/vespa/searchlib/btree/btreenode.h | 784 ++ searchlib/src/vespa/searchlib/btree/btreenode.hpp | 402 + .../vespa/searchlib/btree/btreenodeallocator.cpp | 27 + .../src/vespa/searchlib/btree/btreenodeallocator.h | 271 + .../vespa/searchlib/btree/btreenodeallocator.hpp | 437 + .../src/vespa/searchlib/btree/btreenodestore.cpp | 117 + .../src/vespa/searchlib/btree/btreenodestore.h | 399 + .../src/vespa/searchlib/btree/btreenodestore.hpp | 98 + .../src/vespa/searchlib/btree/btreeremover.cpp | 24 + searchlib/src/vespa/searchlib/btree/btreeremover.h | 104 + .../src/vespa/searchlib/btree/btreeremover.hpp | 185 + searchlib/src/vespa/searchlib/btree/btreeroot.cpp | 26 + searchlib/src/vespa/searchlib/btree/btreeroot.h | 253 + searchlib/src/vespa/searchlib/btree/btreeroot.hpp | 486 + .../src/vespa/searchlib/btree/btreerootbase.cpp | 26 + .../src/vespa/searchlib/btree/btreerootbase.h | 121 + .../src/vespa/searchlib/btree/btreerootbase.hpp | 90 + searchlib/src/vespa/searchlib/btree/btreestore.cpp | 36 + searchlib/src/vespa/searchlib/btree/btreestore.h | 511 + searchlib/src/vespa/searchlib/btree/btreestore.hpp | 1005 ++ searchlib/src/vespa/searchlib/btree/btreetraits.h | 25 + .../src/vespa/searchlib/btree/bufferstate.cpp | 351 + searchlib/src/vespa/searchlib/btree/bufferstate.h | 389 + searchlib/src/vespa/searchlib/btree/datastore.cpp | 16 + searchlib/src/vespa/searchlib/btree/datastore.h | 139 + searchlib/src/vespa/searchlib/btree/datastore.hpp | 248 + .../src/vespa/searchlib/btree/datastorebase.cpp | 426 + .../src/vespa/searchlib/btree/datastorebase.h | 404 + searchlib/src/vespa/searchlib/btree/entryref.h | 64 + .../src/vespa/searchlib/btree/minmaxaggrcalc.h | 82 + .../src/vespa/searchlib/btree/minmaxaggregated.h | 113 + searchlib/src/vespa/searchlib/btree/noaggrcalc.h | 98 + searchlib/src/vespa/searchlib/btree/noaggregated.h | 21 + 
searchlib/src/vespa/searchlib/common/.gitignore | 6 + .../src/vespa/searchlib/common/CMakeLists.txt | 30 + .../vespa/searchlib/common/allocatedbitvector.cpp | 156 + .../vespa/searchlib/common/allocatedbitvector.h | 92 + searchlib/src/vespa/searchlib/common/base.h | 16 + searchlib/src/vespa/searchlib/common/bitvector.cpp | 421 + searchlib/src/vespa/searchlib/common/bitvector.h | 354 + .../src/vespa/searchlib/common/bitvectorcache.cpp | 218 + .../src/vespa/searchlib/common/bitvectorcache.h | 86 + .../vespa/searchlib/common/bitvectoriterator.cpp | 116 + .../src/vespa/searchlib/common/bitvectoriterator.h | 42 + .../vespa/searchlib/common/condensedbitvectors.cpp | 148 + .../vespa/searchlib/common/condensedbitvectors.h | 39 + searchlib/src/vespa/searchlib/common/converters.h | 69 + searchlib/src/vespa/searchlib/common/docstamp.h | 18 + .../vespa/searchlib/common/documentlocations.cpp | 14 + .../src/vespa/searchlib/common/documentlocations.h | 43 + .../src/vespa/searchlib/common/documentsummary.cpp | 63 + .../src/vespa/searchlib/common/documentsummary.h | 24 + searchlib/src/vespa/searchlib/common/feature.h | 10 + .../src/vespa/searchlib/common/featureset.cpp | 90 + searchlib/src/vespa/searchlib/common/featureset.h | 128 + .../vespa/searchlib/common/fileheadercontext.cpp | 51 + .../src/vespa/searchlib/common/fileheadercontext.h | 42 + .../searchlib/common/foregroundtaskexecutor.cpp | 47 + .../searchlib/common/foregroundtaskexecutor.h | 35 + searchlib/src/vespa/searchlib/common/fslimits.h | 37 + searchlib/src/vespa/searchlib/common/gid.h | 54 + .../vespa/searchlib/common/growablebitvector.cpp | 55 + .../src/vespa/searchlib/common/growablebitvector.h | 27 + searchlib/src/vespa/searchlib/common/hitrank.h | 12 + .../src/vespa/searchlib/common/identifiable.h | 167 + .../vespa/searchlib/common/idestructorcallback.h | 20 + .../vespa/searchlib/common/idocumentmetastore.h | 152 + .../src/vespa/searchlib/common/indexmetainfo.cpp | 354 + .../src/vespa/searchlib/common/indexmetainfo.h 
| 63 + .../searchlib/common/isequencedtaskexecutor.h | 66 + searchlib/src/vespa/searchlib/common/lambdatask.h | 28 + .../src/vespa/searchlib/common/lid_usage_stats.h | 66 + searchlib/src/vespa/searchlib/common/location.cpp | 205 + searchlib/src/vespa/searchlib/common/location.h | 56 + .../vespa/searchlib/common/locationiterators.cpp | 121 + .../src/vespa/searchlib/common/locationiterators.h | 12 + searchlib/src/vespa/searchlib/common/mapnames.cpp | 14 + searchlib/src/vespa/searchlib/common/mapnames.h | 33 + searchlib/src/vespa/searchlib/common/packets.cpp | 2198 +++ searchlib/src/vespa/searchlib/common/packets.h | 593 + .../vespa/searchlib/common/partialbitvector.cpp | 24 + .../src/vespa/searchlib/common/partialbitvector.h | 38 + searchlib/src/vespa/searchlib/common/range.h | 30 + searchlib/src/vespa/searchlib/common/rankedhit.h | 35 + searchlib/src/vespa/searchlib/common/rcuvector.h | 354 + searchlib/src/vespa/searchlib/common/reserved.h | 19 + searchlib/src/vespa/searchlib/common/resultset.cpp | 149 + searchlib/src/vespa/searchlib/common/resultset.h | 51 + .../vespa/searchlib/common/scheduletaskcallback.h | 32 + .../searchlib/common/sequencedtaskexecutor.cpp | 65 + .../vespa/searchlib/common/sequencedtaskexecutor.h | 36 + .../common/sequencedtaskexecutorobserver.h | 44 + searchlib/src/vespa/searchlib/common/serialnum.h | 13 + .../common/serialnumfileheadercontext.cpp | 36 + .../searchlib/common/serialnumfileheadercontext.h | 31 + searchlib/src/vespa/searchlib/common/sort.cpp | 21 + searchlib/src/vespa/searchlib/common/sort.h | 537 + searchlib/src/vespa/searchlib/common/sortdata.cpp | 65 + searchlib/src/vespa/searchlib/common/sortdata.h | 99 + .../src/vespa/searchlib/common/sortresults.cpp | 507 + searchlib/src/vespa/searchlib/common/sortresults.h | 157 + searchlib/src/vespa/searchlib/common/sortspec.cpp | 180 + searchlib/src/vespa/searchlib/common/sortspec.h | 35 + searchlib/src/vespa/searchlib/common/transport.h | 401 + 
.../src/vespa/searchlib/common/tunefileinfo.cpp | 11 + .../src/vespa/searchlib/common/tunefileinfo.h | 431 + searchlib/src/vespa/searchlib/config/.gitignore | 5 + .../src/vespa/searchlib/config/CMakeLists.txt | 7 + .../src/vespa/searchlib/config/translogserver.def | 24 + searchlib/src/vespa/searchlib/diskindex/.gitignore | 3 + .../src/vespa/searchlib/diskindex/CMakeLists.txt | 28 + searchlib/src/vespa/searchlib/diskindex/OWNERS | 1 + .../searchlib/diskindex/bitvectordictionary.cpp | 108 + .../searchlib/diskindex/bitvectordictionary.h | 81 + .../vespa/searchlib/diskindex/bitvectorfile.cpp | 238 + .../src/vespa/searchlib/diskindex/bitvectorfile.h | 204 + .../vespa/searchlib/diskindex/bitvectoridxfile.cpp | 233 + .../vespa/searchlib/diskindex/bitvectoridxfile.h | 122 + .../searchlib/diskindex/bitvectorkeyscope.cpp | 73 + .../vespa/searchlib/diskindex/bitvectorkeyscope.h | 43 + .../vespa/searchlib/diskindex/checkpointfile.cpp | 189 + .../src/vespa/searchlib/diskindex/checkpointfile.h | 73 + .../searchlib/diskindex/dictionarywordreader.cpp | 71 + .../searchlib/diskindex/dictionarywordreader.h | 135 + .../src/vespa/searchlib/diskindex/diskindex.cpp | 476 + .../src/vespa/searchlib/diskindex/diskindex.h | 193 + .../searchlib/diskindex/disktermblueprint.cpp | 124 + .../vespa/searchlib/diskindex/disktermblueprint.h | 53 + .../src/vespa/searchlib/diskindex/docidmapper.cpp | 73 + .../src/vespa/searchlib/diskindex/docidmapper.h | 91 + .../src/vespa/searchlib/diskindex/extposocc.cpp | 157 + .../src/vespa/searchlib/diskindex/extposocc.h | 56 + .../src/vespa/searchlib/diskindex/fieldreader.cpp | 385 + .../src/vespa/searchlib/diskindex/fieldreader.h | 216 + .../src/vespa/searchlib/diskindex/fieldwriter.cpp | 258 + .../src/vespa/searchlib/diskindex/fieldwriter.h | 138 + .../src/vespa/searchlib/diskindex/fileheader.cpp | 165 + .../src/vespa/searchlib/diskindex/fileheader.h | 91 + searchlib/src/vespa/searchlib/diskindex/fusion.cpp | 606 + 
searchlib/src/vespa/searchlib/diskindex/fusion.h | 265 + .../src/vespa/searchlib/diskindex/indexbuilder.cpp | 720 + .../src/vespa/searchlib/diskindex/indexbuilder.h | 124 + .../vespa/searchlib/diskindex/pagedict4file.cpp | 738 ++ .../src/vespa/searchlib/diskindex/pagedict4file.h | 239 + .../searchlib/diskindex/pagedict4randread.cpp | 300 + .../vespa/searchlib/diskindex/pagedict4randread.h | 85 + .../vespa/searchlib/diskindex/wordnummapper.cpp | 110 + .../src/vespa/searchlib/diskindex/wordnummapper.h | 137 + .../src/vespa/searchlib/diskindex/zcposocc.cpp | 137 + searchlib/src/vespa/searchlib/diskindex/zcposocc.h | 83 + .../searchlib/diskindex/zcposocciterators.cpp | 89 + .../vespa/searchlib/diskindex/zcposocciterators.h | 93 + .../vespa/searchlib/diskindex/zcposoccrandread.cpp | 381 + .../vespa/searchlib/diskindex/zcposoccrandread.h | 112 + .../src/vespa/searchlib/diskindex/zcposting.cpp | 1470 ++ .../src/vespa/searchlib/diskindex/zcposting.h | 495 + .../searchlib/diskindex/zcpostingiterators.cpp | 700 + .../vespa/searchlib/diskindex/zcpostingiterators.h | 200 + searchlib/src/vespa/searchlib/docstore/.gitignore | 6 + .../src/vespa/searchlib/docstore/CMakeLists.txt | 18 + searchlib/src/vespa/searchlib/docstore/OWNERS | 2 + .../src/vespa/searchlib/docstore/bytecomplens.cpp | 260 + .../src/vespa/searchlib/docstore/bytecomplens.h | 110 + .../src/vespa/searchlib/docstore/cachestats.h | 41 + searchlib/src/vespa/searchlib/docstore/chunk.cpp | 139 + searchlib/src/vespa/searchlib/docstore/chunk.h | 108 + .../src/vespa/searchlib/docstore/chunkformat.cpp | 158 + .../src/vespa/searchlib/docstore/chunkformat.h | 106 + .../src/vespa/searchlib/docstore/chunkformats.cpp | 73 + .../src/vespa/searchlib/docstore/chunkformats.h | 48 + .../docstore/data_store_file_chunk_id.cpp | 17 + .../searchlib/docstore/data_store_file_chunk_id.h | 28 + .../docstore/data_store_file_chunk_stats.h | 30 + .../searchlib/docstore/data_store_storage_stats.h | 36 + 
.../docstore/document_store_visitor_progress.cpp | 31 + .../docstore/document_store_visitor_progress.h | 23 + .../src/vespa/searchlib/docstore/documentstore.cpp | 392 + .../src/vespa/searchlib/docstore/documentstore.h | 244 + .../src/vespa/searchlib/docstore/filechunk.cpp | 676 + searchlib/src/vespa/searchlib/docstore/filechunk.h | 338 + .../src/vespa/searchlib/docstore/ibucketizer.h | 25 + .../src/vespa/searchlib/docstore/idatastore.cpp | 19 + .../src/vespa/searchlib/docstore/idatastore.h | 187 + .../vespa/searchlib/docstore/idocumentstore.cpp | 23 + .../src/vespa/searchlib/docstore/idocumentstore.h | 203 + .../src/vespa/searchlib/docstore/liddatastore.h | 58 + .../src/vespa/searchlib/docstore/logdatastore.cpp | 1240 ++ .../src/vespa/searchlib/docstore/logdatastore.h | 304 + .../vespa/searchlib/docstore/logdocumentstore.cpp | 31 + .../vespa/searchlib/docstore/logdocumentstore.h | 67 + .../searchlib/docstore/writeablefilechunk.cpp | 868 ++ .../vespa/searchlib/docstore/writeablefilechunk.h | 185 + searchlib/src/vespa/searchlib/engine/.gitignore | 3 + .../src/vespa/searchlib/engine/CMakeLists.txt | 19 + searchlib/src/vespa/searchlib/engine/OWNERS | 1 + .../src/vespa/searchlib/engine/create-class-cpp.sh | 29 + .../src/vespa/searchlib/engine/create-class-h.sh | 27 + .../src/vespa/searchlib/engine/create-interface.sh | 23 + searchlib/src/vespa/searchlib/engine/docsumapi.cpp | 17 + searchlib/src/vespa/searchlib/engine/docsumapi.h | 74 + .../src/vespa/searchlib/engine/docsumreply.cpp | 21 + searchlib/src/vespa/searchlib/engine/docsumreply.h | 47 + .../src/vespa/searchlib/engine/docsumrequest.cpp | 38 + .../src/vespa/searchlib/engine/docsumrequest.h | 99 + .../src/vespa/searchlib/engine/errorcodes.cpp | 38 + searchlib/src/vespa/searchlib/engine/errorcodes.h | 38 + searchlib/src/vespa/searchlib/engine/monitorapi.h | 66 + .../src/vespa/searchlib/engine/monitorreply.cpp | 26 + .../src/vespa/searchlib/engine/monitorreply.h | 31 + 
.../src/vespa/searchlib/engine/monitorrequest.cpp | 17 + .../src/vespa/searchlib/engine/monitorrequest.h | 21 + .../src/vespa/searchlib/engine/packetconverter.cpp | 261 + .../src/vespa/searchlib/engine/packetconverter.h | 177 + .../src/vespa/searchlib/engine/propertiesmap.cpp | 30 + .../src/vespa/searchlib/engine/propertiesmap.h | 129 + searchlib/src/vespa/searchlib/engine/request.cpp | 41 + searchlib/src/vespa/searchlib/engine/request.h | 43 + searchlib/src/vespa/searchlib/engine/searchapi.h | 66 + .../src/vespa/searchlib/engine/searchreply.cpp | 56 + searchlib/src/vespa/searchlib/engine/searchreply.h | 84 + .../src/vespa/searchlib/engine/searchrequest.cpp | 33 + .../src/vespa/searchlib/engine/searchrequest.h | 82 + .../vespa/searchlib/engine/source_description.cpp | 11 + .../vespa/searchlib/engine/source_description.h | 17 + searchlib/src/vespa/searchlib/engine/tracereply.h | 16 + .../vespa/searchlib/engine/transport_metrics.cpp | 35 + .../src/vespa/searchlib/engine/transport_metrics.h | 37 + .../src/vespa/searchlib/engine/transportserver.cpp | 427 + .../src/vespa/searchlib/engine/transportserver.h | 334 + .../src/vespa/searchlib/expression/.gitignore | 6 + .../src/vespa/searchlib/expression/CMakeLists.txt | 32 + searchlib/src/vespa/searchlib/expression/OWNERS | 1 + .../vespa/searchlib/expression/addfunctionnode.h | 22 + .../searchlib/expression/aggregationrefnode.cpp | 69 + .../searchlib/expression/aggregationrefnode.h | 50 + .../vespa/searchlib/expression/andfunctionnode.h | 24 + .../expression/arrayatlookupfunctionnode.cpp | 163 + .../expression/arrayatlookupfunctionnode.h | 46 + .../searchlib/expression/arrayoperationnode.cpp | 66 + .../searchlib/expression/arrayoperationnode.h | 46 + .../vespa/searchlib/expression/attributenode.cpp | 283 + .../src/vespa/searchlib/expression/attributenode.h | 158 + .../searchlib/expression/binaryfunctionnode.h | 24 + .../vespa/searchlib/expression/bitfunctionnode.h | 24 + .../searchlib/expression/bucketresultnode.cpp | 17 + 
.../vespa/searchlib/expression/bucketresultnode.h | 27 + .../vespa/searchlib/expression/catfunctionnode.h | 23 + .../vespa/searchlib/expression/catserializer.cpp | 79 + .../src/vespa/searchlib/expression/catserializer.h | 38 + .../src/vespa/searchlib/expression/constantnode.h | 27 + .../searchlib/expression/debugwaitfunctionnode.cpp | 78 + .../searchlib/expression/debugwaitfunctionnode.h | 30 + .../searchlib/expression/dividefunctionnode.h | 22 + .../searchlib/expression/documentaccessornode.h | 37 + .../searchlib/expression/documentfieldnode.cpp | 340 + .../vespa/searchlib/expression/documentfieldnode.h | 87 + .../vespa/searchlib/expression/enumresultnode.h | 30 + .../vespa/searchlib/expression/expressionnode.h | 58 + .../vespa/searchlib/expression/expressiontree.cpp | 202 + .../vespa/searchlib/expression/expressiontree.h | 75 + .../expression/fixedwidthbucketfunctionnode.cpp | 134 + .../expression/fixedwidthbucketfunctionnode.h | 71 + .../searchlib/expression/floatbucketresultnode.cpp | 85 + .../searchlib/expression/floatbucketresultnode.h | 53 + .../vespa/searchlib/expression/floatresultnode.h | 56 + .../src/vespa/searchlib/expression/forcelink.hpp | 49 + .../src/vespa/searchlib/expression/functionnode.h | 30 + .../vespa/searchlib/expression/functionnodes.cpp | 624 + .../getdocidnamespacespecificfunctionnode.h | 28 + .../expression/getymumchecksumfunctionnode.h | 26 + .../expression/integerbucketresultnode.cpp | 73 + .../searchlib/expression/integerbucketresultnode.h | 52 + .../vespa/searchlib/expression/integerresultnode.h | 138 + .../expression/interpolatedlookupfunctionnode.cpp | 127 + .../expression/interpolatedlookupfunctionnode.h | 40 + .../searchlib/expression/mathfunctionnode.cpp | 70 + .../vespa/searchlib/expression/mathfunctionnode.h | 26 + .../vespa/searchlib/expression/maxfunctionnode.h | 22 + .../searchlib/expression/md5bitfunctionnode.h | 21 + .../vespa/searchlib/expression/minfunctionnode.h | 22 + .../searchlib/expression/modulofunctionnode.h | 22 
+ .../searchlib/expression/multiargfunctionnode.h | 41 + .../searchlib/expression/multiplyfunctionnode.h | 22 + .../searchlib/expression/negatefunctionnode.h | 21 + .../expression/normalizesubjectfunctionnode.h | 22 + .../vespa/searchlib/expression/nullresultnode.h | 36 + .../searchlib/expression/numelemfunctionnode.h | 22 + .../searchlib/expression/numericfunctionnode.cpp | 156 + .../searchlib/expression/numericfunctionnode.h | 178 + .../vespa/searchlib/expression/numericresultnode.h | 23 + .../vespa/searchlib/expression/orfunctionnode.h | 22 + .../searchlib/expression/perdocexpression.cpp | 36 + .../expression/positiveinfinityresultnode.h | 30 + .../searchlib/expression/rangebucketpredef.cpp | 133 + .../vespa/searchlib/expression/rangebucketpredef.h | 75 + .../searchlib/expression/rawbucketresultnode.cpp | 93 + .../searchlib/expression/rawbucketresultnode.h | 37 + .../src/vespa/searchlib/expression/rawresultnode.h | 53 + .../src/vespa/searchlib/expression/relevancenode.h | 27 + .../src/vespa/searchlib/expression/resultnode.cpp | 73 + .../src/vespa/searchlib/expression/resultnode.h | 128 + .../src/vespa/searchlib/expression/resultnodes.cpp | 410 + .../vespa/searchlib/expression/resultvector.cpp | 61 + .../src/vespa/searchlib/expression/resultvector.h | 399 + .../searchlib/expression/reversefunctionnode.h | 21 + .../src/vespa/searchlib/expression/serializer.h | 34 + .../vespa/searchlib/expression/singleresultnode.h | 30 + .../vespa/searchlib/expression/sortfunctionnode.h | 21 + .../searchlib/expression/strcatfunctionnode.h | 22 + .../searchlib/expression/strcatserializer.cpp | 54 + .../vespa/searchlib/expression/strcatserializer.h | 25 + .../expression/stringbucketresultnode.cpp | 93 + .../searchlib/expression/stringbucketresultnode.h | 44 + .../vespa/searchlib/expression/stringresultnode.h | 52 + .../searchlib/expression/strlenfunctionnode.h | 22 + .../src/vespa/searchlib/expression/timestamp.cpp | 108 + .../src/vespa/searchlib/expression/timestamp.h | 78 + 
.../searchlib/expression/tofloatfunctionnode.h | 22 + .../vespa/searchlib/expression/tointfunctionnode.h | 22 + .../vespa/searchlib/expression/torawfunctionnode.h | 22 + .../searchlib/expression/tostringfunctionnode.h | 22 + .../vespa/searchlib/expression/ucafunctionnode.cpp | 115 + .../vespa/searchlib/expression/ucafunctionnode.h | 60 + .../searchlib/expression/unarybitfunctionnode.h | 31 + .../vespa/searchlib/expression/unaryfunctionnode.h | 27 + .../searchlib/expression/xorbitfunctionnode.h | 23 + .../vespa/searchlib/expression/xorfunctionnode.h | 22 + .../src/vespa/searchlib/expression/zcurve.cpp | 91 + searchlib/src/vespa/searchlib/expression/zcurve.h | 59 + searchlib/src/vespa/searchlib/features/.gitignore | 3 + .../src/vespa/searchlib/features/CMakeLists.txt | 64 + searchlib/src/vespa/searchlib/features/OWNERS | 1 + .../src/vespa/searchlib/features/agefeature.cpp | 79 + .../src/vespa/searchlib/features/agefeature.h | 64 + .../src/vespa/searchlib/features/array_parser.cpp | 19 + .../src/vespa/searchlib/features/array_parser.h | 49 + .../src/vespa/searchlib/features/array_parser.hpp | 96 + .../vespa/searchlib/features/attributefeature.cpp | 433 + .../vespa/searchlib/features/attributefeature.h | 57 + .../searchlib/features/attributematchfeature.cpp | 350 + .../searchlib/features/attributematchfeature.h | 124 + .../vespa/searchlib/features/closenessfeature.cpp | 110 + .../vespa/searchlib/features/closenessfeature.h | 67 + .../searchlib/features/constant_tensor_executor.h | 44 + .../vespa/searchlib/features/create-class-cpp.sh | 29 + .../src/vespa/searchlib/features/create-class-h.sh | 27 + .../searchlib/features/debug_attribute_wait.cpp | 96 + .../searchlib/features/debug_attribute_wait.h | 71 + .../src/vespa/searchlib/features/debug_wait.cpp | 82 + .../src/vespa/searchlib/features/debug_wait.h | 66 + .../vespa/searchlib/features/distancefeature.cpp | 148 + .../src/vespa/searchlib/features/distancefeature.h | 75 + 
.../searchlib/features/distancetopathfeature.cpp | 177 + .../searchlib/features/distancetopathfeature.h | 82 + .../vespa/searchlib/features/dotproductfeature.cpp | 457 + .../vespa/searchlib/features/dotproductfeature.h | 217 + .../features/element_completeness_feature.cpp | 143 + .../features/element_completeness_feature.h | 131 + .../features/element_similarity_feature.cpp | 417 + .../features/element_similarity_feature.h | 44 + .../features/euclidean_distance_feature.cpp | 123 + .../features/euclidean_distance_feature.h | 76 + .../vespa/searchlib/features/fieldinfofeature.cpp | 235 + .../vespa/searchlib/features/fieldinfofeature.h | 70 + .../searchlib/features/fieldlengthfeature.cpp | 99 + .../vespa/searchlib/features/fieldlengthfeature.h | 65 + .../vespa/searchlib/features/fieldmatch/.gitignore | 3 + .../searchlib/features/fieldmatch/CMakeLists.txt | 10 + .../searchlib/features/fieldmatch/computer.cpp | 558 + .../vespa/searchlib/features/fieldmatch/computer.h | 382 + .../searchlib/features/fieldmatch/metrics.cpp | 344 + .../vespa/searchlib/features/fieldmatch/metrics.h | 563 + .../vespa/searchlib/features/fieldmatch/params.cpp | 45 + .../vespa/searchlib/features/fieldmatch/params.h | 261 + .../searchlib/features/fieldmatch/segmentstart.cpp | 103 + .../searchlib/features/fieldmatch/segmentstart.h | 186 + .../features/fieldmatch/simplemetrics.cpp | 39 + .../searchlib/features/fieldmatch/simplemetrics.h | 186 + .../vespa/searchlib/features/fieldmatchfeature.cpp | 311 + .../vespa/searchlib/features/fieldmatchfeature.h | 70 + .../searchlib/features/fieldtermmatchfeature.cpp | 129 + .../searchlib/features/fieldtermmatchfeature.h | 67 + .../vespa/searchlib/features/firstphasefeature.cpp | 62 + .../vespa/searchlib/features/firstphasefeature.h | 53 + .../features/flow_completeness_feature.cpp | 309 + .../searchlib/features/flow_completeness_feature.h | 111 + .../vespa/searchlib/features/foreachfeature.cpp | 186 + .../src/vespa/searchlib/features/foreachfeature.h | 185 + 
.../vespa/searchlib/features/freshnessfeature.cpp | 101 + .../vespa/searchlib/features/freshnessfeature.h | 67 + .../searchlib/features/item_raw_score_feature.cpp | 82 + .../searchlib/features/item_raw_score_feature.h | 63 + .../features/jarowinklerdistancefeature.cpp | 184 + .../features/jarowinklerdistancefeature.h | 84 + .../vespa/searchlib/features/logarithmcalculator.h | 61 + .../vespa/searchlib/features/matchesfeature.cpp | 90 + .../src/vespa/searchlib/features/matchesfeature.h | 71 + .../src/vespa/searchlib/features/matchfeature.cpp | 107 + .../src/vespa/searchlib/features/matchfeature.h | 68 + .../features/native_dot_product_feature.cpp | 57 + .../features/native_dot_product_feature.h | 47 + .../features/nativeattributematchfeature.cpp | 150 + .../features/nativeattributematchfeature.h | 119 + .../searchlib/features/nativefieldmatchfeature.cpp | 179 + .../searchlib/features/nativefieldmatchfeature.h | 133 + .../searchlib/features/nativeproximityfeature.cpp | 218 + .../searchlib/features/nativeproximityfeature.h | 119 + .../vespa/searchlib/features/nativerankfeature.cpp | 173 + .../vespa/searchlib/features/nativerankfeature.h | 133 + .../src/vespa/searchlib/features/nowfeature.cpp | 63 + .../src/vespa/searchlib/features/nowfeature.h | 60 + .../vespa/searchlib/features/proximityfeature.cpp | 149 + .../vespa/searchlib/features/proximityfeature.h | 82 + .../features/querycompletenessfeature.cpp | 112 + .../searchlib/features/querycompletenessfeature.h | 80 + .../src/vespa/searchlib/features/queryfeature.cpp | 161 + .../src/vespa/searchlib/features/queryfeature.h | 52 + .../src/vespa/searchlib/features/queryterm.cpp | 52 + searchlib/src/vespa/searchlib/features/queryterm.h | 64 + .../searchlib/features/querytermcountfeature.cpp | 58 + .../searchlib/features/querytermcountfeature.h | 48 + .../src/vespa/searchlib/features/randomfeature.cpp | 87 + .../src/vespa/searchlib/features/randomfeature.h | 71 + .../features/rankingexpression/.gitignore | 6 + 
.../features/rankingexpression/CMakeLists.txt | 6 + .../rankingexpression/feature_name_extractor.cpp | 87 + .../rankingexpression/feature_name_extractor.h | 23 + .../features/rankingexpressionfeature.cpp | 170 + .../searchlib/features/rankingexpressionfeature.h | 88 + .../vespa/searchlib/features/raw_score_feature.cpp | 57 + .../vespa/searchlib/features/raw_score_feature.h | 44 + .../searchlib/features/reverseproximityfeature.cpp | 136 + .../searchlib/features/reverseproximityfeature.h | 78 + searchlib/src/vespa/searchlib/features/setup.cpp | 115 + searchlib/src/vespa/searchlib/features/setup.h | 19 + .../searchlib/features/subqueries_feature.cpp | 58 + .../vespa/searchlib/features/subqueries_feature.h | 43 + .../features/tensor_factory_blueprint.cpp | 46 + .../searchlib/features/tensor_factory_blueprint.h | 33 + .../features/tensor_from_attribute_executor.h | 56 + .../features/tensor_from_labels_feature.cpp | 122 + .../features/tensor_from_labels_feature.h | 36 + .../tensor_from_tensor_attribute_executor.cpp | 35 + .../tensor_from_tensor_attribute_executor.h | 30 + .../features/tensor_from_weighted_set_feature.cpp | 137 + .../features/tensor_from_weighted_set_feature.h | 33 + .../searchlib/features/term_field_md_feature.cpp | 115 + .../searchlib/features/term_field_md_feature.h | 60 + .../searchlib/features/termdistancecalculator.cpp | 81 + .../searchlib/features/termdistancecalculator.h | 81 + .../searchlib/features/termdistancefeature.cpp | 100 + .../vespa/searchlib/features/termdistancefeature.h | 74 + .../searchlib/features/termeditdistancefeature.cpp | 234 + .../searchlib/features/termeditdistancefeature.h | 153 + .../src/vespa/searchlib/features/termfeature.cpp | 91 + .../src/vespa/searchlib/features/termfeature.h | 68 + .../vespa/searchlib/features/terminfofeature.cpp | 55 + .../src/vespa/searchlib/features/terminfofeature.h | 33 + .../searchlib/features/text_similarity_feature.cpp | 220 + .../searchlib/features/text_similarity_feature.h | 75 + 
searchlib/src/vespa/searchlib/features/utils.cpp | 155 + searchlib/src/vespa/searchlib/features/utils.h | 234 + .../src/vespa/searchlib/features/valuefeature.cpp | 65 + .../src/vespa/searchlib/features/valuefeature.h | 59 + .../searchlib/features/weighted_set_parser.cpp | 19 + .../vespa/searchlib/features/weighted_set_parser.h | 28 + .../searchlib/features/weighted_set_parser.hpp | 48 + searchlib/src/vespa/searchlib/fef/.gitignore | 4 + searchlib/src/vespa/searchlib/fef/CMakeLists.txt | 44 + searchlib/src/vespa/searchlib/fef/Doxyfile | 1162 ++ searchlib/src/vespa/searchlib/fef/OWNERS | 1 + searchlib/src/vespa/searchlib/fef/blueprint.cpp | 76 + searchlib/src/vespa/searchlib/fef/blueprint.h | 252 + .../src/vespa/searchlib/fef/blueprintfactory.cpp | 49 + .../src/vespa/searchlib/fef/blueprintfactory.h | 62 + .../src/vespa/searchlib/fef/blueprintresolver.cpp | 227 + .../src/vespa/searchlib/fef/blueprintresolver.h | 150 + .../src/vespa/searchlib/fef/collection_type.cpp | 21 + .../src/vespa/searchlib/fef/collection_type.h | 51 + .../src/vespa/searchlib/fef/create-class-cpp.sh | 29 + .../src/vespa/searchlib/fef/create-class-h.sh | 27 + .../src/vespa/searchlib/fef/create-fef-includes.sh | 26 + .../src/vespa/searchlib/fef/create-interface.sh | 23 + searchlib/src/vespa/searchlib/fef/dist_doc_hp.sh | 3 + searchlib/src/vespa/searchlib/fef/feature_type.cpp | 26 + searchlib/src/vespa/searchlib/fef/feature_type.h | 40 + .../src/vespa/searchlib/fef/featureexecutor.cpp | 22 + .../src/vespa/searchlib/fef/featureexecutor.h | 185 + .../src/vespa/searchlib/fef/featurenamebuilder.cpp | 159 + .../src/vespa/searchlib/fef/featurenamebuilder.h | 75 + .../src/vespa/searchlib/fef/featurenameparser.cpp | 499 + .../src/vespa/searchlib/fef/featurenameparser.h | 100 + .../src/vespa/searchlib/fef/featureoverrider.cpp | 54 + .../src/vespa/searchlib/fef/featureoverrider.h | 46 + searchlib/src/vespa/searchlib/fef/fef.cpp | 12 + searchlib/src/vespa/searchlib/fef/fef.h | 62 + 
searchlib/src/vespa/searchlib/fef/fieldinfo.cpp | 23 + searchlib/src/vespa/searchlib/fef/fieldinfo.h | 112 + .../vespa/searchlib/fef/fieldpositionsiterator.cpp | 14 + .../vespa/searchlib/fef/fieldpositionsiterator.h | 164 + searchlib/src/vespa/searchlib/fef/fieldtype.cpp | 21 + searchlib/src/vespa/searchlib/fef/fieldtype.h | 51 + .../src/vespa/searchlib/fef/filetablefactory.cpp | 41 + .../src/vespa/searchlib/fef/filetablefactory.h | 34 + .../vespa/searchlib/fef/functiontablefactory.cpp | 134 + .../src/vespa/searchlib/fef/functiontablefactory.h | 59 + searchlib/src/vespa/searchlib/fef/handle.h | 17 + .../src/vespa/searchlib/fef/iblueprintregistry.h | 28 + .../src/vespa/searchlib/fef/idumpfeaturevisitor.h | 33 + .../src/vespa/searchlib/fef/iindexenvironment.h | 125 + .../src/vespa/searchlib/fef/indexproperties.cpp | 373 + .../src/vespa/searchlib/fef/indexproperties.h | 307 + .../src/vespa/searchlib/fef/iqueryenvironment.h | 94 + searchlib/src/vespa/searchlib/fef/itablefactory.h | 36 + searchlib/src/vespa/searchlib/fef/itablemanager.h | 29 + searchlib/src/vespa/searchlib/fef/itermdata.h | 89 + searchlib/src/vespa/searchlib/fef/itermfielddata.h | 48 + searchlib/src/vespa/searchlib/fef/location.cpp | 19 + searchlib/src/vespa/searchlib/fef/location.h | 111 + searchlib/src/vespa/searchlib/fef/matchdata.cpp | 30 + searchlib/src/vespa/searchlib/fef/matchdata.h | 181 + .../src/vespa/searchlib/fef/matchdatalayout.cpp | 35 + .../src/vespa/searchlib/fef/matchdatalayout.h | 64 + searchlib/src/vespa/searchlib/fef/objectstore.cpp | 39 + searchlib/src/vespa/searchlib/fef/objectstore.h | 37 + searchlib/src/vespa/searchlib/fef/parameter.cpp | 19 + searchlib/src/vespa/searchlib/fef/parameter.h | 41 + .../vespa/searchlib/fef/parameterdescriptions.cpp | 34 + .../vespa/searchlib/fef/parameterdescriptions.h | 197 + .../src/vespa/searchlib/fef/parametervalidator.cpp | 158 + .../src/vespa/searchlib/fef/parametervalidator.h | 83 + .../src/vespa/searchlib/fef/phrasesplitter.cpp | 110 + 
searchlib/src/vespa/searchlib/fef/phrasesplitter.h | 146 + searchlib/src/vespa/searchlib/fef/properties.cpp | 269 + searchlib/src/vespa/searchlib/fef/properties.h | 324 + .../src/vespa/searchlib/fef/queryproperties.cpp | 16 + .../src/vespa/searchlib/fef/queryproperties.h | 42 + searchlib/src/vespa/searchlib/fef/rank_program.cpp | 240 + searchlib/src/vespa/searchlib/fef/rank_program.h | 135 + searchlib/src/vespa/searchlib/fef/ranksetup.cpp | 186 + searchlib/src/vespa/searchlib/fef/ranksetup.h | 393 + .../src/vespa/searchlib/fef/simpletermdata.cpp | 33 + searchlib/src/vespa/searchlib/fef/simpletermdata.h | 195 + .../vespa/searchlib/fef/simpletermfielddata.cpp | 26 + .../src/vespa/searchlib/fef/simpletermfielddata.h | 84 + searchlib/src/vespa/searchlib/fef/sumexecutor.cpp | 21 + searchlib/src/vespa/searchlib/fef/sumexecutor.h | 32 + .../src/vespa/searchlib/fef/symmetrictable.cpp | 52 + searchlib/src/vespa/searchlib/fef/symmetrictable.h | 58 + searchlib/src/vespa/searchlib/fef/table.cpp | 22 + searchlib/src/vespa/searchlib/fef/table.h | 65 + searchlib/src/vespa/searchlib/fef/tablemanager.cpp | 36 + searchlib/src/vespa/searchlib/fef/tablemanager.h | 50 + .../src/vespa/searchlib/fef/termfieldmatchdata.cpp | 121 + .../src/vespa/searchlib/fef/termfieldmatchdata.h | 267 + .../vespa/searchlib/fef/termfieldmatchdataarray.h | 69 + .../searchlib/fef/termfieldmatchdataposition.cpp | 12 + .../searchlib/fef/termfieldmatchdataposition.h | 113 + .../vespa/searchlib/fef/termmatchdatamerger.cpp | 77 + .../src/vespa/searchlib/fef/termmatchdatamerger.h | 46 + searchlib/src/vespa/searchlib/fef/test/.gitignore | 3 + .../src/vespa/searchlib/fef/test/CMakeLists.txt | 14 + .../fef/test/dummy_dependency_handler.cpp | 60 + .../searchlib/fef/test/dummy_dependency_handler.h | 39 + .../src/vespa/searchlib/fef/test/featuretest.cpp | 159 + .../src/vespa/searchlib/fef/test/featuretest.h | 137 + searchlib/src/vespa/searchlib/fef/test/ftlib.cpp | 399 + searchlib/src/vespa/searchlib/fef/test/ftlib.h | 
238 + .../vespa/searchlib/fef/test/indexenvironment.cpp | 42 + .../vespa/searchlib/fef/test/indexenvironment.h | 83 + .../searchlib/fef/test/indexenvironmentbuilder.cpp | 28 + .../searchlib/fef/test/indexenvironmentbuilder.h | 50 + .../vespa/searchlib/fef/test/matchdatabuilder.cpp | 184 + .../vespa/searchlib/fef/test/matchdatabuilder.h | 150 + .../src/vespa/searchlib/fef/test/plugin/.gitignore | 3 + .../vespa/searchlib/fef/test/plugin/CMakeLists.txt | 12 + .../vespa/searchlib/fef/test/plugin/cfgvalue.cpp | 58 + .../src/vespa/searchlib/fef/test/plugin/cfgvalue.h | 34 + .../src/vespa/searchlib/fef/test/plugin/chain.cpp | 69 + .../src/vespa/searchlib/fef/test/plugin/chain.h | 38 + .../src/vespa/searchlib/fef/test/plugin/double.cpp | 59 + .../src/vespa/searchlib/fef/test/plugin/double.h | 42 + .../src/vespa/searchlib/fef/test/plugin/query.cpp | 46 + .../src/vespa/searchlib/fef/test/plugin/query.h | 30 + .../src/vespa/searchlib/fef/test/plugin/setup.cpp | 35 + .../src/vespa/searchlib/fef/test/plugin/setup.h | 16 + .../vespa/searchlib/fef/test/plugin/staticrank.cpp | 59 + .../vespa/searchlib/fef/test/plugin/staticrank.h | 41 + .../src/vespa/searchlib/fef/test/plugin/sum.cpp | 74 + .../src/vespa/searchlib/fef/test/plugin/sum.h | 38 + .../vespa/searchlib/fef/test/queryenvironment.cpp | 20 + .../vespa/searchlib/fef/test/queryenvironment.h | 94 + .../searchlib/fef/test/queryenvironmentbuilder.cpp | 66 + .../searchlib/fef/test/queryenvironmentbuilder.h | 71 + .../src/vespa/searchlib/fef/test/rankresult.cpp | 113 + .../src/vespa/searchlib/fef/test/rankresult.h | 113 + searchlib/src/vespa/searchlib/fef/utils.cpp | 75 + searchlib/src/vespa/searchlib/fef/utils.h | 37 + .../src/vespa/searchlib/fef/verify_feature.cpp | 29 + searchlib/src/vespa/searchlib/fef/verify_feature.h | 30 + .../src/vespa/searchlib/grouping/CMakeLists.txt | 9 + searchlib/src/vespa/searchlib/grouping/OWNERS | 1 + searchlib/src/vespa/searchlib/grouping/collect.cpp | 113 + 
searchlib/src/vespa/searchlib/grouping/collect.h | 105 + .../src/vespa/searchlib/grouping/forcelink.hpp | 13 + .../searchlib/grouping/groupandcollectengine.cpp | 50 + .../searchlib/grouping/groupandcollectengine.h | 21 + .../src/vespa/searchlib/grouping/groupengine.cpp | 227 + .../src/vespa/searchlib/grouping/groupengine.h | 139 + .../vespa/searchlib/grouping/groupingengine.cpp | 110 + .../src/vespa/searchlib/grouping/groupingengine.h | 33 + searchlib/src/vespa/searchlib/grouping/groupref.h | 22 + .../src/vespa/searchlib/grouping/hyperloglog.h | 140 + searchlib/src/vespa/searchlib/grouping/sketch.h | 260 + searchlib/src/vespa/searchlib/index/.gitignore | 6 + searchlib/src/vespa/searchlib/index/CMakeLists.txt | 18 + searchlib/src/vespa/searchlib/index/OWNERS | 1 + .../src/vespa/searchlib/index/bitvectorkeys.h | 43 + .../src/vespa/searchlib/index/dictionaryfile.cpp | 45 + .../src/vespa/searchlib/index/dictionaryfile.h | 138 + searchlib/src/vespa/searchlib/index/docbuilder.cpp | 930 ++ searchlib/src/vespa/searchlib/index/docbuilder.h | 432 + .../src/vespa/searchlib/index/docidandfeatures.cpp | 105 + .../src/vespa/searchlib/index/docidandfeatures.h | 338 + .../src/vespa/searchlib/index/doctypebuilder.cpp | 356 + .../src/vespa/searchlib/index/doctypebuilder.h | 95 + .../searchlib/index/dummyfileheadercontext.cpp | 70 + .../vespa/searchlib/index/dummyfileheadercontext.h | 47 + .../src/vespa/searchlib/index/indexbuilder.cpp | 28 + searchlib/src/vespa/searchlib/index/indexbuilder.h | 62 + .../vespa/searchlib/index/olddictionaryfile.cpp | 115 + .../src/vespa/searchlib/index/olddictionaryfile.h | 208 + .../vespa/searchlib/index/postinglistcountfile.cpp | 60 + .../vespa/searchlib/index/postinglistcountfile.h | 140 + .../vespa/searchlib/index/postinglistcounts.cpp | 90 + .../src/vespa/searchlib/index/postinglistcounts.h | 144 + .../src/vespa/searchlib/index/postinglistfile.cpp | 170 + .../src/vespa/searchlib/index/postinglistfile.h | 344 + 
.../vespa/searchlib/index/postinglisthandle.cpp | 27 + .../src/vespa/searchlib/index/postinglisthandle.h | 90 + .../vespa/searchlib/index/postinglistparams.cpp | 138 + .../src/vespa/searchlib/index/postinglistparams.h | 48 + searchlib/src/vespa/searchlib/index/schemautil.cpp | 217 + searchlib/src/vespa/searchlib/index/schemautil.h | 234 + .../src/vespa/searchlib/memoryindex/.gitignore | 3 + .../src/vespa/searchlib/memoryindex/CMakeLists.txt | 17 + searchlib/src/vespa/searchlib/memoryindex/OWNERS | 2 + .../memoryindex/compact_document_words_store.cpp | 176 + .../memoryindex/compact_document_words_store.h | 102 + .../src/vespa/searchlib/memoryindex/dictionary.cpp | 68 + .../src/vespa/searchlib/memoryindex/dictionary.h | 64 + .../searchlib/memoryindex/document_remover.cpp | 67 + .../vespa/searchlib/memoryindex/document_remover.h | 63 + .../searchlib/memoryindex/documentinverter.cpp | 206 + .../vespa/searchlib/memoryindex/documentinverter.h | 128 + .../vespa/searchlib/memoryindex/featurestore.cpp | 167 + .../src/vespa/searchlib/memoryindex/featurestore.h | 274 + .../vespa/searchlib/memoryindex/fieldinverter.cpp | 577 + .../vespa/searchlib/memoryindex/fieldinverter.h | 449 + .../memoryindex/i_document_insert_listener.h | 23 + .../memoryindex/i_document_remove_listener.h | 28 + .../memoryindex/iordereddocumentinserter.h | 52 + .../searchlib/memoryindex/memoryfieldindex.cpp | 342 + .../vespa/searchlib/memoryindex/memoryfieldindex.h | 283 + .../vespa/searchlib/memoryindex/memoryindex.cpp | 308 + .../src/vespa/searchlib/memoryindex/memoryindex.h | 184 + .../memoryindex/ordereddocumentinserter.cpp | 158 + .../memoryindex/ordereddocumentinserter.h | 80 + .../searchlib/memoryindex/postingiterator.cpp | 74 + .../vespa/searchlib/memoryindex/postingiterator.h | 43 + .../searchlib/memoryindex/urlfieldinverter.cpp | 384 + .../vespa/searchlib/memoryindex/urlfieldinverter.h | 79 + .../src/vespa/searchlib/memoryindex/wordstore.cpp | 59 + .../src/vespa/searchlib/memoryindex/wordstore.h 
| 41 + .../src/vespa/searchlib/parsequery/.gitignore | 6 + .../src/vespa/searchlib/parsequery/CMakeLists.txt | 8 + searchlib/src/vespa/searchlib/parsequery/OWNERS | 1 + searchlib/src/vespa/searchlib/parsequery/parse.cpp | 239 + searchlib/src/vespa/searchlib/parsequery/parse.h | 232 + .../searchlib/parsequery/simplequerystack.cpp | 354 + .../vespa/searchlib/parsequery/simplequerystack.h | 108 + .../searchlib/parsequery/stackdumpiterator.cpp | 297 + .../vespa/searchlib/parsequery/stackdumpiterator.h | 165 + .../src/vespa/searchlib/predicate/CMakeLists.txt | 14 + searchlib/src/vespa/searchlib/predicate/OWNERS | 1 + .../predicate/document_features_store.cpp | 293 + .../searchlib/predicate/document_features_store.h | 89 + .../predicate/predicate_bounds_posting_list.h | 96 + .../src/vespa/searchlib/predicate/predicate_hash.h | 125 + .../vespa/searchlib/predicate/predicate_index.cpp | 288 + .../vespa/searchlib/predicate/predicate_index.h | 131 + .../searchlib/predicate/predicate_interval.cpp | 24 + .../vespa/searchlib/predicate/predicate_interval.h | 64 + .../predicate/predicate_interval_posting_list.h | 68 + .../predicate/predicate_interval_store.cpp | 124 + .../searchlib/predicate/predicate_interval_store.h | 119 + .../searchlib/predicate/predicate_posting_list.h | 52 + .../predicate/predicate_range_expander.cpp | 17 + .../searchlib/predicate/predicate_range_expander.h | 122 + .../predicate/predicate_range_term_expander.h | 99 + .../searchlib/predicate/predicate_ref_cache.h | 160 + .../predicate/predicate_tree_analyzer.cpp | 168 + .../searchlib/predicate/predicate_tree_analyzer.h | 43 + .../predicate/predicate_tree_annotator.cpp | 256 + .../searchlib/predicate/predicate_tree_annotator.h | 51 + .../predicate_zero_constraint_posting_list.cpp | 27 + .../predicate_zero_constraint_posting_list.h | 28 + .../predicate_zstar_compressed_posting_list.h | 89 + .../src/vespa/searchlib/predicate/simple_index.cpp | 25 + .../src/vespa/searchlib/predicate/simple_index.h | 261 + 
.../src/vespa/searchlib/predicate/simple_index.hpp | 315 + .../src/vespa/searchlib/predicate/tree_crumbs.h | 44 + searchlib/src/vespa/searchlib/query/.gitignore | 6 + searchlib/src/vespa/searchlib/query/CMakeLists.txt | 12 + searchlib/src/vespa/searchlib/query/OWNERS | 1 + searchlib/src/vespa/searchlib/query/base.cpp | 16 + searchlib/src/vespa/searchlib/query/base.h | 141 + searchlib/src/vespa/searchlib/query/posocc.h | 30 + searchlib/src/vespa/searchlib/query/query.cpp | 348 + searchlib/src/vespa/searchlib/query/query.h | 212 + searchlib/src/vespa/searchlib/query/querynode.cpp | 199 + searchlib/src/vespa/searchlib/query/querynode.h | 66 + .../vespa/searchlib/query/querynoderesultbase.cpp | 8 + .../vespa/searchlib/query/querynoderesultbase.h | 35 + searchlib/src/vespa/searchlib/query/queryterm.cpp | 469 + searchlib/src/vespa/searchlib/query/queryterm.h | 190 + .../src/vespa/searchlib/query/tree/.gitignore | 3 + .../src/vespa/searchlib/query/tree/CMakeLists.txt | 13 + searchlib/src/vespa/searchlib/query/tree/OWNERS | 1 + .../searchlib/query/tree/customtypetermvisitor.h | 37 + .../vespa/searchlib/query/tree/customtypevisitor.h | 105 + .../vespa/searchlib/query/tree/intermediate.cpp | 22 + .../src/vespa/searchlib/query/tree/intermediate.h | 29 + .../searchlib/query/tree/intermediatenodes.cpp | 37 + .../vespa/searchlib/query/tree/intermediatenodes.h | 143 + .../src/vespa/searchlib/query/tree/location.cpp | 61 + .../src/vespa/searchlib/query/tree/location.h | 37 + searchlib/src/vespa/searchlib/query/tree/node.h | 26 + searchlib/src/vespa/searchlib/query/tree/point.h | 23 + .../searchlib/query/tree/predicate_query_term.h | 76 + .../vespa/searchlib/query/tree/querybuilder.cpp | 103 + .../src/vespa/searchlib/query/tree/querybuilder.h | 358 + .../vespa/searchlib/query/tree/querynodemixin.h | 28 + .../vespa/searchlib/query/tree/queryreplicator.h | 171 + .../vespa/searchlib/query/tree/querytreecreator.h | 32 + .../src/vespa/searchlib/query/tree/queryvisitor.h | 58 + 
searchlib/src/vespa/searchlib/query/tree/range.cpp | 24 + searchlib/src/vespa/searchlib/query/tree/range.h | 30 + .../src/vespa/searchlib/query/tree/rectangle.h | 26 + .../src/vespa/searchlib/query/tree/simplequery.h | 132 + .../searchlib/query/tree/stackdumpcreator.cpp | 301 + .../vespa/searchlib/query/tree/stackdumpcreator.h | 19 + .../searchlib/query/tree/stackdumpquerycreator.h | 175 + .../searchlib/query/tree/templatetermvisitor.h | 59 + searchlib/src/vespa/searchlib/query/tree/term.cpp | 27 + searchlib/src/vespa/searchlib/query/tree/term.h | 78 + .../src/vespa/searchlib/query/tree/termnodes.cpp | 29 + .../src/vespa/searchlib/query/tree/termnodes.h | 123 + searchlib/src/vespa/searchlib/query/weight.h | 52 + searchlib/src/vespa/searchlib/queryeval/.gitignore | 3 + .../src/vespa/searchlib/queryeval/CMakeLists.txt | 54 + searchlib/src/vespa/searchlib/queryeval/OWNERS | 1 + .../src/vespa/searchlib/queryeval/andnotsearch.cpp | 163 + .../src/vespa/searchlib/queryeval/andnotsearch.h | 101 + .../src/vespa/searchlib/queryeval/andsearch.cpp | 123 + .../src/vespa/searchlib/queryeval/andsearch.h | 37 + .../vespa/searchlib/queryeval/andsearchnostrict.h | 61 + .../vespa/searchlib/queryeval/andsearchstrict.h | 109 + .../vespa/searchlib/queryeval/begin_and_end_id.h | 10 + .../src/vespa/searchlib/queryeval/blueprint.cpp | 562 + .../src/vespa/searchlib/queryeval/blueprint.h | 314 + .../queryeval/booleanmatchiteratorwrapper.cpp | 48 + .../queryeval/booleanmatchiteratorwrapper.h | 64 + .../vespa/searchlib/queryeval/create-class-cpp.sh | 29 + .../vespa/searchlib/queryeval/create-class-h.sh | 27 + .../vespa/searchlib/queryeval/create-interface.sh | 23 + .../queryeval/create_blueprint_visitor_helper.cpp | 20 + .../queryeval/create_blueprint_visitor_helper.h | 143 + .../queryeval/document_weight_search_iterator.cpp | 4 + .../queryeval/document_weight_search_iterator.h | 62 + .../searchlib/queryeval/dot_product_blueprint.cpp | 92 + .../searchlib/queryeval/dot_product_blueprint.h | 47 
+ .../searchlib/queryeval/dot_product_search.cpp | 154 + .../vespa/searchlib/queryeval/dot_product_search.h | 46 + .../src/vespa/searchlib/queryeval/emptysearch.cpp | 29 + .../src/vespa/searchlib/queryeval/emptysearch.h | 27 + .../vespa/searchlib/queryeval/equiv_blueprint.cpp | 74 + .../vespa/searchlib/queryeval/equiv_blueprint.h | 36 + .../src/vespa/searchlib/queryeval/equivsearch.cpp | 72 + .../src/vespa/searchlib/queryeval/equivsearch.h | 32 + .../searchlib/queryeval/fake_requestcontext.cpp | 15 + .../searchlib/queryeval/fake_requestcontext.h | 34 + .../src/vespa/searchlib/queryeval/fake_result.cpp | 39 + .../src/vespa/searchlib/queryeval/fake_result.h | 108 + .../src/vespa/searchlib/queryeval/fake_search.cpp | 56 + .../src/vespa/searchlib/queryeval/fake_search.h | 45 + .../vespa/searchlib/queryeval/fake_searchable.cpp | 108 + .../vespa/searchlib/queryeval/fake_searchable.h | 68 + .../src/vespa/searchlib/queryeval/field_spec.cpp | 19 + .../src/vespa/searchlib/queryeval/field_spec.h | 119 + .../searchlib/queryeval/get_weight_from_node.cpp | 48 + .../searchlib/queryeval/get_weight_from_node.h | 15 + .../src/vespa/searchlib/queryeval/hitcollector.cpp | 313 + .../src/vespa/searchlib/queryeval/hitcollector.h | 214 + .../queryeval/intermediate_blueprints.cpp | 584 + .../searchlib/queryeval/intermediate_blueprints.h | 181 + .../vespa/searchlib/queryeval/irequestcontext.h | 33 + .../vespa/searchlib/queryeval/isourceselector.cpp | 16 + .../vespa/searchlib/queryeval/isourceselector.h | 103 + .../vespa/searchlib/queryeval/iterator_pack.cpp | 9 + .../src/vespa/searchlib/queryeval/iterator_pack.h | 84 + .../src/vespa/searchlib/queryeval/iterators.cpp | 24 + .../src/vespa/searchlib/queryeval/iterators.h | 38 + .../vespa/searchlib/queryeval/leaf_blueprints.cpp | 91 + .../vespa/searchlib/queryeval/leaf_blueprints.h | 82 + .../queryeval/monitoring_dump_iterator.cpp | 37 + .../searchlib/queryeval/monitoring_dump_iterator.h | 38 + .../queryeval/monitoring_search_iterator.cpp | 239 
+ .../queryeval/monitoring_search_iterator.h | 131 + .../searchlib/queryeval/multibitvectoriterator.cpp | 258 + .../searchlib/queryeval/multibitvectoriterator.h | 39 + .../src/vespa/searchlib/queryeval/multisearch.cpp | 95 + .../src/vespa/searchlib/queryeval/multisearch.h | 59 + .../src/vespa/searchlib/queryeval/nearsearch.cpp | 313 + .../src/vespa/searchlib/queryeval/nearsearch.h | 157 + .../src/vespa/searchlib/queryeval/orlikesearch.h | 73 + .../src/vespa/searchlib/queryeval/orsearch.cpp | 119 + searchlib/src/vespa/searchlib/queryeval/orsearch.h | 33 + .../src/vespa/searchlib/queryeval/posting_info.h | 45 + .../searchlib/queryeval/predicate_blueprint.cpp | 345 + .../searchlib/queryeval/predicate_blueprint.h | 94 + .../vespa/searchlib/queryeval/predicate_search.cpp | 310 + .../vespa/searchlib/queryeval/predicate_search.h | 71 + .../src/vespa/searchlib/queryeval/ranksearch.cpp | 64 + .../src/vespa/searchlib/queryeval/ranksearch.h | 33 + searchlib/src/vespa/searchlib/queryeval/scores.h | 21 + .../src/vespa/searchlib/queryeval/searchable.cpp | 33 + .../src/vespa/searchlib/queryeval/searchable.h | 60 + .../vespa/searchlib/queryeval/searchiterator.cpp | 129 + .../src/vespa/searchlib/queryeval/searchiterator.h | 345 + .../queryeval/simple_phrase_blueprint.cpp | 105 + .../searchlib/queryeval/simple_phrase_blueprint.h | 49 + .../searchlib/queryeval/simple_phrase_search.cpp | 201 + .../searchlib/queryeval/simple_phrase_search.h | 59 + .../src/vespa/searchlib/queryeval/simpleresult.cpp | 67 + .../src/vespa/searchlib/queryeval/simpleresult.h | 87 + .../src/vespa/searchlib/queryeval/simplesearch.cpp | 49 + .../src/vespa/searchlib/queryeval/simplesearch.h | 41 + .../searchlib/queryeval/sourceblendersearch.cpp | 187 + .../searchlib/queryeval/sourceblendersearch.h | 92 + .../src/vespa/searchlib/queryeval/split_float.cpp | 29 + .../src/vespa/searchlib/queryeval/split_float.h | 24 + .../src/vespa/searchlib/queryeval/termasstring.cpp | 120 + 
.../src/vespa/searchlib/queryeval/termasstring.h | 30 + .../queryeval/termwise_blueprint_helper.cpp | 43 + .../queryeval/termwise_blueprint_helper.h | 31 + .../vespa/searchlib/queryeval/termwise_search.cpp | 62 + .../vespa/searchlib/queryeval/termwise_search.h | 27 + .../vespa/searchlib/queryeval/test/CMakeLists.txt | 5 + .../vespa/searchlib/queryeval/test/eagerchild.h | 23 + .../src/vespa/searchlib/queryeval/test/leafspec.h | 61 + .../vespa/searchlib/queryeval/test/searchhistory.h | 58 + .../vespa/searchlib/queryeval/test/trackedsearch.h | 74 + .../src/vespa/searchlib/queryeval/test/wandspec.h | 53 + .../src/vespa/searchlib/queryeval/truesearch.cpp | 33 + .../src/vespa/searchlib/queryeval/truesearch.h | 25 + .../src/vespa/searchlib/queryeval/unpackinfo.cpp | 104 + .../src/vespa/searchlib/queryeval/unpackinfo.h | 69 + .../vespa/searchlib/queryeval/wand/CMakeLists.txt | 10 + .../queryeval/wand/parallel_weak_and_blueprint.cpp | 126 + .../queryeval/wand/parallel_weak_and_blueprint.h | 75 + .../queryeval/wand/parallel_weak_and_search.cpp | 263 + .../queryeval/wand/parallel_weak_and_search.h | 85 + .../vespa/searchlib/queryeval/wand/wand_parts.cpp | 27 + .../vespa/searchlib/queryeval/wand/wand_parts.h | 615 + .../searchlib/queryeval/wand/weak_and_heap.cpp | 41 + .../vespa/searchlib/queryeval/wand/weak_and_heap.h | 67 + .../searchlib/queryeval/wand/weak_and_search.cpp | 143 + .../searchlib/queryeval/wand/weak_and_search.h | 26 + .../queryeval/weighted_set_term_blueprint.cpp | 89 + .../queryeval/weighted_set_term_blueprint.h | 47 + .../queryeval/weighted_set_term_search.cpp | 161 + .../searchlib/queryeval/weighted_set_term_search.h | 41 + searchlib/src/vespa/searchlib/test/.gitignore | 6 + searchlib/src/vespa/searchlib/test/CMakeLists.txt | 12 + searchlib/src/vespa/searchlib/test/OWNERS | 1 + .../src/vespa/searchlib/test/diskindex/.gitignore | 2 + .../vespa/searchlib/test/diskindex/CMakeLists.txt | 7 + .../searchlib/test/diskindex/testdiskindex.cpp | 120 + 
.../vespa/searchlib/test/diskindex/testdiskindex.h | 26 + .../test/diskindex/threelevelcountbuffers.cpp | 133 + .../test/diskindex/threelevelcountbuffers.h | 82 + .../test/document_weight_attribute_helper.cpp | 9 + .../test/document_weight_attribute_helper.h | 55 + .../src/vespa/searchlib/test/fakedata/.gitignore | 2 + .../vespa/searchlib/test/fakedata/CMakeLists.txt | 16 + .../vespa/searchlib/test/fakedata/bitdecode64.cpp | 22 + .../vespa/searchlib/test/fakedata/bitdecode64.h | 91 + .../vespa/searchlib/test/fakedata/bitencode64.cpp | 37 + .../vespa/searchlib/test/fakedata/bitencode64.h | 59 + .../test/fakedata/fakeegcompr64filterocc.cpp | 1521 +++ .../test/fakedata/fakeegcompr64filterocc.h | 121 + .../searchlib/test/fakedata/fakefilterocc.cpp | 206 + .../vespa/searchlib/test/fakedata/fakefilterocc.h | 74 + .../searchlib/test/fakedata/fakememtreeocc.cpp | 430 + .../vespa/searchlib/test/fakedata/fakememtreeocc.h | 287 + .../vespa/searchlib/test/fakedata/fakeposting.cpp | 61 + .../vespa/searchlib/test/fakedata/fakeposting.h | 105 + .../src/vespa/searchlib/test/fakedata/fakeword.cpp | 796 ++ .../src/vespa/searchlib/test/fakedata/fakeword.h | 355 + .../vespa/searchlib/test/fakedata/fakewordset.cpp | 161 + .../vespa/searchlib/test/fakedata/fakewordset.h | 92 + .../searchlib/test/fakedata/fakezcbfilterocc.cpp | 268 + .../searchlib/test/fakedata/fakezcbfilterocc.h | 75 + .../searchlib/test/fakedata/fakezcfilterocc.cpp | 1823 +++ .../searchlib/test/fakedata/fakezcfilterocc.h | 119 + .../vespa/searchlib/test/fakedata/fpfactory.cpp | 120 + .../src/vespa/searchlib/test/fakedata/fpfactory.h | 84 + searchlib/src/vespa/searchlib/test/initrange.cpp | 185 + searchlib/src/vespa/searchlib/test/initrange.h | 38 + .../searchlib/test/memoryindex/CMakeLists.txt | 5 + .../test/memoryindex/ordereddocumentinserter.h | 119 + searchlib/src/vespa/searchlib/test/statefile.cpp | 48 + searchlib/src/vespa/searchlib/test/statefile.h | 22 + searchlib/src/vespa/searchlib/test/statestring.cpp | 98 + 
searchlib/src/vespa/searchlib/test/statestring.h | 24 + .../src/vespa/searchlib/transactionlog/.gitignore | 6 + .../vespa/searchlib/transactionlog/CMakeLists.txt | 14 + .../src/vespa/searchlib/transactionlog/OWNERS | 1 + .../src/vespa/searchlib/transactionlog/common.cpp | 107 + .../src/vespa/searchlib/transactionlog/common.h | 100 + .../src/vespa/searchlib/transactionlog/domain.cpp | 405 + .../src/vespa/searchlib/transactionlog/domain.h | 125 + .../vespa/searchlib/transactionlog/domainpart.cpp | 681 + .../vespa/searchlib/transactionlog/domainpart.h | 123 + .../vespa/searchlib/transactionlog/nosyncproxy.cpp | 28 + .../vespa/searchlib/transactionlog/nosyncproxy.h | 27 + .../src/vespa/searchlib/transactionlog/session.cpp | 275 + .../src/vespa/searchlib/transactionlog/session.h | 94 + .../src/vespa/searchlib/transactionlog/syncproxy.h | 27 + .../transactionlog/trans_log_server_explorer.cpp | 71 + .../transactionlog/trans_log_server_explorer.h | 27 + .../searchlib/transactionlog/translogclient.cpp | 402 + .../searchlib/transactionlog/translogclient.h | 140 + .../searchlib/transactionlog/translogserver.cpp | 672 + .../searchlib/transactionlog/translogserver.h | 110 + .../searchlib/transactionlog/translogserverapp.cpp | 68 + .../searchlib/transactionlog/translogserverapp.h | 46 + searchlib/src/vespa/searchlib/util/.gitignore | 6 + searchlib/src/vespa/searchlib/util/CMakeLists.txt | 25 + .../src/vespa/searchlib/util/bufferwriter.cpp | 43 + searchlib/src/vespa/searchlib/util/bufferwriter.h | 55 + searchlib/src/vespa/searchlib/util/comprbuffer.cpp | 147 + searchlib/src/vespa/searchlib/util/comprbuffer.h | 98 + searchlib/src/vespa/searchlib/util/comprfile.cpp | 650 + searchlib/src/vespa/searchlib/util/comprfile.h | 456 + searchlib/src/vespa/searchlib/util/dirtraverse.cpp | 289 + searchlib/src/vespa/searchlib/util/dirtraverse.h | 67 + .../vespa/searchlib/util/drainingbufferwriter.cpp | 41 + .../vespa/searchlib/util/drainingbufferwriter.h | 32 + 
searchlib/src/vespa/searchlib/util/filealign.cpp | 145 + searchlib/src/vespa/searchlib/util/filealign.h | 138 + .../src/vespa/searchlib/util/fileheadertk.cpp | 23 + searchlib/src/vespa/searchlib/util/fileheadertk.h | 23 + searchlib/src/vespa/searchlib/util/filekit.cpp | 108 + searchlib/src/vespa/searchlib/util/filekit.h | 35 + .../vespa/searchlib/util/filesizecalculator.cpp | 59 + .../src/vespa/searchlib/util/filesizecalculator.h | 26 + searchlib/src/vespa/searchlib/util/fileutil.cpp | 176 + searchlib/src/vespa/searchlib/util/fileutil.h | 389 + .../vespa/searchlib/util/foldedstringcompare.cpp | 82 + .../src/vespa/searchlib/util/foldedstringcompare.h | 58 + searchlib/src/vespa/searchlib/util/inline.h | 5 + .../src/vespa/searchlib/util/ioerrorhandler.cpp | 96 + .../src/vespa/searchlib/util/ioerrorhandler.h | 61 + searchlib/src/vespa/searchlib/util/logutil.cpp | 54 + searchlib/src/vespa/searchlib/util/logutil.h | 29 + searchlib/src/vespa/searchlib/util/memorytub.h | 94 + .../src/vespa/searchlib/util/memorytub_impl.h | 202 + searchlib/src/vespa/searchlib/util/memoryusage.h | 123 + .../vespa/searchlib/util/postingpriorityqueue.h | 258 + searchlib/src/vespa/searchlib/util/rand48.h | 44 + .../src/vespa/searchlib/util/randomgenerator.h | 63 + searchlib/src/vespa/searchlib/util/rawbuf.cpp | 360 + searchlib/src/vespa/searchlib/util/rawbuf.h | 163 + searchlib/src/vespa/searchlib/util/runnable.h | 42 + .../src/vespa/searchlib/util/searchable_stats.h | 44 + .../src/vespa/searchlib/util/sigbushandler.cpp | 168 + searchlib/src/vespa/searchlib/util/sigbushandler.h | 60 + .../util/slime_output_raw_buf_adapter.cpp | 8 + .../searchlib/util/slime_output_raw_buf_adapter.h | 24 + searchlib/src/vespa/searchlib/util/sort.h | 143 + searchlib/src/vespa/searchlib/util/statebuf.cpp | 215 + searchlib/src/vespa/searchlib/util/statebuf.h | 92 + searchlib/src/vespa/searchlib/util/statefile.cpp | 460 + searchlib/src/vespa/searchlib/util/statefile.h | 106 + 
searchlib/src/vespa/searchlib/util/stringenum.cpp | 131 + searchlib/src/vespa/searchlib/util/stringenum.h | 147 + searchlib/src/vespa/searchlib/util/url.cpp | 555 + searchlib/src/vespa/searchlib/util/url.h | 277 + 2408 files changed, 320134 insertions(+) create mode 100644 searchlib/src/.gitignore create mode 100644 searchlib/src/Doxyfile create mode 100644 searchlib/src/apps/docstore/.gitignore create mode 100644 searchlib/src/apps/docstore/CMakeLists.txt create mode 100644 searchlib/src/apps/docstore/benchmarkdatastore.cpp create mode 100644 searchlib/src/apps/docstore/create-idx-from-dat.cpp create mode 100644 searchlib/src/apps/docstore/documentstoreinspect.cpp create mode 100644 searchlib/src/apps/docstore/verifylogdatastore.cpp create mode 100644 searchlib/src/apps/expgolomb/.gitignore create mode 100644 searchlib/src/apps/expgolomb/CMakeLists.txt create mode 100644 searchlib/src/apps/expgolomb/expgolomb.cpp create mode 100644 searchlib/src/apps/fileheaderinspect/.gitignore create mode 100644 searchlib/src/apps/fileheaderinspect/CMakeLists.txt create mode 100644 searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp create mode 100644 searchlib/src/apps/loadattribute/.gitignore create mode 100644 searchlib/src/apps/loadattribute/CMakeLists.txt create mode 100644 searchlib/src/apps/loadattribute/loadattribute.cpp create mode 100644 searchlib/src/apps/loadattribute/loadattribute.rb create mode 100644 searchlib/src/apps/tests/.gitignore create mode 100644 searchlib/src/apps/tests/CMakeLists.txt create mode 100644 searchlib/src/apps/tests/biglogtest.cpp create mode 100644 searchlib/src/apps/tests/btreestress_test.cpp create mode 100644 searchlib/src/apps/tests/memoryindexstress_test.cpp create mode 100644 searchlib/src/apps/uniform/.gitignore create mode 100644 searchlib/src/apps/uniform/CMakeLists.txt create mode 100644 searchlib/src/apps/uniform/uniform.cpp create mode 100644 searchlib/src/apps/vespa-index-inspect/.gitignore create mode 100644 
searchlib/src/apps/vespa-index-inspect/CMakeLists.txt create mode 100644 searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp create mode 100644 searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore create mode 100644 searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt create mode 100644 searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression create mode 100644 searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp create mode 100755 searchlib/src/forcelink.sh create mode 100644 searchlib/src/main/OWNERS create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtConverter.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FileCaseList.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/QueryTerm.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java create mode 100644 
searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java create mode 100755 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java create mode 100755 
searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java create mode 100644 searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java create mode 100755 searchlib/src/main/javacc/RankingExpressionParser.jj create mode 100755 searchlib/src/main/javacc/TreeNetParser.jj create mode 100755 searchlib/src/main/sh/evaluation-benchmark create mode 100644 searchlib/src/main/sh/ga create mode 100755 searchlib/src/main/sh/gbdt-analysis create mode 100755 searchlib/src/main/sh/vespa-gbdt-converter create mode 100755 searchlib/src/main/sh/vespa-treenet-converter create mode 100644 searchlib/src/test/OWNERS create mode 100644 searchlib/src/test/files/features01.expression create mode 100644 searchlib/src/test/files/features02.expression create mode 100644 searchlib/src/test/files/features03.expression create mode 100644 searchlib/src/test/files/features04.expression create mode 100644 searchlib/src/test/files/gbdt.expression create mode 100644 searchlib/src/test/files/gbdt.ext.xml create mode 100644 searchlib/src/test/files/gbdt.xml create mode 100644 searchlib/src/test/files/gbdt_empty_tree.xml create mode 100644 searchlib/src/test/files/gbdt_err.xml create mode 100644 searchlib/src/test/files/gbdt_set_inclusion_test.xml create mode 100644 searchlib/src/test/files/gbdt_tree_response.xml create mode 100644 searchlib/src/test/files/mlr/cases-illegal1.csv create mode 100644 searchlib/src/test/files/mlr/cases-illegal2.csv create mode 100644 searchlib/src/test/files/mlr/cases-linear.csv create mode 100644 searchlib/src/test/files/mlr/cases.csv create mode 100644 searchlib/src/test/files/ranking01.expression create mode 100644 searchlib/src/test/files/ranking02.expression create mode 100644 searchlib/src/test/files/ranking03.expression create mode 100644 searchlib/src/test/files/ranking04.expression create mode 100644 searchlib/src/test/files/ranking05.expression create mode 100644 searchlib/src/test/files/ranking06.expression create mode 100644 
searchlib/src/test/files/ranking07.expression create mode 100644 searchlib/src/test/files/ranking08.expression create mode 100644 searchlib/src/test/files/s-expression.vre create mode 100644 searchlib/src/test/files/simple.expression create mode 100644 searchlib/src/test/files/testAggregatorResults create mode 100644 searchlib/src/test/files/testFunctionNodes create mode 100644 searchlib/src/test/files/testGroup create mode 100644 searchlib/src/test/files/testGrouping create mode 100644 searchlib/src/test/files/testGroupingLevel create mode 100644 searchlib/src/test/files/testHitCollection create mode 100644 searchlib/src/test/files/testResultTypes create mode 100644 searchlib/src/test/files/testSpecialNodes create mode 100644 searchlib/src/test/files/treenet01.model create mode 100644 searchlib/src/test/files/treenet02.model create mode 100644 searchlib/src/test/files/treenet03.model create mode 100644 searchlib/src/test/files/treenet04.model create mode 100644 searchlib/src/test/files/treenet05.model create mode 100644 searchlib/src/test/files/treenet06.model create mode 100644 searchlib/src/test/files/treenet07.model create mode 100644 searchlib/src/test/files/treenet08.model create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java create mode 100644 
searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java create mode 100644 
searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java create mode 100644 
searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java create mode 100644 searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java create mode 100644 
searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java create mode 100755 searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java create mode 100644 searchlib/src/testlist.txt create mode 100644 searchlib/src/tests/.gitignore create mode 100644 searchlib/src/tests/aggregator/.gitignore create mode 100644 searchlib/src/tests/aggregator/CMakeLists.txt create mode 100644 searchlib/src/tests/aggregator/DESC create mode 100644 searchlib/src/tests/aggregator/FILES create mode 100644 searchlib/src/tests/aggregator/attr_test.cpp create mode 100644 searchlib/src/tests/aggregator/perdocexpr.cpp create mode 100644 searchlib/src/tests/alignment/.gitignore create mode 100644 searchlib/src/tests/alignment/CMakeLists.txt create mode 100644 searchlib/src/tests/alignment/DESC create mode 100644 searchlib/src/tests/alignment/FILES create mode 100644 searchlib/src/tests/alignment/alignment.cpp create mode 100644 searchlib/src/tests/attribute/.gitignore create mode 100644 searchlib/src/tests/attribute/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/DESC create mode 100644 searchlib/src/tests/attribute/FILES create mode 100644 searchlib/src/tests/attribute/attribute_test.cpp create mode 100644 searchlib/src/tests/attribute/attribute_test.sh create mode 100644 searchlib/src/tests/attribute/attributebenchmark.cpp create mode 100644 searchlib/src/tests/attribute/attributebenchmark.rb create mode 100644 searchlib/src/tests/attribute/attributefilewriter/.gitignore create mode 100644 searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp create mode 100644 searchlib/src/tests/attribute/attributeguard.cpp create mode 100644 searchlib/src/tests/attribute/attributeguard_test.sh create mode 100644 searchlib/src/tests/attribute/attributemanager/.gitignore create mode 100644 
searchlib/src/tests/attribute/attributemanager/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp create mode 100644 searchlib/src/tests/attribute/attributesearcher.h create mode 100644 searchlib/src/tests/attribute/attributeupdater.h create mode 100644 searchlib/src/tests/attribute/benchmarkplotter.rb create mode 100644 searchlib/src/tests/attribute/bitvector/.gitignore create mode 100644 searchlib/src/tests/attribute/bitvector/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/bitvector/bitvector_test.cpp create mode 100644 searchlib/src/tests/attribute/changevector_test.cpp create mode 100644 searchlib/src/tests/attribute/changevector_test.sh create mode 100644 searchlib/src/tests/attribute/comparator/.gitignore create mode 100644 searchlib/src/tests/attribute/comparator/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/comparator/DESC create mode 100644 searchlib/src/tests/attribute/comparator/FILES create mode 100644 searchlib/src/tests/attribute/comparator/comparator_test.cpp create mode 100644 searchlib/src/tests/attribute/document_weight_iterator/.gitignore create mode 100644 searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/document_weight_iterator/FILES create mode 100644 searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp create mode 100644 searchlib/src/tests/attribute/enumeratedsave/.gitignore create mode 100644 searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp create mode 100644 searchlib/src/tests/attribute/enumstore/.gitignore create mode 100644 searchlib/src/tests/attribute/enumstore/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/enumstore/DESC create mode 100644 searchlib/src/tests/attribute/enumstore/FILES create mode 100644 
searchlib/src/tests/attribute/enumstore/enumstore_test.cpp create mode 100644 searchlib/src/tests/attribute/extendattributes/.gitignore create mode 100644 searchlib/src/tests/attribute/extendattributes/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/extendattributes/DESC create mode 100644 searchlib/src/tests/attribute/extendattributes/FILES create mode 100644 searchlib/src/tests/attribute/extendattributes/extendattribute.cpp create mode 100755 searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh create mode 100644 searchlib/src/tests/attribute/gidmapattribute/.gitignore create mode 100644 searchlib/src/tests/attribute/multivaluemapping/.gitignore create mode 100644 searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/multivaluemapping/DESC create mode 100644 searchlib/src/tests/attribute/multivaluemapping/FILES create mode 100644 searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp create mode 100644 searchlib/src/tests/attribute/postinglist/.gitignore create mode 100644 searchlib/src/tests/attribute/postinglist/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/postinglist/DESC create mode 100644 searchlib/src/tests/attribute/postinglist/FILES create mode 100644 searchlib/src/tests/attribute/postinglist/postinglist.cpp create mode 100644 searchlib/src/tests/attribute/postinglistattribute/.gitignore create mode 100644 searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/postinglistattribute/DESC create mode 100644 searchlib/src/tests/attribute/postinglistattribute/FILES create mode 100644 searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp create mode 100755 searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh create mode 100644 searchlib/src/tests/attribute/runnable.h create mode 100644 
searchlib/src/tests/attribute/searchable/.gitignore create mode 100644 searchlib/src/tests/attribute/searchable/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp create mode 100755 searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh create mode 100644 searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp create mode 100644 searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp create mode 100644 searchlib/src/tests/attribute/searchcontext/.gitignore create mode 100644 searchlib/src/tests/attribute/searchcontext/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/searchcontext/DESC create mode 100644 searchlib/src/tests/attribute/searchcontext/FILES create mode 100644 searchlib/src/tests/attribute/searchcontext/searchcontext.cpp create mode 100755 searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh create mode 100644 searchlib/src/tests/attribute/sourceselector/.gitignore create mode 100644 searchlib/src/tests/attribute/sourceselector/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/sourceselector/DESC create mode 100644 searchlib/src/tests/attribute/sourceselector/FILES create mode 100644 searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp create mode 100644 searchlib/src/tests/attribute/stringattribute/.gitignore create mode 100644 searchlib/src/tests/attribute/stringattribute/CMakeLists.txt create mode 100644 searchlib/src/tests/attribute/stringattribute/DESC create mode 100644 searchlib/src/tests/attribute/stringattribute/FILES create mode 100644 searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp create mode 100755 searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh create mode 100644 searchlib/src/tests/attribute/tensorattribute/.gitignore create mode 100644 searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt create mode 100644 
searchlib/src/tests/attribute/tensorattribute/DESC create mode 100644 searchlib/src/tests/attribute/tensorattribute/FILES create mode 100644 searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp create mode 100644 searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh create mode 100644 searchlib/src/tests/bitcompression/expgolomb/.gitignore create mode 100644 searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt create mode 100644 searchlib/src/tests/bitcompression/expgolomb/DESC create mode 100644 searchlib/src/tests/bitcompression/expgolomb/FILES create mode 100644 searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp create mode 100644 searchlib/src/tests/bitvector/.gitignore create mode 100644 searchlib/src/tests/bitvector/CMakeLists.txt create mode 100644 searchlib/src/tests/bitvector/DESC create mode 100644 searchlib/src/tests/bitvector/FILES create mode 100644 searchlib/src/tests/bitvector/bitvectorbenchmark.cpp create mode 100644 searchlib/src/tests/btree/.gitignore create mode 100644 searchlib/src/tests/btree/CMakeLists.txt create mode 100644 searchlib/src/tests/btree/DESC create mode 100644 searchlib/src/tests/btree/FILES create mode 100644 searchlib/src/tests/btree/btreeaggregation_test.cpp create mode 100644 searchlib/src/tests/btree/iteratespeed.cpp create mode 100644 searchlib/src/tests/bytecomplens/.gitignore create mode 100644 searchlib/src/tests/bytecomplens/CMakeLists.txt create mode 100644 searchlib/src/tests/bytecomplens/DESC create mode 100644 searchlib/src/tests/bytecomplens/FILES create mode 100644 searchlib/src/tests/bytecomplens/bytecomp.cpp create mode 100644 searchlib/src/tests/bytecomplens/example.txt create mode 100644 searchlib/src/tests/bytecomplens/tblprint.cpp create mode 100644 searchlib/src/tests/common/bitvector/.gitignore create mode 100644 searchlib/src/tests/common/bitvector/CMakeLists.txt create mode 100644 searchlib/src/tests/common/bitvector/DESC create mode 100644 
searchlib/src/tests/common/bitvector/FILES create mode 100644 searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp create mode 100644 searchlib/src/tests/common/bitvector/bitvector_test.cpp create mode 100644 searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp create mode 100644 searchlib/src/tests/common/foregroundtaskexecutor/.gitignore create mode 100644 searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt create mode 100644 searchlib/src/tests/common/foregroundtaskexecutor/DESC create mode 100644 searchlib/src/tests/common/foregroundtaskexecutor/FILES create mode 100644 searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp create mode 100644 searchlib/src/tests/common/location/.gitignore create mode 100644 searchlib/src/tests/common/location/CMakeLists.txt create mode 100644 searchlib/src/tests/common/location/FILES create mode 100644 searchlib/src/tests/common/location/location_test.cpp create mode 100644 searchlib/src/tests/common/packets/.gitignore create mode 100644 searchlib/src/tests/common/packets/CMakeLists.txt create mode 100644 searchlib/src/tests/common/packets/DESC create mode 100644 searchlib/src/tests/common/packets/FILES create mode 100644 searchlib/src/tests/common/packets/packets_test.cpp create mode 100644 searchlib/src/tests/common/rcuvector/.gitignore create mode 100644 searchlib/src/tests/common/rcuvector/CMakeLists.txt create mode 100644 searchlib/src/tests/common/rcuvector/DESC create mode 100644 searchlib/src/tests/common/rcuvector/FILES create mode 100644 searchlib/src/tests/common/rcuvector/rcuvector_test.cpp create mode 100644 searchlib/src/tests/common/resultset/.gitignore create mode 100644 searchlib/src/tests/common/resultset/CMakeLists.txt create mode 100644 searchlib/src/tests/common/resultset/resultset_test.cpp create mode 100644 searchlib/src/tests/common/sequencedtaskexecutor/.gitignore create mode 100644 
searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt create mode 100644 searchlib/src/tests/common/sequencedtaskexecutor/DESC create mode 100644 searchlib/src/tests/common/sequencedtaskexecutor/FILES create mode 100644 searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp create mode 100644 searchlib/src/tests/common/summaryfeatures/.gitignore create mode 100644 searchlib/src/tests/common/summaryfeatures/CMakeLists.txt create mode 100644 searchlib/src/tests/common/summaryfeatures/DESC create mode 100644 searchlib/src/tests/common/summaryfeatures/FILES create mode 100644 searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp create mode 100755 searchlib/src/tests/create-test.sh create mode 100644 searchlib/src/tests/datastore/.gitignore create mode 100644 searchlib/src/tests/datastore/CMakeLists.txt create mode 100644 searchlib/src/tests/datastore/DESC create mode 100644 searchlib/src/tests/datastore/FILES create mode 100644 searchlib/src/tests/datastore/bad.dat create mode 100644 searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat create mode 100644 searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx create mode 100644 searchlib/src/tests/datastore/dangling/1425506005745465000.dat create mode 100644 searchlib/src/tests/datastore/dangling/1425506005745465000.idx create mode 100644 searchlib/src/tests/datastore/dangling/2425506005745465000.dat create mode 100644 searchlib/src/tests/datastore/dangling/2425506005745465000.idx create mode 100644 searchlib/src/tests/datastore/dangling/3425506005745465000.dat create mode 100644 searchlib/src/tests/datastore/dangling/4425506005745465000.dat create mode 100644 searchlib/src/tests/datastore/dangling/4425506005745465000.idx create mode 100644 searchlib/src/tests/datastore/datastore.dat create mode 100644 searchlib/src/tests/datastore/logdatastore_test.cpp create mode 100755 searchlib/src/tests/datastore/logdatastore_test.sh create mode 100644 
searchlib/src/tests/diskindex/bitvector/.gitignore create mode 100644 searchlib/src/tests/diskindex/bitvector/CMakeLists.txt create mode 100644 searchlib/src/tests/diskindex/bitvector/DESC create mode 100644 searchlib/src/tests/diskindex/bitvector/FILES create mode 100644 searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp create mode 100644 searchlib/src/tests/diskindex/diskindex/.gitignore create mode 100644 searchlib/src/tests/diskindex/diskindex/CMakeLists.txt create mode 100644 searchlib/src/tests/diskindex/diskindex/DESC create mode 100644 searchlib/src/tests/diskindex/diskindex/FILES create mode 100644 searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp create mode 100644 searchlib/src/tests/diskindex/fieldwriter/.gitignore create mode 100644 searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt create mode 100644 searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp create mode 100755 searchlib/src/tests/diskindex/fieldwriter/runtests.sh create mode 100644 searchlib/src/tests/diskindex/fusion/.gitignore create mode 100644 searchlib/src/tests/diskindex/fusion/CMakeLists.txt create mode 100644 searchlib/src/tests/diskindex/fusion/DESC create mode 100644 searchlib/src/tests/diskindex/fusion/FILES create mode 100644 searchlib/src/tests/diskindex/fusion/fusion_test.cpp create mode 100755 searchlib/src/tests/diskindex/fusion/fusion_test.sh create mode 100644 searchlib/src/tests/diskindex/pagedict4/.gitignore create mode 100644 searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt create mode 100644 searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp create mode 100644 searchlib/src/tests/document_store/.gitignore create mode 100644 searchlib/src/tests/document_store/CMakeLists.txt create mode 100644 searchlib/src/tests/document_store/FILES create mode 100644 searchlib/src/tests/document_store/document_store_test.cpp create mode 100644 searchlib/src/tests/document_store/visitor/.gitignore create mode 100644 
searchlib/src/tests/document_store/visitor/CMakeLists.txt create mode 100644 searchlib/src/tests/document_store/visitor/DESC create mode 100644 searchlib/src/tests/document_store/visitor/FILES create mode 100644 searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp create mode 100644 searchlib/src/tests/engine/docsumapi/.gitignore create mode 100644 searchlib/src/tests/engine/docsumapi/CMakeLists.txt create mode 100644 searchlib/src/tests/engine/docsumapi/DESC create mode 100644 searchlib/src/tests/engine/docsumapi/FILES create mode 100644 searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp create mode 100644 searchlib/src/tests/engine/monitorapi/.gitignore create mode 100644 searchlib/src/tests/engine/monitorapi/CMakeLists.txt create mode 100644 searchlib/src/tests/engine/monitorapi/DESC create mode 100644 searchlib/src/tests/engine/monitorapi/FILES create mode 100644 searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp create mode 100644 searchlib/src/tests/engine/searchapi/.gitignore create mode 100644 searchlib/src/tests/engine/searchapi/CMakeLists.txt create mode 100644 searchlib/src/tests/engine/searchapi/DESC create mode 100644 searchlib/src/tests/engine/searchapi/FILES create mode 100644 searchlib/src/tests/engine/searchapi/searchapi_test.cpp create mode 100644 searchlib/src/tests/engine/transportserver/.gitignore create mode 100644 searchlib/src/tests/engine/transportserver/CMakeLists.txt create mode 100644 searchlib/src/tests/engine/transportserver/DESC create mode 100644 searchlib/src/tests/engine/transportserver/FILES create mode 100644 searchlib/src/tests/engine/transportserver/transportserver_test.cpp create mode 100644 searchlib/src/tests/features/.gitignore create mode 100644 searchlib/src/tests/features/CMakeLists.txt create mode 100644 searchlib/src/tests/features/DESC create mode 100644 searchlib/src/tests/features/FILES create mode 100644 
searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt create mode 100644 searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt create mode 100644 searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt create mode 100644 searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt create mode 100644 searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt create mode 100644 
searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/plot.rb create mode 100644 
searchlib/src/tests/features/benchmark/fieldmatch/readme.txt create mode 100644 searchlib/src/tests/features/benchmark/fieldmatch/run.rb create mode 100644 searchlib/src/tests/features/benchmark/plotlib.rb create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/plot.rb create mode 100644 searchlib/src/tests/features/benchmark/rankingexpression/run.rb create mode 100644 searchlib/src/tests/features/beta/.gitignore create mode 100644 searchlib/src/tests/features/beta/CMakeLists.txt create mode 100644 searchlib/src/tests/features/beta/beta_features.cpp create mode 100644 searchlib/src/tests/features/element_completeness/.gitignore create mode 100644 searchlib/src/tests/features/element_completeness/CMakeLists.txt create mode 100644 searchlib/src/tests/features/element_completeness/FILES create mode 100644 searchlib/src/tests/features/element_completeness/element_completeness_test.cpp create mode 100644 searchlib/src/tests/features/element_similarity_feature/.gitignore create mode 100644 searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt create mode 100644 searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp create mode 100644 searchlib/src/tests/features/euclidean_distance/.gitignore create mode 100644 
searchlib/src/tests/features/euclidean_distance/CMakeLists.txt create mode 100644 searchlib/src/tests/features/euclidean_distance/FILES create mode 100644 searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp create mode 100644 searchlib/src/tests/features/featurebenchmark.cpp create mode 100644 searchlib/src/tests/features/item_raw_score/.gitignore create mode 100644 searchlib/src/tests/features/item_raw_score/CMakeLists.txt create mode 100644 searchlib/src/tests/features/item_raw_score/FILES create mode 100644 searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp create mode 100644 searchlib/src/tests/features/native_dot_product/.gitignore create mode 100644 searchlib/src/tests/features/native_dot_product/CMakeLists.txt create mode 100644 searchlib/src/tests/features/native_dot_product/FILES create mode 100644 searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp create mode 100644 searchlib/src/tests/features/prod_features.cpp create mode 100644 searchlib/src/tests/features/prod_features.h create mode 100644 searchlib/src/tests/features/prod_features_attributematch.cpp create mode 100644 searchlib/src/tests/features/prod_features_fieldmatch.cpp create mode 100644 searchlib/src/tests/features/prod_features_fieldtermmatch.cpp create mode 100644 searchlib/src/tests/features/prod_features_framework.cpp create mode 100755 searchlib/src/tests/features/prod_features_test.sh create mode 100644 searchlib/src/tests/features/ranking_expression/.gitignore create mode 100644 searchlib/src/tests/features/ranking_expression/CMakeLists.txt create mode 100644 searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp create mode 100644 searchlib/src/tests/features/raw_score/.gitignore create mode 100644 searchlib/src/tests/features/raw_score/CMakeLists.txt create mode 100644 searchlib/src/tests/features/raw_score/FILES create mode 100644 searchlib/src/tests/features/raw_score/raw_score_test.cpp create 
mode 100644 searchlib/src/tests/features/subqueries/.gitignore create mode 100644 searchlib/src/tests/features/subqueries/CMakeLists.txt create mode 100644 searchlib/src/tests/features/subqueries/subqueries_test.cpp create mode 100644 searchlib/src/tests/features/tensor/.gitignore create mode 100644 searchlib/src/tests/features/tensor/CMakeLists.txt create mode 100644 searchlib/src/tests/features/tensor/FILES create mode 100644 searchlib/src/tests/features/tensor/tensor_test.cpp create mode 100644 searchlib/src/tests/features/tensor_from_labels/.gitignore create mode 100644 searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt create mode 100644 searchlib/src/tests/features/tensor_from_labels/FILES create mode 100644 searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp create mode 100644 searchlib/src/tests/features/tensor_from_weighted_set/.gitignore create mode 100644 searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt create mode 100644 searchlib/src/tests/features/tensor_from_weighted_set/FILES create mode 100644 searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp create mode 100644 searchlib/src/tests/features/text_similarity_feature/.gitignore create mode 100644 searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt create mode 100644 searchlib/src/tests/features/text_similarity_feature/FILES create mode 100644 searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp create mode 100644 searchlib/src/tests/features/util/.gitignore create mode 100644 searchlib/src/tests/features/util/CMakeLists.txt create mode 100644 searchlib/src/tests/features/util/FILES create mode 100644 searchlib/src/tests/features/util/util_test.cpp create mode 100644 searchlib/src/tests/fef/.gitignore create mode 100644 searchlib/src/tests/fef/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/DESC create mode 100644 searchlib/src/tests/fef/FILES create 
mode 100644 searchlib/src/tests/fef/attributecontent/.gitignore create mode 100644 searchlib/src/tests/fef/attributecontent/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/attributecontent/DESC create mode 100644 searchlib/src/tests/fef/attributecontent/FILES create mode 100644 searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp create mode 100644 searchlib/src/tests/fef/featurenamebuilder/.gitignore create mode 100644 searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/featurenamebuilder/DESC create mode 100644 searchlib/src/tests/fef/featurenamebuilder/FILES create mode 100644 searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp create mode 100644 searchlib/src/tests/fef/featurenameparser/.gitignore create mode 100644 searchlib/src/tests/fef/featurenameparser/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/featurenameparser/DESC create mode 100644 searchlib/src/tests/fef/featurenameparser/FILES create mode 100644 searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp create mode 100644 searchlib/src/tests/fef/featurenameparser/parsetest.txt create mode 100644 searchlib/src/tests/fef/featureoverride/.gitignore create mode 100644 searchlib/src/tests/fef/featureoverride/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/featureoverride/DESC create mode 100644 searchlib/src/tests/fef/featureoverride/FILES create mode 100644 searchlib/src/tests/fef/featureoverride/featureoverride.cpp create mode 100644 searchlib/src/tests/fef/fef_test.cpp create mode 100644 searchlib/src/tests/fef/object_passing/.gitignore create mode 100644 searchlib/src/tests/fef/object_passing/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/object_passing/object_passing_test.cpp create mode 100644 searchlib/src/tests/fef/parameter/.gitignore create mode 100644 searchlib/src/tests/fef/parameter/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/parameter/DESC 
create mode 100644 searchlib/src/tests/fef/parameter/FILES create mode 100644 searchlib/src/tests/fef/parameter/parameter_test.cpp create mode 100644 searchlib/src/tests/fef/phrasesplitter/.gitignore create mode 100644 searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/phrasesplitter/DESC create mode 100644 searchlib/src/tests/fef/phrasesplitter/FILES create mode 100644 searchlib/src/tests/fef/phrasesplitter/benchmark.cpp create mode 100644 searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp create mode 100644 searchlib/src/tests/fef/properties/.gitignore create mode 100644 searchlib/src/tests/fef/properties/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/properties/DESC create mode 100644 searchlib/src/tests/fef/properties/FILES create mode 100644 searchlib/src/tests/fef/properties/properties_test.cpp create mode 100644 searchlib/src/tests/fef/rank_program/.gitignore create mode 100644 searchlib/src/tests/fef/rank_program/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/rank_program/FILES create mode 100644 searchlib/src/tests/fef/rank_program/rank_program_test.cpp create mode 100644 searchlib/src/tests/fef/resolver/.gitignore create mode 100644 searchlib/src/tests/fef/resolver/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/resolver/DESC create mode 100644 searchlib/src/tests/fef/resolver/FILES create mode 100644 searchlib/src/tests/fef/resolver/resolver_test.cpp create mode 100644 searchlib/src/tests/fef/table/.gitignore create mode 100644 searchlib/src/tests/fef/table/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/table/DESC create mode 100644 searchlib/src/tests/fef/table/FILES create mode 100644 searchlib/src/tests/fef/table/table_test.cpp create mode 100644 searchlib/src/tests/fef/table/tables1/a create mode 100644 searchlib/src/tests/fef/table/tables2/a create mode 100644 searchlib/src/tests/fef/table/tables2/b create mode 100644 
searchlib/src/tests/fef/termfieldmodel/.gitignore create mode 100644 searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/termfieldmodel/DESC create mode 100644 searchlib/src/tests/fef/termfieldmodel/FILES create mode 100644 searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp create mode 100644 searchlib/src/tests/fef/termmatchdatamerger/.gitignore create mode 100644 searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt create mode 100644 searchlib/src/tests/fef/termmatchdatamerger/DESC create mode 100644 searchlib/src/tests/fef/termmatchdatamerger/FILES create mode 100644 searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp create mode 100644 searchlib/src/tests/fileheaderinspect/.gitignore create mode 100644 searchlib/src/tests/fileheaderinspect/CMakeLists.txt create mode 100644 searchlib/src/tests/fileheaderinspect/DESC create mode 100644 searchlib/src/tests/fileheaderinspect/FILES create mode 100644 searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp create mode 100644 searchlib/src/tests/fileheadertk/.gitignore create mode 100644 searchlib/src/tests/fileheadertk/CMakeLists.txt create mode 100644 searchlib/src/tests/fileheadertk/DESC create mode 100644 searchlib/src/tests/fileheadertk/FILES create mode 100644 searchlib/src/tests/fileheadertk/fileheadertk_test.cpp create mode 100644 searchlib/src/tests/forcelink/.gitignore create mode 100644 searchlib/src/tests/forcelink/CMakeLists.txt create mode 100644 searchlib/src/tests/forcelink/DESC create mode 100644 searchlib/src/tests/forcelink/FILES create mode 100644 searchlib/src/tests/forcelink/forcelink.cpp create mode 100644 searchlib/src/tests/grouping/.gitignore create mode 100644 searchlib/src/tests/grouping/CMakeLists.txt create mode 100644 searchlib/src/tests/grouping/DESC create mode 100644 searchlib/src/tests/grouping/FILES create mode 100644 searchlib/src/tests/grouping/grouping_serialization_test.cpp create mode 100644 
searchlib/src/tests/grouping/grouping_test.cpp create mode 100644 searchlib/src/tests/grouping/hyperloglog_test.cpp create mode 100644 searchlib/src/tests/grouping/sketch_test.cpp create mode 100644 searchlib/src/tests/groupingengine/.gitignore create mode 100644 searchlib/src/tests/groupingengine/CMakeLists.txt create mode 100644 searchlib/src/tests/groupingengine/DESC create mode 100644 searchlib/src/tests/groupingengine/FILES create mode 100644 searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp create mode 100644 searchlib/src/tests/groupingengine/groupingengine_test.cpp create mode 100644 searchlib/src/tests/hitcollector/.gitignore create mode 100644 searchlib/src/tests/hitcollector/CMakeLists.txt create mode 100644 searchlib/src/tests/hitcollector/DESC create mode 100644 searchlib/src/tests/hitcollector/FILES create mode 100644 searchlib/src/tests/hitcollector/hitcollector_test.cpp create mode 100644 searchlib/src/tests/index/docbuilder/.gitignore create mode 100644 searchlib/src/tests/index/docbuilder/CMakeLists.txt create mode 100644 searchlib/src/tests/index/docbuilder/DESC create mode 100644 searchlib/src/tests/index/docbuilder/FILES create mode 100644 searchlib/src/tests/index/docbuilder/docbuilder_test.cpp create mode 100644 searchlib/src/tests/index/doctypebuilder/.gitignore create mode 100644 searchlib/src/tests/index/doctypebuilder/CMakeLists.txt create mode 100644 searchlib/src/tests/index/doctypebuilder/DESC create mode 100644 searchlib/src/tests/index/doctypebuilder/FILES create mode 100644 searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp create mode 100644 searchlib/src/tests/indexmetainfo/.gitignore create mode 100644 searchlib/src/tests/indexmetainfo/CMakeLists.txt create mode 100644 searchlib/src/tests/indexmetainfo/DESC create mode 100644 searchlib/src/tests/indexmetainfo/FILES create mode 100644 searchlib/src/tests/indexmetainfo/bogus1.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus10.txt create 
mode 100644 searchlib/src/tests/indexmetainfo/bogus2.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus3.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus4.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus5.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus6.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus7.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus8.txt create mode 100644 searchlib/src/tests/indexmetainfo/bogus9.txt create mode 100644 searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp create mode 100644 searchlib/src/tests/indexmetainfo/meta-info.txt create mode 100644 searchlib/src/tests/ld-library-path/.gitignore create mode 100644 searchlib/src/tests/ld-library-path/CMakeLists.txt create mode 100644 searchlib/src/tests/ld-library-path/ld-library-path.cpp create mode 100644 searchlib/src/tests/memoryindex/btree/.gitignore create mode 100644 searchlib/src/tests/memoryindex/btree/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/btree/DESC create mode 100644 searchlib/src/tests/memoryindex/btree/FILES create mode 100644 searchlib/src/tests/memoryindex/btree/btree_test.cpp create mode 100644 searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp create mode 100644 searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore create mode 100644 searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/compact_document_words_store/DESC create mode 100644 searchlib/src/tests/memoryindex/compact_document_words_store/FILES create mode 100644 searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp create mode 100644 searchlib/src/tests/memoryindex/datastore/.gitignore create mode 100644 searchlib/src/tests/memoryindex/datastore/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/datastore/DESC create mode 100644 
searchlib/src/tests/memoryindex/datastore/FILES create mode 100644 searchlib/src/tests/memoryindex/datastore/datastore_test.cpp create mode 100644 searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp create mode 100644 searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp create mode 100644 searchlib/src/tests/memoryindex/dictionary/.gitignore create mode 100644 searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/dictionary/DESC create mode 100644 searchlib/src/tests/memoryindex/dictionary/FILES create mode 100644 searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp create mode 100644 searchlib/src/tests/memoryindex/document_remover/.gitignore create mode 100644 searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/document_remover/DESC create mode 100644 searchlib/src/tests/memoryindex/document_remover/FILES create mode 100644 searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp create mode 100644 searchlib/src/tests/memoryindex/documentinverter/.gitignore create mode 100644 searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/documentinverter/DESC create mode 100644 searchlib/src/tests/memoryindex/documentinverter/FILES create mode 100644 searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp create mode 100644 searchlib/src/tests/memoryindex/fieldinverter/.gitignore create mode 100644 searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/fieldinverter/DESC create mode 100644 searchlib/src/tests/memoryindex/fieldinverter/FILES create mode 100644 searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp create mode 100644 searchlib/src/tests/memoryindex/memoryindex/.gitignore create mode 100644 
searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/memoryindex/DESC create mode 100644 searchlib/src/tests/memoryindex/memoryindex/FILES create mode 100644 searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp create mode 100644 searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore create mode 100644 searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt create mode 100644 searchlib/src/tests/memoryindex/urlfieldinverter/DESC create mode 100644 searchlib/src/tests/memoryindex/urlfieldinverter/FILES create mode 100644 searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp create mode 100644 searchlib/src/tests/memorytub/.gitignore create mode 100644 searchlib/src/tests/memorytub/CMakeLists.txt create mode 100644 searchlib/src/tests/memorytub/memorytub_test.cpp create mode 100644 searchlib/src/tests/nativerank/.gitignore create mode 100644 searchlib/src/tests/nativerank/CMakeLists.txt create mode 100644 searchlib/src/tests/nativerank/nativerank.cpp create mode 100644 searchlib/src/tests/nearsearch/.gitignore create mode 100644 searchlib/src/tests/nearsearch/CMakeLists.txt create mode 100644 searchlib/src/tests/nearsearch/DESC create mode 100644 searchlib/src/tests/nearsearch/FILES create mode 100644 searchlib/src/tests/nearsearch/nearsearch_test.cpp create mode 100644 searchlib/src/tests/postinglistbm/.gitignore create mode 100644 searchlib/src/tests/postinglistbm/CMakeLists.txt create mode 100644 searchlib/src/tests/postinglistbm/andstress.cpp create mode 100644 searchlib/src/tests/postinglistbm/andstress.h create mode 100644 searchlib/src/tests/postinglistbm/postinglistbm.cpp create mode 100644 searchlib/src/tests/postinglistbm/skip.txt create mode 100644 searchlib/src/tests/predicate/.gitignore create mode 100644 searchlib/src/tests/predicate/CMakeLists.txt create mode 100644 searchlib/src/tests/predicate/OWNERS create mode 100644 
searchlib/src/tests/predicate/document_features_store_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_index_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_interval_store_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_ref_cache_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp create mode 100644 searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp create mode 100644 searchlib/src/tests/predicate/simple_index_test.cpp create mode 100644 searchlib/src/tests/predicate/tree_crumbs_test.cpp create mode 100644 searchlib/src/tests/prettyfloat/.gitignore create mode 100644 searchlib/src/tests/prettyfloat/CMakeLists.txt create mode 100644 searchlib/src/tests/prettyfloat/DESC create mode 100644 searchlib/src/tests/prettyfloat/FILES create mode 100644 searchlib/src/tests/prettyfloat/prettyfloat.cpp create mode 100644 searchlib/src/tests/query/.gitignore create mode 100644 searchlib/src/tests/query/CMakeLists.txt create mode 100644 searchlib/src/tests/query/DESC create mode 100644 searchlib/src/tests/query/FILES create mode 100644 searchlib/src/tests/query/customtypevisitor_test.cpp create mode 100644 searchlib/src/tests/query/query-old-large.cpp create mode 100644 searchlib/src/tests/query/query-old.cpp create mode 100644 searchlib/src/tests/query/query_visitor_test.cpp create mode 100644 searchlib/src/tests/query/querybuilder_test.cpp create mode 100644 searchlib/src/tests/query/stackdumpquerycreator_test.cpp 
create mode 100644 searchlib/src/tests/query/templatetermvisitor_test.cpp create mode 100644 searchlib/src/tests/queryeval/.gitignore create mode 100644 searchlib/src/tests/queryeval/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/DESC create mode 100644 searchlib/src/tests/queryeval/FILES create mode 100644 searchlib/src/tests/queryeval/blueprint/.cvsignore create mode 100644 searchlib/src/tests/queryeval/blueprint/.gitignore create mode 100644 searchlib/src/tests/queryeval/blueprint/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/blueprint/DESC create mode 100644 searchlib/src/tests/queryeval/blueprint/FILES create mode 100644 searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp create mode 100644 searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp create mode 100644 searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp create mode 100644 searchlib/src/tests/queryeval/blueprint/mysearch.h create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES create mode 100644 searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp create mode 100644 searchlib/src/tests/queryeval/dot_product/.gitignore create mode 100644 searchlib/src/tests/queryeval/dot_product/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/dot_product/FILES create mode 100644 searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp create mode 100644 searchlib/src/tests/queryeval/equiv/.cvsignore create mode 100644 searchlib/src/tests/queryeval/equiv/.gitignore create mode 100644 
searchlib/src/tests/queryeval/equiv/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/equiv/DESC create mode 100644 searchlib/src/tests/queryeval/equiv/FILES create mode 100644 searchlib/src/tests/queryeval/equiv/equiv_test.cpp create mode 100644 searchlib/src/tests/queryeval/fake_searchable/.cvsignore create mode 100644 searchlib/src/tests/queryeval/fake_searchable/.gitignore create mode 100644 searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/fake_searchable/DESC create mode 100644 searchlib/src/tests/queryeval/fake_searchable/FILES create mode 100644 searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp create mode 100644 searchlib/src/tests/queryeval/getnodeweight/.gitignore create mode 100644 searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp create mode 100644 searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore create mode 100644 searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/monitoring_search_iterator/DESC create mode 100644 searchlib/src/tests/queryeval/monitoring_search_iterator/FILES create mode 100644 searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/DESC create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/FILES create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp create mode 100644 searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp create mode 100644 
searchlib/src/tests/queryeval/parallel_weak_and/.gitignore create mode 100644 searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/parallel_weak_and/DESC create mode 100644 searchlib/src/tests/queryeval/parallel_weak_and/FILES create mode 100644 searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp create mode 100644 searchlib/src/tests/queryeval/predicate/.gitignore create mode 100644 searchlib/src/tests/queryeval/predicate/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp create mode 100644 searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp create mode 100644 searchlib/src/tests/queryeval/queryeval.cpp create mode 100644 searchlib/src/tests/queryeval/simple_phrase/.cvsignore create mode 100644 searchlib/src/tests/queryeval/simple_phrase/.gitignore create mode 100644 searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/simple_phrase/DESC create mode 100644 searchlib/src/tests/queryeval/simple_phrase/FILES create mode 100644 searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp create mode 100644 searchlib/src/tests/queryeval/sourceblender/.gitignore create mode 100644 searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/sourceblender/DESC create mode 100644 searchlib/src/tests/queryeval/sourceblender/FILES create mode 100644 searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp create mode 100644 searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore create mode 100644 searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES create mode 100644 searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp create mode 100644 
searchlib/src/tests/queryeval/termwise_eval/.gitignore create mode 100644 searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and/.gitignore create mode 100644 searchlib/src/tests/queryeval/weak_and/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/weak_and/FILES create mode 100644 searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and/rise_wand.h create mode 100644 searchlib/src/tests/queryeval/weak_and/rise_wand.hpp create mode 100644 searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp create mode 100644 searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and_heap/.gitignore create mode 100644 searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/weak_and_heap/DESC create mode 100644 searchlib/src/tests/queryeval/weak_and_heap/FILES create mode 100644 searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp create mode 100644 searchlib/src/tests/queryeval/weak_and_scorers/.gitignore create mode 100644 searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/weak_and_scorers/DESC create mode 100644 searchlib/src/tests/queryeval/weak_and_scorers/FILES create mode 100644 searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp create mode 100644 searchlib/src/tests/queryeval/weighted_set_term/.gitignore create mode 100644 searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt create mode 100644 searchlib/src/tests/queryeval/weighted_set_term/DESC create mode 100644 
searchlib/src/tests/queryeval/weighted_set_term/FILES create mode 100644 searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp create mode 100644 searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore create mode 100644 searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt create mode 100644 searchlib/src/tests/rankingexpression/feature_name_extractor/FILES create mode 100644 searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp create mode 100644 searchlib/src/tests/rankingexpression/rankingexpressionlist create mode 100644 searchlib/src/tests/ranksetup/.gitignore create mode 100644 searchlib/src/tests/ranksetup/CMakeLists.txt create mode 100644 searchlib/src/tests/ranksetup/DESC create mode 100644 searchlib/src/tests/ranksetup/FILES create mode 100644 searchlib/src/tests/ranksetup/ranksetup_test.cpp create mode 100644 searchlib/src/tests/ranksetup/verify_feature/.gitignore create mode 100644 searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt create mode 100644 searchlib/src/tests/ranksetup/verify_feature/FILES create mode 100644 searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp create mode 100644 searchlib/src/tests/sha1/.gitignore create mode 100644 searchlib/src/tests/sort/.gitignore create mode 100644 searchlib/src/tests/sort/CMakeLists.txt create mode 100644 searchlib/src/tests/sort/DESC create mode 100644 searchlib/src/tests/sort/FILES create mode 100644 searchlib/src/tests/sort/javaorder.zh create mode 100644 searchlib/src/tests/sort/sort_test.cpp create mode 100644 searchlib/src/tests/sort/sortbenchmark.cpp create mode 100644 searchlib/src/tests/sort/uca.cpp create mode 100644 searchlib/src/tests/sortresults/.gitignore create mode 100644 searchlib/src/tests/sortresults/CMakeLists.txt create mode 100644 searchlib/src/tests/sortresults/sorttest.cpp create mode 100644 searchlib/src/tests/sortspec/.gitignore create mode 100644 
searchlib/src/tests/sortspec/CMakeLists.txt create mode 100644 searchlib/src/tests/sortspec/multilevelsort.cpp create mode 100644 searchlib/src/tests/stackdumpiterator/.gitignore create mode 100644 searchlib/src/tests/stackdumpiterator/CMakeLists.txt create mode 100644 searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp create mode 100644 searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h create mode 100644 searchlib/src/tests/stackdumpiterator/testowner.ATS create mode 100644 searchlib/src/tests/stringenum/.gitignore create mode 100644 searchlib/src/tests/stringenum/CMakeLists.txt create mode 100644 searchlib/src/tests/stringenum/stringenum_test.cpp create mode 100644 searchlib/src/tests/transactionlog/.gitignore create mode 100644 searchlib/src/tests/transactionlog/CMakeLists.txt create mode 100644 searchlib/src/tests/transactionlog/DESC create mode 100644 searchlib/src/tests/transactionlog/FILES create mode 100644 searchlib/src/tests/transactionlog/translogclient_test.cpp create mode 100755 searchlib/src/tests/transactionlog/translogclient_test.sh create mode 100644 searchlib/src/tests/transactionlog/translogserver_test.cpp create mode 100644 searchlib/src/tests/transactionlogstress/.gitignore create mode 100644 searchlib/src/tests/transactionlogstress/CMakeLists.txt create mode 100644 searchlib/src/tests/transactionlogstress/DESC create mode 100644 searchlib/src/tests/transactionlogstress/FILES create mode 100644 searchlib/src/tests/transactionlogstress/translogstress.cpp create mode 100644 searchlib/src/tests/true/.gitignore create mode 100644 searchlib/src/tests/true/CMakeLists.txt create mode 100644 searchlib/src/tests/true/DESC create mode 100644 searchlib/src/tests/true/FILES create mode 100644 searchlib/src/tests/true/true.cpp create mode 100644 searchlib/src/tests/url/.gitignore create mode 100644 searchlib/src/tests/url/CMakeLists.txt create mode 100755 searchlib/src/tests/url/dotest.sh create mode 100644 
searchlib/src/tests/url/testurl.cpp create mode 100644 searchlib/src/tests/util/.gitignore create mode 100644 searchlib/src/tests/util/CMakeLists.txt create mode 100644 searchlib/src/tests/util/bufferwriter/.gitignore create mode 100644 searchlib/src/tests/util/bufferwriter/CMakeLists.txt create mode 100644 searchlib/src/tests/util/bufferwriter/bm.cpp create mode 100644 searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp create mode 100644 searchlib/src/tests/util/bufferwriter/work.cpp create mode 100644 searchlib/src/tests/util/bufferwriter/work.h create mode 100644 searchlib/src/tests/util/ioerrorhandler/.gitignore create mode 100644 searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt create mode 100644 searchlib/src/tests/util/ioerrorhandler/DESC create mode 100644 searchlib/src/tests/util/ioerrorhandler/FILES create mode 100644 searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp create mode 100644 searchlib/src/tests/util/rawbuf_test.cpp create mode 100644 searchlib/src/tests/util/searchable_stats/.gitignore create mode 100644 searchlib/src/tests/util/searchable_stats/CMakeLists.txt create mode 100644 searchlib/src/tests/util/searchable_stats/DESC create mode 100644 searchlib/src/tests/util/searchable_stats/FILES create mode 100644 searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp create mode 100644 searchlib/src/tests/util/sigbushandler/.gitignore create mode 100644 searchlib/src/tests/util/sigbushandler/CMakeLists.txt create mode 100644 searchlib/src/tests/util/sigbushandler/DESC create mode 100644 searchlib/src/tests/util/sigbushandler/FILES create mode 100644 searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp create mode 100644 searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore create mode 100644 searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt create mode 100644 searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES create mode 100644 
searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp create mode 100644 searchlib/src/tests/util/statebuf/.gitignore create mode 100644 searchlib/src/tests/util/statebuf/CMakeLists.txt create mode 100644 searchlib/src/tests/util/statebuf/DESC create mode 100644 searchlib/src/tests/util/statebuf/FILES create mode 100644 searchlib/src/tests/util/statebuf/statebuf_test.cpp create mode 100644 searchlib/src/tests/util/statefile/.gitignore create mode 100644 searchlib/src/tests/util/statefile/CMakeLists.txt create mode 100644 searchlib/src/tests/util/statefile/DESC create mode 100644 searchlib/src/tests/util/statefile/FILES create mode 100644 searchlib/src/tests/util/statefile/statefile_test.cpp create mode 100644 searchlib/src/vespa/searchlib/.gitignore create mode 100644 searchlib/src/vespa/searchlib/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/aggregation/.gitignore create mode 100644 searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/aggregation/OWNERS create mode 100644 searchlib/src/vespa/searchlib/aggregation/aggregation.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/aggregation.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/aggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/forcelink.hpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/fs4hit.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/group.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/group.h create mode 100644 
searchlib/src/vespa/searchlib/aggregation/grouping.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/grouping.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/groupinglevel.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/hit.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/hit.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/hitlist.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/hitlist.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/modifiers.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/modifiers.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/perdocexpression.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/predicates.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/rawrank.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/rawrank.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/vdshit.cpp create mode 100644 searchlib/src/vespa/searchlib/aggregation/vdshit.h create mode 100644 searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h create mode 100644 searchlib/src/vespa/searchlib/attribute/.gitignore create mode 100644 searchlib/src/vespa/searchlib/attribute/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/attribute/OWNERS create mode 100644 searchlib/src/vespa/searchlib/attribute/address_space.cpp create mode 100644 
searchlib/src/vespa/searchlib/attribute/address_space.h create mode 100644 searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/address_space_usage.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributecontext.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributecontext.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefactory.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefactory.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefile.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefile.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributefilewriter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributeguard.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributeguard.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp create mode 100644 
searchlib/src/vespa/searchlib/attribute/attributeiterators.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributemanager.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributemanager.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributevector.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attributevector.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attributevector.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attrvector.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/attrvector.h create mode 100644 searchlib/src/vespa/searchlib/attribute/attrvector.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/changevector.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/changevector.h create mode 100644 searchlib/src/vespa/searchlib/attribute/configconverter.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/configconverter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/createarraystd.cpp create mode 100644 
searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/createsetstd.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/defines.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/defines.h create mode 100644 searchlib/src/vespa/searchlib/attribute/diversity.h create mode 100644 searchlib/src/vespa/searchlib/attribute/dociditerator.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/dociditerator.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumcomparator.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumstore.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumstore.h create mode 100644 searchlib/src/vespa/searchlib/attribute/enumstore.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/enumstorebase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/extendableattributes.h create mode 100644 searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp 
create mode 100644 searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h create mode 100644 searchlib/src/vespa/searchlib/attribute/flagattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/flagattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/floatbase.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/floatbase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h create mode 100644 searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/iattributemanager.h create mode 100644 searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h create mode 100644 searchlib/src/vespa/searchlib/attribute/integerbase.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/integerbase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/interlock.h create mode 100644 searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h create mode 100644 searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/iterator_pack.h create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h create mode 100644 searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp 
create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/loadedvalue.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multienumattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multistringattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp create mode 100644 
searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalue.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/multivaluemapping.h create mode 100644 searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/numericbase.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/numericbase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/postingchange.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postingchange.h create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglistattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postinglisttraits.h create mode 100644 
searchlib/src/vespa/searchlib/attribute/postingstore.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/postingstore.h create mode 100644 searchlib/src/vespa/searchlib/attribute/predicate_attribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/predicate_attribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singleenumattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp create mode 100644 
searchlib/src/vespa/searchlib/attribute/singlestringattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp create mode 100644 searchlib/src/vespa/searchlib/attribute/sourceselector.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/sourceselector.h create mode 100644 searchlib/src/vespa/searchlib/attribute/stringattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/stringattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/stringbase.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/stringbase.h create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorattribute.h create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorstore.cpp create mode 100644 searchlib/src/vespa/searchlib/attribute/tensorstore.h create mode 100644 searchlib/src/vespa/searchlib/bitcompression/.gitignore create mode 100644 searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/bitcompression/OWNERS create mode 100644 searchlib/src/vespa/searchlib/bitcompression/compression.cpp create mode 100644 searchlib/src/vespa/searchlib/bitcompression/compression.h create mode 100644 searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp create mode 100644 searchlib/src/vespa/searchlib/bitcompression/countcompression.h create mode 100644 searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp create mode 100644 
searchlib/src/vespa/searchlib/bitcompression/pagedict4.h create mode 100644 searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp create mode 100644 searchlib/src/vespa/searchlib/bitcompression/posocccompression.h create mode 100644 searchlib/src/vespa/searchlib/btree/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/btree/OWNERS create mode 100644 searchlib/src/vespa/searchlib/btree/btree.h create mode 100644 searchlib/src/vespa/searchlib/btree/btree.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeaggregator.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreebuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreebuilder.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreebuilder.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeinserter.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeinserter.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreeinserter.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeiterator.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeiterator.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreeiterator.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenode.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenode.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreenode.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenodeallocator.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenodestore.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreenodestore.h create mode 100644 
searchlib/src/vespa/searchlib/btree/btreenodestore.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeremover.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeremover.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreeremover.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeroot.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreeroot.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreeroot.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreerootbase.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreerootbase.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreerootbase.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreestore.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreestore.h create mode 100644 searchlib/src/vespa/searchlib/btree/btreestore.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/btreetraits.h create mode 100644 searchlib/src/vespa/searchlib/btree/bufferstate.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/bufferstate.h create mode 100644 searchlib/src/vespa/searchlib/btree/datastore.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/datastore.h create mode 100644 searchlib/src/vespa/searchlib/btree/datastore.hpp create mode 100644 searchlib/src/vespa/searchlib/btree/datastorebase.cpp create mode 100644 searchlib/src/vespa/searchlib/btree/datastorebase.h create mode 100644 searchlib/src/vespa/searchlib/btree/entryref.h create mode 100644 searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h create mode 100644 searchlib/src/vespa/searchlib/btree/minmaxaggregated.h create mode 100644 searchlib/src/vespa/searchlib/btree/noaggrcalc.h create mode 100644 searchlib/src/vespa/searchlib/btree/noaggregated.h create mode 100644 searchlib/src/vespa/searchlib/common/.gitignore create mode 100644 searchlib/src/vespa/searchlib/common/CMakeLists.txt create mode 100644 
searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp create mode 100644 searchlib/src/vespa/searchlib/common/allocatedbitvector.h create mode 100644 searchlib/src/vespa/searchlib/common/base.h create mode 100644 searchlib/src/vespa/searchlib/common/bitvector.cpp create mode 100644 searchlib/src/vespa/searchlib/common/bitvector.h create mode 100644 searchlib/src/vespa/searchlib/common/bitvectorcache.cpp create mode 100644 searchlib/src/vespa/searchlib/common/bitvectorcache.h create mode 100644 searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp create mode 100644 searchlib/src/vespa/searchlib/common/bitvectoriterator.h create mode 100644 searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp create mode 100644 searchlib/src/vespa/searchlib/common/condensedbitvectors.h create mode 100644 searchlib/src/vespa/searchlib/common/converters.h create mode 100644 searchlib/src/vespa/searchlib/common/docstamp.h create mode 100644 searchlib/src/vespa/searchlib/common/documentlocations.cpp create mode 100644 searchlib/src/vespa/searchlib/common/documentlocations.h create mode 100644 searchlib/src/vespa/searchlib/common/documentsummary.cpp create mode 100644 searchlib/src/vespa/searchlib/common/documentsummary.h create mode 100644 searchlib/src/vespa/searchlib/common/feature.h create mode 100644 searchlib/src/vespa/searchlib/common/featureset.cpp create mode 100644 searchlib/src/vespa/searchlib/common/featureset.h create mode 100644 searchlib/src/vespa/searchlib/common/fileheadercontext.cpp create mode 100644 searchlib/src/vespa/searchlib/common/fileheadercontext.h create mode 100644 searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp create mode 100644 searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h create mode 100644 searchlib/src/vespa/searchlib/common/fslimits.h create mode 100644 searchlib/src/vespa/searchlib/common/gid.h create mode 100644 searchlib/src/vespa/searchlib/common/growablebitvector.cpp create mode 100644 
searchlib/src/vespa/searchlib/common/growablebitvector.h create mode 100644 searchlib/src/vespa/searchlib/common/hitrank.h create mode 100644 searchlib/src/vespa/searchlib/common/identifiable.h create mode 100644 searchlib/src/vespa/searchlib/common/idestructorcallback.h create mode 100644 searchlib/src/vespa/searchlib/common/idocumentmetastore.h create mode 100644 searchlib/src/vespa/searchlib/common/indexmetainfo.cpp create mode 100644 searchlib/src/vespa/searchlib/common/indexmetainfo.h create mode 100644 searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h create mode 100644 searchlib/src/vespa/searchlib/common/lambdatask.h create mode 100644 searchlib/src/vespa/searchlib/common/lid_usage_stats.h create mode 100644 searchlib/src/vespa/searchlib/common/location.cpp create mode 100644 searchlib/src/vespa/searchlib/common/location.h create mode 100644 searchlib/src/vespa/searchlib/common/locationiterators.cpp create mode 100644 searchlib/src/vespa/searchlib/common/locationiterators.h create mode 100644 searchlib/src/vespa/searchlib/common/mapnames.cpp create mode 100644 searchlib/src/vespa/searchlib/common/mapnames.h create mode 100644 searchlib/src/vespa/searchlib/common/packets.cpp create mode 100644 searchlib/src/vespa/searchlib/common/packets.h create mode 100644 searchlib/src/vespa/searchlib/common/partialbitvector.cpp create mode 100644 searchlib/src/vespa/searchlib/common/partialbitvector.h create mode 100644 searchlib/src/vespa/searchlib/common/range.h create mode 100644 searchlib/src/vespa/searchlib/common/rankedhit.h create mode 100644 searchlib/src/vespa/searchlib/common/rcuvector.h create mode 100644 searchlib/src/vespa/searchlib/common/reserved.h create mode 100644 searchlib/src/vespa/searchlib/common/resultset.cpp create mode 100644 searchlib/src/vespa/searchlib/common/resultset.h create mode 100644 searchlib/src/vespa/searchlib/common/scheduletaskcallback.h create mode 100644 searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp 
create mode 100644 searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h create mode 100644 searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h create mode 100644 searchlib/src/vespa/searchlib/common/serialnum.h create mode 100644 searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp create mode 100644 searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h create mode 100644 searchlib/src/vespa/searchlib/common/sort.cpp create mode 100644 searchlib/src/vespa/searchlib/common/sort.h create mode 100644 searchlib/src/vespa/searchlib/common/sortdata.cpp create mode 100644 searchlib/src/vespa/searchlib/common/sortdata.h create mode 100644 searchlib/src/vespa/searchlib/common/sortresults.cpp create mode 100644 searchlib/src/vespa/searchlib/common/sortresults.h create mode 100644 searchlib/src/vespa/searchlib/common/sortspec.cpp create mode 100644 searchlib/src/vespa/searchlib/common/sortspec.h create mode 100644 searchlib/src/vespa/searchlib/common/transport.h create mode 100644 searchlib/src/vespa/searchlib/common/tunefileinfo.cpp create mode 100644 searchlib/src/vespa/searchlib/common/tunefileinfo.h create mode 100644 searchlib/src/vespa/searchlib/config/.gitignore create mode 100644 searchlib/src/vespa/searchlib/config/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/config/translogserver.def create mode 100644 searchlib/src/vespa/searchlib/diskindex/.gitignore create mode 100644 searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/diskindex/OWNERS create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp create mode 100644 
searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/checkpointfile.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/diskindex.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/diskindex.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/docidmapper.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/extposocc.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/extposocc.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/fieldreader.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/fieldwriter.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/fileheader.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/fileheader.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/fusion.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/fusion.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/indexbuilder.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp create mode 100644 
searchlib/src/vespa/searchlib/diskindex/pagedict4file.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/wordnummapper.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposocc.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposting.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcposting.h create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp create mode 100644 searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h create mode 100644 searchlib/src/vespa/searchlib/docstore/.gitignore create mode 100644 searchlib/src/vespa/searchlib/docstore/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/docstore/OWNERS create mode 100644 searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/bytecomplens.h create mode 100644 searchlib/src/vespa/searchlib/docstore/cachestats.h create mode 100644 searchlib/src/vespa/searchlib/docstore/chunk.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/chunk.h create mode 100644 searchlib/src/vespa/searchlib/docstore/chunkformat.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/chunkformat.h create mode 100644 searchlib/src/vespa/searchlib/docstore/chunkformats.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/chunkformats.h create mode 
100644 searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h create mode 100644 searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h create mode 100644 searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h create mode 100644 searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h create mode 100644 searchlib/src/vespa/searchlib/docstore/documentstore.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/documentstore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/filechunk.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/filechunk.h create mode 100644 searchlib/src/vespa/searchlib/docstore/ibucketizer.h create mode 100644 searchlib/src/vespa/searchlib/docstore/idatastore.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/idatastore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/idocumentstore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/liddatastore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/logdatastore.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/logdatastore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/logdocumentstore.h create mode 100644 searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp create mode 100644 searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h create mode 100644 searchlib/src/vespa/searchlib/engine/.gitignore create mode 100644 searchlib/src/vespa/searchlib/engine/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/engine/OWNERS create mode 100755 searchlib/src/vespa/searchlib/engine/create-class-cpp.sh create 
mode 100644 searchlib/src/vespa/searchlib/engine/create-class-h.sh create mode 100644 searchlib/src/vespa/searchlib/engine/create-interface.sh create mode 100644 searchlib/src/vespa/searchlib/engine/docsumapi.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/docsumapi.h create mode 100644 searchlib/src/vespa/searchlib/engine/docsumreply.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/docsumreply.h create mode 100644 searchlib/src/vespa/searchlib/engine/docsumrequest.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/docsumrequest.h create mode 100644 searchlib/src/vespa/searchlib/engine/errorcodes.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/errorcodes.h create mode 100644 searchlib/src/vespa/searchlib/engine/monitorapi.h create mode 100644 searchlib/src/vespa/searchlib/engine/monitorreply.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/monitorreply.h create mode 100644 searchlib/src/vespa/searchlib/engine/monitorrequest.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/monitorrequest.h create mode 100644 searchlib/src/vespa/searchlib/engine/packetconverter.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/packetconverter.h create mode 100644 searchlib/src/vespa/searchlib/engine/propertiesmap.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/propertiesmap.h create mode 100644 searchlib/src/vespa/searchlib/engine/request.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/request.h create mode 100644 searchlib/src/vespa/searchlib/engine/searchapi.h create mode 100644 searchlib/src/vespa/searchlib/engine/searchreply.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/searchreply.h create mode 100644 searchlib/src/vespa/searchlib/engine/searchrequest.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/searchrequest.h create mode 100644 searchlib/src/vespa/searchlib/engine/source_description.cpp create mode 100644 
searchlib/src/vespa/searchlib/engine/source_description.h create mode 100644 searchlib/src/vespa/searchlib/engine/tracereply.h create mode 100644 searchlib/src/vespa/searchlib/engine/transport_metrics.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/transport_metrics.h create mode 100644 searchlib/src/vespa/searchlib/engine/transportserver.cpp create mode 100644 searchlib/src/vespa/searchlib/engine/transportserver.h create mode 100644 searchlib/src/vespa/searchlib/expression/.gitignore create mode 100644 searchlib/src/vespa/searchlib/expression/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/expression/OWNERS create mode 100644 searchlib/src/vespa/searchlib/expression/addfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/aggregationrefnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/andfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/arrayoperationnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/attributenode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/attributenode.h create mode 100644 searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/bitfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/bucketresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/catfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/catserializer.cpp create mode 100644 
searchlib/src/vespa/searchlib/expression/catserializer.h create mode 100644 searchlib/src/vespa/searchlib/expression/constantnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/dividefunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/documentaccessornode.h create mode 100644 searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/documentfieldnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/enumresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/expressionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/expressiontree.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/expressiontree.h create mode 100644 searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/floatresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/forcelink.hpp create mode 100644 searchlib/src/vespa/searchlib/expression/functionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/functionnodes.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h create mode 100644 
searchlib/src/vespa/searchlib/expression/integerresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/mathfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/maxfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/minfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/modulofunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/negatefunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/nullresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/numericfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/numericresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/orfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/perdocexpression.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/rangebucketpredef.h create mode 100644 searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp create mode 100644 
searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/rawresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/relevancenode.h create mode 100644 searchlib/src/vespa/searchlib/expression/resultnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/resultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/resultnodes.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/resultvector.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/resultvector.h create mode 100644 searchlib/src/vespa/searchlib/expression/reversefunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/serializer.h create mode 100644 searchlib/src/vespa/searchlib/expression/singleresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/sortfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/strcatserializer.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/strcatserializer.h create mode 100644 searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/stringresultnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/timestamp.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/timestamp.h create mode 100644 searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/tointfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/torawfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h create mode 100644 
searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/ucafunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/xorfunctionnode.h create mode 100644 searchlib/src/vespa/searchlib/expression/zcurve.cpp create mode 100644 searchlib/src/vespa/searchlib/expression/zcurve.h create mode 100644 searchlib/src/vespa/searchlib/features/.gitignore create mode 100644 searchlib/src/vespa/searchlib/features/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/features/OWNERS create mode 100644 searchlib/src/vespa/searchlib/features/agefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/agefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/array_parser.cpp create mode 100644 searchlib/src/vespa/searchlib/features/array_parser.h create mode 100644 searchlib/src/vespa/searchlib/features/array_parser.hpp create mode 100644 searchlib/src/vespa/searchlib/features/attributefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/attributefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/attributematchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/attributematchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/closenessfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/closenessfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/constant_tensor_executor.h create mode 100755 searchlib/src/vespa/searchlib/features/create-class-cpp.sh create mode 100644 searchlib/src/vespa/searchlib/features/create-class-h.sh create mode 100644 searchlib/src/vespa/searchlib/features/debug_attribute_wait.cpp create mode 100644 
searchlib/src/vespa/searchlib/features/debug_attribute_wait.h create mode 100644 searchlib/src/vespa/searchlib/features/debug_wait.cpp create mode 100644 searchlib/src/vespa/searchlib/features/debug_wait.h create mode 100644 searchlib/src/vespa/searchlib/features/distancefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/distancefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/distancetopathfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/dotproductfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/dotproductfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/element_completeness_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/element_similarity_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldinfofeature.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldlengthfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/computer.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp create mode 100644 
searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/params.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldmatchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/firstphasefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/firstphasefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/flow_completeness_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/foreachfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/foreachfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/freshnessfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/freshnessfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/item_raw_score_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/logarithmcalculator.h create mode 100644 
searchlib/src/vespa/searchlib/features/matchesfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/matchesfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/matchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/matchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/native_dot_product_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/nativeproximityfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/nativerankfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/nativerankfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/nowfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/nowfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/proximityfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/proximityfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/querycompletenessfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/queryfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/queryfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/queryterm.cpp create mode 100644 searchlib/src/vespa/searchlib/features/queryterm.h create mode 100644 searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp create mode 100644 
searchlib/src/vespa/searchlib/features/querytermcountfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/randomfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/randomfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/raw_score_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/raw_score_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/reverseproximityfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/setup.cpp create mode 100644 searchlib/src/vespa/searchlib/features/setup.h create mode 100644 searchlib/src/vespa/searchlib/features/subqueries_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/subqueries_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp create mode 100644 
searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/term_field_md_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/termdistancecalculator.cpp create mode 100644 searchlib/src/vespa/searchlib/features/termdistancecalculator.h create mode 100644 searchlib/src/vespa/searchlib/features/termdistancefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/termdistancefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/termeditdistancefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/termeditdistancefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/termfeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/termfeature.h create mode 100644 searchlib/src/vespa/searchlib/features/terminfofeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/terminfofeature.h create mode 100644 searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/text_similarity_feature.h create mode 100644 searchlib/src/vespa/searchlib/features/utils.cpp create mode 100644 searchlib/src/vespa/searchlib/features/utils.h create mode 100644 searchlib/src/vespa/searchlib/features/valuefeature.cpp create mode 100644 searchlib/src/vespa/searchlib/features/valuefeature.h create mode 100644 searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp create mode 100644 searchlib/src/vespa/searchlib/features/weighted_set_parser.h create mode 100644 searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp create mode 100644 searchlib/src/vespa/searchlib/fef/.gitignore create 
mode 100644 searchlib/src/vespa/searchlib/fef/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/fef/Doxyfile create mode 100644 searchlib/src/vespa/searchlib/fef/OWNERS create mode 100644 searchlib/src/vespa/searchlib/fef/blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/blueprint.h create mode 100644 searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/blueprintfactory.h create mode 100644 searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/blueprintresolver.h create mode 100644 searchlib/src/vespa/searchlib/fef/collection_type.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/collection_type.h create mode 100755 searchlib/src/vespa/searchlib/fef/create-class-cpp.sh create mode 100644 searchlib/src/vespa/searchlib/fef/create-class-h.sh create mode 100644 searchlib/src/vespa/searchlib/fef/create-fef-includes.sh create mode 100644 searchlib/src/vespa/searchlib/fef/create-interface.sh create mode 100755 searchlib/src/vespa/searchlib/fef/dist_doc_hp.sh create mode 100644 searchlib/src/vespa/searchlib/fef/feature_type.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/feature_type.h create mode 100644 searchlib/src/vespa/searchlib/fef/featureexecutor.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/featureexecutor.h create mode 100644 searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/featurenamebuilder.h create mode 100644 searchlib/src/vespa/searchlib/fef/featurenameparser.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/featurenameparser.h create mode 100644 searchlib/src/vespa/searchlib/fef/featureoverrider.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/featureoverrider.h create mode 100644 searchlib/src/vespa/searchlib/fef/fef.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/fef.h create mode 100644 
searchlib/src/vespa/searchlib/fef/fieldinfo.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/fieldinfo.h create mode 100644 searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/fieldpositionsiterator.h create mode 100644 searchlib/src/vespa/searchlib/fef/fieldtype.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/fieldtype.h create mode 100644 searchlib/src/vespa/searchlib/fef/filetablefactory.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/filetablefactory.h create mode 100644 searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/functiontablefactory.h create mode 100644 searchlib/src/vespa/searchlib/fef/handle.h create mode 100644 searchlib/src/vespa/searchlib/fef/iblueprintregistry.h create mode 100644 searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h create mode 100644 searchlib/src/vespa/searchlib/fef/iindexenvironment.h create mode 100644 searchlib/src/vespa/searchlib/fef/indexproperties.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/indexproperties.h create mode 100644 searchlib/src/vespa/searchlib/fef/iqueryenvironment.h create mode 100644 searchlib/src/vespa/searchlib/fef/itablefactory.h create mode 100644 searchlib/src/vespa/searchlib/fef/itablemanager.h create mode 100644 searchlib/src/vespa/searchlib/fef/itermdata.h create mode 100644 searchlib/src/vespa/searchlib/fef/itermfielddata.h create mode 100644 searchlib/src/vespa/searchlib/fef/location.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/location.h create mode 100644 searchlib/src/vespa/searchlib/fef/matchdata.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/matchdata.h create mode 100644 searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/matchdatalayout.h create mode 100644 searchlib/src/vespa/searchlib/fef/objectstore.cpp create mode 100644 
searchlib/src/vespa/searchlib/fef/objectstore.h create mode 100644 searchlib/src/vespa/searchlib/fef/parameter.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/parameter.h create mode 100644 searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/parameterdescriptions.h create mode 100644 searchlib/src/vespa/searchlib/fef/parametervalidator.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/parametervalidator.h create mode 100644 searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/phrasesplitter.h create mode 100644 searchlib/src/vespa/searchlib/fef/properties.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/properties.h create mode 100644 searchlib/src/vespa/searchlib/fef/queryproperties.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/queryproperties.h create mode 100644 searchlib/src/vespa/searchlib/fef/rank_program.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/rank_program.h create mode 100644 searchlib/src/vespa/searchlib/fef/ranksetup.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/ranksetup.h create mode 100644 searchlib/src/vespa/searchlib/fef/simpletermdata.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/simpletermdata.h create mode 100644 searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/simpletermfielddata.h create mode 100644 searchlib/src/vespa/searchlib/fef/sumexecutor.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/sumexecutor.h create mode 100644 searchlib/src/vespa/searchlib/fef/symmetrictable.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/symmetrictable.h create mode 100644 searchlib/src/vespa/searchlib/fef/table.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/table.h create mode 100644 searchlib/src/vespa/searchlib/fef/tablemanager.cpp create mode 100644 
searchlib/src/vespa/searchlib/fef/tablemanager.h create mode 100644 searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h create mode 100644 searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h create mode 100644 searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h create mode 100644 searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/.gitignore create mode 100644 searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/featuretest.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/featuretest.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/ftlib.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/ftlib.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/indexenvironment.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h create mode 
100644 searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/chain.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/double.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/query.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/setup.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/plugin/sum.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/queryenvironment.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h create mode 100644 searchlib/src/vespa/searchlib/fef/test/rankresult.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/test/rankresult.h create mode 100644 searchlib/src/vespa/searchlib/fef/utils.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/utils.h create mode 100644 searchlib/src/vespa/searchlib/fef/verify_feature.cpp create mode 100644 searchlib/src/vespa/searchlib/fef/verify_feature.h create mode 100644 searchlib/src/vespa/searchlib/grouping/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/grouping/OWNERS create mode 100644 searchlib/src/vespa/searchlib/grouping/collect.cpp create mode 100644 searchlib/src/vespa/searchlib/grouping/collect.h create mode 100644 searchlib/src/vespa/searchlib/grouping/forcelink.hpp create mode 100644 
searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp create mode 100644 searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h create mode 100644 searchlib/src/vespa/searchlib/grouping/groupengine.cpp create mode 100644 searchlib/src/vespa/searchlib/grouping/groupengine.h create mode 100644 searchlib/src/vespa/searchlib/grouping/groupingengine.cpp create mode 100644 searchlib/src/vespa/searchlib/grouping/groupingengine.h create mode 100644 searchlib/src/vespa/searchlib/grouping/groupref.h create mode 100644 searchlib/src/vespa/searchlib/grouping/hyperloglog.h create mode 100644 searchlib/src/vespa/searchlib/grouping/sketch.h create mode 100644 searchlib/src/vespa/searchlib/index/.gitignore create mode 100644 searchlib/src/vespa/searchlib/index/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/index/OWNERS create mode 100644 searchlib/src/vespa/searchlib/index/bitvectorkeys.h create mode 100644 searchlib/src/vespa/searchlib/index/dictionaryfile.cpp create mode 100644 searchlib/src/vespa/searchlib/index/dictionaryfile.h create mode 100644 searchlib/src/vespa/searchlib/index/docbuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/index/docbuilder.h create mode 100644 searchlib/src/vespa/searchlib/index/docidandfeatures.cpp create mode 100644 searchlib/src/vespa/searchlib/index/docidandfeatures.h create mode 100644 searchlib/src/vespa/searchlib/index/doctypebuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/index/doctypebuilder.h create mode 100644 searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp create mode 100644 searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h create mode 100644 searchlib/src/vespa/searchlib/index/indexbuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/index/indexbuilder.h create mode 100644 searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp create mode 100644 searchlib/src/vespa/searchlib/index/olddictionaryfile.h create mode 100644 
searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp create mode 100644 searchlib/src/vespa/searchlib/index/postinglistcountfile.h create mode 100644 searchlib/src/vespa/searchlib/index/postinglistcounts.cpp create mode 100644 searchlib/src/vespa/searchlib/index/postinglistcounts.h create mode 100644 searchlib/src/vespa/searchlib/index/postinglistfile.cpp create mode 100644 searchlib/src/vespa/searchlib/index/postinglistfile.h create mode 100644 searchlib/src/vespa/searchlib/index/postinglisthandle.cpp create mode 100644 searchlib/src/vespa/searchlib/index/postinglisthandle.h create mode 100644 searchlib/src/vespa/searchlib/index/postinglistparams.cpp create mode 100644 searchlib/src/vespa/searchlib/index/postinglistparams.h create mode 100644 searchlib/src/vespa/searchlib/index/schemautil.cpp create mode 100644 searchlib/src/vespa/searchlib/index/schemautil.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/.gitignore create mode 100644 searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/memoryindex/OWNERS create mode 100644 searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/dictionary.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/document_remover.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/documentinverter.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/featurestore.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp create 
mode 100644 searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/memoryindex.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/postingiterator.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h create mode 100644 searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp create mode 100644 searchlib/src/vespa/searchlib/memoryindex/wordstore.h create mode 100644 searchlib/src/vespa/searchlib/parsequery/.gitignore create mode 100644 searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/parsequery/OWNERS create mode 100644 searchlib/src/vespa/searchlib/parsequery/parse.cpp create mode 100644 searchlib/src/vespa/searchlib/parsequery/parse.h create mode 100644 searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp create mode 100644 searchlib/src/vespa/searchlib/parsequery/simplequerystack.h create mode 100644 searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp create mode 100644 searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h create mode 100644 
searchlib/src/vespa/searchlib/predicate/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/predicate/OWNERS create mode 100644 searchlib/src/vespa/searchlib/predicate/document_features_store.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/document_features_store.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_hash.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_index.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_index.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_interval.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp create mode 100644 
searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h create mode 100644 searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h create mode 100644 searchlib/src/vespa/searchlib/predicate/simple_index.cpp create mode 100644 searchlib/src/vespa/searchlib/predicate/simple_index.h create mode 100644 searchlib/src/vespa/searchlib/predicate/simple_index.hpp create mode 100644 searchlib/src/vespa/searchlib/predicate/tree_crumbs.h create mode 100644 searchlib/src/vespa/searchlib/query/.gitignore create mode 100644 searchlib/src/vespa/searchlib/query/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/query/OWNERS create mode 100644 searchlib/src/vespa/searchlib/query/base.cpp create mode 100644 searchlib/src/vespa/searchlib/query/base.h create mode 100644 searchlib/src/vespa/searchlib/query/posocc.h create mode 100644 searchlib/src/vespa/searchlib/query/query.cpp create mode 100644 searchlib/src/vespa/searchlib/query/query.h create mode 100644 searchlib/src/vespa/searchlib/query/querynode.cpp create mode 100644 searchlib/src/vespa/searchlib/query/querynode.h create mode 100644 searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp create mode 100644 searchlib/src/vespa/searchlib/query/querynoderesultbase.h create mode 100644 searchlib/src/vespa/searchlib/query/queryterm.cpp create mode 100644 searchlib/src/vespa/searchlib/query/queryterm.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/.gitignore create mode 100644 searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/query/tree/OWNERS create mode 100644 searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/intermediate.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/intermediate.h create mode 100644 
searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/location.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/location.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/node.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/point.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/querybuilder.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/querynodemixin.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/queryreplicator.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/querytreecreator.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/queryvisitor.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/range.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/range.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/rectangle.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/simplequery.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/term.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/term.h create mode 100644 searchlib/src/vespa/searchlib/query/tree/termnodes.cpp create mode 100644 searchlib/src/vespa/searchlib/query/tree/termnodes.h create mode 100644 searchlib/src/vespa/searchlib/query/weight.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/.gitignore create 
mode 100644 searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/queryeval/OWNERS create mode 100644 searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/andnotsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/andsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/andsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/andsearchnostrict.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h create mode 100755 searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh create mode 100644 searchlib/src/vespa/searchlib/queryeval/create-class-h.sh create mode 100644 searchlib/src/vespa/searchlib/queryeval/create-interface.sh create mode 100644 searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/dot_product_search.h create mode 100644 
searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/emptysearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/equivsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_result.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_result.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/fake_searchable.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/field_spec.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/field_spec.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/hitcollector.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/irequestcontext.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/isourceselector.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp create 
mode 100644 searchlib/src/vespa/searchlib/queryeval/iterator_pack.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/iterators.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/iterators.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/multisearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/multisearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/nearsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/orlikesearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/orsearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/orsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/posting_info.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/predicate_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/ranksearch.h create mode 100644 
searchlib/src/vespa/searchlib/queryeval/scores.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/searchable.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/searchable.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/searchiterator.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/simpleresult.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/simplesearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/split_float.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/split_float.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/termasstring.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/termasstring.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/termwise_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h create mode 100644 
searchlib/src/vespa/searchlib/queryeval/test/leafspec.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/test/wandspec.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/truesearch.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/truesearch.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/unpackinfo.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h create mode 100644 searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp create mode 100644 searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h create mode 100644 searchlib/src/vespa/searchlib/test/.gitignore create mode 100644 
searchlib/src/vespa/searchlib/test/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/test/OWNERS create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/.gitignore create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp create mode 100644 searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h create mode 100644 searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp create mode 100644 searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/.gitignore create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp create mode 
100644 searchlib/src/vespa/searchlib/test/fakedata/fakeword.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp create mode 100644 searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h create mode 100644 searchlib/src/vespa/searchlib/test/initrange.cpp create mode 100644 searchlib/src/vespa/searchlib/test/initrange.h create mode 100644 searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h create mode 100644 searchlib/src/vespa/searchlib/test/statefile.cpp create mode 100644 searchlib/src/vespa/searchlib/test/statefile.h create mode 100644 searchlib/src/vespa/searchlib/test/statestring.cpp create mode 100644 searchlib/src/vespa/searchlib/test/statestring.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/.gitignore create mode 100644 searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/transactionlog/OWNERS create mode 100644 searchlib/src/vespa/searchlib/transactionlog/common.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/common.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/domain.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/domain.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/domainpart.h create mode 100644 
searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/session.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/session.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/syncproxy.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogclient.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogserver.h create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp create mode 100644 searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h create mode 100644 searchlib/src/vespa/searchlib/util/.gitignore create mode 100644 searchlib/src/vespa/searchlib/util/CMakeLists.txt create mode 100644 searchlib/src/vespa/searchlib/util/bufferwriter.cpp create mode 100644 searchlib/src/vespa/searchlib/util/bufferwriter.h create mode 100644 searchlib/src/vespa/searchlib/util/comprbuffer.cpp create mode 100644 searchlib/src/vespa/searchlib/util/comprbuffer.h create mode 100644 searchlib/src/vespa/searchlib/util/comprfile.cpp create mode 100644 searchlib/src/vespa/searchlib/util/comprfile.h create mode 100644 searchlib/src/vespa/searchlib/util/dirtraverse.cpp create mode 100644 searchlib/src/vespa/searchlib/util/dirtraverse.h create mode 100644 searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp create mode 100644 searchlib/src/vespa/searchlib/util/drainingbufferwriter.h create mode 100644 searchlib/src/vespa/searchlib/util/filealign.cpp create mode 100644 
searchlib/src/vespa/searchlib/util/filealign.h create mode 100644 searchlib/src/vespa/searchlib/util/fileheadertk.cpp create mode 100644 searchlib/src/vespa/searchlib/util/fileheadertk.h create mode 100644 searchlib/src/vespa/searchlib/util/filekit.cpp create mode 100644 searchlib/src/vespa/searchlib/util/filekit.h create mode 100644 searchlib/src/vespa/searchlib/util/filesizecalculator.cpp create mode 100644 searchlib/src/vespa/searchlib/util/filesizecalculator.h create mode 100644 searchlib/src/vespa/searchlib/util/fileutil.cpp create mode 100644 searchlib/src/vespa/searchlib/util/fileutil.h create mode 100644 searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp create mode 100644 searchlib/src/vespa/searchlib/util/foldedstringcompare.h create mode 100644 searchlib/src/vespa/searchlib/util/inline.h create mode 100644 searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp create mode 100644 searchlib/src/vespa/searchlib/util/ioerrorhandler.h create mode 100644 searchlib/src/vespa/searchlib/util/logutil.cpp create mode 100644 searchlib/src/vespa/searchlib/util/logutil.h create mode 100644 searchlib/src/vespa/searchlib/util/memorytub.h create mode 100644 searchlib/src/vespa/searchlib/util/memorytub_impl.h create mode 100644 searchlib/src/vespa/searchlib/util/memoryusage.h create mode 100644 searchlib/src/vespa/searchlib/util/postingpriorityqueue.h create mode 100644 searchlib/src/vespa/searchlib/util/rand48.h create mode 100644 searchlib/src/vespa/searchlib/util/randomgenerator.h create mode 100644 searchlib/src/vespa/searchlib/util/rawbuf.cpp create mode 100644 searchlib/src/vespa/searchlib/util/rawbuf.h create mode 100644 searchlib/src/vespa/searchlib/util/runnable.h create mode 100644 searchlib/src/vespa/searchlib/util/searchable_stats.h create mode 100644 searchlib/src/vespa/searchlib/util/sigbushandler.cpp create mode 100644 searchlib/src/vespa/searchlib/util/sigbushandler.h create mode 100644 
searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp create mode 100644 searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h create mode 100644 searchlib/src/vespa/searchlib/util/sort.h create mode 100644 searchlib/src/vespa/searchlib/util/statebuf.cpp create mode 100644 searchlib/src/vespa/searchlib/util/statebuf.h create mode 100644 searchlib/src/vespa/searchlib/util/statefile.cpp create mode 100644 searchlib/src/vespa/searchlib/util/statefile.h create mode 100644 searchlib/src/vespa/searchlib/util/stringenum.cpp create mode 100644 searchlib/src/vespa/searchlib/util/stringenum.h create mode 100644 searchlib/src/vespa/searchlib/util/url.cpp create mode 100644 searchlib/src/vespa/searchlib/util/url.h (limited to 'searchlib/src') diff --git a/searchlib/src/.gitignore b/searchlib/src/.gitignore new file mode 100644 index 00000000000..3e2fb17989e --- /dev/null +++ b/searchlib/src/.gitignore @@ -0,0 +1,7 @@ +*.dsp +*.mak +Makefile.ini +config_command.sh +html +latex +project.dsw diff --git a/searchlib/src/Doxyfile b/searchlib/src/Doxyfile new file mode 100644 index 00000000000..931ba9fba8e --- /dev/null +++ b/searchlib/src/Doxyfile @@ -0,0 +1,1162 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Doxyfile 1.3.9.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = SearchLib + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of source +# files, where putting all generated files in the same directory would otherwise +# cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, +# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, +# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, +# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, +# Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# This tag can be used to specify the encoding used in the generated output. +# The encoding is not always determined by the language that is chosen, +# but also whether or not the output is meant for Windows or non-Windows users. +# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES +# forces the Windows encoding (this is the default for the Windows binary), +# whereas setting the tag to NO uses a Unix-style encoding (the default for +# all platforms other than Windows). + +USE_WINDOWS_ENCODING = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is used +# as the annotated text. Otherwise, the brief description is used as-is. 
If left +# blank, the following values are used ("$name" is automatically replaced with the +# name of the entity): "The $name class" "The $name widget" "The $name file" +# "is" "provides" "specifies" "contains" "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited +# members of a class in the documentation of that class as if those members were +# ordinary class members. Constructors, destructors and assignment operators of +# the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. 
This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that act +# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources +# only. Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. 
+ +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. 
+ +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. 
If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. 
If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. + +SHOW_DIRECTORIES = YES + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. 
+ +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = searchlib + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp +# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories +# that are symbolic links (a Unix filesystem feature) are excluded from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command).
+ +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+ +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. 
+ +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. 
+ +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output.
+ +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. 
On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used.
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = IAM_DOXYGEN + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse the +# parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. 
+# Note that each tag file must have a unique name +# (where the name does NOT include the path). +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. Note that this +# option is superseded by the HAVE_DOT option below. This is only a fallback. It is +# recommended to install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs.
The other options in this section +# have no effect if this option is set to NO (the default). + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command.
+ +CALL_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found on the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes that +# lie further from the root node will be omitted. Note that setting this option to +# 1 or 2 may greatly reduce the computation time needed for large code bases.
Also +# note that a graph may be further truncated if the graph's image dimensions are +# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT). +# If 0 is used for the depth value (the default), the graph is not depth-constrained. + +MAX_DOT_GRAPH_DEPTH = 0 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/searchlib/src/apps/docstore/.gitignore b/searchlib/src/apps/docstore/.gitignore new file mode 100644 index 00000000000..395e6ce624b --- /dev/null +++ b/searchlib/src/apps/docstore/.gitignore @@ -0,0 +1,6 @@ +/.depend +/Makefile +/vespa-verify-logdatastore +/vespa-documentstore-inspect +/vespa-documentstore-benchmark +/vespa-create-idx-from-dat diff --git a/searchlib/src/apps/docstore/CMakeLists.txt b/searchlib/src/apps/docstore/CMakeLists.txt new file mode 100644 index 00000000000..971d11ea4cc --- /dev/null +++ b/searchlib/src/apps/docstore/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_vespa-verify-logdatastore_app + SOURCES + verifylogdatastore.cpp + OUTPUT_NAME vespa-verify-logdatastore + INSTALL bin + DEPENDS + searchlib +) +vespa_add_executable(searchlib_vespa-documentstore-inspect_app + SOURCES + documentstoreinspect.cpp + OUTPUT_NAME vespa-documentstore-inspect + INSTALL bin + DEPENDS + searchlib +) +vespa_add_executable(searchlib_vespa-documentstore-benchmark_app + SOURCES + benchmarkdatastore.cpp + OUTPUT_NAME vespa-documentstore-benchmark + INSTALL bin + DEPENDS + searchlib +) +vespa_add_executable(searchlib_vespa-create-idx-from-dat_app + SOURCES + create-idx-from-dat.cpp + OUTPUT_NAME vespa-create-idx-from-dat + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/docstore/benchmarkdatastore.cpp b/searchlib/src/apps/docstore/benchmarkdatastore.cpp new file mode 100644 index 00000000000..1281e0d11b3 --- /dev/null +++ b/searchlib/src/apps/docstore/benchmarkdatastore.cpp @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP("documentstore.benchmark"); + +using namespace search; + +class BenchmarkDataStoreApp : public FastOS_Application +{ + void usage(void); + int benchmark(const vespalib::string & directory, size_t numReads, size_t numThreads, size_t perChunk, const vespalib::string & readType); + int Main(void); + void read(size_t numReads, size_t perChunk, const IDataStore * dataStore); +}; + + + +void +BenchmarkDataStoreApp::usage(void) +{ + printf("Usage: %s \n", _argv[0]); + fflush(stdout); +} + +int +BenchmarkDataStoreApp::Main(void) +{ + if (_argc >= 2) { + size_t numThreads(16); + size_t numReads(1000000); + size_t perChunk(1); + vespalib::string readType("directio"); + vespalib::string directory(_argv[1]); + if (_argc >= 3) { + numReads = strtoul(_argv[2], NULL, 0); + if (_argc >= 4) { + numThreads = strtoul(_argv[3], NULL, 0); + if (_argc >= 5) { + perChunk = strtoul(_argv[4], NULL, 0); + if (_argc >= 5) { + readType = _argv[5]; + } + } + } + } + return benchmark(directory, numReads, numThreads, perChunk, readType); + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return 0; +} + +void BenchmarkDataStoreApp::read(size_t numReads, size_t perChunk, const IDataStore * dataStore) +{ + vespalib::DataBuffer buf; + struct random_data rstate; + char state[8]; + memset(state, 0, sizeof(state)); + memset(&rstate, 0, sizeof(rstate)); + const size_t numDocs(dataStore->nextId()); + assert(numDocs > 0); + initstate_r(getpid(), state, sizeof(state), &rstate); + assert(srandom_r(getpid(), &rstate) == 0); + int32_t rnd(0); + for ( size_t i(0); i < numReads; i++) { + random_r(&rstate, &rnd); + uint32_t lid(rnd%numDocs); + for (uint32_t j(lid); j < std::min(numDocs, lid+perChunk); j++) { + dataStore->read(j, buf); + buf.clear(); + } + } +} + +int +BenchmarkDataStoreApp::benchmark(const vespalib::string & dir, size_t numReads, size_t numThreads, size_t perChunk, const 
vespalib::string & readType) +{ + int retval(0); + LogDataStore::Config config; + GrowStrategy growStrategy; + TuneFileSummary tuning; + if (readType == "directio") { + tuning._randRead.setWantDirectIO(); + } else if (readType == "normal") { + tuning._randRead.setWantNormal(); + } else if (readType == "mmap") { + tuning._randRead.setWantMemoryMap(); + } + search::index::DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + transactionlog::NoSyncProxy noTlSyncer; + LogDataStore store(executor, dir, config, growStrategy, tuning, + fileHeaderContext, + noTlSyncer, NULL, true); + vespalib::ThreadStackExecutor bmPool(numThreads, 128*1024); + LOG(info, "Start read benchmark with %lu threads doing %lu reads in chunks of %lu reads. Totally %lu objects", numThreads, numReads, perChunk, numThreads * numReads * perChunk); + for (size_t i(0); i < numThreads; i++) { + bmPool.execute(vespalib::makeTask(vespalib::makeClosure(this, &BenchmarkDataStoreApp::read, numReads, perChunk, static_cast(&store)))); + } + bmPool.sync(); + LOG(info, "Benchmark done."); + return retval; +} + +FASTOS_MAIN(BenchmarkDataStoreApp); diff --git a/searchlib/src/apps/docstore/create-idx-from-dat.cpp b/searchlib/src/apps/docstore/create-idx-from-dat.cpp new file mode 100644 index 00000000000..66661b6468d --- /dev/null +++ b/searchlib/src/apps/docstore/create-idx-from-dat.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include + +using namespace search; + +class CreateIdxFileFromDatApp : public FastOS_Application +{ + void usage(void); + int createIdxFile(const vespalib::string & datFileName, const vespalib::string & idxFileName); + int Main(void); +}; + + + +void +CreateIdxFileFromDatApp::usage(void) +{ + printf("Usage: %s \n", _argv[0]); + fflush(stdout); +} + +bool tryDecode(size_t chunks, size_t offset, const char * p, size_t sz, size_t nextSync) +{ + bool success(false); + for (size_t lengthError(0); !success && (sz + lengthError <= nextSync); lengthError++) { + try { + Chunk chunk(chunks, p, sz + lengthError, false); + success = true; + } catch (const vespalib::Exception & e) { + fprintf(stdout, "Chunk %ld, with size=%ld failed with lengthError %ld due to '%s'\n", offset, sz, lengthError, e.what()); + } + } + return success; +} + +bool validUncompressed(const char * n, size_t offset) { + return (n[1] == document::CompressionConfig::NONE) && + (n[2] == 0) && + (n[3] == 0) && + (n[4] == 0) && + (n[5] != 0) && + tryDecode(0, offset, n, 6ul + 4ul + uint8_t(n[5]), 6ul + 4ul + uint8_t(n[5]) + 4); +} + +bool validHead(const char * n, size_t offset) { + return (n[0] == 0) && (validUncompressed(n, offset)); +} + +int CreateIdxFileFromDatApp::createIdxFile(const vespalib::string & datFileName, const vespalib::string & idxFileName) +{ + MMapRandRead datFile(datFileName, 0, 0); + int64_t fileSize = datFile.getSize(); + uint64_t datHeaderLen = FileChunk::readDataHeader(datFile); + const char * start = static_cast(datFile.getMapping()); + const char * end = start + fileSize; + uint64_t chunks(0); + uint64_t entries(0); + uint64_t alignment(512); + FastOS_File idxFile(idxFileName.c_str()); + assert(idxFile.OpenWriteOnly()); + index::DummyFileHeaderContext fileHeaderContext; + idxFile.SetPosition(WriteableFileChunk::writeIdxHeader(fileHeaderContext, idxFile)); + fprintf(stdout, "datHeaderLen=%ld\n", datHeaderLen); + uint64_t serialNum(0); + for 
(const char * current(start + datHeaderLen); current < end; ) { + if (validHead(current, current-start)) { + const char * tail(current); + const char * nextStart(current+alignment); + for (; nextStart < end; nextStart+=alignment) { + if (validHead(nextStart, nextStart-start)) { + tail = nextStart; + while(*(tail-1) == 0) { + tail--; + } + if (tryDecode(chunks, current-start, current, tail - current, nextStart-current)) { + break; + } else { + fprintf(stdout, "chunk %ld possibly starting at %ld ending at %ld false sync at pos=%ld\n", + chunks, current-start, tail-start, nextStart-start); + } + } + } + if (tail == current) { + nextStart = end; + tail = end; + while(*(tail-1) == 0) { + tail--; + } + } + uint64_t sz = tail - current; + fprintf(stdout, "Most likely found chunk at offset %ld with length %ld\n", current - start, sz); + vespalib::nbostream os; + for (size_t lengthError(0); int64_t(sz+lengthError) <= nextStart-start; lengthError++) { + try { + Chunk chunk(chunks, current, sz + lengthError, false); + fprintf(stdout, "id=%d lastSerial=%ld count=%ld\n", chunk.getId(), chunk.getLastSerial(), chunk.count()); + const Chunk::LidList & lidlist = chunk.getLids(); + if (chunk.getLastSerial() < serialNum) { + fprintf(stdout, "Serial num grows down prev=%ld, current=%ld\n", serialNum, chunk.getLastSerial()); + } + serialNum = std::max(serialNum, chunk.getLastSerial()); + ChunkMeta cmeta(current-start, sz + lengthError, serialNum, chunk.count()); + cmeta.serialize(os); + for (auto it(lidlist.begin()); it != lidlist.end(); it++) { + LidMeta lm(it->getLid(), it->netSize()); + lm.serialize(os); + } + break; + } catch (const vespalib::Exception & e) { + fprintf(stdout, "Failed with lengthError %ld due to '%s'\n", lengthError, e.what()); + } + } + idxFile.Write2(os.c_str(), os.size()); + chunks++; + for(current += alignment; current < tail; current += alignment); + } else { + current += alignment; + } + //fprintf(stdout, "Next is most likely at offset %ld tail(%p)\n", 
current - start, tail); +/* + ChunkMeta cm; + cm.deserialize(is); + fprintf(stdout, "Chunk(%ld) : LastSerial(%ld), Entries(%d), Offset(%ld), Size(%d)\n", + chunk, cm.getLastSerial(), cm.getNumEntries(), cm.getOffset(), cm.getSize()); + for (size_t i(0), m(cm.getNumEntries()); i < m; i++, entries++) { + LidMeta lm; + lm.deserialize(is); + fprintf(stdout, "Entry(%ld.%ld) : Lid(%d), Size(%d)\n", chunk, i, lm.getLid(), lm.size()); + } +*/ + } + fprintf(stdout, "Processed %ld chunks with total entries = %ld\n", chunks, entries); + return 0; +} + +int +CreateIdxFileFromDatApp::Main(void) +{ + vespalib::string cmd; + if (_argc == 3) { + vespalib::string datFile(_argv[1]); + vespalib::string idxfile(_argv[2]); + createIdxFile(datFile, idxfile); + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return 0; +} + +FASTOS_MAIN(CreateIdxFileFromDatApp); diff --git a/searchlib/src/apps/docstore/documentstoreinspect.cpp b/searchlib/src/apps/docstore/documentstoreinspect.cpp new file mode 100644 index 00000000000..587565672c0 --- /dev/null +++ b/searchlib/src/apps/docstore/documentstoreinspect.cpp @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include + +using namespace search; + +class DocumentStoreInspectApp : public FastOS_Application +{ + void usage(void); + int verify(const vespalib::string & directory); + int dumpIdxFile(const vespalib::string & file); + int Main(void); +}; + + + +void +DocumentStoreInspectApp::usage(void) +{ + printf("Usage: %s dumpidxfile [--idxfile idxFile]\n", _argv[0]); + fflush(stdout); +} + +int DocumentStoreInspectApp::dumpIdxFile(const vespalib::string & file) +{ + FastOS_File idxFile(file.c_str()); + idxFile.enableMemoryMap(0); + if (idxFile.OpenReadOnly()) { + if (idxFile.IsMemoryMapped()) { + int64_t fileSize = idxFile.GetSize(); + uint64_t idxHeaderLen = FileChunk::readIdxHeader(idxFile); + vespalib::nbostream is(static_cast + (idxFile.MemoryMapPtr(0)) + idxHeaderLen, + fileSize - idxHeaderLen); + size_t chunk(0); + size_t entries(0); + for (; ! is.empty(); chunk++) { + ChunkMeta cm; + cm.deserialize(is); + fprintf(stdout, "Chunk(%ld) : LastSerial(%ld), Entries(%d), Offset(%ld), Size(%d)\n", + chunk, cm.getLastSerial(), cm.getNumEntries(), cm.getOffset(), cm.getSize()); + for (size_t i(0), m(cm.getNumEntries()); i < m; i++, entries++) { + LidMeta lm; + lm.deserialize(is); + fprintf(stdout, "Entry(%ld.%ld) : Lid(%d), Size(%d)\n", chunk, i, lm.getLid(), lm.size()); + } + } + fprintf(stdout, "Processed %ld chunks with total entries = %ld\n", chunk, entries); + } else { + fprintf(stderr, "Failed memorymapping file '%s' due to %s\n", idxFile.GetFileName(), idxFile.getLastErrorString().c_str()); + } + } else { + fprintf(stderr, "Failed opening file '%s' readonly due to %s\n", idxFile.GetFileName(), idxFile.getLastErrorString().c_str()); + } + return 0; +} + +int +DocumentStoreInspectApp::Main(void) +{ + vespalib::string cmd; + if (_argc >= 2) { + cmd = _argv[1]; + if (cmd == "dumpidxfile") { + vespalib::string idxfile; + if (_argc >= 4) { + if (_argv[2] == vespalib::string("--idxfile")) { + idxfile = _argv[3]; + dumpIdxFile(idxfile); + } 
else { + fprintf(stderr, "Unknown option '%s'.\n", _argv[2]); + usage(); + return 1; + } + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + } else { + fprintf(stderr, "Unknown command '%s'.\n", cmd.c_str()); + usage(); + return 1; + } + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return 0; +} + +int +DocumentStoreInspectApp::verify(const vespalib::string & dir) +{ + int retval(0); + + LogDataStore::Config config; + GrowStrategy growStrategy; + TuneFileSummary tuning; + search::index::DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + transactionlog::NoSyncProxy noTlSyncer; + + LogDataStore store(executor, dir, config, growStrategy, tuning, + fileHeaderContext, noTlSyncer, NULL, true); + store.verify(false); + return retval; +} + +FASTOS_MAIN(DocumentStoreInspectApp); diff --git a/searchlib/src/apps/docstore/verifylogdatastore.cpp b/searchlib/src/apps/docstore/verifylogdatastore.cpp new file mode 100644 index 00000000000..200d6051d8f --- /dev/null +++ b/searchlib/src/apps/docstore/verifylogdatastore.cpp @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include + +using namespace search; + +class VerifyLogDataStoreApp : public FastOS_Application +{ + void usage(void); + int verify(const vespalib::string & directory); + int Main(void); +}; + + + +void +VerifyLogDataStoreApp::usage(void) +{ + printf("Usage: %s \n", _argv[0]); + fflush(stdout); +} + +int +VerifyLogDataStoreApp::Main(void) +{ + if (_argc >= 2) { + vespalib::string directory(_argv[1]); + return verify(directory); + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return 0; +} + +int +VerifyLogDataStoreApp::verify(const vespalib::string & dir) +{ + int retval(0); + + LogDataStore::Config config; + GrowStrategy growStrategy; + TuneFileSummary tuning; + search::index::DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + transactionlog::NoSyncProxy noTlSyncer; + + LogDataStore store(executor, dir, config, growStrategy, tuning, + fileHeaderContext, + noTlSyncer, NULL, true); + store.verify(false); + return retval; +} + +FASTOS_MAIN(VerifyLogDataStoreApp); diff --git a/searchlib/src/apps/expgolomb/.gitignore b/searchlib/src/apps/expgolomb/.gitignore new file mode 100644 index 00000000000..0886ab154a2 --- /dev/null +++ b/searchlib/src/apps/expgolomb/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +expgolomb diff --git a/searchlib/src/apps/expgolomb/CMakeLists.txt b/searchlib/src/apps/expgolomb/CMakeLists.txt new file mode 100644 index 00000000000..230718907dd --- /dev/null +++ b/searchlib/src/apps/expgolomb/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_expgolomb_app + SOURCES + expgolomb.cpp + OUTPUT_NAME expgolomb + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/expgolomb/expgolomb.cpp b/searchlib/src/apps/expgolomb/expgolomb.cpp new file mode 100644 index 00000000000..1070a9dab8f --- /dev/null +++ b/searchlib/src/apps/expgolomb/expgolomb.cpp @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +/* Test application that round-trips values built from rand() through exp-golomb encoding and decoding. */ +class ExpGolombApp : public FastOS_Application +{ + void + usage(void); + + int + testExpGolomb64(int kValue); + + int + testExpGolomb64le(int kValue); + + int + Main(void); +}; + + + +/* Prints the command synopsis to stdout. */ +void +ExpGolombApp::usage(void) +{ + printf("Usage: expgolomb testeg64 ]\n"); + fflush(stdout); +} + + +/* Builds 20000 values from rand() (the second 10000 masked down to a random number of low bits), encodes each with encodeExpGolomb(value, kValue) using EncodeContext64BE, decodes them again and compares. Prints a FAILURE line per mismatch or one SUCCESS line; returns 1 on any mismatch, otherwise 0. */ +int +ExpGolombApp::testExpGolomb64(int kValue) +{ + std::vector myrand; + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + myrand.push_back(rval); + } + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + uint32_t bits = (rand() & 63); + rval &= ((UINT64_C(1) << bits) - 1); + myrand.push_back(rval); + } + typedef search::bitcompression::EncodeContext64BE EC; + + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + + int rsize = myrand.size(); + for (int i = 0; i < rsize; ++i) { + e.encodeExpGolomb(myrand[i], kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + /* NOTE(review): length and val64 are presumably written by the decode macros below - confirm against the macro definitions. */ + UC64_DECODECONTEXT(o); + unsigned int length; + uint64_t val64; + UC64BE_SETUPBITS_NS(o, static_cast(wc._comprBuf), 0, EC); + + bool failure = false; + for (int i = 0; i < rsize; ++i) { + UC64BE_DECODEEXPGOLOMB(oVal, oCompr, oPreRead, oCacheInt, + kValue, EC); + if (val64 != myrand[i]) { + printf("FAILURE: TestExpGolomb64, val64=%" + PRIu64 ", myrand[%d]=%" PRIu64 "\n", + val64, i, myrand[i]); + failure = true; + } + } + if (!failure) + 
printf("SUCCESS: TestExpGolomb64\n"); + return failure ? 1 : 0; +} + +/* Same round-trip check as testExpGolomb64, but using EncodeContext64LE and the UC64LE_* decode macros. */ +int +ExpGolombApp::testExpGolomb64le(int kValue) +{ + std::vector myrand; + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + myrand.push_back(rval); + } + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + uint32_t bits = (rand() & 63); + rval &= ((UINT64_C(1) << bits) - 1); + myrand.push_back(rval); + } + typedef search::bitcompression::EncodeContext64LE EC; + + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + + int rsize = myrand.size(); + for (int i = 0; i < rsize; ++i) { + e.encodeExpGolomb(myrand[i], kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + UC64_DECODECONTEXT(o); + unsigned int length; + uint64_t val64; + UC64LE_SETUPBITS_NS(o, static_cast(wc._comprBuf), 0, EC); + + bool failure = false; + for (int i = 0; i < rsize; ++i) { + UC64LE_DECODEEXPGOLOMB(oVal, oCompr, oPreRead, oCacheInt, + kValue, EC); + if (val64 != myrand[i]) { + printf("FAILURE: TestExpGolomb64le, val64=%" + PRIu64 ", myrand[%d]=%" PRIu64 "\n", + val64, i, myrand[i]); + failure = true; + } + } + if (!failure) + printf("SUCCESS: TestExpGolomb64le\n"); + return failure ? 
1 : 0; +} + + +/* Dispatches on the first argument, testeg64 or testeg64le, each followed by the k value which is converted with atoi(). Returns the test result, or 1 on missing or unknown arguments. */ +int +ExpGolombApp::Main(void) +{ + printf("Hello world\n"); + if (_argc >= 2) { + if (strcmp(_argv[1], "testeg64") == 0) { + if (_argc < 3) { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return testExpGolomb64(atoi(_argv[2])); + } else if (strcmp(_argv[1], "testeg64le") == 0) { + if (_argc < 3) { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return testExpGolomb64le(atoi(_argv[2])); + } else { + fprintf(stderr, "Wrong arguments\n"); + usage(); + return 1; + } + } else { + fprintf(stderr, "Too few arguments\n"); + usage(); + return 1; + } + return 0; +} + +FASTOS_MAIN(ExpGolombApp); + + diff --git a/searchlib/src/apps/fileheaderinspect/.gitignore b/searchlib/src/apps/fileheaderinspect/.gitignore new file mode 100644 index 00000000000..5616f8e735c --- /dev/null +++ b/searchlib/src/apps/fileheaderinspect/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +vespa-header-inspect diff --git a/searchlib/src/apps/fileheaderinspect/CMakeLists.txt b/searchlib/src/apps/fileheaderinspect/CMakeLists.txt new file mode 100644 index 00000000000..322bf6fefcb --- /dev/null +++ b/searchlib/src/apps/fileheaderinspect/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_vespa-header-inspect_app + SOURCES + fileheaderinspect.cpp + OUTPUT_NAME vespa-header-inspect + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp b/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp new file mode 100644 index 00000000000..1cd280830d9 --- /dev/null +++ b/searchlib/src/apps/fileheaderinspect/fileheaderinspect.cpp @@ -0,0 +1,223 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("fileheaderinspect"); + +#include +#include +#include +#include + +using namespace vespalib; + +class Application : public FastOS_Application { +private: + vespalib::string _fileName; + char _delimiter; + bool _quiet; + + int parseOpts(); + void usage(); + void printQuiet(FileHeader &header); + void printVerbose(FileHeader &header); + vespalib::string escape(const vespalib::string &str, char quote = '\0'); + vespalib::string getTypeString(const FileHeader::Tag &tag); + vespalib::string getValueString(const FileHeader::Tag &tag); + +public: + Application(); + int Main(); +}; + +Application::Application() : + _fileName(""), + _delimiter(';'), + _quiet(false) +{ + // empty +} + + +void +Application::usage() +{ + printf("Tool for inspecting the headers of files used by Vespa.\n"); + printf("Usage: %s [options] filename\n", _argv[0]); + printf("\n"); + printf("The options are:\n"); + printf("-d delimiter The delimiter to use to separate values in quiet output.\n"); + printf("-f file The name of the file to inspect.\n"); + printf("-q Enables machine readable output.\n"); + printf("-h Shows this help page.\n"); +} + + +int +Application::parseOpts() +{ + char c = '?'; + const char *optArg = NULL; + int optInd = 0; + while ((c = GetOpt("d:f:qh", optArg, optInd)) != -1) { + switch (c) { + case 'd': + _delimiter = optArg[0]; + break; + case 'f': + _fileName = optArg; + break; + case 'q': + _quiet = true; + break; + case 'h': + usage(); + return EXIT_SUCCESS; + default: + usage(); + return EXIT_FAILURE; + } + } + if (_argc == optInd + 1) { + _fileName = _argv[optInd]; + } + if (_fileName.empty()) { + std::cerr << "No filename given." 
<< std::endl; + return EXIT_FAILURE; + } + return ~(EXIT_SUCCESS | EXIT_FAILURE); +} + +int +Application::Main() +{ + int ret = parseOpts(); + if (ret == EXIT_FAILURE || ret == EXIT_SUCCESS) { + return ret; + } + + FastOS_File file; + if (!file.OpenReadOnly(_fileName.c_str())) { + std::cerr << "Failed to open file '" << _fileName << "'." << std::endl; + return EXIT_FAILURE; + } + + FileHeader header; + try { + header.readFile(file); + } catch (IllegalHeaderException &e) { + std::cerr << e.getMessage() << std::endl; + return EXIT_FAILURE; + } + file.Close(); + + if (_quiet) { + printQuiet(header); + } else { + printVerbose(header); + } + return EXIT_SUCCESS; +} + +void +Application::printQuiet(FileHeader &header) +{ + for (uint32_t i = 0, len = header.getNumTags(); i < len; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + std::cout << escape(tag.getName(), _delimiter) << _delimiter + << escape(getTypeString(tag), _delimiter) << _delimiter + << escape(getValueString(tag), _delimiter) << std::endl; + } +} + +void +Application::printVerbose(FileHeader &header) +{ + uint32_t nameWidth = 3, typeWidth = 4, valueWidth = 5; + for (uint32_t i = 0, len = header.getNumTags(); i < len; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + nameWidth = std::max(nameWidth, (uint32_t)tag.getName().size()); + typeWidth = std::max(typeWidth, (uint32_t)getTypeString(tag).size()); + valueWidth = std::max(valueWidth, (uint32_t)getValueString(tag).size()); + } + + vespalib::asciistream line; + line << "+" << std::string(nameWidth + 2, '-') + << "+" << std::string(typeWidth + 2, '-') + << "+" << std::string(valueWidth + 2, '-') + << "+"; + + std::cout << std::left << line.str() << std::endl; + std::cout << "| " << std::setw(nameWidth) << "Tag" << " " + << "| " << std::setw(typeWidth) << "Type" << " " + << "| " << std::setw(valueWidth)<< "Value" << " " + << "| " << std::endl; + std::cout << line.str() << std::endl; + for (uint32_t i = 0, len = header.getNumTags(); i < len; 
++i) { + const FileHeader::Tag &tag = header.getTag(i); + std::cout << "| " << std::setw(nameWidth) << escape(tag.getName()) << " " + << "| " << std::setw(typeWidth) << getTypeString(tag) << " " + << "| " << std::setw(valueWidth) << escape(getValueString(tag)) << " " + << "| " << std::endl; + } + std::cout << line.str() << std::endl; +} + +vespalib::string +Application::escape(const vespalib::string &str, char quote) +{ + vespalib::string ret = ""; + for (uint32_t i = 0, len = str.size(); i < len; ++i) { + char c = str[i]; + switch (c) { + case '\f': + ret.append("\\f"); + break; + case '\n': + ret.append("\\n"); + break; + case '\r': + ret.append("\\r"); + break; + case '\t': + ret.append("\\t"); + break; + default: + if (c != '\0' && c == quote) { + ret.append("\\"); + } + ret.push_back(c); + } + } + return ret; +} + +vespalib::string +Application::getTypeString(const FileHeader::Tag &tag) +{ + switch (tag.getType()) { + case FileHeader::Tag::TYPE_FLOAT: + return "float"; + case FileHeader::Tag::TYPE_INTEGER: + return "integer"; + case FileHeader::Tag::TYPE_STRING: + return "string"; + default: + LOG_ASSERT(tag.getType() == FileHeader::Tag::TYPE_INTEGER); + abort(); + } +} + +vespalib::string +Application::getValueString(const FileHeader::Tag &tag) +{ + vespalib::asciistream out; + out << tag; + return out.str(); +} + +int +main(int argc, char** argv) +{ + Application app; + return app.Entry(argc, argv); +} diff --git a/searchlib/src/apps/loadattribute/.gitignore b/searchlib/src/apps/loadattribute/.gitignore new file mode 100644 index 00000000000..4f008fbf84e --- /dev/null +++ b/searchlib/src/apps/loadattribute/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +loadattribute diff --git a/searchlib/src/apps/loadattribute/CMakeLists.txt b/searchlib/src/apps/loadattribute/CMakeLists.txt new file mode 100644 index 00000000000..6712519e59a --- /dev/null +++ b/searchlib/src/apps/loadattribute/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_loadattribute_app + SOURCES + loadattribute.cpp + OUTPUT_NAME loadattribute + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/loadattribute/loadattribute.cpp b/searchlib/src/apps/loadattribute/loadattribute.cpp new file mode 100644 index 00000000000..b1d1f896af8 --- /dev/null +++ b/searchlib/src/apps/loadattribute/loadattribute.cpp @@ -0,0 +1,216 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP("loadattribute"); + +namespace search { + +typedef AttributeVector::SP AttributePtr; + +class LoadAttribute : public FastOS_Application +{ +private: + void load(const AttributePtr & ptr); + void applyUpdate(const AttributePtr & ptr); + void printContent(const AttributePtr & ptr, std::ostream & os); + void usage(); + +public: + int Main(); +}; + +void +LoadAttribute::load(const AttributePtr & ptr) +{ + std::cout << "loading attribute: " << ptr->getBaseFileName() << std::endl; + ptr->load(); + std::cout << "attribute successfully loaded" << std::endl; +} + +void +LoadAttribute::applyUpdate(const AttributePtr & ptr) +{ + std::cout << "applyUpdate" << std::endl; + if (ptr->getClass().inherits(IntegerAttribute::classId)) { + IntegerAttribute * a = static_cast(ptr.get()); + if (ptr->hasMultiValue()) { + a->append(0, 123456789, 1); + } else { + a->update(0, 123456789); + } + a->commit(); + } else if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + FloatingPointAttribute * a = static_cast(ptr.get()); + if (ptr->hasMultiValue()) { + a->append(0, 123456789.5f, 1); + } else { + a->update(0, 123456789); + } + a->commit(); + } else if (ptr->getClass().inherits(StringAttribute::classId)) { + StringAttribute * a = static_cast(ptr.get()); + if (ptr->hasMultiValue()) { + 
a->append(0, "non-existing string value", 1); + } else { + a->update(0, "non-existing string value"); + } + a->commit(); + } +} + +void +LoadAttribute::printContent(const AttributePtr & ptr, std::ostream & os) +{ + uint32_t sz = ptr->getMaxValueCount(); + if (ptr->hasWeightedSetType()) { + AttributeVector::WeightedString * buf = new AttributeVector::WeightedString[sz]; + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, buf, sz); + assert(valueCount <= sz); + os << "doc " << doc << ": valueCount(" << valueCount << ")" << std::endl; + for (uint32_t i = 0; i < valueCount; ++i) { + os << " " << i << ": " << "[" << buf[i].getValue() << ", " << buf[i].getWeight() << "]" << std::endl; + } + } + delete [] buf; + } else { + vespalib::string *buf = new vespalib::string[ptr->getMaxValueCount()]; + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, buf, sz); + assert(valueCount <= sz); + os << "doc " << doc << ": valueCount(" << valueCount << ")" << std::endl; + for (uint32_t i = 0; i < valueCount; ++i) { + os << " " << i << ": " << "[" << buf[i] << "]" << std::endl; + } + } + delete [] buf; + } +} + +void +LoadAttribute::usage() +{ + std::cout << "usage: loadattribute [-p (print content to .out)]" << std::endl; + std::cout << " [-a (apply a single update)]" << std::endl; + std::cout << " [-s (save attribute to .save.dat)]" << std::endl; + std::cout << " " << std::endl; +} + +int +LoadAttribute::Main() +{ + bool doPrintContent = false; + bool doApplyUpdate = false; + bool doSave = false; + bool doFastSearch = false; + bool doEnableEnumeratedSave = false; + bool doHuge = false; + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("pasf:eh", arg, idx)) != -1) { + switch (opt) { + case 'p': + doPrintContent = true; + break; + case 'a': + doApplyUpdate = true; + break; + case 'e': + doEnableEnumeratedSave = true; + break; + case 'h': + doHuge = 
true; + break; + case 'f': + if (strcmp(arg, "search") == 0) { + doFastSearch = true; + } else { + std::cerr << "Expected 'search' or 'aggregate', got '" << + arg << "'" << std::endl; + optError = true; + } + break; + case 's': + doSave = true; + break; + default: + optError = true; + break; + } + } + + if (_argc != (idx + 1) || optError) { + usage(); + return -1; + } + + vespalib::string fileName(_argv[idx]); + vespalib::FileHeader fh; + do { + vespalib::string datFileName(fileName + ".dat"); + Fast_BufferedFile file; + file.ReadOpenExisting(datFileName.c_str()); + (void) fh.readFile(file); + } while (0); + attribute::BasicType bt(fh.getTag("datatype").asString()); + attribute::CollectionType ct(fh.getTag("collectiontype").asString()); + attribute::Config c(bt, ct); + c.setFastSearch(doFastSearch); + c.setHuge(doHuge); + AttributePtr ptr = AttributeFactory::createAttribute(fileName, c); + if (doEnableEnumeratedSave) + ptr->enableEnumeratedSave(); + AttributeVector::enableEnumeratedLoad(); + FastOS_Time timer; + timer.SetNow(); + load(ptr); + std::cout << "load time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl; + + std::cout << "numDocs: " << ptr->getNumDocs() << std::endl; + + if (doApplyUpdate) { + timer.SetNow(); + applyUpdate(ptr); + std::cout << "update time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl; + } + + if (doPrintContent) { + vespalib::string outFile(fileName + ".out"); + std::ofstream of(outFile.c_str()); + if (of.fail()) { + std::cout << "failed opening: " << fileName << ".out" << std::endl; + } + std::cout << "printContent" << std::endl; + printContent(ptr, of); + of.close(); + } + + if (doSave) { + vespalib::string saveFile = fileName + ".save"; + std::cout << "saving attribute: " << saveFile << std::endl; + timer.SetNow(); + ptr->saveAs(saveFile); + std::cout << "save time: " << timer.MilliSecsToNow() / 1000 << " seconds " << std::endl; + } + + return 0; +} + +} + +int main(int argc, char ** argv) +{ + 
search::LoadAttribute myApp; + return myApp.Entry(argc, argv); +} diff --git a/searchlib/src/apps/loadattribute/loadattribute.rb b/searchlib/src/apps/loadattribute/loadattribute.rb new file mode 100644 index 00000000000..d1fb5a5632c --- /dev/null +++ b/searchlib/src/apps/loadattribute/loadattribute.rb @@ -0,0 +1,43 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +attribute = ARGV[0] + +dat = File.open(attribute + ".dat", "r") +puts "opened " + attribute + ".dat" +dat_buffer = [] +dat.each_byte do |byte| + dat_buffer.push(byte) +end + +string = [] +strings = [] +dat_buffer.each do |byte| + if byte == 0 + strings.push(string.pack("c*")) + string.clear + else + string.push(byte) + end +end +puts "num strings: #{strings.size}" + +idx = File.open(attribute + ".idx", "r") +puts "opened " + attribute + ".idx" +idx_buffer = [] +while not idx.eof + idx_buffer.push((idx.read(4).unpack("I")).first) +end +puts "num docs: #{idx_buffer.size - 1}" +puts "num values: #{idx_buffer.last}" + +out = File.open(attribute + ".out", "w") +for i in 0...(idx_buffer.size - 1) + count = idx_buffer[i + 1]. 
- idx_buffer[i] + out.write("doc #{i}: count = #{count}\n") + for j in 0...count + if idx_buffer[i] + j >= strings.size + raise "ERROR: idx_buffer[i] + j (#{idx_buffer[i] + j}) >= strings.size (#{strings.size})" + end + out.write(" #{j}: #{strings[idx_buffer[i] + j]}\n") + end +end + diff --git a/searchlib/src/apps/tests/.gitignore b/searchlib/src/apps/tests/.gitignore new file mode 100644 index 00000000000..e05359d841e --- /dev/null +++ b/searchlib/src/apps/tests/.gitignore @@ -0,0 +1,8 @@ +/.depend +/Makefile +/biglog_test +/btreestress_test +/memoryindexstress_test +searchlib_biglog_test_app +searchlib_btreestress_test_app +searchlib_memoryindexstress_test_app diff --git a/searchlib/src/apps/tests/CMakeLists.txt b/searchlib/src/apps/tests/CMakeLists.txt new file mode 100644 index 00000000000..5c275e4cfb8 --- /dev/null +++ b/searchlib/src/apps/tests/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_biglog_test_app + SOURCES + biglogtest.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_biglog_test_app COMMAND searchlib_biglog_test_app BENCHMARK) +vespa_add_executable(searchlib_btreestress_test_app + SOURCES + btreestress_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btreestress_test_app COMMAND searchlib_btreestress_test_app BENCHMARK) +vespa_add_executable(searchlib_memoryindexstress_test_app + SOURCES + memoryindexstress_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_memoryindexstress_test_app COMMAND searchlib_memoryindexstress_test_app BENCHMARK) diff --git a/searchlib/src/apps/tests/biglogtest.cpp b/searchlib/src/apps/tests/biglogtest.cpp new file mode 100644 index 00000000000..56b695b69c0 --- /dev/null +++ b/searchlib/src/apps/tests/biglogtest.cpp @@ -0,0 +1,243 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("big_logdatastore_test"); + +#include +#include +#include +#include +#include +#include + +#include + +using namespace search; +using search::index::DummyFileHeaderContext; + +class Test : public vespalib::TestApp { +private: + struct Blob { + ssize_t sz; + char *buf; + Blob(size_t s) : sz(s), buf(s == 0 ? 0 : new char[s]) {} + }; + typedef std::map Map; + + void makeBlobs(); + void cleanBlobs(); + void checkBlobs(const IDataStore &datastore, const Map &lidToBlobMap); + + template + void testDIO(); + + std::string _dir; + std::vector _blobs; + vespalib::RandomGen _randomgenerator; + +public: + int Main() { + TEST_INIT("big_logdatastore_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + makeBlobs(); + + _dir = "logged"; + TEST_DO(testDIO()); + + cleanBlobs(); + + TEST_DONE(); + } + + Test() : _dir(""), _blobs(), _randomgenerator(42) {} +}; + +TEST_APPHOOK(Test); + + +void +Test::makeBlobs() +{ + /* Reseed, push one empty blob, then keep pushing randomly filled blobs until the 444222111 byte budget is used up. */ _randomgenerator.setSeed(42); + _blobs.push_back(Blob(0)); + size_t usemem = 444222111; + while (usemem > 0) { + size_t sizeclass = 6 + _randomgenerator.nextUint32() % 20; + size_t blobsize = _randomgenerator.nextUint32() % (1< usemem) blobsize = usemem; + _blobs.push_back(Blob(blobsize)); + char *p = _blobs.back().buf; + for (size_t j=0; j < blobsize; ++j) { + *p++ = _randomgenerator.nextUint32(); + } + usemem -= blobsize; + } +} + +void +Test::cleanBlobs() +{ + /* Print the blob count and each blob size while popping and freeing the blobs from the back; size() is size_t and sz is ssize_t, hence the z length modifiers. */ printf("count %zu blobs sizes:", _blobs.size()); + while (_blobs.size() > 0) { + char *p = _blobs.back().buf; + printf(" %zd", _blobs.back().sz); + delete[] p; + _blobs.pop_back(); + } + printf("\n"); +} + + +void +Test::checkBlobs(const IDataStore &datastore, + const Map &lidToBlobMap) +{ + for (Map::const_iterator it = lidToBlobMap.begin(); + it != lidToBlobMap.end(); + ++it) + { + uint32_t lid = it->first; + uint32_t bno = it->second; + vespalib::DataBuffer got; + EXPECT_EQUAL(datastore.read(lid, got), _blobs[bno].sz); +
EXPECT_TRUE(memcmp(got.getData(), _blobs[bno].buf, _blobs[bno].sz) == 0); + } +} + +/* Tuning preset that requests direct I/O for sequential reads, writes and random reads. */ struct DioTune +{ + TuneFileSummary tuning; + DioTune() { + tuning._seqRead.setWantDirectIO(); + tuning._write.setWantDirectIO(); + tuning._randRead.setWantDirectIO(); + } +}; + +template +struct factory {}; + +/* Specialization that owns a LogDataStore together with the objects handed to its constructor; operator() returns the store as an IDataStore. */ template <> +struct factory : DioTune +{ + DummyFileHeaderContext _fileHeaderContext; + LogDataStore::Config _config; + vespalib::ThreadStackExecutor _executor; + transactionlog::NoSyncProxy _noTlSyncer; + LogDataStore _datastore; + factory(std::string dir) + : DioTune(), + _fileHeaderContext(), + _config(), + _executor(_config.getNumThreads(), 128*1024), + _noTlSyncer(), + _datastore(_executor, dir, _config, GrowStrategy(), tuning, + _fileHeaderContext, _noTlSyncer, NULL) + {} + IDataStore & operator() () { return _datastore; } + +}; + +template +void +Test::testDIO() +{ + /* Pre-incremented for every write; the current value is passed to initFlush. */ uint64_t serial = 0; + + /* Start from a freshly created, empty directory. */ FastOS_File::EmptyDirectory(_dir.c_str()); + FastOS_File::RemoveDirectory(_dir.c_str()); + EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str())); + + /* lidToBlobMap records, per lid, the index in _blobs of the blob written last. */ Map lidToBlobMap; + vespalib::DataBuffer buf; + { + /* First store instance: write lids 0..14, flush, write lids 10..29 (overwriting 10..14), then verify before and after a second flush. */ factory ds(_dir); + for (uint32_t lid=0; lid<15; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz); + } + uint64_t flushToken = ds().initFlush(serial); + ds().flush(flushToken); + for (uint32_t lid=10; lid<30; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz); + } + checkBlobs(ds(), lidToBlobMap); + flushToken = ds().initFlush(serial); + ds().flush(flushToken); + checkBlobs(ds(), lidToBlobMap); + } + { + /* Second store instance on the same directory: verify what the first one left behind, then overwrite lids 3..7. */ factory ds(_dir); + checkBlobs(ds(), lidToBlobMap); + + for (uint32_t lid=3; lid<8; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf,
_blobs[blobno].sz); + } + /* Overwrite lids 23..27. */ for (uint32_t lid=23; lid<28; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz); + } + /* Write lids 100033..100087. */ for (uint32_t lid=100033; lid<100088; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz); + } + checkBlobs(ds(), lidToBlobMap); + + /* Remove lids 1, 11, 21 and 31 and record blob index 0 for each of them in the map. */ ds().remove(++serial, 1); + lidToBlobMap[1] = 0; + ds().remove(++serial, 11); + lidToBlobMap[11] = 0; + ds().remove(++serial, 21); + lidToBlobMap[21] = 0; + ds().remove(++serial, 31); + lidToBlobMap[31] = 0; + + checkBlobs(ds(), lidToBlobMap); + uint64_t flushToken = ds().initFlush(serial); + ds().flush(flushToken); + checkBlobs(ds(), lidToBlobMap); + } + { + /* New store instance on the same directory: reading any of the removed lids must return a value <= 0. */ factory ds(_dir); + + ASSERT_TRUE(ds().read(1, buf) <= 0); + ASSERT_TRUE(ds().read(11, buf) <= 0); + ASSERT_TRUE(ds().read(21, buf) <= 0); + ASSERT_TRUE(ds().read(31, buf) <= 0); + + checkBlobs(ds(), lidToBlobMap); + uint64_t flushToken = ds().initFlush(serial); + ds().flush(flushToken); + } + { + /* New store instance: verify, write lids 1234567..1234998, verify around a flush. */ factory ds(_dir); + checkBlobs(ds(), lidToBlobMap); + + for (uint32_t lid=1234567; lid < 1234999; ++lid) { + uint32_t blobno = _randomgenerator.nextUint32() % _blobs.size(); + lidToBlobMap[lid] = blobno; + ds().write(++serial, lid, _blobs[blobno].buf, _blobs[blobno].sz); + } + checkBlobs(ds(), lidToBlobMap); + /* NOTE(review): every other flush in this function passes serial; the literal 22 here may be deliberate - confirm. */ uint64_t flushToken = ds().initFlush(22); + ds().flush(flushToken); + checkBlobs(ds(), lidToBlobMap); + } + { + /* Final store instance: verification only. */ factory ds(_dir); + checkBlobs(ds(), lidToBlobMap); + } + /* Remove the test directory again. */ FastOS_File::EmptyDirectory(_dir.c_str()); + FastOS_File::RemoveDirectory(_dir.c_str()); + TEST_FLUSH(); +} diff --git a/searchlib/src/apps/tests/btreestress_test.cpp b/searchlib/src/apps/tests/btreestress_test.cpp new file mode 100644 index 00000000000..7e4cdf32fdd --- /dev/null +++ b/searchlib/src/apps/tests/btreestress_test.cpp @@ -0,0 +1,224 @@ +// Copyright
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("btreestress_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +using MyTree = search::btree::BTree; +using MyTreeIterator = typename MyTree::Iterator; +using MyTreeConstIterator = typename MyTree::ConstIterator; +using GenerationHandler = vespalib::GenerationHandler; +using search::makeLambdaTask; + +struct Fixture +{ + GenerationHandler _generationHandler; + MyTree _tree; + MyTreeIterator _writeItr; + vespalib::ThreadStackExecutor _writer; // 1 write thread + vespalib::ThreadStackExecutor _readers; // multiple reader threads + search::Rand48 _rnd; + uint32_t _keyLimit; + std::atomic _readSeed; + std::atomic _doneWriteWork; + std::atomic _doneReadWork; + std::atomic _stopRead; + bool _reportWork; + + Fixture(); + ~Fixture(); + void commit(); + void adjustWriteIterator(uint32_t key); + void insert(uint32_t key); + void remove(uint32_t key); + + void readWork(uint32_t cnt); + void readWork(); + void writeWork(uint32_t cnt); +}; + + +Fixture::Fixture() + : _generationHandler(), + _tree(), + _writeItr(_tree.begin()), + _writer(1, 128 * 1024), + _readers(4, 128 * 1024), + _rnd(), + _keyLimit(1000000), + _readSeed(50), + _doneWriteWork(0), + _doneReadWork(0), + _stopRead(0), + _reportWork(false) +{ + _rnd.srand48(32); +} + + +Fixture::~Fixture() +{ + _readers.sync(); + _readers.shutdown(); + _writer.sync(); + _writer.shutdown(); + commit(); + if (_reportWork) { + LOG(info, + "readWork=%ld, writeWork=%ld", + _doneReadWork.load(), _doneWriteWork.load()); + } +} + + +void +Fixture::commit() +{ + auto &allocator = _tree.getAllocator(); + allocator.freeze(); + allocator.transferHoldLists(_generationHandler.getCurrentGeneration()); + 
_generationHandler.incGeneration(); + allocator.trimHoldLists(_generationHandler.getFirstUsedGeneration()); +} + +void +Fixture::adjustWriteIterator(uint32_t key) +{ + /* Position _writeItr for key: binarySeek when the iterator is valid and its key is below key, otherwise lower_bound. */ if (_writeItr.valid() && _writeItr.getKey() < key) { + _writeItr.binarySeek(key); + } else { + _writeItr.lower_bound(key); + } +} + +void +Fixture::insert(uint32_t key) +{ + /* Insert key with value 0u unless the positioned iterator already points at key. */ adjustWriteIterator(key); + assert(!_writeItr.valid() || _writeItr.getKey() >= key); + if (!_writeItr.valid() || _writeItr.getKey() != key) { + _tree.insert(_writeItr, key, 0u); + } +} + +void +Fixture::remove(uint32_t key) +{ + /* Remove key only when the positioned iterator points exactly at it; otherwise do nothing. */ adjustWriteIterator(key); + assert(!_writeItr.valid() || _writeItr.getKey() >= key); + if (_writeItr.valid() && _writeItr.getKey() == key) { + _tree.remove(_writeItr); + } +} + + +void +Fixture::readWork(uint32_t cnt) +{ + /* Runs at most cnt iterations and stops early once _stopRead is non-zero. Each iteration takes a generation guard, draws a key in [0, _keyLimit] and asserts that lowerBound on the frozen view does not land before that key. The number of iterations done is added to _doneReadWork. */ search::Rand48 rnd; + rnd.srand48(++_readSeed); + uint32_t i; + for (i = 0; i < cnt && _stopRead.load() == 0; ++i) { + auto guard = _generationHandler.takeGuard(); + uint32_t key = rnd.lrand48() % (_keyLimit + 1); + MyTreeConstIterator itr = _tree.getFrozenView().lowerBound(key); + assert(!itr.valid() || itr.getKey() >= key); + } + _doneReadWork += i; + LOG(info, "done %u read work", i); +} + + +void +Fixture::readWork() +{ + /* Variant bounded only by the numeric limit; in practice it ends when _stopRead is set. */ readWork(std::numeric_limits::max()); +} + + +void +Fixture::writeWork(uint32_t cnt) +{ + /* cnt iterations: draw a key below _keyLimit, insert or remove it depending on the low bit of a second draw, then commit. Afterwards cnt is added to _doneWriteWork and _stopRead is set to 1, which ends the readWork loops. */ search::Rand48 &rnd(_rnd); + for (uint32_t i = 0; i < cnt; ++i) { + uint32_t key = rnd.lrand48() % _keyLimit; + if ((rnd.lrand48() & 1) == 0) { + insert(key); + } else { + remove(key); + } + commit(); + } + _doneWriteWork += cnt; + _stopRead = 1; + LOG(info, "done %u write work", cnt); +} + + +TEST_F("Test manual lower bound call", Fixture) +{ + /* The operations below leave keys 1 and 4 in the tree, so lowerBound(3) is expected to yield 4. */ f.insert(1); + f.remove(2); + f.insert(1); + f.insert(5); + f.insert(4); + f.remove(3); + f.remove(5); + f.commit(); + auto itr = f._tree.getFrozenView().lowerBound(3); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(4u, itr.getKey()); +} + +TEST_F("Test single threaded lower_bound reader without updates", Fixture) +{ + f._reportWork =
true; + f.writeWork(10); + f._stopRead = 0; + f.readWork(10); +} + +TEST_F("Test single threaded lower_bound reader during updates", Fixture) +{ + uint32_t cnt = 1000000; + f._reportWork = true; + f._writer.execute(makeLambdaTask([=]() { f.writeWork(cnt); })); + f._readers.execute(makeLambdaTask([=]() { f.readWork(); })); +} + +TEST_F("Test multithreaded lower_bound reader during updates", Fixture) +{ + uint32_t cnt = 1000000; + f._reportWork = true; + f._writer.execute(makeLambdaTask([=]() { f.writeWork(cnt); })); + f._readers.execute(makeLambdaTask([=]() { f.readWork(); })); + f._readers.execute(makeLambdaTask([=]() { f.readWork(); })); + f._readers.execute(makeLambdaTask([=]() { f.readWork(); })); + f._readers.execute(makeLambdaTask([=]() { f.readWork(); })); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/apps/tests/memoryindexstress_test.cpp b/searchlib/src/apps/tests/memoryindexstress_test.cpp new file mode 100644 index 00000000000..88aaae374b3 --- /dev/null +++ b/searchlib/src/apps/tests/memoryindexstress_test.cpp @@ -0,0 +1,537 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("memoryindexstress_test"); +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using document::AnnotationType; +using document::DataType; +using document::Document; +using document::DocumentId; +using document::DocumentType; +using document::DocumentTypeRepo; +using document::FieldValue; +using document::Span; +using document::SpanList; +using document::StringFieldValue; +using search::query::Node; +using search::query::SimplePhrase; +using search::query::SimpleStringTerm; +using search::makeLambdaTask; +using search::ScheduleTaskCallback; +using namespace search::fef; +using namespace search::index; +using namespace search::memoryindex; +using namespace search::queryeval; +using vespalib::asciistream; + +namespace +{ + +const vespalib::string SPANTREE_NAME("linguistics"); +const vespalib::string title("title"); +const vespalib::string body("body"); +const vespalib::string foo("foo"); +const vespalib::string bar("bar"); +const vespalib::string doc_type_name = "test"; +const vespalib::string header_name = doc_type_name + ".header"; +const vespalib::string body_name = doc_type_name + ".body"; + + +Schema +makeSchema() +{ + Schema schema; + schema.addIndexField(Schema::IndexField(title, Schema::STRING)); + schema.addIndexField(Schema::IndexField(body, Schema::STRING)); + return schema; +} + +document::DocumenttypesConfig +makeDocTypeRepoConfig(void) +{ + const int32_t doc_type_id = 787121340; + document::config_builder::DocumenttypesConfigBuilderHelper builder; + builder.document(doc_type_id, + doc_type_name, + document::config_builder::Struct(header_name), + document::config_builder::Struct(body_name). + addField(title, DataType::T_STRING). 
+ addField(body, DataType::T_STRING)); + return builder.config(); +} + + +bool isWordChar(char c) { + return ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z')); +} + + +void +tokenizeStringFieldValue(const document::FixedTypeRepo & repo, StringFieldValue &field) +{ + document::SpanTree::UP spanTree; // Note: Not thread safe, is linkedptr + SpanList::UP spanList(std::make_unique()); + SpanList *spans = spanList.get(); + spanTree.reset(new document::SpanTree(SPANTREE_NAME, std::move(spanList))); + const vespalib::string &text = field.getValue(); + uint32_t cur = 0; + int32_t start = 0; + bool inWord = false; + for (cur = 0; cur < text.size(); ++cur) { + char c = text[cur]; + bool isWc = isWordChar(c); + if (!inWord && isWc) { + inWord = true; + start = cur; + } else if (inWord && !isWc) { + int32_t len = cur - start; + spanTree->annotate(spans->add(std::make_unique(start, len)), + *AnnotationType::TERM); + inWord = false; + } + } + if (inWord) { + int32_t len = cur - start; + spanTree->annotate(spans->add(std::make_unique(start, len)), + *AnnotationType::TERM); + } + if (spanTree->numAnnotations() > 0u) { + StringFieldValue::SpanTrees trees; + trees.emplace_back(std::move(spanTree)); + field.setSpanTrees(trees, repo); + } +} + + +void +setFieldValue(Document &doc, const vespalib::string &fieldName, + const vespalib::string &fieldString) +{ + std::unique_ptr fieldValue = + std::make_unique(fieldString); + document::FixedTypeRepo repo(*doc.getRepo(), doc.getType()); + tokenizeStringFieldValue(repo, *fieldValue); + doc.setFieldValue(doc.getField(fieldName), std::move(fieldValue)); +} + +Document::UP +makeDoc(const DocumentTypeRepo &repo, uint32_t i, + const vespalib::string &titleString, + const vespalib::string &bodyString = "") +{ + asciistream idstr; + idstr << "id:test:test:: " << i; + DocumentId id(idstr.str()); + const DocumentType *docType = repo.getDocumentType(doc_type_name); + Document::UP doc(new Document(*docType, id)); + 
doc->setRepo(repo); + if (!titleString.empty()) { + setFieldValue(*doc, title, titleString); + } + if (!bodyString.empty()) { + setFieldValue(*doc, body, bodyString); + } + ASSERT_TRUE(doc.get()); +#if 0 + doc->print(std::cout, true, ""); + std::cout << std::endl; +#endif + return doc; +} + +Document::UP +makeDoc(const DocumentTypeRepo &repo, uint32_t i) +{ + asciistream titleStr; + asciistream bodyStr; + titleStr << i; + bodyStr << (i * 3); + return makeDoc(repo, i, titleStr.str(), bodyStr.str()); +} + + +SimpleStringTerm makeTerm(const std::string &term) { + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +Node::UP makePhrase(const std::string &term1, const std::string &term2) { + SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0)); + Node::UP node(phrase); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1)))); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2)))); + return node; +} + +} // namespace + + + +struct Fixture { + Schema schema; + DocumentTypeRepo repo; + vespalib::ThreadStackExecutor _executor; + search::SequencedTaskExecutor _invertThreads; + search::SequencedTaskExecutor _pushThreads; + MemoryIndex index; + uint32_t _readThreads; + vespalib::ThreadStackExecutor _writer; // 1 write thread + vespalib::ThreadStackExecutor _readers; // multiple reader threads + search::Rand48 _rnd; + uint32_t _keyLimit; + std::atomic _readSeed; + std::atomic _doneWriteWork; + std::atomic _doneReadWork; + std::atomic _emptyCount; + std::atomic _nonEmptyCount; + std::atomic _stopRead; + bool _reportWork; + + Fixture(uint32_t readThreads = 1); + + ~Fixture(); + + void internalSyncCommit() { + vespalib::Gate gate; + index.commit(std::make_shared + (_executor, + makeLambdaTask([&]() { gate.countDown(); }))); + gate.await(); + } + void put(uint32_t id, Document::UP doc) { + index.insertDocument(id, *doc); + } + void remove(uint32_t id) { + index.removeDocument(id); + } + + void readWork(uint32_t 
cnt); + void readWork(); + void writeWork(uint32_t cnt); + uint32_t getReadThreads() const { return _readThreads; } + void stressTest(uint32_t writeCnt); + +private: + Fixture(const Fixture &index) = delete; + Fixture(Fixture &&index) = delete; + Fixture &operator=(const Fixture &index) = delete; + Fixture &operator=(Fixture &&index) = delete; +}; + + +Fixture::Fixture(uint32_t readThreads) + : schema(makeSchema()), + repo(makeDocTypeRepoConfig()), + _executor(1, 128 * 1024), + _invertThreads(2), + _pushThreads(2), + index(schema, _invertThreads, _pushThreads), + _readThreads(readThreads), + _writer(1, 128 * 1024), + _readers(readThreads, 128 * 1024), + _rnd(), + _keyLimit(1000000), + _readSeed(50), + _doneWriteWork(0), + _doneReadWork(0), + _emptyCount(0), + _nonEmptyCount(0), + _stopRead(0), + _reportWork(false) +{ + _rnd.srand48(32); +} + + +Fixture::~Fixture() +{ + _readers.sync(); + _readers.shutdown(); + _writer.sync(); + _writer.shutdown(); + if (_reportWork) { + LOG(info, + "readWork=%ld, writeWork=%ld, emptyCount=%ld, nonemptyCount=%ld", + _doneReadWork.load(), _doneWriteWork.load(), + _emptyCount.load(), _nonEmptyCount.load()); + } +} + + +void +Fixture::readWork(uint32_t cnt) +{ + search::Rand48 rnd; + rnd.srand48(++_readSeed); + uint32_t i; + uint32_t emptyCount = 0; + uint32_t nonEmptyCount = 0; + std::string fieldName = "title"; + + for (i = 0; i < cnt && _stopRead.load() == 0; ++i) { + uint32_t key = (rnd.lrand48() % (_keyLimit + 1)) + 1; + + asciistream keyStr; + keyStr << key; + + SimpleStringTerm term = makeTerm(keyStr.str()); + + uint32_t fieldId = 0; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FieldSpec field(fieldName, fieldId, handle); + FieldSpecList fields; + fields.add(field); + Blueprint::UP result = index.createBlueprint(requestContext, + fields, term); + if (!EXPECT_TRUE(result.get() != 0)) { + LOG(error, "Did 
not get blueprint"); + break; + } + if (result->getState().estimate().empty) { + ++emptyCount; + } else { + ++nonEmptyCount; + } + result->fetchPostings(true); + SearchIterator::UP search = result->createSearch(*match_data, true); + if (!EXPECT_TRUE(search.get() != 0)) { + LOG(error, "Did not get search iterator"); + break; + } + } + _doneReadWork += i; + _emptyCount += emptyCount; + _nonEmptyCount += nonEmptyCount; + LOG(info, "done %u read work", i); +} + + +void +Fixture::readWork() +{ + readWork(std::numeric_limits::max()); +} + + +void +Fixture::writeWork(uint32_t cnt) +{ + search::Rand48 &rnd(_rnd); + for (uint32_t i = 0; i < cnt; ++i) { + uint32_t key = rnd.lrand48() % _keyLimit; + if ((rnd.lrand48() & 1) == 0) { + put(key + 1, makeDoc(repo, key + 1)); + } else { + remove(key + 1); + } + internalSyncCommit(); + } + _doneWriteWork += cnt; + _stopRead = 1; + LOG(info, "done %u write work", cnt); +} + + +void +Fixture::stressTest(uint32_t writeCnt) +{ + _reportWork = true; + uint32_t readThreads = getReadThreads(); + LOG(info, + "starting stress test, 1 write thread, %u read threads, %u writes", + readThreads, writeCnt); + _writer.execute(makeLambdaTask([=]() { writeWork(writeCnt); })); + for (uint32_t i = 0; i < readThreads; ++i) { + _readers.execute(makeLambdaTask([=]() { readWork(); })); + } +} + + +//----------------------------------------------------------------------------- + +std::string toString(SearchIterator & search) +{ + std::ostringstream oss; + bool first = true; + for (search.seek(1); ! 
search.isAtEnd(); search.seek(search.getDocId() + 1)) { + if (!first) oss << ","; + oss << search.getDocId(); + first = false; + } + return oss.str(); +} + +//----------------------------------------------------------------------------- + +bool +verifyResult(const FakeResult &expect, + Searchable &index, + std::string fieldName, + const Node &term) +{ + uint32_t fieldId = 0; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FieldSpec field(fieldName, fieldId, handle); + FieldSpecList fields; + fields.add(field); + + Blueprint::UP result = index.createBlueprint(requestContext, fields, term); + if (!EXPECT_TRUE(result.get() != 0)) { + return false; + } + EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits); + EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty); + + result->fetchPostings(true); + SearchIterator::UP search = result->createSearch(*match_data, true); + if (!EXPECT_TRUE(search.get() != 0)) { + return false; + } + TermFieldMatchData &tmd = *match_data->resolveTermField(handle); + + FakeResult actual; + search->initFullRange(); + for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) { + actual.doc(search->getDocId()); + search->unpack(search->getDocId()); + EXPECT_EQUAL(search->getDocId(), tmd.getDocId()); + FieldPositionsIterator p = tmd.getIterator(); + actual.len(p.getFieldLength()); + for (; p.valid(); p.next()) { + actual.pos(p.getPosition()); + } + } + return EXPECT_EQUAL(expect, actual); +} + +// tests basic usage; index some documents in docid order and perform +// some searches. 
+TEST_F("testIndexAndSearch", Fixture) +{ + f.put(1, makeDoc(f.repo, 1, "foo bar foo", "foo foo foo")); + f.internalSyncCommit(); + f.put(2, makeDoc(f.repo, 2, "bar foo", "bar bar bar bar")); + f.internalSyncCommit(); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + f.index, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + f.index, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + f.index, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + f.index, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + f.index, title, makeTerm("bogus"))); + + // search for "foo" in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + f.index, "bogus", makeTerm(foo))); + + // search for "bar foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + f.index, title, *makePhrase(bar, foo))); + +} + +// tests index update behavior; remove/update and unordered docid +// indexing. 
+TEST_F("require that documents can be removed and updated", Fixture) +{ + // add unordered + f.put(3, makeDoc(f.repo, 3, "foo foo foo")); + f.internalSyncCommit(); + f.put(1, makeDoc(f.repo, 1, "foo")); + f.internalSyncCommit(); + f.put(2, makeDoc(f.repo, 2, "foo foo")); + f.internalSyncCommit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(2).len(2).pos(0).pos(1) + .doc(3).len(3).pos(0).pos(1).pos(2), + f.index, title, makeTerm(foo))); + + // remove document + f.remove(2); + f.internalSyncCommit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(3).len(3).pos(0).pos(1).pos(2), + f.index, title, makeTerm(foo))); + + // update document + f.put(1, makeDoc(f.repo, 1, "bar foo foo")); + f.internalSyncCommit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1).pos(2) + .doc(3).len(3).pos(0).pos(1).pos(2), + f.index, title, makeTerm(foo))); +} + + +TEST_F("stress test, 4 readers", Fixture(4)) +{ + f.stressTest(1000000); +} + +TEST_F("stress test, 128 readers", Fixture(128)) +{ + f.stressTest(1000000); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/apps/uniform/.gitignore b/searchlib/src/apps/uniform/.gitignore new file mode 100644 index 00000000000..ff18dbaa7fd --- /dev/null +++ b/searchlib/src/apps/uniform/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +uniform diff --git a/searchlib/src/apps/uniform/CMakeLists.txt b/searchlib/src/apps/uniform/CMakeLists.txt new file mode 100644 index 00000000000..9f9c2139f42 --- /dev/null +++ b/searchlib/src/apps/uniform/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_uniform_app + SOURCES + uniform.cpp + OUTPUT_NAME uniform + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/uniform/uniform.cpp b/searchlib/src/apps/uniform/uniform.cpp new file mode 100644 index 00000000000..18bdcadbc20 --- /dev/null +++ b/searchlib/src/apps/uniform/uniform.cpp @@ -0,0 +1,153 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include + + +static uint64_t +maxExpGolombVal(uint64_t kValue, uint64_t maxBits) +{ + return static_cast + ((UINT64_C(1) << ((maxBits + kValue + 1) / 2)) - + (UINT64_C(1) << kValue)); +} + +class UniformApp : public FastOS_Application +{ + typedef search::bitcompression::EncodeContext64BE EC64; + + enum { + MAXK = 30 + }; + + uint64_t _bits[MAXK + 1]; + uint64_t _next; + + static uint32_t + encodeSpace(uint64_t x, uint32_t k) + { + return EC64::encodeExpGolombSpace(x, k); + } + + void + clearBits(void); + + void + reportBits(void); + + int + Main(void); +}; + + +void +UniformApp::clearBits(void) +{ + for (unsigned int k = 0; k <= MAXK; ++k) + _bits[k] = 0; + _next = 0; +} + + +void +UniformApp::reportBits(void) +{ + printf("next=%" PRIu64 " ", _next); + for (unsigned int k = 0; k <= MAXK; ++k) + printf("b[%u]=%" PRIu64 " ", + static_cast(k), + _bits[k]); + printf("\n"); + +} + + + +int +UniformApp::Main(void) +{ + int k, l, m, bestmask, oldbestmask; + printf("Hello world\n"); + clearBits(); + reportBits(); + + m = 0; + oldbestmask = 0; + for (;;) { + uint64_t minnext = 0; + int minnextk = 0; + int bestk = 0; + printf("_next=%" PRIu64 "\n", _next); + for (k = 0; k <= MAXK; ++k) { + uint32_t bits = encodeSpace(_next, k); // Current bits + uint64_t next = maxExpGolombVal(k, bits); + assert(encodeSpace(next - 1, k) == bits); + assert(encodeSpace(next, k) > bits); + if (k == 0 || next < minnext) { + minnext = next; + minnextk = k; + } + if (_bits[k] < _bits[bestk]) + bestk = k; + printf("k=%d, 
bits=%" PRIu32 ", next=%" PRIu64 "\n", k, bits, next); /* bits is uint32_t, so it is printed with PRIu32 instead of %d */ + } + printf("minnext=%" PRIu64 ", minnextk=%d, bestk=%d\n", + minnext, minnextk, bestk); + for (k = 0; k <= MAXK; ++k) { + uint32_t kbits = encodeSpace(_next, k); // Current bits + l = bestk; + uint32_t lbits = encodeSpace(_next, l); // Current bits + if (_bits[k] > _bits[l] && kbits < lbits) { + uint32_t dbits = lbits - kbits; + uint64_t dsbits = _bits[k] - _bits[l]; + uint64_t delt = (dsbits + dbits - 1) / dbits; + if (minnext >= _next + delt) { + minnext = _next + delt; + bestk = k; + } + } else if (_bits[k] == _bits[l] && kbits < lbits) { + minnext = _next + 1; + bestk = k; + } + } + printf("minnext=%" PRIu64 ", minnextk=%d, bestk=%d\n", + minnext, minnextk, bestk); + for (k = 0; k <= MAXK; ++k) { + assert(encodeSpace(_next, k) == encodeSpace(minnext - 1, k)); + _bits[k] += (minnext - _next) * encodeSpace(_next, k); + } + _next = minnext; + bestmask = 0; + uint32_t smallk = 0; + for (k = 0; k <= MAXK; ++k) { + if (_bits[k] < _bits[smallk]) + smallk = k; + } + for (k = 0; k <= MAXK; ++k) + if (_bits[k] <= _bits[smallk]) + bestmask |= (1 << k); + if (bestmask == oldbestmask && _next < (UINT64_C(1) << 30)) + continue; + reportBits(); + printf("Best k for interval [0..%" PRIu64 ") is", _next); + for (k = 0; k <= MAXK; ++k) + if (_bits[k] <= _bits[smallk]) + printf(" %d", k); + printf("\n"); + oldbestmask = bestmask; + if (_next >= (UINT64_C(1) << 30)) + break; + printf("m iter=%d\n", m); + ++m; + if (m >= 10000) { + printf("m breakout\n"); + break; + } + } + + return 0; +} + +FASTOS_MAIN(UniformApp); + + diff --git a/searchlib/src/apps/vespa-index-inspect/.gitignore b/searchlib/src/apps/vespa-index-inspect/.gitignore new file mode 100644 index 00000000000..4d5ccbbcb89 --- /dev/null +++ b/searchlib/src/apps/vespa-index-inspect/.gitignore @@ -0,0 +1,3 @@ +/.depend +/Makefile +/vespa-index-inspect diff --git a/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt b/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt new
file mode 100644 index 00000000000..c68aa6b1a6a --- /dev/null +++ b/searchlib/src/apps/vespa-index-inspect/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_vespa-index-inspect_app + SOURCES + vespa-index-inspect.cpp + OUTPUT_NAME vespa-index-inspect + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp new file mode 100644 index 00000000000..b1cf96f81ef --- /dev/null +++ b/searchlib/src/apps/vespa-index-inspect/vespa-index-inspect.cpp @@ -0,0 +1,965 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP("vespa-index-inspect"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::index::Schema; +using search::index::SchemaUtil; +using search::index::DictionaryFileRandRead; +using search::index::PostingListFileRandRead; +using search::index::PostingListOffsetAndCounts; +using search::index::PostingListCounts; +using search::index::PostingListHandle; +using search::diskindex::PageDict4RandRead; +using search::diskindex::Zc4PosOccRandRead; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::fef::FieldPositionsIterator; +using search::queryeval::SearchIterator; +using search::index::DocIdAndFeatures; +using search::diskindex::DocIdMapping; +using search::diskindex::WordNumMapping; +using search::diskindex::FieldReader; +using search::diskindex::PageDict4FileSeqRead; +using search::TuneFileSeqRead; + +namespace +{ + +/** + * Fine granularity, for small scale inversion within a single document. 
+ */
+class PosEntry
+{
+public:
+    uint32_t _docId;
+    uint32_t _fieldId;
+    uint64_t _wordNum;
+    uint32_t _elementId;
+    uint32_t _wordPos;
+    uint32_t _elementLen;
+    int32_t _elementWeight;
+
+    PosEntry(uint32_t docId,
+             uint32_t fieldId,
+             uint32_t elementId, uint32_t wordPos,
+             uint64_t wordNum,
+             uint32_t elementLen, int32_t elementWeight)
+        : _docId(docId),
+          _fieldId(fieldId),
+          _wordNum(wordNum),
+          _elementId(elementId),
+          _wordPos(wordPos),
+          _elementLen(elementLen),
+          _elementWeight(elementWeight)
+    {
+    }
+    // Sort key: (docId, fieldId, elementId, wordPos, wordNum), compared in that order.
+    bool
+    operator<(const PosEntry &rhs) const
+    {
+        if (_docId != rhs._docId)
+            return _docId < rhs._docId;
+        if (_fieldId != rhs._fieldId)
+            return _fieldId < rhs._fieldId;
+        if (_elementId != rhs._elementId)
+            return _elementId < rhs._elementId;
+        if (_wordPos != rhs._wordPos)
+            return _wordPos < rhs._wordPos;
+        return _wordNum < rhs._wordNum;
+    }
+};
+
+// Appends one PosEntry to 'entries' for every word occurrence found in 'features'.
+void
+unpackFeatures(std::vector<PosEntry> &entries,
+               uint32_t fieldId,
+               uint64_t wordNum,
+               const DocIdAndFeatures &features)
+{
+    // Elements and word positions are walked in lock-step: each element
+    // consumes the next getNumOccs() entries of the flat position list.
+    auto element = features._elements.begin();
+    auto position = features._wordPositions.begin();
+    uint32_t numElements = features._elements.size();
+    while (numElements--) {
+        uint32_t numOccs = element->getNumOccs();
+        while (numOccs--) {
+            entries.push_back(PosEntry(features._docId,
+                                       fieldId,
+                                       element->getElementId(),
+                                       position->getWordPos(),
+                                       wordNum,
+                                       element->getElementLen(),
+                                       element->getWeight()));
+            ++position;
+        }
+        ++element;
+    }
+}
+
+// Prints the common first part of the usage text to stderr.
+void
+usageHeader(void)
+{
+    using std::cerr;
+    cerr <<
+        "vespa-index-inspect version 0.0\n"
+        "\n"
+        "USAGE:\n";
+}
+
+// Field names given on the command line and, after validateFields(), their schema ids.
+class FieldOptions
+{
+public:
+    std::vector<vespalib::string> _fields;
+    std::vector<uint32_t> _ids;
+
+    FieldOptions()
+        : _fields(),
+          _ids()
+    {
+    }
+
+    void
+    addField(const vespalib::string &field)
+    {
+        _fields.push_back(field);
+    }
+    // True until validateFields() has resolved at least one field id.
+    bool
+    empty(void) const
+    {
+        return _ids.empty();
+    }
+
+    void
+    validateFields(const Schema &schema);
+};
+
+// Resolves every name in _fields to an index field id; logs and exits(1) on an unknown name.
+void
+FieldOptions::validateFields(const Schema &schema)
+{
+    for (std::vector<vespalib::string>::const_iterator
+             i = _fields.begin(), ie = _fields.end();
+         i != ie; ++i) {
+        uint32_t fieldId = schema.getIndexFieldId(*i);
+        if (fieldId == Schema::UNKNOWN_FIELD_ID) {
+            LOG(error,
+                "No such field: %s",
+                i->c_str());
+            exit(1);
+        }
+        _ids.push_back(fieldId);
+    }
+}
+
+
+}
+// Interface implemented by each sub command (selected by argv[1] in Main).
+class SubApp
+{
+protected:
+    FastOS_Application &_app;
+
+public:
+    SubApp(FastOS_Application &app)
+        : _app(app)
+    {
+    }
+
+    virtual
+    ~SubApp(void)
+    {
+    }
+
+    virtual void
+    usage(bool showHeader) = 0;
+
+    virtual bool
+    getOptions(void) = 0;
+
+    virtual int
+    run(void) = 0;
+};
+
+// "showpostings" sub command: shows postings for one word, or a transposed dump per document.
+class ShowPostingListSubApp : public SubApp
+{
+    vespalib::string _indexDir;
+    FieldOptions _fieldOptions;
+    vespalib::string _word;
+    bool _verbose;
+    bool _readmmap;
+    bool _directio;
+    bool _transpose;
+    int _optIndex;
+    DocIdMapping _dm;
+    std::vector<WordNumMapping> _wmv;
+    std::vector<std::vector<vespalib::string>> _wordsv;
+    uint32_t _docIdLimit;
+    uint32_t _minDocId;
+
+    static uint64_t
+    noWordNumHigh(void)
+    {
+        return std::numeric_limits<uint64_t>::max();
+    }
+
+    static uint64_t
+    noWordNum(void)
+    {
+        return 0u;
+    }
+public:
+
+    ShowPostingListSubApp(FastOS_Application &app);
+
+    virtual
+    ~ShowPostingListSubApp(void);
+
+    virtual void
+    usage(bool showHeader);
+
+    virtual bool
+    getOptions(void);
+
+    virtual int
+    run(void);
+
+    void
+    showPostingList(void);
+
+    bool
+    readDocIdLimit(const Schema &schema);
+
+    bool
+    readWordList(const SchemaUtil::IndexIterator &index);
+
+    bool
+    readWordList(const Schema &schema);
+
+    void
+    readPostings(const SchemaUtil::IndexIterator &index,
+                 std::vector<PosEntry> &entries);
+
+    void
+    showTransposedPostingList();
+};
+
+// Defaults: index in ".", no doc id window (limit = max uint32, min = 0).
+ShowPostingListSubApp::ShowPostingListSubApp(FastOS_Application &app)
+    : SubApp(app),
+      _indexDir("."),
+      _fieldOptions(),
+      _word(),
+      _verbose(false),
+      _readmmap(false),
+      _directio(false),
+      _transpose(false),
+      _optIndex(1),
+      _dm(),
+      _wmv(),
+      _wordsv(),
+      _docIdLimit(std::numeric_limits<uint32_t>::max()),
+      _minDocId(0u)
+{
+}
+
+
+ShowPostingListSubApp::~ShowPostingListSubApp(void)
+{
+}
+
+// Prints the showpostings usage text to stderr, optionally preceded by the common header.
+void
+ShowPostingListSubApp::usage(bool showHeader)
+{
+    using std::cerr;
+    if (showHeader)
+        usageHeader();
+    cerr <<
+        "vespa-index-inspect showpostings [--indexdir indexDir]\n"
+        " --field field\n"
+        " word\n"
+        "\n"
+        "vespa-index-inspect showpostings [--indexdir indexDir]\n"
+        " [--field field]\n"
+        " --transpose\n"
+        " [--docidlimit docIdLimit] [--mindocid mindocid]\n"
+        "\n";
+}
+
+// Parses the command line; returns false when the caller should print usage.
+bool
+ShowPostingListSubApp::getOptions(void)
+{
+    int c;
+    const char *optArgument = NULL;
+    int longopt_index = 0;
+    static struct option longopts[] = {
+        { "indexdir", 1, NULL, 0 },
+        { "field", 1, NULL, 0 },
+        { "transpose", 0, NULL, 0 },
+        { "docidlimit", 1, NULL, 0 },
+        { "mindocid", 1, NULL, 0 },
+        { NULL, 0, NULL, 0 }
+    };
+    enum longopts_enum {
+        LONGOPT_INDEXDIR,
+        LONGOPT_FIELD,
+        LONGOPT_TRANSPOSE,
+        LONGOPT_DOCIDLIMIT,
+        LONGOPT_MINDOCID
+    };
+    int optIndex = 2;
+    while ((c = _app.GetOptLong("di:mv",
+                                optArgument,
+                                optIndex,
+                                longopts,
+                                &longopt_index)) != -1) {
+        switch (c) {
+        case 0:
+            switch (longopt_index) {
+            case LONGOPT_INDEXDIR:
+                _indexDir = optArgument;
+                break;
+            case LONGOPT_FIELD:
+                _fieldOptions.addField(optArgument);
+                break;
+            case LONGOPT_TRANSPOSE:
+                _transpose = true;
+                break;
+            case LONGOPT_DOCIDLIMIT:
+                _docIdLimit = strtoul(optArgument, NULL, 10); // unsigned target; atoi() cannot represent ids above INT_MAX
+                break;
+            case LONGOPT_MINDOCID:
+                _minDocId = strtoul(optArgument, NULL, 10);
+                break;
+            default:
+                if (optArgument != NULL) {
+                    LOG(error,
+                        "longopt %s with arg %s",
+                        longopts[longopt_index].name, optArgument);
+                } else {
+                    LOG(error,
+                        "longopt %s",
+                        longopts[longopt_index].name);
+                }
+            }
+            break;
+        case 'd':
+            _directio = true;
+            break;
+        case 'i':
+            _indexDir = optArgument;
+            break;
+        case 'm':
+            _readmmap = true;
+            break;
+        case 'v':
+            _verbose = true;
+            break;
+        default:
+            return false;
+        }
+    }
+    if (_transpose) {
+    } else {
+        if (_fieldOptions._fields.empty())
+            return false;
+        if (_fieldOptions._fields.size() > 1)
+            return false;
+    }
+    _optIndex = optIndex;
+    if (_transpose) {
+    } else {
+        if (_optIndex >= _app._argc) {
+            return false;
+        }
+        _word = _app._argv[optIndex];
+    }
+    return true;
+}
+
+// Sets up _dm from the index dir, falling back to the first string field that can be opened.
+bool
+ShowPostingListSubApp::readDocIdLimit(const Schema &schema)
+{
+    TuneFileSeqRead tuneFileRead;
+    if (_dm.readDocIdLimit(_indexDir))
+        return true;
+    uint32_t numIndexFields = schema.getNumIndexFields();
+    for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) {
+        const Schema::IndexField &field = schema.getIndexField(fieldId);
+        if (field.getDataType() == Schema::STRING) {
+            FieldReader fr;
+            if (!fr.open(_indexDir + "/" + field.getName() + "/",
+                         tuneFileRead))
+                continue;
+            _dm.setup(fr.getDocIdLimit());
+            return true;
+        }
+    }
+    return false;
+}
+
+// Loads the dictionary of one field into _wordsv[field], indexed by word number.
+bool
+ShowPostingListSubApp::readWordList(const SchemaUtil::IndexIterator &index)
+{
+    std::vector<vespalib::string> &words = _wordsv[index.getIndex()];
+    WordNumMapping &wm = _wmv[index.getIndex()];
+
+    search::TuneFileSeqRead tuneFileRead;
+    PageDict4FileSeqRead wr;
+    vespalib::string fieldDir = _indexDir + "/" + index.getName();
+    if (!wr.open(fieldDir + "/dictionary", tuneFileRead))
+        return false;
+    vespalib::string word;
+    PostingListCounts counts;
+    uint64_t wordNum = noWordNum();
+    wr.readWord(word, wordNum, counts);
+    words.push_back(""); // Word number 0 is special here.
+    while (wordNum != noWordNumHigh()) {
+        assert(wordNum == words.size());
+        words.push_back(word);
+        wr.readWord(word, wordNum, counts);
+    }
+    wm.setup(words.size() - 1);
+    if (!wr.close())
+        return false;
+    return true;
+}
+
+bool
+ShowPostingListSubApp::readWordList(const Schema &schema)
+{
+    _wordsv.clear();
+    _wmv.clear();
+    uint32_t numFields = schema.getNumIndexFields();
+    _wordsv.resize(numFields);
+    _wmv.resize(numFields);
+
+    if (!_fieldOptions.empty()) {
+        for (std::vector<uint32_t>::const_iterator
+                 i = _fieldOptions._ids.begin(), ie = _fieldOptions._ids.end();
+             i != ie; ++i) {
+            SchemaUtil::IndexIterator index(schema, *i);
+            if (!readWordList(index))
+                return false;
+        }
+    } else {
+        SchemaUtil::IndexIterator index(schema);
+        while (index.isValid()) {
+            if (!readWordList(index))
+                return false;
+            ++index;
+        }
+    }
+    return true;
+}
+
+// Appends the postings of one field with _minDocId <= docId < _docIdLimit to 'entries'.
+void
+ShowPostingListSubApp::readPostings(const SchemaUtil::IndexIterator &index,
+                                    std::vector<PosEntry> &entries)
+{
+    FieldReader r;
+    vespalib::string mangledName = _indexDir + "/" + index.getName() +
+                                   "/";
+    search::TuneFileSeqRead tuneFileRead;
+    r.setup(_wmv[index.getIndex()], _dm);
+    if (!r.open(mangledName, tuneFileRead))
+        return;
+    if (r.isValid())
+        r.read();
+    while (r.isValid()) {
+        uint32_t docId = r._docIdAndFeatures._docId;
+        if (docId >= _minDocId && docId < _docIdLimit) {
+            unpackFeatures(entries, index.getIndex(),
+                           r._wordNum, r._docIdAndFeatures);
+        }
+        r.read();
+    }
+    if (!r.close())
+        abort();
+}
+
+// Prints all postings grouped by document, field and element, in position order.
+void
+ShowPostingListSubApp::showTransposedPostingList(void)
+{
+    Schema schema;
+    std::string schemaName = _indexDir + "/schema.txt";
+    if (!schema.loadFromFile(schemaName)) {
+        LOG(error,
+            "Could not load schema from %s", schemaName.c_str());
+        exit(1);
+    }
+    _fieldOptions.validateFields(schema);
+    if (!readDocIdLimit(schema))
+        return;
+    if (!readWordList(schema))
+        return;
+    std::vector<PosEntry> entries;
+    if (!_fieldOptions.empty()) {
+        for (std::vector<uint32_t>::const_iterator
+                 i = _fieldOptions._ids.begin(), ie = _fieldOptions._ids.end();
+             i != ie; ++i) {
+            SchemaUtil::IndexIterator index(schema, *i);
+            readPostings(index, entries);
+        }
+    } else {
+        SchemaUtil::IndexIterator index(schema);
+        while (index.isValid()) {
+            readPostings(index, entries);
+            ++index;
+        }
+    }
+    std::sort(entries.begin(), entries.end());
+    uint32_t prevDocId = static_cast<uint32_t>(-1);
+    uint32_t prevFieldId = static_cast<uint32_t>(-1);
+    uint32_t prevElemId = static_cast<uint32_t>(-1);
+    uint32_t prevElementLen = 0;
+    int32_t prevElementWeight = 0;
+    for (std::vector<PosEntry>::const_iterator
+             i = entries.begin(), ie = entries.end(); i != ie; ++i) {
+        if (i->_docId != prevDocId) {
+            std::cout << "docId = " << i->_docId << '\n';
+            prevDocId = i->_docId;
+            prevFieldId = static_cast<uint32_t>(-1);
+        }
+        if (i->_fieldId != prevFieldId) {
+            std::cout << " field = " << i->_fieldId <<
+                " \"" << schema.getIndexField(i->_fieldId).getName() <<
+                "\"\n";
+            prevFieldId = i->_fieldId;
+            prevElemId = static_cast<uint32_t>(-1);
+        }
+        if (i->_elementId != prevElemId ||
+            i->_elementLen != prevElementLen ||
+            i->_elementWeight != prevElementWeight) {
+            std::cout << " element = " << i->_elementId <<
+                ", elementLen = " << i->_elementLen <<
+                ", elementWeight = " << i->_elementWeight <<
+                '\n';
+            prevElemId = i->_elementId;
+            prevElementLen = i->_elementLen;
+            prevElementWeight = i->_elementWeight;
+        }
+        assert(i->_wordNum != 0);
+        assert(i->_wordNum < _wordsv[i->_fieldId].size());
+        std::cout << " pos = " << i->_wordPos <<
+            ", word = \"" << _wordsv[i->_fieldId][i->_wordNum] << "\"";
+        std::cout << '\n';
+    }
+}
+
+// Looks up _word in the single selected field and prints every document and position it occurs in.
+void
+ShowPostingListSubApp::showPostingList(void)
+{
+    Schema schema;
+    uint32_t numFields = 1;
+    std::string schemaName = _indexDir + "/schema.txt";
+    std::vector<vespalib::string> fieldNames;
+    vespalib::string shortName;
+    if (!schema.loadFromFile(schemaName)) {
+        LOG(error,
+            "Could not load schema from %s", schemaName.c_str());
+        exit(1);
+    }
+    _fieldOptions.validateFields(schema);
+    if (_fieldOptions._ids.size() != 1) {
+        LOG(error,
+            "Wrong number of field arguments: %d",
+            static_cast<int>(_fieldOptions._ids.size()));
+        exit(1);
+    }
+    SchemaUtil::IndexIterator it(schema, _fieldOptions._ids.front());
+
+    shortName = it.getName();
+    fieldNames.push_back(it.getName());
+    std::unique_ptr<PageDict4RandRead> dict(new PageDict4RandRead);
+    std::string dictName = _indexDir + "/" + shortName + "/dictionary";
+    search::TuneFileRandRead tuneFileRead;
+    if (_directio)
+        tuneFileRead.setWantDirectIO();
+    if (_readmmap)
+        tuneFileRead.setWantMemoryMap();
+    if (!dict->open(dictName, tuneFileRead)) {
+        LOG(error,
+            "Could not open dictionary %s",
+            dictName.c_str());
+        exit(1);
+    }
+    std::unique_ptr<Zc4PosOccRandRead> postingfile(new Zc4PosOccRandRead);
+    std::string mangledName = _indexDir + "/" + shortName +
+                              "/posocc.dat.compressed";
+    if (!postingfile->open(mangledName, tuneFileRead)) {
+        LOG(error,
+            "Could not open posting list file %s",
+            mangledName.c_str());
+        exit(1);
+    }
+    PostingListOffsetAndCounts offsetAndCounts;
+    uint64_t wordNum = 0;
+    bool res = dict->lookup(_word, wordNum, offsetAndCounts);
+    if (!res) {
+        LOG(warning, "Unknown word %s", _word.c_str());
+        exit(1);
+    }
+    if (_verbose) {
+        LOG(info,
+            "bitOffset %" PRId64 ", bitLen=%" PRId64 ", numDocs=%" PRId64,
+            offsetAndCounts._offset,
+            offsetAndCounts._counts._bitLength,
+            offsetAndCounts._counts._numDocs);
+    }
+    typedef PostingListCounts Counts;
+    typedef PostingListHandle Handle;
+    typedef std::pair<Counts, Handle> CH;
+    typedef std::unique_ptr<CH> CHAP;
+    CHAP handle(new CH);
+    handle->first = offsetAndCounts._counts;
+    handle->second._bitOffset = offsetAndCounts._offset;
+    handle->second._bitLength = handle->first._bitLength;
+    const uint32_t first_segment = 0;
+    const uint32_t num_segments = 0; // means all segments
+    handle->second._file = postingfile.get();
+    handle->second._file->readPostingList(handle->first,
+                                          first_segment,
+                                          num_segments,
+                                          handle->second);
+    std::vector<TermFieldMatchData> tfmdv(numFields);
+    TermFieldMatchDataArray tfmda;
+    for (std::vector<TermFieldMatchData>::iterator
+             tfit = tfmdv.begin(), tfite = tfmdv.end();
+         tfit != tfite; ++tfit) {
+        tfmda.add(&*tfit);
+    }
+    std::unique_ptr<SearchIterator> sb(handle->second.createIterator(
+                                               handle->first, tfmda));
+    sb->initFullRange();
+    uint32_t docId = 0;
+    bool first = true;
+    for (;;) {
+        if (sb->seek(docId)) {
+            first = false;
+            std::cout << "docId = " << docId << '\n';
+            sb->unpack(docId);
+            for (uint32_t field = 0; field < numFields; ++field) {
+                const TermFieldMatchData &md = *tfmda[field];
+                if (md.getDocId() != docId)
+                    continue;
+                std::cout << " field = " << fieldNames[field] << '\n';
+                FieldPositionsIterator fpi = md.getIterator();
+                uint32_t lastElement = static_cast<uint32_t>(-1);
+                while (fpi.valid()) {
+                    if (fpi.getElementId() != lastElement) {
+                        std::cout << " element = " << fpi.getElementId() <<
+                            ", elementLen = " << fpi.getElementLen() <<
+                            ", elementWeight = " << fpi.getElementWeight() <<
+                            '\n';
+                        lastElement = fpi.getElementId();
+                    }
+                    std::cout << " pos = " << fpi.getPosition() << '\n';
+                    fpi.next();
+                }
+            }
+            ++docId;
+        } else {
+            docId = sb->getDocId();
+            if (sb->isAtEnd())
+                break;
+        }
+    }
+    if (first) {
+        std::cout << "No hits\n";
+    }
+
+    if (!postingfile->close()) {
+        LOG(error,
+            "Could not close posting list file %s",
+            mangledName.c_str());
+        exit(1);
+    }
+    if (!dict->close()) {
+        LOG(error,
+            "Could not close dictionary %s", dictName.c_str());
+        exit(1);
+    }
+}
+
+
+int
+ShowPostingListSubApp::run(void)
+{
+    if (_transpose)
+        showTransposedPostingList();
+    else
+        showPostingList();
+    return 0;
+}
+
+// "dumpwords" sub command: lists the words of one field's dictionary.
+class DumpWordsSubApp : public SubApp
+{
+    std::string _indexDir;
+    FieldOptions _fieldOptions;
+    uint64_t _minNumDocs;
+    bool _verbose;
+    bool _all;
+    bool _showWordNum;
+
+public:
+    DumpWordsSubApp(FastOS_Application &app);
+
+    virtual
+    ~DumpWordsSubApp(void);
+
+    virtual void
+    usage(bool showHeader);
+
+    virtual bool
+    getOptions(void);
+
+    virtual int
+    run(void);
+
+    void
+    dumpWords(void);
+};
+
+
+DumpWordsSubApp::DumpWordsSubApp(FastOS_Application &app)
+    : SubApp(app),
+      _indexDir("."),
+      _fieldOptions(),
+      _minNumDocs(0u),
+      _verbose(false),
+      _all(false),
+      _showWordNum(false)
+{
+}
+
+
+DumpWordsSubApp::~DumpWordsSubApp(void)
+{
+}
+
+// Prints the dumpwords usage text to stderr, optionally preceded by the common header.
+void
+DumpWordsSubApp::usage(bool showHeader)
+{
+    using std::cerr;
+    if (showHeader)
+        usageHeader();
+    cerr <<
+        "vespa-index-inspect dumpwords [--indexdir indexDir]\n"
+        " --field field\n"
+        " [--minnumdocs minnumdocs] [--verbose] [--wordnum]\n"
+        "\n";
+}
+
+// Parses the command line; returns false when the caller should print usage.
+bool
+DumpWordsSubApp::getOptions(void)
+{
+    int c;
+    const char *optArgument = NULL;
+    int longopt_index = 0;
+    static struct option longopts[] = {
+        { "indexdir", 1, NULL, 0 },
+        { "field", 1, NULL, 0 },
+        { "minnumdocs", 1, NULL, 0 },
+        { "verbose", 0, NULL, 0 },
+        { "wordnum", 0, NULL, 0 },
+        { NULL, 0, NULL, 0 }
+    };
+    enum longopts_enum {
+        LONGOPT_INDEXDIR,
+        LONGOPT_FIELD,
+        LONGOPT_MINNUMDOCS,
+        LONGOPT_VERBOSE,
+        LONGOPT_WORDNUM
+    };
+    int optIndex = 2;
+    while ((c = _app.GetOptLong("i:",
+                                optArgument,
+                                optIndex,
+                                longopts,
+                                &longopt_index)) != -1) {
+        switch (c) {
+        case 0:
+            switch (longopt_index) {
+            case LONGOPT_INDEXDIR:
+                _indexDir = optArgument;
+                break;
+            case LONGOPT_FIELD:
+                _fieldOptions.addField(optArgument);
+                break;
+            case LONGOPT_MINNUMDOCS:
+                _minNumDocs = strtoull(optArgument, NULL, 10); // 64-bit unsigned target
+                break;
+            case LONGOPT_VERBOSE:
+                _verbose = true;
+                break;
+            case LONGOPT_WORDNUM:
+                _showWordNum = true;
+                break;
+            default:
+                if (optArgument != NULL) {
+                    LOG(error,
+                        "longopt %s with arg %s",
+                        longopts[longopt_index].name, optArgument);
+                } else {
+                    LOG(error,
+                        "longopt %s",
+                        longopts[longopt_index].name);
+                }
+            }
+            break;
+        case 'i':
+            _indexDir = optArgument;
+            break;
+        default:
+            return false;
+        }
+    }
+    return true;
+}
+
+// Writes "word<TAB>numDocs" (optionally with word number and bit length) for each word to stdout.
+void
+DumpWordsSubApp::dumpWords(void)
+{
+    search::index::Schema schema;
+    std::string schemaName = _indexDir + "/schema.txt";
+    if (!schema.loadFromFile(schemaName)) {
+        LOG(error,
+            "Could not load schema from %s", schemaName.c_str());
+        exit(1);
+    }
+    _fieldOptions.validateFields(schema);
+    if (_fieldOptions._ids.size() != 1) {
+        LOG(error,
+            "Wrong number of field arguments: %d",
+            static_cast<int>(_fieldOptions._ids.size()));
+        exit(1);
+    }
+
+    SchemaUtil::IndexIterator index(schema, _fieldOptions._ids[0]);
+    vespalib::string fieldDir = _indexDir + "/" + index.getName();
+    PageDict4FileSeqRead wordList;
+    std::string wordListName = fieldDir + "/dictionary";
+    search::TuneFileSeqRead tuneFileRead;
+    if (!wordList.open(wordListName, tuneFileRead)) {
+        LOG(error,
+            "Could not open wordlist %s", wordListName.c_str());
+        exit(1);
+    }
+    uint64_t wordNum = 0;
+    vespalib::string word;
+    PostingListCounts counts;
+    for (;;) {
+        wordList.readWord(word, wordNum, counts);
+        if (wordNum == wordList.noWordNumHigh())
+            break;
+        if (counts._numDocs < _minNumDocs)
+            continue;
+        if (_showWordNum) {
+            std::cout << wordNum << '\t';
+        }
+        std::cout << word << '\t' << counts._numDocs;
+        if (_verbose) {
+            std::cout << '\t' << counts._bitLength;
+        }
+        std::cout << '\n';
+    }
+    if (!wordList.close()) {
+        LOG(error,
+            "Could not close wordlist %s", wordListName.c_str());
+        exit(1);
+    }
+}
+
+
+int
+DumpWordsSubApp::run(void)
+{
+    dumpWords();
+    return 0;
+}
+
+// Application entry: dispatches on argv[1] to one of the sub commands.
+class VespaIndexInspectApp : public FastOS_Application
+{
+public:
+    VespaIndexInspectApp(void);
+
+    void
+    usage(void);
+
+    int
+    Main(void);
+};
+
+
+VespaIndexInspectApp::VespaIndexInspectApp(void)
+    : FastOS_Application()
+{
+}
+
+
+void
+VespaIndexInspectApp::usage(void)
+{
+    ShowPostingListSubApp(*this).usage(true);
+    DumpWordsSubApp(*this).usage(false);
+}
+
+// Returns 1 (after printing usage) on a missing/unknown sub command or bad options.
+int
+VespaIndexInspectApp::Main(void)
+{
+    if (_argc < 2) {
+        usage();
+        return 1;
+    }
+    std::unique_ptr<SubApp> subApp;
+    if (strcmp(_argv[1], "showpostings") == 0)
+        subApp.reset(new ShowPostingListSubApp(*this));
+    else if (strcmp(_argv[1], "dumpwords") == 0)
+        subApp.reset(new DumpWordsSubApp(*this));
+    if (subApp.get() != NULL) {
+        if (!subApp->getOptions()) {
+            subApp->usage(true);
+            return 1;
+        }
+        return subApp->run();
+    }
+    usage();
+    return 1;
+}
+
+FASTOS_MAIN(VespaIndexInspectApp);
diff --git
a/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore b/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore new file mode 100644 index 00000000000..5c3dba7e243 --- /dev/null +++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/.gitignore @@ -0,0 +1,3 @@ +/.depend +/Makefile +/vespa-ranking-expression-analyzer diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt b/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt new file mode 100644 index 00000000000..6d1b7f55980 --- /dev/null +++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_vespa-ranking-expression-analyzer_app + SOURCES + vespa-ranking-expression-analyzer.cpp + OUTPUT_NAME vespa-ranking-expression-analyzer + INSTALL bin + DEPENDS + searchlib +) diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression b/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression new file mode 100644 index 00000000000..87c9e959d1f --- /dev/null +++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/illegal.expression @@ -0,0 +1 @@ +a # b \ No newline at end of file diff --git a/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp b/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp new file mode 100644 index 00000000000..e64fb406bb5 --- /dev/null +++ b/searchlib/src/apps/vespa-ranking-expression-analyzer/vespa-ranking-expression-analyzer.cpp @@ -0,0 +1,386 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include
+// NOTE(review): the header names of the #include directives in this file were lost in extraction; restore from upstream.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+//-----------------------------------------------------------------------------
+
+using vespalib::BenchmarkTimer;
+using vespalib::tensor::DefaultTensorEngine;
+using namespace vespalib::eval;
+using namespace vespalib::eval::nodes;
+using namespace vespalib::eval::gbdt;
+using namespace search::features::rankingexpression;
+
+//-----------------------------------------------------------------------------
+// Read-only memory mapping of a file; valid() is false if open/fstat/mmap failed.
+struct File {
+    int file;
+    char *data;
+    size_t size;
+    File(const vespalib::string &file_name)
+        : file(open(file_name.c_str(), O_RDONLY)), data((char*)MAP_FAILED), size(0)
+    {
+        struct stat info;
+        if ((file != -1) && (fstat(file, &info) == 0)) {
+            data = (char*)mmap(0, info.st_size, PROT_READ, MAP_SHARED, file, 0);
+            if (data != MAP_FAILED) {
+                size = info.st_size;
+            }
+        }
+    }
+    ~File() {
+        if (valid()) {
+            munmap(data, size);
+        }
+        if (file != -1) {
+            close(file);
+        }
+    }
+    bool valid() const { return (data != MAP_FAILED); }
+};
+
+//-----------------------------------------------------------------------------
+// Strips any directory part and a trailing ".expression" from a file name.
+vespalib::string strip_name(const vespalib::string &name) {
+    const char *expected_ending = ".expression";
+    vespalib::string tmp = name;
+    size_t pos = tmp.rfind("/");
+    if (pos != tmp.npos) {
+        tmp = tmp.substr(pos + 1);
+    }
+    pos = tmp.rfind(expected_ending);
+    if ((pos != tmp.npos) && (pos + strlen(expected_ending) == tmp.size())) {
+        tmp = tmp.substr(0, pos);
+    }
+    return tmp;
+}
+
+size_t as_percent(double value) {
+    return size_t(round(value * 100.0));
+}
+
+const char *maybe_s(size_t n) { return (n == 1) ? "" : "s"; }
+
+//-----------------------------------------------------------------------------
+// Number of nodes in the AST rooted at 'node' (the node itself included).
+size_t count_nodes(const Node &node) {
+    size_t count = 1;
+    for (size_t i = 0; i < node.num_children(); ++i) {
+        count += count_nodes(node.get_child(i));
+    }
+    return count;
+}
+
+//-----------------------------------------------------------------------------
+// One function parameter and the constants it is compared against in the expression.
+struct InputInfo {
+    vespalib::string name;
+    std::vector<double> cmp_with;
+    explicit InputInfo(vespalib::stringref name_in)
+        : name(name_in), cmp_with() {}
+    double select_value() const {
+        // Lower-middle element of cmp_with (sorted by FunctionInfo); 0.5 when empty.
+        return cmp_with.empty() ? 0.5 : cmp_with[(cmp_with.size()-1)/2];
+    }
+};
+
+//-----------------------------------------------------------------------------
+// Static analysis of a parsed function: size, GBD forests and sample parameter values.
+struct FunctionInfo {
+    typedef std::vector<const Node *> TreeList;
+
+    size_t expression_size;
+    bool root_is_forest;
+    std::vector<TreeList> forests;
+    std::vector<InputInfo> inputs;
+    std::vector<double> params;
+
+    void find_forests(const Node &node) {
+        if (node.is_forest()) {
+            forests.push_back(extract_trees(node));
+        } else {
+            for (size_t i = 0; i < node.num_children(); ++i) {
+                find_forests(node.get_child(i));
+            }
+        }
+    }
+    // NOTE(review): node type names passed to as<>() below are reconstructed (lost in extraction); confirm against upstream.
+    template <typename T>
+    void check_cmp(const T *node) {
+        if (node) {
+            auto lhs_symbol = as<Symbol>(node->lhs());
+            auto rhs_symbol = as<Symbol>(node->rhs());
+            if (lhs_symbol && node->rhs().is_const()) {
+                inputs[lhs_symbol->id()].cmp_with.push_back(node->rhs().get_const_value());
+            }
+            if (node->lhs().is_const() && rhs_symbol) {
+                inputs[rhs_symbol->id()].cmp_with.push_back(node->lhs().get_const_value());
+            }
+        }
+    }
+
+    void check_in(const In *node) {
+        if (node) {
+            auto lhs_symbol = as<Symbol>(node->lhs());
+            auto rhs_symbol = as<Symbol>(node->rhs());
+            if (lhs_symbol && node->rhs().is_const()) {
+                auto array = as<Array>(node->rhs());
+                if (array) {
+                    for (size_t i = 0; i < array->size(); ++i) {
+                        inputs[lhs_symbol->id()].cmp_with.push_back(array->get(i).get_const_value());
+                    }
+                } else {
+                    inputs[lhs_symbol->id()].cmp_with.push_back(node->rhs().get_const_value());
+                }
+            }
+            if (node->lhs().is_const() && rhs_symbol) {
+                inputs[rhs_symbol->id()].cmp_with.push_back(node->lhs().get_const_value());
+            }
+        }
+    }
+
+    void analyze_inputs(const Node &node) {
+        for (size_t i = 0; i < node.num_children(); ++i) {
+            analyze_inputs(node.get_child(i));
+        }
+        check_cmp(as<Equal>(node));
+        check_cmp(as<NotEqual>(node));
+        check_cmp(as<Approx>(node));
+        check_cmp(as<Less>(node));
+        check_cmp(as<LessEqual>(node));
+        check_cmp(as<Greater>(node));
+        check_cmp(as<GreaterEqual>(node));
+        check_in(as<In>(node));
+    }
+
+    FunctionInfo(const Function &function)
+        : expression_size(count_nodes(function.root())),
+          root_is_forest(function.root().is_forest()),
+          forests(),
+          inputs(),
+          params()
+    {
+        for (size_t i = 0; i < function.num_params(); ++i) {
+            inputs.emplace_back(function.param_name(i));
+        }
+        find_forests(function.root());
+        analyze_inputs(function.root());
+        for (size_t i = 0; i < function.num_params(); ++i) {
+            std::sort(inputs[i].cmp_with.begin(), inputs[i].cmp_with.end());
+        }
+        for (size_t i = 0; i < function.num_params(); ++i) {
+            params.push_back(inputs[i].select_value());
+        }
+    }
+    // Sum of ctx.if_cnt() over all trees when interpreted with the sample params.
+    size_t get_path_len(const TreeList &trees) const {
+        size_t path = 0;
+        for (const Node *tree: trees) {
+            InterpretedFunction ifun(DefaultTensorEngine::ref(), *tree, params.size());
+            InterpretedFunction::Context ctx;
+            for (double param: params) {
+                ctx.add_param(param);
+            }
+            ifun.eval(ctx);
+            path += ctx.if_cnt();
+        }
+        return path;
+    }
+
+    void report() const {
+        fprintf(stderr, " number of inputs: %zu\n", inputs.size());
+        fprintf(stderr, " expression size (AST node count): %zu\n", expression_size);
+        if (root_is_forest) {
+            fprintf(stderr, " expression root is a sum of GBD trees\n");
+        }
+        if (!forests.empty()) {
+            fprintf(stderr, " expression contains %zu GBD forest%s\n",
+                    forests.size(), maybe_s(forests.size()));
+        }
+        for (size_t i = 0; i < forests.size(); ++i) {
+            ForestStats forest(forests[i]);
+            fprintf(stderr, " GBD forest %zu:\n", i);
+            fprintf(stderr, " average path length: %g\n", forest.total_average_path_length);
+            fprintf(stderr, " expected path length: %g\n", forest.total_expected_path_length);
+            fprintf(stderr, " actual path with sample input: %zu\n", get_path_len(forests[i]));
+            if (forest.total_tuned_checks == 0) {
+                fprintf(stderr, " WARNING: checks are not tuned (expected path length to be ignored)\n");
+            }
+            fprintf(stderr, " largest set membership check: %zu\n", forest.max_set_size);
+            for (const auto &item: forest.tree_sizes) {
+                fprintf(stderr, " forest contains %zu GBD tree%s of size %zu\n",
+                        item.count, maybe_s(item.count), item.size);
+            }
+            if (forest.tree_sizes.size() > 1) {
+                fprintf(stderr, " forest contains %zu GBD trees in total\n", forest.num_trees);
+            }
+        }
+    }
+};
+
+//-----------------------------------------------------------------------------
+// The *_used helpers tell which forest optimization the default compilation ended up with.
+bool none_used(const std::vector<Forest::UP> &forests) {
+    return forests.empty();
+}
+
+bool deinline_used(const std::vector<Forest::UP> &forests) {
+    if (forests.empty()) {
+        return false;
+    }
+    for (const Forest::UP &forest: forests) {
+        if (dynamic_cast<DeinlineForest*>(forest.get()) == nullptr) {
+            return false;
+        }
+    }
+    return true;
+}
+
+bool vmforest_used(const std::vector<Forest::UP> &forests) {
+    if (forests.empty()) {
+        return false;
+    }
+    for (const Forest::UP &forest: forests) {
+        if (dynamic_cast<VMForest*>(forest.get()) == nullptr) {
+            return false;
+        }
+    }
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// Everything known about one expression file, including benchmark results.
+struct State {
+    vespalib::string name;
+    vespalib::string expression;
+    Function function;
+    FunctionInfo fun_info;
+    CompiledFunction::UP compiled_function;
+
+    double llvm_compile_s = 0.0;
+    double llvm_execute_us = 0.0;
+
+    std::vector<vespalib::string> options;
+    std::vector<double> options_us;
+
+    explicit State(const vespalib::string &file_name,
+                   vespalib::stringref expression_in)
+        : name(strip_name(file_name)),
+          expression(expression_in),
+          function(Function::parse(expression, FeatureNameExtractor())),
+          fun_info(function),
+          compiled_function(),
+          llvm_compile_s(0.0),
+          llvm_execute_us(0.0),
+          options(),
+          options_us()
+    {
+    }
+    // Compiles repeatedly within the timer budget; keeps the last result and the minimum time.
+    void benchmark_llvm_compile() {
+        BenchmarkTimer timer(1.0);
+        while (timer.has_budget()) {
+            timer.before();
+            CompiledFunction::UP new_cf(new CompiledFunction(function, PassParams::ARRAY));
+            timer.after();
+            compiled_function = std::move(new_cf);
+        }
+        llvm_compile_s = timer.min_time();
+    }
+
+    void benchmark_option(const vespalib::string &opt_name, Optimize::Chain optimizer_chain) {
+        options.push_back(opt_name);
+        options_us.push_back(CompiledFunction(function, PassParams::ARRAY, optimizer_chain).estimate_cost_us(fun_info.params));
+        fprintf(stderr, " LLVM(%s) execute time: %g us\n", opt_name.c_str(), options_us.back());
+    }
+    // Human-readable details go to stderr; the one-line summary goes to stdout.
+    void report() {
+        fun_info.report();
+        benchmark_llvm_compile();
+        fprintf(stderr, " LLVM compile time: %g s\n", llvm_compile_s);
+        llvm_execute_us = compiled_function->estimate_cost_us(fun_info.params);
+        fprintf(stderr, " LLVM(default) execute time: %g us\n", llvm_execute_us);
+        if (!none_used(compiled_function->get_forests())) {
+            benchmark_option("none", Optimize::none);
+        }
+        if (!deinline_used(compiled_function->get_forests()) && !fun_info.forests.empty()) {
+            benchmark_option("deinline", DeinlineForest::optimize_chain);
+        }
+        if (!vmforest_used(compiled_function->get_forests()) && !fun_info.forests.empty()) {
+            benchmark_option("vmforest", VMForest::optimize_chain);
+        }
+        fprintf(stdout, "[compile: %.3fs][execute: %.3fus]", llvm_compile_s, llvm_execute_us);
+        for (size_t i = 0; i < options.size(); ++i) {
+            double rel_speed = (llvm_execute_us / options_us[i]);
+            fprintf(stdout, "[%s: %zu%%]", options[i].c_str(), as_percent(rel_speed));
+            if (rel_speed >= 1.1) {
+                fprintf(stderr, " WARNING: LLVM(%s) faster than default choice\n",
+                        options[i].c_str());
+            }
+        }
+        fprintf(stdout, "[name: %s]\n", name.c_str());
+        fflush(stdout);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+struct MyApp : public FastOS_Application {
+    int Main();
+    int usage();
+    virtual bool useProcessStarter() const { return false; }
+};
+
+int
+MyApp::usage() {
+    fprintf(stderr, "usage: %s \n", _argv[0]);
+    fprintf(stderr, " analyze/benchmark vespa ranking expression\n");
+    return 1;
+}
+
+int
+MyApp::Main()
+{
+    if (_argc != 2) {
+        return usage();
+    }
+    vespalib::string file_name(_argv[1]);
+    File file(file_name);
+    if (!file.valid()) {
+        fprintf(stderr, "could not read input file: '%s'\n",
+                file_name.c_str());
+        return 1;
+    }
+    State state(file_name, vespalib::stringref(file.data, file.size));
+    if (state.function.has_error()) {
+        vespalib::string error_message = state.function.get_error();
+        fprintf(stderr, "input file (%s) contains an illegal expression:\n%s\n",
+                file_name.c_str(), error_message.c_str());
+        return 1;
+    }
+    fprintf(stderr, "analyzing expression file: '%s'\n",
+            file_name.c_str());
+    state.report();
+    return 0;
+}
+
+int main(int argc, char **argv) {
+    MyApp my_app;
+    return my_app.Entry(argc, argv);
+}
+
+//-----------------------------------------------------------------------------
diff --git a/searchlib/src/forcelink.sh b/searchlib/src/forcelink.sh
new file mode 100755
index 00000000000..b088b8363a5
--- /dev/null
+++ b/searchlib/src/forcelink.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+project=searchlib
+
+if [ "X$1" = "Xupdate" ]; then
+    # update forcelink.hpp
+    echo "generating forcelink.hpp..."
+    guard=`pwd | sed -e "s|.*/${project}/||" -e "s|/|_|g"`
+    prefix=forcelink_file_${project}_${guard}_
+    echo "#ifndef GUARD_${project}_${guard}_FORCELINK" > forcelink.hpp
+    echo "#define GUARD_${project}_${guard}_FORCELINK" >> forcelink.hpp
+    echo "" >> forcelink.hpp
+    find . -maxdepth 1 -name "*.cpp" | sed -e "s|.*/\(.*\)\.cpp|void ${prefix}\1();|" >> forcelink.hpp
+    echo "" >> forcelink.hpp
+    echo "void forcelink_${project}_${guard}() {" >> forcelink.hpp
+    find . -maxdepth 1 -name "*.cpp" | sed -e "s|.*/\(.*\)\.cpp| ${prefix}\1();|" >> forcelink.hpp
+    echo "}" >> forcelink.hpp
+    echo "" >> forcelink.hpp
+    echo "#endif" >> forcelink.hpp
+    echo "invoke 'forcelink_${project}_${guard}()' to force link this directory"
+
+    # update .cpp files
+    for file in *.cpp; do
+        name=`echo "${prefix}${file}" | sed 's|\(.*\)\.cpp|\1|'`
+        found=`grep ${name} ${file} | wc -l`
+        if [ $found = "0" ]; then
+            echo "updating ${file}..."
+            echo "" >> $file
+            echo "// this function was added by $0" >> $file
+            echo "void ${name}() {}" >> $file
+        fi
+    done
+else
+    echo "This is a small utility script that might help out when trying to"
+    echo "force the linkage of object files. When run in a subdirectory within"
+    echo "${project}, it will create a 'forcelink.hpp' file that contains the"
+    echo "force linkage wrapping code. It will also update any .cpp files in the"
+    echo "directory with appropriate dummy functions to allow consistent force"
+    echo "linkage. Note that this script will make a large"
+    echo "number of assumptions; USE AT YOUR OWN RISK!"
+    echo ""
+    echo "if you feel lucky, run:"
+    echo "$0 update"
+fi
diff --git a/searchlib/src/main/OWNERS b/searchlib/src/main/OWNERS
new file mode 100644
index 00000000000..31af040f698
--- /dev/null
+++ b/searchlib/src/main/OWNERS
@@ -0,0 +1 @@
+bratseth
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java
new file mode 100644
index 00000000000..b877a88fc8d
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AggregationResult.java
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.ExpressionNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + *

This is the aggregation super-class from which all types of aggregation inherit.

+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class AggregationResult extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 80, AggregationResult.class); + private ExpressionNode expression = null; + private int tag = -1; + + /** + *

Returns the tag of this aggregation result. This is useful for uniquely identifying a result.

+ * + * @return The numerical tag. + */ + public int getTag() { + return tag; + } + + /** + *

Assigns a tag to this aggregation result.

+ * + * @param tag The numerical tag to set. + * @return This, to allow chaining. + */ + public AggregationResult setTag(int tag) { + this.tag = tag; + return this; + } + + /** + *

This method is called when merging aggregation results. This method is simply a proxy for the abstract {@link + * #onMerge(AggregationResult)} method.

+ * + * @param result The result to merge with. + */ + public void merge(AggregationResult result) { + onMerge(result); + } + + /** + *

This method is called when all aggregation results have been merged. This method can be overridden by + * subclasses that need special behaviour to occur after merge.

+ */ + public void postMerge() { + // empty + } + + /** + *

This method returns a value that can be used for ranking.

+ * + * @return The rankable result. + */ + public abstract ResultNode getRank(); + + /** + *

Sets the expression to aggregate on.

+ * + * @param exp The expression. + * @return This, to allow chaining. + */ + public AggregationResult setExpression(ExpressionNode exp) { + expression = exp; + return this; + } + + /** + *

Returns the expression to aggregate on.

+ * + * @return The expression. + */ + public ExpressionNode getExpression() { + return expression; + } + + /** + *

This method must be implemented by subclasses to support merge. It is called when the {@link + * #merge(AggregationResult)} method is invoked.

+ * + * @param result The result to merge with. + */ + protected abstract void onMerge(AggregationResult result); + + @Override + public ResultNode getResult() { + return getRank(); + } + + @Override + public void onPrepare() { + + } + + @Override + public boolean onExecute() { + return true; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, expression); + buf.putInt(null, tag); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + expression = (ExpressionNode)deserializeOptional(buf); + tag = buf.getInt(null); + } + + @Override + public AggregationResult clone() { + AggregationResult obj = (AggregationResult)super.clone(); + if (expression != null) { + obj.expression = expression.clone(); + } + return obj; + } + + @Override + protected final boolean equalsExpression(ExpressionNode obj) { + AggregationResult rhs = (AggregationResult)obj; + if (!equals(expression, rhs.expression)) { + return false; + } + if (tag != rhs.tag) { + return false; + } + if (!equalsAggregation(rhs)) { + return false; + } + return true; + } + + protected abstract boolean equalsAggregation(AggregationResult obj); + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("expression", expression); + visitor.visit("tag", tag); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java new file mode 100644 index 00000000000..651ab192786 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/AverageAggregationResult.java @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.NumericResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the average of all results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class AverageAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 85, AverageAggregationResult.class); + private NumericResultNode sum; + private long count; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public AverageAggregationResult() { + + } + + /** + * Constructs an instance of this class with given sum and count values. + * + * @param sum The initial sum to set. + * @param count The initial number of results. + */ + public AverageAggregationResult(NumericResultNode sum, long count) { + setSum(sum); + setCount(count); + } + + /** + * Returns the sum of all results in this. + * + * @return The numeric sum. + */ + public final NumericResultNode getSum() { + return sum; + } + + /** + * Sets the sum of all results in this. + * + * @param sum The sum to set. + * @return This, to allow chaining. + */ + public final AverageAggregationResult setSum(NumericResultNode sum) { + this.sum = sum; + return this; + } + + /** + * Returns the number of results in this. + * + * @return The number of results. + */ + public final long getCount() { + return count; + } + + /** + * Sets the number of results in this. + * + * @param count The number of results. + * @return This, to allow chaining. + */ + public final AverageAggregationResult setCount(long count) { + this.count = count; + return this; + } + + /** + * Returns the average value of the results. 
Because the result can be any numeric type, this method returns a + * {@link NumericResultNode} object. If the count is zero, a copy of the sum is returned undivided. + * + * @return The average result value. + */ + public final NumericResultNode getAverage() { + NumericResultNode sum = (NumericResultNode)this.sum.clone(); + if (count != 0) { + sum.divide(new IntegerResultNode(count)); + } + return sum; + } + + @Override + public ResultNode getRank() { + return getAverage(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putLong(null, count); + serializeOptional(buf, sum); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + count = buf.getLong(null); + sum = (NumericResultNode)deserializeOptional(buf); + } + + @Override + protected void onMerge(AggregationResult result) { + sum.add(((AverageAggregationResult)result).sum); + count += ((AverageAggregationResult)result).count; + } + + @Override + public AverageAggregationResult clone() { + AverageAggregationResult obj = (AverageAggregationResult)super.clone(); + if (sum != null) { + obj.sum = (NumericResultNode)sum.clone(); + } + return obj; + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + AverageAggregationResult rhs = (AverageAggregationResult)obj; + if (!equals(sum, rhs.sum)) { + return false; + } + if (count != rhs.count) { + return false; + } + return true; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)count; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("sum", sum); + visitor.visit("count", count); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java new file mode 100644 index 
00000000000..5f90c126115 --- /dev/null +++ 
b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/CountAggregationResult.java @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the number of aggregated hits. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class CountAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 81, CountAggregationResult.class); + private long count = 0; + + /** + * Constructs an empty result node with a count of zero. NOTE: This instance is broken until non-optional member data is set. + */ + public CountAggregationResult() { + + } + + /** + * Constructs an instance of this class with given count value. + * + * @param count The initial number of hits. + */ + public CountAggregationResult(long count) { + setCount(count); + } + + /** + * Returns the number of aggregated hits. + * + * @return The count. + */ + public final long getCount() { + return count; + } + + /** + * Sets the number of aggregated hits. + * + * @param count The count. + * @return This, to allow chaining. 
+ */ + public final CountAggregationResult setCount(long count) { + this.count = count; + return this; + } + + @Override + public ResultNode getRank() { + return new IntegerResultNode(count); + } + + @Override + protected void onMerge(AggregationResult result) { + count += ((CountAggregationResult)result).count; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putLong(null, count); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + count = buf.getLong(null); + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + return count == ((CountAggregationResult)obj).count; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)count; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("count", count); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java new file mode 100644 index 00000000000..d6c76087e4e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResult.java @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.aggregation.hll.*; +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the number of unique documents matching a given expression. 
+ * + * @author bjorncs + */ +public class ExpressionCountAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 88, ExpressionCountAggregationResult.class); + private static final int UNDEFINED = -1; + + // The unique count estimator + private final UniqueCountEstimator> estimator; + // Sketch merger + private final SketchMerger sketchMerger = new SketchMerger(); + // The sketch used as basis for the unique count calculation. The sketch is populated with data by the search nodes. + private Sketch sketch; + // The estimated unique count. This value will not be serialized / deserialized. + private long estimatedUniqueCount = UNDEFINED; + + + /** + * Constructor used for deserialization. Will be instantiated with a default sketch. + */ + @SuppressWarnings("UnusedDeclaration") + public ExpressionCountAggregationResult() { + this(new SparseSketch(), new HyperLogLogEstimator()); + } + + /** + * Constructs an instance with a given sketch and unique count estimator. For test purposes. + * + * @param initialSketch The HLL sketch. + */ + public ExpressionCountAggregationResult(Sketch initialSketch, UniqueCountEstimator> estimator) { + this.sketch = initialSketch; + this.estimator = estimator; + } + + /** + * @return The unique count estimated by the HyperLogLog algorithm. The estimate is computed on first use and cached until the next merge. + */ + public long getEstimatedUniqueCount() { + if (estimatedUniqueCount == UNDEFINED) { + updateEstimate(); + } + return estimatedUniqueCount; + } + + @Override + public ResultNode getRank() { + return new IntegerResultNode(getEstimatedUniqueCount()); + } + + @Override + protected void onMerge(AggregationResult result) { + ExpressionCountAggregationResult other = (ExpressionCountAggregationResult) result; + sketch = sketchMerger.merge(sketch, other.sketch); + // Any cached result should be invalidated. 
+ estimatedUniqueCount = UNDEFINED; + } + + public Sketch getSketch() { + return sketch; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + sketch.serializeWithId(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + sketch = (Sketch) create(buf); + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + // obj is assumed to always be of correct type. + ExpressionCountAggregationResult other = (ExpressionCountAggregationResult) obj; + return sketch.equals(other.sketch); + } + + private void updateEstimate() { + estimatedUniqueCount = estimator.estimateCount(sketch); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("sketch", sketch); + } + + @Override + public int hashCode() { + int result = super.hashCode(); + result = 31 * result + sketch.hashCode(); + return result; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java new file mode 100644 index 00000000000..8b0704eea9b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/FS4Hit.java @@ -0,0 +1,132 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.document.GlobalId; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This class represents a single hit from the fastserver4 backend + * + * @author Haavard Pettersen + */ +public class FS4Hit extends Hit { + + public static final int classId = registerClass(0x4000 + 95, FS4Hit.class); // shared with c++ + private int path = 0; + private GlobalId globalId = new GlobalId(new byte[GlobalId.LENGTH]); + private int distributionKey = -1; + + /** + * Constructs an empty hit. + */ + public FS4Hit() { + } + + /** + * Create a hit with the given path, document id and rank. The distribution key is set to -1. + * + * @param path The mangled search node path. + * @param globalId The global document id. + * @param rank The rank of this hit. + */ + public FS4Hit(int path, GlobalId globalId, double rank) { + this(path, globalId, rank, -1); + } + + /** + * Create a hit with the given path, document id, rank and distribution key. + * + * @param path The mangled search node path. + * @param globalId The global document id. + * @param rank The rank of this hit. + * @param distributionKey The distribution key of the node producing this hit. + */ + public FS4Hit(int path, GlobalId globalId, double rank, int distributionKey) { + super(rank); + this.path = path; + this.globalId = globalId; + this.distributionKey = distributionKey; + } + + /** + * Obtain the (mangled) network path back to the search node returning this hit. + * + * @return The mangled search node path. + */ + public int getPath() { + return path; + } + + /** + * Obtain the global document id on the search node returning this hit. + * + * @return The global document id. + */ + public GlobalId getGlobalId() { + return globalId; + } + + /** + * Obtain the distribution key for the node producing this hit. 
+ * + * @return distribution key + */ + public int getDistributionKey() { + return distributionKey; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, path); + buf.put(null, globalId.getRawId()); + buf.putInt(null, distributionKey); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + path = buf.getInt(null); + globalId = new GlobalId(buf.getBytes(null, GlobalId.LENGTH)); + distributionKey = buf.getInt(null); + } + + @Override + public int hashCode() { + return super.hashCode() + path + globalId.hashCode() + distributionKey; + } + + @SuppressWarnings({ "EqualsWhichDoesntCheckParameterClass", "RedundantIfStatement" }) + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + FS4Hit rhs = (FS4Hit)obj; + if (path != rhs.path) { + return false; + } + if (!globalId.equals(rhs.globalId)) { + return false; + } + if (distributionKey != rhs.distributionKey) { + return false; + } + return true; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("path", path); + visitor.visit("globalId", globalId.toString()); + visitor.visit("distributionKey", distributionKey); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java new file mode 100644 index 00000000000..ecbab688821 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/ForceLoad.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +/** + * FIXME: Really ugly hack to force class loading for subclasses of Identifiable. 
+ * This should be fixed by doing the all class registration in a single place (similar to how its done in C++). + */ +public class ForceLoad { + + static { + String pkg = "com.yahoo.searchlib.aggregation"; + String[] classes = { + "XorAggregationResult", + "SumAggregationResult", + "Group", + "HitsAggregationResult", + "AggregationResult", + "FS4Hit", + "VdsHit", + "Grouping", + "Hit", + "ForceLoad", + "MinAggregationResult", + "GroupingLevel", + "MaxAggregationResult", + "CountAggregationResult", + "AverageAggregationResult", + "ExpressionCountAggregationResult", + "hll.SparseSketch", + "hll.NormalSketch", + "ForceLoad" + }; + com.yahoo.system.ForceLoad.forceLoad(pkg, classes); + } + + public static boolean forceLoad() { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java new file mode 100644 index 00000000000..03836d75efc --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Group.java @@ -0,0 +1,518 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.AggregationRefNode; +import com.yahoo.searchlib.expression.ExpressionNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.*; + +import java.util.*; + +public class Group extends Identifiable { + + public static final int classId = registerClass(0x4000 + 90, Group.class); + private static final ObjectPredicate REF_LOCATOR = new RefLocator(); + private List orderByIdx = new ArrayList(); + private List orderByExp = new ArrayList(); + private List aggregationResults = new ArrayList(); + private List children = new ArrayList(); + private ResultNode id = null; + private double rank; + private int tag = -1; + private SortType sortType = SortType.UNSORTED; + + /** + *

This tells you if the children are ranked by the pure relevance or by a more complex expression. That + * indicates if the rank score from the child can be used for ordering.

+ * + * @return True if it ranked by pure relevance. + */ + public boolean isRankedByRelevance() { + return orderByIdx.isEmpty(); + } + + /** + *

Merges the content of the given group into this. When this function returns, make sure to call {@link + * #postMerge(java.util.List, int, int)}.

+ * + * @param firstLevel The first level to merge. + * @param currentLevel The current level. + * @param rhs The group to merge with. + */ + public void merge(int firstLevel, int currentLevel, Group rhs) { + if (rhs.rank > rank) { + rank = rhs.rank; // keep highest rank + } + if (currentLevel >= firstLevel) { + for (int i = 0, len = aggregationResults.size(); i < len; ++i) { + aggregationResults.get(i).merge(rhs.aggregationResults.get(i)); + } + } + + ArrayList merged = new ArrayList(); + Iterator lhsChild = children.iterator(), rhsChild = rhs.children.iterator(); + if (lhsChild.hasNext() && rhsChild.hasNext()) { + Group lhsGroup = lhsChild.next(); + Group rhsGroup = rhsChild.next(); + for (; (lhsGroup != null) && (rhsGroup != null); ) { + int cmp = lhsGroup.getId().compareTo(rhsGroup.getId()); + if (cmp < 0) { + merged.add(lhsGroup); + lhsGroup = lhsChild.hasNext() ? lhsChild.next() : null; + } else if (cmp > 0) { + merged.add(rhsGroup); + rhsGroup = rhsChild.hasNext() ? rhsChild.next() : null; + } else { + lhsGroup.merge(firstLevel, currentLevel + 1, rhsGroup); + merged.add(lhsGroup); + lhsGroup = lhsChild.hasNext() ? lhsChild.next() : null; + rhsGroup = rhsChild.hasNext() ? rhsChild.next() : null; + } + } + if (lhsGroup != null) { + merged.add(lhsGroup); + } + if (rhsGroup != null) { + merged.add(rhsGroup); + } + } + while (lhsChild.hasNext()) { + merged.add(lhsChild.next()); + } + while (rhsChild.hasNext()) { + merged.add(rhsChild.next()); + } + children = merged; + } + + private void executeOrderBy() { + for (ExpressionNode node : orderByExp) { + node.prepare(); + node.execute(); + } + } + + /** + *

After merging, this method will prune all levels so that they do not exceed the configured maximum number of + * groups per level.

+ * + * @param levels The specs of all grouping levels. + * @param firstLevel The first level to merge. + * @param currentLevel The current level. + */ + public void postMerge(List levels, int firstLevel, int currentLevel) { + if (currentLevel >= firstLevel) { + for (AggregationResult result : aggregationResults) { + result.postMerge(); + } + for (ExpressionNode result : orderByExp) { + result.execute(); + } + } + if (currentLevel < levels.size()) { + int maxGroups = (int)levels.get(currentLevel).getMaxGroups(); + for (Group group : children) { + group.executeOrderBy(); + } + if (maxGroups >= 0 && children.size() > maxGroups) { + // prune groups + sortChildrenByRank(); + children = children.subList(0, maxGroups); + sortChildrenById(); + } + for (Group group : children) { + group.postMerge(levels, firstLevel, currentLevel + 1); + } + } + + } + + /** + *

Will sort the children by their id, if they are not sorted already.

+ */ + public void sortChildrenById() { + if (sortType == SortType.BYID) { + return; + } + Collections.sort(children, new Comparator() { + public int compare(Group lhs, Group rhs) { + return lhs.compareId(rhs); + } + }); + sortType = SortType.BYID; + } + + /** + *

Will sort the children by their rank, if they are not sorted already.

+ */ + public void sortChildrenByRank() { + if (sortType == SortType.BYRANK) { + return; + } + Collections.sort(children, new Comparator() { + public int compare(Group lhs, Group rhs) { + return lhs.compareRank(rhs); + } + }); + sortType = SortType.BYRANK; + } + + /** + *

Returns the label to use for this group. See the comment on {@link #setId(com.yahoo.searchlib.expression.ResultNode)} + * for the rationale of this being a {@link ResultNode}.

+ * + * @return The label. + */ + public ResultNode getId() { + return id; + } + + /** + *

Sets the label to use for this group. This is a {@link ResultNode} so that a group can be labeled with + * whatever value the classifier expression returns.

+ * + * @param id The label to set. + * @return This, to allow chaining. + */ + public Group setId(ResultNode id) { + this.id = id; + return this; + } + + /** + *

Sets the relevancy to use for this group.

+ * + * @param rank The rank to set. + * @return This, to allow chaining. + */ + public Group setRank(double rank) { + this.rank = rank; + return this; + } + + /** + *

Return the relevancy of this group.

+ * + * @return Relevance. + */ + public double getRank() { + return rank; + } + + /** + *

Adds a child group to this.

+ * + * @param child The group to add. + * @return This, to allow chaining. + */ + public Group addChild(Group child) { + if (child == null) { + throw new IllegalArgumentException("Child can not be null."); + } + children.add(child); + return this; + } + + /** + *

Returns the list of child groups of this group.

+ * + * @return The children. + */ + public List getChildren() { + return children; + } + + /** + *

Returns the tag of this group. This value is set per-level in the grouping request, and is then assigned + * to each group of that level in the grouping result as they are copied from the prototype.

+ * + * @return The numerical tag. + */ + public int getTag() { + return tag; + } + + /** + *

Assigns a tag to this group.

+ * + * @param tag The numerical tag to set. + * @return This, to allow chaining. + */ + public Group setTag(int tag) { + this.tag = tag; + return this; + } + + /** + *

Returns this group's aggregation results.

+ * + * @return The aggregation results. + */ + public List getAggregationResults() { + return aggregationResults; + } + + /** + *

Adds an aggregation result to this group.

+ * + * @param result The result to add. + * @return This, to allow chaining. + */ + public Group addAggregationResult(AggregationResult result) { + aggregationResults.add(result); + return this; + } + + /** + *

Adds an order-by expression to this group. If the expression is an AggregationResult, it will be added to the + * list of this group's AggregationResults, and a reference to that expression is added instead. If the + * AggregationResult is already present, a reference to THAT result is created instead.

+ * + * @param exp The result to add. + * @param asc True to sort ascending, false to sort descending. + * @return This, to allow chaining. + */ + public Group addOrderBy(ExpressionNode exp, boolean asc) { + if (exp instanceof AggregationResult) { + exp = new AggregationRefNode((AggregationResult)exp); + } + exp.select(REF_LOCATOR, new RefResolver(this)); + orderByExp.add(exp); + orderByIdx.add((asc ? 1 : -1) * orderByExp.size()); + return this; + } + + public List getOrderByIndexes() { + return Collections.unmodifiableList(orderByIdx); + } + + public List getOrderByExpressions() { + return Collections.unmodifiableList(orderByExp); + } + + private int compareId(Group rhs) { + return getId().compareTo(rhs.getId()); + } + + private int compareRank(Group rhs) { + long diff = 0; + for (int i = 0, m = orderByIdx.size(); (diff == 0) && (i < m); i++) { + int rawIndex = orderByIdx.get(i); + int index = ((rawIndex < 0) ? -rawIndex : rawIndex) - 1; + diff = orderByExp.get(index).getResult().compareTo(rhs.orderByExp.get(index).getResult()); + diff = diff * rawIndex; + } + if (diff < 0) { + return -1; + } + if (diff > 0) { + return 1; + } + if (rank > rhs.rank) { + return -1; + } + if (rank < rhs.rank) { + return 1; + } + return 0; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, id); + buf.putDouble(null, rank); + int sz = orderByIdx.size(); + buf.putInt(null, sz); + for (Integer index : orderByIdx) { + buf.putInt(null, index); + } + int numResults = aggregationResults.size(); + buf.putInt(null, numResults); + for (AggregationResult a : aggregationResults) { + serializeOptional(buf, a); + } + int numExpressionResults = orderByExp.size(); + buf.putInt(null, numExpressionResults); + for (ExpressionNode e : orderByExp) { + serializeOptional(buf, e); + } + int numGroups = children.size(); + buf.putInt(null, numGroups); + for (Group g : children) 
{ + g.serializeWithId(buf); + } + buf.putInt(null, tag); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + id = (ResultNode)deserializeOptional(buf); + rank = buf.getDouble(null); + orderByIdx.clear(); + int orderByCount = buf.getInt(null); + for (int i = 0; i < orderByCount; i++) { + orderByIdx.add(buf.getInt(null)); + } + int numResults = buf.getInt(null); + for (int i = 0; i < numResults; i++) { + AggregationResult e = (AggregationResult)deserializeOptional(buf); + aggregationResults.add(e); + } + int numExpressionResults = buf.getInt(null); + RefResolver resolver = new RefResolver(this); + for (int i = 0; i < numExpressionResults; i++) { + ExpressionNode exp = (ExpressionNode)deserializeOptional(buf); + exp.select(REF_LOCATOR, resolver); + orderByExp.add(exp); + } + int numGroups = buf.getInt(null); + for (int i = 0; i < numGroups; i++) { + Group g = new Group(); + g.deserializeWithId(buf); + children.add(g); + } + tag = buf.getInt(null); + } + + @Override + public int hashCode() { + return super.hashCode() + aggregationResults.hashCode() + children.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + Group rhs = (Group)obj; + if (!equals(id, rhs.id)) { + return false; + } + if (rank != rhs.rank) { + return false; + } + if (!aggregationResults.equals(rhs.aggregationResults)) { + return false; + } + if (!orderByIdx.equals(rhs.orderByIdx)) { + return false; + } + if (!orderByExp.equals(rhs.orderByExp)) { + return false; + } + if (!children.equals(rhs.children)) { + return false; + } + return true; + } + + @Override + public Group clone() { + Group obj = (Group)super.clone(); + if (id != null) { + obj.id = (ResultNode)id.clone(); + } + obj.aggregationResults = new ArrayList(); + for (AggregationResult result : aggregationResults) { + obj.aggregationResults.add(result.clone()); + } + obj.orderByIdx = new ArrayList(); + for (Integer idx : orderByIdx) { + 
obj.orderByIdx.add(idx); + } + obj.orderByExp = new ArrayList(); + RefResolver resolver = new RefResolver(obj); + for (ExpressionNode exp : orderByExp) { + exp = exp.clone(); + exp.select(REF_LOCATOR, resolver); + obj.orderByExp.add(exp); + } + obj.children = new ArrayList(); + for (Group child : children) { + obj.children.add(child.clone()); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("id", id); + visitor.visit("rank", rank); + visitor.visit("aggregationresults", aggregationResults); + visitor.visit("orderby-idx", orderByIdx); + visitor.visit("orderby-exp", orderByExp); + visitor.visit("children", children); + visitor.visit("tag", tag); + } + + @Override + public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) { + for (AggregationResult result : aggregationResults) { + result.select(predicate, operation); + } + for (ExpressionNode exp : orderByExp) { + exp.select(predicate, operation); + } + } + + private static enum SortType { + UNSORTED, + BYRANK, + BYID + } + + private static class RefLocator implements ObjectPredicate { + + @Override + public boolean check(Object obj) { + return obj instanceof AggregationRefNode; + } + } + + private static class RefResolver implements ObjectOperation { + + final List results; + + RefResolver(Group group) { + this.results = group.aggregationResults; + } + + @Override + public void execute(Object obj) { + AggregationRefNode ref = (AggregationRefNode)obj; + int idx = ref.getIndex(); + if (idx < 0) { + AggregationResult res = ref.getExpression(); + idx = indexOf(res); + if (idx < 0) { + idx = results.size(); + results.add(res); + } + ref.setIndex(idx); + } else { + ref.setExpression(results.get(idx)); + } + } + + int indexOf(AggregationResult lhs) { + int prevTag = lhs.getTag(); + for (int i = 0, len = results.size(); i < len; ++i) { + AggregationResult rhs = results.get(i); + lhs.setTag(rhs.getTag()); + if 
(lhs.equals(rhs)) { + return i; + } + } + lhs.setTag(prevTag); + return -1; + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java new file mode 100644 index 00000000000..6e384e6e0b5 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Grouping.java @@ -0,0 +1,445 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.BucketResultNode; +import com.yahoo.searchlib.expression.NullResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.*; + +import java.util.ArrayList; +import java.util.List; + +public class Grouping extends Identifiable { + + // Force load all of expression and aggregation when using this class. + static { + com.yahoo.searchlib.aggregation.ForceLoad.forceLoad(); + com.yahoo.searchlib.expression.ForceLoad.forceLoad(); + } + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 91, Grouping.class); + + // The client id for this grouping request. + private int id = 0; + + // Whether or not this grouping is valid. + private boolean valid = true; + + // Whether or not to group all hits or only those with hits. Only applicable for streaming search. + private boolean all = false; + + // How many hits to group per backend node. + private long topN = -1; + + // The level to start grouping in backend. This also instantiates the next level, if any. + private int firstLevel = 0; + + // The last level to group in backend. + private int lastLevel = 0; + + private boolean forceSinglePass = false; + + // Details for each level except root. + private List groupingLevels = new ArrayList<>(); + + // Actual root group, does not require level details. + private Group root = new Group(); + + /** + *

Constructs an empty grouping request. NOTE: This instance is broken until non-optional member data is
+     * set.
+     */
+    public Grouping() {
+        // Nothing to do here; the superclass constructor is invoked implicitly.
+    }
+
+    /**
+     *

Constructs a grouping request and assigns it the given client id.
+     *
+     * @param id The client id for this grouping request.
+     */
+    public Grouping(int id) {
+        // The superclass constructor runs implicitly; the id is stored through the setter.
+        setId(id);
+    }
+
+    /**
+     *

Merges the content of the given grouping into this one by merging its root group into this root group.
+     *
+     * @param rhs The grouping to merge with.
+     */
+    public void merge(Grouping rhs) {
+        final Group otherRoot = rhs.root;
+        this.root.merge(this.firstLevel, 0, otherRoot);
+    }
+
+    /**
+     *

This method is invoked after merging is done. It is intended to be used for resolving any dependencies or
+     * derivatives that might have changed due to the merge. The work is delegated to the root group.
+     */
+    public void postMerge() {
+        this.root.postMerge(this.groupingLevels, this.firstLevel, 0);
+    }
+
+    /**
+     *

Returns the client id of this grouping request.
+     *
+     * @return The identifier.
+     */
+    public int getId() {
+        return this.id;
+    }
+
+    /**
+     *

Sets the client id for this grouping request. The value is stored as given; no validation is performed.
+     *
+     * @param id The identifier to set.
+     * @return This, to allow chaining.
+     */
+    public Grouping setId(int id) {
+        this.id = id;
+        return this;
+    }
+
+    /**
+     *

Returns whether or not this grouping request is valid. The flag starts out as true and, within this class, is
+     * only overwritten when the object is deserialized.
+     *
+     * @return True if valid.
+     */
+    public boolean valid() {
+        return this.valid;
+    }
+
+    /**
+     *

Returns whether or not to perform grouping on the entire document corpus instead of only those matching the
+     * search criteria. Please see note on {@link #setAll(boolean)}.
+     *
+     * @return True if grouping all documents.
+     */
+    public boolean getAll() {
+        return this.all;
+    }
+
+    /**
+     *

Sets whether or not to perform grouping on the entire document corpus instead of only those matching the
+     * search criteria. NOTE: This is only possible with streaming search. The flag is merely stored here.
+     *
+     * @param all True to group all documents.
+     * @return This, to allow chaining.
+     */
+    public Grouping setAll(boolean all) {
+        this.all = all;
+        return this;
+    }
+
+    /**
+     *

Returns the number of candidate documents to group. The field is initialised to -1.
+     *
+     * @return The number.
+     */
+    public long getTopN() {
+        return this.topN;
+    }
+
+    /**
+     *

Sets the number of candidate documents to group. The value is stored as given; no range check is performed.
+     *
+     * @param topN The number to set.
+     * @return This, to allow chaining.
+     */
+    public Grouping setTopN(long topN) {
+        this.topN = topN;
+        return this;
+    }
+
+    /**
+     *

Returns the first level to start grouping work. See note on {@link #setFirstLevel(int)}.
+     *
+     * @return The first level.
+     */
+    public int getFirstLevel() {
+        return this.firstLevel;
+    }
+
+    /**
+     *

Sets the first level to start grouping work. All the necessary work above this group level is expected to be
+     * already done.
+     *
+     * @param level The level to set.
+     * @return This, to allow chaining.
+     */
+    public Grouping setFirstLevel(int level) {
+        this.firstLevel = level;
+        return this;
+    }
+
+    /**
+     *

Returns the last level to do grouping work. See note on {@link #setLastLevel(int)}.
+     *
+     * @return The last level.
+     */
+    public int getLastLevel() {
+        return this.lastLevel;
+    }
+
+    /**
+     *

Sets the last level to do grouping work. Executing a level will instantiate the {@link Group} objects for the
+     * next level, if there is any. This means that grouping work ends at this level, but also instantiates the groups
+     * for level (lastLevel + 1).
+     *
+     * @param level The level to set.
+     * @return This, to allow chaining.
+     */
+    public Grouping setLastLevel(int level) {
+        this.lastLevel = level;
+        return this;
+    }
+
+    /**
+     *

Returns the list of grouping levels that make up this grouping request. The internal list itself is returned,
+     * not a copy, so changes made through it affect this grouping.
+     *
+     * @return The list.
+     */
+    public List<GroupingLevel> getLevels() {
+        return groupingLevels;
+    }
+
+    /**
+     *

Appends the given grouping level specification to the list of levels.

+ * + * @param level The level to add. + * @return This, to allow chaining. + * @throws NullPointerException If level argument is null. + */ + public Grouping addLevel(GroupingLevel level) { + level.getClass(); // throws NullPointerException + groupingLevels.add(level); + return this; + } + + /** + *

Returns the root group. The stored instance is returned, not a copy.
+     *
+     * @return The root.
+     */
+    public Group getRoot() {
+        return this.root;
+    }
+
+    /**
+     *

Sets the root group.

+ * + * @param root The group to set as root. + * @return This, to allow chaining. + * @throws NullPointerException If root argument is null. + */ + public Grouping setRoot(Group root) { + root.getClass(); // throws NullPointerException + this.root = root; + return this; + } + + /** + *

Returns whether or not single pass execution of grouping is forced.
+     *
+     * @return True if single pass grouping is forced.
+     */
+    public boolean getForceSinglePass() {
+        return this.forceSinglePass;
+    }
+
+    /**
+     *

Sets whether or not grouping should be forced to execute in a single pass. If false, this Grouping
+     * might still execute in a single pass due to other constraints.
+     *
+     * @param forceSinglePass True to force execution in single pass.
+     * @return This, to allow chaining.
+     */
+    public Grouping setForceSinglePass(boolean forceSinglePass) {
+        this.forceSinglePass = forceSinglePass;
+        return this;
+    }
+
+    /**
+     *

Returns whether or not grouping should be executed in a single pass. That is the case when deep result
+     * collection is needed or when single pass execution is forced.
+     *
+     * @return True if grouping should be executed in a single pass.
+     */
+    public boolean useSinglePass() {
+        if (needDeepResultCollection()) {
+            return true;
+        }
+        return getForceSinglePass();
+    }
+
+    /**
+     *

Tell if ordering will need results collected in children. in that case we will probably just do a single + * pass.

+ * + * @return If deeper resultcollection is needed. + */ + public boolean needDeepResultCollection() { + if (forceSinglePass) { + return true; + } + for (GroupingLevel level : groupingLevels) { + if (level.needResultCollection()) { + return true; + } + } + return false; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putInt(null, id); + byte tmp = valid ? (byte)1 : (byte)0; + buf.putByte(null, tmp); + tmp = all ? (byte)1 : (byte)0; + buf.putByte(null, tmp); + buf.putLong(null, topN); + buf.putInt(null, firstLevel); + buf.putInt(null, lastLevel); + buf.putInt(null, groupingLevels.size()); + for (GroupingLevel level : groupingLevels) { + level.serializeWithId(buf); + } + root.serializeWithId(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + id = buf.getInt(null); + byte tmp = buf.getByte(null); + valid = (tmp != 0); + tmp = buf.getByte(null); + all = (tmp != 0); + topN = buf.getLong(null); + firstLevel = buf.getInt(null); + lastLevel = buf.getInt(null); + int numLevels = buf.getInt(null); + for (int i = 0; i < numLevels; i++) { + GroupingLevel level = new GroupingLevel(); + level.deserializeWithId(buf); + groupingLevels.add(level); + } + root.deserializeWithId(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + id + (valid ? 66 : 99) + (all ? 
666 : 999) + (int)topN + groupingLevels.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + Grouping rhs = (Grouping)obj; + if (id != rhs.id) { + return false; + } + if (valid != rhs.valid) { + return false; + } + if (all != rhs.all) { + return false; + } + if (topN != rhs.topN) { + return false; + } + if (firstLevel != rhs.firstLevel) { + return false; + } + if (lastLevel != rhs.lastLevel) { + return false; + } + if (!groupingLevels.equals(rhs.groupingLevels)) { + return false; + } + if (!root.equals(rhs.root)) { + return false; + } + return true; + } + + @Override + public Grouping clone() { + Grouping obj = (Grouping)super.clone(); + obj.groupingLevels = new ArrayList<>(); + for (GroupingLevel level : groupingLevels) { + obj.groupingLevels.add(level.clone()); + } + obj.root = root.clone(); + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("id", id); + visitor.visit("valid", valid); + visitor.visit("all", all); + visitor.visit("topN", topN); + visitor.visit("firstLevel", firstLevel); + visitor.visit("lastLevel", lastLevel); + visitor.visit("groupingLevels", groupingLevels); + visitor.visit("root", root); + } + + @Override + public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) { + selectGroups(predicate, operation, root, firstLevel, lastLevel, 0); + } + + public void unifyNull() { + class FindGroup implements ObjectPredicate { + + @Override + public boolean check(Object obj) { + return obj instanceof Group; + } + } + class UnifyNullGroupId implements ObjectOperation { + + @Override + public void execute(Object obj) { + Group group = (Group)obj; + ResultNode id = group.getId(); + if (id instanceof BucketResultNode && ((BucketResultNode)id).empty()) { + group.setId(new NullResultNode()); + } + } + } + selectMembers(new FindGroup(), new UnifyNullGroupId()); + } + + /** + *

This is a helper function to perform recursive traversal of all groups contained in this grouping object. It + * is invoked by the {@link #selectMembers(ObjectPredicate, ObjectOperation)} method and itself. This method will + * only evaluate the groups that belong to active levels.

+ * + * @param predicate The object predicate to evaluate. + * @param operation The operation to execute when the predicate is true. + * @param group The group to evaluate. + * @param first The first active level. + * @param last The last active level. + * @param current The level being evaluated. + */ + private static void selectGroups(ObjectPredicate predicate, ObjectOperation operation, + Group group, int first, int last, int current) + { + if (current > last) { + return; + } + if (current >= first) { + group.select(predicate, operation); + } + for (Group child : group.getChildren()) { + selectGroups(predicate, operation, child, first, last, current + 1); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java new file mode 100644 index 00000000000..7e10507a57a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/GroupingLevel.java @@ -0,0 +1,184 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.ExpressionNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +public class GroupingLevel extends Identifiable { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 93, GroupingLevel.class); + + // The maximum number of groups allowed at this level. + private long maxGroups = -1; + + // The precsicion used for estimation. This is number of groups returned up when using orderby that need more info to get it correct. + private long precision = -1; + + // The classifier expression; the result of this is the group key. 
+ private ExpressionNode classify = null; + + // The prototype of the groups to create for each class. + private Group collect = new Group(); + + /** + *

Returns the precision (i.e. number of groups) returned up from this level.
+     *
+     * @return The precision.
+     */
+    public long getPrecision() {
+        return this.precision;
+    }
+
+    /**
+     *

Returns the maximum number of groups allowed at this level. The field is initialised to -1.
+     *
+     * @return The maximum number.
+     */
+    public long getMaxGroups() {
+        return this.maxGroups;
+    }
+
+    /**
+     *

Sets the maximum number of groups allowed at this level. If the current precision is lower than the new
+     * maximum, the precision is raised to match it.
+     *
+     * @param max The maximum number to set.
+     * @return This, to allow chaining.
+     */
+    public GroupingLevel setMaxGroups(long max) {
+        this.maxGroups = max;
+        // Precision never ends up below the maximum after this call.
+        this.precision = Math.max(this.precision, this.maxGroups);
+        return this;
+    }
+
+    /**
+     *

Sets the precision (i.e. number of groups) returned up from this level. The value is stored as given; it is
+     * not compared against the maximum number of groups here.
+     *
+     * @param precision The precision to set.
+     * @return This, to allow chaining.
+     */
+    public GroupingLevel setPrecision(long precision) {
+        this.precision = precision;
+        return this;
+    }
+
+    /**
+     *

Returns the expression used to classify hits into groups. This is null until an expression has been set or
+     * deserialized.
+     *
+     * @return The classifier expression.
+     */
+    public ExpressionNode getExpression() {
+        return this.classify;
+    }
+
+    /**
+     *

Sets the expression used to classify hits into groups. The given instance is stored as is, not copied.
+     *
+     * @param exp The classifier expression to set.
+     * @return This, to allow chaining.
+     */
+    public GroupingLevel setExpression(ExpressionNode exp) {
+        this.classify = exp;
+        return this;
+    }
+
+    /**
+     *

Sets the prototype to use when creating groups at this level. The given instance is stored as is, not copied.
+     * NOTE(review): null is not rejected here, although other methods of this class dereference the prototype
+     * unconditionally; callers should presumably never pass null.
+     *
+     * @param group The group prototype.
+     * @return This, to allow chaining.
+     */
+    public GroupingLevel setGroupPrototype(Group group) {
+        this.collect = group;
+        return this;
+    }
+
+    /**
+     *

Returns the prototype to use when creating groups at this level. The stored instance is returned, not a copy.
+     *
+     * @return The group prototype.
+     */
+    public Group getGroupPrototype() {
+        return this.collect;
+    }
+
+    /**
+     *

Tell if ordering will need results collected in children.

+ * + * @return If deeper resultcollection is needed. + */ + public boolean needResultCollection() { + return !collect.isRankedByRelevance(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putLong(null, maxGroups); + buf.putLong(null, precision); + serializeOptional(buf, classify); + collect.serializeWithId(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + maxGroups = buf.getLong(null); + precision = buf.getLong(null); + classify = (ExpressionNode)deserializeOptional(buf); + collect.deserializeWithId(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + (int)maxGroups + (int)precision + collect.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + GroupingLevel rhs = (GroupingLevel)obj; + if (maxGroups != rhs.maxGroups) { + return false; + } + if (precision != rhs.precision) { + return false; + } + if (!equals(classify, rhs.classify)) { + return false; + } + if (!collect.equals(rhs.collect)) { + return false; + } + return true; + } + + @Override + public GroupingLevel clone() { + GroupingLevel obj = (GroupingLevel)super.clone(); + if (classify != null) { + obj.classify = classify.clone(); + } + obj.collect = collect.clone(); + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("maxGroups", maxGroups); + visitor.visit("precision", precision); + visitor.visit("classify", classify); + visitor.visit("collect", collect); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java new file mode 100644 index 00000000000..8c5db8a6ecc --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/Hit.java @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This class represents a generic hit with a rank value. Actual hits are represented using subclasses of this class. + * + * @author Haavard Pettersen + */ +public abstract class Hit extends Identifiable { + + public static final int classId = registerClass(0x4000 + 94, Hit.class); // shared with c++ + private Object context = null; + private double rank = 0.0; + + /** + * Constructs an empty result node. + */ + public Hit() { + // empty + } + + /** + * Create a new hit with the given rank + * + * @param rank generic rank value + */ + public Hit(double rank) { + this.rank = rank; + } + + /** + * Obtain the rank of this hit. This is a comparable rank to allow multilevel sorting on arbitrary rank type. + * + * @return generic rank value + */ + public double getRank() { + return rank; + } + + /** + * Returns the context object of this hit. + * + * @return The context object. + */ + public Object getContext() { + return context; + } + + /** + * Sets the context object of this hit. This is not serialized, and is merely a tag used by the QRS. + * + * @param context The context to set. + * @return This, to allow chaining. 
+ */ + public Hit setContext(Object context) { + this.context = context; + return this; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putDouble(null, rank); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + rank = buf.getDouble(null); + } + + @Override + public int hashCode() { + return super.hashCode() + (int)rank; + } + + @SuppressWarnings({ "RedundantIfStatement", "EqualsWhichDoesntCheckParameterClass" }) + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + Hit rhs = (Hit)obj; + if (rank != rhs.rank) { + return false; + } + if (!equals(context, rhs.context)) { + return false; + } + return true; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("rank", rank); + visitor.visit("context", context); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java new file mode 100644 index 00000000000..6d5d95bbcc0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/HitsAggregationResult.java @@ -0,0 +1,218 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.FloatResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.text.Utf8; +import com.yahoo.vespa.objects.*; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * This is an aggregated result holding the top n hits for a single group. 
+ * + * @author Haavard Pettersen + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class HitsAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 87, HitsAggregationResult.class); + private String summaryClass = "default"; + private int maxHits = -1; + private List hits = new ArrayList(); + + /** + * Constructs an empty result node. + */ + public HitsAggregationResult() { + // empty + } + + /** + * Create a hits aggregation result that will collect the given number of hits + * + * @param maxHits maximum number of hits to collect + */ + public HitsAggregationResult(int maxHits) { + this.maxHits = maxHits; + } + + /** + * Create a hits aggregation result that will collect the given number of hits of the summaryClass asked. + * + * @param maxHits maximum number of hits to collect + * @param summaryClass SummaryClass to use for hits to collect + */ + public HitsAggregationResult(int maxHits, String summaryClass) { + this.summaryClass = summaryClass; + this.maxHits = maxHits; + } + + /** + * Obtain the summary class used to collect the hits. + * + * @return The summary class id. + */ + public String getSummaryClass() { + return summaryClass; + } + + /** + * Obtain the maximum number of hits to collect. + * + * @return Max number of hits to collect. + */ + public int getMaxHits() { + return maxHits; + } + + /** + * Sets the summary class of hits to collect. + * + * @param summaryClass The summary class to collect. + * @return This, to allow chaining. + */ + public HitsAggregationResult setSummaryClass(String summaryClass) { + this.summaryClass = summaryClass; + return this; + } + + /** + * Sets the maximum number of hits to collect. + * + * @param maxHits The number of hits to collect. + * @return This, to allow chaining. 
+ */ + public HitsAggregationResult setMaxHits(int maxHits) { + this.maxHits = maxHits; + return this; + } + + /** + * Obtain the hits collected by this aggregation result + * + * @return collected hits + */ + public List getHits() { + return hits; + } + + /** + * Add a hit to this aggregation result + * + * @param h the hit + * @return this object + */ + public HitsAggregationResult addHit(Hit h) { + hits.add(h); + return this; + } + + @Override + public ResultNode getRank() { + if (hits.isEmpty()) { + return new FloatResultNode(0); + } + return new FloatResultNode(hits.get(0).getRank()); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + byte[] raw = Utf8.toBytes(summaryClass); + buf.putInt(null, raw.length); + buf.put(null, raw); + + buf.putInt(null, maxHits); + int numHits = hits.size(); + buf.putInt(null, numHits); + for (Hit h : hits) { + serializeOptional(buf, h); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + summaryClass = getUtf8(buf); + maxHits = buf.getInt(null); + int numHits = buf.getInt(null); + for (int i = 0; i < numHits; i++) { + Hit h = (Hit)deserializeOptional(buf); + hits.add(h); + } + } + + @Override + protected void onMerge(AggregationResult result) { + hits.addAll(((HitsAggregationResult)result).hits); + } + + @Override + public void postMerge() { + Collections.sort(hits, new Comparator() { + public int compare(Hit lhs, Hit rhs) { + return (lhs.getRank() > rhs.getRank()) ? -1 : (lhs.getRank() < rhs.getRank()) ? 
1 : 0; + } + }); + if ((maxHits >= 0) && (hits.size() > maxHits)) { + hits = hits.subList(0, maxHits); + } + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + HitsAggregationResult rhs = (HitsAggregationResult)obj; + if (!summaryClass.equals(rhs.summaryClass)) { + return false; + } + if (maxHits != rhs.maxHits) { + return false; + } + if (!hits.equals(rhs.hits)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + return super.hashCode() + summaryClass.hashCode() + maxHits + hits.hashCode(); + } + + @Override + public HitsAggregationResult clone() { + HitsAggregationResult obj = (HitsAggregationResult)super.clone(); + obj.summaryClass = summaryClass; + obj.maxHits = maxHits; + obj.hits = new ArrayList(); + for (Hit hit : hits) { + obj.hits.add((Hit)hit.clone()); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("summaryClass", summaryClass); + visitor.visit("maxHits", maxHits); + visitor.visit("hits", hits); + } + + @Override + public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) { + for (Hit hit : hits) { + hit.select(predicate, operation); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java new file mode 100644 index 00000000000..dba44dcf023 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MaxAggregationResult.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.searchlib.expression.SingleResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the maximum result of the matching hits. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MaxAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 83, MaxAggregationResult.class); + private SingleResultNode max; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public MaxAggregationResult() { + + } + + /** + * Constructs an instance of this class with given max value. + * + * @param max The initial maximum to set. + */ + public MaxAggregationResult(SingleResultNode max) { + setMax(max); + } + + /** + * Returns the maximum value found in all matching hits. + * + * @return The value. + */ + public final SingleResultNode getMax() { + return max; + } + + /** + * Sets the maximum value found in all matching hits. + * + * @param max The value. + * @return This, to allow chaining. 
+ */ + public final MaxAggregationResult setMax(SingleResultNode max) { + this.max = max; + return this; + } + + @Override + public ResultNode getRank() { + return max; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, max); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + max = (SingleResultNode)deserializeOptional(buf); + } + + @Override + protected void onMerge(AggregationResult result) { + max.max(((MaxAggregationResult)result).max); + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + return equals(max, ((MaxAggregationResult)obj).max); + } + + @Override + public MaxAggregationResult clone() { + MaxAggregationResult obj = (MaxAggregationResult)super.clone(); + if (max != null) { + obj.max = (SingleResultNode)max.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("max", max); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java new file mode 100644 index 00000000000..ca8c71e6ede --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/MinAggregationResult.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.searchlib.expression.SingleResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the minimum result of the matching hits. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MinAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 84, MinAggregationResult.class); + private SingleResultNode min; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public MinAggregationResult() { + + } + + /** + * Constructs an instance of this class with given min value. + * + * @param min The initial minimum to set. + */ + public MinAggregationResult(SingleResultNode min) { + setMin(min); + } + + /** + * Returns the minimum value found in all matching hits. + * + * @return The value. + */ + public final SingleResultNode getMin() { + return min; + } + + /** + * Sets the minimum value found in all matching hits. + * + * @param min The value. + * @return This, to allow chaining. + */ + public final MinAggregationResult setMin(SingleResultNode min) { + this.min = min; + return this; + } + + @Override + public ResultNode getRank() { + return min; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, min); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + min = (SingleResultNode)deserializeOptional(buf); + } + + @Override + protected void onMerge(AggregationResult result) { + min.min(((MinAggregationResult)result).min); + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + return equals(min, ((MinAggregationResult)obj).min); + } + + @Override + public MinAggregationResult clone() { + MinAggregationResult obj = (MinAggregationResult)super.clone(); + if (min != null) { + obj.min = (SingleResultNode)min.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("min", min); 
+ } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java new file mode 100755 index 00000000000..7c9dd33477b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/RawData.java @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.Arrays; + +/** + *

This class encapsulates a byte array into a cloneable and comparable object. It also implements a sane {@link + * #hashCode()} and {@link #toString()}.

+ * + * @author Simon Thoresen + */ +public class RawData implements Cloneable, Comparable { + + private byte[] data; + + /** + *

Constructs an empty data object.

+ */ + public RawData() { + data = new byte[0]; + } + + /** + *

Constructs a raw data object that holds the given byte array.

+ * + * @param data The rank to set. + */ + public RawData(byte[] data) { + setData(data); + } + + /** + *

Serializes the content of this data into the given byte buffer.

+ * + * @param buf The buffer to serialize to. + */ + public void serialize(Serializer buf) { + buf.putInt(null, data.length); + buf.put(null, data); + } + + /** + *

Deserializes the content for this data from the given byte buffer.

+ * + * @param buf The buffer to deserialize from. + */ + public void deserialize(Deserializer buf) { + int len = buf.getInt(null); + data = buf.getBytes(null, len); + } + + /** + *

Returns the byte array that constitutes this data.

+ * + * @return The byte array. + */ + public byte[] getData() { + return data; + } + + /** + *

Sets the byte array that constitutes this data. This does not copy the given array, it simply assigns + * it to this.

+ * + * @param data The data to set. + * @return This, to allow chaining. + */ + public RawData setData(byte[] data) { + if (data == null) { + throw new IllegalArgumentException("Data can not be null."); + } + this.data = data; + return this; + } + + @Override + public int compareTo(RawData rhs) { + return compare(data, rhs.data); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof RawData)) { + return false; + } + RawData rhs = (RawData)obj; + if (!Arrays.equals(data, rhs.data)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } + + @Override + public String toString() { + return "RawData(data = " + Arrays.toString(data) + ")"; + } + + @Override + public Object clone() { + return new RawData(Arrays.copyOf(data, data.length)); + } + + /** + *

Implements comparison of two byte arrays.

+ * + * @param lhs The left-hand-side of the comparison. + * @param rhs The right-hand-side of the comparison. + * @return The result of comparing the two byte arrays. + */ + public static int compare(byte[] lhs, byte[] rhs) { + int cmp = 0; + for (int i = 0, len = Math.min(lhs.length, rhs.length); (i < len) && (cmp == 0); i++) { + int a = lhs[i] & 0xFF; + int b = rhs[i] & 0xFF; + cmp = a - b; + } + if (cmp == 0) { + cmp = lhs.length - rhs.length; + } + return cmp; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java new file mode 100644 index 00000000000..88e61d98ba0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/SumAggregationResult.java @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.searchlib.expression.SingleResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the sum of the aggregating expression for all matching hits. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class SumAggregationResult extends AggregationResult { + + public static final int classId = registerClass(0x4000 + 82, SumAggregationResult.class); + private SingleResultNode sum; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public SumAggregationResult() { + + } + + /** + * Constructs an instance of this class with given sum. + * + * @param sum The initial sum to set. + */ + public SumAggregationResult(SingleResultNode sum) { + setSum(sum); + } + + /** + * Returns the sum of all results in this. 
+ * + * @return The numeric sum. + */ + public final SingleResultNode getSum() { + return sum; + } + + /** + * Sets the sum of all results in this. + * + * @param sum The sum to set. + * @return This, to allow chaining. + */ + public final SumAggregationResult setSum(SingleResultNode sum) { + this.sum = sum; + return this; + } + + @Override + public ResultNode getRank() { + return sum; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, sum); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + sum = (SingleResultNode)deserializeOptional(buf); + } + + @Override + protected void onMerge(AggregationResult result) { + sum.add(((SumAggregationResult)result).sum); + } + + @Override + public SumAggregationResult clone() { + SumAggregationResult obj = (SumAggregationResult)super.clone(); + if (sum != null) { + obj.sum = (SingleResultNode)sum.clone(); + } + return obj; + } + + @Override + protected boolean equalsAggregation(AggregationResult obj) { + return equals(sum, ((SumAggregationResult)obj).sum); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("sum", sum); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java new file mode 100644 index 00000000000..adecdee8401 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/VdsHit.java @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.text.Utf8; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +public class VdsHit extends Hit { + + public static final int classId = registerClass(0x4000 + 96, VdsHit.class); + private String docId = ""; + private RawData summary = new RawData(); + + @SuppressWarnings("UnusedDeclaration") + public VdsHit() { + // user by deserializer + } + + /** + * Create a hit with the given path and document id. + * + * @param summary The summary blob standard fs4 coding. + * @param docId The local document id. + * @param rank The rank of this hit. + */ + public VdsHit(String docId, byte[] summary, double rank) { + super(rank); + this.docId = docId; + this.summary = new RawData(summary); + } + + /** + * Obtain the summary blob for this hit. + * + * @return The summary blob. + */ + public RawData getSummary() { + return summary; + } + + /** + * Obtain the local document id of this hit. + * + * @return The local document id. 
+ */ + public String getDocId() { + return docId; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + byte[] utf8 = Utf8.toBytes(docId); + buf.putInt(null, utf8.length); + buf.put(null, utf8); + summary.serialize(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + docId = getUtf8(buf); + summary.deserialize(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + docId.hashCode() + summary.hashCode(); + } + + @Override + public boolean equals(Object obj) { + VdsHit rhs = (VdsHit)obj; + return super.equals(obj) && + docId.equals(rhs.docId) && + summary.equals(rhs.summary); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("docId", docId); + visitor.visit("summary", summary); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java new file mode 100644 index 00000000000..ee171be0c4b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/XorAggregationResult.java @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.ResultNode; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an aggregated result holding the xor of the aggregating expression for all matching hits. 
+ *
+ * @author Henning Baldersheim
+ * @author Simon Thoresen
+ */
+public class XorAggregationResult extends AggregationResult {
+
+    public static final int classId = registerClass(0x4000 + 86, XorAggregationResult.class);
+    private long xor = 0;
+
+    /**
+     * Constructs a result node whose xor value is 0.
+     */
+    public XorAggregationResult() {
+
+    }
+
+    /**
+     * Constructs an instance of this class with given xor value.
+     *
+     * @param xor The initial xor value to set.
+     */
+    public XorAggregationResult(long xor) {
+        setXor(xor);
+    }
+
+    /**
+     * Returns the current xor value.
+     *
+     * @return The value.
+     */
+    public long getXor() {
+        return xor;
+    }
+
+    /**
+     * Sets the current xor value.
+     *
+     * @param xor The value to set.
+     * @return This, to allow chaining.
+     */
+    public XorAggregationResult setXor(long xor) {
+        this.xor = xor;
+        return this;
+    }
+
+    @Override
+    public ResultNode getRank() {
+        return new IntegerResultNode(xor); // a new node wrapping the current xor value is created on every call
+    }
+
+    @Override
+    protected int onGetClassId() {
+        return classId;
+    }
+
+    @Override
+    protected void onSerialize(Serializer buf) {
+        super.onSerialize(buf);
+        buf.putLong(null, xor);
+    }
+
+    @Override
+    protected void onDeserialize(Deserializer buf) {
+        super.onDeserialize(buf);
+        xor = buf.getLong(null);
+    }
+
+    @Override
+    protected void onMerge(AggregationResult result) {
+        xor = xor ^ ((XorAggregationResult)result).xor; // the cast fails unless result is a XorAggregationResult
+    }
+
+    @Override
+    protected boolean equalsAggregation(AggregationResult obj) {
+        return xor == ((XorAggregationResult)obj).xor; // the cast fails unless obj is a XorAggregationResult
+    }
+
+    @Override
+    public int hashCode() {
+        return super.hashCode() + (int)xor; // the narrowing cast keeps only the low 32 bits of xor
+    }
+
+    @Override
+    public void visitMembers(ObjectVisitor visitor) {
+        super.visitMembers(visitor);
+        visitor.visit("xor", xor);
+    }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java
new file mode 100644
index
00000000000..54651bdfae4
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/BiasEstimator.java
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation.hll;
+
+import com.google.common.base.Preconditions;
+
+import java.util.Arrays;
+
+/**
+ * Performs bias correction for a given precision and raw estimate.
+ * The values are taken from Google's HLL++ paper:
+ * https://docs.google.com/document/d/1gyjfMHy43U9OWBXxfaeG-3MjGzejW1dlpyMwEYAAWEI/view?fullscreen#
+ *
+ * @author bjorncs
+ */
+public class BiasEstimator {
+    // Raw estimate data for the selected precision (one row of rawEstimateDataAllPrecisions)
+    private final double[] rawEstimateData;
+    // Raw bias data for the selected precision (one row of biasDataAllPrecisions)
+    private final double[] biasData;
+
+    /**
+     * Constructs the BiasEstimator for a given HLL precision.
+     *
+     * @param precision HLL precision; must satisfy 4 &lt;= precision &lt;= 18.
+     */
+    public BiasEstimator(int precision) {
+        Preconditions.checkArgument(precision >= 4 && precision <= 18,
+                "Invalid precision: %s. Expected 4 <= precision <= 18.", precision);
+        this.rawEstimateData = rawEstimateDataAllPrecisions[precision - 4];
+        this.biasData = biasDataAllPrecisions[precision - 4];
+    }
+
+    /**
+     * Maps a given raw estimate to a bias correction value. The caller should subtract the bias from the raw estimate
+     * to get a bias corrected HLL estimate. Uses linear interpolation when no exact value exists.
+     *
+     * @param rawEstimate The raw HLL estimate
+     * @return The estimated bias for the given raw estimate.
+     */
+    public double estimateBias(double rawEstimate) {
+        int index = Arrays.binarySearch(rawEstimateData, rawEstimate);
+        // Check whether rawEstimate is present in rawEstimateData (binarySearch returns a non-negative index if so).
+ if (index >= 0) { + return biasData[index]; + } else { + int insertionIndex = -index - 1; + if (insertionIndex == 0) { + return biasData[0]; + } else if (insertionIndex == biasData.length) { + return biasData[biasData.length - 1]; + } else { + //Perform linear interpolation + double x0 = rawEstimateData[insertionIndex - 1]; + double x1 = rawEstimateData[insertionIndex]; + double f0 = biasData[insertionIndex - 1]; + double f1 = biasData[insertionIndex]; + return linearInterpolationOf(x0, x1, f0, f1, rawEstimate); + } + } + } + + private static double linearInterpolationOf(double x0, double x1, double f0, double f1, double x) { + return f0 + (f1 - f0) / (x1 - x0) * (x - x0); + } + + private static final double[][] rawEstimateDataAllPrecisions = { + // precision 4 + { 11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, }, + // precision 5 + { 23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 
62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852, }, + // precision 6 + { 46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 
179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858, }, + // precision 7 + { 92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 
366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102, }, + // precision 8 + { 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 
744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 1274.5192, }, + // precision 9 + { 369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 
1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768, }, + // precision 10 + { 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 
2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, }, + // precision 11 + { 1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 
4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176, }, + // precision 12 + { 2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 
6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, }, + // precision 13 + { 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 
9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, }, 
+ // precision 14 + { 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 
68420.0388, 68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, }, + // precision 15 + { 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 
108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 134431.351, 135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, }, + // precision 16 + { 47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 
152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542, }, + // precision 17 + { 94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 
186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845, }, + // precision 18 + { 189084, 192250.913, 
195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359, 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 1094776.005, 1101327.448, 
1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, } + }; + + private static final double[][] biasDataAllPrecisions = { + // precision 4 + { 10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606, }, + // precision 5 + { 22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 
3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 0.163399999999996, -0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014, }, + // precision 6 + { 45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 
30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 
0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983, }, + // precision 7 + { 91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 
3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 1.60659999999996, 2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, }, + // precision 8 + { 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 
26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996, }, + // precision 9 + { 368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 
305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, -2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, 
-5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997, }, + // precision 10 + { 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 
36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, 5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041, }, + // precision 11 + { 1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 
630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 3.91139999999996, 13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 
1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993, }, + // precision 12 + { 2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 
67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, }, + // precision 13 + { 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 
1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, 
-33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972, }, + // precision 14 + { 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 
164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, }, + // precision 15 + { 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 
3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 223.915999999997, 215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005, }, 
+ // precision 16 + { 47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 
672.495599999995, 622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, }, + // precision 17 + { 94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 
12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, 
-417.155000000028, }, + // precision 18 + { 189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 
1911.18999999994, 1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892, } + }; +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java new file mode 100644 index 00000000000..20beb642fde --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLog.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +/** + * Contain constants relevant for HyperLogLog classes. + * + * @author bjorncs + */ +public interface HyperLogLog { + /** + * Default HLL precision. + */ + int DEFAULT_PRECISION = 10; + /** + * Threshold to convert sparse sketch to normal sketch. 
+ */ + int SPARSE_SKETCH_CONVERSION_THRESHOLD = 256; +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java new file mode 100644 index 00000000000..7055686a4c0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimator.java @@ -0,0 +1,172 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.google.common.base.Preconditions; + +/** + * Implementation of the result computation phase of the HyperLogLog algorithm. + * Based on the pseudo code from: http://www.dmtcs.org/dmtcs-ojs/index.php/proceedings/article/viewArticle/914 + * + * @author bjorncs + */ +public class HyperLogLogEstimator implements UniqueCountEstimator<Sketch<?>> { + + // Number of buckets in sketch. + private final int nBuckets; + // The bias estimator used to bias correct the raw estimate. + private final BiasEstimator biasEstimator; + // Linear counting threshold. Linear counting will only be used if the linear counting estimate is equal to or below this threshold. + private final int linearCountingThreshold; + // A bias correcting constant used in calculation of raw estimate. + private final double alphaCoefficient; + + /** + * Creates the estimator for a given precision. The resulting memory consumption is exponential in the precision. + * + * @param precision The precision parameter as defined in HLL algorithm. 
+ */ + public HyperLogLogEstimator(int precision) { + Preconditions.checkArgument(precision >= 4 && precision <= 18, "Invalid precision: %s.", precision); + this.nBuckets = 1 << precision; + this.biasEstimator = new BiasEstimator(precision); + this.linearCountingThreshold = getLinearCountingThreshold(precision); + this.alphaCoefficient = getAlphaCoefficient(nBuckets); + } + + + /** + * Creates the estimator with the default precision ({@link HyperLogLog#DEFAULT_PRECISION}). + */ + public HyperLogLogEstimator() { + this(HyperLogLog.DEFAULT_PRECISION); + } + + /** + * Estimates the number of unique elements. + * + * @param sketch A sketch populated with values from the aggregation phase of HLL. + * @return The estimated number of unique elements. + */ + @Override + public long estimateCount(Sketch<?> sketch) { + if (sketch instanceof NormalSketch) { + return estimateCount((NormalSketch) sketch); + } else { + return estimateCount((SparseSketch) sketch); + } + } + + // The sparse sketch contains a set of unique hash values. The size of this set is a good estimator as the + // probability for hash collision is very low. + private long estimateCount(SparseSketch sketch) { + return sketch.size(); + } + + + // Performs the result calculation phase of HLL. Note that the {@link NormalSketch} + // precision must match the one supplied in the constructor. + private long estimateCount(NormalSketch sketch) { + Preconditions.checkArgument(sketch.size() == nBuckets, + "Sketch has invalid size. Expected %s, actual %s.", nBuckets, sketch.size()); + double rawEstimate = calculateRawEstimate(sketch); + if (shouldPerformBiasCorrection(rawEstimate)) { + rawEstimate -= biasEstimator.estimateBias(rawEstimate); + } + + // Use linear counting if sketch contains buckets with 0 value. 
+ int nZeroBuckets = countZeroBuckets(sketch); + if (nZeroBuckets > 0) { + double linearCountingEstimate = calculateLinearCountingEstimate(nZeroBuckets); + if (linearCountingEstimate <= linearCountingThreshold) { + rawEstimate = linearCountingEstimate; + } + } + + return Math.round(rawEstimate); + } + + private double calculateLinearCountingEstimate(int nZeroBuckets) { + return nBuckets * Math.log(nBuckets / (double) nZeroBuckets); + } + + private boolean shouldPerformBiasCorrection(double rawEstimate) { + return rawEstimate <= 5 * nBuckets; + } + + private double calculateRawEstimate(NormalSketch sketch) { + double indicator = calculateIndicator(sketch); + return alphaCoefficient * nBuckets * nBuckets * indicator; + } + + // Calculates the raw indicator, summing up the probabilities for each bucket. + // indicator == 1 / sum(2^(-S[i])) over every bucket i of the sketch + private static double calculateIndicator(NormalSketch sketch) { + double sum = 0; + for (byte prefixLength : sketch.data()) { + sum += Math.pow(2, -prefixLength); + } + return 1 / sum; + } + + private static int countZeroBuckets(NormalSketch sketch) { + int nZeroBuckets = 0; + for (byte prefixLength : sketch.data()) { + if (prefixLength == 0) { + ++nZeroBuckets; + } + } + return nZeroBuckets; + } + + // Empirically determined values from Google HLL++ paper. Decides whether to use linear counting instead of raw HLL estimate. + private static int getLinearCountingThreshold(int precision) { + switch (precision) { + case 4: + return 10; + case 5: + return 20; + case 6: + return 40; + case 7: + return 80; + case 8: + return 220; + case 9: + return 400; + case 10: + return 900; + case 11: + return 1800; + case 12: + return 3100; + case 13: + return 6500; + case 14: + return 11500; + case 15: + return 22000; + case 16: + return 50000; + case 17: + return 120000; + case 18: + return 350000; + default: + // Unreachable code. 
+ throw new RuntimeException(); + } + } + + private static double getAlphaCoefficient(int nBuckets) { + switch (nBuckets) { + case 16: + return 0.673; + case 32: + return 0.697; + case 64: + return 0.709; + default: /* nBuckets >= 128 */ + return 0.7213 / (1 + 1.079 / nBuckets); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java new file mode 100644 index 00000000000..c91f1e82a3b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/NormalSketch.java @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.google.common.base.Preconditions; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; +import net.jpountz.lz4.LZ4Compressor; +import net.jpountz.lz4.LZ4Exception; +import net.jpountz.lz4.LZ4Factory; +import net.jpountz.lz4.LZ4FastDecompressor; + +import java.util.Arrays; + +/** + * Sketch used by the HyperLogLog (HLL) algorithm. + * + * @author bjorncs + */ +public class NormalSketch extends Sketch<NormalSketch> { + + public static final int classId = registerClass(0x4000 + 170, NormalSketch.class); + + private final byte[] data; + private final int bucketMask; + + /** + * Create a sketch with the default precision given by {@link HyperLogLog#DEFAULT_PRECISION}. + * */ + public NormalSketch() { + this(HyperLogLog.DEFAULT_PRECISION); + } + + /** + * Create a sketch with a given HLL precision parameter. + * + * @param precision The precision parameter used by HLL. Determines the size of the sketch. + */ + public NormalSketch(int precision) { + this.data = new byte[1 << precision]; + this.bucketMask = (1 << precision) - 1; // A mask where the lowest `precision` bits are 1. + } + + /** + * Lossless merge of sketches. 
Performs a pairwise maximum on the underlying data array. + * + * @param other Other sketch + */ + @Override + public void merge(NormalSketch other) { + Preconditions.checkArgument(data.length == other.data.length, + "Trying to merge sketch with one of different size. Expected %s, actual %s", data.length, other.data.length); + for (int i = 0; i < data.length; i++) { + data[i] = (byte) Math.max(data[i], other.data[i]); + } + } + + /** + * Aggregates the hash values. + * + * @param hashValues Provides an iterator for the hash values + */ + @Override + public void aggregate(Iterable<Integer> hashValues) { + for (int hash : hashValues) { + aggregate(hash); + } + } + + /** + * Aggregates the hash value. + * + * @param hash Hash value. + */ + @Override + public void aggregate(int hash) { + int existingValue = data[hash & bucketMask]; + int newValue = Integer.numberOfLeadingZeros(hash | bucketMask) + 1; + data[hash & bucketMask] = (byte) Math.max(newValue, existingValue); + } + + /** + * Serializes the Sketch. 
+ * + * Serialization format + * ================== + * Original size: 4 bytes + * Compressed size: 4 bytes + * Compressed data: N * 1 bytes + * + * Invariant: + * compressed size <= original size + * + * Special case: + * compressed size == original size => data is uncompressed + * + * @param buf Serializer + */ + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, data.length); + try { + LZ4Compressor c = LZ4Factory.safeInstance().highCompressor(); + byte[] compressedData = new byte[data.length - 1]; + int compressedSize = c.compress(data, compressedData); + serializeDataArray(compressedData, compressedSize, buf); + } catch (LZ4Exception e) { + // LZ4Compressor.compress will throw this exception if it is unable to compress + // into compressedData (when compressed size >= original size) + serializeDataArray(data, data.length, buf); + } + } + + private static void serializeDataArray(byte[] source, int length, Serializer buf) { + buf.putInt(null, length); + for (int i = 0; i < length; i++) { + buf.putByte(null, source[i]); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int length = buf.getInt(null); + int compressedLength = buf.getInt(null); + Preconditions.checkState(length == data.length, + "Size of serialized sketch does not match expected value. Expected %s, actual %s.", data.length, length); + + if (length == compressedLength) { + deserializeDataArray(data, length, buf); + } else { + LZ4FastDecompressor c = LZ4Factory.safeInstance().fastDecompressor(); + byte[] compressedData = buf.getBytes(null, compressedLength); + c.decompress(compressedData, data); + } + } + + private static void deserializeDataArray(byte[] destination, int length, Deserializer buf) { + for (int i = 0; i < length; i++) { + destination[i] = buf.getByte(null); + } + } + + /** + * Returns the underlying byte array backing the sketch. 
+ * + * @return The underlying sketch data + */ + public byte[] data() { + return data; + } + + /** + * Sketch size. + * + * @return Number of buckets in the sketch. + */ + public int size() { + return data.length; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + NormalSketch sketch = (NormalSketch) o; + + if (!Arrays.equals(data, sketch.data)) return false; + + return true; + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public String toString() { + return "NormalSketch{" + + "data=" + Arrays.toString(data) + + '}'; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java new file mode 100644 index 00000000000..523942f1e3e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/Sketch.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.yahoo.vespa.objects.Identifiable; + +/** + * Represents a sketch. All sketch types must provide a merge method. + * + * @param <T> The type of the sub-class. + */ +public abstract class Sketch<T extends Sketch<T>> extends Identifiable { + /** + * Merge content of other into 'this'. + * + * @param other Other sketch + */ + public abstract void merge(T other); + + /** + * Aggregates the hash values. + * + * @param hashValues Provides an iterator for the hash values + */ + public abstract void aggregate(Iterable<Integer> hashValues); + + /** + * Aggregates the hash value. + * + * @param hash Hash value. 
+ */ + public abstract void aggregate(int hash); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java new file mode 100644 index 00000000000..9d9a67edafb --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SketchMerger.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +/** + * This class is responsible for merging any combination of two {@link Sketch} instances. + */ +public class SketchMerger { + + /** + * Merges one of the two sketches into the other. The merge operation is performed in-place if possible. + * + * @param left Either a {@link NormalSketch} or {@link SparseSketch}. + * @param right Either a {@link NormalSketch} or {@link SparseSketch}. + * @return The merged sketch. Is either the first parameter, the second parameter or a new instance. 
+ */ + public Sketch merge(Sketch left, Sketch right) { + if (left instanceof NormalSketch && right instanceof NormalSketch) { + return mergeNormalWithNormal(asNormal(left), asNormal(right)); + } else if (left instanceof NormalSketch && right instanceof SparseSketch) { + return mergeNormalWithSparse(asNormal(left), asSparse(right)); + } else if (left instanceof SparseSketch && right instanceof NormalSketch) { + return mergeNormalWithSparse(asNormal(right), asSparse(left)); + } else if (left instanceof SparseSketch && right instanceof SparseSketch) { + return mergeSparseWithSparse(asSparse(left), asSparse(right)); + } else { + throw new IllegalArgumentException( + String.format("Invalid sketch types: left=%s, right=%s", right.getClass(), left.getClass())); + } + } + + private Sketch mergeSparseWithSparse(SparseSketch dest, SparseSketch other) { + dest.merge(other); + if (dest.size() > HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD) { + NormalSketch newSketch = new NormalSketch(); + newSketch.aggregate(dest.data()); + return newSketch; + } + return dest; + } + + private NormalSketch mergeNormalWithNormal(NormalSketch dest, NormalSketch other) { + dest.merge(other); + return dest; + } + + private NormalSketch mergeNormalWithSparse(NormalSketch dest, SparseSketch other) { + NormalSketch newSketch = new NormalSketch(); + newSketch.aggregate(other.data()); + dest.merge(newSketch); + return dest; + } + + private static NormalSketch asNormal(Sketch sketch) { + return (NormalSketch) sketch; + } + + private static SparseSketch asSparse(Sketch sketch) { + return (SparseSketch) sketch; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java new file mode 100644 index 00000000000..fbfd08be6b0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/SparseSketch.java @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.HashSet; + +public class SparseSketch extends Sketch<SparseSketch> { + + public static final int classId = registerClass(0x4000 + 171, SparseSketch.class); + private final HashSet<Integer> values = new HashSet<>(); + + @Override + public void merge(SparseSketch other) { + values.addAll(other.values); + } + + /** + * Aggregates the hash values. + * + * @param hashValues Provides an iterator for the hash values + */ + @Override + public void aggregate(Iterable<Integer> hashValues) { + for (int hash: hashValues) { + aggregate(hash); + } + } + + /** + * Aggregates the hash value. + * + * @param hash Hash value. + */ + @Override + public void aggregate(int hash) { + values.add(hash); + } + + /** + * Serializes the Sketch. + * + * Serialization format + * ================== + * Number of elements: 4 bytes + * Elements: N * 4 bytes + * @param buf Serializer + */ + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, values.size()); + for (int value : values) { + buf.putInt(null, value); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + values.clear(); + int nElements = buf.getInt(null); + for (int i = 0; i < nElements; i++) { + values.add(buf.getInt(null)); + } + } + + @Override + protected int onGetClassId() { + return classId; + } + + public HashSet<Integer> data() { + return values; + } + + public int size() { + return values.size(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + SparseSketch sketch = (SparseSketch) o; + + if (!values.equals(sketch.values)) return false; + + return true; + } + + @Override + public int hashCode() { + return values.hashCode(); + } + + 
@Override + public String toString() { + return "SparseSketch{" + + "values=" + values + + '}'; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java new file mode 100644 index 00000000000..b6edd72c40c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/hll/UniqueCountEstimator.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +/** + * An interface for unique count estimation algorithms. The goal of this interface is + * to aid unit testing of {@link HyperLogLogEstimator} users. + * + * @author bjorncs + */ +public interface UniqueCountEstimator<T> { + long estimateCount(T sketch); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java new file mode 100644 index 00000000000..2a974a4a3da --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/aggregation/package-info.java @@ -0,0 +1,4 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage package com.yahoo.searchlib.aggregation; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java new file mode 100644 index 00000000000..adfc4da0b7e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/document/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.searchlib.document; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java new file mode 100644 index 00000000000..a56215a6991 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AddFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to add all arguments. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class AddFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 61, AddFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).add(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java new file mode 100644 index 00000000000..d16058afde2 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AggregationRefNode.java @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.searchlib.aggregation.AggregationResult; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This node holds the index of an ExpressionNode in an external array, and is used as a proxy in the back-end to allow + * aggregators to be used in expressions. 
+ * + * @author Ulf Lilleengen + */ +public class AggregationRefNode extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 142, AggregationRefNode.class); + private AggregationResult result = null; + private int index = - 1; + + @SuppressWarnings("UnusedDeclaration") + public AggregationRefNode() { + // Used by deserializer. + } + + public AggregationRefNode(int index) { + this.index = index; + } + + public AggregationRefNode(AggregationResult result) { + this.result = result; + } + + public AggregationResult getExpression() { + return result; + } + + public AggregationRefNode setExpression(AggregationResult result) { + this.result = result; + return this; + } + + public AggregationRefNode setIndex(int index) { + this.index = index; + return this; + } + + public int getIndex() { + return index; + } + + @Override + public boolean onExecute() { + return result.execute(); + } + + @Override + public void onPrepare() { + result.prepare(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, index); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + index = buf.getInt(null); + result = null; + } + + @Override + public AggregationRefNode clone() { + AggregationRefNode obj = (AggregationRefNode)super.clone(); + obj.index = this.index; + obj.result = (this.result != null) ? this.result.clone() : null; /* result is null for index-only and freshly deserialized nodes */ + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("index", index); + } + + @Override + public ResultNode getResult() { + return result.getResult(); + } + + @Override + public int hashCode() { + return super.hashCode() + index; + } + + @Override + public boolean equalsExpression(ExpressionNode obj) { + AggregationRefNode rhs = (AggregationRefNode)obj; + if (index != rhs.index) { + return false; + } + if (!equals(result, 
rhs.result)) { + return false; + } + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java new file mode 100644 index 00000000000..f54b8fba9ea --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AndFunctionNode.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to perform bitwise AND on the result of all arguments in order. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class AndFunctionNode extends BitFunctionNode { + + public static final int classId = registerClass(0x4000 + 67, AndFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + public void onArgument(final ResultNode arg, IntegerResultNode result) { + result.andOp(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java new file mode 100644 index 00000000000..a8484a1245e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArithmeticTypeConversion.java @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import java.util.HashMap; +import java.util.Map; + +/** + * This class implements a lookup table for result node type conversion. 
+ * + * @author Ulf Lilleengen + */ +public class ArithmeticTypeConversion { + private static final Map> types = new HashMap>(); + + static { + add(IntegerResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId); + add(IntegerResultNode.classId, FloatResultNode.classId, FloatResultNode.classId); + add(IntegerResultNode.classId, StringResultNode.classId, IntegerResultNode.classId); + add(IntegerResultNode.classId, RawResultNode.classId, IntegerResultNode.classId); + add(FloatResultNode.classId, IntegerResultNode.classId, FloatResultNode.classId); + add(FloatResultNode.classId, FloatResultNode.classId, FloatResultNode.classId); + add(FloatResultNode.classId, StringResultNode.classId, FloatResultNode.classId); + add(FloatResultNode.classId, RawResultNode.classId, FloatResultNode.classId); + add(StringResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId); + add(StringResultNode.classId, FloatResultNode.classId, FloatResultNode.classId); + add(StringResultNode.classId, StringResultNode.classId, StringResultNode.classId); + add(StringResultNode.classId, RawResultNode.classId, StringResultNode.classId); + add(RawResultNode.classId, IntegerResultNode.classId, IntegerResultNode.classId); + add(RawResultNode.classId, FloatResultNode.classId, FloatResultNode.classId); + add(RawResultNode.classId, StringResultNode.classId, StringResultNode.classId); + add(RawResultNode.classId, RawResultNode.classId, RawResultNode.classId); + } + + private static void add(int a, int b, int c) { + Map entry; + if (types.containsKey(a)) { + entry = types.get(a); + } else { + entry = new HashMap(); + } + entry.put(b, c); + types.put(a, entry); + } + + public static ResultNode getType(ResultNode arg) { + return (ResultNode)ResultNode.createFromId(getBaseType(arg)); + } + + public static ResultNode getType(ResultNode arg1, ResultNode arg2) { + return (ResultNode)ResultNode.createFromId(types.get(getBaseType(arg1)).get(getBaseType(arg2))); + } + + public static 
int getBaseType(ResultNode arg) { + if (arg instanceof IntegerResultNode) { + return IntegerResultNode.classId; + } else if (arg instanceof FloatResultNode) { + return FloatResultNode.classId; + } else if (arg instanceof StringResultNode) { + return StringResultNode.classId; + } else if (arg instanceof RawResultNode) { + return RawResultNode.classId; + } else { + return ResultNode.classId; /* fallback for every other kind of result node */ + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java new file mode 100644 index 00000000000..0d005e06326 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ArrayAtLookupNode.java @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function is an instruction to retrieve the element at a given index in a named array attribute. + * + * @author arnej27959 + */ +public class ArrayAtLookupNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 38, ArrayAtLookupNode.class); + private String attribute; + + /** + * Constructs an empty lookup node. + * NOTE: This instance is broken until non-optional member data is set. + */ + public ArrayAtLookupNode() { } + + /** + * Constructs an instance of this class with given attribute name + * and index argument. + * + * @param attribute The attribute to retrieve. + * @param arg Expression evaluating to the index argument. + */ + public ArrayAtLookupNode(String attribute, ExpressionNode arg) { + setAttributeName(attribute); + addArg(arg); + } + + /** + * Returns the name of the attribute whose value we index into. + * + * @return The attribute name.
+ */ + public String getAttributeName() { + return attribute; + } + + /** + * Sets the name of the attribute whose value we index into. + * + * @param attribute The attribute to retrieve; null is rejected with an IllegalArgumentException. + * @return This, to allow chaining. + */ + public ArrayAtLookupNode setAttributeName(String attribute) { + if (attribute == null) { + throw new IllegalArgumentException("Attribute name can not be null."); + } + this.attribute = attribute; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + putUtf8(buf, attribute); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + attribute = getUtf8(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + attribute.hashCode(); /* requires the attribute name to have been set */ + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + // "arg" checked by superclass + String otherAttr = ((ArrayAtLookupNode)obj).getAttributeName(); + return attribute.equals(otherAttr); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("attribute", attribute); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java new file mode 100644 index 00000000000..e58bf1e317d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/AttributeNode.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function is an instruction to retrieve the value of a named attribute.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class AttributeNode extends FunctionNode { + + public static final int classId = registerClass(0x4000 + 55, AttributeNode.class); + private String attribute; + + /** + * Constructs an empty attribute node. NOTE: This instance is broken until non-optional member data is set. + */ + public AttributeNode() { + + } + + /** + * Constructs an instance of this class with given attribute name. + * + * @param attribute The attribute to retrieve; must not be null. + */ + public AttributeNode(String attribute) { + setAttributeName(attribute); + } + + /** + * Returns the name of the attribute whose value this function is to retrieve. + * + * @return The attribute name. + */ + public String getAttributeName() { + return attribute; + } + + /** + * Sets the name of the attribute whose value this function is to retrieve. + * + * @param attribute The attribute to retrieve; null is rejected with an IllegalArgumentException. + * @return This, to allow chaining. + */ + public AttributeNode setAttributeName(String attribute) { + if (attribute == null) { + throw new IllegalArgumentException("Attribute name can not be null."); + } + this.attribute = attribute; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + putUtf8(buf, attribute); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + attribute = getUtf8(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + attribute.hashCode(); /* requires the attribute name to have been set */ + } + + @Override + protected boolean equalsFunction(FunctionNode obj) { + return attribute.equals(((AttributeNode)obj).attribute); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("attribute", attribute); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java
b/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java new file mode 100644 index 00000000000..830b74bbb5f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/BitFunctionNode.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This is an abstract super-class for all non-unary functions that operate on bit values. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class BitFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 47, BitFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + onArgument(arg, (IntegerResultNode)result); + } + + @Override + protected void onPrepareResult() { + setResult(new IntegerResultNode(0)); + } + + /** + * Performs onArgument on integers, the only result type supported for bit operations. + * + * @param arg Argument given to the bit function. + * @param result Place to store the result. + */ + protected abstract void onArgument(final ResultNode arg, IntegerResultNode result); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java new file mode 100644 index 00000000000..0dc7f49a826 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/BucketResultNode.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +/** + * This is the superclass of all bucket values. + * + * @author Haavard Pettersen + * @author Henning Baldersheim + * @author Simon Thoresen + */ +abstract public class BucketResultNode extends ResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 100, BucketResultNode.class); + + @Override + public long getInteger() { + return 0; + } + + @Override + public double getFloat() { + return 0.0; + } + + @Override + public String getString() { + return ""; + } + + @Override + public byte[] getRaw() { + return new byte[0]; + } + + @Override + public void set(ResultNode rhs) { + } + + /** + * Tells whether this bucket has zero width. Indicates that it has no value and can be considered a NULL range. An empty + * range is used by the backend to represent hits that end in no buckets. + * + * @return true if this bucket has zero width. + */ + public abstract boolean empty(); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java new file mode 100644 index 00000000000..98c3ba0580e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/CatFunctionNode.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to concatenate the bits of all arguments in order.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class CatFunctionNode extends MultiArgFunctionNode { + + public static final int classId = registerClass(0x4000 + 72, CatFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) { + return true; /* this class adds no state of its own to compare */ + } + + @Override + protected void onPrepareResult() { + setResult(new RawResultNode()); + } + + @Override + protected void onPrepare() { + super.onPrepare(); + } + + @Override + protected boolean onExecute() { + for (int i = 0; i < getNumArgs(); i++) { /* execute each argument in order and add its result to the raw result */ + getArg(i).execute(); + ((RawResultNode)getResult()).add(getArg(i).getResult()); + } + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java new file mode 100644 index 00000000000..2ba6ee6e1c3 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ConstantNode.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This expression node represents a constant value.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class ConstantNode extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 49, ConstantNode.class); + private ResultNode value = null; /* the constant; stays null when the no-argument constructor is used */ + + public ConstantNode() { + + } + + public ConstantNode(ResultNode value) { + this.value = value; + } + + public ResultNode getValue() { + return value; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + value = (ResultNode)deserializeOptional(buf); + } + + @Override + public ConstantNode clone() { + ConstantNode obj = (ConstantNode)super.clone(); + if (value != null) { + obj.value = (ResultNode)value.clone(); /* give the clone its own copy of the value */ + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + protected void onPrepare() { + + } + + @Override + protected boolean onExecute() { + return true; /* nothing to compute: getResult() returns the stored value */ + } + + @Override + public ResultNode getResult() { + return value; + } + + @Override + protected boolean equalsExpression(ExpressionNode obj) { + return equals(value, ((ConstantNode)obj).value); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java new file mode 100644 index 00000000000..c24e6fa1acd --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DebugWaitFunctionNode.java @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is a debug wait function node that waits for a specified amount of time before executing its expression. + * + * @author Ulf Lilleengen + */ +public class DebugWaitFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 144, DebugWaitFunctionNode.class); + private double waitTime; + private boolean busyWait; + + @SuppressWarnings("UnusedDeclaration") + public DebugWaitFunctionNode() { + // used by deserializer + } + + /** + * Constructs an instance of this class with given argument and wait parameters. + * + * @param arg The argument for this function. + * @param waitTime The time to wait, in seconds, before executing expression. + * @param busyWait true if busy wait, false if not. + */ + public DebugWaitFunctionNode(ExpressionNode arg, double waitTime, boolean busyWait) { + addArg(arg); + this.waitTime = waitTime; + this.busyWait = busyWait; + } + + @Override + public void onPrepare() { + super.onPrepare(); + } + + @Override + public boolean onExecute() { + // Wait for waitTime seconds (spinning or sleeping), then execute the argument and copy its result. + double millis = waitTime * 1000.0; + long start = System.currentTimeMillis(); + try { + while ((System.currentTimeMillis() - start) < millis) { + if (busyWait) { + for (int i = 0; i < 1000; i++) { + ; + } + } else { + long rem = (long)(millis - (System.currentTimeMillis() - start)); + Thread.sleep(Math.max(rem, 0L)); /* the deadline may pass after the loop test; sleep() rejects negative values */ + } + } + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); // Not critical: stop waiting, but keep the interrupt status for the caller + } + getArg().execute(); + getResult().set(getArg().getResult()); + return true; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)waitTime + (busyWait ?
1 : 0); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + DebugWaitFunctionNode rhs = (DebugWaitFunctionNode)obj; + return waitTime == rhs.waitTime && busyWait == rhs.busyWait; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putDouble(null, waitTime); + byte tmp = busyWait ? (byte)1 : (byte)0; /* the flag is written as one byte: 1 when busyWait is set, otherwise 0 */ + buf.putByte(null, tmp); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + waitTime = buf.getDouble(null); + byte tmp = buf.getByte(null); + busyWait = (tmp != 0); /* any non-zero byte reads back as true */ + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("waitTime", waitTime); + visitor.visit("busyWait", busyWait); + } + + @Override + protected int onGetClassId() { + return classId; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java new file mode 100644 index 00000000000..2a99e9f1edb --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DivideFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to divide the arguments in order.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class DivideFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 63, DivideFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).divide(arg); + } +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java new file mode 100644 index 00000000000..dabbf8d622b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentAccessorNode.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This abstract expression node represents a document whose content is accessed in a way that depends on the + * implementing subclass. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class DocumentAccessorNode extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 48, DocumentAccessorNode.class); /* registers this class under its own id, like every other node here */ + + @Override + protected int onGetClassId() { + return classId; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java new file mode 100644 index 00000000000..c33ef6cd7fd --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/DocumentFieldNode.java @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This node is a request to retrieve the content of a document field. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class DocumentFieldNode extends DocumentAccessorNode { + + public static final int classId = registerClass(0x4000 + 56, DocumentFieldNode.class); + private String fieldName; + private ResultNode result; + + /** + * Constructs an empty document field node. NOTE: This instance is broken until non-optional member data is set. + */ + public DocumentFieldNode() { + super(); + } + + /** + * Constructs an instance of this class with given field name. + * + * @param fieldName The field whose value to retrieve; must not be null. + */ + public DocumentFieldNode(String fieldName) { + super(); + setDocumentFieldName(fieldName); + } + + /** + * Returns the name of the field whose value to retrieve. + * + * @return The field name. + */ + public String getDocumentFieldName() { + return fieldName; + } + + /** + * Sets the name of the field whose value to retrieve. + * + * @param fieldName The field name to set; null is rejected with an IllegalArgumentException. + * @return This, to allow chaining.
+ */ + public DocumentFieldNode setDocumentFieldName(String fieldName) { + if (fieldName == null) { + throw new IllegalArgumentException("Field name can not be null."); + } + this.fieldName = fieldName; + return this; + } + + @Override + public ResultNode getResult() { + return result; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + putUtf8(buf, fieldName); + serializeOptional(buf, result); /* the result is written as an optional member */ + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + fieldName = getUtf8(buf); + result = (ResultNode)deserializeOptional(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + fieldName.hashCode(); /* the result is not part of the hash; requires the field name to have been set */ + } + + @Override + protected boolean equalsExpression(ExpressionNode obj) { + DocumentFieldNode rhs = (DocumentFieldNode)obj; + if (!fieldName.equals(rhs.fieldName)) { + return false; + } + if (!equals(result, rhs.result)) { + return false; + } + return true; + } + + @Override + public DocumentFieldNode clone() { + DocumentFieldNode obj = (DocumentFieldNode)super.clone(); + if (result != null) { + obj.result = (ResultNode)result.clone(); /* give the clone its own copy of the result */ + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("fieldName", fieldName); + visitor.visit("result", result); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java new file mode 100644 index 00000000000..07aa4c8d580 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ExpressionNode.java @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.io.Serializable; + +/** + * This is the base class for all expression node types. There is no execution logic implemented in Java, since that all + * happens in the C++ backend. This class hierarchy is for building the expression tree to pass to the backend. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class ExpressionNode extends Identifiable implements Serializable { + + public static final int classId = registerClass(0x4000 + 40, ExpressionNode.class); + + /** + * Prepares this expression for execution by delegating to onPrepare(). + */ + public void prepare() { + onPrepare(); + } + + /** + * Executes this expression by delegating to onExecute(). + * + * @return true if successful, false if not. + */ + public boolean execute() { + return onExecute(); + } + + /** + * Gives an argument to this expression and stores the result, by delegating to onArgument(). + * + * @param arg Argument to use for expression. + * @param result Node to contain the result.
+ */ + protected void executeIterative(final ResultNode arg, ResultNode result) { + onArgument(arg, result); + } + + protected boolean onExecute() { /* default hook: throws unless a subclass overrides it */ + throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onExecute()."); + } + + protected void onPrepare() { /* default hook: throws unless a subclass overrides it */ + throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onPrepare()."); + } + + protected void onArgument(final ResultNode arg, ResultNode result) { /* default hook: throws unless a subclass overrides it */ + throw new RuntimeException("Class " + this.getClass().getName() + " does not implement onArgument()."); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + } + + @Override + public ExpressionNode clone() { + return (ExpressionNode)super.clone(); + } + + @Override + public final boolean equals(Object obj) { + if (!super.equals(obj)) { /* NOTE(review): the cast below presumably relies on super.equals() rejecting null and other classes - confirm */ + return false; + } + if (!equalsExpression((ExpressionNode)obj)) { + return false; + } + return true; + } + + protected abstract boolean equalsExpression(ExpressionNode obj); + + /** + * Returns the result of this expression. + * + * @return the result as a ResultNode. + */ + abstract public ResultNode getResult(); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java new file mode 100644 index 00000000000..5c8a526291d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionNode.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function assigns a fixed width bucket to each input value. + * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class FixedWidthBucketFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 77, FixedWidthBucketFunctionNode.class); + private NumericResultNode width = null; + + /** + * Constructs an empty function node with no width set. + */ + public FixedWidthBucketFunctionNode() { + // empty + } + + /** + * Creates a bucket expression with the given width and the given subexpression. + * + * @param w bucket width + * @param arg The argument for this function. + */ + public FixedWidthBucketFunctionNode(NumericResultNode w, ExpressionNode arg) { + addArg(arg); + width = w; + } + + /** + * Returns the width of this bucket expression. + * + * @return bucket width for this expression + */ + public NumericResultNode getWidth() { + return width; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, width); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + width = (NumericResultNode)deserializeOptional(buf); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return equals(width, ((FixedWidthBucketFunctionNode)obj).width); + } + + @Override + public FixedWidthBucketFunctionNode clone() { + FixedWidthBucketFunctionNode obj = (FixedWidthBucketFunctionNode)super.clone(); + if (width != null) { + obj.width = (NumericResultNode)width.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("width", width); + } +} diff --git
a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java new file mode 100644 index 00000000000..e5088e27a2e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNode.java @@ -0,0 +1,118 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This result holds a bucket of float values, from a start (inclusive) to an end (exclusive). + * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class FloatBucketResultNode extends BucketResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 102, FloatBucketResultNode.class); + + // bucket start, inclusive + private double from = 0; + + // bucket end, exclusive + private double to = 0; + + @Override + public boolean empty() { + return to == from; + } + + /** + * Constructs an empty result node.
+ */ + public FloatBucketResultNode() { + // empty + } + + /** + * Creates a bucket with the given limits. + * + * @param from bucket start, inclusive + * @param to bucket end, exclusive + */ + public FloatBucketResultNode(double from, double to) { + this.from = from; + this.to = to; + } + + /** + * Returns the bucket start. + * + * @return bucket start + */ + public double getFrom() { + return from; + } + + /** + * Returns the bucket end. + * + * @return bucket end + */ + public double getTo() { + return to; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putDouble(null, from); + buf.putDouble(null, to); + } + + @Override + protected void onDeserialize(Deserializer buf) { + from = buf.getDouble(null); + to = buf.getDouble(null); + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { /* different classes are ordered by class id */ + return (classId - rhs.getClassId()); + } + FloatBucketResultNode b = (FloatBucketResultNode)rhs; + double f1 = from; + double f2 = b.from; + if (f1 < f2) { /* order by bucket start first */ + return -1; + } else if (f1 > f2) { + return 1; + } else { + double t1 = to; + double t2 = b.to; + if (t1 < t2) { /* equal starts: order by bucket end */ + return -1; + } else if (t1 > t2) { + return 1; + } + } + return 0; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)from + (int)to; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("from", from); + visitor.visit("to", to); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java new file mode 100644 index 00000000000..9d6d83ccc5c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatBucketResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of float bucket results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class FloatBucketResultNodeVector extends ResultNodeVector { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 113, FloatBucketResultNodeVector.class); + private ArrayList<FloatBucketResultNode> vector = new ArrayList<FloatBucketResultNode>(); + + @Override + protected int onGetClassId() { + return classId; + } + + public FloatBucketResultNodeVector() { + } + + public FloatBucketResultNodeVector add(FloatBucketResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { /* r must be a FloatBucketResultNode; anything else fails the cast */ + return add((FloatBucketResultNode)r); + } + + public ArrayList<FloatBucketResultNode> getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); /* element count first, then each element */ + for (FloatBucketResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList<FloatBucketResultNode>(); + for (int i = 0; i < sz; i++) { + FloatBucketResultNode node = new FloatBucketResultNode(0, 0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + FloatBucketResultNodeVector b = (FloatBucketResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { /* compare element-wise over the common prefix; on a tie the shorter vector orders first */ + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ?
(vector.size() - b.vector.size()) : diff; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java new file mode 100644 index 00000000000..6e44f113eed --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNode.java @@ -0,0 +1,182 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.nio.ByteBuffer; + +/** + * This result holds a double-precision floating-point value. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class FloatResultNode extends NumericResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 52, FloatResultNode.class); + private static FloatResultNode negativeInfinity = new FloatResultNode(Double.NEGATIVE_INFINITY); + private static FloatResultNode positiveInfinity = new FloatResultNode(Double.POSITIVE_INFINITY); /* NOTE(review): both infinity nodes are shared instances and setValue() is public, so callers can presumably mutate them - confirm */ + // The numeric value of this node. + private double value; + + /** + * Constructs a result node holding the default value 0.0. + */ + public FloatResultNode() { + super(); + } + + /** + * Constructs an instance of this class with given value. + * + * @param value The value to assign to this. + */ + public FloatResultNode(double value) { + super(); + setValue(value); + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining.
+ */ + public FloatResultNode setValue(double value) { + this.value = value; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putDouble(null, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = buf.getDouble(null); + } + + @Override + public long getInteger() { + return Math.round(value); + } + + @Override + public double getFloat() { + return value; + } + + @Override + public String getString() { + return String.valueOf(value); + } + + @Override + public byte[] getRaw() { + return ByteBuffer.allocate(8).putDouble(value).array(); + } + + @Override + public void add(ResultNode rhs) { + value += rhs.getFloat(); + } + + @Override + public void negate() { + value = -value; + } + + @Override + public void multiply(ResultNode rhs) { + value *= rhs.getFloat(); + } + + @Override + public void divide(ResultNode rhs) { + double val = rhs.getFloat(); + value = (val == 0.0) ? 0.0 : (value / val); + } + + @Override + public void modulo(ResultNode rhs) { + value %= rhs.getInteger(); + } + + @Override + public void min(ResultNode rhs) { + double value = rhs.getFloat(); + if (value < this.value) { + this.value = value; + } + } + + @Override + public void max(ResultNode rhs) { + double value = rhs.getFloat(); + if (value > this.value) { + this.value = value; + } + } + + @Override + public Object getNumber() { + return new Double(value); + } + + @Override + protected int onCmp(ResultNode rhs) { + double b = rhs.getFloat(); + if (Double.isNaN(value)) { + return Double.isNaN(b) ? 0 : -1; + } else { + if (Double.isNaN(b)) { + return 1; + } else { + return (value < b) ? -1 : (value > b) ? 
1 : 0; + } + } + } + + @Override + public int hashCode() { + return super.hashCode() + (int)value; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + public void set(ResultNode rhs) { + value = rhs.getFloat(); + } + + /** + * Will provide the smallest possible value + * + * @return the smallest possible FloatResultNode + */ + public static FloatResultNode getNegativeInfinity() { + return negativeInfinity; + } + + /** + * Will provide the largest possible value + * + * @return the largest possible FloatResultNode + */ + public static FloatResultNode getPositiveInfinity() { + return positiveInfinity; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java new file mode 100644 index 00000000000..ae57aeb6a7f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FloatResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of float results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class FloatResultNodeVector extends ResultNodeVector { + + // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 110, FloatResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + @Override + protected int onGetClassId() { + return classId; + } + + public FloatResultNodeVector() { + } + + public FloatResultNodeVector add(FloatResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { + return add((FloatResultNode)r); + } + + public ArrayList getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (FloatResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + FloatResultNode node = new FloatResultNode(0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + FloatResultNodeVector b = (FloatResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java new file mode 100644 index 00000000000..6ebb4c672c8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ForceLoad.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +/** + * This file was generated by ../../../../../forceload.sh + */ +public class ForceLoad { + + static { + String pkg = "com.yahoo.searchlib.expression"; + String[] classes = { + "StringResultNode", + "BucketResultNode", + "MaxFunctionNode", + "FloatResultNode", + "RawResultNode", + "NegateFunctionNode", + "SortFunctionNode", + "ReverseFunctionNode", + "ToIntFunctionNode", + "ToFloatFunctionNode", + "MathFunctionNode", + "StrLenFunctionNode", + "NormalizeSubjectFunctionNode", + "StrCatFunctionNode", + "ToStringFunctionNode", + "NumElemFunctionNode", + "CatFunctionNode", + "ResultNode", + "AddFunctionNode", + "DivideFunctionNode", + "XorFunctionNode", + "MD5BitFunctionNode", + "UnaryBitFunctionNode", + "AttributeNode", + "MinFunctionNode", + "BitFunctionNode", + "FixedWidthBucketFunctionNode", + "RangeBucketPreDefFunctionNode", + "GetYMUMChecksumFunctionNode", + "DocumentFieldNode", + "NullResultNode", + "FunctionNode", + "ConstantNode", + "RawResultNode", + "OrFunctionNode", + "ExpressionNode", + "AggregationRefNode", + "IntegerResultNode", + "Int32ResultNode", + "Int16ResultNode", + "Int8ResultNode", + "ModuloFunctionNode", + "IntegerResultNodeVector", + "Int32ResultNodeVector", + "Int16ResultNodeVector", + "Int8ResultNodeVector", + "FloatResultNodeVector", + "StringResultNodeVector", + "RawResultNodeVector", + "ForceLoad", + "MultiplyFunctionNode", + "IntegerBucketResultNode", + "FloatBucketResultNode", + "StringBucketResultNode", + "RawBucketResultNode", + "RawBucketResultNodeVector", + "IntegerBucketResultNodeVector", + "FloatBucketResultNodeVector", + "StringBucketResultNodeVector", + "AndFunctionNode", + "DocumentAccessorNode", + "GetDocIdNamespaceSpecificFunctionNode", + "NumericResultNode", + "UnaryFunctionNode", + "TimeStampFunctionNode", + "ZCurveFunctionNode", + "XorBitFunctionNode", + "MultiArgFunctionNode", + "DebugWaitFunctionNode", + "ForceLoad" + }; + com.yahoo.system.ForceLoad.forceLoad(pkg, classes); + 
} + + public static boolean forceLoad() { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java new file mode 100644 index 00000000000..13f7f8e11a2 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/FunctionNode.java @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This abstract expression node represents a function to execute. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class FunctionNode extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 42, FunctionNode.class); + private ResultNode result = null; + + public FunctionNode setResult(ResultNode res) { + this.result = res; + return this; + } + + @Override + public final ResultNode getResult() { + return result; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, result); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + result = (ResultNode)deserializeOptional(buf); + } + + @Override + public FunctionNode clone() { + FunctionNode obj = (FunctionNode)super.clone(); + if (result != null) { + obj.result = (ResultNode)result.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("result", result); + } + + @Override + protected final boolean equalsExpression(ExpressionNode obj) { + FunctionNode rhs = (FunctionNode)obj; + if (!equals(result, rhs.result)) { + 
return false; + } + if (!equalsFunction(rhs)) { + return false; + } + return true; + } + + protected abstract boolean equalsFunction(FunctionNode obj); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java new file mode 100644 index 00000000000..1308e668d3b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetDocIdNamespaceSpecificFunctionNode.java @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * The node is a request to retrieve the namespace-specific content of a document id. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class GetDocIdNamespaceSpecificFunctionNode extends DocumentAccessorNode { + + public static final int classId = registerClass(0x4000 + 73, GetDocIdNamespaceSpecificFunctionNode.class); + private ResultNode result = null; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public GetDocIdNamespaceSpecificFunctionNode() { + super(); + } + + /** + * Constructs an instance of this class with given result. + * + * @param result The result to assign to this. + */ + public GetDocIdNamespaceSpecificFunctionNode(ResultNode result) { + super(); + setResult(result); + } + + /** + * Sets the result of this function. + * + * @param result The result to set. + * @return This, to allow chaining. 
+ */ + public GetDocIdNamespaceSpecificFunctionNode setResult(ResultNode result) { + this.result = result; + return this; + } + + @Override + public ResultNode getResult() { + return result; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, result); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + result = (ResultNode)deserializeOptional(buf); + } + + @Override + public GetDocIdNamespaceSpecificFunctionNode clone() { + GetDocIdNamespaceSpecificFunctionNode obj = (GetDocIdNamespaceSpecificFunctionNode)super.clone(); + if (result != null) { + obj.result = (ResultNode)result.clone(); + } + return obj; + } + + @Override + protected boolean equalsExpression(ExpressionNode obj) { + return equals(result, ((GetDocIdNamespaceSpecificFunctionNode)obj).result); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("result", result); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java new file mode 100644 index 00000000000..89b1f477706 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/GetYMUMChecksumFunctionNode.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This node is a request to retrieve the YMUM checksum of a document. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class GetYMUMChecksumFunctionNode extends DocumentAccessorNode { + + public static final int classId = registerClass(0x4000 + 74, GetYMUMChecksumFunctionNode.class); + private IntegerResultNode result = new IntegerResultNode(0); + + @Override + public ResultNode getResult() { + return result; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + result.serialize(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + result.deserialize(buf); + } + + @Override + public GetYMUMChecksumFunctionNode clone() { + GetYMUMChecksumFunctionNode obj = (GetYMUMChecksumFunctionNode)super.clone(); + if (result != null) { + obj.result = (IntegerResultNode)result.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("result", result); + } + + @Override + protected boolean equalsExpression(ExpressionNode obj) { + return equals(result, ((GetYMUMChecksumFunctionNode)obj).result); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java new file mode 100644 index 00000000000..53455fe26ec --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNode.java @@ -0,0 +1,149 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.nio.ByteBuffer; + +/** + * This result holds an integer value. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int16ResultNode extends NumericResultNode { + + public static final int classId = registerClass(0x4000 + 105, Int16ResultNode.class); + private short value = 0; + + @SuppressWarnings("UnusedDeclaration") + public Int16ResultNode() { + // used by deserializer + } + + /** + * Constructs an instance of this class with given value. + * + * @param value The value to assign to this. + */ + public Int16ResultNode(short value) { + this.value = value; + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining. + */ + public Int16ResultNode setValue(short value) { + this.value = value; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putShort(null, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = buf.getShort(null); + } + + @Override + public long getInteger() { + return value; + } + + @Override + public double getFloat() { + return value; + } + + @Override + public String getString() { + return String.valueOf(value); + } + + @Override + public byte[] getRaw() { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + @Override + public void add(ResultNode rhs) { + value += rhs.getInteger(); + } + + @Override + public void negate() { + value = (short)-value; + } + + @Override + public void multiply(ResultNode rhs) { + value *= rhs.getInteger(); + } + + @Override + public void divide(ResultNode rhs) { + short val = (short)rhs.getInteger(); + value = (short)((val == 0) ? 
0 : (value / val)); + } + + @Override + public void modulo(ResultNode rhs) { + value %= rhs.getInteger(); + } + + @Override + public void min(ResultNode rhs) { + short value = (short)rhs.getInteger(); + if (value < this.value) { + this.value = value; + } + } + + @Override + public void max(ResultNode rhs) { + short value = (short)rhs.getInteger(); + if (value > this.value) { + this.value = value; + } + } + + @Override + public Object getNumber() { + return new Integer(value); + } + + @Override + protected int onCmp(ResultNode rhs) { + long value = rhs.getInteger(); + return (this.value < value) ? -1 : (this.value > value) ? 1 : 0; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)value; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + public void set(ResultNode rhs) { + value = (short)rhs.getInteger(); + } +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java new file mode 100644 index 00000000000..7e67f80b5e0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int16ResultNodeVector.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of 16-bit integer results.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int16ResultNodeVector extends ResultNodeVector { + + public static final int classId = registerClass(0x4000 + 117, Int16ResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + public Int16ResultNodeVector() { + } + + public Int16ResultNodeVector add(Int16ResultNode v) { + vector.add(v); + return this; + } + + public ArrayList getVector() { + return vector; + } + + @Override + public ResultNodeVector add(ResultNode r) { + return add((Int16ResultNode)r); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (Int16ResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + Int16ResultNode node = new Int16ResultNode((short)0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + Int16ResultNodeVector b = (Int16ResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java new file mode 100644 index 00000000000..e2acb243714 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNode.java @@ -0,0 +1,149 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.nio.ByteBuffer; + +/** + * This result holds an integer value. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int32ResultNode extends NumericResultNode { + + public static final int classId = registerClass(0x4000 + 106, Int32ResultNode.class); + private int value = 0; + + @SuppressWarnings("UnusedDeclaration") + public Int32ResultNode() { + // used by deserializer + } + + /** + * Constructs an instance of this class with given value. + * + * @param value The value to assign to this. + */ + public Int32ResultNode(int value) { + this.value = value; + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining. + */ + public Int32ResultNode setValue(int value) { + this.value = value; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putInt(null, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = buf.getInt(null); + } + + @Override + public long getInteger() { + return value; + } + + @Override + public double getFloat() { + return value; + } + + @Override + public String getString() { + return String.valueOf(value); + } + + @Override + public byte[] getRaw() { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + @Override + public void add(ResultNode rhs) { + value += rhs.getInteger(); + } + + @Override + public void negate() { + value = -value; + } + + @Override + public void multiply(ResultNode rhs) { + value *= rhs.getInteger(); + } + + @Override + public void divide(ResultNode rhs) { + int val = (int)rhs.getInteger(); + value = (val == 0) ? 
0 : (value / val); + } + + @Override + public void modulo(ResultNode rhs) { + value %= rhs.getInteger(); + } + + @Override + public void min(ResultNode rhs) { + int value = (int)rhs.getInteger(); + if (value < this.value) { + this.value = value; + } + } + + @Override + public void max(ResultNode rhs) { + int value = (int)rhs.getInteger(); + if (value > this.value) { + this.value = value; + } + } + + @Override + public Object getNumber() { + return new Integer(value); + } + + @Override + protected int onCmp(ResultNode rhs) { + long value = rhs.getInteger(); + return (this.value < value) ? -1 : (this.value > value) ? 1 : 0; + } + + @Override + public int hashCode() { + return super.hashCode() + value; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + public void set(ResultNode rhs) { + value = (int)rhs.getInteger(); + } +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java new file mode 100644 index 00000000000..f9166ac63da --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int32ResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of 32-bit integer results.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int32ResultNodeVector extends ResultNodeVector { + + public static final int classId = registerClass(0x4000 + 118, Int32ResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + public Int32ResultNodeVector() { + + } + + public Int32ResultNodeVector add(Int32ResultNode v) { + vector.add(v); + return this; + } + + public ArrayList getVector() { + return vector; + } + + @Override + public ResultNodeVector add(ResultNode r) { + return add((Int32ResultNode)r); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (Int32ResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + Int32ResultNode node = new Int32ResultNode(0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + Int32ResultNodeVector b = (Int32ResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java new file mode 100644 index 00000000000..dedb2f3ddbc --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNode.java @@ -0,0 +1,149 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.nio.ByteBuffer; + +/** + * This result holds an integer value. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int8ResultNode extends NumericResultNode { + + public static final int classId = registerClass(0x4000 + 104, Int8ResultNode.class); + private byte value = 0; + + @SuppressWarnings("UnusedDeclaration") + public Int8ResultNode() { + // used by deserializer + } + + /** + * Constructs an instance of this class with given value. + * + * @param value The value to assign to this. + */ + public Int8ResultNode(byte value) { + this.value = value; + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining. + */ + public Int8ResultNode setValue(byte value) { + this.value = value; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putByte(null, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = buf.getByte(null); + } + + @Override + public long getInteger() { + return value; + } + + @Override + public double getFloat() { + return value; + } + + @Override + public String getString() { + return String.valueOf(value); + } + + @Override + public byte[] getRaw() { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + @Override + public void add(ResultNode rhs) { + value += rhs.getInteger(); + } + + @Override + public void negate() { + value = (byte)-value; + } + + @Override + public void multiply(ResultNode rhs) { + value *= rhs.getInteger(); + } + + @Override + public void divide(ResultNode rhs) { + int val = (int)rhs.getInteger(); + value = (byte)((val == 
0) ? 0 : (value / val)); + } + + @Override + public void modulo(ResultNode rhs) { + value %= rhs.getInteger(); + } + + @Override + public void min(ResultNode rhs) { + byte value = (byte)rhs.getInteger(); + if (value < this.value) { + this.value = value; + } + } + + @Override + public void max(ResultNode rhs) { + byte value = (byte)rhs.getInteger(); + if (value > this.value) { + this.value = value; + } + } + + @Override + public Object getNumber() { + return new Integer(value); + } + + @Override + protected int onCmp(ResultNode rhs) { + long value = rhs.getInteger(); + return (this.value < value) ? -1 : (this.value > value) ? 1 : 0; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)value; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + public void set(ResultNode rhs) { + value = (byte)rhs.getInteger(); + } +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java new file mode 100644 index 00000000000..da1edfc5a3a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/Int8ResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of 8-bit integer results.
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class Int8ResultNodeVector extends ResultNodeVector { + + public static final int classId = registerClass(0x4000 + 116, Int8ResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + public Int8ResultNodeVector() { + + } + + public Int8ResultNodeVector add(Int8ResultNode v) { + vector.add(v); + return this; + } + + public ArrayList getVector() { + return vector; + } + + @Override + public ResultNodeVector add(ResultNode r) { + return add((Int8ResultNode)r); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (Int8ResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + Int8ResultNode node = new Int8ResultNode((byte)0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + Int8ResultNodeVector b = (Int8ResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java new file mode 100644 index 00000000000..08a85375e7c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNode.java @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an integer bucket value + * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class IntegerBucketResultNode extends BucketResultNode { + + public static final int classId = registerClass(0x4000 + 101, IntegerBucketResultNode.class); + private long from = 0; // bucket start, inclusive + private long to = 0; // bucket end, exclusive + + /** + * Constructs an empty result node. + */ + public IntegerBucketResultNode() { + // empty + } + + /** + * Create a bucket with the given limits + * + * @param from bucket start, inclusive + * @param to bucket end, exclusive + */ + public IntegerBucketResultNode(long from, long to) { + this.from = from; + this.to = to; + } + + /** + * Obtain the bucket start + * + * @return bucket start, inclusive + */ + public long getFrom() { + return from; + } + + /** + * Obtain the bucket end + * + * @return bucket end, exclusive + */ + public long getTo() { + return to; + } + + @Override + public boolean empty() { + return to == from; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putLong(null, from); + buf.putLong(null, to); + } + + @Override + protected void onDeserialize(Deserializer buf) { + from = buf.getLong(null); + to = buf.getLong(null); + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + IntegerBucketResultNode b = (IntegerBucketResultNode)rhs; + int diff = Long.compare(from, b.from); // Long.compare cannot overflow, unlike from - b.from + if (diff == 0) { + diff = Long.compare(to, b.to); + } + return ((diff == 0) ? 0 : ((diff < 0) ?
-1 : 1)); + } + + @Override + public int hashCode() { + return super.hashCode() + (int)from + (int)to; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("from", from); + visitor.visit("to", to); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java new file mode 100644 index 00000000000..1ea639bd67f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of integer bucket results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class IntegerBucketResultNodeVector extends ResultNodeVector { + + public static final int classId = registerClass(0x4000 + 112, IntegerBucketResultNodeVector.class); + private ArrayList<IntegerBucketResultNode> vector = new ArrayList<IntegerBucketResultNode>(); // typed so the loops below can read elements without casts + + public IntegerBucketResultNodeVector() { + + } + + public IntegerBucketResultNodeVector add(IntegerBucketResultNode v) { + vector.add(v); + return this; + } + + public ArrayList<IntegerBucketResultNode> getVector() { + return vector; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public ResultNodeVector add(ResultNode r) { + return add((IntegerBucketResultNode)r); + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (IntegerBucketResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList<IntegerBucketResultNode>(); + for 
(int i = 0; i < sz; i++) { + IntegerBucketResultNode node = new IntegerBucketResultNode(0, 0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + IntegerBucketResultNodeVector b = (IntegerBucketResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java new file mode 100644 index 00000000000..4ca5dfc4139 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNode.java @@ -0,0 +1,183 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.nio.ByteBuffer; + +/** + * This result holds an integer value. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class IntegerResultNode extends NumericResultNode { + + public static final int classId = registerClass(0x4000 + 107, IntegerResultNode.class); + private static IntegerResultNode negativeInfinity = new IntegerResultNode(Long.MIN_VALUE); + private static IntegerResultNode positiveInfinity = new IntegerResultNode(Long.MAX_VALUE); + private long value; + + /** + * Constructs an empty result node. + */ + public IntegerResultNode() { + + } + + /** + * Constructs an instance of this class with given value. 
+ * + * @param value The value to assign to this. + */ + public IntegerResultNode(long value) { + setValue(value); + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining. + */ + public IntegerResultNode setValue(long value) { + this.value = value; + return this; + } + + void andOp(final ResultNode b) { + value &= b.getInteger(); + } + + void orOp(final ResultNode b) { + value |= b.getInteger(); + } + + void xorOp(final ResultNode b) { + value ^= b.getInteger(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + buf.putLong(null, value); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = buf.getLong(null); + } + + @Override + public long getInteger() { + return value; + } + + @Override + public double getFloat() { + return value; + } + + @Override + public String getString() { + return String.valueOf(value); + } + + @Override + public byte[] getRaw() { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + @Override + public void add(ResultNode rhs) { + value += rhs.getInteger(); + } + + @Override + public void negate() { + value = -value; + } + + @Override + public void multiply(ResultNode rhs) { + value *= rhs.getInteger(); + } + + @Override + public void divide(ResultNode rhs) { + long val = rhs.getInteger(); + value = (val == 0) ? 
0 : (value / val); + } + + @Override + public void modulo(ResultNode rhs) { + value %= rhs.getInteger(); + } + + @Override + public void min(ResultNode rhs) { + long value = rhs.getInteger(); + if (value < this.value) { + this.value = value; + } + } + + @Override + public void max(ResultNode rhs) { + long value = rhs.getInteger(); + if (value > this.value) { + this.value = value; + } + } + + @Override + public Object getNumber() { + return Long.valueOf(value); // boxing factory instead of the deprecated Long(long) constructor + } + + @Override + protected int onCmp(ResultNode rhs) { + long value = rhs.getInteger(); + return (this.value < value) ? -1 : (this.value > value) ? 1 : 0; + } + + @Override + public int hashCode() { + return super.hashCode() + (int)value; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + @Override + public void set(ResultNode rhs) { + value = rhs.getInteger(); + } + + /** + * Will provide the smallest possible value + * + * @return the smallest possible IntegerResultNode + */ + public static IntegerResultNode getNegativeInfinity() { + return negativeInfinity; + } + + /** + * Will provide the largest possible value + * + * @return the largest possible IntegerResultNode + */ + public static IntegerResultNode getPositiveInfinity() { + return positiveInfinity; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java new file mode 100644 index 00000000000..ac55a4e7d8b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/IntegerResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of integer results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class IntegerResultNodeVector extends ResultNodeVector { + + public static final int classId = registerClass(0x4000 + 119, IntegerResultNodeVector.class); + private ArrayList<IntegerResultNode> vector = new ArrayList<IntegerResultNode>(); // typed so the loops below can read elements without casts + + public IntegerResultNodeVector() { + + } + + public IntegerResultNodeVector add(IntegerResultNode v) { + vector.add(v); + return this; + } + + public ArrayList<IntegerResultNode> getVector() { + return vector; + } + + @Override + public ResultNodeVector add(ResultNode r) { + return add((IntegerResultNode)r); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (IntegerResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList<IntegerResultNode>(); + for (int i = 0; i < sz; i++) { + IntegerResultNode node = new IntegerResultNode(0); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + IntegerResultNodeVector b = (IntegerResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? 
(vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java new file mode 100644 index 00000000000..6bd9e10a75a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/InterpolatedLookupNode.java @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function is an instruction to retrieve the value of a named attribute. + * + * @author arnej27959 + */ +public class InterpolatedLookupNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 39, InterpolatedLookupNode.class); + private String attribute; + + /** + * Constructs an empty result node. + * NOTE: This instance is broken until non-optional member data is set. + */ + public InterpolatedLookupNode() { } + + /** + * Constructs an instance of this class with given attribute name + * and lookup argument. + * + * @param attribute The attribute to retrieve. + * @param arg Expression evaluating to the lookup argument. + */ + public InterpolatedLookupNode(String attribute, ExpressionNode arg) { + setAttributeName(attribute); + addArg(arg); + } + + /** + * Returns the name of the attribute whose value we do lookup in. + * + * @return The attribute name. + */ + public String getAttributeName() { + return attribute; + } + + /** + * Sets the name of the attribute whose value we do lookup in. + * + * @param attribute The attribute to retrieve. + * @return This, to allow chaining. 
+ */ + public InterpolatedLookupNode setAttributeName(String attribute) { + if (attribute == null) { + throw new IllegalArgumentException("Attribute name can not be null."); + } + this.attribute = attribute; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + putUtf8(buf, attribute); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + attribute = getUtf8(buf); + } + + @Override + public int hashCode() { + return super.hashCode() + attribute.hashCode(); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + // "arg" checked by superclass + String otherAttr = ((InterpolatedLookupNode)obj).getAttributeName(); + return attribute.equals(otherAttr); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("attribute", attribute); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java new file mode 100644 index 00000000000..64c81072714 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MD5BitFunctionNode.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is a request to calculate the MD5 of the result of its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MD5BitFunctionNode extends UnaryBitFunctionNode { + + public static final int classId = registerClass(0x4000 + 70, MD5BitFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. 
+ */ + public MD5BitFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument and number of bits. + * + * @param arg The argument for this function. + * @param numBits The number of bits to operate on. + */ + public MD5BitFunctionNode(ExpressionNode arg, int numBits) { + super(arg, numBits); + } + + @Override + protected int onGetClassId() { + return classId; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java new file mode 100644 index 00000000000..0d82b6a260e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MathFunctionNode.java @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function is an instruction to negate its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MathFunctionNode extends MultiArgFunctionNode { + + // Make sure these match the definition in c++ searchlib/src/searchlib/expression/mathfunctionnode.h. 
+ public static enum Function { + EXP(0), + POW(1), + LOG(2), + LOG1P(3), + LOG10(4), + SIN(5), + ASIN(6), + COS(7), + ACOS(8), + TAN(9), + ATAN(10), + SQRT(11), + SINH(12), + ASINH(13), + COSH(14), + ACOSH(15), + TANH(16), + ATANH(17), + CBRT(18), + HYPOT(19), + FLOOR(20); + + private final int id; + + private Function(int id) { + this.id = id; + } + + private static Function valueOf(int id) { + for (Function fnc : values()) { + if (id == fnc.id) { + return fnc; + } + } + return null; + } + } + + public static final int classId = registerClass(0x4000 + 136, MathFunctionNode.class); + private Function fnc; + + @SuppressWarnings("UnusedDeclaration") + public MathFunctionNode() { + this(Function.LOG); + } + + public MathFunctionNode(Function fnc) { + this(null, fnc); + } + + public MathFunctionNode(ExpressionNode exp, Function fnc) { + this.fnc = fnc; + if (exp != null) { + addArg(exp); + } + } + + @Override + protected boolean onExecute() { + for (int i = 0; i < getNumArgs(); i++) { getArg(i).execute(); } // POW and HYPOT also read getArg(1), so every argument is executed, not just the first + double result = 0.0; + switch (fnc) { + case EXP: + result = Math.exp(getArg(0).getResult().getFloat()); + break; + case POW: + result = Math.pow(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); + break; + case LOG: + result = Math.log(getArg(0).getResult().getFloat()); + break; + case LOG1P: + result = Math.log1p(getArg(0).getResult().getFloat()); + break; + case LOG10: + result = Math.log10(getArg(0).getResult().getFloat()); + break; + case SIN: + result = Math.sin(getArg(0).getResult().getFloat()); + break; + case ASIN: + result = Math.asin(getArg(0).getResult().getFloat()); + break; + case COS: + result = Math.cos(getArg(0).getResult().getFloat()); + break; + case ACOS: + result = Math.acos(getArg(0).getResult().getFloat()); + break; + case TAN: + result = Math.tan(getArg(0).getResult().getFloat()); + break; + case ATAN: + result = Math.atan(getArg(0).getResult().getFloat()); + break; + case SQRT: + result = Math.sqrt(getArg(0).getResult().getFloat()); + break; + case SINH: + 
result = Math.sinh(getArg(0).getResult().getFloat()); + break; + case ASINH: + throw new IllegalArgumentException("Inverse hyperbolic sine(asinh) is not supported in java"); + case COSH: + result = Math.cosh(getArg(0).getResult().getFloat()); + break; + case ACOSH: + throw new IllegalArgumentException("Inverse hyperbolic cosine (acosh) is not supported in java"); + case TANH: + result = Math.tanh(getArg(0).getResult().getFloat()); + break; + case ATANH: + throw new IllegalArgumentException("Inverse hyperbolic tangents (atanh) is not supported in java"); + case FLOOR: + result = Math.floor(getArg(0).getResult().getFloat()); + break; + case CBRT: + result = Math.cbrt(getArg(0).getResult().getFloat()); + break; + case HYPOT: + result = Math.hypot(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); + break; + } + ((FloatResultNode)getResult()).setValue(result); + return true; + } + + @Override + public void onPrepareResult() { + setResult(new FloatResultNode()); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putByte(null, (byte)fnc.id); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int b = buf.getByte(null); + fnc = Function.valueOf(b & 0xff); + } + + @Override + protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) { + return fnc == ((MathFunctionNode)obj).fnc; + } + + @Override + public MathFunctionNode clone() { + MathFunctionNode obj = (MathFunctionNode)super.clone(); + obj.fnc = fnc; + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("function", fnc); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java new file mode 100644 index 00000000000..8496f88eb1c --- /dev/null 
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MaxFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to return the maximum value of all its arguments. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MaxFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 66, MaxFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).max(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java new file mode 100644 index 00000000000..f7c18077791 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MinFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to return the minimum value of all its arguments. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MinFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 65, MinFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).min(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java new file mode 100644 index 00000000000..a2c919b1d4d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ModuloFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to modulo the arguments in order. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class ModuloFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 64, ModuloFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).modulo(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java new file mode 100644 index 00000000000..4f201e98bfb --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiArgFunctionNode.java @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.*; + +import java.util.ArrayList; +import java.util.List; + +/** + *

This is an abstract super-class for all functions that accept multiple arguments. This node implements the + * necessary API for manipulating arguments.

+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class MultiArgFunctionNode extends FunctionNode { + + public static final int classId = registerClass(0x4000 + 45, MultiArgFunctionNode.class); + private List<ExpressionNode> args = new ArrayList<ExpressionNode>(); // typed so getArg(int) can return ExpressionNode without a cast + + /** + *

Adds the given argument to this function.

+ * + * @param arg The argument to add. + * @return This, to allow chaining. + */ + public MultiArgFunctionNode addArg(ExpressionNode arg) { + arg.getClass(); // throws NullPointerException + args.add(arg); + return this; + } + + /** + *

Returns the argument at the given index.

+ * + * @param i The index of the argument to return. + * @return The argument. + */ + public ExpressionNode getArg(int i) { + return args.get(i); + } + + /** + *

Returns the number of arguments this function has.

+ * + * @return The size of the argument list. + */ + public int getNumArgs() { + return args.size(); + } + + @Override + protected boolean onExecute() { + for (int i = 0; i < args.size(); i++) { + args.get(i).execute(); + } + return calculate(args, getResult()); + } + + @Override + protected void onPrepare() { + for (int i = 0; i < args.size(); i++) { + args.get(i).prepare(); + } + prepareResult(); + } + + /** + *

Perform the appropriate calculation of the arguments into a result node.

+ * + * @param args A list of operands. + * @param result Place to put the result. + * @return True if successful, false if not. + */ + private boolean calculate(final List<ExpressionNode> args, ResultNode result) { + return onCalculate(args, result); + } + + private void prepareResult() { + onPrepareResult(); + } + + protected boolean onCalculate(final List<ExpressionNode> args, ResultNode result) { + result.set(args.get(0).getResult()); // the first operand seeds the result; the rest are folded in below + for (int i = 1; i < args.size(); i++) { + executeIterative(args.get(i).getResult(), result); + } + return true; + } + + protected void onPrepareResult() { + if (args.size() == 1) { + setResult(ArithmeticTypeConversion.getType(args.get(0).getResult())); + } else if (args.size() > 1) { + setResult((ResultNode)args.get(0).getResult().clone()); + for (int i = 1; i < args.size(); i++) { + if (args.get(i).getResult() != null) { + setResult(ArithmeticTypeConversion.getType(getResult(), args.get(i).getResult())); + } + } + } + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + int numArgs = args.size(); + buf.putInt(null, numArgs); + for (ExpressionNode node : args) { + serializeOptional(buf, node); // TODO: Not optional. + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + args.clear(); + int numArgs = buf.getInt(null); + for (int i = 0; i < numArgs; i++) { + ExpressionNode node = (ExpressionNode)deserializeOptional(buf); // TODO: Not optional. 
+ args.add(node); + } + } + + @Override + public int hashCode() { + int ret = super.hashCode(); + for (ExpressionNode node : args) { + ret += node.hashCode(); + } + return ret; + } + + @Override + protected final boolean equalsFunction(FunctionNode obj) { + MultiArgFunctionNode rhs = (MultiArgFunctionNode)obj; + if (!args.equals(rhs.args)) { + return false; + } + if (!equalsMultiArgFunction(rhs)) { + return false; + } + return true; + } + + protected abstract boolean equalsMultiArgFunction(MultiArgFunctionNode obj); + + @Override + public MultiArgFunctionNode clone() { + MultiArgFunctionNode obj = (MultiArgFunctionNode)super.clone(); + obj.args = new ArrayList(); + for (ExpressionNode node : args) { + obj.args.add(node.clone()); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("args", args); + } + + @Override + public void selectMembers(ObjectPredicate predicate, ObjectOperation operation) { + super.selectMembers(predicate, operation); + for (ExpressionNode arg : args) { + arg.select(predicate, operation); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java new file mode 100644 index 00000000000..b55e86ba5fe --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/MultiplyFunctionNode.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to multiply all arguments. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class MultiplyFunctionNode extends NumericFunctionNode { + + public static final int classId = registerClass(0x4000 + 62, MultiplyFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onArgument(final ResultNode arg, ResultNode result) { + ((NumericResultNode)result).multiply(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java new file mode 100644 index 00000000000..0fdf07d6291 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NegateFunctionNode.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to negate its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class NegateFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 60, NegateFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public NegateFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. 
+ */ + public NegateFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + public void onPrepare() { + super.onPrepare(); + } + + @Override + public boolean onExecute() { + getArg().execute(); + getResult().set(getArg().getResult()); + getResult().negate(); + return true; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java new file mode 100644 index 00000000000..dd24c1f9efe --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NormalizeSubjectFunctionNode.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to negate its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class NormalizeSubjectFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 143, NormalizeSubjectFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public NormalizeSubjectFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. 
+ */ + public NormalizeSubjectFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + public void onPrepareResult() { + setResult(new StringResultNode()); + } + + @Override + public void onPrepare() { + super.onPrepare(); + } + + @Override + public boolean onExecute() { + String result = getArg().getResult().getString(); + + if (result.startsWith("Re: ") || result.startsWith("RE: ") || result.startsWith("Fw: ") || + result.startsWith("FW: ")) + { + result = result.substring(4); + } else if (result.startsWith("Fwd: ")) { + result = result.substring(5); + } + + ((StringResultNode)getResult()).setValue(result); + return true; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java new file mode 100644 index 00000000000..bc66e0d1899 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NullResultNode.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.ObjectVisitor; + +/** + * This result holds nothing. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class NullResultNode extends ResultNode { + + // The global class identifier shared with C++. 
+ public static final int classId = registerClass(0x4000 + 57, NullResultNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public long getInteger() { + return 0; + } + + @Override + public double getFloat() { + return 0.0; + } + + @Override + public String getString() { + return ""; + } + + @Override + public byte[] getRaw() { + return new byte[0]; + } + + @Override + protected int onCmp(ResultNode rhs) { + return classId - rhs.getClassId(); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("result", null); + } + + @Override + public void set(ResultNode rhs) { + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java new file mode 100644 index 00000000000..f949dc67936 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumElemFunctionNode.java @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to negate its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class NumElemFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 132, NumElemFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public NumElemFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. 
+ */ + public NumElemFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public void onPrepareResult() { + setResult(new IntegerResultNode(1)); + } + + @Override + public boolean onExecute() { + getArg().execute(); + return true; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java new file mode 100644 index 00000000000..a3312313733 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericFunctionNode.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This is an abstract class for all functions that perform arithmetics. This node implements the necessary API for + * doing arithmetic operations. 
+ * + * @author Ulf Lilleengen + */ +public abstract class NumericFunctionNode extends MultiArgFunctionNode { + + @Override + public void onPrepare() { + super.onPrepare(); + + ResultNode result = getResult(); + if (!(result instanceof IntegerResultNode) && + !(result instanceof FloatResultNode) && + !(result instanceof StringResultNode) && + !(result instanceof RawResultNode)) + { + throw new RuntimeException("Can not perform numeric function on value of type '" + + getResult().getClass().getName() + "'."); + } + } + + @Override + protected final boolean equalsMultiArgFunction(MultiArgFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java new file mode 100644 index 00000000000..70a5cdcaf98 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/NumericResultNode.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This is a superclass for all numerical results. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +abstract public class NumericResultNode extends SingleResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 50, NumericResultNode.class); + + /** + * In-place multiplication of this result with another. + * + * @param rhs The result to multiply with this. + */ + public abstract void multiply(ResultNode rhs); + + /** + * In-place division of this result with another. + * + * @param rhs The result to divide this by. + */ + public abstract void divide(ResultNode rhs); + + /** + * In-place modulo of this result with another. + * + * @param rhs The result to modulo this with. 
+ */ + public abstract void modulo(ResultNode rhs); + + /** + * Return a java numeric, either Double or Long, depending on the underlying container. + * + * @return The underlying numeric value. + */ + public abstract Object getNumber(); + + @Override + public Object getValue() { + return getNumber(); + } + + @Override + protected int onGetClassId() { + return classId; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java new file mode 100644 index 00000000000..6f34f261543 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/OrFunctionNode.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to perform bitwise OR on the result of all arguments. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class OrFunctionNode extends BitFunctionNode { + + public static final int classId = registerClass(0x4000 + 68, OrFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + public void onArgument(final ResultNode arg, IntegerResultNode result) { + result.orOp(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java new file mode 100644 index 00000000000..a72d9d41318 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/PositiveInfinityResultNode.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +/** + * @author Henning Baldersheim + */ +public class PositiveInfinityResultNode extends ResultNode { + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 124, PositiveInfinityResultNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public long getInteger() { + return Long.MAX_VALUE; + } + + @Override + public double getFloat() { + return Double.MAX_VALUE; + } + + @Override + public byte[] getRaw() { + return new byte[0]; + } + + @Override + public String getString() { + return ""; + } + + @Override + protected int onCmp(ResultNode rhs) { + return rhs instanceof PositiveInfinityResultNode ? 0 : 1; + } + + @Override + public void set(ResultNode rhs) { + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java new file mode 100644 index 00000000000..dab0221fcb5 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionNode.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function assign a fixed width bucket to each input value + * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class RangeBucketPreDefFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 76, RangeBucketPreDefFunctionNode.class); + private ResultNodeVector predef = null; + + /** + * Constructs an empty result node. 
+ */ + public RangeBucketPreDefFunctionNode() { + // empty + } + + /** + * Create a bucket expression with the given width and the given subexpression + * + * @param v predefined bucket list + * @param arg The argument for this function. + */ + public RangeBucketPreDefFunctionNode(ResultNodeVector v, ExpressionNode arg) { + addArg(arg); + predef = v; + } + + /** + * Obtain the predefined bucket list of this bucket expression + * + * @return predefined bucket list for this expression + */ + public ResultNodeVector getBucketList() { + return predef; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + serializeOptional(buf, predef); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + predef = (ResultNodeVector)deserializeOptional(buf); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return equals(predef, ((RangeBucketPreDefFunctionNode)obj).predef); + } + + @Override + public RangeBucketPreDefFunctionNode clone() { + RangeBucketPreDefFunctionNode obj = (RangeBucketPreDefFunctionNode)super.clone(); + if (predef != null) { + obj.predef = (ResultNodeVector)predef.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("predef", predef); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java new file mode 100644 index 00000000000..eef386735a1 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNode.java @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * @author Henning Baldersheim + */ +public class RawBucketResultNode extends BucketResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 125, RawBucketResultNode.class); + + // bucket start, inclusive + private ResultNode from = RawResultNode.getNegativeInfinity(); + + // bucket end, exclusive + private ResultNode to = RawResultNode.getNegativeInfinity(); + + @Override + public boolean empty() { + return to.equals(from); + } + + /** + * Constructs an empty result node. + */ + public RawBucketResultNode() { + // empty + } + + /** + * Create a bucket with the given limits + * + * @param from bucket start + * @param to bucket end + */ + public RawBucketResultNode(ResultNode from, ResultNode to) { + this.from = from; + this.to = to; + } + + /** + * Obtain the bucket start + * + * @return bucket start + */ + public byte[] getFrom() { + return from.getRaw(); + } + + /** + * Obtain the bucket end + * + * @return bucket end + */ + public byte[] getTo() { + return to.getRaw(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + serializeOptional(buf, from); + serializeOptional(buf, to); + } + + @Override + protected void onDeserialize(Deserializer buf) { + from = (ResultNode)deserializeOptional(buf); + to = (ResultNode)deserializeOptional(buf); + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + RawBucketResultNode b = (RawBucketResultNode)rhs; + int diff = from.compareTo(b.from); + return (diff == 0) ? 
to.compareTo(b.to) : diff; + } + + @Override + public int hashCode() { + return super.hashCode() + from.hashCode() + to.hashCode(); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("from", from); + visitor.visit("to", to); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java new file mode 100644 index 00000000000..caed1de4134 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawBucketResultNodeVector.java @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * @author Henning Baldersheim + */ +public class RawBucketResultNodeVector extends ResultNodeVector { + // The global class identifier shared with C++. 
+ public static final int classId = registerClass(0x4000 + 126, RawBucketResultNodeVector.class); + private ArrayList<RawBucketResultNode> vector = new ArrayList<RawBucketResultNode>(); + + @Override + protected int onGetClassId() { + return classId; + } + + public RawBucketResultNodeVector() { + } + + public RawBucketResultNodeVector add(RawBucketResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { + return add((RawBucketResultNode)r); + } + + public ArrayList<RawBucketResultNode> getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (RawBucketResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList<RawBucketResultNode>(); + for (int i = 0; i < sz; i++) { + RawBucketResultNode node = new RawBucketResultNode(); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + RawBucketResultNodeVector b = (RawBucketResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java new file mode 100644 index 00000000000..ad40fc5026f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNode.java @@ -0,0 +1,184 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.searchlib.aggregation.RawData; +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +import java.util.Arrays; + +/** + * This result holds a byte array value. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class RawResultNode extends SingleResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 54, RawResultNode.class); + private static RawResultNode negativeInfinity = new RawResultNode(); + private static PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode(); + + // The raw value of this node. + private RawData value = null; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public RawResultNode() { + super(); + value = new RawData(); + } + + /** + * Constructs an instance of this class with given byte buffer. + * + * @param value The value to assign to this. + */ + public RawResultNode(byte[] value) { + super(); + setValue(value); + } + + /** + * Sets the value of this result. + * + * @param value The value to set. + * @return This, to allow chaining. 
+ */ + public RawResultNode setValue(byte[] value) { + this.value = new RawData(value); + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + value.serialize(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + value = new RawData(); + value.deserialize(buf); + } + + @Override + public long getInteger() { + return 0; + } + + @Override + public double getFloat() { + return 0; + } + + @Override + public String getString() { + return new String(value.getData()); + } + + @Override + public byte[] getRaw() { + return value.getData(); + } + + @Override + public String toString() { + if (value != null) { + return Arrays.toString(value.getData()); + } + return "[]"; + } + + @Override + protected int onCmp(ResultNode rhs) { + return (rhs instanceof PositiveInfinityResultNode) + ? -1 + : RawData.compare(value.getData(), rhs.getRaw()); + } + + @Override + public int hashCode() { + return super.hashCode() + value.hashCode(); + } + + @Override + public RawResultNode clone() { + RawResultNode obj = (RawResultNode)super.clone(); + if (value != null) { + obj.value = (RawData)value.clone(); + } + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("value", value); + } + + public void add(ResultNode rhs) { + byte[] nb = new byte[value.getData().length + rhs.getRaw().length]; + System.arraycopy(value.getData(), 0, nb, 0, value.getData().length); + System.arraycopy(rhs.getRaw(), 0, nb, value.getData().length, rhs.getRaw().length); + value = new RawData(nb); + } + + public void min(ResultNode rhs) { + RawData b = new RawData(rhs.getRaw()); + if (value.compareTo(b) > 0) { + value = b; + } + } + + public void max(ResultNode rhs) { + RawData b = new RawData(rhs.getRaw()); + if (value.compareTo(b) < 0) { + value = b; + } + } + + @Override + public Object getValue() { + return getString(); + } + 
+ @Override + public void set(ResultNode rhs) { + value = new RawData(rhs.getRaw()); + } + + @Override + public void negate() { + byte[] data = value.getData(); + for (int i = 0; i < data.length; i++) { + data[i] = (byte)-data[i]; + } + } + + /** + * Will provide the smallest possible value + * + * @return the shared RawResultNode instance that represents negative infinity + */ + public static RawResultNode getNegativeInfinity() { + return negativeInfinity; + } + + /** + * Will provide the largest possible value + * + * @return the shared PositiveInfinityResultNode instance that represents positive infinity + */ + public static PositiveInfinityResultNode getPositiveInfinity() { + return positiveInfinity; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java new file mode 100644 index 00000000000..dc791b7ce69 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RawResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds a vector of raw (byte array) result nodes. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class RawResultNodeVector extends ResultNodeVector { + + // The global class identifier shared with C++.
+ public static final int classId = registerClass(0x4000 + 115, RawResultNodeVector.class); + private ArrayList<RawResultNode> vector = new ArrayList<RawResultNode>(); + + @Override + protected int onGetClassId() { + return classId; + } + + public RawResultNodeVector() { + } + + public RawResultNodeVector add(RawResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { + return add((RawResultNode)r); + } + + public ArrayList<RawResultNode> getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (RawResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList<RawResultNode>(); + for (int i = 0; i < sz; i++) { + RawResultNode node = new RawResultNode(); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + RawResultNodeVector b = (RawResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java new file mode 100644 index 00000000000..90077238925 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/RelevanceNode.java @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This abstract expression node represents a function to execute. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class RelevanceNode extends ExpressionNode { + + public static final int classId = registerClass(0x4000 + 59, RelevanceNode.class); + private FloatResultNode relevance = new FloatResultNode(); + + public RelevanceNode() { + + } + + @Override + public void onPrepare() { + + } + + @Override + public boolean onExecute() { + return true; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + relevance.serialize(buf); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + relevance.deserialize(buf); + } + + @Override + public RelevanceNode clone() { + RelevanceNode obj = (RelevanceNode)super.clone(); + obj.relevance = (FloatResultNode)relevance.clone(); + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("relevance", relevance); + } + + @Override + public ResultNode getResult() { + return relevance; + } + + @Override + protected boolean equalsExpression(ExpressionNode obj) { + return relevance.equals(((RelevanceNode)obj).relevance); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java new file mode 100644 index 00000000000..7a31e1598f6 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNode.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Identifiable; + +/** + * This abstract expression node represents the result value of execution. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class ResultNode extends Identifiable implements Comparable<ResultNode> { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 41, ResultNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public final int compareTo(ResultNode b) { + return onCmp(b); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof ResultNode && compareTo((ResultNode)obj) == 0; + } + + /** + * This method must be implemented by all subclasses of this to allow new results to be calculated. + * + * @param rhs The node to get the result from. + */ + protected abstract void set(ResultNode rhs); + + /** + * This method must be implemented by all subclasses of this to allow ordering of results. This method is used by + * the {@link Comparable} implementation, i.e. {@link #compareTo(ResultNode)} delegates to it. + * + * @param rhs The other node to compare with. + * @return A negative, zero or positive value if this orders before, equal to or after the other node. + */ + protected abstract int onCmp(ResultNode rhs); + + /** + * Returns the integer representation of this result. + * + * @return The value of this. + */ + public abstract long getInteger(); + + /** + * Returns the float representation of this result. + * + * @return The value of this. + */ + public abstract double getFloat(); + + /** + * Returns the string representation of this result. + * + * @return The value of this. + */ + public abstract String getString(); + + /** + * Returns the raw byte array representation of this result. + * + * @return The value of this. + */ + public abstract byte[] getRaw(); + + /** + * Negate the value contained within the result node.
+ */ + public void negate() { + throw new RuntimeException("Class " + getClass().getName() + " does not implement negate"); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java new file mode 100644 index 00000000000..e6d2818e39d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ResultNodeVector.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This is the abstract superclass of all results that hold a vector of result nodes. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class ResultNodeVector extends ResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 108, ResultNodeVector.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public long getInteger() { + return 0; + } + + @Override + public double getFloat() { + return 0.0; + } + + @Override + public String getString() { + return ""; + } + + @Override + public byte[] getRaw() { + return new byte[0]; + } + + @Override + public void set(ResultNode rhs) { + } + + public abstract ResultNodeVector add(ResultNode r); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java new file mode 100644 index 00000000000..7aa9cd92163 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ReverseFunctionNode.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This class will reverse the order of any multivalues.
Nothing is done to single value types such as integers, float, + * strings and Raw values. + * + * @author Henning Baldersheim + */ +public class ReverseFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 138, ReverseFunctionNode.class); + + /** + * Constructs an empty function node. NOTE: This instance is broken until non-optional member data is set. + */ + public ReverseFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. + */ + public ReverseFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java new file mode 100644 index 00000000000..2c9b940cbf0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/SingleResultNode.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * @author Henning Baldersheim + */ +public abstract class SingleResultNode extends ResultNode { + // The global class identifier shared with C++. Must be registered against this class, not a subclass. + public static final int classId = registerClass(0x4000 + 121, SingleResultNode.class); + + /** + * In-place addition of this result with another. + * + * @param rhs The result to add to this. + */ + public abstract void add(ResultNode rhs); + + /** + * Replaces the value of this node with the smaller of this and the other. + * + * @param rhs The other result to evaluate. + */ + public abstract void min(ResultNode rhs); + + /** + * Replaces the value of this node with the larger of this and the other.
+ * + * @param rhs The other result to evaluate. + */ + public abstract void max(ResultNode rhs); + + /** + * Return a java native, either String, Double or Long, depending on the underlying container. + * + * @return The underlying numeric value. + */ + public abstract Object getValue(); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java new file mode 100644 index 00000000000..0b0f1e1ed5b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/SortFunctionNode.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * @author Henning Baldersheim + */ +public class SortFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 137, SortFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public SortFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. + */ + public SortFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java new file mode 100644 index 00000000000..de748394ca3 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrCatFunctionNode.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to append the results of all arguments, in order, to a string result. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class StrCatFunctionNode extends MultiArgFunctionNode { + + public static final int classId = registerClass(0x4000 + 133, StrCatFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsMultiArgFunction(MultiArgFunctionNode obj) { + return true; + } + + @Override + protected void onPrepareResult() { + setResult(new StringResultNode()); + } + + @Override + protected void onPrepare() { + super.onPrepare(); + } + + @Override + protected boolean onExecute() { + for (int i = 0; i < getNumArgs(); i++) { + getArg(i).execute(); + ((StringResultNode)getResult()).append(getArg(i).getResult()); + } + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java new file mode 100644 index 00000000000..dbec8903177 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StrLenFunctionNode.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to compute the length of the string representation of its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class StrLenFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 130, StrLenFunctionNode.class); + + /** + * Constructs an empty function node. NOTE: This instance is broken until non-optional member data is set. + */ + public StrLenFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function.
+ */ + public StrLenFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + public void onPrepareResult() { + setResult(new IntegerResultNode(0)); + } + + @Override + public void onPrepare() { + super.onPrepare(); + } + + @Override + public boolean onExecute() { + ((IntegerResultNode)getResult()).setValue(getArg().getResult().getString().length()); + return true; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java new file mode 100644 index 00000000000..d830cb0f2c4 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNode.java @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is a string bucket value + * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class StringBucketResultNode extends BucketResultNode { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 103, StringBucketResultNode.class); + + // bucket start, inclusive + private ResultNode from = StringResultNode.getNegativeInfinity(); + + // bucket end, exclusive + private ResultNode to = StringResultNode.getNegativeInfinity(); + + @Override + public boolean empty() { + return to.equals(from); + } + + /** + * Constructs an empty result node.
+ */ + public StringBucketResultNode() { + // empty + } + + /** + * Create a bucket with the given limits + * + * @param from bucket start + * @param to bucket end + */ + public StringBucketResultNode(ResultNode from, ResultNode to) { + this.from = from; + this.to = to; + } + + /** + * Create a bucket with the given limits + * + * @param from bucket start + * @param to bucket end + */ + public StringBucketResultNode(String from, String to) { + this(new StringResultNode(from), new StringResultNode(to)); + } + + /** + * Obtain the bucket start + * + * @return bucket start + */ + public String getFrom() { + return from.getString(); + } + + /** + * Obtain the bucket end + * + * @return bucket end + */ + public String getTo() { + return to.getString(); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + serializeOptional(buf, from); + serializeOptional(buf, to); + } + + @Override + protected void onDeserialize(Deserializer buf) { + from = (ResultNode)deserializeOptional(buf); + to = (ResultNode)deserializeOptional(buf); + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + StringBucketResultNode b = (StringBucketResultNode)rhs; + int diff = from.compareTo(b.from); + return (diff == 0) ? 
to.compareTo(b.to) : diff; + } + + @Override + public int hashCode() { + return super.hashCode() + from.hashCode() + to.hashCode(); + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("from", from); + visitor.visit("to", to); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java new file mode 100644 index 00000000000..89570c702ec --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringBucketResultNodeVector.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds nothing. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class StringBucketResultNodeVector extends ResultNodeVector { + + // The global class identifier shared with C++. 
+ public static final int classId = registerClass(0x4000 + 114, StringBucketResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + @Override + protected int onGetClassId() { + return classId; + } + + public StringBucketResultNodeVector() { + } + + public StringBucketResultNodeVector add(StringBucketResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { + return add((StringBucketResultNode)r); + } + + public ArrayList getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (StringBucketResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + StringBucketResultNode node = new StringBucketResultNode(); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + StringBucketResultNodeVector b = (StringBucketResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? (vector.size() - b.vector.size()) : diff; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java new file mode 100644 index 00000000000..f428e2aef9f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNode.java @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.text.Utf8;
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This result holds a string.
+ *
+ * @author Henning Baldersheim
+ * @author Simon Thoresen
+ */
+public class StringResultNode extends SingleResultNode {
+
+    // The global class identifier shared with C++.
+    public static final int classId = registerClass(0x4000 + 53, StringResultNode.class);
+    private static StringResultNode negativeInfinity = new StringResultNode("");
+    private static PositiveInfinityResultNode positiveInfinity = new PositiveInfinityResultNode();
+
+    // The string value of this node.
+    private String value;
+
+    /**
+     * Constructs a result node holding the empty string.
+     */
+    public StringResultNode() {
+        super();
+        value = "";
+    }
+
+    /**
+     * Constructs an instance of this class with given value.
+     *
+     * @param value The value to assign to this.
+     */
+    public StringResultNode(String value) {
+        super();
+        setValue(value);
+    }
+
+    /**
+     * Sets the value of this result.
+     *
+     * @param value The value to set, must not be null.
+     * @return This, to allow chaining.
+     */
+    public StringResultNode setValue(String value) {
+        if (value == null) {
+            throw new IllegalArgumentException("Value can not be null.");
+        }
+        this.value = value;
+        return this;
+    }
+
+    @Override
+    protected int onGetClassId() {
+        return classId;
+    }
+
+    @Override
+    protected void onSerialize(Serializer buf) {
+        byte[] raw = getRaw(); // UTF-8 bytes, written length-prefixed
+        buf.putInt(null, raw.length);
+        buf.put(null, raw);
+    }
+
+    @Override
+    protected void onDeserialize(Deserializer buf) {
+        value = getUtf8(buf);
+    }
+
+    @Override
+    public long getInteger() {
+        try {
+            return Long.parseLong(value); // parse as long so the full range of the return type is supported
+        } catch (java.lang.NumberFormatException e) {
+            return 0; // strings that are not integers evaluate to 0
+        }
+    }
+
+    @Override
+    public double getFloat() {
+        try {
+            return Double.parseDouble(value);
+        } catch (java.lang.NumberFormatException e) {
+            return 0; // strings that are not numbers evaluate to 0
+        }
+    }
+
+    @Override
+    public String getString() {
+        return value;
+    }
+
+    @Override
+    public byte[] getRaw() {
+        return Utf8.toBytes(value);
+    }
+
+    @Override
+    protected int onCmp(ResultNode rhs) {
+        return (rhs instanceof PositiveInfinityResultNode)
+               ? -1 // every string orders before positive infinity
+               : value.compareTo(rhs.getString());
+    }
+
+    @Override
+    public int hashCode() {
+        return super.hashCode() + value.hashCode();
+    }
+
+    @Override
+    public void visitMembers(ObjectVisitor visitor) {
+        super.visitMembers(visitor);
+        visitor.visit("value", value);
+    }
+
+    public void add(ResultNode rhs) { // "adding" strings concatenates them, same as append
+        value += rhs.getString();
+    }
+
+    public void min(ResultNode rhs) { // keeps the lexicographically smaller of this and rhs
+        if (value.compareTo(rhs.getString()) > 0) {
+            value = rhs.getString();
+        }
+    }
+
+    public void max(ResultNode rhs) { // keeps the lexicographically larger of this and rhs
+        if (value.compareTo(rhs.getString()) < 0) {
+            value = rhs.getString();
+        }
+    }
+
+    public void append(ResultNode rhs) {
+        value += rhs.getString();
+    }
+
+    @Override
+    public Object getValue() {
+        return getString();
+    }
+
+    @Override
+    public void set(ResultNode rhs) {
+        value = rhs.getString();
+    }
+
+    @Override
+    public void negate() {
+        char a[] = value.toCharArray();
+        for (int i = 0; i < a.length; i++) {
+            a[i] = (char)-a[i]; // arithmetic negation of each char, narrowed back to char
+        }
+        value = new String(a);
+    }
+
+    /**
+     * Will provide the smallest possible value, which for strings is the empty string.
+     *
+     * @return the shared StringResultNode holding the smallest possible value
+     */
+    public static StringResultNode getNegativeInfinity() {
+        return negativeInfinity;
+    }
+
+    /**
+     * Will provide the largest possible value
+     *
+     * @return the shared PositiveInfinityResultNode, which compares larger than any string
+     */
+    public static PositiveInfinityResultNode getPositiveInfinity() {
+        return positiveInfinity;
+    }
+}
+
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java
new file mode 100644
index 00000000000..ba172f5db01
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/StringResultNodeVector.java
@@ -0,0 +1,80 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.Serializer; + +import java.util.ArrayList; + +/** + * This result holds nothing. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class StringResultNodeVector extends ResultNodeVector { + + // The global class identifier shared with C++. + public static final int classId = registerClass(0x4000 + 111, StringResultNodeVector.class); + private ArrayList vector = new ArrayList(); + + @Override + protected int onGetClassId() { + return classId; + } + + public StringResultNodeVector() { + } + + public StringResultNodeVector add(StringResultNode v) { + vector.add(v); + return this; + } + + public ResultNodeVector add(ResultNode r) { + return add((StringResultNode)r); + } + + public ArrayList getVector() { + return vector; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, vector.size()); + for (StringResultNode node : vector) { + node.serialize(buf); + } + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int sz = buf.getInt(null); + vector = new ArrayList(); + for (int i = 0; i < sz; i++) { + StringResultNode node = new StringResultNode(); + node.deserialize(buf); + vector.add(node); + } + } + + @Override + protected int onCmp(ResultNode rhs) { + if (classId != rhs.getClassId()) { + return (classId - rhs.getClassId()); + } + StringResultNodeVector b = (StringResultNodeVector)rhs; + int minLength = vector.size(); + if (b.vector.size() < minLength) { + minLength = b.vector.size(); + } + int diff = 0; + for (int i = 0; (diff == 0) && (i < minLength); i++) { + diff = vector.get(i).compareTo(b.vector.get(i)); + } + return (diff == 0) ? 
(vector.size() - b.vector.size()) : diff; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java new file mode 100644 index 00000000000..b84fa124841 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/TimeStampFunctionNode.java @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + *

This function extracts a part of a timestamp (such as year, month, or hour), interpreted either as GMT or as local time.

+ * + * @author Haavard Pettersen + * @author Simon Thoresen + */ +public class TimeStampFunctionNode extends UnaryFunctionNode { + + public static enum TimePart { + Year(0), + Month(1), + MonthDay(2), + WeekDay(3), + Hour(4), + Minute(5), + Second(6), + YearDay(7), + IsDST(8); + + private final int id; + + private TimePart(int id) { + this.id = id; + } + + private static TimePart valueOf(int id) { + for (TimePart part : values()) { + if (id == part.id) { + return part; + } + } + return null; + } + } + + public static final int classId = registerClass(0x4000 + 75, TimeStampFunctionNode.class); + private TimePart timePart = TimePart.Year; + private boolean isGmt = false; + + @SuppressWarnings("UnusedDeclaration") + public TimeStampFunctionNode() { + // used by deserializer + } + + /** + *

Create a timestamp expression that retrieves the given time part from the given subexpression.

+ * + * @param arg The argument for this function. + * @param part The part of time to retrieve. + * @param gmt Whether or not to treat time as GMT. + */ + public TimeStampFunctionNode(ExpressionNode arg, TimePart part, boolean gmt) { + addArg(arg); + timePart = part; + isGmt = gmt; + } + + public TimePart getTimePart() { + return timePart; + } + + public boolean isGmt() { + return isGmt; + } + + public boolean isLocal() { + return !isGmt; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putByte(null, (byte)(timePart.id | (isGmt ? 0x80 : 0))); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int b = buf.getByte(null); + timePart = TimePart.valueOf(b & 0x7f); + isGmt = (b & 0x80) != 0; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + TimeStampFunctionNode rhs = (TimeStampFunctionNode)obj; + return timePart == rhs.timePart && isGmt == rhs.isGmt; + } + + @Override + public TimeStampFunctionNode clone() { + TimeStampFunctionNode obj = (TimeStampFunctionNode)super.clone(); + obj.timePart = timePart; + obj.isGmt = isGmt; + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("timepart", timePart); + visitor.visit("islocal", isGmt); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java new file mode 100644 index 00000000000..4511797d3dd --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToFloatFunctionNode.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to negate its argument. 
+ * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class ToFloatFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 134, ToFloatFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public ToFloatFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. + */ + public ToFloatFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java new file mode 100644 index 00000000000..8ff20216374 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToIntFunctionNode.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to negate its argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class ToIntFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 135, ToIntFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public ToIntFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. 
+ */ + public ToIntFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + public void onPrepareResult() { + setResult(new IntegerResultNode()); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java new file mode 100644 index 00000000000..0ee1fd1cb71 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToRawFunctionNode.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function converts its argument to a raw function node. + * + * @author Ulf Lilleengen + */ +public class ToRawFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 141, ToRawFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public ToRawFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument. + * + * @param arg The argument for this function. + */ + public ToRawFunctionNode(ExpressionNode arg) { + addArg(arg); + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return true; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java new file mode 100644 index 00000000000..490d19ad9a8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ToStringFunctionNode.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+/**
+ * This function is an instruction to convert the result of its argument to a string.
+ *
+ * @author Henning Baldersheim
+ * @author Simon Thoresen
+ */
+public class ToStringFunctionNode extends UnaryFunctionNode {
+
+    public static final int classId = registerClass(0x4000 + 131, ToStringFunctionNode.class);
+
+    /**
+     * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set.
+     */
+    public ToStringFunctionNode() {
+
+    }
+
+    /**
+     * Constructs an instance of this class with given argument.
+     *
+     * @param arg The argument for this function.
+     */
+    public ToStringFunctionNode(ExpressionNode arg) {
+        addArg(arg);
+    }
+
+    @Override
+    protected int onGetClassId() {
+        return classId;
+    }
+
+    @Override
+    public void onPrepareResult() {
+        setResult(new StringResultNode());
+    }
+
+    @Override
+    public boolean onExecute() {
+        getArg().execute();
+        ((StringResultNode)getResult()).setValue(getArg().getResult().getString());
+        return true;
+    }
+
+    @Override
+    protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+        return true;
+    }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java
new file mode 100644
index 00000000000..233023d1a2e
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/UcaFunctionNode.java
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.expression;
+
+import com.yahoo.vespa.objects.Deserializer;
+import com.yahoo.vespa.objects.ObjectVisitor;
+import com.yahoo.vespa.objects.Serializer;
+
+/**
+ * This function is a request to use the Unicode Collation Algorithm specification when sorting this field.
+ *
+ * @author Henning Baldersheim
+ */
+public class UcaFunctionNode extends UnaryFunctionNode {
+
+    public static final int classId = registerClass(0x4000 + 140, UcaFunctionNode.class);
+    private String locale = "en-US";
+    private String strength = "TERTIARY";
+
+    /**
+     * Constructs an UCA node without an argument, using the default locale ("en-US") and strength ("TERTIARY").
+     */
+    public UcaFunctionNode() {
+        // empty
+    }
+
+    /**
+     * Create an UCA node with a specific locale and the default strength ("TERTIARY").
+     *
+     * @param arg The argument for this function.
+     * @param locale The locale to use.
+     */
+    public UcaFunctionNode(ExpressionNode arg, String locale) {
+        this(arg, locale, "TERTIARY");
+    }
+
+    /**
+     * Create an UCA node with a specific locale and strength setting.
+     *
+     * @param arg The argument for this function.
+     * @param locale The locale to use.
+     * @param strength The strength setting to use.
+     */
+    public UcaFunctionNode(ExpressionNode arg, String locale, String strength) {
+        addArg(arg);
+        this.locale = locale;
+        this.strength = strength;
+    }
+
+    @Override
+    protected int onGetClassId() {
+        return classId;
+    }
+
+    @Override
+    protected void onSerialize(Serializer buf) {
+        super.onSerialize(buf);
+        putUtf8(buf, locale);
+        putUtf8(buf, strength);
+    }
+
+    @Override
+    protected void onDeserialize(Deserializer buf) {
+        super.onDeserialize(buf);
+        locale = getUtf8(buf);
+        strength = getUtf8(buf);
+    }
+
+    @Override
+    protected boolean equalsUnaryFunction(UnaryFunctionNode obj) {
+        // Nodes that collate with a different locale or strength are not interchangeable.
+        UcaFunctionNode rhs = (UcaFunctionNode)obj;
+        return java.util.Objects.equals(locale, rhs.locale) && java.util.Objects.equals(strength, rhs.strength);
+    }
+
+    @Override
+    public UcaFunctionNode clone() {
+        return (UcaFunctionNode)super.clone();
+    }
+
+    @Override
+    public void visitMembers(ObjectVisitor visitor) {
+        super.visitMembers(visitor);
+        visitor.visit("locale", locale);
+        visitor.visit("strength", strength);
+    }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java
new file mode 100644
index 00000000000..05afc5d99b9
--- /dev/null
+++
b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryBitFunctionNode.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This is an abstract super-class for all unary functions that operator on bit values. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class UnaryBitFunctionNode extends UnaryFunctionNode { + + public static final int classId = registerClass(0x4000 + 46, UnaryBitFunctionNode.class); + private int numBits = 0; + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public UnaryBitFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument and number of bits. + * + * @param arg The argument for this function. + * @param numBits The number of bits to operate on. + */ + public UnaryBitFunctionNode(ExpressionNode arg, int numBits) { + addArg(arg); + setNumBits(numBits); + } + + /** + * Returns the number of bits to operate on. + * + * @return The number of bits. + */ + public final int getNumBits() { + return numBits; + } + + /** + * Sets the number of bits to operate on. + * + * @param numBits The number of bits. + * @return This, to allow chaining. 
+ */ + public UnaryBitFunctionNode setNumBits(int numBits) { + this.numBits = numBits; + return this; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putInt(null, numBits); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + numBits = buf.getInt(null); + } + + @Override + public int hashCode() { + return super.hashCode() + numBits; + } + + @Override + protected final boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return numBits == ((UnaryBitFunctionNode)obj).numBits; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("numBits", numBits); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java new file mode 100644 index 00000000000..84264f47ef4 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/UnaryFunctionNode.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This is an abstract super-class for all functions that accept only a single argument. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public abstract class UnaryFunctionNode extends MultiArgFunctionNode { + + public static final int classId = registerClass(0x4000 + 43, UnaryFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + /** + * Return the single argument given to this function. 
+ * + * @return The argument to this function + */ + public ExpressionNode getArg() { + return getArg(0); + } + + @Override + public void onPrepareResult() { + setResult((ResultNode)getArg().getResult().clone()); + } + + @Override + public void onPrepare() { + super.onPrepare(); + } + + @Override + protected final boolean equalsMultiArgFunction(MultiArgFunctionNode obj) { + return equalsUnaryFunction((UnaryFunctionNode)obj); + } + + protected abstract boolean equalsUnaryFunction(UnaryFunctionNode obj); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java new file mode 100644 index 00000000000..57fa01c97de --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorBitFunctionNode.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is a request to bitwise XOR the result of its first argument with itself in chunks of the second + * argument number of bits. If the result to XOR is a 24 bit value, and the second argument is 8, this function will XOR + * the first 8 bits of the result with the next 8 bits of the result, and then XOR that number with the next 8 bits of + * the result. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class XorBitFunctionNode extends UnaryBitFunctionNode { + + public static final int classId = registerClass(0x4000 + 71, XorBitFunctionNode.class); + + /** + * Constructs an empty result node. NOTE: This instance is broken until non-optional member data is set. + */ + public XorBitFunctionNode() { + + } + + /** + * Constructs an instance of this class with given argument and number of bits. + * + * @param arg The argument for this function. + * @param numBits The number of bits to operate on. 
+ */ + public XorBitFunctionNode(ExpressionNode arg, int numBits) { + super(arg, numBits); + } + + @Override + protected int onGetClassId() { + return classId; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java new file mode 100644 index 00000000000..036d7fc8f16 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/XorFunctionNode.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +/** + * This function is an instruction to perform bitwise XOR on the result of all arguments in order. + * + * @author Henning Baldersheim + * @author Simon Thoresen + */ +public class XorFunctionNode extends BitFunctionNode { + + public static final int classId = registerClass(0x4000 + 69, XorFunctionNode.class); + + @Override + protected int onGetClassId() { + return classId; + } + + public void onArgument(final ResultNode arg, IntegerResultNode result) { + result.xorOp(arg); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java new file mode 100644 index 00000000000..54e86f8353c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/ZCurveFunctionNode.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.Deserializer; +import com.yahoo.vespa.objects.ObjectVisitor; +import com.yahoo.vespa.objects.Serializer; + +/** + * This function decompose two-dimensonal zcurve values into x and y values. 
+ * + * @author Henning Baldersheim + */ +public class ZCurveFunctionNode extends UnaryFunctionNode { + + public static enum Dimension { + X(0), + Y(1); + + private final int id; + + private Dimension(int id) { + this.id = id; + } + + private static Dimension valueOf(int id) { + for (Dimension dim : values()) { + if (id == dim.id) { + return dim; + } + } + return null; + } + } + + public static final int classId = registerClass(0x4000 + 139, ZCurveFunctionNode.class); + private Dimension dim = Dimension.X; + + @SuppressWarnings("UnusedDeclaration") + public ZCurveFunctionNode() { + // used by deserializer + } + + public ZCurveFunctionNode(ExpressionNode arg, Dimension dimension) { + addArg(arg); + dim = dimension; + } + + /** + * Obtain the predefined bucket list of this bucket expression + * + * @return what part of the time you have requested + */ + public final Dimension getDimension() { + return dim; + } + + @Override + protected int onGetClassId() { + return classId; + } + + @Override + protected void onSerialize(Serializer buf) { + super.onSerialize(buf); + buf.putByte(null, (byte)dim.id); + } + + @Override + protected void onDeserialize(Deserializer buf) { + super.onDeserialize(buf); + int b = buf.getByte(null); + dim = Dimension.valueOf(b); + } + + @Override + protected boolean equalsUnaryFunction(UnaryFunctionNode obj) { + return dim == ((ZCurveFunctionNode)obj).dim; + } + + @Override + public ZCurveFunctionNode clone() { + ZCurveFunctionNode obj = (ZCurveFunctionNode)super.clone(); + obj.dim = dim; + return obj; + } + + @Override + public void visitMembers(ObjectVisitor visitor) { + super.visitMembers(visitor); + visitor.visit("dimension", dim); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java new file mode 100644 index 00000000000..ebe2448ebf0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/expression/package-info.java @@ 
-0,0 +1,4 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage package com.yahoo.searchlib.expression; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java new file mode 100644 index 00000000000..285b39cbfbb --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/CategoryFeatureNode.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Arrays; +import java.util.Optional; + +/** + * A GBDT node representing a set inclusion test: feature IN [value-list] where values can be strings or numbers. + * + * @author Jon Bratseth + */ +public final class CategoryFeatureNode extends FeatureNode { + + private final Value[] values; + + public CategoryFeatureNode(String feature, Value[] values, Optional samples, TreeNode left, TreeNode right) { + super(feature, samples, left, right); + this.values = Arrays.copyOf(values, values.length); + } + + /** Returns a copy of the array of values in this */ + public Value[] values() { + return Arrays.copyOf(values, values.length); + } + + @Override + protected String rankingExpressionCondition() { + return " in " + Arrays.toString(values); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java new file mode 100644 index 00000000000..2d69624726c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/FeatureNode.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.evaluation.StringValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import org.w3c.dom.Element; +import org.w3c.dom.Node; + +import java.util.List; +import java.util.Optional; + +/** + * A node in a GBDT tree which references a feature value + * + * @author bratseth + */ +public abstract class FeatureNode extends TreeNode { + + private final String feature; + + private final TreeNode left; + private final TreeNode right; + + public FeatureNode(String feature, Optional samples, TreeNode left, TreeNode right) { + super(samples); + this.feature = feature; + this.left = left; + this.right = right; + } + + public String feature() { return feature; } + + public TreeNode left() { return left; } + + public TreeNode right() { return right; } + + // TODO: Integrate with programmatic API rather than strings + @Override + public String toRankingExpression() { + StringBuilder expression = new StringBuilder(); + expression.append("if (").append(feature).append(rankingExpressionCondition()); + expression.append(", ").append(left.toRankingExpression()); + expression.append(", ").append(right.toRankingExpression()); + + Optional trueProbability = trueProbability(); + if (trueProbability.isPresent()) + expression.append(", ").append(trueProbability.get()); + + expression.append(")"); + return expression.toString(); + } + + private Optional trueProbability() { + if (left.samples().isPresent() && right.samples().isPresent()) + return Optional.of((float)left.samples().get() / (left.samples().get() + right.samples().get())); + return Optional.empty(); + } + + protected abstract String rankingExpressionCondition(); + + public static FeatureNode fromDom(Node node) { + List children = XmlHelper.getChildElements(node, null); + if (children.size() != 2) { + throw new IllegalArgumentException("Expected 2 
children in element '" + node.getNodeName() + "', got " + + children.size() + "."); + } + + String name = XmlHelper.getAttributeText(node, "feature"); + Value[] values = toValues(XmlHelper.getAttributeText(node, "value")); + Optional samples = toInteger(XmlHelper.getOptionalAttributeText(node, "nSamples")); + TreeNode left = TreeNode.fromDom(children.get(0)); + TreeNode right = TreeNode.fromDom(children.get(1)); + + if (name.endsWith("$") || values.length>1 || values[0] instanceof StringValue) + return new CategoryFeatureNode(name, values, samples, left, right); + else + return new NumericFeatureNode(name, values[0], samples, left, right); + } + + /** Converts one or more comma-separated values into an array of values */ + private static Value[] toValues(String valueListString) { + String[] valueStrings = valueListString.split(","); + Value[] values = new Value[valueStrings.length]; + for (int i=0; iSimon Thoresen + */ +public class GbdtConverter { + + /** + * Implements an application main function so that the converter can be used as a command-line tool. + * + * @param args List of arguments. + */ + public static void main(String[] args) { + if (args.length != 1) { + System.err.println("Usage: GbdtConverter "); + System.exit(1); + } + try { + System.out.println(GbdtModel.fromXmlFile(args[0]).toRankingExpression()); + } catch (FileNotFoundException e) { + System.err.println("Could not find file '" + args[0] + "'."); + System.exit(1); + } catch (Exception e) { + System.err.println("An error occurred while parsing the content of file '" + args[0] + "': " + + Exceptions.toMessageString(e)); + System.exit(1); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java new file mode 100644 index 00000000000..0e40fe33b03 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/GbdtModel.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +/** + * @author Simon Thoresen + */ +public class GbdtModel { + + private final List trees; + + public GbdtModel(List trees) { + this.trees = asForest(trees); + } + + public List trees() { + return trees; + } + + public String toRankingExpression() { + if ( ! hasSampleInformation()) + System.err.println("The model nodes does not have the 'nSamples' attribute. " + + "For optimal runtime performance use an 'ext' model which has this information."); + StringBuilder ret = new StringBuilder(); + for (TreeNode tree : trees) { + if (ret.length() > 0) { + ret.append(" +\n"); + } + ret.append(tree.toRankingExpression()); + } + ret.append("\n"); + return ret.toString(); + } + + /** + * Return whether this model has sample information. + * Don't bother to check every node as files either has this for all nodes or for none. 
+ */ + private boolean hasSampleInformation() { + if (trees.size() == 0) return true; // no matter + return trees.get(0).samples().isPresent(); // samples() is an Optional and is never null, so test for presence + } + + public static GbdtModel fromXml(String xml) throws ParserConfigurationException, IOException, SAXException { + return fromDom(XmlHelper.parseXml(xml)); + } + + public static GbdtModel fromXmlFile(String fileName) throws ParserConfigurationException, IOException, SAXException { + return fromDom(XmlHelper.parseXmlFile(fileName)); + } + + public static GbdtModel fromDom(Node doc) { + Element dtree = XmlHelper.getSingleElement(doc, "DecisionTree"); + Element forest = XmlHelper.getSingleElement(dtree, "Forest"); + List trees = XmlHelper.getChildElements(forest, "Tree"); + if (trees.isEmpty()) { + throw new IllegalArgumentException("Forest has no trees."); + } + List model = new ArrayList<>(); + for (Node tree : trees) { + if (XmlHelper.getChildElements(tree, null).isEmpty()) continue; // ignore + model.add(TreeNode.fromDom(XmlHelper.getSingleElement(tree, null))); + } + return new GbdtModel(model); + } + + private static List asForest(List in) { + List out = new ArrayList<>(in.size()); + for (TreeNode node : in) { + if (node instanceof FeatureNode) { + out.add(node); + } else if (node instanceof ResponseNode) { // TODO: We should stop this silliness ... + out.add(new NumericFeatureNode("value(0)", new DoubleValue(1), node.samples(), node, + new ResponseNode(0, Optional.of(0)))); + } else { + throw new UnsupportedOperationException(node.getClass().getName()); + } + } + return Collections.unmodifiableList(out); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java new file mode 100644 index 00000000000..b78b9ed4224 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/NumericFeatureNode.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Arrays; +import java.util.Optional; + +/** + * A GBDT node representing a numeric "less than" comparison: feature < numeric-value + * + * @author Jon Bratseth + */ +public final class NumericFeatureNode extends FeatureNode { + + private final Value value; + + public NumericFeatureNode(String feature, Value value, Optional samples, TreeNode left, TreeNode right) { + super(feature, samples, left, right); + this.value = value; + } + + /** Returns the value the feature is compared against in the condition of this node */ + public Value value() { + return value; + } + + @Override + public String rankingExpressionCondition() { + return " < " + value; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java new file mode 100644 index 00000000000..fa4ef2b38e0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/ResponseNode.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import org.w3c.dom.Node; + +import java.util.Optional; + +/** + * @author Simon Thoresen + */ +public class ResponseNode extends TreeNode { + + private final double value; + + public ResponseNode(double value, Optional samples) { + super(samples); + this.value = value; + } + + public double value() { + return value; + } + + @Override + public String toRankingExpression() { + return String.valueOf(value); + } + + public static ResponseNode fromDom(Node node) { + return new ResponseNode(Double.valueOf(XmlHelper.getAttributeText(node, "value")), + toInteger(XmlHelper.getOptionalAttributeText(node, "nSamples"))); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java new file mode 100644 index 00000000000..a8a6add87cd --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/TreeNode.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import org.w3c.dom.Node; + +import java.util.Optional; + +/** + * @author bratseth + */ +public abstract class TreeNode { + + private final Optional samples; + + public TreeNode(Optional samples) { + this.samples = samples; + } + + public abstract String toRankingExpression(); + + /** + * Returns the number of samples in the training set that match this node, + * or empty if this model does not contain this information (i.e. if it is not an "ext" model). 
+ */ + public Optional samples() { return samples; } + + public static TreeNode fromDom(Node node) { + String nodeName = node.getNodeName(); + if (nodeName.equalsIgnoreCase("node")) { + return FeatureNode.fromDom(node); + } else if (nodeName.equalsIgnoreCase("response")) { + return ResponseNode.fromDom(node); + } else { + throw new UnsupportedOperationException(nodeName); + } + } + + static Optional toInteger(Optional integerText) { + if ( ! integerText.isPresent()) return Optional.empty(); + return Optional.of(Integer.parseInt(integerText.get())); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java new file mode 100644 index 00000000000..4ed0106e7ae --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; + +/** + * @author Simon Thoresen + */ +abstract class XmlHelper { + + private static final Charset UTF8 = Charset.forName("UTF-8"); + + public static Element parseXml(String xml) + throws ParserConfigurationException, IOException, SAXException + { + return parseXmlStream(new ByteArrayInputStream(xml.getBytes(UTF8))); + } + + public static Element parseXmlFile(String fileName) + throws ParserConfigurationException, IOException, 
SAXException + { + try (InputStream in = new FileInputStream(fileName)) { return parseXmlStream(in); } // always close the file, also when parsing fails + } + + public static Element parseXmlStream(InputStream in) + throws ParserConfigurationException, IOException, SAXException + { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document doc = builder.parse(in); + return doc.getDocumentElement(); + } + + public static String getAttributeText(Node node, String name) { + Node valueNode = node.getAttributes().getNamedItem(name); + if (valueNode == null) { + throw new IllegalArgumentException("Missing '" + name + "' attribute in element '" + + node.getNodeName() + "'."); + } + String valueText = valueNode.getTextContent(); + if (valueText == null || valueText.isEmpty()) { + throw new IllegalArgumentException("Attribute '" + name + "' in element '" + + node.getNodeName() + "' is empty."); + } + return valueText; + } + + public static String getAttributeTextOrNull(Node node, String name) { + Node valueNode = node.getAttributes().getNamedItem(name); + if (valueNode == null) return null; + return valueNode.getTextContent(); + } + + public static Optional getOptionalAttributeText(Node node, String name) { + Node valueNode = node.getAttributes().getNamedItem(name); + if (valueNode == null) return Optional.empty(); + return Optional.of(valueNode.getTextContent()); + } + + public static Element getSingleElement(Node node, String name) { + List children = getChildElements(node, name); + if (children.isEmpty()) { + if (name != null) { + throw new IllegalArgumentException("Node '" + node.getNodeName() + "' has no '" + name + "' children."); + } else { + throw new IllegalArgumentException("Node '" + node.getNodeName() + "' has no children."); + } + } + if (children.size() != 1) { + if (name != null) { + throw new IllegalArgumentException("Expected 1 '" + name + "' child, got " + children.size() + "."); + } else { + throw new IllegalArgumentException("Expected 1 child, got " + 
children.size() + "."); + } + } + return children.get(0); + } + + public static List getChildElements(Node node, String name) { + NodeList children = node.getChildNodes(); + List lst = new LinkedList<>(); + for (int i = 0, len = children.getLength(); i < len; ++i) { + Node child = children.item(i); + if (!(child instanceof Element)) { + continue; + } + if (name != null && !child.getNodeName().equalsIgnoreCase(name)) { + continue; + } + lst.add((Element)child); + } + return lst; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java new file mode 100644 index 00000000000..608a4b499ed --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/CaseList.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import java.util.List; + +/** + * A producer of a list of cases for function training. + * + * @author Jon Bratseth + */ +public interface CaseList { + + public List cases(); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java new file mode 100644 index 00000000000..0ccce4ad2ad --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Evolvable.java @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.List; + +/** + * An entity which may evolve over time + * + * @author Jon Bratseth + */ +public abstract class Evolvable implements Comparable { + + public abstract Evolvable makeSuccessor(int memberNumber, List genepool, TrainingEnvironment environment); + + public abstract RankingExpression getGenepool(); + + @Override + public int compareTo(Evolvable other) { + return -Double.compare(getFitness(), other.getFitness()); + } + + public abstract double getFitness(); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java new file mode 100644 index 00000000000..416e2da4c82 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Individual.java @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.Collections; +import java.util.List; + +/** + * An individual in an evolving population - a genome with a fitness score. + * Individuals are comparable by decreasing fitness. + *

+ * As we are training ranking expressions, the genome, here, is the ranking expression. + * + * @author Jon Bratseth + */ +public class Individual extends Evolvable { + + private final RankingExpression genome; + private final TrainingSet trainingSet; + private final double fitness; + + public Individual(RankingExpression genome, TrainingSet trainingSet) { + this.genome = genome; + this.trainingSet = trainingSet; + this.fitness = trainingSet.evaluate(genome); + } + + public RankingExpression getGenome() { return genome; } + + public double calculateAverageError() { + return trainingSet.calculateAverageError(genome); + } + + public double calculateAverageErrorPercentage() { + return trainingSet.calculateAverageErrorPercentage(genome); + } + + @Override + public double getFitness() { return fitness; } + + @Override + public Individual makeSuccessor(int memberNumber, List genepool, TrainingEnvironment environment) { + return new Individual(environment.recombiner().recombine(genome, genepool), trainingSet); + } + + @Override + public RankingExpression getGenepool() { + return genome; + } + + @Override + public String toString() { + return toSomewhatShortString() + ", expression: " + genome; + } + + /** Returns a string with the error percentage, average error and fitness of this (not including the expression) */ + public String toSomewhatShortString() { + return "Error % " + calculateAverageErrorPercentage() + + " average error " + calculateAverageError() + + " fitness " + getFitness(); + } + + /** Returns the shortest description of this: the average error percentage only (not including the expression) */ + public String toShortString() { + return "Error: " + calculateAverageErrorPercentage() + " %"; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java new file mode 100644 index 00000000000..7f2e3645076 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/KeyboardChecker.java @@ -0,0 +1,50 @@ +// Copyright 
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import java.awt.KeyEventDispatcher; +import java.awt.KeyboardFocusManager; +import java.awt.event.KeyEvent; + +/** + * Tracks whether the Q key is currently held down by listening to AWT key press and release events. + * + * @author Jon Bratseth + */ +public class KeyboardChecker { + + private static boolean qPressed = false; + + private static final Object lock = new Object(); // static like the flag it guards, so all instances synchronize on the same monitor + + public KeyboardChecker() { + KeyboardFocusManager.getCurrentKeyboardFocusManager().addKeyEventDispatcher(new KeyEventDispatcher() { + + @Override + public boolean dispatchKeyEvent(KeyEvent ke) { + synchronized (lock) { + switch (ke.getID()) { + case KeyEvent.KEY_PRESSED: + if (ke.getKeyCode() == KeyEvent.VK_Q) { + qPressed = true; + } + break; + + case KeyEvent.KEY_RELEASED: + if (ke.getKeyCode() == KeyEvent.VK_Q) { + qPressed = false; + } + break; + } + return false; + } + } + }); + } + + public boolean isQPressed() { + synchronized (lock) { + return qPressed; + } + } + +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java new file mode 100644 index 00000000000..c62462d0c3d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Main.java @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.io.IOUtils; +import com.yahoo.searchlib.mlr.ga.caselist.FileCaseList; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; + +/** + * Command line runner for training sessions + * + * @author Jon Bratseth + */ +/* +TODO: Switch order of generation and sequence in names +TODO: Output fitness improvement on each step (esp useful for species evolution) +TODO: Detect local optima (no improvement for n rounds) and stop early +TODO: Split into training and validation sets + */ +public class Main { + + public Main(String[] args, Tracker tracker) { + if (args.length < 1 || args[0].trim().equals("help")) { + System.out.println( + "Finds a ranking expression matching a training set given as a case file.\n" + + "Run until the expression seems good enough.\n" + + "Usage: ga - \n" + + " where case-file is a file containing case lines on the form \n" + + " targetValue, argument1:value1, ...\n" + + " (comment lines starting by # are also permitted)\n"); + return; + } + + TrainingParameters parameters = new TrainingParameters(); + //parameters.setAllowConditions(false); + parameters.setErrorIsRelative(false); + parameters.setInitialSpeciesSize(40); + parameters.setSpeciesLifespan(100); + parameters.setExcludeFeatures("F7,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F21,F23,F24,F25,F26,F27,F29,F30,F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F44,F46,F47,F48,F49,F50,F52,F53,F55,F56,F57,F58,F59,F60,F61,F62,F63,F64,F65,F67,F69,F70,F71,F72,F73,F75,F76,F78,F79,F80,F81,F82,F83,F84,F85,F86,F87,F88,F90,F92,F93,F94,F95,F96,F98,F99,F100,F101,F102,F103,F104,F105,F106,F107,F108,F109,F66,F89,F110"); + //parameters.setInitialSpeciesSize(20); + + String caseFile = args[0]; + TrainingSet trainingSet = new TrainingSet(FileCaseList.create(caseFile, parameters), parameters); + 
Trainer trainer = new Trainer(trainingSet); + + if (args.length > 1) { // Evaluate given expression + try { + Individual given = new Individual(new RankingExpression(new BufferedReader(new FileReader(args[1]))), trainingSet); + System.out.println("Error in '" + args[1] + "': error % " + given.calculateAverageErrorPercentage() + + " average error " + given.calculateAverageError() + + " fitness " + given.getFitness()); + } + catch (IOException | ParseException e) { + throw new IllegalArgumentException("Could not evaluate expression in argument 2", e); + } + } + else { // Train expression + // TODO: Move system outs to tracker + System.out.println("Learning ..."); + RankingExpression learntExpression = trainer.train(parameters, tracker); + System.out.println("Learnt expression: " + learntExpression); + } + } + + public static void main(String[] args) { + new Main(args, new PrintingTracker(10, 0, 1)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java new file mode 100644 index 00000000000..484a0747e24 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Population.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * A collection of evolvables + * + * @author Jon Bratseth + */ +public class Population { + + /** The current members of this population, always sorted by decreasing fitness */ + private List members; + + public Population(List initialMembers) { + members = new ArrayList<>(initialMembers); + Collections.sort(members); + } + + /** Returns the most fit member of this population (never null) */ + public Evolvable best() { + return members.get(0); + } + + /** Returns the members of this population as an unmodifiable list sorted by decreasing fitness*/ + public List members() { return Collections.unmodifiableList(members); } + + public void evolve(int generation, TrainingEnvironment environment) { + TrainingParameters p = environment.parameters(); + int generationSize = p.getInitialSpeciesSize() - + (int)Math.round((p.getInitialSpeciesSize() - p.getFinalSpeciesSize()) * generation/p.getSpeciesLifespan()); + members = breed(members, generationSize * p.getGenerationCandidatesFactor(), environment); + Collections.sort(members); + members = members.subList(0, Math.min(generationSize, members.size())); + } + + private List breed(List members, int offspringCount, TrainingEnvironment environment) { + List offspring = new ArrayList<>(offspringCount); // TODO: Can we do this inline and keep the list forever (and then also the immutable view) + offspring.add(members.get(0)); // keep the best as-is + List genePool = collectGenepool(members); + for (int i = 0; i < offspringCount - 1; i++) { + Evolvable child = members.get(i % members.size()).makeSuccessor(i, genePool, environment); + offspring.add(child); + } + return offspring; + } + + private List collectGenepool(List members) { + List genepool = new ArrayList<>(); + for (Evolvable member : members) + 
genepool.add(member.getGenepool()); + return genepool; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java new file mode 100644 index 00000000000..4a3edd35a8d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/PrintingTracker.java @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.yolean.Exceptions; + +import java.util.List; + +/** + * A tracker which prints a summary of training events to standard out + * + * @author Jon Bratseth + */ +public class PrintingTracker implements Tracker { + + private final int iterationEvery; + private final int survivorsEvery; + private final int printSpeciesCreationLevel; + private final int printSpeciesCompletionLevel; + + public PrintingTracker() { + this(0, 1); + } + + public PrintingTracker(int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { + this(Integer.MAX_VALUE, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel); + } + + public PrintingTracker(int iterationEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { + this(iterationEvery, Integer.MAX_VALUE, printSpeciesCreationLevel, printSpeciesCompletionLevel); + } + + public PrintingTracker(int iterationEvery, int survivorsEvery, int printSpeciesCreationLevel, int printSpeciesCompletionLevel) { + this.iterationEvery = iterationEvery; + this.survivorsEvery = survivorsEvery; + this.printSpeciesCreationLevel = printSpeciesCreationLevel; + this.printSpeciesCompletionLevel = printSpeciesCompletionLevel; + } + + @Override + public void newSpecies(Species predecessor, int initialSize, List genePool) { + if (predecessor.name().level() > printSpeciesCreationLevel) return; + 
System.out.println(spaces(predecessor.name().level()*2) + "Creating new species of size " + initialSize + " and a gene pool of size " + genePool.size() + " from predecessor " + predecessor); + } + + @Override + public void newSpeciesCreated(Species species) { + if (species.name().level() > printSpeciesCreationLevel) return; + System.out.println(spaces(species.name().level()*2) + "Created and will now evolve " + species); + } + + @Override + public void speciesCompleted(Species species) { + if (species.name().level() > printSpeciesCompletionLevel) return; + System.out.println(spaces(species.name().level()*2) + "--> Evolution completed for " + species); + } + + /** Called each time a species (or super-species) have completed one generation */ + @Override + public void iteration(Species species, int generation) { + try { + new RankingExpression(species.bestIndividual().getGenome().toString()); + } + catch (Exception e) { + System.err.println("ERROR: " + Exceptions.toMessageString(e) + ": " + species.bestIndividual().getGenome()); + } + + if ( (generation % iterationEvery) == 0) + System.out.println(spaces(species.name().level()*2) + "Gen " + generation + " of " + species); + + if ( (generation % survivorsEvery) == 0) + printPopulation(species.name().level(), species.population().members()); + } + + @Override + public void result(Evolvable winner) { + System.out.println("Learnt expression: " + winner); + } + + private String spaces(int spaces) { + return " ".substring(0,spaces); + } + + private void printPopulation(int level, List survivors) { + if (survivors.size()<=1) return; + System.out.println(" Population:"); + for (Evolvable individual : survivors) + System.out.println(spaces(level*2) + " " + individual); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java new file mode 100644 index 00000000000..a4421595917 --- /dev/null +++ 
b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/RankingExpressionCaseList.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.mlr.ga.CaseList; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.mlr.ga.TrainingSet; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Produces a list of training cases (argument and target value pairs) + * from a Ranking Expression. + * Useful for testing. + * + * @author Jon Bratseth + */ +public class RankingExpressionCaseList implements CaseList { + + private final List cases = new ArrayList(); + + public RankingExpressionCaseList(List arguments, RankingExpression targetFunction) { + for (Context argument : arguments) + cases.add(new TrainingSet.Case(argument,targetFunction.evaluate(argument).asDouble())); + } + + /** Returns the list of cases generated from the ranking expression */ + @Override + public List cases() { return Collections.unmodifiableList(cases); } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java new file mode 100644 index 00000000000..d67afddd3c5 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Recombiner.java @@ -0,0 +1,200 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.rule.*; + +import java.util.*; +import java.util.logging.Logger; + +import static java.lang.Math.abs; +import static java.lang.Math.max; +import static java.lang.Math.min; + +/** + * A class which returns a mutated, recombined genome from a list of parent genomes. + * + * @author Jon Bratseth + */ +public class Recombiner { + + // TODO: Either make ranking expressions immutable and get rid of parent pointer, or do clone everywhere below + + private static final Logger log = Logger.getLogger(Trainer.class.getName()); + + private final Random random = new Random(); + + private final List features; + + private final TrainingParameters parameters; + + /** + * Creates a recombiner + * + * @param features the list of feature names which are possible within the space we are training, + * such that these may be spontaneously added to expressions. 
+ */ + public Recombiner(Collection features, TrainingParameters trainingParameters) { + this.features = Collections.unmodifiableList(new ArrayList<>(features)); + this.parameters = trainingParameters; + } + + public RankingExpression recombine(RankingExpression genome, List genePool) { + List genePoolRoots = new ArrayList<>(); + for (RankingExpression genePoolGenome : genePool) + genePoolRoots.add(genePoolGenome.getRoot()); + return new RankingExpression(mutate(genome.getRoot(), genePoolRoots, 0)); + } + + private ExpressionNode mutate(ExpressionNode gene, List genePool, int depth) { + // TODO: Extract insert level + if (gene instanceof BooleanNode) + return simplifyCondition(mutateChildren((CompositeNode)gene,genePool,depth+1)); + if (gene instanceof CompositeNode) + return insertNodeLevel(simplify(removeNodeLevel(mutateChildren((CompositeNode)gene,genePool,depth+1))), genePool, depth+1); + else + return insertNodeLevel(mutateLeaf(gene), genePool, depth+1); + } + + private BooleanNode simplifyCondition(ExpressionNode node) { + // Nothing yet + return (BooleanNode)node; + } + + /** Very basic algorithmic simplification */ + private ExpressionNode simplify(ExpressionNode node) { + if (! (node instanceof CompositeNode)) return node; + CompositeNode composite = (CompositeNode)node; + if (maxDepth(composite)>2) return composite; + List children = composite.children(); + if (children.size()!=2) return composite; + if ( ! (children.get(0) instanceof ConstantNode)) return composite; + if ( ! 
(children.get(1) instanceof ConstantNode)) return composite; + return new ConstantNode(composite.evaluate(null)); + } + + private CompositeNode mutateChildren(CompositeNode gene, List genePool, int depth) { + if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make this a non-composite + + List mutatedChildren = new ArrayList<>(); + for (ExpressionNode child : gene.children()) + mutatedChildren.add(mutate(child, genePool, depth)); + return gene.setChildren(mutatedChildren); + } + + private ExpressionNode insertNodeLevel(ExpressionNode gene, List genePool, int depth) { + if (probability() < 0.9) return gene; + if (depth + maxDepth(gene) >= parameters.getMaxExpressionDepth()) return gene; + ExpressionNode newChild = generateChild(genePool, depth); + if (probability() < 0.5) + return generateComposite(gene, newChild, genePool, depth); + else + return generateComposite(newChild, gene, genePool, depth); + } + + private ExpressionNode removeNodeLevel(CompositeNode gene) { + if (gene instanceof ReferenceNode) return gene; // TODO: Remove if we make featurenode a non-composite + if (probability() < 0.9) return gene; + return randomFrom(gene.children()); + } + + private ExpressionNode generateComposite(ExpressionNode left, ExpressionNode right, List genePool, int depth) { + int type = random.nextInt(2 + ( parameters.getAllowConditions() ? 
1:0 ) ); // pick equally between 2 or 3 types + if (type == 0) { + return new ArithmeticNode(left, pickArithmeticOperator(), right); + } + else if (type == 1) { + Function function = pickFunction(); + if (function.arity() == 1) + return new FunctionNode(function, left); + else // arity==2 + return new FunctionNode(function, left, right); + } + else { + return new IfNode(generateCondition(genePool, depth + 1), left, right); + } + } + + private BooleanNode generateCondition(List genePool, int depth) { + // TODO: Add set membership nodes + return new ComparisonNode(generateChild(genePool, depth), TruthOperator.SMALLER, generateChild(genePool, depth)); + } + + private ExpressionNode generateChild(List genePool, int depth) { + if (genePool.isEmpty() || probability() < 0.1) { // entirely new child + return generateLeaf(); + } + else { // pick from gene pool + ExpressionNode picked = randomFrom(genePool); + int pickedDepth = 0; + // descend until we are at least as deep as this depth + // to make sure branches spliced in are shallow enough that we avoid growing + // larger than maxDepth + while (picked instanceof CompositeNode && (pickedDepth++ < depth || probability() < 0.5)) { + if (picked instanceof ReferenceNode) break; // never descend into a reference; break rather than continue, which only re-rolled the loop condition without changing picked. TODO: Remove if we make referencenode a noncomposite + picked = randomFrom(((CompositeNode)picked).children()); + } + return picked; + } + } + + public ExpressionNode mutateLeaf(ExpressionNode leaf) { + if (probability() < 0.5) return leaf; // TODO: For performance. Drop? 
+ if ( ! (leaf instanceof ConstantNode)) return leaf; // only constants can be scaled; return other leaf types unchanged instead of failing the cast below. TODO: Other leaves + ConstantNode constant = (ConstantNode)leaf; + return new ConstantNode(DoubleValue.frozen(constant.getValue().asDouble()*aboutOne())); + } + + public ExpressionNode generateLeaf() { + if (probability()<0.5 || features.size() == 0) + return new ConstantNode(DoubleValue.frozen(random.nextDouble() * 2000 - 1000)); // TODO: Use some non-uniform distribution + else + return new ReferenceNode(randomFrom(features)); + } + + private double aboutOne() { // a factor close to one: 0.9, 1.01, 0.999 or 1.0001, picked uniformly + return 1 + Math.pow(-0.1, random.nextInt(4) + 1); + } + + private double probability() { + return random.nextDouble(); + } + + private T randomFrom(List expressionList) { + return expressionList.get(random.nextInt(expressionList.size())); + } + + private ArithmeticOperator pickArithmeticOperator() { + switch (random.nextInt(4)) { + case 0: return ArithmeticOperator.PLUS; + case 1: return ArithmeticOperator.MINUS; + case 2: return ArithmeticOperator.MULTIPLY; + case 3: return ArithmeticOperator.DIVIDE; + } + throw new RuntimeException("This cannot happen"); + } + + /** Pick among the subset of functions which are probably useful */ + private Function pickFunction() { + switch (random.nextInt(5)) { + case 0: return Function.tanh; + case 1: return Function.exp; + case 2: return Function.log; + case 3: return Function.pow; + case 4: return Function.sqrt; + } + throw new RuntimeException("This cannot happen"); + } + + // TODO: Make ranking expressions immutable and compute this on creation? + private int maxDepth(ExpressionNode node) { + if ( ! 
(node instanceof CompositeNode)) return 1; + + int maxChildDepth = 0; + for (ExpressionNode child : ((CompositeNode)node).children()) + maxChildDepth = Math.max(maxDepth(child), maxChildDepth); + return maxChildDepth + 1; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java new file mode 100644 index 00000000000..39694b6253f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Species.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.ArrayList; +import java.util.List; + +/** + * A species is a population of evolvables. + * Contrary to a real species, a species population may contain (sub)species + * rather than individuals - at all levels but the lowest. + * + * @author Jon Bratseth + */ +public class Species extends Evolvable { + + private SpeciesName name; + private final Population population; + + /** Create a species having a given initial population */ + public Species(SpeciesName name, Population population) { + this.name = name; + this.population = population; + } + + /** Create a species evolved from a predecessor species, using the given gene pool for mutating it */ + private Species(SpeciesName name, Species predecessor, List genepool, TrainingEnvironment environment) { + this.name = name; + environment.tracker().newSpecies(predecessor, environment.parameters().getInitialSpeciesSize(), genepool); + + // Initialize new species with members generated from the predecessor species + List initialMembers = new ArrayList<>(); + for (int i = 0; i < environment.parameters().getInitialSpeciesSize(); i++) + initialMembers.add(drawFrom(predecessor.population, i).makeSuccessor(i, genepool, environment)); + population = new 
Population(initialMembers); + + // Evolve the population of this species for the configured number of generations + environment.tracker().newSpeciesCreated(this); + for (int generation = 0; generation < environment.parameters().getSpeciesLifespan(); generation++) { + environment.tracker().iteration(this, generation+1); + population.evolve(generation, environment); + if (Double.isInfinite(bestIndividual().getFitness())) break; // jackpot + // if (keyboardChecker.isQPressed()) break; // user quit TODO: Make work + } + environment.tracker().speciesCompleted(this); + } + + /** + * Draws a member from the given population, where the probability of being drawn is proportional to the + * fitness of the member + */ + private Evolvable drawFrom(Population population, int succession) { + return population.members().get(Math.min(succession % 3, population.members().size() - 1)); // TODO: Probabilistic by fitness? + } + + public SpeciesName name() { return name; } + + /** The fitness of the fittest individual in the population */ + @Override + public double getFitness() { + return population.best().getFitness(); + } + + /** Creates the successor of this, using its genes, mutated drawing from the given gene pool */ + @Override + public Evolvable makeSuccessor(int memberNumber, List genepool, TrainingEnvironment environment) { + return new Species(name.successor(memberNumber), this, genepool, environment); + } + + /** Returns the members of this species */ + public Population population() { return population; } + + /** The genes of the fittest individual in the population of this */ + @Override + public RankingExpression getGenepool() { // TODO: Less sharp? 
+ return population.best().getGenepool(); + } + + /** Returns the best individual below this in the species hierarchy (e.g recursively the best leaf) */ + public Individual bestIndividual() { + Evolvable child = this; + while (child instanceof Species) + child = ((Species)child).population.best(); + return (Individual)child; // it is when it is not instanceof Species + } + + @Override + public String toString() { + return "species " + name + ", best member: " + population.best(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java new file mode 100644 index 00000000000..3bd8ae5e55f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/SpeciesName.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +/** + * The name of a species. For tracking purposes. + * A name has the form superSpeciesName + "/" + serialNumber.generationNumber. + * + * @author Jon Bratseth + */ +public class SpeciesName { + + private final int level, serial, generation; + + private final String name, prefixName; + + private SpeciesName(int level, int serial, int generation, String prefixName) { + this.level = level; + this.serial = serial; + this.generation = generation; + this.prefixName = prefixName; + if (level == 0) + this.name = ""; + else + this.name = prefixName + (prefixName.isEmpty() ? "" : "/") + serial + "." + generation; + } + + /** + * The level in the species hierarchy of the species having this name. + * The root species has level 0. 
+ */ + public int level() { return level; } + + /** Returns the name of the root species: The empty string at level 0 */ + public static SpeciesName createRoot() { + return new SpeciesName(0 ,0 ,0, ""); + } + + @Override + public String toString() { + if (level == 0) return "(root)"; + return name; + } + + /** Returns the name of a new subspecies */ + public SpeciesName subspecies(int serial) { + return new SpeciesName(level+1, serial, 0, name); + } + + /** Returns the name of the successor of this species */ + public SpeciesName successor(int serial) { + return new SpeciesName(level, serial, generation+1, prefixName); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java new file mode 100644 index 00000000000..d86af40b805 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Tracker.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.List; + +/** + * A tracker receives callbacks about events happening during a training session. + * + * @author Jon Bratseth + */ +public interface Tracker { + + public void newSpecies(Species predecessor, int initialSize, List genePool); + + public void newSpeciesCreated(Species species); + + public void speciesCompleted(Species species); + + public void iteration(Species species, int generation); + + public void result(Evolvable winner); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java new file mode 100644 index 00000000000..7e2551eccb2 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/Trainer.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Learns a ranking expression from some seed expressions and a training set. + * + * @author Jon Bratseth + */ +public class Trainer { + + // TODO: Simplify this to constructor only ... or maybe remove ... or combine with TrainingEnvironment + // TODO: Also: Rename to Training? + + private final TrainingSet trainingSet; + private final Set argumentNames; + + /** + * Creates a new trainer. + */ + public Trainer(TrainingSet trainingSet) { + this(trainingSet, trainingSet.argumentNames()); + } + + /** + * Creates a new trainer which uses a specified list of expression argument names + * rather than the argument names given by the training set. 
+ */ + public Trainer(TrainingSet trainingSet, Set argumentNames) { + this.trainingSet = trainingSet; + this.argumentNames = new HashSet<>(argumentNames); + } + + public RankingExpression train(TrainingParameters parameters, Tracker tracker) { + TrainingEnvironment environment = new TrainingEnvironment(new Recombiner(argumentNames, parameters), tracker, trainingSet, parameters); + SpeciesName rootName = SpeciesName.createRoot(); + Species genesisSubSpecies = new Species(rootName.subspecies(0), new Population(Collections.singletonList(new Individual(new RankingExpression(new ConstantNode(new DoubleValue(1))), trainingSet)))); + Species rootSpecies = (Species) new Species(rootName, new Population(Collections.singletonList(genesisSubSpecies))) + .makeSuccessor(0, Collections.emptyList(), environment); + Individual winner = rootSpecies.bestIndividual(); + tracker.result(winner); + return winner.getGenome(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java new file mode 100644 index 00000000000..757a2e4d3d2 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingEnvironment.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +/** + * The static environment of a training session + * + * @author Jon Bratseth + */ +public class TrainingEnvironment { + + // TODO: Not sure if this belongs ... 
or should even be an instance + // TODO: maybe collapse Trainer into this and call it TrainingSession + private final Recombiner recombiner; + private final Tracker tracker; + private final TrainingSet trainingSet; + private final TrainingParameters parameters; + + public TrainingEnvironment(Recombiner recombiner, Tracker tracker, + TrainingSet trainingSet, TrainingParameters parameters) { + this.recombiner = recombiner; + this.tracker = tracker; + this.trainingSet = trainingSet; + this.parameters = parameters; + } + + public Recombiner recombiner() { return recombiner; } + public Tracker tracker() { return tracker; } + public TrainingSet trainingSet() { return trainingSet; } + public TrainingParameters parameters() { return parameters; } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java new file mode 100644 index 00000000000..e18f560878e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingParameters.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author Jon Bratseth + */ +public class TrainingParameters { + + // A note: + // The total number of species generated and evaluated is + // (generationCandidatesFactor * speciesLifespan * (initialSpeciesSize-finalSpeciesSize)/2 ) ^ speciesLevels + // (speciesLevel is hardcoded to 2 atm) + + private int speciesLifespan = 1000; + private int initialSpeciesSize = 10; + private double finalSpeciesSize = 1; + private int generationCandidatesFactor = 3; + private int maxExpressionDepth = 6; + private boolean allowConditions = true; + private boolean errorIsRelative = true; + private Set excludeFeatures = new HashSet<>(); + private String trainingSetFormat = null; + private double validationFraction = 0.2; + + /** The number of generation which a given species (or super-species at any level) lives. Default:1000 */ + public int getSpeciesLifespan() { return speciesLifespan; } + public void setSpeciesLifespan(int generations) { this.speciesLifespan = generations; } + + /** The number of members in a species (or super-species at any level) as it is created. Default: 10 */ + public int getInitialSpeciesSize() { return initialSpeciesSize; } + public void setInitialSpeciesSize(int initialSpeciesSize) { this.initialSpeciesSize = initialSpeciesSize; } + + /** + * The number of members in a species in its final generation. + * The size of the species will be reduced linearly in each generation to go from initial size to final size. + * Default: 1 + */ + public double getFinalSpeciesSize() { return finalSpeciesSize; } + public void setFinalSpeciesSize(int finalSpeciesSize) { this.finalSpeciesSize = finalSpeciesSize; } + + /* + * The factor determining how many more members are generated than are allowed to survive in each generation of a species. 
+ * Default: 3 + */ + public int getGenerationCandidatesFactor() { return generationCandidatesFactor; } + public void setGenerationCandidatesFactor(int generationCandidatesFactor) { this.generationCandidatesFactor = generationCandidatesFactor; } + + /** + * The max depth of expressions this is allowed to generate. + * Default: 6 + */ + public int getMaxExpressionDepth() { return maxExpressionDepth; } + public void setMaxExpressionDepth(int maxExpressionDepth) { this.maxExpressionDepth = maxExpressionDepth; } + + /** + * Whether mutation should allow creation of condition (if) expressions. + * Default: true + */ + public boolean getAllowConditions() { return allowConditions; } + public void setAllowConditions(boolean allowConditions) { this.allowConditions = allowConditions; } + + /** + * Whether errors are relative to the absolute value of the function at that point or not. + * If true, training will assign equal weight to the error of 1.1 for 1 and 110 for 100. + * If false, training will instead assign a 10x weight to the latter. + * Default: True. + */ + public boolean getErrorIsRelative() { return errorIsRelative; } + public void setErrorIsRelative(boolean errorIsRelative) { this.errorIsRelative = errorIsRelative; } + + /** + * Returns the set of features to exclude during training. + * Returned as an immutable set, never null. + */ + public Set getExcludeFeatures() { return excludeFeatures; } + /** Sets the features to exclude from a comma-separated string */ + public void setExcludeFeatures(String excludeFeatureString) { + for (String featureName : excludeFeatureString.split(",")) + excludeFeatures.add(featureName.trim()); + } + + /** + * Returns the format of the training set to read. "fv" or "cvs" is supported. + * If this is null the format name is taken from the last name of the file instead. + * Default: null. 
+ */ + public String getTrainingSetFormat() { return trainingSetFormat; } + public void setTrainingSetFormat(String trainingSetFormat) { this.trainingSetFormat = trainingSetFormat; } + + /** + * Returns the fraction of the result set to hold out of training and use for validation. + * Default 0.2 + */ + public double getValidationFraction() { return validationFraction; } + public void setValidationFraction(double validationFraction) { this.validationFraction = validationFraction; } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java new file mode 100644 index 00000000000..507ab26806a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/TrainingSet.java @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * A training set: a set of cases: Input data to output value pairs + * + * @author Jon Bratseth + */ +public class TrainingSet { + + private final TrainingParameters parameters; + private final List trainingCases; + private final List validationCases; + private final Set argumentNames = new HashSet<>(); + + /** + * Creates a training set from a list of cases. + * The ownership of the argument list and all the cases are transferred to this by this call. 
+ */ + public TrainingSet(CaseList caseList, TrainingParameters parameters) { + List cases = caseList.cases(); + + this.parameters = parameters; + for (Case aCase : cases) + argumentNames.addAll(aCase.arguments().names()); + argumentNames.removeAll(parameters.getExcludeFeatures()); + + int validationCaseCount = (int)Math.round((cases.size() * parameters.getValidationFraction())); + this.validationCases = cases.subList(0, validationCaseCount); + this.trainingCases = cases.subList(validationCaseCount, cases.size()); + } + + public Set argumentNames() { + return Collections.unmodifiableSet(argumentNames); + } + + /** + * Returns the fitness of a genome (ranking expression) according to this training set. + * The fitness to be returned by this is the inverse of the average squared difference between the + * target function result and the function result returned by the genome function. + */ + // TODO: Take expression length into account. + public double evaluate(RankingExpression genome) { + boolean constantExpressionGenome = true; + double squaredErrorSum = 0; + Double previousValue = null; + for (Case trainingCase : trainingCases) { + double value = genome.evaluate(trainingCase.arguments()).asDouble(); + double error = saneAbs(effectiveError(trainingCase.targetValue(), value)); + squaredErrorSum += Math.pow(error, 2); + + if (previousValue != null && previousValue != value) + constantExpressionGenome = false; + previousValue = value; + } + if (constantExpressionGenome) return 0; // Disqualify constant expressions as we know we're not looking for them + return 1 / (squaredErrorSum / trainingCases.size()); + } + + private double effectiveError(double a, double b) { + return parameters.getErrorIsRelative() ? 
errorFraction(a, b) : a - b; + } + + /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */ + public double calculateAverageError(RankingExpression genome) { + double errorSum=0; + for (Case trainingCase : trainingCases) + errorSum += saneAbs(trainingCase.targetValue() - genome.evaluate(trainingCase.arguments()).asDouble()); + return errorSum/(double) trainingCases.size(); + } + + /** Calculate error in a way which is easy to understand (but which behaves badly when the target is around 0 */ + public double calculateAverageErrorPercentage(RankingExpression genome) { + double errorFractionSum = 0; + for (Case trainingCase : trainingCases) { + double errorFraction = saneAbs(errorFraction(trainingCase.targetValue(), genome.evaluate(trainingCase.arguments()).asDouble())); + // System.out.println("Error %: " + (100 * errorFraction + " Target: " + trainingCase.targetValue() + " Learned: " + genome.evaluate(trainingCase.arguments()).asDouble())); + errorFractionSum += errorFraction; + } + return ( errorFractionSum/(double) trainingCases.size() ) *100; + } + + private double errorFraction(double a, double b) { + double error = a - b; + if (error == 0 ) return 0; // otherwise a or b is different from 0 + if (a != 0) + return error / a; + else + return error / b; + } + + private double saneAbs(double d) { + if (Double.isInfinite(d) || Double.isNaN(d)) return Double.MAX_VALUE; + return Math.abs(d); + } + + public static class Case { + + private Context arguments; + + private double targetValue; + + public Case(Context arguments, double targetValue) { + this.arguments = arguments; + this.targetValue = targetValue; + } + + public double targetValue() { return targetValue; } + + public Context arguments() { return arguments; } + + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java new file 
mode 100644 index 00000000000..78291768380 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/CsvFileCaseList.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga.caselist; + +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; + +import java.util.Optional; + +/** + *

A list of training set cases created by reading a file containing lines specifying a case + * per line using the following syntax + * targetValue, argument1:value1, argument2:value2, ... + * where arguments are identifiers and values are doubles.

+ * + *

Comment lines starting with "#" are ignored.

+ * + * @author Jon Bratseth + */ +public class CsvFileCaseList extends FileCaseList { + + public CsvFileCaseList(String fileName) { + super(fileName); + } + + protected Optional lineToCase(String line, int lineNumber) { + String[] elements = line.split(","); + if (elements.length<2) + throw new IllegalArgumentException("At line " + lineNumber + ": Expected a comma-separated case on the " + + "form 'targetValue, argument1:value1, ...', but got '" + line ); + + double target; + try { + target = Double.parseDouble(elements[0].trim()); + } + catch (NumberFormatException e) { + throw new IllegalArgumentException("At line " + lineNumber + ": Expected a target value double " + + "at the start of the line, got '" + elements[0] + "'"); + } + + Context context = new MapContext(); + for (int i=1; i cases = new ArrayList<>(); + + /** + * Reads a case list from file. + * + * @throws IllegalArgumentException if the file could not be found or opened + */ + public FileCaseList(String fileName) { + try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) { + String line; + int lineNumber=0; + while (null != (line=reader.readLine())) { + lineNumber++; + line = line.trim(); + if (line.startsWith("#")) continue; + if (line.isEmpty()) continue; + Optional newCase = lineToCase(line, lineNumber); + if (newCase.isPresent()) + cases.add(newCase.get()); + + } + } + catch (IOException | IllegalArgumentException e) { + throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e); + } + } + + /** Returns the case constructed from reading a line, if any */ + protected abstract Optional lineToCase(String line, int lineNumber); + + @Override + public List cases() { return Collections.unmodifiableList(cases); } + + /** Creates a file case list of the type specified in the parameters */ + public static FileCaseList create(String fileName, TrainingParameters parameters) { + String format = parameters.getTrainingSetFormat(); + if (format == 
null) + format = ending(fileName); + + switch (format) { + case "csv" : return new CsvFileCaseList(fileName); + case "fv" : return new FvFileCaseList(fileName); + default : throw new IllegalArgumentException("Unknown file format '" + format + "'"); + } + } + + private static String ending(String fileName) { + int lastDot = fileName.lastIndexOf("."); + if (lastDot <= 0) return ""; // no ending: return the empty string rather than null, as switching on a null string throws NullPointerException instead of reporting an unknown format + return fileName.substring(lastDot + 1, fileName.length()); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java new file mode 100644 index 00000000000..ec07a939932 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/ga/caselist/FvFileCaseList.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga.caselist; + +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; + +import java.util.Optional; + +/** + * A list of training set cases created by reading a file containing lines specifying a case + * per line using the following syntax + * feature1\tfeature2\tfeature3\t...\ttarget1 + *

+ * The first line contains the name of each feature in the same order. + * + *

Comment lines starting with "#" are ignored.

+ * + * @author Jon Bratseth + */ +// NOTE: If we get another type of case list it is time to abstract into a common CaseList base class +public class FvFileCaseList extends FileCaseList { + + private String[] argumentNames; + + public FvFileCaseList(String fileName) { + super(fileName); + } + + protected Optional lineToCase(String line, int lineNumber) { + String[] values = line.split("\t"); + + if (argumentNames == null) { // first line + argumentNames = values; + return Optional.empty(); + } + + if (argumentNames.length != values.length) + throw new IllegalArgumentException("Wrong number of values at line " + lineNumber); + + + Context context = new MapContext(); + for (int i = 0; i < values.length-1; i++) + context.put(argumentNames[i], toDouble(values[i], lineNumber)); + + double target = toDouble(values[values.length-1], lineNumber); + return Optional.of(new TrainingSet.Case(context, target)); + } + + private double toDouble(String s, int lineNumber) { + try { + return Double.parseDouble(s.trim()); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("At line " + lineNumber + ": Expected only double values, " + + "got '" + s + "'"); + } + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java b/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java new file mode 100644 index 00000000000..874f8e8666b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysis.java @@ -0,0 +1,425 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.gbdt; + +import com.yahoo.searchlib.rankingexpression.rule.SetMembershipNode; +import com.yahoo.yolean.Exceptions; +import com.yahoo.searchlib.mlr.ga.Individual; +import com.yahoo.searchlib.mlr.ga.PrintingTracker; +import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; +import com.yahoo.searchlib.mlr.ga.Trainer; +import com.yahoo.searchlib.mlr.ga.TrainingParameters; +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode; +import com.yahoo.searchlib.rankingexpression.rule.ComparisonNode; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.rule.IfNode; +import com.yahoo.searchlib.rankingexpression.rule.NegativeNode; +import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Random; + +/** + * A standalone tool which analyzes a GBDT form ranking expression + * + * @author bratseth + */ +public class ExpressionAnalysis { + + private final Map features = new HashMap<>(); + + private int currentTree; 
+ + private final RankingExpression expression; + + public ExpressionAnalysis(RankingExpression expression) { + this.expression = expression; + if ( ! instanceOf(expression.getRoot(), ArithmeticNode.class)) return; + analyzeSum((ArithmeticNode)expression.getRoot()); + } + + /** Returns the expression analyzed by this */ + public RankingExpression expression() { return expression; } + + /** Returns the analysis of each feature in this expression as a read-only map indexed by feature name */ + private Map featureMap() { + return Collections.unmodifiableMap(features); + } + + /** Returns list containing the analysis of each feature, sorted by decreasing usage */ + private List features() { + List featureList = new ArrayList<>(features.values()); + Collections.sort(featureList); + return featureList; + } + + /** Returns the name of each feature, sorted by decreasing usage */ + private List featureNames() { + List featureNameList = new ArrayList<>(features.values().size()); + for (Feature feature : features()) + featureNameList.add(feature.name()); + return featureNameList; + } + + private void analyzeSum(ArithmeticNode node) { + for (ExpressionNode child : node.children()) { + currentTree++; + analyze(child); + } + } + + private void analyze(ExpressionNode node) { + if (node instanceof IfNode) { + analyzeIf((IfNode)node); + } + + if (node instanceof CompositeNode) { + for (ExpressionNode child : ((CompositeNode)node).children()) + analyze(child); + } + } + + private void analyzeIf(IfNode node) { + if (node.getCondition() instanceof ComparisonNode) + analyzeComparisonIf(node); + else if (node.getCondition() instanceof SetMembershipNode) + analyzeSetMembershipIf(node); + else + System.err.println("Warning: Expected a comparison or set membership test, got " + node.getCondition().getClass()); + } + + private void analyzeComparisonIf(IfNode node) { + ComparisonNode comparison = (ComparisonNode)node.getCondition(); + + if (comparison.getOperator() != TruthOperator.SMALLER) 
{ + System.err.println("Warning: This expression has " + comparison.getOperator() + " where we expect < :" + + comparison); + return; + } + + if ( ! instanceOf(comparison.getLeftCondition(), ReferenceNode.class)) return; + String featureName = ((ReferenceNode)comparison.getLeftCondition()).getName(); + + Double value = nodeValue(comparison.getRightCondition()); + if (value == null) return; + + ComparisonFeature feature = (ComparisonFeature)features.get(featureName); + if (feature == null) { + feature = new ComparisonFeature(featureName); + features.put(featureName, feature); + } + feature.isComparedTo(value, currentTree, average(node.getTrueExpression()), average(node.getFalseExpression())); + } + + private void analyzeSetMembershipIf(IfNode node) { + SetMembershipNode membershipTest = (SetMembershipNode)node.getCondition(); + + if ( ! instanceOf(membershipTest.getTestValue(), ReferenceNode.class)) return; + String featureName = ((ReferenceNode)membershipTest.getTestValue()).getName(); + + SetMembershipFeature feature = (SetMembershipFeature)features.get(featureName); + if (feature == null) { + feature = new SetMembershipFeature(featureName); + features.put(featureName, feature); + } + } + + /** + * Returns the value of a constant node, or a negative wrapping a constant. + * Warns and returns null if it is neither. + */ + private Double nodeValue(ExpressionNode node) { + if (node instanceof NegativeNode) { + NegativeNode negativeNode = (NegativeNode)node; + if ( ! instanceOf(negativeNode.getValue(), ConstantNode.class)) return null; + return - ((ConstantNode)negativeNode.getValue()).getValue().asDouble(); + } + else { + if ( ! 
instanceOf(node, ConstantNode.class)) return null; + return ((ConstantNode)node).getValue().asDouble(); + } + } + + + /** Returns the average value of all the leaf constants below this */ + private double average(ExpressionNode node) { + Sum sum = new Sum(); + average(node, sum); + return sum.average(); + } + + private void average(ExpressionNode node, Sum sum) { + if (node instanceof CompositeNode) { + for (ExpressionNode child : ((CompositeNode)node).children()) + average(child, sum); + } + else { + Double value = nodeValue(node); + if (value == null) return; + sum.add(value); + } + } + + private boolean instanceOf(Object object, Class clazz) { + if (clazz.isAssignableFrom(object.getClass())) return true; + System.err.println("Warning: This expression has " + object.getClass() + " where we expect " + clazz + + ": Instance " + object); + return false; + } + + private List generateArgumentSets(int count) { + List argumentSets = new ArrayList<>(count); + for (int i=0; i(analysis.featureNames().subList(skipFeatures, featureCount))); + + System.out.println("Learning ..."); + RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker(100, 0, 1)); + System.out.println("Learnt expression: " + learntExpression); + + // Check for overtraining + new LearntExpressionAnalysis(analysis, learntExpression); + } + + } + + private static void error(String message) { + System.err.println(message); + System.exit(1); + } + + public abstract static class Feature implements Comparable { + + private final String name; + + protected Feature(String name) { + this.name = name; + } + + public String name() { return name; } + + /** Primary sort by type, secondary by name */ + @Override + public int compareTo(Feature other) { + int typeComparison = this.getClass().getName().compareTo(other.getClass().getName()); + if (typeComparison != 0) return typeComparison; + return this.name.compareTo(other.name); + } + + } + + /** A feature used in comparisons. 
These are the ones on which our serious analysis is focused */ + public static class ComparisonFeature extends Feature { + + private double lowerBound = Double.MAX_VALUE; /* smallest value compared to so far; starts at the largest double so any value lowers it */ + private double upperBound = -Double.MAX_VALUE; /* largest value compared to so far; starts at the most negative double (Double.MIN_VALUE is the smallest POSITIVE double and would hide zero and negative values) */ + + /** The number of usages of this feature */ + private int usages = 0; + + /** The sum of the tree numbers where this is accessed */ + private int treeNumberSum = 0; + + /** + * The net times where the left values are smaller than the right values for this + * (which is a measure of correlation between input and output because the comparison is <) + */ + private int correlationCount = 0; + + /** + * The sum difference in returned value between choosing the right and left branch due to this feature + */ + private double netSum = 0; + + public ComparisonFeature(String name) { + super(name); + } + /** The smallest and largest value this has been compared to: Double.MAX_VALUE and -Double.MAX_VALUE until the first comparison is recorded */ + public double lowerBound() { return lowerBound; } + public double upperBound() { return upperBound; } + /** Records one comparison of this feature to value in the given tree: widens the bounds, counts the usage and accumulates the left/right branch statistics */ + public void isComparedTo(double value, int inTreeNumber, double leftAverage, double rightAverage) { + lowerBound = Math.min(lowerBound, value); + upperBound = Math.max(upperBound, value); + usages++; + treeNumberSum += inTreeNumber; + correlationCount += leftAverage < rightAverage ? 1 : -1; + netSum += rightAverage - leftAverage; + } + + /** Orders comparison features by decreasing usages; any other kind of feature is ordered by the type and name rule of Feature */ + public int compareTo(Feature o) { + if ( ! 
(o instanceof ComparisonFeature)) return super.compareTo(o); + ComparisonFeature other = (ComparisonFeature)o; + return - Integer.compare(this.usages, other.usages); + } + /** Returns a one-line summary of this; the average tree occurrence is reported as 0 when there are no usages instead of failing on an integer division by zero */ + @Override + public String toString() { + return "Numeric feature: " + name() + + ": range [" + lowerBound + ", " + upperBound + "]" + + ", usages " + usages + + ", average tree occurrence " + (usages == 0 ? 0 : treeNumberSum / usages) + + ", correlation: " + (correlationCount / (double)usages) + + ", net contribution: " + netSum; + } + + } + + /** A feature used in set membership tests */ + public static class SetMembershipFeature extends Feature { + + public SetMembershipFeature(String name) { + super(name); + } + + @Override + public String toString() { + return "Categorical feature: " + name(); + } + + } + + /** A sum which can return its average */ + private static class Sum { + + private double sum; + private int count; + + public void add(double value) { + sum+=value; + count++; + } + + public double average() { + return sum / count; + } + + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/package-info.java new file mode 100644 index 00000000000..63343d425b6 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.searchlib; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java new file mode 100644 index 00000000000..fb74fb4de6b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/ElementCompleteness.java @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Calculates the elementCompleteness features + * + * @author bratseth + */ +public class ElementCompleteness { + + /** Hardcoded to default for now */ + private static final double fieldCompletenessImportance = 0.05; + + /** + * Computes the following elementCompleteness features: + *
    + *
  • completeness + *
  • fieldCompleteness + *
  • queryCompleteness + *
  • elementWeight + *
+ * + * @param queryTerms the query terms with associated weights to compute over + * @param field a set of weighted field values, where each is taken to be a space-separated string of tokens + * @return a features object containing the values listed above + */ + public static Features compute(Map queryTerms, Item[] field) { + double completeness = 0; + double fieldCompleteness = 0; + double queryCompleteness = 0; + double elementWeight = 0; + + double queryTermWeightSum = sum(queryTerms.values()); + + for (Item item : field) { + String[] itemTokens =item.value().split(" "); + int matchCount = 0; + int matchWeightSum = 0; + for (String token : itemTokens) { + Integer weight = queryTerms.get(token); + if (weight == null) continue; + matchCount++; + matchWeightSum += weight; + } + double itemFieldCompleteness = (double)matchCount / itemTokens.length; + double itemQueryCompleteness = matchWeightSum / queryTermWeightSum; + double itemCompleteness = + fieldCompletenessImportance * itemFieldCompleteness + + (1 - fieldCompletenessImportance) * itemQueryCompleteness; + if (itemCompleteness > completeness) { + completeness = itemCompleteness; + fieldCompleteness = itemFieldCompleteness; + queryCompleteness = itemQueryCompleteness; + elementWeight = item.weight(); + } + } + + Map features = new HashMap<>(); + features.put("completeness", new DoubleValue(completeness)); + features.put("fieldCompleteness", new DoubleValue(fieldCompleteness)); + features.put("queryCompleteness", new DoubleValue(queryCompleteness)); + features.put("elementWeight", new DoubleValue(elementWeight)); + return new Features(features); + } + + private static int sum(Collection integers) { + int sum = 0; + for (int integer : integers) + sum += integer; + return sum; + } + + public static class Item { + + private final String value; + private final double weight; + + public Item(String value, double weight) { + this.value = value; + this.weight = weight; + } + + public String value() { return value; } + 
public double weight() { return weight; } + + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java new file mode 100644 index 00000000000..9dac3db11c8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/Features.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Collections; +import java.util.Map; + +/** + * A set of (immutable) computed features + * + * @author Jon Bratseth + */ +@Beta +public class Features { + + private Map features; + + /** Creates a set of features by assigning ownership of map of features to this */ + Features(Map features) { + this.features = Collections.unmodifiableMap(features); + } + + /** Returns the Value of a feature, or null if it is not present in this */ + public Value get(String featureName) { + return features.get(featureName); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java new file mode 100644 index 00000000000..e5b4a899844 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/FieldTermMatch.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.HashMap; +import java.util.Map; + +/** + * Calculates the fieldTermMatch features + * + * @author Jon Bratseth + */ +@Beta +public class FieldTermMatch { + + /** + * Computes the fieldTermMatch features: + *
    + *
  • firstPosition - the position of the first occurrence of this query term in this index field
  • + *
  • occurrences - the number of occurrences of this query term in this index field
  • + *
+ * @param queryTerm the term to return these features for + * @param field the field value to compute over, assumed to be a space-separated string of tokens + * @return a features object containing the two values described above + */ + public static Features compute(String queryTerm, String field) { + Map features = new HashMap<>(); + + String[] tokens = field.split(" "); + + int occurrences = 0; /* number of tokens equal to queryTerm */ + int firstPosition = 1000000; /* sentinel: stays at this value when queryTerm does not occur in the field */ + for (int i = 0; i < tokens.length; i++) { + if (tokens[i].equals(queryTerm)) { + if (occurrences == 0) /* only the first hit sets the position */ + firstPosition = i; + occurrences++; + } + } + features.put("firstPosition", new DoubleValue(firstPosition)); + features.put("occurrences", new DoubleValue(occurrences)); + return new Features(features); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java new file mode 100644 index 00000000000..b71eff8ffde --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Field.java @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ * + * @author bratseth + */ +public class Field { + + private final ImmutableList terms; + + /** Creates a field from a space-separated string */ + public Field(String fieldString) { + ImmutableList.Builder list = new ImmutableList.Builder<>(); + for (String term : fieldString.split(" ")) + list.add(new Term(term)); + this.terms = list.build(); + } + + /** Creates a field from a list of terms */ + public Field(List terms) { + this.terms = ImmutableList.copyOf(terms); + } + + /** Returns an immutable list of the terms in this */ + public List terms() { return terms; } + + /** A term in a field */ + public static class Term { + + private final String value; + private final float exactness; + + /** Creates a term with the given value and full exactness (1.0) */ + public Term(String value) { + this(value, 1.0f); + } + + public Term(String value, float exactness) { + this.value = value; + this.exactness = exactness; + } + + /** Returns the string value of this term */ + public String value() { return value; } + + /** + * Returns the degree to which this term is exactly what was in the document (1.0), + * or some stemmed form (closer to 0) + */ + public float exactness() { return exactness; } + + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java new file mode 100644 index 00000000000..77083d4edb4 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetrics.java @@ -0,0 +1,536 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features.fieldmatch; + +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static java.lang.Math.*; + +/** + * The collection of metrics calculated by the string match metric calculator. + * + * @author Jon Bratseth + */ +public final class FieldMatchMetrics implements Cloneable { + + /** The calculator creating this - given on initialization */ + private FieldMatchMetricsComputer source; + + /** The trace accumulated during execution - empty if no tracing */ + private final Trace trace=new Trace(); + + private boolean complete=false; + + // Metrics + private int outOfOrder; + private int segments; + private int gaps; + private int gapLength; + private int longestSequence; + private int head; + private int tail; + private int matches; + private float proximity; + private float unweightedProximity; + private float segmentDistance; + private int pairs; + private float weight; + private float significance; + private float occurrence; + private float weightedOccurrence; + private float absoluteOccurrence; + private float weightedAbsoluteOccurrence; + private float significantOccurrence; + private float weightedExactnessSum; + private int weightSum; + + // Temporary variables + private int currentSequence; + private List segmentStarts=new ArrayList<>(); + private int queryLength; + + public FieldMatchMetrics(FieldMatchMetricsComputer source) { + this.source=source; + + complete=false; + + outOfOrder = 0; + segments = 0; + gaps = 0; + gapLength = 0; + longestSequence = 1; + head = -1; + tail = -1; + proximity = 0; + unweightedProximity = 0; + segmentDistance = 0; + matches = 0; + pairs = 0; + weight = 0; + significance = 0; + weightedExactnessSum = 0; + weightSum = 0; + + currentSequence=0; + segmentStarts.clear(); + queryLength=source.getQuery().getTerms().length; + } + + /** Are these metrics representing a complete match */ + public boolean isComplete() { 
return complete; } + + public void setComplete(boolean complete) { this.complete=complete; } + + /** Returns the segment start points */ + public List getSegmentStarts() { return segmentStarts; } + + /** + * Returns a metric by name + * + * @throws IllegalArgumentException if the metric name (case sensitive) is not present + */ + public float get(String name) { + try { + Method getter=getClass().getMethod("get" + name.substring(0,1).toUpperCase() + name.substring(1)); + return ((Number)getter.invoke(this)).floatValue(); + } + catch (NoSuchMethodException e) { + throw new IllegalArgumentException("No metric named '" + name + "' is known"); + } + catch (Exception e) { + throw new RuntimeException("Error getting metric '" + name + "'",e); + } + } + + // Base metrics ---------------------------------------------------------------------------------------------- + + /** Returns the total number of out of order token sequences within field segments */ + public int getOutOfOrder() { return outOfOrder; } + + /** Returns the number of field text segments which are needed to match the query as completely as possible */ + public int getSegments() { return segments; } + + /** Returns the total number of position jumps (backward or forward) within document segments */ + public int getGaps() { return gaps; } + + /** Returns the summed size of all gaps within segments */ + public int getGapLength() { return gapLength; } + + /** Returns the size of the longest matched continuous, in-order sequence in the document */ + public int getLongestSequence() { return longestSequence; } + + /** Returns the number of tokens in the field preceding the start of the first matched segment */ + public int getHead() { return head; } + + /** Returns the number of tokens in the field following the end of the last matched segment */ + public int getTail() { return tail; } + + /** Returns the number of query terms which was matched in this field */ + public int getMatches() { return matches; } + + /** 
Returns the number of in-segment token pairs */ + public int getPairs() { return pairs; } + + /** + * Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. + * This number is 0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they + * are following in sequence and have a high connectedness, and close to 0 if they are far from each other in the + * segment or out of order + */ + public float getAbsoluteProximity() { + if (pairs <1) return 0.1f; + + return proximity/pairs; + } + + /** + * Returns the normalized proximity of the matched terms, not taking term connectedness into account. + * This number is close to 1 if all the matched terms are + * following each other in sequence, and close to 0 if they are far from each other or out of order + */ + public float getUnweightedProximity() { + if (pairs <1) return 1f; + return unweightedProximity/pairs; + } + + /** + * Returns the sum of the distance between all segments making up a match to the query, measured + * as the sum of the number of token positions separating the start of each field adjacent segment. + */ + public float getSegmentDistance() { return segmentDistance; } + + /** + *

Returns the normalized weight of this match relative to the whole query: + * The sum of the weights of all matched terms/the sum of the weights of all query terms. + * If all the query terms were matched, this is 1. If no terms were matched, or these matches have weight zero, + * this is 0.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of + * normalized rank features for each field multiplied by this number for the same field will produce a + * normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ */ + public float getWeight() { return weight; } + + /** + *

Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query: + * The sum of the significance of all matched terms/the sum of the significance of all query terms + * If all the query terms were matched, this is 1. If no terms were matched, or if the significance of all the matched terms + * is zero (they are present in all (possible) documents), this number is zero.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of + * normalized rank features for each field multiplied by this number for the same field will produce a + * normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ */ + public float getSignificance() { return significance; } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query. + * This number is 1 if there are many occurrences of the query terms in absolute terms, + * or relative to the total content of the field, and 0 if there are none.

+ * + *

This is suitable for occurrence in fields containing regular text.

+ */ + public float getOccurrence() { return occurrence; } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query: + * + * sum over all query terms(min(number of occurrences of the term,maxOccurrences))/(query term count*100) + * + *

This number is 1 if there are many occurrences of the query terms, and 0 if there are none. + * This number does not take the actual length of the field into account, so it is suitable for uses of occurrence + * to denote importance across multiple terms.

+ */ + public float getAbsoluteOccurrence() { return absoluteOccurrence; } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query, weighted by term weight. + * This number is close to 1 if there are many occurrences of highly weighted query terms, + * in absolute terms, or relative to the total content of the field, and 0 if there are none.

+ */ + public float getWeightedOccurrence() { return weightedOccurrence; } + + /** + *

Returns a normalized measure of the number of occurrences of the terms of the query, taking weights + * into account so that occurrences of higher weighted query terms have more impact than lower weighted terms.

+ * + *

This number is 1 if there are many occurrences of the highly weighted terms, and 0 if there are none. + * This number does not take the actual length of the field into account, so it is suitable for uses of occurrence + * to denote importance across multiple terms.

+ */ + public float getWeightedAbsoluteOccurrence() { return weightedAbsoluteOccurrence; } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query + * in absolute terms, + * or relative to the total content of the field, weighted by term significance. + * + *

This number is 1 if there are many occurrences of the highly significant terms, and 0 if there are none.

+ */ + public float getSignificantOccurrence() { return significantOccurrence; } + + /** + *

Returns the degree to which the submitted query terms exactly matched terms contained in the document. + * This is 1 if all the terms matched exactly, and closer to 0 as more of the terms were matched only as stem forms. + *

+ * + *

This is the query term weighted average of the exactness of each match, where the exactness of a match is + * the product of the exactness of the matching query term and the matching field term: + * + * sum over matching query terms(query term weight * query term exactness * field term exactness) / + * sum over matching query terms(query term weight) + * + */ + public float getExactness() { + if (matches == 0) return 0; + return weightedExactnessSum / weightSum; + } + + // Derived metrics ---------------------------------------------------------------------------------------------- + + /** The ratio of query tokens which were matched in the field: matches/queryLength */ + public float getQueryCompleteness() { + return (float)matches/source.getQuery().getTerms().length; + } + + /** The number of matched query tokens relative to the number of tokens in the field: matches/fieldLength */ + public float getFieldCompleteness() { + return (float)matches/source.getField().terms().size(); + } + + /** + * Total completeness, where field completeness is more important: + * queryCompleteness * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance * fieldCompleteness + */ + public float getCompleteness() { + float fieldCompletenessImportance=source.getParameters().getFieldCompletenessImportance(); + return getQueryCompleteness() * ( 1 - fieldCompletenessImportance) + fieldCompletenessImportance*getFieldCompleteness(); + } + + /** Returns how well the order of the terms agreed in segments: 1-outOfOrder/pairs, or 1 when there are no pairs */ + public float getOrderness() { + if (pairs ==0) return 1f; + return 1-(float)outOfOrder/pairs; + } + + /** Returns the degree to which different terms are related (occurring in the same segment): 1-(segments-1)/(matches-1), 0 when nothing matched and 1 for a single match */ + public float getRelatedness() { + if (matches==0) return 0; + if (matches==1) return 1; + return 1-(float)(segments-1)/(matches-1); + } + + /** Returns longestSequence/matches, or 0 when nothing matched */ + public float getLongestSequenceRatio() { + if (matches==0) return 0; + return 
(float)longestSequence/matches; + } + + /** Returns the closeness of the segments in the field: 1-segmentDistance/fieldLength, or 0 when nothing matched */ + public float getSegmentProximity() { + if (matches==0) return 0; + return 1-(float)segmentDistance/source.getField().terms().size(); + } + + /** + * Returns a value which is close to 1 when matched terms are close and close to zero when they are far apart + * in the segment. Relatively more connected terms influence this value more. + * This is absoluteProximity/average connectedness. + */ + public float getProximity() { + float totalConnectedness=0; + for (int i=1; i1) + averageConnectedness=totalConnectedness/(queryLength-1); + return getAbsoluteProximity()/averageConnectedness; + } + + /** + *

Returns the average of significance and weight.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of + * normalized rank features for each field multiplied by this number for the same field will produce a + * normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ */ + public float getImportance() { + return (getSignificance() + getWeight()) / 2; + } + + /** A normalized measure of how early the first segment occurs in this field: 1-head/(max(6,field.length)-1) */ + public float getEarliness() { + if (matches == 0) return 0; // Covers field.length==0 too + if (source.getField().terms().size() == 1) return 1; + return 1 - (float)head/(max(6, source.getField().terms().size()) - 1); + } + + /** + *

A ready-to-use aggregate match score. Use this if you don't have time to find a better application specific + * aggregate score of the fine grained match metrics.

+ * + *

The current formula is + * + * + * ( proximityCompletenessImportance * (1-relatednessImportance + relatednessImportance*relatedness) + * * proximity * exactness * completeness^2 + earlinessImportance * earliness + segmentProximityImportance * segmentProximity + occurrenceImportance * occurrence ) + * / (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance) + * + * but this is subject to change (i.e. improvement) at any time. + * 

+ * + * + *

Weight and significance are not taken into account because this is meant to capture the quality of the + * match in this field, while those measures relate this match to matches in other fields. This number + * can be multiplied with those values when combining with other field match scores.

+ */ + public float getMatch() { + float proximityCompletenessImportance = source.getParameters().getProximityCompletenessImportance(); + float earlinessImportance = source.getParameters().getEarlinessImportance(); + float relatednessImportance = source.getParameters().getRelatednessImportance(); + float segmentProximityImportance = source.getParameters().getSegmentProximityImportance(); + float occurrenceImportance = source.getParameters().getOccurrenceImportance(); + float scaledRelatedness = 1 - relatednessImportance + relatednessImportance*getRelatedness(); + + return ( proximityCompletenessImportance * scaledRelatedness * getProximity() * getExactness() * getCompleteness() * getCompleteness() + + earlinessImportance * getEarliness() + + segmentProximityImportance * getSegmentProximity() + + occurrenceImportance * getOccurrence()) + / (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance); + } + + /** + *

The metric used to select the best segments during execution of the string match metric algorithm.

+ * + *

This metric, and any metric it depends on, must be correct each time a segment is completed, + * not only when the metrics are complete, because this metric is used to choose segments during calculation.

+ */ + float getSegmentationScore() { + if (segments==0) return 0; + return getAbsoluteProximity() * getExactness() / (segments * segments); + } + + // Events emitted from the computer while matching strings ---------------------------------------------------- + // Note that one move in the computer may cause multiple events + + // Events on single positions ---------- + + /** Called once for every match */ + void onMatch(int i, int j) { + if (matches>=source.getField().terms().size()) return; + matches++; + weight += (float)source.getQuery().getTerms()[i].getWeight() / source.getQuery().getTotalTermWeight(); + significance += source.getQuery().getTerms()[i].getSignificance() / source.getQuery().getTotalSignificance(); + int queryTermWeight = source.getQuery().getTerms()[i].getWeight(); + weightedExactnessSum += queryTermWeight * source.getQuery().getTerms()[i].getExactness() * source.getField().terms().get(j).exactness(); + weightSum += queryTermWeight; + } + + /** Called once per sequence, when the sequence starts */ + void onSequenceStart(int j) { + if (head==-1 || j longestSequence) + longestSequence = currentSequence; + currentSequence = 0; + } + + /** Called once when this value is calculated, before onComplete */ + void setOccurrence(float occurrence) { this.occurrence=occurrence; } + + /** Called once when this value is calculated, before onComplete */ + void setWeightedOccurrence(float weightedOccurrence) { this.weightedOccurrence=weightedOccurrence; } + + /** Called once when this value is calculated, before onComplete */ + void setAbsoluteOccurrence(float absoluteOccurrence) { this.absoluteOccurrence=absoluteOccurrence; } + + /** Called once when this value is calculated, before onComplete */ + void setWeightedAbsoluteOccurrence(float weightedAbsoluteOccurrence) { this.weightedAbsoluteOccurrence=weightedAbsoluteOccurrence; } + + /** Called once when this value is calculated, before onComplete */ + void setSignificantOccurrence(float 
significantOccurrence) { this.significantOccurrence =significantOccurrence; } + + /** Called once when matching is complete */ + void onComplete() { + // segment distance - calculated from sorted segment starts + if (segmentStarts.size()<=1) { + segmentDistance=0; + } + else { + Collections.sort(segmentStarts); + for (int i=1; iany pair is encountered */ + void onPair(int i, int j, int previousJ) { + int distance = j-previousJ-1; + if (distance < 0) distance++; // Discontinuity where the two terms are in the same position + if (abs(distance) > source.getParameters().getProximityLimit()) return; // Contribution=0 + + // We have an in-segment pair + float pairProximity = source.getParameters().getProximity(distance + source.getParameters().getProximityLimit()); + + unweightedProximity += pairProximity; + + float connectedness = source.getQuery().getTerms()[i].getConnectedness(); + proximity += pow(pairProximity, connectedness/0.1) * max(0.1, connectedness); + + pairs++; + } + + /** Called when an in-sequence pair is encountered */ + void onInSequence(int i, int j, int previousJ) { + currentSequence++; + } + + /** Called when a gap (within a sequence) is encountered */ + void onInSegmentGap(int i, int j, int previousJ) { + gaps++; + if (j>previousJ) { + gapLength+=abs(j-previousJ)-1; // gap length may be 0 if the gap was in the query + } + else { + outOfOrder++; + gapLength+=abs(j-previousJ); + } + } + + /** + * Called when a new segment is started + * + * @param previousJ the end of the previous segment, or -1 if this is the first segment + * */ + void onNewSegment(int i, int j, int previousJ) { + segments++; + segmentStarts.add(j); + } + + @Override + public FieldMatchMetrics clone() { + try { + FieldMatchMetrics clone=(FieldMatchMetrics)super.clone(); + clone.segmentStarts=new ArrayList<>(segmentStarts); + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Programming error",e); + } + } + + @Override + public String toString() 
{ + return "Metrics: [match: " + getMatch() + "]"; + } + + public String toStringDump() { + try { + StringBuilder b=new StringBuilder(); + for (Method m : this.getClass().getDeclaredMethods()) { + if ( ! m.getName().startsWith("get")) continue; + if (m.getReturnType()!=Integer.TYPE && m.getReturnType()!=Float.TYPE) continue; + if ( m.getParameterTypes().length!=0 ) continue; + + Object value=m.invoke(this,new Object[0]); + b.append(m.getName().substring(3,4).toLowerCase() + m.getName().substring(4) + ": " + value + "\n"); + } + return b.toString(); + } + catch (Exception e) { + throw new RuntimeException("Programming error",e); + } + } + + /** Returns the trace of this computation. This is empty (never null) if tracing is off */ + public Trace trace() { return trace; } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java new file mode 100644 index 00000000000..3fc3780151a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsComputer.java @@ -0,0 +1,433 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + *

Calculates a set of metrics capturing information about the degree of agreement between a query + * and a field string. This algorithm attempts to capture the property of text that very close tokens + * are usually part of the same semantic structure, while tokens farther apart are much more loosely related. + * The algorithm will locate alternative such regions containing multiple query tokens (segments), do a more + * detailed analysis of these segments and choose the ones producing the best overall set of match metrics + * (subject to certain resource constraints).

+ * + *

Such segments are found by looking at query terms in sequence from + * left to right and finding matches in the field. All alternative segment start points are explored, and the + * segmentation achieving the best overall string match metric score is preferred. Dynamic programming + * is used to avoid redoing work on segmentations.

+ * + *

When a segment start point is found, subsequent tokens from the query are searched in the field + * from this starting point in "semantic order". This search order can be defined independently of the + * algorithm. The current order searches proximityLimit tokens ahead first, then the same distance backwards + * (so if you need to go two steps backwards in the field from the segment starting point, the real distance is -2, + * but the "semantic distance" is proximityLimit+2).

+ * + *

The actual metrics are calculated during execution of this algorithm by the {@link FieldMatchMetrics} class, + * by receiving events emitted from the algorithm. Any set of metrics derivable from these events is computable using + * this algorithm.

+ * + *

Terminology: + *

    + *
  • Sequence - A set of adjacent matched tokens in the field + *
  • Segment - A field area containing matches to a continuous section of the query + *
  • Gap - A chunk of adjacent tokens inside a segment separating two matched tokens + *
  • Semantic distance - A non-continuous distance between tokens in j, where the non-continuousness is + * meant to capture the semantic similarity between the query and those tokens. + *
+ * + *

Notation: A position index in the query is denoted i. A position index in the field is + * denoted j.

+ * + *

This class is not multithread safe, but is reusable across queries for a single thread.

+ * + * @author Jon Bratseth + */ +public final class FieldMatchMetricsComputer { + + private Query query; + + private Field field; + + private final FieldMatchMetricsParameters parameters; + + /** The metrics of the currently explored segmentation */ + private FieldMatchMetrics metrics; + + /** + * Known segment starting points. The array is 0..i, one element per starting point query item i, + * and a last element representing the entire query. + */ + private List segmentStartPoints=new ArrayList<>(); + + /** True to collect trace */ + private boolean collectTrace; + + private int alternativeSegmentationsTried=0; + + /** Creates a feature computer using default settings */ + public FieldMatchMetricsComputer() { + this(FieldMatchMetricsParameters.defaultParameters()); + } + + /** + * Creates a feature computer with the given parameters. + * The parameters are frozen if they were not already, this may cause + * validation exceptions to be thrown from this. + */ + public FieldMatchMetricsComputer(FieldMatchMetricsParameters parameters) { + this.parameters = parameters; + } + + /** Computes the string match metrics from a query and field string. */ + public FieldMatchMetrics compute(String queryString,String fieldString) { + return compute(new Query(queryString), fieldString); + } + + /** Computes the string match metrics from a query and field string. */ + public FieldMatchMetrics compute(Query query, String fieldString) { + return compute(query,fieldString,false); + } + + /** + * Computes the string match metrics from a query and field string. 
+ * + * @param query the query to compute over + * @param fieldString the field value to compute over - tokenized by splitting on space + * @param collectTrace true to accumulate trace information in the trace returned with the metrics + */ + public FieldMatchMetrics compute(Query query, String fieldString, boolean collectTrace) { + return compute(query, new Field(fieldString), collectTrace); + } + + /** + * Computes the string match metrics from a query and field. + * + * @param query the query to compute over + * @param field the field value to compute over + * @param collectTrace true to accumulate trace information in the trace returned with the metrics + */ + public FieldMatchMetrics compute(Query query, Field field, boolean collectTrace) { + // 1. Reset state + this.collectTrace = collectTrace; + this.query = query; + this.field = field; + segmentStartPoints.clear(); + for (int i = 0; i <= query.getTerms().length; i++) + segmentStartPoints.add(null); + alternativeSegmentationsTried = 0; + metrics = new FieldMatchMetrics(this); + + // 2. Compute + exploreSegments(); + return metrics; + } + + /** Finds segment candidates and explores them until we have the best segmentation history of the entire query */ + private void exploreSegments() { + if (collectTrace) + metrics.trace().add("Calculating matches for\n " + query + "\n " + field + "\n"); + + // Create an initial start point + SegmentStartPoint segmentStartPoint=new SegmentStartPoint(metrics,this); + segmentStartPoints.set(0,segmentStartPoint); + + // Explore segmentations + while (segmentStartPoint!=null) { + metrics =segmentStartPoint.getMetrics().clone(); + if (collectTrace) + metrics.trace().add("\nLooking for segment from " + segmentStartPoint + "..." + "\n"); + boolean found=findAlternativeSegmentFrom(segmentStartPoint); + if (collectTrace) + metrics.trace().add(found ? 
"...found segment: " + metrics.getSegmentStarts() + " score: " + + metrics.getSegmentationScore() : "...no complete and improved segment existed" + "\n"); + if (!found) + segmentStartPoint.setOpen(false); + segmentStartPoint=findOpenSegment(segmentStartPoint.getI()); + } + + metrics=findLastStartPoint().getMetrics(); // these metrics are the final set + setOccurrenceCounts(metrics); + metrics.onComplete(); + metrics.setComplete(true); + } + + /** + * Find correspondences from a segment starting point + * + * @return true if a segment was found, false if none could be found + */ + private boolean findAlternativeSegmentFrom(SegmentStartPoint segmentStartPoint) { + // i: index into the query + // j: index into the field + int semanticDistanceExplored=segmentStartPoint.getSemanticDistanceExplored(); + int previousI=-1; + int previousJ=segmentStartPoint.getPreviousJ(); + boolean hasOpenSequence=false; + boolean isFirst=true; + + for (int i=segmentStartPoint.getStartI(); i0 && isFirst) { + return false; // Segment explored before, and no more matches found + } + + if ( hasOpenSequence && ( j==-1 || j!=previousJ+1 ) ) { + metrics.onSequenceEnd(previousJ); + hasOpenSequence=false; + } + + if (isFirst) { + if (j!=-1) { + segmentStart(i,j,isFirst ? 
-1 : previousJ); + segmentStartPoint.exploredTo(j); + isFirst=false; + } + else { + segmentStartPoint.incrementStartI(); // Remember that there are no matches for this i + } + } + else { + if (Math.abs(j-previousJ) >= parameters.getProximityLimit()) { + segmentEnd(i-1,previousJ); + return true; + } + else if (j!=-1) { + inSegment(i,j,previousJ,previousI); + } + } + + if (j!=-1) + metrics.onMatch(i,j); + + if (j!=-1 && !hasOpenSequence) { + metrics.onSequenceStart(j); + hasOpenSequence=true; + } + + if (j!=-1) + semanticDistanceExplored=1; // Skip the current match when looking for the next + else + semanticDistanceExplored=0; + + if (j>=0) { + previousI=i; + previousJ=j; + } + } + + if (hasOpenSequence) + metrics.onSequenceEnd(previousJ); + + if (!isFirst) { + segmentEnd(query.getTerms().length-1,previousJ); + return true; + } + else { + return false; + } + } + + /** + * Implements the preferred search order for finding a match to a query item - first + * looking close in the right order, then close in the reverse order, then far in the right order + * and lastly far in the reverse order. + * + * @param startSemanticDistance is the semantic distance we must be larger than or equal to + * @return the semantic distance of the next mathing j larger than startSemanticDistance, or -1 if + * there are no matches larger than startSemanticDistance + */ + private int findClosestInFieldBySemanticDistance(int i,int previousJ,int startSemanticDistance) { + String term=query.getTerms()[i].getTerm(); + for (int distance=startSemanticDistance; distance= zeroJ) { + if ( (j - zeroJ) < firstSegmentLength ) + return j - zeroJ; // 0..limit + else + return j - zeroJ+secondSegmentLength; // limit*2..field.length-zeroJ + } + else { + if ( (zeroJ - j - 1) < secondSegmentLength ) + return zeroJ - j + firstSegmentLength-1; // limit..limit*2 + else + return (zeroJ - j - 1) + field.terms().size() - zeroJ; // field.length-zeroJ.. 
+ } + + } + + private void inSegment(int i, int j, int previousJ, int previousI) { + metrics.onPair(i, j, previousJ); + if (j==previousJ+1 && i==previousI+1) { + metrics.onInSequence(i, j, previousJ); + } + else { + metrics.onInSegmentGap(i, j, previousJ); + if (collectTrace) + metrics.trace().add(" in segment gap: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n"); + } + } + + /** Returns whether this segment was accepted as a starting point */ + private boolean segmentStart(int i,int j,int previousJ) { + metrics.onNewSegment(i, j, previousJ); + + if (previousJ>=0) + metrics.onPair(i,j,previousJ); + + if (collectTrace) + metrics.trace().add(" new segment at: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n"); + return true; + } + + /** + * Registers an end of a segment + * + * @param i the i at which this segment ends + * @param j the j at which this segment ends + */ + private void segmentEnd(int i,int j) { + if (collectTrace) + metrics.trace().add(" segment ended at: " + i + "->" + j + " (" + query.getTerms()[i] + ")" + "\n"); + SegmentStartPoint startOfNext=segmentStartPoints.get(i + 1); + if (startOfNext==null) + segmentStartPoints.set(i+1,new SegmentStartPoint(i+1,j, metrics,this)); + else + startOfNext.offerHistory(j, metrics, collectTrace); + } + + /** Returns the next open segment to explore, or null if no more segments exists or should be explored */ + private SegmentStartPoint findOpenSegment(int startI) { + for (int i=startI; i=parameters.getMaxAlternativeSegmentations()) continue; + alternativeSegmentationsTried++; + return startPoint; + } + + return null; + } + + private SegmentStartPoint findLastStartPoint() { + for (int i=segmentStartPoints.size()-1; i>=0; i--) { + SegmentStartPoint startPoint=segmentStartPoints.get(i); + if (startPoint!=null) + return startPoint; + } + return null; // Impossible + } + + /** Counts all occurrences of terms of the query in the field and set those metrics */ + private void 
setOccurrenceCounts(FieldMatchMetrics metrics) { + Set uniqueQueryTerms=new HashSet<>(); + for (QueryTerm queryTerm : query.getTerms()) + uniqueQueryTerms.add(queryTerm); + + List weightedOccurrences=new ArrayList(); + List significantOccurrences=new ArrayList(); + + int divider = Math.min(field.terms().size(),parameters.getMaxOccurrences()*uniqueQueryTerms.size()); + int maxOccurence = Math.min(field.terms().size(),parameters.getMaxOccurrences()); + + float occurrence=0; + float absoluteOccurrence=0; + float weightedAbsoluteOccurrence=0; + int totalWeight=0; + float totalWeightedOccurrences=0; + float totalSignificantOccurrences=0; + + for (QueryTerm queryTerm : uniqueQueryTerms) { + int termOccurrences=0; + for (Field.Term fieldTerm : field.terms()) { + if (fieldTerm.value().equals(queryTerm.getTerm())) + termOccurrences++; + if (termOccurrences == parameters.getMaxOccurrences()) break; + } + occurrence+=(float)termOccurrences/divider; + + absoluteOccurrence+=(float)termOccurrences/(parameters.getMaxOccurrences()*uniqueQueryTerms.size()); + + weightedAbsoluteOccurrence+=(float)termOccurrences*queryTerm.getWeight()/parameters.getMaxOccurrences(); + totalWeight+=queryTerm.getWeight(); + + totalWeightedOccurrences+=(float)maxOccurence*queryTerm.getWeight()/divider; + weightedOccurrences.add((float)termOccurrences*queryTerm.getWeight()/divider); + + totalSignificantOccurrences+=(float)maxOccurence*queryTerm.getSignificance()/divider; + significantOccurrences.add((float)termOccurrences*queryTerm.getSignificance()/divider); + } + + float weightedOccurrenceSum=0; + for (float weightedOccurence : weightedOccurrences) + weightedOccurrenceSum+=weightedOccurence/totalWeightedOccurrences; + + float significantOccurrenceSum=0; + for (float significantOccurence : significantOccurrences) + significantOccurrenceSum+=significantOccurence/totalSignificantOccurrences; + + if (totalWeight>0) + weightedAbsoluteOccurrence=weightedAbsoluteOccurrence/totalWeight; + + 
metrics.setOccurrence(occurrence); + metrics.setAbsoluteOccurrence(absoluteOccurrence); + metrics.setWeightedOccurrence(weightedOccurrenceSum); + metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence); + metrics.setSignificantOccurrence(significantOccurrenceSum); + } + + /** Returns the parameter settings of this */ + public FieldMatchMetricsParameters getParameters() { return parameters; } + + Query getQuery() { return query; } + + Field getField() { return field; } + + @Override + public String toString() { + return query + "\n" + field + "\n" + metrics + "\n"; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java new file mode 100644 index 00000000000..4ab8565a285 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/FieldMatchMetricsParameters.java @@ -0,0 +1,198 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +/** + * The parameters to a string match metric calculator. + * Mutable until frozen. 
+ * + * @author Jon Bratseth + */ +public final class FieldMatchMetricsParameters { + + private boolean frozen=false; + + private int proximityLimit=10; + + private int maxAlternativeSegmentations = 10000; + + private int maxOccurrences=100; + + private float proximityCompletenessImportance =0.9f; + + private float relatednessImportance =0.9f; + + private float earlinessImportance =0.05f; + + private float segmentProximityImportance =0.05f; + + private float occurrenceImportance =0.05f; + + private float fieldCompletenessImportance =0.05f; + + private float[] proximityTable= new float[] { 0.01f, 0.02f, 0.03f, 0.04f, 0.06f, 0.08f, 0.12f, 0.17f, 0.24f, 0.33f, 1, + 0.71f, 0.50f, 0.35f, 0.25f, 0.18f, 0.13f, 0.09f, 0.06f, 0.04f, 0.03f }; + + /* Calculation of the table above: + static { + System.out.println("Right order"); + for (float i=0; i<=10; i++) + System.out.println(1/Math.pow(2,i/2)); + + System.out.println("Reverse order"); + for (float i=0; i<=10; i++) + System.out.println(1/Math.pow(2,i/2)/3); + } + */ + + private static FieldMatchMetricsParameters defaultParameters; + + static { + defaultParameters=new FieldMatchMetricsParameters(); + defaultParameters.freeze(); + } + + /** Returns the frozen default parameters */ + public static FieldMatchMetricsParameters defaultParameters() { + return defaultParameters; + } + + /** Creates an unfrozen marcg metrics object initialized to the default values */ + public FieldMatchMetricsParameters() { } + + /** Sets the maximum allowed gap within a segment. Default: 10 */ + public void setProximityLimit(int proximityLimit) { + ensureNotFrozen(); + this.proximityLimit=proximityLimit; + } + + /** Returns the maximum allowed gap within a segment. Default: 10 */ + public int getProximityLimit() { return proximityLimit; } + + /** + * Sets the proximity table deciding the importance of separations of various distances, + * The table must have size proximityLimit*2+1, where the first half is for reverse direction + * distances. 
The table must only contain values between 0 and 1, where 1 is "perfect" and 0 is "worst". + */ + public void setProximityTable(float[] proximityTable) { + ensureNotFrozen(); + this.proximityTable=proximityTable; + } + + /** + * Returns the current proximity table. + * The default table is calculated by + * 1/2^(n/2) on the right order side, and + * 1/2^(n/2) /3 on the reverse order side + * where n is the distance between the tokens. + */ + public float[] getProximityTable() { return proximityTable; } + + /** Returns the proximity table value at an index */ + public float getProximity(int index) { return proximityTable[index]; } + + /** + * Returns the maximal number of alternative segmentations allowed in addition to the first one found. + * Default is 10000. This will prefer to not consider iterations on segments that are far out in the field, + * and which starts late in the query. + */ + public int getMaxAlternativeSegmentations() { return maxAlternativeSegmentations; } + + public void setMaxAlternativeSegmentations(int maxAlternativeSegmentations) { + ensureNotFrozen(); + this.maxAlternativeSegmentations = maxAlternativeSegmentations; + } + + /** + * Returns the number of occurrences the number of occurrences of each word is normalized against. + * This should be set as the number above which additional occurrences of the term have no real significance. + * The default is 100. + */ + public int getMaxOccurrences() { return maxOccurrences; } + + /** Sets the number of occurrences to normalize against. Throws IllegalStateException if this is frozen. */ public void setMaxOccurrences(int maxOccurrences) { ensureNotFrozen(); this.maxOccurrences=maxOccurrences; } + + /** + * Returns a number between 0 and 1 which determines the importance of field completeness in relation to + * query completeness in the match and completeness metrics. 
Default is 0.05 + */ + public float getFieldCompletenessImportance() { return fieldCompletenessImportance; } + + public void setFieldCompletenessImportance(float fieldCompletenessImportance) { + ensureNotFrozen(); + this.fieldCompletenessImportance = fieldCompletenessImportance; + } + + /** + * Returns the importance of the match having high proximity and being complete, relative to segmentProximityImportance, + * occurrenceImportance and earlinessImportance in the match metric. Default: 0.9 + */ + public float getProximityCompletenessImportance() { return proximityCompletenessImportance; } + + public void setProximityCompletenessImportance(float proximityCompletenessImportance) { + ensureNotFrozen(); + this.proximityCompletenessImportance = proximityCompletenessImportance; + } + + /** + * Returns the importance of the match occuring early in the query, relative to segmentProximityImportance, + * occurrenceImportance and proximityCompletenessImportance in the match metric. Default: 0.05 + */ + public float getEarlinessImportance() { return earlinessImportance; } + + public void setEarlinessImportance(float earlinessImportance) { + ensureNotFrozen(); + this.earlinessImportance = earlinessImportance; + } + + /** + * Returns the importance of multiple segments being close to each other, relative to earlinessImportance, + * occurrenceImportance and proximityCompletenessImportance in the match metric. Default: 0.05 + */ + public float getSegmentProximityImportance() { return segmentProximityImportance; } + + public void setSegmentProximityImportance(float segmentProximityImportance) { + ensureNotFrozen(); + this.segmentProximityImportance = segmentProximityImportance; + } + + /** + * Returns the importance of having many occurrences of the query terms, relative to earlinessImportance, + * segmentProximityImportance and proximityCompletenessImportance in the match metric. 
Default: 0.05 + */ + public float getOccurrenceImportance() { return occurrenceImportance; } + + public void setOccurrenceImportance(float occurrenceImportance) { + ensureNotFrozen(); + this.occurrenceImportance = occurrenceImportance; + } + + /** Returns the normalized importance of relatedness used in the match metric. Default: 0.9 */ + public float getRelatednessImportance() { return relatednessImportance; } + + public void setRelatednessImportance(float relatednessImportance) { + ensureNotFrozen(); + this.relatednessImportance = relatednessImportance; + } + + + /** Throws IllegalStateException if this is frozen. Does nothing otherwise */ + private void ensureNotFrozen() { + if (frozen) + throw new IllegalStateException(this + " is frozen"); + } + + /** + * Freezes this object. All changes after this point will cause an IllegalStateException. + * This must be frozen before being handed to a calculator. + * + * @throws IllegalStateException if this parameter object is inconsistent. In this case, this is not frozen. + */ + public void freeze() { + if (proximityTable.length!=proximityLimit*2+1) + throw new IllegalStateException("Proximity table length is " + proximityTable.length + ". Must be " + + (proximityLimit*2+1) + + " (proximityLimit*2+1), because the proximity limit is " + proximityLimit); + frozen=true; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java new file mode 100644 index 00000000000..f101448a3dd --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Main.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +/** + * Helper for computing metrics from the command line. 
+ */ +public class Main { + + public static void main(String[] args) { + FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(); + String query=getQuery(args); + String field=getField(args); + if (query==null || field==null) { + printUsage(); + return; + } + + FieldMatchMetrics metrics = c.compute(query,field); + System.out.println(metrics.toStringDump()); + } + + private static String getQuery(String[] args) { + if (args.length<1) return null; + if (args[0].equals("-h") || args[0].equals("-help")) return null; + return args[0]; + } + + private static String getField(String[] args) { + if (args.length<2) return null; + return args[1]; + } + + private static void printUsage() { + System.out.println("Computes the string segment match metrics of a query and field."); + System.out.println("Usage: java -jar searchlib.jar query field"); + System.out.println("By: Jon Bratseth (bratseth@yahoo-inc.com)"); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java new file mode 100644 index 00000000000..6cd9d651a09 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Query.java @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +import com.yahoo.searchlib.ranking.features.fieldmatch.QueryTerm; + +import java.util.Arrays; + +/** + * A query: An array of the QueryTerms which searches the field we are calculating for, + *

+ * In addition the sum of the term weights of all the query terms can be set + * explicitly. This allows us to model the matchWeight rank feature of a field as dependent of + * the weights of all the terms in the query. + * + * @author Jon Bratseth + */ +public class Query { + + private QueryTerm[] terms; + + private int totalTermWeight=0; + + private float totalSignificance=0; + + public Query(String query) { + this(splitQuery(query)); + } + + /** Creates a query with a list of query terms. The query terms are not, and must not be subsequently modified */ + public Query(QueryTerm[] terms) { + this.terms=terms; + + for (QueryTerm term : terms) { + totalTermWeight+=term.getWeight(); + totalSignificance+=term.getSignificance(); + } + } + + private static QueryTerm[] splitQuery(String queryString) { + String[] queryTerms=queryString.split(" "); + QueryTerm[] query=new QueryTerm[queryTerms.length]; + for (int i=0; iJon Bratseth + */ +public final class QueryTerm { + + private String term; + + private float connectedness = 0.1f; + + private int weight = 100; + + private float significance = 0.1f; + + private float exactness = 1.0f; + + public QueryTerm(String term) { + this.term=term; + } + + public QueryTerm(String term,float connectedness) { + this.term=term; + this.connectedness=connectedness; + } + + public void setTerm(String term) { this.term=term; } + + public String getTerm() { return term; } + + /** + * Returns how connected this term is to the previous term in the query. + * Default: 0.1. 
This is always a number between 0 (not connected at all) and 1 (virtually inseparable) + */ + public float getConnectedness() { return connectedness; } + + public void setConnectedness(float connectedness) { this.connectedness=connectedness; } + + public void setWeight(int weight) { this.weight=weight; } + + public int getWeight() { return weight; } + + /** The significance of this term: 1-term frequency */ + public void setSignificance(float significance) { this.significance=significance; } + + public float getSignificance() { return significance; } + + /** The degree to which this is exactly the term the user specified (1), or a stemmed form (closer to 0) */ + public float getExactness() { return exactness; } + + public @Override int hashCode() { return term.hashCode(); } + + public @Override boolean equals(Object object) { + if (! (object instanceof QueryTerm)) return false; + + return this.term.equals(((QueryTerm)object).term); + } + + public @Override String toString() { + if (connectedness==0.1f) return term; + return connectedness + ":" + term; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java new file mode 100644 index 00000000000..9f6e81a04bc --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/SegmentStartPoint.java @@ -0,0 +1,145 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +/** + *

Information on segment start points stored temporarily during string match metric calculation.

+ * + *

Given that we want to start a segment at i, this holds the best known metrics up to i + * and the end of the previous segment. In addition it holds information on how far we have tried + * to look for alternative segments from this starting point (skipI and previousJ).

+ * + * @author Jon Bratseth + */ +final class SegmentStartPoint { + + private FieldMatchMetricsComputer owner; + + /** The i for which this is the possible segment starting points */ + private int i; + + private int skipI; + + /** The best known metrics up to this starting point */ + private FieldMatchMetrics metrics; + + /** The j ending the previous segmentation producing those best metrics */ + private int previousJ; + + /** The semantic distance from the current previousJ which is already explored */ + private int semanticDistanceExplored=0; + + /** There are possibly more j's to try at this starting point */ + boolean open=true; + + /** Creates a segment start point for the first segment */ + public SegmentStartPoint(FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) { + this.i=0; + this.previousJ=0; + this.metrics=metrics; + this.owner=owner; + this.semanticDistanceExplored=0; + } + + /** Creates a segment start point for any i position where the j is not known */ + public SegmentStartPoint(int i,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) { + this.i=i; + this.previousJ=previousJ; + this.metrics=metrics; + this.owner=owner; + this.semanticDistanceExplored=0; + } + + /** Creates a segment start point for any position, where the j of the start point is known */ + public SegmentStartPoint(int i,int j,int previousJ,FieldMatchMetrics metrics, FieldMatchMetricsComputer owner) { + this.i=i; + this.previousJ=previousJ; + this.metrics=metrics; + this.owner=owner; + this.semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1; + } + + /** Returns the current best metrics for this starting point */ + public FieldMatchMetrics getMetrics() { return metrics; } + + /** + * Stores that we have explored to a certain j from the current previousJ. 
+ */ + public void exploredTo(int j) { + semanticDistanceExplored=owner.fieldIndexToSemanticDistance(j,previousJ)+1; + } + + /** + * Offers an alternative history leading up to this point, which is accepted and stored if its + * segmentation score is strictly higher than that of the current history + */ + public void offerHistory(int offeredPreviousJ,FieldMatchMetrics offeredMetrics,boolean collectTrace) { + if (offeredMetrics.getSegmentationScore()<=metrics.getSegmentationScore()) { + if (collectTrace) + offeredMetrics.trace().add(" rejected offered history [match: " + offeredMetrics.getSegmentationScore() + + " ending at:" + offeredPreviousJ + "] at " + this + "\n"); + return; // Reject + } + + /* + if (previousJ!=offeredPreviousJ) { // Starting over like this achieves higher correctness if + semanticDistanceExplored=0; // the match metric is dependent on relative distance between segments + open=true; // but is more expensive + } + */ + + if (collectTrace) + offeredMetrics.trace().add(" accepted offered history [match: " + offeredMetrics.getSegmentationScore() + + " ending at:" + offeredPreviousJ + "] at " + this + "\n"); + + previousJ=offeredPreviousJ; + metrics=offeredMetrics; + } + + /** + * Returns whether there are possibly still unexplored j's for this i + */ + public boolean isOpen() { return open; } + + public void setOpen(boolean open) { this.open=open; } + + /** Returns the i for which this is a possible segment starting point */ + public int getI() { return i; } + + /** + * Returns the j ending the previous segmentation producing those best metrics + */ + public int getPreviousJ() { return previousJ; } + + /** + * Returns the semantic distance from the previous j which is explored so far, exclusive + * (meaning, if the value is 0, 0 is not explored yet) + */ + public int getSemanticDistanceExplored() { return semanticDistanceExplored; } + + public void setSemanticDistanceExplored(int distance) { this.semanticDistanceExplored=distance; } + + /** + * Returns the position startI we should start at 
from this start point i. + * startI==i except when there are i's from this starting point which are not found anywhere in + * the field. In that case, startI==i+the number of terms following i which are known not to be present + */ + public int getStartI() { + return i+skipI; + } + + /** + * Increments the startI by one because we have discovered that the term at the current startI is not + * present in the field + */ + public void incrementStartI() { skipI++; } + + public String toString() { + if (i==owner.getQuery().getTerms().length) + return "last segment: Complete match: " + metrics.getMatch() + " previous j: " + previousJ + + " (" + (open ? "open" : "closed") + ")"; + return "segment at " + i + " (" + owner.getQuery().getTerms()[i] + "): Match up to here: " + metrics.getMatch() + " previous j: " + + previousJ + " explored to: " + semanticDistanceExplored + + " (" + (open ? "open" : "closed") + ")"; + } + +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java new file mode 100644 index 00000000000..775c7d1d687 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/Trace.java @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features.fieldmatch; + +/** + * A computation trace: accumulates the messages added to it, in order, and returns them concatenated from toString() + * + * @author Jon Bratseth + */ +public class Trace { + + private StringBuilder b = new StringBuilder(); + + public void add(String s) { + b.append(s); // append the message, not the builder itself + } + + @Override + public String toString() { + return b.toString(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java new file mode 100644 index 00000000000..c16fbb4521e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/fieldmatch/package-info.java @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Reference implementation of the + * string segment match algorithm + * which creates the fieldMatch feature set. + */ +@ExportPackage +@PublicApi +package com.yahoo.searchlib.ranking.features.fieldmatch; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java new file mode 100644 index 00000000000..028bf3337f0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/ranking/features/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Java implementations for various Vespa rank features + */ +@ExportPackage +@PublicApi +package com.yahoo.searchlib.ranking.features; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java new file mode 100755 index 00000000000..86ac53a1e44 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/ExpressionFunction.java @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.SerializationContext; +import com.yahoo.text.Utf8; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.*; + +/** + *

A function defined by a ranking expression

+ * + * @author Simon Thoresen + * @author bratseth + */ +public class ExpressionFunction { + + private final String name; + private final ImmutableList arguments; + private final RankingExpression body; + + /** + *

Constructs a new function

+ * + * @param name the name of this function + * @param arguments its argument names + * @param body the ranking expression that defines this function + */ + public ExpressionFunction(String name, List arguments, RankingExpression body) { + this.name = name; + this.arguments = arguments==null ? ImmutableList.of() : ImmutableList.copyOf(arguments); + this.body = body; + } + + public String getName() { return name; } + + /** Returns an immutable list of the arguments of this */ + public List arguments() { return arguments; } + + public RankingExpression getBody() { return body; } + + /** + *

Create and return an instance of this function based on the given + * arguments. If function calls are nested, this call might produce + * additional scripts.

+ * + * @param context the context used to expand this + * @param arguments the arguments to instantiate on. + * @param path the expansion path leading to this. + * @return the script function instance created. + */ + public Instance expand(SerializationContext context, List arguments, Deque path) { + Map argumentBindings = new HashMap<>(); + for (int i = 0; i < this.arguments.size() && i < arguments.size(); ++i) { + argumentBindings.put(this.arguments.get(i), arguments.get(i).toString(context, path, null)); + } + return new Instance(toSymbol(argumentBindings), body.getRoot().toString(context.createBinding(argumentBindings), path, null)); + } + + /** + * Returns a symbolic string that represents this function with a given + * list of arguments. The arguments are mangled by hashing the string + * representation of the argument expressions, so we might need to revisit + * this if we start seeing collisions. + * + * @param argumentBindings the bound arguments to include in the symbolic name. + * @return the symbolic name for an instance of this function + */ + private String toSymbol(Map argumentBindings) { + if (argumentBindings.isEmpty()) return name; + + StringBuilder ret = new StringBuilder(); + ret.append(name).append("@"); + for (Map.Entry argumentBinding : argumentBindings.entrySet()) { + ret.append(Long.toHexString(symbolCode(argumentBinding.getKey() + "=" + argumentBinding.getValue()))); + ret.append("."); + } + if (ret.toString().endsWith(".")) + ret.setLength(ret.length()-1); + return ret.toString(); + } + + + /** + *

Returns a more unique hash code than what Java's own {@link + * String#hashCode()} method would produce.

+ * + * @param str The string to hash. + * @return A 64 bit long hash code. + */ + private static long symbolCode(String str) { + try { + MessageDigest md = java.security.MessageDigest.getInstance("SHA-1"); + byte[] buf = md.digest(Utf8.toBytes(str)); + if (buf.length >= 8) { + long ret = 0; + for (int i = 0; i < 8; ++i) { + ret = (ret << 8) + (buf[i] & 0xff); + } + return ret; + } + } catch (NoSuchAlgorithmException e) { + throw new Error("java must always support SHA-1 message digest format", e); + } + return str.hashCode(); + } + + @Override + public String toString() { + return name; + } + + /** + * An instance of a serialization of this function, using a particular serialization context (by {@link + * ExpressionFunction#expand}) + */ + public class Instance { + + private final String name; + private final String expressionString; + + public Instance(String name, String expressionString) { + this.name = name; + this.expressionString = expressionString; + } + + public String getName() { + return name; + } + + public String getExpressionString() { + return expressionString; + } + + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java new file mode 100755 index 00000000000..527a908da73 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/FeatureList.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser; +import com.yahoo.searchlib.rankingexpression.parser.TokenMgrError; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; + +import java.io.*; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Encapsulates the production rule 'featureList()' int the RankingExpressionParser. + * + * @author Simon Thoresen + */ +@Beta +public class FeatureList implements Iterable { + + private final List features = new ArrayList<>(); + + /** + * Creates a new feature list by consuming from a reader object. + * + * @param reader The reader that contains the string to parse. + * @throws ParseException Thrown if the string could not be parsed. + */ + public FeatureList(Reader reader) throws ParseException { + features.addAll(parse(reader)); + } + + /** + * Creates a new feature list by parsing a string. + * + * @param list The string to parse. + * @throws ParseException Thrown if the string could not be parsed. + */ + public FeatureList(String list) throws ParseException { + features.addAll(parse(new StringReader(list))); + } + + /** + * Creates a new feature list by reading the content of a file. + * + * @param file The file whose content to parse. + * @throws ParseException Thrown if the string could not be parsed. + * @throws FileNotFoundException Thrown if the file specified could not be found. + */ + public FeatureList(File file) throws ParseException, FileNotFoundException { + features.addAll(parse(new FileReader(file))); + } + + /** + * Parses the content of a reader object as a list of feature nodes. + * + * @param reader A reader object that contains an feature list. + * @return A list of those features named in the string. + * @throws ParseException if the string could not be parsed. 
+ */ + private static List<ReferenceNode> parse(Reader reader) throws ParseException { + List lst; + try { + lst = new RankingExpressionParser(reader).featureList(); + } + catch (TokenMgrError e) { + ParseException t = new ParseException(e.getMessage()); // keep the tokenizer's message, not just the cause + throw (ParseException)t.initCause(e); + } + List<ReferenceNode> ret = new ArrayList<>(lst.size()); + for (Object obj : lst) { + if (!(obj instanceof ReferenceNode)) { + throw new IllegalStateException("Feature list contains a " + obj.getClass().getName() + "."); + } + ret.add((ReferenceNode)obj); + } + return ret; + } + + /** + * Returns the number of features in this list. + * + * @return The size. + */ + public int size() { + return features.size(); + } + + /** + * Returns the feature at the given index. + * + * @param i The index of the feature to return. + * @return The feature at the given index. + */ + public ReferenceNode get(int i) { + return features.get(i); + } + + @Override + public int hashCode() { + int ret = 0; + for (ReferenceNode node : features) { + ret += node.hashCode() * 17; + } + return ret; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof FeatureList)) { + return false; + } + FeatureList lst = (FeatureList)obj; + if (features.size() != lst.features.size()) { + return false; + } + for (int i = 0; i < features.size(); ++i) { + if (!features.get(i).equals(lst.features.get(i))) { + return false; + } + } + return true; + } + + @Override + public String toString() { + StringBuilder ret = new StringBuilder(); + for (ReferenceNode node : this) { + ret.append(node).append(" "); + } + return ret.toString(); + } + + @Override + public Iterator<ReferenceNode> iterator() { + return features.iterator(); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java new file mode 100755 index 00000000000..e17d524e906 --- /dev/null +++ 
b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/RankingExpression.java @@ -0,0 +1,250 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser; +import com.yahoo.searchlib.rankingexpression.parser.TokenMgrError; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.SerializationContext; + +import java.io.*; +import java.util.*; + +/** + *

A ranking expression. Ranking expressions are used to calculate a rank score for a searched instance from a set of + * rank features.

+ * + *

A ranking expression wraps an expression node tree and may also optionally have a name.

+ * + *

The identity of a ranking expression is decided by both its name and expression tree. Two expressions which + * look the same in string form are the same.

+ * + *

Simple usage

+

+try {
+    MapContext context=new MapContext();
+    context.put("one",1d);
+    RankingExpression expression=new RankingExpression("10*if(i>35,if(i>one,if(i>=670,4,8),if(i>8000,5,3)),if(i==478,90,91))");
+    double result=expression.evaluate(context).asDouble();
+}
+catch (ParseException e) {
+    throw new RuntimeException(e);
+}
+
+ * + *

Or, usage optimized for repeated evaluation of the same expression

+

+// Members in a class living across multiple evaluations
+RankingExpression expression;
+ArrayContext contextPrototype;
+
+...
+
+// Initialization of the above members (once)
+// Create reusable, gbdt optimized expression and context.
+// The expression is multithread-safe while the context created is not
+try {
+    expression=new RankingExpression("10*if(i>35,if(i>one,if(i>=670,4,8),if(i>8000,5,3)),if(i==478,90,91))");
+    contextPrototype=new ArrayContext(expression);
+    ExpressionOptimizer optimizer=new ExpressionOptimizer(); // Increases evaluation speed of gbdt form expressions by 3-4x
+    OptimizationReport triviaAboutTheOptimization=optimizer.optimize(expression,contextPrototype);
+}
+catch (ParseException e) {
+    throw new RuntimeException(e);
+}
+
+...
+
+// Execution (many)
+ArrayContext context=contextPrototype.clone(); // Clone once per thread if evaluation is multithreaded - use contextPrototype directly if execution is single-threaded
+context.put("one",1d);
+double result=expression.evaluate(context).asDouble();
+
+ * + * @author Simon Thoresen + * @author bratseth + */ +public class RankingExpression implements Serializable { + + private String name = ""; + private ExpressionNode root; + + /** + * Creates a new ranking expression by consuming from the reader + * + * @param reader the reader that contains the string to parse. + * @throws ParseException if the string could not be parsed. + */ + public RankingExpression(Reader reader) throws ParseException { + root = parse(reader); + } + + /** + * Creates a ranking expression from a string + * + * @param expression The reader that contains the string to parse. + * @throws ParseException if the string could not be parsed. + */ + public RankingExpression(String expression) throws ParseException { + try { + if (expression == null || expression.length() == 0) { + throw new IllegalArgumentException("Empty ranking expressions are not allowed"); + } + root = parse(new StringReader(expression)); + } + catch (ParseException e) { + ParseException p = new ParseException("Could not parse '" + expression + "'"); + p.initCause(e); + throw p; + } + } + + /** + * Creates a ranking expression from a file. For convenience, the file.getName() up to any dot becomes the name of + * this expression. + * + * @param file the name of the file whose content to parse. + * @throws ParseException if the string could not be parsed. + * @throws IllegalArgumentException if the file could not be found + */ + public RankingExpression(File file) throws ParseException { + try { + name = file.getName().split("\\.")[0]; + root = parse(new FileReader(file)); + } + catch (FileNotFoundException e) { + throw new IllegalArgumentException("Could not create a ranking expression", e); + } + } + + /** + * Creates a named ranking expression from an expression root node. + */ + public RankingExpression(String name, ExpressionNode root) { + this.name = name; + this.root = root; + } + + /** + * Creates a ranking expression from an expression root node. 
+ * + * @param root The root node. + */ + public RankingExpression(ExpressionNode root) { + this.root = root; + } + + /** + * Parses the content of the reader object as an expression string. + * + * @param reader A reader object that contains an expression string. + * @return An expression node that corresponds to the given string. + * @throws ParseException if the string could not be parsed. + */ + private static ExpressionNode parse(Reader reader) throws ParseException { + try { + return new RankingExpressionParser(reader).rankingExpression(); + } + catch (TokenMgrError e) { + throw new ParseException(e.getMessage()); + } + } + + /** + * Returns the name of this ranking expression, or "" if no name is set. + * + * @return The name of this expression. + */ + public String getName() { + return name; + } + + /** + * Sets the name of this ranking expression. + * + * @param name The name to set. + */ + public void setName(String name) { + this.name = name; + } + + /** + * Returns the root of the expression tree of this expression. + * + * @return The root node. + */ + public ExpressionNode getRoot() { + return root; + } + + /** + * Sets the root of the expression tree of this expression. + * + * @param root The root node to set. + */ + public void setRoot(ExpressionNode root) { + this.root = root; + } + + @Override + public int hashCode() { + return toString().hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof RankingExpression && toString().equals(obj.toString()); + } + + @Override + public String toString() { + if ("".equals(name)) { + return root.toString(); + } else { + return name + ": " + root.toString(); + } + } + + /** + * Creates the necessary rank properties required to implement this expression. + * + * @param macros the expression macros to expand. + * @return a list of named rank properties required to implement this expression. 
+ */ + public Map getRankProperties(List macros) { + Map arg = new HashMap<>(); + for (ExpressionFunction function : macros) { + arg.put(function.getName(), function); + } + Deque path = new LinkedList<>(); + SerializationContext context = new SerializationContext(macros); + String serializedRoot = root.toString(context, path, null); + Map serializedExpressions = context.serializedFunctions(); + serializedExpressions.put(propertyName(name), serializedRoot); + return serializedExpressions; + } + + /** + * Returns the rank-property name for a given expression name. + * + * @param expressionName The expression name to mangle. + * @return The property name. + */ + public static String propertyName(String expressionName) { + return "rankingExpression(" + expressionName + ").rankingScript"; + } + + /** + * Returns the value of evaluating this expression over the given context. + * + * @param context The variable bindings to use for this evaluation. + * @return The evaluation result. + * @throws IllegalArgumentException if there are variables which are not bound in the given map + */ + public Value evaluate(Context context) { + return root.evaluate(context); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java new file mode 100644 index 00000000000..f4d21fd634b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/AbstractArrayContext.java @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; + +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +/** + * Superclass of contexts which supports array index based lookup. + * Instances may be reused indefinitely for evaluations of a single + * ranking expression, in a single thread at the time. + * + * @author bratseth + */ +public abstract class AbstractArrayContext extends Context implements Cloneable { + + private final boolean ignoreUnknownValues; + + /** The mapping from variable name to index */ + private final ImmutableMap nameToIndex; + + /** The current values set, pre-converted to doubles */ + private double[] doubleValues; + + /** The name of the ranking expression this was created for */ + private final String rankingExpressionName; + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. + * This will fail if unknown values are attempted added. + */ + protected AbstractArrayContext(RankingExpression expression) { + this(expression, false); + } + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. 
+ * + * @param expression the expression to create a context for + * @param ignoreUnknownValues whether attempts to put values not present in this expression + * should fail (false - the default), or be ignored (true) + */ + protected AbstractArrayContext(RankingExpression expression, boolean ignoreUnknownValues) { + this.ignoreUnknownValues = ignoreUnknownValues; + this.rankingExpressionName = expression.getName(); + Set<String> variables = new LinkedHashSet<>(); + extractVariables(expression.getRoot(),variables); + + doubleValues = new double[variables.size()]; + + int i = 0; + ImmutableMap.Builder<String,Integer> nameToIndexBuilder = new ImmutableMap.Builder<>(); + for (String variable : variables) + nameToIndexBuilder.put(variable,i++); + nameToIndex = nameToIndexBuilder.build(); + } + + private void extractVariables(ExpressionNode node,Set<String> variables) { + if (node instanceof ReferenceNode) { + ReferenceNode fNode=(ReferenceNode)node; + if (fNode.getArguments().expressions().size()>0) + throw new UnsupportedOperationException("Array lookup is not supported with features having arguments"); + variables.add(fNode.toString()); + } + else if (node instanceof CompositeNode) { + CompositeNode cNode=(CompositeNode)node; + for (ExpressionNode child : cNode.children()) + extractVariables(child,variables); + } + } + + protected final Map<String,Integer> nameToIndex() { return nameToIndex; } + protected final double[] doubleValues() { return doubleValues; } + protected final boolean ignoreUnknownValues() { return ignoreUnknownValues; } + + /** + * Creates a clone of this context suitable for evaluating against the same ranking expression + * in a different thread (i.e., same name to index map, different value set). 
+ */ + public AbstractArrayContext clone() { + try { + AbstractArrayContext clone=(AbstractArrayContext)super.clone(); + clone.doubleValues=new double[nameToIndex.size()]; + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Programming error"); + } + } + + public Set names() { + return nameToIndex.keySet(); + } + + /** + * Returns the index from a name. + * + * @throws NullPointerException is this name is not known to this context + */ + public final int getIndex(String name) { + return nameToIndex.get(name); + } + + /** Returns the max number of variables which may be set in this */ + public int size() { + return doubleValues.length; + } + + /** Perform a fast lookup directly of the value as a double. This is faster than get(index).asDouble() */ + @Override + public double getDouble(int index) { + return doubleValues[index]; + } + + @Override + public String toString() { + return "fast lookup context for ranking expression '" + rankingExpressionName + + "' [" + doubleValues.length + " variables]"; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java new file mode 100644 index 00000000000..b9ff630198e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ArrayContext.java @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +import java.util.Arrays; + +/** + * Creates a context which supports array index based lookup. + * This instance may be reused indefinitely for evaluations of a single + * ranking expression, in a single thread at the time. 
+ * + * @author bratseth + */ +public class ArrayContext extends AbstractArrayContext implements Cloneable { + + /** The current values set */ + private Value[] values; + + private static DoubleValue constantZero = DoubleValue.frozen(0); + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. + * This will fail if unknown values are attempted added. + */ + public ArrayContext(RankingExpression expression) { + this(expression, false); + } + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. + * + * @param expression the expression to create a context for + * @param ignoreUnknownValues whether attempts to put values not present in this expression + * should fail (false - the default), or be ignored (true) + */ + public ArrayContext(RankingExpression expression, boolean ignoreUnknownValues) { + super(expression, ignoreUnknownValues); + values = new Value[doubleValues().length]; + Arrays.fill(values, DoubleValue.zero); + } + + /** + * Puts a value by name. + * The value will be frozen if it isn't already. + * + * @throws IllegalArgumentException if the name is not present in the ranking expression this was created with, and + * ignoredUnknownValues is false + * @since 5.1.5 + */ + @Override + public final void put(String name, Value value) { + Integer index = nameToIndex().get(name); + if (index==null) { + if (ignoreUnknownValues()) + return; + else + throw new IllegalArgumentException("Value '" + name + "' is not known to " + this); + } + put(index, value); + } + + /** Same as put(index,DoubleValue.frozen(value)) */ + public final void put(int index, double value) { + put(index, DoubleValue.frozen(value)); + } + + /** + * Puts a value by index. + * The value will be frozen if it isn't already. 
+ * + * @since 5.1.5 + */ + public final void put(int index, Value value) { + values[index]=value.freeze(); + try { + doubleValues()[index]=value.asDouble(); + } + catch (UnsupportedOperationException e) { + doubleValues()[index]=Double.NaN; // see getDouble below + } + } + + /** Perform a slow lookup by name */ + @Override + public Value get(String name) { + Integer index=nameToIndex().get(name); + if (index==null) return DoubleValue.zero; + return values[index]; + } + + /** Perform a fast lookup by index */ + @Override + public final Value get(int index) { + return values[index]; + } + + /** Perform a fast lookup directly of the value as a double. This is faster than get(index).asDouble() */ + @Override + public final double getDouble(int index) { + double value=doubleValues()[index]; + if (value==Double.NaN) + throw new UnsupportedOperationException("Value at " + index + " has no double representation"); + return value; + } + + /** + * Creates a clone of this context suitable for evaluating against the same ranking expression + * in a different thread (i.e, name name to index map, different value set. + */ + public ArrayContext clone() { + ArrayContext clone=(ArrayContext)super.clone(); + clone.values = new Value[nameToIndex().size()]; + Arrays.fill(values,constantZero); + return clone; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java new file mode 100644 index 00000000000..8b456b9236b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/BooleanValue.java @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A value which is either true or false.
+ * In numerical context true is interpreted as 1 and false as 0.
+ *
+ * @author Jon Bratseth
+ * @since 5.1.21
+ */
+public class BooleanValue extends DoubleCompatibleValue {
+
+    private boolean value;
+
+    /**
+     * Creates a boolean value which is frozen (immutable) at the outset.
+     */
+    public static BooleanValue frozen(boolean value) {
+        BooleanValue booleanValue=new BooleanValue(value);
+        booleanValue.freeze();
+        return booleanValue;
+    }
+
+    public BooleanValue(boolean value) {
+        this.value = value;
+    }
+
+    public boolean asBoolean() { return value; };
+
+    @Override
+    public double asDouble() {
+        return value ? 1 : 0;
+    }
+
+    @Override
+    public Value asMutable() {
+        if ( ! isFrozen()) return this; // already mutable, no copy needed
+        return new BooleanValue(value);
+    }
+
+    @Override
+    public String toString() {
+        return String.valueOf(value);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this==other) return true;
+        if ( ! (other instanceof BooleanValue)) return false;
+        return ((BooleanValue)other).value==this.value;
+    }
+
+    @Override
+    public int hashCode() {
+        return value ? 1 : 3; // one fixed code per truth value, consistent with equals
+    }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java
new file mode 100644
index 00000000000..0dff0414ac2
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Context.java
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Arguments;
+
+import java.util.Set;
+
+/**
+ * <p>The context providing value bindings for an expression evaluation.</p>
+ *
+ * @author bratseth
+ */
+public abstract class Context {
+
+    /**
+     * <p>Returns the value of a simple variable name.</p>
+     *
+     * @param name The name of the variable whose value to return.
+     * @return The value of the named variable.
+     */
+    public abstract Value get(String name);
+
+    /**
+     * <p>Returns the value of a structured variable on the form
+     * <code>name(argument*)(.output)?</code>, where <i>argument</i> is any
+     * string. This may be used to implement more advanced variables whose
+     * values are calculated at runtime from arguments. Supporting this in a
+     * context is optional. Implementations may choose to throw
+     * UnsupportedOperationException or always return null, or to handle outputs
+     * but not arguments.</p>
+     *
+     * <p>This default implementation does the latter - if arguments is non-null
+     * and non-empty an UnsupportedOperationException is thrown, otherwise
+     * get(name + "." + output) is called (or just get(name) if output is also
+     * null).</p>
+     *
+     * @param name      The name of this variable.
+     * @param arguments The parsed arguments as given in the textual expression.
+     * @param output    The name of the value to output (to enable one named
+     *                  calculation to output several), or null to output the
+     *                  "main" (or only) value.
+     */
+    public Value get(String name, Arguments arguments,String output) {
+        if (arguments!=null && arguments.expressions().size()>0)
+            throw new UnsupportedOperationException(this + " does not support structured ranking expression variables, attempted to reference '" +
+                                                    name + arguments + "'");
+        if (output==null)
+            return get(name);
+        return get(name + "." + output);
+    }
+
+    /**
+     * <p>Lookup by index rather than name. This is supported by some optimized
+     * context subclasses. This default implementation throws
+     * UnsupportedOperationException.</p>
+     *
+     * @param index The index of the variable whose value to return.
+     * @return The value of the indexed variable.
+     */
+    public Value get(int index) {
+        throw new UnsupportedOperationException(this + " does not support variable lookup by index");
+    }
+
+    /**
+     * <p>Lookup by index rather than name directly to a double. This is supported by some optimized
+     * context subclasses. This default implementation throws
+     * UnsupportedOperationException.</p>
+     *
+     * @param index The index of the variable whose value to return.
+     * @return The value of the indexed variable.
+     */
+    public double getDouble(int index) {
+        throw new UnsupportedOperationException(this + " does not support variable lookup by index");
+    }
+
+    /**
+     * Same as put(name,DoubleValue.frozen(value))
+     */
+    public final void put(String name, double value) {
+        put(name, DoubleValue.frozen(value));
+    }
+
+    /**
+     * <p>Sets a value to this, or throws an UnsupportedOperationException if
+     * this is not supported. This default implementation does the latter.</p>
+     *
+     * @param name The name of the variable to set.
+     * @param value the value to set. Ownership of this value is transferred to this - if it is mutable
+     *              (not frozen) it may be modified during execution
+     * @since 5.1.5
+     */
+    public void put(String name, Value value) {
+        throw new UnsupportedOperationException(this + " does not support variable assignment");
+    }
+
+    /**
+     * <p>Returns all the names available in this, or throws an
+     * UnsupportedOperationException if this operation is not supported. This
+     * default implementation does the latter.</p>
+     *
+     * @return The set of all variable names.
+     */
+    public Set names() {
+        throw new UnsupportedOperationException(this + " does not support return a list of its names");
+    }
+
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java
new file mode 100644
index 00000000000..3129bfa05a3
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleCompatibleValue.java
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+
+/**
+ * A value which acts as a double in numerical context: every operation here works on asDouble() of both operands, arithmetic ones return a new DoubleValue.
+ *
+ * @author Jon Bratseth
+ * @since 5.1.21
+ */
+public abstract class DoubleCompatibleValue extends Value {
+
+    @Override
+    public boolean hasDouble() { return true; }
+
+    @Override
+    public Value negate() { return new DoubleValue(-asDouble()); }
+
+    @Override
+    public Value add(Value value) {
+        return new DoubleValue(asDouble() + value.asDouble());
+    }
+
+    @Override
+    public Value subtract(Value value) {
+        return new DoubleValue(asDouble() - value.asDouble());
+    }
+
+    @Override
+    public Value multiply(Value value) {
+        return new DoubleValue(asDouble() * value.asDouble());
+    }
+
+    @Override
+    public Value divide(Value value) {
+        return new DoubleValue(asDouble() / value.asDouble());
+    }
+
+    @Override
+    public boolean compare(TruthOperator operator, Value value) {
+        return operator.evaluate(asDouble(), value.asDouble());
+    }
+
+    @Override
+    public Value function(Function function, Value value) {
+        return new DoubleValue(function.evaluate(asDouble(),value.asDouble()));
+    }
+
+}
diff --git
a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java new file mode 100644 index 00000000000..2a9a6173125 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleOnlyArrayContext.java @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +/** + * A variant of an array context variant which supports faster binding of variables but slower lookup + * from non-gbdt-optimized ranking expressions. + * + * @author bratseth + */ +public class DoubleOnlyArrayContext extends AbstractArrayContext { + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. + * This will fail if unknown values are attempted added. + */ + public DoubleOnlyArrayContext(RankingExpression expression) { + this(expression, false); + } + + /** + * Create a fast lookup context for an expression. + * This instance should be reused indefinitely by a single thread. + * + * @param expression the expression to create a context for + * @param ignoreUnknownValues whether attempts to put values not present in this expression + * should fail (false - the default), or be ignored (true) + */ + public DoubleOnlyArrayContext(RankingExpression expression, boolean ignoreUnknownValues) { + super(expression, ignoreUnknownValues); + } + + /** + * Puts a value by name. + * The value will be frozen if it isn't already. 
+ * + * @throws IllegalArgumentException if the name is not present in the ranking expression this was created with, and + * ignoredUnknownValues is false + * @since 5.1.5 + */ + @Override + public final void put(String name, Value value) { + Integer index = nameToIndex().get(name); + if (index == null) { + if (ignoreUnknownValues()) + return; + else + throw new IllegalArgumentException("Value '" + name + "' is not known to " + this); + } + put(index, value); + } + + /** Same as put(index,DoubleValue.frozen(value)) */ + public final void put(int index, double value) { + doubleValues()[index] = value; + } + + /** + * Puts a value by index. + * + * @since 5.1.5 + */ + public final void put(int index, Value value) { + try { + put(index, value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw new IllegalArgumentException("This context only supports doubles, not " + value); + } + } + + /** Perform a slow lookup by name */ + @Override + public Value get(String name) { + Integer index = nameToIndex().get(name); + if (index==null) return DoubleValue.zero; + return new DoubleValue(getDouble(index)); + } + + /** Perform a faster lookup by index */ + @Override + public final Value get(int index) { + return new DoubleValue(getDouble(index)); + } + + /** + * Creates a clone of this context suitable for evaluating against the same ranking expression + * in a different thread (i.e, name name to index map, different value set. + */ + public DoubleOnlyArrayContext clone() { + return (DoubleOnlyArrayContext)super.clone(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java new file mode 100644 index 00000000000..1cd65c3133a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/DoubleValue.java @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.rule.Function; +import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; + +/** + * A double value result of a ranking expression evaluation. + * In a boolean context doubles are true if they are different from 0.0 + * + * @author bratseth + * @since 5.1.5 + */ +public final class DoubleValue extends DoubleCompatibleValue { + + // A note on performance: Reusing double values like below is actually slightly slower per evaluation, + // but the reduced garbage cost seems to regain this plus some additional percentages + + private double value; + + /** The double value instance for 0 */ + public final static DoubleValue zero=DoubleValue.frozen(0); + + public DoubleValue(double value) { + this.value=value; + } + + /** + * Create a double which is frozen at the outset. + */ + public static DoubleValue frozen(double value) { + DoubleValue doubleValue=new DoubleValue(value); + doubleValue.freeze(); + return doubleValue; + } + + @Override + public double asDouble() { return value; } + + @Override + public DoubleValue asDoubleValue() { return this; } + + @Override + public boolean asBoolean() { return value != 0.0; } + + @Override + public DoubleValue negate() { + return mutable(-value); + } + + @Override + public Value add(Value value) { + if (value instanceof TensorValue) + return value.add(this); + + try { + return mutable(this.value + value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw unsupported("add",value); + } + } + + @Override + public Value subtract(Value value) { + if (value instanceof TensorValue) + return value.negate().add(this); + + try { + return mutable(this.value - value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw unsupported("subtract",value); + } + } + + @Override + public Value multiply(Value value) { + if 
(value instanceof TensorValue) + return value.multiply(this); + + try { + return mutable(this.value * value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw unsupported("multiply", value); + } + } + + @Override + public Value divide(Value value) { + try { + return mutable(this.value / value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw unsupported("divide",value); + } + } + + @Override + public boolean compare(TruthOperator operator, Value value) { + try { + return operator.evaluate(this.value, value.asDouble()); + } + catch (UnsupportedOperationException e) { + throw unsupported("comparison",value); + } + } + + @Override + public Value function(Function function, Value value) { + // use the tensor implementation of max and min if the argument is a tensor + if ( (function.equals(Function.min) || function.equals(Function.max)) && value instanceof TensorValue) + return value.function(function, this); + + try { + return mutable(function.evaluate(this.value, value.asDouble())); + } + catch (UnsupportedOperationException e) { + throw unsupported("function " + function.toString(), value); + } + } + + private UnsupportedOperationException unsupported(String operation, Value value) { + return new UnsupportedOperationException("Cannot perform " + operation + " on " + value + " and " + this); + } + + /** Returns this or a mutable copy assigned the given value */ + private DoubleValue mutable(double value) { + DoubleValue mutable=this.asMutable(); + mutable.value=value; + return mutable; + } + + @Override + public DoubleValue asMutable() { + if ( ! isFrozen()) return this; + return new DoubleValue(value); + } + + @Override + public String toString() { + return String.valueOf(value); + } + + @Override + public boolean equals(Object other) { + if (this==other) return true; + if ( ! 
(other instanceof DoubleValue)) return false; + return ((DoubleValue)other).value==this.value; + } + + @Override + public int hashCode() { + return toString().hashCode(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java new file mode 100644 index 00000000000..6730053e9fe --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/ExpressionOptimizer.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer; +import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTOptimizer; + +/** + * This class will perform various optimizations on the ranking expressions. Clients using optimized expressions + * will do + * + * + * // Set up once + * RankingExpression expression = new RankingExpression(myExpressionString); + * ArrayContext context = new ArrayContext(expression); + * new ExpressionOptimizer().optimize(expression, context); + * + * // Execute repeatedly + * context.put("featureName1", value1); + * ... + * expression.evaluate(context); + * + * // Note that the expression may be used by multiple threads at the same time, while the + * // context is single-threaded. To create a context for another tread, use the above context as a prototype, + * // contextForOtherThread = context.clone(); + * + *

+ * Instances of this class are not multithread safe. + * + * @author Jon Bratseth + */ +public class ExpressionOptimizer { + + private GBDTOptimizer gbdtOptimizer = new GBDTOptimizer(); + + private GBDTForestOptimizer gbdtForestOptimizer = new GBDTForestOptimizer(); + + /** Gets an optimizer instance used by this by class name, or null if the optimizer is not known */ + public Optimizer getOptimizer(Class clazz) { + if (clazz == gbdtOptimizer.getClass()) + return gbdtOptimizer; + if (clazz == gbdtForestOptimizer.getClass()) + return gbdtForestOptimizer; + return null; + } + + public OptimizationReport optimize(RankingExpression expression, AbstractArrayContext arrayContext) { + OptimizationReport report = new OptimizationReport(); + // Note: Order of optimizations matter + gbdtOptimizer.optimize(expression, arrayContext, report); + gbdtForestOptimizer.optimize(expression, arrayContext, report); + return report; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java new file mode 100644 index 00000000000..9ee9a1f7a71 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/MapContext.java @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * A context backed by a Map + * + * @author bratseth + */ +public class MapContext extends Context { + + private Map bindings=new HashMap<>(); + + private boolean frozen = false; + + public MapContext() { + } + + /** + * Freezes this. + * Returns this for convenience. + */ + public MapContext freeze() { + if ( ! 
frozen) + bindings = Collections.unmodifiableMap(bindings); + return this; + } + + /** + * Creates a map context from a map. + * The ownership of the map is transferred to this - it cannot be further modified by the caller. + * All the Values of the map will be frozen. + * + * @since 5.1.5 + */ + public MapContext(Map bindings) { + this.bindings=bindings; + for (Value boundValue : bindings.values()) + boundValue.freeze(); + } + + /** + * Returns the value of a key. 0 is returned if the given key is not bound in this. + */ + public @Override Value get(String key) { + Value value=bindings.get(key); + if (value==null) return DoubleValue.zero; + return value; + } + + /** + * Sets the value of a key. + * The value is frozen by this. + * + * @since 5.1.5 + */ + public @Override void put(String key,Value value) { + bindings.put(key,value.freeze()); + } + + /** Returns an immutable view of the bindings of this. */ + public Map bindings() { + if (frozen) return bindings; + return Collections.unmodifiableMap(bindings); + } + + /** Returns an unmodifiable map of the names of this */ + public @Override Set names() { + if (frozen) return bindings.keySet(); + return Collections.unmodifiableMap(bindings).keySet(); + } + + public @Override String toString() { + return "a map context [" + bindings.size() + " bindings]"; + } + + /** + * A convenience constructor which returns a map context from a string on the form + * name1:value1, name2:value2 .... + * Extra spaces are allowed anywhere. Any other deviation from the syntax causes an exception to be thrown. 
+ */ + public static MapContext fromString(String contextString) { + MapContext mapContext = new MapContext(); + for (String keyValueString : contextString.split(",")) { + String[] strings = keyValueString.trim().split(":"); + mapContext.put(strings[0].trim(), Value.parse(strings[1].trim())); + } + return mapContext; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java new file mode 100644 index 00000000000..340a074f179 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/OptimizationReport.java @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Reports the result of optimizations of a ranking expression. 
+ * + * @author Jon Bratseth + */ +public class OptimizationReport { + + private Map metrics=new LinkedHashMap(); + + private List notes=new ArrayList(); + + public void setMetric(String name,int value) { + metrics.put(name,value); + } + + /** Returns the value of a metric, or null if it is not set */ + public int getMetric(String name) { + return metrics.get(name); + } + + /** + * Increases the metric by the given name by increment, if the metric is not previously set, + * this will assign it the value increment as expected + */ + public void incMetric(String name,int increment) { + Integer currentValue=metrics.get(name); + if (currentValue==null) + currentValue=0; + metrics.put(name,currentValue+increment); + } + + public void note(String note) { + notes.add(note); + } + + /** Returns all the content of this report as a multiline string */ + public String toString() { + StringBuilder b=new StringBuilder(); + + if (notes.size()>0) { + b.append("Optimization notes:\n"); + List displayedNotes=notes.subList(0,Math.min(5,notes.size())); + for (String note : displayedNotes) + b.append(" ").append(note).append("\n"); + if (notes.size()>displayedNotes.size()) + b.append(" ...\n"); + } + + b.append("Optimization metrics:\n"); + for (Map.Entry metric : metrics.entrySet()) + b.append(" " + metric.getKey() + ": " + metric.getValue() + "\n"); + return b.toString(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java new file mode 100644 index 00000000000..337e2f84774 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Optimizer.java @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; + +/** + * Superclass of ranking expression optimizers + * + * @author bratseth + */ +public abstract class Optimizer { + + private boolean enabled=true; + + /** Sets whether this optimizer is enabled. Default true */ + public void setEnabled(boolean enabled) { this.enabled=enabled; } + + /** Returns whether this is enabled */ + public boolean isEnabled() { return enabled; } + + public abstract void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java new file mode 100644 index 00000000000..ff935031149 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/StringValue.java @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.javacc.UnicodeUtilities; +import com.yahoo.searchlib.rankingexpression.rule.Function; +import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; + +/** + * A string value. + * + * @author Jon Bratseth + * @since 5.1.21 + */ +public class StringValue extends Value { + + private final String value; + + /** + * Create a string value which is frozen at the outset. 
+ */ + public static StringValue frozen(String value) { + StringValue stringValue=new StringValue(value); + stringValue.freeze(); + return stringValue; + } + + public StringValue(String value) { + this.value = value; + } + + /** Returns the hashcode of this, to enable strings to be encoded (with reasonable safely) as doubles for optimization */ + @Override + public double asDouble() { + return UnicodeUtilities.unquote(value.toString()).hashCode(); + } + + @Override + public boolean hasDouble() { return true; } + + @Override + public boolean asBoolean() { + throw new UnsupportedOperationException("A string value ('" + value + "') does not have a boolean value"); + } + + @Override + public Value negate() { + throw new UnsupportedOperationException("A string value ('" + value + "') cannot be negated"); + } + + @Override + public Value add(Value value) { + return new StringValue(value + value.toString()); + } + + @Override + public Value subtract(Value value) { + throw new UnsupportedOperationException("String values ('" + value + "') does not support subtraction"); + } + + @Override + public Value multiply(Value value) { + throw new UnsupportedOperationException("String values ('" + value + "') does not support multiplication"); + } + + @Override + public Value divide(Value value) { + throw new UnsupportedOperationException("String values ('" + value + "') does not support division"); + } + + @Override + public boolean compare(TruthOperator operator, Value value) { + if (operator.equals(TruthOperator.EQUAL)) + return this.equals(value); + throw new UnsupportedOperationException("String values ('" + value + "') cannot be compared except with '='"); + } + + @Override + public Value function(Function function, Value value) { + throw new UnsupportedOperationException("Mathematical functions cannot be applied on strings ('" + value + "')"); + } + + @Override + public Value asMutable() { + if ( ! 
isFrozen()) return this; + return new StringValue(value); + } + + @Override + public String toString() { + return "\"" + value + "\""; + } + + @Override + public boolean equals(Object other) { + if (this==other) return true; + if ( ! (other instanceof StringValue)) return false; + return ((StringValue)other).value.equals(this.value); + } + + @Override + public int hashCode() { + return value.hashCode(); + } + + /** Returns the value of this as a string */ + public String asString() { return value; } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java new file mode 100644 index 00000000000..12bede95aae --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/TensorValue.java @@ -0,0 +1,168 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.google.common.annotations.Beta; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorAddress; +import com.yahoo.searchlib.rankingexpression.rule.Function; +import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; +import com.yahoo.tensor.TensorType; + +import java.util.Optional; + +/** + * A Value containing a tensor. + * See {@link com.yahoo.tensor.Tensor} for definition of a tensor + * and the operations supported. 
+ *
+ * @author bratseth
+ */
+ @Beta
+public class TensorValue extends Value {
+
+    /** The tensor value of this */
+    private final Tensor value;
+    private final Optional type; // the tensor type when given at construction, otherwise empty
+
+    public TensorValue(Tensor value) {
+        this.value = value;
+        this.type = Optional.empty();
+    }
+
+    public TensorValue(Tensor value, TensorType type) {
+        this.value = value;
+        this.type = Optional.of(type);
+    }
+
+    @Override
+    public double asDouble() {
+        if (value.dimensions().size() == 0) // a tensor without dimensions holds a single number
+            return value.get(TensorAddress.empty);
+        throw new UnsupportedOperationException("Requires a double value from a tensor with dimensions " +
+                                                value.dimensions() + ", but a tensor of order > 0 does " +
+                                                "not have a double value. Input tensor: " + this);
+    }
+
+    @Override
+    public boolean hasDouble() { return value.dimensions().size() == 0; }
+
+    @Override
+    public boolean asBoolean() {
+        throw new UnsupportedOperationException("A tensor does not have a boolean value");
+    }
+
+    @Override
+    public Value negate() {
+        return new TensorValue(value.apply((Double value) -> -value));
+    }
+
+    @Override
+    public Value add(Value argument) {
+        if (argument instanceof TensorValue)
+            return new TensorValue(value.add(((TensorValue)argument).value));
+        else // any other argument is used through its asDouble(), passed to Tensor.apply
+            return new TensorValue(value.apply((Double value) -> value + argument.asDouble()));
+    }
+
+    @Override
+    public Value subtract(Value argument) {
+        if (argument instanceof TensorValue)
+            return new TensorValue(value.subtract(((TensorValue) argument).value));
+        else
+            return new TensorValue(value.apply((Double value) -> value - argument.asDouble()));
+    }
+
+    @Override
+    public Value multiply(Value argument) {
+        if (argument instanceof TensorValue)
+            return new TensorValue(value.multiply(((TensorValue) argument).value));
+        else
+            return new TensorValue(value.apply((Double value) -> value * argument.asDouble()));
+    }
+
+    @Override
+    public Value divide(Value argument) {
+        if (argument instanceof TensorValue)
+            throw new UnsupportedOperationException("Two tensors cannot be divided");
+        else
+            return new TensorValue(value.apply((Double value) -> value / argument.asDouble()));
+    }
+
+    public Value match(Value argument) {
+        return new TensorValue(value.match(asTensor(argument, "match")));
+    }
+
+    public Value min(Value argument) {
+        return new TensorValue(value.min(asTensor(argument, "min")));
+    }
+
+    public Value max(Value argument) {
+        return new TensorValue(value.max(asTensor(argument, "max")));
+    }
+
+    public Value sum(String dimension) {
+        return new TensorValue(value.sum(dimension));
+    }
+
+    public Value sum() {
+        return new DoubleValue(value.sum());
+    }
+
+    private Tensor asTensor(Value value, String operationName) { // unwraps a TensorValue argument, rejecting any other Value
+        if ( ! (value instanceof TensorValue))
+            throw new UnsupportedOperationException("Could not perform " + operationName +
+                                                    ": The second argument must be a tensor but was " + value);
+        return ((TensorValue)value).value;
+    }
+
+    public Tensor asTensor() { return value; }
+
+    public Optional getType() {
+        return type;
+    }
+
+    @Override
+    public boolean compare(TruthOperator operator, Value value) {
+        throw new UnsupportedOperationException("A tensor cannot be compared with any value");
+    }
+
+    @Override
+    public Value function(Function function, Value argument) {
+        if (function.equals(Function.min) && argument instanceof TensorValue)
+            return min(argument);
+        else if (function.equals(Function.max) && argument instanceof TensorValue)
+            return max(argument);
+        else // every other case evaluates the function with argument.asDouble(), through Tensor.apply
+            return new TensorValue(value.apply((Double value) -> function.evaluate(value, argument.asDouble())));
+    }
+
+    @Override
+    public Value asMutable() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public String toString() {
+        return value.toString();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        TensorValue that = (TensorValue) o;
+
+        if (!type.equals(that.type)) return false;
+        if (!value.equals(that.value)) return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = value.hashCode();
+        result = 31 * result + type.hashCode();
+        return result;
+    }
+}
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java
new file mode 100644
index 00000000000..e56c005cdf7
--- /dev/null
+++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/Value.java
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.javacc.UnicodeUtilities;
+import com.yahoo.searchlib.rankingexpression.rule.Function;
+import com.yahoo.searchlib.rankingexpression.rule.TruthOperator;
+import com.yahoo.tensor.MapTensor;
+
+/**
+ * The result of a ranking expression evaluation.
+ * Concrete subclasses of this provides implementations of these methods or throws
+ * UnsupportedOperationException if the operation is not supported.
+ *
+ * @author Jon Bratseth
+ * @since 5.1.5
+ */
+public abstract class Value {
+
+    private boolean frozen=false;
+
+    /** Returns this value as a double, or throws UnsupportedOperationException if it cannot be represented as a double */
+    public abstract double asDouble();
+
+    /** Returns this value as a double value, or throws UnsupportedOperationException if it cannot be represented as a double */
+    public DoubleValue asDoubleValue() {
+        return new DoubleValue(asDouble());
+    }
+
+    /** Returns true if this value can return itself as a double, i.e asDoubleValue will return a value and not throw */
+    public abstract boolean hasDouble();
+
+    /** Returns this value as a boolean.
*/ + public abstract boolean asBoolean(); + + public abstract Value negate(); + + public abstract Value add(Value value); + + public abstract Value subtract(Value value); + + public abstract Value multiply(Value value); + + public abstract Value divide(Value value); + + /** Perform the comparison specified by the operator between this value and the given value */ + public abstract boolean compare(TruthOperator operator,Value value); + + /** Perform the given binary function on this value and the given value */ + public abstract Value function(Function function,Value value); + + /** + * Irreversibly makes this immutable. Overriders must always call super.freeze() and return this + * + * @return this for convenience + */ + public Value freeze() { + frozen=true; + return this; + } + + /** Returns true if this is immutable, false otherwise */ + public final boolean isFrozen() { return frozen; } + + /** Returns this is mutable, or a mutable copy otherwise */ + public abstract Value asMutable(); + + @Override + public abstract String toString(); + + @Override + public abstract boolean equals(Object other); + + @Override + public abstract int hashCode(); + + /** + * Parses the given string to a value and returns it. + * Different subtypes of Value will be returned depending on the string. 
+ * + * @return a mutable Value + * @throws IllegalArgumentException if the given string is not parseable as a value + */ + public static Value parse(String value) { + if (value.equals("true")) + return new BooleanValue(true); + else if (value.equals("false")) + return new BooleanValue(false); + else if (value.startsWith("\"") || value.startsWith("'")) + return new StringValue(UnicodeUtilities.unquote(value)); + else if (value.startsWith("{")) + return new TensorValue(MapTensor.from(value)); + else + return new DoubleValue(Double.parseDouble(value)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java new file mode 100644 index 00000000000..3e138aa7d72 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestNode.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.SerializationContext; + +import java.util.Deque; + +/** + * An optimized version of a sum of consecutive decision trees. 
+ * + * @author bratseth + */ +public class GBDTForestNode extends ExpressionNode { + + private final double[] values; + + public GBDTForestNode(double[] values) { + this.values=values; + } + + @Override + public final Value evaluate(Context context) { + int pc = 0; + double treeSum = 0; + while (pc < values.length) { + int nextTree = (int)values[pc++]; + treeSum += GBDTNode.evaluate(values, pc, context); + pc += nextTree; + } + return new DoubleValue(treeSum); + } + + /** Returns (optimized sum of condition trees) */ + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return "(optimized sum of condition trees of size " + (values.length*8) + " bytes)"; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java new file mode 100644 index 00000000000..7d84124f2af --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizer.java @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.AbstractArrayContext; +import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext; +import com.yahoo.searchlib.rankingexpression.evaluation.OptimizationReport; +import com.yahoo.searchlib.rankingexpression.evaluation.Optimizer; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticOperator; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author bratseth + */ +public class GBDTForestOptimizer extends Optimizer { + + private OptimizationReport report; + + /** + * A temporary value used within the algorithm + */ + private int currentTreesOptimized = 0; + + /** + * Optimizes sums of GBDTNodes by replacing them by a single GBDTForestNode + * + * @param expression the expression to destructively optimize + * @param context a fast lookup context created from the given expression + * @param report the optimization report to which actions of this is logged + */ + @Override + public void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report) { + if ( ! isEnabled()) return; + + this.report = report; + expression.setRoot(findAndOptimize(expression.getRoot())); + report.note("GBDT forest optimization done"); + } + + /** + * Recursively descend and optimize gbdt forest nodes. + * + * @return the resulting node, which may be the input node if no optimizations were found + */ + private ExpressionNode findAndOptimize(ExpressionNode node) { + ExpressionNode newNode = optimize(node); + if ( ! 
(newNode instanceof CompositeNode)) return newNode; // + + CompositeNode newComposite = (CompositeNode)newNode; + List newChildren = new ArrayList<>(); + for (ExpressionNode child : newComposite.children()) { + newChildren.add(findAndOptimize(child)); + } + return newComposite.setChildren(newChildren); + } + + /** + * Optimize the given node (only) + * + * @return the resulting node, which may be the input node if it could not be optimized + */ + private ExpressionNode optimize(ExpressionNode node) { + currentTreesOptimized = 0; + List forest = new ArrayList<>(); + boolean optimized = optimize(node, forest); + if ( ! optimized ) return node; + + GBDTForestNode forestNode = new GBDTForestNode(toArray(forest)); + report.incMetric("Number of forests", 1); + report.incMetric("GBDT trees optimized to forests", currentTreesOptimized); + return forestNode; + } + + /** + * Optimize the given node, if it is the root of a gdbt forest. Otherwise do nothing and return false + */ + private boolean optimize(ExpressionNode node, List forest) { + if (node instanceof GBDTNode) { + addTo(forest, (GBDTNode)node); + currentTreesOptimized++; + return true; + } + if (!(node instanceof ArithmeticNode)) { + return false; + } + ArithmeticNode aNode = (ArithmeticNode)node; + for (ArithmeticOperator op : aNode.operators()) { + if (op != ArithmeticOperator.PLUS) { + return false; + } + } + for (ExpressionNode child : aNode.children()) { + if (!optimize(child, forest)) { + return false; + } + } + return true; + } + + private void addTo(List forest, GBDTNode tree) { + forest.add((double)tree.values().length); + addAll(tree.values(), forest); + } + + private void addAll(double[] values, List forest) { + for (double value : values) { + forest.add(value); + } + } + + private double[] toArray(List valueList) { + double[] valueArray = new double[valueList.size()]; + for (int i = 0; i < valueList.size(); i++) { + valueArray[i] = valueList.get(i); + } + return valueArray; + } + +} diff --git 
a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java new file mode 100644 index 00000000000..607b4dc55cb --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTNode.java @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.SerializationContext; + +import java.util.Deque; + +/** + * An optimized version of a decision tree. 
+ * + * @author bratseth + */ +public final class GBDTNode extends ExpressionNode { + + // The GBDT node vm works by reading doubles one at a time and interpreting them + // as either constant data or a mangling of opcode and variable reference: + // The value space is as follows: + // n=[0,MAX_LEAF_VALUE> : n is data (tree leaf constant value) + // n=[MAX_LEAF_VALUE+MAX_VARIABLES*0,MAX_LEAF_VALUE+MAX_VARIABLES*1>: < than var at index n + // n=[MAX_LEAF_VALUE+MAX_VARIABLES*1,MAX_LEAF_VALUE+MAX_VARIABLES*2>: = to var at index n-MAX_VARIABLES + // n=[MAX_LEAF_VALUE+MAX_VARIABLES*2,MAX_LEAF_VALUE+MAX_VARIABLES*3]: n-MAX_VARIABLES*2 is IN the following set + + // The full layout of an IF instruction is + // COMPARISON,TRUE_BRANCH_LENGTH,TRUE_BRANCH,FALSE_BRANCH + // where COMPARISON is VARIABLE_AND_OPCODE,COMPARE_CONSTANT if the opcode is < or =, + // and VARIABLE_AND_OPCODE,COMPARE_CONSTANTS_LENGTH,COMPARE_CONSTANTS if the opcode is IN + + + // If any change is made to this encoding, this change must also be reflected in GBDTNodeOptimizer + + /** The max (absolute) supported value an optimized leaf may have */ + public final static int MAX_LEAF_VALUE=2*1000*1000*1000; + + /** The max number of variables (features) supported in the context */ + public final static int MAX_VARIABLES=1*1000*1000; + + private final double[] values; + + public GBDTNode(double[] values) { + this.values=values; + } + + /** Returns a direct reference to the values of this. The returned array must not be modified. 
*/ + public final double[] values() { return values; } + + @Override + public final Value evaluate(Context context) { + return new DoubleValue(evaluate(values,0,context)); + } + + public static double evaluate(double[] values, int startOffset, Context context) { + int pc = startOffset; + while (true) { + double nextValue = values[pc++]; + if (nextValue >= MAX_LEAF_VALUE) { // a condition node + int offset = (int)nextValue - MAX_LEAF_VALUE; + boolean comparisonIsTrue = false; + if (offset < MAX_VARIABLES) { + comparisonIsTrue = context.getDouble(offset) 0) { // test each value in the set + setValuesLeft--; + if (testValue == values[pc++]) { + comparisonIsTrue=true; + break; + } + } + pc += setValuesLeft; // jump to after the set + } + + if (comparisonIsTrue) + pc++; // true branch - skip the jump value + else + pc += values[pc]; // false branch - jump + } + else { // a leaf + return nextValue; + } + } + } + + /** Returns "(optimized condition tree)" */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return "(optimized condition tree)"; + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java new file mode 100644 index 00000000000..7e74bdce9e6 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizer.java @@ -0,0 +1,184 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.yolean.Exceptions; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.*; +import com.yahoo.searchlib.rankingexpression.rule.*; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + *

This ranking expression processor recognizes and optimizes GBDT expressions. Note that this optimization is + * destructive - inspection is not possible into optimized subtrees.

+ * + *

This class is not thread-safe.

+ * + * @author Jon Bratseth + */ +public class GBDTOptimizer extends Optimizer { + + private OptimizationReport report; + + /** + * Optimizes this by replacing GBDT sub-expressions by GBDTNodes. These optimized expressions must be + * executed using an instance of {@link com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext} as context. + * These thread locally reusable contexts must be created from the ranking expression before the ranking + * expression is optimized. + * + * @param expression the expression to destructively optimize + * @param context a fast lookup context created from the given expression + * @param report the optimization report to which actions of this is logged + */ + @Override + public void optimize(RankingExpression expression, AbstractArrayContext context, OptimizationReport report) { + if (!isEnabled()) return; + + this.report = report; + + if (context.size() > GBDTNode.MAX_VARIABLES) { + report.note("Can not optimize expressions referencing more than " + GBDTNode.MAX_VARIABLES + " features: " + + expression + " has " + context.size()); + return; + } + + expression.setRoot(optimize(expression.getRoot(), context)); + report.note("GBDT tree optimization done"); + } + + /** + *

Recursively optimize nodes of the form ArithmeticNode(IfNode,ArithmeticNode(IfNode)) etc., ignore + * anything else.

+ * + *

Each condition node is converted to the double sequence [(OperatorIsEquals ? GBDTNode.MAX_VARIABLES : 0) + + * IndexOfLeftComparisonFeature+GBDTNode.MAX_LEAFT_VALUE, ValueOfRightComparisonValue,#OfValuesInTrueBranch,true + * branch values,false branch values]

+ * + *

Each value node is converted to the double value of the value node itself.

+ * + * @return the optimized expression + */ + private ExpressionNode optimize(ExpressionNode node, AbstractArrayContext context) { + if (node instanceof ArithmeticNode) { + Iterator childIt = ((ArithmeticNode)node).children().iterator(); + ExpressionNode ret = optimize(childIt.next(), context); + + Iterator operIt = ((ArithmeticNode)node).operators().iterator(); + while (childIt.hasNext() && operIt.hasNext()) { + ret = ArithmeticNode.resolve(ret, operIt.next(), optimize(childIt.next(), context)); + } + return ret; + } + if (node instanceof IfNode) { + return createGBDTNode((IfNode)node, context); + } + return node; + } + + private ExpressionNode createGBDTNode(IfNode cNode, AbstractArrayContext context) { + List values = new ArrayList<>(); + try { + consumeNode(cNode, values, context); + } + catch (IllegalArgumentException e) { // Conversion was impossible + report.note("Skipped optimization: " + Exceptions.toMessageString(e) + ". Expression: " + cNode); + return cNode; + } + report.incMetric("Optimized GDBT trees",1); + return new GBDTNode(toArray(values)); + } + + /** + * Recursively consume nodes into the value list Returns the number of values produced by this. 
+ */ + private int consumeNode(ExpressionNode node, List values, AbstractArrayContext context) { + int beforeIndex = values.size(); + if ( node instanceof IfNode) { + IfNode ifNode = (IfNode)node; + int jumpValueIndex = consumeIfCondition(ifNode.getCondition(), values, context); + values.add(0d); // jumpValue goes here after the next line + int jumpValue = consumeNode(ifNode.getTrueExpression(), values, context) + 1; + values.set(jumpValueIndex, (double) jumpValue); + consumeNode(ifNode.getFalseExpression(), values, context); + } else { + double value = toValue(node); + if (Math.abs(value) > GBDTNode.MAX_LEAF_VALUE) { + throw new IllegalArgumentException("Leaf value is too large for optimization: " + value); + } + values.add(toValue(node)); + } + return values.size() - beforeIndex; + } + + /** Consumes the if condition and return the size of the values resulting, for convenience */ + private int consumeIfCondition(ExpressionNode condition, List values, AbstractArrayContext context) { + if (condition instanceof ComparisonNode) { + ComparisonNode comparison = (ComparisonNode)condition; + if (comparison.getOperator() == TruthOperator.SMALLER) + values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*0 + getVariableIndex(comparison.getLeftCondition(), context)); + else if (comparison.getOperator() == TruthOperator.EQUAL) + values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*1 + getVariableIndex(comparison.getLeftCondition(), context)); + else + throw new IllegalArgumentException("Cannot optimize other conditions than < and ==, encountered: " + comparison.getOperator()); + values.add(toValue(comparison.getRightCondition())); + } + else if (condition instanceof SetMembershipNode) { + SetMembershipNode setMembership = (SetMembershipNode)condition; + values.add(GBDTNode.MAX_LEAF_VALUE + GBDTNode.MAX_VARIABLES*2 + getVariableIndex(setMembership.getTestValue(),context)); + values.add((double)setMembership.getSetValues().size()); + for (ExpressionNode 
setElementNode : setMembership.getSetValues()) + values.add(toValue(setElementNode)); + } + else { + throw new IllegalArgumentException("Node condition could not be optimized: " + condition); + } + + return values.size(); + } + + private double getVariableIndex(ExpressionNode node, AbstractArrayContext context) { + if (!(node instanceof ReferenceNode)) { + throw new IllegalArgumentException("Contained a left-hand comparison expression " + + "which was not a feature value but was: " + node); + } + ReferenceNode fNode = (ReferenceNode)node; + Integer index = context.getIndex(fNode.toString()); + if (index == null) { + throw new IllegalStateException("The ranking expression contained feature '" + fNode.getName() + + "', which is not known to " + context + ": The context must be created" + + "from the same ranking expression which is to be optimized"); + } + return index; + } + + private double toValue(ExpressionNode node) { + if (node instanceof ConstantNode) { + Value value = ((ConstantNode)node).getValue(); + if (value instanceof DoubleCompatibleValue || value instanceof StringValue) + return value.asDouble(); + else + throw new IllegalArgumentException("Cannot optimize a node containing a value of type " + + value.getClass().getSimpleName() + " (" + value + ") in a set test: " + node); + } + + if (node instanceof NegativeNode) { + NegativeNode nNode = (NegativeNode)node; + if (!(nNode.getValue() instanceof ConstantNode)) { + throw new IllegalArgumentException("Contained a negation of a non-number: " + nNode.getValue()); + } + return -((ConstantNode)nNode.getValue()).getValue().asDouble(); + } + throw new IllegalArgumentException("Node could not be optimized: " + node); + } + + private double[] toArray(List valueList) { + double[] valueArray = new double[valueList.size()]; + for (int i = 0; i < valueList.size(); i++) { + valueArray[i] = valueList.get(i); + } + return valueArray; + } + +} diff --git 
a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/test/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java new file mode 100644 index 00000000000..b744b884e0f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/evaluation/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Execution engine for ranking expressions + */ +@ExportPackage +@PublicApi +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/mlr/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java new file mode 100644 index 00000000000..95099876eb4 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Ranking expression execution library, see {@link com.yahoo.searchlib.rankingexpression.RankingExpression}. 
+ */ +@ExportPackage +@PublicApi +package com.yahoo.searchlib.rankingexpression; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java new file mode 100644 index 00000000000..01af7c12ae4 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/parser/package-info.java @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Ranking expression parser + */ +@ExportPackage +@PublicApi +package com.yahoo.searchlib.rankingexpression.parser; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java new file mode 100644 index 00000000000..a5d04c0f3b9 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Arguments.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * A set of argument expressions to a function or feature. + * This is immutable. 
+ * + * @author bratseth + */ +public final class Arguments implements Serializable { + + private final ImmutableList expressions; + + public Arguments() { + this(null); + } + + public Arguments(List expressions) { + if (expressions == null) { + this.expressions = ImmutableList.of(); + return; + } + + // Build in a roundabout way because java generics and lists + ImmutableList.Builder b = ImmutableList.builder(); + for (ExpressionNode node : expressions) + b.add(node); + this.expressions = b.build(); + } + + /** Returns an unmodifiable list of the expressions in this */ + public List expressions() { return expressions; } + + /** Evaluate all arguments in this */ + public Value[] evaluate(Context context) { + Value[] values=new Value[expressions.size()]; + for (int i=0; i 0) + b.append(")"); + return b.toString(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java new file mode 100755 index 00000000000..c6669d87d1b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticNode.java @@ -0,0 +1,129 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.*; + +/** + * A binary mathematical operation + * + * @author bratseth + */ +public final class ArithmeticNode extends CompositeNode { + + private final ImmutableList children; + private final ImmutableList operators; + + public ArithmeticNode(List children, List operators) { + this.children = ImmutableList.copyOf(children); + this.operators = ImmutableList.copyOf(operators); + } + + public ArithmeticNode(ExpressionNode leftExpression, ArithmeticOperator operator, ExpressionNode rightExpression) { + this.children = ImmutableList.of(leftExpression, rightExpression); + this.operators = ImmutableList.of(operator); + } + + public List operators() { return operators; } + + @Override + public List children() { return children; } + + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + StringBuilder string = new StringBuilder(); + + boolean nonDefaultPrecedence = nonDefaultPrecedence(parent); + if (nonDefaultPrecedence) + string.append("("); + + Iterator child = children.iterator(); + string.append(child.next().toString(context, path, this)).append(" "); + for (Iterator op = operators.iterator(); op.hasNext() && child.hasNext();) { + string.append(op.next().toString()).append(" "); + string.append(child.next().toString(context, path, this)); + if (op.hasNext()) + string.append(" "); + } + if (nonDefaultPrecedence) + string.append(")"); + string.append(" "); + + return string.toString().trim(); + } + + /** + * Returns true if this node has lower precedence than the parent + * (even though by virtue of being a node it will be calculated before the parent). + */ + private boolean nonDefaultPrecedence(CompositeNode parent) { + if ( parent==null) return false; + if ( ! 
(parent instanceof ArithmeticNode)) return false; + + return ((ArithmeticNode)parent).operators.get(0).hasPrecedenceOver(this.operators.get(0)); + } + + @Override + public Value evaluate(Context context) { + Iterator child = children.iterator(); + + Deque stack = new ArrayDeque<>(); + stack.push(new ValueItem(ArithmeticOperator.PLUS, child.next().evaluate(context))); + for (Iterator it = operators.iterator(); it.hasNext() && child.hasNext();) { + ArithmeticOperator op = it.next(); + if (!stack.isEmpty()) { + while (stack.peek().op.hasPrecedenceOver(op)) { + popStack(stack); + } + } + stack.push(new ValueItem(op, child.next().evaluate(context))); + } + while (stack.size() > 1) { + popStack(stack); + } + return stack.getFirst().value; + } + + private void popStack(Deque stack) { + ValueItem rhs = stack.pop(); + ValueItem lhs = stack.peek(); + lhs.value = rhs.op.evaluate(lhs.value, rhs.value); + } + + public static ArithmeticNode resolve(ExpressionNode left, ArithmeticOperator op, ExpressionNode right) { + if ( ! 
(left instanceof ArithmeticNode)) return new ArithmeticNode(left, op, right); + + ArithmeticNode leftArithmetic = (ArithmeticNode)left; + + List newChildren = new ArrayList<>(leftArithmetic.children()); + newChildren.add(right); + + List newOperators = new ArrayList<>(leftArithmetic.operators()); + newOperators.add(op); + + return new ArithmeticNode(newChildren, newOperators); + } + + private static class ValueItem { + + final ArithmeticOperator op; + Value value; + + public ValueItem(ArithmeticOperator op, Value value) { + this.op = op; + this.value = value; + } + } + + @Override + public CompositeNode setChildren(List newChildren) { + if (children.size() != newChildren.size()) + throw new IllegalArgumentException("Expected " + children.size() + " children but got " + newChildren.size()); + return new ArithmeticNode(newChildren, operators); + } + +} + diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java new file mode 100644 index 00000000000..e5a794ab53e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ArithmeticOperator.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * A mathematical operator + * + * @author bratseth + */ +public enum ArithmeticOperator { + + PLUS(0, "+") { public Value evaluate(Value x, Value y) { + return x.add(y); + }}, + MINUS(1, "-") { public Value evaluate(Value x, Value y) { + return x.subtract(y); + }}, + MULTIPLY(2, "*") { public Value evaluate(Value x, Value y) { + return x.multiply(y); + }}, + DIVIDE(3, "/") { public Value evaluate(Value x, Value y) { + return x.divide(y); + }}; + + /** A list of all the operators in this in order of decreasing precedence */ + public static final List operatorsByPrecedence = operatorsByPrecedence(); + + private final int precedence; + private final String image; + + private ArithmeticOperator(int precedence, String image) { + this.precedence = precedence; + this.image = image; + } + + /** Returns true if this operator has precedence over the given operator */ + public boolean hasPrecedenceOver(ArithmeticOperator op) { + return precedence > op.precedence; + } + + public abstract Value evaluate(Value x, Value y); + + @Override + public String toString() { + return image; + } + + private static List operatorsByPrecedence() { + List operators = new ArrayList<>(); + operators.add(DIVIDE); + operators.add(MULTIPLY); + operators.add(MINUS); + operators.add(PLUS); + return Collections.unmodifiableList(operators); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java new file mode 100755 index 00000000000..22b777d4b9d --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/BooleanNode.java @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +/** + * A node which produces a boolean value when evaluated. + * + * @author bratseth + * @since 5.1.21 + */ +public abstract class BooleanNode extends CompositeNode { +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java new file mode 100644 index 00000000000..882d16ebc1c --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ComparisonNode.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.*; + +/** + * A node which returns true or false depending on the outcome of a comparison. + * + * @author bratseth + * @since 5.1.21 + */ +public class ComparisonNode extends BooleanNode { + + /** The operator used to compare the left and right values of this condition. 
*/ + private final TruthOperator operator; + + private final ExpressionNode leftCondition, rightCondition; + + public ComparisonNode(ExpressionNode leftCondition, TruthOperator operator, ExpressionNode rightCondition) { + this.leftCondition = leftCondition; + this.operator = operator; + this.rightCondition = rightCondition; + } + /** Returns a read-only list holding the left condition followed by the right condition */ + @Override + public List children() { + List children = new ArrayList<>(2); + children.add(leftCondition); + children.add(rightCondition); + return Collections.unmodifiableList(children); + } + + public TruthOperator getOperator() { return operator; } + + public ExpressionNode getLeftCondition() { return leftCondition; } + + public ExpressionNode getRightCondition() { return rightCondition; } + + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return leftCondition.toString(context, path, this) + " " + operator + " " + + rightCondition.toString(context, path, this); + } + /** Evaluates both sides and returns a BooleanValue holding the outcome of comparing them with the operator of this */ + @Override + public Value evaluate(Context context) { + Value leftValue=leftCondition.evaluate(context); + Value rightValue=rightCondition.evaluate(context); + return new BooleanValue(leftValue.compare(operator,rightValue)); + } + /** Returns a copy of this, with the same operator, comparing the two given children; throws IllegalArgumentException unless exactly 2 are given */ + @Override + public ComparisonNode setChildren(List children) { + if (children.size() != 2) throw new IllegalArgumentException("A comparison test must have 2 children"); + return new ComparisonNode(children.get(0), operator, children.get(1)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java new file mode 100644 index 00000000000..d181c29b516 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/CompositeNode.java @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import java.util.List; + +/** + *

<p>The parent of all node types which contain child nodes.</p>

+ * + * @author bratseth + */ +public abstract class CompositeNode extends ExpressionNode { + + /** + *

<p>Returns a read-only list containing the immediate children of this composite.</p>

+ * + * @return The children of this. + */ + public abstract List children(); + + /** + * Returns a copy of this where the children are replaced by the given children. + * + * @throws IllegalArgumentException if the given list of children has a different size than children() + */ + public abstract CompositeNode setChildren(List children); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java new file mode 100755 index 00000000000..e51519059ed --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ConstantNode.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Deque; + +/** + * A node which holds a constant (frozen) value. + * + * @author Simon Thoresen + */ +public final class ConstantNode extends ExpressionNode { + /** The source string this was created from, or null if not known */ + private final String sourceImage; + /** The value of this; freeze() is called on it by the constructor */ + private final Value value; + /** Creates a constant node with no known source image */ + public ConstantNode(Value value) { + this(value,null); + } + + /** + * Creates a constant value + * + * @param value the value. Ownership of this value is transferred to this. 
+ * @param sourceImage the source string image producing this value, or null if not known + */ + public ConstantNode(Value value, String sourceImage) { + value.freeze(); + this.value=value; + this.sourceImage=sourceImage; + } + /** Returns the value held by this */ + public Value getValue() { return value; } + /** Serializes this as its source string; the context, path and parent are not used */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return sourceString(); + } + + /** Returns the string which created this, or the value.toString() if not known */ + public String sourceString() { + if (sourceImage != null) return sourceImage; + return value.toString(); + } + /** Returns the value held by this; the context is not consulted */ + @Override + public Value evaluate(Context context) { + return value; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java new file mode 100755 index 00000000000..7e9e1cb2825 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/EmbracedNode.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Collections; +import java.util.Deque; +import java.util.List; + +/** + * This class represents another expression enclosed in parentheses. + * + * @author Simon Thoresen + */ +public final class EmbracedNode extends CompositeNode { + + // The node to embrace. + private final ExpressionNode value; + + /** + * Creates a new expression node that embraces another. + * + * @param value The node to embrace. 
+ */ + public EmbracedNode(ExpressionNode value) { + this.value=value; + } + + /** Returns the node enclosed by this */ + public ExpressionNode getValue() { return value; } + /** Returns an immutable single-element list holding the enclosed node */ + @Override + public List children() { + return Collections.singletonList(value); + } + /** Serializes the enclosed node wrapped in "(" and ")", except that an enclosed ReferenceNode is written without them */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + String expression = value.toString(context, path, this); + if (value instanceof ReferenceNode) return expression; + return "(" + expression + ")"; + } + /** Returns the result of evaluating the enclosed node */ + @Override + public Value evaluate(Context context) { + return value.evaluate(context); + } + /** Returns a new node embracing the single given child; throws IllegalArgumentException unless exactly 1 is given */ + @Override + public CompositeNode setChildren(List newChildren) { + if (newChildren.size() != 1) + throw new IllegalArgumentException("Expected 1 child but got " + newChildren.size()); + return new EmbracedNode(newChildren.get(0)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java new file mode 100755 index 00000000000..05d998afd35 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ExpressionNode.java @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.io.Serializable; +import java.util.Deque; + +/** + * Superclass of all expression nodes. Expression nodes have their identity determined by their content. + * All expression nodes are immutable. 
+ * + * @author Simon Thoresen + */ +public abstract class ExpressionNode implements Serializable { + /** Returns the hash code of the string form of this, which keeps it consistent with equals */ + @Override + public final int hashCode() { + return toString().hashCode(); + } + /** Returns true if the given object is an expression node whose string form equals the string form of this */ + @Override + public final boolean equals(Object obj) { + return obj instanceof ExpressionNode && toString().equals(obj.toString()); + } + /** Returns this serialized as a root: with a new, empty serialization context and no path or parent */ + @Override + public final String toString() { + return toString(new SerializationContext(), null, null); + } + + /** + * Returns a script instance of this based on the supplied script functions. + * + * @param context the serialization context + * @param path the call path to this, used for cycle detection, or null if this is a root + * @param parent the parent node of this, or null if it is a root + * @return the main script, referring to script instances. + */ + public abstract String toString(SerializationContext context, Deque path, CompositeNode parent); + + /** + * Returns the value of evaluating this expression over the given context. + * + * @param context the variable bindings to use for this evaluation + * @throws IllegalArgumentException if there are variables which are not bound in the given map + */ + public abstract Value evaluate(Context context); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java new file mode 100644 index 00000000000..ecd8182a108 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/Function.java @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import java.io.Serializable; + +import static java.lang.Math.*; + +/** + * A scalar function of one or two double arguments, see arity(). Unary functions ignore the second argument to evaluate. + * + * @author bratseth + */ +public enum Function implements Serializable { + + cosh { public double evaluate(double x, double y) { return cosh(x); } }, + sinh { public double evaluate(double x, double y) { return sinh(x); } }, + tanh { public double evaluate(double x, double y) { return tanh(x); } }, + cos { public double evaluate(double x, double y) { return cos(x); } }, + sin { public double evaluate(double x, double y) { return sin(x); } }, + tan { public double evaluate(double x, double y) { return tan(x); } }, + acos { public double evaluate(double x, double y) { return acos(x); } }, + asin { public double evaluate(double x, double y) { return asin(x); } }, + atan { public double evaluate(double x, double y) { return atan(x); } }, + exp { public double evaluate(double x, double y) { return exp(x); } }, + log10 { public double evaluate(double x, double y) { return log10(x); } }, + log { public double evaluate(double x, double y) { return log(x); } }, + sqrt { public double evaluate(double x, double y) { return sqrt(x); } }, + ceil { public double evaluate(double x, double y) { return ceil(x); } }, + fabs { public double evaluate(double x, double y) { return abs(x); } }, + floor { public double evaluate(double x, double y) { return floor(x); } }, + isNan { public double evaluate(double x, double y) { return Double.isNaN(x) ? 
1.0 : 0.0; } }, + atan2(2) { public double evaluate(double x, double y) { return atan2(x,y); } }, + pow(2) { public double evaluate(double x, double y) { return pow(x,y); } }, + ldexp(2) { public double evaluate(double x, double y) { return x*pow(2,y); } }, + fmod(2) { public double evaluate(double x, double y) { return x % y; /* truncating remainder with the sign of x, as C fmod; IEEEremainder rounds the quotient to nearest, giving fmod(5,3) = -1 */ } }, + min(2) { public double evaluate(double x, double y) { return min(x,y); } }, + max(2) { public double evaluate(double x, double y) { return max(x,y); } }; + /** The number of arguments this function takes: 1 or 2 */ + private final int arity; + /** Creates a unary function */ + private Function() { + this(1); + } + /** Creates a function taking the given number of arguments */ + private Function(int arity) { + this.arity = arity; + } + + /** Performs this function on x, and also on y if this is binary; unary functions ignore y */ + public abstract double evaluate(double x, double y); + + /** Returns the number of arguments this function takes */ + public int arity() { return arity; } + +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java new file mode 100755 index 00000000000..8ab403bff7a --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/FunctionNode.java @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.List; + +/** + * Invocation of a native function. + * + * @author simon + * @author bratseth + */ +public final class FunctionNode extends CompositeNode { + + /** The type of function. */ + private final Function function; + + /** The arguments to this function. 
*/ + private final Arguments arguments; + + /** Creates a unary function node; throws IllegalArgumentException if the arity of the function is not 1 */ + public FunctionNode(Function function, ExpressionNode argument) { + if (function.arity() != 1) throw new IllegalArgumentException(function + " is not unary"); + this.function = function; + this.arguments = new Arguments(Collections.singletonList(argument)); + } + + /** Creates a binary function node; throws IllegalArgumentException if the arity of the function is not 2 */ + public FunctionNode(Function function, ExpressionNode argument1, ExpressionNode argument2) { + if (function.arity() != 2) throw new IllegalArgumentException(function + " is not binary"); + this.function = function; + List argumentList = new ArrayList<>(); + argumentList.add(argument1); + argumentList.add(argument2); + arguments=new Arguments(argumentList); + } + /** Returns the function invoked by this */ + public Function getFunction() { return function; } + + /** Returns the arguments of this */ + @Override + public List children() { + return arguments.expressions(); + } + /** Serializes this as the function name followed by its arguments, comma-separated without spaces, in parentheses */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + StringBuilder b=new StringBuilder(function.toString()); + b.append("("); + for (int i = 0; i < this.arguments.expressions().size(); ++i) { + b.append(this.arguments.expressions().get(i).toString(context, path, this)); + if (i < this.arguments.expressions().size() - 1) { + b.append(","); + } + } + b.append(")"); + return b.toString(); + } + /** Evaluates the arguments and applies the function to them; DoubleValue.zero stands in for any argument not present */ + @Override + public Value evaluate(Context context) { + if (arguments.expressions().size() == 0) + return DoubleValue.zero.function(function,DoubleValue.zero); + + Value argument1 = arguments.expressions().get(0).evaluate(context); + if (arguments.expressions().size() == 1) + return argument1.function(function, DoubleValue.zero); + + Value argument2 = arguments.expressions().get(1).evaluate(context); + return argument1.function(function,argument2); + } + + /** Returns a new function node with the children replaced by the given children */ + @Override + public FunctionNode setChildren(List children) { + if 
(arguments.expressions().size() != children.size()) + throw new IllegalArgumentException("Expected " + arguments.expressions().size() + " children but got " + children.size()); + if (children.size() == 1) + return new FunctionNode(function, children.get(0)); + else // binary + return new FunctionNode(function, children.get(0), children.get(1)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java new file mode 100755 index 00000000000..994c3db9bac --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/IfNode.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.*; + +/** + * A conditional branch of a ranking expression. + * + * @author Simon Thoresen + * @author bratseth + */ +public final class IfNode extends CompositeNode { + + /** The expression nodes that make up this condition. */ + private final ExpressionNode condition, trueExpression, falseExpression; + /** The probability that the condition evaluates to true, or null if not known. Not a child of this. */ + private final Double trueProbability; + /** Creates a new condition node with no known true probability */ + public IfNode(ExpressionNode condition, ExpressionNode trueExpression, ExpressionNode falseExpression) { + this(condition, trueExpression, falseExpression, null); + } + + /** + * Creates a new condition node. + * + * @param condition the condition of this + * @param trueExpression the expression to evaluate if the comparison is true + * @param falseExpression the expression to evaluate if the comparison is false + * @param trueProbability the probability that the condition will evaluate to true, or null if not known. 
+ * @throws IllegalArgumentException if trueProbability is non-null and not between 0.0 and 1.0 + */ + public IfNode(ExpressionNode condition, ExpressionNode trueExpression, ExpressionNode falseExpression, + Double trueProbability) { + if (trueProbability != null && ( trueProbability < 0.0 || trueProbability > 1.0) ) + throw new IllegalArgumentException("trueProbability must be a between 0.0 and 1.0, not " + trueProbability); + this.condition = condition; + this.trueProbability = trueProbability; + this.trueExpression = trueExpression; + this.falseExpression = falseExpression; + } + /** Returns an unmodifiable list of the condition, the true expression and the false expression, in that order */ + @Override + public List children() { + List children = new ArrayList(4); + children.add(condition); + children.add(trueExpression); + children.add(falseExpression); + return Collections.unmodifiableList(children); + } + + public ExpressionNode getCondition() { return condition; } + + public ExpressionNode getTrueExpression() { return trueExpression; } + + public ExpressionNode getFalseExpression() { return falseExpression; } + + /** The average probability that the condition of this node will evaluate to true, or null if not known */ + public Double getTrueProbability() { return trueProbability; } + /** Serializes this as "if (condition, trueExpression, falseExpression)", with the true probability as a fourth entry when it is known */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return "if (" + + condition.toString(context, path, this) + ", " + + trueExpression.toString(context, path, this) + ", " + + falseExpression.toString(context, path, this) + + (trueProbability != null ? 
", " + trueProbability : "") + ")"; + } + /** Evaluates the condition, then evaluates and returns only the branch it selects */ + @Override + public Value evaluate(Context context) { + if (condition.evaluate(context).asBoolean()) + return trueExpression.evaluate(context); + else + return falseExpression.evaluate(context); + } + /** Returns a copy of this with the condition, true expression and false expression replaced by the given children, in that order. The true probability is not a child and is carried over, so that a copy made from children() equals this. */ + @Override + public IfNode setChildren(List children) { + if (children.size() != 3) throw new IllegalArgumentException("Expected 3 children but got " + children.size()); + return new IfNode(children.get(0), children.get(1), children.get(2), trueProbability); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java new file mode 100755 index 00000000000..eee729fa3a8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NameNode.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Deque; + +/** + * An opaque name in a ranking expression. This is used to represent names passed to the context + * and interpreted by the given context in a way which is opaque to the ranking expressions. 
+ * + * @author Simon Thoresen + */ +public final class NameNode extends ExpressionNode { + + private final String name; + + public NameNode(String name) { + this.name = name; + } + /** Returns the name held by this */ + public String getValue() { + return name; + } + /** Serializes this as the bare name; the context, path and parent are not used */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return name; + } + /** Always throws RuntimeException: a name node is not meant to be evaluated */ + @Override + public Value evaluate(Context context) { + throw new RuntimeException("Name nodes should never be evaluated"); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java new file mode 100644 index 00000000000..11feddb919e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/NegativeNode.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Collections; +import java.util.Deque; +import java.util.List; + +/** + * A node which flips the sign of the value produced from the nested expression + * + * @author Jon Bratseth + */ +public class NegativeNode extends CompositeNode { + + private final ExpressionNode value; + + /** Constructs a new negative node */ + public NegativeNode(ExpressionNode value) { + this.value = value; + } + + /** Returns the node creating the value negated by this */ + public ExpressionNode getValue() { return value; } + /** Returns an immutable single-element list holding the negated node */ + @Override + public List children() { + return Collections.singletonList(value); + } + /** Serializes this as "-" followed by the nested node. NOTE(review): the nested node is given the parent of this, not this, as its parent - confirm that this is intended */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return "-" + value.toString(context, path, parent); + } + /** Returns the negation of the value of the nested node */ + @Override + public Value evaluate(Context context) { + return 
value.evaluate(context).negate(); + } + /** Returns a new node negating the single given child; throws IllegalArgumentException unless exactly 1 is given */ + @Override + public NegativeNode setChildren(List children) { + if (children.size() != 1) throw new IllegalArgumentException("Expected 1 children but got " + children.size()); + return new NegativeNode(children.get(0)); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java new file mode 100755 index 00000000000..2968b414cb8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNode.java @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.List; + +/** + * A node referring either to a value in the context or to another named ranking expression. + * + * @author simon + * @author bratseth + */ +public final class ReferenceNode extends CompositeNode { + + private final String name, output; + + private final Arguments arguments; + /** Creates a reference with the given name, no arguments and no output */ + public ReferenceNode(String name) { + this(name, null, null); + } + /** Creates a reference; null arguments gives an empty argument list, and output may be null */ + public ReferenceNode(String name, List arguments, String output) { + this.name = name; + this.arguments = arguments != null ? 
new Arguments(arguments) : new Arguments(); + this.output = output; + } + /** Returns the name this refers to */ + public String getName() { + return name; + } + + /** Returns the arguments, never null */ + public Arguments getArguments() { return arguments; } + + /** Returns a copy of this where the arguments are replaced by the given arguments */ + public ReferenceNode setArguments(List arguments) { + return new ReferenceNode(name, arguments, output); + } + /** Returns the output of this, or null if none was given */ + public String getOutput() { + return output; + } + + /** Returns a copy of this node with a modified output */ + public ReferenceNode setOutput(String output) { + return new ReferenceNode(name, arguments.expressions(), output); + } + + /** Returns the argument expressions of this, which are its children */ + @Override + public List children() { return arguments.expressions(); } + /** Serializes this. A name bound in the context replaces this when this has no arguments and no output. A name of a context function, given a matching number of arguments and no output, is expanded: the expansion is added to the context and this is written as rankingExpression(instance name). Otherwise this is written as name(arguments).output. Throws IllegalStateException if the expansion path contains a cycle. */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + if (path == null) + path = new ArrayDeque<>(); + String myName = this.name; + String myOutput = this.output; + List myArguments = this.arguments.expressions(); + + String resolvedArgument = context.getBinding(myName); + if (resolvedArgument != null && this.arguments.expressions().size() == 0 && myOutput == null) { + // Replace this whole node with the value of the argument value that it maps to + myName = resolvedArgument; + myArguments = null; + myOutput = null; + } else if (context.getFunction(myName) != null) { + // Replace this whole node with a reference to another script. 
+ ExpressionFunction function = context.getFunction(myName); + if (function != null && myArguments != null && function.arguments().size() == myArguments.size() && myOutput == null) { + String myPath = name + this.arguments.expressions(); + if (path.contains(myPath)) { + throw new IllegalStateException("Cycle in ranking expression function: " + path); + } + path.addLast(myPath); + ExpressionFunction.Instance instance = function.expand(context, myArguments, path); + path.removeLast(); + context.addFunctionSerialization(RankingExpression.propertyName(instance.getName()), instance.getExpressionString()); + myName = "rankingExpression(" + instance.getName() + ")"; + myArguments = null; + myOutput = null; + } + } + // Always print the same way, the magic is already done. + StringBuilder ret = new StringBuilder(myName); + if (myArguments != null && myArguments.size() > 0) { + ret.append("("); + for (int i = 0; i < myArguments.size(); ++i) { + ret.append(myArguments.get(i).toString(context, path, this)); + if (i < myArguments.size() - 1) { + ret.append(","); + } + } + ret.append(")"); + } + ret.append(myOutput != null ? "." + myOutput : ""); + return ret.toString(); + } + /** Looks this up in the context: by name only when this has no arguments and no output, otherwise by name, arguments and output */ + @Override + public Value evaluate(Context context) { + if (arguments.expressions().size()==0 && output==null) + return context.get(name); + return context.get(name, arguments, output); + } + /** Returns a copy of this with the given nodes as its arguments. No size check is made here: any number of children is accepted. */ + @Override + public CompositeNode setChildren(List newChildren) { + return new ReferenceNode(name, newChildren, output); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java new file mode 100644 index 00000000000..8ea0a886b65 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SerializationContext.java @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Context needed to serialize an expression to a string. This has the lifetime of a single serialization + * + * @author bratseth + */ +public class SerializationContext { + + /** Expression functions indexed by name */ + private final ImmutableMap functions; + + /** A cache of already serialized expressions indexed by name */ + private final Map serializedFunctions; + + /** Mapping from argument names to the expressions they resolve to. Holds a copy of the bindings given at construction. */ + public final Map bindings = new HashMap<>(); + + /** Create a context for a single serialization task, with no functions and no bindings */ + public SerializationContext() { + this(Collections.emptyList()); + } + + /** Create a context for a single serialization task, with the given functions, no bindings and a new, empty serialized function map */ + public SerializationContext(Collection functions) { + this(functions, Collections.emptyMap(), new LinkedHashMap<>()); + } + + /** Create a context for a single serialization task, with the values of the given map as functions; the keys of the map are not used */ + public SerializationContext(Map functions) { + this(functions.values()); + } + + /** Create a context for a single serialization task, with the given functions and bindings and a new, empty serialized function map */ + public SerializationContext(List functions, Map bindings) { + this(functions, bindings, new LinkedHashMap<>()); + } + + /** + * Create a context for a single serialization task + * + * @param functions the functions of this + * @param bindings the arguments of this + * @param serializedFunctions a cache of serializedFunctions - the ownership of this map + * is transferred to this and will be modified in it + */ + public SerializationContext(Collection functions, Map bindings, + Map serializedFunctions) { + this(toMap(functions), bindings, serializedFunctions); + } + /** Returns an immutable map of the given functions, indexed by their names */ + private static ImmutableMap 
toMap(Collection list) { + ImmutableMap.Builder mapBuilder = new ImmutableMap.Builder<>(); + for (ExpressionFunction function : list) + mapBuilder.put(function.getName(), function); + return mapBuilder.build(); + } + + /** + * Create a context for a single serialization task + * + * @param functions the functions of this + * @param bindings the arguments of this, copied into this; null is accepted and gives no bindings + * @param serializedFunctions a cache of serializedFunctions - the ownership of this map + * is transferred to this and will be modified in it + */ + public SerializationContext(ImmutableMap functions, Map bindings, + Map serializedFunctions) { + this.functions = functions; + this.serializedFunctions = serializedFunctions; + if (bindings != null) + this.bindings.putAll(bindings); + } + + /** + * Returns a function or null if it isn't defined in this context + */ + public ExpressionFunction getFunction(String name) { return functions.get(name); } + + /** Adds the serialization of a function to the serialized function map of this, under the given name */ + public void addFunctionSerialization(String name, String expressionString) { + serializedFunctions.put(name, expressionString); + } + + /** Returns the existing serialization of a function, or null if none */ + public String getFunctionSerialization(String name) { + return serializedFunctions.get(name); + } + + /** + * Returns the resolution of an argument, or null if it isn't defined in this context + */ + public String getBinding(String name) { return bindings.get(name); } + + /** + * Returns a new context which shares the functions and serialized function map with this but has different + * arguments. 
+ */ + public SerializationContext createBinding(Map arguments) { + return new SerializationContext(this.functions, arguments, this.serializedFunctions); + } + /** Returns the serialized function map held by this: the map instance itself, not a copy */ + public Map serializedFunctions() { return serializedFunctions; } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java new file mode 100644 index 00000000000..bb3b028f696 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/SetMembershipNode.java @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.*; + +/** + * A node which returns true or false depending on a set membership test + * + * @author Jon Bratseth + * @since 5.1.21 + */ +public class SetMembershipNode extends BooleanNode { + + private final ExpressionNode testValue; + + private final ImmutableList setValues; + /** Creates a set membership test; the given set values are kept as an immutable list */ + public SetMembershipNode(ExpressionNode testValue, List setValues) { + this.testValue = testValue; + this.setValues = ImmutableList.copyOf(setValues); + } + + /** The value to check for membership in the set */ + public ExpressionNode getTestValue() { return testValue; } + + /** Returns an immutable list of the values of the set */ + public List getSetValues() { return setValues; } + /** Returns a new list holding the test value followed by the set values */ + @Override + public List children() { + ArrayList children = new ArrayList<>(); + children.add(testValue); + children.addAll(setValues); + return children; + } + /** Serializes this as the test value, then " in [", the set values separated by ", ", and "]" */ + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + StringBuilder b = 
new StringBuilder(testValue.toString(context, path, this)); + b.append(" in ["); + for (int i = 0, len = setValues.size(); i < len; ++i) { + b.append(setValues.get(i).toString(context, path, this)); + if (i < len - 1) { + b.append(", "); + } + } + b.append("]"); + return b.toString(); + } + + @Override + public Value evaluate(Context context) { + Value value = testValue.evaluate(context); + for (ExpressionNode setValue : setValues) { + if (setValue.evaluate(context).equals(value)) + return new BooleanValue(true); + } + return new BooleanValue(false); + } + + @Override + public SetMembershipNode setChildren(List children) { + if (children.size()<1) throw new IllegalArgumentException("A set membership test must have at least 1 child"); + return new SetMembershipNode(children.get(0), children.subList(1, children.size())); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java new file mode 100644 index 00000000000..af309b3e8d8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorMatchNode.java @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; + +/** + * @author bratseth + */ + @Beta +public class TensorMatchNode extends CompositeNode { + + private final ExpressionNode left, right; + + public TensorMatchNode(ExpressionNode left, ExpressionNode right) { + this.left = left; + this.right = right; + } + + @Override + public List children() { + List children = new ArrayList<>(2); + children.add(left); + children.add(right); + return children; + } + + @Override + public CompositeNode setChildren(List children) { + if ( children.size() != 2) + throw new IllegalArgumentException("A match product must have two children"); + return new TensorMatchNode(children.get(0), children.get(1)); + + } + + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { + return "match(" + left.toString(context, path, parent) + ", " + right.toString(context, path, parent) + ")"; + } + + @Override + public Value evaluate(Context context) { + return asTensor(left.evaluate(context)).match(asTensor(right.evaluate(context))); + } + + private TensorValue asTensor(Value value) { + if ( ! 
(value instanceof TensorValue)) + throw new IllegalArgumentException("Attempted to take the tensor product with an argument which is " + + "not a tensor: " + value); + return (TensorValue)value; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java new file mode 100644 index 00000000000..a1f83157e20 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TensorSumNode.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.rule; + +import com.google.common.annotations.Beta; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; + +import java.util.Collections; +import java.util.Deque; +import java.util.List; +import java.util.Optional; + +/** + * A node which sums over all cells in the argument tensor + * + * @author bratseth + */ + @Beta +public class TensorSumNode extends CompositeNode { + + /** The tensor to sum */ + private final ExpressionNode argument; + + /** The dimension to sum over, or empty to sum all cells to a scalar */ + private final Optional dimension; + + public TensorSumNode(ExpressionNode argument, Optional dimension) { + this.argument = argument; + this.dimension = dimension; + } + + @Override + public List children() { + return Collections.singletonList(argument); + } + + @Override + public CompositeNode setChildren(List children) { + if (children.size() != 1) throw new IllegalArgumentException("A tensor sum node must have one tensor argument"); + return new TensorSumNode(children.get(0), dimension); + } + + @Override + public String toString(SerializationContext context, Deque path, CompositeNode parent) { 
+ return "sum(" + + argument.toString(context, path, parent) + + ( dimension.isPresent() ? ", " + dimension.get() : "" ) + + ")"; + } + + @Override + public Value evaluate(Context context) { + Value argumentValue = argument.evaluate(context); + if ( ! ( argumentValue instanceof TensorValue)) + throw new IllegalArgumentException("Attempted to take the tensor sum of argument '" + argument + "', " + + "but this returns " + argumentValue + ", not a tensor"); + TensorValue tensorArgument = (TensorValue)argumentValue; + if (dimension.isPresent()) + return tensorArgument.sum(dimension.get()); + else + return tensorArgument.sum(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java new file mode 100644 index 00000000000..26e8b183c21 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/TruthOperator.java @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import java.io.Serializable; + +/** + * A truth operator: compares two doubles and returns a boolean. Each operator has a string form, see toString and fromString. + * + * @author Jon Bratseth + */ +public enum TruthOperator implements Serializable { + + SMALLER("<") { public boolean evaluate(double x, double y) { return x < y; } }, + SMALLEREQUAL("<=") { public boolean evaluate(double x, double y) { return x <= y; } }, + EQUAL("==") { public boolean evaluate(double x, double y) { return x==y; } }, + APPROX_EQUAL("~=") { public boolean evaluate(double x, double y) { return approxEqual(x,y); } }, + LARGER(">") { public boolean evaluate(double x, double y) { return x>y; } }, + LARGEREQUAL(">=") { public boolean evaluate(double x, double y) { return x>=y; } }; + + private final String operatorString; + + TruthOperator(String operatorString) { + this.operatorString=operatorString; + } + + /** Perform the truth operation on the input */ + public abstract boolean evaluate(double x, double y); + + public @Override String toString() { return operatorString; } + + public static TruthOperator fromString(String string) { /* inverse of toString; throws IllegalArgumentException if no operator has this string form */ + for (TruthOperator operator : values()) + if (operator.toString().equals(string)) + return operator; + throw new IllegalArgumentException("Illegal truth operator '" + string + "'"); + } + + private static boolean approxEqual(double x,double y) { /* moves x (or x/y when |y| is above 1) one floating point step towards y (or 1.0), then tests exact equality */ + if (y < -1.0 || y > 1.0) { + x = Math.nextAfter(x/y, 1.0); + y = 1.0; + } else { + x = Math.nextAfter(x, y); + } + return x==y; + } + +} \ No newline at end of file diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java new file mode 100644 index 00000000000..d6a27aae0f8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/rule/package-info.java @@ -0,0 +1,7 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@PublicApi +@ExportPackage +package com.yahoo.searchlib.rankingexpression.rule; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java new file mode 100644 index 00000000000..bd9ad43f155 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencer.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Replaces "features" which found in the given constants by their constant value + * + * @author Jon Bratseth + */ +public class ConstantDereferencer extends ExpressionTransformer { + + /** The map of constants to dereference */ + private final Map constants; + + public ConstantDereferencer(Map constants) { + this.constants = constants; + } + + @Override + public ExpressionNode transform(ExpressionNode node) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node); + else if (node instanceof CompositeNode) + return transformChildren((CompositeNode)node); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode node) { + if (!node.getArguments().isEmpty()) + return transformArguments(node); + else + 
return transformConstantReference(node); + } + + private ExpressionNode transformArguments(ReferenceNode node) { + List arguments = node.getArguments().expressions(); + List transformedArguments = new ArrayList<>(arguments.size()); + for (ExpressionNode argument : arguments) + transformedArguments.add(transform(argument)); + return node.setArguments(transformedArguments); + } + + private ExpressionNode transformConstantReference(ReferenceNode node) { + Value value = constants.get(node.getName()); + if (value == null || (value instanceof TensorValue)) { + return node; // not a value constant reference + } + return new ConstantNode(value.freeze()); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java new file mode 100644 index 00000000000..d8995bd8752 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/ExpressionTransformer.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; + +import java.util.ArrayList; +import java.util.List; + +/** + * Superclass of expression transformers + * + * @author Jon Bratseth + */ +public abstract class ExpressionTransformer { + + public RankingExpression transform(RankingExpression expression) { + return new RankingExpression(expression.getName(), transform(expression.getRoot())); + } + + /** Transforms an expression node and returns the transformed node */ + public abstract ExpressionNode transform(ExpressionNode node); + + /** + * Utility method which calls transform on each child of the given node and return the resulting transformed + * composite + */ + protected CompositeNode transformChildren(CompositeNode node) { + List children = node.children(); + List transformedChildren = new ArrayList<>(children.size()); + for (ExpressionNode child : children) + transformedChildren.add(transform(child)); + return node.setChildren(transformedChildren); + } + + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java new file mode 100644 index 00000000000..5b5a06c99bf --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/Simplifier.java @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.searchlib.rankingexpression.evaluation.BooleanValue; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticOperator; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.EmbracedNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.rule.IfNode; + +import java.util.ArrayList; +import java.util.List; + +/** + * Performs simple algebraic simplification of expressions + * + * @author Jon Bratseth + */ +public class Simplifier extends ExpressionTransformer { + + @Override + public ExpressionNode transform(ExpressionNode node) { + if (node instanceof CompositeNode) + node = transformChildren((CompositeNode) node); // depth first + if (node instanceof IfNode) + node = transformIf((IfNode) node); + if (node instanceof EmbracedNode && hasSingleUndividableChild((EmbracedNode)node)) + node = ((EmbracedNode)node).children().get(0); + if (node instanceof ArithmeticNode) + node = transformArithmetic((ArithmeticNode) node); + return node; + } + + private boolean hasSingleUndividableChild(EmbracedNode node) { + if (node.children().size() > 1) return false; + if (node.children().get(0) instanceof ArithmeticNode) return false; + return true; + } + + private ExpressionNode transformArithmetic(ArithmeticNode node) { + if (node.children().size() > 1) { + List children = new ArrayList<>(node.children()); + List operators = new ArrayList<>(node.operators()); + for (ArithmeticOperator operator : ArithmeticOperator.operatorsByPrecedence) + 
transform(operator, children, operators); + node = new ArithmeticNode(children, operators); + } + + if (isConstant(node)) + return new ConstantNode(node.evaluate(null)); + else if (allMultiplicationOrDivision(node) && hasZero(node)) // disregarding the /0 case + return new ConstantNode(new DoubleValue(0)); + else + return node; + } + + private void transform(ArithmeticOperator operator, List children, List operators) { + int i = 0; + while (i < children.size()-1) { + if ( ! operators.get(i).equals(operator)) { + i++; + continue; + } + + ExpressionNode child1 = children.get(i); + ExpressionNode child2 = children.get(i + 1); + if (isConstant(child1) && isConstant(child2) && hasPrecedence(operators, i)) { + Value evaluated = new ArithmeticNode(child1, operators.remove(i), child2).evaluate(null); + children.set(i, new ConstantNode(evaluated.freeze())); + children.remove(i+1); + } + else { // try the next index + i++; + } + } + } + + /** + * Returns true if the operator at i binds at least as strongly as the neighbouring operators on each side (if any). + * This check works because we simplify by decreasing precedence, so neighbours will either be single constant values + * or a more complex expression that can't be simplified and hence also prevents the simplification in question here. + */ + private boolean hasPrecedence(List operators, int i) { + if (i > 0 && operators.get(i-1).hasPrecedenceOver(operators.get(i))) return false; + if (i < operators.size()-1 && operators.get(i+1).hasPrecedenceOver(operators.get(i))) return false; + return true; + } + + private ExpressionNode transformIf(IfNode node) { + if ( ! 
isConstant(node.getCondition())) return node; + + if (((BooleanValue)node.getCondition().evaluate(null)).asBoolean()) + return node.getTrueExpression(); + else + return node.getFalseExpression(); + } + + private boolean allMultiplicationOrDivision(ArithmeticNode node) { + for (ArithmeticOperator o : node.operators()) + if (o == ArithmeticOperator.PLUS || o == ArithmeticOperator.MINUS) + return false; + return true; + } + + private boolean hasZero(ArithmeticNode node) { + for (ExpressionNode child : node.children()) { + if ( ! (child instanceof ConstantNode)) continue; + ConstantNode constant = (ConstantNode)child; + if ( ! constant.getValue().hasDouble()) return false; + if (constant.getValue().asDouble() == 0.0) + return true; + } + return false; + } + + private boolean isConstant(ExpressionNode node) { + if (node instanceof ConstantNode) return true; + if (node instanceof ReferenceNode) return false; + if ( ! (node instanceof CompositeNode)) return false; + for (ExpressionNode child : ((CompositeNode)node).children()) { + if ( ! isConstant(child)) return false; + } + return true; + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java new file mode 100644 index 00000000000..da4e4f64615 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/rankingexpression/transform/package-info.java @@ -0,0 +1,6 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+@ExportPackage +package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.api.annotations.PublicApi; +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java new file mode 100755 index 00000000000..c147c3a33b8 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/TreeNetConverter.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet; + +import com.yahoo.searchlib.treenet.parser.TreeNetParser; + +import java.io.FileNotFoundException; +import java.io.FileReader; + +/** + * @author Simon Thoresen + */ +public class TreeNetConverter { + + /** + * Command-line entry point: parses the given file and prints its ranking expression to standard out. On a usage, file or parse error a message is printed to standard error and the process exits with status 1. + * + * @param args exactly one argument: the path of the file to convert + */ + public static void main(String[] args) { + if (args.length != 1) { + System.err.println("Usage: TreeNetConverter "); + System.exit(1); + } + try (FileReader reader = new FileReader(args[0])) { /* the reader is closed on every path out of this block */ + TreeNetParser parser = new TreeNetParser(reader); + System.out.println(parser.treeNet().toRankingExpression()); + } catch (FileNotFoundException e) { + System.err.println("Could not find file '" + args[0] + "'."); + System.exit(1); + } catch (Exception e) { + System.err.println("An error occured while parsing the content of file '" + args[0] + "': " + e); + System.exit(1); + } + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java new file mode 100644 index 00000000000..debffbdcf5b --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage +package com.yahoo.searchlib.treenet; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java new file mode 100644 index 00000000000..f3244457c66 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/parser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.searchlib.treenet.parser; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java new file mode 100755 index 00000000000..1855a8a5674 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/ComparisonCondition.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +import com.yahoo.java7compat.Util; + +/** + * Represents a condition which comparing two values + * + * @author Simon Thoresen + */ +public class ComparisonCondition extends Condition { + + private final double rhs; + + /** + * Constructs a new instance of this class. + * + * @param lhs The name of the feature to compare to a constant. + * @param rhs The constant to compare the feature with. + * @param ift The label to jump to if left < right. + * @param iff The label to jump to if left >= right; + */ + public ComparisonCondition(String lhs, double rhs, String ift, String iff) { + super(lhs, ift, iff); + this.rhs = rhs; + } + + /** + * Returns the constant to compare the feature with. + * + * @return The constant. 
+ */ + public double getConstant() { return rhs; } + + @Override + public String conditionToRankingExpression() { + return "< " + Util.toJava7String(rhs); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java new file mode 100644 index 00000000000..4506f4970b0 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Condition.java @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +import java.util.Iterator; + +/** + * Represents a condition + * + * @author Jon Bratseth + */ +public abstract class Condition extends TreeNode { + + private final String leftValue; + private final String trueLabel; + private final String falseLabel; + + public Condition(String leftValue, String trueLabel, String falseLabel) { + this.leftValue = leftValue; + this.trueLabel = trueLabel; + this.falseLabel = falseLabel; + } + + /** Returns the name of the feature to compare to a constant. */ + public String getLeftValue() { return leftValue; } + + /** Return the label to jump to if this condition is true. */ + public String getTrueLabel() { return trueLabel; } + + /** Return the label to jump to if this condition is false. */ + public String getFalseLabel() { return falseLabel; } + + @Override + public final String toRankingExpression() { + StringBuilder b = new StringBuilder("if ("); + b.append(getLeftValue()); + b.append(" "); + b.append(conditionToRankingExpression()); + b.append(", "); + b.append(getParent().getNodes().get(getTrueLabel()).toRankingExpression()); + b.append(", "); + b.append(getParent().getNodes().get(getFalseLabel()).toRankingExpression()); + b.append(")"); + return b.toString(); + } + + /** + * Returns the ranking expression string for the condition part of this condition, i.e the ... part of + *
+     *     if(leftValue ..., trueExpression, falseExpression)
+     * 
+ */ + protected abstract String conditionToRankingExpression(); + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java new file mode 100755 index 00000000000..347dd84f419 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Response.java @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +/** A tree node holding a response value and the id of the next tree to run. + * @author Simon Thoresen + */ +public class Response extends TreeNode { + + // The value of this response. + private final Double value; + + // The id of the next tree to run after this. + private final String next; + + /** + * Constructs a new response. + * + * @param value The value of this response. + * @param next The id of the next tree to run after this. + */ + public Response(Double value, String next) { + super(); + this.value = value; + this.next = next; + } + + /** + * Returns the value of this response. + */ + public Double getValue() { + return value; + } + + /** + * Returns the id of the next tree to run after this. + */ + public String getNext() { + return next; + } + + @Override + public String toRankingExpression() { + return value.toString(); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java new file mode 100755 index 00000000000..95841bf829f --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/SetMembershipCondition.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.treenet.rule; + +import com.yahoo.java7compat.Util; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * Represents a set membership test on the form feature in [value1, value2 ...], where each value is a string or an integer + * + * @author Jon Bratseth + * @since 5.1.21 + */ +public class SetMembershipCondition extends Condition { + + private final List setValues; + + /** + * Constructs a new instance of this class. + * + * @param testValue the name of the feature to test + * @param setValues the set of values to compare to, copied by this constructor + * @param trueLabel the label to jump to if the value is in the set + * @param falseLabel the label to jump to if the value is not in the set + */ + public SetMembershipCondition(String testValue, List setValues, String trueLabel, String falseLabel) { + super(testValue, trueLabel, falseLabel); + this.setValues = Collections.unmodifiableList(new ArrayList<>(setValues)); + } + + /** Returns the unmodifiable set of values to check */ + public List getSetValues() { return setValues; } + + @Override + protected String conditionToRankingExpression() { /* strings are written double-quoted and integers as-is, separated by "," */ + StringBuilder b = new StringBuilder("in ["); + for (Iterator i = setValues.iterator(); i.hasNext(); ) { + Object value = i.next(); + if (value instanceof String) + b.append("\"").append(value).append("\""); + else if (value instanceof Integer) + b.append(value); + else /* the null check keeps a null element from masking this message with a bare NullPointerException */ + throw new RuntimeException("Expected a string or integer in a set membership test, not a " + + (value == null ? null : value.getClass()) + ": " + value); + + if (i.hasNext()) + b.append(","); + } + b.append("]"); + return b.toString(); + } + +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java new file mode 100755 index 00000000000..2a7191baeba --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/Tree.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +import java.util.Map; + +/** + * @author Simon Thoresen + */ +public class Tree { + + private final String name; + + // The parent tree net of this. + private TreeNet parent; + + // Returns the id of the next tree to run after this. + private String next; + + // The initial response value of this tree, may be null. + private final Double value; + + // The id of the first condition or response to run in this tree. + private final String begin; + + // All named nodes of this tree. + private final Map nodes; + + /** + * Constructs a new tree. + * + * @param name The name of this tree, used for error outputs. + * @param value The initial response value of this tree, may be null. + * @param begin The id of the first condition or response to run in this tree. + * @param nodes All named nodes of this tree. + */ + public Tree(String name, Double value, String begin, Map nodes) { + this.name = name; + this.value = value; + this.begin = begin; + this.nodes = nodes; + + this.next = null; + for (TreeNode node : this.nodes.values()) { + node.setParent(this); + if (node instanceof Response) { + String next = ((Response)node).getNext(); + if (this.next == null) { + this.next = next; + } else if (!this.next.equals(next)) { + throw new IllegalStateException("Not all child nodes of tree '" + name + "' agree on the next " + + "tree to run. Initial name was '" + this.next + "', conflicting " + + "name is '" + next + "'."); + } + } + } + } + + public String getName() { return name; } + + /** + * Returns the parent tree net of this. + */ + public TreeNet getParent() { return parent; } + + /** + * Sets the parent tree net of this. + * + * @param parent The parent tree net. + * @return This, to allow chaining. + */ + public Tree setParent(TreeNet parent) { + this.parent = parent; + return this; + } + + /** + * Returns the id of the next tree to run after this. 
+ */ + public String getNext() { + return next; + } + + /** + * Returns the initial response value of this tree, may be null. + */ + public Double getValue() { + return value; + } + + /** + * Returns the id of the first condition or response to run in this tree. + */ + public String getBegin() { + return begin; + } + + /** + * Returns all named nodes of this tree. + */ + public Map getNodes() { + return nodes; + } + + /** + * Returns a ranking expression equivalent of this tree. + */ + public String toRankingExpression() { + return nodes.get(begin).toRankingExpression(); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java new file mode 100755 index 00000000000..1db13b6c12e --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNet.java @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +import java.util.Map; + +/** + * @author Simon Thoresen + */ +public class TreeNet { + + // The id of the first tree to run in this net. + private String begin; + + // All named trees of this net. + private final Map trees; + + /** + * Constructs a new tree net. + * + * @param begin The id of the first tree to run in this net. + * @param trees All named trees of this net. + */ + public TreeNet(String begin, Map trees) { + this.begin = begin; + this.trees = trees; + for (Tree tree : this.trees.values()) { + tree.setParent(this); + } + } + + /** + * Returns the id of the first tree to run in this net. + */ + public String getBegin() { + return begin; + } + + /** + * Returns all named trees of this net. + */ + public Map getTrees() { + return trees; + } + + /** + * Returns a ranking expression equivalent of this net. 
+ */ + public String toRankingExpression() { + StringBuilder ret = new StringBuilder(); + String next = begin; + while (next != null) { + Tree tree = trees.get(next); + if (tree.getBegin() != null) { + if (ret.length() > 0) { + ret.append(" + \n"); + } + ret.append(tree.toRankingExpression()); + } + next = tree.getNext(); + } + return ret.toString(); + } +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java new file mode 100755 index 00000000000..a637adafc73 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/TreeNode.java @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet.rule; + +/** Superclass of the nodes of a tree: holds a reference to the parent tree and can be written as a ranking expression. + * @author Simon Thoresen + */ +public abstract class TreeNode { + + // The parent tree of this. + private Tree parent = null; + + /** + * Returns the parent tree of this, or null if no parent has been set. + */ + public Tree getParent() { + return parent; + } + + /** + * Sets the parent tree of this. + * + * @param parent The parent tree. + * @return This, to allow chaining. + */ + public TreeNode setParent(Tree parent) { + this.parent = parent; + return this; + } + + /** + * Returns a ranking expression equivalent of this node. + */ + public abstract String toRankingExpression(); +} diff --git a/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java new file mode 100644 index 00000000000..aae05b07627 --- /dev/null +++ b/searchlib/src/main/java/com/yahoo/searchlib/treenet/rule/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+@ExportPackage +package com.yahoo.searchlib.treenet.rule; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/searchlib/src/main/javacc/RankingExpressionParser.jj b/searchlib/src/main/javacc/RankingExpressionParser.jj new file mode 100755 index 00000000000..40dc31f13ae --- /dev/null +++ b/searchlib/src/main/javacc/RankingExpressionParser.jj @@ -0,0 +1,479 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * When this file is changed, do "mvn generate-sources" to rebuild the parser. + * + * @author bratseth + */ +options { + CACHE_TOKENS = true; + STATIC = false; + DEBUG_PARSER = false; + USER_TOKEN_MANAGER = false; + ERROR_REPORTING = true; + USER_CHAR_STREAM = false; +} + +PARSER_BEGIN(RankingExpressionParser) + +package com.yahoo.searchlib.rankingexpression.parser; + +import com.yahoo.searchlib.rankingexpression.rule.*; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.evaluation.StringValue; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.tensor.MapTensor; +import com.yahoo.tensor.TensorAddress; +import java.util.Collections; +import java.util.Map; +import java.util.LinkedHashMap; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class RankingExpressionParser { + +} + +PARSER_END(RankingExpressionParser) + +SKIP : +{ + <[" ","\n","\r","\t"]> +} + +TOKEN : +{ + (["l","L"])? | (["l","L"])? | (["l","L"])?> | + <#DECIMAL: ["1"-"9"] (["0"-"9"])*> | + <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> | + <#OCTAL: "0" (["0"-"7"])*> | + )? (["f","F","d","D"])?> | + <#EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+> +} + +TOKEN : +{ + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + ="> | + "> | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + +} + +// Declare a special skip token for comments. +SPECIAL_TOKEN : +{ + +} + +List featureList() : +{ + List ret = new ArrayList(); + ReferenceNode exp; +} +{ + ( ( exp = feature() { ret.add(exp); } )+ ) + { return ret; } +} + +ExpressionNode rankingExpression() : +{ + ExpressionNode ret; +} +{ + ( ret = expression() ) + { return ret; } +} + +ExpressionNode expression() : +{ + ExpressionNode left, right; + List rightList; + TruthOperator comparatorOp; +} +{ + ( left = arithmeticExpression() + ( + ( comparatorOp = comparator() right = arithmeticExpression() { left = new ComparisonNode(left, comparatorOp, right); } ) | + ( rightList = expressionList() { left = new SetMembershipNode(left, rightList); } ) + ) * + ) + { return left; } +} + +ExpressionNode arithmeticExpression() : +{ + ExpressionNode left, right = null; + ArithmeticOperator arithmeticOp; +} +{ + ( left = value() + ( arithmeticOp = arithmetic() right = value() { left = ArithmeticNode.resolve(left, arithmeticOp, right); } ) * + ) + { return left; } +} + +ArithmeticOperator arithmetic() : { } +{ + ( { return ArithmeticOperator.PLUS; } | + { return ArithmeticOperator.MINUS; } | +
{ return ArithmeticOperator.DIVIDE; } | + { return ArithmeticOperator.MULTIPLY; } ) + { return null; } +} + +TruthOperator comparator() : { } +{ + ( { return TruthOperator.SMALLEREQUAL; } | + { return TruthOperator.SMALLER; } | + { return TruthOperator.EQUAL; } | + { return TruthOperator.APPROX_EQUAL; } | + { return TruthOperator.LARGEREQUAL; } | + { return TruthOperator.LARGER; } ) + { return null; } +} + +ExpressionNode value() : +{ + ExpressionNode ret; + boolean neg = false; +} +{ + ( [ LOOKAHEAD(2) { neg = true; } ] + ( ret = constantPrimitive() | + ret = constantTensor() | + LOOKAHEAD(2) ret = ifExpression() | + LOOKAHEAD(2) ret = function() | + ret = feature() | + ret = queryFeature() | + ( ret = expression() { ret = new EmbracedNode(ret); } ) ) ) + { return neg ? new NegativeNode(ret) : ret; } +} + +IfNode ifExpression() : +{ + ExpressionNode condition, ifTrue, ifFalse; + Double trueProbability = null; +} +{ + ( ( condition = expression() ) + ifTrue = expression() ifFalse = expression() ( trueProbability = number() )? 
) + { + return new IfNode(condition, ifTrue, ifFalse, trueProbability); + } +} + +ReferenceNode queryFeature() : +{ + String name; +} +{ + ( name = identifier() ) + { return new ReferenceNode("query", Arrays.asList((ExpressionNode)new NameNode(name)), null); } +} + +ReferenceNode feature() : +{ + List args = null; + String name, out = null; +} +{ + ( name = identifier() [ args = args() ] [ out = outs() ] ) + { return new ReferenceNode(name, args, out); } +} + +String outs() : +{ + StringBuilder ret = new StringBuilder(); + String str; +} +{ + ( str = out() { ret.append(str); } + ( { ret.append(token.image); } + str = out() { ret.append(str); } )* ) + { return ret.toString(); } +} + +String out() : +{ + Function fnc; + String name; +} +{ + ( { return token.image; } | + { return token.image; } | + name = identifier() { return name; } ) + { return null; } +} + +List args() : +{ + List arguments = new ArrayList(); + ExpressionNode argument; +} +{ + ( argument = arg() { arguments.add(argument); } ( argument = arg() { arguments.add(argument); } )* ) + { return arguments; } +} + +// TODO: Replace use of this for macro arguments with value() +// For that to work with the current search execution framework +// we need to generate another macro for the argument such that we can replace +// instances of the argument with the reference to that macro in the same way +// as we replace by constants/names today (this can make for some fun combinatorial explosion). +// Simon also points out that we should stop doing macro expansion in the toString of a macro. 
+// - Jon 2014-05-02 +ExpressionNode arg() : +{ + ExpressionNode ret; + String name; + Function fnc; +} +{ + ( ret = constantPrimitive() | + ret = constantTensor() | + LOOKAHEAD(2) ret = feature() | + name = identifier() { ret = new NameNode(name); } ) + { return ret; } +} + +ExpressionNode function() : +{ + ExpressionNode function; +} +{ + ( function = scalarFunction() | function = tensorFunction() ) + { return function; } +} + +FunctionNode scalarFunction() : +{ + Function function; + ExpressionNode arg1, arg2; +} +{ + ( + ( function = unaryFunctionName() arg1 = expression() ) + { return new FunctionNode(function, arg1); } + ) | + ( + ( function = binaryFunctionName() arg1 = expression() arg2 = expression() ) + { return new FunctionNode(function, arg1, arg2); } + ) +} + +ExpressionNode tensorFunction() : +{ + ExpressionNode tensor1, tensor2; + String dimension = null; + TensorAddress address = null; +} +{ + ( + tensor1 = expression() ( dimension = identifier() )? + { return new TensorSumNode(tensor1, Optional.ofNullable(dimension)); } + ) | + ( + tensor1 = expression() tensor2 = expression() + { return new TensorMatchNode(tensor1, tensor2); } + ) +} + +// This is needed not to parse tensor functions but for the "reserved names as literals" workaround cludge +String tensorFunctionName() : +{ +} +{ + ( | ) + { return token.image; } +} + +Function unaryFunctionName() : { } +{ + { return Function.cos; } | + { return Function.sin; } | + { return Function.tan; } | + { return Function.cosh; } | + { return Function.sinh; } | + { return Function.tanh; } | + { return Function.acos; } | + { return Function.asin; } | + { return Function.atan; } | + { return Function.exp; } | + { return Function.log10; } | + { return Function.log; } | + { return Function.sqrt; } | + { return Function.ceil; } | + { return Function.fabs; } | + { return Function.floor; } | + { return Function.isNan; } +} + +Function binaryFunctionName() : { } +{ + { return Function.atan2; } | + { return 
Function.ldexp; } | + { return Function.pow; } | + { return Function.fmod; } | + { return Function.min; } | + { return Function.max; } +} + +List expressionList() : +{ + List list = new ArrayList(); + ExpressionNode expression; +} +{ + + expression=expression() { list.add(expression); } + ( LOOKAHEAD(2) expression=expression() { list.add(expression); } ) * + + { return list; } +} + +double number() : +{ + String sign = ""; +} +{ + ( { sign = "-";} )? ( | ) + { return Double.parseDouble(sign + token.image); } +} + +String identifier() : +{ + String name; + Function func; +} +{ + name = tensorFunctionName() { return name; } | + func = unaryFunctionName() { return func.toString(); } | + func = binaryFunctionName() { return func.toString(); } | + { return token.image; } | + { return token.image; } | + { return token.image; } +} + +// An identifier or integer +String tag() : +{ + String name; +} +{ + name = identifier() { return name; } | + { return token.image; } +} + +ConstantNode constantPrimitive() : +{ + String sign = ""; +} +{ + ( { sign = "-";} ) ? + ( | | ) + { return new ConstantNode(Value.parse(sign + token.image),sign + token.image); } +} + +Value primitiveValue() : +{ + String sign = ""; +} +{ + ( { sign = "-";} ) ? + ( | | ) + { return Value.parse(sign + token.image); } +} + +ConstantNode constantTensor() : +{ + Value constantValue; +} +{ + constantValue = tensorContent() + { return new ConstantNode(constantValue); } +} + +TensorValue tensorContent() : +{ + Map cells = new LinkedHashMap(); + TensorAddress address; + Double value; +} +{ + ( address = tensorAddress() value = number() { cells.put(address, value); } ) ? 
+ ( address = tensorAddress() value = number() { cells.put(address, value); } ) * + { return new TensorValue(new MapTensor(cells)); } +} + +TensorAddress tensorAddress() : +{ + List elements = new ArrayList(); + String dimension; + String label; +} +{ + + ( dimension = tag() label = label() { elements.add(new TensorAddress.Element(dimension, label)); } ) ? + ( dimension = tag() label = label() { elements.add(new TensorAddress.Element(dimension, label)); } ) * + + { return TensorAddress.fromUnsorted(elements); } +} + +String label() : +{ + String label; + +} +{ + ( label = tag() | + ( "-" { label = "-"; } ) ) + { return label; } +} + diff --git a/searchlib/src/main/javacc/TreeNetParser.jj b/searchlib/src/main/javacc/TreeNetParser.jj new file mode 100755 index 00000000000..db160c094ca --- /dev/null +++ b/searchlib/src/main/javacc/TreeNetParser.jj @@ -0,0 +1,362 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * A best-effort treenet parser. 
+ * + * @author Simon Thoresen + * @version $Id: TreeNetParser.jj,v 1.1 2009-02-24 10:06:32 arnej Exp $ + */ +options { + CACHE_TOKENS = true; + STATIC = false; + DEBUG_PARSER = false; + IGNORE_CASE = true; + + // Flip for higher performance + ERROR_REPORTING = true; +} + +PARSER_BEGIN(TreeNetParser) + +package com.yahoo.searchlib.treenet.parser; + +import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser; +import com.yahoo.searchlib.treenet.rule.*; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class TreeNetParser { + + void verifyCategoricalVar(String expected, String actual) throws ParseException { + if (!expected.equals(actual)) { + throw new ParseException("Expected variable '" + expected + "', got '" + actual + "'."); + } + } + + ComparisonCondition resolveCategoricalCondition(String var, Integer valA, Integer valB, String lblA, String lblB) { + if (valA < valB) + return new ComparisonCondition(var, valA + (valB - valA) / 2.0, lblA, lblB); + else + return new ComparisonCondition(var, valB + (valA - valB) / 2.0, lblB, lblA); + } + +} + +PARSER_END(TreeNetParser) + +SKIP : +{ + <[" ","\r","\t"]> | + <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> | + <"#" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> | + <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/"> +} + +TOKEN : +{ + (["l","L"])? | (["l","L"])? | (["l","L"])?> | + <#DECIMAL: ["1"-"9"] (["0"-"9"])*> | + <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> | + <#OCTAL: "0" (["0"-"7"])*> | + )? (["f","F","d","D"])? | "." (["0"-"9"])+ + ()? (["f","F","d","D"])? | (["0"-"9"])+ (["f","F","d","D"])? | (["0"-"9"])+ + ()? ["f","F","d","D"]> | + <#EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+> | + +} + +TOKEN : +{ + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + | + +} + +TreeNet treeNet() : +{ + String begin, label; + Tree tree; + Map trees = new HashMap(); +} +{ + ( ( ignoredCpp() )* + nl() + { begin = token.image; } eol() + eol() + eol() + + ( tree = tree() { trees.put(tree.getName(), tree); } )* + + + ( ignoredCpp() )* + + ) + { return new TreeNet(begin, trees); } +} + +/** C++ code outside the model which we can ignore */ +void ignoredCpp() : +{ +} +{ + | | | | | | | | | | | | +} + +Tree tree() : +{ + String name; + String begin = null, label; + Double value = null; + + TreeNode node; + Map nodes = new HashMap(); +} +{ + ( + ( + ( value = tnScore() name = label() ) | + ( name = label() value = tnScore() ) + ) + ( + LOOKAHEAD(label() (condition() | response())) + label = label() { if (begin == null) { begin = label; } } + ( node = condition() { nodes.put(label, node); } | + node = response() { nodes.put(label, node); } ) )* ) + { return new Tree(name, value, begin, nodes); } +} + +Double tnScore() : +{ + Double value = null; +} +{ + ( value = floatVal() | ( ) ) eol() + { return value; } +} + +Condition condition() : +{ + String var; + Condition ret; +} +{ + ( var = feature() ( ret = continuousCondition(var) | + LOOKAHEAD(8) ret = singleValueCategoricalCondition(var) | + ret = setMembershipCondition(var) ) ) + { return ret; } +} + +ComparisonCondition continuousCondition(String left) : +{ + Double right; + String ift, iff; +} +{ + ( right = floatVal() ift = jump() eol() + iff = jump() eol()) + { return new ComparisonCondition(left, right, ift, iff); } +} + +// Handle single-value IN expression as a regular comparison. 
+// This special case may be removed when IN support is implemented in ranking expressions in both C++ and Java +ComparisonCondition singleValueCategoricalCondition(String varA) : +{ + Integer valA, valB; + String lblA, lblB, varB; +} +{ + ( valA = intVal() nl() lblA = jump() eol() + varB = feature() { verifyCategoricalVar(varA, varB); } + valB = intVal() nl() lblB = jump() eol() ) + { return resolveCategoricalCondition(varA, valA, valB, lblA, lblB); } +} + +SetMembershipCondition setMembershipCondition(String testValue) : +{ + List setValues; + String trueLabel, falseLabel; +} +{ + ( setValues = valueList() trueLabel = jump() eol() + falseLabel = jump() eol() ) + { return new SetMembershipCondition(testValue, setValues, trueLabel, falseLabel); } +} + +Response response() : +{ + Double val; + String lbl; +} +{ + ( val = floatVal() eol() + lbl = jump() eol() ) + { return new Response(val, lbl); } +} + +String feature() : +{ + String name; + String arguments = null; + String output = null; +} +{ + ( name = identifier() [ arguments = featureArguments() ] [ output = featureOutputs() ] ) + { return name + (arguments != null ? "(" + arguments + ")" : "") + (output !=null ? "." 
+ output : ""); } +} + +String featureArguments() : +{ + String argument; + StringBuilder arguments = new StringBuilder(); +} +{ + ( argument = featureArgument() { arguments.append(argument); } + ( argument = featureArgument() { arguments.append(",").append(argument); } )* ) + { return arguments.toString(); } +} + +String featureArgument() : +{ + String argument; +} +{ + ( argument = string() | argument = floatImage() | argument = feature() ) + { return argument; } +} + +String featureOutputs() : +{ + StringBuilder outputs = new StringBuilder(); + String output; +} +{ + output = featureOutput() { outputs.append(output); } + ( output = featureOutput() { outputs.append(output); } ) * + { return outputs.toString(); } +} + +String featureOutput() : +{ + String name; +} +{ + { return token.image; } | + { return token.image; } | + name = identifier() { return name; } +} + +String label() : +{ + String ret; +} +{ + ( ret = identifier() nl() ) + { return ret; } +} + +void eol() : { } +{ + nl() +} + +void nl() : { } +{ + ( )+ +} + +String jump() : { } +{ + { return token.image; } +} + +String identifier() : { } +{ + ( /* | + | + | + | + | + | + | + | + | + | + |*/ + ) + { return token.image; } +} + +String spaceSeparatedIdentifiers() : +{ + StringBuilder identifiers = new StringBuilder(); + String identifier; +} +{ + identifier = identifier() { identifiers.append(identifier); } + ( identifier = identifier() { identifiers.append(identifier); } ) * + { return identifiers.toString(); } +} + +List valueList() : +{ + List values = new ArrayList(); + Object value; +} +{ + value = value() { values.add(value); } + ( value = value() { values.add(value); } ) * + { return values; } +} + +Object value() : +{ + Object value; +} +{ + ( value = spaceSeparatedIdentifiers() | value = intVal() | value = string() ) + { return value; } +} + +String string() : { } +{ + { return token.image; } +} + +Integer intVal() : { } +{ + { return Integer.valueOf(token.image); } +} + +Double floatVal() : { 
} +{ + ( | ) { return Double.valueOf(token.image); } +} + +String floatImage() : { } +{ + ( | ) { return token.image; } +} diff --git a/searchlib/src/main/sh/evaluation-benchmark b/searchlib/src/main/sh/evaluation-benchmark new file mode 100755 index 00000000000..0c9afd83fbd --- /dev/null +++ b/searchlib/src/main/sh/evaluation-benchmark @@ -0,0 +1 @@ +java -cp "target/test-classes:target/searchlib.jar" com.yahoo.searchlib.rankingexpression.evaluation.EvaluationBenchmark "$@" diff --git a/searchlib/src/main/sh/ga b/searchlib/src/main/sh/ga new file mode 100644 index 00000000000..f1e5b0981e9 --- /dev/null +++ b/searchlib/src/main/sh/ga @@ -0,0 +1,69 @@ +#! /bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# BEGIN environment bootstrap section +# Do not edit between here and END as this section should stay identical in all scripts + +findpath () { + myname=${0} + mypath=${myname%/*} + myname=${myname##*/} + if [ "$mypath" ] && [ -d "$mypath" ]; then + return + fi + mypath=$(pwd) + if [ -f "${mypath}/${myname}" ]; then + return + fi + echo "FATAL: Could not figure out the path where $myname lives from $0" + exit 1 +} + +COMMON_ENV=libexec/vespa/common-env.sh + +source_common_env () { + if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then + # ensure it ends with "/" : + VESPA_HOME=${VESPA_HOME%/}/ + export VESPA_HOME + common_env=$VESPA_HOME/$COMMON_ENV + if [ -f "$common_env" ]; then + .
$common_env + return + fi + fi + return 1 +} + +findroot () { + source_common_env && return + if [ "$VESPA_HOME" ]; then + echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'" + exit 1 + fi + if [ "$ROOT" ] && [ -d "$ROOT" ]; then + VESPA_HOME="$ROOT" + source_common_env && return + fi + findpath + while [ "$mypath" ]; do + VESPA_HOME=${mypath} + source_common_env && return + mypath=${mypath%/*} + done + echo "FATAL: missing VESPA_HOME environment variable" + echo "Could not locate $COMMON_ENV anywhere" + exit 1 +} + +findroot + +# END environment bootstrap section + +JAR=$VESPA_HOME/lib/jars/searchlib-deploy.jar +# An optional leading *.jar argument overrides the default jar and is consumed; other arguments are passed through untouched. +case "$1" in + *.jar) JAR=$1; shift ;; +esac + +exec java -cp "$JAR" com.yahoo.searchlib.mlr.ga.Main "$@" diff --git a/searchlib/src/main/sh/gbdt-analysis b/searchlib/src/main/sh/gbdt-analysis new file mode 100755 index 00000000000..6ff9c98ef1f --- /dev/null +++ b/searchlib/src/main/sh/gbdt-analysis @@ -0,0 +1 @@ +java -cp target/searchlib.jar com.yahoo.searchlib.mlr.gbdt.ExpressionAnalysis "$@" diff --git a/searchlib/src/main/sh/vespa-gbdt-converter b/searchlib/src/main/sh/vespa-gbdt-converter new file mode 100755 index 00000000000..aa1f79b1dc2 --- /dev/null +++ b/searchlib/src/main/sh/vespa-gbdt-converter @@ -0,0 +1,63 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +# BEGIN environment bootstrap section +# Do not edit between here and END as this section should stay identical in all scripts + +findpath () { + myname=${0} + mypath=${myname%/*} + myname=${myname##*/} + if [ "$mypath" ] && [ -d "$mypath" ]; then + return + fi + mypath=$(pwd) + if [ -f "${mypath}/${myname}" ]; then + return + fi + echo "FATAL: Could not figure out the path where $myname lives from $0" + exit 1 +} + +COMMON_ENV=libexec/vespa/common-env.sh + +source_common_env () { + if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then + # ensure it ends with "/" : + VESPA_HOME=${VESPA_HOME%/}/ + export VESPA_HOME + common_env=$VESPA_HOME/$COMMON_ENV + if [ -f "$common_env" ]; then + . $common_env + return + fi + fi + return 1 +} + +findroot () { + source_common_env && return + if [ "$VESPA_HOME" ]; then + echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'" + exit 1 + fi + if [ "$ROOT" ] && [ -d "$ROOT" ]; then + VESPA_HOME="$ROOT" + source_common_env && return + fi + findpath + while [ "$mypath" ]; do + VESPA_HOME=${mypath} + source_common_env && return + mypath=${mypath%/*} + done + echo "FATAL: missing VESPA_HOME environment variable" + echo "Could not locate $COMMON_ENV anywhere" + exit 1 +} + +findroot + +# END environment bootstrap section + +exec java -cp $VESPA_HOME/lib/jars/searchlib.jar:$VESPA_HOME/lib/jars/document.jar:$VESPA_HOME/lib/jars/vespajlib.jar com.yahoo.searchlib.gbdt.GbdtConverter "$@" diff --git a/searchlib/src/main/sh/vespa-treenet-converter b/searchlib/src/main/sh/vespa-treenet-converter new file mode 100755 index 00000000000..a95d910c4b4 --- /dev/null +++ b/searchlib/src/main/sh/vespa-treenet-converter @@ -0,0 +1,63 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +# BEGIN environment bootstrap section +# Do not edit between here and END as this section should stay identical in all scripts + +findpath () { + myname=${0} + mypath=${myname%/*} + myname=${myname##*/} + if [ "$mypath" ] && [ -d "$mypath" ]; then + return + fi + mypath=$(pwd) + if [ -f "${mypath}/${myname}" ]; then + return + fi + echo "FATAL: Could not figure out the path where $myname lives from $0" + exit 1 +} + +COMMON_ENV=libexec/vespa/common-env.sh + +source_common_env () { + if [ "$VESPA_HOME" ] && [ -d "$VESPA_HOME" ]; then + # ensure it ends with "/" : + VESPA_HOME=${VESPA_HOME%/}/ + export VESPA_HOME + common_env=$VESPA_HOME/$COMMON_ENV + if [ -f "$common_env" ]; then + . $common_env + return + fi + fi + return 1 +} + +findroot () { + source_common_env && return + if [ "$VESPA_HOME" ]; then + echo "FATAL: bad VESPA_HOME value '$VESPA_HOME'" + exit 1 + fi + if [ "$ROOT" ] && [ -d "$ROOT" ]; then + VESPA_HOME="$ROOT" + source_common_env && return + fi + findpath + while [ "$mypath" ]; do + VESPA_HOME=${mypath} + source_common_env && return + mypath=${mypath%/*} + done + echo "FATAL: missing VESPA_HOME environment variable" + echo "Could not locate $COMMON_ENV anywhere" + exit 1 +} + +findroot + +# END environment bootstrap section + +exec java -cp $VESPA_HOME/lib/jars/searchlib.jar:$VESPA_HOME/lib/jars/document.jar:$VESPA_HOME/lib/jars/vespajlib.jar com.yahoo.searchlib.treenet.TreeNetConverter "$@" diff --git a/searchlib/src/test/OWNERS b/searchlib/src/test/OWNERS new file mode 100644 index 00000000000..31af040f698 --- /dev/null +++ b/searchlib/src/test/OWNERS @@ -0,0 +1 @@ +bratseth diff --git a/searchlib/src/test/files/features01.expression b/searchlib/src/test/files/features01.expression new file mode 100644 index 00000000000..fbb43a77696 --- /dev/null +++ b/searchlib/src/test/files/features01.expression @@ -0,0 +1 @@ +attribute(foo).out \ No newline at end of file diff --git a/searchlib/src/test/files/features02.expression 
b/searchlib/src/test/files/features02.expression new file mode 100644 index 00000000000..0a58b4b10f2 --- /dev/null +++ b/searchlib/src/test/files/features02.expression @@ -0,0 +1 @@ +attribute(foo).out attribute ( bar ) . out \ No newline at end of file diff --git a/searchlib/src/test/files/features03.expression b/searchlib/src/test/files/features03.expression new file mode 100644 index 00000000000..12760619b04 --- /dev/null +++ b/searchlib/src/test/files/features03.expression @@ -0,0 +1,4 @@ +foo + bar + + baz diff --git a/searchlib/src/test/files/features04.expression b/searchlib/src/test/files/features04.expression new file mode 100644 index 00000000000..b8dea2e902c --- /dev/null +++ b/searchlib/src/test/files/features04.expression @@ -0,0 +1 @@ +attribute attribute(foo) attribute(foo).out attribute(bar).out.out \ No newline at end of file diff --git a/searchlib/src/test/files/gbdt.expression b/searchlib/src/test/files/gbdt.expression new file mode 100644 index 00000000000..b59d6052f5d --- /dev/null +++ b/searchlib/src/test/files/gbdt.expression @@ -0,0 +1,10 @@ +if (F55 < 2.0932798, if (F42 < 1.7252731, if (F33 < 0.5, if (F38 < 1.5367546, 1.7333333, 1.3255814), if (F37 < 0.675922, 1.9014085, 1.0)), if (F109 < 0.5, if (F116 < 5.25, if (F111 < 0.0521445, 1.0, 1.9090909), if (F38 < 4.0740733, 0.8, if (F38 < 6.6152048, 1.7142857, 0.625))), 1.5945946)), if (F109 < 0.5, if (F113 < 0.7835808, if (F110 < 491.0, if (F56 < 2.5423126, if (F108 < 243.5, 1.375, 0.78), 0.5), 2.0), if (F103 < 0.9918365, 1.6, 0.3333333)), if (F59 < 0.9207, if (F30 < 0.86, 1.5890411, 0.625), if (F100 < 5.9548216, 1.0, 0.0)))) + +if (F55 < 59.5480576, if (F42 < 1.8308522, if (F100 < 5.9549484, if (F107 < 0.5, -0.3406279, if (F56 < 1.7057916, if (F36 < 3.778285, if (F103 < 0.5600199, 0.047108, if (F36 < 1.2203553, if (F102 < 1.5, 0.0460316, -0.473794), -0.9825869)), -0.8848045), if (F47 < 15.5, 0.348047, -1.0890411))), 1.75), if (F113 < 0.8389627, if (F110 < 7.5, -0.5778378, if (F111 < 0.8596972, 
if (F114 < 831.5, if (F113 < 0.3807178, 0.0497646, if (F110 < 63.0, 0.6549377, 0.2486999)), if (F39 < 8.9685574, 0.3222195, -0.1690968)), 1.0381818)), if (F58 < 0.889763, -0.0702703, -1.6))), if (F102 < 3.5, -0.3059684, -1.5890411)) + +if (F55 < 119.6311035, if (F55 < 90.895813, if (F39 < 12.162282, if (F35 < 1.1213787, if (F55 < 34.9389648, if (F45 < 3.5, if (F51 < 0.0502058, if (F103 < 0.8550526, if (F55 < 4.96804, 0.048519, 0.6596588), if (F38 < 1.3808891, -0.7416763, 0.0176633)), 0.4502234), -0.6811898), 0.5572351), if (F100 < 3.3971992, if (F39 < 7.0869236, if (F43 < 5.5100875, if (F46 < 4.5, -0.1702421, -0.9797453), -1.5426025), 0.0774408), if (F52 < 22.3562355, if (F35 < 4.4263992, 0.4011598, -0.3898472), -1.75))), if (F39 < 14.5762558, if (F109 < 0.5, 1.6616928, 0.4001626), if (F100 < 3.0519419, 0.616491, -0.1808479))), -1.2135522), 0.5535716) + +if (F43 < 9.272151, if (F36 < 9.0613861, if (F115 < 36.5, if (F34 < 1.4407213, if (F41 < 10.4713802, if (F34 < 1.2610778, if (F105 < 8.2159586, if (F46 < 88.5, 0.0075843, -0.6358738), if (F105 < 9.5308332, 1.4464284, -0.0895592)), 0.3532708), -1.8289603), if (F45 < 24.5, if (F111 < 0.9095335, if (F113 < 0.0529755, -0.6272416, if (F50 < 34.2163391, if (F113 < 0.0813664, 0.3683843, if (F34 < 1.6283135, -0.6334628, -0.1610307)), 1.5559684)), -1.7492068), 1.5060212)), if (F49 < 23.5787125, if (F100 < 6.5115452, if (F37 < 0.8601408, if (F57 < 6.5, 0.0547747, 1.193346), 0.6402962), 1.7395205), 2.5559684)), -3.1016318), 1.8657542) + +if (F55 < 764.9404297, if (F34 < 23.2379246, if (F36 < 9.2296076, if (F114 < 116.0, if (F108 < 13.5, if (F108 < 12.5, -0.2736142, -1.7384173), if (F110 < 10.5, 0.0794336, -0.2171646)), if (F114 < 129.0, if (F109 < 0.5, 1.4407836, -0.1458547), if (F111 < 0.9703438, if (F47 < 18.5, if (F32 < 3.5, 0.0708936, if (F118 < 0.6794872, if (F119 < 3.8533711, if (F34 < 0.1213822, -2.0046196, -8.566E-4), -0.9490828), 0.0790339)), if (F113 < 0.3637481, 0.1161088, -0.9997786)), 1.3003114))), if (F111 < 
0.2438112, -2.0582902, 0.6918949)), if (F115 < 95.0, -2.8602383, -0.0063699)), if (F101 < 0.9411763, -2.0253283, -0.6417007)) + +if (F114 < 516.0, if (F49 < 8.9197922, if (F48 < 3.5, if (F36 < 1.3889931, if (F43 < 0.9699799, if (F34 < 9.6113167, if (F106 < 8.5, if (F108 < 153.5, if (F110 < 130.5, 0.180242, 2.545163), if (F108 < 161.5, -2.2253985, if (F55 < 31.4965668, -0.0122572, 0.7364454))), -0.2596613), 0.7247348), if (F111 < 0.2817393, -0.6409092, 0.2100071)), if (F116 < 18.75, 0.511352, -0.1093323)), 0.9379161), 0.3603908), if (F46 < 32.5, if (F46 < 5.5, if (F39 < 11.7440758, if (F115 < 774.0, -0.0433343, -1.7439904), -0.3662575), 0.5413771), if (F110 < 67.0, if (F46 < 34.5, -2.6581287, -0.9399502), 0.075664))) + +if (F42 < 24.3080139, if (F118 < 0.8452381, if (F119 < 6.2847767, if (F100 < 3.2778931, if (F46 < 30.0, if (F43 < 1.2712233, if (F104 < 3.5, 0.1365837, 0.5592712), if (F39 < 0.6294491, -0.8729556, -0.0123421)), 3.7677864), if (F111 < 0.6580936, if (F103 < 0.9319581, -0.2822538, if (F107 < 1.5, -0.3983539, if (F104 < 5.5, 0.0792465, 0.7273864))), if (F104 < 3.5, -1.1550477, 0.0490706))), 1.4735778), if (F111 < 0.3724709, if (F51 < 16.0989189, if (F114 < 154.0, if (F108 < 57.5, -0.0675733, -0.3994327), -0.0250285), -1.4871782), if (F34 < 2.1943491, 0.0229469, if (F108 < 1527.0, 1.4706301, 0.0285333)))), 3.489949) + +if (F34 < 30.3465347, if (F103 < 0.9996098, if (F38 < 0.558669, if (F105 < 3.6287756, if (F104 < 3.5, if (F31 < 0.86, 0.1121421, 1.8153648), -0.8281607), if (F55 < 37.6819153, 0.9656266, 0.1585065)), if (F113 < 0.840385, if (F38 < 9.6623116, if (F46 < 136.0, if (F53 < 0.5548913, if (F38 < 8.4469957, if (F34 < 3.1969421, if (F114 < 20.0, -0.2944335, 0.03499), if (F34 < 3.4671984, -1.3154796, -0.1742507)), 0.4071658), if (F105 < 2.315434, if (F110 < 59.5, -0.1713032, -1.420465), -0.1456236)), 0.5520287), if (F108 < 12156.5, if (F111 < 0.3892631, -0.16285, -0.9015614), -2.6391831)), 0.2011691)), -3.073049), -3.2461861) + +if (F55 < 28.4668102, 
if (F34 < 0.4929269, if (F30 < 0.86, if (F37 < 0.8360082, -0.0815482, -0.7898247), -0.5144471), if (F108 < 20498.0, if (F44 < 1.1856511, if (F56 < 1.0706565, if (F39 < 8.377079, if (F59 < 0.5604, 0.0429508, if (F34 < 0.7287493, -1.0264078, 0.6052195)), -0.4814408), if (F119 < 3.7530813, if (F115 < 8.5, 0.4916013, 0.0457533), if (F114 < 1093.5, 1.1673864, 0.3411176))), -0.6176305), if (F100 < 3.151973, 2.6908011, 0.3835885))), if (F116 < 62.0, if (F114 < 562.0, -0.415543, if (F103 < 0.9826763, -0.1169933, if (F104 < 0.5, -0.0665763, 1.0238317))), if (F100 < 5.8046961, -3.2954836, 0.2781039))) + +if (F34 < 26.9548168, if (F35 < 18.4714928, if (F115 < 698.0, if (F116 < 41.5, if (F38 < 1.1138718, if (F46 < 9.0, if (F31 < 0.86, 0.1059075, -0.2995292), if (F46 < 25.5, if (F46 < 13.0, 0.6297316, 1.8451736), 0.2079161)), if (F38 < 19.3839836, if (F49 < 29.9797497, if (F46 < 235.5, if (F38 < 1.2626771, -0.5165347, if (F35 < 10.3027954, if (F50 < 0.2823648, -0.0424489, if (F113 < 0.0776736, 0.7495954, -0.2948665)), 0.3229146)), -1.0711968), 0.3153474), if (F116 < 5.2182379, 2.8017734, 0.3444192))), if (F113 < 0.5691726, 1.7530511, 0.3534861)), -2.4915219), if (F103 < 0.9680555, -2.1724317, 0.2143739)), 3.1712332) \ No newline at end of file diff --git a/searchlib/src/test/files/gbdt.ext.xml b/searchlib/src/test/files/gbdt.ext.xml new file mode 100644 index 00000000000..f466751eb35 --- /dev/null +++ b/searchlib/src/test/files/gbdt.ext.xml @@ -0,0 +1,284 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + diff --git a/searchlib/src/test/files/gbdt.xml b/searchlib/src/test/files/gbdt.xml new file mode 100644 index 00000000000..76e64b129f3 --- /dev/null +++ b/searchlib/src/test/files/gbdt.xml @@ -0,0 +1,614 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searchlib/src/test/files/gbdt_empty_tree.xml b/searchlib/src/test/files/gbdt_empty_tree.xml new file mode 100644 index 00000000000..15bf46471b6 --- /dev/null +++ b/searchlib/src/test/files/gbdt_empty_tree.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searchlib/src/test/files/gbdt_err.xml b/searchlib/src/test/files/gbdt_err.xml new file mode 100644 index 00000000000..aa6103c8604 --- /dev/null +++ 
b/searchlib/src/test/files/gbdt_err.xml @@ -0,0 +1,3 @@ + + + diff --git a/searchlib/src/test/files/gbdt_set_inclusion_test.xml b/searchlib/src/test/files/gbdt_set_inclusion_test.xml new file mode 100644 index 00000000000..ad62c556c87 --- /dev/null +++ b/searchlib/src/test/files/gbdt_set_inclusion_test.xml @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searchlib/src/test/files/gbdt_tree_response.xml b/searchlib/src/test/files/gbdt_tree_response.xml new file mode 100644 index 00000000000..8bed53957d2 --- /dev/null +++ b/searchlib/src/test/files/gbdt_tree_response.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/searchlib/src/test/files/mlr/cases-illegal1.csv b/searchlib/src/test/files/mlr/cases-illegal1.csv new file mode 100644 index 00000000000..fe8dcf1ac8a --- /dev/null +++ b/searchlib/src/test/files/mlr/cases-illegal1.csv @@ -0,0 +1,5 @@ +# Argument missing value + + + +23, arg1:3, arg2: diff --git a/searchlib/src/test/files/mlr/cases-illegal2.csv b/searchlib/src/test/files/mlr/cases-illegal2.csv new file mode 100644 index 00000000000..0b755035b36 --- /dev/null +++ b/searchlib/src/test/files/mlr/cases-illegal2.csv @@ -0,0 +1,2 @@ +# Target isn't a number +5db,7 diff --git a/searchlib/src/test/files/mlr/cases-linear.csv b/searchlib/src/test/files/mlr/cases-linear.csv new file mode 100644 index 00000000000..dadc626ce18 --- /dev/null +++ b/searchlib/src/test/files/mlr/cases-linear.csv @@ -0,0 +1,7 @@ +# f(x)=x +0, x:0 +1, x:1 +2, x:2 +3, x:3 +4, x:4 +5, x:5 diff --git a/searchlib/src/test/files/mlr/cases.csv b/searchlib/src/test/files/mlr/cases.csv new file mode 100644 index 00000000000..2b9ea8bf7ff --- /dev/null +++ b/searchlib/src/test/files/mlr/cases.csv @@ -0,0 +1,6 @@ +# Comments are legal 
+ +1, arg1:2, arg2:-1.3 +-1.003,arg1:500007 + +0, arg2:1.00 diff --git a/searchlib/src/test/files/ranking01.expression b/searchlib/src/test/files/ranking01.expression new file mode 100644 index 00000000000..4df9b580069 --- /dev/null +++ b/searchlib/src/test/files/ranking01.expression @@ -0,0 +1,10 @@ +if (attribute(b) < 0.65, if (attribute(c) < 0.55, if (attribute(a) < 0.55, 0.369863, -0.6578947), if (attribute(a) < 0.65, -0.775, -1.0)), if (attribute(c) < 0.45, -0.9090909, -1.0)) + +if (attribute(a) < 0.55, if (attribute(b) < 0.35, if (attribute(c) < 0.75, 0.4327977, 0.025), if (attribute(c) < 0.75, -0.1090028, -0.07682927)), if (attribute(c) < 0.55, -0.04031544, -0.01875)) + +if (attribute(b) < 0.35, 0.06336273, if (attribute(c) < 0.85, if (attribute(a) < 0.35, if (attribute(c) < 0.35, 0.6091127, 0.02845135), -0.07638131), if (attribute(a) < 0.75, -0.018862, 0.01875))) + +if (attribute(c) < 0.15, if (attribute(b) < 0.55, if (attribute(a) < 0.35, if (attribute(b) < 0.3, -1.866023, 0.1300271), 0.6299557), 0.1788445), if (attribute(b) < 0.65, -0.1586424, 0.06778581)) + +if (attribute(c) < 0.45, if (attribute(a) < 0.35, if (attribute(b) < 0.75, 0.1426054, -0.2282), if (attribute(b) < 0.85, -0.09571452, -0.04941978)), if (attribute(a) < 0.25, 0.2759441, 0.0172878)) + +if (attribute(a) < 0.15, if (attribute(b) < 0.75, 0.3165435, -0.04458321), if (attribute(a) < 0.55, -0.1137117, if (attribute(b) < 0.75, 0.04622166, if (attribute(c) < 0.65, 0.004746275, -0.03648972)))) + +if (attribute(a) < 0.95, if (attribute(b) < 0.25, if (attribute(c) < 0.25, 0.7623822, if (attribute(a) < 0.65, 0.2338952, if (attribute(c) < 0.85, -0.06132011, 0.05052024))), -0.04188744), -0.03245768) + +if (attribute(c) < 0.55, if (attribute(b) < 0.65, -0.2042442, 0.03887484), if (attribute(b) < 0.25, -0.0474437, if (attribute(a) < 0.15, -0.3700475, if (attribute(a) < 0.65, 0.07656199, 0.1085871)))) + +if (attribute(a) < 0.75, 0.0189638, if (attribute(b) < 0.85, 1.942833E-4, if (attribute(c) < 0.85, 
if (attribute(c) < 0.45, -0.009795157, if (attribute(a) < 0.85, -0.01795083, -0.01329222)), -0.1179778))) + +if (attribute(c) < 0.75, if (attribute(b) < 0.45, if (attribute(c) < 0.15, -0.4551494, if (attribute(c) < 0.65, 0.1471968, -0.06380587)), 0.03410008), if (attribute(b) < 0.65, -0.06397114, -0.01491517)) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking02.expression b/searchlib/src/test/files/ranking02.expression new file mode 100644 index 00000000000..4e80576c7e7 --- /dev/null +++ b/searchlib/src/test/files/ranking02.expression @@ -0,0 +1,90 @@ +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (match < 0.6113165021, -0.0284270267, 3.592783E-4), if (fieldMatch(text).significantOccurrence < 0.0488094985, if (attribute(user_friends_count) < 184.5, -0.0124428511, 0.0077143433), if (term(0).significance < 0.997767508, -0.0390395696, if (term(1).significance < 0.9895755053, 0.02259176, if (fieldMatch(text).significantOccurrence < 0.1335410029, -0.01671786, -0.0425634221))))), if (age(created_at) < 5400.0, -0.046690069, if (age(created_at) < 45000.0, -0.025709541, -0.012803042))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488905191, if (fieldMatch(text).absoluteOccurrence < 0.0116665, if (fieldMatch(text) < 0.8492144942, 0.0288744693, 0.0439309311), 0.0566558463), 0.0594293259), if (age(created_at) < 5400.0, -0.0184100055, if (age(created_at) < 27000.0, -3.458478E-4, 0.0087464789)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (term(0).significance < 0.9914690256, -0.0118607453, 0.0031225791), if (term(0).significance < 0.997767508, if (term(1).significance < 0.9883980155, -0.0803907557, if (attribute(yst_link_array_size) < 0.0250600018, -0.0303931857, 0.0259097321)), if (term(1).significance < 0.9972054958, 0.0065438125, if (term(1).significance < 0.9975290298, -0.0913176725, -0.0123125115)))), if 
(age(created_at) < 5400.0, -0.0448246506, if (age(created_at) < 23400.0, -0.0262210797, -0.0146461827))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488645315, if (fieldMatch(text) < 0.856990993, 0.0307982478, 0.0469020946), 0.0558564997), if (age(created_at) < 5400.0, -0.0166881751, if (fieldMatch(text) < 0.3820354939, -0.0035580609, if (age(created_at) < 30600.0, 0.0028319521, 0.0104819912))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 7.5, if (match < 0.6697604656, -0.0323866906, if (term(0).significance < 0.9955350161, -0.0025720554, -0.0170321274)), if (attribute(user_followers_count) < 489.5, if (term(0).significance < 0.9964904785, 0.0035465045, if (term(0).significance < 0.999284029, -0.0205069971, 0.0010003389)), 0.0149904595)), if (age(created_at) < 9000.0, if (age(created_at) < 3570.0, -0.0501614448, -0.0347695722), if (age(created_at) < 45000.0, -0.0217186612, -0.0115826893))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.127717495, if (fieldMatch(text) < 0.8662694693, 0.0270881826, 0.0422977189), 0.0471192106), if (age(created_at) < 5400.0, if (fieldMatch(text).importance < 0.666426003, -0.023211464, -0.0105863112), if (age(created_at) < 19800.0, -0.0011091805, 0.0079984015)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0342704034, if (attribute(user_statuses_count) < 574.0, -0.0031829638, if (fieldMatch(text).weightedOccurrence < 0.109127, if (fieldMatch(text) < 0.8472499847, -0.0193605912, -0.0038142662), -0.0308342022))), if (age(created_at) < 5400.0, -0.0418216807, if (age(created_at) < 48600.0, -0.0237625386, -0.0115288531))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7488905191, if (term(0).significance < 0.991820991, if (term(2).significance < 0.983879447, if (fieldLength(text) < 23.5, 0.0091275797, -0.0681415824), 0.0280728758), if (fieldTermMatch(text,0).firstPosition < 
9.5, if (attribute(user_followers_count) < 2165.5, 0.0378854321, 0.055539461), 0.0261930857)), 0.049689868), if (age(created_at) < 12600.0, if (age(created_at) < 5400.0, -0.0136135545, -0.0029542657), 0.0066915734))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0678231236, if (fieldMatch(text).significantOccurrence < 0.1249914989, if (attribute(user_statuses_count) < 103.5, 0.0047730322, if (attribute(user_followers_count) < 3070.5, if (fieldMatch(text).earliness < 0.8834840059, -0.0130691877, 0.0030931972), 0.0212955094)), -0.0250041155)), if (age(created_at) < 5400.0, -0.0386563137, if (age(created_at) < 48600.0, -0.0213844929, -0.0116543752))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.5683230162, if (fieldMatch(text).gapLength < 5.5, 0.0208840083, -0.0392353393), if (term(0).significance < 0.9139549732, -0.059916078, 0.0359567192)), 0.0417870117), if (age(created_at) < 5400.0, -0.0124473711, if (age(created_at) < 27000.0, -3.908889E-4, if (fieldMatch(text) < 0.5566675067, 9.45327E-4, 0.0097812185))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, if (term(0).significance < 0.9873124957, 0.062526781, -0.0025881996), if (fieldMatch(text).tail < 7.5, if (attribute(user_statuses_count) < 504.0, -0.0072144471, -0.0184304751), -0.0041050691)), if (age(created_at) < 5400.0, -0.0342922301, if (age(created_at) < 52200.0, -0.0213685384, -0.0114302758))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).significantOccurrence < 0.057490997, if (term(0).significance < 0.9980279803, -0.0131328933, 0.0192113014), if (fieldMatch(text) < 0.8584204912, 0.0314073419, -0.0026767115)), if (fieldMatch(text).fieldCompleteness < 0.0392310023, -0.0016304919, 0.034703474)), 0.0373450153), if (age(created_at) < 12600.0, if 
(age(created_at) < 5400.0, -0.0106738218, -0.0029072167), 0.0056105069))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 12.5, if (attribute(user_statuses_count) < 826.0, -0.0058871349, if (fieldMatch(text).earliness < 0.8774999976, -0.0128456148, -0.0362508217)), 0.0039172531), if (age(created_at) < 5400.0, -0.033274366, if (age(created_at) < 48600.0, if (fieldMatch(text) < 0.5479695201, -0.0292307762, -0.0167816152), -0.0103426077))), if (age(created_at) < 1830.0, if (fieldMatch(text).earliness < 0.6510869861, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text) < 0.2023105025, -0.0093665406, 0.0265294786), -0.016323195), if (term(2).significance < 0.9791975021, if (attribute(yst_reply_auth) < 2.5, 0.0195383609, 0.0376308584), 0.038683455)), if (age(created_at) < 12600.0, if (term(4).significance < 0.9926320314, if (age(created_at) < 5400.0, -0.0119040624, -0.0033941367), 0.003778577), 0.0059147929))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 10.5, if (fieldMatch(text).importance < 0.7443764806, if (match < 0.9134370089, -0.0252698647, -0.0099637807), if (term(2).significance < 0.7909680009, -0.0344071695, if (term(0).significance < 0.9947484732, 0.0145760432, -0.0105169825))), -2.56762E-4), if (age(created_at) < 12600.0, if (age(created_at) < 5400.0, -0.0322518692, -0.0221817109), -0.0127554041)), if (age(created_at) < 1830.0, if (fieldMatch(text).earliness < 0.724747479, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0292128233, if (fieldMatch(text) < 0.3254045248, -0.0359002315, 0.0194921959)), if (attribute(user_followers_count) < 609.5, 0.0299861583, 0.04219304)), if (age(created_at) < 5400.0, if (fieldMatch(text).occurrence < 0.1731635034, -0.0134935559, -0.0027367126), if (term(1).significance < 0.9878399968, 0.0121433273, 0.0020006783)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if 
(fieldTermMatch(text,0).firstPosition < 14.5, if (fieldMatch(text).importance < 0.7413114905, -0.0091155042, 0.0233289393), -0.0239608468), if (attribute(user_followers_count) < 2995.0, -0.0016973828, 0.0438873528)), if (age(created_at) < 12600.0, if (age(created_at) < 3570.0, -0.034580545, -0.0236031788), -0.0119280014)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0323605063, if (attribute(yst_tweet_language) < 3243.5, if (fieldTermMatch(text,0).firstPosition < 3.5, if (attribute(user_followers_count) < 114.5, 0.0149219697, 0.0383892131), if (fieldMatch(text) < 0.3404299915, -0.0214082868, 0.0183146341)), -0.0199916697)), if (age(created_at) < 9000.0, -0.0076472907, if (fieldMatch(text) < 0.5607429743, -1.69083E-5, if (term(1).significance < 0.8870275021, 0.0275141633, 0.0058735097))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text) < 0.1415009946, -0.0331371143, 0.0016819061), if (term(0).significance < 0.9974014759, if (term(1).significance < 0.9943025112, -0.0561295193, -0.0143235877), if (term(0).significance < 0.9975079894, 0.0456376595, if (term(0).significance < 0.9976614714, -0.0617225433, -0.0089081592)))), if (age(created_at) < 12600.0, -0.0250708949, -0.0120490174)), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).significantOccurrence < 0.0422540009, 0.0132403332, if (fieldMatch(text).importance < 0.7485179901, if (fieldMatch(text).tail < 10.5, 0.0224059642, 0.0317363105), 0.0363809447)), -0.0059409077), if (age(created_at) < 12600.0, -0.005953322, if (match < 0.7504960299, if (fieldMatch(text).occurrence < 0.1318840086, -0.0092412181, 0.0036779089), 0.0067221979)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text) < 0.2731105089, -0.0169751683, if (attribute(yst_reply_auth) < 16.5, 0.0106972872, if (term(1).significance < 
0.8159549832, 0.0454901055, -0.0067703435))), if (term(0).significance < 0.9966599941, -0.0415369371, if (match.totalWeight < 250.0, -0.0080140966, 0.0488608858))), if (age(created_at) < 12600.0, -0.0229133495, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0125676511, 2.328845E-4))), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.9184160233, -0.0549048781, if (fieldLength(text) < 23.5, 0.0226244877, 0.0077881056)), 0.0280730521), if (age(created_at) < 9000.0, if (fieldMatch(text) < 0.3730605245, -0.0167181189, -0.0044234172), if (fieldMatch(text) < 0.5543889999, -8.4709E-4, 0.0055458527)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (match.totalWeight < 250.0, if (fieldTermMatch(text,1).firstPosition < 13.5, if (term(0).significance < 0.9701889753, -0.0195353072, if (term(0).significance < 0.9965775013, 0.0073931107, -0.0074860039)), if (fieldMatch(text) < 0.6285369992, -0.0322505986, -0.0073317181)), 0.0451330307), if (age(created_at) < 45000.0, -0.0206455453, -0.0085888986)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 14.5, 0.0265081733, if (fieldMatch(text) < 0.3104079962, -0.0291219391, if (attribute(yst_tweet_language) < 3271.5, if (attribute(user_friends_count) < 146.5, 0.0133927786, 0.0247206105), -0.0249098053))), if (age(created_at) < 12600.0, if (fieldTermMatch(text,0).firstPosition < 1.5, 0.0013211001, if (fieldMatch(text).importance < 0.6664245129, -0.0124234916, -0.0044820648)), if (fieldMatch(text).significantOccurrence < 0.0555050001, 6.152863E-4, 0.0069791274)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0442178195, if (term(0).significance < 0.9492504597, -0.0249224413, if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).importance < 0.7480239868, -0.009906585, if (term(0).significance < 0.9947484732, 0.0121048215, -0.0132930884)), 6.079666E-4))), if (age(created_at) < 
9000.0, -0.0219397199, -0.0106952111)), if (age(created_at) < 1830.0, if (fieldMatch(text).absoluteOccurrence < 0.0136665003, if (fieldMatch(text).importance < 0.7488800287, if (fieldTermMatch(text,0).firstPosition < 6.5, if (attribute(user_followers_count) < 866.5, 0.0181298105, 0.0303594396), 0.0126963345), 0.028578828), 0.0333028419), if (age(created_at) < 27000.0, if (fieldTermMatch(text,3).firstPosition < 7.5, 0.0067345611, if (fieldTermMatch(text,0).firstPosition < 4.5, -0.0013179334, -0.0081428248)), if (fieldMatch(text) < 0.5568180084, 4.597678E-4, 0.0093837881)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.4998250008, if (term(0).significance < 0.9983664751, -0.0058107338, 0.0245069566), if (fieldMatch(text).importance < 0.4998745024, -0.0308383904, -0.0106009672)), 0.00109712), if (age(created_at) < 45000.0, if (fieldMatch(text).weightedOccurrence < 0.0912880003, if (age(created_at) < 3570.0, -0.0267460073, -0.0152835256), -0.0298858389), -0.0088562145)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989734888, if (fieldMatch(text).importance < 0.4988874793, 0.0045636472, -0.1210997623), if (fieldMatch(text) < 0.3135755062, if (term(1).significance < 0.9852235317, if (term(0).significance < 0.9929184914, -0.0444011152, 0.0156709024), 0.0178486139), if (attribute(yst_tweet_language) < 3243.5, 0.0236557227, -0.0055893686))), if (age(created_at) < 12600.0, -0.0039404999, 0.0040376803))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (term(0).significance < 0.9964904785, if (fieldMatch(text).gapLength < 3.5, 0.003496796, -0.0136111988), if (term(0).significance < 0.9975960255, if (term(1).significance < 0.9943025112, -0.0474034255, -0.0139986631), if (fieldMatch(text).importance < 0.6665844917, -0.0083848009, if (term(0).significance < 0.9992945194, if (term(1).significance < 0.9993695021, 0.0095761689, 0.0714217668), 
-0.014142042)))), if (age(created_at) < 5400.0, -0.0204021576, if (fieldMatch(text).longestSequence < 1.5, -0.0152195185, -0.0074091603))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.127717495, if (fieldMatch(text) < 0.845182538, 0.0072816766, 0.0185451686), 0.0243676179), if (fieldMatch(text).importance < 0.666454494, if (age(created_at) < 5400.0, -0.0113001116, -0.0020866841), if (age(created_at) < 30600.0, -2.226823E-4, 0.0054407552)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (attribute(user_friends_count) < 1202.5, if (attribute(yst_tweet_language) < 3243.5, if (attribute(user_statuses_count) < 491.5, if (attribute(user_followers_count) < 39.5, -0.0053604202, 0.0112837612), -0.0076658014), -0.0344819911), 0.015286062), if (age(created_at) < 52200.0, if (fieldMatch(text).importance < 0.6658334732, -0.0236404883, -0.0155495401), -0.0063627489)), if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).significantOccurrence < 0.0377494991, -0.0247545653, if (term(0).significance < 0.9914690256, if (term(0).significance < 0.9911389947, 0.0074545408, -0.0712173039), 0.0174505123)), 0.0227466857), if (fieldMatch(text).importance < 0.6664484739, if (fieldMatch(text) < 0.3570200205, -0.0164480209, -0.0029063778), if (age(created_at) < 30600.0, -8.955043E-4, if (fieldLength(text) < 22.5, 0.0066513594, -0.0017231871))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0266703268, if (attribute(yst_tweet_language) < 3243.5, if (match < 0.6756634712, -0.0178612015, if (fieldMatch(text).weightedOccurrence < 0.1012820005, if (fieldMatch(text).earliness < 0.8834840059, -0.0031504958, 0.0108290236), -0.0132400721)), -0.0294468679)), if (fieldMatch(text).longestSequence < 1.5, -0.0183944645, if (age(created_at) < 45000.0, -0.0132570328, -0.0044394895))), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.1225000024, if 
(fieldMatch(text).head < 9.5, if (term(0).significance < 0.973123014, -0.0399055768, 0.0171512303), 0.0047044679), if (attribute(user_statuses_count) < 6.5, -0.0303006102, 0.0230432421)), if (fieldMatch(text).importance < 0.6664534807, -0.0054238462, if (term(2).significance < 0.9981180429, if (fieldMatch(text).completeness < 0.959010005, -0.0041171766, 0.0032716696), 0.0055362698)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).tail < 16.5, if (term(2).significance < 0.9960604906, -0.008761479, if (fieldMatch(text).occurrence < 0.1225000024, -0.0067609181, if (term(1).significance < 0.9832755327, 0.0282354539, 0.002590827))), 0.0073141014), if (age(created_at) < 45000.0, -0.0156556128, -0.0071654687)), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.075498499, 0.007334316, if (fieldMatch(text) < 0.3130764961, if (term(1).significance < 0.9978075027, -0.001460364, 0.0291628398), if (attribute(user_statuses_count) < 29.5, -0.005500918, if (attribute(yst_reply_auth) < 476.0, 0.0209690045, -0.0105504498)))), if (age(created_at) < 30600.0, if (fieldTermMatch(text,1).occurrences < 1.5, if (fieldMatch(text) < 0.4531754851, -0.0106616269, if (fieldMatch(text).importance < 0.4999470115, -0.0131472535, -8.043613E-4)), 0.0065678273), 0.0043163871))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).tail < 12.5, -0.0083194933, 0.0035102742), if (attribute(user_friends_count) < 263.5, if (fieldMatch(text).importance < 0.6659150124, 0.0142259075, -0.0029315835), 0.0146667338)), if (age(created_at) < 37800.0, if (fieldMatch(text).significantOccurrence < 0.0833195001, -0.0135039055, -0.0220540111), if (fieldMatch(text).longestSequenceRatio < 0.5357145071, -0.013039774, -0.0039578022))), if (age(created_at) < 1830.0, if (term(0).significance < 0.9184160233, if (fieldMatch(text).longestSequence < 1.5, -0.0589194117, 0.0129273078), if 
(fieldTermMatch(text,1).firstPosition < 7.5, 0.0206642588, if (attribute(user_statuses_count) < 63762.0, 0.0138866614, -0.0354735543))), if (fieldTermMatch(text,0).firstPosition < 1.5, if (age(created_at) < 52200.0, 0.0028235989, 0.0121270804), if (fieldMatch(text).importance < 0.666454494, -0.0071218235, 5.514519E-4)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldTermMatch(text,1).firstPosition < 4.5, if (match < 0.938462019, 0.0165893189, -0.0049824111), -0.0046803205), if (age(created_at) < 41400.0, -0.0148008888, -0.0071343073)), if (age(created_at) < 1830.0, if (fieldTermMatch(text,0).firstPosition < 6.5, if (attribute(user_followers_count) < 945.5, 0.0158379194, 0.0252891613), if (fieldMatch(text) < 0.2744970024, if (term(0).significance < 0.9929184914, if (fieldMatch(text).tail < 6.5, -0.0518040838, 0.0076190376), 0.0091624226), if (attribute(yst_tweet_language) < 3243.5, if (attribute(yst_reply_auth) < 469.5, if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0078021755, 0.0161894548), -0.0230367514), -0.0302108693))), if (age(created_at) < 30600.0, if (fieldMatch(text) < 0.370840013, -0.0093177671, -8.716804E-4), if (fieldMatch(text) < 0.5607429743, -4.994075E-4, 0.0075202897)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (match < 0.6055585146, if (fieldTermMatch(text,1).firstPosition < 9.5, -0.0038025793, -0.036586404), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).significantOccurrence < 0.1483514905, if (term(1).significance < 0.7788045406, 0.0569638816, -6.508355E-4), -0.0215954499), -0.028835301)), if (age(created_at) < 5400.0, -0.0155259431, -0.0084487818)), if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 2333.0, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, 2.952785E-4, -0.1374273254), if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (attribute(yst_reply_auth) < 22.5, 0.0126010812, 0.0015712189), 
if (fieldTermMatch(text,1).firstPosition < 8.5, 0.0140537649, 0.0345167434))), 0.0247552557), if (fieldMatch(text).importance < 0.6664245129, if (match < 0.9277470112, -0.0104111915, -6.36678E-4), 0.0023009658))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 11.5, if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3554.0, if (fieldMatch(text).importance < 0.665607512, if (term(0).significance < 0.9996379614, -0.0108858514, 0.0223953057), -7.23685E-4), -0.0527538471), if (age(created_at) < 5400.0, -0.0167835591, -0.0101222507)), if (attribute(user_friends_count) < 103.5, -0.0058634359, if (age(created_at) < 1770.0, 0.0106468506, -0.0022715192))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.9184160233, -0.0441538866, 0.0103401752), 0.0169759088), if (term(0).significance < 0.9990385175, 0.0064769128, -0.0484309871)), if (fieldMatch(text).longestSequence < 1.5, -0.0038310021, if (term(0).significance < 0.9686380029, 0.0135820391, if (attribute(user_followers_count) < 719.5, -2.012513E-4, 0.0056425249))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 630.0, if (fieldMatch(text).importance < 0.499478519, 0.0221233715, if (attribute(user_followers_count) < 926.5, -0.0044878516, 0.0128654737)), if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).importance < 0.6665325165, -0.0138860556, -0.0083897223), -0.004508875)), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).importance < 0.4989734888, -0.008760469, if (attribute(user_followers_count) < 1733.5, if (fieldMatch(text).occurrence < 0.1455025077, if (fieldMatch(text) < 0.5567239523, if (term(2).significance < 0.9795899987, -0.0543641627, 0.0024748648), 0.0124403853), 0.0174741297), 0.0222181645)), -0.0288913368), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.6664659977, -0.0065555429, 5.791831E-4), if 
(fieldMatch(text).earliness < 0.93541646, if (fieldMatch(text).significantOccurrence < 0.0339080021, -0.0694353726, 0.0012739636), 0.0075882453)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 12.5, if (fieldMatch(text).longestSequence < 1.5, if (age(created_at) < 210.0, 5.10467E-4, if (term(0).significance < 0.991086483, -0.0213498049, if (term(0).significance < 0.9921205044, 0.0187676178, if (fieldMatch(text).tail < 5.5, -0.0155349434, -0.0073599141)))), if (fieldMatch(text).importance < 0.6662604809, if (attribute(user_followers_count) < 1875.0, 9.507365E-4, 0.0527948179), -0.0073533813)), if (age(created_at) < 810.0, 0.0076378446, -0.0027198247)), if (age(created_at) < 1830.0, if (fieldMatch(text).significantOccurrence < 0.0424195006, 0.001535613, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).importance < 0.7466344833, if (fieldMatch(text).importance < 0.6666129827, 0.0067592681, -0.0205924309), 0.0160937308), 0.0167252945)), if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0049627365, if (fieldMatch(text).importance < 0.6664254665, -0.0065001791, 0.0010821803)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 630.0, if (age(created_at) < 510.0, -0.0026815916, if (fieldMatch(text).importance < 0.4997144938, 0.0414511969, 0.004406815)), if (fieldMatch(text).tail < 11.5, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).importance < 0.4994869828, 0.0066561273, -0.0136850009), -0.0069580048), if (attribute(yst_reply_auth) < 16.5, 0.0023887625, -0.0083630492))), if (age(created_at) < 1830.0, if (fieldMatch(text).fieldCompleteness < 0.0425724983, if (attribute(user_friends_count) < 252.5, -0.020119899, 0.0154324464), if (fieldMatch(text).tail < 5.5, 0.005278601, 0.0135236791)), if (fieldMatch(text).longestSequence < 1.5, -0.0049136258, if (age(created_at) < 27000.0, -4.128224E-4, if (term(2).significance < 0.9998239875, if (fieldMatch(text).completeness < 0.9559409618, -0.0209409304, if (term(1).significance < 0.9128689766, 
0.0195740015, 0.0035250792)), 0.0133058164))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1770.0, if (fieldMatch(text).tail < 7.5, -0.004792789, if (fieldMatch(text).importance < 0.6652389765, -0.0020739127, 0.0102078569)), if (fieldMatch(text).importance < 0.6657874584, if (attribute(user_friends_count) < 16.5, 4.515464E-4, -0.0178576762), if (term(0).significance < 0.9976029992, -0.0101410825, -0.004854538))), if (age(created_at) < 1770.0, if (attribute(yst_tweet_language) < 3243.5, if (attribute(user_followers_count) < 606.0, if (fieldMatch(text).importance < 0.7488585114, 0.0066962893, 0.0158854368), if (fieldMatch(text).significantOccurrence < 0.0555564985, if (attribute(user_statuses_count) < 13511.5, 0.0163436735, -0.0031528673), 0.0222661817)), -0.0172180317), if (fieldMatch(text).importance < 0.6664534807, if (fieldMatch(text) < 0.3528665006, -0.0116436179, -0.0025309342), if (term(2).significance < 0.9981445074, if (fieldTermMatch(text,3).firstPosition < 4.5, 0.0069706912, -0.0021646003), 0.0046032512)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(user_name).fieldCompleteness < 0.2916665077, if (attribute(user_statuses_count) < 497.5, 0.0022748148, if (attribute(user_followers_count) < 960.5, if (term(0).significance < 0.9725670218, -0.0246347116, if (term(0).significance < 0.9963495135, if (fieldMatch(text).tail < 9.5, if (fieldMatch(text).head < 11.5, 0.0013827683, -0.0212024376), 0.016150842), -0.0104714457)), 0.0070006551)), 0.0447412235), if (age(created_at) < 16200.0, -0.012466698, -0.0057219106)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4999520183, 0.0038677446, if (fieldMatch(text) < 0.4182469845, 0.0026964712, 0.0132060784)), if (fieldMatch(text).weightedOccurrence < 0.0513554998, if (match < 0.6867794991, if (fieldMatch(text).importance < 0.6665154696, -0.02010221, if (term(2).significance < 0.9950574636, -0.0169792919, 0.0050699268)), -0.0013697969), 0.0020313056))) + +if 
(attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.665607512, if (fieldMatch(text) < 0.2109414935, -0.0320963356, -0.0049411304), if (attribute(yst_tweet_language) < 3243.5, if (term(2).significance < 0.9769929647, if (fieldMatch(text).importance < 0.6665915251, 0.0282759231, if (fieldMatch(text).importance < 0.7399419546, -0.063959372, -0.0116583984)), if (term(0).significance < 0.9964904785, 0.0113601762, if (term(0).significance < 0.9984384775, -0.0096497985, 0.0065807303))), -0.0430967785)), if (fieldMatch(text).longestSequence < 1.5, -0.012735201, -0.0062520929)), if (age(created_at) < 1770.0, if (fieldMatch(text).earliness < 0.7211109996, 0.0056563829, if (attribute(user_followers_count) < 812.5, 0.0100250822, 0.0209608983)), if (fieldTermMatch(text,0).firstPosition < 4.5, 0.0027791958, if (fieldMatch(text).importance < 0.666424036, if (match < 0.9192979932, -0.0152598227, -0.0032270961), -5.422229E-4)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 459.5, if (attribute(user_statuses_count) < 496.5, 8.480388E-4, -0.0090870631), if (attribute(yst_reply_auth) < 244.5, 0.0126503896, -0.0054197846)), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0148159779, 0.0029148481), -0.0058224247)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7490880489, if (fieldMatch(text) < 0.5652275085, if (term(2).significance < 0.9916304946, -0.0285282409, 0.0051566337), if (attribute(user_followers_count) < 104.5, if (fieldMatch(text) < 0.8065220118, 0.0383292168, 0.0010266011), if (attribute(yst_reply_auth) < 391.0, 0.0133363207, -0.0143777685))), 0.0164241107), if (fieldTermMatch(text,0).firstPosition < 4.5, 0.0032745831, if (fieldMatch(text).importance < 0.7496404648, if (term(1).significance < 0.9979525208, -0.0096924346, -9.781494E-4), 0.0015180251)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, 
if (term(0).significance < 0.9995554686, if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 92.5, if (fieldLength(text) < 27.5, 0.0012866951, -0.0265027781), 0.0104770861), if (attribute(yst_link_array_size) < 0.0041509997, -0.0099713041, 0.0011954032)), if (term(0).significance < 0.9996379614, -0.0297536383, -2.317059E-4)), if (fieldMatch(text).longestSequence < 1.5, -0.0121670225, -0.0054595694)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989734888, if (fieldMatch(text).importance < 0.4986799955, 0.007117559, if (fieldLength(text) < 18.5, 0.0043567972, -0.0954988221)), if (attribute(user_statuses_count) < 5.5, -0.0298547936, 0.0103403639)), if (fieldMatch(text).occurrence < 0.1348485053, if (match < 0.9276950359, if (term(0).significance < 0.9981074929, -0.0125565952, -0.0030946195), 0.0023600605), 0.0024001179))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7413114905, if (fieldMatch(text).importance < 0.6666384935, if (fieldMatch(text).earliness < 0.8681160212, if (match < 0.6799730062, -0.0205917268, -0.0018660452), if (fieldMatch(text) < 0.8819584846, if (fieldMatch(text).completeness < 0.9544465542, 0.0070580213, 0.0359145), -0.0174774107)), -0.0421236424), if (fieldMatch(text).completeness < 0.957596004, if (fieldMatch(text).earliness < 0.4128789902, -0.0155841429, 0.0271271066), 0.0016623712)), if (age(created_at) < 5400.0, -0.0114561422, -0.0053122836)), if (age(created_at) < 1830.0, if (term(0).significance < 0.9184160233, if (fieldMatch(text).longestSequence < 1.5, -0.070303917, 0.0039828312), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).occurrence < 0.1455025077, 0.0064206057, 0.0136203784), -0.0155537108)), if (fieldMatch(text).importance < 0.6656370163, -0.0057846012, 0.0015276435))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 12.5, if (fieldMatch(text) < 0.4900699854, if (term(0).significance < 0.9883320332, 
-0.0195571527, -0.0078552672), if (fieldMatch(text).weightedOccurrence < 0.0929629952, if (age(created_at) < 1530.0, if (fieldTermMatch(text,1).firstPosition < 6.5, 0.0158849428, -0.0010859682), -0.0055859102), -0.0109538484)), if (fieldMatch(user_name) < 0.0710614994, 2.014444E-4, 0.0605228154)), if (age(created_at) < 1830.0, if (fieldMatch(text) < 0.2891100049, -0.0017665209, if (attribute(yst_reply_auth) < 471.5, if (attribute(user_followers_count) < 2200.0, 0.0095812144, 0.0192088364), -0.0097908152)), if (fieldMatch(text).occurrence < 0.1348485053, if (match < 0.9285860062, if (attribute(user_statuses_count) < 2957.5, -0.003199469, -0.0120737981), 5.44995E-4), if (term(2).significance < 0.9939094782, if (fieldMatch(text).absoluteProximity < 0.0212500002, -0.0227140008, -3.328979E-4), 0.0055961138)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 176.0, if (attribute(yst_tweet_language) < 3243.5, if (term(1).significance < 0.7788045406, 0.0578500341, 0.0011485747), -0.0253253039), if (attribute(yst_link_array_size) < 0.0223225001, -0.0156277732, 0.0023478823)), if (fieldMatch(text).longestSequence < 1.5, -0.0107069928, if (attribute(user_followers_count) < 1710.5, -0.0053639058, 0.0090303888))), if (fieldMatch(text) < 0.5406639576, if (fieldMatch(text).occurrence < 0.1165160015, if (term(1).significance < 0.9973840117, -0.0174263062, -0.0012512051), -4.493622E-4), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4997544885, if (fieldMatch(text).importance < 0.4997400045, 0.0011570612, if (attribute(user_statuses_count) < 2389.0, -0.075250743, 0.0078353389)), if (attribute(yst_tweet_language) < 3243.5, 0.0095661855, -0.0134482465)), if (fieldMatch(text).importance < 0.4999470115, -0.0103740403, 0.0025010891)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.3427360058, if (fieldMatch(text).earliness < 0.6939799786, -0.0183828125, -0.0073742585), if (age(created_at) < 1830.0, if 
(term(0).significance < 0.9958745241, if (term(1).significance < 0.8159549832, 0.0358430149, if (term(1).significance < 0.9927034974, if (fieldMatch(text).importance < 0.6640119553, 0.0448918743, -0.0109749723), if (term(1).significance < 0.996638, 0.021964601, 0.0055406966))), -0.0040966912), if (fieldLength(text) < 14.5, -0.0121807234, -0.0043039012))), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7479754686, if (term(0).significance < 0.9139549732, -0.0340629156, if (fieldMatch(text).tail < 7.5, 8.350066E-4, if (fieldMatch(text) < 0.8667535186, 0.0042479503, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.009079718, 0.0217882168)))), 0.0152961627), if (fieldTermMatch(text,1).firstPosition < 6.5, 0.0019770381, -0.0024650391))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1650.0, -5.35497E-4, if (fieldMatch(text).absoluteOccurrence < 0.0126785003, if (fieldMatch(text).longestSequence < 1.5, -0.0102881411, -0.0050682353), 0.0015992266)), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.0425724983, -0.0103390097, if (fieldTermMatch(text,0).firstPosition < 4.5, if (attribute(user_followers_count) < 807.5, 0.009014829, 0.0188702216), if (fieldMatch(text).importance < 0.7488585114, if (term(1).significance < 0.9833209515, if (fieldMatch(text).absoluteOccurrence < 0.0116665, -0.0062725138, -0.0573762051), 0.0061628636), 0.0150568527))), if (fieldMatch(text) < 0.4552929997, if (fieldMatch(user_name) < 0.3179910183, if (fieldMatch(text).importance < 0.6665154696, if (fieldMatch(text).occurrence < 0.1188234985, -0.017059865, -9.6562E-6), if (term(1).significance < 0.9965360165, -0.0071076426, 0.0044895619)), 0.0279607247), if (term(2).significance < 0.8411514759, -0.0033532749, 0.0026365471)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0276561682, if (term(1).significance < 0.9822615385, if (match < 0.7864329815, -0.0313860274, if 
(fieldMatch(text).longestSequenceRatio < 0.7749999762, 0.026549556, -0.0146271425)), if (term(0).significance < 0.9987125397, if (term(0).significance < 0.9821995497, -0.0131166881, if (age(created_at) < 1770.0, 0.002490936, 0.0242417466)), if (term(0).significance < 0.9987905025, -0.0474544221, -0.003321698)))), if (fieldMatch(text).significance < 0.6663454771, -0.0112682274, -0.0050036035)), if (age(created_at) < 1770.0, if (attribute(user_followers_count) < 812.5, 0.0053496824, 0.0125479103), if (fieldTermMatch(text,0).firstPosition < 1.5, 0.0037986168, if (fieldMatch(text).importance < 0.7498390079, if (term(0).significance < 0.9974490404, -0.0066723085, -6.70732E-4), if (attribute(user_friends_count) < 20.5, -0.005179231, 0.0048035663))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 80.5, -0.0011002979, 0.0120623048), if (fieldMatch(text).importance < 0.4994869828, if (term(0).significance < 0.991086483, -0.015038199, 0.0335272034), if (attribute(yst_link_array_size) < 0.0041354997, -0.0126645698, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0132560119, -0.0052124596)))), if (fieldMatch(text).importance < 0.6665325165, if (fieldMatch(text).tail < 7.5, -0.0136798779, -0.0055728098), if (term(0).significance < 0.9954190254, -0.0075794485, -8.289554E-4))), if (age(created_at) < 1770.0, if (fieldMatch(text).earliness < 0.3779760003, -0.0012520588, 0.0078110682), if (fieldMatch(text).significantOccurrence < 0.0547899976, if (match < 0.7503944635, if (fieldMatch(text).importance < 0.6665270329, if (fieldMatch(text).earliness < 0.6339714527, -0.0338858554, -0.009096585), -0.0037668704), 3.149666E-4), 0.0028579202))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldLength(text) < 8.5, -0.0119762189, if (attribute(user_friends_count) < 15.5, 0.0150834311, if (fieldMatch(text) < 0.4306970239, if (term(1).significance < 0.9976525307, -0.018499759, 
0.0133471677), if (attribute(user_friends_count) < 1375.5, 5.505579E-4, 0.0194310919)))), -0.0058711817), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).tail < 3.5, -0.0081336884, if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0074424529, if (fieldMatch(text).importance < 0.7463564873, 0.0063782103, 0.0133409187))), -0.0205400734), if (fieldMatch(text).importance < 0.6664534807, if (fieldMatch(text).significantOccurrence < 0.1249409989, if (fieldTermMatch(text,0).firstPosition < 2.5, 0.0031349276, -0.0052935732), -0.0275582309), if (age(created_at) < 63000.0, if (fieldMatch(text) < 0.3207110167, -0.0114880439, 9.364683E-4), 0.0048748147)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9995759726, if (fieldMatch(text).significantOccurrence < 0.0363755003, -0.01574373, if (term(1).significance < 0.7788045406, 0.0476961371, 0.001213897)), -0.0123574496), if (fieldMatch(text).importance < 0.6664404869, -0.0101736132, if (term(0).significance < 0.9954395294, if (fieldMatch(text).orderness < 0.5357145071, -0.0179059498, -0.0054873409), -0.0020453926))), if (fieldMatch(text) < 0.3861989975, if (term(1).significance < 0.9980044961, if (age(created_at) < 330.0, 0.0172467014, -0.0090164842), 0.0018725599), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 557.0, if (attribute(user_statuses_count) < 7.5, if (attribute(user_followers_count) < 147.0, -7.864416E-4, -0.1136937768), if (fieldMatch(text).importance < 0.4989485145, -0.017996364, 0.0077754184)), -0.0216960094), if (fieldMatch(text).importance < 0.6657680273, -0.0047764491, 0.002661354)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0247499999, -0.0106958848, if (fieldLength(text) < 14.5, if (fieldMatch(text).importance < 0.4994429946, 0.0219589017, -0.0076174869), if (fieldMatch(text).occurrence < 0.0816664994, -0.0053254707, if (fieldMatch(text).longestSequenceRatio < 
0.8166664839, -0.0034099679, if (attribute(user_followers_count) < 3130.0, 0.0023795826, 0.0305866803))))), if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 579.5, if (fieldMatch(text).importance < 0.7473194599, if (term(1).significance < 0.9967114925, if (fieldMatch(text).orderness < 0.25, 0.0199100212, if (age(created_at) < 630.0, 1.241074E-4, -0.0217275952)), 0.0052103051), 0.0100794994), if (fieldMatch(text).weightedOccurrence < 0.057417497, 0.0046668785, 0.0152380854)), if (fieldMatch(text) < 0.279281497, if (term(1).significance < 0.996701479, -0.0134490906, 5.562205E-4), if (fieldTermMatch(text,1).firstPosition < 9.5, 0.0022888891, -0.0019659597)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 7.5, if (fieldMatch(text).importance < 0.6656044722, if (match < 0.7144390345, -0.0310311214, -0.0088877493), if (fieldMatch(text).earliness < 0.0727514997, 0.0149462312, -0.0044403174)), if (attribute(user_friends_count) < 14.5, 0.0092716632, -0.0015500378)), if (age(created_at) < 1830.0, if (fieldMatch(text) < 0.2904269993, if (attribute(user_friends_count) < 127.0, 0.0056714395, if (fieldTermMatch(text,1).firstPosition < 10.5, -0.0061125596, -0.0400728335)), if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 9.5, -0.014240152, if (attribute(yst_reply_auth) < 209.5, 0.0090823293, if (attribute(user_followers_count) < 1677.5, -0.0133225099, 0.0160376802))), if (fieldTermMatch(text,0).firstPosition < 6.5, 0.002067266, -0.0469488746))), if (fieldMatch(text).significantOccurrence < 0.0543674976, -0.0022326468, if (age(created_at) < 63000.0, 5.594003E-4, 0.0062407904)))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0232298047, if (attribute(yst_reply_auth) < 176.0, 5.588745E-4, -0.0071946844)), if (fieldLength(text) < 14.5, -0.0100784734, if (fieldMatch(text).occurrence < 0.1043554991, -0.0078973837, -0.0021612919))), if (fieldMatch(text) < 0.4846429825, if 
(term(1).significance < 0.9980959892, if (fieldMatch(text).significantOccurrence < 0.0594874993, if (fieldMatch(text).importance < 0.7486180067, if (fieldMatch(text).head < 1.5, -0.0040433128, -0.0206367481), if (attribute(user_followers_count) < 592.0, 0.003159187, -0.014456241)), 0.0020151861), 0.0050398144), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4997544885, if (fieldMatch(text).importance < 0.4997400045, if (fieldLength(text) < 10.5, -0.0498201605, 0.0052238898), -0.0305327007), 0.0074862309), if (fieldTermMatch(text,1).firstPosition < 12.5, 0.0024835558, -0.0030184713)))) + +if (fieldMatch(text) < 0.2844820023, if (fieldMatch(text).importance < 0.6664454937, if (fieldMatch(text).earliness < 0.6909815073, -0.0202083466, -0.007230346), if (term(0).significance < 0.9991005063, -0.0078735247, 0.0050183478)), if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4999545217, if (fieldMatch(text).importance < 0.4999495149, 0.0010054634, if (attribute(yst_reply_auth) < 24.5, -0.0091545768, -0.0585794793)), if (attribute(user_followers_count) < 496.5, if (attribute(yst_reply_auth) < 64.5, if (attribute(yst_tweet_language) < 3583.5, 0.0060891079, -0.0311686318), -0.0029778507), 0.0105208941)), if (fieldTermMatch(text,2).firstPosition < 9.5, if (term(2).significance < 0.9943845272, -0.0014959657, 0.0041636931), if (fieldMatch(text).earliness < 0.8651515245, if (fieldMatch(user_name).completeness < 0.9791665077, if (attribute(user_followers_count) < 680.5, -0.005581803, if (term(1).significance < 0.9998655319, 0.002594313, -0.0096650963)), 0.0535512397), 0.0020234411)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0491665006, -0.0097379318, if (age(created_at) < 1710.0, if (term(0).significance < 0.9986090064, if (term(0).significance < 0.9982025027, if (term(2).significance < 0.9959775209, -0.0019926524, if (fieldMatch(text).importance < 0.665396452, -0.0116503978, 0.0109466166)), 0.0189828366), if 
(fieldMatch(text).importance < 0.6666469574, if (fieldMatch(text).importance < 0.4998664856, -0.0224440709, if (attribute(yst_reply_auth) < 18.5, 0.0032764517, -0.0102488229)), 0.0230535914)), -0.00426531)), if (age(created_at) < 1650.0, if (attribute(user_followers_count) < 105.5, -6.226235E-4, if (fieldMatch(text).importance < 0.4989485145, -0.0224164552, 0.0086177649)), if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0097048559, if (fieldMatch(text) < 0.4509834945, if (term(1).significance < 0.9981694818, -0.0078122033, 0.003806844), if (term(0).significance < 0.9841674566, 0.0097951581, 9.24353E-4))))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.2836354971, if (term(1).significance < 0.9996379614, -0.0108495877, 0.0148675984), if (fieldMatch(text).tail < 17.5, if (term(2).significance < 0.9099119902, if (fieldMatch(text).importance < 0.4998250008, if (fieldMatch(text).significance < 0.4995914996, -0.0043957101, 0.0231344492), -0.008840382), if (attribute(user_followers_count) < 506.5, if (fieldMatch(text).importance < 0.6659464836, if (fieldMatch(text).importance < 0.6658334732, -3.420491E-4, 0.0373151929), -0.0037227166), 0.0035857585)), 0.0053145038)), if (fieldMatch(text) < 0.2904269993, if (term(1).significance < 0.998260498, if (term(2).significance < 0.978690505, -0.0302698107, if (fieldMatch(text).occurrence < 0.1188234985, -0.0139381667, 3.038E-7)), 0.0019017619), if (age(created_at) < 1830.0, if (fieldMatch(text).occurrence < 0.075498499, -4.698689E-4, 0.0075287937), if (term(2).significance < 0.9939094782, -0.0018063524, 0.0022813626)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).absoluteProximity < 0.0537500009, -0.0076941292, if (attribute(user_friends_count) < 13.5, if (attribute(user_followers_count) < 180.5, 7.683782E-4, 0.0254407298), if (term(1).significance < 0.9965690374, if (attribute(user_friends_count) < 1596.0, -9.906495E-4, 0.017963509), if (term(1).significance < 0.9967479706, -0.030275409, if 
(term(0).significance < 0.9986245036, if (term(0).significance < 0.9983664751, -0.0033463225, if (fieldMatch(text).importance < 0.4998250008, 0.0252647259, 0.0031275837)), if (fieldMatch(text).importance < 0.6665315032, -0.0095874064, -0.002315628)))))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, 0.0044201553, 0.0117690347), -0.0145395863), if (fieldTermMatch(text,0).firstPosition < 10.5, if (fieldMatch(user_name).significantOccurrence < 0.4166665077, if (match < 0.7534494996, -0.0026555272, 0.0017879837), 0.014526025), -0.0044547476))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).tail < 5.5, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).completeness < 0.9522235394, 0.0042831561, -0.0095824673), if (fieldMatch(text).weightedOccurrence < 0.084523499, -6.026845E-4, -0.0078613786)), if (fieldMatch(user_name).importance < 0.1997880042, -0.0010588456, 0.0257470432)), if (fieldMatch(text) < 0.2779855132, if (fieldMatch(text).significantOccurrence < 0.0599530004, if (fieldMatch(text).importance < 0.6665154696, -0.0140299808, if (term(2).significance < 0.967427969, -0.0277817247, 0.0028355135)), 0.0034763323), if (age(created_at) < 2370.0, if (attribute(user_statuses_count) < 10.5, if (attribute(yst_link_array_size) < 1.2E-5, -0.0032323662, -0.1013679738), if (fieldTermMatch(text,1).firstPosition < 5.5, 0.0092460814, if (attribute(user_statuses_count) < 72785.0, 0.0043196848, -0.0308937796))), if (fieldMatch(text).significantOccurrence < 0.0339080021, -0.0524175559, if (fieldTermMatch(text,1).firstPosition < 17.5, 0.0020057038, -0.0052555353))))) + +if (age(created_at) < 1830.0, if (fieldMatch(text).significantOccurrence < 0.0382340029, if (fieldMatch(text) < 0.8533049822, if (term(0).significance < 0.9981650114, -0.0368343915, -0.0056610638), 0.0049994224), if (fieldMatch(text).tail < 5.5, if (fieldMatch(text).importance < 0.4989485145, -0.0226356769, 
8.778837E-4), if (term(0).significance < 0.9986659884, 0.0085648682, if (fieldMatch(text).earliness < 0.7071075439, if (fieldMatch(text).importance < 0.6665714979, -0.0079427382, 0.0095678431), if (term(0).significance < 0.9988600016, -0.0090514905, if (attribute(yst_tweet_language) < 3243.5, 0.011654868, -0.0250363073)))))), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, -0.0063330281, 0.0017606811), if (attribute(user_followers_count) < 521.5, if (attribute(yst_reply_auth) < 6.5, if (fieldLength(text) < 26.5, 8.975568E-4, -0.0115152224), if (age(created_at) < 12600.0, -0.0079829768, -7.770708E-4)), 0.0029601612))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.8159549832, 0.0344152691, if (attribute(yst_tweet_language) < 3243.5, -9.31972E-5, -0.0184607413)), if (fieldMatch(text) < 0.8700245023, if (age(created_at) < 81000.0, -0.0081256943, -0.002474476), if (fieldMatch(text).completeness < 0.9577934742, 0.006010286, -0.0049642463))), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).completeness < 0.9521285295, -0.0086391614, 0.0063207862), if (term(0).significance < 0.998996973, 0.0055342844, if (term(0).significance < 0.999627471, -0.053409278, 0.0110272216))), if (fieldMatch(text).head < 1.5, 0.0038230846, if (fieldMatch(text).importance < 0.6663914919, -0.0050094296, if (term(2).significance < 0.9939705133, if (term(2).significance < 0.9904664755, -6.273878E-4, if (term(1).significance < 0.9985420108, -0.0060480992, -0.0305338408)), 0.0023578375))))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0500283244, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4985739887, -0.0040890196, -0.0504476618), if (fieldMatch(text).importance < 0.4994869828, 0.0226347107, if (fieldMatch(text).importance < 0.7494400144, -0.0013898685, if 
(term(1).significance < 0.9945595264, -0.0032601315, 0.0245669695))))), -0.0050325999), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 218.5, if (attribute(user_followers_count) < 516.5, if (attribute(yst_reply_auth) < 5.5, if (fieldMatch(text).importance < 0.6662045121, if (fieldMatch(text).tail < 15.5, -9.372615E-4, 0.0137497531), 0.0092949266), -0.0038400009), 0.0088974242), -0.0063462945), if (fieldMatch(text).fieldCompleteness < 0.1863425076, if (attribute(user_followers_count) < 82.5, -0.004996894, if (term(1).significance < 0.9968400002, -0.0035744289, 0.0022489806)), 0.0031548406))) + +if (attribute(user_followers_count) < 1739.0, if (attribute(yst_reply_auth) < 28.5, if (attribute(user_followers_count) < 86.5, if (fieldMatch(text).importance < 0.6655265093, -0.0070659027, if (age(created_at) < 1890.0, if (term(1).significance < 0.9980455041, 0.0084857763, if (attribute(user_friends_count) < 38.5, 0.008263962, -0.013053989)), -0.0029246429)), if (fieldMatch(user_name) < 0.3153960109, if (fieldMatch(text) < 0.5473589897, if (match < 0.54053545, -0.0265778832, -0.0016129946), if (fieldMatch(text).occurrence < 0.0816664994, if (attribute(yst_link_array_size) < 0.001784, -0.0042828731, if (age(created_at) < 1830.0, 0.019961191, -0.0028795459)), if (age(created_at) < 1710.0, 0.0085534102, 0.0023027773))), if (fieldMatch(text).significantOccurrence < 0.0327955, -0.0415331084, 0.0263336717))), -0.004577551), if (fieldMatch(text).tail < 7.5, if (term(2).significance < 0.8023320436, -0.009778886, 0.0038323247), 0.0081719743)) + +if (attribute(user_followers_count) < 437.5, if (fieldMatch(text).significantOccurrence < 0.1246850044, if (attribute(yst_reply_auth) < 22.5, if (fieldMatch(text) < 0.3409180045, if (fieldMatch(text).importance < 0.6665065289, -0.0102582795, if (term(1).significance < 0.9962199926, if (term(1).significance < 0.994343996, -0.0021503448, -0.0306146076), 0.0068595469)), if (fieldMatch(text).earliness < 0.93541646, if 
(fieldMatch(user_name) < 0.5095770359, if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, 0.0020429611, -0.0019541993), -0.0132421664), 0.027938898), 0.0043877081)), -0.0053086844), -0.0109310616), if (age(created_at) < 1530.0, if (term(0).significance < 0.9986474514, if (term(0).significance < 0.9980379939, if (fieldMatch(text).earliness < 0.2440474927, -0.0189069835, 0.0085132629), 0.0221469666), 2.679538E-4), if (attribute(user_statuses_count) < 2928.5, 0.003703727, if (match < 0.5710045099, -0.0125698441, -3.892576E-4)))) + +if (fieldMatch(text).tail < 3.5, if (attribute(yst_reply_auth) < 278.5, if (fieldLength(text) < 24.5, if (attribute(yst_link_array_size) < 0.0885144994, if (term(1).significance < 0.7788045406, 0.0265531093, -0.0020354187), -0.0289274408), -0.0107078153), -0.0129870071), if (age(created_at) < 1830.0, if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 9.5, -0.0103056223, if (fieldMatch(user_name) < 0.2000829875, if (fieldMatch(text).importance < 0.7970539927, 0.0035454291, 0.0126182815), 0.0233580846)), if (fieldLength(text) < 22.5, if (term(0).significance < 0.9991210103, 0.0080261443, -0.0294237431), -0.0673310696)), if (fieldMatch(text).importance < 0.6664265394, if (attribute(yst_reply_auth) < 13.5, -0.0018649103, -0.0077154393), if (term(0).significance < 0.9997465014, if (attribute(user_followers_count) < 717.5, -0.0012717951, 0.002903754), if (fieldTermMatch(text,1).firstPosition < 15.5, 0.0107252476, -0.0062640981))))) + +if (attribute(ythl) < 0.5, if (attribute(user_followers_count) < 483.5, if (attribute(user_statuses_count) < 491.5, if (fieldMatch(text).tail < 3.5, -0.0057958616, if (age(created_at) < 1410.0, 0.0070562486, -7.664522E-4)), if (attribute(user_friends_count) < 8.5, 0.0087335556, -0.0058603167)), if (age(created_at) < 210.0, 0.0246066286, 3.480739E-4)), if (fieldMatch(text) < 0.5547109842, if (fieldMatch(text).occurrence < 0.1348485053, if (attribute(user_statuses_count) < 
2933.0, -0.0023188146, if (attribute(yst_reply_auth) < 1.5, -0.0279839136, if (fieldTermMatch(text,1).firstPosition < 12.5, if (age(created_at) < 2730.0, 0.0153842703, -0.0081351611), -0.0240346583))), 8.863957E-4), if (fieldLength(text) < 9.5, -0.0106073655, if (fieldMatch(text).earliness < 0.9393379688, if (fieldMatch(text).occurrence < 0.0655914992, -0.0023447985, if (age(created_at) < 1950.0, 0.0063181854, 0.0015014161)), 0.0074385233)))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.8502080441, if (fieldMatch(text).importance < 0.7468224764, -0.0067962178, -0.0019381191), if (attribute(user_statuses_count) < 16627.5, if (attribute(user_followers_count) < 515.5, if (fieldMatch(text).weightedOccurrence < 0.0944940001, if (attribute(user_statuses_count) < 109.5, 0.0079116741, -4.709728E-4), -0.0057247378), if (term(0).significance < 0.9991005063, if (age(created_at) < 1350.0, 0.0233500539, 0.0061626722), -0.0047207579)), -0.0108453748)), if (fieldTermMatch(text,2).firstPosition < 8.5, if (attribute(user_followers_count) < 16.5, -0.0054426486, if (age(created_at) < 2430.0, 0.0098679265, 0.0032263599)), if (match < 0.9289889932, if (fieldMatch(text).tail < 3.5, if (fieldMatch(text).occurrence < 0.0976189971, -0.0211220829, -0.0037859558), if (attribute(user_statuses_count) < 12392.5, 8.0234E-5, if (attribute(user_followers_count) < 317.5, -0.0361792845, -0.0049548586))), 0.0025926241))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (fieldMatch(text).longestSequence < 1.5, -0.003165344, if (fieldMatch(text).importance < 0.74989748, if (term(1).significance < 0.998134017, 0.0094378769, if (term(1).significance < 0.9994934797, if (term(1).significance < 0.9991415143, 3.721852E-4, -0.0259010774), 0.0124789418)), if (term(2).significance < 0.9737149477, -0.0202297481, 0.0040219128))), -0.0045478246), if (age(created_at) < 1590.0, if (fieldTermMatch(text,1).firstPosition < 7.5, 0.0081043971, if (fieldMatch(text) < 0.8496830463, -0.0035718865, 
0.0048047847)), if (fieldMatch(text).head < 1.5, if (fieldLength(text) < 8.5, -0.0224047487, 0.0036563528), if (fieldMatch(text) < 0.2774904966, if (term(0).significance < 0.997859478, -0.0142881937, -0.0014981238), if (attribute(yst_reply_auth) < 70.5, 1.609764E-4, if (attribute(user_followers_count) < 397.5, -0.01809199, -0.0021113953)))))) + +if (fieldMatch(text).longestSequence < 1.5, if (fieldTermMatch(text,0).firstPosition < 14.5, if (fieldMatch(text).absoluteOccurrence < 0.0136665003, if (fieldMatch(text).significance < 0.7493325472, if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 5.5, -0.0282346598, if (term(0).significance < 0.9725670218, if (term(1).significance < 0.9792364836, -0.0784237022, -0.0059997941), if (fieldTermMatch(text,1).firstPosition < 10.5, 0.0078147345, if (match < 0.9353330135, -0.0019768224, if (fieldMatch(text).earliness < 0.9198719859, if (attribute(yst_link_array_size) < 2.88E-4, -0.0041019966, 0.0264356088), 5.631411E-4))))), -0.0050485104), 0.0082039036), 0.0051839504), if (term(0).significance < 0.9982025027, -0.0142960741, -0.0033997299)), if (attribute(yst_tweet_language) < 3587.5, if (age(created_at) < 1770.0, 0.0049084121, if (attribute(ythl) < 0.5, -0.0020172224, if (attribute(yst_link_array_size) < 0.0056419997, 0.0029607752, -0.0028856329))), -0.0261107048)) + +if (fieldMatch(text) < 0.2898915112, if (fieldMatch(text).head < 1.5, 0.0014847907, if (fieldMatch(text).importance < 0.6663334966, -0.0119151319, -0.0040477723)), if (attribute(user_followers_count) < 519.5, if (term(2).significance < 0.9943574667, if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 1256.0, 0.0034757752, if (attribute(user_statuses_count) < 1397.0, -0.0279955298, if (fieldMatch(text).tail < 15.5, -0.0046990807, if (match < 0.935085535, -0.004028571, 0.0289488138)))), if (term(0).significance < 0.9971770048, if (term(1).significance < 0.9985420108, -0.0058304095, -0.017290911), if 
(fieldTermMatch(text,1).firstPosition < 17.5, 0.0053683293, -0.0057448395))), if (attribute(yst_reply_auth) < 236.5, 0.0018130071, -0.0097022524)), if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.498603493, -0.0030858773, -0.0594499645), if (fieldTermMatch(text,0).firstPosition < 13.5, 0.0046047026, -0.0041554082)))) + +if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.6381819844, if (term(0).significance < 0.9928579926, if (attribute(yst_reply_auth) < 3.5, if (attribute(user_followers_count) < 18.5, -0.0266701398, 0.0020537489), -0.0226862335), if (fieldMatch(text).importance < 0.4997234941, if (attribute(yst_link_array_size) < 5.03E-4, 0.0166948856, -0.0034104232), if (age(created_at) < 270.0, 0.0057923479, if (fieldMatch(text).importance < 0.499758482, -0.0291936745, -0.0059528701)))), if (term(0).significance < 0.8512874842, -0.0436401448, if (age(created_at) < 1830.0, 0.0039537575, -0.0025333564))), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 158.5, if (attribute(yst_reply_auth) < 149.5, if (term(2).significance < 0.9389865398, -0.0024084291, if (term(2).significance < 0.9853805304, 0.0179593679, 0.0053348502)), 0.0511133688), -0.0040868819), if (attribute(user_followers_count) < 172.5, -0.0015268821, 0.001811508))) + +if (fieldMatch(text) < 0.4593589902, if (term(0).significance < 0.9975925088, if (fieldMatch(text).occurrence < 0.1188234985, if (fieldMatch(text).earliness < 0.9486839771, if (term(0).significance < 0.9817185402, -0.033583138, -0.0111131767), 0.0027474033), -0.0042505836), -3.239219E-4), if (fieldLength(text) < 9.5, if (term(1).significance < 0.996538043, if (fieldMatch(text).importance < 0.6660010219, 0.0283916092, -0.0014717607), -0.0092449117), if (age(created_at) < 1770.0, if (attribute(user_statuses_count) < 5.5, if (attribute(yst_link_array_size) < 1.2E-5, -0.0060483091, -0.0952850231), if (term(2).significance < 0.9389865398, if 
(attribute(user_statuses_count) < 27302.5, if (term(0).significance < 0.9943234921, -0.0055188147, 0.0033202683), if (attribute(yst_reply_auth) < 19.5, 0.0086451663, -0.0362288139)), 0.0064556248)), if (fieldMatch(text).occurrence < 0.078461498, -0.0044655704, if (fieldTermMatch(text,0).firstPosition < 0.5, 0.0052075545, 7.92364E-5))))) + +if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9936410189, -0.090488338, -0.0089960419), if (fieldMatch(text).importance < 0.4999005198, if (attribute(user_friends_count) < 7.5, 0.0105675969, -2.271753E-4), if (fieldMatch(text).importance < 0.4999030232, -0.0345873832, if (fieldMatch(text).earliness < 0.7165180445, if (term(1).significance < 0.9967604876, -0.0115456455, -0.0042372928), -0.0012917255)))), if (age(created_at) < 1830.0, if (attribute(yst_reply_auth) < 26.5, 0.0072943729, if (attribute(user_followers_count) < 3340.0, if (fieldMatch(text).importance < 0.6665514708, if (fieldMatch(text).significance < 0.6661305428, -0.0013967805, -0.0212892006), if (attribute(yst_reply_auth) < 48.5, -0.0095375798, if (fieldMatch(text).importance < 0.7498970032, 0.015502273, -0.0026741211))), 0.0188293335)), if (attribute(user_followers_count) < 72.5, -0.0024566452, if (attribute(yst_reply_auth) < 50.5, 0.0027297795, -0.0018218561)))) + +if (fieldMatch(text).earliness < 0.929802537, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(user_name).importance < 0.3325359821, if (term(1).significance < 0.997274518, -0.0074074056, if (attribute(user_statuses_count) < 5.5, if (fieldMatch(text) < 0.8386005163, 0.0032054405, -0.0402836718), if (fieldMatch(text).earliness < 0.6381819844, -0.0037883, 8.924796E-4))), if (attribute(user_statuses_count) < 3055.5, 0.0281949081, -0.0048301755)), if (age(created_at) < 1770.0, 0.0037513988, if (attribute(yst_reply_auth) < 71.5, if (attribute(user_followers_count) < 68.5, -0.0032099164, if (fieldMatch(text).weightedOccurrence < 
0.0510035008, if (term(2).significance < 0.9982124567, -0.0067952208, 0.0019990379), if (fieldTermMatch(text,1).firstPosition < 3.5, -0.0014223313, 0.0056390354))), -0.0052017914))), if (fieldMatch(text).tail < 8.5, if (fieldTermMatch(text,1).firstPosition < 21.5, 7.76589E-5, -0.0251022513), if (age(created_at) < 1770.0, 0.0107898472, 0.0035281034))) + +if (fieldMatch(text) < 0.4136639833, if (fieldMatch(text).earliness < 0.6228449941, -0.0108022756, if (term(0).significance < 0.98062253, if (fieldMatch(text).importance < 0.739367485, if (term(1).significance < 0.9957709908, -0.0755310801, -0.0197330906), -0.0036348641), -0.0013106391)), if (fieldLength(text) < 9.5, if (term(1).significance < 0.9965360165, 0.0028564297, -0.0093397644), if (fieldMatch(text).earliness < 0.9321835041, if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, if (attribute(user_followers_count) < 40.5, if (fieldMatch(text).importance < 0.7464824915, if (fieldMatch(text).importance < 0.7373905182, -0.0053736193, -0.0642881769), 0.0068053454), if (term(0).significance < 0.9995554686, if (fieldMatch(text).tail < 21.5, 0.0051189016, -0.0221854397), -0.005375067)), if (term(2).significance < 0.9946069717, -0.003229661, if (attribute(yst_link_array_size) < 0.0122835003, 0.0019118484, -0.0049101186))), -0.010971348), 0.0042358084))) + +if (attribute(ythl) < 0.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0501539771, if (attribute(user_statuses_count) < 8152.0, if (attribute(user_followers_count) < 443.5, -7.68504E-4, if (attribute(yst_link_array_size) < 0.0250005014, if (fieldMatch(text).importance < 0.6649650335, -0.0040469549, 0.0165434132), if (attribute(user_followers_count) < 1371.0, 0.0529050928, 0.0097057892))), -0.0074806913)), if (fieldLength(text) < 14.5, -0.0075857569, if (fieldMatch(text).significantOccurrence < 0.0476144999, -0.0052872985, if (attribute(yst_reply_auth) < 33.5, 0.0021247688, -0.004326499)))), if 
(fieldTermMatch(text,1).firstPosition < 12.5, if (fieldMatch(text) < 0.5566675067, -0.0012160193, 0.0034883449), if (attribute(yst_reply_auth) < 20.5, if (attribute(user_followers_count) < 213.5, -0.0026673084, 0.0029249608), if (attribute(user_statuses_count) < 7554.0, -0.0021650101, if (term(0).significance < 0.9956585169, -0.0326924993, -0.0081739014))))) + +if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9981595278, -0.0839195878, -0.0112331884), if (fieldMatch(text).importance < 0.4998250008, if (term(0).significance < 0.9983350039, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, -0.0037133544, -0.1248149534), if (fieldMatch(text).importance < 0.4997234941, 0.0058967543, if (attribute(yst_reply_auth) < 0.5, -0.0271620138, 2.819878E-4))), 0.0163052773), if (fieldTermMatch(text,0).firstPosition < 5.5, -9.410187E-4, if (fieldMatch(text).importance < 0.666454494, -0.0070269578, if (term(2).significance < 0.9931030273, -0.0096096659, 0.0015413452))))), if (attribute(user_followers_count) < 520.5, if (attribute(yst_reply_auth) < 8.5, if (fieldMatch(text).significantOccurrence < 0.0363755003, -0.0183942755, if (age(created_at) < 1710.0, 0.0052904688, 3.556613E-4)), -0.0034358951), if (attribute(user_followers_count) < 534.5, 0.0246325003, 0.0032783956))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text) < 0.4860935211, if (term(1).significance < 0.9882720113, -0.0113150549, -0.003513259), if (attribute(user_statuses_count) < 14534.0, if (attribute(user_followers_count) < 457.5, -0.0013098462, if (term(4).significance < 0.9912315011, 0.0071257515, -0.0107882556)), -0.0080098717)), if (fieldMatch(text).occurrence < 0.1348485053, if (term(1).significance < 0.9926555157, if (attribute(yst_link_array_size) < 6.15E-5, if (fieldTermMatch(text,0).firstPosition < 7.5, -0.0054509513, -0.0267164116), -2.251203E-4), if (fieldMatch(text).completeness < 0.9520415068, 
-0.0121998182, 6.584783E-4)), if (age(created_at) < 1770.0, 0.0078526654, if (fieldTermMatch(text,3).firstPosition < 2.5, 0.010815374, if (term(2).significance < 0.992915988, if (term(2).significance < 0.9923814535, -0.0011187617, if (fieldMatch(text).completeness < 0.9579474926, -0.0550616595, -0.0034789409)), if (attribute(yst_tweet_adult_score) < 0.5, 0.0029410626, -0.0116671785)))))) + +if (attribute(yst_tweet_language) < 3243.5, if (age(created_at) < 1830.0, if (term(0).significance < 0.9725670218, if (fieldMatch(text) < 0.6660829782, -0.0173866153, -0.001017438), if (attribute(yst_reply_auth) < 355.5, if (term(0).significance < 0.9816665053, 0.0179367183, if (fieldMatch(text).importance < 0.749382019, if (fieldMatch(text).tail < 6.5, if (attribute(user_friends_count) < 560.5, if (fieldMatch(text) < 0.8736619949, 0.0023509846, -0.0157312448), if (fieldMatch(text) < 0.1418584883, -0.0659559738, -0.0072510736)), 0.004454443), 0.008319561)), if (attribute(yst_link_array_size) < 0.0586175025, -0.0091289813, 0.0114658081))), if (fieldMatch(text).importance < 0.666454494, if (fieldMatch(user_name).fieldCompleteness < 0.5833334923, -0.0041143634, 0.0401025109), if (term(2).significance < 0.9991624951, if (fieldMatch(text) < 0.5540195107, -0.0037827224, 1.685363E-4), if (fieldLength(text) < 8.5, -0.0122620665, 0.0037138353)))), -0.0101985628) + +if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, if (age(created_at) < 690.0, -0.0256320594, 0.0079996205), -0.1224294269), if (attribute(yst_tweet_language) < 3243.5, if (fieldMatch(text).importance < 0.7491755486, if (term(0).significance < 0.9184160233, if (term(2).significance < 0.9980159998, -0.0525545375, 0.0023785461), if (fieldMatch(text).importance < 0.7490389943, if (attribute(user_followers_count) < 787.5, 0.0010999135, if (fieldMatch(text).importance < 0.499848485, 0.0162069505, 0.0035170311)), -0.0287539558)), 0.0072807979), 
-0.0106468395)), if (fieldMatch(text).importance < 0.6664534807, -0.0041764714, if (term(0).significance < 0.9954395294, if (fieldMatch(user_name).significantOccurrence < 0.2916665077, if (fieldMatch(text).importance < 0.8318179846, -0.0042118878, 0.0043649147), 0.0158696258), if (term(1).significance < 0.9986659884, if (fieldMatch(text).tail < 1.5, -0.0046863462, 0.0042419546), -8.82831E-4)))) + +if (age(created_at) < 1830.0, if (attribute(user_statuses_count) < 3.5, -0.0248705295, if (attribute(yst_reply_auth) < 247.5, if (attribute(user_followers_count) < 97.5, if (attribute(yst_link_array_size) < 7.835E-4, if (fieldLength(text) < 27.5, if (fieldMatch(text).significantOccurrence < 0.0591179989, 0.007083232, if (fieldTermMatch(text,1).firstPosition < 1.5, 0.0146283297, -0.0028201578)), if (attribute(user_followers_count) < 23.5, -0.0472201281, 8.078028E-4)), if (fieldTermMatch(text,1).firstPosition < 4.5, 0.0067052369, if (fieldMatch(text).earliness < 0.8221344948, -0.007636276, -0.0324826734))), if (fieldLength(text) < 10.5, -0.0052857206, 0.0059849079)), if (attribute(user_followers_count) < 1692.5, -0.0091360049, 0.0091123239))), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.6825754642, if (term(0).significance < 0.9944700003, -0.014802095, -0.0053042509), if (fieldMatch(text).significantOccurrence < 0.1249970049, -0.0014558335, -0.0200174998)), -8.72652E-5)) + +if (attribute(yst_reply_auth) < 26.5, if (attribute(user_followers_count) < 86.5, if (fieldMatch(text).significantOccurrence < 0.1216785014, -6.414838E-4, -0.0099978879), if (fieldMatch(text) < 0.843991518, if (fieldMatch(text).absoluteOccurrence < 0.0126785003, if (fieldMatch(text).importance < 0.6664454937, if (age(created_at) < 5400.0, -9.786234E-4, -0.010880796), 4.683724E-4), if (fieldTermMatch(text,1).firstPosition < 3.5, -0.0013491196, 0.0100488776)), if (fieldMatch(text).importance < 0.4989485145, -0.018038959, if (age(created_at) < 1770.0, if 
(term(0).significance < 0.9986659884, if (fieldMatch(text).earliness < 0.6099034548, 0.018822136, 0.0069414922), 0.0010695341), if (attribute(user_statuses_count) < 728.0, 0.0078644585, 0.0013385568))))), if (attribute(user_followers_count) < 1995.0, -0.0037600829, if (term(0).significance < 0.998976469, if (age(created_at) < 2310.0, if (attribute(yst_reply_auth) < 38.5, -0.0117528259, 0.0193329084), 0.0028282077), -0.002709802))) + +if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8512874842, if (term(2).significance < 0.9981595278, -0.0893712611, 0.0139821391), if (fieldMatch(text).absoluteOccurrence < 0.0129164997, if (fieldMatch(user_name).fieldCompleteness < 0.5833334923, if (fieldMatch(text).importance < 0.4999005198, if (fieldMatch(text).importance < 0.4992579818, -0.0088299338, if (fieldLength(text) < 24.5, 0.0031375211, -0.0078301854)), if (fieldMatch(text).importance < 0.4999030232, -0.0514023475, if (fieldMatch(text).earliness < 0.6079194546, -0.0064381419, if (fieldMatch(text) < 0.8824554682, -0.001304103, -0.0302990737)))), 0.0378075574), 0.0037149737)), if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).absoluteProximity < 0.0573749989, -0.0037219953, if (attribute(user_followers_count) < 719.5, if (fieldMatch(text).importance < 0.6660234928, if (term(1).significance < 0.9864724874, -0.0057017615, 0.0142744959), if (fieldMatch(text).importance < 0.6664794683, -0.0033669884, 0.0011748423)), 0.0048833724)), -0.0235368129)) + +if (fieldMatch(text).tail < 3.5, if (term(0).significance < 0.9849029779, -0.0096533539, if (age(created_at) < 210.0, 0.0086447306, if (term(3).significance < 0.9972594976, -0.0049080669, 0.0019732467))), if (attribute(user_followers_count) < 682.5, if (attribute(yst_reply_auth) < 91.5, if (attribute(yst_tweet_language) < 3243.5, if (term(4).significance < 0.9962199926, if (fieldMatch(text).tail < 11.5, if (term(1).significance < 0.9847429991, if (fieldMatch(text).importance < 0.7463389635, if 
(term(0).significance < 0.9929184914, -0.0337962464, -0.0042731663), -0.0018179748), -5.816172E-4), if (age(created_at) < 1710.0, 0.0058304594, -1.954122E-4)), if (term(1).significance < 0.8054080009, 0.020695941, 0.0029922212)), -0.010391704), if (attribute(yst_link_array_size) < 0.009443, -0.0102022704, if (attribute(user_friends_count) < 89.0, 0.0214038413, -0.003022702))), if (fieldMatch(text).significantOccurrence < 0.0556650013, 9.656227E-4, 0.0055771237))) + +if (attribute(ythl) < 0.5, if (fieldMatch(text).importance < 0.4997234941, if (fieldMatch(text).importance < 0.4988809824, -0.0156475694, if (fieldMatch(text).importance < 0.4988874793, 0.0871791947, if (fieldMatch(text).importance < 0.4997065067, 0.005454559, if (term(0).significance < 0.9976885319, 0.1581759963, 0.0057478578)))), if (fieldMatch(text).importance < 0.4997634888, -0.0264810886, if (fieldMatch(text).importance < 0.4998250008, 0.0106354371, if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).earliness < 0.701851964, -0.0083100057, -0.0018931553), if (attribute(user_followers_count) < 701.0, -0.0020476113, 0.0053824373))))), if (fieldMatch(text) < 0.4141010046, if (fieldMatch(user_name) < 0.3179910183, if (term(0).significance < 0.9793410301, if (term(2).significance < 0.9970530272, -0.0054897109, if (fieldMatch(text).importance < 0.6620055437, 0.0060602187, -0.0517823718)), -0.0021590151), 0.0298259094), if (fieldMatch(text).significantOccurrence < 0.0392310023, -0.0063084285, 0.0020013986))) + +if (attribute(yst_reply_auth) < 236.5, if (age(created_at) < 1830.0, if (term(1).significance < 0.7788045406, 0.0479141837, if (term(0).significance < 0.9139549732, if (fieldMatch(text).longestSequenceRatio < 0.4166665077, -0.0732771007, -0.0090569203), if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, -0.0042825791, -0.1098205261), if (attribute(user_followers_count) < 98.5, if (attribute(user_statuses_count) < 1260.5, if (match < 
0.6491410136, -0.011081957, if (term(0).significance < 0.9986954927, 0.0051994695, -0.0019674695)), -0.0076605248), if (attribute(user_statuses_count) < 5.5, -0.1055265232, if (fieldMatch(text).earliness < 0.3726850152, -7.649567E-4, if (fieldMatch(text).occurrence < 0.1519230008, 0.0078900808, if (attribute(user_followers_count) < 583.5, -0.0022611347, 0.0078521247)))))))), if (fieldMatch(text).importance < 0.6664505005, -0.0037578539, if (fieldMatch(text).significantOccurrence < 0.0386575013, -0.0059854357, 0.0010571345))), -0.0046469325) + +if (age(created_at) < 1830.0, if (fieldMatch(text).importance < 0.7467460036, if (fieldMatch(text).tail < 7.5, if (term(0).significance < 0.8547105193, if (term(2).significance < 0.9980159998, -0.0945680172, 0.0133542), if (term(0).significance < 0.9995139837, -9.1115E-6, if (term(0).significance < 0.9996379614, if (fieldMatch(text).tail < 6.5, if (attribute(yst_link_array_size) < 0.0028985001, -0.0033284425, -0.0414096377), -0.069049086), 0.0066873893))), if (attribute(user_friends_count) < 179.5, 0.0013706096, 0.0065517887)), if (attribute(yst_tweet_language) < 3243.5, 0.0068726824, -0.03635308)), if (fieldMatch(text).longestSequence < 1.5, if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (fieldMatch(user_name).importance < 0.4999729991, -0.0053854995, 0.0353914791), if (attribute(user_friends_count) < 4181.0, 0.0041747763, -0.0313684077)), if (fieldMatch(text) < 0.5549424887, -0.0034986093, if (match < 0.7940984964, 0.0068157141, 5.80831E-4)))) + +if (fieldMatch(text).importance < 0.4989485145, if (fieldMatch(text).importance < 0.4988874793, if (term(0).significance < 0.991086483, if (fieldMatch(text).importance < 0.4985739887, -0.0080603652, -0.0525354118), 0.030992111), -0.1167737387), if (age(created_at) < 1830.0, if (term(0).significance < 0.973123014, if (term(1).significance < 0.9992040396, if (term(1).significance < 0.9977560043, if (fieldMatch(text) < 0.5470744967, if (fieldMatch(text).importance < 
0.7365344763, -0.075573942, -0.0140588177), -0.0021536645), 0.0179729341), -0.0499824746), if (term(0).significance < 0.9751809835, 0.0490509939, if (attribute(yst_reply_auth) < 20.5, if (attribute(user_followers_count) < 97.5, 0.0015097125, if (attribute(user_statuses_count) < 5.5, -0.09483518, 0.0071205807)), if (attribute(yst_link_array_size) < 0.0028940002, -0.0084126298, 0.0022449562)))), if (fieldMatch(text).importance < 0.6664534807, -0.0042838031, if (term(0).significance < 0.9982124567, -0.0016253125, 0.0026475634)))) + +if (fieldMatch(text).longestSequence < 1.5, if (fieldTermMatch(text,0).firstPosition < 2.5, if (fieldMatch(text).importance < 0.498980999, -0.0234510876, 0.0020809399), if (attribute(yst_reply_auth) < 302.5, if (fieldMatch(text).importance < 0.666454494, if (fieldMatch(text).importance < 0.4998220205, if (attribute(yst_reply_auth) < 42.5, 0.0050751752, -0.0083413352), if (fieldMatch(text).absoluteOccurrence < 0.0124999993, if (age(created_at) < 30600.0, if (fieldMatch(text).absoluteProximity < 0.0125000002, 0.0047520251, if (term(1).significance < 0.988929987, -0.0182384871, -0.0050169041)), -0.0135494985), 0.0043484195)), if (term(0).significance < 0.9941140413, -0.005205476, 0.0033577205)), -0.0100142174)), if (age(created_at) < 1770.0, if (fieldMatch(text) < 0.8529180288, 5.433753E-4, 0.0060936539), if (fieldMatch(text).earliness < 0.929802537, if (fieldLength(text) < 29.5, -5.482093E-4, -0.0264980545), if (fieldMatch(text).occurrence < 0.2290209979, 0.0068625219, -5.992389E-4)))) + +if (fieldTermMatch(text,2).firstPosition < 13.5, if (term(2).significance < 0.9519284964, -0.0041815526, if (age(created_at) < 5400.0, if (fieldMatch(text).significance < 0.7492735386, if (fieldMatch(text).tail < 20.5, 0.0019880057, 0.0345569075), 0.008782237), if (term(2).significance < 0.9934439659, if (term(2).significance < 0.99218297, 4.470985E-4, -0.0109745339), 0.0025543252))), if (attribute(yst_reply_auth) < 22.5, if (term(1).significance < 
0.9951915145, if (fieldMatch(text).fieldCompleteness < 0.0816664994, -0.0140486512, -0.0020108004), if (fieldMatch(user_name).head < 0.5, if (attribute(yst_tweet_language) < 3243.5, if (fieldLength(text) < 8.5, -0.0088342224, 0.0012278464), -0.0093805182), 0.0236651466)), if (attribute(user_followers_count) < 125.5, if (age(created_at) < 270.0, if (term(0).significance < 0.9985035062, 0.0238630955, -0.018749544), -0.0107232535), if (attribute(yst_reply_auth) < 813.0, -0.0012233981, -0.0114833082)))) + +if (fieldMatch(text).absoluteOccurrence < 0.0122500006, if (fieldMatch(text).earliness < 0.8596060276, if (attribute(yst_reply_auth) < 302.5, if (fieldMatch(user_name).completeness < 0.9791665077, if (fieldMatch(text).longestSequence < 1.5, if (age(created_at) < 1830.0, -8.288048E-4, if (fieldMatch(text).importance < 0.6664404869, if (fieldMatch(text).fieldCompleteness < 0.0976189971, -0.0053357392, -0.013793688), if (fieldMatch(text).occurrence < 0.1558704972, 0.002492224, -0.0089043788))), if (fieldMatch(text).weightedOccurrence < 0.0385860018, -0.0071451431, 5.167991E-4)), 0.0407753689), if (attribute(user_followers_count) < 1721.5, -0.0105255162, 0.0015551667)), if (fieldLength(text) < 8.5, -0.0088276495, if (fieldTermMatch(text,0).firstPosition < 15.5, 0.0024878063, -0.0103987822))), if (attribute(user_followers_count) < 27.5, if (fieldTermMatch(text,1).firstPosition < 17.5, if (term(0).significance < 0.9853284955, -0.0219723254, -0.0024166886), 0.018923986), if (attribute(user_statuses_count) < 93.5, 0.0212835416, 0.0045698024))) + +if (fieldMatch(text).longestSequence < 1.5, if (term(0).significance < 0.8509274721, if (term(2).significance < 0.9981595278, -0.0629348018, -0.0067077117), if (fieldMatch(text).significance < 0.7493325472, if (fieldMatch(text).importance < 0.4999189973, if (fieldMatch(text).importance < 0.4999135137, if (fieldMatch(text).importance < 0.4999005198, if (fieldMatch(text).importance < 0.4997529984, -0.0025494044, if 
(fieldMatch(text).importance < 0.4998250008, 0.010539188, if (fieldMatch(text).occurrence < 0.0425724983, -0.0160413841, 0.0020384205))), -0.0118667123), 0.019974214), if (fieldMatch(text) < 0.8826240301, if (fieldMatch(text).absoluteOccurrence < 0.0116665, if (fieldMatch(text).earliness < 0.6554945111, -0.0060372501, -0.0016907417), 0.0022069234), -0.0320846717)), 0.0077067193)), if (term(1).significance < 0.8159549832, 0.0094136687, if (attribute(yst_tweet_language) < 3587.5, if (fieldMatch(text).earliness < 0.929802537, 3.53609E-4, if (fieldMatch(text).fieldCompleteness < 0.1519230008, 0.0079910001, 9.053355E-4)), -0.0202098115))) + +if (age(created_at) < 60.0, 0.05, 0.0) + +if (age(created_at) < 120.0, 0.0125, 0.0) + +if (age(created_at) < 240.0, 0.0125, 0.0) + +if (age(created_at) < 360.0, 0.0125, 0.0) + +if (age(created_at) < 480.0, 0.0125, 0.0) + +if (age(created_at) < 600.0, 0.017, 0.0) + +if (age(created_at) < 1200.0, 0.017, 0.0) + +if (age(created_at) < 2400.0, 0.017, 0.0) + +if (age(created_at) < 3600.0, 0.025, 0.0) + +if (age(created_at) < 7200.0, 0.025, 0.0) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking03.expression b/searchlib/src/test/files/ranking03.expression new file mode 100644 index 00000000000..d6837511a28 --- /dev/null +++ b/searchlib/src/test/files/ranking03.expression @@ -0,0 +1,97 @@ +if (MAX_SCORE < 270055.0, if (MAX_SCORE < 241174.0, 0.234534, if (ISABSTRACT_AVG < 0.105, 0.248214, 0.239032)), if (ISABSTRACT_AVG < 0.13, if (DAY_HITS_FRAC < 0.765, if (MAX_SCORE < 347793.0, 0.258244, 0.268225), 0.271744), 0.247728)) + +if (MAX_SCORE < 270055.0, if (MAX_SCORE < 252585.0, -0.0118809, -0.00253128), if (ISABSTRACT_AVG < 0.21, if (DAY_LW_DAY_HITS_RATIO < 4.345, if (MAX_SCORE < 354461.0, 0.00546628, 0.0164708), 0.0188771), if (DAY_PREV_DAY_HITS_FRAC < 0.805, -0.0092059, 0.00324753))) + +if (MAX_SCORE < 270290.0, if (MAX_SCORE < 236242.0, -0.0121516, if (DAY_LW_DAY_HITS_RATIO < 3.45, -0.00767489, if (ISABSTRACT_AVG < 
0.12, 0.00622939, -0.00488712))), if (ISABSTRACT_AVG < 0.105, if (WEEKAVG < 0.5, 0.00766953, 0.0156887), -7.73677E-4)) + +if (MAX_SCORE < 270061.0, if (MAX_SCORE < 238942.0, -0.0111281, if (DAY_LW_DAY_HITS_RATIO < 3.9, -0.00750282, 0.00220298)), if (ISABSTRACT_AVG < 0.105, if (DAY_HITS_FRAC < 0.795, if (MAX_SCORE < 348364.0, 0.00374845, 0.0131108), 0.0161683), -0.00111039)) + +if (MAX_SCORE < 270289.0, if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.0106179, if (ISTITLE_AVG < 0.705, 0.00251111, -0.0060076)), if (ISABSTRACT_AVG < 0.21, if (PUB_TODAY_AVG < 0.13, 0.00368181, if (ISTITLE_AVG < 0.845, 0.0126785, 0.00345445)), -0.00136004)) + +if (MAX_SCORE < 271459.0, if (MAX_SCORE < 238606.0, -0.00948395, if (ISABSTRACT_AVG < 0.105, if (DAY_LW_DAY_HITS_RATIO < 2.805, -0.00477034, 0.00664777), -0.00676399)), if (ISABSTRACT_AVG < 0.13, if (DAY_PREV_DAY_HITS_FRAC < 0.675, 0.00489261, 0.0122925), -9.20098E-4)) + +if (MAX_SCORE < 271407.0, if (DAY_LW_DAY_HITS_RATIO < 3.485, -0.00827195, if (NATIONALNEWS < 0.185, -0.00376713, if (MAX_SCORE < 245976.0, 3.52932E-4, 0.0160415))), if (ISTITLE_AVG < 0.705, if (DAY_PREV_DAY_HITS_FRAC < 0.675, 0.00314056, 0.0112222), 9.24328E-4)) + +if (MAX_SCORE < 253367.0, if (MAX_SCORE < 177746.0, -0.0118279, -0.00453188), if (ISABSTRACT_AVG < 0.105, if (DAY_LW_DAY_HITS_RATIO < 4.25, if (MAX_SCORE < 354461.0, -7.20492E-4, 0.00994136), 0.0104822), if (DAY_PREV_DAY_HITS_FRAC < 0.915, -0.00613264, 0.00119411))) + +if (MAX_SCORE < 270055.0, if (DAY_LW_DAY_HITS_RATIO < 4.635, if (MAX_SCORE < 221962.0, -0.00963481, -0.00428119), -3.41413E-4), if (ISTITLE_AVG < 0.565, if (MAX_SCORE < 354542.0, if (DAY_HITS < 1.5, -0.00205135, 0.00751225), 0.0111239), 2.2935E-4)) + +if (MAX_SCORE < 263726.0, if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.00728388, if (ISTITLE_AVG < 0.73, 0.00311214, -0.00320301)), if (ISABSTRACT_AVG < 0.105, if (WEEKAVG < 0.215, -0.00472856, if (DAY_LW_DAY_HITS_RATIO < 47.0, 0.00641873, 0.0215092)), -0.00106176)) + +if (MAX_SCORE < 263734.0, if 
(DAY_LW_DAY_HITS_RATIO < 3.635, -0.0061738, if (ISTITLE_AVG < 0.05, 0.00678624, -0.0034547)), if (ISABSTRACT_AVG < 0.105, if (LOCALNEWS < 0.105, 0.00405055, 0.00975544), if (DAY_PREV_DAY_HITS_FRAC < 0.905, -0.00538249, 0.00274471))) + +if (MAX_SCORE < 252459.0, -0.00466436, if (ISABSTRACT_AVG < 0.21, if (SUPERDUPER_AVG < 0.115, if (MAX_SCORE < 254916.0, 0.0300376, 0.00749701), if (PUB_TODAY_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.085, 0.0121226, -0.00799009), 0.00453286)), -0.00140668)) + +if (MAX_SCORE < 264515.0, if (DAY_PREV_DAY_HITS_FRAC < 0.725, -0.00554549, if (DAY_HITS < 30.5, -0.0016496, 0.0125357)), if (ISTITLE_AVG < 0.585, if (WEEKAVG < 0.5, 0.00111467, 0.0073944), if (MAX_SCORE < 356177.0, -0.00212114, 0.00636485))) + +if (DAY_PREV_DAY_HITS_FRAC < 0.825, if (PREV_DAY_HITS < 17.5, -0.00398853, 0.00429611), if (ISTITLE_AVG < 0.73, if (MIN_RANK < 9.0, if (DAY_HITS < 46.0, 0.00610951, 0.0271326), -0.00242626), if (MAX_SCORE < 374204.0, -0.00141569, 0.00890749))) + +if (MAX_SCORE < 249898.0, -0.00399302, if (BUSINESS < 0.315, if (ISTITLE_AVG < 0.73, if (SUPERDUPER_AVG < 0.105, 0.00904674, if (WEEKAVG < 5.5, if (PUB_TODAY_AVG < 0.13, -0.00769757, 0.00217607), 0.0110208)), -5.93102E-4), -0.00318209)) + +if (MAX_SCORE < 276408.0, if (DAY_LW_DAY_HITS_RATIO < 4.535, -0.00344589, if (LOCALNEWS < 0.53, -6.70599E-5, 0.0175562)), if (ISABSTRACT_AVG < 0.685, if (DAY_LW_DAY_HITS_RATIO < 33.5, if (LOCALNEWS < 0.115, 0.00202221, 0.00726641), 0.0142841), -0.00307504)) + +if (MAX_SCORE < 348857.0, if (DAY_PREV_DAY_HITS_FRAC < 0.725, -0.00419409, if (PUB_TODAY_AVG < 0.185, -0.00386261, if (ISTITLE_AVG < 0.705, if (BUSINESS < 0.21, 0.0063503, if (DAY_HITS_FRAC < 0.555, -0.0102176, 0.00356215)), -4.78923E-4))), 0.00498293) + +if (MAX_SCORE < 286123.0, if (DAY_PD_HITS_RATIO < 48.0, if (ISTITLE_AVG < 0.61, if (MAX_RANK < 9.0, if (MAX_SCORE < 226208.0, -0.00741311, 0.0138247), if (AVG_RANK < 9.635, 3.60768E-4, -0.00785446)), -0.00329639), 0.0225017), 0.00337188) + +if 
(DAY_LW_DAY_HITS_RATIO < 7.25, if (BUSINESS < 0.05, if (ISTITLE_AVG < 0.895, if (MIN_RANK < 1.0, 0.0175483, 0.00215143), -0.00158754), if (DAY_WEEK_AVG_RATIO < 0.325, 0.0165492, if (MAX_SCORE < 448185.0, -0.00386364, 0.0131047))), 0.00394983) + +if (MAX_SCORE < 271407.0, if (MAX_SCORE < 177474.0, -0.00525936, if (SPORTS < 0.645, -0.00170921, 0.00426429)), if (DAY_PD_HITS_RATIO < 0.085, 0.0151019, if (BUSINESS < 0.645, if (LW_DAY_HITS < 2.5, 0.00244345, -0.0099429), -0.00501617))) + +if (DAY_PREV_DAY_HITS_FRAC < 0.945, if (ISTITLE_AVG < 0.95, if (BUSINESS < 0.235, 0.0017119, -0.00331729), -0.00374611), if (WEEKAVG < 0.215, -0.004784, if (DAY_PD_HITS_RATIO < 0.145, -0.00631232, if (BUSINESS < 0.685, 0.00413018, -0.00259307)))) + +if (PUB_TODAY_AVG < 0.87, if (BUSINESS < 0.235, if (ISTITLE_AVG < 0.39, if (WEEKAVG < 7.855, if (SUPERDUPER_AVG < 0.115, 0.00505563, -3.98588E-4), 0.017327), -0.00158729), -0.00349104), if (WEEKAVG < 0.36, -0.00122032, 0.00412986)) + +if (MAX_SCORE < 235342.0, -0.00255699, if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 0.215, -0.00736871, if (LOCALNEWS < 0.775, if (SUPERDUPER_AVG < 0.315, 0.00356548, if (WEEKAVG < 7.07, -0.00254331, 0.00782112)), 0.0187086)), -0.0015245)) + +if (DAY_PREV_DAY_HITS_FRAC < 0.825, -0.0017775, if (WEEKAVG < 0.36, -0.00236106, if (MAX_MIN_RANK < 3.0, if (PUB_TODAY_AVG < 0.27, -0.00108329, if (DAY_WEEK_AVG_RATIO < 2.615, if (MAX_SCORE < 248412.0, 0.00662755, 0.0252786), 0.00570542)), 0.00183161))) + +if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (MAX_SCORE < 453346.0, if (PREV_DAY_HITS < 15.5, -0.00263045, 0.00362606), 0.0113911), if (DAY_LW_DAY_HITS_RATIO < 47.0, if (MAX_SCORE < 214610.0, -0.00305392, if (PREV_DAY_HITS < 46.5, 0.00171595, 0.0157708)), 0.0123294)) + +if (PUB_TODAY_AVG < 0.815, if (BUSINESS < 0.05, if (PUB_TODAY_AVG < 0.155, -0.00239543, if (PREV_DAY_HITS < 0.5, -0.00219916, 0.00267906)), -0.00274426), if (ISTITLE_AVG < 0.95, if (DAY_PD_HITS_RATIO < 1.445, -0.012251, 0.00474059), -6.50252E-4)) + +if 
(ISABSTRACT_AVG < 0.105, if (MAX_SCORE < 235080.0, -0.00337944, if (SUPERDUPER_AVG < 0.105, if (BUSINESS < 0.435, if (MAX_SCORE < 293262.0, 0.00942708, 0.00296784), -0.00165307), if (MAX_SCORE < 262829.0, -0.00745914, 0.0011197))), -0.0017808) + +if (MAX_SCORE < 347080.0, if (DAY_LW_DAY_HITS_RATIO < 4.31, if (NATIONALNEWS < 0.295, -0.00181733, 0.00242649), if (MAX_SCORE < 313528.0, if (LOCALNEWS < 0.53, if (TOPSTORY < 0.355, 0.00109569, 0.00947164), 0.0165664), -0.00846682)), 0.00293581) + +if (MAX_SCORE < 177806.0, -0.00360187, if (TOPSTORY < 0.295, if (LOCALNEWS < 0.765, 4.80638E-6, if (ISTITLE_AVG < 0.29, 0.0164568, 0.00112041)), if (INTLNEWS < 0.355, 0.00153933, if (WEEKAVG < 0.36, -0.00129083, 0.0150131)))) + +if (ISTITLE_AVG < 0.73, if (BUSINESS < 0.27, if (MAX_MIN_RANK < 9.0, if (SUPERDUPER_AVG < 0.315, if (PREV_DAY_HITS < 17.5, 0.00272769, 0.0136338), -3.41266E-4), 0.0154743), if (NATIONALNEWS < 0.21, -0.0029607, 0.0128593)), -0.00131249) + +if (DAY_LW_DAY_HITS_RATIO < 7.585, if (MAX_SCORE < 424137.0, if (DAY_WEEK_AVG_RATIO < 4.78, -8.74675E-4, -0.0111332), if (ENTERTAINMENT < 0.12, if (DAY_LW_DAY_HITS_RATIO < 2.5, if (DAY_WEEK_AVG_RATIO < 0.74, 0.00976536, 0.0306272), 0.00520021), 8.67293E-5)), 0.00333736) + +if (DAY_PD_HITS_RATIO < 0.085, 0.00972107, if (SPORTS < 0.845, if (PUB_TODAY_AVG < 0.95, if (PREV_DAY_HITS < 0.5, -0.00391231, -2.54135E-4), if (MAX_MIN_RANK < 7.0, if (DAY_LW_DAY_HITS_RATIO < 19.5, -1.75771E-4, 0.00523989), 0.0109531)), 0.00548354)) + +if (MAX_SCORE < 466894.0, if (NATIONALNEWS < 0.21, if (DAY_PD_HITS_RATIO < 0.055, 0.0159556, if (REGIONALNEWS < 0.05, -0.00112302, if (DAY_PD_HITS_RATIO < 1.105, 0.0140125, -7.24566E-4))), if (AVG_RANK < 8.1, -2.73744E-4, 0.00546871)), 0.00555251) + +if (MAX_SCORE < 286123.0, -8.23047E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.435, if (DAY_WEEK_AVG_RATIO < 1.555, 0.0222361, 1.89447E-4), if (MAX_MIN_RANK < 7.0, if (DAY_PREV_DAY_HITS_FRAC < 0.755, -0.00264164, 0.00200226), if (WEEKAVG < 1.07, 0.0151659, 
0.00171852)))) + +if (BUSINESS < 0.05, if (SUPERDUPER_AVG < 0.115, if (ISTITLE_AVG < 0.895, 0.00536839, 7.53571E-5), if (AVG_RANK < 8.21, if (PUB_TODAY_AVG < 0.13, -0.00865216, if (DAY_HITS_FRAC < 0.115, 0.0146316, -0.00249)), 0.00159523)), -0.00131884) + +if (DAY_LW_DAY_HITS_RATIO < 33.5, if (LIFESTYLE < 0.05, if (DAY_PD_HITS_RATIO < 0.065, 0.0130228, if (SPORTS < 0.39, if (NATIONALNEWS < 0.05, -0.00128992, 0.00127302), if (DAY_PD_HITS_RATIO < 13.5, 0.00304904, -0.0168329))), -0.00545277), 0.00512552) + +if (DAY_HITS_FRAC < 0.765, -5.27346E-4, if (PUB_TODAY_AVG < 0.355, -0.0153305, if (DAY_HITS < 46.5, if (DAY_PD_HITS_RATIO < 29.5, if (NATIONALNEWS < 0.105, 7.3747E-4, if (DAY_WEEK_AVG_RATIO < 8.47, 0.00769293, -0.0125825)), -0.0108761), 0.00977691))) + +if (MAX_SCORE < 177732.0, -0.00260643, if (BUSINESS < 0.05, if (WEEKAVG < 0.215, -0.00327106, if (AVG_RANK < 8.635, if (SUPERDUPER_AVG < 0.235, if (ISABSTRACT_AVG < 0.415, 0.00414333, -0.00152725), -0.00286672), 0.00429432)), -4.07557E-4)) + +if (WEEKAVG < 0.64, if (SUPERDUPER_AVG < 0.29, -1.3784E-4, -0.00368109), if (MAX_SCORE < 271407.0, if (MAX_MIN_RANK < 5.0, 9.85637E-4, if (SUPERDUPER_AVG < 0.115, if (DAY_LW_DAY_HITS_RATIO < 4.415, -0.00258674, 0.00694569), -0.00593057)), 0.00237623)) + +if (MAX_SCORE < 177732.0, -0.00248172, if (LIFESTYLE < 0.13, if (PUB_TODAY_AVG < 0.105, if (DAY_HITS < 3.5, -7.2429E-4, -0.0150678), if (DAY_HITS_FRAC < 0.075, 0.0156611, if (BUSINESS < 0.05, 0.00219968, -3.65826E-4))), -0.00592673)) + +if (WEEKAVG < 0.215, -0.0047613, if (SPORTS < 0.355, if (MIN_RANK < 5.0, if (MAX_SCORE < 467877.0, -0.0025312, if (MAX_SCORE < 576366.0, 0.0134173, -0.00903108)), 3.0441E-4), if (WEEKAVG < 5.07, 0.00170865, 0.0116233))) + +if (WEEKAVG < 0.215, -0.00445856, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (MAX_SCORE < 459781.0, -0.00110273, 0.01224), if (SPORTS < 0.87, if (NATIONALNEWS < 0.05, 1.76374E-4, if (BUSINESS < 0.185, 0.00101462, 0.0103262)), 0.00758848))) + +if (MAX_SCORE < 588664.0, if 
(MAX_SCORE < 453568.0, if (PREV_DAY_HITS < 26.5, -7.786E-5, if (WEEKAVG < 9.215, -0.012221, -0.00126183)), if (DAY_PREV_DAY_HITS_FRAC < 0.555, 0.0175351, if (AVG_RANK < 9.7, 7.47189E-5, 0.0152525))), -0.0113374) + +if (TOPSTORY < 0.295, -3.12071E-4, if (MAX_MIN_RANK < 7.0, if (ISTITLE_AVG < 0.185, if (MAX_SCORE < 378124.0, 0.00111897, if (MAX_SCORE < 408027.0, -0.0203516, 0.0012991)), if (INTLNEWS < 0.13, 7.74937E-4, 0.00732047)), 0.0117253)) + +if (MAX_SCORE < 178085.0, -0.00220705, if (WEEKAVG < 6.64, if (SUPERDUPER_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.115, 0.0149532, 0.00106296), if (AVG_RANK < 7.73, if (PUB_TODAY_AVG < 0.13, -0.0104993, -0.00177497), 2.5899E-7)), 0.00418893)) + +if (ISTITLE_AVG < 0.585, if (AVG_RANK < 8.47, 0.00253086, if (DAY_HITS_FRAC < 0.885, if (LOCALNEWS < 0.13, -8.94801E-4, -0.00988189), if (NATIONALNEWS < 0.11, if (MAX_SCORE < 282066.0, 0.00689219, -0.00300841), 0.0150141))), -0.00102603) + +if (DAY_PREV_DAY_HITS_FRAC < 0.985, -8.43826E-4, if (MIN_RANK < 9.0, if (AVG_RANK < 8.71, 4.51436E-4, if (SUPERDUPER_AVG < 0.27, if (DAY_WEEK_AVG_RATIO < 5.05, 0.00639888, 0.020614), if (NATIONALNEWS < 0.185, -0.00149465, 0.0118779))), -0.00241922)) + +if (HEALTH < 0.105, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (INTLNEWS < 0.315, 0.00140618, 0.0145332), -0.00130877), if (WEEKAVG < 0.5, -6.96011E-4, if (MAX_MIN_RANK < 5.0, 0.0028215, -9.0855E-4))), -0.0059993) + +if (SPORTS < 0.585, if (TOPSTORY < 0.295, -3.94764E-4, if (ENTERTAINMENT < 0.05, 0.00143724, 0.00930005)), if (AVG_RANK < 5.55, if (DAY_WEEK_AVG_RATIO < 1.955, 0.00377635, 0.0210534), if (MAX_SCORE < 389202.0, 0.00246072, -0.012129))) + +if (MAX_SCORE < 406793.0, if (DAY_PD_HITS_RATIO < 0.075, 0.0102381, if (PUB_TODAY_AVG < 0.295, -0.00209613, if (MAX_SCORE < 305867.0, 9.38554E-4, if (MAX_SCORE < 347812.0, -0.00625349, -3.2361E-5)))), if (PREV_DAY_HITS < 17.5, 0.00426042, -0.0139803)) + +if (MAX_SCORE < 187757.0, -0.00190196, if (NATIONALNEWS < 0.185, 
1.08423E-5, if (PREV_DAY_HITS < 4.5, if (ISTITLE_AVG < 0.585, 0.00687766, 0.00142303), if (SUPERDUPER_AVG < 0.275, -0.00581088, if (INTLNEWS < 0.315, 0.0130163, -0.00562813))))) + +if (MAX_SCORE < 423724.0, if (MAX_SCORE < 408911.0, -3.03869E-4, -0.00754368), if (MAX_SCORE < 435668.0, 0.0194021, if (DAY_HITS < 5.5, if (AVG_RANK < 9.265, 0.00209562, 0.0171146), if (MAX_SCORE < 466889.0, -0.0147582, 0.00191369)))) + +if (PREV_DAY_HITS < 26.5, if (PREV_DAY_HITS < 19.5, 4.07731E-6, if (ISTITLE_AVG < 0.7, 0.0180989, 0.0014322)), if (MAX_SCORE < 378124.0, if (INTLNEWS < 0.25, -0.00926901, if (ISTITLE_AVG < 0.15, 0.00951019, -0.00389496)), -0.0168153)) + +if (ISABSTRACT_AVG < 0.815, if (PUB_TODAY_AVG < 0.05, if (PREV_DAY_HITS < 16.5, -0.00256108, 0.010687), if (BUSINESS < 0.05, 0.00107951, -0.00114831)), if (AVG_RANK < 8.31, -4.90289E-4, if (SPORTS < 0.315, 0.00273855, 0.0123011))) + +if (SUPERDUPER_AVG < 0.115, if (DAY_PD_HITS_RATIO < 0.115, 0.0119548, 4.25021E-4), if (INTLNEWS < 0.155, if (ISTITLE_AVG < 0.185, if (INTLNEWS < 0.05, -0.00395117, -0.0145832), -0.00135759), if (TOPSTORY < 0.295, -0.00119962, 0.00380053))) + +if (MAX_SCORE < 187608.0, -0.00129909, if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 9.235, if (DAY_PD_HITS_RATIO < 0.055, 0.0114518, if (PREV_DAY_HITS < 26.5, 9.65212E-4, if (LOCALNEWS < 0.05, -0.00805593, 0.00585007))), -0.0101744), 0.010206)) + +if (DAY_LW_DAY_HITS_RATIO < 4.71, if (SUPERDUPER_AVG < 0.315, -2.36511E-4, -0.00312389), if (DAY_PD_HITS_RATIO < 13.5, if (MAX_SCORE < 253372.0, -0.00118965, 0.00291415), if (ENTERTAINMENT < 0.05, if (SPORTS < 0.315, -0.00292663, -0.0194296), 0.00658386))) + +if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (WEEKAVG < 0.5, 0.0176531, 9.11096E-4), if (SPORTS < 0.39, -6.82618E-4, if (DAY_PD_HITS_RATIO < 11.5, if (DAY_LW_DAY_HITS_RATIO < 4.75, 5.16855E-4, if (DAY_HITS_FRAC < 0.41, 0.0159972, 0.00324363)), -0.0108331))) + +if (WEEKAVG < 0.36, -0.00257521, if (TOPSTORY < 0.635, if (PUB_TODAY_AVG < 0.79, if 
(DAY_PD_HITS_RATIO < 2.185, if (DAY_PREV_DAY_HITS_FRAC < 0.415, 0.00607155, -1.26015E-4), if (ISTITLE_AVG < 0.13, -0.00728662, -9.28754E-4)), 0.00147343), 0.014873)) + +if (TOPSTORY < 0.185, -2.97667E-4, if (DAY_LW_DAY_HITS_RATIO < 6.3, if (PREV_DAY_HITS < 19.5, if (DAY_PD_HITS_RATIO < 0.13, -0.00702476, if (LOCALNEWS < 0.05, 0.00592136, -7.83801E-4)), if (ISABSTRACT_AVG < 0.15, 0.023326, 8.03551E-4)), -0.00103664)) + +if (WEEKAVG < 0.215, -0.00379646, if (MAX_MIN_RANK < 3.0, if (DAY_HITS < 1.5, -0.00199037, if (DAY_PD_HITS_RATIO < 1.125, if (DAY_LW_DAY_HITS_RATIO < 4.375, 0.00535447, if (DAY_PD_HITS_RATIO < 0.825, 0.00562457, 0.0330072)), 0.00138881)), -7.58841E-4)) + +if (DAY_HITS_FRAC < 0.435, if (NATIONALNEWS < 0.685, if (INTLNEWS < 0.47, -2.35511E-4, if (MAX_SCORE < 290762.0, 6.19978E-4, if (SUPERDUPER_AVG < 0.155, 0.0182407, 0.00521312))), 0.0140779), if (MAX_SCORE < 484643.0, -5.18234E-4, -0.00804112)) + +if (DAY_PD_HITS_RATIO < 0.055, 0.0111333, if (DAY_LW_DAY_HITS_RATIO < 0.355, -0.00829529, if (PUB_TODAY_AVG < 0.95, if (DAY_PD_HITS_RATIO < 1.74, 5.29497E-4, if (NATIONALNEWS < 0.415, -0.00200727, 0.0081622)), if (INTLNEWS < 0.47, 0.00260098, -0.001284)))) + +if (MAX_MIN_RANK < 7.0, if (MAX_MIN_RANK < 5.0, 3.81058E-4, if (SUPERDUPER_AVG < 0.13, 3.29065E-4, -0.00386397)), if (MAX_SCORE < 266105.0, -5.80382E-4, if (MAX_SCORE < 322321.0, if (DAY_WEEK_AVG_RATIO < 5.0, 0.0158417, 2.53264E-4), 0.00216101))) + +if (WEEKAVG < 0.215, if (SUPERDUPER_AVG < 0.5, if (LOCALNEWS < 0.5, -0.00995113, 0.00292683), 0.00105182), if (DAY_PREV_DAY_HITS_FRAC < 0.725, -5.75584E-4, if (TOPSTORY < 0.585, if (SPORTS < 0.87, 7.83846E-4, 0.00745576), 0.0129932))) + +if (HEALTH < 0.115, if (DAY_PD_HITS_RATIO < 25.165, if (DAY_WEEK_AVG_RATIO < 10.115, if (DAY_PREV_DAY_HITS_FRAC < 0.405, 0.00322116, -1.4541E-4), 0.00878821), if (DAY_PREV_DAY_HITS_FRAC < 0.975, -0.0149181, -0.00209673)), if (MAX_SCORE < 286434.0, -0.00861656, 0.00142851)) + +if (LIFESTYLE < 0.185, if (MISC < 0.105, if 
(DAY_LW_DAY_HITS_RATIO < 0.925, if (MAX_SCORE < 273352.0, if (ISTITLE_AVG < 0.39, 0.00606893, -0.00394074), -0.012762), -8.2932E-5), 0.00878689), if (MAX_SCORE < 250603.0, -0.00131893, -0.0107682)) + +if (DAY_HITS < 13.5, 1.03863E-4, if (ENTERTAINMENT < 0.415, if (MIN_RANK < 7.0, if (DAY_PREV_DAY_HITS_FRAC < 0.875, if (MAX_SCORE < 261175.0, -0.00601924, 0.00517774), -0.00731704), if (ISABSTRACT_AVG < 0.685, -0.00243371, 0.0102497)), 0.0109447)) + +if (DAY_WEEK_AVG_RATIO < 4.855, if (DAY_WEEK_AVG_RATIO < 4.625, 3.35357E-4, 0.00823829), if (NATIONALNEWS < 0.39, if (ISABSTRACT_AVG < 0.295, if (INTLNEWS < 0.95, if (NATIONALNEWS < 0.155, -0.00576638, 0.00314375), 0.0086362), 0.00191928), -0.0129199)) + +if (WEEKAVG < 1.07, -6.88613E-4, if (INTLNEWS < 0.355, if (POLITICS < 0.05, if (PREV_DAY_HITS < 33.5, 8.33826E-4, -0.0106428), -0.0100621), if (DAY_HITS < 5.5, if (DAY_PD_HITS_RATIO < 0.105, -0.00557824, 0.0173808), 0.00181211))) + +if (PUB_TODAY_AVG < 0.815, -6.47154E-4, if (DAY_PD_HITS_RATIO < 1.53, -0.00676558, if (SCIENCE < 0.05, if (PREV_DAY_HITS < 5.5, if (SUPERDUPER_AVG < 0.27, if (ISTITLE_AVG < 0.95, 0.00474205, -0.00112826), -0.00194945), 0.00633626), 0.0126675))) + +if (MAX_SCORE < 347896.0, if (NUM_WORDS < 2.5, -8.59477E-5, -0.00464466), if (LOCALNEWS < 0.105, if (PREV_DAY_HITS < 17.5, 8.61947E-4, -0.00908692), if (SUPERDUPER_AVG < 0.415, if (SPORTS < 0.125, 0.00451276, 0.0182081), -0.0128104))) + +if (SCIENCE < 0.365, if (MAX_SCORE < 588664.0, if (SUPERDUPER_AVG < 0.115, 4.73474E-4, if (MAX_SCORE < 282998.0, -0.00203992, if (SCIENCE < 0.105, if (SPORTS < 0.465, 0.00173095, -0.00632811), -0.013829))), -0.0095913), -0.00990551) + +if (NATIONALNEWS < 0.105, -6.2577E-4, if (SPORTS < 0.13, if (DAY_WEEK_AVG_RATIO < 9.235, if (DAY_WEEK_AVG_RATIO < 0.505, 0.00990844, 2.4663E-4), -0.0117063), if (MAX_SCORE < 277259.0, if (DAY_WEEK_AVG_RATIO < 1.955, -0.00262119, 0.0102735), 0.0198781))) + +if (MAX_SCORE < 382346.0, if (MAX_SCORE < 378950.0, -2.01382E-4, -0.0116932), 
if (MAX_SCORE < 385719.0, 0.0202474, if (AVG_RANK < 8.27, if (PREV_DAY_HITS < 9.5, 3.03439E-4, -0.0119779), if (ENTERTAINMENT < 0.315, 0.00225595, 0.0201995)))) + +if (DAY_PD_HITS_RATIO < 47.0, if (DAY_PD_HITS_RATIO < 27.25, if (DAY_LW_DAY_HITS_RATIO < 31.5, if (LOCALNEWS < 0.765, -3.99432E-4, 0.00362509), if (DAY_WEEK_AVG_RATIO < 3.98, -0.00769823, if (DAY_WEEK_AVG_RATIO < 5.4, 0.018687, 0.00240302))), -0.00932172), 0.00902439) + +if (LW_DAY_HITS < 2.5, if (MIN_RANK < 7.0, -1.57235E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.555, if (INTLNEWS < 0.225, 0.00402381, 0.0196079), if (DAY_WEEK_AVG_RATIO < 6.01, if (DAY_WEEK_AVG_RATIO < 3.805, 2.36934E-4, 0.00609122), -0.0040371))), -0.00301446) + +if (DAY_PD_HITS_RATIO < 0.085, if (AVG_RANK < 8.9, 0.0124264, 1.8696E-4), if (PREV_DAY_HITS < 26.5, if (PREV_DAY_HITS < 19.5, -3.01051E-5, if (MAX_SCORE < 294152.0, 0.010709, -0.00331498)), if (WEEKAVG < 10.785, -0.0102542, -1.3417E-4))) + +if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 4.835, -7.25075E-5, if (ENTERTAINMENT < 0.05, if (ISABSTRACT_AVG < 0.83, if (INTLNEWS < 0.635, -0.00614185, if (MAX_MIN_RANK < 1.0, -0.00349054, 0.0109974)), 0.0043811), 0.00439437)), 0.00656011) + +if (INTLNEWS < 0.185, if (INTLNEWS < 0.115, if (DAY_PD_HITS_RATIO < 0.105, 0.016428, if (AVG_RANK < 8.365, if (ISTITLE_AVG < 0.435, if (DAY_PD_HITS_RATIO < 11.25, 0.00358374, -0.0162655), -7.99475E-4), -0.00198299)), -0.00662532), 8.7188E-4) + +if (WEEKAVG < 0.64, -4.7051E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.965, 2.24471E-4, if (BUSINESS < 0.53, if (LOCALNEWS < 0.27, 0.00103714, if (NATIONALNEWS < 0.155, 0.0140465, -0.00245531)), if (BUSINESS < 0.645, 0.0237968, 0.00736313)))) + +if (LOCALNEWS < 0.05, -5.73509E-4, if (MAX_SCORE < 253515.0, -0.00114612, if (SUPERDUPER_AVG < 0.315, if (MAX_SCORE < 255248.0, 0.0174812, if (PUB_TODAY_AVG < 0.05, -0.00327708, if (DAY_PD_HITS_RATIO < 0.425, 0.0118621, 0.0033546))), -0.00154643))) + +if (MISC < 0.105, if (DAY_WEEK_AVG_RATIO < 10.06, if (DAY_WEEK_AVG_RATIO < 
9.235, if (MISC < 0.05, if (WEEKAVG < 0.215, -0.00242466, if (DAY_PREV_DAY_HITS_FRAC < 0.985, -9.73666E-5, 0.00142133)), -0.00753159), -0.00863543), 0.0086817), 0.0102311) + +if (DAY_PD_HITS_RATIO < 47.5, if (DAY_PD_HITS_RATIO < 32.5, if (DAY_PREV_DAY_HITS_FRAC < 0.405, if (PUB_TODAY_AVG < 0.155, if (DAY_WEEK_AVG_RATIO < 0.67, -0.00300426, 0.0155993), -1.60259E-4), -3.24256E-4), -0.0118989), if (MAX_SCORE < 286380.0, 0.0153486, 6.7452E-4)) + +if (SUPERDUPER_AVG < 0.105, 9.4466E-4, if (MAX_SCORE < 277301.0, if (ISABSTRACT_AVG < 0.39, if (DAY_PREV_DAY_HITS_FRAC < 0.795, -9.24515E-4, if (SPORTS < 0.275, if (ENTERTAINMENT < 0.05, -0.00695203, 0.00896934), -0.0197272)), 1.67123E-4), 5.7116E-4)) + +if (DAY_WEEK_AVG_RATIO < 3.635, if (DAY_LW_DAY_HITS_RATIO < 21.25, if (PREV_DAY_HITS < 19.5, -1.99036E-4, if (INTLNEWS < 0.25, -0.00106649, 0.0081603)), -0.0104178), if (DAY_WEEK_AVG_RATIO < 3.845, if (DAY_HITS_FRAC < 0.94, 0.00340684, 0.0209321), 0.00113853)) + +if (REGIONALNEWS < 0.275, if (LAW < 0.105, if (DAY_WEEK_AVG_RATIO < 6.01, 1.36175E-4, if (NATIONALNEWS < 0.31, if (DAY_HITS_FRAC < 0.895, 0.00575266, if (AVG_RANK < 7.53, -0.0119194, -0.00112094)), -0.0166441)), 0.00794833), -0.0103064) + +if (TOPSTORY < 0.315, -7.18271E-4, if (DAY_PD_HITS_RATIO < 0.13, -0.0105571, if (DAY_WEEK_AVG_RATIO < 5.82, if (DAY_WEEK_AVG_RATIO < 4.82, if (DAY_LW_DAY_HITS_RATIO < 11.5, 0.00414548, -0.010294), 0.0157636), if (ISABSTRACT_AVG < 0.135, -0.0110257, 0.00663564)))) + +if (MAX_SCORE < 362776.0, if (MAX_SCORE < 361504.0, 1.23708E-4, 0.0215766), if (INTLNEWS < 0.155, if (WEEKAVG < 2.36, -0.00603082, 0.00760337), if (TOPSTORY < 0.275, -7.04669E-4, if (PUB_TODAY_AVG < 0.86, 0.0139844, -0.00288551)))) + +if (REGIONALNEWS < 0.115, if (DAY_PREV_DAY_HITS_FRAC < 0.725, if (DAY_LW_DAY_HITS_RATIO < 21.25, if (MAX_SCORE < 242944.0, 4.82612E-4, -0.00179648), -0.0123423), 6.23923E-4), if (DAY_HITS < 4.5, if (DAY_PREV_DAY_HITS_FRAC < 0.87, 0.00798437, -0.0109132), 0.0140617)) + +if (MAX_SCORE < 
322221.0, 4.09287E-4, if (MAX_SCORE < 334601.0, -0.00880555, if (AVG_RANK < 7.58, if (SUPERDUPER_AVG < 0.295, -0.00215568, -0.0124233), if (NATIONALNEWS < 0.21, 0.00160963, if (ISABSTRACT_AVG < 0.185, -0.00703363, 0.00777402))))) + +if (ENTERTAINMENT < 0.21, -1.9687E-4, if (DAY_PD_HITS_RATIO < 15.75, if (SUPERDUPER_AVG < 0.415, if (MAX_MIN_RANK < 3.0, -0.00268736, if (MAX_MIN_RANK < 5.0, 0.0078927, if (DAY_PD_HITS_RATIO < 2.835, 0.00387939, -0.00410318))), -0.00538175), 0.0142121)) + +if (SPORTS < 0.415, -3.44051E-5, if (MAX_MIN_RANK < 1.0, -0.00501256, if (SUPERDUPER_AVG < 0.05, if (MAX_SCORE < 229196.0, -1.5078E-4, if (MAX_SCORE < 258856.0, if (ISTITLE_AVG < 0.355, 0.0314869, 0.00734956), 0.00421683)), 8.4287E-4))) + +if (MAX_MIN_RANK < 9.0, if (MAX_SCORE < 382719.0, if (MAX_SCORE < 362503.0, 3.58027E-5, if (MAX_SCORE < 364403.0, -0.0154942, -0.00276027)), 0.00210644), if (WEEKAVG < 1.36, if (MAX_SCORE < 269970.0, -0.00118638, 0.0203373), -0.00169747)) + +if (PREV_DAY_HITS < 26.5, if (MAX_SCORE < 187757.0, -0.00124276, 4.03197E-4), if (DAY_PREV_DAY_HITS_FRAC < 0.795, if (DAY_PREV_DAY_HITS_FRAC < 0.675, -0.00288805, 0.00935152), if (WEEKAVG < 11.93, if (DAY_PD_HITS_RATIO < 0.27, -0.00585669, -0.0173104), 0.003142))) + +if (MAX_MIN_RANK < 7.0, -5.68858E-4, if (PUB_TODAY_AVG < 0.885, if (TOPSTORY < 0.27, if (INTLNEWS < 0.365, -0.0025302, if (AVG_RANK < 6.9, -0.00341549, 0.00979915)), 0.0137197), if (DAY_HITS < 10.5, 0.0179211, -2.30543E-4))) + +if (MIN_RANK < 7.0, -6.2463E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.475, if (AVG_RANK < 9.745, 0.0142049, -3.73764E-4), if (PUB_TODAY_AVG < 0.27, if (MAX_SCORE < 276860.0, -0.00522975, 0.00307189), if (PREV_DAY_HITS < 1.5, 2.5413E-4, 0.00490254)))) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking04.expression b/searchlib/src/test/files/ranking04.expression new file mode 100644 index 00000000000..087d305cd95 --- /dev/null +++ b/searchlib/src/test/files/ranking04.expression @@ -0,0 +1,103 @@ +if (AVG_SCORE < 
236210.0, if (AVG_SCORE < 151678.0, 0.205803, 0.214904), if (ISABSTRACT_AVG < 0.155, if (WEEKAVG < 0.93, if (TOPSTORY < 0.365, 0.234927, 0.262252), if (MAX_SCORE < 271454.0, 0.236303, 0.251477)), 0.224603)) + +if (AVG_SCORE < 240282.0, if (AVG_SCORE < 153656.0, -0.0157043, -0.00743688), if (ISTITLE_AVG < 0.705, if (WEEKAVG < 0.93, 0.0116703, if (MAX_SCORE < 266499.0, 0.00213746, if (TWO_DAY_WF < 0.826656, 0.0214705, 0.0350738))), 0.00284844)) + +if (AVG_SCORE < 239849.0, if (AVG_SCORE < 230612.0, -0.0105243, -2.07603E-4), if (ISTITLE_AVG < 0.95, if (MAX_MIN_SCORE < 36505.8, if (WEEKAVG < 10.925, 0.0103073, 0.0445006), if (MAX_SCORE < 267687.0, 0.00115576, 0.023751)), 0.00109943)) + +if (AVG_SCORE < 242149.0, if (AVG_SCORE < 153383.0, -0.0131014, if (WEEKAVG < 1.5, -0.00720755, -3.77073E-4)), if (ISTITLE_AVG < 0.705, if (MAX_MIN_SCORE < 36505.0, if (BUSINESS < 0.13, 0.0164936, 0.00560036), 0.0218971), 0.00392608)) + +if (MAX_SCORE < 264139.0, if (MIN_SCORE < 222136.0, -0.0079708, 0.00140823), if (ISABSTRACT_AVG < 0.315, if (DAY_WEEK_AVG_DERIV < 10.5, if (BUSINESS < 0.105, 0.016512, 0.00726199), 0.0306897), if (SPORTS < 0.42, -9.76569E-4, 0.0183973))) + +if (AVG_SCORE < 231394.0, -0.00698348, if (ISTITLE_AVG < 0.645, if (MAX_SCORE < 271880.0, 0.00669893, if (AVG_SCORE < 281369.0, 0.0209096, 0.0117951)), if (MIN_SCORE < 318875.0, if (WEEKAVG < 1.5, -0.00428011, 0.00596324), 0.0116652))) + +if (MIN_SCORE < 222028.0, if (SUPERDUPER_AVG < 0.27, -0.00761706, if (INTLNEWS < 0.535, -0.00274344, 0.0225782)), if (WEEKAVG < 0.93, if (ISTITLE_AVG < 0.71, 0.00689051, -0.00500438), if (MIN_SCORE < 319119.0, 0.00977814, 0.0200288))) + +if (MIN_SCORE < 222028.0, if (AVG_SCORE < 158974.0, -0.00918892, if (NUM_WORDS < 1.5, if (WEEKAVG < 2.93, -0.00158808, 0.0119896), -0.00568155)), if (ISTITLE_AVG < 0.95, if (ISABSTRACT_AVG < 0.155, 0.0116413, 0.00150493), -0.00110515)) + +if (AVG_SCORE < 241264.0, if (MIN_SCORE < 132718.0, -0.00978209, if (WEEKAVG < 0.93, -0.00610293, -2.95273E-4)), 
if (ISABSTRACT_AVG < 0.185, if (DAY_LW_DAY_HITS_RATIO < 11.835, 0.00900634, 0.0221056), if (WEEKAVG < 1.07, -0.00392509, 0.00615921))) + +if (AVG_SCORE < 233949.0, -0.00503156, if (ISTITLE_AVG < 0.645, if (TOPSTORY < 0.05, if (PUB_TODAY_AVG < 0.105, -0.00286006, if (DAY_PD_HITS_RATIO < 0.65, 0.0275142, 0.00616295)), 0.0129407), if (DAY_LW_DAY_HITS_DERIV < 7.5, -0.00186065, 0.00771893))) + +if (AVG_SCORE < 241955.0, if (MAX_SCORE < 170767.0, -0.00748858, -0.00266952), if (MIN_SCORE < 321219.0, if (TOPSTORY < 0.05, -0.00143781, if (AVG_RANK < 9.735, 0.00571239, 0.0197833)), if (WEEKAVG < 0.93, 0.00690536, 0.017048))) + +if (AVG_SCORE < 245333.0, if (TOPSTORY < 0.355, -0.00376047, 0.00963479), if (WEEKAVG < 0.93, 7.35298E-4, if (ISABSTRACT_AVG < 0.705, if (TWO_DAY_WF < 0.872534, if (MAX_MIN_SCORE < 52145.2, 0.00973324, -0.00849394), 0.0177153), 3.37073E-4))) + +if (MIN_SCORE < 219800.0, -0.00352861, if (ISTITLE_AVG < 0.73, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 7.5, if (PUB_TODAY_AVG < 0.11, 0.00174614, 0.0120552), 0.0184693), 0.00396387), if (DAY_LW_DAY_HITS_DERIV < 7.5, -0.00165207, 0.00838864))) + +if (AVG_SCORE < 332830.0, if (AVG_SCORE < 221523.0, if (ISABSTRACT_AVG < 0.845, -0.00518819, if (MAX_MIN_SCORE < 41116.5, -0.0022638, 0.010561)), if (MIN_SCORE < 275020.0, if (ISABSTRACT_AVG < 0.13, 0.00532774, -0.00140785), -0.00673306)), 0.0089962) + +if (AVG_SCORE < 230217.0, -0.00305127, if (WEEKAVG < 0.93, 7.79225E-4, if (BUSINESS < 0.05, if (LOCALNEWS < 0.47, if (MAX_MIN_SCORE < 28240.8, 0.0179967, 0.00806848), -0.00831202), if (AVG_SCORE < 340223.0, 2.31883E-4, 0.0140119)))) + +if (AVG_SCORE < 245333.0, -0.00229594, if (ISABSTRACT_AVG < 0.315, if (LOCALNEWS < 0.05, if (DAY_LW_DAY_HITS_RATIO < 1.75, -0.0039683, 0.00569577), if (PREV_DAY_HITS < 8.5, 0.007769, if (DAY_HITS < 2.5, 0.0344185, 0.0117709))), -6.76423E-4)) + +if (MAX_SCORE < 249988.0, if (INTLNEWS < 0.105, -0.00430418, if (AVG_SCORE < 158414.0, -0.00416318, if (MAX_SCORE < 242790.0, 0.00543383, 
-0.00915253))), if (ISABSTRACT_AVG < 0.155, 0.00464962, if (SPORTS < 0.365, -0.00278462, 0.0111898))) + +if (AVG_SCORE < 249330.0, -0.00159136, if (WEEKAVG < 1.07, if (TOPSTORY < 0.07, -0.00243507, 0.00585214), if (TWO_DAY_WF < 0.9518, if (EIGHT_HOUR_WF < 0.108586, if (INTLNEWS < 0.42, 0.00435459, 0.0191599), -0.00770634), 0.013571))) + +if (AVG_SCORE < 332253.0, if (TOPSTORY < 0.355, if (BUSINESS < 0.05, 6.41958E-4, -0.00274201), 0.00886024), if (DAY_PD_HITS_DERIV < 1.5, if (AVG_SCORE < 336554.0, 0.0191918, if (SUPERDUPER_AVG < 0.415, -0.00116436, 0.0183934)), 0.0116471)) + +if (MAX_SCORE < 249072.0, if (INTLNEWS < 0.185, -0.00383726, if (ISABSTRACT_AVG < 0.61, -0.00202529, if (WEEKAVG < 0.785, -0.0038571, if (AVG_SCORE < 169471.0, 0.00474293, 0.0278332)))), if (BUSINESS < 0.05, 0.00491784, -7.28088E-4)) + +if (AVG_SCORE < 223608.0, -0.00242896, if (PREV_DAY_HITS < 7.5, if (ISABSTRACT_AVG < 0.05, if (MAX_MIN_RANK < 7.0, 0.00151785, 0.0118374), -0.00165444), if (SPORTS < 0.34, if (DAY_WEEK_AVG_DERIV < -1.93, -0.00307953, 0.00717407), 0.0154963))) + +if (MIN_SCORE < 319241.0, if (INTLNEWS < 0.73, if (TOPSTORY < 0.355, if (NUM_WORDS < 2.5, if (PREV_DAY_HITS < 3.5, -0.00228523, 0.00146239), -0.00850081), 0.00776825), 0.0160753), if (DAY_LW_DAY_HITS_DERIV < 12.5, 0.00439757, 0.0197836)) + +if (INTLNEWS < 0.705, if (TOPSTORY < 0.355, if (MIN_SCORE < 323992.0, if (LIFESTYLE < 0.13, if (LOCALNEWS < 0.315, -2.91455E-4, -0.00459663), -0.00868291), if (MIN_SCORE < 325835.0, 0.0255955, 0.00222024)), 0.00689548), 0.0147049) + +if (DAY_WEEK_AVG_DERIV < 41.5, if (AVG_SCORE < 222620.0, -0.00230434, if (ISTITLE_AVG < 0.95, if (DAY_LW_DAY_HITS_RATIO < 2.9, if (ISABSTRACT_AVG < 0.685, -9.8145E-4, 0.0175646), if (FOUR_HOUR_WF < 0.0415469, 0.00693887, 3.52143E-4)), -0.00149738)), 0.0156711) + +if (BUSINESS < 0.105, if (DAY_WEEK_AVG_RATIO < 5.705, if (AVG_SCORE < 155902.0, -0.0033031, if (WEEKAVG < 0.64, if (MAX_SCORE < 363895.0, -0.00281287, if (MAX_MIN_SCORE < 19200.5, -0.00201482, 
0.0209412)), 0.00313704)), 0.0198315), -0.0020926) + +if (DAY_PD_HITS_DERIV < -8.5, if (SPORTS < 0.42, if (TOPSTORY < 0.05, -0.00256178, 0.0069554), 0.0189865), if (MAX_SCORE < 455757.0, if (LIFESTYLE < 0.13, if (DAY_WEEK_AVG_RATIO < 4.535, -0.00125806, 0.00573954), -0.00869664), 0.00982766)) + +if (AVG_SCORE < 158740.0, -0.00306382, if (WEEKAVG < 0.93, if (TOPSTORY < 0.365, -0.00140654, 0.00834836), if (BUSINESS < 0.05, if (MAX_MIN_SCORE < 52064.2, if (MAX_MIN_RANK < 7.0, 0.00487329, 0.0143334), -0.00637212), -1.62153E-4))) + +if (DAY_PD_HITS_DERIV < -4.5, if (LOCALNEWS < 0.355, if (HEALTH < 0.05, if (MAX_MIN_SCORE < 42320.2, 1.65828E-4, if (TWELVE_HOUR_WF < 0.0923295, 0.00978237, -0.00925785)), 0.0176032), -0.00980315), if (DAY_WEEK_AVG_DERIV < 65.215, -9.40015E-4, 0.0153051)) + +if (INTLNEWS < 0.53, if (DAY_PD_HITS_RATIO < 0.305, if (SPORTS < 0.115, 2.04707E-4, if (MAX_SCORE < 258205.0, 0.00170055, if (AVG_SCORE < 263393.0, 0.0247726, 0.00690842))), -0.00116708), if (DAY_LW_DAY_HITS_DERIV < 6.5, -5.66203E-5, 0.0136829)) + +if (TOPSTORY < 0.355, if (MAX_SCORE < 455757.0, if (PREV_DAY_HITS < 59.5, if (MIN_SCORE < 132399.0, -0.00370024, -2.34946E-5), 0.0131047), if (SUPERDUPER_AVG < 0.105, -0.00138025, 0.0159936)), if (AVG_RANK < 9.55, 0.00325951, 0.0248619)) + +if (TOPSTORY < 0.21, if (PREV_DAY_HITS < 40.5, if (DAY_WEEK_AVG_RATIO < 2.665, -0.00132885, if (AVG_SCORE < 321396.0, 3.75419E-4, 0.0087578)), 0.0103933), if (MAX_SCORE < 258688.0, -0.00128842, if (DAY_LW_DAY_HITS_RATIO < 10.5, 0.00789361, -0.00472212))) + +if (LIFESTYLE < 0.13, if (MAX_SCORE < 170767.0, -0.00265193, if (REGIONALNEWS < 0.225, if (INTLNEWS < 0.73, if (AVG_SCORE < 446461.0, if (DAY_LW_DAY_HITS_RATIO < 11.835, 3.02165E-4, 0.00420729), 0.0104384), 0.0112014), -0.0150576)), -0.00724807) + +if (TOPSTORY < 0.21, if (LW_DAY_HITS < 0.5, -2.72826E-4, -0.0037519), if (MAX_SCORE < 249540.0, -0.00257574, if (DAY_WEEK_AVG_DERIV < 3.285, 0.00890149, if (BUSINESS < 0.05, if (EIGHT_HOUR_WF < 0.108586, 
-0.00485603, 0.0137625), -0.0117843)))) + +if (BUSINESS < 0.05, if (PREV_DAY_HITS < 2.5, if (MAX_MIN_RANK < 9.0, -1.15002E-4, 0.013627), 0.00426589), if (SPORTS < 0.05, if (WEEKAVG < 1.07, -0.00209775, 0.00207151), if (MAX_SCORE < 282458.0, -0.00363773, -0.0170095))) + +if (PREV_DAY_HITS < 6.5, if (INTLNEWS < 0.73, -8.40229E-4, 0.0123079), if (TWO_DAY_WF < 0.647854, -0.00158583, if (DAY_WEEK_AVG_RATIO < 0.525, -0.00426295, if (TWELVE_HOUR_WF < 0.0863095, 0.010427, if (WEEKAVG < 2.5, -0.00797465, 0.00511912))))) + +if (DAY_WEEK_AVG_DERIV < 43.215, if (MAX_SCORE < 171575.0, -0.00279218, if (INTLNEWS < 0.73, if (LIFESTYLE < 0.13, if (AVG_RANK < 5.29, if (AVG_RANK < 4.145, -0.00292507, 0.0109271), 2.53288E-4), -0.00584756), 0.0132182)), 0.00991648) + +if (DAY_WEEK_AVG_RATIO < 0.305, -0.00885189, if (NATIONALNEWS < 0.105, -5.31735E-4, if (TWELVE_HOUR_WF < 0.685185, if (SPORTS < 0.465, if (ISTITLE_AVG < 0.105, if (DAY_PD_HITS_RATIO < 0.365, -0.00269593, 0.0112221), 8.19631E-4), 0.0143634), -0.00725209))) + +if (AVG_SCORE < 340384.0, if (AVG_SCORE < 336375.0, -5.08552E-4, -0.0113797), if (MIN_SCORE < 326287.0, 0.0209452, if (ONE_DAY_WF < 0.567629, -0.00150548, if (ENTERTAINMENT < 0.05, if (AVG_SCORE < 375038.0, 0.0210937, 0.00330692), -0.00420613)))) + +if (AVG_SCORE < 245150.0, -7.56654E-4, if (FOUR_HOUR_WF < 0.436508, if (TWO_DAY_WF < 0.876894, if (DAY_HITS < 4.5, 0.00184962, -0.00446764), if (AVG_SCORE < 247846.0, 0.0195391, if (MAX_SCORE < 264008.0, -0.0119329, 0.0045953))), -0.0101769)) + +if (TOPSTORY < 0.39, if (SPORTS < 0.73, -1.67518E-4, if (WEEKAVG < 0.785, 9.19437E-5, 0.00941928)), if (AVG_RANK < 9.55, if (AVG_RANK < 8.9, if (MAX_SCORE < 270912.0, 0.0142439, -7.62818E-4), -0.00563315), 0.019371)) + +if (MAX_MIN_SCORE < 16050.8, -0.00187676, if (LW_DAY_HITS < 3.5, if (MAX_SCORE < 178349.0, -0.00168833, if (WEEKAVG < 0.93, -9.59413E-5, if (FOUR_HOUR_WF < 0.0415469, 0.00437212, if (MAX_MIN_SCORE < 26173.2, 0.013711, -0.00373247)))), -0.00746144)) + +if (SPORTS < 
0.73, if (INTLNEWS < 0.53, -6.62401E-4, if (TWO_DAY_WF < 0.564784, 0.0155579, if (DAY_WEEK_AVG_RATIO < 4.08, -0.00298146, 0.015513))), if (DAY_PD_HITS_RATIO < 0.31, 0.0153445, if (SUPERDUPER_AVG < 0.155, 0.00486013, -0.00508073))) + +if (MAX_MIN_RANK < 7.0, -2.14923E-4, if (ISTITLE_AVG < 0.55, if (MAX_MIN_SCORE < 41838.0, if (MAX_SCORE < 235701.0, 0.00139705, 0.0257242), 0.00119704), if (NATIONALNEWS < 0.115, -0.00295678, if (MIN_SCORE < 211652.0, 4.9411E-4, 0.0246476)))) + +if (SPORTS < 0.47, if (SPORTS < 0.05, 2.69559E-4, -0.00314174), if (MAX_SCORE < 187840.0, -0.00191667, if (ISABSTRACT_AVG < 0.415, if (MAX_MIN_RANK < 5.0, -0.00316349, 0.00674809), if (PREV_DAY_HITS < 2.5, 0.00653246, 0.0230973)))) + +if (NATIONALNEWS < 0.105, -8.06543E-4, if (DAY_PD_HITS_RATIO < 6.75, if (DAY_WEEK_AVG_RATIO < 3.505, if (MIN_RANK < 1.0, 0.0183563, if (NATIONALNEWS < 0.13, 0.0095701, 0.00111755)), 0.0178329), if (MAX_MIN_SCORE < 42048.8, 0.00161585, -0.0115306))) + +if (DAY_LW_DAY_HITS_RATIO < 2.225, if (ONE_DAY_WF < 0.370833, if (ENTERTAINMENT < 0.415, -0.00110057, 0.00599021), -0.0100266), if (LOCALNEWS < 0.645, if (FOUR_HOUR_WF < 0.0402559, 0.00156752, -0.00187101), if (TWELVE_HOUR_WF < 0.183333, -7.56081E-4, 0.0221542))) + +if (DAY_WEEK_AVG_DERIV < -0.5, if (TOPSTORY < 0.185, if (SPORTS < 0.775, if (INTLNEWS < 0.435, -8.58072E-4, if (TWO_DAY_WF < 0.607692, 0.0133903, -7.14716E-4)), 0.00949831), 0.00678994), if (LIFESTYLE < 0.13, -2.0835E-4, -0.00685168)) + +if (MAX_MIN_RANK < 3.0, if (TOPSTORY < 0.365, if (INTLNEWS < 0.585, if (DAY_WEEK_AVG_DERIV < 3.36, 0.00152356, if (MAX_MIN_SCORE < 41565.0, -0.00224511, -0.0154069)), 0.00771362), 0.0141704), if (POLITICS < 0.27, -7.41127E-4, 0.0123356)) + +if (REGIONALNEWS < 0.21, if (INTLNEWS < 0.415, if (NATIONALNEWS < 0.105, -7.72108E-4, if (TWO_DAY_WF < 0.587963, -0.00126111, 0.00367856)), if (MAX_MIN_SCORE < 41771.0, 6.62317E-4, if (MAX_MIN_SCORE < 45378.8, 0.0229089, 0.00361364))), -0.0100665) + +if (MAX_MIN_SCORE < 46045.5, if 
(ISTITLE_AVG < 0.415, if (ONE_DAY_WF < 0.0658009, -0.00148948, if (MIN_SCORE < 226178.0, -2.91172E-4, 0.00500722)), if (MAX_MIN_SCORE < 45527.5, -9.53746E-4, 0.00916764)), if (EIGHT_HOUR_WF < 0.0267094, -5.42017E-4, -0.00644438)) + +if (DAY_PD_HITS_DERIV < -4.5, if (DAY_PD_HITS_RATIO < 0.16, -0.00116809, if (MAX_SCORE < 178149.0, -0.00327374, if (MIN_SCORE < 172046.0, 0.017586, if (MIN_SCORE < 221890.0, -0.00260826, if (MIN_SCORE < 227242.0, 0.0209327, 0.00475773))))), -4.92544E-4) + +if (DAY_LW_DAY_HITS_RATIO < 0.83, if (AVG_SCORE < 237778.0, -0.00220195, -0.0171686), if (SPORTS < 0.79, if (SPORTS < 0.05, 2.36122E-4, if (MAX_MIN_SCORE < 46031.8, if (MAX_MIN_SCORE < 7978.0, -0.011323, -6.09338E-4), -0.0077154)), 0.00365925)) + +if (DAY_WEEK_AVG_DERIV < -3.93, 0.00918467, if (NATIONALNEWS < 0.105, -5.8362E-4, if (EIGHT_HOUR_WF < 0.480769, if (ISTITLE_AVG < 0.155, if (DAY_PD_HITS_RATIO < 0.39, -0.00366457, if (DAY_HITS < 5.5, 0.0162937, -0.00117921)), 0.001006), -0.00659191))) + +if (SPORTS < 0.705, -5.56203E-4, if (MAX_SCORE < 165481.0, -0.00498224, if (SUPERDUPER_AVG < 0.315, if (DAY_WEEK_AVG_DERIV < 0.36, if (AVG_RANK < 5.73, 0.00571545, 0.0237979), if (DAY_PD_HITS_DERIV < 3.5, -5.91932E-4, 0.011012)), -0.00679759))) + +if (REGIONALNEWS < 0.21, if (MISC < 0.105, if (TWO_DAY_WF < 0.492284, if (FOUR_HOUR_WF < 0.00462963, if (MAX_MIN_SCORE < 37344.8, -0.00252831, 0.00347229), if (MIN_SCORE < 216377.0, -0.00279429, -0.0169557)), 4.0157E-4), 0.00880965), -0.010143) + +if (DAY_LW_DAY_HITS_RATIO < 0.645, -0.00704886, if (AVG_SCORE < 291527.0, if (MAX_SCORE < 287802.0, 1.91564E-4, if (ISTITLE_AVG < 0.95, 0.0133402, 5.16175E-4)), if (AVG_SCORE < 317516.0, -0.0121501, if (SPORTS < 0.315, -0.00154239, 0.00704715)))) + +if (INTLNEWS < 0.705, if (DAY_PD_HITS_RATIO < 5.045, if (WEEKAVG < 0.785, -9.4492E-4, if (ONE_DAY_WF < 0.644009, 9.62924E-4, 0.00910092)), if (LOCALNEWS < 0.295, if (ISTITLE_AVG < 0.185, -0.0103214, -0.0014478), 0.0144627)), 0.00968091) + +if (ONE_DAY_WF < 
0.328096, if (ONE_DAY_WF < 0.246773, if (TOPSTORY < 0.47, 1.96656E-4, 0.0166177), if (MIN_SCORE < 224862.0, 0.00164784, if (AVG_SCORE < 264251.0, 0.0251889, if (AVG_SCORE < 339362.0, -0.00430853, 0.0156826)))), -8.83446E-4) + +if (MISC < 0.105, if (MIN_SCORE < 445730.0, if (MIN_SCORE < 371741.0, if (REGIONALNEWS < 0.21, 5.14634E-4, -0.0078218), if (INTLNEWS < 0.145, if (ISTITLE_AVG < 0.105, 0.00884708, -0.00706471), -0.0155934)), 0.00871224), 0.0109724) + +if (AVG_RANK < 5.07, if (MIN_SCORE < 237760.0, -8.82014E-5, 0.0155208), if (ENTERTAINMENT < 0.05, if (MAX_MIN_SCORE < 14449.2, if (TOPSTORY < 0.225, -0.00152853, -0.0123906), if (TOPSTORY < 0.365, 3.30374E-4, 0.00622094)), -0.0026428)) + +if (LW_DAY_HITS < 0.5, if (MISC < 0.105, 2.26343E-4, 0.00938518), if (MAX_SCORE < 254898.0, if (MAX_SCORE < 249948.0, -0.0010896, 0.0106866), if (ONE_DAY_WF < 0.537727, if (MAX_MIN_RANK < 5.0, -6.68387E-4, -0.0113993), -0.0159024))) + +if (WEEKAVG < 0.93, if (MIN_RANK < 1.0, 0.0105569, -0.00122424), if (HEALTH < 0.105, if (SPORTS < 0.47, if (NATIONALNEWS < 0.105, -0.00127666, 0.00259145), 0.00361046), if (PREV_DAY_HITS < 4.5, -0.00160398, 0.0210051))) + +if (MAX_MIN_SCORE < 62647.2, if (ISTITLE_AVG < 0.05, if (MAX_MIN_SCORE < 45894.2, if (DAY_PD_HITS_RATIO < 0.675, if (MAX_MIN_SCORE < 41917.0, if (MIN_SCORE < 227128.0, 0.00157123, -0.0091657), 0.00650689), 7.34575E-4), -0.00919073), -2.54308E-4), 0.0114536) + +if (BUSINESS < 0.05, if (DAY_WEEK_AVG_RATIO < 0.505, if (LOCALNEWS < 0.115, if (DAY_WEEK_AVG_RATIO < 0.36, if (DAY_WEEK_AVG_RATIO < 0.275, 0.0111617, -0.00883723), if (DAY_PD_HITS_RATIO < 0.185, 0.0229969, -9.84798E-4)), -0.00222586), 2.55018E-4), -0.00124642) + +if (DAY_LW_DAY_HITS_RATIO < 0.645, -0.0067218, if (WEEKAVG < 26.715, if (AVG_RANK < 9.225, 1.58773E-4, if (TWO_DAY_WF < 0.976136, 0.00457414, if (DAY_WEEK_AVG_DERIV < 5.785, -0.00480445, if (DAY_LW_DAY_HITS_DERIV < 27.5, 0.016426, -0.00308634)))), -0.00901968)) + +if (LOCALNEWS < 0.295, if (LOCALNEWS < 0.155, 
-3.03017E-4, if (MIN_SCORE < 222112.0, 2.10471E-4, if (ENTERTAINMENT < 0.05, if (ISTITLE_AVG < 0.185, 0.0201204, 0.00543219), -0.00443157))), if (DAY_PD_HITS_DERIV < 6.5, -0.00312853, 0.0100428)) + +if (DAY_PD_HITS_RATIO < 32.5, if (DAY_PD_HITS_DERIV < 20.5, if (MAX_MIN_RANK < 7.0, -1.09244E-4, if (DAY_LW_DAY_HITS_DERIV < 9.5, if (MIN_SCORE < 215422.0, 4.54762E-4, if (ISABSTRACT_AVG < 0.225, 0.0146831, -7.90241E-4)), -0.00837207)), -0.00476979), 0.0102445) + +if (MAX_MIN_SCORE < 45353.5, if (MAX_MIN_SCORE < 44594.8, if (MISC < 0.105, -9.71934E-5, 0.00961124), 0.00683718), if (TWELVE_HOUR_WF < 0.0451153, if (TOPSTORY < 0.05, if (ISTITLE_AVG < 0.79, -0.0112723, 0.00207341), 0.00460994), -0.00460433)) + +if (SPORTS < 0.47, if (DAY_PD_HITS_RATIO < 4.725, if (DAY_WEEK_AVG_RATIO < 2.74, -3.01001E-4, if (INTLNEWS < 0.415, if (MIN_SCORE < 337522.0, 0.0052542, 0.0240026), -0.00664368)), if (DAY_WEEK_AVG_RATIO < 4.15, -0.00601665, 0.00254873)), 0.00244646) + +if (TOPSTORY < 0.47, if (LW_DAY_HITS < 0.5, 5.1525E-4, if (ONE_DAY_WF < 0.398413, if (DAY_PD_HITS_RATIO < 0.61, -0.00298415, 0.00362271), if (AVG_SCORE < 242552.0, -0.004241, -0.0152224))), if (MAX_SCORE < 264598.0, 0.0026109, -0.013849)) + +if (PREV_DAY_HITS < 59.5, if (MAX_SCORE < 455608.0, if (DAY_PD_HITS_DERIV < 64.0, if (SUPERDUPER_AVG < 0.725, if (MIN_SCORE < 132886.0, -0.0021355, 2.53917E-4), -0.0118492), 0.00977612), if (INTLNEWS < 0.145, 0.0130923, -0.00429049)), 0.0115797) + +if (DAY_PD_HITS_RATIO < 0.115, -0.00460369, if (DAY_WEEK_AVG_RATIO < 0.455, 0.00654726, if (ISTITLE_AVG < 0.565, if (DAY_PD_HITS_DERIV < 38.0, if (DAY_WEEK_AVG_DERIV < -0.785, -0.00572704, 0.00211848), -0.0128642), if (MAX_SCORE < 261066.0, 1.64546E-4, -0.00330215)))) + +if (DAY_WEEK_AVG_DERIV < 8.36, if (AVG_SCORE < 266020.0, 5.44955E-4, if (TOPSTORY < 0.21, if (MAX_SCORE < 343351.0, -0.00739666, -0.00159552), 0.00316353)), if (EIGHT_HOUR_WF < 0.117802, if (AVG_SCORE < 264897.0, 0.00293963, 0.016815), -4.81606E-4)) + +if (AVG_SCORE < 
446571.0, if (DAY_WEEK_AVG_RATIO < 4.59, if (MAX_SCORE < 390560.0, -8.80057E-6, if (PREV_DAY_HITS < 2.5, if (INTLNEWS < 0.135, -0.00440415, -0.0221297), 8.10032E-4)), if (ENTERTAINMENT < 0.15, 0.00319307, 0.0160496)), 0.00840475) + +if (MAX_SCORE < 390244.0, if (AVG_SCORE < 360833.0, if (AVG_SCORE < 352194.0, 1.75857E-4, if (DAY_WEEK_AVG_DERIV < 0.855, -0.0187021, 0.00356778)), 0.00780008), if (INTLNEWS < 0.145, if (BUSINESS < 0.135, 0.00421198, -0.0102414), -0.00922112)) + +if (ONE_DAY_WF < 0.605556, -4.61057E-4, if (MIN_SCORE < 332098.0, if (MAX_SCORE < 355711.0, if (WEEKAVG < 1.5, 2.20435E-5, 0.00562666), -0.0153817), if (INTLNEWS < 0.145, 0.018971, if (SUPERDUPER_AVG < 0.275, -0.00766663, 0.0121696)))) + +if (ENTERTAINMENT < 0.05, 2.07331E-5, if (ENTERTAINMENT < 0.415, if (AVG_SCORE < 237084.0, -0.00148349, -0.00781033), if (AVG_SCORE < 340606.0, if (ONE_DAY_WF < 0.015625, 0.0143289, 0.00105025), if (WEEKAVG < 0.785, 0.00610972, -0.020138)))) + +if (DAY_LW_DAY_HITS_DERIV < 17.5, if (DAY_LW_DAY_HITS_RATIO < 16.5, 1.88349E-4, 0.0147086), if (TWO_DAY_WF < 0.743223, if (MIN_SCORE < 212511.0, -0.00155285, -0.0125926), if (TWO_DAY_WF < 0.980566, 0.00635603, if (ISTITLE_AVG < 0.15, -0.0163438, -0.00197531)))) + +if (AVG_SCORE < 281850.0, if (MAX_SCORE < 288032.0, 3.35293E-4, if (TWELVE_HOUR_WF < 0.358289, 0.00952171, -0.00693432)), if (EIGHT_HOUR_WF < 0.584928, if (MAX_MIN_SCORE < 2471.25, -0.0141419, if (MAX_MIN_SCORE < 6867.75, 0.010146, -0.00291703)), 0.00765541)) + +if (PREV_DAY_HITS < 3.5, -7.47677E-4, if (EIGHT_HOUR_WF < 0.147108, if (AVG_RANK < 7.69, -8.12926E-4, if (ISABSTRACT_AVG < 0.95, 0.00246382, if (SUPERDUPER_AVG < 0.21, 0.00464639, 0.0245523))), if (ISTITLE_AVG < 0.05, -0.0184693, -8.90194E-4))) + +if (HEALTH < 0.27, if (DAY_PD_HITS_RATIO < 5.045, 3.07572E-5, if (ISTITLE_AVG < 0.125, if (PUB_TODAY_AVG < 0.95, -0.0154824, -0.00294081), if (TWELVE_HOUR_WF < 0.226496, -0.00622506, 0.00166554))), if (ISABSTRACT_AVG < 0.17, 0.0165858, -0.00330948)) + +if 
(DAY_WEEK_AVG_DERIV < 44.86, if (SUPERDUPER_AVG < 0.39, 3.33898E-4, if (TWO_DAY_WF < 0.825226, if (MAX_MIN_SCORE < 42418.5, -0.00881739, if (EIGHT_HOUR_WF < 0.0825189, if (BUSINESS < 0.05, 0.0117745, -0.00306987), -0.0111028)), 0.00218894)), 0.00834421) + +if (MIN_SCORE < 230160.0, if (MIN_SCORE < 229092.0, -8.07888E-4, -0.0133966), if (TWO_DAY_WF < 0.518064, if (WEEKAVG < 1.5, if (AVG_SCORE < 359388.0, 0.00892968, -0.00763611), -0.00138809), if (MIN_SCORE < 232622.0, 0.0102781, -6.34821E-4))) + +if (MAX_MIN_RANK < 7.0, -2.44784E-5, if (ISTITLE_AVG < 0.55, if (DAY_WEEK_AVG_DERIV < 4.57, if (DAY_WEEK_AVG_RATIO < 0.76, -8.94147E-4, if (MIN_SCORE < 215272.0, 0.00546979, 0.0273153)), -0.00734683), if (NATIONALNEWS < 0.21, -0.00138435, 0.00911761))) + +if (DAY_WEEK_AVG_RATIO < 3.83, if (ENTERTAINMENT < 0.53, -2.83843E-4, if (ISABSTRACT_AVG < 0.21, 0.00786177, -7.5151E-4)), if (AVG_RANK < 9.465, if (INTLNEWS < 0.21, 0.00394069, -0.00383803), if (AVG_SCORE < 258669.0, 0.0034867, 0.0179637))) + +if (SUPERDUPER_AVG < 0.725, if (SPORTS < 0.685, -2.3907E-4, if (AVG_SCORE < 264275.0, if (MIN_SCORE < 219502.0, -4.38684E-4, if (MAX_RANK < 9.0, 0.00337648, 0.0167784)), -0.00334013)), if (WEEKAVG < 12.785, -0.0162992, 2.52385E-5)) + +if (ISABSTRACT_AVG < 0.895, -2.10253E-4, if (AVG_SCORE < 247839.0, if (PREV_DAY_HITS < 1.5, -3.98583E-4, if (TWO_DAY_WF < 0.551797, -6.35903E-4, if (MIN_SCORE < 141715.0, 0.00351871, if (INTLNEWS < 0.105, 0.00858437, 0.0254582)))), -0.00273198)) + +if (ONE_DAY_WF < 0.605556, if (EIGHT_HOUR_WF < 0.0411953, 4.98775E-4, -0.00236496), if (AVG_SCORE < 342691.0, if (MAX_MIN_SCORE < 6080.75, if (MIN_SCORE < 236879.0, 0.00101981, -0.0190995), 0.00142291), if (INTLNEWS < 0.275, 0.0147214, -4.75944E-4))) + +if (DAY_PD_HITS_DERIV < -13.5, if (WEEKAVG < 3.785, -0.00749366, if (PREV_DAY_HITS < 22.5, 0.0146922, if (TWO_DAY_WF < 0.822683, if (PUB_TODAY_AVG < 0.45, -0.00577822, 0.00590076), 0.00846642))), if (MISC < 0.105, -1.96119E-4, 0.0069636)) + +if 
(DAY_WEEK_AVG_DERIV < -3.93, 0.00797481, if (DAY_WEEK_AVG_RATIO < 3.865, if (DAY_WEEK_AVG_RATIO < 3.61, if (DAY_WEEK_AVG_RATIO < 3.245, -4.72322E-4, 0.00505862), -0.00837491), if (TWELVE_HOUR_WF < 0.202675, -0.00493061, if (TWELVE_HOUR_WF < 0.36039, 0.0124758, 0.00250066)))) + +if (MAX_MIN_SCORE < 60845.5, if (MAX_MIN_SCORE < 52128.0, if (MAX_MIN_SCORE < 51264.2, -1.32387E-4, 0.0106899), if (MIN_SCORE < 218318.0, -0.00277432, -0.0140369)), if (ISABSTRACT_AVG < 0.05, 0.0138962, if (MAX_MIN_SCORE < 61542.5, 0.00445669, -0.00606652))) + +if (BUSINESS < 0.315, -6.66016E-4, if (AVG_RANK < 8.535, if (MAX_MIN_SCORE < 7884.5, 0.0134214, if (ISTITLE_AVG < 0.465, if (MIN_SCORE < 217640.0, -9.67523E-4, 0.0135374), 1.47833E-4)), if (DAY_PD_HITS_RATIO < 0.235, 0.00921797, -0.00182698))) + +if (AVG_RANK < 5.29, if (AVG_SCORE < 318378.0, 0.00224509, 0.0161861), if (MAX_MIN_SCORE < 51537.0, if (MAX_MIN_SCORE < 50910.2, if (DAY_LW_DAY_HITS_DERIV < 71.0, -2.73537E-4, -0.00811121), 0.0109085), if (SPORTS < 0.415, -0.00253066, -0.0129268))) + +if (LOCALNEWS < 0.61, if (WEEKAVG < 0.5, -0.00262523, if (NATIONALNEWS < 0.105, -6.35021E-4, if (ISTITLE_AVG < 0.155, 0.005402, 6.7829E-5))), if (TWELVE_HOUR_WF < 0.133929, -0.00168908, if (PUB_TODAY_AVG < 0.535, 0.00693807, 0.0227961))) + +if (DAY_WEEK_AVG_DERIV < -0.785, -0.0021874, if (ENTERTAINMENT < 0.05, if (MAX_SCORE < 363930.0, 4.68954E-4, if (MAX_SCORE < 384272.0, if (DAY_PD_HITS_RATIO < 0.495, 0.00125669, 0.0177669), if (INTLNEWS < 0.21, 0.0060623, -0.00814847))), -0.00164111)) + +if (MIN_SCORE < 334353.0, if (NUM_WORDS < 2.5, 5.19901E-5, if (AVG_SCORE < 316903.0, if (ISTITLE_AVG < 0.05, -0.0220661, -0.00683671), -0.00100144)), if (INTLNEWS < 0.13, 0.00532815, if (MIN_SCORE < 357183.0, 0.0067984, -0.00805372))) + +if (INTLNEWS < 0.53, -4.58708E-5, if (TWELVE_HOUR_WF < 0.21385, if (LOCALNEWS < 0.05, if (TWO_DAY_WF < 0.585356, 0.00547473, if (MAX_SCORE < 244158.0, 0.00333793, -0.00880659)), 0.00966211), if (FOUR_HOUR_WF < 0.0240968, 
0.019307, 0.00126046))) + +if (BUSINESS < 0.05, if (DAY_PD_HITS_DERIV < 18.5, if (DAY_WEEK_AVG_DERIV < 12.93, if (INTLNEWS < 0.73, if (INTLNEWS < 0.315, 0.00145589, -0.00205678), 0.0114136), 0.0135475), if (MIN_RANK < 7.0, -0.0106123, 0.00101067)), -9.65519E-4) + +if (PREV_DAY_HITS < 6.5, -5.0304E-4, if (TWO_DAY_WF < 0.825345, if (ONE_DAY_WF < 0.275028, -0.00134104, 0.00516485), if (TWO_DAY_WF < 0.861643, 0.0172774, if (LOCALNEWS < 0.05, -0.00143161, if (INTLNEWS < 0.21, 0.0022491, 0.0202453))))) + +if (TWO_DAY_WF < 0.477226, if (BUSINESS < 0.685, if (MAX_MIN_SCORE < 41352.5, if (SUPERDUPER_AVG < 0.315, -0.00389642, -0.0130707), if (AVG_SCORE < 253118.0, -0.00411848, 0.0126594)), if (DAY_LW_DAY_HITS_RATIO < 1.71, 0.0118462, -0.00195941)), 6.56261E-4) + +if (POLITICS < 0.235, if (MAX_MIN_SCORE < 8349.75, if (DAY_LW_DAY_HITS_RATIO < 5.335, 0.00316005, -0.00643477), if (MAX_MIN_SCORE < 16062.8, -0.00319606, if (MAX_MIN_SCORE < 16303.8, 0.0107361, -3.21466E-5))), if (ISABSTRACT_AVG < 0.05, 0.0148344, -0.00402193)) + +if (ENTERTAINMENT < 0.585, if (MAX_MIN_SCORE < 36987.5, -0.00141265, 3.38741E-4), if (PUB_TODAY_AVG < 0.235, if (MAX_MIN_SCORE < 20990.0, 0.00745281, -0.0127174), if (AVG_SCORE < 239671.0, 3.61332E-4, if (ISTITLE_AVG < 0.735, 0.0208577, 0.00530017)))) + +if (DAY_PD_HITS_DERIV < -3.5, if (HEALTH < 0.105, if (DAY_PD_HITS_RATIO < 0.165, -0.00212795, if (LW_DAY_HITS < 3.5, if (AVG_SCORE < 258650.0, if (ISABSTRACT_AVG < 0.225, -0.00378895, 0.00511293), 0.0070848), -0.0101628)), 0.0144615), -5.10098E-4) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking05.expression b/searchlib/src/test/files/ranking05.expression new file mode 100644 index 00000000000..028979c9d12 --- /dev/null +++ b/searchlib/src/test/files/ranking05.expression @@ -0,0 +1,77 @@ +if (AVG_SCORE < 240274.0, if (AVG_SCORE < 152115.0, 0.222147, 0.231999), if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.785, 0.254209, if (TWO_DAY_WF < 0.849242, 0.260625, 0.274218)), if (MIN_SCORE < 
328158.0, 0.240699, 0.25683))) + +if (MIN_SCORE < 224388.0, if (AVG_SCORE < 229835.0, if (MAX_SCORE < 171144.0, -0.0167726, -0.0105451), -0.00242754), if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.36, 0.0097257, 0.0198661), if (MIN_SCORE < 330678.0, -3.0123E-4, 0.0145117))) + +if (AVG_SCORE < 240820.0, if (AVG_SCORE < 159292.0, -0.0146681, -0.00755839), if (ISABSTRACT_AVG < 0.21, if (WEEKAVG < 1.64, 0.0094277, if (AVG_RANK < 7.1, 0.00789525, 0.020449)), if (TOPSTORY < 0.05, -0.00410248, 0.00584918))) + +if (MIN_SCORE < 226846.0, if (AVG_SCORE < 221526.0, -0.0110986, if (TOPSTORY < 0.365, -0.00479654, 0.020019)), if (WEEKAVG < 1.07, if (ISTITLE_AVG < 0.93, 0.0050835, -0.00497491), if (ISABSTRACT_AVG < 0.05, 0.0175386, 0.00636247))) + +if (MIN_SCORE < 226373.0, if (AVG_SCORE < 151768.0, -0.0132135, if (WEEKAVG < 2.5, -0.00784849, if (ISABSTRACT_AVG < 0.95, -0.00100834, 0.0234278))), if (ISABSTRACT_AVG < 0.235, if (MAX_MIN_SCORE < 20325.0, 0.00102421, 0.0145312), 8.03179E-4)) + +if (AVG_SCORE < 234937.0, -0.00791142, if (ISABSTRACT_AVG < 0.13, if (WEEKAVG < 1.5, 0.00645581, if (TWO_DAY_WF < 0.925548, 0.0121753, if (LOCALNEWS < 0.05, 0.013493, 0.0339803))), if (AVG_SCORE < 492653.0, -0.00104991, 0.0192918))) + +if (MIN_SCORE < 231118.0, if (AVG_SCORE < 223663.0, -0.00824326, if (ISABSTRACT_AVG < 0.05, 0.00253809, -0.00637355)), if (ISTITLE_AVG < 0.885, if (DAY_LW_DAY_HITS_RATIO < 4.045, if (LW_DAY_HITS < 0.5, 0.00769331, -0.00570545), 0.0136625), -5.66847E-4)) + +if (MIN_SCORE < 222204.0, -0.00655529, if (ISTITLE_AVG < 0.95, if (DAY_LW_DAY_HITS_RATIO < 15.5, if (TOPSTORY < 0.185, if (INTLNEWS < 0.39, 0.00204263, if (FOUR_HOUR_WF < 0.004, 0.0157981, -0.00641486)), 0.0124789), 0.0166668), -0.00299979)) + +if (MIN_SCORE < 224388.0, -0.00568771, if (WEEKAVG < 1.07, if (ISTITLE_AVG < 0.845, 0.00360536, -0.00514951), if (BUSINESS < 0.05, if (MAX_MIN_RANK < 3.0, 0.0231505, 0.0105904), if (MIN_SCORE < 400082.0, 0.00196019, 0.0152236)))) + +if (MAX_SCORE < 264920.0, if (AVG_SCORE 
< 159289.0, -0.00812678, -0.003174), if (DAY_WEEK_AVG_DERIV < 30.715, if (ISABSTRACT_AVG < 0.115, if (MAX_MIN_SCORE < 163787.0, 0.0097262, if (AVG_SCORE < 400330.0, -0.00390127, 0.00706031)), 5.49425E-5), 0.0204424)) + +if (AVG_SCORE < 241590.0, -0.00459592, if (TOPSTORY < 0.05, if (DAY_WEEK_AVG_RATIO < 4.205, -1.26418E-4, 0.0204507), if (TWO_DAY_WF < 0.86039, if (ISABSTRACT_AVG < 0.585, if (TOPSTORY < 0.365, 0.00566686, 0.0196157), -0.00511988), 0.0124928))) + +if (MIN_SCORE < 222204.0, if (PREV_DAY_HITS < 4.5, -0.00589519, -6.31753E-4), if (ISABSTRACT_AVG < 0.235, if (MAX_MIN_SCORE < 171496.0, if (MAX_SCORE < 558130.0, if (AVG_RANK < 7.125, 8.6138E-4, 0.00952768), 0.0206013), 0.00133279), -0.00112547)) + +if (MIN_SCORE < 222204.0, if (DAY_PD_HITS_DERIV < -12.5, 0.00847214, if (DAY_WEEK_AVG_DERIV < 36.785, -0.00450293, 0.0134303)), if (ISABSTRACT_AVG < 0.05, if (DAY_WEEK_AVG_RATIO < 4.83, if (NATIONALNEWS < 0.355, 0.00488766, 0.0169425), 0.0204287), -1.32037E-4)) + +if (MAX_SCORE < 250058.0, if (INTLNEWS < 0.105, -0.0059595, 2.21029E-4), if (TOPSTORY < 0.355, if (MIN_SCORE < 385241.0, if (BUSINESS < 0.05, 0.00367059, -0.00223683), if (INTLNEWS < 0.365, 0.00575046, 0.0237395)), 0.0174135)) + +if (MAX_SCORE < 265638.0, -0.00287962, if (ISABSTRACT_AVG < 0.235, if (DAY_PD_HITS_RATIO < 18.75, if (MAX_MIN_SCORE < 67687.2, if (PREV_DAY_HITS < 6.5, 0.00469885, 0.0145573), if (AVG_SCORE < 399037.0, -0.00312833, 0.00680591)), 0.0238803), -0.00158812)) + +if (MIN_SCORE < 222204.0, if (TOPSTORY < 0.355, -0.00377546, 0.0099145), if (ISTITLE_AVG < 0.885, if (MAX_MIN_SCORE < 57965.2, if (MIN_SCORE < 223217.0, 0.0291906, 0.00802385), if (AVG_SCORE < 402324.0, -0.00259188, 0.00560142)), -0.0015883)) + +if (MAX_SCORE < 252015.0, -0.00312417, if (DAY_WEEK_AVG_RATIO < 5.91, if (TOPSTORY < 0.185, if (MAX_MIN_SCORE < 123158.0, if (ISABSTRACT_AVG < 0.13, 0.0054303, if (PREV_DAY_HITS < 7.5, -0.00350664, 0.0115054)), -0.00200056), 0.00612929), 0.0248479)) + +if (MIN_SCORE < 232158.0, if 
(DAY_PD_HITS_DERIV < -13.5, 0.012118, if (SPORTS < 0.685, -0.00337721, if (MAX_SCORE < 165958.0, -0.00648055, 0.00734207))), if (ISABSTRACT_AVG < 0.635, if (EIGHT_HOUR_WF < 0.493902, 0.00519362, -0.00533505), -0.00220591)) + +if (AVG_SCORE < 387415.0, if (PREV_DAY_HITS < 2.5, -0.00321038, if (BUSINESS < 0.05, 0.00353532, -0.0020425)), if (TWO_DAY_WF < 0.979149, 0.00271552, if (TWELVE_HOUR_WF < 0.104418, 0.00180155, if (MIN_SCORE < 350308.0, 0.0370742, 0.0145313)))) + +if (MAX_SCORE < 248824.0, if (INTLNEWS < 0.185, -0.00381799, 0.00109643), if (TOPSTORY < 0.185, if (TWO_DAY_WF < 0.779514, -0.0015664, if (WEEKAVG < 4.07, 0.00171319, 0.0126131)), if (MAX_MIN_RANK < 7.0, 0.00411675, 0.0149353))) + +if (MIN_SCORE < 233311.0, -0.00183471, if (LW_DAY_HITS < 0.5, if (SUPERDUPER_AVG < 0.21, if (MIN_RANK < 1.0, 0.0173917, 6.43665E-4), if (LOCALNEWS < 0.185, if (DAY_PD_HITS_RATIO < 8.795, 0.00308276, 0.0169982), 0.0159792)), -0.00499866)) + +if (TOPSTORY < 0.39, if (MAX_SCORE < 176763.0, -0.00448387, if (INTLNEWS < 0.415, if (BUSINESS < 0.05, if (MAX_MIN_SCORE < 20408.8, -0.00328596, if (TWO_DAY_WF < 0.512854, -0.00211998, 0.00522867)), -0.00226038), 0.00574748)), 0.00900215) + +if (TWO_DAY_WF < 0.75074, if (BUSINESS < 0.05, if (FOUR_HOUR_WF < 0.0149554, if (WEEKAVG < 0.785, -0.00184131, if (AVG_SCORE < 167616.0, -0.00305123, 0.00685803)), -0.00470139), -0.003457), if (MAX_SCORE < 504246.0, 8.78955E-4, 0.00850264)) + +if (BUSINESS < 0.105, if (AVG_SCORE < 160899.0, -0.00270644, if (AVG_SCORE < 194764.0, if (ISABSTRACT_AVG < 0.315, -0.00800918, 0.012943), if (NATIONALNEWS < 0.355, 9.01868E-4, 0.0112161))), if (INTLNEWS < 0.39, -0.00269415, 0.00725021)) + +if (BUSINESS < 0.105, if (MAX_SCORE < 188088.0, -0.00298371, if (AVG_SCORE < 190784.0, if (ISABSTRACT_AVG < 0.55, -0.00171064, 0.0237327), if (AVG_RANK < 9.755, 0.00131049, if (WEEKAVG < 0.93, -0.00199335, 0.020099)))), -0.00222399) + +if (NATIONALNEWS < 0.115, if (PREV_DAY_HITS < 27.5, if (INTLNEWS < 0.725, if (AVG_SCORE < 
629440.0, -0.00184197, 0.0166573), 0.0148512), if (TWO_DAY_WF < 0.773805, if (TWELVE_HOUR_WF < 0.114144, 0.00583361, -0.012718), 0.0149618)), 0.00280466) + +if (TOPSTORY < 0.355, if (DAY_PD_HITS_DERIV < -4.5, 0.00287102, if (SPORTS < 0.73, -0.00185575, 0.00272133)), if (AVG_RANK < 9.55, if (DAY_PD_HITS_RATIO < 0.405, -0.00518413, if (MAX_MIN_SCORE < 115612.0, 0.00438781, 0.0211867)), 0.0209324)) + +if (AVG_SCORE < 147623.0, -0.00405691, if (DAY_PD_HITS_DERIV < -23.5, 0.00920672, if (INTLNEWS < 0.725, if (TOPSTORY < 0.39, 7.21159E-5, if (INTLNEWS < 0.05, if (SUPERDUPER_AVG < 0.155, 0.00462984, 0.0231233), 5.62082E-4)), 0.0141075))) + +if (AVG_SCORE < 159075.0, -0.00305707, if (TOPSTORY < 0.05, if (SPORTS < 0.73, -9.35589E-4, if (AVG_RANK < 5.635, -0.00405106, 0.0119584)), if (LW_DAY_HITS < 0.5, 0.0045483, if (PREV_DAY_HITS < 30.0, -0.00540909, 0.00895866)))) + +if (MAX_SCORE < 507014.0, if (AVG_RANK < 6.775, -0.00328147, if (MAX_MIN_SCORE < 150474.0, 3.93348E-4, -0.00536951)), if (DAY_PD_HITS_RATIO < 7.885, if (ENTERTAINMENT < 0.05, if (MAX_SCORE < 516938.0, 0.0171772, 0.00382646), -0.00447429), 0.0153178)) + +if (DAY_WEEK_AVG_RATIO < 5.905, if (NATIONALNEWS < 0.105, -8.32529E-4, if (MAX_MIN_RANK < 7.0, if (NATIONALNEWS < 0.13, 0.0108634, 3.13874E-4), if (AVG_SCORE < 231880.0, -0.00104106, if (PREV_DAY_HITS < 5.5, 0.0263191, 0.00601508)))), 0.0149012) + +if (PREV_DAY_HITS < 59.5, if (NATIONALNEWS < 0.05, if (DAY_PD_HITS_DERIV < -6.5, if (DAY_WEEK_AVG_RATIO < 1.285, if (MAX_MIN_SCORE < 160894.0, 0.00449479, -0.00886993), 0.0153285), -0.00178263), if (POLITICS < 0.05, 0.00198329, -0.00586162)), 0.00935161) + +if (MIN_SCORE < 132626.0, -0.00387076, if (WEEKAVG < 0.93, -0.00135437, if (MAX_MIN_SCORE < 46712.0, 0.00347721, if (AVG_SCORE < 404994.0, if (MIN_SCORE < 241776.0, if (SPORTS < 0.79, -0.00180685, 0.0168028), -0.00853053), 0.00228774)))) + +if (LW_DAY_HITS < 0.5, if (AVG_SCORE < 159292.0, -0.00244777, if (LIFESTYLE < 0.155, if (DAY_PD_HITS_DERIV < -3.5, 
0.00412328, if (TOPSTORY < 0.39, 4.16163E-4, 0.0104883)), -0.00615481)), if (FOUR_HOUR_WF < 0.158004, -0.00212154, -0.0150848)) + +if (DAY_PD_HITS_RATIO < 43.0, if (LW_DAY_HITS < 0.5, if (DAY_PD_HITS_DERIV < -5.5, if (AVG_RANK < 9.265, if (TOPSTORY < 0.05, -0.00313951, 0.00432897), 0.0117073), -6.92E-4), if (MAX_MIN_SCORE < 120702.0, -0.00138028, -0.00945152)), 0.0160989) + +if (LW_DAY_HITS < 0.5, if (MAX_SCORE < 507008.0, if (MAX_SCORE < 339502.0, if (ENTERTAINMENT < 0.415, 3.0327E-4, 0.00803638), -0.0034615), 0.00344157), if (FOUR_HOUR_WF < 0.101282, if (DAY_WEEK_AVG_DERIV < 17.5, -0.00192815, 0.00914257), -0.0127954)) + +if (DAY_WEEK_AVG_RATIO < 0.255, 0.0131801, if (MAX_MIN_SCORE < 312687.0, if (MAX_MIN_SCORE < 296243.0, if (NATIONALNEWS < 0.105, -6.09993E-4, if (MAX_MIN_RANK < 7.0, 8.98274E-4, if (MAX_SCORE < 234190.0, 9.67677E-4, 0.0163215))), 0.0161496), -0.0112906)) + +if (TOPSTORY < 0.39, if (MIN_SCORE < 220684.0, -0.00143961, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 277398.0, if (DAY_LW_DAY_HITS_RATIO < 1.75, -9.51177E-4, 0.00924989), if (BUSINESS < 0.39, -0.00128495, 0.0103605)), -0.00106493)), 0.00610485) + +if (AVG_SCORE < 500951.0, if (DAY_HITS < 42.5, -6.76917E-4, if (INTLNEWS < 0.45, if (ISTITLE_AVG < 0.05, -0.0122069, 0.00752268), 0.0144731)), if (MIN_SCORE < 362007.0, 0.0202143, if (PUB_TODAY_AVG < 0.05, -0.0107444, 0.00512166))) + +if (WEEKAVG < 1.07, -0.00167316, if (DAY_WEEK_AVG_RATIO < 6.14, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 59.5, if (AVG_RANK < 9.225, if (DAY_HITS < 30.5, 0.00155636, -0.0130867), 0.0105919), 0.0218533), -8.02313E-4), 0.0145597)) + +if (LIFESTYLE < 0.05, if (MAX_MIN_RANK < 7.0, 2.20029E-4, if (ISABSTRACT_AVG < 0.115, if (TWO_DAY_WF < 0.580973, -1.79904E-4, if (EIGHT_HOUR_WF < 0.0125776, 0.0222343, 0.00659678)), if (DAY_WEEK_AVG_DERIV < -0.93, 0.0146586, -0.0018679))), -0.0043182) + +if (AVG_SCORE < 500853.0, if (MIN_SCORE < 435034.0, if (DAY_WEEK_AVG_RATIO < 4.15, -6.00797E-4, 0.00413062), -0.0152667), if (MAX_SCORE < 
660352.0, if (TWO_DAY_WF < 0.744565, 0.0172406, if (MAX_SCORE < 596568.0, -0.0069398, 0.0163258)), -0.00228486)) + +if (SPORTS < 0.685, -3.28185E-4, if (AVG_SCORE < 446734.0, if (MAX_SCORE < 500264.0, if (MAX_SCORE < 450904.0, if (MIN_SCORE < 254311.0, if (WEEKAVG < 0.785, -0.00158584, 0.0075942), -0.0103296), 0.0212781), -0.0121229), 0.0182724)) + +if (TOPSTORY < 0.39, if (PREV_DAY_HITS < 59.5, -5.72966E-5, if (NATIONALNEWS < 0.05, 0.0144398, -0.00316385)), if (FOUR_HOUR_WF < 0.0201025, if (TWELVE_HOUR_WF < 0.163978, 0.00366064, 0.0227011), if (ONE_DAY_WF < 0.658333, -0.0114776, 0.00740238))) + +if (TOPSTORY < 0.585, if (ENTERTAINMENT < 0.05, if (DAY_WEEK_AVG_DERIV < 43.145, 4.86446E-4, if (DAY_HITS < 78.5, 0.0210513, if (SUPERDUPER_AVG < 0.65, -0.00387695, 0.013128))), if (AVG_RANK < 5.465, 0.00674178, -0.00228932)), -0.0121137) + +if (TWO_DAY_WF < 0.488162, -0.00237763, if (WEEKAVG < 1.215, -7.73205E-4, if (EIGHT_HOUR_WF < 0.0444065, if (DAY_HITS < 19.5, 0.00278939, 0.0115461), if (NATIONALNEWS < 0.155, -0.00189416, if (SPORTS < 0.105, 0.0072781, -0.00903706))))) + +if (SPORTS < 0.47, if (SPORTS < 0.105, -4.47312E-5, -0.00348966), if (MAX_RANK < 9.0, -0.0016478, if (EIGHT_HOUR_WF < 0.0459777, if (TWO_DAY_WF < 0.539394, if (WEEKAVG < 1.07, -0.00892999, 0.00865732), 0.0121605), 0.00131641))) + +if (INTLNEWS < 0.725, if (DAY_LW_DAY_HITS_RATIO < 124.5, if (AVG_SCORE < 628258.0, if (DAY_HITS < 55.5, -1.93067E-4, if (TWELVE_HOUR_WF < 0.117879, 0.0187097, if (TWELVE_HOUR_WF < 0.350814, -0.00734127, 0.0131678))), 0.00987754), -0.0156063), 0.00929408) + +if (DAY_WEEK_AVG_DERIV < -3.36, -0.00956624, if (TOPSTORY < 0.39, -2.16336E-4, if (AVG_RANK < 9.55, if (AVG_RANK < 8.735, if (TWO_DAY_WF < 0.531551, -0.00490451, if (FOUR_HOUR_WF < 0.0142857, 0.0229256, 3.12813E-4)), -0.00418916), 0.0189348))) + +if (AVG_SCORE < 625182.0, if (DAY_LW_DAY_HITS_DERIV < 55.5, if (DAY_WEEK_AVG_DERIV < 19.36, if (DAY_WEEK_AVG_DERIV < 13.5, -1.95177E-4, 0.00629794), if (ISTITLE_AVG < 0.05, 
-0.0149349, if (TWELVE_HOUR_WF < 0.383204, -0.00516327, 0.00921651))), 0.00647785), 0.0102664) + +if (DAY_WEEK_AVG_RATIO < 0.385, -0.00586045, if (LIFESTYLE < 0.155, if (MAX_MIN_SCORE < 16288.0, -0.0016458, if (MAX_MIN_SCORE < 45875.2, if (MAX_MIN_SCORE < 45537.5, if (AVG_SCORE < 229848.0, -2.97351E-4, 0.00475294), 0.0181171), -5.74173E-4)), -0.00499598)) + +if (SPORTS < 0.815, if (TWO_DAY_WF < 0.460499, if (INTLNEWS < 0.365, -0.00422695, if (DAY_HITS < 4.5, 0.011483, -0.00407438)), -4.78506E-6), if (DAY_HITS < 1.5, 0.00980267, if (EIGHT_HOUR_WF < 0.301948, -0.00316423, 0.0125528))) + +if (PREV_DAY_HITS < 0.5, -0.00190281, if (LAW < 0.05, if (PUB_TODAY_AVG < 0.05, -0.00103893, if (EIGHT_HOUR_WF < 0.0492709, if (ISABSTRACT_AVG < 0.05, 0.0053372, 9.62476E-4), if (NATIONALNEWS < 0.13, -0.00161984, 0.005538))), -0.00741284)) + +if (DAY_LW_DAY_HITS_RATIO < 0.69, if (AVG_SCORE < 229191.0, -0.00217119, -0.0135186), if (DAY_LW_DAY_HITS_RATIO < 125.5, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 218595.0, -0.00358076, if (BUSINESS < 0.685, 0.00139556, 0.0140572)), -5.71652E-4), -0.0128262)) + +if (WEEKAVG < 1.5, if (DAY_WEEK_AVG_DERIV < 3.5, if (SUPERDUPER_AVG < 0.355, if (LW_DAY_HITS < 0.5, 5.21639E-4, -0.00451687), -0.00637359), -0.00562351), if (TWO_DAY_WF < 0.829824, 2.82632E-5, if (TWELVE_HOUR_WF < 0.940588, 0.00527366, -0.011917))) + +if (TWELVE_HOUR_WF < 0.742581, if (TOPSTORY < 0.355, -1.2321E-4, if (FOUR_HOUR_WF < 0.026084, if (MIN_SCORE < 356232.0, 0.0111342, -0.00292376), -0.00479873)), if (ISABSTRACT_AVG < 0.185, if (MIN_RANK < 5.0, -1.25896E-4, -0.0115332), -7.63903E-4)) + +if (NATIONALNEWS < 0.27, -3.28182E-4, if (ISABSTRACT_AVG < 0.05, if (AVG_SCORE < 225658.0, -0.00305383, if (MIN_SCORE < 231962.0, 0.0181265, if (MAX_MIN_SCORE < 33119.5, -0.00486977, if (WEEKAVG < 1.785, 0.0229851, 0.00588037)))), -6.67257E-4)) + +if (MAX_MIN_SCORE < 312575.0, if (TWO_DAY_WF < 0.531754, -0.00112552, if (MAX_SCORE < 669432.0, if (BUSINESS < 0.05, if (PREV_DAY_HITS < 41.5, 
0.00191169, 0.0126963), if (DAY_LW_DAY_HITS_RATIO < 37.5, -0.00101754, 0.00846513)), -0.00792694)), -0.00834756) + +if (PUB_TODAY_AVG < 0.05, if (SPORTS < 0.645, if (AVG_SCORE < 395268.0, if (MAX_SCORE < 460268.0, -0.00227942, 0.00899341), -0.00823655), if (MIN_RANK < 5.0, -0.00122777, 0.0163908)), if (TWO_DAY_WF < 0.95119, -1.8789E-4, 0.0021492)) + +if (DAY_WEEK_AVG_RATIO < 3.985, -2.26985E-4, if (DAY_WEEK_AVG_RATIO < 5.525, if (DAY_WEEK_AVG_RATIO < 4.95, if (AVG_SCORE < 373867.0, if (ENTERTAINMENT < 0.05, 0.00254281, -0.0107653), if (PREV_DAY_HITS < 3.0, 0.0220568, 0.00220059)), 0.0155791), -0.00294274)) + +if (EIGHT_HOUR_WF < 0.349537, if (LOCALNEWS < 0.315, 0.00100629, if (TWELVE_HOUR_WF < 0.324561, if (MAX_SCORE < 547636.0, -0.00365503, 0.00844103), if (MAX_SCORE < 249971.0, -0.00217393, 0.020902))), if (ISTITLE_AVG < 0.05, -0.00679711, -2.87657E-6)) + +if (DAY_WEEK_AVG_DERIV < 60.285, if (DAY_WEEK_AVG_DERIV < -4.07, if (DAY_WEEK_AVG_DERIV < -5.785, 8.89976E-4, -0.016703), if (DAY_WEEK_AVG_RATIO < 0.285, 0.0108868, if (DAY_WEEK_AVG_RATIO < 0.34, -0.0115452, if (DAY_PD_HITS_DERIV < -24.5, 0.00709642, 5.85454E-5)))), 0.00724335) + +if (WEEKAVG < 31.07, if (DAY_PD_HITS_DERIV < -8.5, if (ONE_DAY_WF < 0.209914, if (TWO_DAY_WF < 0.537088, 0.00770858, -0.00166542), if (FOUR_HOUR_WF < 0.00547982, 0.0185133, 3.03571E-4)), -3.1074E-4), if (MIN_SCORE < 398722.0, -0.0135078, 0.00113129)) + +if (AVG_RANK < 9.53, if (INTLNEWS < 0.73, if (SUPERDUPER_AVG < 0.61, -4.09752E-4, -0.00974984), 0.0133732), if (SPORTS < 0.05, if (TOPSTORY < 0.315, -0.00110238, 0.0155814), if (AVG_SCORE < 258098.0, 0.0025561, 0.0218633))) + +if (WEEKAVG < 4.215, 6.4852E-4, if (MAX_MIN_SCORE < 163619.0, if (TWO_DAY_WF < 0.463325, -0.00769416, 5.3643E-4), if (FOUR_HOUR_WF < 0.060024, if (TWELVE_HOUR_WF < 0.0127518, -0.0161574, if (LOCALNEWS < 0.05, 0.00613049, -0.00855688)), -0.0167968))) + +if (DAY_WEEK_AVG_RATIO < 0.255, 0.00985333, if (DAY_WEEK_AVG_RATIO < 0.335, -0.00951258, if (MAX_MIN_SCORE < 
307460.0, if (MAX_SCORE < 517912.0, if (MAX_MIN_SCORE < 61870.2, if (DAY_PD_HITS_RATIO < 2.865, 0.00152167, -0.0017641), -0.00258099), 0.00333836), -0.00739588))) + +if (WEEKAVG < 12.785, if (AVG_SCORE < 629228.0, 3.70823E-6, 0.0113262), if (INTLNEWS < 0.25, -0.00996717, if (EIGHT_HOUR_WF < 0.205476, if (MAX_MIN_SCORE < 105786.0, if (FOUR_HOUR_WF < 0.00848006, 0.0151832, 8.57643E-4), -0.00763046), -0.015098))) + +if (SPORTS < 0.47, -2.09978E-4, if (DAY_PD_HITS_RATIO < 9.5, if (MAX_RANK < 9.0, -0.00197712, if (MAX_SCORE < 188360.0, -4.64352E-4, if (MAX_MIN_SCORE < 45863.5, if (DAY_PD_HITS_RATIO < 0.31, 0.029664, 0.00977088), 0.00333723))), -0.012958)) + +if (EIGHT_HOUR_WF < 0.397041, if (TWELVE_HOUR_WF < 0.327106, 6.40846E-5, if (MAX_MIN_SCORE < 129263.0, 0.00114373, if (SUPERDUPER_AVG < 0.105, if (AVG_SCORE < 390145.0, 0.00590447, 0.0270599), 0.00116164))), if (DAY_LW_DAY_HITS_RATIO < 27.5, -0.00395192, 0.00937395)) + +if (LW_DAY_HITS < 3.5, if (ENTERTAINMENT < 0.845, if (LIFESTYLE < 0.115, if (AVG_RANK < 4.45, -0.00817106, if (AVG_RANK < 5.225, if (ISABSTRACT_AVG < 0.55, 0.00926281, -0.00505226), 1.81535E-4)), -0.00449952), 0.00814113), -0.00517456) + +if (TWO_DAY_WF < 0.439697, if (AVG_RANK < 8.7, -0.00429929, if (MAX_MIN_SCORE < 47973.5, -9.03138E-4, 0.0125022)), if (AVG_RANK < 8.635, if (DAY_WEEK_AVG_DERIV < 29.5, 8.00681E-4, 0.0125881), if (ISTITLE_AVG < 0.05, -0.00468934, 3.1406E-4))) + +if (LW_DAY_HITS < 0.5, if (WEEKAVG < 39.855, 2.77242E-4, -0.0132349), if (MIN_SCORE < 234431.0, if (MIN_SCORE < 225952.0, -0.00160465, 0.01256), if (PREV_DAY_HITS < 19.5, -0.0107505, if (WEEKAVG < 12.575, 0.0119228, -0.00600679)))) + +if (WEEKAVG < 26.93, if (TOPSTORY < 0.39, -1.40614E-4, if (AVG_RANK < 9.55, if (AVG_RANK < 7.755, 0.0151495, if (TOPSTORY < 0.45, 0.0043054, -0.00734039)), 0.0204375)), if (SUPERDUPER_AVG < 0.55, -0.0146963, -1.2832E-4)) + +if (TOPSTORY < 0.55, if (WEEKAVG < 0.5, if (MAX_MIN_SCORE < 165073.0, if (MIN_SCORE < 215208.0, 1.70833E-4, -0.0051217), 
0.0104792), if (WEEKAVG < 10.93, 7.81356E-4, if (EIGHT_HOUR_WF < 0.00663439, 0.00783355, -0.00393311))), -0.0097211) + +if (DAY_WEEK_AVG_RATIO < 0.225, 0.0105222, if (SPORTS < 0.73, -6.88094E-4, if (MAX_MIN_RANK < 5.0, -0.00146174, if (PREV_DAY_HITS < 6.5, if (MIN_SCORE < 144075.0, -0.00415946, if (PREV_DAY_HITS < 2.5, 0.010148, 0.0262199)), -0.00602654)))) + +if (DAY_WEEK_AVG_DERIV < -3.5, if (TWO_DAY_WF < 0.635642, if (DAY_WEEK_AVG_DERIV < -5.93, 0.00406292, -0.0105257), -0.0144987), if (DAY_PD_HITS_RATIO < 43.0, if (DAY_WEEK_AVG_RATIO < 5.55, -6.55387E-5, if (ISTITLE_AVG < 0.05, -0.0189073, 0.00177271)), 0.0120068)) + +if (DAY_WEEK_AVG_RATIO < 0.235, 0.0112125, if (DAY_WEEK_AVG_RATIO < 0.345, -0.00610693, if (WEEKAVG < 0.93, -0.00104389, if (WEEKAVG < 4.215, if (TOPSTORY < 0.13, 5.13026E-4, 0.00509033), if (MAX_MIN_SCORE < 206736.0, -4.1877E-4, -0.0108199))))) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking06.expression b/searchlib/src/test/files/ranking06.expression new file mode 100644 index 00000000000..017f5ed49b6 --- /dev/null +++ b/searchlib/src/test/files/ranking06.expression @@ -0,0 +1,85 @@ +if (MAX_SCORE < 364352.0, if (NUM_WORDS < 1.5, 0.106529, if (WEEKAVG < 0.665, 0.113339, 0.129744)), if (WEEKAVG < 0.35, 0.125401, 0.148456)) + +if (MAX_SCORE < 386454.0, if (NUM_WORDS < 2.5, if (MAX_SCORE < 266558.0, -0.00435683, 0.00232626), 0.00527105), if (DAY_LW_DAY_HITS_RATIO < 3.75, 0.0125759, 0.0415964)) + +if (MAX_SCORE < 285564.0, if (NUM_WORDS < 3.5, -0.00312935, 0.0139702), if (DAY_LW_DAY_HITS_RATIO < 4.645, 0.00510366, if (ISABSTRACT_AVG < 0.225, 0.0376987, 0.00704226))) + +if (MAX_SCORE < 354060.0, if (NUM_WORDS < 1.5, -0.00557684, if (DAY_LW_DAY_HITS_RATIO < 7.25, -4.11611E-4, 0.0176971)), if (ISTITLE_AVG < 0.845, 0.0209172, 0.00437892)) + +if (MAX_SCORE < 357048.0, if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 6.75, -0.00214246, 0.00909381), 0.0132498), if (DAY_WEEK_AVG_DERIV < 2.785, 0.00781954, 0.0325808)) + +if 
(MAX_SCORE < 391984.0, if (NUM_WORDS < 1.5, -0.00479641, if (WEEKAVG < 0.805, 3.14606E-4, 0.0174789)), if (DAY_WEEK_AVG_DERIV < 2.5, 0.0100076, 0.0303617)) + +if (NUM_WORDS < 2.5, if (MAX_SCORE < 273725.0, -0.0031024, if (ISTITLE_AVG < 0.955, 0.0093897, -0.00177918)), if (WEEKAVG < 0.35, 0.00506228, 0.0238941)) + +if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 7.835, if (NUM_WORDS < 2.5, -0.00222361, 0.00275911), if (ISABSTRACT_AVG < 0.185, 0.0286851, 0.0025611)), 0.0150946) + +if (MAX_SCORE < 463634.0, if (DAY_LW_DAY_HITS_RATIO < 14.36, if (NUM_WORDS < 2.5, -0.00168161, 0.00306928), if (ISABSTRACT_AVG < 0.05, 0.03626, 0.00702238)), 0.018646) + +if (MAX_SCORE < 291384.0, -0.00120841, if (ISTITLE_AVG < 0.845, if (WEEKAVG < 0.325, 0.0070091, if (DAY_WEEK_AVG_DERIV < 27.5, 0.021833, 0.0670236)), -2.39127E-4)) + +if (MAX_SCORE < 392716.0, if (WEEKAVG < 0.915, if (NUM_WORDS < 1.5, -0.00408665, 1.2681E-5), if (ISABSTRACT_AVG < 0.05, 0.0315009, 0.00309315)), 0.0101865) + +if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, if (NUM_WORDS < 1.5, -0.00378383, -1.55068E-4), if (ISTITLE_AVG < 0.915, 0.0164901, 3.5458E-4)), 0.0111533) + +if (NUM_WORDS < 2.5, if (DAY_LW_DAY_HITS_RATIO < 12.165, -0.00137589, if (ISTITLE_AVG < 0.73, 0.0299723, 0.00442332)), if (DAY_HITS < 2.125, 0.00279729, 0.0157199)) + +if (MAX_SCORE < 391997.0, if (NUM_WORDS < 1.5, -0.00289017, if (PREV_DAY_HITS < 6.33333, 1.53177E-4, 0.0114408)), if (PREV_DAY_HITS < 9.5, 0.00746655, 0.040233)) + +if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, -0.00103084, if (ISTITLE_AVG < 0.915, if (DAY_HITS_FRAC < 0.645, 0.0230528, 0.00568694), 6.15028E-4)), 0.00901386) + +if (MAX_SCORE < 291385.0, -9.44169E-4, if (ISTITLE_AVG < 0.72, if (DAY_WEEK_AVG_DERIV < 28.855, if (NATIONALNEWS < 0.355, 0.00617921, 0.0271174), 0.0534392), -9.21153E-4)) + +if (NUM_WORDS < 3.5, if (PREV_DAY_HITS < 6.16667, -6.82897E-4, if (MIN_SCORE < 254342.0, 0.00193942, if (NATIONALNEWS < 0.21, 0.0131534, 0.0605109))), 0.00697463) + +if 
(NUM_WORDS < 2.5, -9.31934E-4, if (DAY_WEEK_AVG_RATIO < 3.17, if (ISTITLE_AVG < 0.685, if (NATIONALNEWS < 0.225, 0.00281994, 0.0214747), -0.00300096), 0.0138056)) + +if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 4.9, -8.81061E-4, if (AVG_SCORE < 268471.0, -8.77741E-5, if (ISABSTRACT_AVG < 0.105, 0.0164307, 9.85136E-4))), 0.00654057) + +if (TOPSTORY < 0.03, if (ISTITLE_AVG < 0.62, if (AVG_SCORE < 268819.0, -3.15955E-4, 0.00518778), -0.00177677), if (ISTITLE_AVG < 0.72, 0.0116413, 0.00139452)) + +if (MAX_SCORE < 472738.0, if (ISTITLE_AVG < 0.63, if (DAY_WEEK_AVG_DERIV < 1.825, 4.94339E-4, if (MIN_SCORE < 250779.0, -1.72329E-5, 0.012004)), -0.00134588), 0.0100001) + +if (NUM_WORDS < 3.5, if (DAY_LW_DAY_HITS_RATIO < 24.9, if (PREV_DAY_HITS < 14.8333, -5.01675E-4, 0.00773172), if (LOCALNEWS < 0.11, 0.00984187, 0.0382478)), 0.00673426) + +if (PREV_DAY_HITS < 4.35, if (NUM_WORDS < 1.5, -0.00227953, 8.50381E-5), if (ISTITLE_AVG < 0.905, if (DAY_LW_DAY_HITS_RATIO < 1.6, -0.00121777, 0.0155982), 0.00116876)) + +if (DAY_LW_DAY_HITS_RATIO < 4.73, -2.72614E-4, if (NATIONALNEWS < 0.58, if (DAY_PD_HITS_RATIO < 0.63, if (DAY_HITS_FRAC < 0.265, 0.0099063, 0.0510568), 0.00250323), 0.0470183)) + +if (DAY_LW_DAY_HITS_RATIO < 5.915, -3.36118E-4, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.735, 9.24298E-4, if (DAY_HITS_FRAC < 0.165, 0.0161363, 0.0802279)), 0.00327439)) + +if (NUM_WORDS < 2.5, -6.56912E-4, if (BUSINESS < 0.275, if (MAX_MIN_SCORE < 50.25, 0.00224658, if (ISABSTRACT_AVG < 0.415, 0.013094, -0.0054932)), -0.00808819)) + +if (NATIONALNEWS < 0.135, -1.40405E-4, if (AVG_SCORE < 263507.0, -1.19297E-4, if (ISTITLE_AVG < 0.73, if (ENTERTAINMENT < 0.05, 0.0220643, -0.00416695), 0.00371154))) + +if (DAY_WEEK_AVG_RATIO < 14.28, if (NUM_WORDS < 4.5, if (MIN_SCORE < 245866.0, -9.84768E-4, if (ISTITLE_AVG < 0.72, 0.00341093, -9.73418E-4)), 0.0106439), 0.0360619) + +if (MIN_SCORE < 472207.0, -2.00627E-4, if (WEEKAVG < 0.325, 0.00409488, if (AVG_SCORE < 531893.0, 
0.0518209, if (MAX_SCORE < 602809.0, -0.0080393, 0.0383655)))) + +if (ISTITLE_AVG < 0.72, if (AVG_SCORE < 268824.0, -1.31907E-4, if (MAX_MIN_SCORE < 7909.75, 0.00173958, if (NATIONALNEWS < 0.39, 0.010916, 0.0343348))), -0.00113192) + +if (NUM_WORDS < 2.5, -6.26265E-4, if (BUSINESS < 0.115, if (MAX_MIN_SCORE < 15489.8, 0.00192349, if (MAX_MIN_SCORE < 35950.8, 0.0188263, 0.00372838)), -0.00528885)) + +if (MAX_SCORE < 468155.0, -1.13066E-4, if (ENTERTAINMENT < 0.235, if (TOPSTORY < 0.22, -3.22423E-4, 0.0188811), if (AVG_RANK < 8.365, 0.00856273, 0.064677))) + +if (DAY_LW_DAY_HITS_RATIO < 38.5, if (AVG_SCORE < 259970.0, -6.11764E-4, if (INTLNEWS < 0.045, 1.28558E-4, if (LOCALNEWS < 0.28, 0.00350635, 0.0165708))), 0.018775) + +if (DAY_LW_DAY_HITS_RATIO < 28.5, if (ISTITLE_AVG < 0.585, if (MAX_RANK < 9.0, 1.72066E-4, if (AVG_SCORE < 269329.0, -1.02726E-4, 0.00688386)), -0.00101067), 0.0149278) + +if (DAY_LW_DAY_HITS_DERIV < 14.5, -1.72386E-4, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.82, 0.00314713, 0.044771), if (LOCALNEWS < 0.115, 0.00112935, 0.0165557))) + +if (DAY_PD_HITS_RATIO < 33.75, if (AVG_SCORE < 597646.0, if (DAY_PD_HITS_DERIV < -4.9, if (DAY_PREV_DAY_HITS_FRAC < 0.845, -0.00115559, 0.0101514), -1.88064E-4), 0.0159463), 0.024791) + +if (MIN_SCORE < 481598.0, -4.83069E-5, if (MIN_SCORE < 512815.0, if (MAX_SCORE < 507654.0, 0.00563943, 0.0345982), if (MAX_SCORE < 584112.0, -0.00935941, 0.0104819))) + +if (TOPSTORY < 0.105, -3.22897E-4, if (PREV_DAY_HITS < 1.45833, -6.16703E-4, if (MIN_SCORE < 253414.0, 0.00194629, if (MIN_SCORE < 255748.0, 0.0480784, 0.00955667)))) + +if (NUM_WORDS < 3.5, if (NATIONALNEWS < 0.39, -2.85208E-4, if (PREV_DAY_HITS < 6.83333, 0.00277459, 0.0334432)), if (BUSINESS < 0.77, 0.00558387, -0.0192348)) + +if (BUSINESS < 0.19, if (INTLNEWS < 0.095, 1.07539E-4, if (TOPSTORY < 0.03, 0.00158516, if (NUM_WORDS < 3.5, 0.00653366, 0.0388007))), -0.00186321) + +if (DAY_LW_DAY_HITS_RATIO < 3.635, -3.80753E-4, if (LOCALNEWS < 0.185, 
5.67701E-4, if (ISTITLE_AVG < 0.585, if (MAX_RANK < 9.0, -0.00212514, 0.0247626), 0.00163409))) + +if (DAY_WEEK_AVG_RATIO < 14.28, if (TOPSTORY < 0.115, -3.69615E-4, if (DAY_LW_DAY_HITS_RATIO < 4.875, -4.56219E-4, if (DAY_PD_HITS_RATIO < 2.275, 0.0169104, 0.0021639))), 0.0238394) + +if (NUM_WORDS < 1.5, -0.00139356, if (BUSINESS < 0.13, if (BUSINESS < 0.05, 0.00103638, if (DAY_LW_DAY_HITS_RATIO < 9.5, 0.0441388, -0.00313189)), -0.00195899)) + +if (NATIONALNEWS < 0.13, -2.4886E-5, if (HEALTH < 0.105, if (WEEKAVG < 0.93, 0.00135398, 0.0138431), if (MIN_RANK < 3.0, 0.0513615, -0.00585742))) + +if (NATIONALNEWS < 0.225, -8.48873E-5, if (MIN_SCORE < 259062.0, -3.6897E-4, if (ISTITLE_AVG < 0.71, if (ISTITLE_AVG < 0.45, 0.0126383, 0.0410443), 0.00420061))) + +if (DAY_HITS < 15.25, -3.22532E-4, if (LOCALNEWS < 0.13, 0.00109495, if (WEEKAVG < 6.715, if (MAX_MIN_SCORE < 42695.8, 0.0489508, 0.00942793), -0.00595868))) + +if (DAY_LW_DAY_HITS_RATIO < 38.5, if (NUM_WORDS < 2.5, -2.53621E-4, 0.00124598), if (DAY_WEEK_AVG_DERIV < 49.715, if (ISTITLE_AVG < 0.74, 0.0495711, 0.00323737), -0.00771975)) + +if (NUM_WORDS < 1.5, -0.001351, if (DAY_WEEK_AVG_RATIO < 13.99, if (BUSINESS < 0.105, if (DAY_PD_HITS_DERIV < -10.5, 0.0106695, 5.27774E-4), -0.00157676), 0.0239454)) + +if (DAY_LW_DAY_HITS_RATIO < 24.9, -6.42273E-5, if (MIN_SCORE < 247776.0, if (TOPSTORY < 0.05, -0.0160384, 0.00605178), if (DAY_PREV_DAY_HITS_FRAC < 0.715, -0.00225007, 0.0290955))) + +if (PREV_DAY_HITS < 1.46429, -3.57361E-4, if (AVG_SCORE < 242369.0, -8.84977E-4, if (WEEKAVG < 5.975, if (DAY_HITS_FRAC < 0.135, 0.00160961, 0.00805956), -0.0114471))) + +if (PREV_DAY_HITS < 26.5, if (ISABSTRACT_AVG < 0.155, 1.96561E-4, -0.00121068), if (DAY_HITS_FRAC < 0.265, if (DAY_WEEK_AVG_RATIO < 2.12, -0.00523257, 0.0187339), 0.0345852)) + +if (NUM_WORDS < 4.5, if (MAX_MIN_SCORE < 55839.5, -1.41945E-4, -0.00394864), if (DAY_WEEK_AVG_RATIO < 3.505, if (AVG_RANK < 8.395, 0.0095287, -0.0143254), 0.0273452)) + +if (PREV_DAY_HITS < 
16.5, 1.80791E-4, if (AVG_SCORE < 312154.0, if (DAY_HITS_FRAC < 0.325, if (MIN_SCORE < 254301.0, -0.00274788, 0.0174896), -0.0248862), -0.023664)) + +if (MIN_SCORE < 245164.0, -5.61547E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.395, -0.00410237, if (MAX_RANK < 9.0, -7.35691E-5, if (MIN_SCORE < 560324.0, 0.00230962, 0.0217268)))) + +if (DAY_WEEK_AVG_RATIO < 0.925, if (MAX_SCORE < 405533.0, -5.83987E-4, -0.00549206), if (MIN_SCORE < 479344.0, 3.03644E-4, if (MIN_SCORE < 489462.0, 0.038018, 0.00297502))) + +if (MIN_SCORE < 475038.0, if (MAX_SCORE < 498633.0, -4.41489E-5, -0.0267606), if (ISABSTRACT_AVG < 0.1, if (ENTERTAINMENT < 0.31, 0.00455624, 0.0204099), -0.00647491)) + +if (DAY_PD_HITS_RATIO < 0.115, -0.00668954, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_PREV_DAY_HITS_FRAC < 0.975, if (DAY_PD_HITS_RATIO < 0.515, -0.00146208, 0.0238106), 0.0377246), -9.33641E-5)) + +if (DAY_WEEK_AVG_RATIO < 10.84, if (DAY_WEEK_AVG_RATIO < 6.845, 1.28893E-4, if (MIN_SCORE < 367047.0, -0.00823593, 0.0102315)), if (AVG_SCORE < 279315.0, -0.00477584, 0.0265572)) + +if (PREV_DAY_HITS < 9.75, -2.66304E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.925, -2.69406E-4, if (AVG_RANK < 8.45, if (INTLNEWS < 0.295, 0.0146136, -0.0235187), 0.0313855))) + +if (MIN_SCORE < 483511.0, -1.81558E-4, if (MIN_SCORE < 498030.0, if (DAY_WEEK_AVG_RATIO < 1.68, 0.00293744, 0.0371557), if (SUPERDUPER_AVG < 0.53, 0.00413503, -0.0112815))) + +if (DAY_PD_HITS_RATIO < 26.5, if (DAY_PD_HITS_DERIV < -11.5, if (DAY_WEEK_AVG_RATIO < 1.735, 5.1364E-4, 0.0186441), -7.75501E-5), if (WEEKAVG < 5.57, 0.0278366, -0.00263107)) + +if (NUM_WORDS < 1.5, -8.926E-4, if (DAY_WEEK_AVG_DERIV < 47.86, if (DAY_LW_DAY_HITS_RATIO < 38.5, if (ISABSTRACT_AVG < 0.235, 9.72798E-4, -0.00127979), 0.0250611), -0.0239326)) + +if (MAX_SCORE < 407652.0, if (MAX_SCORE < 395501.0, 1.49872E-5, if (DAY_PD_HITS_RATIO < 0.285, 0.0434173, if (BUSINESS < 0.05, 0.0102549, -0.010691))), -0.00277705) + +if (DAY_WEEK_AVG_RATIO < 6.355, 1.3418E-4, if (MIN_SCORE < 405020.0, if 
(AVG_SCORE < 356693.0, if (MAX_SCORE < 327611.0, -0.00546237, 0.0146496), -0.0196891), 0.0208141)) + +if (AVG_SCORE < 526352.0, if (MAX_SCORE < 521635.0, -6.26311E-5, 0.0210967), if (MAX_SCORE < 550983.0, -0.0232122, if (AVG_RANK < 5.5, -0.0243343, -0.00151995))) + +if (DAY_PD_HITS_RATIO < 26.5, if (DAY_PD_HITS_RATIO < 20.625, -5.89198E-5, -0.0216644), if (MAX_SCORE < 200640.0, -0.011139, if (AVG_RANK < 8.55, 0.0390014, 0.00966164))) + +if (NATIONALNEWS < 0.27, -1.74062E-4, if (HEALTH < 0.05, if (AVG_SCORE < 342310.0, 8.35476E-4, if (MIN_SCORE < 347780.0, 0.0334442, 0.00624751)), 0.025545)) + +if (ISTITLE_AVG < 0.72, if (MAX_MIN_SCORE < 43995.2, 3.96726E-4, if (DAY_PREV_DAY_HITS_FRAC < 0.935, 0.00123996, if (MAX_MIN_SCORE < 56002.2, 0.0235285, -0.00154573))), -4.78464E-4) + +if (BUSINESS < 0.21, if (DAY_LW_DAY_HITS_DERIV < 14.5, 2.41495E-5, if (NATIONALNEWS < 0.225, if (LOCALNEWS < 0.035, -0.00142155, 0.0129645), 0.0297085)), -0.0014897) + +if (SPORTS < 0.585, -3.04907E-4, if (MAX_SCORE < 285618.0, 2.21636E-4, if (ISTITLE_AVG < 0.7, if (MIN_SCORE < 269093.0, 0.0417159, 0.00987586), 0.00129559))) + +if (DAY_PD_HITS_RATIO < 12.28, -5.73419E-5, if (LOCALNEWS < 0.03, -0.00224701, if (WEEKAVG < 5.57, if (AVG_RANK < 8.1, 0.0150017, 0.0490061), 8.99967E-4))) + +if (NATIONALNEWS < 0.28, -2.02096E-4, if (MIN_SCORE < 259050.0, -4.61524E-4, if (PREV_DAY_HITS < 5.5, if (ISTITLE_AVG < 0.085, 0.0107478, 6.58206E-4), 0.039025))) + +if (DAY_WEEK_AVG_RATIO < 0.885, -8.98287E-4, if (MIN_SCORE < 482615.0, 1.37426E-4, if (AVG_SCORE < 506793.0, if (AVG_SCORE < 493340.0, 0.011503, 0.0451903), 1.36945E-5))) + +if (DAY_PD_HITS_DERIV < -4.5, if (HEALTH < 0.13, if (BUSINESS < 0.96, 0.00144328, if (MAX_SCORE < 239157.0, -4.31323E-5, 0.0302083)), 0.0263586), 3.93517E-5) + +if (DAY_LW_DAY_HITS_DERIV < 1.91, -3.30312E-4, if (MIN_SCORE < 254252.0, -2.87448E-4, if (WEEKAVG < 4.5, if (WEEKAVG < 0.93, 0.00341942, 0.0180965), -0.0144877))) + +if (NUM_WORDS < 4.5, if (MAX_MIN_SCORE < 56141.5, 
3.55635E-5, if (ISTITLE_AVG < 0.69, -0.0120653, -0.00193295)), if (DAY_HITS_FRAC < 0.585, 0.0109657, -0.00562292)) + +if (MAX_SCORE < 597411.0, if (AVG_SCORE < 525986.0, if (AVG_SCORE < 504944.0, 6.12611E-5, if (AVG_SCORE < 512650.0, 0.0310299, 6.64858E-4)), -0.010433), 0.00965011) + +if (DAY_PD_HITS_RATIO < 5.945, if (MAX_SCORE < 629654.0, 2.34339E-4, -0.01439), if (MAX_MIN_SCORE < 63226.5, if (LOCALNEWS < 0.28, -0.00423293, 0.00606695), 0.0188983)) + +if (MIN_SCORE < 670535.0, if (DAY_PD_HITS_RATIO < 5.845, if (DAY_LW_DAY_HITS_RATIO < 5.47, -8.49912E-5, if (TOPSTORY < 0.105, -3.4055E-4, 0.0101604)), -0.00330677), 0.0174593) + +if (DAY_PD_HITS_RATIO < 33.75, if (WEEKAVG < 4.46, 3.99921E-5, if (DAY_PREV_DAY_HITS_FRAC < 0.945, if (DAY_LW_DAY_HITS_RATIO < 48.5, -0.00420023, 0.0191669), -0.0241434)), 0.0157146) + +if (DAY_WEEK_AVG_RATIO < 6.3, 2.4645E-4, if (DAY_WEEK_AVG_DERIV < 13.785, if (MIN_SCORE < 397526.0, -0.00908083, 0.00977666), if (AVG_SCORE < 289007.0, -0.00132101, 0.0196639))) + +if (MIN_SCORE < 672810.0, if (MIN_SCORE < 631089.0, if (MAX_SCORE < 611207.0, if (MIN_SCORE < 512782.0, -3.64401E-5, -0.00741622), 0.0190309), -0.0227335), 0.0167703) + +if (MAX_SCORE < 439769.0, 4.38016E-5, if (TOPSTORY < 0.22, -0.00593521, if (SUPERDUPER_AVG < 0.45, if (DAY_PD_HITS_DERIV < -1.5, 0.0429213, 0.0049244), -0.0152763))) + +if (REGIONALNEWS < 0.105, if (POLITICS < 0.29, -8.83284E-5, -0.00708574), if (MAX_SCORE < 291999.0, 3.87947E-4, if (DAY_WEEK_AVG_DERIV < 2.145, -0.00777391, -0.0310452))) + +if (MAX_MIN_SCORE < 61554.2, -2.35487E-5, if (NUM_WORDS < 2.5, if (MAX_MIN_SCORE < 88657.5, -0.00672369, if (DAY_PREV_DAY_HITS_FRAC < 0.295, -0.0026578, 0.0151957)), -0.0135855)) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking07.expression b/searchlib/src/test/files/ranking07.expression new file mode 100644 index 00000000000..97b6528aa33 --- /dev/null +++ b/searchlib/src/test/files/ranking07.expression @@ -0,0 +1,200 @@ +if (attribute(catid) in 
[0,100200171,100300011,100200130,100300014,100300077,100200034,100200186,100400141,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100400142,100200054,100300073,100200192,100300212,100300209,100400079,100200170,100300169,100400080,100200176,100300200,100200028,100300076,100200232], if (attribute(catid) in [100200186,100200068,100300121,100300019,100200176,100300200,100200028,100300076], if (attribute(catid) in [100200068,100300019,100200176,100300200], -0.0249999798, 0.0022099815), if (attribute(catid) in [0,100300011,100300014,100300077,100200034,100400141,100300165,100300005,100200172,100300008,100300027,100200053,100200192,100300209,100400079,100200170,100300169,100400080], if (attribute(catid) in [100300011,100300165,100300005,100300027,100200192,100300209,100400079,100400080], 0.013160154, if (attribute(catid) in [100300014,100200034,100400141,100200172,100300008,100200053,100200170,100300169], 0.0191030525, 0.021725414)), if (attribute(catid) in [100200130,100400142,100200054,100300073], 0.0270836867, 0.0305748922))), if (attribute(catid) in [100300058,100300166,100300102,100400037,100400038,100300065,100300127,100200087,100300066,100300006], 0.0410066553, if (attribute(catid) in [100300093,100200234,100300126,100200193,100300122,100300074], 0.0557829172, 0.0704327304))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200234,100300019,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100300076,100200055,100200232,100300214], if (attribute(catid) in [100300011,100300014,100300058,100300005,100200068,100300019,100300209,100400079,100200170,100400080,100200176,100300076,100200055,100200232,100300214], if (attribute(catid) in 
[100200068,100300019,100200055,100200232,100300214], -0.03599083, -0.0027644159), if (attribute(catid) in [100200171,100200034,100200186,100300008,100300116,100300073,100400038,100200192,100300127,100300169,100200087,100300200], if (attribute(catid) in [100200171,100200034,100200186,100300008,100300116,100200192], 0.0113307, 0.0164266261), if (attribute(catid) in [0,100400141,100200052,100300102,100300027,100300121,100300065], 0.021255028, 0.0272380704))), if (attribute(catid) in [100200130,100300013,100300166,100300004,100200054,100200193,100300212,100300074,100300066], if (attribute(catid) in [100200130,100300166], 0.0328865429, 0.0399735491), if (attribute(catid) in [100300165,100300093,100400142,100300122,100300006,100300146], 0.0477513417, 0.0587510469))) + +if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100300005,100200172,100300008,100200068,100300027,100300121,100200053,100300019,100300004,100300073,100400038,100200192,100300065,100300212,100400079,100200170,100300169,100400080,100200087,100300200,100300076,100300006,100200232,100300146], if (attribute(catid) in [100300014,100300058,100200034,100200186,100300008,100200068,100300019,100300212,100200232], if (attribute(catid) in [100300008,100200068,100300019,100200232], -0.0260716807, -8.004775E-4), if (attribute(catid) in [100300165,100300005,100300073,100400079,100200170,100400080,100200087,100300146], 0.0126841581, if (attribute(catid) in [0,100200171,100300077,100200172,100300065,100300006], 0.022881461, 0.0298499891))), if (attribute(catid) in [100300011,100200130,100300013,100300166,100200052,100300102,100300116,100200234,100400142,100200054,100300209,100300127,100300074,100300066,100200176,100200028], if (attribute(catid) in [100200130,100200052,100300102,100300116,100200234,100200054,100300209], 0.0393021257, 0.0475085975), if (attribute(catid) in [100400037,100300122,100200067], 0.0575085503, 0.0751742626))) + +if (attribute(catid) in 
[0,100300011,100300014,100300077,100200186,100400141,100300165,100300005,100300008,100200068,100300032,100300027,100300121,100300019,100300126,100300073,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200067,100200055,100300006], if (attribute(catid) in [100300005,100300008,100200068,100300032,100300209,100400080,100200028,100200067,100200055], if (attribute(catid) in [100300005,100300008,100300032,100300209,100200067,100200055], -0.0365460976, -0.0109180769), if (attribute(catid) in [100300014,100300073,100200192,100300212,100400079,100300074,100200176], 0.0093762436, if (attribute(catid) in [100200186,100300165,100300126,100200170,100300169,100300200,100300076], 0.0193739138, if (attribute(catid) in [0,100300077,100200087], 0.0231180054, 0.0274056462)))), if (attribute(catid) in [100200171,100200130,100300058,100200034,100200052,100200172,100300116,100200053,100400142,100200054,100300066], if (attribute(catid) in [100200171,100200130,100300058,100300116,100200054,100300066], 0.0339904435, 0.0402629873), if (attribute(catid) in [100300013,100300166,100300102,100200234,100300004,100400038,100300122,100300127,100200185], 0.0471640537, 0.0679501752))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100200034,100200186,100400141,100300165,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300004,100300073,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in [100300014,100200034,100200186,100400141,100300005,100300008,100200068,100300032,100300121,100300019,100300004,100200192,100300212,100300209,100400079,100200170,100400080,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in 
[100300014,100200034,100200068,100300032,100300019,100300212,100300209,100200170,100300200,100200028,100200067], if (attribute(catid) in [100300032,100300019,100300212,100300209,100300200,100200028,100200067], -0.0252149649, 5.982331E-4), 0.0109551118), if (attribute(catid) in [0,100200171,100200172,100300027,100300073,100300065,100300169,100200087,100300074], if (attribute(catid) in [100200171,100300073,100300169,100200087,100300074], 0.0192764204, 0.023932401), 0.0295724103)), if (attribute(catid) in [100300011,100300077,100300166,100200052,100200234,100200053,100400142,100400038,100300122,100300127,100300066], if (attribute(catid) in [100300011,100300077,100300166,100200052,100200234,100400038], 0.0362646736, 0.045898507), if (attribute(catid) in [100300143,100300093,100300102,100300126,100200193,100300006], 0.0576959337, 0.0940124464))) + +if (attribute(catid) in [0,100200171,100300014,100300013,100300077,100200034,100200186,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300073,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300200,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300014,100300102,100300005,100300008,100200068,100300032,100300027,100300212,100400079,100300076,100300214,100300146], if (attribute(catid) in [100300008,100200068,100300032,100300212,100300214], if (attribute(catid) in [100300008,100300032,100300214], -0.0524432898, -0.0132279367), 0.003480139), if (attribute(catid) in [100200171,100300013,100200034,100200186,100200052,100300073,100400038,100200192,100300169,100400080,100300074,100300066,100300200,100200067,100300006], if (attribute(catid) in [100300013,100200186,100200052,100300073,100400038,100300169,100400080,100300074,100200067,100300006], 0.017975983, 0.0212068067), if (attribute(catid) in [100300077,100300165,100300209,100200170,100200176], 0.0229665861, 0.0258231076))), if 
(attribute(catid) in [100300011,100200130,100300058,100300166,100300143,100400141,100300093,100300116,100300121,100200053,100300004,100400142,100200054,100300122,100300127,100200087,100200232], if (attribute(catid) in [100300011,100200130,100300058,100300166,100300143,100300093,100300116,100200053,100200054,100300122,100200087], 0.0353581654, 0.0430524781), if (attribute(catid) in [100200234,100300019,100400037,100200028], 0.0542526213, 0.0961212144))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100200034,100200186,100400141,100200052,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100200054,100300073,100200192,100300209,100400079,100200170,100300169,100400080,100200087,100300200,100200028,100300076,100200067,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300011,100300014,100300102,100300005,100300008,100200068,100300032,100300027,100200192,100300209,100400079,100400080,100200087,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300008,100200068,100300032,100300209,100300214], -0.0256804569, 0.0046816048), if (attribute(catid) in [100300058,100200186,100400141,100300121,100300019,100200170,100300169,100300200], 0.0160713107, if (attribute(catid) in [0,100200034,100200052,100200232], 0.022153881, 0.0250017744))), if (attribute(catid) in [100300166,100300143,100300165,100300093,100200172,100200234,100300004,100300126,100400142,100400038,100300065,100300127,100300074,100300066,100200185], if (attribute(catid) in [100300143,100300165,100300093,100200172,100200234,100300126,100400142,100400038,100300065,100300066,100200185], if (attribute(catid) in [100300093,100200234,100400038,100300065,100300066], 0.0315719603, 0.0353792385), 0.0430233685), if (attribute(catid) in [100200193,100300122], 0.0518243263, 0.0744220771))) + +if (attribute(catid) in 
[0,100200171,100300011,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300073,100400037,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100300076,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300014,100200034,100200186,100400141,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100200192,100400079,100400080,100200176,100300200,100300214,100300146], if (attribute(catid) in [100300011,100300102,100300008,100200068,100300032,100300200,100300214], -0.0241441823, if (attribute(catid) in [100300014,100400141,100300005,100200192,100400080,100200176], 0.0020142953, 0.0081257199)), if (attribute(catid) in [100300058,100300077,100200052,100300093,100300073,100400037,100300065,100200170,100200087,100200067], 0.014123946, if (attribute(catid) in [0,100200171,100300165,100300121,100300019,100300169], if (attribute(catid) in [0], 0.0217711535, 0.023534876), 0.0294010162))), if (attribute(catid) in [100200130,100300143,100200172,100200234,100200053,100400142,100400038,100300212,100300209,100300066], if (attribute(catid) in [100200130,100200172,100200053,100400142,100400038,100300212,100300209], 0.03583431, 0.0447717702), if (attribute(catid) in [100300004,100300126,100300074,100300007,100300045,100200028,100200185,100200232], 0.0622909986, 0.0942393297))) + +if (attribute(catid) in [100300014,100200034,100300102,100300005,100200068,100300019,100200054,100300209,100400079,100200170,100400080,100300200,100200028,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300005,100200068,100300209,100200028,100200067,100200232,100300214], -0.0222756779, -0.0032979771), if (attribute(catid) in 
[0,100200171,100200130,100300058,100300077,100200186,100400141,100300165,100200052,100300093,100200172,100300008,100300032,100300027,100300116,100300121,100200053,100400142,100300073,100200192,100300065,100300212,100300122,100300127,100300169,100200087,100300074,100300006], if (attribute(catid) in [100300058,100300077,100200186,100400141,100200052,100300008,100300032,100300073,100200192,100300212,100300169,100200087], if (attribute(catid) in [100300077,100200186,100400141,100300032,100200192,100300212], 0.011447905, 0.0165377861), if (attribute(catid) in [100200171,100300165,100300093,100200172,100300121], 0.0209845722, if (attribute(catid) in [0,100200130], 0.0242667474, 0.0268049425))), if (attribute(catid) in [100300011,100300166,100200234,100300004,100400037,100400038,100300066,100200176], 0.0447283469, 0.0603545392))) + +if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200053,100200054,100300073,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100300076,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300005,100300008,100200068,100300032,100300121,100300212,100400079,100400080,100300200,100300076,100200055,100300214], if (attribute(catid) in [100200068,100300076,100200055,100300214], -0.0265329011, -0.003851894), if (attribute(catid) in [100200171,100300014,100400141,100300027,100200054,100300073,100200192,100200087,100300074,100200067], 0.0107802387, if (attribute(catid) in [0,100300058,100300077,100200186,100300093,100200053,100200170,100300169,100200232], 0.0212053257, 0.0251822224))), if (attribute(catid) in [100300011,100200130,100300013,100300166,100300143,100300102,100300116,100200234,100300004,100400142,100200193,100300122,100300127,100300066,100200176,100200028,100300006], if (attribute(catid) in 
[100200130,100300116,100200234,100300006], 0.031445719, if (attribute(catid) in [100300166,100400142,100300122,100300127,100300066,100200028], 0.0391757711, 0.0454843261)), 0.0873814277)) + +if (attribute(catid) in [0,100300014,100300058,100300013,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100300073,100400038,100200192,100300212,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100200028,100300076,100300006,100200232], if (attribute(catid) in [100300013,100200186,100300165,100200052,100300102,100300005,100200068,100300116,100200234,100300073,100400079,100300169,100400080,100200087,100300200,100300076], if (attribute(catid) in [100300102,100200068,100200234,100400080,100200087,100300200], if (attribute(catid) in [100200068], -0.0151909005, -0.0021225032), if (attribute(catid) in [100300005,100300073,100400079,100300169,100300076], 0.0086835438, 0.0120329553)), if (attribute(catid) in [100300058,100400141,100300008,100300032,100300027,100300121,100400038,100200192,100300212,100200170,100200028,100300006], 0.0171461073, 0.0218015413)), if (attribute(catid) in [100200171,100300011,100200130,100300077,100300166,100200034,100200053,100300019,100400142,100200054,100400037,100300065,100300122,100300127,100200176], if (attribute(catid) in [100200171,100300011,100200034,100200053,100300019,100200054,100300065], 0.0280408356, 0.0355357753), if (attribute(catid) in [100300093,100300004,100300126,100200185], 0.0584272687, 0.0854108429))) + +if (attribute(catid) in [100300011,100300143,100200034,100300093,100300005,100300008,100200068,100300019,100300073,100400079,100200170,100400080,100200087,100200176,100300200,100300076,100200055,100200185,100300006], if (attribute(catid) in [100300008,100200068,100400080,100200176,100200055], -0.0169257508, 0.0096089202), if (attribute(catid) in 
[0,100200171,100200130,100300014,100300013,100300077,100300166,100200186,100400141,100300165,100200052,100300102,100200172,100300032,100300027,100300121,100200053,100300004,100400142,100200054,100400037,100200192,100300065,100300209,100300122,100300127,100300169,100300074], if (attribute(catid) in [0,100300014,100300077,100300166,100200186,100400141,100200052,100200172,100300032,100300121,100200053,100400037,100200192,100300065,100300169], if (attribute(catid) in [100300077,100200186,100200052,100300032,100400037,100200192], 0.0188605145, if (attribute(catid) in [0,100300014,100200053], 0.0230703185, 0.0248762385)), 0.0312398602), if (attribute(catid) in [100300116,100400038,100300212,100300066,100200232], 0.0379114379, 0.0540119608))) + +if (attribute(catid) in [100300011,100300014,100300058,100200186,100400141,100300102,100300005,100200068,100300032,100300121,100300073,100200192,100300209,100400079,100300169,100400080,100200087,100200176,100200028,100200067,100200055,100200232], if (attribute(catid) in [100300011,100300014,100300058,100300005,100200068,100300209,100400080,100200087,100200028,100200067,100200055,100200232], if (attribute(catid) in [100300209,100200087,100200028,100200067,100200055,100200232], -0.0225817796, -8.082327E-4), if (attribute(catid) in [100300102,100300121,100300073,100200192,100400079], 0.0088591799, 0.0138162711)), if (attribute(catid) in [0,100200171,100300013,100200034,100300165,100200172,100300027,100300116,100200234,100300004,100400142,100200054,100300065,100300122,100300127,100200170,100300006], if (attribute(catid) in [0,100200034,100200172,100300027,100300116,100200054,100300065,100300127], if (attribute(catid) in [100200034,100200172,100300027,100300116,100200054,100300065], 0.0185182017, 0.0221653757), 0.0258671547), if (attribute(catid) in [100300166,100200052,100300093,100200053,100400037,100300076], 0.0319314298, if (attribute(catid) in [100200130,100300143,100300008,100400038,100300074,100300066], 0.0399544136, 
0.0491124971)))) + +if (attribute(catid) in [0,100300011,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300073,100400038,100300065,100300209,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300005,100300008,100300032,100400080,100200087,100300076,100300214], if (attribute(catid) in [100300008,100300032,100200087,100300076,100300214], -0.0316835796, -0.0075185917), if (attribute(catid) in [100300077,100200034,100400141,100300165,100200068,100300027,100300121,100300209,100200176,100300200,100200067,100300006], 0.0093589722, if (attribute(catid) in [0,100200052,100300073,100400079,100200232], 0.0185920468, 0.0231228547))), if (attribute(catid) in [100200171,100200130,100300014,100300058,100300166,100300093,100300102,100200172,100200234,100200193,100200192,100300122,100300127,100300074,100300066], if (attribute(catid) in [100200171,100200130,100300014,100300058,100300102,100200172,100200234,100200192,100300127], if (attribute(catid) in [100200130,100200234,100200192], 0.0282114001, 0.0319414987), 0.0377741997), if (attribute(catid) in [100200053,100300004,100400142,100200054,100400037,100200185], 0.0450431326, 0.0654935018))) + +if (attribute(catid) in [100300011,100300013,100200034,100200186,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300019,100300126,100300073,100400037,100200192,100300065,100300209,100400079,100200170,100400080,100200087,100300074,100300200,100200067,100300006], if (attribute(catid) in [100300005,100300008,100300032,100300019,100300209,100200067], -0.0352996105, if (attribute(catid) in [100300011,100200034,100200186,100200068,100200192,100200170,100200087,100300074], 0.0029355359, 0.0110257031)), if (attribute(catid) in 
[0,100200171,100300014,100300165,100200052,100200172,100300121,100200234,100200053,100400142,100200054,100300122,100300127,100300169,100300066], if (attribute(catid) in [100300165,100300121,100200053,100400142,100300127,100300169], 0.0200666023, if (attribute(catid) in [0,100200171,100200052,100200172,100200054], 0.0258497457, 0.0331869782)), if (attribute(catid) in [100200130,100300077,100300166,100400141,100300093,100300004,100300007,100200028], 0.0426763778, if (attribute(catid) in [100300143,100400038,100200176], 0.0551482574, 0.0805987774)))) + +if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100300166,100200034,100200186,100200052,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100200053,100300073,100400037,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200067,100300146], if (attribute(catid) in [100300014,100200034,100300005,100200068,100300032,100200192,100300212,100300209,100200176,100300200,100200028,100200067,100300146], if (attribute(catid) in [100200068,100300032,100200176,100200028,100200067,100300146], -0.0188052149, -0.0014384095), if (attribute(catid) in [100300058,100300102,100200053,100300073,100300065,100400079,100300169,100400080,100200087,100300076], 0.0138476724, if (attribute(catid) in [0,100200171,100200186,100200052,100400038,100200170], 0.0196068633, 0.0248333768))), if (attribute(catid) in [100300011,100200130,100300143,100400141,100300165,100300093,100300027,100200234,100300019,100300004,100400142,100200193,100300074,100300066,100200232], if (attribute(catid) in [100200130,100400141,100300165,100300027,100300019,100300004,100300074,100300066], 0.0337546327, 0.0412000578), 0.0666143289)) + +if (attribute(catid) in 
[100200171,100300011,100300077,100200034,100200186,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100300019,100300073,100400038,100300209,100400079,100400080,100300074,100300200,100200067,100200055,100300006], if (attribute(catid) in [100200034,100200186,100300005,100300008,100200068,100300019,100300200,100200067,100200055], if (attribute(catid) in [100300008,100200068,100200067,100200055], -0.0193944486, -0.0039850146), if (attribute(catid) in [100300011,100300102,100300027,100300116,100300121,100400038,100300209,100400080,100300074], 0.0038699264, 0.0111071757)), if (attribute(catid) in [0,100200130,100300014,100300058,100300013,100300166,100300143,100200052,100200172,100200054,100200192,100300065,100300127,100200170,100300169,100200087,100200176,100200028,100300076,100300146], if (attribute(catid) in [0,100200130,100300058,100300143,100200172,100200054,100200192,100200170,100300169,100200087,100200176,100200028,100300076], if (attribute(catid) in [100200130,100300143,100200192,100300169,100200176,100200028], 0.0184644801, 0.021360636), 0.0266245188), if (attribute(catid) in [100400141,100300165,100200053,100300004,100300126,100300212,100300122,100300066], if (attribute(catid) in [100200053,100300004,100300126,100300122], 0.0334635662, 0.0386077462), 0.0470519595))) + +if (attribute(catid) in [0,100300011,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100300093,100300102,100300005,100200172,100300008,100200068,100300027,100300121,100200053,100400142,100300073,100200192,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100200028,100200055,100200232,100300146], if (attribute(catid) in [100300011,100300014,100200034,100300008,100200068,100300121,100200192,100200170,100300074,100300200,100200055,100200232,100300146], if (attribute(catid) in [100200034,100300008,100300121,100300200,100200055,100200232,100300146], -0.0069202095, 0.0039000323), if (attribute(catid) in 
[100300058,100400141,100300093,100300102,100300005,100200172,100400142,100300073,100400080,100200087], 0.0156946965, if (attribute(catid) in [0], 0.0175514273, 0.0195153127))), if (attribute(catid) in [100200171,100200130,100300166,100300165,100200052,100300032,100300116,100200234,100300004,100300126,100400038,100300065,100300209,100300066,100300007,100200176,100300076], if (attribute(catid) in [100300166,100300165,100300004,100300126,100300065,100300209], 0.0260422255, if (attribute(catid) in [100200171,100200130,100300032,100400038,100300066,100300076], 0.0288416138, 0.0331073272)), if (attribute(catid) in [100200054,100200193,100300122,100300127,100300045,100200067], 0.0443969439, 0.0673805882))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100400142,100300073,100200192,100300209,100400079,100200170,100300169,100400080,100200087,100300200,100200028,100300076,100200055,100200232], if (attribute(catid) in [100200034,100200068,100300209,100200170,100300200,100200028,100200055,100200232], if (attribute(catid) in [100200034,100300209,100300200,100200028,100200055,100200232], -0.0248522225, -0.0018897827), if (attribute(catid) in [100200171,100300013,100300077,100200186,100400141,100300093,100300027,100300116,100400142,100400079,100400080], if (attribute(catid) in [100200171,100300077,100200186,100300116,100400079], 0.009897739, 0.0135323202), if (attribute(catid) in [100300058,100300165,100200052,100200172,100300169,100200087,100300076], 0.0178483129, 0.0206390742))), if (attribute(catid) in [100200130,100300166,100300008,100200234,100300004,100300126,100400037,100400038,100300065,100300122,100300074,100300066,100300006,100300146], if (attribute(catid) in [100200130,100300166,100200234,100300065,100300146], 0.0310277032, 0.0370699377), if (attribute(catid) in 
[100300121,100200053,100300212,100300127,100200176,100200185], 0.0485097295, 0.0645157682))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300143,100200034,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100300004,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300200,100200055,100300006,100200232,100300214], if (attribute(catid) in [100200171,100300011,100300077,100200034,100200186,100300005,100200068,100200053,100300019,100300004,100300073,100400038,100200192,100300209,100400079,100200087,100300074,100200055,100300006,100300214], if (attribute(catid) in [100300005,100200068,100300019,100300209,100200087,100200055,100300006,100300214], -0.0244019521, if (attribute(catid) in [100200186,100400038,100400079,100300074], 0.003439916, 0.0084132649)), if (attribute(catid) in [100200130,100400141,100200052,100200172,100300008,100300027,100300116,100300065,100300169,100400080,100300200], if (attribute(catid) in [100400141,100200052,100200172,100300116,100300065,100400080,100300200], 0.0146253305, 0.0185737842), 0.0224432378)), if (attribute(catid) in [100300166,100300165,100300032,100400142,100300122,100200170,100300007,100200028], if (attribute(catid) in [100300166,100300032,100400142,100200170], 0.0312540362, 0.0367389808), if (attribute(catid) in [100300014,100200234,100400037,100200193,100200176,100200067,100200185], 0.0515240946, 0.0623565161))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200234,100300019,100300004,100200054,100300073,100400037,100400038,100300212,100300209,100400079,100300169,100400080,100200087,100300066,100300200,100200028,100300076,100200067,100300006,100300214], if (attribute(catid) in 
[100300013,100300093,100300008,100200068,100300019,100300073,100300212,100300209,100400080,100200087,100300200,100200028,100300076,100200067,100300214], if (attribute(catid) in [100300008,100200068,100300019,100300212,100400080,100200067,100300214], -0.0143906523, 0.0034452824), if (attribute(catid) in [100300058,100200186,100400141,100300165,100200052,100300005,100300032,100300027,100200234,100200054,100400038,100400079,100300169,100300006], if (attribute(catid) in [100300058,100300005,100300027,100200234,100200054,100300169,100300006], 0.0099743393, 0.0144610757), if (attribute(catid) in [0,100300011,100300014,100300102,100200172,100300004,100400037], 0.019416211, 0.0220846421))), if (attribute(catid) in [100200130,100300166,100200034,100200053,100200192,100300065,100200170,100200176], 0.0303840891, if (attribute(catid) in [100300116,100400142,100300122,100300127,100300074,100300045], 0.0417668157, 0.0552431545))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100200186,100400141,100300165,100300102,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300126,100200054,100300073,100400038,100200192,100300209,100400079,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300058,100200186,100300165,100300005,100200068,100300032,100300116,100300121,100200234,100300126,100200054,100400038,100200192,100300209,100400079,100200176,100200028,100200055,100200232], if (attribute(catid) in [100300058,100300005,100200068,100300032,100300209,100200028,100200055,100200232], if (attribute(catid) in [100300058,100300005,100300032,100300209,100200055], -0.0199572721, -1.34782E-5), 0.00950394), if (attribute(catid) in [100200171,100300077,100400141,100300073,100400080,100200087,100300200], 0.0170204672, if (attribute(catid) in [0,100200130,100200053,100300006], 0.0199906818, 0.0230038494))), if (attribute(catid) in 
[100300014,100300013,100300166,100200034,100200052,100300093,100300008,100400142,100200193,100300065,100300122,100300127,100200170,100300074,100300066,100300045], if (attribute(catid) in [100300013,100300166,100200034,100300093,100300008,100400142,100300127], 0.0272410205, 0.035385042), 0.0546059415)) + +if (attribute(catid) in [100300014,100300013,100200186,100300165,100300093,100300102,100300005,100300008,100200068,100300027,100300116,100200234,100300073,100400038,100200192,100300212,100400079,100300169,100400080,100300074,100300200,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300008,100200068,100300027,100300212,100400080,100300074,100300200,100200067,100200232,100300214], if (attribute(catid) in [100300008,100200068,100300212,100300200,100200067,100200232,100300214], -0.0257347618, -0.0087401374), if (attribute(catid) in [100300014,100300165,100300102,100300073,100200192,100400079,100300076], 0.0079479453, 0.0122270306)), if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100200052,100300121,100300004,100300126,100400142,100200054,100400037,100300065,100300122,100300127,100200170,100200176], if (attribute(catid) in [0,100200130,100200052,100300121,100300004,100300126,100200054,100300065,100300122,100200176], if (attribute(catid) in [100200130,100200052,100300121,100300004,100300065,100300122], 0.019048709, 0.0215394009), 0.0276338957), if (attribute(catid) in [100200034,100400141,100200172,100300032,100200053,100300209,100300066,100200185], 0.0397536732, 0.0689753704))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100200068,100300027,100300121,100200234,100300019,100300073,100200193,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100200067,100200055,100300006,100300214], if (attribute(catid) in 
[100200171,100300011,100300077,100200186,100200052,100300093,100300005,100200068,100300027,100300121,100200234,100200192,100300065,100300212,100300127,100400079,100200170,100400080,100200087,100300200,100200067,100200055,100300214], if (attribute(catid) in [100200186,100300005,100200068,100300212,100200170,100200087,100300200,100200055,100300214], -0.0052715451, if (attribute(catid) in [100200171,100300011,100300093,100300027,100200234,100300127,100400079,100200067], 0.0075908988, 0.0114788963)), if (attribute(catid) in [100300019,100300073,100200193,100300169,100300074,100300066,100300006], 0.0146049077, 0.0198627318)), if (attribute(catid) in [100300165,100300008,100300032,100300116,100200053,100400142,100400037,100300122], 0.0286281196, if (attribute(catid) in [100200130,100300013,100300166,100300004,100300126,100200054,100300007,100200028], 0.0377626212, 0.0646214069))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300121,100200234,100300019,100400142,100300073,100200193,100200192,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100200028,100300006,100200232], if (attribute(catid) in [100300014,100300058,100200186,100400141,100300093,100300005,100300032,100200234,100300019,100300073,100200192,100300169,100200087,100300074,100300200,100200232], if (attribute(catid) in [100300014,100300058,100200186,100300032,100300019,100300074,100300200,100200232], -0.007621457, 0.0077673481), if (attribute(catid) in [0,100200171,100300011,100300077,100300165,100200068,100300121,100200193,100300209,100300122,100400079,100400080,100300066,100200028,100300006], if (attribute(catid) in [100200171,100300011,100300165,100400079,100300006], 0.0143096613, 0.0162758268), 0.02375285)), if (attribute(catid) in 
[100200130,100300166,100200034,100300116,100200053,100300004,100200054,100400038,100300065,100300212,100300007,100200176,100300045,100300076], if (attribute(catid) in [100200130,100300166,100200034,100300116,100200053], 0.0297411208, 0.0387614885), if (attribute(catid) in [100300143,100300126,100400037,100200185], 0.0600165302, 0.096147213))) + +if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100200034,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100300004,100300073,100400038,100200192,100300212,100300209,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100300200,100200028,100200067,100200055,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300005,100300008,100200068,100300116,100300212,100300209,100400079,100400080,100300200,100200067,100200055,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300008,100300212,100300209,100200055,100200232,100300214,100300146], -0.0143064261, 0.0061747257), if (attribute(catid) in [100200171,100300058,100200186,100400141,100200172,100300032,100300027,100300121,100300004,100400038,100200170,100200087,100300074,100200028], 0.0140796593, 0.0185037483)), if (attribute(catid) in [100300011,100300014,100300013,100300166,100300143,100300165,100300093,100300102,100400142,100400037,100300065,100300122,100300127,100300169,100300006], if (attribute(catid) in [100300011,100300014,100300013,100300166,100300165,100300102,100400142,100400037,100300169], 0.0279089674, 0.0344726516), 0.0515666225)) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300008,100200068,100300032,100300027,100300121,100200053,100400142,100200054,100300073,100400037,100200192,100300209,100300122,100400079,100200170,100300169,100400080,100200087,100300066,100200176,100300200,100200028,100300076,100200185], if 
(attribute(catid) in [100300011,100300014,100200068,100300032,100200053,100200192,100300209,100300122,100200170,100400080,100200176,100200028,100300076,100200185], if (attribute(catid) in [100300014,100200068,100300032,100300209,100400080,100200176,100200028,100300076,100200185], -0.0100026799, 0.0069768979), if (attribute(catid) in [100300077,100300143,100400141,100200052,100300005,100300008,100300121,100400079,100300169,100200087], 0.0149447853, if (attribute(catid) in [0,100300166,100200034,100200186,100200172,100400037,100300066,100300200], 0.0207339117, 0.0250275322))), if (attribute(catid) in [100200130,100300058,100300165,100300102,100300116,100200234,100300004,100300126,100200193,100400038,100300065,100300127,100300074,100300045,100300006], if (attribute(catid) in [100300058,100300102,100300116,100200234,100300126,100200193,100400038,100300127,100300006], 0.0305216411, 0.0392374586), 0.07351205)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300166,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300121,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100400038,100200192,100300065,100300209,100300122,100400079,100200170,100300169,100400080,100300066,100200176,100300076,100200067,100200055,100300006,100300214,100300146], if (attribute(catid) in [100200171,100300011,100300014,100400141,100300102,100300005,100300008,100300121,100300019,100200193,100200192,100300209,100200170,100400080,100200067,100200055,100300214], if (attribute(catid) in [100300102,100300008,100300019,100200193,100300209,100200067,100200055,100300214], -0.0452597976, if (attribute(catid) in [100400141,100300005,100200192,100400080], 0.002579873, 0.0085058714)), if (attribute(catid) in [100200130,100200186,100300165,100200172,100200068,100300027,100200234,100400142,100300066,100200176,100300076,100300146], if (attribute(catid) in 
[100200130,100200186,100200068,100300066,100200176,100300076,100300146], 0.0134972332, 0.0161598104), if (attribute(catid) in [0,100200053,100300073], 0.0201733337, 0.0242718101))), if (attribute(catid) in [100300013,100300077,100300143,100200034,100300093,100300116,100300127,100200087,100300074,100300007,100300200,100300045], 0.0376364024, 0.0722294524)) + +if (attribute(catid) in [100300014,100400141,100300102,100300008,100200068,100300032,100200234,100300019,100400038,100300212,100400080,100200176,100300200,100200028,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300014,100300102,100300008,100200068,100300032,100300019,100300212,100300200,100200055,100300214], if (attribute(catid) in [100300102,100300008,100300032,100300019,100300212,100300200,100200055,100300214], -0.0330162432, -0.0099054066), 0.0037184723), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100300143,100200034,100200186,100300165,100200052,100300093,100300005,100200172,100300027,100200053,100200192,100300065,100300127,100400079,100200170,100300169,100300074,100300076], if (attribute(catid) in [100200171,100300166,100200034,100200186,100300165,100300093,100300005,100300027,100200053,100400079,100300169,100300074], if (attribute(catid) in [100200171,100300166,100200186,100300005,100300027,100200053,100300169], 0.0128773968, 0.0170605503), if (attribute(catid) in [0,100300143,100200192,100300065,100200170,100300076], 0.0199592353, 0.0237865531)), if (attribute(catid) in [100300077,100300121,100400142,100300073,100200193,100200087,100300066,100300007], 0.0288156047, 0.0451598089))) + +if (attribute(catid) in [100200171,100300011,100300013,100200034,100200186,100200052,100300102,100300008,100200068,100300027,100300121,100200053,100300019,100300004,100300073,100200193,100400038,100200192,100300212,100300127,100400079,100200170,100400080,100200087,100300074,100300007,100300214,100300146], if (attribute(catid) in 
[100300013,100200034,100200068,100300121,100300019,100200170,100200087,100300214,100300146], if (attribute(catid) in [100300013,100200034,100300121,100300019,100300214,100300146], -0.0200374966, -0.0056497245), if (attribute(catid) in [100300011,100200186,100300102,100300008,100300004,100200192,100300212,100400079,100400080,100300074], 0.0036157343, 0.0117177746)), if (attribute(catid) in [0,100200130,100300014,100300058,100300166,100300143,100400141,100300165,100300093,100300005,100200172,100300032,100300116,100300126,100400142,100300065,100300122,100300169,100300066,100300200,100200028,100300006], if (attribute(catid) in [0,100200130,100300166,100400141,100300165,100200172,100300032,100300066,100300006], if (attribute(catid) in [100200130,100400141,100300165,100200172,100300032,100300006], 0.0173296173, 0.0209361475), 0.0268947656), if (attribute(catid) in [100300077,100400037,100300209,100200176,100300045,100300076,100200185], 0.0431779718, 0.0596202146))) + +if (attribute(catid) in [100300011,100300014,100200034,100300102,100300005,100200068,100300032,100300027,100300116,100200234,100300073,100400038,100200192,100400079,100200170,100400080,100300200,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300102,100200068,100300032,100300027,100400038,100300200,100200067,100200055], -0.0105115826, if (attribute(catid) in [100300014,100200234,100200028,100300006,100200232], 7.402621E-4, 0.0078629039)), if (attribute(catid) in [0,100200171,100200130,100300077,100300143,100200186,100400141,100300165,100200052,100200172,100300121,100200053,100300019,100400142,100300122,100300127,100300169,100200087,100200176], if (attribute(catid) in [100200171,100200130,100200186,100200052,100200172,100200053,100300122,100200176], 0.0148540934, if (attribute(catid) in [0,100300077,100300143,100400141,100300019,100400142,100300127], 0.0193734454, 0.021732037)), if (attribute(catid) in 
[100300166,100300126,100200054,100200193,100300065,100300212,100300066], 0.0305394508, 0.0503395698))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300013,100300077,100200034,100200186,100400141,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100300126,100200054,100300073,100400038,100200192,100300065,100300212,100300209,100300122,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300014,100300013,100300077,100400141,100300165,100300005,100300008,100300116,100200053,100300126,100400038,100300212,100300209,100400079,100200170,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232], if (attribute(catid) in [100300005,100300126,100300209,100200087,100300200,100200055], -0.0155099848, if (attribute(catid) in [100300014,100300013,100400141,100400038,100300212,100400079,100200170,100200028,100300076,100200232], 0.0052367005, 0.0111703118)), if (attribute(catid) in [0,100200034,100300102,100200068,100300027,100300121,100300073,100200192,100300169,100400080,100300074], if (attribute(catid) in [100200034,100200068,100300027,100300121,100300073,100200192,100300169,100400080,100300074], 0.0150887568, 0.0184571681), 0.0221098064)), if (attribute(catid) in [100300011,100300166,100300093,100300004,100400142,100300127,100300066,100300045], 0.0294845237, if (attribute(catid) in [100300058,100300143,100200193,100300007], 0.0412156817, 0.0581097149))) + +if (attribute(catid) in 
[0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300126,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100300066,100300007,100200176,100300200,100300076,100200067,100200055,100200185,100300214], if (attribute(catid) in [100300011,100300014,100300008,100200068,100300032,100300027,100300116,100200234,100300019,100400037,100300209,100200176,100200055,100200185,100300214], if (attribute(catid) in [100300008,100200068,100300032,100200055,100200185,100300214], -0.0168814696, -7.128433E-4), if (attribute(catid) in [100200171,100300058,100300077,100200034,100400141,100300102,100300005,100300126,100200054,100200193,100400038,100200192,100300212,100200170,100300169,100400080,100200087,100300076], if (attribute(catid) in [100300058,100200034,100200193,100400038,100200170,100300169,100400080,100300076], 0.0104471779, 0.0151303026), if (attribute(catid) in [0,100200186,100300093,100300121,100200053,100300073,100300200], 0.0192366025, 0.0235706074))), if (attribute(catid) in [100300143,100300165,100200172,100300004,100400142,100300122,100300074,100200232], if (attribute(catid) in [100200172,100400142,100300122,100200232], 0.0313391652, 0.0410697301), 0.0691824633)) + +if (attribute(catid) in [100300013,100200186,100400141,100300005,100200068,100300032,100300116,100200234,100300004,100200192,100300212,100300209,100200170,100400080,100300074,100300200,100200028,100200055,100300146], if (attribute(catid) in [100300032,100300212,100300209,100200028,100200055,100300146], -0.0313637219, if (attribute(catid) in [100300013,100200186,100400141,100200068,100200234,100300004,100400080,100300200], 0.0050115322, 0.0108163539)), if (attribute(catid) in 
[0,100200171,100200130,100300077,100200034,100300165,100300102,100200172,100300008,100300027,100300121,100200053,100300126,100400142,100300073,100400038,100300065,100300127,100400079,100300169,100200087,100300066,100300007,100300076,100300006], if (attribute(catid) in [0,100300077,100200034,100300165,100200172,100300008,100300121,100200053,100300126,100400142,100300073,100400038,100300065,100400079,100200087], if (attribute(catid) in [100300077,100300165,100200172,100300008,100300121,100300073,100400038,100200087], 0.0169532751, 0.019777196), 0.0229019262), if (attribute(catid) in [100300014,100300058,100300166,100300143,100200052,100300093,100200054,100300122], 0.0275740075, 0.0402576409))) + +if (attribute(catid) in [0,100200171,100300011,100300058,100300013,100300077,100200186,100300165,100200052,100300102,100300005,100200068,100300032,100300027,100300116,100200234,100300019,100300126,100400142,100300073,100200192,100400079,100300169,100400080,100300074,100300200,100300045,100200028,100300076,100200185,100200232,100300214], if (attribute(catid) in [100300011,100300013,100200052,100300102,100200068,100300032,100300116,100300019,100300126,100300076,100200185,100200232,100300214], if (attribute(catid) in [100300013,100200068,100300032,100300019,100300076,100200185,100200232,100300214], -0.0132508399, 2.145632E-4), if (attribute(catid) in [0,100300058,100300077,100200186,100300165,100300005,100300027,100300073,100200192,100300169,100400080], 0.0144506818, 0.0181232118)), if (attribute(catid) in [100200130,100300014,100300166,100300143,100200034,100400141,100300093,100200172,100300008,100300121,100200053,100300004,100300065,100300212,100300209,100300122,100300127,100200170,100200087,100300006], if (attribute(catid) in [100200130,100200034,100400141,100300093,100300121,100300004,100300065,100300212,100300127,100200170,100200087,100300006], 0.023751453, 0.0281193568), 0.0394520537)) + +if (attribute(catid) in 
[100300011,100300013,100300077,100200034,100200186,100300102,100300005,100200068,100300116,100300121,100400079,100200170,100400080,100300074,100300200,100300076,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300013,100200034,100200186,100300005,100200170,100300076,100200055,100300214], -0.011327312, 0.0081802635), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100400141,100300165,100200172,100300008,100300027,100200234,100200053,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300065,100300122,100300169,100300066,100300045,100200028,100200067,100300006], if (attribute(catid) in [100200130,100300058,100300165,100200172,100300027,100200234,100300073], 0.0154377299, if (attribute(catid) in [0,100400141,100300008,100400037,100400038,100200192,100300169,100300045,100200067], 0.0188466465, 0.0219373268)), if (attribute(catid) in [100300014,100200052,100300093,100300032,100300004,100300127,100200087,100200176,100200185], 0.0332492867, 0.0538118306))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100300166,100300143,100200186,100300165,100200052,100300093,100300008,100200068,100300027,100300116,100300121,100200053,100300019,100300004,100300126,100400142,100200054,100300073,100400038,100200192,100300212,100300122,100200170,100300169,100400080,100200087,100300074,100300007,100300200,100200067,100200055,100300214,100300146], if (attribute(catid) in [100200171,100300014,100300013,100300008,100200068,100300116,100300121,100200054,100300073,100200192,100300212,100400080,100200087,100300200,100200055,100300214,100300146], if (attribute(catid) in [100300014,100300008,100200068,100200054,100300212,100300200,100200055,100300214], -0.0068335973, 0.0078647534), if (attribute(catid) in [100300165,100200052,100200053,100300019,100300004,100300126,100300122,100200170,100300169,100300074], 0.0135025323, if (attribute(catid) in [0,100300143,100300027,100400142,100400038], 0.0176344289, 
0.019623595))), if (attribute(catid) in [100300058,100200034,100400141,100200193,100300065,100300209,100300127,100400079,100300066,100300045,100200028,100300076,100300006], 0.0257873841, if (attribute(catid) in [100200172,100300032,100200176], 0.03381447, 0.0425972117))) + +if (attribute(catid) in [0,100300011,100300014,100300013,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300102,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300126,100200054,100300073,100400037,100200192,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100200028,100300076], if (attribute(catid) in [100300011,100300014,100200034,100200186,100300102,100300008,100200068,100300027,100300116,100300073,100400037,100200192,100300209,100400079,100200087,100300074,100200176,100200028,100300076], if (attribute(catid) in [100300102,100300008,100400037,100200087,100200176,100300076], -0.0165719048, 0.0081977488), if (attribute(catid) in [100300013,100300165,100200052,100200172,100200053,100200054,100300169], 0.0136130091, 0.0180110506)), if (attribute(catid) in [100200171,100200130,100300058,100300093,100300032,100300121,100400142,100200170,100300200,100300006,100200232], 0.0236852926, if (attribute(catid) in [100300143,100300005,100200193,100300065,100300122,100200067], 0.0327973275, 0.0538361793))) + +if (attribute(catid) in [100200171,100300011,100300014,100300013,100300143,100200034,100200186,100300005,100300008,100200068,100300116,100200053,100300019,100400037,100200192,100200170,100300074,100300007,100300200,100200028,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300143,100200186,100300005,100300008,100200068,100300019,100200055,100300214,100300146], if (attribute(catid) in [100300011,100300143,100300005,100300019,100200055,100300214,100300146], -0.0264171514, -0.0055670691), if (attribute(catid) in [100300014,100300013,100300116,100400037,100200192,100200170,100300007], 
0.0026101595, 0.0084034666)), if (attribute(catid) in [0,100200130,100300058,100300077,100300166,100400141,100200052,100300093,100300027,100300121,100200234,100300004,100300126,100200054,100300073,100300122,100300127,100400079,100300169,100400080,100200087,100200176,100300045,100300076,100200067,100300006,100200232], if (attribute(catid) in [0,100300058,100300077,100200052,100300093,100300027,100200234,100200054,100300073,100300169,100300045,100200067,100200232], if (attribute(catid) in [100300077,100300027,100200234,100200054,100300073,100300169], 0.013631975, 0.0160752676), if (attribute(catid) in [100200130,100400141,100300121,100300127,100400079,100200176,100300076], 0.0182663338, 0.0220047542)), if (attribute(catid) in [100300102,100200172,100400142,100300065,100300066,100200185], 0.026533065, if (attribute(catid) in [100300165,100400038], 0.0364634573, 0.0690252268)))) + +if (attribute(catid) in [0,100200171,100300077,100300166,100200034,100400141,100300165,100200052,100300093,100200172,100200068,100300032,100300027,100200234,100200053,100300004,100300126,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300076,100200067,100200055,100300006,100300214], if (attribute(catid) in [100400141,100300165,100300032,100300004,100300126,100200193,100300007,100200176,100200067,100200055,100300006,100300214], if (attribute(catid) in [100300032,100300126,100300007,100200176,100200067,100200055,100300006,100300214], -0.0053479534, 0.0054148332), if (attribute(catid) in [0,100300077,100200068,100300027,100200234,100200053,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300076], if (attribute(catid) in [100200068,100200053,100300065,100300209,100200170,100300169,100300076], 0.0124266534, 0.0155941575), if (attribute(catid) in [100300166,100200034,100300093,100200172,100300073], 0.0190612693, 0.0217608552))), if 
(attribute(catid) in [100200130,100300014,100300058,100300013,100200186,100300005,100300116,100300121,100300019,100400142,100300045,100200028,100200185], if (attribute(catid) in [100300058,100300013,100200186,100300005,100300116,100300121,100300019,100300045], 0.0276398014, 0.0339388499), 0.0448102783)) + +if (attribute(catid) in [100300011,100300014,100300058,100300143,100200052,100300005,100300008,100200068,100300032,100200234,100200054,100400037,100300209,100300127,100300169,100300074,100300007,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300014,100300058,100300005,100300008,100200068,100300032,100200234,100300209,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300005,100300032,100300209,100200067,100200232,100300214], -0.0271870843, -0.0066979774), if (attribute(catid) in [100300011,100300143,100400037,100300127,100300007,100300076,100300006], 3.261718E-4, 0.0072958932)), if (attribute(catid) in [0,100200171,100200130,100300077,100300166,100200186,100200172,100300121,100200053,100400142,100300073,100200192,100400079,100400080,100300066], if (attribute(catid) in [0,100200130,100300077,100200186,100200172,100300121,100300073,100400079,100400080], if (attribute(catid) in [100200130,100200172,100300121,100300073,100400079,100400080], 0.0130456694, 0.0171286061), 0.0214322678), if (attribute(catid) in [100400141,100300165,100300093,100300102,100300027,100300116,100300126,100400038,100300065,100300122,100200087,100300045,100200028], if (attribute(catid) in [100400141,100300165,100300102,100300027,100200087,100300045], 0.0264983702, 0.0310587203), 0.0435590971))) + +if (attribute(catid) in [100300013,100200034,100300093,100300102,100300008,100200068,100300032,100200234,100300019,100300004,100300212,100300209,100400079,100200170,100300169,100300200,100200028,100300076,100200067,100200055,100200185,100300006], if (attribute(catid) in 
[100300013,100300102,100300032,100300019,100300212,100300209,100300200,100200067,100200055], -0.0292043593, if (attribute(catid) in [100300093,100300008,100200234,100300004,100300076,100200185,100300006], -9.351701E-4, 0.0074356232)), if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300143,100200186,100300027,100300116,100200053,100400142,100300073,100200193,100400038,100200192,100300122,100400080,100200087,100300074,100300066,100200176,100300045], if (attribute(catid) in [0,100300058,100200186,100300027,100300116,100200087,100200176], 0.0139006166, 0.0181039982), if (attribute(catid) in [100300014,100300077,100400141,100300165,100200052,100300005,100200172,100300121,100300126,100300065,100300007], 0.0233872084, 0.0318278949))) + +if (attribute(catid) in [100300013,100300008,100200068,100300121,100300019,100300004,100200054,100400038,100300212,100200028,100200185,100200232], if (attribute(catid) in [100300013,100300008,100300019,100300212,100200185], -0.0407457887, -0.0116755527), if (attribute(catid) in [0,100200171,100300058,100300166,100200034,100200186,100200052,100300005,100300027,100300116,100200053,100300126,100400142,100300073,100400037,100200192,100300065,100300209,100400079,100300169,100400080,100200087,100300074,100300066,100200176,100300006], if (attribute(catid) in [100200171,100200034,100200052,100300005,100200053,100300073,100400037,100300209,100400079,100400080,100300074], if (attribute(catid) in [100200171,100200034,100300005,100200053,100300209,100400079], 0.0049667166, 0.0103313635), if (attribute(catid) in [100200186,100400142,100300065,100300169,100200087,100300066,100200176], 0.0145292773, 0.0169648891)), if (attribute(catid) in [100200130,100300014,100300077,100300143,100400141,100300165,100200172,100200193,100300122,100300127,100300200], if (attribute(catid) in [100200130,100300143,100400141,100300122,100300127], 0.0211036464, 0.0257964434), 0.0412799006))) + +if (attribute(catid) in 
[0,100200171,100200130,100300166,100300143,100200034,100200186,100400141,100300093,100300005,100200172,100300008,100300027,100300121,100300019,100300004,100300126,100200054,100300073,100400037,100200192,100300065,100300212,100400079,100200170,100300169,100400080,100200087,100300074,100300007,100300200,100200028,100200185,100300006], if (attribute(catid) in [100300143,100300093,100300005,100300008,100300019,100300212,100200028,100200185,100300006], -0.0120071553, if (attribute(catid) in [100200171,100300166,100200186,100400141,100200172,100300027,100300121,100300004,100200054,100300073,100400079,100200170,100200087,100300074], if (attribute(catid) in [100200186,100300004,100200054,100300073,100200087], 0.0078585148, 0.0109817855), if (attribute(catid) in [0,100200130,100300065,100400080], 0.0142642384, 0.0175222293))), if (attribute(catid) in [100300011,100300014,100300013,100200052,100200068,100200234,100200053,100400038,100300122,100300127,100300066,100200176,100300045,100300076], if (attribute(catid) in [100300014,100300013,100200068,100200234,100200053,100300127,100200176,100300076], 0.0221804998, 0.026563767), if (attribute(catid) in [100300077,100300165,100400142,100200232], 0.0309690505, 0.0459150714))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300166,100300143,100200186,100300165,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100400037,100200193,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300200,100300076,100200232,100300146], if (attribute(catid) in [100300014,100300102,100200068,100300032,100300121,100300019,100300004,100400142,100200193,100400038,100300212,100300127,100400079,100200170,100300074,100200176,100300200,100300076,100200232,100300146], if (attribute(catid) in 
[100300102,100200068,100300032,100300019,100300212,100200176,100200232], -0.0148052713, 0.0048766529), if (attribute(catid) in [100200171,100300165,100200052,100300005,100200172,100300116,100200234,100200192,100300169], if (attribute(catid) in [100200052,100300116,100200234,100200192,100300169], 0.0099622919, 0.0138380378), if (attribute(catid) in [0,100300027,100300126,100300066], 0.0153805374, 0.0183919749))), if (attribute(catid) in [100300011,100300077,100400141,100300093,100300073,100200087,100200028,100200185], 0.0247957566, 0.0395124104)) + +if (attribute(catid) in [100300011,100200130,100300014,100300058,100200034,100200186,100300008,100200068,100300073,100400038,100300065,100300127,100400079,100400080,100200087,100200176,100200028,100200067,100200055], if (attribute(catid) in [100300014,100300058,100200034,100300008,100200068,100200087,100200028,100200067,100200055], -0.0102104476, if (attribute(catid) in [100300011,100200186,100400038,100300127], 6.233907E-4, 0.0069244113)), if (attribute(catid) in [0,100200171,100300077,100300166,100300143,100400141,100300165,100300093,100300005,100200172,100300032,100300027,100300116,100300121,100200053,100400142,100200193,100200192,100200170,100300169,100300074,100300066,100300007,100300200,100300045,100300076], if (attribute(catid) in [0,100300077,100300166,100300143,100300165,100300005,100200172,100300032,100300027,100200053,100200192,100300045,100300076], if (attribute(catid) in [100300077,100300143,100300165,100200053,100200192,100300045], 0.0146529601, 0.0167435205), if (attribute(catid) in [100200171,100400141,100300116,100200170,100300074,100300007,100300200], 0.0216207477, 0.025299515)), if (attribute(catid) in [100200052,100300102,100200234,100300004,100300126,100300122,100200185,100300006,100200232], 0.0342788593, 0.0559275992))) + +if (attribute(catid) in 
[100200171,100300011,100200130,100300013,100400141,100200052,100300102,100300008,100200068,100300027,100300116,100200234,100200054,100300073,100400037,100300065,100300209,100300122,100300127,100400079,100200170,100300169,100300074,100200176,100200185,100300214], if (attribute(catid) in [100300008,100300027,100400037,100300074,100200176,100200185,100300214], -0.0102961911, if (attribute(catid) in [100200171,100300011,100200052,100300102,100200068,100300116,100200234,100300065,100300209,100400079,100300169], 0.0058614005, 0.0117994941)), if (attribute(catid) in [0,100300014,100300166,100200186,100300165,100300005,100300032,100300004,100300126,100400142,100200192,100400080,100200087,100300200,100200028,100200067,100300006], if (attribute(catid) in [0,100300166,100200186,100300005,100300032,100300126,100400142,100200067], 0.0176102969, 0.0199567396), if (attribute(catid) in [100300058,100300077,100300143,100200034,100300093,100200172,100300121,100300019,100400038,100300212,100300066,100300076,100200232], if (attribute(catid) in [100300077,100300093,100200172,100300019,100300066,100300076,100200232], 0.028069884, 0.0349016561), 0.0778635274))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300013,100300077,100300166,100300143,100200034,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100300073,100200193,100400038,100200192,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300200,100200028,100300076,100200055,100200185,100200232,100300146], if (attribute(catid) in [100200171,100300011,100400141,100300102,100300005,100300008,100200068,100300116,100200053,100300019,100300004,100400038,100300212,100300209,100400079,100200176,100300200,100200028,100300076,100200055,100200185,100300146], if (attribute(catid) in [100300011,100300019,100300209,100300200,100200028,100300076,100200055,100200185], -0.0255215536, 
0.0049014532), if (attribute(catid) in [0,100300013,100300166,100300143,100200034,100300121,100300126,100200192,100300122,100300127,100300169,100300074,100300066], if (attribute(catid) in [100300013,100300143,100200192,100300127,100300074,100300066], 0.0096441943, 0.0132971959), 0.0172127947)), if (attribute(catid) in [100300014,100200186,100300093,100300027,100400142,100400037,100200087,100300006], 0.0253813497, if (attribute(catid) in [100300165,100300007,100300045], 0.0318318618, 0.0425817751))) + +if (attribute(catid) in [100300058,100300013,100300077,100200186,100400141,100300165,100200052,100300005,100200068,100300116,100200234,100300019,100300126,100200054,100400038,100300212,100300169,100300074,100300066,100300007,100300200,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100400141,100300005,100300116,100200234,100300019,100300212,100300200,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300005,100300019,100300200,100200232,100300214,100300146], -0.0378281153, -0.0106433322), if (attribute(catid) in [100300165,100200068,100300126,100400038,100300074,100300007], 0.0013709167, 0.0079886834)), if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100300102,100200172,100300008,100300027,100300121,100200053,100300004,100400142,100300073,100200192,100300065,100300122,100300127,100400079,100200170,100400080,100200087,100200028,100300076], if (attribute(catid) in [100200130,100300014,100200034,100300008,100300027,100300121,100200053,100400142,100300073,100200192,100300122,100300127,100200170,100400080,100200087], 0.0145425948, if (attribute(catid) in [0,100300004], 0.0195574674, 0.0232919623)), if (attribute(catid) in [100300011,100300166,100200176,100300045,100200185], 0.0300271939, 0.049494999))) + +if (attribute(catid) in 
[0,100200171,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100300076,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300014,100300058,100300077,100400141,100300165,100300102,100200068,100300032,100300116,100300019,100300004,100400142,100400037,100300212,100300127,100200170,100400080,100300007,100300045,100200028,100200055,100300214,100300146], if (attribute(catid) in [100300102,100300032,100300019,100300004,100400037,100300212,100200055,100300214,100300146], -0.023734737, if (attribute(catid) in [100300014,100300058,100300165,100200068,100300116,100300127,100400080,100300007,100200028], 0.001360616, 0.0065228229)), if (attribute(catid) in [0,100200171,100200130,100300166,100200034,100300005,100200172,100300121,100200193,100200192,100400079,100300169,100300200], if (attribute(catid) in [0,100200034,100200172,100200193,100200192,100300169], 0.0131275051, 0.0156511717), 0.0216393464)), if (attribute(catid) in [100300011,100300143,100300008,100300027,100300126,100200054,100300065,100300209,100200176,100200232], 0.0329759178, 0.0612562214)) + +if (attribute(catid) in [100300014,100300058,100300077,100300166,100400141,100300165,100200052,100300093,100200172,100300027,100300116,100300121,100200053,100300126,100200054,100300073,100200193,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100300200,100300076,100200185,100300214,100300146], if (attribute(catid) in [100300014,100300166,100200052,100300093,100300116,100300121,100200193,100300212,100300209,100400080,100300007,100200185,100300214], if (attribute(catid) in 
[100300014,100300121,100300212,100300209,100200185,100300214], -0.0165119187, 1.429856E-4), if (attribute(catid) in [100300058,100300077,100300165,100200172,100200053,100300126,100200054,100200170,100300074,100300200,100300146], 0.0055114285, 0.0104192793)), if (attribute(catid) in [0,100200171,100200130,100200034,100200186,100300005,100300008,100200068,100200234,100400142,100400038,100300065,100300045,100300006], if (attribute(catid) in [0,100200130,100200034,100300005,100400142,100300065,100300045,100300006], 0.0161701482, 0.0225059966), if (attribute(catid) in [100300013,100300143,100300004,100300122,100200087], 0.0319347909, 0.0503173002))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100300116,100200053,100300019,100300004,100200054,100300073,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100200087,100300074,100300066,100300007,100200176,100300045,100200028,100200067,100200232], if (attribute(catid) in [100200171,100300013,100300143,100200034,100300102,100300005,100300008,100200068,100300116,100300019,100200054,100400038,100300212,100300122,100300169,100200087,100300074,100300045,100200067,100200232], if (attribute(catid) in [100300143,100200034,100300102,100300008,100300019,100300212,100200067], -0.0135611192, if (attribute(catid) in [100300013,100200068,100300116,100200054,100300122], 0.0015450909, 0.0074784769)), if (attribute(catid) in [100200130,100300014,100300058,100200172,100300027,100200053,100300004,100300073,100200192,100200028], 0.0109788633, 0.01660535)), if (attribute(catid) in [100300165,100200234,100400142,100300065,100400080,100300076,100200185,100300006], if (attribute(catid) in [100200234,100300065,100400080,100300076], 0.0245937925, 0.0299316682), 0.0434718302)) + +if (attribute(catid) in 
[0,100200171,100300011,100200130,100300014,100300077,100300166,100200186,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100300121,100200234,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100400079,100200170,100300074,100300066,100300200,100300045,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300077,100200186,100200172,100200068,100300027,100300116,100300121,100200234,100300019,100300073,100400037,100300212,100400079,100300200,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200234,100300019,100300212,100300200,100200067,100200055,100200232,100300214], -0.0132138062, if (attribute(catid) in [100300011,100300014,100200186,100200068,100300027,100300116,100300121,100400037,100300006,100300146], 0.0019211021, 0.0071648202)), if (attribute(catid) in [0,100200171,100200130,100200052,100300093,100300005,100200054,100200192,100300065,100300209,100200170,100300066,100300045], if (attribute(catid) in [100200171,100200130,100200052,100300093,100300005,100300209,100300066,100300045], 0.0114878654, 0.0141002634), 0.0189217722)), if (attribute(catid) in [100300143,100300165,100300102,100300008,100200053,100400142,100200193,100400038,100300122,100300127,100300169,100400080,100200087,100200176,100200185], if (attribute(catid) in [100300143,100300165,100300102,100200053,100400142,100300127,100200176], 0.0249929297, 0.0287505728), if (attribute(catid) in [100300058,100200034,100400141,100200028], 0.0405244074, 0.0625787358))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100200052,100300093,100300102,100300005,100300008,100300019,100300004,100300126,100200054,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100200170,100300169,100300066,100300200,100200028,100200067,100200232,100300214], if (attribute(catid) in 
[100300014,100200034,100300093,100300102,100300008,100300019,100300004,100300126,100200054,100300212,100300209,100300200,100200028,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300008,100300019,100200054,100300209,100200067,100200232,100300214], -0.0251474123, -3.413433E-4), if (attribute(catid) in [100200130,100200052,100300073,100200192,100200170,100300169,100300066], 0.0080094607, 0.0126070285)), if (attribute(catid) in [100300011,100300077,100300166,100300143,100200186,100400141,100200068,100300116,100300121,100200053,100400142,100400037,100400079,100400080,100200087,100300074,100300045], if (attribute(catid) in [100300011,100300077,100300143,100200186,100400141,100200053,100400142,100400079,100400080,100200087,100300045], 0.0176471308, 0.0208465659), if (attribute(catid) in [100300165,100200172,100200234,100300122,100200176], 0.0268188222, 0.0367255273))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100300143,100200186,100400141,100300093,100300102,100200172,100300008,100300027,100300121,100200234,100200053,100300019,100300004,100300073,100400037,100200193,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300200,100300045,100200028,100300076,100200067,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300014,100300093,100300102,100300008,100200053,100300019,100300004,100300073,100300212,100300127,100300200,100200028,100300076,100200067,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300008,100300019,100200067,100200055,100200232,100300214], -0.0254633193, 0.0030878168), if (attribute(catid) in [0,100200171,100200130,100300058,100300027,100300121,100200234,100200193,100200170,100300045], if (attribute(catid) in [100200171,100200130,100300058,100300027,100300121,100200234,100200193,100300045], 0.0097072082, 0.0114229146), 0.0156613592)), if (attribute(catid) in 
[100200034,100200052,100300005,100200068,100300032,100300116,100400142,100200054,100200192,100300122], 0.0227531664, 0.0305920398)) + +if (attribute(catid) in [100300011,100200034,100300102,100300005,100200068,100300032,100300116,100300019,100300004,100200193,100300212,100300007,100300200,100200028,100200067,100200055,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300102,100300005,100300032,100300019,100300004,100200193,100300200,100200067,100200055,100300214,100300146], -0.0181634396, -0.0014727477), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300077,100200186,100400141,100200052,100200172,100300008,100200234,100200053,100400142,100300073,100400038,100200192,100300065,100300122,100200170,100300169,100200087,100300074,100200176,100300045,100300076], if (attribute(catid) in [100300014,100200186,100400141,100200052,100200234,100200053,100400038,100200192,100300065,100300169,100300074,100200176,100300045], 0.0098907776, if (attribute(catid) in [0,100200130,100300013,100300077,100200172,100300008,100400142,100300122,100200170], 0.0138164577, 0.017092541)), if (attribute(catid) in [100300166,100300143,100300165,100300093,100300027,100300127,100400079,100400080,100300066], 0.0219244924, 0.04056989))) + +if (attribute(catid) in [0,100200171,100200130,100300058,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100200053,100300019,100300004,100300126,100400142,100300073,100400037,100200193,100400038,100200192,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100300200,100200028,100300076,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300143,100300093,100300102,100300116,100300019,100300004,100200193,100400038,100300212,100400080,100200028,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300019,100400038,100200028,100200055,100200232,100300146], 
-0.0376899039, -0.0084354615), if (attribute(catid) in [100200034,100200186,100400141,100300027,100300073,100300122,100300127,100400079,100200170,100200087,100300200], 0.0049554661, if (attribute(catid) in [0,100200171,100300058,100300165,100300005,100200172,100200053,100300126,100400142,100200192,100300169,100200176], if (attribute(catid) in [0,100300058,100200172,100200053,100300126,100400142,100200192], 0.0134283205, 0.0149300488), 0.0182669992))), if (attribute(catid) in [100300014,100300077,100300121,100200234,100200054,100300074,100300066,100200185], 0.0260635269, if (attribute(catid) in [100300011,100300166], 0.0344146236, 0.0489908315))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100200034,100200186,100400141,100300165,100300008,100300116,100200234,100300126,100400142,100300073,100200192,100300065,100300209,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300200,100300045,100200028,100300076,100200055,100200185,100300146], if (attribute(catid) in [100300011,100300014,100200034,100400141,100300165,100300008,100300116,100200234,100200192,100300065,100400079,100300169,100200087,100300200,100200028,100300076,100200055,100300146], if (attribute(catid) in [100300014,100300008,100400079,100300200,100300076,100200055], -0.0047600269, 0.0060145343), if (attribute(catid) in [0,100300073,100300209,100400080,100300066], 0.0120012047, 0.0143999679)), if (attribute(catid) in [100200130,100300058,100300143,100200052,100300093,100300102,100200172,100200068,100300027,100300121,100300004,100200054,100400037,100400038,100300212,100300122,100300127,100300007,100200176], if (attribute(catid) in [100300093,100300102,100200172,100200068,100300027,100300121,100400037,100300122,100300127,100200176], 0.0205502481, 0.0247979152), 0.0392012352)) + +if (attribute(catid) in 
[100300011,100200130,100300077,100400141,100300165,100300093,100300005,100300008,100300116,100200053,100200193,100400038,100200192,100300209,100300122,100200087,100300074,100300066,100200176,100300045,100200028,100200067], if (attribute(catid) in [100300011,100400141,100300005,100300008,100200193,100300209,100300122,100200087,100300074,100200176,100200067], if (attribute(catid) in [100300011,100200193,100300209,100200087,100200176,100200067], -0.0143613312, -0.0022635925), 0.0062470659), if (attribute(catid) in [0,100300058,100300166,100200034,100200186,100200052,100200172,100200068,100300027,100300121,100200234,100400142,100300073,100400037,100300127,100400079,100200170,100300169,100300076,100300006,100200232], if (attribute(catid) in [0,100300166,100200034,100200186,100200052,100200234,100400142,100200170,100300169,100300076,100300006], if (attribute(catid) in [100200034,100200186,100200234,100200170,100300169,100300076,100300006], 0.012173824, 0.0142409856), 0.0181125356), if (attribute(catid) in [100200171,100300014,100300013,100300019,100300004,100300126,100300065,100300212,100400080,100300007,100300200], 0.0273051157, 0.0468409464))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300143,100200034,100200186,100400141,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200053,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100200028,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300077,100300143,100200034,100300116,100300019,100300004,100400037,100300212,100300209,100300074,100200028,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300143,100300019,100300004,100400037,100300212,100300209,100300214,100300146], -0.0293561273, -0.0014538622), if (attribute(catid) in 
[100200186,100400141,100300093,100300005,100200172,100300121,100200054,100200192,100300065,100200232], 0.0075212689, if (attribute(catid) in [0,100200171,100300058,100300032,100200053,100400079,100300169], 0.0113283464, 0.0146230806))), if (attribute(catid) in [100300013,100300166,100300165,100300008,100200234,100400142,100200193,100400038,100300122,100200087,100300007,100300076], 0.0245202493, 0.0398629845)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100200034,100200186,100300165,100200052,100300093,100300102,100200172,100300008,100300032,100300116,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200192,100300212,100300209,100300127,100400079,100300169,100200087,100300074,100300007,100300045,100200028,100200067,100300006], if (attribute(catid) in [100300011,100300008,100200234,100300019,100300004,100400142,100400037,100300209,100200087,100200028,100200067,100300006], if (attribute(catid) in [100300011,100300008,100300019,100300209,100200087,100200028,100200067,100300006], -0.012550418, 5.157971E-4), if (attribute(catid) in [0,100200171,100200130,100300014,100300165,100300102,100300116,100200053,100400079,100300169,100300074,100300007,100300045], if (attribute(catid) in [100200171,100200130,100300014,100300102,100300116,100400079,100300169,100300074,100300007,100300045], 0.0073987391, 0.0089320166), 0.0143250256)), if (attribute(catid) in [100300166,100400141,100300005,100200068,100300027,100300121,100300126,100200054,100400038,100300065,100300122,100200170,100400080,100300066,100200176,100300076,100200185], if (attribute(catid) in [100400141,100300027,100300121,100300126,100300065,100200170,100400080,100300066], 0.0203552723, 0.0282273054), 0.0491434915)) + +if (attribute(catid) in 
[0,100200171,100200130,100300058,100300143,100200034,100200186,100400141,100200052,100300005,100300008,100300032,100300027,100300116,100300121,100200234,100200053,100300126,100400142,100200054,100300073,100200193,100400038,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100200087,100200176,100200067,100300214,100300146], if (attribute(catid) in [100200130,100300058,100300143,100400141,100200052,100300005,100300008,100300032,100300027,100300121,100200234,100300126,100200054,100300073,100400038,100300212,100300209,100300127,100200170,100400080,100200087,100200176,100200067,100300214,100300146], if (attribute(catid) in [100300058,100300143,100300005,100300008,100300032,100300121,100200054,100300212,100300209,100200087,100200176,100200067,100300214], -0.0099342652, 0.0039806749), if (attribute(catid) in [0,100400142,100200193,100400079,100300169], 0.0115553152, 0.013542768)), if (attribute(catid) in [100300014,100300077,100300166,100200172,100200068,100300065,100300122,100300074,100300066,100300007,100300200,100300006], if (attribute(catid) in [100300077,100200172,100300065,100300066,100300200], 0.0169304303, 0.0205751961), if (attribute(catid) in [100300013,100300165,100300004,100300076], 0.0276389874, 0.0441753863))) + +if (attribute(catid) in [100300011,100300014,100300166,100200034,100200186,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300032,100300027,100200053,100300019,100400142,100400037,100200193,100200170,100300074,100300007,100300200,100200028,100200067,100300006], if (attribute(catid) in [100200034,100300102,100300005,100300008,100200068,100300032,100300019,100400037,100300007,100300200,100200067,100300006], if (attribute(catid) in [100300005,100300008,100300032,100300019,100400037,100200067], -0.0322662364, -0.0081793105), if (attribute(catid) in [100300011,100200186,100200052,100400142,100200193,100300074,100200028], 0.0017199453, 0.0060178344)), if (attribute(catid) in 
[0,100300013,100300077,100300143,100300165,100300116,100300121,100200234,100300004,100300073,100400038,100200192,100300065,100300127,100400079,100300169,100400080,100300066,100200176], if (attribute(catid) in [100300013,100300077,100300143,100300165,100300116,100300121,100200234,100300073,100300065,100300169,100400080,100300066], if (attribute(catid) in [100300013,100300143,100300165,100300116,100300169], 0.0103393155, 0.0134250404), 0.0166558979), if (attribute(catid) in [100200171,100200130,100200087,100300076,100200232], 0.0241920527, 0.0369765147))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300077,100300143,100200034,100400141,100300093,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300019,100300004,100400142,100200054,100300073,100400037,100400038,100300065,100300212,100300209,100300122,100400079,100200170,100300169,100200087,100300074,100300007,100200176,100300200,100200028,100200185,100300214,100300146], if (attribute(catid) in [100300143,100300008,100300019,100400037,100300212,100300209,100300007,100300200,100200185,100300214], -0.0243180335, if (attribute(catid) in [100300014,100300013,100200034,100400141,100200172,100200068,100400142,100200054,100400079,100200170,100300169,100200087,100300146], if (attribute(catid) in [100300014,100300013,100200034,100200068,100400142,100200054,100300146], 0.0025036422, 0.0060784676), if (attribute(catid) in [0,100200171,100200130,100200234,100200053,100300004,100300073,100400038,100200028], 0.010955047, 0.0136122663))), if (attribute(catid) in [100300011,100300166,100200186,100300165,100200052,100300102,100200192,100300127,100400080,100300066,100300045,100300076], if (attribute(catid) in [100300011,100200186,100200052,100300102,100200192,100400080,100300066,100300045,100300076], 0.0188007232, 0.0246438709), 0.0423545435)) + +if (attribute(catid) in 
[0,100200171,100300011,100300014,100300058,100300077,100200186,100400141,100300093,100300102,100300032,100300121,100200234,100300126,100400142,100300073,100400037,100200193,100200192,100300122,100200087,100300074,100300066,100200176,100300200,100200028,100200067,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300093,100300032,100200234,100300126,100400037,100200176,100300200,100200067,100200055,100200185,100300006,100200232,100300214,100300146], -0.0157352941, if (attribute(catid) in [100200171,100300011,100300058,100300102,100200193,100200192,100300122,100300074,100200028], 0.0030409464, if (attribute(catid) in [0,100200186,100400141,100300121,100400142], 0.008063718, 0.0107191663))), if (attribute(catid) in [100200130,100300013,100300166,100200034,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100200054,100400038,100300127,100400079,100200170,100300169,100400080,100300007,100300076], if (attribute(catid) in [100200130,100300013,100300165,100200172,100300027,100200054,100400079,100200170,100300169,100400080], if (attribute(catid) in [100300013,100200172,100300027,100400079,100200170,100400080], 0.0154977719, 0.0195741488), 0.0236445967), if (attribute(catid) in [100300116,100200053,100300065], 0.0324449764, 0.0541648949))) + +if (attribute(catid) in [100300011,100300058,100300143,100200052,100300093,100200068,100300032,100300027,100200054,100300073,100400037,100200193,100200192,100300209,100300122,100300127,100400079,100200170,100400080,100300200,100300045,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300143,100200068,100300032,100300209,100200170,100200055,100300214,100300146], -0.0241523508, if (attribute(catid) in [100300011,100300058,100200054,100200192,100300127,100400080,100300200,100300045,100200185], -9.657677E-4, 0.005124115)), if (attribute(catid) in 
[0,100300014,100300013,100300077,100200034,100200186,100400141,100300165,100300102,100300116,100300121,100200234,100300126,100400142,100400038,100300065,100300169,100300066,100300006], if (attribute(catid) in [0,100300014,100300013,100300077,100200034,100200186,100400141,100300102,100300116,100300126,100300006], if (attribute(catid) in [100300013,100300077,100200034,100200186,100400141], 0.0090067376, 0.0127908297), 0.0164902475), if (attribute(catid) in [100200171,100200130,100300166,100200172,100200053,100200087,100200176,100300076], 0.0228127126, 0.0418200655))) + +if (attribute(catid) in [100200171,100200130,100300014,100300013,100300077,100300166,100200186,100400141,100300165,100200052,100200172,100300008,100200068,100300032,100300027,100300116,100200234,100200053,100300019,100300004,100400142,100300073,100400038,100300212,100300209,100400079,100300169,100400080,100300074,100300007,100200176,100300200,100200028,100200067,100300006,100300146], if (attribute(catid) in [100300013,100400141,100300032,100200053,100300019,100300004,100300212,100300209,100400080,100300007,100200176,100200028,100200067], if (attribute(catid) in [100300032,100300019,100300212,100300209,100200176,100200028,100200067], -0.038608259, -0.0063382264), if (attribute(catid) in [100300014,100300077,100200186,100300165,100300008,100200234,100400142,100300073,100400038,100300169,100300074,100300200,100300146], if (attribute(catid) in [100300077,100300008,100300073,100400038,100300074,100300146], 0.0014081125, 0.0048469355), 0.0085143275)), if (attribute(catid) in [0,100300058,100200034,100300121,100300126,100200054,100200193,100200192,100300065,100300122,100300127,100200170,100200087,100300066,100300045], if (attribute(catid) in [0,100300058,100200193,100200192,100300122,100300127,100200170,100200087], 0.0154377122, 0.0222690511), 0.0445329146)) + +if (attribute(catid) in 
[100300011,100300058,100300013,100200186,100300165,100300102,100300008,100200068,100300027,100300116,100300121,100200234,100300019,100200054,100300212,100300127,100400079,100200087,100300074,100300007,100200176,100300076,100200055,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300165,100300102,100300027,100300121,100300019,100200087,100300074,100200176,100200055,100300214], -0.0084086451, if (attribute(catid) in [100300013,100200186,100300008,100200234,100200054,100300212,100300127,100300006], 5.019617E-4, 0.0061914097)), if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200052,100300093,100300005,100200172,100200053,100300004,100300126,100400142,100300073,100400038,100200192,100300122,100200170,100300169,100300066,100300200,100300045], if (attribute(catid) in [100200130,100300014,100300093,100300005,100300004,100300126,100400038,100200192,100300169,100300200], 0.0114280621, if (attribute(catid) in [0,100200171,100300077,100200052,100300073,100300066], 0.0141959004, 0.0160019821)), if (attribute(catid) in [100300143,100400141,100300032,100300065,100300209,100200067], 0.0245106044, 0.0334093506))) + +if (attribute(catid) in [100300011,100200130,100300014,100300143,100400141,100300165,100200052,100300005,100300027,100300116,100200053,100300019,100300004,100300073,100400037,100300209,100400079,100200170,100300169,100300074,100300007,100200176,100300045,100300076,100200067,100200055,100200185,100300214], if (attribute(catid) in [100300011,100300005,100300019,100400037,100300209,100200176,100200055,100200185,100300214], -0.0208490757, if (attribute(catid) in [100300143,100400141,100300116,100200053,100300004,100300073,100400079,100300074], 2.97283E-5, 0.0064274847)), if (attribute(catid) in [0,100200171,100300058,100300077,100300166,100200034,100200186,100200172,100200068,100200234,100300126,100200192,100300065,100300122,100300127,100400080,100200087,100300066,100200028,100300006], if (attribute(catid) in 
[0,100300077,100300166,100200186,100200172,100300122,100200087,100200028,100300006], if (attribute(catid) in [0,100300166,100300122,100200028], 0.0124031076, 0.0150987823), 0.0188364733), if (attribute(catid) in [100300093,100300102,100300008,100300121,100400142,100200054,100300200,100200232], 0.0293407993, 0.0513888162))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100400141,100300165,100200052,100300008,100200068,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100200054,100300073,100400038,100200192,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200176,100300200,100300076,100200067,100200185,100200232], if (attribute(catid) in [100300014,100300008,100300121,100200234,100200053,100300019,100300212,100400080,100200176,100300200,100200067,100200185,100200232], if (attribute(catid) in [100300008,100300019,100300212,100200176,100300200,100200067,100200185,100200232], -0.0190877492, -7.557548E-4), if (attribute(catid) in [100400141,100200052,100300126,100200054,100300065,100400079,100200170,100300169], 0.0071693422, if (attribute(catid) in [0,100200130,100300073,100400038,100200192,100300127,100300076], 0.0119746374, 0.0136797362))), if (attribute(catid) in [100300011,100300058,100300166,100200034,100200186,100300093,100300005,100200172,100300027,100400142,100300122,100200087,100300066,100300045,100300006], if (attribute(catid) in [100300058,100200186,100300093,100300005,100200172,100300027,100400142,100300122,100200087,100300045,100300006], 0.0192765099, 0.0227594602), 0.0348341149)) + +if (attribute(catid) in [100300011,100300013,100300143,100300008,100200068,100300019,100200054,100400079,100200170,100400080,100300066,100200176,100300200,100200028,100300006,100300146], if (attribute(catid) in [100300011,100300013,100300008,100300019,100200176,100200028], -0.0154031193, -7.651129E-4), if (attribute(catid) in 
[0,100200171,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300102,100200172,100300027,100300121,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100300127,100300169,100200087,100300074,100300007,100300045,100300076,100200067], if (attribute(catid) in [100200171,100200130,100300077,100200034,100400141,100300102,100200172,100300027,100300004,100400142,100400037,100300127,100200087,100300074,100300007,100300045], if (attribute(catid) in [100200130,100400141,100300102,100200172,100300027,100300004,100400037,100300127,100300074,100300007], 0.0066677335, 0.0095565475), if (attribute(catid) in [0,100200052,100300065,100300169,100300076,100200067], 0.0113431678, 0.015684483)), if (attribute(catid) in [100300165,100300116,100200234,100200053,100300126,100400038,100200185,100200232], 0.023118108, 0.0499183157))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300013,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100300032,100300027,100300121,100200234,100300004,100300126,100400142,100300073,100400037,100200193,100300065,100300209,100200170,100300169,100400080,100300066,100300200,100300045,100300076,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200171,100200130,100300058,100200034,100400141,100300093,100300102,100300005,100300032,100300027,100300073,100400037,100300209,100400080,100300200,100200055,100200232,100300146], if (attribute(catid) in [100300102,100300032,100300027,100400037,100300209,100400080,100200055,100300146], -0.0210007071, if (attribute(catid) in [100200171,100300058,100200034], -0.0023338437, 0.0034194175)), if (attribute(catid) in [0,100300077,100200186,100200234,100300126,100400142,100200193,100300065,100200170,100300169,100300066,100300045], 0.0096608445, 0.0144728932)), if (attribute(catid) in 
[100300014,100300165,100300008,100200068,100200192,100300122,100300127,100400079,100200087,100300074,100200028,100200185], if (attribute(catid) in [100300014,100300165,100200068,100200192,100400079,100200087], 0.0193246792, 0.0247702235), 0.0360951958)) + +if (attribute(catid) in [100300011,100300143,100200186,100400141,100300165,100300008,100300032,100300116,100300019,100300004,100300126,100400142,100300073,100400037,100400038,100200192,100300212,100300122,100300169,100400080,100300066,100300007,100300076,100200067,100200055,100200185,100300146], if (attribute(catid) in [100300143,100300032,100300019,100300126,100400038,100200067,100200055,100200185], -0.0215257824, if (attribute(catid) in [100300011,100400141,100300008,100300004,100400142,100300073,100400037,100200192,100300066,100300076,100300146], 0.0024762462, 0.0073887199)), if (attribute(catid) in [0,100200171,100300014,100300058,100300013,100300077,100300166,100200034,100300093,100200172,100200068,100300027,100200053,100300065,100300209,100300127,100400079,100200170,100200087,100300074,100300200], if (attribute(catid) in [0,100300014,100300013,100300077,100200034,100300093,100200172,100300027,100200053,100300209,100400079,100300074,100300200], 0.0122813047, 0.0177730971), if (attribute(catid) in [100200130,100300121,100200234,100200193,100200176,100200028], 0.0248372595, 0.0378712543))) + +if (attribute(catid) in [100200171,100200186,100300165,100300102,100300005,100200068,100300032,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100400038,100300212,100300122,100400079,100400080,100200087,100200067,100200055,100200232], if (attribute(catid) in [100300102,100200068,100300032,100300019,100300126,100400038,100300212,100200067,100200055,100200232], if (attribute(catid) in [100300102,100200068,100300032,100300019,100200067,100200055,100200232], -0.0209289749, -0.0056438478), if (attribute(catid) in [100200186,100300165,100300121,100200053,100300004,100400079,100400080,100200087], 
0.0024863738, 0.0054172149)), if (attribute(catid) in [0,100300011,100200130,100300014,100300058,100300166,100400141,100200052,100200172,100300027,100300073,100200192,100300127,100200170,100300169,100300066,100300200,100200028], if (attribute(catid) in [0,100300014,100300058,100300166,100400141,100200052,100200192,100300066], if (attribute(catid) in [100300014,100300058,100300166,100400141,100200192], 0.0098208012, 0.0120469551), 0.0162551324), if (attribute(catid) in [100300077,100200034,100300093,100300008,100300116,100200054,100300065,100300074,100300007,100200176], 0.0250861627, 0.0414462132))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100200034,100200186,100200052,100300093,100300102,100200068,100300027,100300121,100200234,100200053,100300019,100300004,100200054,100300073,100400037,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100300076,100200067,100300006,100300214], if (attribute(catid) in [100200052,100200068,100200234,100300019,100300004,100300212,100400079,100300169,100300074,100200176,100300076,100200067,100300214], if (attribute(catid) in [100200234,100300019,100300004,100300212,100300214], -0.0247332845, -0.0060255621), if (attribute(catid) in [0,100300058,100300077,100200034,100200186,100300093,100300102,100300027,100300121,100200054,100300073,100400037,100200192,100300122,100300127,100300006], if (attribute(catid) in [100300058,100200186,100300093,100300121,100300073,100400037,100200192,100300127], 0.0043627132, 0.0088967157), 0.0143359261)), if (attribute(catid) in [100300143,100200172,100400038,100200087,100300200,100200028,100200185], 0.020278194, if (attribute(catid) in [100300014,100300166,100400141,100300165,100300005,100300116,100300126,100400142], 0.0288593151, 0.045065206))) + +if (attribute(catid) in 
[100200171,100300011,100300058,100300013,100300077,100300166,100200186,100300093,100300005,100300008,100300027,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300209,100300127,100400079,100300169,100400080,100200087,100300074,100200176,100200028,100200185], if (attribute(catid) in [100300011,100300013,100300008,100300027,100300126,100200054,100400038,100300209,100300127,100400080,100200185], if (attribute(catid) in [100300011,100300013,100300008,100300126,100200054,100200185], -0.0134142246, -0.002161146), if (attribute(catid) in [100200171,100200186,100300093,100400037,100400079,100200087,100300074,100200176,100200028], 0.0042397431, 0.0082573117)), if (attribute(catid) in [0,100200130,100300143,100400141,100300165,100200052,100200172,100200068,100300032,100300116,100200234,100200053,100300004,100300065,100300122,100200170,100300066,100300076,100300006], if (attribute(catid) in [0,100200130,100400141,100300116,100300004,100300076,100300006], 0.0125577031, if (attribute(catid) in [100300143,100300165,100200052,100200172,100300032,100300065], 0.015170776, 0.0177925563)), 0.0282720969)) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300013,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100300073,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100200028,100300076,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200171,100300013,100300093,100300005,100200068,100300121,100200234,100300019,100300212,100300209,100200170,100200028,100200067,100200055,100300006,100300146], if (attribute(catid) in [100300013,100300005,100300121,100300212,100200067,100200055], -0.0177817471, -0.0032656602), if (attribute(catid) in 
[0,100200130,100300014,100300077,100200034,100200186,100300165,100200052,100200172,100300032,100300027,100300004,100200192,100300065,100300122,100300127,100400079,100300074,100300066,100300076], if (attribute(catid) in [100300014,100200186,100200052,100300122,100300066,100300076], 0.0049749252, if (attribute(catid) in [100300077,100200034,100300165,100200172,100300027,100300004,100200192,100400079,100300074], 0.0100440563, 0.0117011752)), 0.0178896771)), if (attribute(catid) in [100300058,100200054,100400037,100200193,100300169,100300200], 0.0319705253, 0.0522115674)) + +if (attribute(catid) in [100300058,100200034,100400141,100300093,100300102,100300005,100300008,100300004,100200054,100400038,100300212,100300122,100200087,100300074,100200176,100200055,100300214], if (attribute(catid) in [100300005,100300008,100300212,100200055,100300214], -0.0403440609, if (attribute(catid) in [100300058,100200034,100300102,100300004,100200054,100400038,100200176], -0.011425447, -0.0039460534)), if (attribute(catid) in [0,100200171,100300011,100200130,100300077,100200052,100200172,100400142,100300073,100200193,100200192,100300065,100300127,100400079,100200170,100300045,100200028,100300006,100200232], if (attribute(catid) in [0,100300077,100400142,100300073,100300065,100300127,100200170,100300045,100200028,100300006], if (attribute(catid) in [100300077,100300065,100300127,100200170,100300006], 0.0038995654, 0.0096106726), 0.0136466804), if (attribute(catid) in [100300014,100300013,100300166,100300165,100200068,100300032,100300027,100300121,100200234,100200053,100300126,100400080,100300066,100300007,100300200], if (attribute(catid) in [100300013,100300166,100200068,100300027,100300121,100200234,100200053,100300126,100400080,100300007,100300200], 0.0192859199, 0.0245003908), 0.0334815162))) + +if (attribute(catid) in 
[100200171,100200130,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100300165,100200052,100300093,100300008,100200068,100300032,100300027,100300116,100200053,100300019,100300126,100300073,100200193,100300209,100400079,100200170,100300066,100200176,100300076,100200067,100200055,100300214], if (attribute(catid) in [100200052,100300008,100300032,100300019,100300126,100300209,100200176,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300008,100300032,100300209,100200176,100300076,100200067,100200055,100300214], -0.0379621177, -0.0077279547), if (attribute(catid) in [100200171,100300058,100300013,100300077,100200034,100300027,100200053], 0.0040069447, 0.0080600184)), if (attribute(catid) in [0,100300011,100300014,100400141,100300102,100300005,100200172,100300121,100200234,100400037,100400038,100200192,100300065,100400080,100300074,100300006,100200232], if (attribute(catid) in [0,100300011,100300014,100400141,100300102,100300005,100200172,100400038], 0.0135824088, 0.0165574836), if (attribute(catid) in [100300004,100400142,100300127,100300169,100200087,100300007,100200185], 0.0214892901, 0.0327877321))) + +if (attribute(catid) in [0,100200130,100300014,100300058,100300143,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200068,100300032,100300121,100200234,100200053,100300004,100300126,100400142,100300073,100200193,100200192,100300065,100300212,100400079,100300169,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200055,100300214], if (attribute(catid) in [100200130,100300058,100300143,100200186,100200052,100300093,100300102,100300005,100300121,100200234,100300004,100300126,100200193,100300212,100200087,100300007,100200176,100300045,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300058,100300143,100300102,100300005,100200234,100200193,100200087,100200176,100300045,100300076,100200067,100200055,100300214], -0.010705309, 3.635835E-4), if 
(attribute(catid) in [100300014,100300165,100200068,100200053,100400142,100300073,100300065], 0.0065823776, 0.0100610854)), if (attribute(catid) in [100300011,100300077,100300166,100200172,100400037,100300122,100300127], 0.017594626, if (attribute(catid) in [100200171,100200034,100300027,100300116,100300209,100200170,100400080,100200028,100200185], 0.0254523278, 0.0397162435))) + +if (attribute(catid) in [100300058,100300013,100300077,100200186,100300165,100300093,100200172,100300008,100300032,100300027,100300116,100300121,100200234,100300019,100400142,100300073,100200193,100300212,100400079,100200087,100300066,100200176,100300200,100300045,100200028,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300013,100200186,100300008,100300032,100300027,100300019,100400142,100300212,100200087,100200176,100300200,100300045,100200055], if (attribute(catid) in [100300008,100300032,100300019,100200176,100300200,100200055], -0.0294108915, -0.0054927303), if (attribute(catid) in [100300093,100300116,100300121,100200234,100300073,100200193,100400079,100300066,100300076,100300006], 0.0042404411, 0.0073750844)), if (attribute(catid) in [0,100200171,100200130,100300014,100300166,100200034,100400141,100300102,100200068,100200053,100300004,100200054,100200192,100300065,100300122,100300127,100300169,100400080], if (attribute(catid) in [0,100200171,100300166,100200034,100300102,100200053,100300004,100200192,100300065,100300122,100300127,100300169,100400080], if (attribute(catid) in [100200171,100300166,100300004,100300065,100300127,100300169], 0.0124777156, 0.0143158121), 0.0177029723), if (attribute(catid) in [100300011,100300143,100200052,100300126,100300074,100200067], 0.0257271811, 0.0375978662))) + +if (attribute(catid) in [100300011,100300058,100300013,100300143,100200034,100400141,100300008,100400038,100300169,100300200,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in 
[100300011,100300013,100300143,100200034,100300008,100300200,100200028,100200067,100200055,100200232,100300146], -0.0179030459, -0.0025885619), if (attribute(catid) in [0,100300077,100200186,100300165,100300102,100300005,100200068,100300027,100300116,100300126,100400142,100300073,100200192,100300122,100400079,100200170,100300074,100300066], if (attribute(catid) in [0,100200186,100300165,100300005,100300027,100300116,100300126,100400079], 0.0078435164, 0.0118007064), if (attribute(catid) in [100200171,100200130,100300014,100300166,100200172,100300121,100300004,100200054,100300065,100300127,100400080,100200087,100200185], 0.0172799995, if (attribute(catid) in [100200052,100200234,100200053,100400037,100200193,100200176], 0.0276688136, 0.0491582153)))) + +if (attribute(catid) in [100200130,100300014,100300058,100300077,100200034,100200186,100400141,100300165,100300005,100200172,100300032,100300116,100200234,100300004,100400142,100300073,100200193,100400038,100200192,100300212,100300127,100400079,100200170,100400080,100200087,100300007,100200176,100200028,100300076,100200055,100200232,100300214], if (attribute(catid) in [100200186,100300032,100300116,100300073,100400038,100300212,100400079,100200087,100300007,100300076,100200055,100200232,100300214], if (attribute(catid) in [100300032,100300212,100300076,100200055,100200232,100300214], -0.0300814303, -0.0044598873), if (attribute(catid) in [100300058,100300077,100200034,100400141,100300005,100300004,100400142,100200193,100200192,100300127,100200170,100400080,100200176], 0.0033831149, 0.0061763311)), if (attribute(catid) in [0,100200171,100300166,100300143,100200052,100300102,100200068,100300027,100200053,100400037,100300065,100300209,100300122], if (attribute(catid) in [0,100300102,100200053,100400037,100300209,100300122], 0.0108688948, 0.0140670577), if (attribute(catid) in [100300121,100200054,100300169,100300074,100300066,100300200,100200185], 0.0219282043, 0.0332033624))) + +if (attribute(catid) in 
[0,100300011,100300058,100300013,100300077,100300166,100400141,100200052,100300093,100300005,100200068,100300032,100300027,100200053,100200054,100400037,100200193,100400038,100300209,100300122,100300127,100400079,100200170,100300066,100300045,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300013,100300005,100300032,100300027,100200193,100300209,100300122,100200170,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300005,100300032,100200193,100300209,100200185,100300214,100300146], -0.0217026454, -0.0034858812), if (attribute(catid) in [100300058,100300077,100400141,100200068,100200053,100400079,100300066], 0.00306927, if (attribute(catid) in [0,100400037], 0.0062304681, 0.0091439421))), if (attribute(catid) in [100200171,100200130,100200034,100200186,100300165,100200172,100300116,100200234,100300073,100200192,100300065,100300169,100400080,100200087,100200028,100300076,100200232], if (attribute(catid) in [100200130,100300165,100200172,100300116,100200234,100300073,100200192,100300169,100200232], 0.0117914211, 0.0156586974), if (attribute(catid) in [100300121,100300126,100400142,100300074,100300006], 0.0236611361, 0.0326251935))) + +if (attribute(catid) in [100300013,100300008,100300032,100300121,100200234,100300004,100300126,100200054,100200193,100300212,100300122,100300127,100400079,100200087,100200176,100300045,100200028,100300076,100200067,100300006], if (attribute(catid) in [100300121,100200234,100300004,100200054,100300212,100200087,100300076,100200067], -0.0242668043, -0.0057454024), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100300143,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100400142,100300073,100400037,100400038,100200192,100300065,100200170,100300169,100400080,100300074,100300007], if (attribute(catid) in 
[100200130,100300166,100300143,100400141,100300165,100300093,100200172,100300027,100300116,100300073,100400038,100200170,100300169,100400080,100300074], if (attribute(catid) in [100300143,100400141,100300093,100300027,100300116,100300073,100400080], 0.0044087377, 0.0084781159), if (attribute(catid) in [0,100200068,100400142,100300065], 0.0107211296, 0.0151177012)), if (attribute(catid) in [100300011,100300014,100300077,100300102,100200053,100300209], 0.022720962, 0.0333058662))) + +if (attribute(catid) in [100300011,100300014,100300143,100200052,100300102,100300005,100300116,100200053,100300126,100400038,100200192,100300074,100300066,100300007,100300200,100300045,100200028,100300214], if (attribute(catid) in [100300143,100300102,100300005,100300116,100200053,100300126,100400038,100300007,100200028,100300214], -0.0112761132, 5.180964E-4), if (attribute(catid) in [0,100200130,100300166,100300093,100200172,100300121,100200234,100400142,100200054,100300073,100400037,100300122,100300127,100400079,100300169,100400080,100300076,100200067], if (attribute(catid) in [0,100300093,100200172,100300121,100200054,100300073,100300122,100300127,100400079,100300169,100300076], if (attribute(catid) in [100300093,100200172,100200054,100300073,100300122,100300127,100400079,100300169,100300076], 0.0062969186, 0.0079944471), 0.0107843133), if (attribute(catid) in [100200171,100300058,100300013,100300077,100200034,100200186,100400141,100300165,100200068,100300027,100300065,100300212,100200170,100200087,100300006], 0.0156933768, 0.0301329171))) + +if (attribute(catid) in [100200171,100300011,100300014,100300058,100300013,100300077,100200034,100400141,100200068,100300116,100200234,100300019,100400037,100200193,100400038,100200192,100300212,100300209,100300127,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100200067,100200185,100300006,100300214], if (attribute(catid) in 
[100300011,100300013,100300116,100400038,100300212,100300209,100200087,100200176,100200067,100200185,100300214], -0.0153646097, if (attribute(catid) in [100200171,100300014,100300058,100200034,100200068,100200234,100300019,100200193,100400080,100300066,100300007], -0.0021431391, 0.0027978033)), if (attribute(catid) in [0,100200130,100300166,100200186,100300165,100200052,100300093,100300102,100300005,100200172,100300027,100300121,100200053,100300004,100300126,100400142,100200054,100300073,100300065,100300122,100400079,100200170,100300045,100200028,100300076], if (attribute(catid) in [0,100200130,100200186,100300165,100300102,100300005,100200172,100300027,100300121,100200053,100300073,100300045,100200028], if (attribute(catid) in [100200130,100300165,100300102,100300027,100300121,100200053,100300073,100300045,100200028], 0.0092257166, 0.0114031662), 0.0178845925), 0.0516414799)) + +if (attribute(catid) in [100300032,100200234,100300212,100300209,100300045,100200055,100300146], -0.0281888364, if (attribute(catid) in [0,100200130,100300058,100300013,100300077,100300166,100300165,100200052,100200172,100200068,100300027,100300019,100300004,100400142,100400037,100200192,100300122,100400079,100200170,100300169,100400080,100300074,100200176,100200028,100300076,100300006], if (attribute(catid) in [100300058,100300077,100300165,100200068,100300019,100400037,100200192,100300122,100400079,100200170,100300169,100400080,100200028,100300006], if (attribute(catid) in [100300058,100200068,100300019,100400037,100200192,100400079,100200170,100200028,100300006], -5.203242E-4, 0.0053481381), if (attribute(catid) in [100200130,100200052,100200172,100300074], 0.0070360348, 0.0095044104)), if (attribute(catid) in [100200171,100400141,100300093,100300008,100300121,100300126,100300073,100300127,100200087,100300066], 0.0150338406, if (attribute(catid) in [100300011,100300014,100200186,100300116,100200053,100400038,100300065,100300200,100200185,100200232], 0.0219656474, 0.0303343362)))) + +if 
(attribute(catid) in [100300011,100300058,100200034,100300165,100300093,100300102,100300032,100300027,100300121,100300019,100300073,100400037,100200193,100300127,100300076,100200232,100300214], if (attribute(catid) in [100300011,100300058,100200034,100300093,100300102,100300032,100200193,100300076,100200232,100300214], -0.0146956932, -0.0021492979), if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200186,100400141,100200052,100200172,100200068,100300116,100300004,100400142,100200054,100400038,100200192,100300065,100300122,100400079,100200170,100300169,100400080,100200087,100200185,100300006], if (attribute(catid) in [100200130,100300166,100200052,100200172,100200068,100300004,100400142,100400038,100300065,100400079,100400080,100200087], if (attribute(catid) in [100200130,100200172,100200068,100400142,100400079,100400080], 0.0060613479, 0.0086309145), if (attribute(catid) in [0,100200186,100400141,100300116,100200054,100300122,100300169], 0.0111211317, 0.0167951946)), if (attribute(catid) in [100300143,100300005,100200234,100200053,100300126,100300212,100300074,100300066,100300200,100200028,100200067], if (attribute(catid) in [100200053,100300126,100300074,100300066,100200028], 0.0204993878, 0.0274572562), 0.0495878122))) + +if (attribute(catid) in [100300011,100300014,100300058,100300166,100300143,100200034,100300165,100200052,100300093,100200172,100300032,100300116,100200053,100300019,100400142,100400037,100400038,100200192,100300209,100300122,100300127,100400079,100300074,100200176,100300200,100200028,100200232], if (attribute(catid) in [100300011,100200034,100300032,100300019,100400037,100300127,100400079,100300074,100200176,100300200,100200028,100200232], if (attribute(catid) in [100300032,100300019,100400037,100300200,100200028,100200232], -0.0422634866, -0.0088083561), if (attribute(catid) in [100300014,100300143,100300165,100300093,100200172,100200192,100300209,100300122], -4.084485E-4, 0.0040562959)), if (attribute(catid) in 
[0,100200130,100300077,100400141,100300005,100300027,100200234,100300004,100300126,100300073,100200193,100300065,100300212,100200170,100300169,100200087,100300066,100300045,100300076], if (attribute(catid) in [100200130,100400141,100300005,100300065,100300212,100200170,100300169,100300045], 0.0081251015, if (attribute(catid) in [0,100200234], 0.0119875946, 0.0148183346)), if (attribute(catid) in [100200171,100200186,100200068,100300121,100300006], 0.0204518722, 0.0367144755))) + +if (attribute(catid) in [100300058,100300013,100200034,100300165,100300093,100300005,100200172,100300027,100200053,100300019,100300126,100300073,100300212,100300209,100300169,100400080,100200087,100300007,100300045,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300013,100200034,100300005,100300027,100300019,100300212,100200067,100200055,100200232,100300214], if (attribute(catid) in [100300013,100300005,100300019,100300212,100200055,100200232,100300214], -0.041013169, -0.0176924609), if (attribute(catid) in [100300058,100300165,100300093,100200053,100300126,100300073,100300209,100300007,100300045], -0.0030372433, 0.0030417758)), if (attribute(catid) in [0,100200171,100300077,100300166,100200186,100400141,100200052,100200068,100300116,100300121,100200054,100200192,100300065,100300122,100300127,100200170,100300066,100200028,100300076,100300006], if (attribute(catid) in [0,100300077,100300166,100200186,100200052,100300121,100300065,100300122,100300066,100300076,100300006], if (attribute(catid) in [100300077,100300166,100300121,100300122,100300066,100300076], 0.0066574572, 0.0093092556), 0.0138817683), if (attribute(catid) in [100300011,100300102,100300032,100200234,100300004,100400142,100400079,100300074,100200176,100300200], 0.021173827, if (attribute(catid) in [100200130,100300143,100200193,100400038], 0.027045199, 0.0447659217)))) + +if (attribute(catid) in 
[100300014,100300013,100300008,100200068,100200234,100300004,100200193,100300212,100300209,100400080,100200087,100200176,100200028,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300013,100300008,100200068,100300212,100300209,100200028,100200067,100200055,100200185], if (attribute(catid) in [100300013,100300008,100300209,100200028,100200067,100200055], -0.0410751689, -0.0211139959), -0.0060525832), if (attribute(catid) in [100200171,100200130,100300077,100300143,100400141,100200052,100300102,100200172,100300073,100400037,100300122,100300074,100300200,100300045], if (attribute(catid) in [100200171,100300077,100300143,100200052,100300102,100200172,100400037,100300045], 0.0022476773, 0.0072496833), if (attribute(catid) in [0,100300011,100300166,100200186,100300165,100300005,100300027,100300116,100200053,100300126,100300127,100400079,100200170,100300169,100300066,100300076], if (attribute(catid) in [0,100300166,100300165,100300005,100200053,100400079,100300169], if (attribute(catid) in [100300166,100300165,100300005,100200053,100400079,100300169], 0.0108065489, 0.0121894583), 0.0150333423), if (attribute(catid) in [100200034,100300032,100400142,100400038,100200192,100300065], 0.0204411972, 0.0355402269)))) + +if (attribute(catid) in [100300058,100300143,100200034,100200186,100400141,100300093,100300102,100300027,100300004,100200054,100200193,100400038,100300065,100300212,100300209,100300007,100200028,100200067,100200185,100300146], if (attribute(catid) in [100300058,100400038,100300065,100300209,100200028,100200067,100200185], -0.0271979436, -0.0038611614), if (attribute(catid) in [0,100300014,100200052,100200172,100400142,100200192,100300122,100300127,100400079,100200170,100300074,100300076], if (attribute(catid) in [0,100200172,100400142,100200192,100300122,100200170,100300076], 0.0073159372, 0.0115841741), if (attribute(catid) in 
[100200171,100200130,100300013,100300166,100300008,100200068,100300116,100300121,100200053,100300126,100300073,100300169,100400080,100200087,100300066,100200176], if (attribute(catid) in [100200130,100300013,100200068,100300121,100200053,100300073,100300169], 0.013576241, 0.0164515309), 0.0222390758))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300166,100300143,100400141,100200052,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100400142,100200054,100300073,100200193,100400038,100200192,100300209,100300122,100200170,100200087,100300074,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100300006,100300214], if (attribute(catid) in [100300011,100300166,100300143,100300008,100400142,100300073,100200193,100300209,100300007,100200176,100300214], if (attribute(catid) in [100300143,100200193,100300007,100200176,100300214], -0.0297063352, -0.0033584809), if (attribute(catid) in [0,100200130,100400141,100200052,100200172,100300027,100300116,100200234,100200053,100200054,100400038,100200192,100300122,100200170,100300076,100300006], if (attribute(catid) in [100200130,100200052,100200172,100300027,100300116,100200234,100200053,100200054,100400038,100200192,100200170,100300006], 0.0038378464, 0.0065133022), 0.0116394129)), if (attribute(catid) in [100300014,100300077,100200034,100200186,100300165,100300121,100300004,100300126,100300065,100300127,100300169,100400080,100300066,100200185,100200232], if (attribute(catid) in [100300014,100300077,100300004,100300127,100300169,100300066,100200185,100200232], 0.0160520754, 0.0218263304), 0.0403414109)) + +if (attribute(catid) in [100300077,100300143,100200034,100300102,100300005,100300008,100300116,100300121,100300004,100300126,100300073,100400038,100300212,100300209,100300007,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300143,100200034,100300005,100300209,100200067,100200055,100300214], -0.0407918257, if (attribute(catid) in 
[100300102,100300008,100300116,100300004,100400038,100300007,100300146], -0.0072483912, 8.912521E-4)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100400141,100200052,100300093,100200172,100200068,100300032,100300027,100200053,100400142,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100200176,100200028,100300076], if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100300093,100200172,100200053,100300122,100300127,100400079,100300169,100200176,100200028], if (attribute(catid) in [100200171,100300166,100200186,100300093,100200053,100300127,100300169,100200176,100200028], 0.0062732454, 0.0100875564), if (attribute(catid) in [100200068,100400142,100300065,100400080,100300074], 0.0120096679, 0.0160208786)), if (attribute(catid) in [100300014,100300165,100200234,100200054,100200193,100200087,100300200,100200185], 0.0234816349, 0.0499780329))) + +if (attribute(catid) in [100300014,100400141,100300093,100300102,100300008,100200068,100300116,100200234,100200053,100300019,100300004,100400142,100400038,100300127,100200087,100300074,100200176,100300200,100300045,100200028,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300102,100300008,100200068,100300019,100300004,100400038,100300127,100200176,100300200,100300045,100200067,100300006,100200232,100300214], -0.0139227136, if (attribute(catid) in [100400142,100300074], -0.0037315997, 0.0024425812)), if (attribute(catid) in [0,100200171,100200130,100300077,100300166,100300165,100200052,100300073,100300065,100300212,100400079,100300169,100300007], if (attribute(catid) in [100200171,100200130,100300077,100300166,100300165,100200052,100300212,100400079], 0.0062742131, 0.010721818), if (attribute(catid) in [100300058,100300013,100300143,100200034,100200186,100300005,100200172,100300027,100300121,100200192,100300209,100300122,100200170,100400080,100300066], 0.0153086152, 0.0249190643))) + +if 
(attribute(catid) in [100300143,100200186,100200052,100300093,100300102,100300008,100300121,100200234,100200053,100300019,100300004,100400038,100300065,100300212,100200087,100300200,100200028,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300143,100300102,100300008,100300121,100300019,100300004,100400038,100200067,100200055,100300006,100300214,100300146], -0.0160264772, -0.0023817409), if (attribute(catid) in [0,100300013,100300166,100200034,100300032,100400142,100300073,100200193,100200192,100400079,100200170,100300066], if (attribute(catid) in [0,100300013,100300073,100200192,100400079,100300066], 0.0078781947, 0.0111361463), if (attribute(catid) in [100200171,100300011,100200130,100300014,100300077,100300165,100200172,100200068,100300027,100300126,100200054,100300209,100300122,100300169,100400080,100300074,100200176], if (attribute(catid) in [100300011,100200130,100300014,100300077,100300165,100200054,100300209,100300122,100300169,100200176], 0.0143761703, 0.018000531), 0.0280056529))) + +if (attribute(catid) in [100300011,100300013,100300166,100200034,100200052,100300008,100200068,100300116,100300121,100300019,100200193,100200192,100300065,100300209,100300127,100300074,100200176,100300200,100300045,100300076,100200067,100200055,100300214,100300146], if (attribute(catid) in [100300011,100200034,100200068,100300019,100300209,100200176,100300200,100300045,100300076,100200067,100200055,100300214,100300146], -0.0143446006, -5.59109E-4), if (attribute(catid) in [0,100200171,100300014,100300077,100300143,100200186,100400141,100300165,100300102,100300005,100200172,100300027,100300004,100300126,100400142,100200054,100300073,100400038,100400079,100200170,100300169,100400080,100300066,100200028,100200185,100300006], if (attribute(catid) in [0,100200171,100200186,100400141,100300165,100300102,100300005,100200172,100300027,100200054,100300073,100400038,100300169,100400080,100300066,100200028], if (attribute(catid) in 
[100200186,100400141,100300005,100200172,100200054,100300073,100300169,100300066], 0.0063901469, 0.0098655154), 0.0150886566), if (attribute(catid) in [100200130,100300058,100300093,100300032,100300122], 0.0218922437, 0.033132652))) + +if (attribute(catid) in [100200171,100300011,100300013,100200034,100200186,100400141,100200052,100300093,100300102,100200172,100300008,100300032,100300027,100300116,100200054,100300073,100400037,100200193,100200170,100300066,100300007,100300200,100300045,100300076,100200055], if (attribute(catid) in [100300011,100200186,100300102,100300008,100400037,100200193,100200170,100300007,100300200,100200055], -0.0100694371, -0.0013265371), if (attribute(catid) in [0,100200130,100300014,100300058,100300166,100300165,100300005,100300121,100200234,100300004,100300126,100400142,100200192,100300065,100300212,100300122,100300127,100400079,100300169,100300074,100200028,100200067,100300006], if (attribute(catid) in [0,100300058,100300005,100300121,100200234,100300065,100300212,100300127], 0.0083645817, 0.0130866864), if (attribute(catid) in [100300077,100200068,100200053,100400080,100200087,100200176], 0.0197130039, 0.0369209199))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300077,100300166,100300143,100200186,100400141,100200052,100300093,100300102,100300005,100300027,100200234,100200053,100300004,100200054,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100300127,100400080,100200087,100300074,100300007,100300045,100300076,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300011,100300143,100200052,100300093,100200054,100200193,100300209,100300074,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300143,100300093,100200193,100300209,100300074,100200055,100200185,100300214], -0.0161446372, -0.0058395053), if (attribute(catid) in 
[0,100300014,100200186,100400141,100300102,100200234,100300004,100300073,100400037,100300212,100300122,100300127,100300045,100300076], if (attribute(catid) in [100300014,100200186,100400141,100300102,100200234,100300004,100300073,100400037,100300212,100300045], 0.001863977, 0.0041634423), 0.0073459177)), if (attribute(catid) in [100200130,100300058,100200068,100300032,100300116,100300121,100300126,100400142,100300065,100400079,100200170,100300169,100300066], 0.0148443276, if (attribute(catid) in [100200034,100300165,100200172], 0.0223682677, 0.0294343337))) + +if (attribute(catid) in [100300011,100300143,100300102,100200234,100300019,100400037,100300074,100300066,100300045,100200067,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300143,100200234,100300019,100200055,100300214], -0.024786788, -0.0075505833), if (attribute(catid) in [0,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100200172,100200068,100300027,100300116,100200053,100400142,100200054,100300073,100400038,100200192,100300065,100300122,100400079,100300169,100400080,100200087,100300007,100300076], if (attribute(catid) in [0,100200130,100300077,100300166,100200034,100300165,100200052,100300093,100300027,100300116,100200053,100300073,100400079,100400080,100200087,100300076], if (attribute(catid) in [100200130,100300077,100300093,100300027,100200053,100400079,100200087,100300076], 0.0019910708, 0.0076733873), if (attribute(catid) in [100300014,100400141,100200172,100400142,100300065,100300122,100300007], 0.0110065874, 0.0140312744)), if (attribute(catid) in [100200171,100300121,100300004,100300126,100200193,100300209,100300127,100200170,100200176,100200028], 0.0200792046, 0.0371851273))) + +if (attribute(catid) in 
[100300011,100300014,100300077,100300165,100300093,100300005,100200068,100300032,100300121,100200234,100300126,100200054,100400037,100200193,100400038,100300122,100200087,100200176,100300200,100300076,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300165,100300093,100200068,100300032,100200193,100200087,100200176,100200067,100200232,100300214], if (attribute(catid) in [100300014,100300093,100300032,100200176,100200067,100200232,100300214], -0.0238697728, -0.0068920318), -8.192848E-4), if (attribute(catid) in [0,100200171,100200130,100300166,100200034,100200186,100400141,100200052,100200172,100300027,100300116,100200053,100400142,100300073,100200192,100300212,100300127,100200170,100300074,100300066,100300007,100200028], if (attribute(catid) in [100200130,100400141,100200052,100300027,100400142,100200192,100300127,100200170,100300074,100300066], 0.0063630817, if (attribute(catid) in [100200171,100300166,100200034,100200186,100200053,100300073,100300007], 0.0102002983, 0.011707377)), if (attribute(catid) in [100300008,100300004,100300065,100400079,100300169,100400080,100200185,100300006], 0.0199785583, 0.0332135569))) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100400141,100300165,100300093,100300005,100200172,100200068,100200053,100300004,100400142,100200192,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300014,100300004,100400142,100300209,100400080,100200087,100300007,100300200,100200067,100200055,100300006,100200232], if (attribute(catid) in [100300011,100300014,100300004,100300209,100400080,100300007,100300200,100200067,100200055,100200232], -0.0226068659, -0.0049292005), if (attribute(catid) in [0,100200130,100300077,100300165,100200053,100300122,100400079,100300169,100300074,100300066,100300045,100200028], if (attribute(catid) in 
[100200130,100300077,100300165,100400079,100300066,100300045,100200028], 0.0024587834, 0.0048477347), 0.0100107901)), if (attribute(catid) in [100200171,100300058,100300013,100200034,100200186,100200052,100300027,100300116,100200234,100300073,100400037,100200193,100400038,100300065,100200170,100300076], if (attribute(catid) in [100300013,100200186,100300116,100200234,100300073,100200193,100400038,100300065], 0.0134952529, 0.0171978597), if (attribute(catid) in [100300143,100300121,100200176], 0.0268710783, 0.0389950015))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100200186,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300121,100200053,100300004,100300126,100200054,100400037,100200193,100400038,100200192,100300122,100300127,100400079,100200170,100300169,100200087,100300074,100300007,100300045,100200028,100300076,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300011,100200130,100300014,100300058,100300013,100300166,100200186,100300165,100300093,100300102,100300005,100200172,100300008,100200068,100200053,100300004,100200054,100400037,100400038,100200192,100300122,100400079,100200170,100200028,100300076,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300058,100300013,100300008,100300004,100200054,100400038,100200170,100200028,100300076,100300214,100300146], if (attribute(catid) in [100300058,100300013,100200054,100400038,100300214,100300146], -0.0127825599, -0.0040621004), if (attribute(catid) in [100300165,100300102,100300005,100200068,100200053,100400037,100300122,100300006], 3.831181E-4, 0.0035617568)), if (attribute(catid) in [100300077,100300169,100300074,100300045], 0.0064465003, 0.0097160619)), if (attribute(catid) in [100300143,100400141,100300032,100300027,100300116,100200234,100400142,100300073,100300065,100400080,100300066,100300200,100200185], if (attribute(catid) in 
[100300143,100300032,100300116,100400142,100400080,100300066,100200185], 0.0176560057, 0.0221678704), 0.0407063066)) + +if (attribute(catid) in [100300011,100300143,100300102,100300008,100200068,100300116,100300121,100200234,100200054,100300073,100400037,100200193,100300212,100300209,100300066,100300200,100200028,100300076,100200067,100300006,100200232], if (attribute(catid) in [100300011,100300143,100300102,100300116,100200234,100200054,100400037,100300212,100300209,100300066,100300200,100200067,100300006,100200232], if (attribute(catid) in [100300143,100300102,100200054,100300212,100300209,100300200,100200067,100200232], -0.031389503, -0.0136037814), -0.0032608717), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200186,100400141,100200052,100200172,100300032,100300027,100200053,100300004,100300126,100400038,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200176,100300045,100200185], if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100200052,100300032,100200053,100300126,100200192,100300065,100300122,100300127,100400079,100200176,100300045], if (attribute(catid) in [100200130,100300014,100300166,100200052,100200192,100300065,100200176], 0.0042937725, 0.0066960091), 0.011520582), if (attribute(catid) in [100200034,100300165,100300093,100300005,100200087], 0.0194805523, 0.0346934783))) + +if (attribute(catid) in [100300013,100300077,100300143,100200186,100300008,100300027,100300116,100300019,100300004,100300126,100300073,100400037,100200192,100300209,100300122,100200170,100300169,100300066,100300007,100300076,100200055,100300146], if (attribute(catid) in [100300143,100300008,100300019,100400037,100300122,100200055,100300146], -0.0346820188, if (attribute(catid) in [100300013,100300077,100300027,100300126,100300169,100300007,100300076], -0.0068202013, -6.852405E-4)), if (attribute(catid) in 
[0,100200171,100300011,100300058,100300166,100400141,100300165,100200172,100200068,100300032,100300121,100200234,100200053,100200054,100300127,100400079,100400080,100200087,100300074,100300045,100200185], if (attribute(catid) in [0,100200171,100300058,100300166,100400141,100300121,100200234,100200054,100400079,100200087,100300045], if (attribute(catid) in [100300058,100300166,100300121,100200234,100400079,100200087], 0.0054906177, 0.0089382511), 0.0137638882), if (attribute(catid) in [100200130,100200052,100300093,100400142,100200193,100400038,100300065,100200028], 0.0231575087, 0.0350257823))) + +if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300166,100300143,100200186,100400141,100300102,100200172,100300008,100300032,100300027,100300121,100200234,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300045,100300076,100200055,100300006,100300146], if (attribute(catid) in [100200186,100300102,100300008,100300032,100200234,100200193,100300212,100300209,100300122,100200087,100300076,100200055,100300146], if (attribute(catid) in [100300102,100200234,100300212,100300209,100300076,100200055], -0.0272486986, -0.0121921962), if (attribute(catid) in [100200171,100200130,100300058,100300143,100400141,100300121,100400142,100400037,100200192,100400080,100300074,100300045,100300006], if (attribute(catid) in [100200171,100200192,100400080,100300074,100300045,100300006], -3.106606E-4, 0.0031181748), if (attribute(catid) in [0,100300013,100200172,100300027,100400079,100300066], 0.0075863782, 0.0110589536))), if (attribute(catid) in [100300011,100300077,100200034,100300165,100200052,100300093,100300005,100200068,100300126,100200054,100300065,100300127,100200170,100200185,100200232], if (attribute(catid) in [100300165,100200052,100300093,100300126,100200170,100200185], 0.0158814592, 0.021324049), 0.0395357198)) + +if (attribute(catid) in 
[100300058,100300013,100300077,100300166,100200034,100300165,100200052,100300093,100300005,100200172,100300008,100200068,100300019,100300004,100200054,100400038,100300212,100400079,100200170,100300074,100300066,100200176,100300200,100200028,100300076,100200055,100200232,100300146], if (attribute(catid) in [100300013,100300008,100200068,100300019,100400038,100200055,100200232,100300146], -0.0281903745, if (attribute(catid) in [100300058,100200034,100300165,100200052,100300005,100300004,100300212,100400079,100200170,100300074,100200176,100300200,100200028], -0.0038124936, 0.0031637671)), if (attribute(catid) in [0,100200171,100200130,100300143,100200186,100400141,100300027,100300116,100200234,100200053,100300126,100400142,100300073,100200193,100200192,100300065,100300122,100300127,100300169,100400080,100300045,100300006], if (attribute(catid) in [0,100200171,100200130,100300143,100200186,100400141,100300027,100300116,100200234,100300126,100400142,100200193,100200192,100300122,100300045,100300006], if (attribute(catid) in [100200171,100200130,100400141,100300027,100300116,100200234,100400142,100200193,100300122,100300045,100300006], 0.0097327734, 0.0117883652), 0.0163276141), 0.037807689)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300013,100300166,100300143,100200186,100400141,100300165,100300093,100300102,100300008,100200068,100300032,100300116,100300121,100200234,100200053,100300126,100400142,100300073,100200193,100200192,100300212,100300127,100400079,100400080,100200087,100300066,100300007,100200176,100300200,100300045,100200028,100200055,100200185,100300006], if (attribute(catid) in [100200171,100300011,100300013,100300102,100300008,100200068,100300032,100300116,100300121,100200234,100300126,100300127,100400079,100400080,100200176,100300045,100200028,100200055,100200185], if (attribute(catid) in [100300011,100300013,100300102,100300008,100300032,100300116,100300121,100300126,100300127,100200176,100200055,100200185], -0.0183851919, 
-0.0026357282), if (attribute(catid) in [100200130,100300166,100200186,100400141,100400142], 0.0029870563, if (attribute(catid) in [100300143,100200053,100300073,100200193,100200192,100200087,100300066,100300200], 0.0070772264, 0.0091929568))), if (attribute(catid) in [100300058,100300077,100200052,100300005,100200172,100300027,100300019,100300004,100400037,100400038,100300065,100200170,100300169,100300076,100200067], 0.0192905696, 0.0373801828)) + +if (attribute(catid) in [100200171,100200130,100300014,100300013,100200186,100300165,100200052,100300102,100300008,100200068,100300027,100300116,100300004,100200192,100300065,100300212,100300122,100300127,100200170,100300074,100200176,100300045,100300076,100200055,100200185,100300214], if (attribute(catid) in [100300102,100300008,100300212,100200170,100300074,100200055,100300214], -0.0234366968, if (attribute(catid) in [100300014,100300013,100200186,100300165,100200068,100300116,100300127,100300076,100200185], -0.0041768475, 0.0012159251)), if (attribute(catid) in [0,100300011,100300058,100300077,100200034,100400141,100200172,100300032,100300121,100200234,100300126,100400142,100300073,100400037,100400079,100300169,100200087,100300200,100200028,100300006], if (attribute(catid) in [0,100300077,100200034,100200172,100200234,100300073,100400037,100300169,100300200,100200028], if (attribute(catid) in [100300077,100200172,100200234,100400037,100300169,100300200], 0.0073379486, 0.0101628542), 0.0145361756), if (attribute(catid) in [100300166,100300093,100300005,100400080,100300066,100200232], 0.0211367281, 0.0344431588))) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300143,100200034,100400141,100300165,100300093,100200172,100300032,100300027,100300116,100200053,100300004,100300126,100200054,100400037,100200193,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in 
[100300011,100300014,100300143,100300093,100200053,100200054,100400037,100300007,100200176,100200067,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300011,100300093,100400037,100300007,100200176,100200067,100200055,100300214,100300146], -0.0290230654, -0.0106383395), if (attribute(catid) in [100200034,100300165,100200172,100300116,100300126,100400079,100200170,100400080,100300074,100300045,100300006], -0.0013386003, if (attribute(catid) in [0,100300027,100200087,100300066], 0.0034896642, 0.0060074974))), if (attribute(catid) in [100200171,100300058,100300166,100200186,100200052,100300005,100200068,100300121,100400142,100300073,100200192,100300065,100300122,100300127,100300169,100300200], if (attribute(catid) in [100200186,100200052,100300005,100400142,100300073,100300122,100300169], 0.0118090198, 0.018597715), if (attribute(catid) in [100300008,100200234,100400038], 0.0272065563, 0.0476478756))) + +if (attribute(catid) in [100300011,100300013,100200034,100400141,100200052,100300005,100300008,100200068,100300027,100300116,100200234,100300019,100300004,100400142,100300073,100400038,100300209,100300127,100400079,100200170,100300066,100300007,100200176,100300045,100300076,100200055,100300214,100300146], if (attribute(catid) in [100300013,100300008,100300019,100300209,100200176,100200055,100300214,100300146], -0.0323268404, if (attribute(catid) in [100300011,100200052,100300005,100200068,100300004,100300066,100300007], -0.0057248097, if (attribute(catid) in [100200234,100400142,100300073,100400079,100300076], -0.0010770901, 0.0026132947))), if (attribute(catid) in [0,100200130,100300077,100300166,100200186,100300165,100200172,100300121,100200053,100300126,100200193,100200192,100300065,100300122,100300169,100400080,100300074], if (attribute(catid) in [100200130,100300166,100200186,100300165,100200172,100200053,100300126,100300169,100300074], 0.0070056177, 0.0111605097), if (attribute(catid) in 
[100200171,100300014,100300143,100200054,100200087,100300200,100200028], 0.0189654625, 0.0388492541))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300077,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300027,100300116,100300121,100200053,100300019,100400142,100300073,100400037,100200192,100300065,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100300045,100200028,100200067,100200185,100200232,100300146], if (attribute(catid) in [100300011,100300058,100300102,100300005,100300027,100200053,100300019,100300066,100300007,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300011,100300102,100300005,100300019,100300007,100200028,100200067,100200232,100300146], -0.0351424027, -0.0101917869), if (attribute(catid) in [0,100300014,100300165,100300093,100300116,100300073,100300127,100200170,100300169,100400080,100300045,100200185], if (attribute(catid) in [0,100300093,100300116,100300073,100200170,100400080,100300045,100200185], 0.0023576953, 0.0043646024), if (attribute(catid) in [100200171,100200052,100200172,100400142], 0.0064763216, 0.0093043399))), if (attribute(catid) in [100200130,100300013,100300166,100300143,100200034,100200186,100300008,100300032,100200234,100300004,100300126,100200054,100400038,100300122,100200176,100300200,100300006], if (attribute(catid) in [100200130,100300166,100300143,100300032,100300004,100300126,100200054,100400038,100300122,100200176,100300200], 0.015361415, 0.0203450573), 0.0370146837)) + +if (attribute(catid) in [100200171,100300011,100200034,100200186,100300093,100300102,100300008,100300032,100300121,100300126,100300122,100200087,100300200,100300076,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300093,100300008,100300032,100300076,100200067,100200055,100200185,100300214,100300146], -0.0283721448, if (attribute(catid) in 
[100200171,100300011,100200034,100200186,100300102,100300200,100300006], -0.0097302345, -0.0016235117)), if (attribute(catid) in [0,100200130,100300058,100300013,100300143,100400141,100300165,100200052,100300005,100200068,100300027,100200053,100300004,100400142,100300073,100400037,100200193,100200192,100300212,100300127,100200170,100300169,100300074,100300066,100300045,100200028,100200232], if (attribute(catid) in [0,100300058,100300013,100300143,100400141,100200052,100200068,100200053,100300073,100400037,100200193,100200192,100300127,100200170,100300074,100300045], if (attribute(catid) in [100300058,100200052,100200068,100200053,100300073,100400037,100200193,100200192,100300127,100200170,100300074,100300045], 0.0055504107, 0.0071774828), 0.0090674985), if (attribute(catid) in [100300014,100300077,100300166,100200054,100400038,100300065], 0.0138239288, if (attribute(catid) in [100200172,100200234,100400079,100400080], 0.0189710965, 0.0320267193)))) + +if (attribute(catid) in [100200171,100300014,100300058,100300013,100300166,100400141,100200172,100300008,100200068,100300121,100200234,100200053,100300019,100300004,100300126,100400037,100200192,100300212,100300209,100300127,100400080,100200087,100300074,100200176,100300076,100200055,100300214], if (attribute(catid) in [100300014,100300058,100300013,100300008,100200068,100200053,100300019,100300212,100300209,100300127,100400080,100200087,100200055,100300214], if (attribute(catid) in [100300013,100300008,100300019,100300212,100200087,100200055,100300214], -0.0279752223, -0.0097745433), if (attribute(catid) in [100200171,100400141,100200234,100300004,100400037,100300074,100200176,100300076], -0.0015465151, 0.0023515763)), if (attribute(catid) in [0,100200130,100300143,100200034,100200052,100300027,100400142,100200054,100300073,100200193,100400038,100300065,100400079,100300169,100300045,100200028], if (attribute(catid) in 
[100200130,100300143,100200034,100300027,100400142,100200054,100300073,100400038,100300065,100400079,100300169], 0.0070041173, 0.0113800549), if (attribute(catid) in [100300077,100200186,100300165,100300102,100300116,100300122,100200170,100300066,100200185,100200232], 0.0215968499, 0.0448418659))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100200186,100300165,100300093,100300102,100300008,100300032,100300027,100300116,100200234,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300122,100300127,100400080,100300007,100300200,100300045,100200028,100300076,100200185,100300006,100200232], if (attribute(catid) in [100300011,100200130,100300014,100300058,100300013,100300032,100300065,100300007,100300200,100300045,100200028,100300076,100200185,100300006], -0.007403056, if (attribute(catid) in [0,100200186,100300165,100300102,100300008,100300027,100300116,100200234,100400037,100200192,100300122,100400080], if (attribute(catid) in [100300165,100300008,100300027,100200234,100400037,100200192,100300122,100400080], 0.0018774622, 0.0037801837), 0.0085665512)), if (attribute(catid) in [100200034,100400141,100200052,100200172,100200054,100400038,100400079,100200170,100300169,100200087,100200067], if (attribute(catid) in [100200034,100400141,100400038,100400079,100200087,100200067], 0.0118341371, 0.0154556164), if (attribute(catid) in [100200068,100200053,100300126,100300212,100300074,100300066], 0.0234779795, 0.0334980927))) + +if (attribute(catid) in [100200171,100300058,100300013,100300143,100200186,100200052,100300102,100300008,100300116,100300121,100200234,100300019,100300004,100200054,100200193,100400038,100300212,100300209,100300127,100200170,100200176,100200067,100200055], if (attribute(catid) in [100300013,100300102,100200234,100400038,100300212,100300209,100200176,100200067,100200055], if (attribute(catid) in [100300013,100300102,100400038,100200176,100200067,100200055], 
-0.0390121048, -0.0143867192), if (attribute(catid) in [100300143,100200186,100300008,100300116,100300121,100300004,100200054,100200193], -0.0044006932, 0.0015511826)), if (attribute(catid) in [0,100200130,100300077,100300166,100200034,100400141,100300165,100300093,100300005,100200172,100200068,100300027,100200053,100400142,100300073,100200192,100300065,100300122,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100300200,100200028,100300076,100200185,100200232], if (attribute(catid) in [100200130,100300077,100300166,100200034,100400141,100300165,100300005,100200172,100200068,100200053,100400142,100300073,100300065,100400079,100300169,100400080,100200087,100300074,100200185,100200232], if (attribute(catid) in [100200130,100400141,100200068,100400142,100300073,100300065,100400079,100300169,100400080,100300074,100200185], 0.0060636247, 0.0088886465), if (attribute(catid) in [0,100300093,100300007], 0.0120697556, 0.0177577497)), 0.0377473017)) + +if (attribute(catid) in [100300014,100300077,100200034,100300165,100300093,100300005,100300008,100200053,100300004,100300126,100400142,100300212,100300209,100300169,100300200,100300076,100200067,100200185,100300146], if (attribute(catid) in [100200034,100300093,100300005,100300004,100300126,100400142,100300209,100300076,100200067,100200185,100300146], -0.0137318062, -0.001492581), if (attribute(catid) in [0,100300011,100200130,100300058,100300166,100200186,100400141,100200068,100300027,100200234,100200054,100300073,100200193,100200192,100300065,100300122,100400079,100200170,100400080,100300074,100200028], if (attribute(catid) in [0,100300058,100200186,100400141,100200193,100200192,100400080,100200028], 0.0060199161, 0.0102196174), if (attribute(catid) in [100200171,100200052,100200172,100300032,100300116,100400038,100200087,100300066], 0.0151193734, 0.0263768697))) + +if (attribute(catid) in 
[0,100300058,100200034,100200052,100300093,100300102,100300005,100300008,100300027,100300004,100300126,100400142,100300073,100300212,100400079,100400080,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100300146], if (attribute(catid) in [100200034,100200052,100300102,100300008,100300027,100300126,100300007,100200176,100200067,100200185,100300146], if (attribute(catid) in [100300102,100300008,100300027,100300126,100200067,100200185,100300146], -0.0197844476, -0.0061589619), if (attribute(catid) in [100300058,100400142,100300212,100400079,100400080,100300200,100300045,100300076], -9.327577E-4, 0.0041966425)), if (attribute(catid) in [100200171,100200130,100300077,100300166,100300143,100200186,100400141,100300165,100200172,100200068,100300032,100300116,100300121,100200053,100400038,100200192,100300065,100200170,100300169,100300074,100300066,100200028,100300006], if (attribute(catid) in [100200171,100200130,100300077,100400141,100300165,100200172,100300032,100300116,100300121,100300065,100200170], 0.0090658634, 0.0131790639), if (attribute(catid) in [100300011,100300013,100400037,100300122,100300127,100200087], 0.0204665481, 0.0306124846))) + +if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300077,100300166,100200052,100300005,100200172,100300008,100300116,100200234,100400142,100200054,100300073,100400037,100200193,100200192,100300212,100300209,100300122,100300127,100200170,100300169,100300074,100300007,100200176,100300200,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200130,100300013,100300077,100300005,100300008,100300116,100200234,100200054,100300073,100200193,100300212,100300209,100300122,100200170,100300007,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300116,100200234,100200054,100200193,100300212,100300209,100300007,100200055,100200185,100300006,100200232,100300214,100300146], -0.0108827313, 
-5.017785E-4), if (attribute(catid) in [0,100300058,100300166,100200172,100400037,100200176], if (attribute(catid) in [100300058,100300166,100200172,100400037], 0.0023128525, 0.0053288841), 0.0080470055)), if (attribute(catid) in [100300014,100300143,100200034,100200186,100400141,100300165,100200068,100300032,100300027,100300121,100200053,100300004,100300065,100400079,100200087,100300066], if (attribute(catid) in [100300014,100300143,100200034,100400141,100300165,100300032,100200053,100300004,100300065,100400079,100300066], 0.0118541533, 0.0171167195), if (attribute(catid) in [100300093,100300126,100400038,100400080], 0.0252859922, 0.0427399285))) + +if (attribute(catid) in [100200171,100300011,100200130,100300058,100300166,100200034,100200186,100400141,100300093,100300005,100300008,100200068,100300116,100200053,100300126,100400142,100200054,100300073,100200193,100300065,100400079,100300169,100400080,100300074,100300007,100200176,100300045,100200028,100200067,100300006,100300214,100300146], if (attribute(catid) in [100300011,100300058,100200034,100200186,100300005,100300008,100200054,100400080,100300074,100300007,100200176,100200067,100300214,100300146], if (attribute(catid) in [100200034,100200054,100400080,100300074,100300007,100200067,100300214,100300146], -0.0139396834, -0.0053561842), if (attribute(catid) in [100200171,100200130,100300093,100200068,100200053,100300126,100400142,100400079,100300045,100300006], -0.0012208885, 0.0034171063)), if (attribute(catid) in [0,100300014,100300013,100300143,100300165,100200052,100300102,100200172,100300032,100300121,100400037,100400038,100200192,100300122,100300127,100200170,100200087,100300066,100300200], if (attribute(catid) in [0,100300143,100300165,100200052,100300102,100300032,100300121,100400037,100400038,100300122,100300127,100200170], if (attribute(catid) in [100300143,100200052,100300102,100300032,100400037,100400038,100200170], 0.0071786267, 0.0105454236), 0.0151332724), 0.0271687303)) + +if (attribute(catid) in 
[0,100300011,100200130,100300058,100300013,100300077,100300166,100300143,100200034,100200052,100200172,100300032,100300121,100300019,100400142,100300073,100200193,100400038,100300065,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300007,100200176,100200028,100300076,100200067,100300006], if (attribute(catid) in [100300011,100200034,100300019,100200193,100400038,100300209,100300122,100300007,100200176,100200028,100200067,100300006], -0.0107750803, if (attribute(catid) in [100400142,100300073,100400079,100400080,100300076], 4.14432E-5, if (attribute(catid) in [0,100200130,100300077,100300166,100300143,100200172], 0.0037799336, 0.0062373044))), if (attribute(catid) in [100200171,100200186,100300165,100300102,100300005,100300027,100300116,100200234,100300004,100300126,100200054,100400037,100200170,100300074,100300066], if (attribute(catid) in [100200171,100200186,100300165,100200234,100300074], 0.010552703, 0.0153207486), if (attribute(catid) in [100300014,100300093,100200068,100200053,100200192,100300200,100200232], 0.0240361458, 0.0348297568))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300166,100200034,100200186,100400141,100300165,100200052,100200172,100200068,100300032,100300027,100300116,100300121,100200053,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200130,100300014,100300058,100200034,100300165,100200068,100200054,100300209,100200170,100400080,100300074,100300007,100200176,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300058,100200068,100200054,100300209,100200176,100200055,100200232,100300146], -0.0154543001, if (attribute(catid) in 
[100200034,100300165,100200170,100300007,100300006], -0.0053315859, -8.526404E-4)), if (attribute(catid) in [0,100300011,100300013,100400141,100200052,100200172,100300121,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300122,100300127,100400079,100300066,100300200,100300045,100200028,100300076], if (attribute(catid) in [100300011,100300013,100400141,100300121,100400142,100400037,100200193,100200192,100300065,100300122,100300127], 0.002970819, 0.0066974843), if (attribute(catid) in [100200171,100300027,100200053,100300169], 0.0109293572, 0.0151670383))), if (attribute(catid) in [100300077,100300093,100300102,100300005,100300008,100200087], 0.0226972124, 0.0401505411)) + +if (attribute(catid) in [100200130,100300014,100300077,100200034,100300165,100300102,100300032,100300116,100300121,100200234,100200053,100400038,100300212,100300127,100400080,100200087,100300074,100300007,100200028,100200067,100200055,100200185,100300006], if (attribute(catid) in [100300014,100300032,100400038,100300212,100200087,100300007,100200055,100200185], -0.0294546465, if (attribute(catid) in [100300165,100300121,100200234,100300127,100400080,100200028,100200067,100300006], -0.0041076181, -3.32312E-4)), if (attribute(catid) in [0,100300011,100300058,100300166,100200186,100300093,100300005,100200172,100300008,100200068,100300073,100400037,100200192,100300122,100400079,100300169,100300066,100200176,100300076], if (attribute(catid) in [0,100300011,100300058,100300093,100300005,100200172,100200068,100300073,100200192,100400079,100300169,100300076], if (attribute(catid) in [100300011,100300058,100300093,100300005,100200172,100200068,100300073,100200192,100400079,100300169], 0.006046367, 0.0075872041), 0.0099717066), if (attribute(catid) in [100200171,100400141,100200052,100300004,100300126,100400142,100200054,100300065,100200170], 0.0147808083, 0.0271833064))) + +if (attribute(catid) in 
[0,100200171,100300011,100200130,100300014,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100200172,100300008,100300032,100300027,100300121,100200234,100200053,100300019,100300004,100300073,100400037,100400038,100200192,100300065,100300212,100300122,100300127,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300200,100200028,100200067], if (attribute(catid) in [100300014,100300013,100300077,100300143,100200186,100300093,100300102,100300008,100300032,100200053,100300019,100400037,100400038,100300212,100300007,100200028,100200067], if (attribute(catid) in [100200186,100300032,100300019,100300212,100200067], -0.0346775988, -0.0084286391), if (attribute(catid) in [100200034,100400141,100200052,100200172,100200234,100300073,100300065,100200170,100200087,100300074,100200176], 0.0021428485, if (attribute(catid) in [0,100200171,100200130,100300166,100300165,100300027,100200192,100300122,100300169,100300066,100300200], 0.006946403, 0.0110205277))), if (attribute(catid) in [100300058,100200068,100300116,100300126,100400142], 0.0228118568, 0.0343825826)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300143,100200186,100400141,100200052,100300093,100300102,100300005,100200068,100300032,100300027,100300121,100200234,100200053,100300126,100400142,100300073,100400037,100200192,100300065,100300209,100300127,100400079,100200170,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100200028,100300076,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300011,100200186,100300102,100300005,100200068,100300032,100300121,100200053,100300126,100400037,100300209,100300007,100300045,100200028,100300076,100200055,100200185,100300006], if (attribute(catid) in [100300011,100300102,100300005,100300121,100300126,100400037,100300209,100300076,100200055,100200185,100300006], -0.0217391763, -0.0085505926), if (attribute(catid) in 
[100200171,100200130,100300014,100300143,100400141,100200052,100300027,100200234,100300073,100200192,100300127,100200170,100200087,100300074,100300066,100200176], if (attribute(catid) in [100200171,100200130,100300143,100200052,100200170,100300074,100200176], 1.581819E-4, 0.0028254989), 0.0069841416)), if (attribute(catid) in [100300013,100300166,100300165,100200172,100300008,100300116,100300004,100200054,100400038,100300212,100300122,100300169,100300200,100200067], if (attribute(catid) in [100200172,100300116,100300004,100300122,100300169,100300200], 0.0144252893, 0.0197908458), 0.04284419)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100300143,100200186,100400141,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100300019,100300126,100300073,100400037,100200193,100200192,100300209,100300127,100400079,100200170,100300169,100300066,100300007,100200176,100300200,100200028,100300076,100200185,100300006,100300146], if (attribute(catid) in [100300011,100300014,100300143,100300005,100300027,100200234,100300126,100300209,100300066,100300007,100200176,100300200,100200185,100300146], if (attribute(catid) in [100300011,100300005,100300126,100300209,100300007,100200176,100200185], -0.0278752616, -0.0114872053), if (attribute(catid) in [100200171,100300058,100300077,100300166,100200172,100300008,100200068,100300116,100200193,100400079], if (attribute(catid) in [100300058,100200172,100200068,100300116,100400079], -0.0025940117, 5.790855E-4), if (attribute(catid) in [0,100400141,100300019,100300073,100200192,100300076], 0.005097188, 0.0074013229))), if (attribute(catid) in [100200034,100300093,100300121,100200053,100300004,100400142,100200054,100400038,100300065,100300122,100400080,100300074], if (attribute(catid) in [100300093,100300121,100200053,100300004,100400142,100300065,100400080], 0.0112491051, 0.0165554655), if (attribute(catid) in [100300165,100200087,100200232], 0.025263808, 
0.039519728))) + +if (attribute(catid) in [0,100200130,100300058,100300013,100200034,100400141,100300165,100300093,100300102,100200172,100300008,100200068,100300032,100300116,100300121,100200234,100200053,100300004,100300126,100400142,100300073,100200193,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100200087,100300066,100300200,100300045,100200028,100300076,100200067,100200055], if (attribute(catid) in [100300093,100300008,100300032,100300121,100200234,100300004,100300212,100300169,100200087,100300045,100200067,100200055], if (attribute(catid) in [100300032,100200234,100300212,100200087,100200055], -0.0204195189, -0.0097651462), if (attribute(catid) in [0,100200130,100300058,100300165,100300126,100400142,100200192,100400079,100300066,100300200,100200028,100300076], 0.0029821664, 0.007455649)), if (attribute(catid) in [100200171,100300077,100300166,100200186,100300027,100200054,100400038,100300065,100300122,100300007,100200232], 0.0123063395, if (attribute(catid) in [100300014,100200052,100300005,100400037,100400080], 0.021957365, 0.0390618799))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100300121,100200053,100300019,100300126,100200054,100300073,100400037,100200193,100200192,100300122,100400079,100200170,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300166,100300143,100400141,100300093,100300005,100200172,100200068,100300116,100300121,100300019,100300126,100200054,100300073,100200193,100200087,100300074,100300007,100300200,100300045,100200028,100200067,100200232,100300214], if (attribute(catid) in [100300143,100400141,100300116,100300019,100300126,100200054,100300074,100200067,100200232,100300214], -0.0084514959, -0.0026252948), if 
(attribute(catid) in [0,100200130,100300013,100200034,100300027,100200053,100400037,100200192,100200170,100300169,100300006], 0.0047022442, 0.0075241336)), if (attribute(catid) in [100300032,100200234,100300004,100400142,100400038,100300065,100300212,100300127,100400080,100300076], 0.0167624654, if (attribute(catid) in [100300165], 0.0213460841, 0.0385220165))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100200068,100300032,100300027,100300116,100200234,100200053,100300126,100200054,100300073,100200193,100400038,100200192,100300065,100300209,100300122,100300127,100400079,100300169,100400080,100200087,100300066,100300007,100200176,100300045,100200028,100200055,100300006], if (attribute(catid) in [100300011,100300166,100300143,100300093,100200068,100300032,100200054,100400038,100300209,100300127,100400079,100200087,100200176,100300045,100200028,100200055,100300006], if (attribute(catid) in [100200068,100300032,100300209,100200176,100200028,100200055], -0.0269726448, -0.0065892436), if (attribute(catid) in [0,100200171,100200130,100300058,100300005,100200172,100300027,100300116,100200053,100300073,100200193,100200192,100300065,100300122,100300169,100400080,100300066,100300007], if (attribute(catid) in [100200171,100200130,100300005,100200172,100300027,100300073,100200193,100200192,100300122,100300169,100300007], 0.0012062164, 0.0058063938), 0.011591703)), if (attribute(catid) in [100300014,100300121,100300004,100400142,100200170,100300074,100300076,100200067,100300146], 0.0227008484, 0.0502957296)) + +if (attribute(catid) in [0,100200171,100300013,100300166,100200186,100300093,100200172,100300008,100300032,100300027,100300116,100200234,100300004,100400142,100200193,100400038,100300212,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300066,100300007,100300200,100300045,100200028,100300006,100200232], if 
(attribute(catid) in [100300013,100300032,100200193,100300212,100300127,100300007], -0.031900249, if (attribute(catid) in [100200171,100300166,100300093,100200172,100300008,100200234,100300004,100400142,100400038,100300169,100200087,100300066,100300200,100300045,100300006,100200232], if (attribute(catid) in [100300166,100300093,100300008,100200234,100300004,100400142,100200087,100300066,100300200,100300045,100300006], -0.0040077306, 5.434632E-4), 0.004429589)), if (attribute(catid) in [100200130,100300058,100300077,100300143,100400141,100300165,100300102,100300005,100300121,100200054,100300073,100200192,100300065,100300209,100200176,100300076,100200185], if (attribute(catid) in [100300058,100300077,100300102,100300005,100300073,100300065,100300209], 0.0088697865, 0.0120553652), if (attribute(catid) in [100200034,100200052,100200068,100200053,100300126], 0.0200329832, 0.0315067216))) + +if (attribute(catid) in [100300011,100200130,100300058,100300013,100300165,100200052,100300005,100200172,100300008,100200068,100300027,100300116,100200234,100200053,100300126,100400142,100200054,100300073,100400037,100300122,100300127,100200170,100300169,100300074,100300007,100300045,100200028,100200185,100300146], if (attribute(catid) in [100300011,100300058,100300013,100300165,100200052,100300008,100200054,100300074,100300007,100200185,100300146], if (attribute(catid) in [100300011,100300013,100300008,100200054,100300007,100300146], -0.0282159404, -0.0102832725), if (attribute(catid) in [100200068,100300116,100200234,100300126,100300073,100400037,100300122,100300127,100300045,100200028], -0.0021058619, 0.0025428121)), if (attribute(catid) in [0,100200171,100300014,100300077,100300166,100300143,100200186,100400141,100300032,100300121,100300004,100400038,100300065,100400079,100300066,100200067,100300006], if (attribute(catid) in [0,100300014,100300077,100200186,100300032,100300121,100300004,100400038,100300066,100300006], 0.009136363, 0.0165605827), if (attribute(catid) in 
[100200034,100200192,100200087,100200176,100300200], 0.0258553552, 0.0438879554))) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100300143,100200034,100400141,100200052,100300093,100300102,100300005,100300008,100300121,100200053,100300004,100300126,100400142,100400037,100200193,100400038,100200192,100300065,100300212,100300122,100300127,100400079,100200170,100400080,100300066,100200176,100200028,100200067,100200055,100300146], if (attribute(catid) in [100300011,100200130,100300014,100300143,100200034,100300093,100300008,100300121,100200053,100300004,100200193,100400038,100200192,100300212,100400079,100200170,100200176,100200067,100200055,100300146], if (attribute(catid) in [100300011,100300143,100300121,100300212,100200067,100200055,100300146], -0.0232281903, -0.0044814242), if (attribute(catid) in [100300166,100200052,100300126,100400142,100400037,100300122,100400080,100200028], 0.0018401795, 0.0038857825)), if (attribute(catid) in [100200171,100200186,100300165,100200172,100200068,100300027,100300116,100200234,100300019,100300073,100300169,100200087,100300074,100300200,100300076,100200232], if (attribute(catid) in [100200186,100300165,100200172,100200068,100300027,100200234,100300073,100200087,100300200], 0.0097089357, 0.0155015911), 0.0416491361)) + +if (attribute(catid) in [100300011,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300032,100300027,100300121,100200053,100300019,100300004,100200054,100200193,100400038,100300127,100300169,100300074,100300007,100200176,100200028,100200232,100300214,100300146], if (attribute(catid) in [100300011,100200186,100300093,100300005,100300027,100300019,100200054,100400038,100300007,100200028,100200232,100300214,100300146], -0.0086481469, if (attribute(catid) in [100300165,100200172,100300121,100200193,100300074], -0.0026659712, 1.319612E-4)), if (attribute(catid) in 
[0,100200171,100200130,100300058,100300077,100300166,100300143,100200034,100200068,100300116,100200234,100300126,100400142,100300073,100400037,100200192,100300065,100300212,100300122,100400079,100200170,100400080,100300066,100300200,100300006], if (attribute(catid) in [0,100300058,100300077,100200034,100200234,100300073,100400037,100200192,100300065,100300122,100400079,100400080,100300066], 0.0058612042, 0.0117793447), 0.0334049996)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100200172,100200068,100300032,100300027,100300121,100200234,100200053,100300019,100300004,100400142,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200067,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300014,100300166,100300143,100400141,100200052,100300102,100200068,100300019,100300004,100200054,100400037,100400038,100300212,100300122,100200170,100200087,100200067,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300143,100300102,100300019,100200054,100400038,100300212,100200067,100200055,100200185,100300214], -0.025649727, -0.0043110057), if (attribute(catid) in [0,100300011,100200130,100200034,100300165,100200172,100300032,100300121,100200053,100300073,100200193,100200192,100300209,100400079,100300169,100300074,100300066,100300007], if (attribute(catid) in [100300011,100200130,100200034,100200053,100300073,100300169,100300066,100300007], 0.0027424259, 0.0069640136), 0.0123173309)), if (attribute(catid) in [100300058,100300077,100300005,100300116,100300126,100300200,100200028,100300076], 0.0267885216, 0.0575708735)) + +if (attribute(catid) in 
[0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200186,100400141,100200052,100300102,100300005,100200172,100200068,100300032,100300027,100300116,100300121,100200234,100200053,100400142,100300073,100400038,100300065,100300209,100300122,100300127,100400079,100200170,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100200028,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300011,100300143,100200186,100200068,100200234,100200053,100300209,100200087,100300066,100300045,100200055,100200185,100300006], if (attribute(catid) in [100300011,100200234,100300209,100300066,100200055,100200185], -0.0209712583, -0.0093038797), if (attribute(catid) in [100300077,100200052,100300102,100200172,100300121,100400142,100300122,100200170,100300074,100300007,100200176,100200232], 1.606661E-4, if (attribute(catid) in [0,100200130,100300166,100300005,100300073,100300065,100400079,100300200], 0.0052908982, 0.0074959812))), if (attribute(catid) in [100300165,100300093,100400037,100200192,100300169,100400080,100300076], 0.0146939559, 0.0295044733)) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300013,100300077,100200034,100200186,100400141,100300093,100300005,100300008,100300027,100300116,100300121,100200234,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100300065,100300212,100300122,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100300045,100300076,100200055,100300214], if (attribute(catid) in [100300013,100300093,100300005,100200234,100300004,100200193,100400038,100300212,100300122,100300169,100200087,100300074,100300200,100300045,100300076,100200055,100300214], if (attribute(catid) in [100200234,100300004,100300212,100300200,100200055,100300214], -0.0225234294, -0.0073428723), if (attribute(catid) in [100200130,100300077,100400141,100300027,100300126,100200054,100300073,100400037,100300065,100400080,100200176], -7.043255E-4, 
0.0046211972)), if (attribute(catid) in [100300058,100300166,100300143,100200052,100200172,100300032,100200192,100300209,100200170,100200232,100300146], 0.0127794955, if (attribute(catid) in [100300165,100200053,100300019], 0.0212698998, 0.0283454496))) + +if (attribute(catid) in [0,100300014,100300013,100300077,100200186,100400141,100200172,100300116,100300004,100300126,100400142,100200054,100200192,100300212,100300209,100400079,100200170,100300169,100400080,100300045,100200028,100300076,100200067,100300006,100300146], if (attribute(catid) in [100300126,100400142,100200054,100300212,100300045,100200067,100300146], if (attribute(catid) in [100300126,100300212,100200067,100300146], -0.0423375801, -0.0094881961), if (attribute(catid) in [0,100300014,100300013,100200186,100400141,100300116,100300004,100300209,100400079,100200028,100300076,100300006], -0.0010068479, 0.0043349463)), if (attribute(catid) in [100200171,100200130,100300058,100300166,100200034,100300165,100300093,100300005,100200068,100300032,100300027,100300121,100200234,100200053,100300073,100300065,100300122,100300127,100200176], if (attribute(catid) in [100200130,100300166,100300165,100300093,100300121,100200053,100300073,100300065,100300122,100300127], if (attribute(catid) in [100300166,100300121,100300073,100300065,100300122,100300127], 0.0078139636, 0.0103953208), 0.0132055533), if (attribute(catid) in [100300011,100200052,100300102,100300008,100400037,100200193,100400038,100200087,100300074], 0.0206601511, 0.0355590743))) + +if (attribute(catid) in [100200171,100200130,100300166,100300143,100200034,100400141,100300093,100300102,100300005,100300008,100200068,100300116,100300073,100200193,100200192,100300209,100300122,100300169,100400080,100200087,100300074,100300066,100300045,100200028,100200067], if (attribute(catid) in [100400141,100300102,100300008,100200193,100300209,100400080,100300066,100300045,100200028,100200067], -0.0141770941, if (attribute(catid) in 
[100200171,100200130,100300166,100200034,100300005,100200068,100200192,100200087,100300074], -0.0034615278, 0.0015215338)), if (attribute(catid) in [0,100300014,100300058,100300013,100300077,100300165,100200052,100200172,100300032,100300121,100200053,100300004,100300126,100400142,100200054,100400038,100300065,100400079,100200176,100300076,100200185,100300006], if (attribute(catid) in [0,100300014,100300058,100300013,100300165,100300126,100200054,100400079,100300076,100200185,100300006], if (attribute(catid) in [100300014,100300013,100300165,100300126,100200054,100400079,100300076,100200185,100300006], 0.0055004027, 0.0079799037), 0.0113299928), 0.0216285145)) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100300143,100200034,100400141,100200052,100300093,100300102,100200172,100200068,100300027,100300121,100200234,100200053,100300019,100300126,100400142,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300122,100300127,100400079,100300169,100400080,100300007,100200176,100300200,100300045,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300058,100300013,100300143,100200034,100200052,100300093,100300102,100200053,100300019,100200054,100200193,100400080,100300007,100300200,100300045,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300143,100300093,100300102,100300019,100200193,100300007,100200055,100300006,100200232,100300214,100300146], -0.021450023, -0.0079398906), if (attribute(catid) in [0,100300077,100400141,100200068,100300027,100300121,100300126,100400142,100300073,100400037,100300065,100300212,100300127,100400079,100300169], if (attribute(catid) in [100300077,100200068,100300027,100300121,100300073,100300127,100400079], 0.0017634542, 0.0044214643), 0.0078134161)), if (attribute(catid) in [100200130,100300166,100200186,100300032,100400038,100300209,100200170,100200087], 0.013392468, 0.0239045512)) + +if 
(attribute(catid) in [100300013,100300165,100300102,100300008,100200068,100300116,100300121,100200053,100200054,100200192,100300209,100300122,100200170,100400080,100200087,100300066,100200176,100300200,100300045,100300076,100200067,100300214], if (attribute(catid) in [100300013,100300102,100300008,100300116,100300121,100200176,100300200,100300076,100200067,100300214], -0.0229996772, -0.0059777362), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200052,100300027,100300004,100300126,100300073,100200193,100300065,100300212,100300127,100400079,100300169,100300074,100200028], if (attribute(catid) in [100200130,100300014,100300166,100300004,100300065,100300212,100400079,100300169,100300074], 0.003077496, if (attribute(catid) in [0,100300027,100300126,100300127], 0.0063783302, 0.0094067973)), if (attribute(catid) in [100200186,100400141,100200172,100300032,100200234,100300019,100400142,100400037,100300006,100300146], 0.0137033492, 0.0253895093))) + +if (attribute(catid) in [0,100300014,100300058,100300013,100300166,100300143,100300165,100200172,100200068,100300116,100300121,100200053,100300004,100400038,100300065,100300212,100200170,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300045,100300214], if (attribute(catid) in [100300013,100300121,100400038,100300212,100200087,100300045,100300214], if (attribute(catid) in [100300013,100300212,100300214], -0.0545034219, -0.0112857727), if (attribute(catid) in [100300014,100300143,100300165,100200068,100200053,100300065,100200170,100300074,100300007], -0.0011777575, if (attribute(catid) in [0,100200172,100300116,100200176], 0.0034598533, 0.0071870285))), if (attribute(catid) in [100200171,100300011,100200130,100300077,100200034,100200186,100400141,100200052,100300093,100300102,100300027,100200234,100300126,100400142,100200054,100300073,100400037,100200192,100300122,100300127,100400079,100300200,100200028], if (attribute(catid) in 
[100200130,100300077,100200186,100400141,100300102,100200234,100300126,100400142,100200054,100300073,100200192,100300122,100400079,100300200,100200028], 0.0113718649, if (attribute(catid) in [100200171,100300011,100300027,100400037], 0.0157280324, 0.0222069557)), 0.0489348021)) + +if (attribute(catid) in [100300011,100300014,100300058,100300013,100300166,100200034,100400141,100300165,100200052,100300093,100300102,100300008,100300027,100200053,100300019,100300126,100400142,100300073,100400038,100300209,100200170,100400080,100200087,100300007,100300200,100300076,100200067,100200055,100300146], if (attribute(catid) in [100300008,100300019,100300126,100300007,100300200,100200055,100300146], -0.0393976544, if (attribute(catid) in [100300011,100300014,100300058,100300013,100300166,100200034,100300093,100300102,100400038,100300209,100200170,100400080,100300076,100200067], -0.0070993241, -7.351884E-4)), if (attribute(catid) in [0,100200171,100200130,100300077,100200186,100200172,100200234,100200054,100400037,100300122,100300127,100400079,100300074,100300066,100200028,100200232], if (attribute(catid) in [100200171,100300077,100200234,100400079,100300066,100200028], 0.0040677589, 0.0073363935), if (attribute(catid) in [100300143,100300005,100200068,100300116,100200192,100300065,100300169,100200176], 0.0152680762, 0.0386360428))) + +if (attribute(catid) in [100200171,100300143,100400141,100300093,100300005,100300008,100200068,100300027,100300121,100300126,100200054,100400038,100200192,100300212,100300122,100300007,100200176,100300045,100200028,100300076,100200067,100300006,100200232,100300214], if (attribute(catid) in [100300093,100300027,100300212,100200176,100200067,100200232,100300214], -0.0439825609, if (attribute(catid) in [100300143,100400141,100300005,100300008,100300121,100300126,100200192,100300007,100300045,100300076,100300006], -0.0078490055, -0.0029548934)), if (attribute(catid) in 
[0,100300011,100300014,100300013,100300077,100300166,100200034,100200186,100200052,100200172,100300032,100300116,100200234,100200053,100300004,100400142,100300073,100200193,100300065,100300127,100400079,100300169,100400080,100300066,100300200], if (attribute(catid) in [0,100300011,100300014,100300013,100300077,100200034,100200172,100300116,100200053,100400142,100200193,100300065,100300127,100400079,100300169,100400080,100300066], if (attribute(catid) in [100300014,100200172,100300116,100400142,100200193,100300065,100400079], 0.0037085174, 0.0067213111), 0.0103041294), if (attribute(catid) in [100200130,100300165,100300209,100200087], 0.0193058409, 0.0282268747))) + +if (attribute(catid) in [100300011,100200130,100300058,100300143,100300165,100200052,100300093,100300102,100300005,100200068,100300027,100300116,100200053,100300019,100300004,100400037,100200193,100300209,100200170,100300074,100300007,100200176,100300045,100200055,100200185,100200232,100300214], if (attribute(catid) in [100300102,100300005,100300116,100300019,100300209,100200176,100200055,100200185,100200232,100300214], -0.0293243609, if (attribute(catid) in [100200052,100300093,100200068,100300004,100200193,100300074,100300045], -0.0070078206, -9.152377E-4)), if (attribute(catid) in [0,100200171,100300013,100300077,100200186,100400141,100300126,100200054,100300073,100300065,100300122,100400079,100300169,100400080,100200087,100300066], if (attribute(catid) in [100200171,100300013,100200186,100300073,100300065,100300122,100300169], 0.0041690469, 0.0070675916), if (attribute(catid) in [100300166,100200034,100300032,100400142,100200192,100300076,100200067], 0.0123326309, 0.0192701631))) + +if (attribute(catid) in [100400141,100300093,100300102,100200068,100300032,100300116,100200234,100300004,100200193,100400038,100300212,100300209,100400079,100300066,100200176,100300200,100300045,100300076,100300214,100300146], if (attribute(catid) in 
[100300093,100300102,100200068,100300032,100200234,100400038,100300212,100300214,100300146], -0.0199825802, -0.0078667369), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100200186,100300165,100200052,100300005,100200172,100300027,100200053,100400142,100300073,100200192,100300122,100200170,100300169,100400080,100200087,100300074,100300007,100300006], if (attribute(catid) in [0,100200171,100300014,100300058,100300077,100300166,100300005,100200172,100300027,100200053,100300073,100200170,100400080,100200087,100300007], if (attribute(catid) in [100200171,100300014,100300077,100300166,100300005,100200172,100300027,100200170,100400080,100300007], 0.0025693654, 0.0050450725), 0.0104631035), if (attribute(catid) in [100300011,100300143,100300121,100300126,100400037,100300065,100300127,100200028], 0.0209253237, 0.0358125311))) + +if (attribute(catid) in [100200171,100300058,100300077,100200034,100200186,100300005,100300008,100200068,100300121,100200053,100300019,100300126,100200193,100200192,100300065,100300212,100300209,100400079,100200087,100300045,100200028,100300076,100200067,100200055,100300006,100300146], if (attribute(catid) in [100300008,100300121,100300019,100300126,100300212,100300209,100200028,100200055,100300146], -0.0326978382, if (attribute(catid) in [100300077,100200034,100200186,100300005,100200068,100200053,100300045,100300076,100200067], -0.0102205963, -0.0022839975)), if (attribute(catid) in [0,100300011,100200130,100300014,100300013,100400141,100300165,100200052,100300027,100300116,100300004,100400142,100300073,100400037,100400038,100300122,100200170,100300169,100400080,100300066,100200176,100300200,100200185], if (attribute(catid) in [100200130,100300014,100300165,100200052,100300116,100300004,100400142,100300073,100400038,100300122,100200170,100300169,100300200], 0.0024615075, if (attribute(catid) in [0,100300013,100300027], 0.0070973911, 0.0098426968)), if (attribute(catid) in 
[100300166,100300093,100200172,100200234,100200054,100300127,100300074,100300007,100200232], 0.0164836278, 0.0507175593))) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100300005,100300008,100200068,100300032,100300121,100200234,100200053,100300004,100300126,100400037,100300065,100300122,100300127,100400080,100200087,100300066,100300007,100200176,100200067,100300006,100300214], if (attribute(catid) in [100300008,100300032,100300004,100300126,100400037,100200176,100300214], -0.0321717738, if (attribute(catid) in [100300011,100200130,100300014,100300077,100200052,100200234,100200053,100300122,100300127,100400080,100200067,100300006], -0.0016525958, 0.0046522992)), if (attribute(catid) in [100200171,100300143,100300165,100200172,100300027,100300116,100300019,100400142,100200054,100300073,100200193,100400038,100200192,100400079,100200170,100300169,100300200,100300076], if (attribute(catid) in [100200171,100200172,100200054,100300073,100200192,100400079,100200170,100300169], 0.011142025, 0.0154339977), 0.0284170367)) + +if (attribute(catid) in [100200130,100300058,100300077,100200034,100400141,100300102,100300005,100300008,100200053,100300004,100400037,100300212,100300122,100400079,100400080,100300074,100300007,100200176,100200185,100300006,100200232], if (attribute(catid) in [100200034,100300102,100300005,100300008,100400037,100300074,100300007,100200185,100300006], -0.0179586062, -0.0035242201), if (attribute(catid) in [0,100300014,100300013,100300166,100200186,100300165,100200052,100300093,100200172,100200068,100300027,100200234,100400142,100200054,100300073,100400038,100200192,100300065,100300127,100300169,100300200,100300076], if (attribute(catid) in [100300013,100200052,100400142,100300073,100400038,100300065,100300076], 0.002685538, if (attribute(catid) in [0,100300165,100300093,100200172,100200234,100200054,100200192], 0.00646939, 0.0086837084)), if (attribute(catid) in 
[100200171,100300143,100300032,100300121,100300126,100200170,100200087,100300066,100200028], 0.014146174, 0.0293888308))) + +if (attribute(catid) in [100200171,100300011,100200130,100300014,100300058,100300166,100300143,100200186,100300165,100300008,100200068,100300032,100300027,100300116,100200234,100300126,100200054,100400037,100200193,100400038,100300209,100300127,100200170,100400080,100300074,100300007,100300045,100200028,100300076,100200067,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300014,100300058,100300143,100200186,100300165,100300032,100300116,100200234,100300126,100200193,100400038,100300209,100200170,100300007,100300045,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300014,100300032,100300116,100200234,100300209,100300007,100300045,100200028,100200067,100200232,100300146], -0.0140634241, -0.0065657437), if (attribute(catid) in [100200171,100200068,100200054,100400080,100300074,100300076,100300006], -0.0013766512, 0.0012665197)), if (attribute(catid) in [0,100300077,100200052,100300005,100300121,100200053,100300004,100300073,100200192,100300122,100300169,100200087,100200176,100300200], if (attribute(catid) in [100200052,100300005,100300073,100200192], 0.0044075628, 0.0068551736), if (attribute(catid) in [100200172,100300065,100400079,100300066], 0.014273406, 0.0204258682))) + +if (attribute(catid) in [100300014,100300166,100300143,100200068,100300032,100300116,100200234,100300126,100400037,100200193,100400038,100300212,100300127,100200087,100300200,100200055,100300214,100300146], if (attribute(catid) in [100300143,100200234,100400037,100200193,100300212,100300200,100200055,100300214,100300146], -0.0253090838, -0.0066897329), if (attribute(catid) in 
[0,100200171,100200130,100300058,100300077,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300121,100200053,100300004,100400142,100300073,100300065,100300122,100200170,100300169,100400080,100300074,100300066,100300007,100200176,100300076,100200067,100200185], if (attribute(catid) in [100200171,100200130,100300077,100200186,100200053,100300004,100300073,100300169,100300074,100300066,100200176,100300076], if (attribute(catid) in [100200171,100300004,100300073,100300169,100300066,100300076], 1.455877E-4, 0.0029189153), if (attribute(catid) in [0,100200034,100400141,100300005,100400142,100300065,100300007,100200185], 0.0054511445, 0.0085988325)), if (attribute(catid) in [100300013,100300165,100300102,100300027,100200192,100400079,100200028,100300006,100200232], 0.018000307, 0.0331227663))) + +if (attribute(catid) in [0,100200130,100300014,100300058,100300077,100200034,100400141,100300165,100300093,100200172,100300008,100300027,100300116,100300121,100200234,100300004,100400142,100300073,100400037,100200193,100300212,100300127,100400080,100300074,100300066,100300007,100300200,100300045,100200028,100300076,100300006,100300146], if (attribute(catid) in [100300014,100300058,100300077,100300093,100300008,100200234,100400142,100400037,100300074,100300200,100300045,100200028,100300076,100300006,100300146], if (attribute(catid) in [100300014,100300058,100300093,100400142,100300200,100300045,100300076,100300006,100300146], -0.012997967, -0.0044598258), if (attribute(catid) in [0,100400141,100200172,100300116,100300004,100200193,100300212,100300127,100400080,100300066], if (attribute(catid) in [100400141,100200172,100300116,100300212,100300127,100400080], -3.302269E-4, 0.0020272308), 0.0056490673)), if (attribute(catid) in [100200171,100300011,100300166,100200186,100200052,100300102,100300005,100200068,100300032,100200053,100400038,100200192,100300065,100300122,100400079,100200170,100300169,100200176,100200232], if (attribute(catid) in 
[100200171,100300011,100200052,100300005,100200068,100200053,100400038,100300065,100300122,100400079,100200170,100300169,100200232], if (attribute(catid) in [100200052,100300005,100200053,100400038,100400079,100200170,100300169], 0.0089176032, 0.0121994068), 0.0173516652), 0.0325017104)) + +if (attribute(catid) in [0,100200171,100300014,100300013,100300077,100300166,100200186,100400141,100300102,100200172,100300008,100300116,100200234,100200053,100300019,100300004,100400142,100400037,100200193,100300212,100300209,100300122,100300127,100200170,100200087,100200176,100300200,100300076,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300166,100300102,100300008,100300116,100200234,100300019,100300004,100400142,100300212,100300209,100200176,100300200,100200055,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300166,100300019,100300212,100300200,100200055,100300214,100300146], -0.0132054608, -0.0064396815), if (attribute(catid) in [0,100300014,100300077,100200172,100400037,100200193], 0.0018800662, 0.0043219093)), if (attribute(catid) in [100200130,100300058,100200034,100300165,100200052,100200068,100300032,100300121,100300126,100300073,100200192,100300065,100400079,100300169,100400080,100300074,100300066,100200028], if (attribute(catid) in [100300058,100300165,100300121,100300126,100300073,100400079,100300066,100200028], 0.0086112093, 0.0133895272), 0.0254777637)) + +if (attribute(catid) in [0,100200171,100300013,100300077,100300143,100200186,100400141,100200052,100300102,100200172,100300027,100300116,100200053,100300004,100400142,100200054,100200192,100300065,100300212,100300209,100300169,100400080,100200087,100300066,100200176,100300200,100300045,100300076,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in 
[100300013,100300077,100300102,100200053,100300004,100400142,100200054,100300065,100300209,100300066,100300200,100300045,100200067,100200055,100200185,100300006,100300214,100300146], if (attribute(catid) in [100300077,100300102,100300066,100300200,100300045,100200067,100200055,100200185,100300214,100300146], -0.01999683, -0.0066258033), if (attribute(catid) in [100200171,100400141,100200052,100200172,100300027,100200192,100400080,100200087,100200176,100300076], 2.616813E-4, 0.0029401657)), if (attribute(catid) in [100200130,100300014,100300166,100200034,100300165,100200068,100200234,100300073,100400037,100300122,100400079,100200170,100300074,100200028], if (attribute(catid) in [100300166,100200034,100300165,100200068,100300122,100400079,100200170], 0.010002913, 0.0150210552), 0.0282741486)) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300013,100300166,100200034,100400141,100300165,100200052,100300102,100200172,100300032,100300027,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200176,100300045,100200028,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300014,100300058,100300166,100200034,100300165,100300102,100300032,100200053,100300019,100300004,100300212,100300209,100300127,100200170,100300169,100400080,100200176,100300045,100200067,100200055,100200185,100300006,100200232], if (attribute(catid) in [100300032,100300019,100300212,100300209,100200176,100200067,100200055,100200185,100200232], -0.036557541, if (attribute(catid) in [100300166,100300102,100200053,100300004,100300169,100400080,100300006], -0.0063417149, -0.0028552545)), if (attribute(catid) in [0,100200130,100400141,100200234,100400142,100300073,100200193,100300065,100300122,100400079,100200028], if (attribute(catid) in [100200234,100400142,100300065,100300122,100400079,100200028], 0.0013434898, 0.0041326482), 
0.0076971051)), if (attribute(catid) in [100300011,100300143,100200186,100300005,100200068,100400038,100200087,100300066,100300007], 0.0168824118, if (attribute(catid) in [100300077,100300116,100300121,100200054,100300074], 0.0265247041, 0.0418405954))) + +if (attribute(catid) in [0,100200171,100300011,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100300093,100300005,100200068,100300032,100300027,100300116,100300004,100300126,100400142,100200054,100300073,100400037,100400038,100200192,100300212,100300209,100300122,100300127,100400079,100200170,100400080,100300074,100300066,100300007,100300045,100300076,100300006,100200232,100300214], if (attribute(catid) in [100200171,100200034,100300093,100300005,100200068,100300027,100300116,100300004,100300126,100200054,100400037,100400038,100300212,100300209,100400079,100300074,100300214], if (attribute(catid) in [100300005,100300027,100300116,100200054,100400037,100300209,100300074,100300214], -0.0185907409, -0.0037649772), if (attribute(catid) in [100300014,100300058,100300013,100300077,100300166,100300066,100300007,100300076,100300006], 7.932193E-4, 0.0048541117)), if (attribute(catid) in [100200130,100200052,100300102,100200172,100200193,100300169,100200176,100200028,100300146], 0.0114452275, if (attribute(catid) in [100300008,100200234,100300065,100200087], 0.0212529341, 0.0435817724))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300077,100300166,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100300027,100300116,100300121,100200053,100300004,100400142,100300073,100400037,100400038,100200192,100300212,100300122,100300127,100300169,100400080,100300007,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in 
[100300014,100300058,100300143,100400141,100300093,100300102,100300116,100400142,100400037,100400038,100300122,100300007,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300143,100300093,100300102,100400037,100300007,100200028,100200055,100200185,100300006,100200232,100300146], -0.024040692, -0.0039451648), if (attribute(catid) in [0,100300077,100300166,100200172,100300027,100300004,100200192,100300212,100300127], if (attribute(catid) in [100300077,100300166,100200172,100300027,100200192,100300212], 7.340608E-4, 0.003096223), 0.0055831787)), if (attribute(catid) in [100300011,100300165,100300032,100200234,100300126,100200193,100300065,100400079,100200170,100200087,100300074,100300066,100300200], if (attribute(catid) in [100300165,100300032,100200234,100300126,100200193,100400079,100200170,100300074,100300066,100300200], 0.0106987137, 0.0159048063), 0.0286231943)) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300143,100200034,100200186,100400141,100200052,100300093,100300102,100300005,100200172,100200068,100300027,100300116,100300121,100200234,100200053,100300004,100400142,100200054,100400037,100200193,100400038,100200192,100300122,100300169,100400080,100200087,100300074,100300066,100300200,100300045,100300076,100200055,100200232], if (attribute(catid) in [100300011,100300014,100200034,100400141,100300102,100300005,100200172,100200234,100200193,100400080,100300066,100300200,100300045,100300076,100200055,100200232], if (attribute(catid) in [100300011,100200034,100300102,100200193,100400080,100200055,100200232], -0.0213247733, -0.00351169), if (attribute(catid) in [100200130,100200052,100200068,100300121,100200053,100300004,100200054,100300169], 6.568155E-4, 0.0043360634)), if (attribute(catid) in [100200171,100300077,100300166,100300165,100300032,100300073,100300065,100300209,100300127,100400079,100200170,100200028,100300146], if (attribute(catid) in 
[100200171,100300077,100300165,100300032,100300073,100300209,100300127,100200170], 0.0086409598, 0.0123374521), 0.02758533)) + +if (attribute(catid) in [100300013,100200034,100400141,100300165,100300093,100300102,100300008,100300032,100300116,100300121,100200234,100300019,100300004,100300126,100200054,100300073,100400037,100200192,100300212,100300209,100300127,100400079,100200170,100300169,100200087,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100200232], if (attribute(catid) in [100300013,100200034,100400141,100300102,100300008,100300032,100300116,100200234,100300019,100300004,100300126,100300212,100300209,100300200,100200067,100200185,100200232], if (attribute(catid) in [100300102,100300032,100300019,100300212,100300209,100200067,100200185,100200232], -0.05249066, -0.0117164184), if (attribute(catid) in [100300093,100200054,100200192,100400079,100200170,100300066,100300045,100300076], -0.0050797102, -1.8254E-5)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100200186,100300005,100200172,100200068,100300027,100200053,100400142,100300065,100300122,100400080,100200028,100300006,100300146], if (attribute(catid) in [0,100200171,100300166,100200186,100300005,100400142,100300065,100300122,100200028], if (attribute(catid) in [100200171,100300166,100300005,100300065,100300122,100200028], 0.0045192638, 0.0060994023), 0.0117841342), if (attribute(catid) in [100300077,100300143,100200052,100300074], 0.0191236363, 0.037609952))) + +if (attribute(catid) in [0,100200171,100200130,100300058,100300013,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300005,100200172,100300027,100300121,100300019,100300004,100400142,100300073,100400037,100300065,100300212,100300122,100400079,100300169,100400080,100200087,100300074,100200176,100300045,100200067,100200185], if (attribute(catid) in [100200171,100300013,100300019,100400037,100300212,100200087,100200067,100200185], if (attribute(catid) in 
[100300013,100300019,100300212,100200185], -0.0408187739, -0.0118328301), if (attribute(catid) in [0,100200130,100300166,100200034,100200186,100400141,100200052,100300093,100300005,100200172,100300027,100300121,100300004,100400142,100300073,100300065,100400079,100300169,100300074,100200176,100300045], if (attribute(catid) in [100200034,100200186,100300093,100300005,100200172,100300027,100300121,100300004,100400142,100300073,100400079,100300169,100300074,100200176], if (attribute(catid) in [100200034,100300093,100300005,100300027,100400079,100300169,100300074,100200176], -0.0023114463, 6.956308E-4), 0.0033093887), 0.0090068346)), if (attribute(catid) in [100300011,100300014,100300077,100300116,100200053,100300126,100200193,100400038,100200192,100300127,100200170,100300066,100200028,100300076,100200232,100300146], 0.015754669, 0.0276821855)) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100300143,100200186,100300165,100200052,100300093,100300005,100200068,100300032,100300116,100300121,100200234,100200053,100300019,100300004,100300126,100400142,100200054,100300073,100200193,100400038,100200192,100300065,100300212,100300209,100300127,100400079,100200170,100300169,100400080,100300074,100300007,100200176,100300200,100300045,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100200052,100200068,100300121,100200053,100400142,100400038,100200192,100300212,100300209,100200170,100300200,100300045,100200028,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300121,100300212,100300209,100200028,100200067,100200055,100200232,100300146], -0.0318529122, -0.0076204316), if (attribute(catid) in [0,100200171,100300014,100300058,100200186,100300165,100300004,100300073,100200193,100300065,100300127,100400079,100300074,100300007,100200176], if (attribute(catid) in [100300014,100300058,100200186,100300165,100300004,100200193,100300065,100300127,100400079,100300074,100300007], 0.0024278683, 
0.0048565045), 0.0078949518)), if (attribute(catid) in [100300077,100200034,100400141,100200172,100300027,100300122,100200087], 0.014410917, 0.0330128168)) + +if (attribute(catid) in [100300077,100300166,100300143,100200034,100200186,100300032,100300121,100300019,100300004,100300126,100400142,100300073,100400037,100400038,100300065,100300212,100300209,100300127,100400079,100200087,100200176,100300200,100300076,100200067,100200055,100300214], if (attribute(catid) in [100300143,100300032,100300126,100300212,100300209,100200176,100200055,100300214], -0.0309719186, if (attribute(catid) in [100300019,100300004,100400142,100400037,100400038,100300065,100200087,100300200,100200067], -0.0082013563, -0.0020664794)), if (attribute(catid) in [0,100200171,100300014,100300058,100300013,100400141,100300165,100200172,100300008,100300027,100300116,100200053,100200192,100300122,100200170,100300169,100400080,100300074,100300045,100200028], if (attribute(catid) in [0,100200171,100300014,100300058,100300008,100300116,100200053,100200192,100300169,100300074,100300045,100200028], if (attribute(catid) in [100200171,100300014,100300058,100300008,100300116,100200053,100200192,100300074,100200028], 0.0024438226, 0.0050777724), 0.0078352283), if (attribute(catid) in [100300011,100200130,100200052,100200054,100200193,100300066,100200232], 0.0123902667, 0.0178145861))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300058,100300077,100300166,100300143,100200052,100300093,100200172,100300008,100300032,100300027,100300116,100200234,100400142,100200054,100200193,100400038,100200192,100300065,100300209,100300122,100300127,100400079,100200170,100200087,100300074,100300066,100300007,100300200,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300143,100300032,100200054,100300074,100300007,100300076,100200055,100300006], if (attribute(catid) in [100300058,100300143,100300032,100300076,100200055,100300006], -0.0392140951, -0.0178716478), if (attribute(catid) in 
[100300011,100200130,100200052,100300093,100300008,100300027,100300116,100200234,100400142,100200193,100400038,100300209,100300127,100400079,100200170,100200087,100300066,100300200], if (attribute(catid) in [100300011,100200052,100300008,100300027,100200234,100400038,100300209,100200170], -0.0066755608, -0.0018428992), if (attribute(catid) in [0,100200171,100300077,100300166], 0.0036371154, 0.0064922752))), if (attribute(catid) in [100300013,100200034,100200186,100400141,100300165,100300005,100200068,100200053,100300073,100300169,100400080,100200067,100200185,100300146], 0.0121567368, if (attribute(catid) in [100300014,100300121,100300004,100400037,100200176,100200028], 0.0238269627, 0.0454004741))) + +if (attribute(catid) in [0,100300011,100300013,100200034,100400141,100200052,100300093,100300102,100200172,100300008,100200068,100300116,100300004,100300126,100400142,100400038,100300209,100300127,100400079,100300169,100300200,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300013,100300008,100300126,100300200,100200055,100200185,100300006,100200232,100300146], -0.0236401207, if (attribute(catid) in [100200034,100200052,100300093,100300102,100200068,100300127], -0.0040925434, if (attribute(catid) in [0,100400142,100400079], 0.0014214596, 0.0034589578))), if (attribute(catid) in [100200171,100200130,100300058,100300077,100300166,100200186,100300165,100300027,100300121,100200234,100200053,100300073,100200192,100300065,100300122,100400080,100300074], if (attribute(catid) in [100300077,100300166,100300165,100300027,100200192,100300065,100300122,100400080], 0.006993185, 0.0098596774), if (attribute(catid) in [100300005,100300032,100400037,100200193,100200170,100300066,100200176,100200028], 0.0175470785, 0.0289653859))) + +if (attribute(catid) in 
[0,100200171,100200130,100300014,100300077,100300166,100200034,100200186,100400141,100200052,100300093,100200172,100300116,100200234,100200053,100400142,100200054,100200193,100400038,100300065,100300212,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300200,100300045,100300076,100200055,100300006,100300214], if (attribute(catid) in [100300014,100200034,100200052,100300093,100300116,100200053,100200193,100300212,100300127,100200176,100300200,100200055,100300214], if (attribute(catid) in [100300014,100200053,100300212,100300200,100200055,100300214], -0.0378968222, -0.0078372609), if (attribute(catid) in [0,100200171,100300077,100200186,100400142,100200054,100400038,100300065,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100300045], if (attribute(catid) in [100200171,100400142,100200054,100400038,100300065,100200170,100300169,100400080,100300066], -1.641778E-4, 0.0015184867), 0.0061033364)), if (attribute(catid) in [100300058,100300143,100200068,100300032,100300121,100300004,100300126,100300073,100400037,100200192], 0.0125327069, if (attribute(catid) in [100300011,100300013,100300165,100300102,100300005,100300027,100300122], 0.0195931033, 0.0422132813))) + +if (attribute(catid) in [0,100200171,100300014,100300077,100200186,100300165,100300102,100300005,100200172,100300008,100200068,100300116,100200234,100300004,100300126,100400142,100200054,100300073,100200193,100200192,100300065,100300212,100200170,100200087,100300074,100300066,100300200,100300045,100200028,100300076,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300014,100300102,100300116,100200234,100400142,100200054,100200087,100300200,100300045,100200055,100200185,100300214,100300146], -0.0134771, if (attribute(catid) in [0,100300165,100300005,100300008,100200068,100200193,100300212,100300066,100200028,100300076], if (attribute(catid) in [100300165,100300005,100300008,100300066,100300076], -4.632359E-4, 0.0017931695), 
0.0047361213)), if (attribute(catid) in [100300011,100200130,100300166,100400141,100200052,100300032,100300027,100300121,100200053,100400038,100300209,100300122,100400079,100300169,100400080,100300007,100200176], if (attribute(catid) in [100200130,100400141,100200052,100300032,100300027,100200053,100300209,100300122,100400079], 0.0106878879, 0.0162088762), 0.0270716952)) + +if (attribute(catid) in [100300011,100200130,100300058,100300013,100400141,100300165,100300093,100300005,100300027,100300116,100200234,100300004,100300126,100300073,100200193,100300209,100300122,100300007,100200176,100200028,100200055,100200185,100300006], if (attribute(catid) in [100300011,100300058,100400141,100300005,100200234,100300004,100200193,100300209,100300122,100200028,100200055,100200185], if (attribute(catid) in [100300011,100200193,100300209,100200028,100200055], -0.033539512, -0.0118989268), if (attribute(catid) in [100300013,100300165,100300126,100200176], -0.0067126195, -0.0029610222)), if (attribute(catid) in [0,100300014,100300166,100200034,100200186,100200052,100200172,100200068,100200053,100300019,100200054,100400038,100200192,100300065,100300127,100400079,100200170,100400080,100300074,100300066,100300076,100300146], if (attribute(catid) in [0,100300166,100200186,100200172,100200053,100300019,100400038,100200192,100300065,100200170,100400080,100300074,100300076], if (attribute(catid) in [100200186,100200172,100200053,100300019,100400038,100200170,100400080], 0.0031153692, 0.00551757), 0.0106121829), if (attribute(catid) in [100200171,100300077,100300008,100400142,100300169,100200087,100300200], 0.0168125614, 0.0398137842))) + +if (attribute(catid) in [100200130,100300013,100300077,100300166,100400141,100300165,100300102,100200068,100300032,100300121,100200234,100300019,100300004,100300126,100300073,100400037,100200193,100300212,100300127,100400079,100200170,100300066,100300045,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in 
[100300102,100300032,100300121,100300019,100400037,100200193,100300212,100200170,100300045,100300076,100200067,100200185,100300214,100300146], -0.015876685, if (attribute(catid) in [100300166,100300165,100200068,100300004,100300126,100300073,100300127,100300066,100200232], -0.0044887193, -3.188875E-4)), if (attribute(catid) in [0,100300014,100300143,100200186,100200053,100400142,100200054,100200192,100300065,100300122,100300169,100300074,100300200], if (attribute(catid) in [0,100300014,100300143,100200186,100200053,100200192,100300065,100300074], 0.0053430945, 0.0090007568), if (attribute(catid) in [100200171,100200052,100300093,100300005,100200172,100300027,100300116,100400080,100200087,100300007], 0.0122908466, 0.0204822127))) + +if (attribute(catid) in [100200130,100300077,100300143,100200034,100300005,100300032,100300116,100300121,100300019,100200054,100200192,100300212,100300209,100300127,100400079,100200170,100400080,100300074,100300007,100300200,100300045,100200028,100300076,100200055,100200232], if (attribute(catid) in [100300143,100300032,100300121,100300019,100300212,100300200,100200028,100200055], -0.0260564211, if (attribute(catid) in [100200034,100300005,100200054,100300209,100400080,100300074,100300045], -0.0098627619, -0.0040846106)), if (attribute(catid) in [0,100200171,100300011,100300014,100300013,100300166,100200186,100400141,100300165,100200052,100200172,100300027,100200053,100300004,100400142,100300073,100400037,100200193,100300065,100300122,100300169,100300066], if (attribute(catid) in [100200171,100300011,100300013,100300166,100300165,100200172,100400142,100300073,100400037,100300122,100300169,100300066], if (attribute(catid) in [100300011,100300013,100300166,100300165,100300073,100400037,100300066], 0.0011147417, 0.004298363), if (attribute(catid) in [0,100300014,100200053,100300004,100300065], 0.0064436133, 0.0100503455)), if (attribute(catid) in [100300058,100300093,100300008,100200234,100200087,100200176], 0.0146658078, 0.0247793114))) + 
+if (attribute(catid) in [100300013,100300165,100300102,100300005,100200068,100300027,100300121,100400037,100200193,100300212,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100200028,100200067], if (attribute(catid) in [100300013,100300165,100300102,100300005,100200068,100400037,100200193,100300007,100300200,100200067], -0.01649051, -0.0065097756), if (attribute(catid) in [0,100300011,100200130,100300058,100300166,100200186,100400141,100200052,100300093,100200172,100300116,100200053,100300019,100300004,100300126,100300073,100300065,100300209,100300122,100400079,100200170,100300076], if (attribute(catid) in [0,100200130,100300058,100200186,100300093,100200172,100300004,100300073,100300209,100300122,100200170,100300076], if (attribute(catid) in [100200130,100200186,100200172,100300073,100200170,100300076], 0.002507844, 0.0047210186), 0.0076570114), if (attribute(catid) in [100200171,100300014,100300077,100200034,100300008,100300032,100200234,100200054,100400038,100200192,100300127,100400080,100300006,100300146], 0.0139365828, 0.0234469942))) + +if (attribute(catid) in [100300013,100300077,100300165,100200052,100300102,100200068,100300032,100300116,100300121,100200234,100300004,100300126,100400142,100200054,100300073,100200192,100300065,100300212,100200170,100400080,100300074,100300007,100200176,100300045,100200067,100200055,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300032,100300121,100300212,100300007,100200176,100200067,100200055,100200185,100300214,100300146], -0.0389470287, if (attribute(catid) in [100200052,100300102,100200068,100300116,100200234,100300126,100200054,100300065,100200170,100300074], -0.0087219876, -0.0029990733)), if (attribute(catid) in [0,100200130,100300014,100300143,100200034,100200186,100400141,100300005,100200172,100200053,100400038,100300122,100300127,100400079,100300169,100300066,100300076,100300006], if (attribute(catid) in 
[0,100200130,100300014,100300143,100400141,100400038,100300122,100300127,100400079,100300066,100300076,100300006], 0.0052739356, 0.0099085929), if (attribute(catid) in [100200171,100300011,100300166,100300093,100300027,100200193,100300209,100200087,100300200], if (attribute(catid) in [100200171,100200193,100200087], 0.0138275479, 0.0190545276), 0.0389964998))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300013,100300077,100300166,100300143,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300027,100300116,100300121,100200234,100200053,100300019,100300004,100400142,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100300066,100200176,100300045,100200055,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300058,100300077,100300005,100300008,100200068,100400142,100300065,100300122,100400079,100400080,100200087,100300045,100200055,100200185,100200232,100300214], if (attribute(catid) in [100300058,100300005,100400142,100200087,100300045,100200055,100200185,100200232,100300214], -0.019364671, -0.0065149978), if (attribute(catid) in [0,100200171,100200130,100300014,100200034,100200186,100300093,100300102,100300116,100300121,100200234,100200053,100300019,100300073,100200193,100200192,100300127,100300169,100300074,100300006], if (attribute(catid) in [0,100200171,100300014,100200186,100300102,100200193,100300127,100300074,100300006], 0.0016997077, 0.0042301416), if (attribute(catid) in [100300166,100400141,100200172,100300027,100300004,100300209,100200170,100300066], 0.008322508, 0.0106242146))), if (attribute(catid) in [100300126,100200054,100300212,100300200,100300076], 0.0274475014, 0.0436714177)) + +if (attribute(catid) in 
[0,100200130,100300014,100300058,100200034,100200186,100300165,100200052,100300093,100300102,100300027,100300121,100200234,100300019,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100300065,100300212,100300209,100300122,100300127,100400079,100300169,100200087,100300074,100300066,100300007,100300200,100300045,100300076,100200067,100200185], if (attribute(catid) in [100300014,100200034,100200186,100300121,100300004,100400142,100300073,100400037,100300065,100300212,100300209,100300122,100300127,100400079,100200087,100300074,100200067,100200185], if (attribute(catid) in [100300014,100300121,100400037,100300212,100300209,100200087,100200067,100200185], -0.0261969274, -0.0052677773), if (attribute(catid) in [100200130,100300058,100300165,100300019,100300126,100200193,100300169,100300066,100300007,100300200,100300076], 6.535877E-4, 0.0044410765)), if (attribute(catid) in [100200171,100300011,100300013,100300166,100400141,100300005,100200172,100200068,100300032,100300116,100200192,100200170,100300006,100200232,100300146], if (attribute(catid) in [100200171,100300005,100200172,100300006,100200232,100300146], 0.0098796141, 0.0147635144), if (attribute(catid) in [100300077,100200053,100400080,100200028], 0.022608657, 0.0384965531))) + +if (attribute(catid) in [100200130,100200052,100300093,100300102,100300005,100200172,100300008,100200068,100300032,100200234,100200053,100300004,100300126,100400142,100200054,100200193,100300209,100300074,100300066,100300007,100300045,100200028,100200055,100200232], if (attribute(catid) in [100300093,100300102,100300008,100400142,100200054,100300209,100300074,100200028,100200055], -0.0206111829, if (attribute(catid) in [100200052,100300005,100200172,100200234,100300004,100300126,100300066], -0.00770466, -0.0024157474)), if (attribute(catid) in 
[0,100200171,100300011,100300014,100300058,100300013,100300077,100300166,100200034,100400141,100300027,100300121,100300073,100400038,100200192,100300065,100300127,100400079,100200170,100300169,100400080,100200087,100200067], if (attribute(catid) in [0,100300013,100400141,100300027,100300121,100400038,100200192,100300127,100400079,100300169,100200087,100200067], if (attribute(catid) in [100300013,100400141,100200192,100300127,100400079,100300169,100200087], 0.002893737, 0.005681885), 0.0099814265), if (attribute(catid) in [100200186,100300165,100300212,100300122,100300076], 0.0162181832, 0.0314914649))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300058,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100300027,100300116,100200053,100300019,100300126,100200054,100300073,100200193,100200192,100300212,100300209,100300122,100300127,100300169,100400080,100200087,100300074,100200176,100300200,100300045,100200067,100300006,100200232,100300214,100300146], if (attribute(catid) in [100200171,100300058,100200034,100400141,100300093,100300005,100300027,100300116,100200053,100300126,100200054,100200193,100200192,100300212,100300209,100300122,100300074,100200176,100200067,100200232,100300214,100300146], if (attribute(catid) in [100200034,100300126,100200193,100200192,100300209,100300122,100200176,100200067,100200232,100300214,100300146], -0.0183821122, -0.007384387), if (attribute(catid) in [100300011,100300014,100300077,100300166,100200052,100300102,100300019,100300073,100300127,100400080,100200087,100300200,100300006], -6.604841E-4, 0.0030934288)), if (attribute(catid) in [100300013,100200172,100300008,100200068,100200234,100300004,100400142,100300065,100400079,100200170,100200185], if (attribute(catid) in [100300013,100200172,100300008,100300004,100400142,100300065,100400079,100200185], 0.0088954013, 0.0132240377), 0.0239626156)) + +if (attribute(catid) in 
[100200171,100300014,100300143,100300165,100300102,100300008,100300032,100300027,100300121,100200053,100400037,100300122,100300127,100300007,100200176,100200028,100200067,100200232,100300146], if (attribute(catid) in [100300143,100300008,100400037,100300122,100300007,100200176,100200028,100200067,100200232,100300146], -0.0238688087, -0.0050071269), if (attribute(catid) in [0,100300011,100200130,100300058,100300077,100300166,100200186,100400141,100200052,100300116,100200234,100300019,100300004,100400142,100200054,100300073,100200193,100200192,100300212,100400079,100200170,100300169,100200087,100300074,100300066,100300200,100300006], if (attribute(catid) in [100300058,100300077,100300166,100200052,100300116,100200234,100400142,100200054,100300073,100200192,100300212,100400079,100300074,100300066,100300200,100300006], 0.0013911393, 0.0050101023), if (attribute(catid) in [100200034,100300005,100200172,100200068,100300126,100400038,100300065], 0.0131280626, 0.026491322))) + +if (attribute(catid) in [100300011,100300166,100300165,100300102,100300008,100300032,100200234,100400037,100300209,100300169,100300200,100300045,100200028,100300076,100200055,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300011,100300102,100300008,100200234,100400037,100300209,100300076,100200055,100300006,100200232,100300146], -0.0278018549, if (attribute(catid) in [100300165,100200185], -0.0124509833, -0.0065030338)), if (attribute(catid) in [0,100200171,100300013,100300077,100200186,100400141,100200052,100300093,100200068,100300027,100200053,100300004,100400142,100200054,100300073,100200193,100300122,100300127,100400079,100200170,100400080,100200087,100300074,100300066], if (attribute(catid) in [100300077,100400141,100300027,100300004,100400142,100200054,100200193,100300122,100300127,100200087,100300074], -6.424011E-4, if (attribute(catid) in [0,100200171,100200186,100200068,100200053,100400079,100400080], 0.0042262873, 0.0083942658)), if (attribute(catid) in 
[100200130,100300058,100200034,100300005,100200172,100300121,100200192,100300065,100300212,100300007], if (attribute(catid) in [100300058,100300005,100200172,100200192], 0.0108729295, 0.0157560823), 0.0252591937))) + +if (attribute(catid) in [100200171,100200130,100300014,100300058,100300013,100300166,100300143,100300093,100300102,100300008,100300027,100300019,100300004,100200054,100300073,100400037,100200193,100300212,100300209,100300127,100200170,100400080,100300066,100200176,100300200,100200067,100300146], if (attribute(catid) in [100300143,100300093,100300102,100300008,100300019,100200054,100400037,100300212,100300209,100200176,100300146], -0.0224536708, if (attribute(catid) in [100200171,100300014,100300013,100300027,100300004,100300066,100200067], -0.0094554624, -0.0036901881)), if (attribute(catid) in [0,100300077,100200034,100200186,100400141,100200052,100300005,100200172,100300116,100300121,100200053,100400038,100200192,100300065,100300122,100400079,100300169,100200087,100200028], if (attribute(catid) in [0,100200186,100200052,100200172,100300121,100200192], 0.0020254456, 0.0072521361), if (attribute(catid) in [100300165,100300126,100400142,100300074,100300007,100300006], 0.0116654091, 0.0258802787))) + +if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300166,100200186,100400141,100300165,100200052,100300093,100300102,100300005,100200172,100200068,100300032,100300116,100300121,100300019,100300004,100300126,100200054,100300073,100400037,100200193,100200192,100300065,100300212,100300209,100300122,100300127,100400079,100200170,100200087,100300066,100300007,100200176,100300200,100300045,100200028,100300076,100200067,100200232], if (attribute(catid) in [100200171,100200130,100300166,100200186,100300093,100300102,100300005,100200172,100200068,100300116,100300019,100300126,100200054,100200193,100200192,100300212,100300209,100300127,100200170,100200087,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200232], if 
(attribute(catid) in [100200130,100200186,100300102,100300005,100300116,100300019,100200054,100200193,100200192,100300212,100300209,100300127,100200087,100300066,100200176,100300200,100300045,100200067], -0.0063942493, -0.0019096403), if (attribute(catid) in [100300014,100300077,100300165,100300004,100300073,100400037,100300122,100400079,100200028], 0.0026034959, if (attribute(catid) in [0,100300032], 0.0043048063, 0.0068527373))), if (attribute(catid) in [100300011,100300058,100200034,100300027,100200234,100200053,100400142,100300169,100400080,100300146], 0.0134919535, 0.0274589028)) + +if (attribute(catid) in [0,100200130,100300014,100300058,100300013,100300077,100300166,100200034,100200186,100300165,100200052,100300093,100300005,100200172,100200068,100300027,100300116,100300004,100300126,100400142,100200054,100300073,100400037,100200193,100400038,100200192,100300065,100300212,100300209,100400079,100200170,100300169,100400080,100300074,100300066,100300200,100300045,100300076,100200185,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300013,100300165,100200052,100300093,100200068,100300126,100400142,100200054,100200193,100300209,100300200,100300045,100200185,100300214,100300146], if (attribute(catid) in [100300013,100400142,100200054,100300209,100300200,100200185,100300214,100300146], -0.0220987103, -0.0066619739), if (attribute(catid) in [100200130,100300014,100300058,100300166,100200034,100200186,100300005,100200172,100300027,100400038,100200170,100300074,100300076,100200232], -9.424528E-4, if (attribute(catid) in [0,100300004,100300212,100400079], 0.0034943052, 0.0064497198))), if (attribute(catid) in [100200171,100300011,100300143,100400141,100300102,100300032,100300121,100200053,100300019,100300127,100200067], 0.0127972052, 0.0287759999)) + +if (attribute(catid) in 
[100200171,100300011,100300014,100300058,100300143,100200034,100300093,100300102,100300005,100300008,100200068,100300032,100300027,100300116,100300019,100300126,100200054,100400038,100200192,100300209,100300122,100300169,100400080,100300007,100200176,100300045,100200185,100200232,100300146], if (attribute(catid) in [100300102,100300008,100300032,100300116,100300019,100300126,100300209,100400080,100200176,100200185,100200232,100300146], -0.0303496242, if (attribute(catid) in [100300011,100300143,100200034,100300093,100200068,100300027,100200054,100300045], -0.0083664582, -0.0038598802)), if (attribute(catid) in [0,100200130,100300077,100300166,100200186,100400141,100200052,100200234,100200053,100400142,100300073,100300065,100300127,100400079,100200170,100300066,100300076,100300006], if (attribute(catid) in [100200130,100300077,100300166,100200186,100200053,100300073,100300065,100300127,100400079,100200170,100300066], 0.001261608, 0.0050477397), if (attribute(catid) in [100300165,100200172,100200193,100300212], 0.0108120161, 0.023552562))) + +if (attribute(catid) in [100300011,100300058,100300013,100300077,100200186,100300102,100300005,100200172,100300008,100300116,100300121,100300126,100200054,100400038,100200170,100300200,100200055,100200185,100200232], if (attribute(catid) in [100300011,100300008,100200054,100400038,100300200,100200055,100200185,100200232], -0.0268832718, -0.0068696426), if (attribute(catid) in [0,100200171,100200130,100300166,100300143,100200034,100400141,100300165,100200052,100200053,100300004,100400142,100400037,100200193,100200192,100300065,100300122,100300127,100300169,100400080,100300074,100300066,100200176,100300045,100200028,100300006], if (attribute(catid) in [0,100200034,100400141,100300165,100400142,100200193,100200192,100300122,100300127,100400080,100300045,100200028,100300006], if (attribute(catid) in [100200034,100400141,100300165,100400142,100200193,100300122,100200028], -6.90536E-5, 0.003180608), if (attribute(catid) in 
[100200171,100200130,100300143,100200053,100400037,100300074,100200176], 0.0046312438, 0.0072930454)), if (attribute(catid) in [100300014,100300093,100200068,100300032,100300027,100300019,100300073,100400079,100300007,100300146], 0.0137098872, 0.0263591456))) + +if (attribute(catid) in [0,100300011,100200130,100300014,100300058,100300166,100200034,100400141,100300165,100200052,100300005,100200172,100300008,100300032,100300027,100200053,100300019,100400142,100300073,100400038,100200192,100300065,100300209,100300122,100400079,100300169,100400080,100200087,100300074,100300007,100300200,100300045,100200055,100200185,100300006,100200232,100300214], if (attribute(catid) in [100300011,100300014,100300058,100300165,100200052,100200172,100300008,100300032,100200053,100300019,100400142,100300073,100400038,100300209,100300122,100400080,100300074,100300007,100300200,100200055,100200185,100300006,100300214], if (attribute(catid) in [100300008,100300032,100300019,100400038,100300209,100300122,100200055,100200185,100300006,100300214], -0.0161057659, if (attribute(catid) in [100300014,100300058,100300165,100200053,100400142,100300074], -0.0064541439, -0.0026860316)), if (attribute(catid) in [0,100200034,100300005,100300065,100400079,100300169], 0.001936498, 0.0039988711)), if (attribute(catid) in [100200171,100300077,100300143,100200186,100200068,100300116,100200234,100300126,100200193,100200170,100300066,100200176,100300076], 0.0130783191, if (attribute(catid) in [100300093,100300121,100200054,100400037,100300127,100200028,100200067], 0.0218038927, 0.0414721313))) + +if (attribute(catid) in [100200171,100300014,100300058,100300077,100200186,100300093,100300008,100200068,100300032,100300027,100300019,100300004,100300126,100200054,100400037,100400038,100300209,100400079,100200170,100300169,100200087,100300007,100300200,100200055,100200185,100300214,100300146], if (attribute(catid) in [100300032,100300027,100300019,100300004,100400038,100200055,100200185,100300214,100300146], 
-0.0294531155, if (attribute(catid) in [100300058,100300008,100200068,100300126,100200054,100400037,100300007,100300200], -0.0064558393, -0.0017589508)), if (attribute(catid) in [0,100200130,100300013,100300166,100200052,100300102,100300121,100200234,100400142,100300073,100300065,100300122,100300066,100200028], if (attribute(catid) in [100200130,100300166,100200052,100300102,100400142,100300065], 0.0028476082, 0.0049351263), if (attribute(catid) in [100300143,100200034,100400141,100300165,100200172,100200193,100200192,100300074,100300006], 0.0122604781, 0.0164705118))) + +if (attribute(catid) in [0,100200171,100300011,100300077,100300166,100200034,100400141,100300165,100300102,100200172,100300032,100300027,100300116,100200053,100300019,100400038,100200192,100300212,100300122,100400079,100300169,100200087,100300200,100300045,100200028,100300076], if (attribute(catid) in [100300077,100300102,100300032,100200053,100300019,100400038,100300122,100300169,100200087,100200028,100300076], if (attribute(catid) in [100300032,100300019,100200087,100200028,100300076], -0.0267943136, -0.00735905), if (attribute(catid) in [0,100200171,100300011,100200034,100400141,100200172,100300027,100300116,100200192,100300212,100300045], if (attribute(catid) in [100200171,100200034,100400141,100200172,100300027,100300212,100300045], -0.0027994712, -9.574051E-4), 0.0035088242)), if (attribute(catid) in [100200130,100300014,100300058,100300013,100300143,100200186,100200052,100200068,100300121,100200234,100300126,100400142,100300073,100200193,100300065,100300209,100300127,100200170,100400080,100300066,100200176,100300006,100200232,100300146], if (attribute(catid) in [100200130,100300143,100200186,100200234,100300073,100300065,100300127,100400080,100300066], 0.0073203788, 0.0143066526), 0.0363911505)) + +if (attribute(catid) in 
[100300014,100300058,100300166,100200034,100300093,100300102,100300005,100200172,100200068,100300032,100300027,100300019,100300004,100400038,100300212,100300209,100200170,100300169,100200087,100300074,100300066,100200028,100300006,100200232,100300214,100300146], if (attribute(catid) in [100300102,100300005,100300032,100300019,100300004,100300209,100300074,100300066,100200232,100300214,100300146], if (attribute(catid) in [100300005,100300032,100300019,100300209,100300066,100200232,100300214], -0.0402234424, -0.0155707935), if (attribute(catid) in [100300014,100300166,100200068,100300027,100300212,100200028,100300006], -0.0067321936, -0.0015157833)), if (attribute(catid) in [0,100200171,100300013,100300077,100300143,100200186,100200052,100300121,100200054,100300073,100200192,100300065,100400079,100300045,100200185], if (attribute(catid) in [0,100200171,100300077,100200186,100300121,100200054,100400079,100300045,100200185], if (attribute(catid) in [100200171,100300077,100200186,100300121,100200054,100400079,100300045], 0.0027009397, 0.004912775), 0.0074818877), if (attribute(catid) in [100300011,100200130,100400141,100200234,100200053,100400142,100400037,100200193,100300122,100300127,100400080], 0.0114687451, 0.0184788462))) + +if (attribute(catid) in [100300011,100300014,100300058,100300143,100300165,100300093,100300102,100300008,100300116,100200234,100300019,100400142,100200193,100400038,100300065,100300127,100300169,100400080,100300074,100300007,100200176,100300200,100300076,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300011,100300014,100300165,100300008,100300116,100300019,100400038,100300007,100200176,100200067,100200185,100200232,100300214,100300146], if (attribute(catid) in [100300014,100300019,100400038,100300007,100200067,100200185,100200232,100300214], -0.0315173226, -0.0166807619), if (attribute(catid) in [100300058,100300143,100300102,100200234,100200193,100400080,100300076], -0.0097877493, -0.003192232)), if 
(attribute(catid) in [0,100300013,100400141,100200052,100200172,100200053,100300004,100300073,100400037,100200192,100300122,100400079,100300066,100200028,100300006], if (attribute(catid) in [100200172,100300004,100300073,100400037,100300122,100200028,100300006], 0.0016110349, 0.0058680637), if (attribute(catid) in [100200171,100200130,100300077,100300166,100200186,100300005,100200068,100300126,100200170], 0.012220867, 0.0260145679))) + +if (attribute(catid) in [0,100300011,100300014,100300058,100300013,100300077,100300143,100200186,100300165,100300005,100300121,100200053,100300004,100300126,100400038,100200192,100300122,100300169,100200087,100300074,100300200,100300045,100200028,100300006], if (attribute(catid) in [100300011,100300013,100300143,100300005,100300126,100400038,100200087,100300200,100200028], if (attribute(catid) in [100300013,100300143,100300005,100300126], -0.0362895954, -0.0192422418), if (attribute(catid) in [100300058,100300121,100200053,100300169,100300074,100300006], -0.0047104781, 2.148509E-4)), if (attribute(catid) in [100200171,100200130,100300166,100200034,100400141,100200052,100300093,100200172,100300027,100300116,100400142,100300073,100400037,100200193,100300065,100300209,100300127,100400079,100400080,100300066,100300007], if (attribute(catid) in [100200171,100200130,100200052,100300093,100300073,100400037,100200193,100300209,100300066], 0.0059481372, if (attribute(catid) in [100300166,100200034,100300027,100300116,100300065], 0.0086482206, 0.0113173904)), if (attribute(catid) in [100300102,100200068,100300212,100200170,100300076,100200067,100200232,100300146], 0.0189016022, 0.0294237004))) + +if (attribute(catid) in [100300011,100300014,100200034,100200186,100300165,100200172,100300008,100200068,100300121,100200234,100300126,100200054,100300073,100400037,100200193,100200192,100300065,100300209,100300122,100200170,100400080,100300007,100200176,100300045,100200028,100300076,100200185,100300146], if (attribute(catid) in 
[100300011,100200034,100300165,100300008,100200234,100300126,100200054,100400037,100300209,100400080,100300007,100200176,100200028,100200185,100300146], if (attribute(catid) in [100300011,100200034,100200234,100300126,100400037,100300209,100300007,100200176,100200185,100300146], -0.0183846087, -0.0112640996), if (attribute(catid) in [100200068,100300121,100300073,100300065,100200170,100300045,100300076], -0.0047781445, -0.0011633168)), if (attribute(catid) in [0,100200130,100300058,100300077,100300093,100300005,100300027,100300127,100400079,100200087,100300074,100300066], if (attribute(catid) in [0,100200130,100300027,100300127,100400079,100200087], 0.0039708336, 0.0079025406), if (attribute(catid) in [100200171,100300166,100400141,100200052,100300032,100300116,100200053,100300004,100400142,100300169,100300200,100300006], if (attribute(catid) in [100300166,100300032,100200053,100400142,100300169,100300006], 0.0121783231, 0.0159340797), 0.0363585815))) + +if (attribute(catid) in [100300011,100300013,100300077,100300143,100200034,100200186,100300165,100300102,100300008,100200053,100300019,100400142,100200054,100400037,100300127,100400079,100400080,100200087,100300007,100300200,100300045,100200067,100200055,100200185,100300006,100300146], if (attribute(catid) in [100300013,100300143,100300102,100200053,100300019,100300200,100200067,100200055,100200185,100300006,100300146], -0.0269176309, if (attribute(catid) in [100300011,100200186,100300008,100200054,100400037,100400079,100400080,100300045], -0.0082753604, -0.0027168619)), if (attribute(catid) in [0,100200171,100200130,100300058,100300166,100400141,100200172,100200068,100300121,100300073,100200193,100200192,100300065,100200170,100300169,100300066,100200028], if (attribute(catid) in [0,100200130,100200172,100300121,100300073,100200192,100300065,100300169,100200028], 0.0030863813, 0.0082511598), if (attribute(catid) in [100300014,100200052,100300027,100300116,100200234,100300004,100400038,100300122,100300074], 
0.0142405946, 0.0282071621))) + +if (attribute(catid) in [100300058,100300143,100300093,100300005,100300116,100300121,100300019,100300004,100200193,100400038,100300200,100300045,100300076,100200055], if (attribute(catid) in [100300143,100300121,100300019,100200193,100300076,100200055], -0.031432122, -0.0099657936), if (attribute(catid) in [0,100200171,100200130,100300166,100200186,100400141,100300165,100200052,100200053,100400142,100300073,100200192,100300065,100300122,100300127,100400079,100200170,100300169,100400080,100200087,100300074,100200176,100200028,100200067,100300006], if (attribute(catid) in [100400141,100200052,100200192,100300122,100300127,100200087,100300074,100300006], -0.0018808186, if (attribute(catid) in [0,100200171,100200186,100300165,100200053,100400142,100300065,100300169,100200028,100200067], 0.0034558143, 0.0071443084)), if (attribute(catid) in [100300011,100300014,100300013,100300077,100200034,100200172,100200068,100300027,100200234,100200054,100300212,100300066,100300007,100300146], 0.011374724, 0.0365678279))) + +if (attribute(catid) in [100200130,100300013,100300077,100300166,100200034,100400141,100300165,100200172,100200068,100300004,100400142,100200054,100400037,100300212,100300127,100400079,100400080,100300074,100300066,100200028,100200067,100200055,100300006], if (attribute(catid) in [100200034,100300165,100300004,100200054,100300127,100200028,100200067,100200055,100300006], if (attribute(catid) in [100200034,100300004,100300127,100200028,100200067,100200055], -0.0176698057, -0.0096668582), if (attribute(catid) in [100200130,100300013,100300077,100300166,100400142,100400037,100300212,100400079], -0.0070606716, -0.0037781209)), if (attribute(catid) in [0,100200171,100300058,100200186,100200052,100300102,100300027,100200053,100300073,100200193,100200192,100300065,100300122,100300169,100200087,100200176,100300076,100200232], if (attribute(catid) in [0,100300058,100200052,100300102,100300027,100200192,100300065,100300122,100200232], 
0.0055156165, 0.0073414677), if (attribute(catid) in [100300005,100300008,100300032,100300121,100200234,100300126,100400038,100200170,100300200,100200185], 0.0138952295, 0.0286522384))) + +if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300166,100300143,100200186,100300165,100300093,100300102,100300005,100200172,100300008,100300027,100300116,100300121,100200234,100200053,100300004,100200054,100400038,100200192,100400079,100300169,100400080,100200087,100300007,100200176,100300200,100300045,100200055,100300006,100300214], if (attribute(catid) in [100300011,100300014,100300143,100300093,100300102,100200172,100200054,100400079,100200087,100300007,100300200,100300045,100200055,100300006,100300214], if (attribute(catid) in [100300014,100300093,100300102,100200054,100300007,100300200,100200055,100300006,100300214], -0.0266674639, -0.0102733938), if (attribute(catid) in [0,100200171,100300077,100300027,100200053,100300004,100200192,100300169,100400080,100200176], -8.805496E-4, 0.0030248341)), if (attribute(catid) in [100300058,100200034,100400141,100200052,100300126,100300073,100300065,100300212,100300122,100300127,100200170,100300076,100200185,100200232,100300146], if (attribute(catid) in [100300058,100400141,100300126,100300073,100300065,100300212,100300122,100200185], 0.0088319892, 0.0145181522), if (attribute(catid) in [100200068,100400142,100200193,100300074,100300066], 0.02038035, 0.0412794221))) + +if (attribute(catid) in [100300011,100300014,100300013,100300077,100300143,100200186,100300093,100300005,100300008,100300032,100300121,100200054,100200192,100300122,100300007,100300200,100200067,100200055,100200185,100200232], if (attribute(catid) in [100300143,100300005,100300008,100300032,100200054,100300007,100200067,100200055,100200232], -0.0313843116, -0.0090786448), if (attribute(catid) in 
[0,100200171,100200130,100300058,100400141,100300165,100200052,100300102,100200172,100200068,100300116,100300004,100400142,100300073,100200193,100400038,100300065,100300127,100400079,100300169,100300066,100200176,100300045,100200028,100300006], if (attribute(catid) in [0,100200171,100200130,100200052,100300116,100400038,100400079,100300169,100300045,100200028], 0.0019399364, 0.0057587234), if (attribute(catid) in [100300166,100300027,100200234,100200053,100200170,100200087,100300074,100300076], 0.011806527, 0.0240753548))) + +if (attribute(catid) in [100300011,100300014,100200034,100300165,100300093,100300032,100300121,100300019,100300004,100400142,100400038,100300122,100300127,100200170,100300074,100200176,100300200,100200067,100200055,100300006,100200232,100300146], if (attribute(catid) in [100300011,100200034,100300093,100300032,100300019,100400038,100200176,100200067,100200055], -0.029749012, if (attribute(catid) in [100300165,100300004,100300200,100300006,100200232,100300146], -0.0129734582, -0.0048159123)), if (attribute(catid) in [0,100200171,100200130,100300058,100300077,100300166,100200186,100400141,100200052,100300102,100300005,100200172,100200068,100300027,100300116,100200234,100200053,100200054,100300073,100400037,100200193,100200192,100300065,100300169,100400080,100300066,100300045,100200028,100300076,100200185], if (attribute(catid) in [0,100200130,100300058,100300077,100300166,100200186,100300102,100300116,100200053,100200192,100300065,100300045,100200028], if (attribute(catid) in [100300077,100300166,100200186,100300116,100200192], -4.494225E-4, 0.0030538822), if (attribute(catid) in [100200171,100400141,100200234,100300073,100400037,100300169,100300066], 0.0068726028, 0.0116359714)), if (attribute(catid) in [100300143,100300126,100400079,100200087], 0.0222596119, 0.0442934684))) + +if (attribute(catid) in 
[100300011,100300077,100200186,100400141,100300165,100200052,100300005,100200172,100300008,100200068,100300032,100300116,100300019,100400142,100200054,100200193,100400038,100300209,100300127,100200170,100300169,100300074,100200176,100300045,100200067,100200055,100200232,100300146], if (attribute(catid) in [100300011,100300008,100200068,100300032,100300019,100200193,100300074,100200176,100200067,100200055,100200232,100300146], if (attribute(catid) in [100300032,100300019,100200193,100300074,100200176,100200067,100200055,100200232,100300146], -0.0304543117, -0.0136959974), if (attribute(catid) in [100400141,100400142,100300127,100200170], -0.0090390793, -0.0027839113)), if (attribute(catid) in [0,100200171,100200130,100300014,100300058,100300166,100200034,100300027,100300121,100200234,100200053,100300004,100300073,100200192,100300122,100400079,100400080,100200087,100300066,100300200,100200028,100300076], if (attribute(catid) in [0,100300058,100300027,100200234,100200053,100300004,100300073,100200192,100400080,100300066,100300200], 0.0035175195, 0.0082798864), if (attribute(catid) in [100300093,100300126,100300065], 0.0167800289, 0.0355577197))) + +if (attribute(catid) in [0,100200130,100300013,100300077,100300166,100300143,100200186,100200052,100300005,100200172,100200234,100200053,100300019,100300004,100400142,100200054,100400037,100200193,100300212,100300127,100400079,100300169,100400080,100200087,100300074,100300066,100300007,100200176,100300200,100300045,100300076,100200067,100200185,100300006,100200232,100300146], if (attribute(catid) in [100300013,100200186,100200234,100300019,100300004,100200054,100200193,100300212,100400080,100200087,100200176,100300200,100200067,100200185,100200232,100300146], if (attribute(catid) in [100300013,100300019,100200193,100300212,100200176,100200067,100200185,100200232], -0.0351013956, -0.0146983415), if (attribute(catid) in 
[0,100300077,100300143,100300005,100200053,100300169,100300074,100300066,100300007,100300045,100300076,100300006], if (attribute(catid) in [100300077,100300143,100300169,100300074,100300007,100300045], -0.0036169246, 6.508121E-4), 0.0039348871)), if (attribute(catid) in [100200171,100300011,100300058,100400141,100300165,100300093,100200068,100300032,100300027,100300121,100300126,100300073,100400038,100200192,100300065,100200170], if (attribute(catid) in [100300058,100400141,100300165,100300027,100300121,100300065,100200170], 0.0092359739, 0.0139160873), if (attribute(catid) in [100300014,100200034,100300122,100200028], 0.0220286224, 0.0419934945))) + +if (attribute(catid) in [100300013,100300143,100300005,100300116,100300019,100300004,100200054,100300065,100300212,100300209,100200087,100200232,100300214], if (attribute(catid) in [100300143,100300019,100300065,100300209,100200232,100300214], -0.0342436123, -0.0130043453), if (attribute(catid) in [0,100200171,100200130,100300014,100200186,100400141,100200052,100200172,100200068,100200053,100400142,100300073,100400037,100200193,100400038,100300122,100400079,100200170,100300007,100300045,100200185,100300006], if (attribute(catid) in [100200130,100300014,100400141,100200172,100200068,100400142,100400037,100200193,100300007,100300006], -0.0025589925, if (attribute(catid) in [100200171,100300073,100400038], 0.0013996109, 0.0034393713)), if (attribute(catid) in [100300011,100300077,100300166,100200034,100300165,100300093,100300027,100200192,100300169,100300066,100300200,100200028,100300076], if (attribute(catid) in [100300077,100300166,100200192,100300169,100300066,100200028], 0.0071225815, 0.0104333907), 0.0168963783))) + +if (attribute(catid) in [100300011,100300014,100300058,100300013,100300143,100300027,100200053,100300019,100300126,100400037,100300169,100400080,100200176,100300076,100200067,100200055,100200185,100300214,100300146], if (attribute(catid) in 
[100300011,100300058,100300013,100300143,100300019,100400037,100200176,100200067,100200055,100200185,100300214], -0.0290173523, -0.0086034947), if (attribute(catid) in [0,100300077,100300166,100200034,100200186,100400141,100300165,100200052,100300093,100200172,100200068,100300116,100200234,100300004,100400142,100300073,100400038,100200192,100300065,100300209,100300127,100400079,100200170,100200087,100300074,100300045,100200232], if (attribute(catid) in [100300077,100200186,100400141,100300093,100200172,100200068,100300004,100300065,100300074], -7.979247E-4, if (attribute(catid) in [0,100300166,100300165,100200052,100300045], 0.0031637733, 0.0058373245)), if (attribute(catid) in [100200171,100200130,100300005,100300008,100300121,100300066,100300007,100200028,100300006], 0.0142240118, if (attribute(catid) in [100200054,100200193,100300122], 0.0228053439, 0.0584841669)))) + +if (attribute(catid) in [100300143,100300165,100300093,100300008,100300116,100200234,100300004,100300126,100200054,100400037,100200193,100400038,100300212,100300122,100200170,100300066,100300007,100200055,100200232,100300146], if (attribute(catid) in [100300008,100300116,100300126,100300212,100300007,100200055,100200232,100300146], -0.0246224156, if (attribute(catid) in [100300143,100300165,100200234,100300004,100400037,100300122], -0.0104770173, -0.0059185929)), if (attribute(catid) in [0,100200171,100300011,100200130,100300014,100300077,100300166,100200172,100300121,100200053,100300073,100300127,100400079,100200028], if (attribute(catid) in [0,100200171,100200130,100300014,100300077,100300121,100200053,100300073], if (attribute(catid) in [100200171,100200130,100300077,100200053,100300073], 0.0010797418, 0.0024531718), 0.0064407369), if (attribute(catid) in [100200186,100400141,100200052,100300102,100200068,100300027,100300019,100400142,100200192,100300065,100300169,100400080,100300200,100300076,100300006], 0.0120768393, if (attribute(catid) in [100300058,100200034,100300209,100200087,100300074], 
0.018583513, 0.0301892716)))) \ No newline at end of file diff --git a/searchlib/src/test/files/ranking08.expression b/searchlib/src/test/files/ranking08.expression new file mode 100644 index 00000000000..e2bab6c082c --- /dev/null +++ b/searchlib/src/test/files/ranking08.expression @@ -0,0 +1,5 @@ +if (CT$ in ["Wiki","Web","Image","Video","Finance"], if (SDSF_LOCAL < 0.6359952986, if (CT$ in ["Image","Video"], -0.1846455351, -0.0057844764), -0.4039473684), 0.2900655347) + +if (CT$ in ["Wiki","Web","Image","Video","KGMovie","Finance","Timezone"], -0.079079733, if (CT$ in ["Local","Q2A"], if (SDSF_LOCAL < 0.5348491371, -0.0304336373, 0.2401947405), 0.373999153)) + +if (CT$ in ["Web","Image","Video","Timezone"], -0.0572267897, if (CT$ in ["Wiki","Local","KGMovie"], if (SDSF_LOCAL < 0.4078139514, -0.0295648159, 0.1601345785), 0.2612064355)) + +if (CT$ in ["Image","Video","Timezone"], -0.1103244788, if (CT$ in ["Wiki","Web","Local","KGMovie"], if (QPSCOREFOR_KG_PEOPLE < 0.9930000007, 0.0194079789, -0.2056829336), 0.1987635246)) + +if (CT$ in ["Image","Video","Event","Timezone"], if (SDSF_WEB < 0.3725785315, -0.0680975953, -0.2264832978), if (CT$ in ["Wiki","Web","Local","Q2A"], 0.010592822, 0.1366891795)) \ No newline at end of file diff --git a/searchlib/src/test/files/s-expression.vre b/searchlib/src/test/files/s-expression.vre new file mode 100644 index 00000000000..13719ad411a --- /dev/null +++ b/searchlib/src/test/files/s-expression.vre @@ -0,0 +1 @@ +if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0238095, if (POS_20 < 0.5, if (TERM_CASE_3 < 0.5, 0.0320624, 0.0456067), -0.0294118)), -0.0011905), -0.0138889), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0167606, -0.0306452), -0.0182927), -0.0271429), -0.022549)), 0.0455737), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0399584, -0.0283827), if (CHUNKTYPE < 
0.5, if (LENGTH < 11.5, -0.027549, -0.0189895), -0.0031792)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0441547, -0.0346561), -0.0471542))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.026149, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0416028, 0.030199), -0.0316177)), -0.0029114), -0.0143587), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0178874, -0.0223683), -0.0192148), -0.0298584), -0.0213989)), 0.0439902), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0421426, -0.0308258), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0277896, -0.0182189), -0.0040314)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0426529, -0.0330161), -0.0457574))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0225608, if (TERM_CASE_4 < 0.5, 0.0401168, 0.0290235)), -0.0415782), -0.0031138), -0.0153018), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0154691, -0.0262869), -0.0244862), -0.0298703), -0.0205396)), 0.0433266), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0380008, -0.0275714), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0269285, -0.0167566), -0.0042621)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0419571, -0.0321239), -0.0449586))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.027675, if (TERM_CASE_4 < 0.5, 0.0388147, 0.0278569)), -0.0018196), -0.0152118), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0161232, -0.0204323), -0.0198792), -0.0240534), -0.0196779)), 0.0421041), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0407405, -0.0293697), if 
(CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0261933, -0.0163427), -0.0035442)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.041222, if (CONCEPTTYPE < 0.5, -0.0369189, -0.026805)), -0.0438784))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0293154, if (POS_20 < 0.5, if (TERM_CASE_3 < 0.5, 0.0288701, 0.0416719), -0.0310674)), -3.423E-4), -0.0141918), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0147251, -0.0260802), -0.0173092), -0.0249324), -0.0197797)), 0.0409582), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0370935, -0.0269369), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0254922, -0.0158792), -0.002589)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0397203, -0.0311777), -0.0426463))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196502, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.028683, 0.0168366), -0.0244408), 0.0384706)), -0.0067946), if (POS_10 < 0.5, 0.0107138, -0.0269118)), -0.0152566), -0.0177037), if (STOP_WORD_3 < 0.5, 0.0422887, 0.0287074)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0356967, -0.0254432), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0244727, -0.0159428), -0.0052851)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0387765, -0.0297345), -0.0414916))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0278568, if (TERM_CASE_4 < 0.5, 0.0359876, 0.0261228)), -0.0010129), -0.0123932), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0127637, -0.0176055), -0.0244544), if (POS_11 < 0.5, -0.0122589, -0.0454474))), 0.0392691), if (ORDER_IN_CLUSTER < 2.5, if 
(ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0377529, -0.0273269), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0235671, -0.0154739), -0.0030262)), if (ENTITYPLACETYPE < 0.5, -0.0392548, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0335334, -0.0234854), -0.039021)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0255861, 0.0264454), -0.0328458), 0.0118034), 0.0351067), -0.0249263), -0.0107626), -0.0138085), -0.015843), 0.0387274), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0338899, -0.0247335), if (CHUNKTYPE < 0.5, if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0249901, -0.0160191), -0.0082827), -0.0036917)), if (ENTITYPLACETYPE < 0.5, -0.0382492, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0320091, -0.0216203), -0.038139)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0222249, if (POS_20 < 0.5, 0.0261828, -0.0267141)), 0.01303), 0.0349923), -0.0257607), -0.0103532), if (EXTENDEDTYPE < 0.5, 0.0302186, -0.0189039)), if (POS_11 < 0.5, -0.0104305, -0.0438767)), 0.0380382), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0364953, -0.0259777), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0229979, -0.0142767), -0.0021224)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0359463, -0.0278147), -0.0387552))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0340066, if (GOOD_SYNTAX < 0.5, -0.0200785, 0.0240284)), 0.0014722), -0.0142752), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0135552, -0.0203988), -0.0159875), 
-0.0252406), -0.0198481)), if (GOOD_SYNTAX < 0.5, -0.035675, 0.0371821)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0347253, -0.0245341), if (CHUNKTYPE < 0.5, -0.0194266, -0.0039127)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.035227, if (CONCEPTTYPE < 0.5, -0.0308237, -0.0206793)), -0.0379667))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.027432, if (POS_20 < 0.5, 0.0273692, -0.0296341)), -0.0014755), -0.0104811), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (CHUNKTYPE < 0.5, 0.0109256, 0.0306595), -0.0185903), -0.023881), -0.0152296)), 0.0358929), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0338809, -0.0243573), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0215008, -0.0132609), -0.0030742)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0341674, if (CONCEPTTYPE < 0.5, -0.0309224, -0.0209575)), -0.0369798))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0189961, if (TERM_CASE_4 < 0.5, 0.0317904, 0.02261)), 0.00149), if (LENGTH < 13.5, 0.0202299, -0.0227074)), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0132167, -0.0277941), -0.0139034), -0.0239364), -0.0178965)), 0.0348111), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0327393, -0.0243069), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.021828, -0.0132156), -0.0040227)), if (ENTITYPLACETYPE < 0.5, -0.0346582, if (ORDER_IN_CLUSTER < 3.5, -0.026238, -0.0340471)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, 0.0229223, -0.0247442), 0.0341614), 2.957E-4), -0.013085), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, if (POS_18 < 
0.5, if (POS_19 < 0.5, 0.0103332, -0.0263093), -0.0125637), 0.0295777), -0.0192543), -0.015281)), 0.033784), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.032373, -0.0232716), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0211888, -0.0139731), -0.0030902)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0322893, -0.0256094), -0.0352917))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0314522, 0.0210548), -0.027565), -0.0032209), -0.0104833), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0119084, -0.0213878), -0.0171946), -0.0239646), -0.0162698)), 0.0334209), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0313708, -0.0224933), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0209707, -0.0127074), -0.0035211)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0318281, if (CONCEPTTYPE < 0.5, -0.0285518, -0.0189534)), -0.0344255))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0247167, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, 0.0220308, -0.0338461), 0.0113261), 0.030921)), -0.0077814), -0.0227584), if (EXTENDEDTYPE < 0.5, 0.0290777, -0.0172571)), -0.0148965), if (STOP_WORD_3 < 0.5, 0.0347528, 0.0225359)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0323354, -0.022918), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0201202, -0.0114946), -0.0023924)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0312292, -0.0242211), -0.033578))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0172385, if (POS_20 < 0.5, 0.0240968, -0.023822)), -9.647E-4), -0.010356), if (STOP_WORD_2 < 0.5, if 
(POS_10 < 0.5, if (POS_18 < 0.5, if (CHUNKTYPE < 0.5, 0.0071835, 0.0266903), -0.0148186), -0.0227919), -0.012414)), 0.0315926), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0286387, if (TERM_CASE_3 < 0.5, -0.0163821, -0.0270757)), if (CHUNKTYPE < 0.5, -0.0165336, -0.003919)), if (ENTITYPLACETYPE < 0.5, -0.0317233, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0270231, -0.0185894), -0.0309025)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0229224, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0204102, 0.0106909), 0.0289595)), -0.0084357), -0.0200925), -0.0088235), if (POS_11 < 0.5, -0.0102852, -0.0386286)), 0.0315906), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0278104, if (TERM_CASE_3 < 0.5, -0.0167703, -0.0260821)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0221284, -0.0134741), -0.0120638), -0.0017045)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0296597, -0.0229542), -0.0319876))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.020537, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0277037, 0.0200641), -0.023268)), 7.965E-4), -0.0085846), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0103221, -0.0168052), -0.0204982), -0.0164096)), 0.0302062), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0269195, if (TERM_CASE_3 < 0.5, -0.0160655, -0.0265024)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0189328, -0.0107779), -0.0023674)), if (ENTITYPLACETYPE < 0.5, -0.030323, if (ORDER_IN_CLUSTER < 3.5, -0.0219148, -0.0299687)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0214866, if (POS_20 < 
0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 1.5, 0.0198002, -0.0017229), 0.0288705), -0.0298617)), if (POS_10 < 0.5, 0.009464, -0.0155948)), -0.0093611), -0.0123187), -0.0132916), 0.0301288), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0264592, -0.0187969), if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, if (CONCEPTTYPE < 0.5, -0.0190504, -0.0118572), -0.0047683), -0.001059)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0282433, -0.0221125), -0.0308641))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_20 < 0.5, 0.0194071, -0.0236163), 0.0099873), 0.0270746), -0.0059088), -0.0168318), if (EXTENDEDTYPE < 0.5, 0.0305566, -0.0143625)), if (POS_11 < 0.5, -0.0082314, -0.0370651)), 0.0294435), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0272551, -0.0191542), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0176287, -0.0105903), -0.0017609)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0273562, if (CONCEPTTYPE < 0.5, -0.0247411, -0.0169687)), -0.0301205))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.019051, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, 0.0181601, -0.0227417), 0.0267265)), -0.0024569), if (POS_10 < 0.5, 0.0069209, -0.0141317)), if (EXTENDEDTYPE < 0.5, 0.0319879, -0.0149185)), -0.0128912), 0.0289185), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.02486, if (TERM_CASE_3 < 0.5, -0.0141723, -0.0247693)), if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, -0.0172354, if (TERM_CASE_3 < 0.5, -0.0015111, -0.0152732)), -0.0030835)), if (ORDER_IN_CLUSTER < 3.5, -0.0245878, -0.0292687))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if 
(POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0210502, if (POS_20 < 0.5, 0.019687, -0.0249092)), -0.0043769), if (POS_10 < 0.5, 0.0069284, -0.017452)), if (EXTENDEDTYPE < 0.5, 0.029387, -0.0148346)), -0.0119228), 0.0280001), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0246005, -0.0179612), if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, -0.0166346, if (TERM_CASE_4 < 0.5, -0.0149392, 7.37E-5)), -9.508E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0265787, if (CONCEPTTYPE < 0.5, -0.0246949, -0.0130917)), -0.0288808))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0213783, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0175062, 0.0255312), -0.0300307)), -0.0052822), if (POS_10 < 0.5, 0.0069152, -0.0155832)), -0.0103763), if (POS_11 < 0.5, -0.0065774, -0.0370802)), 0.0274144), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0239849, if (TERM_CASE_3 < 0.5, -0.0140748, -0.0242957)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0166813, -0.0106118), -0.002076)), if (ENTITYPLACETYPE < 0.5, -0.0272019, if (CONCEPTTYPE < 0.5, -0.0253823, -0.0176928)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0175067, -0.0259902), 6.34E-5), 0.027027), -0.0106422), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, 0.0040347, -0.0239956), 0.0338428), 0.0258125), -0.013453)), 0.0265121), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0233444, if (TERM_CASE_3 < 0.5, -0.0124575, -0.0232757)), if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, -0.0146049, -0.0033815), -0.0026193)), if (ENTITYPLACETYPE < 0.5, -0.0267216, if (ORDER_IN_CLUSTER < 3.5, -0.0192017, -0.0261826)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, 
if (POS_19 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0246778, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, 0.0165603, -0.0254062), 0.0252378)), -0.0035612), if (POS_10 < 0.5, 0.005575, -0.0140081)), if (EXTENDEDTYPE < 0.5, 0.027077, -0.015631)), -0.0120579), 0.0264607), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.025323, -0.0173393), if (CHUNKTYPE < 0.5, -0.0135205, -0.0010786)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.024778, if (CONCEPTTYPE < 0.5, -0.0229531, if (LENGTH < 9.5, -0.0060947, -0.0195776))), -0.0272583))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.01557, 0.0232402), -0.0156369), if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, 0.0144031, 0.0019319), -0.0107302)), -0.0120357), if (STOP_WORD_3 < 0.5, 0.0274022, 0.0170157)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0258868, if (TERM_CASE_4 < 0.5, -0.0196026, -0.013761)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.01608, -0.0091723), 1.449E-4)), if (ENTITYPLACETYPE < 0.5, -0.0255402, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0215057, if (LENGTH < 9.5, -0.0060495, -0.0204416)), -0.0250903)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161394, 0.0178085), -0.0054002), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.00706, -0.0218277), -0.0126976), -0.024607), 0.0347621), 0.0247592), -0.0129957)), 0.0251434), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 9.5, -0.0231928, -0.0162238), if (CHUNKTYPE < 0.5, -0.0129107, -0.0021479)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0236224, if (CONCEPTTYPE < 0.5, -0.0217007, if (LENGTH < 8.5, -0.005477, -0.0192289))), -0.0259407))) + if (ORDER_IN_CLUSTER 
< 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0162787, 0.0189037), -0.0244341), 2.038E-4), -0.0050615), if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (EXTENDEDTYPE < 0.5, 0.0135523, 0.0046004), -0.0122361), -0.0158453), -0.0097432)), 0.0242278), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0250713, -0.0167955), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0148256, if (TERM_CASE_1 < 0.5, -0.0099836, 0.0248802)), -0.0024053)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0227493, -0.0179295), -0.0252643))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0193283, 0.01592), -0.0067908), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, 0.0022836, -0.0184071), -0.0398454), 0.0300278), -0.0334954), 0.0209461)), 0.02435), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0244106, -0.0163105), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0172558, -0.0089579), -0.0084621), -0.0026096)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0226243, if (CONCEPTTYPE < 0.5, -0.020762, -0.0122463)), -0.0249212))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0211361, 0.0157079), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_13 < 0.5, if (POS_18 < 0.5, 0.0056327, if (LENGTH < 20.5, 0.0114886, -0.027261)), -0.0388943), -0.0230606), 0.031081), -0.0328684), 0.0216318)), -0.0122586), 0.0238667), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0189804, if (LENGTH < 11.5, -0.0146883, -0.0086626)), if (ENTITYPLACETYPE < 0.5, -0.0106364, -5.685E-4)), if (ENTITYPLACETYPE < 0.5, -0.0238854, if (CONCEPTTYPE < 0.5, 
-0.0216013, -0.0150148)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.0200161, if (POS_18 < 0.5, 0.0138241, -0.0047691)), -0.0071074), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (EXTENDEDTYPE < 0.5, 0.0127718, 0.003632), -0.0133302), -0.013666), -0.0118496)), if (STOP_WORD_1 < 0.5, 0.0241087, 0.0099508)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0236828, -0.0154686), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.013553, -0.0073624), -6.143E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0217912, if (CONCEPTTYPE < 0.5, -0.020586, -0.012412)), -0.0238811))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, 0.0140879, -4.32E-4), 0.0230054), -0.0089385), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0057379, -0.0224734), -0.0131509), -0.0152052), 0.0214004), -0.013384)), 0.0225425), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0224854, -0.0152947), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0136659, -0.007325), -5.115E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0214652, if (LENGTH < 9.5, -0.013739, -0.0208746)), -0.0237957))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0148701, -0.0229642), 0.00104), 0.0219624), -0.0082162), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0045602, -0.0144154), -0.0193818), 0.029159), 0.0194212), -0.0109387)), 0.0223692), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0197204, if (TERM_CASE_3 < 0.5, -0.0095159, -0.019108)), if (CHUNKTYPE < 0.5, -0.0109369, -0.0018483)), if (ENTITYPLACETYPE < 0.5, 
-0.0220807, if (CONCEPTTYPE < 0.5, -0.0204406, -0.0145144)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0204859, 0.0130288), -0.0014037), -0.0077292), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0072626, -0.0204056), -0.0110636), -0.0136554), if (POS_11 < 0.5, -0.0062418, -0.0338774))), if (STOP_WORD_1 < 0.5, 0.0227449, 0.0109107)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0213143, -0.0143339), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0128657, -0.0063313), -6.136E-4)), if (ENTITYPLACETYPE < 0.5, -0.0216506, if (CONCEPTTYPE < 0.5, -0.0199993, -0.0140245)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0146672, -0.0018224), -0.0067699), if (POS_11 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, 0.001774, -0.0100986), -0.0194597), 0.028292), 0.0153051), -0.0335927)), 0.0216562), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0202377, if (POS_18 < 0.5, -0.0127352, -0.0299783)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0157168, -0.0069759), -0.0070018), -7.154E-4)), if (ENTITYPLACETYPE < 0.5, -0.021247, if (ORDER_IN_CLUSTER < 3.5, -0.0145454, -0.0207958)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0205616, 0.0129939), -0.0044695), if (POS_10 < 0.5, 0.0044668, -0.0129957)), if (EXTENDEDTYPE < 0.5, 0.0293093, -0.0107582)), 0.0198518), -0.0095002), 0.0215686), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0173628, if (TERM_CASE_4 < 0.5, -0.0166649, -0.0029049)), if (CHUNKTYPE < 0.5, -0.010394, -0.0015807)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, 
-0.0195013, if (CONCEPTTYPE < 0.5, -0.0181298, if (LENGTH < 9.5, -0.0041341, -0.0161438))), -0.0219923))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0263639, if (ENTITYPLACETYPE < 0.5, 0.0126362, 0.0201228)), -0.0101454), if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, 0.0105334, 0.0015241), -0.010678)), -0.0083597), 0.0209341), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, if (LENGTH < 7.5, -0.0204873, -0.0145965), -0.0063365), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0125304, -0.0063154), -4.819E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0194133, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0151592, -0.003694), -0.0183897)), -0.0215651))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0137721, -0.0018103), if (LENGTH < 13.5, 0.0275377, -0.0116633)), if (POS_11 < 0.5, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, 9.752E-4, -0.0177054), 0.0258804), 0.0148485), -0.028177)), 0.0198978), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0175399, -0.0113568), if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0121294, 5.33E-4), -0.0048164)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0187967, if (CONCEPTTYPE < 0.5, -0.0169728, if (LENGTH < 9.5, -0.001531, -0.0160204))), -0.0210201))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0203391, if (POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0130113, 0.0203999), -0.0250044)), if (POS_10 < 0.5, 0.0061418, -0.0119255)), -0.007534), if (EXTENDEDTYPE < 0.5, 0.0257184, -0.0117189)), -0.0096893), 0.0206417), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0174476, if (TERM_CASE_3 < 0.5, -0.009257, -0.0183869)), if (CONCEPTTYPE < 
0.5, if (LENGTH < 11.5, -0.012927, -0.0063419), -0.0050945)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0186469, -0.0141184), -0.0207074))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161598, if (POS_18 < 0.5, 0.0131595, -5.738E-4)), -0.0065529), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, 0.0020625, -0.0361297), -0.017138), -0.0188942), -0.0316437), 0.0294602), 0.0170694)), 0.0198958), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, -0.014876, -0.0052357), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0129979, -0.0066152), -0.0041822)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.017963, -0.0129878), -0.0203123))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0206834, if (POS_18 < 0.5, 0.0134348, -0.0020072)), if (LENGTH < 13.5, 0.025581, -0.0114808)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0045001, -0.0123061), -0.0198902), -0.0116103), 0.0278925), 0.0156693)), if (STOP_WORD_1 < 0.5, 0.0206524, 0.0087084)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0161757, if (CONCEPTTYPE < 0.5, -0.0128887, -0.0050251)), if (TERM_CASE_4 < 0.5, -0.0099339, -0.0035159)), if (ENTITYPLACETYPE < 0.5, -0.019166, -0.0154052))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0177521, 0.0106682), -0.0053279), -0.0047629), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, 0.0046018, -0.0227924), -0.0169681), -0.0177201), -0.010902), 0.0262335), 0.0149068)), if (STOP_WORD_1 < 0.5, 0.0201364, 0.0073277)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE 
< 0.5, if (CHUNKTYPE < 0.5, -0.0154863, -0.0078134), if (LENGTH < 17.5, -0.0089874, 0.0010552)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0176944, -0.0126937), -0.0196004))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0156388, 0.0130898), -0.0013247), -0.0067409), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, 8.397E-4, -0.0195198), -0.0259057), 0.0249483), 0.0153473)), 0.0190098), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.015095, if (TERM_CASE_4 < 0.5, -0.0150926, -0.0019354)), if (CHUNKTYPE < 0.5, -0.0087801, -6.083E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0175396, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0148435, -0.0020005), -0.0167526)), -0.0194128))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196083, 0.0120079), -0.0259077), if (LENGTH < 13.5, 0.0212285, -0.0137183)), if (STOP_WORD_2 < 0.5, if (CHUNKTYPE < 0.5, 0.0010741, 0.0180207), -0.0094372)), if (GOOD_SYNTAX < 0.5, -0.0415369, 0.0185524)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0158674, if (TERM_CASE_3 < 0.5, -0.0070903, -0.0162977)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0128093, if (TERM_CASE_4 < 0.5, -0.0094791, 0.00345)), if (STOP_WORD_2 < 0.5, -0.0023198, -0.0292396))), if (ENTITYPLACETYPE < 0.5, -0.0181954, -0.0145461))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0170163, 0.0121323), -0.0042232), if (LENGTH < 13.5, 0.0220881, -0.0102208)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, 7.481E-4, -0.034593), -0.0247906), 0.026535), 0.0140904)), if (STOP_WORD_3 < 0.5, 0.019341, 0.0096431)), if (ORDER_IN_CLUSTER < 2.5, if 
(ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.017048, -0.0114585), if (CHUNKTYPE < 0.5, -0.008748, -7.903E-4)), if (ENTITYPLACETYPE < 0.5, -0.0176339, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0141374, -0.0070836), -0.0170066)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0155011, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0116785, 0.0055628), 0.0164792)), -0.035647), -0.0121403), -0.007518), if (EXTENDEDTYPE < 0.5, 0.0221764, -0.0114313)), -0.0088787), 0.0182672), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0175676, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, -0.0115627, if (STOP_WORD_3 < 1.5, 0.0040489, -0.018267)), -0.0284101)), if (CHUNKTYPE < 0.5, -0.0084511, 8.382E-4)), if (ORDER_IN_CLUSTER < 3.5, -0.0145335, -0.0182791))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_20 < 0.5, 0.0104568, -0.0222549), 0.0044133), -0.0119989), -0.0064236), if (LENGTH < 12.5, 0.0257311, -0.010041)), 0.0163182), -0.0104437), if (GOOD_SYNTAX < 0.5, -0.0423132, 0.0178122)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0178496, if (POS_18 < 0.5, -0.0097905, -0.0243957)), if (LENGTH < 17.5, if (CONCEPTTYPE < 0.5, -0.0099414, if (TERM_CASE_1 < 0.5, -0.0047538, 0.0254415)), 0.0010132)), if (ORDER_IN_CLUSTER < 3.5, -0.0145401, -0.0182917))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0171785, if (POS_18 < 0.5, 0.0111746, -0.0020704)), if (LENGTH < 13.5, 0.0239552, -0.0082275)), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 2.53E-4, -0.0181176), -0.0111345), 0.0144392), 0.0277661), 0.0140805)), 
0.0177412), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0164346, -0.0111249), -0.0081683), -0.0030415), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.015696, if (LENGTH < 8.5, -0.0076235, -0.0142175)), -0.0179409))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0173813, if (ENTITYPLACETYPE < 0.5, 0.0090219, 0.0165077)), -0.0086629), -0.0049417), if (POS_17 < 0.5, -0.0128903, 0.0094969)), -0.0088222), if (STOP_WORD_3 < 0.5, 0.0184481, 0.0098499)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0162087, -0.0111325), if (STOP_WORD_2 < 0.5, -0.0075486, -0.0226605)), -0.0035147), if (ENTITYPLACETYPE < 0.5, -0.0164968, if (CONCEPTTYPE < 0.5, -0.0152376, if (LENGTH < 8.5, if (ORDER_IN_CLUSTER < 3.5, 6.5E-6, -0.0159595), -0.0131528))))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0140484, if (POS_10 < 0.5, 0.0074606, -0.0102378)), -0.0062586), if (EXTENDEDTYPE < 0.5, 0.0228139, -0.0112505)), -0.007237), if (STOP_WORD_1 < 0.5, 0.0173949, 0.0056298)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0143091, -0.0088984), -0.0052595), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0088795, if (STOP_WORD_1 < 0.5, -0.0020573, -0.0225052)), 0.0010976)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0151987, if (CONCEPTTYPE < 0.5, -0.0138226, -0.0065649)), -0.0174896))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0097569, -0.0031364), if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, 0.0086995, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0030369, 0.0101077), 0.0240326)), -0.0268141)), 0.0161601), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if 
(LENGTH < 7.5, -0.0166546, if (POS_18 < 0.5, -0.0096908, -0.024367)), if (LENGTH < 10.5, if (CONCEPTTYPE < 0.5, -0.0114941, if (TERM_CASE_4 < 0.5, 0.0066344, -0.0084878)), if (TERM_CASE_4 < 0.5, if (CHUNKTYPE < 0.5, -0.0096005, -0.0010291), 0.0025618))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0151401, -0.0108582), -0.0172704))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0095209, -0.0232525), -0.0022657), if (POS_10 < 0.5, if (POS_18 < 0.5, 0.0042226, -0.0093108), -0.0106807)), 0.0140721), -0.0069394), if (STOP_WORD_1 < 0.5, 0.0174307, 0.0080683)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0137755, if (TERM_CASE_3 < 0.5, -0.0059039, -0.0149249)), if (LENGTH < 10.5, -0.0093623, -0.0032503)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0149707, if (CONCEPTTYPE < 0.5, -0.0136787, if (LENGTH < 9.5, -2.229E-4, -0.0136523))), -0.0167219))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, 0.0138732, 0.008398), -0.0259663), -0.0016621), if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_3 < 1.5, 0.0043232, -0.0044986), -0.0084717), -0.0180725), 0.0233834)), if (STOP_WORD_1 < 0.5, 0.0166125, 0.0053323)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0163843, -0.0095754), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, -0.0104948, 0.0012228), -0.0214262), -0.0017735), -0.0045475)), if (ORDER_IN_CLUSTER < 3.5, -0.0129966, -0.0164255))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0228565, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0092807, -0.0232311), -0.0047011), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, if 
(POS_15 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, 0.0044601, -0.0323762), -0.0190737), -0.0183194), -0.0156696), -0.0241681), 0.0252131)), 0.0145758)), if (STOP_WORD_3 < 0.5, 0.0171706, 0.007469)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0120993, -0.0074616), -0.0031379), if (ENTITYPLACETYPE < 0.5, -0.015522, if (CONCEPTTYPE < 0.5, -0.0139359, -0.0086416)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.0136454, 0.0070859), if (LENGTH < 12.5, 0.0293225, -0.0072539)), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 7.079E-4, -0.0155087), -0.0118841), 0.0221595), 0.0138053)), if (STOP_WORD_1 < 0.5, 0.0159814, 0.0045131)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0131261, if (STOP_WORD_1 < 0.5, -0.0080341, 0.0163991)), if (STOP_WORD_2 < 0.5, -0.0064649, -0.0217399)), -4.319E-4), if (ENTITYPLACETYPE < 0.5, -0.0151802, if (CONCEPTTYPE < 0.5, -0.0136413, -0.0088831)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0164443, 0.0103647), -0.0032517), 0.0012502), -0.0065228), -0.0072638), if (STOP_WORD_3 < 0.5, 0.016558, 0.0080528)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, -0.0141238, -0.0094132), -0.0016775), -0.0220858), -4.456E-4), if (CHUNKTYPE < 0.5, -0.0065359, 0.0026063)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0135472, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0104012, -0.0010905), -0.0128092)), -0.0155384))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0126237, if (POS_19 < 0.5, 0.0068935, -0.0053442)), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if 
(STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, 0.0018569, 0.0136748), -0.0041457), -0.0172089), 0.0214328), -0.0237328)), 0.0149798), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0138775, -0.0084083), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.008515, -0.0033346), 0.0016883)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0138494, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0105677, -1.645E-4), -0.0126896)), -0.0158156))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, 0.0074348, 0.0146558), -0.0052259), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, -3.521E-4, -0.0134149), 0.0103603), 0.0224311), -0.0222263)), 0.0147576), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0140817, -0.00951), -0.0071937), if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, -0.00762, 0.0124743), 0.001789)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0134273, if (CONCEPTTYPE < 0.5, -0.0113881, if (LENGTH < 8.5, -3.79E-5, -0.0102505))), -0.0152726))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.007474, -0.0063134), -0.0071365), 0.0136863), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if (STOP_WORD_2 < 0.5, 6.559E-4, -0.0085589), -0.0160333), 0.0219506), 0.0130889)), 0.0150582), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0146211, if (TERM_CASE_3 < 0.5, if (CHUNKTYPE < 0.5, -0.0094537, 7.042E-4), -0.0138315)), if (CHUNKTYPE < 0.5, -0.00638, 0.0011708)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.010724, -0.0188838), -0.0189123), -0.015))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0229233, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0074292, -0.0028084), 
if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, 9.274E-4, -0.0137277), -0.0310847), -0.0228395), 0.0241976)), 0.013276)), 0.0146802), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0122109, if (TERM_CASE_3 < 0.5, -0.0058119, -0.0129394)), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 26.5, -0.0069115, 0.0097361), -0.0245469), 9.084E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0130609, -0.0081145), -0.0148501))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, 0.011759, 0.006059), -0.0061099), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, 0.0065599, -0.0010844), -0.0220849), 0.0224768)), 0.0140095), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0115323, if (CONCEPTTYPE < 0.5, -0.0091078, if (STOP_WORD_3 < 0.5, 7.263E-4, -0.0201621))), if (TERM_CASE_4 < 0.5, -0.0071175, if (LENGTH < 15.5, 0.0081473, -0.0018688))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0128936, if (CONCEPTTYPE < 0.5, -0.0116723, if (LENGTH < 9.5, -2.605E-4, -0.0106371))), -0.014597))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0219888, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0071866, -0.0033938), 0.0131146), -0.0053445), if (REGEXTYPE < 0.5, -4.858E-4, 0.0216005)), 0.0128922)), if (STOP_WORD_1 < 0.5, 0.0149918, 0.0049231)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0136976, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, -0.0082875, 7.551E-4), -0.0248618), -0.0219885)), -0.0051402), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0125589, if (LENGTH < 9.5, -0.0059241, -0.0123873)), -0.014485))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (TERM_CASE_3 < 0.5, 
0.0071765, 0.0139718), -0.0063484), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, 0.002737, -0.0131905), -0.0078928)), if (STOP_WORD_1 < 0.5, 0.0145544, 0.0032937)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0113455, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -0.0077787, -2.138E-4), -0.0191808)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0035509, 0.0077407), if (ENTITYPLACETYPE < 0.5, -0.0111466, -0.0041847))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0126288, if (LENGTH < 9.5, -0.0062011, -0.0122915)), -0.0142805))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0193475, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.0088922, -0.0014413), -0.0243768), if (LENGTH < 13.5, 0.0192753, -0.0066051))), if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0074286, if (EXTENDEDTYPE < 0.5, if (LENGTH < 24.5, 0.0017664, 0.0167767), -0.001159)), 0.0130916)), if (STOP_WORD_3 < 0.5, 0.0147469, 0.0072769)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, -0.0103194, if (CONCEPTTYPE < 0.5, -0.0078001, -0.0022905)), -0.0020796), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, -0.0104979, -0.0178771), -0.0140859))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0137203, 0.0064025), if (GOOD_SYNTAX < 0.5, -0.0224563, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.005608, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, 0.0019291, -0.0051123), 0.0182142), -0.0213983)), 0.012703))), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0113072, if (TERM_CASE_3 < 0.5, -0.004186, -0.0117898)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0089815, -0.0020853), -0.0016058)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0120968, if (CONCEPTTYPE < 0.5, -0.0098191, if (LENGTH < 9.5, 0.0018021, -0.0097175))), -0.0139154))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 
0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0103513, 0.0045689), if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0056874, -0.0016569), 0.0192383), -0.0228582)), 0.0133348), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0117483, -0.0062281), if (TERM_CASE_1 < 0.5, if (LENGTH < 18.5, -0.0068632, 0.001308), 0.0063511)), -0.0173735), if (CONCEPTTYPE < 0.5, -0.0035826, 0.0050191)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.011691, if (CONCEPTTYPE < 0.5, -0.0103893, if (LENGTH < 8.5, 0.0018196, -0.0088273))), -0.0136453))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0087542, -0.0270729), -0.0030851), if (LENGTH < 15.5, 0.0129051, -0.0116668)), if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, 0.0030751, -0.0139397), -0.0062987)), if (STOP_WORD_1 < 0.5, 0.0137908, 0.0039076)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, -0.0106848, -0.006511), -0.0025831), if (STOP_WORD_1 < 0.5, -0.0021885, 0.0128339)), if (ENTITYPLACETYPE < 0.5, -0.0130512, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0102829, if (LENGTH < 11.5, -0.00111, -0.0108079)), -0.0125744)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0111577, if (POS_19 < 0.5, 0.0055582, -0.0044153)), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, -0.0023607, -0.0173584), 0.0166121), 0.0095357)), 0.0129467), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.003286, -0.0125259), -0.0056692), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0084425, if (TERM_CASE_4 < 0.5, -0.0065046, 0.004556)), -0.0022756)), -0.0159046), -0.0023817), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0113171, 
-0.0079431), -0.0133184))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, 0.0050979, 0.0129595), if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0049818, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, -5.948E-4, -0.0160451), -0.0109574)), 0.0184255), 0.0110931)), 0.0127109), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0116395, -0.0070247), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0079596, -0.0017647), if (TERM_CASE_4 < 0.5, -0.0045248, 0.006299))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0113466, if (LENGTH < 7.5, if (CONCEPTTYPE < 0.5, -0.007685, 0.0026876), -0.0099917)), -0.0131104))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (GOOD_SYNTAX < 0.5, -0.0228671, if (LENGTH < 24.5, if (POS_10 < 0.5, 0.0071132, -0.0090998), 0.0134041)), if (STOP_WORD_3 < 1.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.0063903, -0.0050451), -0.0238561), -0.0090155), -0.0075331)), 0.0127866), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0118255, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -0.0042627, -0.0158184), -0.011879)), if (CHUNKTYPE < 0.5, -0.0052357, 0.0027169)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0085676, -0.0166909), -0.0164321), -0.0128791))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0195794, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0066964, -0.0035265), -0.0038897), if (REGEXTYPE < 0.5, if (POS_11 < 0.5, -8.263E-4, -0.0221394), 0.0196917)), 0.0116728)), 0.0126207), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, -0.0091239, -0.0030069), if (LENGTH < 12.5, if (CONCEPTTYPE < 0.5, -0.0075137, if (STOP_WORD_3 < 0.5, 4.698E-4, -0.018769)), if (TERM_CASE_3 < 0.5, 0.0088638, -0.0050337))), if (ENTITYPLACETYPE < 0.5, -0.0120097, if 
(CONCEPTTYPE < 0.5, -0.0105747, if (LENGTH < 11.5, -0.0033156, -0.0107989))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.024075, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (POS_10 < 0.5, if (POS_13 < 0.5, 0.0071824, -0.0298384), -0.0101318), 0.0140101), if (POS_19 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_20 < 0.5, if (POS_18 < 0.5, 0.006749, -0.0042325), -0.0278094), -0.0064296), -0.0098671))), 0.012667), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0117698, -0.0065941), if (CHUNKTYPE < 0.5, -0.0045716, 0.0020334)), if (ENTITYPLACETYPE < 0.5, -0.0121529, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0070855, 0.0020866), -0.0094693), -0.0111803)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0077284, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 27.5, 0.0037692, 0.0092086), -4.152E-4), -0.0091993), -0.0074325), -0.0068167)), 0.011709), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 17.5, if (CHUNKTYPE < 0.5, -0.0101472, if (STOP_WORD_1 < 0.5, -0.0075701, 0.0192057)), -0.0038737), if (CONCEPTTYPE < 0.5, -0.0054107, if (LENGTH < 25.5, -0.0014838, 0.0156292))), if (ENTITYPLACETYPE < 0.5, -0.0118794, if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, -0.0090476, if (LENGTH < 8.5, 0.0021917, -0.0068113)), -0.0109229)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0227448, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, if (LENGTH < 24.5, 0.0060057, 0.0125722), -0.0101504), if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, 0.0032162, -0.0199493), -0.0217609), -0.006034))), if (STOP_WORD_3 < 0.5, 0.0132183, 0.0057335)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0102233, -0.0063653), 0.002999), if (TERM_CASE_4 < 0.5, -0.0063009, 1.69E-5)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 
0.5, -0.0106072, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0069765, 0.0023083), -0.009208)), -0.0123613))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0173462, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (POS_10 < 0.5, if (POS_13 < 0.5, 0.0057288, -0.0292491), -0.0095924), -0.0243283), -0.0049234), -0.0051067), -0.0204223), 0.0104389)), 0.0118102), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 1.5, -0.0076967, -0.0207207), 0.0012527), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0026614, 0.0062031), -0.0072729)), if (ENTITYPLACETYPE < 0.5, -0.0117006, if (CONCEPTTYPE < 0.5, -0.0099539, if (ORDER_IN_CLUSTER < 3.5, -0.0020873, -0.0096474))))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0097428, 0.0040314), if (REGEXTYPE < 0.5, if (POS_13 < 0.5, if (POS_11 < 0.5, 5.285E-4, -0.0183482), -0.0286601), 0.0195782)), 0.0113925), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0114242, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, -0.0111308, -0.0033015), 0.0042831), -0.0211832), -0.0097492), -0.0186218)), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0065837, -4.198E-4), if (TERM_CASE_3 < 0.5, 0.0057224, -0.0039242))), if (ORDER_IN_CLUSTER < 3.5, -0.0087263, -0.0120898))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_4 < 0.5, 0.0078785, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, 0.00325, -0.0101417), 0.0078278), -0.0054123), -0.0103609), -0.0125887)), -0.0079703), if (STOP_WORD_1 < 0.5, 0.012286, 0.0023518)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 20.5, -0.0085329, -0.0029867), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, -0.0041063, -0.0192657), 0.0027515)), if 
(ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0099399, if (LENGTH < 7.5, -0.0024334, if (CHUNKTYPE < 0.5, -0.0104878, 0.0020285))), -0.0118966))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0184519, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, 0.0038235, 0.0171261), -0.0193481), -0.0053696), 0.0105468)), -0.0252286), if (STOP_WORD_1 < 0.5, 0.0124447, 0.0024796)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, -0.0095294, if (CONCEPTTYPE < 0.5, -0.0076215, -5.182E-4)), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.001379, if (LENGTH < 22.5, -0.0108893, 0.0020565)), if (ENTITYPLACETYPE < 0.5, -0.0103963, -0.0037001))), if (ENTITYPLACETYPE < 0.5, -0.010986, if (ORDER_IN_CLUSTER < 3.5, -0.006132, -0.0105592)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0047532, -0.0064666), 0.0102174), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, -0.0024277, 0.0183349), 0.0070475), 0.0115305)), if (STOP_WORD_1 < 0.5, 0.0121889, 0.003729)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0105445, if (POS_10 < 0.5, -0.0053669, -0.0207266)), if (LENGTH < 24.5, -0.0040706, 0.007517)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0097368, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (CONCEPTTYPE < 0.5, -0.0074429, 0.00157), -0.0102013), 0.0045523)), -0.0115658))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0199225, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0045463, -0.0067636), 0.010348), if (REGEXTYPE < 0.5, if (POS_19 < 0.5, -2.734E-4, -0.0166088), 0.0187737)), 0.0105994)), if (STOP_WORD_1 < 0.5, 0.0121884, 0.0016164)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 9.5, -0.0100509, if 
(TERM_CASE_3 < 0.5, -0.004709, -0.0101587)), -0.0144494), -0.0128056), if (ORDER_IN_CLUSTER < 3.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, if (CHUNKTYPE < 0.5, -0.0071529, 3.019E-4), 0.0029028), -9.705E-4), -0.0101829))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0133766, 0.0064295), if (POS_11 < 0.5, if (POS_13 < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_10 < 0.5, 0.0064927, -0.0067917), -4.751E-4), -0.0279461), -0.0186938)), -0.0068121), if (STOP_WORD_3 < 0.5, 0.0122787, 0.0050219)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0092979, if (TERM_CASE_3 < 0.5, -0.0018497, -0.0102131)), if (LENGTH < 18.5, if (CONCEPTTYPE < 0.5, -0.0055809, -9.664E-4), 0.0024321)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0069548, -0.0150524), -0.0148514), -0.0111524))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0199523, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0055288, -0.0027995), if (STOP_WORD_3 < 2.5, -9.661E-4, 0.0140189)), 0.0100237)), if (STOP_WORD_3 < 0.5, 0.0120304, 0.0049867)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, -0.0084903, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0055562, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0033318, 0.0045948), -0.0053979), 0.0026091)), -0.0213714)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0095493, if (LENGTH < 7.5, -0.0014674, if (CHUNKTYPE < 0.5, -0.0095668, 0.0028625))), -0.0111755))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0196303, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0030396, 0.0103852), if (POS_11 < 0.5, 8.51E-4, -0.0168136)), 0.0099631)), if (STOP_WORD_1 < 0.5, 0.0115791, 0.0028478)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, -0.0090833, -0.0043938), if 
(TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0039915, 2.299E-4), if (ENTITYPLACETYPE < 0.5, -0.0095076, -0.0038278))), -0.0123815), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.009674, if (CONCEPTTYPE < 0.5, -0.0079674, if (LENGTH < 8.5, 0.0032913, -0.0061521))), -0.0112111))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, 0.003502, -0.004849), 0.0095946), 0.0096953), if (POS_11 < 0.5, if (POS_13 < 0.5, 8.188E-4, -0.0274095), -0.0183945)), if (STOP_WORD_1 < 0.5, 0.0112363, 0.0011995)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, -0.006842, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, -0.0037862, 0.0072035), -0.0185828), 0.0016982)), 0.0042357), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.008899, if (CONCEPTTYPE < 0.5, -0.0079921, if (LENGTH < 8.5, 0.0034053, -0.0055219))), -0.010813))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0044217, -0.0254605), -0.0059356), 0.0092415), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 5.27E-4, -0.0127815), -0.0189175), -0.0079803), 0.0194397), 0.0077178)), if (STOP_WORD_3 < 0.5, 0.0113611, 0.0030626)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 17.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0099975, -0.0047484), -0.0039338), -0.0129044), -0.0011493), if (ENTITYPLACETYPE < 0.5, -0.0100912, if (CONCEPTTYPE < 0.5, -0.0088244, -0.0051965)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0172671, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0038607, -0.0070186), 0.0092221), if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, -3.41E-5, -0.0125225), 0.0120708), -0.0207583)), 0.0092476)), if 
(STOP_WORD_3 < 0.5, 0.0114261, 0.0037361)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0089222, -0.0049235), -0.0035604), -0.0144894), -4.387E-4), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0069498, -0.0138425), -0.0146887), -0.0110439))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.017575, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0046367, -0.0028153), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 16.5, -0.0063287, if (STOP_WORD_3 < 1.5, 0.0019162, -0.0049296)), 0.0088493), 0.0153082)), 0.0093202)), if (STOP_WORD_3 < 0.5, 0.0111777, 0.0040758)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, -0.007783, -0.0044667), 0.0032427), if (LENGTH < 11.5, 0.0058521, -0.0021999)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0069747, -0.0142179), -0.0139126), -0.0104687))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0043383, -0.0045935), 0.0101261), if (POS_19 < 0.5, if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, if (LENGTH < 20.5, -0.0027714, 0.0041583), 0.0115687), -0.0077298), 0.0130931), -0.020191)), if (STOP_WORD_1 < 0.5, 0.0105555, 4.583E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0102799, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -0.0040687, -0.0127478), -0.0107389)), -0.0037055), 6.93E-4), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, -0.0074529, -0.0140807), -0.0105013))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0105702, 0.0034017), if (GOOD_SYNTAX < 0.5, -0.0190164, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (LENGTH < 25.5, 0.0016483, 0.0062856), -0.0067828), if (LENGTH < 
13.5, 0.0131435, -0.0107525)), -0.0266499), 0.0084024))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, -0.009195, if (POS_1 < 0.5, -0.0039017, -0.0152131)), 0.0045369), -0.0128674), -9.539E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0085395, if (CONCEPTTYPE < 0.5, -0.007369, -0.002642)), -0.0102687))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0099611, 0.0030696), if (LENGTH < 25.5, 0.0022581, 0.0075257)), if (STOP_WORD_3 < 1.5, -0.0183479, 0.0086277)), -0.0054524), -0.0061213), -0.0057235), 0.0095436), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_1 < 0.5, -0.0072819, 0.0037503), if (ENTITYPLACETYPE < 0.5, -0.0040748, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0025632, 0.0058961), -0.0031178))), -0.0117283), if (ENTITYPLACETYPE < 0.5, -0.0097562, if (CONCEPTTYPE < 0.5, -0.0083735, -0.0045189)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0031857, 0.0086866), if (LENGTH < 24.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 2.5, -0.0038614, 0.0175429), 0.0108061), if (EXTENDEDTYPE < 0.5, 0.0137676, 3.506E-4))), if (STOP_WORD_3 < 0.5, 0.0108081, 0.0040597)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 10.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, -0.0086597, 0.0060508), if (CONCEPTTYPE < 0.5, -0.0061168, 0.0017479)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0016474, 0.0050863), -0.0050703)), -0.0129486), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, -0.006801, -0.0136904), -0.0103789))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0154696, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_3 < 0.5, 0.004912, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if 
(STOP_WORD_2 < 0.5, 0.0023838, -0.0065138), -0.0112473), 0.0126552)), -0.0049311), if (LENGTH < 15.5, 0.0066143, -0.0100076)), 0.0083943)), if (STOP_WORD_3 < 0.5, 0.0107557, 0.003532)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 24.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.00415, -0.0120295), -0.0054989), -0.0026859), 0.0025229), if (ENTITYPLACETYPE < 0.5, -0.0095499, if (CONCEPTTYPE < 0.5, -0.0083022, -0.0036627)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0038506, -0.0044521), 0.0082738), if (ENTITYPLACETYPE < 0.5, -0.001014, 0.0072563)), 0.0096636), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0102008, if (POS_10 < 0.5, if (ORDER_IN_CLUSTER < 2.5, -0.0047438, -0.0080274), -0.0166333)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0050233, if (TERM_CASE_3 < 0.5, 0.0016495, -0.0057258)), -0.0161277), -0.0086894)), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (TERM_CASE_4 < 0.5, -0.0065386, 0.0025704), -0.0096212), 0.0029606), 0.0107795))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0062343, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, 0.0027147, -0.0024574), 0.0058186), -0.0058367), -0.0094021)), if (STOP_WORD_1 < 0.5, 0.0101689, 0.0017153)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, if (LENGTH < 5.5, -0.005605, -0.0123399), -0.0041545), 0.0029762), -0.0117731), if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0073463, if (CONCEPTTYPE < 0.5, -0.0016419, 0.0165596)), 0.002869)), if (ENTITYPLACETYPE < 0.5, -0.0091448, if (CONCEPTTYPE < 0.5, -0.0079953, -0.0035768)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0047094, if (POS_11 < 0.5, if (REGEXTYPE < 0.5, if (POS_10 < 0.5, if 
(STOP_WORD_3 < 1.5, if (LENGTH < 20.5, if (CHUNKTYPE < 0.5, -0.0025457, 0.0097957), 0.0053202), if (TERM_CASE_4 < 0.5, 0.0085641, -0.0065398)), -0.0122317), 0.0164329), -0.0173095)), 0.0091212), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0094751, if (POS_18 < 0.5, -0.0050056, -0.0167212)), -0.0030324), if (CONCEPTTYPE < 0.5, -0.00178, if (TERM_CASE_3 < 0.5, 0.0193394, 0.0029243))), if (ENTITYPLACETYPE < 0.5, -0.0091874, if (CONCEPTTYPE < 0.5, -0.0072158, -0.0030999)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (TERM_CASE_3 < 0.5, 0.0027316, 0.008725), 0.0088773), if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, if (CONCEPTTYPE < 0.5, 0.0032587, -0.0036558), -0.0051207), 0.0179723)), if (STOP_WORD_1 < 0.5, 0.0096336, 7.218E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, -0.0055952, 0.0012386), -0.0179516), if (TERM_CASE_4 < 0.5, -0.0038354, if (ENTITYPLACETYPE < 0.5, -0.0010097, if (STOP_WORD_3 < 0.5, 0.0134052, -6.04E-5)))), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0054568, -0.0130658), -0.013045), -0.0096278))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (GOOD_SYNTAX < 0.5, -0.0227151, if (LENGTH < 25.5, if (POS_10 < 0.5, 0.0042345, -0.0100974), 0.0118472)), if (POS_19 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0043759, -0.0210411), -0.0071613), -0.0055258), -0.0088723)), if (STOP_WORD_3 < 0.5, 0.0101478, 0.003266)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0087731, -0.0037382), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0053575, -7.922E-4), 5.438E-4)), -0.0108268), if (ENTITYPLACETYPE < 0.5, -0.008961, if (CONCEPTTYPE < 0.5, -0.0076917, -0.003026)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 
0.0069296, 0.0031202), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 14.5, -0.0064692, if (STOP_WORD_3 < 1.5, 0.0019488, if (TERM_CASE_4 < 0.5, 0.0085374, -0.0067782))), 0.00637), 0.0180096)), 0.0088053), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 21.5, -0.0061255, -5.129E-4), if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, -0.0048713, 8.81E-5), if (TERM_CASE_3 < 0.5, 0.005674, -0.0027696))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0078337, if (LENGTH < 10.5, if (CONCEPTTYPE < 0.5, -0.0050692, 0.0022481), -0.0075866)), -0.0096561))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0048174, -0.0065418), -0.0039589), -0.0069102), -0.0044218), -0.0249516), if (STOP_WORD_1 < 0.5, 0.0098977, 0.0017658)), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 25.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 0.5, -0.0086001, -0.0023197), -0.0045721), -0.0133756), if (CONCEPTTYPE < 0.5, -0.0057091, -0.0021427)), -0.012263), 0.0037549), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0039936, 0.0016112), 0.0156535)), -0.0095359)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0179962, 0.0047526), if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (ENTITYPLACETYPE < 0.5, -0.0012421, 0.0066496), 0.0111163), -0.0183546)), if (STOP_WORD_1 < 0.5, 0.0096607, 0.0026903)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 13.5, -0.0069672, if (TERM_CASE_3 < 0.5, -3.931E-4, -0.0085633)), if (TERM_CASE_1 < 0.5, -0.0030407, if (LENGTH < 2.5, -0.0186193, 0.0154944))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0075719, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (CONCEPTTYPE < 0.5, -0.0053055, 0.0020692), -0.0077103), 0.0036415)), -0.0091236))) + if (ORDER_IN_CLUSTER < 1.5, if 
(TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0161771, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (POS_18 < 0.5, if (POS_11 < 0.5, if (POS_10 < 0.5, 0.0034826, -0.007235), -0.0148214), -0.0046901), -0.0259102), if (LENGTH < 12.5, 0.0222894, -0.0085975)), 0.0080958)), -0.0232361), if (STOP_WORD_3 < 0.5, 0.009658, 0.0023804)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0081212, -0.0041938), if (CHUNKTYPE < 0.5, -0.0026353, 0.0030653)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0054007, -0.0117554), -0.0125025), -0.0091279))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0020329, 0.0065302), -0.0049206), 0.0082494), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0079899, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (POS_1 < 0.5, -9.112E-4, -0.0175155), -0.018083), -0.0075432), -0.0158243)), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, if (STOP_WORD_1 < 0.5, -0.0017277, -0.0189726), if (STOP_WORD_3 < 0.5, 0.0125924, -0.0011788)), -0.0053552), 0.004024)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0077747, if (LENGTH < 7.5, -0.0016187, -0.0067493)), -0.0091496))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0039294, -0.0030351), 0.0077582), if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, 0.002138, -0.0103903), -0.0100254), -0.0051292)), 0.0085061), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0077625, -0.0039689), if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, if (LENGTH < 5.5, -0.0033501, -0.0113932), -0.002125), 0.008145)), 0.0011904), -0.0137035), 6.225E-4), if (ENTITYPLACETYPE < 0.5, -0.0083126, if (CONCEPTTYPE < 0.5, -0.0073919, -0.0037914)))) + if 
(ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0142326, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_20 < 0.5, 0.0036802, -0.018494), -0.0147649), 0.0078761), -0.0071625)), -0.0061183), -0.0051327), if (STOP_WORD_3 < 0.5, 0.0095996, 0.0031133)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0073096, -0.003005), if (TERM_CASE_1 < 0.5, -0.0027642, if (CONCEPTTYPE < 0.5, -7.958E-4, 0.029568))), 8.063E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0071546, if (CONCEPTTYPE < 0.5, -0.0064773, -0.0012095)), -0.0086887))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0138905, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0030357, 0.0095136), if (POS_19 < 0.5, if (POS_20 < 0.5, 0.0016078, -0.0186607), -0.0064311))), if (STOP_WORD_1 < 0.5, 0.0090583, 0.0010345)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 8.5, -0.0095116, if (TERM_CASE_3 < 0.5, -0.0026424, -0.0079867)), -0.002998), if (POS_10 < 0.5, if (ORDER_IN_CLUSTER < 2.5, 0.0094607, -0.0032605), -0.0164541)), -0.0102694), -0.0124184), if (CONCEPTTYPE < 0.5, -0.0039713, -9.36E-5)), -0.0088542)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0054645, if (POS_18 < 0.5, if (POS_19 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0015531, -0.0146532), if (EXTENDEDTYPE < 0.5, 0.0022157, -0.014745)), 0.0057213), if (LENGTH < 13.5, 0.0115641, -0.0118718)), -0.0097261)), if (STOP_WORD_1 < 0.5, 0.0089889, -0.0019033)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0083219, if (TERM_CASE_4 < 0.5, -0.0049572, -6.174E-4)), -0.0014242), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 14.5, -0.004242, -0.0079644), 
-0.011519), -0.0120036), -0.0086982))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, 0.0074906, if (POS_18 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, 0.0020388, 0.0062093), -0.0047986), -0.0173869), -0.0034556), -0.0104258)), if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.007155, if (CONCEPTTYPE < 0.5, -0.0049511, 0.001242)), 0.0069858), if (TERM_CASE_3 < 0.5, 0.0010511, if (ENTITYPLACETYPE < 0.5, -0.0077005, -0.0017536))), -0.0110892), if (ENTITYPLACETYPE < 0.5, -0.0079187, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 10.5, if (CHUNKTYPE < 0.5, -0.0020188, 0.0204519), -0.0076923), -0.0073604)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0029762, -0.006914), if (POS_17 < 0.5, -0.0083768, 0.008933)), -0.0043244), 0.0066431), -0.0053725), if (STOP_WORD_1 < 0.5, 0.0088945, if (LENGTH < 13.5, 0.0104462, -0.0058015))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0074769, if (STOP_WORD_3 < 1.5, -0.0035436, -0.0121068)), if (CHUNKTYPE < 0.5, -0.0027859, 0.0026616)), 0.0037022), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0046449, -0.0118423), -0.011505), -0.0086359))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0074625, 0.0020221), if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.002583, -0.021274), if (STOP_WORD_1 < 0.5, -0.0102947, 0.0010559))), -0.0230981), 0.0079978), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 12.5, -0.0053582, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.001733, 0.0057119), -0.004411)), 0.0087413), -0.011294), 4.195E-4), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.006941, if (CONCEPTTYPE < 
0.5, -0.0058254, if (LENGTH < 9.5, 0.00278, -0.0057821))), -0.0086608))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0074243, 0.0015777), if (POS_18 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0026429, -0.0142038), if (STOP_WORD_1 < 0.5, -0.0102012, 0.001646)), -0.0075344)), if (STOP_WORD_1 < 0.5, 0.0083933, 1.793E-4)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, -0.0047917, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0011668, 0.0046828), -0.005016)), -0.0105396), if (CONCEPTTYPE < 0.5, -0.0014444, 0.0048186)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0068452, if (LENGTH < 10.5, -0.0012332, -0.0074714)), -0.0083915))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0027744, 0.0078097), if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, -7.1E-6, if (LENGTH < 22.5, -0.0014678, -0.0144231)), 0.0167356)), if (STOP_WORD_1 < 0.5, 0.0085575, 7.479E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.0096899, -0.0055229), if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_3 < 0.5, -0.004084, -6.323E-4), -0.0120285), -0.0097655)), if (LENGTH < 25.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0044169, 0.0034553), if (LENGTH < 9.5, 0.002177, -0.0037432)), 0.0084802)), -0.0082518)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.018082, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.0031176, if (REGEXTYPE < 0.5, -0.0012278, 0.0168942)), -0.0207908), 0.0068542)), if (STOP_WORD_1 < 0.5, 0.0088343, if (LENGTH < 17.5, 0.0075565, -0.0119716))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0059198, if (STOP_WORD_1 < 0.5, -0.0018112, -0.0145256)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, 4.66E-5, if (STOP_WORD_3 < 0.5, 0.0125508, -7.222E-4)), 
-0.0040445)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0067296, if (CONCEPTTYPE < 0.5, -0.0059403, -3.154E-4)), -0.0082287))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0130698, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0029368, if (REGEXTYPE < 0.5, -0.0010747, 0.0131924)), 0.0066645)), if (LENGTH < 17.5, 0.0095125, if (STOP_WORD_3 < 0.5, 0.0072248, -0.0043409))), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0069589, if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0021523, -0.0066839), if (TERM_CASE_2 < 0.5, 0.0130775, -0.00491)), -0.0165904), -0.0149927)), -0.0016661), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0045425, -0.0106861), -0.0117066), -0.0080814))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0149992, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0021723, 0.0094263), if (POS_19 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 1.5, 0.0024393, if (STOP_WORD_1 < 0.5, -0.0093972, 0.0027647)), -0.0196084), -0.0062675))), 0.0079505), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 24.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0069596, -0.0027919), -0.0021029), if (TERM_CASE_4 < 0.5, -0.002159, 0.0099606)), -0.0125355), 0.0025012), if (ENTITYPLACETYPE < 0.5, -0.0074841, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, -5.58E-5, -0.005147), -0.0066608)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0167351, if (POS_20 < 0.5, if (POS_13 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, 0.0040521, -0.0028967), -0.0025444), -0.0156449), -0.0252961), -0.0167637)), if (STOP_WORD_1 < 0.5, 0.0082947, 0.0012067)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0055777, 0.0013311), if (TERM_CASE_4 < 0.5, -0.0049957, 0.0023179)), if (LENGTH 
< 12.5, -0.0021983, if (TERM_CASE_4 < 0.5, 1.283E-4, if (STOP_WORD_3 < 0.5, 0.0135179, 0.0010582)))), if (ENTITYPLACETYPE < 0.5, -0.0075836, if (CONCEPTTYPE < 0.5, -0.0068125, -0.0029257)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.009203, 0.0049422), 0.0012454), if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, -8.88E-5, 0.0034595), if (LENGTH < 13.5, 0.013485, -0.0097365)), -0.0162894), -0.0053687), 0.0134028), 0.0057942)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 22.5, -0.0050367, 4.752E-4), if (CONCEPTTYPE < 0.5, -0.0027557, if (TERM_CASE_1 < 0.5, -1.648E-4, 0.0231566))), if (ENTITYPLACETYPE < 0.5, -0.0073333, if (CONCEPTTYPE < 0.5, -0.0065181, if (LENGTH < 11.5, 5.864E-4, -0.0053126))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.0030356, if (EXTENDEDTYPE < 0.5, if (CONCEPTTYPE < 0.5, 0.0067235, if (LENGTH < 24.5, -0.0023484, 0.0128696)), -0.0023746)), if (STOP_WORD_3 < 0.5, 0.0084655, 0.0019471)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, if (TERM_CASE_2 < 0.5, -0.0089123, -0.0044822), if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0057812, -0.0013194), -0.0070555), if (TERM_CASE_2 < 0.5, 0.0065103, -0.0043464)), -0.0150195), -0.0130033)), if (CONCEPTTYPE < 0.5, -0.0037904, if (LENGTH < 9.5, 0.0036481, -0.0019782))), -0.0079288)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (GOOD_SYNTAX < 0.5, -0.010997, 0.0028358), -0.0130965), -0.0178932), -0.0247088), -0.0050676), if (STOP_WORD_3 < 0.5, 0.0082516, 0.0016223)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (POS_7 < 0.5, -0.0010904, 
-0.0114916), if (LENGTH < 7.5, -0.009007, -0.0035801)), -0.0012884), -0.0053262), if (CONCEPTTYPE < 0.5, -0.0024507, if (LENGTH < 16.5, 0.0090724, -0.0037266))), -0.0103176), -0.0095315), -0.0079757)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0024261, 0.0064191), -7.473E-4), if (LENGTH < 14.5, 0.0096266, if (STOP_WORD_3 < 0.5, 0.0069862, -0.0024304))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0073758, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.004182, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, 6.003E-4, -0.0154268), 0.0192921)), -0.0093216)), if (LENGTH < 25.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0037909, if (LENGTH < 9.5, 0.0023692, -0.0038749)), -0.0123673), if (LENGTH < 14.5, 0.0065791, -0.0031951)), 0.0097134)), -0.0075453)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0127214, if (POS_18 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, 0.0042794, -0.0149223), -0.0059644), -0.00434)), if (POS_17 < 0.5, -0.00853, 0.0083197)), -0.0154023), 0.0072666), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 26.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0073252, -0.0046927), if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0042392, 0.00568), if (STOP_WORD_1 < 0.5, 0.0016813, -0.0151251)), -0.0054961)), 0.0036687), if (LENGTH < 10.5, 0.0095857, -0.0017664)), -0.0075218)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0040851, if (POS_18 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, 5.489E-4, -0.0032057), if (EXTENDEDTYPE < 0.5, 0.0097395, 0.00173)), -0.0095617)), 0.0071657), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.002139, -0.0067802), if (TERM_CASE_3 < 0.5, if (LENGTH < 12.5, -0.0029803, if (ENTITYPLACETYPE < 0.5, 1.964E-4, if (LENGTH < 15.5, 0.0169786, 
0.0010571))), -0.0036293)), -0.0158293), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0041655, -0.0076776), 4.427E-4), -0.0101598), -0.0074614))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0030204, if (STOP_WORD_3 < 2.5, if (CHUNKTYPE < 0.5, -0.0020253, 0.0061075), 0.0099431)), if (CHUNKTYPE < 0.5, 0.0052019, 0.0090719)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (LENGTH < 12.5, if (STOP_WORD_3 < 1.5, -0.0038004, -0.0136729), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -5.417E-4, 0.0076114), -0.0052943)), if (CONCEPTTYPE < 0.5, -8.815E-4, if (LENGTH < 15.5, 0.0130198, -3.208E-4))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0058622, if (LENGTH < 9.5, if (LENGTH < 4.5, -0.0084541, if (CONCEPTTYPE < 0.5, -0.0014463, 0.0069624)), -0.005469)), -0.0077466))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0073904, 8.639E-4), if (POS_18 < 0.5, if (LENGTH < 21.5, if (LENGTH < 16.5, 0.0015875, -0.0035187), 0.0028162), -0.0077068)), if (STOP_WORD_1 < 0.5, 0.0079288, 3.816E-4)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0051656, -8.779E-4), if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (CONCEPTTYPE < 0.5, -0.0024381, if (TERM_CASE_1 < 0.5, 6.55E-5, 0.0229835)), -0.01508), 0.0037615)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, -0.0044291, 1.517E-4), -0.0106843), -0.0103762), -0.007465))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 19.5, 0.0078365, 0.0033459), if (GOOD_SYNTAX < 0.5, -0.0166694, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_13 < 0.5, if (LENGTH < 12.5, 0.0044238, if (LENGTH < 25.5, -8.241E-4, 0.0029538)), -0.024156), -0.0088242), 0.0064533))), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 7.5, if (LENGTH < 3.5, if (LENGTH < 2.5, -0.0099841, 0.0077372), 
-0.0051692), if (TERM_CASE_4 < 0.5, -0.0026828, -7.36E-5)), -0.0169365), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0059379, -0.0085801), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0057262, -0.0027351), 0.0057282)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (GOOD_SYNTAX < 0.5, -0.012704, if (POS_20 < 0.5, if (POS_10 < 0.5, if (CONCEPTTYPE < 0.5, 0.001787, 0.0048038), -0.0084645), -0.0167773)), -0.0154151), if (LENGTH < 16.5, 0.0038769, -0.0108723)), if (STOP_WORD_3 < 0.5, 0.0076338, 7.876E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, -0.0062637, -0.002461), if (TERM_CASE_4 < 0.5, -0.0028453, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, 6.745E-4, 0.011796), -0.0077363))), if (ENTITYPLACETYPE < 0.5, -0.0069103, if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0061307, -0.0028226), 0.004202)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0065966, 7.641E-4), if (POS_18 < 0.5, if (POS_19 < 0.5, if (LENGTH < 21.5, -3.54E-5, 0.0030288), -0.0058081), -0.0089103)), if (STOP_WORD_1 < 0.5, 0.0075961, -0.0015237)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0014982, -0.0064615), if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0023943, 0.0017694), if (LENGTH < 14.5, -0.0011234, -0.0061581))), -0.0092837), 0.0021767), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0034896, -0.0093722), -0.0108028), -0.0071782))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 20.5, 0.0093178, 0.0024853), 0.0023846), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0016443, 0.0073395), 0.015653)), if (STOP_WORD_1 < 0.5, 0.0074044, -0.0028423)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 12.5, -0.0038378, if (TERM_CASE_3 < 0.5, 
0.0011006, -0.0035059)), if (CONCEPTTYPE < 0.5, -1.936E-4, 0.0206681)), -0.0087056), if (TERM_CASE_4 < 0.5, -0.0012193, 0.0046032)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.005725, if (CONCEPTTYPE < 0.5, -0.0046372, 2.139E-4)), -0.007143))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 17.5, 0.0079157, if (STOP_WORD_3 < 0.5, 0.0048937, -7.541E-4)), if (POS_18 < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0012577, -0.0125994), -0.0050607), if (POS_19 < 0.5, 0.0056225, -0.012138)), -0.0083839)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0027176, -0.0065451), if (POS_10 < 0.5, if (POS_18 < 0.5, if (POS_1 < 0.5, -6.922E-4, -0.0115436), -0.0110793), -0.0154559)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.004107, 7.045E-4), -0.0096248), -0.0103169), -0.0071264))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, 0.0030903, if (POS_11 < 0.5, if (LENGTH < 17.5, -0.0024268, if (EXTENDEDTYPE < 0.5, 0.0062833, 2.239E-4)), -0.0151462)), -0.0228616), if (STOP_WORD_1 < 0.5, 0.0076768, if (LENGTH < 15.5, 0.0080515, -0.0094696))), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0074728, if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (POS_1 < 0.5, -0.0035588, -0.0110619), if (CHUNKTYPE < 0.5, 4.08E-4, 0.0166155)), -0.0135994), -0.0077268)), if (CHUNKTYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, -0.0012327, -0.0043065), if (LENGTH < 11.5, 0.0112089, 2.74E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, 0.0028562, -0.0028131), -0.0038413), -0.0176516), -0.0047792), 0.0066566), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.005756, -0.0017927), 0.0054392), if (CONCEPTTYPE < 0.5, -0.0023315, if 
(TERM_CASE_1 < 0.5, 6.15E-5, 0.0245159))), -0.0107661), 0.0029584), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0055612, if (LENGTH < 7.5, if (LENGTH < 4.5, -0.0053042, 0.0035985), if (CHUNKTYPE < 0.5, -0.006498, 0.0055994))), -0.0069048))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0048521, 0.0016563), if (REGEXTYPE < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 16.5, -0.0050001, -8.071E-4), 0.0052943), 0.0146182)), if (STOP_WORD_1 < 0.5, 0.0072708, -0.0014882)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, -0.0021294, -0.0078412), if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, -0.0011999, -0.0151139), -0.0051213)), -0.008518), -0.0103242), if (CONCEPTTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0035994, 0.0012388), if (STOP_WORD_3 < 1.5, 8.365E-4, -0.014637))), -0.0069901)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0181604, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0017905, -0.0051128), 0.0056444), -0.0015534), 0.0180978), 0.0056899)), 0.0068409), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0050839, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -0.0023704, 0.0013993), -0.0104769)), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -2.8E-6, 0.004893), -0.0030825)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0055769, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0030359, 0.0039209), -0.0053592)), -0.0070033))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0042003, if (POS_18 < 0.5, 0.0011643, -0.008305)), if (STOP_WORD_1 < 0.5, 0.0071109, -8.943E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (ENTITYPLACETYPE < 0.5, -0.0064374, -0.0022824), if (TERM_CASE_4 < 0.5, -0.0023902, if (LENGTH < 12.5, -0.0015923, if (LENGTH < 15.5, if 
(ENTITYPLACETYPE < 0.5, 0.0025889, 0.0166472), 8.685E-4)))), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 9.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0015311, -0.0069653), -1.135E-4), if (LENGTH < 25.5, -0.0052679, 0.0054241)), -0.0090196), -0.0098286), -0.0067572))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0147789, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, 0.0024208, -0.0175133), -0.0047134), if (EXTENDEDTYPE < 0.5, 0.0183169, -0.0075819)), -0.0160729), 0.0057837)), if (STOP_WORD_3 < 0.5, 0.0076398, if (LENGTH < 14.5, 0.0077263, -0.0059873))), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.0056282, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, 4.266E-4, -0.0118752), -0.0073642)), -6.525E-4), 0.0032924), -0.0153252), if (ORDER_IN_CLUSTER < 3.5, -0.0045245, -0.006825))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 17.5, 0.0086494, 0.0046036), 4.573E-4), if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 23.5, 6.8E-4, 0.004053), -0.0043166), -0.0099531)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0055884, -0.0018455), if (STOP_WORD_3 < 1.5, 0.005074, -0.0084121)), if (LENGTH < 17.5, if (LENGTH < 5.5, 0.002617, -0.0017765), 0.0040191)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0055967, 0.0035883), if (LENGTH < 10.5, if (CHUNKTYPE < 0.5, -0.0018811, 0.0220776), -0.0055489)), -0.0067699))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0022766, if (STOP_WORD_3 < 2.5, if (STOP_WORD_2 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, -2.481E-4, 0.0085536), -0.0109575), -0.0063742), 0.0090221)), if (STOP_WORD_1 < 0.5, 0.0068809, -0.0013795)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 
0.5, if (LENGTH < 7.5, -0.0060405, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 12.5, -0.003122, 9.681E-4), -0.0081619), -0.0056294)), if (LENGTH < 5.5, 0.0033079, if (LENGTH < 6.5, -0.0088247, if (CHUNKTYPE < 0.5, -0.0011805, 0.0029874)))), if (ENTITYPLACETYPE < 0.5, -0.0060608, if (ORDER_IN_CLUSTER < 3.5, -0.0026822, -0.0055386)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 1.5, 0.0022037, if (STOP_WORD_3 < 2.5, -0.0056497, 0.0110546)), -0.0189511), -0.0058411), -0.0044991), 0.0062943), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 8.5, -0.0074056, if (TERM_CASE_3 < 0.5, -0.0016832, -0.0071036)), if (CHUNKTYPE < 0.5, -0.0012072, 0.0130635)), -0.0015317), -0.0094688), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, -0.0010929, if (CHUNKTYPE < 0.5, -0.0049078, 1.558E-4)), 0.0064741)), -0.0087076), -0.0066274)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 18.5, 0.0074476, 0.003019), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 11.5, 0.0084097, 4.69E-5), -0.0027729), if (LENGTH < 31.5, 0.0054712, -2.334E-4)), 0.0048071)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 10.5, if (ENTITYPLACETYPE < 0.5, -0.005338, if (LENGTH < 9.5, -8.877E-4, -0.0081681)), if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, 0.0010318, -0.0164364), -0.0034169)), -0.0082132), if (TERM_CASE_4 < 0.5, -8.338E-4, 0.0039692)), if (ENTITYPLACETYPE < 0.5, -0.0062033, if (CONCEPTTYPE < 0.5, -0.0048085, -0.0017919)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0128902, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (POS_10 < 0.5, 0.0019635, -0.0096927), 0.0093435), 5.47E-5)), if (STOP_WORD_1 < 0.5, 0.0070635, -8.492E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if 
(LENGTH < 7.5, -0.0062557, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_3 < 0.5, -0.0011822, -0.0067641), -0.0099856), if (LENGTH < 21.5, -0.0085105, -4.627E-4)), if (LENGTH < 9.5, 0.0084725, -0.0016164)), -0.0126015)), if (CHUNKTYPE < 0.5, -0.0017249, if (LENGTH < 11.5, 0.0112513, 7.338E-4))), -0.0065256)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0117586, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 8.047E-4, -0.0137932), 0.0050686)), -0.0193129), if (STOP_WORD_1 < 0.5, 0.0066615, if (LENGTH < 13.5, 0.0087308, -0.007798))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (LENGTH < 6.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0019189, -0.0082834), -0.0025075), if (ORDER_IN_CLUSTER < 2.5, -0.0010267, -0.0033121)), -0.0083573), -0.0084762), if (LENGTH < 10.5, 0.0094157, if (CONCEPTTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0044572, 0.002104), 0.0042305))), -0.0064498)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 0.0027241, -0.011797), -0.0041164), -0.0042097), if (EXTENDEDTYPE < 0.5, 0.002821, if (LENGTH < 20.5, 0.0025468, -0.0096094))), if (STOP_WORD_3 < 0.5, 0.0068954, 9.877E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0018671, -0.0075502), -0.0020919), if (POS_10 < 0.5, if (POS_18 < 0.5, -4.131E-4, -0.0108001), -0.0145574)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0037169, 0.0016967), -0.0089877), -0.009429), -0.0063806))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (LENGTH < 26.5, if (TERM_CASE_4 < 0.5, 0.003292, -4.955E-4), 0.0035947), -0.0150524), if (STOP_WORD_1 < 0.5, 0.0065514, -0.002053)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, if 
(TERM_CASE_2 < 0.5, -0.0078049, -0.003239), if (POS_18 < 0.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0024005, -0.0061275), 0.0025085), -0.0116573), -0.0106497)), -0.0078103), if (LENGTH < 25.5, if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, -0.0015786, -0.011678), if (LENGTH < 11.5, 0.0093368, -3.592E-4)), -0.0052594), 0.0084376))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_18 < 0.5, 0.0028103, -0.0049653), if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (CONCEPTTYPE < 0.5, 0.0018444, -0.0051396), if (LENGTH < 20.5, 1.64E-4, -0.0104176)), 0.0069841)), 0.0061372), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 9.5, if (TERM_CASE_4 < 0.5, -0.0022878, -0.0069758), -6.493E-4), if (CONCEPTTYPE < 0.5, -0.0021318, 0.0040021)), if (STOP_WORD_3 < 0.5, -0.0054074, 0.0013722)), if (LENGTH < 9.5, 0.0115688, -4.597E-4)), -0.0087464), -0.007742), -0.006447)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, if (CONCEPTTYPE < 0.5, 0.0014661, 0.0044539), -0.0077322), -0.0057684), -0.0043545), 0.0056773), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 9.5, -0.0065427, if (TERM_CASE_3 < 0.5, -6.373E-4, -0.0065177)), if (POS_7 < 0.5, -1.41E-4, -0.0108137)), if (POS_10 < 0.5, 0.0037478, -0.0116254)), -0.0074044), -0.0094093), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, -0.0014513, 0.0023516), -0.0034451), 0.0042849)), -0.0061508)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0061685, 5.15E-5), if (POS_19 < 0.5, 4.874E-4, if (LENGTH < 13.5, 0.0130888, -0.0097763))), 0.0060914), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if 
(LENGTH < 8.5, -0.0054016, if (POS_18 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, 1.586E-4, -0.0133983), -0.0043648), -0.0116817)), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0030641, 0.0011094), if (TERM_CASE_1 < 0.5, if (LENGTH < 7.5, -0.0037408, 0.0033386), 0.0264346))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0049021, if (LENGTH < 10.5, -6.125E-4, -0.0049327)), -0.0062874))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, if (LENGTH < 28.5, 0.0024083, 0.006158), -0.0186422), if (LENGTH < 12.5, 0.0214161, -0.0063568)), -0.0137909), if (STOP_WORD_3 < 2.5, if (LENGTH < 20.5, 4.861E-4, if (STOP_WORD_2 < 0.5, -0.0057808, -0.0221523)), 0.0075378)), 0.0058266), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0057981, -0.0024142), if (LENGTH < 25.5, if (LENGTH < 5.5, 0.002449, if (LENGTH < 6.5, -0.0079902, -7.445E-4)), 0.0083487)), if (ENTITYPLACETYPE < 0.5, -0.0056205, if (CONCEPTTYPE < 0.5, -0.0047229, -0.0016518)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, 0.0021222, -0.0059273), -0.0042369), -0.0039936), 0.005264), -0.0050727), if (STOP_WORD_3 < 0.5, 0.0065396, 0.0013351)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, 3.97E-5, -0.0046735), if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0022059, if (CONCEPTTYPE < 0.5, -5.327E-4, 0.0060757)), -0.0034931)), -0.0076612), if (CONCEPTTYPE < 0.5, -4.815E-4, 0.0054866)), if (ENTITYPLACETYPE < 0.5, -0.0056475, if (CONCEPTTYPE < 0.5, -0.0045053, -0.0017705)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0055781, if (TERM_CASE_2 < 0.5, 0.0034575, -0.0054404)), if (LENGTH < 21.5, -9.944E-4, if (STOP_WORD_3 < 1.5, if (LENGTH < 32.5, 
0.0056431, -0.0010274), -0.0050648))), -0.0047221), if (STOP_WORD_1 < 0.5, 0.0061927, -0.0023092)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0039235, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, -0.0038252, if (LENGTH < 13.5, 0.0059502, -0.0021774)), 9.908E-4)), if (LENGTH < 11.5, 0.0076676, -3.522E-4)), if (ENTITYPLACETYPE < 0.5, -0.0058481, if (CONCEPTTYPE < 0.5, -0.0050177, -7.061E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, 0.0010284, 0.0066736), 0.0065223), if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (LENGTH < 12.5, 0.0050687, if (STOP_WORD_2 < 0.5, -1.839E-4, -0.0065509)), -0.0117254), 0.0077351)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 7.5, -0.0070956, if (TERM_CASE_3 < 0.5, -0.002042, -0.0070271)), if (POS_7 < 0.5, 2.952E-4, -0.0105202)), if (CONCEPTTYPE < 0.5, -0.0023623, -3.16E-5)), if (CHUNKTYPE < 0.5, -9.757E-4, 0.0108694)), -0.00811), -0.0074097), -0.0057322)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0109055, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 7.382E-4, -0.0130298), 0.0048386)), 0.0058612), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 7.5, -0.0073629, -0.0041134), if (STOP_WORD_2 < 0.5, -0.0010755, -0.0093889)), -0.0092831), if (STOP_WORD_1 < 0.5, -0.0021651, 0.0174822)), if (LENGTH < 25.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, if (STOP_WORD_3 < 0.5, -0.002658, -0.0112811), if (ORDER_IN_CLUSTER < 2.5, 0.0021644, -0.0053512)), if (LENGTH < 9.5, 0.002741, -0.0016955)), 0.0075261)), -0.0060907)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 0.0011045, -0.0049646), if (STOP_WORD_1 < 0.5, 
0.0056989, -0.0010321)), -0.0234215), -0.0139392), 0.0049608), -0.0096252), if (STOP_WORD_1 < 0.5, 0.0066785, -3.264E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, -0.0033941, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (LENGTH < 12.5, -0.0012222, if (STOP_WORD_3 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0023237, 0.0108239), -8.082E-4)), -0.0136204), if (STOP_WORD_3 < 0.5, -0.0032519, 0.0039417))), if (ORDER_IN_CLUSTER < 3.5, -0.0039092, -0.0059505))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0020916, -0.0038108), -0.0049058), if (LENGTH < 22.5, 6.65E-4, -0.0140022)), if (STOP_WORD_1 < 0.5, 0.0063283, -0.0013054)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, if (STOP_WORD_1 < 0.5, -0.0055597, if (STOP_WORD_3 < 1.5, 0.0025758, -0.0082833)), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_2 < 0.5, -2.99E-4, -0.0089974), -0.0052529), 0.0029078)), if (CONCEPTTYPE < 0.5, -0.002184, if (LENGTH < 11.5, if (STOP_WORD_2 < 0.5, 0.0036504, -0.0149534), if (LENGTH < 25.5, -0.0028139, 0.0095076)))), -0.0059727)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, if (GOOD_SYNTAX < 0.5, -0.0187617, 0.0012127), 0.0078167), if (STOP_WORD_3 < 0.5, 8.129E-4, -0.0025046)), if (STOP_WORD_1 < 0.5, 0.0061633, -0.0021292)), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0054452, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (TERM_CASE_3 < 0.5, -0.0010183, -0.0047755), -0.0092639), -0.0073327)), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 12.5, -8.616E-4, 0.0093598), -0.0073599), -0.0032817), if (LENGTH < 14.5, 0.0059709, -0.0035062))), 0.0037668), -0.0056394)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, 0.0058724, 0.0022579), if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, if (ENTITYPLACETYPE < 0.5, 0.0010854, 0.0045481), 
-0.0035701), -0.0060972)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 24.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 0.5, -0.0057599, 1.844E-4), -0.0016271), 0.0032631), -7.803E-4), -0.014377), if (CONCEPTTYPE < 0.5, 5.08E-5, if (LENGTH < 16.5, if (ENTITYPLACETYPE < 0.5, 0.023955, 0.0050936), -0.0025106))), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0042898, -0.0067438), if (CONCEPTTYPE < 0.5, -0.0041399, -1.466E-4)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (POS_19 < 0.5, if (POS_13 < 0.5, if (STOP_WORD_3 < 1.5, if (POS_10 < 0.5, 0.0023084, -0.01229), -0.0022428), -0.0229772), -0.0044172), 0.0146678), 0.005348), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 25.5, if (STOP_WORD_3 < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0047668, 8.196E-4), if (POS_7 < 0.5, -5.52E-4, -0.0108814)), -0.0088699), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0031281, if (TERM_CASE_3 < 0.5, 0.0027361, -0.003712)), if (LENGTH < 11.5, 0.0102952, -6.221E-4))), -0.009679), 0.003716), -0.0057841)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 22.5, 0.0069161, 0.0026916), 2.773E-4), if (POS_18 < 0.5, if (LENGTH < 25.5, if (GOOD_SYNTAX < 0.5, -0.0152795, 2.679E-4), 0.0034702), -0.0067681)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, if (TERM_CASE_4 < 0.5, -0.0018963, -0.0060569), 5.77E-5), -0.0069136), if (POS_10 < 0.5, 0.0037451, -0.010911)), -0.006892), -0.0086871), -0.006907), if (CONCEPTTYPE < 0.5, if (LENGTH < 11.5, if (LENGTH < 7.5, -0.0023858, -0.005575), -0.0012976), 3.127E-4))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.013416, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0017878, if (REGEXTYPE < 0.5, -0.001748, 
0.0137358)), 0.0050504)), if (STOP_WORD_3 < 0.5, 0.0062387, 3.375E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (LENGTH < 9.5, -0.0061554, if (TERM_CASE_3 < 0.5, -9.229E-4, -0.0054569)), if (POS_7 < 0.5, 4.028E-4, -0.0099663)), if (POS_10 < 0.5, 0.0041051, -0.011965)), -0.0067372), -0.0089207), if (ORDER_IN_CLUSTER < 2.5, -3.53E-5, if (LENGTH < 10.5, -4.665E-4, -0.0050129))), -0.0060444)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (POS_11 < 0.5, if (LENGTH < 27.5, 5.492E-4, 0.0033153), -0.0136214), -0.0050055), if (STOP_WORD_3 < 0.5, 0.0061088, 4.413E-4)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0058789, if (STOP_WORD_1 < 0.5, -0.0038891, if (CHUNKTYPE < 0.5, if (STOP_WORD_3 < 1.5, -5.614E-4, -0.0076478), 0.0158819))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0024318, 0.0043387), 0.0028647), -0.0112887), -0.0034375), if (LENGTH < 11.5, 0.0110138, 3.4E-6)), if (LENGTH < 13.5, -0.005825, 0.0018065)))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, 0.0020399, if (POS_17 < 0.5, -0.0079739, 0.0077704)), -0.0045273), -0.0042705), if (GOOD_SYNTAX < 0.5, -0.044834, 0.005528)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.00137, -0.0072287), -0.0019477), if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -8.339E-4, if (LENGTH < 6.5, 2.63E-4, 0.0072602)), if (ORDER_IN_CLUSTER < 2.5, -0.0010374, -0.0052903))), if (POS_10 < 0.5, 0.0027254, -0.0107426)), -0.0075967), -0.0070029), -0.0058943)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, 7.304E-4, 0.0047063), 0.0053245), if (STOP_WORD_3 < 
2.5, -0.0013843, 0.0071936)), if (CHUNKTYPE < 0.5, 0.0039065, 0.0069589)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, -8.39E-4, -0.0061029), if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 7.354E-4, -0.0132291), -0.0108053), -0.0041196)), 8.29E-5), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0044098, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0021542, if (LENGTH < 5.5, -0.0047024, 0.0085462)), -0.0039158)), -0.0056911))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 23.5, 0.0054057, -0.0050707), if (LENGTH < 21.5, if (LENGTH < 16.5, 6.25E-5, -0.0037585), if (STOP_WORD_3 < 1.5, 0.0025946, -0.004957))), -0.0147982), 0.0037873), if (STOP_WORD_1 < 0.5, 0.0060617, -6.732E-4)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, if (ENTITYPLACETYPE < 0.5, -0.0034023, if (CONCEPTTYPE < 0.5, if (LENGTH < 17.5, -0.0023813, 0.0122696), if (TERM_CASE_1 < 0.5, 0.0010462, 0.0245056))), 0.0026042), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, -0.0024896, -0.0075246), -0.0082288), -0.0053715))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0140922, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0012705, if (CHUNKTYPE < 0.5, if (REGEXTYPE < 0.5, -0.0030832, 0.0106479), 0.0069127)), 0.0043758)), if (STOP_WORD_3 < 0.5, 0.0057028, 6.454E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0048533, if (STOP_WORD_1 < 0.5, -0.00315, if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, if (LENGTH < 9.5, 0.0077806, -0.0014497), 0.0176083), -0.0072975))), if (CONCEPTTYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0024389, 0.0038358), if (LENGTH < 9.5, 0.0032891, if (LENGTH < 25.5, -0.0017344, 0.0116658)))), -0.0057079)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0099419, 0.0018142), if 
(TERM_CASE_2 < 0.5, -0.0090956, 0.0048625)), -0.0084599), if (STOP_WORD_1 < 0.5, 0.0060414, -0.0010435)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (EXTENDEDTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, if (CHUNKTYPE < 0.5, -0.0056868, if (CONCEPTTYPE < 0.5, -0.003887, 0.0064235)), if (POS_1 < 0.5, if (POS_7 < 0.5, 2.073E-4, -0.0106819), -0.0075795)), if (CHUNKTYPE < 0.5, -0.0012641, 0.0167626)), if (CHUNKTYPE < 0.5, -0.0011626, 0.0025646)), 0.0046914), -0.0069173), -0.0056397)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (TERM_CASE_4 < 0.5, 0.003112, if (LENGTH < 21.5, if (LENGTH < 9.5, 0.0075376, -0.0010629), if (POS_19 < 0.5, 0.0026077, -0.0099125))), -0.0224772), -0.0146677), -0.0187589), 0.0051532), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0042885, -7.172E-4), if (STOP_WORD_3 < 1.5, 0.0055999, -0.0083559)), -0.005241), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 9.5, if (CONCEPTTYPE < 0.5, -0.0014591, 0.0034826), if (LENGTH < 11.5, -0.0047794, -8.561E-4)), 0.0034623), -0.0040649))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 22.5, 0.0064964, 0.0017795), 3.438E-4), if (LENGTH < 27.5, if (GOOD_SYNTAX < 0.5, -0.0140793, 2.06E-4), if (EXTENDEDTYPE < 0.5, 0.007975, 0.0010111))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, if (LENGTH < 2.5, -0.0119005, if (TERM_CASE_4 < 0.5, 0.0066, if (ENTITYPLACETYPE < 0.5, -0.0059626, 0.0012436))), -0.004783), if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0020343, 5.341E-4), 0.0031655), -0.012697)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0023857, -0.0080862), -0.0079322), -0.0054873))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 19.5, 0.0060748, 0.0021505), if (ENTITYPLACETYPE < 0.5, 
-4.995E-4, 0.0029317)), if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (LENGTH < 25.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (LENGTH < 9.5, if (TERM_CASE_4 < 0.5, if (POS_1 < 0.5, -0.0015543, -0.0082744), -0.0066599), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (ORDER_IN_CLUSTER < 2.5, 0.0013058, -0.0033382), -0.0077107), -0.0062193)), if (STOP_WORD_3 < 1.5, if (CHUNKTYPE < 0.5, 0.0028224, 0.0161777), -0.007837)), if (CONCEPTTYPE < 0.5, -0.0021692, if (LENGTH < 11.5, 0.0035302, -0.002672))), 0.0030665), -0.0121954), -0.0054196)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (POS_11 < 0.5, if (POS_19 < 0.5, 0.0016966, -0.0032049), -0.0130873), -0.0175838), if (STOP_WORD_1 < 0.5, 0.0057807, -0.0019965)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 25.5, if (POS_1 < 0.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_2 < 0.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (STOP_WORD_3 < 0.5, -0.0057523, -3.969E-4), 0.0038301), -0.0016762), if (POS_7 < 0.5, 1.834E-4, -0.0095058)), if (LENGTH < 14.5, if (LENGTH < 9.5, 0.0135111, 0.0017529), if (LENGTH < 17.5, -0.0068988, -9.56E-5))), -0.0066534), 0.0024084), -0.0062302), -0.0051074)) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0105326, if (POS_11 < 0.5, 0.0018235, -0.0123765)), -0.0047604), if (LENGTH < 15.5, 0.0066237, -0.0072323)), -0.0156029), if (STOP_WORD_1 < 0.5, 0.0058986, -0.0018743)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, -0.0042957, if (POS_18 < 0.5, if (STOP_WORD_1 < 0.5, -0.0014716, if (POS_10 < 0.5, if (TERM_CASE_4 < 0.5, 0.0020924, 0.0152342), -0.0103134)), -0.0112)), 2.418E-4)), if (ENTITYPLACETYPE < 0.5, -0.0050267, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, 0.0016477, -0.0029925), -0.0043674))) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0128497, if 
(POS_20 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 32.5, if (CHUNKTYPE < 0.5, 0.003191, -0.0010209), -0.004492), if (REGEXTYPE < 0.5, -0.0015492, 0.0137018)), 0.0037597), -0.0168394)), 0.0051081), if (CHUNKTYPE < 0.5, if (LENGTH < 7.5, -0.0030254, if (POS_10 < 0.5, -5.306E-4, -0.0122115)), if (LENGTH < 15.5, if (CONCEPTTYPE < 0.5, 0.0015788, if (LENGTH < 13.5, 0.0047772, 0.0255504)), if (LENGTH < 22.5, -0.0033459, 0.0067153)))), if (ENTITYPLACETYPE < 0.5, -0.0047279, if (CONCEPTTYPE < 0.5, -0.003954, -1.26E-5))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_13 < 0.5, 0.0014096, -0.0221129), if (LENGTH < 15.5, 0.0065941, if (STOP_WORD_3 < 0.5, 0.0042286, -0.0046691))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (POS_1 < 0.5, if (TERM_CASE_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0047686, if (STOP_WORD_3 < 1.5, 0.0031787, -0.0074552)), if (STOP_WORD_2 < 0.5, if (POS_7 < 0.5, 1.46E-4, -0.0088521), -0.0083276)), -0.0078294), 0.002584), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.00239, 0.0035981), 0.0015887), -0.0097062), -0.0033672), 0.0028288)), -0.0054668)) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_20 < 0.5, if (CONCEPTTYPE < 0.5, if (POS_19 < 0.5, 3.872E-4, -0.0048465), if (LENGTH < 25.5, 0.0015733, 0.0129061)), -0.0186174), if (STOP_WORD_3 < 0.5, 0.0054222, if (LENGTH < 13.5, 0.0074576, -0.0044036))), if (LENGTH < 8.5, -0.0025464, if (POS_10 < 0.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (LENGTH < 11.5, -4.7E-4, 0.0026154), -0.0094897), if (ENTITYPLACETYPE < 0.5, -0.0052109, 6.52E-5)), -0.0127982))), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0027834, 0.0020855), -0.0069906), -0.0081609), -0.0052273)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_20 < 0.5, if (STOP_WORD_3 < 0.5, if 
(CONCEPTTYPE < 0.5, -2.659E-4, 0.0040309), if (REGEXTYPE < 0.5, -0.0018569, 0.0107891)), -0.0151089), 0.0037686), if (LENGTH < 15.5, 0.0066249, 0.0035574)), if (CHUNKTYPE < 0.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, -0.004748, if (ORDER_IN_CLUSTER < 2.5, -0.0014364, -0.0044737)), if (ORDER_IN_CLUSTER < 3.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, if (CONCEPTTYPE < 0.5, -9.599E-4, 0.0020235), -0.0108906), -0.0036368), if (LENGTH < 13.5, -0.0053509, 0.0014776))), if (STOP_WORD_1 < 0.5, if (ENTITYPLACETYPE < 0.5, -0.0023834, 0.0031195), 0.0111238))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 16.5, 0.0064385, 0.0031692), if (LENGTH < 13.5, 0.005946, -0.002465)), if (GOOD_SYNTAX < 0.5, -0.0127792, if (ENTITYPLACETYPE < 0.5, if (REGEXTYPE < 0.5, if (STOP_WORD_3 < 0.5, if (CHUNKTYPE < 0.5, 0.0022705, if (LENGTH < 22.5, -0.0042644, 0.0042572)), -0.0017904), 0.0117519), 0.0041327))), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 7.5, if (LENGTH < 5.5, if (ENTITYPLACETYPE < 0.5, -0.0043873, 0.0032623), -0.0047969), if (POS_10 < 0.5, if (POS_18 < 0.5, -3.178E-4, -0.0094845), -0.0118449)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0038449, -0.006342), -0.0026466))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, if (CHUNKTYPE < 0.5, 0.0017729, 0.0119581), if (STOP_WORD_3 < 0.5, if (TERM_CASE_3 < 0.5, if (CHUNKTYPE < 0.5, if (EXTENDEDTYPE < 0.5, 0.0053641, 6.043E-4), if (LENGTH < 24.5, -0.0042042, 0.0050383)), 0.0042807), -0.0012371)), if (CHUNKTYPE < 0.5, 0.0034585, 0.0064023)), if (ENTITYPLACETYPE < 0.5, if (LENGTH < 11.5, -0.0045099, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 2.905E-4, -0.0040922), -0.0044686)), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0027731, if (TERM_CASE_3 < 0.5, if (STOP_WORD_1 < 0.5, 0.0021029, -0.0118245), if (LENGTH < 10.5, 0.0033777, -0.0043585))), 0.0021061))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if 
(CONCEPTTYPE < 0.5, 9.693E-4, 0.0055008), if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, 7.057E-4, -0.0044153), -0.0096905), 0.0071207)), 0.0044157), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (POS_1 < 0.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.0055987, -9.953E-4), if (POS_18 < 0.5, if (POS_7 < 0.5, if (TERM_CASE_3 < 0.5, -4.215E-4, if (STOP_WORD_3 < 0.5, -0.0045512, 0.0072483)), -0.0107516), -0.0095758)), -0.00816), if (TERM_CASE_4 < 0.5, -0.0022175, if (CONCEPTTYPE < 0.5, -0.0010487, if (STOP_WORD_1 < 0.5, 0.0042406, -0.0102225)))), -0.0049476)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_19 < 0.5, if (POS_18 < 0.5, if (TERM_CASE_4 < 0.5, 0.0037161, if (REGEXTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.0016857, if (LENGTH < 24.5, -0.0040707, 0.0027269)), 0.0133696)), if (LENGTH < 24.5, -2.64E-4, -0.0132598)), -0.004131), if (STOP_WORD_1 < 0.5, 0.0052308, -0.0012387)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, -0.0039487, if (CONCEPTTYPE < 0.5, -0.0024748, 0.0038684)), 1.023E-4), -0.0062713), 0.0014277), if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_1 < 0.5, -0.0032143, 0.0020178), -0.0047324), -0.0076371))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (POS_19 < 0.5, 0.0016398, -0.0034922), if (LENGTH < 34.5, if (LENGTH < 13.5, 0.0014083, if (LENGTH < 14.5, -0.0142889, if (TERM_CASE_2 < 0.5, if (STOP_WORD_3 < 1.5, -0.0015192, -0.006168), 0.0040361))), 0.0040342)), if (STOP_WORD_1 < 0.5, 0.0050596, -0.0022307)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 18.5, if (ENTITYPLACETYPE < 0.5, if (CHUNKTYPE < 0.5, -0.0032075, 1.902E-4), if (CONCEPTTYPE < 0.5, -0.0018731, if (TERM_CASE_1 < 0.5, if (LENGTH < 7.5, -0.005371, 0.0013392), 0.0230781))), 0.0012825), if (ENTITYPLACETYPE < 0.5, -0.0047609, if (CONCEPTTYPE < 0.5, -0.0033201, -9.118E-4)))) + if (ORDER_IN_CLUSTER < 2.5, if 
(ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (EXTENDEDTYPE < 0.5, if (LENGTH < 25.5, 0.0010085, 0.0069518), if (LENGTH < 22.5, 0.001243, if (POS_18 < 0.5, -8.336E-4, -0.0122694))), -0.0070642), if (STOP_WORD_3 < 0.5, 0.0053898, -1.218E-4)), if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 8.5, -0.0039162, -4.959E-4), -0.0052243), 0.0046194), if (CONCEPTTYPE < 0.5, -6.389E-4, 0.0022515))), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (POS_1 < 0.5, -0.0023654, -0.0074924), -0.007877), if (LENGTH < 14.5, 0.0061774, -0.0073295)), -0.0049754)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, 0.0017629, if (REGEXTYPE < 0.5, if (STOP_WORD_2 < 0.5, -6.818E-4, if (LENGTH < 22.5, -0.0015196, -0.0125621)), 0.011708)), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.003987, -0.0033875), 0.0062574)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0035205, -7.65E-4), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, -3.039E-4, 0.0116126), -0.0071126)), if (TERM_CASE_3 < 0.5, 9.175E-4, -0.0011282)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0036746, if (CHUNKTYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0050641, -3.915E-4), 0.0063316)), -0.0051913))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 2.5, -3.26E-4, 0.0083876), 0.0029369), if (STOP_WORD_3 < 0.5, 0.0053767, -1.916E-4)), if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_2 < 0.5, if (LENGTH < 26.5, if (POS_1 < 0.5, if (STOP_WORD_1 < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 6.5, -0.0036536, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, -0.0015087, if (LENGTH < 15.5, 0.0062319, -9.102E-4)), if (ENTITYPLACETYPE < 0.5, -0.0053479, -0.0010366))), 0.0049837), if (LENGTH < 14.5, -0.0021675, -0.0061561)), if (POS_10 < 0.5, 0.0022445, -0.0092867)), -0.0062426), 0.0029227), -0.0058051), 
-0.005008)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, -1.613E-4, if (STOP_WORD_3 < 0.5, 0.0054156, if (LENGTH < 20.5, -0.0033028, 0.0093069))), if (STOP_WORD_3 < 0.5, 0.0053995, 1.395E-4)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (POS_10 < 0.5, if (CHUNKTYPE < 0.5, if (LENGTH < 11.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 9.5, -0.0028307, -0.0075842), if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -0.0014079, if (LENGTH < 8.5, -0.0054515, -4.853E-4)), 0.0021386)), if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 0.0028417, -0.0015154), -0.002849)), if (STOP_WORD_1 < 0.5, -1.69E-4, 0.0092206)), -0.0100777), -0.0072433), -0.0046655)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, 0.0028838, if (LENGTH < 20.5, if (LENGTH < 15.5, 1.312E-4, -0.0031039), if (STOP_WORD_3 < 1.5, 0.0026176, if (LENGTH < 25.5, -0.0151727, -0.0015702)))), if (STOP_WORD_1 < 0.5, 0.0050691, -0.0028786)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 21.5, -0.0011859, 0.0020553), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (EXTENDEDTYPE < 0.5, -0.004109, 0.0049013), -0.0062146), if (LENGTH < 23.5, if (CONCEPTTYPE < 0.5, -0.0033266, if (LENGTH < 11.5, if (LENGTH < 4.5, -0.0058635, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, 0.011381, 3.091E-4), -0.0022451)), -0.0040578)), 0.0121527)))) + if (ORDER_IN_CLUSTER < 2.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_19 < 0.5, if (STOP_WORD_3 < 2.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, 0.0017708, if (LENGTH < 19.5, -0.0134732, if (LENGTH < 24.5, 0.0160392, -0.0107184))), if (STOP_WORD_2 < 0.5, -7.538E-4, -0.0112972)), 0.0098581), -0.0063261), 0.004338), if (LENGTH < 24.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.003957, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 1.5, if (STOP_WORD_1 < 0.5, -6.735E-4, 0.0054652), -0.0078441), -0.0049023)), 1.8E-6), 0.0035565)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0037295, -0.0060866), if 
(CHUNKTYPE < 0.5, -0.0028149, 0.0071955))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_19 < 0.5, if (POS_10 < 0.5, if (POS_20 < 0.5, if (GOOD_SYNTAX < 0.5, -0.0084904, if (LENGTH < 9.5, 0.0073514, 0.0019155)), -0.0147088), -0.0070136), -0.0044873), if (TERM_CASE_2 < 0.5, -0.010124, 0.0029707)), if (STOP_WORD_3 < 0.5, 0.0052352, if (LENGTH < 15.5, 0.0054464, -0.0068859))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_1 < 0.5, -0.001685, if (CONCEPTTYPE < 0.5, -0.0033985, 0.0170294)), 0.0011249), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0037196, -0.0059769), if (CONCEPTTYPE < 0.5, -0.003476, if (LENGTH < 11.5, 0.0010272, -0.0037164))))) + if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (ENTITYPLACETYPE < 0.5, -8.59E-5, 0.0032628), 0.0049911), if (LENGTH < 22.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 9.5, -0.0032104, 7.004E-4), -0.00509), if (CONCEPTTYPE < 0.5, -6.639E-4, 0.0018133)), 0.002738)), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, -0.0100488, if (POS_13 < 0.5, 0.0084514, -0.0191446)), -0.0075867)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0034391, if (CHUNKTYPE < 0.5, if (LENGTH < 8.5, if (LENGTH < 5.5, -0.00378, if (CONCEPTTYPE < 0.5, -0.0012524, 0.008936)), -0.0035542), 0.0063396)), -0.004791)) + if (ORDER_IN_CLUSTER < 1.5, if (POS_10 < 0.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 0.0014145, -0.0117675), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.0035616, -0.0045089), 0.0060398)), -0.0087104), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 10.5, if (LENGTH < 5.5, if (TERM_CASE_4 < 0.5, if (CONCEPTTYPE < 0.5, -0.0026657, 0.0125798), if (CONCEPTTYPE < 0.5, 0.0016637, -0.0046097)), -0.0026974), if (TERM_CASE_4 < 0.5, -0.0012374, if (LENGTH < 19.5, 0.0031377, -0.0015069))), if (ENTITYPLACETYPE < 0.5, if (EXTENDEDTYPE < 0.5, -0.0047287, 0.0015157), if (CONCEPTTYPE < 0.5, -0.0034802, if (LENGTH 
< 4.5, -0.0067615, if (LENGTH < 7.5, 0.0044384, -0.0012433)))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (STOP_WORD_2 < 0.5, 0.0013871, -0.0037106), if (LENGTH < 22.5, if (STOP_WORD_3 < 0.5, -0.0100153, 0.0191959), -0.0108371)), if (STOP_WORD_1 < 0.5, 0.0050918, -0.0016045)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 7.5, if (TERM_CASE_2 < 0.5, -0.00572, -0.0020626), if (STOP_WORD_1 < 0.5, if (LENGTH < 11.5, -0.0041242, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, 3.635E-4, -0.0055527), -0.0039023)), if (STOP_WORD_3 < 1.5, if (LENGTH < 9.5, 0.0108656, 8.607E-4), -0.0059998))), -0.0057206), if (CHUNKTYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, -2.344E-4, -0.0026647), 0.0031043))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_3 < 0.5, if (TERM_CASE_4 < 0.5, 0.0034454, if (CHUNKTYPE < 0.5, if (LENGTH < 32.5, 0.002382, -0.0055036), if (LENGTH < 22.5, -0.0045405, 0.0047748))), -0.0013219), 0.0044944), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 12.5, -0.0027877, if (TERM_CASE_3 < 0.5, 6.657E-4, -0.0040928)), if (STOP_WORD_3 < 0.5, 2.023E-4, -0.0037834)), if (ORDER_IN_CLUSTER < 3.5, if (LENGTH < 4.5, -0.0058118, if (POS_1 < 0.5, if (LENGTH < 9.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, -3.175E-4, -0.0048892), 0.0014306), if (LENGTH < 26.5, -0.0036394, 0.0072831)), -0.0070347)), -0.0044785))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 18.5, 0.0049755, 5.047E-4), if (REGEXTYPE < 0.5, -1.256E-4, 0.0095468)), if (CHUNKTYPE < 0.5, 0.0027465, 0.0059089)), if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 8.5, -0.003476, if (TERM_CASE_3 < 0.5, if (POS_18 < 0.5, if (POS_10 < 0.5, 7.516E-4, -0.0103132), -0.0095231), if (STOP_WORD_3 < 0.5, -0.0052481, 0.0088106))), if (LENGTH < 17.5, -2.658E-4, 0.0034851)), if (ORDER_IN_CLUSTER < 3.5, if (POS_1 < 0.5, if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, 
-0.0020572, 0.0019232), -0.0064383), -0.0071097), if (LENGTH < 12.5, -0.004996, -0.0020705)))) + if (ORDER_IN_CLUSTER < 2.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (LENGTH < 12.5, 0.0038949, if (POS_19 < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 22.5, 0.0044613, 8.62E-5), -1.14E-5), -0.0059595)), if (LENGTH < 8.5, if (TERM_CASE_4 < 0.5, 2.32E-5, -0.0045638), if (TERM_CASE_3 < 0.5, 4.242E-4, -0.002741))), -0.0186513), -0.0123259), if (ORDER_IN_CLUSTER < 1.5, if (CHUNKTYPE < 0.5, 0.002944, 0.0062291), 3.61E-4)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, -0.0034353, if (CHUNKTYPE < 0.5, if (LENGTH < 7.5, if (LENGTH < 5.5, -0.0028677, 0.0051801), -0.0031814), 0.0058335)), -0.004572)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (STOP_WORD_2 < 0.5, 0.0010304, if (LENGTH < 23.5, 3.895E-4, -0.015312)), if (CHUNKTYPE < 0.5, if (STOP_WORD_1 < 0.5, 0.003584, -0.003437), 0.0060657)), if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 25.5, if (POS_10 < 0.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_1 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0036443, 1.727E-4), if (TERM_CASE_4 < 0.5, -0.0022025, 2.902E-4)), 0.00482), if (STOP_WORD_3 < 1.5, if (TERM_CASE_4 < 0.5, 6.078E-4, if (LENGTH < 9.5, 0.0267202, 0.0028202)), -0.0059933)), -0.0117848), 0.0045257), if (ENTITYPLACETYPE < 0.5, -0.0040138, if (CONCEPTTYPE < 0.5, -0.0030944, -3.88E-5)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (GOOD_SYNTAX < 0.5, -0.012145, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, 2.346E-4, -0.0136071), 0.003872)), if (STOP_WORD_3 < 0.5, 0.0053479, -8.352E-4)), if (ORDER_IN_CLUSTER < 2.5, if (POS_10 < 0.5, if (LENGTH < 12.5, if (ENTITYPLACETYPE < 0.5, -0.0024083, -5.153E-4), if (TERM_CASE_3 < 0.5, if (LENGTH < 15.5, if (ENTITYPLACETYPE < 0.5, 0.0020101, 0.0153386), 6.866E-4), if (STOP_WORD_3 < 0.5, -0.0026273, 0.0078831))), -0.0111062), if (ENTITYPLACETYPE < 0.5, -0.0040042, if (CONCEPTTYPE < 0.5, 
-0.0025863, if (LENGTH < 8.5, if (LENGTH < 5.5, -0.0022329, if (ORDER_IN_CLUSTER < 3.5, 0.0114385, -0.0040637)), -0.0022211))))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (CONCEPTTYPE < 0.5, 6.395E-4, 0.0039129), -4.731E-4), if (STOP_WORD_1 < 0.5, 0.0048425, if (CHUNKTYPE < 0.5, -0.0050241, 0.0113075))), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 7.5, if (TERM_CASE_4 < 0.5, -0.0026423, -0.0058182), if (EXTENDEDTYPE < 0.5, if (POS_1 < 0.5, if (POS_11 < 0.5, -0.0016721, -0.0090084), -0.0068847), 0.0029151)), if (LENGTH < 25.5, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 12.5, if (CONCEPTTYPE < 0.5, -6.516E-4, if (LENGTH < 4.5, -0.0032959, 0.003969)), 0.00811), -0.0063281), -0.0013025), 0.0071419)), -0.0047354)) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, 4.683E-4, if (STOP_WORD_1 < 0.5, 0.0050506, -0.0015297)), if (ORDER_IN_CLUSTER < 3.5, if (ENTITYPLACETYPE < 0.5, if (LENGTH < 26.5, if (POS_1 < 0.5, if (LENGTH < 6.5, if (TERM_CASE_2 < 0.5, -0.0058316, 1.419E-4), if (STOP_WORD_2 < 0.5, if (STOP_WORD_1 < 0.5, -0.0018701, if (CHUNKTYPE < 0.5, -3.427E-4, 0.0145911)), -0.0054624)), -0.0071397), 0.0030205), if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if (CONCEPTTYPE < 0.5, if (LENGTH < 12.5, -0.0014839, if (LENGTH < 15.5, 0.0155978, -6.027E-4)), if (STOP_WORD_1 < 0.5, 0.0037224, -0.0097389)), -0.0029211), if (LENGTH < 11.5, 0.0091976, 9.027E-4))), -0.0044954)) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_4 < 0.5, if (LENGTH < 20.5, 0.0046733, 8.251E-4), if (LENGTH < 21.5, -7.639E-4, if (LENGTH < 32.5, if (STOP_WORD_3 < 1.5, 0.0040487, -0.0026534), -0.0017195))), if (STOP_WORD_1 < 0.5, 0.0047493, -0.002194)), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_1 < 0.5, if (TERM_CASE_2 < 0.5, -0.0032711, -3.03E-5), if (STOP_WORD_3 < 1.5, 0.0053601, -0.004987)), -0.0039912), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_3 < 0.5, if 
(STOP_WORD_1 < 0.5, 7.016E-4, -0.009457), -0.0016048), if (LENGTH < 14.5, 0.0065525, -3.78E-4)), if (LENGTH < 13.5, -0.0043881, 0.0025122)))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_3 < 0.5, if (POS_10 < 0.5, if (LENGTH < 27.5, if (LENGTH < 23.5, 8.637E-4, -0.0021732), if (EXTENDEDTYPE < 0.5, 0.0082735, 1.661E-4)), -0.0074062), if (STOP_WORD_3 < 0.5, 0.0048262, 6.08E-4)), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (TERM_CASE_1 < 0.5, if (POS_10 < 0.5, -0.001591, -0.0107858), if (LENGTH < 2.5, -0.0070412, 0.0090356)), if (CONCEPTTYPE < 0.5, -2.129E-4, 0.0049083)), if (STOP_WORD_2 < 0.5, if (ORDER_IN_CLUSTER < 3.5, if (STOP_WORD_1 < 0.5, if (POS_1 < 0.5, -0.0025013, -0.0069603), 0.0013009), if (LENGTH < 12.5, -0.0049737, if (ENTITYPLACETYPE < 0.5, -0.0046367, 0.0020614))), -0.0065442))) + if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.0055498, 0.0020362), 4.82E-5), if (GOOD_SYNTAX < 0.5, -0.0113037, if (ENTITYPLACETYPE < 0.5, -2.67E-5, 0.0034735))), if (ORDER_IN_CLUSTER < 2.5, if (CHUNKTYPE < 0.5, if (STOP_WORD_2 < 0.5, if (TERM_CASE_1 < 0.5, if (LENGTH < 11.5, -0.002038, if (TERM_CASE_3 < 0.5, if (STOP_WORD_3 < 0.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.003187, 0.0039767), 0.0076656), -0.0018373), -0.0023729)), 0.0064775), -0.0051693), if (TERM_CASE_4 < 0.5, -0.0010081, 0.004785)), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, -0.0029171, if (LENGTH < 14.5, 0.005164, -0.0067269)), -0.0044377))) + if (ORDER_IN_CLUSTER < 2.5, if (LENGTH < 8.5, if (ORDER_IN_CLUSTER < 1.5, 0.0038333, if (LENGTH < 5.5, -3.037E-4, if (LENGTH < 6.5, -0.0058466, -0.0018246))), if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (POS_13 < 0.5, if (POS_10 < 0.5, if (POS_19 < 0.5, if (POS_20 < 0.5, 9.971E-4, -0.0118452), -0.0031073), -0.0056757), -0.0181566), -0.0106292), if (STOP_WORD_1 < 0.5, if (ORDER_IN_CLUSTER < 1.5, 0.0045608, if (CONCEPTTYPE < 0.5, 3.27E-4, if (TERM_CASE_3 < 0.5, 0.0080705, 0.002204))), 
-0.0036974))), if (ENTITYPLACETYPE < 0.5, if (ORDER_IN_CLUSTER < 3.5, -0.0032085, -0.0053955), if (CHUNKTYPE < 0.5, if (CONCEPTTYPE < 0.5, -0.0027009, -5.635E-4), 0.0046874))) + if (ORDER_IN_CLUSTER < 1.5, if (ENTITYPLACETYPE < 0.5, if (POS_11 < 0.5, if (STOP_WORD_3 < 2.5, if (POS_10 < 0.5, if (STOP_WORD_3 < 1.5, 0.0016465, if (LENGTH < 20.5, 0.0061307, -0.0080488)), -0.0083587), 0.0088823), -0.0118533), 0.0038776), if (ORDER_IN_CLUSTER < 3.5, if (POS_10 < 0.5, if (POS_7 < 0.5, if (POS_1 < 0.5, if (TERM_CASE_3 < 0.5, if (LENGTH < 11.5, if (ENTITYPLACETYPE < 0.5, if (TERM_CASE_2 < 0.5, -0.0038623, 7.132E-4), if (CONCEPTTYPE < 0.5, -0.0014892, 0.0026902)), if (POS_18 < 0.5, 0.0020488, -0.0074362)), if (CHUNKTYPE < 0.5, -0.0032111, if (LENGTH < 14.5, 0.0039343, -0.0033507))), -0.0056254), -0.0087829), -0.0095263), -0.0042301)) + if (ORDER_IN_CLUSTER < 2.5, if (STOP_WORD_3 < 1.5, if (POS_18 < 0.5, if (ORDER_IN_CLUSTER < 1.5, if (TERM_CASE_4 < 0.5, if (STOP_WORD_3 < 0.5, if (LENGTH < 21.5, 0.0058127, 0.001988), -0.001657), if (LENGTH < 22.5, if (LENGTH < 14.5, 0.0010777, -0.0020483), 0.0026629)), if (LENGTH < 7.5, if (LENGTH < 5.5, 8.059E-4, if (LENGTH < 6.5, -0.0069968, -0.0014324)), if (TERM_CASE_4 < 0.5, -7.077E-4, if (STOP_WORD_1 < 0.5, 7.588E-4, 0.0073991)))), -0.0073361), -0.0042277), if (ORDER_IN_CLUSTER < 3.5, if (CHUNKTYPE < 0.5, if (LENGTH < 25.5, if (LENGTH < 9.5, -0.0023911, -0.0044198), 0.007123), if (LENGTH < 14.5, 0.0051462, -0.0075633)), -0.0045164)) diff --git a/searchlib/src/test/files/simple.expression b/searchlib/src/test/files/simple.expression new file mode 100644 index 00000000000..745e8d376f7 --- /dev/null +++ b/searchlib/src/test/files/simple.expression @@ -0,0 +1 @@ +a + b diff --git a/searchlib/src/test/files/testAggregatorResults b/searchlib/src/test/files/testAggregatorResults new file mode 100644 index 00000000000..060b8b86bda Binary files /dev/null and b/searchlib/src/test/files/testAggregatorResults differ diff --git 
a/searchlib/src/test/files/testFunctionNodes b/searchlib/src/test/files/testFunctionNodes new file mode 100644 index 00000000000..646a445e857 Binary files /dev/null and b/searchlib/src/test/files/testFunctionNodes differ diff --git a/searchlib/src/test/files/testGroup b/searchlib/src/test/files/testGroup new file mode 100644 index 00000000000..6ecf091a865 Binary files /dev/null and b/searchlib/src/test/files/testGroup differ diff --git a/searchlib/src/test/files/testGrouping b/searchlib/src/test/files/testGrouping new file mode 100644 index 00000000000..5dbe3fd9c44 Binary files /dev/null and b/searchlib/src/test/files/testGrouping differ diff --git a/searchlib/src/test/files/testGroupingLevel b/searchlib/src/test/files/testGroupingLevel new file mode 100644 index 00000000000..fd212c02341 Binary files /dev/null and b/searchlib/src/test/files/testGroupingLevel differ diff --git a/searchlib/src/test/files/testHitCollection b/searchlib/src/test/files/testHitCollection new file mode 100644 index 00000000000..f75b8147529 Binary files /dev/null and b/searchlib/src/test/files/testHitCollection differ diff --git a/searchlib/src/test/files/testResultTypes b/searchlib/src/test/files/testResultTypes new file mode 100644 index 00000000000..c2055a34455 Binary files /dev/null and b/searchlib/src/test/files/testResultTypes differ diff --git a/searchlib/src/test/files/testSpecialNodes b/searchlib/src/test/files/testSpecialNodes new file mode 100644 index 00000000000..149cc17f99b Binary files /dev/null and b/searchlib/src/test/files/testSpecialNodes differ diff --git a/searchlib/src/test/files/treenet01.model b/searchlib/src/test/files/treenet01.model new file mode 100644 index 00000000000..6e9fda96f57 --- /dev/null +++ b/searchlib/src/test/files/treenet01.model @@ -0,0 +1,531 @@ +MODELBEGIN: + +/* Generated by DCP*/ +/* N trees: 10 */ + +link TN0; +pred = tnscore; /* predicted value for LABEL */ + + +/*********************/ +/* Model is complete */ +/*********************/ + 
+return; + + +TN0: + +tnscore = 0.0; + +/* Tree 1 of 10 */ + + +N0_1: + if attribute(b) < 0.65 then goto N0_2; + else goto N0_3; + +N0_2: + if attribute(c) < 0.55 then goto N0_4; + else goto N0_5; + +N0_4: + if attribute(a) < 0.55 then goto T0_8; + else goto T0_9; + +T0_8: + response = 0.369863; + goto D0; + +T0_9: + response = -0.6578947; + goto D0; + +N0_5: + if attribute(a) < 0.65 then goto T0_6; + else goto T0_7; + +T0_6: + response = -0.775; + goto D0; + +T0_7: + response = -1; + goto D0; + +N0_3: + if attribute(c) < 0.45 then goto T0_10; + else goto T0_11; + +T0_10: + response = -0.9090909; + goto D0; + +T0_11: + response = -1; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 10 */ + + +N1_1: + if attribute(a) < 0.55 then goto N1_2; + else goto N1_3; + +N1_2: + if attribute(b) < 0.35 then goto N1_4; + else goto N1_5; + +N1_4: + if attribute(c) < 0.75 then goto T1_8; + else goto T1_9; + +T1_8: + response = 0.4327977; + goto D1; + +T1_9: + response = 0.025; + goto D1; + +N1_5: + if attribute(c) < 0.75 then goto T1_6; + else goto T1_7; + +T1_6: + response = -0.1090028; + goto D1; + +T1_7: + response = -0.07682927; + goto D1; + +N1_3: + if attribute(c) < 0.55 then goto T1_10; + else goto T1_11; + +T1_10: + response = -0.04031544; + goto D1; + +T1_11: + response = -0.01875; + goto D1; + +D1: + +tnscore = tnscore + response; + +/* Tree 3 of 10 */ + + +N2_1: + if attribute(b) < 0.35 then goto T2_2; + else goto N2_3; + +T2_2: + response = 0.06336273; + goto D2; + +N2_3: + if attribute(c) < 0.85 then goto N2_4; + else goto N2_5; + +N2_4: + if attribute(a) < 0.35 then goto N2_8; + else goto T2_9; + +N2_8: + if attribute(c) < 0.35 then goto T2_10; + else goto T2_11; + +T2_10: + response = 0.6091127; + goto D2; + +T2_11: + response = 0.02845135; + goto D2; + +T2_9: + response = -0.07638131; + goto D2; + +N2_5: + if attribute(a) < 0.75 then goto T2_6; + else goto T2_7; + +T2_6: + response = -0.018862; + goto D2; + +T2_7: + response = 0.01875; + goto D2; 
+ +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 10 */ + + +N3_1: + if attribute(c) < 0.15 then goto N3_2; + else goto N3_3; + +N3_2: + if attribute(b) < 0.55 then goto N3_4; + else goto T3_5; + +N3_4: + if attribute(a) < 0.35 then goto N3_6; + else goto T3_7; + +N3_6: + if attribute(b) < 0.3 then goto T3_8; + else goto T3_9; + +T3_8: + response = -1.866023; + goto D3; + +T3_9: + response = 0.1300271; + goto D3; + +T3_7: + response = 0.6299557; + goto D3; + +T3_5: + response = 0.1788445; + goto D3; + +N3_3: + if attribute(b) < 0.65 then goto T3_10; + else goto T3_11; + +T3_10: + response = -0.1586424; + goto D3; + +T3_11: + response = 0.06778581; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* Tree 5 of 10 */ + + +N4_1: + if attribute(c) < 0.45 then goto N4_2; + else goto N4_3; + +N4_2: + if attribute(a) < 0.35 then goto N4_4; + else goto N4_5; + +N4_4: + if attribute(b) < 0.75 then goto T4_6; + else goto T4_7; + +T4_6: + response = 0.1426054; + goto D4; + +T4_7: + response = -0.2282; + goto D4; + +N4_5: + if attribute(b) < 0.85 then goto T4_8; + else goto T4_9; + +T4_8: + response = -0.09571452; + goto D4; + +T4_9: + response = -0.04941978; + goto D4; + +N4_3: + if attribute(a) < 0.25 then goto T4_10; + else goto T4_11; + +T4_10: + response = 0.2759441; + goto D4; + +T4_11: + response = 0.0172878; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 10 */ + + +N5_1: + if attribute(a) < 0.15 then goto N5_2; + else goto N5_3; + +N5_2: + if attribute(b) < 0.75 then goto T5_4; + else goto T5_5; + +T5_4: + response = 0.3165435; + goto D5; + +T5_5: + response = -0.04458321; + goto D5; + +N5_3: + if attribute(a) < 0.55 then goto T5_6; + else goto N5_7; + +T5_6: + response = -0.1137117; + goto D5; + +N5_7: + if attribute(b) < 0.75 then goto T5_8; + else goto N5_9; + +T5_8: + response = 0.04622166; + goto D5; + +N5_9: + if attribute(c) < 0.65 then goto T5_10; + else goto T5_11; + +T5_10: + response = 0.004746275; + goto D5; + +T5_11: + 
response = -0.03648972; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 10 */ + + +N6_1: + if attribute(a) < 0.95 then goto N6_2; + else goto T6_3; + +N6_2: + if attribute(b) < 0.25 then goto N6_4; + else goto T6_5; + +N6_4: + if attribute(c) < 0.25 then goto T6_6; + else goto N6_7; + +T6_6: + response = 0.7623822; + goto D6; + +N6_7: + if attribute(a) < 0.65 then goto T6_8; + else goto N6_9; + +T6_8: + response = 0.2338952; + goto D6; + +N6_9: + if attribute(c) < 0.85 then goto T6_10; + else goto T6_11; + +T6_10: + response = -0.06132011; + goto D6; + +T6_11: + response = 0.05052024; + goto D6; + +T6_5: + response = -0.04188744; + goto D6; + +T6_3: + response = -0.03245768; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 10 */ + + +N7_1: + if attribute(c) < 0.55 then goto N7_2; + else goto N7_3; + +N7_2: + if attribute(b) < 0.65 then goto T7_4; + else goto T7_5; + +T7_4: + response = -0.2042442; + goto D7; + +T7_5: + response = 0.03887484; + goto D7; + +N7_3: + if attribute(b) < 0.25 then goto T7_6; + else goto N7_7; + +T7_6: + response = -0.0474437; + goto D7; + +N7_7: + if attribute(a) < 0.15 then goto T7_8; + else goto N7_9; + +T7_8: + response = -0.3700475; + goto D7; + +N7_9: + if attribute(a) < 0.65 then goto T7_10; + else goto T7_11; + +T7_10: + response = 0.07656199; + goto D7; + +T7_11: + response = 0.1085871; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 10 */ + + +N8_1: + if attribute(a) < 0.75 then goto T8_2; + else goto N8_3; + +T8_2: + response = 0.0189638; + goto D8; + +N8_3: + if attribute(b) < 0.85 then goto T8_4; + else goto N8_5; + +T8_4: + response = 0.0001942833; + goto D8; + +N8_5: + if attribute(c) < 0.85 then goto N8_6; + else goto T8_7; + +N8_6: + if attribute(c) < 0.45 then goto T8_8; + else goto N8_9; + +T8_8: + response = -0.009795157; + goto D8; + +N8_9: + if attribute(a) < 0.85 then goto T8_10; + else goto T8_11; + +T8_10: + response = -0.01795083; + goto D8; + +T8_11: + 
response = -0.01329222; + goto D8; + +T8_7: + response = -0.1179778; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 10 */ + + +N9_1: + if attribute(c) < 0.75 then goto N9_2; + else goto N9_3; + +N9_2: + if attribute(b) < 0.45 then goto N9_4; + else goto T9_5; + +N9_4: + if attribute(c) < 0.15 then goto T9_6; + else goto N9_7; + +T9_6: + response = -0.4551494; + goto D9; + +N9_7: + if attribute(c) < 0.65 then goto T9_8; + else goto T9_9; + +T9_8: + response = 0.1471968; + goto D9; + +T9_9: + response = -0.06380587; + goto D9; + +T9_5: + response = 0.03410008; + goto D9; + +N9_3: + if attribute(b) < 0.65 then goto T9_10; + else goto T9_11; + +T9_10: + response = -0.06397114; + goto D9; + +T9_11: + response = -0.01491517; + goto D9; + +D9: + +tnscore = tnscore + response; + +return; diff --git a/searchlib/src/test/files/treenet02.model b/searchlib/src/test/files/treenet02.model new file mode 100644 index 00000000000..3c7522a76d6 --- /dev/null +++ b/searchlib/src/test/files/treenet02.model @@ -0,0 +1,11784 @@ +MODELBEGIN: + + /* N trees: 500 */ + +link TN0; +pred = tnscore; /* predicted value for GRADE */ + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +tnscore = 0.0; + +/* Tree 1 of 80 */ +N0_1: + if attribute(ythl) < 0.5000000000 then goto N0_2; + else goto N0_12; + +N0_2: + if age(created_at) < 1830.0000000000 then goto N0_3; + else goto N0_10; + +N0_3: + if term(0).significance < 0.9964904785 then goto N0_4; + else goto N0_5; + +N0_4: + if match < 0.6113165021 then goto T0_1; + else goto T0_2; + +T0_1: + response = -0.0284270267; + goto D0; + +T0_2: + response = 0.0003592783; + goto D0; + +N0_5: + if fieldMatch(text).significantOccurrence < 0.0488094985 then goto N0_6; + else goto N0_7; + +N0_6: + if attribute(user_friends_count) < 184.5000000000 then goto T0_3; + else goto T0_4; + +T0_3: + response = -0.0124428511; + goto D0; + +T0_4: + response = 0.0077143433; + goto D0; + +N0_7: + if 
term(0).significance < 0.9977675080 then goto T0_5; + else goto N0_8; + +T0_5: + response = -0.0390395696; + goto D0; + +N0_8: + if term(1).significance < 0.9895755053 then goto T0_6; + else goto N0_9; + +T0_6: + response = 0.0225917600; + goto D0; + +N0_9: + if fieldMatch(text).significantOccurrence < 0.1335410029 then goto T0_7; + else goto T0_8; + +T0_7: + response = -0.0167178600; + goto D0; + +T0_8: + response = -0.0425634221; + goto D0; + +N0_10: + if age(created_at) < 5400.0000000000 then goto T0_9; + else goto N0_11; + +T0_9: + response = -0.0466900690; + goto D0; + +N0_11: + if age(created_at) < 45000.0000000000 then goto T0_10; + else goto T0_11; + +T0_10: + response = -0.0257095410; + goto D0; + +T0_11: + response = -0.0128030420; + goto D0; + +N0_12: + if age(created_at) < 1830.0000000000 then goto N0_13; + else goto N0_16; + +N0_13: + if fieldMatch(text).importance < 0.7488905191 then goto N0_14; + else goto T0_15; + +N0_14: + if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto N0_15; + else goto T0_14; + +N0_15: + if fieldMatch(text) < 0.8492144942 then goto T0_12; + else goto T0_13; + +T0_12: + response = 0.0288744693; + goto D0; + +T0_13: + response = 0.0439309311; + goto D0; + +T0_14: + response = 0.0566558463; + goto D0; + +T0_15: + response = 0.0594293259; + goto D0; + +N0_16: + if age(created_at) < 5400.0000000000 then goto T0_16; + else goto N0_17; + +T0_16: + response = -0.0184100055; + goto D0; + +N0_17: + if age(created_at) < 27000.0000000000 then goto T0_17; + else goto T0_18; + +T0_17: + response = -0.0003458478; + goto D0; + +T0_18: + response = 0.0087464789; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 80 */ +N1_1: + if attribute(ythl) < 0.5000000000 then goto N1_2; + else goto N1_12; + +N1_2: + if age(created_at) < 1830.0000000000 then goto N1_3; + else goto N1_10; + +N1_3: + if term(0).significance < 0.9964904785 then goto N1_4; + else goto N1_5; + +N1_4: + if term(0).significance < 0.9914690256 then 
goto T1_1; + else goto T1_2; + +T1_1: + response = -0.0118607453; + goto D1; + +T1_2: + response = 0.0031225791; + goto D1; + +N1_5: + if term(0).significance < 0.9977675080 then goto N1_6; + else goto N1_8; + +N1_6: + if term(1).significance < 0.9883980155 then goto T1_3; + else goto N1_7; + +T1_3: + response = -0.0803907557; + goto D1; + +N1_7: + if attribute(yst_link_array_size) < 0.0250600018 then goto T1_4; + else goto T1_5; + +T1_4: + response = -0.0303931857; + goto D1; + +T1_5: + response = 0.0259097321; + goto D1; + +N1_8: + if term(1).significance < 0.9972054958 then goto T1_6; + else goto N1_9; + +T1_6: + response = 0.0065438125; + goto D1; + +N1_9: + if term(1).significance < 0.9975290298 then goto T1_7; + else goto T1_8; + +T1_7: + response = -0.0913176725; + goto D1; + +T1_8: + response = -0.0123125115; + goto D1; + +N1_10: + if age(created_at) < 5400.0000000000 then goto T1_9; + else goto N1_11; + +T1_9: + response = -0.0448246506; + goto D1; + +N1_11: + if age(created_at) < 23400.0000000000 then goto T1_10; + else goto T1_11; + +T1_10: + response = -0.0262210797; + goto D1; + +T1_11: + response = -0.0146461827; + goto D1; + +N1_12: + if age(created_at) < 1830.0000000000 then goto N1_13; + else goto N1_15; + +N1_13: + if fieldMatch(text).importance < 0.7488645315 then goto N1_14; + else goto T1_14; + +N1_14: + if fieldMatch(text) < 0.8569909930 then goto T1_12; + else goto T1_13; + +T1_12: + response = 0.0307982478; + goto D1; + +T1_13: + response = 0.0469020946; + goto D1; + +T1_14: + response = 0.0558564997; + goto D1; + +N1_15: + if age(created_at) < 5400.0000000000 then goto T1_15; + else goto N1_16; + +T1_15: + response = -0.0166881751; + goto D1; + +N1_16: + if fieldMatch(text) < 0.3820354939 then goto T1_16; + else goto N1_17; + +T1_16: + response = -0.0035580609; + goto D1; + +N1_17: + if age(created_at) < 30600.0000000000 then goto T1_17; + else goto T1_18; + +T1_17: + response = 0.0028319521; + goto D1; + +T1_18: + response = 0.0104819912; 
+ goto D1; + +D1: + +tnscore = tnscore + response; + +/* Tree 3 of 80 */ +N2_1: + if attribute(ythl) < 0.5000000000 then goto N2_2; + else goto N2_12; + +N2_2: + if age(created_at) < 1830.0000000000 then goto N2_3; + else goto N2_9; + +N2_3: + if fieldMatch(text).tail < 7.5000000000 then goto N2_4; + else goto N2_6; + +N2_4: + if match < 0.6697604656 then goto T2_1; + else goto N2_5; + +T2_1: + response = -0.0323866906; + goto D2; + +N2_5: + if term(0).significance < 0.9955350161 then goto T2_2; + else goto T2_3; + +T2_2: + response = -0.0025720554; + goto D2; + +T2_3: + response = -0.0170321274; + goto D2; + +N2_6: + if attribute(user_followers_count) < 489.5000000000 then goto N2_7; + else goto T2_7; + +N2_7: + if term(0).significance < 0.9964904785 then goto T2_4; + else goto N2_8; + +T2_4: + response = 0.0035465045; + goto D2; + +N2_8: + if term(0).significance < 0.9992840290 then goto T2_5; + else goto T2_6; + +T2_5: + response = -0.0205069971; + goto D2; + +T2_6: + response = 0.0010003389; + goto D2; + +T2_7: + response = 0.0149904595; + goto D2; + +N2_9: + if age(created_at) < 9000.0000000000 then goto N2_10; + else goto N2_11; + +N2_10: + if age(created_at) < 3570.0000000000 then goto T2_8; + else goto T2_9; + +T2_8: + response = -0.0501614448; + goto D2; + +T2_9: + response = -0.0347695722; + goto D2; + +N2_11: + if age(created_at) < 45000.0000000000 then goto T2_10; + else goto T2_11; + +T2_10: + response = -0.0217186612; + goto D2; + +T2_11: + response = -0.0115826893; + goto D2; + +N2_12: + if age(created_at) < 1830.0000000000 then goto N2_13; + else goto N2_15; + +N2_13: + if fieldMatch(text).occurrence < 0.1277174950 then goto N2_14; + else goto T2_14; + +N2_14: + if fieldMatch(text) < 0.8662694693 then goto T2_12; + else goto T2_13; + +T2_12: + response = 0.0270881826; + goto D2; + +T2_13: + response = 0.0422977189; + goto D2; + +T2_14: + response = 0.0471192106; + goto D2; + +N2_15: + if age(created_at) < 5400.0000000000 then goto N2_16; + else goto 
N2_17; + +N2_16: + if fieldMatch(text).importance < 0.6664260030 then goto T2_15; + else goto T2_16; + +T2_15: + response = -0.0232114640; + goto D2; + +T2_16: + response = -0.0105863112; + goto D2; + +N2_17: + if age(created_at) < 19800.0000000000 then goto T2_17; + else goto T2_18; + +T2_17: + response = -0.0011091805; + goto D2; + +T2_18: + response = 0.0079984015; + goto D2; + +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 80 */ +N3_1: + if attribute(ythl) < 0.5000000000 then goto N3_2; + else goto N3_9; + +N3_2: + if age(created_at) < 1830.0000000000 then goto N3_3; + else goto N3_7; + +N3_3: + if term(1).significance < 0.8159549832 then goto T3_1; + else goto N3_4; + +T3_1: + response = 0.0342704034; + goto D3; + +N3_4: + if attribute(user_statuses_count) < 574.0000000000 then goto T3_2; + else goto N3_5; + +T3_2: + response = -0.0031829638; + goto D3; + +N3_5: + if fieldMatch(text).weightedOccurrence < 0.1091270000 then goto N3_6; + else goto T3_5; + +N3_6: + if fieldMatch(text) < 0.8472499847 then goto T3_3; + else goto T3_4; + +T3_3: + response = -0.0193605912; + goto D3; + +T3_4: + response = -0.0038142662; + goto D3; + +T3_5: + response = -0.0308342022; + goto D3; + +N3_7: + if age(created_at) < 5400.0000000000 then goto T3_6; + else goto N3_8; + +T3_6: + response = -0.0418216807; + goto D3; + +N3_8: + if age(created_at) < 48600.0000000000 then goto T3_7; + else goto T3_8; + +T3_7: + response = -0.0237625386; + goto D3; + +T3_8: + response = -0.0115288531; + goto D3; + +N3_9: + if age(created_at) < 1830.0000000000 then goto N3_10; + else goto N3_16; + +N3_10: + if fieldMatch(text).importance < 0.7488905191 then goto N3_11; + else goto T3_15; + +N3_11: + if term(0).significance < 0.9918209910 then goto N3_12; + else goto N3_14; + +N3_12: + if term(2).significance < 0.9838794470 then goto N3_13; + else goto T3_11; + +N3_13: + if fieldLength(text) < 23.5000000000 then goto T3_9; + else goto T3_10; + +T3_9: + response = 0.0091275797; + goto D3; + 
+T3_10: + response = -0.0681415824; + goto D3; + +T3_11: + response = 0.0280728758; + goto D3; + +N3_14: + if fieldTermMatch(text,0).firstPosition < 9.5000000000 then goto N3_15; + else goto T3_14; + +N3_15: + if attribute(user_followers_count) < 2165.5000000000 then goto T3_12; + else goto T3_13; + +T3_12: + response = 0.0378854321; + goto D3; + +T3_13: + response = 0.0555394610; + goto D3; + +T3_14: + response = 0.0261930857; + goto D3; + +T3_15: + response = 0.0496898680; + goto D3; + +N3_16: + if age(created_at) < 12600.0000000000 then goto N3_17; + else goto T3_18; + +N3_17: + if age(created_at) < 5400.0000000000 then goto T3_16; + else goto T3_17; + +T3_16: + response = -0.0136135545; + goto D3; + +T3_17: + response = -0.0029542657; + goto D3; + +T3_18: + response = 0.0066915734; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* Tree 5 of 80 */ +N4_1: + if attribute(ythl) < 0.5000000000 then goto N4_2; + else goto N4_10; + +N4_2: + if age(created_at) < 1830.0000000000 then goto N4_3; + else goto N4_8; + +N4_3: + if term(1).significance < 0.7788045406 then goto T4_1; + else goto N4_4; + +T4_1: + response = 0.0678231236; + goto D4; + +N4_4: + if fieldMatch(text).significantOccurrence < 0.1249914989 then goto N4_5; + else goto T4_6; + +N4_5: + if attribute(user_statuses_count) < 103.5000000000 then goto T4_2; + else goto N4_6; + +T4_2: + response = 0.0047730322; + goto D4; + +N4_6: + if attribute(user_followers_count) < 3070.5000000000 then goto N4_7; + else goto T4_5; + +N4_7: + if fieldMatch(text).earliness < 0.8834840059 then goto T4_3; + else goto T4_4; + +T4_3: + response = -0.0130691877; + goto D4; + +T4_4: + response = 0.0030931972; + goto D4; + +T4_5: + response = 0.0212955094; + goto D4; + +T4_6: + response = -0.0250041155; + goto D4; + +N4_8: + if age(created_at) < 5400.0000000000 then goto T4_7; + else goto N4_9; + +T4_7: + response = -0.0386563137; + goto D4; + +N4_9: + if age(created_at) < 48600.0000000000 then goto T4_8; + else goto T4_9; + 
+T4_8: + response = -0.0213844929; + goto D4; + +T4_9: + response = -0.0116543752; + goto D4; + +N4_10: + if age(created_at) < 1830.0000000000 then goto N4_11; + else goto N4_15; + +N4_11: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N4_12; + else goto T4_14; + +N4_12: + if fieldMatch(text).earliness < 0.5683230162 then goto N4_13; + else goto N4_14; + +N4_13: + if fieldMatch(text).gapLength < 5.5000000000 then goto T4_10; + else goto T4_11; + +T4_10: + response = 0.0208840083; + goto D4; + +T4_11: + response = -0.0392353393; + goto D4; + +N4_14: + if term(0).significance < 0.9139549732 then goto T4_12; + else goto T4_13; + +T4_12: + response = -0.0599160780; + goto D4; + +T4_13: + response = 0.0359567192; + goto D4; + +T4_14: + response = 0.0417870117; + goto D4; + +N4_15: + if age(created_at) < 5400.0000000000 then goto T4_15; + else goto N4_16; + +T4_15: + response = -0.0124473711; + goto D4; + +N4_16: + if age(created_at) < 27000.0000000000 then goto T4_16; + else goto N4_17; + +T4_16: + response = -0.0003908889; + goto D4; + +N4_17: + if fieldMatch(text) < 0.5566675067 then goto T4_17; + else goto T4_18; + +T4_17: + response = 0.0009453270; + goto D4; + +T4_18: + response = 0.0097812185; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 80 */ +N5_1: + if attribute(ythl) < 0.5000000000 then goto N5_2; + else goto N5_9; + +N5_2: + if age(created_at) < 1830.0000000000 then goto N5_3; + else goto N5_7; + +N5_3: + if term(1).significance < 0.8159549832 then goto N5_4; + else goto N5_5; + +N5_4: + if term(0).significance < 0.9873124957 then goto T5_1; + else goto T5_2; + +T5_1: + response = 0.0625267810; + goto D5; + +T5_2: + response = -0.0025881996; + goto D5; + +N5_5: + if fieldMatch(text).tail < 7.5000000000 then goto N5_6; + else goto T5_5; + +N5_6: + if attribute(user_statuses_count) < 504.0000000000 then goto T5_3; + else goto T5_4; + +T5_3: + response = -0.0072144471; + goto D5; + +T5_4: + response = -0.0184304751; + goto 
D5; + +T5_5: + response = -0.0041050691; + goto D5; + +N5_7: + if age(created_at) < 5400.0000000000 then goto T5_6; + else goto N5_8; + +T5_6: + response = -0.0342922301; + goto D5; + +N5_8: + if age(created_at) < 52200.0000000000 then goto T5_7; + else goto T5_8; + +T5_7: + response = -0.0213685384; + goto D5; + +T5_8: + response = -0.0114302758; + goto D5; + +N5_9: + if age(created_at) < 1830.0000000000 then goto N5_10; + else goto N5_16; + +N5_10: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N5_11; + else goto T5_15; + +N5_11: + if fieldMatch(text).tail < 7.5000000000 then goto N5_12; + else goto N5_15; + +N5_12: + if fieldMatch(text).significantOccurrence < 0.0574909970 then goto N5_13; + else goto N5_14; + +N5_13: + if term(0).significance < 0.9980279803 then goto T5_9; + else goto T5_10; + +T5_9: + response = -0.0131328933; + goto D5; + +T5_10: + response = 0.0192113014; + goto D5; + +N5_14: + if fieldMatch(text) < 0.8584204912 then goto T5_11; + else goto T5_12; + +T5_11: + response = 0.0314073419; + goto D5; + +T5_12: + response = -0.0026767115; + goto D5; + +N5_15: + if fieldMatch(text).fieldCompleteness < 0.0392310023 then goto T5_13; + else goto T5_14; + +T5_13: + response = -0.0016304919; + goto D5; + +T5_14: + response = 0.0347034740; + goto D5; + +T5_15: + response = 0.0373450153; + goto D5; + +N5_16: + if age(created_at) < 12600.0000000000 then goto N5_17; + else goto T5_18; + +N5_17: + if age(created_at) < 5400.0000000000 then goto T5_16; + else goto T5_17; + +T5_16: + response = -0.0106738218; + goto D5; + +T5_17: + response = -0.0029072167; + goto D5; + +T5_18: + response = 0.0056105069; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 80 */ +N6_1: + if attribute(ythl) < 0.5000000000 then goto N6_2; + else goto N6_9; + +N6_2: + if age(created_at) < 1830.0000000000 then goto N6_3; + else goto N6_6; + +N6_3: + if fieldMatch(text).tail < 12.5000000000 then goto N6_4; + else goto T6_4; + +N6_4: + if 
attribute(user_statuses_count) < 826.0000000000 then goto T6_1; + else goto N6_5; + +T6_1: + response = -0.0058871349; + goto D6; + +N6_5: + if fieldMatch(text).earliness < 0.8774999976 then goto T6_2; + else goto T6_3; + +T6_2: + response = -0.0128456148; + goto D6; + +T6_3: + response = -0.0362508217; + goto D6; + +T6_4: + response = 0.0039172531; + goto D6; + +N6_6: + if age(created_at) < 5400.0000000000 then goto T6_5; + else goto N6_7; + +T6_5: + response = -0.0332743660; + goto D6; + +N6_7: + if age(created_at) < 48600.0000000000 then goto N6_8; + else goto T6_8; + +N6_8: + if fieldMatch(text) < 0.5479695201 then goto T6_6; + else goto T6_7; + +T6_6: + response = -0.0292307762; + goto D6; + +T6_7: + response = -0.0167816152; + goto D6; + +T6_8: + response = -0.0103426077; + goto D6; + +N6_9: + if age(created_at) < 1830.0000000000 then goto N6_10; + else goto N6_15; + +N6_10: + if fieldMatch(text).earliness < 0.6510869861 then goto N6_11; + else goto N6_13; + +N6_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N6_12; + else goto T6_11; + +N6_12: + if fieldMatch(text) < 0.2023105025 then goto T6_9; + else goto T6_10; + +T6_9: + response = -0.0093665406; + goto D6; + +T6_10: + response = 0.0265294786; + goto D6; + +T6_11: + response = -0.0163231950; + goto D6; + +N6_13: + if term(2).significance < 0.9791975021 then goto N6_14; + else goto T6_14; + +N6_14: + if attribute(yst_reply_auth) < 2.5000000000 then goto T6_12; + else goto T6_13; + +T6_12: + response = 0.0195383609; + goto D6; + +T6_13: + response = 0.0376308584; + goto D6; + +T6_14: + response = 0.0386834550; + goto D6; + +N6_15: + if age(created_at) < 12600.0000000000 then goto N6_16; + else goto T6_18; + +N6_16: + if term(4).significance < 0.9926320314 then goto N6_17; + else goto T6_17; + +N6_17: + if age(created_at) < 5400.0000000000 then goto T6_15; + else goto T6_16; + +T6_15: + response = -0.0119040624; + goto D6; + +T6_16: + response = -0.0033941367; + goto D6; + +T6_17: + 
response = 0.0037785770; + goto D6; + +T6_18: + response = 0.0059147929; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 80 */ +N7_1: + if attribute(ythl) < 0.5000000000 then goto N7_2; + else goto N7_10; + +N7_2: + if age(created_at) < 1830.0000000000 then goto N7_3; + else goto N7_8; + +N7_3: + if fieldMatch(text).tail < 10.5000000000 then goto N7_4; + else goto T7_6; + +N7_4: + if fieldMatch(text).importance < 0.7443764806 then goto N7_5; + else goto N7_6; + +N7_5: + if match < 0.9134370089 then goto T7_1; + else goto T7_2; + +T7_1: + response = -0.0252698647; + goto D7; + +T7_2: + response = -0.0099637807; + goto D7; + +N7_6: + if term(2).significance < 0.7909680009 then goto T7_3; + else goto N7_7; + +T7_3: + response = -0.0344071695; + goto D7; + +N7_7: + if term(0).significance < 0.9947484732 then goto T7_4; + else goto T7_5; + +T7_4: + response = 0.0145760432; + goto D7; + +T7_5: + response = -0.0105169825; + goto D7; + +T7_6: + response = -0.0002567620; + goto D7; + +N7_8: + if age(created_at) < 12600.0000000000 then goto N7_9; + else goto T7_9; + +N7_9: + if age(created_at) < 5400.0000000000 then goto T7_7; + else goto T7_8; + +T7_7: + response = -0.0322518692; + goto D7; + +T7_8: + response = -0.0221817109; + goto D7; + +T7_9: + response = -0.0127554041; + goto D7; + +N7_10: + if age(created_at) < 1830.0000000000 then goto N7_11; + else goto N7_15; + +N7_11: + if fieldMatch(text).earliness < 0.7247474790 then goto N7_12; + else goto N7_14; + +N7_12: + if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T7_10; + else goto N7_13; + +T7_10: + response = 0.0292128233; + goto D7; + +N7_13: + if fieldMatch(text) < 0.3254045248 then goto T7_11; + else goto T7_12; + +T7_11: + response = -0.0359002315; + goto D7; + +T7_12: + response = 0.0194921959; + goto D7; + +N7_14: + if attribute(user_followers_count) < 609.5000000000 then goto T7_13; + else goto T7_14; + +T7_13: + response = 0.0299861583; + goto D7; + +T7_14: + response = 
0.0421930400; + goto D7; + +N7_15: + if age(created_at) < 5400.0000000000 then goto N7_16; + else goto N7_17; + +N7_16: + if fieldMatch(text).occurrence < 0.1731635034 then goto T7_15; + else goto T7_16; + +T7_15: + response = -0.0134935559; + goto D7; + +T7_16: + response = -0.0027367126; + goto D7; + +N7_17: + if term(1).significance < 0.9878399968 then goto T7_17; + else goto T7_18; + +T7_17: + response = 0.0121433273; + goto D7; + +T7_18: + response = 0.0020006783; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 80 */ +N8_1: + if attribute(ythl) < 0.5000000000 then goto N8_2; + else goto N8_9; + +N8_2: + if age(created_at) < 1830.0000000000 then goto N8_3; + else goto N8_7; + +N8_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N8_4; + else goto N8_6; + +N8_4: + if fieldTermMatch(text,0).firstPosition < 14.5000000000 then goto N8_5; + else goto T8_3; + +N8_5: + if fieldMatch(text).importance < 0.7413114905 then goto T8_1; + else goto T8_2; + +T8_1: + response = -0.0091155042; + goto D8; + +T8_2: + response = 0.0233289393; + goto D8; + +T8_3: + response = -0.0239608468; + goto D8; + +N8_6: + if attribute(user_followers_count) < 2995.0000000000 then goto T8_4; + else goto T8_5; + +T8_4: + response = -0.0016973828; + goto D8; + +T8_5: + response = 0.0438873528; + goto D8; + +N8_7: + if age(created_at) < 12600.0000000000 then goto N8_8; + else goto T8_8; + +N8_8: + if age(created_at) < 3570.0000000000 then goto T8_6; + else goto T8_7; + +T8_6: + response = -0.0345805450; + goto D8; + +T8_7: + response = -0.0236031788; + goto D8; + +T8_8: + response = -0.0119280014; + goto D8; + +N8_9: + if age(created_at) < 1830.0000000000 then goto N8_10; + else goto N8_15; + +N8_10: + if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T8_9; + else goto N8_11; + +T8_9: + response = 0.0323605063; + goto D8; + +N8_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N8_12; + else goto T8_14; + +N8_12: + if 
fieldTermMatch(text,0).firstPosition < 3.5000000000 then goto N8_13; + else goto N8_14; + +N8_13: + if attribute(user_followers_count) < 114.5000000000 then goto T8_10; + else goto T8_11; + +T8_10: + response = 0.0149219697; + goto D8; + +T8_11: + response = 0.0383892131; + goto D8; + +N8_14: + if fieldMatch(text) < 0.3404299915 then goto T8_12; + else goto T8_13; + +T8_12: + response = -0.0214082868; + goto D8; + +T8_13: + response = 0.0183146341; + goto D8; + +T8_14: + response = -0.0199916697; + goto D8; + +N8_15: + if age(created_at) < 9000.0000000000 then goto T8_15; + else goto N8_16; + +T8_15: + response = -0.0076472907; + goto D8; + +N8_16: + if fieldMatch(text) < 0.5607429743 then goto T8_16; + else goto N8_17; + +T8_16: + response = -0.0000169083; + goto D8; + +N8_17: + if term(1).significance < 0.8870275021 then goto T8_17; + else goto T8_18; + +T8_17: + response = 0.0275141633; + goto D8; + +T8_18: + response = 0.0058735097; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 80 */ +N9_1: + if attribute(ythl) < 0.5000000000 then goto N9_2; + else goto N9_10; + +N9_2: + if age(created_at) < 1830.0000000000 then goto N9_3; + else goto N9_9; + +N9_3: + if term(0).significance < 0.9964904785 then goto N9_4; + else goto N9_5; + +N9_4: + if fieldMatch(text) < 0.1415009946 then goto T9_1; + else goto T9_2; + +T9_1: + response = -0.0331371143; + goto D9; + +T9_2: + response = 0.0016819061; + goto D9; + +N9_5: + if term(0).significance < 0.9974014759 then goto N9_6; + else goto N9_7; + +N9_6: + if term(1).significance < 0.9943025112 then goto T9_3; + else goto T9_4; + +T9_3: + response = -0.0561295193; + goto D9; + +T9_4: + response = -0.0143235877; + goto D9; + +N9_7: + if term(0).significance < 0.9975079894 then goto T9_5; + else goto N9_8; + +T9_5: + response = 0.0456376595; + goto D9; + +N9_8: + if term(0).significance < 0.9976614714 then goto T9_6; + else goto T9_7; + +T9_6: + response = -0.0617225433; + goto D9; + +T9_7: + response = 
-0.0089081592; + goto D9; + +N9_9: + if age(created_at) < 12600.0000000000 then goto T9_8; + else goto T9_9; + +T9_8: + response = -0.0250708949; + goto D9; + +T9_9: + response = -0.0120490174; + goto D9; + +N9_10: + if age(created_at) < 1830.0000000000 then goto N9_11; + else goto N9_15; + +N9_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N9_12; + else goto T9_14; + +N9_12: + if fieldMatch(text).significantOccurrence < 0.0422540009 then goto T9_10; + else goto N9_13; + +T9_10: + response = 0.0132403332; + goto D9; + +N9_13: + if fieldMatch(text).importance < 0.7485179901 then goto N9_14; + else goto T9_13; + +N9_14: + if fieldMatch(text).tail < 10.5000000000 then goto T9_11; + else goto T9_12; + +T9_11: + response = 0.0224059642; + goto D9; + +T9_12: + response = 0.0317363105; + goto D9; + +T9_13: + response = 0.0363809447; + goto D9; + +T9_14: + response = -0.0059409077; + goto D9; + +N9_15: + if age(created_at) < 12600.0000000000 then goto T9_15; + else goto N9_16; + +T9_15: + response = -0.0059533220; + goto D9; + +N9_16: + if match < 0.7504960299 then goto N9_17; + else goto T9_18; + +N9_17: + if fieldMatch(text).occurrence < 0.1318840086 then goto T9_16; + else goto T9_17; + +T9_16: + response = -0.0092412181; + goto D9; + +T9_17: + response = 0.0036779089; + goto D9; + +T9_18: + response = 0.0067221979; + goto D9; + +D9: + +tnscore = tnscore + response; + +/* Tree 11 of 80 */ +N10_1: + if attribute(ythl) < 0.5000000000 then goto N10_2; + else goto N10_11; + +N10_2: + if age(created_at) < 1770.0000000000 then goto N10_3; + else goto N10_9; + +N10_3: + if term(0).significance < 0.9964904785 then goto N10_4; + else goto N10_7; + +N10_4: + if fieldMatch(text) < 0.2731105089 then goto T10_1; + else goto N10_5; + +T10_1: + response = -0.0169751683; + goto D10; + +N10_5: + if attribute(yst_reply_auth) < 16.5000000000 then goto T10_2; + else goto N10_6; + +T10_2: + response = 0.0106972872; + goto D10; + +N10_6: + if term(1).significance < 
0.8159549832 then goto T10_3; + else goto T10_4; + +T10_3: + response = 0.0454901055; + goto D10; + +T10_4: + response = -0.0067703435; + goto D10; + +N10_7: + if term(0).significance < 0.9966599941 then goto T10_5; + else goto N10_8; + +T10_5: + response = -0.0415369371; + goto D10; + +N10_8: + if match.totalWeight < 250.0000000000 then goto T10_6; + else goto T10_7; + +T10_6: + response = -0.0080140966; + goto D10; + +T10_7: + response = 0.0488608858; + goto D10; + +N10_9: + if age(created_at) < 12600.0000000000 then goto T10_8; + else goto N10_10; + +T10_8: + response = -0.0229133495; + goto D10; + +N10_10: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T10_9; + else goto T10_10; + +T10_9: + response = -0.0125676511; + goto D10; + +T10_10: + response = 0.0002328845; + goto D10; + +N10_11: + if age(created_at) < 1830.0000000000 then goto N10_12; + else goto N10_15; + +N10_12: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N10_13; + else goto T10_14; + +N10_13: + if term(0).significance < 0.9184160233 then goto T10_11; + else goto N10_14; + +T10_11: + response = -0.0549048781; + goto D10; + +N10_14: + if fieldLength(text) < 23.5000000000 then goto T10_12; + else goto T10_13; + +T10_12: + response = 0.0226244877; + goto D10; + +T10_13: + response = 0.0077881056; + goto D10; + +T10_14: + response = 0.0280730521; + goto D10; + +N10_15: + if age(created_at) < 9000.0000000000 then goto N10_16; + else goto N10_17; + +N10_16: + if fieldMatch(text) < 0.3730605245 then goto T10_15; + else goto T10_16; + +T10_15: + response = -0.0167181189; + goto D10; + +T10_16: + response = -0.0044234172; + goto D10; + +N10_17: + if fieldMatch(text) < 0.5543889999 then goto T10_17; + else goto T10_18; + +T10_17: + response = -0.0008470900; + goto D10; + +T10_18: + response = 0.0055458527; + goto D10; + +D10: + +tnscore = tnscore + response; + +/* Tree 12 of 80 */ +N11_1: + if attribute(ythl) < 0.5000000000 then goto N11_2; + else goto N11_9; + +N11_2: + 
if age(created_at) < 1770.0000000000 then goto N11_3; + else goto N11_8; + +N11_3: + if match.totalWeight < 250.0000000000 then goto N11_4; + else goto T11_6; + +N11_4: + if fieldTermMatch(text,1).firstPosition < 13.5000000000 then goto N11_5; + else goto N11_7; + +N11_5: + if term(0).significance < 0.9701889753 then goto T11_1; + else goto N11_6; + +T11_1: + response = -0.0195353072; + goto D11; + +N11_6: + if term(0).significance < 0.9965775013 then goto T11_2; + else goto T11_3; + +T11_2: + response = 0.0073931107; + goto D11; + +T11_3: + response = -0.0074860039; + goto D11; + +N11_7: + if fieldMatch(text) < 0.6285369992 then goto T11_4; + else goto T11_5; + +T11_4: + response = -0.0322505986; + goto D11; + +T11_5: + response = -0.0073317181; + goto D11; + +T11_6: + response = 0.0451330307; + goto D11; + +N11_8: + if age(created_at) < 45000.0000000000 then goto T11_7; + else goto T11_8; + +T11_7: + response = -0.0206455453; + goto D11; + +T11_8: + response = -0.0085888986; + goto D11; + +N11_9: + if age(created_at) < 1830.0000000000 then goto N11_10; + else goto N11_14; + +N11_10: + if fieldTermMatch(text,1).firstPosition < 14.5000000000 then goto T11_9; + else goto N11_11; + +T11_9: + response = 0.0265081733; + goto D11; + +N11_11: + if fieldMatch(text) < 0.3104079962 then goto T11_10; + else goto N11_12; + +T11_10: + response = -0.0291219391; + goto D11; + +N11_12: + if attribute(yst_tweet_language) < 3271.5000000000 then goto N11_13; + else goto T11_13; + +N11_13: + if attribute(user_friends_count) < 146.5000000000 then goto T11_11; + else goto T11_12; + +T11_11: + response = 0.0133927786; + goto D11; + +T11_12: + response = 0.0247206105; + goto D11; + +T11_13: + response = -0.0249098053; + goto D11; + +N11_14: + if age(created_at) < 12600.0000000000 then goto N11_15; + else goto N11_17; + +N11_15: + if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto T11_14; + else goto N11_16; + +T11_14: + response = 0.0013211001; + goto D11; + +N11_16: + if 
fieldMatch(text).importance < 0.6664245129 then goto T11_15; + else goto T11_16; + +T11_15: + response = -0.0124234916; + goto D11; + +T11_16: + response = -0.0044820648; + goto D11; + +N11_17: + if fieldMatch(text).significantOccurrence < 0.0555050001 then goto T11_17; + else goto T11_18; + +T11_17: + response = 0.0006152863; + goto D11; + +T11_18: + response = 0.0069791274; + goto D11; + +D11: + +tnscore = tnscore + response; + +/* Tree 13 of 80 */ +N12_1: + if attribute(ythl) < 0.5000000000 then goto N12_2; + else goto N12_9; + +N12_2: + if age(created_at) < 1830.0000000000 then goto N12_3; + else goto N12_8; + +N12_3: + if term(1).significance < 0.7788045406 then goto T12_1; + else goto N12_4; + +T12_1: + response = 0.0442178195; + goto D12; + +N12_4: + if term(0).significance < 0.9492504597 then goto T12_2; + else goto N12_5; + +T12_2: + response = -0.0249224413; + goto D12; + +N12_5: + if fieldMatch(text).tail < 11.5000000000 then goto N12_6; + else goto T12_6; + +N12_6: + if fieldMatch(text).importance < 0.7480239868 then goto T12_3; + else goto N12_7; + +T12_3: + response = -0.0099065850; + goto D12; + +N12_7: + if term(0).significance < 0.9947484732 then goto T12_4; + else goto T12_5; + +T12_4: + response = 0.0121048215; + goto D12; + +T12_5: + response = -0.0132930884; + goto D12; + +T12_6: + response = 0.0006079666; + goto D12; + +N12_8: + if age(created_at) < 9000.0000000000 then goto T12_7; + else goto T12_8; + +T12_7: + response = -0.0219397199; + goto D12; + +T12_8: + response = -0.0106952111; + goto D12; + +N12_9: + if age(created_at) < 1830.0000000000 then goto N12_10; + else goto N12_14; + +N12_10: + if fieldMatch(text).absoluteOccurrence < 0.0136665003 then goto N12_11; + else goto T12_13; + +N12_11: + if fieldMatch(text).importance < 0.7488800287 then goto N12_12; + else goto T12_12; + +N12_12: + if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto N12_13; + else goto T12_11; + +N12_13: + if attribute(user_followers_count) < 
866.5000000000 then goto T12_9; + else goto T12_10; + +T12_9: + response = 0.0181298105; + goto D12; + +T12_10: + response = 0.0303594396; + goto D12; + +T12_11: + response = 0.0126963345; + goto D12; + +T12_12: + response = 0.0285788280; + goto D12; + +T12_13: + response = 0.0333028419; + goto D12; + +N12_14: + if age(created_at) < 27000.0000000000 then goto N12_15; + else goto N12_17; + +N12_15: + if fieldTermMatch(text,3).firstPosition < 7.5000000000 then goto T12_14; + else goto N12_16; + +T12_14: + response = 0.0067345611; + goto D12; + +N12_16: + if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T12_15; + else goto T12_16; + +T12_15: + response = -0.0013179334; + goto D12; + +T12_16: + response = -0.0081428248; + goto D12; + +N12_17: + if fieldMatch(text) < 0.5568180084 then goto T12_17; + else goto T12_18; + +T12_17: + response = 0.0004597678; + goto D12; + +T12_18: + response = 0.0093837881; + goto D12; + +D12: + +tnscore = tnscore + response; + +/* Tree 14 of 80 */ +N13_1: + if attribute(ythl) < 0.5000000000 then goto N13_2; + else goto N13_10; + +N13_2: + if age(created_at) < 1830.0000000000 then goto N13_3; + else goto N13_7; + +N13_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N13_4; + else goto T13_5; + +N13_4: + if fieldMatch(text).importance < 0.4998250008 then goto N13_5; + else goto N13_6; + +N13_5: + if term(0).significance < 0.9983664751 then goto T13_1; + else goto T13_2; + +T13_1: + response = -0.0058107338; + goto D13; + +T13_2: + response = 0.0245069566; + goto D13; + +N13_6: + if fieldMatch(text).importance < 0.4998745024 then goto T13_3; + else goto T13_4; + +T13_3: + response = -0.0308383904; + goto D13; + +T13_4: + response = -0.0106009672; + goto D13; + +T13_5: + response = 0.0010971200; + goto D13; + +N13_7: + if age(created_at) < 45000.0000000000 then goto N13_8; + else goto T13_9; + +N13_8: + if fieldMatch(text).weightedOccurrence < 0.0912880003 then goto N13_9; + else goto T13_8; + +N13_9: + if 
age(created_at) < 3570.0000000000 then goto T13_6; + else goto T13_7; + +T13_6: + response = -0.0267460073; + goto D13; + +T13_7: + response = -0.0152835256; + goto D13; + +T13_8: + response = -0.0298858389; + goto D13; + +T13_9: + response = -0.0088562145; + goto D13; + +N13_10: + if age(created_at) < 1830.0000000000 then goto N13_11; + else goto N13_17; + +N13_11: + if fieldMatch(text).importance < 0.4989734888 then goto N13_12; + else goto N13_13; + +N13_12: + if fieldMatch(text).importance < 0.4988874793 then goto T13_10; + else goto T13_11; + +T13_10: + response = 0.0045636472; + goto D13; + +T13_11: + response = -0.1210997623; + goto D13; + +N13_13: + if fieldMatch(text) < 0.3135755062 then goto N13_14; + else goto N13_16; + +N13_14: + if term(1).significance < 0.9852235317 then goto N13_15; + else goto T13_14; + +N13_15: + if term(0).significance < 0.9929184914 then goto T13_12; + else goto T13_13; + +T13_12: + response = -0.0444011152; + goto D13; + +T13_13: + response = 0.0156709024; + goto D13; + +T13_14: + response = 0.0178486139; + goto D13; + +N13_16: + if attribute(yst_tweet_language) < 3243.5000000000 then goto T13_15; + else goto T13_16; + +T13_15: + response = 0.0236557227; + goto D13; + +T13_16: + response = -0.0055893686; + goto D13; + +N13_17: + if age(created_at) < 12600.0000000000 then goto T13_17; + else goto T13_18; + +T13_17: + response = -0.0039404999; + goto D13; + +T13_18: + response = 0.0040376803; + goto D13; + +D13: + +tnscore = tnscore + response; + +/* Tree 15 of 80 */ +N14_1: + if attribute(ythl) < 0.5000000000 then goto N14_2; + else goto N14_12; + +N14_2: + if age(created_at) < 1770.0000000000 then goto N14_3; + else goto N14_10; + +N14_3: + if term(0).significance < 0.9964904785 then goto N14_4; + else goto N14_5; + +N14_4: + if fieldMatch(text).gapLength < 3.5000000000 then goto T14_1; + else goto T14_2; + +T14_1: + response = 0.0034967960; + goto D14; + +T14_2: + response = -0.0136111988; + goto D14; + +N14_5: + if 
term(0).significance < 0.9975960255 then goto N14_6; + else goto N14_7; + +N14_6: + if term(1).significance < 0.9943025112 then goto T14_3; + else goto T14_4; + +T14_3: + response = -0.0474034255; + goto D14; + +T14_4: + response = -0.0139986631; + goto D14; + +N14_7: + if fieldMatch(text).importance < 0.6665844917 then goto T14_5; + else goto N14_8; + +T14_5: + response = -0.0083848009; + goto D14; + +N14_8: + if term(0).significance < 0.9992945194 then goto N14_9; + else goto T14_8; + +N14_9: + if term(1).significance < 0.9993695021 then goto T14_6; + else goto T14_7; + +T14_6: + response = 0.0095761689; + goto D14; + +T14_7: + response = 0.0714217668; + goto D14; + +T14_8: + response = -0.0141420420; + goto D14; + +N14_10: + if age(created_at) < 5400.0000000000 then goto T14_9; + else goto N14_11; + +T14_9: + response = -0.0204021576; + goto D14; + +N14_11: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T14_10; + else goto T14_11; + +T14_10: + response = -0.0152195185; + goto D14; + +T14_11: + response = -0.0074091603; + goto D14; + +N14_12: + if age(created_at) < 1830.0000000000 then goto N14_13; + else goto N14_15; + +N14_13: + if fieldMatch(text).occurrence < 0.1277174950 then goto N14_14; + else goto T14_14; + +N14_14: + if fieldMatch(text) < 0.8451825380 then goto T14_12; + else goto T14_13; + +T14_12: + response = 0.0072816766; + goto D14; + +T14_13: + response = 0.0185451686; + goto D14; + +T14_14: + response = 0.0243676179; + goto D14; + +N14_15: + if fieldMatch(text).importance < 0.6664544940 then goto N14_16; + else goto N14_17; + +N14_16: + if age(created_at) < 5400.0000000000 then goto T14_15; + else goto T14_16; + +T14_15: + response = -0.0113001116; + goto D14; + +T14_16: + response = -0.0020866841; + goto D14; + +N14_17: + if age(created_at) < 30600.0000000000 then goto T14_17; + else goto T14_18; + +T14_17: + response = -0.0002226823; + goto D14; + +T14_18: + response = 0.0054407552; + goto D14; + +D14: + +tnscore = tnscore + 
response; + +/* Tree 16 of 80 */ +N15_1: + if attribute(ythl) < 0.5000000000 then goto N15_2; + else goto N15_9; + +N15_2: + if age(created_at) < 1770.0000000000 then goto N15_3; + else goto N15_7; + +N15_3: + if attribute(user_friends_count) < 1202.5000000000 then goto N15_4; + else goto T15_5; + +N15_4: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N15_5; + else goto T15_4; + +N15_5: + if attribute(user_statuses_count) < 491.5000000000 then goto N15_6; + else goto T15_3; + +N15_6: + if attribute(user_followers_count) < 39.5000000000 then goto T15_1; + else goto T15_2; + +T15_1: + response = -0.0053604202; + goto D15; + +T15_2: + response = 0.0112837612; + goto D15; + +T15_3: + response = -0.0076658014; + goto D15; + +T15_4: + response = -0.0344819911; + goto D15; + +T15_5: + response = 0.0152860620; + goto D15; + +N15_7: + if age(created_at) < 52200.0000000000 then goto N15_8; + else goto T15_8; + +N15_8: + if fieldMatch(text).importance < 0.6658334732 then goto T15_6; + else goto T15_7; + +T15_6: + response = -0.0236404883; + goto D15; + +T15_7: + response = -0.0155495401; + goto D15; + +T15_8: + response = -0.0063627489; + goto D15; + +N15_9: + if age(created_at) < 1830.0000000000 then goto N15_10; + else goto N15_14; + +N15_10: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N15_11; + else goto T15_13; + +N15_11: + if fieldMatch(text).significantOccurrence < 0.0377494991 then goto T15_9; + else goto N15_12; + +T15_9: + response = -0.0247545653; + goto D15; + +N15_12: + if term(0).significance < 0.9914690256 then goto N15_13; + else goto T15_12; + +N15_13: + if term(0).significance < 0.9911389947 then goto T15_10; + else goto T15_11; + +T15_10: + response = 0.0074545408; + goto D15; + +T15_11: + response = -0.0712173039; + goto D15; + +T15_12: + response = 0.0174505123; + goto D15; + +T15_13: + response = 0.0227466857; + goto D15; + +N15_14: + if fieldMatch(text).importance < 0.6664484739 then goto N15_15; + else goto N15_16; + 
+N15_15: + if fieldMatch(text) < 0.3570200205 then goto T15_14; + else goto T15_15; + +T15_14: + response = -0.0164480209; + goto D15; + +T15_15: + response = -0.0029063778; + goto D15; + +N15_16: + if age(created_at) < 30600.0000000000 then goto T15_16; + else goto N15_17; + +T15_16: + response = -0.0008955043; + goto D15; + +N15_17: + if fieldLength(text) < 22.5000000000 then goto T15_17; + else goto T15_18; + +T15_17: + response = 0.0066513594; + goto D15; + +T15_18: + response = -0.0017231871; + goto D15; + +D15: + +tnscore = tnscore + response; + +/* Tree 17 of 80 */ +N16_1: + if attribute(ythl) < 0.5000000000 then goto N16_2; + else goto N16_10; + +N16_2: + if age(created_at) < 1830.0000000000 then goto N16_3; + else goto N16_8; + +N16_3: + if term(1).significance < 0.8159549832 then goto T16_1; + else goto N16_4; + +T16_1: + response = 0.0266703268; + goto D16; + +N16_4: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N16_5; + else goto T16_6; + +N16_5: + if match < 0.6756634712 then goto T16_2; + else goto N16_6; + +T16_2: + response = -0.0178612015; + goto D16; + +N16_6: + if fieldMatch(text).weightedOccurrence < 0.1012820005 then goto N16_7; + else goto T16_5; + +N16_7: + if fieldMatch(text).earliness < 0.8834840059 then goto T16_3; + else goto T16_4; + +T16_3: + response = -0.0031504958; + goto D16; + +T16_4: + response = 0.0108290236; + goto D16; + +T16_5: + response = -0.0132400721; + goto D16; + +T16_6: + response = -0.0294468679; + goto D16; + +N16_8: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T16_7; + else goto N16_9; + +T16_7: + response = -0.0183944645; + goto D16; + +N16_9: + if age(created_at) < 45000.0000000000 then goto T16_8; + else goto T16_9; + +T16_8: + response = -0.0132570328; + goto D16; + +T16_9: + response = -0.0044394895; + goto D16; + +N16_10: + if age(created_at) < 1830.0000000000 then goto N16_11; + else goto N16_15; + +N16_11: + if fieldMatch(text).occurrence < 0.1225000024 then goto N16_12; + 
else goto N16_14; + +N16_12: + if fieldMatch(text).head < 9.5000000000 then goto N16_13; + else goto T16_12; + +N16_13: + if term(0).significance < 0.9731230140 then goto T16_10; + else goto T16_11; + +T16_10: + response = -0.0399055768; + goto D16; + +T16_11: + response = 0.0171512303; + goto D16; + +T16_12: + response = 0.0047044679; + goto D16; + +N16_14: + if attribute(user_statuses_count) < 6.5000000000 then goto T16_13; + else goto T16_14; + +T16_13: + response = -0.0303006102; + goto D16; + +T16_14: + response = 0.0230432421; + goto D16; + +N16_15: + if fieldMatch(text).importance < 0.6664534807 then goto T16_15; + else goto N16_16; + +T16_15: + response = -0.0054238462; + goto D16; + +N16_16: + if term(2).significance < 0.9981180429 then goto N16_17; + else goto T16_18; + +N16_17: + if fieldMatch(text).completeness < 0.9590100050 then goto T16_16; + else goto T16_17; + +T16_16: + response = -0.0041171766; + goto D16; + +T16_17: + response = 0.0032716696; + goto D16; + +T16_18: + response = 0.0055362698; + goto D16; + +D16: + +tnscore = tnscore + response; + +/* Tree 18 of 80 */ +N17_1: + if attribute(ythl) < 0.5000000000 then goto N17_2; + else goto N17_8; + +N17_2: + if age(created_at) < 1830.0000000000 then goto N17_3; + else goto N17_7; + +N17_3: + if fieldMatch(text).tail < 16.5000000000 then goto N17_4; + else goto T17_5; + +N17_4: + if term(2).significance < 0.9960604906 then goto T17_1; + else goto N17_5; + +T17_1: + response = -0.0087614790; + goto D17; + +N17_5: + if fieldMatch(text).occurrence < 0.1225000024 then goto T17_2; + else goto N17_6; + +T17_2: + response = -0.0067609181; + goto D17; + +N17_6: + if term(1).significance < 0.9832755327 then goto T17_3; + else goto T17_4; + +T17_3: + response = 0.0282354539; + goto D17; + +T17_4: + response = 0.0025908270; + goto D17; + +T17_5: + response = 0.0073141014; + goto D17; + +N17_7: + if age(created_at) < 45000.0000000000 then goto T17_6; + else goto T17_7; + +T17_6: + response = -0.0156556128; + 
goto D17; + +T17_7: + response = -0.0071654687; + goto D17; + +N17_8: + if age(created_at) < 1830.0000000000 then goto N17_9; + else goto N17_14; + +N17_9: + if fieldMatch(text).occurrence < 0.0754984990 then goto T17_8; + else goto N17_10; + +T17_8: + response = 0.0073343160; + goto D17; + +N17_10: + if fieldMatch(text) < 0.3130764961 then goto N17_11; + else goto N17_12; + +N17_11: + if term(1).significance < 0.9978075027 then goto T17_9; + else goto T17_10; + +T17_9: + response = -0.0014603640; + goto D17; + +T17_10: + response = 0.0291628398; + goto D17; + +N17_12: + if attribute(user_statuses_count) < 29.5000000000 then goto T17_11; + else goto N17_13; + +T17_11: + response = -0.0055009180; + goto D17; + +N17_13: + if attribute(yst_reply_auth) < 476.0000000000 then goto T17_12; + else goto T17_13; + +T17_12: + response = 0.0209690045; + goto D17; + +T17_13: + response = -0.0105504498; + goto D17; + +N17_14: + if age(created_at) < 30600.0000000000 then goto N17_15; + else goto T17_18; + +N17_15: + if fieldTermMatch(text,1).occurrences < 1.5000000000 then goto N17_16; + else goto T17_17; + +N17_16: + if fieldMatch(text) < 0.4531754851 then goto T17_14; + else goto N17_17; + +T17_14: + response = -0.0106616269; + goto D17; + +N17_17: + if fieldMatch(text).importance < 0.4999470115 then goto T17_15; + else goto T17_16; + +T17_15: + response = -0.0131472535; + goto D17; + +T17_16: + response = -0.0008043613; + goto D17; + +T17_17: + response = 0.0065678273; + goto D17; + +T17_18: + response = 0.0043163871; + goto D17; + +D17: + +tnscore = tnscore + response; + +/* Tree 19 of 80 */ +N18_1: + if attribute(ythl) < 0.5000000000 then goto N18_2; + else goto N18_10; + +N18_2: + if age(created_at) < 1830.0000000000 then goto N18_3; + else goto N18_7; + +N18_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N18_4; + else goto N18_5; + +N18_4: + if fieldMatch(text).tail < 12.5000000000 then goto T18_1; + else goto T18_2; + +T18_1: + response = -0.0083194933; 
+ goto D18; + +T18_2: + response = 0.0035102742; + goto D18; + +N18_5: + if attribute(user_friends_count) < 263.5000000000 then goto N18_6; + else goto T18_5; + +N18_6: + if fieldMatch(text).importance < 0.6659150124 then goto T18_3; + else goto T18_4; + +T18_3: + response = 0.0142259075; + goto D18; + +T18_4: + response = -0.0029315835; + goto D18; + +T18_5: + response = 0.0146667338; + goto D18; + +N18_7: + if age(created_at) < 37800.0000000000 then goto N18_8; + else goto N18_9; + +N18_8: + if fieldMatch(text).significantOccurrence < 0.0833195001 then goto T18_6; + else goto T18_7; + +T18_6: + response = -0.0135039055; + goto D18; + +T18_7: + response = -0.0220540111; + goto D18; + +N18_9: + if fieldMatch(text).longestSequenceRatio < 0.5357145071 then goto T18_8; + else goto T18_9; + +T18_8: + response = -0.0130397740; + goto D18; + +T18_9: + response = -0.0039578022; + goto D18; + +N18_10: + if age(created_at) < 1830.0000000000 then goto N18_11; + else goto N18_15; + +N18_11: + if term(0).significance < 0.9184160233 then goto N18_12; + else goto N18_13; + +N18_12: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T18_10; + else goto T18_11; + +T18_10: + response = -0.0589194117; + goto D18; + +T18_11: + response = 0.0129273078; + goto D18; + +N18_13: + if fieldTermMatch(text,1).firstPosition < 7.5000000000 then goto T18_12; + else goto N18_14; + +T18_12: + response = 0.0206642588; + goto D18; + +N18_14: + if attribute(user_statuses_count) < 63762.0000000000 then goto T18_13; + else goto T18_14; + +T18_13: + response = 0.0138866614; + goto D18; + +T18_14: + response = -0.0354735543; + goto D18; + +N18_15: + if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto N18_16; + else goto N18_17; + +N18_16: + if age(created_at) < 52200.0000000000 then goto T18_15; + else goto T18_16; + +T18_15: + response = 0.0028235989; + goto D18; + +T18_16: + response = 0.0121270804; + goto D18; + +N18_17: + if fieldMatch(text).importance < 0.6664544940 then 
goto T18_17; + else goto T18_18; + +T18_17: + response = -0.0071218235; + goto D18; + +T18_18: + response = 0.0005514519; + goto D18; + +D18: + +tnscore = tnscore + response; + +/* Tree 20 of 80 */ +N19_1: + if attribute(ythl) < 0.5000000000 then goto N19_2; + else goto N19_6; + +N19_2: + if age(created_at) < 1830.0000000000 then goto N19_3; + else goto N19_5; + +N19_3: + if fieldTermMatch(text,1).firstPosition < 4.5000000000 then goto N19_4; + else goto T19_3; + +N19_4: + if match < 0.9384620190 then goto T19_1; + else goto T19_2; + +T19_1: + response = 0.0165893189; + goto D19; + +T19_2: + response = -0.0049824111; + goto D19; + +T19_3: + response = -0.0046803205; + goto D19; + +N19_5: + if age(created_at) < 41400.0000000000 then goto T19_4; + else goto T19_5; + +T19_4: + response = -0.0148008888; + goto D19; + +T19_5: + response = -0.0071343073; + goto D19; + +N19_6: + if age(created_at) < 1830.0000000000 then goto N19_7; + else goto N19_15; + +N19_7: + if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto N19_8; + else goto N19_9; + +N19_8: + if attribute(user_followers_count) < 945.5000000000 then goto T19_6; + else goto T19_7; + +T19_6: + response = 0.0158379194; + goto D19; + +T19_7: + response = 0.0252891613; + goto D19; + +N19_9: + if fieldMatch(text) < 0.2744970024 then goto N19_10; + else goto N19_12; + +N19_10: + if term(0).significance < 0.9929184914 then goto N19_11; + else goto T19_10; + +N19_11: + if fieldMatch(text).tail < 6.5000000000 then goto T19_8; + else goto T19_9; + +T19_8: + response = -0.0518040838; + goto D19; + +T19_9: + response = 0.0076190376; + goto D19; + +T19_10: + response = 0.0091624226; + goto D19; + +N19_12: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N19_13; + else goto T19_14; + +N19_13: + if attribute(yst_reply_auth) < 469.5000000000 then goto N19_14; + else goto T19_13; + +N19_14: + if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T19_11; + else goto T19_12; + +T19_11: + 
response = -0.0078021755; + goto D19; + +T19_12: + response = 0.0161894548; + goto D19; + +T19_13: + response = -0.0230367514; + goto D19; + +T19_14: + response = -0.0302108693; + goto D19; + +N19_15: + if age(created_at) < 30600.0000000000 then goto N19_16; + else goto N19_17; + +N19_16: + if fieldMatch(text) < 0.3708400130 then goto T19_15; + else goto T19_16; + +T19_15: + response = -0.0093177671; + goto D19; + +T19_16: + response = -0.0008716804; + goto D19; + +N19_17: + if fieldMatch(text) < 0.5607429743 then goto T19_17; + else goto T19_18; + +T19_17: + response = -0.0004994075; + goto D19; + +T19_18: + response = 0.0075202897; + goto D19; + +D19: + +tnscore = tnscore + response; + +/* Tree 21 of 80 */ +N20_1: + if attribute(ythl) < 0.5000000000 then goto N20_2; + else goto N20_9; + +N20_2: + if age(created_at) < 1830.0000000000 then goto N20_3; + else goto N20_8; + +N20_3: + if match < 0.6055585146 then goto N20_4; + else goto N20_5; + +N20_4: + if fieldTermMatch(text,1).firstPosition < 9.5000000000 then goto T20_1; + else goto T20_2; + +T20_1: + response = -0.0038025793; + goto D20; + +T20_2: + response = -0.0365864040; + goto D20; + +N20_5: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N20_6; + else goto T20_6; + +N20_6: + if fieldMatch(text).significantOccurrence < 0.1483514905 then goto N20_7; + else goto T20_5; + +N20_7: + if term(1).significance < 0.7788045406 then goto T20_3; + else goto T20_4; + +T20_3: + response = 0.0569638816; + goto D20; + +T20_4: + response = -0.0006508355; + goto D20; + +T20_5: + response = -0.0215954499; + goto D20; + +T20_6: + response = -0.0288353010; + goto D20; + +N20_8: + if age(created_at) < 5400.0000000000 then goto T20_7; + else goto T20_8; + +T20_7: + response = -0.0155259431; + goto D20; + +T20_8: + response = -0.0084487818; + goto D20; + +N20_9: + if age(created_at) < 1830.0000000000 then goto N20_10; + else goto N20_16; + +N20_10: + if attribute(user_followers_count) < 2333.0000000000 then goto 
N20_11; + else goto T20_15; + +N20_11: + if fieldMatch(text).importance < 0.4989485145 then goto N20_12; + else goto N20_13; + +N20_12: + if fieldMatch(text).importance < 0.4988874793 then goto T20_9; + else goto T20_10; + +T20_9: + response = 0.0002952785; + goto D20; + +T20_10: + response = -0.1374273254; + goto D20; + +N20_13: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N20_14; + else goto N20_15; + +N20_14: + if attribute(yst_reply_auth) < 22.5000000000 then goto T20_11; + else goto T20_12; + +T20_11: + response = 0.0126010812; + goto D20; + +T20_12: + response = 0.0015712189; + goto D20; + +N20_15: + if fieldTermMatch(text,1).firstPosition < 8.5000000000 then goto T20_13; + else goto T20_14; + +T20_13: + response = 0.0140537649; + goto D20; + +T20_14: + response = 0.0345167434; + goto D20; + +T20_15: + response = 0.0247552557; + goto D20; + +N20_16: + if fieldMatch(text).importance < 0.6664245129 then goto N20_17; + else goto T20_18; + +N20_17: + if match < 0.9277470112 then goto T20_16; + else goto T20_17; + +T20_16: + response = -0.0104111915; + goto D20; + +T20_17: + response = -0.0006366780; + goto D20; + +T20_18: + response = 0.0023009658; + goto D20; + +D20: + +tnscore = tnscore + response; + +/* Tree 22 of 80 */ +N21_1: + if attribute(ythl) < 0.5000000000 then goto N21_2; + else goto N21_10; + +N21_2: + if fieldMatch(text).tail < 11.5000000000 then goto N21_3; + else goto N21_8; + +N21_3: + if age(created_at) < 1830.0000000000 then goto N21_4; + else goto N21_7; + +N21_4: + if attribute(yst_tweet_language) < 3554.0000000000 then goto N21_5; + else goto T21_4; + +N21_5: + if fieldMatch(text).importance < 0.6656075120 then goto N21_6; + else goto T21_3; + +N21_6: + if term(0).significance < 0.9996379614 then goto T21_1; + else goto T21_2; + +T21_1: + response = -0.0108858514; + goto D21; + +T21_2: + response = 0.0223953057; + goto D21; + +T21_3: + response = -0.0007236850; + goto D21; + +T21_4: + response = -0.0527538471; + goto D21; 
+ +N21_7: + if age(created_at) < 5400.0000000000 then goto T21_5; + else goto T21_6; + +T21_5: + response = -0.0167835591; + goto D21; + +T21_6: + response = -0.0101222507; + goto D21; + +N21_8: + if attribute(user_friends_count) < 103.5000000000 then goto T21_7; + else goto N21_9; + +T21_7: + response = -0.0058634359; + goto D21; + +N21_9: + if age(created_at) < 1770.0000000000 then goto T21_8; + else goto T21_9; + +T21_8: + response = 0.0106468506; + goto D21; + +T21_9: + response = -0.0022715192; + goto D21; + +N21_10: + if age(created_at) < 1830.0000000000 then goto N21_11; + else goto N21_15; + +N21_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N21_12; + else goto N21_14; + +N21_12: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N21_13; + else goto T21_12; + +N21_13: + if term(0).significance < 0.9184160233 then goto T21_10; + else goto T21_11; + +T21_10: + response = -0.0441538866; + goto D21; + +T21_11: + response = 0.0103401752; + goto D21; + +T21_12: + response = 0.0169759088; + goto D21; + +N21_14: + if term(0).significance < 0.9990385175 then goto T21_13; + else goto T21_14; + +T21_13: + response = 0.0064769128; + goto D21; + +T21_14: + response = -0.0484309871; + goto D21; + +N21_15: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T21_15; + else goto N21_16; + +T21_15: + response = -0.0038310021; + goto D21; + +N21_16: + if term(0).significance < 0.9686380029 then goto T21_16; + else goto N21_17; + +T21_16: + response = 0.0135820391; + goto D21; + +N21_17: + if attribute(user_followers_count) < 719.5000000000 then goto T21_17; + else goto T21_18; + +T21_17: + response = -0.0002012513; + goto D21; + +T21_18: + response = 0.0056425249; + goto D21; + +D21: + +tnscore = tnscore + response; + +/* Tree 23 of 80 */ +N22_1: + if attribute(ythl) < 0.5000000000 then goto N22_2; + else goto N22_7; + +N22_2: + if age(created_at) < 630.0000000000 then goto N22_3; + else goto N22_5; + +N22_3: + if 
fieldMatch(text).importance < 0.4994785190 then goto T22_1; + else goto N22_4; + +T22_1: + response = 0.0221233715; + goto D22; + +N22_4: + if attribute(user_followers_count) < 926.5000000000 then goto T22_2; + else goto T22_3; + +T22_2: + response = -0.0044878516; + goto D22; + +T22_3: + response = 0.0128654737; + goto D22; + +N22_5: + if fieldMatch(text).tail < 11.5000000000 then goto N22_6; + else goto T22_6; + +N22_6: + if fieldMatch(text).importance < 0.6665325165 then goto T22_4; + else goto T22_5; + +T22_4: + response = -0.0138860556; + goto D22; + +T22_5: + response = -0.0083897223; + goto D22; + +T22_6: + response = -0.0045088750; + goto D22; + +N22_7: + if age(created_at) < 1830.0000000000 then goto N22_8; + else goto N22_14; + +N22_8: + if attribute(yst_tweet_language) < 3587.5000000000 then goto N22_9; + else goto T22_13; + +N22_9: + if fieldMatch(text).importance < 0.4989734888 then goto T22_7; + else goto N22_10; + +T22_7: + response = -0.0087604690; + goto D22; + +N22_10: + if attribute(user_followers_count) < 1733.5000000000 then goto N22_11; + else goto T22_12; + +N22_11: + if fieldMatch(text).occurrence < 0.1455025077 then goto N22_12; + else goto T22_11; + +N22_12: + if fieldMatch(text) < 0.5567239523 then goto N22_13; + else goto T22_10; + +N22_13: + if term(2).significance < 0.9795899987 then goto T22_8; + else goto T22_9; + +T22_8: + response = -0.0543641627; + goto D22; + +T22_9: + response = 0.0024748648; + goto D22; + +T22_10: + response = 0.0124403853; + goto D22; + +T22_11: + response = 0.0174741297; + goto D22; + +T22_12: + response = 0.0222181645; + goto D22; + +T22_13: + response = -0.0288913368; + goto D22; + +N22_14: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N22_15; + else goto N22_16; + +N22_15: + if fieldMatch(text).importance < 0.6664659977 then goto T22_14; + else goto T22_15; + +T22_14: + response = -0.0065555429; + goto D22; + +T22_15: + response = 0.0005791831; + goto D22; + +N22_16: + if 
fieldMatch(text).earliness < 0.9354164600 then goto N22_17; + else goto T22_18; + +N22_17: + if fieldMatch(text).significantOccurrence < 0.0339080021 then goto T22_16; + else goto T22_17; + +T22_16: + response = -0.0694353726; + goto D22; + +T22_17: + response = 0.0012739636; + goto D22; + +T22_18: + response = 0.0075882453; + goto D22; + +D22: + +tnscore = tnscore + response; + +/* Tree 24 of 80 */ +N23_1: + if attribute(ythl) < 0.5000000000 then goto N23_2; + else goto N23_11; + +N23_2: + if fieldMatch(text).tail < 12.5000000000 then goto N23_3; + else goto N23_10; + +N23_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N23_4; + else goto N23_8; + +N23_4: + if age(created_at) < 210.0000000000 then goto T23_1; + else goto N23_5; + +T23_1: + response = 0.0005104670; + goto D23; + +N23_5: + if term(0).significance < 0.9910864830 then goto T23_2; + else goto N23_6; + +T23_2: + response = -0.0213498049; + goto D23; + +N23_6: + if term(0).significance < 0.9921205044 then goto T23_3; + else goto N23_7; + +T23_3: + response = 0.0187676178; + goto D23; + +N23_7: + if fieldMatch(text).tail < 5.5000000000 then goto T23_4; + else goto T23_5; + +T23_4: + response = -0.0155349434; + goto D23; + +T23_5: + response = -0.0073599141; + goto D23; + +N23_8: + if fieldMatch(text).importance < 0.6662604809 then goto N23_9; + else goto T23_8; + +N23_9: + if attribute(user_followers_count) < 1875.0000000000 then goto T23_6; + else goto T23_7; + +T23_6: + response = 0.0009507365; + goto D23; + +T23_7: + response = 0.0527948179; + goto D23; + +T23_8: + response = -0.0073533813; + goto D23; + +N23_10: + if age(created_at) < 810.0000000000 then goto T23_9; + else goto T23_10; + +T23_9: + response = 0.0076378446; + goto D23; + +T23_10: + response = -0.0027198247; + goto D23; + +N23_11: + if age(created_at) < 1830.0000000000 then goto N23_12; + else goto N23_16; + +N23_12: + if fieldMatch(text).significantOccurrence < 0.0424195006 then goto T23_11; + else goto N23_13; + 
+T23_11: + response = 0.0015356130; + goto D23; + +N23_13: + if fieldMatch(text).tail < 7.5000000000 then goto N23_14; + else goto T23_15; + +N23_14: + if fieldMatch(text).importance < 0.7466344833 then goto N23_15; + else goto T23_14; + +N23_15: + if fieldMatch(text).importance < 0.6666129827 then goto T23_12; + else goto T23_13; + +T23_12: + response = 0.0067592681; + goto D23; + +T23_13: + response = -0.0205924309; + goto D23; + +T23_14: + response = 0.0160937308; + goto D23; + +T23_15: + response = 0.0167252945; + goto D23; + +N23_16: + if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T23_16; + else goto N23_17; + +T23_16: + response = 0.0049627365; + goto D23; + +N23_17: + if fieldMatch(text).importance < 0.6664254665 then goto T23_17; + else goto T23_18; + +T23_17: + response = -0.0065001791; + goto D23; + +T23_18: + response = 0.0010821803; + goto D23; + +D23: + +tnscore = tnscore + response; + +/* Tree 25 of 80 */ +N24_1: + if attribute(ythl) < 0.5000000000 then goto N24_2; + else goto N24_9; + +N24_2: + if age(created_at) < 630.0000000000 then goto N24_3; + else goto N24_5; + +N24_3: + if age(created_at) < 510.0000000000 then goto T24_1; + else goto N24_4; + +T24_1: + response = -0.0026815916; + goto D24; + +N24_4: + if fieldMatch(text).importance < 0.4997144938 then goto T24_2; + else goto T24_3; + +T24_2: + response = 0.0414511969; + goto D24; + +T24_3: + response = 0.0044068150; + goto D24; + +N24_5: + if fieldMatch(text).tail < 11.5000000000 then goto N24_6; + else goto N24_8; + +N24_6: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N24_7; + else goto T24_6; + +N24_7: + if fieldMatch(text).importance < 0.4994869828 then goto T24_4; + else goto T24_5; + +T24_4: + response = 0.0066561273; + goto D24; + +T24_5: + response = -0.0136850009; + goto D24; + +T24_6: + response = -0.0069580048; + goto D24; + +N24_8: + if attribute(yst_reply_auth) < 16.5000000000 then goto T24_7; + else goto T24_8; + +T24_7: + response = 
0.0023887625; + goto D24; + +T24_8: + response = -0.0083630492; + goto D24; + +N24_9: + if age(created_at) < 1830.0000000000 then goto N24_10; + else goto N24_13; + +N24_10: + if fieldMatch(text).fieldCompleteness < 0.0425724983 then goto N24_11; + else goto N24_12; + +N24_11: + if attribute(user_friends_count) < 252.5000000000 then goto T24_9; + else goto T24_10; + +T24_9: + response = -0.0201198990; + goto D24; + +T24_10: + response = 0.0154324464; + goto D24; + +N24_12: + if fieldMatch(text).tail < 5.5000000000 then goto T24_11; + else goto T24_12; + +T24_11: + response = 0.0052786010; + goto D24; + +T24_12: + response = 0.0135236791; + goto D24; + +N24_13: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T24_13; + else goto N24_14; + +T24_13: + response = -0.0049136258; + goto D24; + +N24_14: + if age(created_at) < 27000.0000000000 then goto T24_14; + else goto N24_15; + +T24_14: + response = -0.0004128224; + goto D24; + +N24_15: + if term(2).significance < 0.9998239875 then goto N24_16; + else goto T24_18; + +N24_16: + if fieldMatch(text).completeness < 0.9559409618 then goto T24_15; + else goto N24_17; + +T24_15: + response = -0.0209409304; + goto D24; + +N24_17: + if term(1).significance < 0.9128689766 then goto T24_16; + else goto T24_17; + +T24_16: + response = 0.0195740015; + goto D24; + +T24_17: + response = 0.0035250792; + goto D24; + +T24_18: + response = 0.0133058164; + goto D24; + +D24: + +tnscore = tnscore + response; + +/* Tree 26 of 80 */ +N25_1: + if attribute(ythl) < 0.5000000000 then goto N25_2; + else goto N25_8; + +N25_2: + if age(created_at) < 1770.0000000000 then goto N25_3; + else goto N25_5; + +N25_3: + if fieldMatch(text).tail < 7.5000000000 then goto T25_1; + else goto N25_4; + +T25_1: + response = -0.0047927890; + goto D25; + +N25_4: + if fieldMatch(text).importance < 0.6652389765 then goto T25_2; + else goto T25_3; + +T25_2: + response = -0.0020739127; + goto D25; + +T25_3: + response = 0.0102078569; + goto D25; + 
+N25_5: + if fieldMatch(text).importance < 0.6657874584 then goto N25_6; + else goto N25_7; + +N25_6: + if attribute(user_friends_count) < 16.5000000000 then goto T25_4; + else goto T25_5; + +T25_4: + response = 0.0004515464; + goto D25; + +T25_5: + response = -0.0178576762; + goto D25; + +N25_7: + if term(0).significance < 0.9976029992 then goto T25_6; + else goto T25_7; + +T25_6: + response = -0.0101410825; + goto D25; + +T25_7: + response = -0.0048545380; + goto D25; + +N25_8: + if age(created_at) < 1770.0000000000 then goto N25_9; + else goto N25_14; + +N25_9: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N25_10; + else goto T25_13; + +N25_10: + if attribute(user_followers_count) < 606.0000000000 then goto N25_11; + else goto N25_12; + +N25_11: + if fieldMatch(text).importance < 0.7488585114 then goto T25_8; + else goto T25_9; + +T25_8: + response = 0.0066962893; + goto D25; + +T25_9: + response = 0.0158854368; + goto D25; + +N25_12: + if fieldMatch(text).significantOccurrence < 0.0555564985 then goto N25_13; + else goto T25_12; + +N25_13: + if attribute(user_statuses_count) < 13511.5000000000 then goto T25_10; + else goto T25_11; + +T25_10: + response = 0.0163436735; + goto D25; + +T25_11: + response = -0.0031528673; + goto D25; + +T25_12: + response = 0.0222661817; + goto D25; + +T25_13: + response = -0.0172180317; + goto D25; + +N25_14: + if fieldMatch(text).importance < 0.6664534807 then goto N25_15; + else goto N25_16; + +N25_15: + if fieldMatch(text) < 0.3528665006 then goto T25_14; + else goto T25_15; + +T25_14: + response = -0.0116436179; + goto D25; + +T25_15: + response = -0.0025309342; + goto D25; + +N25_16: + if term(2).significance < 0.9981445074 then goto N25_17; + else goto T25_18; + +N25_17: + if fieldTermMatch(text,3).firstPosition < 4.5000000000 then goto T25_16; + else goto T25_17; + +T25_16: + response = 0.0069706912; + goto D25; + +T25_17: + response = -0.0021646003; + goto D25; + +T25_18: + response = 0.0046032512; + goto 
D25; + +D25: + +tnscore = tnscore + response; + +/* Tree 27 of 80 */ +N26_1: + if attribute(ythl) < 0.5000000000 then goto N26_2; + else goto N26_11; + +N26_2: + if age(created_at) < 1830.0000000000 then goto N26_3; + else goto N26_10; + +N26_3: + if fieldMatch(user_name).fieldCompleteness < 0.2916665077 then goto N26_4; + else goto T26_8; + +N26_4: + if attribute(user_statuses_count) < 497.5000000000 then goto T26_1; + else goto N26_5; + +T26_1: + response = 0.0022748148; + goto D26; + +N26_5: + if attribute(user_followers_count) < 960.5000000000 then goto N26_6; + else goto T26_7; + +N26_6: + if term(0).significance < 0.9725670218 then goto T26_2; + else goto N26_7; + +T26_2: + response = -0.0246347116; + goto D26; + +N26_7: + if term(0).significance < 0.9963495135 then goto N26_8; + else goto T26_6; + +N26_8: + if fieldMatch(text).tail < 9.5000000000 then goto N26_9; + else goto T26_5; + +N26_9: + if fieldMatch(text).head < 11.5000000000 then goto T26_3; + else goto T26_4; + +T26_3: + response = 0.0013827683; + goto D26; + +T26_4: + response = -0.0212024376; + goto D26; + +T26_5: + response = 0.0161508420; + goto D26; + +T26_6: + response = -0.0104714457; + goto D26; + +T26_7: + response = 0.0070006551; + goto D26; + +T26_8: + response = 0.0447412235; + goto D26; + +N26_10: + if age(created_at) < 16200.0000000000 then goto T26_9; + else goto T26_10; + +T26_9: + response = -0.0124666980; + goto D26; + +T26_10: + response = -0.0057219106; + goto D26; + +N26_11: + if age(created_at) < 1830.0000000000 then goto N26_12; + else goto N26_14; + +N26_12: + if fieldMatch(text).importance < 0.4999520183 then goto T26_11; + else goto N26_13; + +T26_11: + response = 0.0038677446; + goto D26; + +N26_13: + if fieldMatch(text) < 0.4182469845 then goto T26_12; + else goto T26_13; + +T26_12: + response = 0.0026964712; + goto D26; + +T26_13: + response = 0.0132060784; + goto D26; + +N26_14: + if fieldMatch(text).weightedOccurrence < 0.0513554998 then goto N26_15; + else goto 
T26_18; + +N26_15: + if match < 0.6867794991 then goto N26_16; + else goto T26_17; + +N26_16: + if fieldMatch(text).importance < 0.6665154696 then goto T26_14; + else goto N26_17; + +T26_14: + response = -0.0201022100; + goto D26; + +N26_17: + if term(2).significance < 0.9950574636 then goto T26_15; + else goto T26_16; + +T26_15: + response = -0.0169792919; + goto D26; + +T26_16: + response = 0.0050699268; + goto D26; + +T26_17: + response = -0.0013697969; + goto D26; + +T26_18: + response = 0.0020313056; + goto D26; + +D26: + +tnscore = tnscore + response; + +/* Tree 28 of 80 */ +N27_1: + if attribute(ythl) < 0.5000000000 then goto N27_2; + else goto N27_12; + +N27_2: + if age(created_at) < 1830.0000000000 then goto N27_3; + else goto N27_11; + +N27_3: + if fieldMatch(text).importance < 0.6656075120 then goto N27_4; + else goto N27_5; + +N27_4: + if fieldMatch(text) < 0.2109414935 then goto T27_1; + else goto T27_2; + +T27_1: + response = -0.0320963356; + goto D27; + +T27_2: + response = -0.0049411304; + goto D27; + +N27_5: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N27_6; + else goto T27_9; + +N27_6: + if term(2).significance < 0.9769929647 then goto N27_7; + else goto N27_9; + +N27_7: + if fieldMatch(text).importance < 0.6665915251 then goto T27_3; + else goto N27_8; + +T27_3: + response = 0.0282759231; + goto D27; + +N27_8: + if fieldMatch(text).importance < 0.7399419546 then goto T27_4; + else goto T27_5; + +T27_4: + response = -0.0639593720; + goto D27; + +T27_5: + response = -0.0116583984; + goto D27; + +N27_9: + if term(0).significance < 0.9964904785 then goto T27_6; + else goto N27_10; + +T27_6: + response = 0.0113601762; + goto D27; + +N27_10: + if term(0).significance < 0.9984384775 then goto T27_7; + else goto T27_8; + +T27_7: + response = -0.0096497985; + goto D27; + +T27_8: + response = 0.0065807303; + goto D27; + +T27_9: + response = -0.0430967785; + goto D27; + +N27_11: + if fieldMatch(text).longestSequence < 1.5000000000 then 
goto T27_10; + else goto T27_11; + +T27_10: + response = -0.0127352010; + goto D27; + +T27_11: + response = -0.0062520929; + goto D27; + +N27_12: + if age(created_at) < 1770.0000000000 then goto N27_13; + else goto N27_15; + +N27_13: + if fieldMatch(text).earliness < 0.7211109996 then goto T27_12; + else goto N27_14; + +T27_12: + response = 0.0056563829; + goto D27; + +N27_14: + if attribute(user_followers_count) < 812.5000000000 then goto T27_13; + else goto T27_14; + +T27_13: + response = 0.0100250822; + goto D27; + +T27_14: + response = 0.0209608983; + goto D27; + +N27_15: + if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T27_15; + else goto N27_16; + +T27_15: + response = 0.0027791958; + goto D27; + +N27_16: + if fieldMatch(text).importance < 0.6664240360 then goto N27_17; + else goto T27_18; + +N27_17: + if match < 0.9192979932 then goto T27_16; + else goto T27_17; + +T27_16: + response = -0.0152598227; + goto D27; + +T27_17: + response = -0.0032270961; + goto D27; + +T27_18: + response = -0.0005422229; + goto D27; + +D27: + +tnscore = tnscore + response; + +/* Tree 29 of 80 */ +N28_1: + if attribute(ythl) < 0.5000000000 then goto N28_2; + else goto N28_8; + +N28_2: + if age(created_at) < 1830.0000000000 then goto N28_3; + else goto N28_6; + +N28_3: + if attribute(user_followers_count) < 459.5000000000 then goto N28_4; + else goto N28_5; + +N28_4: + if attribute(user_statuses_count) < 496.5000000000 then goto T28_1; + else goto T28_2; + +T28_1: + response = 0.0008480388; + goto D28; + +T28_2: + response = -0.0090870631; + goto D28; + +N28_5: + if attribute(yst_reply_auth) < 244.5000000000 then goto T28_3; + else goto T28_4; + +T28_3: + response = 0.0126503896; + goto D28; + +T28_4: + response = -0.0054197846; + goto D28; + +N28_6: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N28_7; + else goto T28_7; + +N28_7: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T28_5; + else goto T28_6; + +T28_5: + response = 
-0.0148159779; + goto D28; + +T28_6: + response = 0.0029148481; + goto D28; + +T28_7: + response = -0.0058224247; + goto D28; + +N28_8: + if age(created_at) < 1830.0000000000 then goto N28_9; + else goto N28_15; + +N28_9: + if fieldMatch(text).importance < 0.7490880489 then goto N28_10; + else goto T28_14; + +N28_10: + if fieldMatch(text) < 0.5652275085 then goto N28_11; + else goto N28_12; + +N28_11: + if term(2).significance < 0.9916304946 then goto T28_8; + else goto T28_9; + +T28_8: + response = -0.0285282409; + goto D28; + +T28_9: + response = 0.0051566337; + goto D28; + +N28_12: + if attribute(user_followers_count) < 104.5000000000 then goto N28_13; + else goto N28_14; + +N28_13: + if fieldMatch(text) < 0.8065220118 then goto T28_10; + else goto T28_11; + +T28_10: + response = 0.0383292168; + goto D28; + +T28_11: + response = 0.0010266011; + goto D28; + +N28_14: + if attribute(yst_reply_auth) < 391.0000000000 then goto T28_12; + else goto T28_13; + +T28_12: + response = 0.0133363207; + goto D28; + +T28_13: + response = -0.0143777685; + goto D28; + +T28_14: + response = 0.0164241107; + goto D28; + +N28_15: + if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto T28_15; + else goto N28_16; + +T28_15: + response = 0.0032745831; + goto D28; + +N28_16: + if fieldMatch(text).importance < 0.7496404648 then goto N28_17; + else goto T28_18; + +N28_17: + if term(1).significance < 0.9979525208 then goto T28_16; + else goto T28_17; + +T28_16: + response = -0.0096924346; + goto D28; + +T28_17: + response = -0.0009781494; + goto D28; + +T28_18: + response = 0.0015180251; + goto D28; + +D28: + +tnscore = tnscore + response; + +/* Tree 30 of 80 */ +N29_1: + if attribute(ythl) < 0.5000000000 then goto N29_2; + else goto N29_10; + +N29_2: + if age(created_at) < 1830.0000000000 then goto N29_3; + else goto N29_9; + +N29_3: + if term(0).significance < 0.9995554686 then goto N29_4; + else goto N29_8; + +N29_4: + if attribute(yst_reply_auth) < 26.5000000000 then goto 
N29_5; + else goto N29_7; + +N29_5: + if attribute(user_followers_count) < 92.5000000000 then goto N29_6; + else goto T29_3; + +N29_6: + if fieldLength(text) < 27.5000000000 then goto T29_1; + else goto T29_2; + +T29_1: + response = 0.0012866951; + goto D29; + +T29_2: + response = -0.0265027781; + goto D29; + +T29_3: + response = 0.0104770861; + goto D29; + +N29_7: + if attribute(yst_link_array_size) < 0.0041509997 then goto T29_4; + else goto T29_5; + +T29_4: + response = -0.0099713041; + goto D29; + +T29_5: + response = 0.0011954032; + goto D29; + +N29_8: + if term(0).significance < 0.9996379614 then goto T29_6; + else goto T29_7; + +T29_6: + response = -0.0297536383; + goto D29; + +T29_7: + response = -0.0002317059; + goto D29; + +N29_9: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T29_8; + else goto T29_9; + +T29_8: + response = -0.0121670225; + goto D29; + +T29_9: + response = -0.0054595694; + goto D29; + +N29_10: + if age(created_at) < 1830.0000000000 then goto N29_11; + else goto N29_15; + +N29_11: + if fieldMatch(text).importance < 0.4989734888 then goto N29_12; + else goto N29_14; + +N29_12: + if fieldMatch(text).importance < 0.4986799955 then goto T29_10; + else goto N29_13; + +T29_10: + response = 0.0071175590; + goto D29; + +N29_13: + if fieldLength(text) < 18.5000000000 then goto T29_11; + else goto T29_12; + +T29_11: + response = 0.0043567972; + goto D29; + +T29_12: + response = -0.0954988221; + goto D29; + +N29_14: + if attribute(user_statuses_count) < 5.5000000000 then goto T29_13; + else goto T29_14; + +T29_13: + response = -0.0298547936; + goto D29; + +T29_14: + response = 0.0103403639; + goto D29; + +N29_15: + if fieldMatch(text).occurrence < 0.1348485053 then goto N29_16; + else goto T29_18; + +N29_16: + if match < 0.9276950359 then goto N29_17; + else goto T29_17; + +N29_17: + if term(0).significance < 0.9981074929 then goto T29_15; + else goto T29_16; + +T29_15: + response = -0.0125565952; + goto D29; + +T29_16: + response = 
-0.0030946195; + goto D29; + +T29_17: + response = 0.0023600605; + goto D29; + +T29_18: + response = 0.0024001179; + goto D29; + +D29: + +tnscore = tnscore + response; + +/* Tree 31 of 80 */ +N30_1: + if attribute(ythl) < 0.5000000000 then goto N30_2; + else goto N30_12; + +N30_2: + if age(created_at) < 1830.0000000000 then goto N30_3; + else goto N30_11; + +N30_3: + if fieldMatch(text).importance < 0.7413114905 then goto N30_4; + else goto N30_9; + +N30_4: + if fieldMatch(text).importance < 0.6666384935 then goto N30_5; + else goto T30_6; + +N30_5: + if fieldMatch(text).earliness < 0.8681160212 then goto N30_6; + else goto N30_7; + +N30_6: + if match < 0.6799730062 then goto T30_1; + else goto T30_2; + +T30_1: + response = -0.0205917268; + goto D30; + +T30_2: + response = -0.0018660452; + goto D30; + +N30_7: + if fieldMatch(text) < 0.8819584846 then goto N30_8; + else goto T30_5; + +N30_8: + if fieldMatch(text).completeness < 0.9544465542 then goto T30_3; + else goto T30_4; + +T30_3: + response = 0.0070580213; + goto D30; + +T30_4: + response = 0.0359145000; + goto D30; + +T30_5: + response = -0.0174774107; + goto D30; + +T30_6: + response = -0.0421236424; + goto D30; + +N30_9: + if fieldMatch(text).completeness < 0.9575960040 then goto N30_10; + else goto T30_9; + +N30_10: + if fieldMatch(text).earliness < 0.4128789902 then goto T30_7; + else goto T30_8; + +T30_7: + response = -0.0155841429; + goto D30; + +T30_8: + response = 0.0271271066; + goto D30; + +T30_9: + response = 0.0016623712; + goto D30; + +N30_11: + if age(created_at) < 5400.0000000000 then goto T30_10; + else goto T30_11; + +T30_10: + response = -0.0114561422; + goto D30; + +T30_11: + response = -0.0053122836; + goto D30; + +N30_12: + if age(created_at) < 1830.0000000000 then goto N30_13; + else goto N30_17; + +N30_13: + if term(0).significance < 0.9184160233 then goto N30_14; + else goto N30_15; + +N30_14: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T30_12; + else goto T30_13; + 
+T30_12: + response = -0.0703039170; + goto D30; + +T30_13: + response = 0.0039828312; + goto D30; + +N30_15: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N30_16; + else goto T30_16; + +N30_16: + if fieldMatch(text).occurrence < 0.1455025077 then goto T30_14; + else goto T30_15; + +T30_14: + response = 0.0064206057; + goto D30; + +T30_15: + response = 0.0136203784; + goto D30; + +T30_16: + response = -0.0155537108; + goto D30; + +N30_17: + if fieldMatch(text).importance < 0.6656370163 then goto T30_17; + else goto T30_18; + +T30_17: + response = -0.0057846012; + goto D30; + +T30_18: + response = 0.0015276435; + goto D30; + +D30: + +tnscore = tnscore + response; + +/* Tree 32 of 80 */ +N31_1: + if attribute(ythl) < 0.5000000000 then goto N31_2; + else goto N31_9; + +N31_2: + if fieldMatch(text).tail < 12.5000000000 then goto N31_3; + else goto N31_8; + +N31_3: + if fieldMatch(text) < 0.4900699854 then goto N31_4; + else goto N31_5; + +N31_4: + if term(0).significance < 0.9883320332 then goto T31_1; + else goto T31_2; + +T31_1: + response = -0.0195571527; + goto D31; + +T31_2: + response = -0.0078552672; + goto D31; + +N31_5: + if fieldMatch(text).weightedOccurrence < 0.0929629952 then goto N31_6; + else goto T31_6; + +N31_6: + if age(created_at) < 1530.0000000000 then goto N31_7; + else goto T31_5; + +N31_7: + if fieldTermMatch(text,1).firstPosition < 6.5000000000 then goto T31_3; + else goto T31_4; + +T31_3: + response = 0.0158849428; + goto D31; + +T31_4: + response = -0.0010859682; + goto D31; + +T31_5: + response = -0.0055859102; + goto D31; + +T31_6: + response = -0.0109538484; + goto D31; + +N31_8: + if fieldMatch(user_name) < 0.0710614994 then goto T31_7; + else goto T31_8; + +T31_7: + response = 0.0002014444; + goto D31; + +T31_8: + response = 0.0605228154; + goto D31; + +N31_9: + if age(created_at) < 1830.0000000000 then goto N31_10; + else goto N31_13; + +N31_10: + if fieldMatch(text) < 0.2891100049 then goto T31_9; + else goto N31_11; + 
+T31_9: + response = -0.0017665209; + goto D31; + +N31_11: + if attribute(yst_reply_auth) < 471.5000000000 then goto N31_12; + else goto T31_12; + +N31_12: + if attribute(user_followers_count) < 2200.0000000000 then goto T31_10; + else goto T31_11; + +T31_10: + response = 0.0095812144; + goto D31; + +T31_11: + response = 0.0192088364; + goto D31; + +T31_12: + response = -0.0097908152; + goto D31; + +N31_13: + if fieldMatch(text).occurrence < 0.1348485053 then goto N31_14; + else goto N31_16; + +N31_14: + if match < 0.9285860062 then goto N31_15; + else goto T31_15; + +N31_15: + if attribute(user_statuses_count) < 2957.5000000000 then goto T31_13; + else goto T31_14; + +T31_13: + response = -0.0031994690; + goto D31; + +T31_14: + response = -0.0120737981; + goto D31; + +T31_15: + response = 0.0005449950; + goto D31; + +N31_16: + if term(2).significance < 0.9939094782 then goto N31_17; + else goto T31_18; + +N31_17: + if fieldMatch(text).absoluteProximity < 0.0212500002 then goto T31_16; + else goto T31_17; + +T31_16: + response = -0.0227140008; + goto D31; + +T31_17: + response = -0.0003328979; + goto D31; + +T31_18: + response = 0.0055961138; + goto D31; + +D31: + +tnscore = tnscore + response; + +/* Tree 33 of 80 */ +N32_1: + if attribute(ythl) < 0.5000000000 then goto N32_2; + else goto N32_9; + +N32_2: + if age(created_at) < 1830.0000000000 then goto N32_3; + else goto N32_7; + +N32_3: + if attribute(yst_reply_auth) < 176.0000000000 then goto N32_4; + else goto N32_6; + +N32_4: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N32_5; + else goto T32_3; + +N32_5: + if term(1).significance < 0.7788045406 then goto T32_1; + else goto T32_2; + +T32_1: + response = 0.0578500341; + goto D32; + +T32_2: + response = 0.0011485747; + goto D32; + +T32_3: + response = -0.0253253039; + goto D32; + +N32_6: + if attribute(yst_link_array_size) < 0.0223225001 then goto T32_4; + else goto T32_5; + +T32_4: + response = -0.0156277732; + goto D32; + +T32_5: + response = 
0.0023478823; + goto D32; + +N32_7: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T32_6; + else goto N32_8; + +T32_6: + response = -0.0107069928; + goto D32; + +N32_8: + if attribute(user_followers_count) < 1710.5000000000 then goto T32_7; + else goto T32_8; + +T32_7: + response = -0.0053639058; + goto D32; + +T32_8: + response = 0.0090303888; + goto D32; + +N32_9: + if fieldMatch(text) < 0.5406639576 then goto N32_10; + else goto N32_12; + +N32_10: + if fieldMatch(text).occurrence < 0.1165160015 then goto N32_11; + else goto T32_11; + +N32_11: + if term(1).significance < 0.9973840117 then goto T32_9; + else goto T32_10; + +T32_9: + response = -0.0174263062; + goto D32; + +T32_10: + response = -0.0012512051; + goto D32; + +T32_11: + response = -0.0004493622; + goto D32; + +N32_12: + if age(created_at) < 1830.0000000000 then goto N32_13; + else goto N32_17; + +N32_13: + if fieldMatch(text).importance < 0.4997544885 then goto N32_14; + else goto N32_16; + +N32_14: + if fieldMatch(text).importance < 0.4997400045 then goto T32_12; + else goto N32_15; + +T32_12: + response = 0.0011570612; + goto D32; + +N32_15: + if attribute(user_statuses_count) < 2389.0000000000 then goto T32_13; + else goto T32_14; + +T32_13: + response = -0.0752507430; + goto D32; + +T32_14: + response = 0.0078353389; + goto D32; + +N32_16: + if attribute(yst_tweet_language) < 3243.5000000000 then goto T32_15; + else goto T32_16; + +T32_15: + response = 0.0095661855; + goto D32; + +T32_16: + response = -0.0134482465; + goto D32; + +N32_17: + if fieldMatch(text).importance < 0.4999470115 then goto T32_17; + else goto T32_18; + +T32_17: + response = -0.0103740403; + goto D32; + +T32_18: + response = 0.0025010891; + goto D32; + +D32: + +tnscore = tnscore + response; + +/* Tree 34 of 80 */ +N33_1: + if attribute(ythl) < 0.5000000000 then goto N33_2; + else goto N33_11; + +N33_2: + if fieldMatch(text) < 0.3427360058 then goto N33_3; + else goto N33_4; + +N33_3: + if 
fieldMatch(text).earliness < 0.6939799786 then goto T33_1; + else goto T33_2; + +T33_1: + response = -0.0183828125; + goto D33; + +T33_2: + response = -0.0073742585; + goto D33; + +N33_4: + if age(created_at) < 1830.0000000000 then goto N33_5; + else goto N33_10; + +N33_5: + if term(0).significance < 0.9958745241 then goto N33_6; + else goto T33_8; + +N33_6: + if term(1).significance < 0.8159549832 then goto T33_3; + else goto N33_7; + +T33_3: + response = 0.0358430149; + goto D33; + +N33_7: + if term(1).significance < 0.9927034974 then goto N33_8; + else goto N33_9; + +N33_8: + if fieldMatch(text).importance < 0.6640119553 then goto T33_4; + else goto T33_5; + +T33_4: + response = 0.0448918743; + goto D33; + +T33_5: + response = -0.0109749723; + goto D33; + +N33_9: + if term(1).significance < 0.9966380000 then goto T33_6; + else goto T33_7; + +T33_6: + response = 0.0219646010; + goto D33; + +T33_7: + response = 0.0055406966; + goto D33; + +T33_8: + response = -0.0040966912; + goto D33; + +N33_10: + if fieldLength(text) < 14.5000000000 then goto T33_9; + else goto T33_10; + +T33_9: + response = -0.0121807234; + goto D33; + +T33_10: + response = -0.0043039012; + goto D33; + +N33_11: + if age(created_at) < 1830.0000000000 then goto N33_12; + else goto N33_17; + +N33_12: + if fieldMatch(text).importance < 0.7479754686 then goto N33_13; + else goto T33_16; + +N33_13: + if term(0).significance < 0.9139549732 then goto T33_11; + else goto N33_14; + +T33_11: + response = -0.0340629156; + goto D33; + +N33_14: + if fieldMatch(text).tail < 7.5000000000 then goto T33_12; + else goto N33_15; + +T33_12: + response = 0.0008350066; + goto D33; + +N33_15: + if fieldMatch(text) < 0.8667535186 then goto T33_13; + else goto N33_16; + +T33_13: + response = 0.0042479503; + goto D33; + +N33_16: + if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T33_14; + else goto T33_15; + +T33_14: + response = 0.0090797180; + goto D33; + +T33_15: + response = 0.0217882168; + goto D33; 
+ +T33_16: + response = 0.0152961627; + goto D33; + +N33_17: + if fieldTermMatch(text,1).firstPosition < 6.5000000000 then goto T33_17; + else goto T33_18; + +T33_17: + response = 0.0019770381; + goto D33; + +T33_18: + response = -0.0024650391; + goto D33; + +D33: + +tnscore = tnscore + response; + +/* Tree 35 of 80 */ +N34_1: + if attribute(ythl) < 0.5000000000 then goto N34_2; + else goto N34_5; + +N34_2: + if age(created_at) < 1650.0000000000 then goto T34_1; + else goto N34_3; + +T34_1: + response = -0.0005354970; + goto D34; + +N34_3: + if fieldMatch(text).absoluteOccurrence < 0.0126785003 then goto N34_4; + else goto T34_4; + +N34_4: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T34_2; + else goto T34_3; + +T34_2: + response = -0.0102881411; + goto D34; + +T34_3: + response = -0.0050682353; + goto D34; + +T34_4: + response = 0.0015992266; + goto D34; + +N34_5: + if age(created_at) < 1830.0000000000 then goto N34_6; + else goto N34_12; + +N34_6: + if fieldMatch(text).occurrence < 0.0425724983 then goto T34_5; + else goto N34_7; + +T34_5: + response = -0.0103390097; + goto D34; + +N34_7: + if fieldTermMatch(text,0).firstPosition < 4.5000000000 then goto N34_8; + else goto N34_9; + +N34_8: + if attribute(user_followers_count) < 807.5000000000 then goto T34_6; + else goto T34_7; + +T34_6: + response = 0.0090148290; + goto D34; + +T34_7: + response = 0.0188702216; + goto D34; + +N34_9: + if fieldMatch(text).importance < 0.7488585114 then goto N34_10; + else goto T34_11; + +N34_10: + if term(1).significance < 0.9833209515 then goto N34_11; + else goto T34_10; + +N34_11: + if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto T34_8; + else goto T34_9; + +T34_8: + response = -0.0062725138; + goto D34; + +T34_9: + response = -0.0573762051; + goto D34; + +T34_10: + response = 0.0061628636; + goto D34; + +T34_11: + response = 0.0150568527; + goto D34; + +N34_12: + if fieldMatch(text) < 0.4552929997 then goto N34_13; + else goto N34_17; + 
+N34_13: + if fieldMatch(user_name) < 0.3179910183 then goto N34_14; + else goto T34_16; + +N34_14: + if fieldMatch(text).importance < 0.6665154696 then goto N34_15; + else goto N34_16; + +N34_15: + if fieldMatch(text).occurrence < 0.1188234985 then goto T34_12; + else goto T34_13; + +T34_12: + response = -0.0170598650; + goto D34; + +T34_13: + response = -0.0000096562; + goto D34; + +N34_16: + if term(1).significance < 0.9965360165 then goto T34_14; + else goto T34_15; + +T34_14: + response = -0.0071076426; + goto D34; + +T34_15: + response = 0.0044895619; + goto D34; + +T34_16: + response = 0.0279607247; + goto D34; + +N34_17: + if term(2).significance < 0.8411514759 then goto T34_17; + else goto T34_18; + +T34_17: + response = -0.0033532749; + goto D34; + +T34_18: + response = 0.0026365471; + goto D34; + +D34: + +tnscore = tnscore + response; + +/* Tree 36 of 80 */ +N35_1: + if attribute(ythl) < 0.5000000000 then goto N35_2; + else goto N35_12; + +N35_2: + if age(created_at) < 1830.0000000000 then goto N35_3; + else goto N35_11; + +N35_3: + if term(1).significance < 0.8159549832 then goto T35_1; + else goto N35_4; + +T35_1: + response = 0.0276561682; + goto D35; + +N35_4: + if term(1).significance < 0.9822615385 then goto N35_5; + else goto N35_7; + +N35_5: + if match < 0.7864329815 then goto T35_2; + else goto N35_6; + +T35_2: + response = -0.0313860274; + goto D35; + +N35_6: + if fieldMatch(text).longestSequenceRatio < 0.7749999762 then goto T35_3; + else goto T35_4; + +T35_3: + response = 0.0265495560; + goto D35; + +T35_4: + response = -0.0146271425; + goto D35; + +N35_7: + if term(0).significance < 0.9987125397 then goto N35_8; + else goto N35_10; + +N35_8: + if term(0).significance < 0.9821995497 then goto T35_5; + else goto N35_9; + +T35_5: + response = -0.0131166881; + goto D35; + +N35_9: + if age(created_at) < 1770.0000000000 then goto T35_6; + else goto T35_7; + +T35_6: + response = 0.0024909360; + goto D35; + +T35_7: + response = 0.0242417466; + goto 
D35; + +N35_10: + if term(0).significance < 0.9987905025 then goto T35_8; + else goto T35_9; + +T35_8: + response = -0.0474544221; + goto D35; + +T35_9: + response = -0.0033216980; + goto D35; + +N35_11: + if fieldMatch(text).significance < 0.6663454771 then goto T35_10; + else goto T35_11; + +T35_10: + response = -0.0112682274; + goto D35; + +T35_11: + response = -0.0050036035; + goto D35; + +N35_12: + if age(created_at) < 1770.0000000000 then goto N35_13; + else goto N35_14; + +N35_13: + if attribute(user_followers_count) < 812.5000000000 then goto T35_12; + else goto T35_13; + +T35_12: + response = 0.0053496824; + goto D35; + +T35_13: + response = 0.0125479103; + goto D35; + +N35_14: + if fieldTermMatch(text,0).firstPosition < 1.5000000000 then goto T35_14; + else goto N35_15; + +T35_14: + response = 0.0037986168; + goto D35; + +N35_15: + if fieldMatch(text).importance < 0.7498390079 then goto N35_16; + else goto N35_17; + +N35_16: + if term(0).significance < 0.9974490404 then goto T35_15; + else goto T35_16; + +T35_15: + response = -0.0066723085; + goto D35; + +T35_16: + response = -0.0006707320; + goto D35; + +N35_17: + if attribute(user_friends_count) < 20.5000000000 then goto T35_17; + else goto T35_18; + +T35_17: + response = -0.0051792310; + goto D35; + +T35_18: + response = 0.0048035663; + goto D35; + +D35: + +tnscore = tnscore + response; + +/* Tree 37 of 80 */ +N36_1: + if attribute(ythl) < 0.5000000000 then goto N36_2; + else goto N36_12; + +N36_2: + if age(created_at) < 1830.0000000000 then goto N36_3; + else goto N36_9; + +N36_3: + if attribute(yst_reply_auth) < 26.5000000000 then goto N36_4; + else goto N36_5; + +N36_4: + if attribute(user_followers_count) < 80.5000000000 then goto T36_1; + else goto T36_2; + +T36_1: + response = -0.0011002979; + goto D36; + +T36_2: + response = 0.0120623048; + goto D36; + +N36_5: + if fieldMatch(text).importance < 0.4994869828 then goto N36_6; + else goto N36_7; + +N36_6: + if term(0).significance < 0.9910864830 
then goto T36_3; + else goto T36_4; + +T36_3: + response = -0.0150381990; + goto D36; + +T36_4: + response = 0.0335272034; + goto D36; + +N36_7: + if attribute(yst_link_array_size) < 0.0041354997 then goto T36_5; + else goto N36_8; + +T36_5: + response = -0.0126645698; + goto D36; + +N36_8: + if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T36_6; + else goto T36_7; + +T36_6: + response = 0.0132560119; + goto D36; + +T36_7: + response = -0.0052124596; + goto D36; + +N36_9: + if fieldMatch(text).importance < 0.6665325165 then goto N36_10; + else goto N36_11; + +N36_10: + if fieldMatch(text).tail < 7.5000000000 then goto T36_8; + else goto T36_9; + +T36_8: + response = -0.0136798779; + goto D36; + +T36_9: + response = -0.0055728098; + goto D36; + +N36_11: + if term(0).significance < 0.9954190254 then goto T36_10; + else goto T36_11; + +T36_10: + response = -0.0075794485; + goto D36; + +T36_11: + response = -0.0008289554; + goto D36; + +N36_12: + if age(created_at) < 1770.0000000000 then goto N36_13; + else goto N36_14; + +N36_13: + if fieldMatch(text).earliness < 0.3779760003 then goto T36_12; + else goto T36_13; + +T36_12: + response = -0.0012520588; + goto D36; + +T36_13: + response = 0.0078110682; + goto D36; + +N36_14: + if fieldMatch(text).significantOccurrence < 0.0547899976 then goto N36_15; + else goto T36_18; + +N36_15: + if match < 0.7503944635 then goto N36_16; + else goto T36_17; + +N36_16: + if fieldMatch(text).importance < 0.6665270329 then goto N36_17; + else goto T36_16; + +N36_17: + if fieldMatch(text).earliness < 0.6339714527 then goto T36_14; + else goto T36_15; + +T36_14: + response = -0.0338858554; + goto D36; + +T36_15: + response = -0.0090965850; + goto D36; + +T36_16: + response = -0.0037668704; + goto D36; + +T36_17: + response = 0.0003149666; + goto D36; + +T36_18: + response = 0.0028579202; + goto D36; + +D36: + +tnscore = tnscore + response; + +/* Tree 38 of 80 */ +N37_1: + if attribute(ythl) < 0.5000000000 then goto N37_2; 
+ else goto N37_8; + +N37_2: + if age(created_at) < 1830.0000000000 then goto N37_3; + else goto T37_7; + +N37_3: + if fieldLength(text) < 8.5000000000 then goto T37_1; + else goto N37_4; + +T37_1: + response = -0.0119762189; + goto D37; + +N37_4: + if attribute(user_friends_count) < 15.5000000000 then goto T37_2; + else goto N37_5; + +T37_2: + response = 0.0150834311; + goto D37; + +N37_5: + if fieldMatch(text) < 0.4306970239 then goto N37_6; + else goto N37_7; + +N37_6: + if term(1).significance < 0.9976525307 then goto T37_3; + else goto T37_4; + +T37_3: + response = -0.0184997590; + goto D37; + +T37_4: + response = 0.0133471677; + goto D37; + +N37_7: + if attribute(user_friends_count) < 1375.5000000000 then goto T37_5; + else goto T37_6; + +T37_5: + response = 0.0005505579; + goto D37; + +T37_6: + response = 0.0194310919; + goto D37; + +T37_7: + response = -0.0058711817; + goto D37; + +N37_8: + if age(created_at) < 1830.0000000000 then goto N37_9; + else goto N37_13; + +N37_9: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N37_10; + else goto T37_12; + +N37_10: + if fieldMatch(text).tail < 3.5000000000 then goto T37_8; + else goto N37_11; + +T37_8: + response = -0.0081336884; + goto D37; + +N37_11: + if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T37_9; + else goto N37_12; + +T37_9: + response = -0.0074424529; + goto D37; + +N37_12: + if fieldMatch(text).importance < 0.7463564873 then goto T37_10; + else goto T37_11; + +T37_10: + response = 0.0063782103; + goto D37; + +T37_11: + response = 0.0133409187; + goto D37; + +T37_12: + response = -0.0205400734; + goto D37; + +N37_13: + if fieldMatch(text).importance < 0.6664534807 then goto N37_14; + else goto N37_16; + +N37_14: + if fieldMatch(text).significantOccurrence < 0.1249409989 then goto N37_15; + else goto T37_15; + +N37_15: + if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto T37_13; + else goto T37_14; + +T37_13: + response = 0.0031349276; + goto D37; + 
+T37_14: + response = -0.0052935732; + goto D37; + +T37_15: + response = -0.0275582309; + goto D37; + +N37_16: + if age(created_at) < 63000.0000000000 then goto N37_17; + else goto T37_18; + +N37_17: + if fieldMatch(text) < 0.3207110167 then goto T37_16; + else goto T37_17; + +T37_16: + response = -0.0114880439; + goto D37; + +T37_17: + response = 0.0009364683; + goto D37; + +T37_18: + response = 0.0048748147; + goto D37; + +D37: + +tnscore = tnscore + response; + +/* Tree 39 of 80 */ +N38_1: + if attribute(ythl) < 0.5000000000 then goto N38_2; + else goto N38_9; + +N38_2: + if age(created_at) < 1830.0000000000 then goto N38_3; + else goto N38_6; + +N38_3: + if term(0).significance < 0.9995759726 then goto N38_4; + else goto T38_4; + +N38_4: + if fieldMatch(text).significantOccurrence < 0.0363755003 then goto T38_1; + else goto N38_5; + +T38_1: + response = -0.0157437300; + goto D38; + +N38_5: + if term(1).significance < 0.7788045406 then goto T38_2; + else goto T38_3; + +T38_2: + response = 0.0476961371; + goto D38; + +T38_3: + response = 0.0012138970; + goto D38; + +T38_4: + response = -0.0123574496; + goto D38; + +N38_6: + if fieldMatch(text).importance < 0.6664404869 then goto T38_5; + else goto N38_7; + +T38_5: + response = -0.0101736132; + goto D38; + +N38_7: + if term(0).significance < 0.9954395294 then goto N38_8; + else goto T38_8; + +N38_8: + if fieldMatch(text).orderness < 0.5357145071 then goto T38_6; + else goto T38_7; + +T38_6: + response = -0.0179059498; + goto D38; + +T38_7: + response = -0.0054873409; + goto D38; + +T38_8: + response = -0.0020453926; + goto D38; + +N38_9: + if fieldMatch(text) < 0.3861989975 then goto N38_10; + else goto N38_12; + +N38_10: + if term(1).significance < 0.9980044961 then goto N38_11; + else goto T38_11; + +N38_11: + if age(created_at) < 330.0000000000 then goto T38_9; + else goto T38_10; + +T38_9: + response = 0.0172467014; + goto D38; + +T38_10: + response = -0.0090164842; + goto D38; + +T38_11: + response = 
0.0018725599; + goto D38; + +N38_12: + if age(created_at) < 1830.0000000000 then goto N38_13; + else goto N38_17; + +N38_13: + if attribute(yst_reply_auth) < 557.0000000000 then goto N38_14; + else goto T38_16; + +N38_14: + if attribute(user_statuses_count) < 7.5000000000 then goto N38_15; + else goto N38_16; + +N38_15: + if attribute(user_followers_count) < 147.0000000000 then goto T38_12; + else goto T38_13; + +T38_12: + response = -0.0007864416; + goto D38; + +T38_13: + response = -0.1136937768; + goto D38; + +N38_16: + if fieldMatch(text).importance < 0.4989485145 then goto T38_14; + else goto T38_15; + +T38_14: + response = -0.0179963640; + goto D38; + +T38_15: + response = 0.0077754184; + goto D38; + +T38_16: + response = -0.0216960094; + goto D38; + +N38_17: + if fieldMatch(text).importance < 0.6657680273 then goto T38_17; + else goto T38_18; + +T38_17: + response = -0.0047764491; + goto D38; + +T38_18: + response = 0.0026613540; + goto D38; + +D38: + +tnscore = tnscore + response; + +/* Tree 40 of 80 */ +N39_1: + if attribute(ythl) < 0.5000000000 then goto N39_2; + else goto N39_8; + +N39_2: + if fieldMatch(text).absoluteProximity < 0.0247499999 then goto T39_1; + else goto N39_3; + +T39_1: + response = -0.0106958848; + goto D39; + +N39_3: + if fieldLength(text) < 14.5000000000 then goto N39_4; + else goto N39_5; + +N39_4: + if fieldMatch(text).importance < 0.4994429946 then goto T39_2; + else goto T39_3; + +T39_2: + response = 0.0219589017; + goto D39; + +T39_3: + response = -0.0076174869; + goto D39; + +N39_5: + if fieldMatch(text).occurrence < 0.0816664994 then goto T39_4; + else goto N39_6; + +T39_4: + response = -0.0053254707; + goto D39; + +N39_6: + if fieldMatch(text).longestSequenceRatio < 0.8166664839 then goto T39_5; + else goto N39_7; + +T39_5: + response = -0.0034099679; + goto D39; + +N39_7: + if attribute(user_followers_count) < 3130.0000000000 then goto T39_6; + else goto T39_7; + +T39_6: + response = 0.0023795826; + goto D39; + +T39_7: + 
response = 0.0305866803; + goto D39; + +N39_8: + if age(created_at) < 1830.0000000000 then goto N39_9; + else goto N39_15; + +N39_9: + if attribute(user_followers_count) < 579.5000000000 then goto N39_10; + else goto N39_14; + +N39_10: + if fieldMatch(text).importance < 0.7473194599 then goto N39_11; + else goto T39_12; + +N39_11: + if term(1).significance < 0.9967114925 then goto N39_12; + else goto T39_11; + +N39_12: + if fieldMatch(text).orderness < 0.2500000000 then goto T39_8; + else goto N39_13; + +T39_8: + response = 0.0199100212; + goto D39; + +N39_13: + if age(created_at) < 630.0000000000 then goto T39_9; + else goto T39_10; + +T39_9: + response = 0.0001241074; + goto D39; + +T39_10: + response = -0.0217275952; + goto D39; + +T39_11: + response = 0.0052103051; + goto D39; + +T39_12: + response = 0.0100794994; + goto D39; + +N39_14: + if fieldMatch(text).weightedOccurrence < 0.0574174970 then goto T39_13; + else goto T39_14; + +T39_13: + response = 0.0046668785; + goto D39; + +T39_14: + response = 0.0152380854; + goto D39; + +N39_15: + if fieldMatch(text) < 0.2792814970 then goto N39_16; + else goto N39_17; + +N39_16: + if term(1).significance < 0.9967014790 then goto T39_15; + else goto T39_16; + +T39_15: + response = -0.0134490906; + goto D39; + +T39_16: + response = 0.0005562205; + goto D39; + +N39_17: + if fieldTermMatch(text,1).firstPosition < 9.5000000000 then goto T39_17; + else goto T39_18; + +T39_17: + response = 0.0022888891; + goto D39; + +T39_18: + response = -0.0019659597; + goto D39; + +D39: + +tnscore = tnscore + response; + +/* Tree 41 of 80 */ +N40_1: + if attribute(ythl) < 0.5000000000 then goto N40_2; + else goto N40_7; + +N40_2: + if fieldMatch(text).tail < 7.5000000000 then goto N40_3; + else goto N40_6; + +N40_3: + if fieldMatch(text).importance < 0.6656044722 then goto N40_4; + else goto N40_5; + +N40_4: + if match < 0.7144390345 then goto T40_1; + else goto T40_2; + +T40_1: + response = -0.0310311214; + goto D40; + +T40_2: + response 
= -0.0088877493; + goto D40; + +N40_5: + if fieldMatch(text).earliness < 0.0727514997 then goto T40_3; + else goto T40_4; + +T40_3: + response = 0.0149462312; + goto D40; + +T40_4: + response = -0.0044403174; + goto D40; + +N40_6: + if attribute(user_friends_count) < 14.5000000000 then goto T40_5; + else goto T40_6; + +T40_5: + response = 0.0092716632; + goto D40; + +T40_6: + response = -0.0015500378; + goto D40; + +N40_7: + if age(created_at) < 1830.0000000000 then goto N40_8; + else goto N40_16; + +N40_8: + if fieldMatch(text) < 0.2904269993 then goto N40_9; + else goto N40_11; + +N40_9: + if attribute(user_friends_count) < 127.0000000000 then goto T40_7; + else goto N40_10; + +T40_7: + response = 0.0056714395; + goto D40; + +N40_10: + if fieldTermMatch(text,1).firstPosition < 10.5000000000 then goto T40_8; + else goto T40_9; + +T40_8: + response = -0.0061125596; + goto D40; + +T40_9: + response = -0.0400728335; + goto D40; + +N40_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N40_12; + else goto N40_15; + +N40_12: + if fieldLength(text) < 9.5000000000 then goto T40_10; + else goto N40_13; + +T40_10: + response = -0.0142401520; + goto D40; + +N40_13: + if attribute(yst_reply_auth) < 209.5000000000 then goto T40_11; + else goto N40_14; + +T40_11: + response = 0.0090823293; + goto D40; + +N40_14: + if attribute(user_followers_count) < 1677.5000000000 then goto T40_12; + else goto T40_13; + +T40_12: + response = -0.0133225099; + goto D40; + +T40_13: + response = 0.0160376802; + goto D40; + +N40_15: + if fieldTermMatch(text,0).firstPosition < 6.5000000000 then goto T40_14; + else goto T40_15; + +T40_14: + response = 0.0020672660; + goto D40; + +T40_15: + response = -0.0469488746; + goto D40; + +N40_16: + if fieldMatch(text).significantOccurrence < 0.0543674976 then goto T40_16; + else goto N40_17; + +T40_16: + response = -0.0022326468; + goto D40; + +N40_17: + if age(created_at) < 63000.0000000000 then goto T40_17; + else goto T40_18; + +T40_17: + 
response = 0.0005594003; + goto D40; + +T40_18: + response = 0.0062407904; + goto D40; + +D40: + +tnscore = tnscore + response; + +/* Tree 42 of 80 */ +N41_1: + if attribute(ythl) < 0.5000000000 then goto N41_2; + else goto N41_7; + +N41_2: + if age(created_at) < 1830.0000000000 then goto N41_3; + else goto N41_5; + +N41_3: + if term(1).significance < 0.8159549832 then goto T41_1; + else goto N41_4; + +T41_1: + response = 0.0232298047; + goto D41; + +N41_4: + if attribute(yst_reply_auth) < 176.0000000000 then goto T41_2; + else goto T41_3; + +T41_2: + response = 0.0005588745; + goto D41; + +T41_3: + response = -0.0071946844; + goto D41; + +N41_5: + if fieldLength(text) < 14.5000000000 then goto T41_4; + else goto N41_6; + +T41_4: + response = -0.0100784734; + goto D41; + +N41_6: + if fieldMatch(text).occurrence < 0.1043554991 then goto T41_5; + else goto T41_6; + +T41_5: + response = -0.0078973837; + goto D41; + +T41_6: + response = -0.0021612919; + goto D41; + +N41_7: + if fieldMatch(text) < 0.4846429825 then goto N41_8; + else goto N41_13; + +N41_8: + if term(1).significance < 0.9980959892 then goto N41_9; + else goto T41_12; + +N41_9: + if fieldMatch(text).significantOccurrence < 0.0594874993 then goto N41_10; + else goto T41_11; + +N41_10: + if fieldMatch(text).importance < 0.7486180067 then goto N41_11; + else goto N41_12; + +N41_11: + if fieldMatch(text).head < 1.5000000000 then goto T41_7; + else goto T41_8; + +T41_7: + response = -0.0040433128; + goto D41; + +T41_8: + response = -0.0206367481; + goto D41; + +N41_12: + if attribute(user_followers_count) < 592.0000000000 then goto T41_9; + else goto T41_10; + +T41_9: + response = 0.0031591870; + goto D41; + +T41_10: + response = -0.0144562410; + goto D41; + +T41_11: + response = 0.0020151861; + goto D41; + +T41_12: + response = 0.0050398144; + goto D41; + +N41_13: + if age(created_at) < 1830.0000000000 then goto N41_14; + else goto N41_17; + +N41_14: + if fieldMatch(text).importance < 0.4997544885 then goto 
N41_15; + else goto T41_16; + +N41_15: + if fieldMatch(text).importance < 0.4997400045 then goto N41_16; + else goto T41_15; + +N41_16: + if fieldLength(text) < 10.5000000000 then goto T41_13; + else goto T41_14; + +T41_13: + response = -0.0498201605; + goto D41; + +T41_14: + response = 0.0052238898; + goto D41; + +T41_15: + response = -0.0305327007; + goto D41; + +T41_16: + response = 0.0074862309; + goto D41; + +N41_17: + if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto T41_17; + else goto T41_18; + +T41_17: + response = 0.0024835558; + goto D41; + +T41_18: + response = -0.0030184713; + goto D41; + +D41: + +tnscore = tnscore + response; + +/* Tree 43 of 80 */ +N42_1: + if fieldMatch(text) < 0.2844820023 then goto N42_2; + else goto N42_5; + +N42_2: + if fieldMatch(text).importance < 0.6664454937 then goto N42_3; + else goto N42_4; + +N42_3: + if fieldMatch(text).earliness < 0.6909815073 then goto T42_1; + else goto T42_2; + +T42_1: + response = -0.0202083466; + goto D42; + +T42_2: + response = -0.0072303460; + goto D42; + +N42_4: + if term(0).significance < 0.9991005063 then goto T42_3; + else goto T42_4; + +T42_3: + response = -0.0078735247; + goto D42; + +T42_4: + response = 0.0050183478; + goto D42; + +N42_5: + if age(created_at) < 1830.0000000000 then goto N42_6; + else goto N42_12; + +N42_6: + if fieldMatch(text).importance < 0.4999545217 then goto N42_7; + else goto N42_9; + +N42_7: + if fieldMatch(text).importance < 0.4999495149 then goto T42_5; + else goto N42_8; + +T42_5: + response = 0.0010054634; + goto D42; + +N42_8: + if attribute(yst_reply_auth) < 24.5000000000 then goto T42_6; + else goto T42_7; + +T42_6: + response = -0.0091545768; + goto D42; + +T42_7: + response = -0.0585794793; + goto D42; + +N42_9: + if attribute(user_followers_count) < 496.5000000000 then goto N42_10; + else goto T42_11; + +N42_10: + if attribute(yst_reply_auth) < 64.5000000000 then goto N42_11; + else goto T42_10; + +N42_11: + if 
attribute(yst_tweet_language) < 3583.5000000000 then goto T42_8; + else goto T42_9; + +T42_8: + response = 0.0060891079; + goto D42; + +T42_9: + response = -0.0311686318; + goto D42; + +T42_10: + response = -0.0029778507; + goto D42; + +T42_11: + response = 0.0105208941; + goto D42; + +N42_12: + if fieldTermMatch(text,2).firstPosition < 9.5000000000 then goto N42_13; + else goto N42_14; + +N42_13: + if term(2).significance < 0.9943845272 then goto T42_12; + else goto T42_13; + +T42_12: + response = -0.0014959657; + goto D42; + +T42_13: + response = 0.0041636931; + goto D42; + +N42_14: + if fieldMatch(text).earliness < 0.8651515245 then goto N42_15; + else goto T42_18; + +N42_15: + if fieldMatch(user_name).completeness < 0.9791665077 then goto N42_16; + else goto T42_17; + +N42_16: + if attribute(user_followers_count) < 680.5000000000 then goto T42_14; + else goto N42_17; + +T42_14: + response = -0.0055818030; + goto D42; + +N42_17: + if term(1).significance < 0.9998655319 then goto T42_15; + else goto T42_16; + +T42_15: + response = 0.0025943130; + goto D42; + +T42_16: + response = -0.0096650963; + goto D42; + +T42_17: + response = 0.0535512397; + goto D42; + +T42_18: + response = 0.0020234411; + goto D42; + +D42: + +tnscore = tnscore + response; + +/* Tree 44 of 80 */ +N43_1: + if attribute(ythl) < 0.5000000000 then goto N43_2; + else goto N43_11; + +N43_2: + if fieldMatch(text).absoluteProximity < 0.0491665006 then goto T43_1; + else goto N43_3; + +T43_1: + response = -0.0097379318; + goto D43; + +N43_3: + if age(created_at) < 1710.0000000000 then goto N43_4; + else goto T43_10; + +N43_4: + if term(0).significance < 0.9986090064 then goto N43_5; + else goto N43_8; + +N43_5: + if term(0).significance < 0.9982025027 then goto N43_6; + else goto T43_5; + +N43_6: + if term(2).significance < 0.9959775209 then goto T43_2; + else goto N43_7; + +T43_2: + response = -0.0019926524; + goto D43; + +N43_7: + if fieldMatch(text).importance < 0.6653964520 then goto T43_3; + 
else goto T43_4; + +T43_3: + response = -0.0116503978; + goto D43; + +T43_4: + response = 0.0109466166; + goto D43; + +T43_5: + response = 0.0189828366; + goto D43; + +N43_8: + if fieldMatch(text).importance < 0.6666469574 then goto N43_9; + else goto T43_9; + +N43_9: + if fieldMatch(text).importance < 0.4998664856 then goto T43_6; + else goto N43_10; + +T43_6: + response = -0.0224440709; + goto D43; + +N43_10: + if attribute(yst_reply_auth) < 18.5000000000 then goto T43_7; + else goto T43_8; + +T43_7: + response = 0.0032764517; + goto D43; + +T43_8: + response = -0.0102488229; + goto D43; + +T43_9: + response = 0.0230535914; + goto D43; + +T43_10: + response = -0.0042653100; + goto D43; + +N43_11: + if age(created_at) < 1650.0000000000 then goto N43_12; + else goto N43_14; + +N43_12: + if attribute(user_followers_count) < 105.5000000000 then goto T43_11; + else goto N43_13; + +T43_11: + response = -0.0006226235; + goto D43; + +N43_13: + if fieldMatch(text).importance < 0.4989485145 then goto T43_12; + else goto T43_13; + +T43_12: + response = -0.0224164552; + goto D43; + +T43_13: + response = 0.0086177649; + goto D43; + +N43_14: + if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T43_14; + else goto N43_15; + +T43_14: + response = -0.0097048559; + goto D43; + +N43_15: + if fieldMatch(text) < 0.4509834945 then goto N43_16; + else goto N43_17; + +N43_16: + if term(1).significance < 0.9981694818 then goto T43_15; + else goto T43_16; + +T43_15: + response = -0.0078122033; + goto D43; + +T43_16: + response = 0.0038068440; + goto D43; + +N43_17: + if term(0).significance < 0.9841674566 then goto T43_17; + else goto T43_18; + +T43_17: + response = 0.0097951581; + goto D43; + +T43_18: + response = 0.0009243530; + goto D43; + +D43: + +tnscore = tnscore + response; + +/* Tree 45 of 80 */ +N44_1: + if attribute(ythl) < 0.5000000000 then goto N44_2; + else goto N44_11; + +N44_2: + if fieldMatch(text) < 0.2836354971 then goto N44_3; + else goto N44_4; + 
+N44_3: + if term(1).significance < 0.9996379614 then goto T44_1; + else goto T44_2; + +T44_1: + response = -0.0108495877; + goto D44; + +T44_2: + response = 0.0148675984; + goto D44; + +N44_4: + if fieldMatch(text).tail < 17.5000000000 then goto N44_5; + else goto T44_10; + +N44_5: + if term(2).significance < 0.9099119902 then goto N44_6; + else goto N44_8; + +N44_6: + if fieldMatch(text).importance < 0.4998250008 then goto N44_7; + else goto T44_5; + +N44_7: + if fieldMatch(text).significance < 0.4995914996 then goto T44_3; + else goto T44_4; + +T44_3: + response = -0.0043957101; + goto D44; + +T44_4: + response = 0.0231344492; + goto D44; + +T44_5: + response = -0.0088403820; + goto D44; + +N44_8: + if attribute(user_followers_count) < 506.5000000000 then goto N44_9; + else goto T44_9; + +N44_9: + if fieldMatch(text).importance < 0.6659464836 then goto N44_10; + else goto T44_8; + +N44_10: + if fieldMatch(text).importance < 0.6658334732 then goto T44_6; + else goto T44_7; + +T44_6: + response = -0.0003420491; + goto D44; + +T44_7: + response = 0.0373151929; + goto D44; + +T44_8: + response = -0.0037227166; + goto D44; + +T44_9: + response = 0.0035857585; + goto D44; + +T44_10: + response = 0.0053145038; + goto D44; + +N44_11: + if fieldMatch(text) < 0.2904269993 then goto N44_12; + else goto N44_15; + +N44_12: + if term(1).significance < 0.9982604980 then goto N44_13; + else goto T44_14; + +N44_13: + if term(2).significance < 0.9786905050 then goto T44_11; + else goto N44_14; + +T44_11: + response = -0.0302698107; + goto D44; + +N44_14: + if fieldMatch(text).occurrence < 0.1188234985 then goto T44_12; + else goto T44_13; + +T44_12: + response = -0.0139381667; + goto D44; + +T44_13: + response = 0.0000003038; + goto D44; + +T44_14: + response = 0.0019017619; + goto D44; + +N44_15: + if age(created_at) < 1830.0000000000 then goto N44_16; + else goto N44_17; + +N44_16: + if fieldMatch(text).occurrence < 0.0754984990 then goto T44_15; + else goto T44_16; + +T44_15: 
+ response = -0.0004698689; + goto D44; + +T44_16: + response = 0.0075287937; + goto D44; + +N44_17: + if term(2).significance < 0.9939094782 then goto T44_17; + else goto T44_18; + +T44_17: + response = -0.0018063524; + goto D44; + +T44_18: + response = 0.0022813626; + goto D44; + +D44: + +tnscore = tnscore + response; + +/* Tree 46 of 80 */ +N45_1: + if attribute(ythl) < 0.5000000000 then goto N45_2; + else goto N45_12; + +N45_2: + if fieldMatch(text).absoluteProximity < 0.0537500009 then goto T45_1; + else goto N45_3; + +T45_1: + response = -0.0076941292; + goto D45; + +N45_3: + if attribute(user_friends_count) < 13.5000000000 then goto N45_4; + else goto N45_5; + +N45_4: + if attribute(user_followers_count) < 180.5000000000 then goto T45_2; + else goto T45_3; + +T45_2: + response = 0.0007683782; + goto D45; + +T45_3: + response = 0.0254407298; + goto D45; + +N45_5: + if term(1).significance < 0.9965690374 then goto N45_6; + else goto N45_7; + +N45_6: + if attribute(user_friends_count) < 1596.0000000000 then goto T45_4; + else goto T45_5; + +T45_4: + response = -0.0009906495; + goto D45; + +T45_5: + response = 0.0179635090; + goto D45; + +N45_7: + if term(1).significance < 0.9967479706 then goto T45_6; + else goto N45_8; + +T45_6: + response = -0.0302754090; + goto D45; + +N45_8: + if term(0).significance < 0.9986245036 then goto N45_9; + else goto N45_11; + +N45_9: + if term(0).significance < 0.9983664751 then goto T45_7; + else goto N45_10; + +T45_7: + response = -0.0033463225; + goto D45; + +N45_10: + if fieldMatch(text).importance < 0.4998250008 then goto T45_8; + else goto T45_9; + +T45_8: + response = 0.0252647259; + goto D45; + +T45_9: + response = 0.0031275837; + goto D45; + +N45_11: + if fieldMatch(text).importance < 0.6665315032 then goto T45_10; + else goto T45_11; + +T45_10: + response = -0.0095874064; + goto D45; + +T45_11: + response = -0.0023156280; + goto D45; + +N45_12: + if age(created_at) < 1830.0000000000 then goto N45_13; + else goto N45_15; 
+ +N45_13: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N45_14; + else goto T45_14; + +N45_14: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T45_12; + else goto T45_13; + +T45_12: + response = 0.0044201553; + goto D45; + +T45_13: + response = 0.0117690347; + goto D45; + +T45_14: + response = -0.0145395863; + goto D45; + +N45_15: + if fieldTermMatch(text,0).firstPosition < 10.5000000000 then goto N45_16; + else goto T45_18; + +N45_16: + if fieldMatch(user_name).significantOccurrence < 0.4166665077 then goto N45_17; + else goto T45_17; + +N45_17: + if match < 0.7534494996 then goto T45_15; + else goto T45_16; + +T45_15: + response = -0.0026555272; + goto D45; + +T45_16: + response = 0.0017879837; + goto D45; + +T45_17: + response = 0.0145260250; + goto D45; + +T45_18: + response = -0.0044547476; + goto D45; + +D45: + +tnscore = tnscore + response; + +/* Tree 47 of 80 */ +N46_1: + if attribute(ythl) < 0.5000000000 then goto N46_2; + else goto N46_7; + +N46_2: + if fieldMatch(text).tail < 5.5000000000 then goto N46_3; + else goto N46_6; + +N46_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N46_4; + else goto N46_5; + +N46_4: + if fieldMatch(text).completeness < 0.9522235394 then goto T46_1; + else goto T46_2; + +T46_1: + response = 0.0042831561; + goto D46; + +T46_2: + response = -0.0095824673; + goto D46; + +N46_5: + if fieldMatch(text).weightedOccurrence < 0.0845234990 then goto T46_3; + else goto T46_4; + +T46_3: + response = -0.0006026845; + goto D46; + +T46_4: + response = -0.0078613786; + goto D46; + +N46_6: + if fieldMatch(user_name).importance < 0.1997880042 then goto T46_5; + else goto T46_6; + +T46_5: + response = -0.0010588456; + goto D46; + +T46_6: + response = 0.0257470432; + goto D46; + +N46_7: + if fieldMatch(text) < 0.2779855132 then goto N46_8; + else goto N46_11; + +N46_8: + if fieldMatch(text).significantOccurrence < 0.0599530004 then goto N46_9; + else goto T46_10; + +N46_9: + if 
fieldMatch(text).importance < 0.6665154696 then goto T46_7; + else goto N46_10; + +T46_7: + response = -0.0140299808; + goto D46; + +N46_10: + if term(2).significance < 0.9674279690 then goto T46_8; + else goto T46_9; + +T46_8: + response = -0.0277817247; + goto D46; + +T46_9: + response = 0.0028355135; + goto D46; + +T46_10: + response = 0.0034763323; + goto D46; + +N46_11: + if age(created_at) < 2370.0000000000 then goto N46_12; + else goto N46_16; + +N46_12: + if attribute(user_statuses_count) < 10.5000000000 then goto N46_13; + else goto N46_14; + +N46_13: + if attribute(yst_link_array_size) < 0.0000120000 then goto T46_11; + else goto T46_12; + +T46_11: + response = -0.0032323662; + goto D46; + +T46_12: + response = -0.1013679738; + goto D46; + +N46_14: + if fieldTermMatch(text,1).firstPosition < 5.5000000000 then goto T46_13; + else goto N46_15; + +T46_13: + response = 0.0092460814; + goto D46; + +N46_15: + if attribute(user_statuses_count) < 72785.0000000000 then goto T46_14; + else goto T46_15; + +T46_14: + response = 0.0043196848; + goto D46; + +T46_15: + response = -0.0308937796; + goto D46; + +N46_16: + if fieldMatch(text).significantOccurrence < 0.0339080021 then goto T46_16; + else goto N46_17; + +T46_16: + response = -0.0524175559; + goto D46; + +N46_17: + if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto T46_17; + else goto T46_18; + +T46_17: + response = 0.0020057038; + goto D46; + +T46_18: + response = -0.0052555353; + goto D46; + +D46: + +tnscore = tnscore + response; + +/* Tree 48 of 80 */ +N47_1: + if age(created_at) < 1830.0000000000 then goto N47_2; + else goto N47_12; + +N47_2: + if fieldMatch(text).significantOccurrence < 0.0382340029 then goto N47_3; + else goto N47_5; + +N47_3: + if fieldMatch(text) < 0.8533049822 then goto N47_4; + else goto T47_3; + +N47_4: + if term(0).significance < 0.9981650114 then goto T47_1; + else goto T47_2; + +T47_1: + response = -0.0368343915; + goto D47; + +T47_2: + response = -0.0056610638; + 
goto D47; + +T47_3: + response = 0.0049994224; + goto D47; + +N47_5: + if fieldMatch(text).tail < 5.5000000000 then goto N47_6; + else goto N47_7; + +N47_6: + if fieldMatch(text).importance < 0.4989485145 then goto T47_4; + else goto T47_5; + +T47_4: + response = -0.0226356769; + goto D47; + +T47_5: + response = 0.0008778837; + goto D47; + +N47_7: + if term(0).significance < 0.9986659884 then goto T47_6; + else goto N47_8; + +T47_6: + response = 0.0085648682; + goto D47; + +N47_8: + if fieldMatch(text).earliness < 0.7071075439 then goto N47_9; + else goto N47_10; + +N47_9: + if fieldMatch(text).importance < 0.6665714979 then goto T47_7; + else goto T47_8; + +T47_7: + response = -0.0079427382; + goto D47; + +T47_8: + response = 0.0095678431; + goto D47; + +N47_10: + if term(0).significance < 0.9988600016 then goto T47_9; + else goto N47_11; + +T47_9: + response = -0.0090514905; + goto D47; + +N47_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto T47_10; + else goto T47_11; + +T47_10: + response = 0.0116548680; + goto D47; + +T47_11: + response = -0.0250363073; + goto D47; + +N47_12: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N47_13; + else goto N47_14; + +N47_13: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto T47_12; + else goto T47_13; + +T47_12: + response = -0.0063330281; + goto D47; + +T47_13: + response = 0.0017606811; + goto D47; + +N47_14: + if attribute(user_followers_count) < 521.5000000000 then goto N47_15; + else goto T47_18; + +N47_15: + if attribute(yst_reply_auth) < 6.5000000000 then goto N47_16; + else goto N47_17; + +N47_16: + if fieldLength(text) < 26.5000000000 then goto T47_14; + else goto T47_15; + +T47_14: + response = 0.0008975568; + goto D47; + +T47_15: + response = -0.0115152224; + goto D47; + +N47_17: + if age(created_at) < 12600.0000000000 then goto T47_16; + else goto T47_17; + +T47_16: + response = -0.0079829768; + goto D47; + +T47_17: + response = -0.0007770708; + goto D47; + 
+T47_18: + response = 0.0029601612; + goto D47; + +D47: + +tnscore = tnscore + response; + +/* Tree 49 of 80 */ +N48_1: + if attribute(ythl) < 0.5000000000 then goto N48_2; + else goto N48_8; + +N48_2: + if age(created_at) < 1830.0000000000 then goto N48_3; + else goto N48_5; + +N48_3: + if term(1).significance < 0.8159549832 then goto T48_1; + else goto N48_4; + +T48_1: + response = 0.0344152691; + goto D48; + +N48_4: + if attribute(yst_tweet_language) < 3243.5000000000 then goto T48_2; + else goto T48_3; + +T48_2: + response = -0.0000931972; + goto D48; + +T48_3: + response = -0.0184607413; + goto D48; + +N48_5: + if fieldMatch(text) < 0.8700245023 then goto N48_6; + else goto N48_7; + +N48_6: + if age(created_at) < 81000.0000000000 then goto T48_4; + else goto T48_5; + +T48_4: + response = -0.0081256943; + goto D48; + +T48_5: + response = -0.0024744760; + goto D48; + +N48_7: + if fieldMatch(text).completeness < 0.9577934742 then goto T48_6; + else goto T48_7; + +T48_6: + response = 0.0060102860; + goto D48; + +T48_7: + response = -0.0049642463; + goto D48; + +N48_8: + if age(created_at) < 1830.0000000000 then goto N48_9; + else goto N48_13; + +N48_9: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N48_10; + else goto N48_11; + +N48_10: + if fieldMatch(text).completeness < 0.9521285295 then goto T48_8; + else goto T48_9; + +T48_8: + response = -0.0086391614; + goto D48; + +T48_9: + response = 0.0063207862; + goto D48; + +N48_11: + if term(0).significance < 0.9989969730 then goto T48_10; + else goto N48_12; + +T48_10: + response = 0.0055342844; + goto D48; + +N48_12: + if term(0).significance < 0.9996274710 then goto T48_11; + else goto T48_12; + +T48_11: + response = -0.0534092780; + goto D48; + +T48_12: + response = 0.0110272216; + goto D48; + +N48_13: + if fieldMatch(text).head < 1.5000000000 then goto T48_13; + else goto N48_14; + +T48_13: + response = 0.0038230846; + goto D48; + +N48_14: + if fieldMatch(text).importance < 0.6663914919 then goto 
T48_14; + else goto N48_15; + +T48_14: + response = -0.0050094296; + goto D48; + +N48_15: + if term(2).significance < 0.9939705133 then goto N48_16; + else goto T48_18; + +N48_16: + if term(2).significance < 0.9904664755 then goto T48_15; + else goto N48_17; + +T48_15: + response = -0.0006273878; + goto D48; + +N48_17: + if term(1).significance < 0.9985420108 then goto T48_16; + else goto T48_17; + +T48_16: + response = -0.0060480992; + goto D48; + +T48_17: + response = -0.0305338408; + goto D48; + +T48_18: + response = 0.0023578375; + goto D48; + +D48: + +tnscore = tnscore + response; + +/* Tree 50 of 80 */ +N49_1: + if attribute(ythl) < 0.5000000000 then goto N49_2; + else goto N49_9; + +N49_2: + if age(created_at) < 1830.0000000000 then goto N49_3; + else goto T49_8; + +N49_3: + if term(1).significance < 0.7788045406 then goto T49_1; + else goto N49_4; + +T49_1: + response = 0.0500283244; + goto D49; + +N49_4: + if fieldMatch(text).importance < 0.4989485145 then goto N49_5; + else goto N49_6; + +N49_5: + if fieldMatch(text).importance < 0.4985739887 then goto T49_2; + else goto T49_3; + +T49_2: + response = -0.0040890196; + goto D49; + +T49_3: + response = -0.0504476618; + goto D49; + +N49_6: + if fieldMatch(text).importance < 0.4994869828 then goto T49_4; + else goto N49_7; + +T49_4: + response = 0.0226347107; + goto D49; + +N49_7: + if fieldMatch(text).importance < 0.7494400144 then goto T49_5; + else goto N49_8; + +T49_5: + response = -0.0013898685; + goto D49; + +N49_8: + if term(1).significance < 0.9945595264 then goto T49_6; + else goto T49_7; + +T49_6: + response = -0.0032601315; + goto D49; + +T49_7: + response = 0.0245669695; + goto D49; + +T49_8: + response = -0.0050325999; + goto D49; + +N49_9: + if age(created_at) < 1830.0000000000 then goto N49_10; + else goto N49_15; + +N49_10: + if attribute(yst_reply_auth) < 218.5000000000 then goto N49_11; + else goto T49_14; + +N49_11: + if attribute(user_followers_count) < 516.5000000000 then goto N49_12; + 
else goto T49_13; + +N49_12: + if attribute(yst_reply_auth) < 5.5000000000 then goto N49_13; + else goto T49_12; + +N49_13: + if fieldMatch(text).importance < 0.6662045121 then goto N49_14; + else goto T49_11; + +N49_14: + if fieldMatch(text).tail < 15.5000000000 then goto T49_9; + else goto T49_10; + +T49_9: + response = -0.0009372615; + goto D49; + +T49_10: + response = 0.0137497531; + goto D49; + +T49_11: + response = 0.0092949266; + goto D49; + +T49_12: + response = -0.0038400009; + goto D49; + +T49_13: + response = 0.0088974242; + goto D49; + +T49_14: + response = -0.0063462945; + goto D49; + +N49_15: + if fieldMatch(text).fieldCompleteness < 0.1863425076 then goto N49_16; + else goto T49_18; + +N49_16: + if attribute(user_followers_count) < 82.5000000000 then goto T49_15; + else goto N49_17; + +T49_15: + response = -0.0049968940; + goto D49; + +N49_17: + if term(1).significance < 0.9968400002 then goto T49_16; + else goto T49_17; + +T49_16: + response = -0.0035744289; + goto D49; + +T49_17: + response = 0.0022489806; + goto D49; + +T49_18: + response = 0.0031548406; + goto D49; + +D49: + +tnscore = tnscore + response; + +/* Tree 51 of 80 */ +N50_1: + if attribute(user_followers_count) < 1739.0000000000 then goto N50_2; + else goto N50_16; + +N50_2: + if attribute(yst_reply_auth) < 28.5000000000 then goto N50_3; + else goto T50_15; + +N50_3: + if attribute(user_followers_count) < 86.5000000000 then goto N50_4; + else goto N50_8; + +N50_4: + if fieldMatch(text).importance < 0.6655265093 then goto T50_1; + else goto N50_5; + +T50_1: + response = -0.0070659027; + goto D50; + +N50_5: + if age(created_at) < 1890.0000000000 then goto N50_6; + else goto T50_5; + +N50_6: + if term(1).significance < 0.9980455041 then goto T50_2; + else goto N50_7; + +T50_2: + response = 0.0084857763; + goto D50; + +N50_7: + if attribute(user_friends_count) < 38.5000000000 then goto T50_3; + else goto T50_4; + +T50_3: + response = 0.0082639620; + goto D50; + +T50_4: + response = 
-0.0130539890; + goto D50; + +T50_5: + response = -0.0029246429; + goto D50; + +N50_8: + if fieldMatch(user_name) < 0.3153960109 then goto N50_9; + else goto N50_15; + +N50_9: + if fieldMatch(text) < 0.5473589897 then goto N50_10; + else goto N50_11; + +N50_10: + if match < 0.5405354500 then goto T50_6; + else goto T50_7; + +T50_6: + response = -0.0265778832; + goto D50; + +T50_7: + response = -0.0016129946; + goto D50; + +N50_11: + if fieldMatch(text).occurrence < 0.0816664994 then goto N50_12; + else goto N50_14; + +N50_12: + if attribute(yst_link_array_size) < 0.0017840000 then goto T50_8; + else goto N50_13; + +T50_8: + response = -0.0042828731; + goto D50; + +N50_13: + if age(created_at) < 1830.0000000000 then goto T50_9; + else goto T50_10; + +T50_9: + response = 0.0199611910; + goto D50; + +T50_10: + response = -0.0028795459; + goto D50; + +N50_14: + if age(created_at) < 1710.0000000000 then goto T50_11; + else goto T50_12; + +T50_11: + response = 0.0085534102; + goto D50; + +T50_12: + response = 0.0023027773; + goto D50; + +N50_15: + if fieldMatch(text).significantOccurrence < 0.0327955000 then goto T50_13; + else goto T50_14; + +T50_13: + response = -0.0415331084; + goto D50; + +T50_14: + response = 0.0263336717; + goto D50; + +T50_15: + response = -0.0045775510; + goto D50; + +N50_16: + if fieldMatch(text).tail < 7.5000000000 then goto N50_17; + else goto T50_18; + +N50_17: + if term(2).significance < 0.8023320436 then goto T50_16; + else goto T50_17; + +T50_16: + response = -0.0097788860; + goto D50; + +T50_17: + response = 0.0038323247; + goto D50; + +T50_18: + response = 0.0081719743; + goto D50; + +D50: + +tnscore = tnscore + response; + +/* Tree 52 of 80 */ +N51_1: + if attribute(user_followers_count) < 437.5000000000 then goto N51_2; + else goto N51_12; + +N51_2: + if fieldMatch(text).significantOccurrence < 0.1246850044 then goto N51_3; + else goto T51_11; + +N51_3: + if attribute(yst_reply_auth) < 22.5000000000 then goto N51_4; + else goto T51_10; 
+ +N51_4: + if fieldMatch(text) < 0.3409180045 then goto N51_5; + else goto N51_8; + +N51_5: + if fieldMatch(text).importance < 0.6665065289 then goto T51_1; + else goto N51_6; + +T51_1: + response = -0.0102582795; + goto D51; + +N51_6: + if term(1).significance < 0.9962199926 then goto N51_7; + else goto T51_4; + +N51_7: + if term(1).significance < 0.9943439960 then goto T51_2; + else goto T51_3; + +T51_2: + response = -0.0021503448; + goto D51; + +T51_3: + response = -0.0306146076; + goto D51; + +T51_4: + response = 0.0068595469; + goto D51; + +N51_8: + if fieldMatch(text).earliness < 0.9354164600 then goto N51_9; + else goto T51_9; + +N51_9: + if fieldMatch(user_name) < 0.5095770359 then goto N51_10; + else goto T51_8; + +N51_10: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N51_11; + else goto T51_7; + +N51_11: + if age(created_at) < 1830.0000000000 then goto T51_5; + else goto T51_6; + +T51_5: + response = 0.0020429611; + goto D51; + +T51_6: + response = -0.0019541993; + goto D51; + +T51_7: + response = -0.0132421664; + goto D51; + +T51_8: + response = 0.0279388980; + goto D51; + +T51_9: + response = 0.0043877081; + goto D51; + +T51_10: + response = -0.0053086844; + goto D51; + +T51_11: + response = -0.0109310616; + goto D51; + +N51_12: + if age(created_at) < 1530.0000000000 then goto N51_13; + else goto N51_16; + +N51_13: + if term(0).significance < 0.9986474514 then goto N51_14; + else goto T51_15; + +N51_14: + if term(0).significance < 0.9980379939 then goto N51_15; + else goto T51_14; + +N51_15: + if fieldMatch(text).earliness < 0.2440474927 then goto T51_12; + else goto T51_13; + +T51_12: + response = -0.0189069835; + goto D51; + +T51_13: + response = 0.0085132629; + goto D51; + +T51_14: + response = 0.0221469666; + goto D51; + +T51_15: + response = 0.0002679538; + goto D51; + +N51_16: + if attribute(user_statuses_count) < 2928.5000000000 then goto T51_16; + else goto N51_17; + +T51_16: + response = 0.0037037270; + goto D51; + +N51_17: + 
if match < 0.5710045099 then goto T51_17; + else goto T51_18; + +T51_17: + response = -0.0125698441; + goto D51; + +T51_18: + response = -0.0003892576; + goto D51; + +D51: + +tnscore = tnscore + response; + +/* Tree 53 of 80 */ +N52_1: + if fieldMatch(text).tail < 3.5000000000 then goto N52_2; + else goto N52_6; + +N52_2: + if attribute(yst_reply_auth) < 278.5000000000 then goto N52_3; + else goto T52_5; + +N52_3: + if fieldLength(text) < 24.5000000000 then goto N52_4; + else goto T52_4; + +N52_4: + if attribute(yst_link_array_size) < 0.0885144994 then goto N52_5; + else goto T52_3; + +N52_5: + if term(1).significance < 0.7788045406 then goto T52_1; + else goto T52_2; + +T52_1: + response = 0.0265531093; + goto D52; + +T52_2: + response = -0.0020354187; + goto D52; + +T52_3: + response = -0.0289274408; + goto D52; + +T52_4: + response = -0.0107078153; + goto D52; + +T52_5: + response = -0.0129870071; + goto D52; + +N52_6: + if age(created_at) < 1830.0000000000 then goto N52_7; + else goto N52_13; + +N52_7: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N52_8; + else goto N52_11; + +N52_8: + if fieldLength(text) < 9.5000000000 then goto T52_6; + else goto N52_9; + +T52_6: + response = -0.0103056223; + goto D52; + +N52_9: + if fieldMatch(user_name) < 0.2000829875 then goto N52_10; + else goto T52_9; + +N52_10: + if fieldMatch(text).importance < 0.7970539927 then goto T52_7; + else goto T52_8; + +T52_7: + response = 0.0035454291; + goto D52; + +T52_8: + response = 0.0126182815; + goto D52; + +T52_9: + response = 0.0233580846; + goto D52; + +N52_11: + if fieldLength(text) < 22.5000000000 then goto N52_12; + else goto T52_12; + +N52_12: + if term(0).significance < 0.9991210103 then goto T52_10; + else goto T52_11; + +T52_10: + response = 0.0080261443; + goto D52; + +T52_11: + response = -0.0294237431; + goto D52; + +T52_12: + response = -0.0673310696; + goto D52; + +N52_13: + if fieldMatch(text).importance < 0.6664265394 then goto N52_14; + else goto 
N52_15; + +N52_14: + if attribute(yst_reply_auth) < 13.5000000000 then goto T52_13; + else goto T52_14; + +T52_13: + response = -0.0018649103; + goto D52; + +T52_14: + response = -0.0077154393; + goto D52; + +N52_15: + if term(0).significance < 0.9997465014 then goto N52_16; + else goto N52_17; + +N52_16: + if attribute(user_followers_count) < 717.5000000000 then goto T52_15; + else goto T52_16; + +T52_15: + response = -0.0012717951; + goto D52; + +T52_16: + response = 0.0029037540; + goto D52; + +N52_17: + if fieldTermMatch(text,1).firstPosition < 15.5000000000 then goto T52_17; + else goto T52_18; + +T52_17: + response = 0.0107252476; + goto D52; + +T52_18: + response = -0.0062640981; + goto D52; + +D52: + +tnscore = tnscore + response; + +/* Tree 54 of 80 */ +N53_1: + if attribute(ythl) < 0.5000000000 then goto N53_2; + else goto N53_8; + +N53_2: + if attribute(user_followers_count) < 483.5000000000 then goto N53_3; + else goto N53_7; + +N53_3: + if attribute(user_statuses_count) < 491.5000000000 then goto N53_4; + else goto N53_6; + +N53_4: + if fieldMatch(text).tail < 3.5000000000 then goto T53_1; + else goto N53_5; + +T53_1: + response = -0.0057958616; + goto D53; + +N53_5: + if age(created_at) < 1410.0000000000 then goto T53_2; + else goto T53_3; + +T53_2: + response = 0.0070562486; + goto D53; + +T53_3: + response = -0.0007664522; + goto D53; + +N53_6: + if attribute(user_friends_count) < 8.5000000000 then goto T53_4; + else goto T53_5; + +T53_4: + response = 0.0087335556; + goto D53; + +T53_5: + response = -0.0058603167; + goto D53; + +N53_7: + if age(created_at) < 210.0000000000 then goto T53_6; + else goto T53_7; + +T53_6: + response = 0.0246066286; + goto D53; + +T53_7: + response = 0.0003480739; + goto D53; + +N53_8: + if fieldMatch(text) < 0.5547109842 then goto N53_9; + else goto N53_14; + +N53_9: + if fieldMatch(text).occurrence < 0.1348485053 then goto N53_10; + else goto T53_13; + +N53_10: + if attribute(user_statuses_count) < 2933.0000000000 then 
goto T53_8; + else goto N53_11; + +T53_8: + response = -0.0023188146; + goto D53; + +N53_11: + if attribute(yst_reply_auth) < 1.5000000000 then goto T53_9; + else goto N53_12; + +T53_9: + response = -0.0279839136; + goto D53; + +N53_12: + if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto N53_13; + else goto T53_12; + +N53_13: + if age(created_at) < 2730.0000000000 then goto T53_10; + else goto T53_11; + +T53_10: + response = 0.0153842703; + goto D53; + +T53_11: + response = -0.0081351611; + goto D53; + +T53_12: + response = -0.0240346583; + goto D53; + +T53_13: + response = 0.0008863957; + goto D53; + +N53_14: + if fieldLength(text) < 9.5000000000 then goto T53_14; + else goto N53_15; + +T53_14: + response = -0.0106073655; + goto D53; + +N53_15: + if fieldMatch(text).earliness < 0.9393379688 then goto N53_16; + else goto T53_18; + +N53_16: + if fieldMatch(text).occurrence < 0.0655914992 then goto T53_15; + else goto N53_17; + +T53_15: + response = -0.0023447985; + goto D53; + +N53_17: + if age(created_at) < 1950.0000000000 then goto T53_16; + else goto T53_17; + +T53_16: + response = 0.0063181854; + goto D53; + +T53_17: + response = 0.0015014161; + goto D53; + +T53_18: + response = 0.0074385233; + goto D53; + +D53: + +tnscore = tnscore + response; + +/* Tree 55 of 80 */ +N54_1: + if attribute(ythl) < 0.5000000000 then goto N54_2; + else goto N54_10; + +N54_2: + if fieldMatch(text) < 0.8502080441 then goto N54_3; + else goto N54_4; + +N54_3: + if fieldMatch(text).importance < 0.7468224764 then goto T54_1; + else goto T54_2; + +T54_1: + response = -0.0067962178; + goto D54; + +T54_2: + response = -0.0019381191; + goto D54; + +N54_4: + if attribute(user_statuses_count) < 16627.5000000000 then goto N54_5; + else goto T54_9; + +N54_5: + if attribute(user_followers_count) < 515.5000000000 then goto N54_6; + else goto N54_8; + +N54_6: + if fieldMatch(text).weightedOccurrence < 0.0944940001 then goto N54_7; + else goto T54_5; + +N54_7: + if 
attribute(user_statuses_count) < 109.5000000000 then goto T54_3; + else goto T54_4; + +T54_3: + response = 0.0079116741; + goto D54; + +T54_4: + response = -0.0004709728; + goto D54; + +T54_5: + response = -0.0057247378; + goto D54; + +N54_8: + if term(0).significance < 0.9991005063 then goto N54_9; + else goto T54_8; + +N54_9: + if age(created_at) < 1350.0000000000 then goto T54_6; + else goto T54_7; + +T54_6: + response = 0.0233500539; + goto D54; + +T54_7: + response = 0.0061626722; + goto D54; + +T54_8: + response = -0.0047207579; + goto D54; + +T54_9: + response = -0.0108453748; + goto D54; + +N54_10: + if fieldTermMatch(text,2).firstPosition < 8.5000000000 then goto N54_11; + else goto N54_13; + +N54_11: + if attribute(user_followers_count) < 16.5000000000 then goto T54_10; + else goto N54_12; + +T54_10: + response = -0.0054426486; + goto D54; + +N54_12: + if age(created_at) < 2430.0000000000 then goto T54_11; + else goto T54_12; + +T54_11: + response = 0.0098679265; + goto D54; + +T54_12: + response = 0.0032263599; + goto D54; + +N54_13: + if match < 0.9289889932 then goto N54_14; + else goto T54_18; + +N54_14: + if fieldMatch(text).tail < 3.5000000000 then goto N54_15; + else goto N54_16; + +N54_15: + if fieldMatch(text).occurrence < 0.0976189971 then goto T54_13; + else goto T54_14; + +T54_13: + response = -0.0211220829; + goto D54; + +T54_14: + response = -0.0037859558; + goto D54; + +N54_16: + if attribute(user_statuses_count) < 12392.5000000000 then goto T54_15; + else goto N54_17; + +T54_15: + response = 0.0000802340; + goto D54; + +N54_17: + if attribute(user_followers_count) < 317.5000000000 then goto T54_16; + else goto T54_17; + +T54_16: + response = -0.0361792845; + goto D54; + +T54_17: + response = -0.0049548586; + goto D54; + +T54_18: + response = 0.0025926241; + goto D54; + +D54: + +tnscore = tnscore + response; + +/* Tree 56 of 80 */ +N55_1: + if attribute(ythl) < 0.5000000000 then goto N55_2; + else goto N55_9; + +N55_2: + if age(created_at) 
< 1830.0000000000 then goto N55_3; + else goto T55_8; + +N55_3: + if fieldMatch(text).longestSequence < 1.5000000000 then goto T55_1; + else goto N55_4; + +T55_1: + response = -0.0031653440; + goto D55; + +N55_4: + if fieldMatch(text).importance < 0.7498974800 then goto N55_5; + else goto N55_8; + +N55_5: + if term(1).significance < 0.9981340170 then goto T55_2; + else goto N55_6; + +T55_2: + response = 0.0094378769; + goto D55; + +N55_6: + if term(1).significance < 0.9994934797 then goto N55_7; + else goto T55_5; + +N55_7: + if term(1).significance < 0.9991415143 then goto T55_3; + else goto T55_4; + +T55_3: + response = 0.0003721852; + goto D55; + +T55_4: + response = -0.0259010774; + goto D55; + +T55_5: + response = 0.0124789418; + goto D55; + +N55_8: + if term(2).significance < 0.9737149477 then goto T55_6; + else goto T55_7; + +T55_6: + response = -0.0202297481; + goto D55; + +T55_7: + response = 0.0040219128; + goto D55; + +T55_8: + response = -0.0045478246; + goto D55; + +N55_9: + if age(created_at) < 1590.0000000000 then goto N55_10; + else goto N55_12; + +N55_10: + if fieldTermMatch(text,1).firstPosition < 7.5000000000 then goto T55_9; + else goto N55_11; + +T55_9: + response = 0.0081043971; + goto D55; + +N55_11: + if fieldMatch(text) < 0.8496830463 then goto T55_10; + else goto T55_11; + +T55_10: + response = -0.0035718865; + goto D55; + +T55_11: + response = 0.0048047847; + goto D55; + +N55_12: + if fieldMatch(text).head < 1.5000000000 then goto N55_13; + else goto N55_14; + +N55_13: + if fieldLength(text) < 8.5000000000 then goto T55_12; + else goto T55_13; + +T55_12: + response = -0.0224047487; + goto D55; + +T55_13: + response = 0.0036563528; + goto D55; + +N55_14: + if fieldMatch(text) < 0.2774904966 then goto N55_15; + else goto N55_16; + +N55_15: + if term(0).significance < 0.9978594780 then goto T55_14; + else goto T55_15; + +T55_14: + response = -0.0142881937; + goto D55; + +T55_15: + response = -0.0014981238; + goto D55; + +N55_16: + if 
attribute(yst_reply_auth) < 70.5000000000 then goto T55_16; + else goto N55_17; + +T55_16: + response = 0.0001609764; + goto D55; + +N55_17: + if attribute(user_followers_count) < 397.5000000000 then goto T55_17; + else goto T55_18; + +T55_17: + response = -0.0180919900; + goto D55; + +T55_18: + response = -0.0021113953; + goto D55; + +D55: + +tnscore = tnscore + response; + +/* Tree 57 of 80 */ +N56_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N56_2; + else goto N56_14; + +N56_2: + if fieldTermMatch(text,0).firstPosition < 14.5000000000 then goto N56_3; + else goto N56_13; + +N56_3: + if fieldMatch(text).absoluteOccurrence < 0.0136665003 then goto N56_4; + else goto T56_11; + +N56_4: + if fieldMatch(text).significance < 0.7493325472 then goto N56_5; + else goto T56_10; + +N56_5: + if age(created_at) < 1830.0000000000 then goto N56_6; + else goto T56_9; + +N56_6: + if attribute(user_statuses_count) < 5.5000000000 then goto T56_1; + else goto N56_7; + +T56_1: + response = -0.0282346598; + goto D56; + +N56_7: + if term(0).significance < 0.9725670218 then goto N56_8; + else goto N56_9; + +N56_8: + if term(1).significance < 0.9792364836 then goto T56_2; + else goto T56_3; + +T56_2: + response = -0.0784237022; + goto D56; + +T56_3: + response = -0.0059997941; + goto D56; + +N56_9: + if fieldTermMatch(text,1).firstPosition < 10.5000000000 then goto T56_4; + else goto N56_10; + +T56_4: + response = 0.0078147345; + goto D56; + +N56_10: + if match < 0.9353330135 then goto T56_5; + else goto N56_11; + +T56_5: + response = -0.0019768224; + goto D56; + +N56_11: + if fieldMatch(text).earliness < 0.9198719859 then goto N56_12; + else goto T56_8; + +N56_12: + if attribute(yst_link_array_size) < 0.0002880000 then goto T56_6; + else goto T56_7; + +T56_6: + response = -0.0041019966; + goto D56; + +T56_7: + response = 0.0264356088; + goto D56; + +T56_8: + response = 0.0005631411; + goto D56; + +T56_9: + response = -0.0050485104; + goto D56; + +T56_10: + response 
= 0.0082039036; + goto D56; + +T56_11: + response = 0.0051839504; + goto D56; + +N56_13: + if term(0).significance < 0.9982025027 then goto T56_12; + else goto T56_13; + +T56_12: + response = -0.0142960741; + goto D56; + +T56_13: + response = -0.0033997299; + goto D56; + +N56_14: + if attribute(yst_tweet_language) < 3587.5000000000 then goto N56_15; + else goto T56_18; + +N56_15: + if age(created_at) < 1770.0000000000 then goto T56_14; + else goto N56_16; + +T56_14: + response = 0.0049084121; + goto D56; + +N56_16: + if attribute(ythl) < 0.5000000000 then goto T56_15; + else goto N56_17; + +T56_15: + response = -0.0020172224; + goto D56; + +N56_17: + if attribute(yst_link_array_size) < 0.0056419997 then goto T56_16; + else goto T56_17; + +T56_16: + response = 0.0029607752; + goto D56; + +T56_17: + response = -0.0028856329; + goto D56; + +T56_18: + response = -0.0261107048; + goto D56; + +D56: + +tnscore = tnscore + response; + +/* Tree 58 of 80 */ +N57_1: + if fieldMatch(text) < 0.2898915112 then goto N57_2; + else goto N57_4; + +N57_2: + if fieldMatch(text).head < 1.5000000000 then goto T57_1; + else goto N57_3; + +T57_1: + response = 0.0014847907; + goto D57; + +N57_3: + if fieldMatch(text).importance < 0.6663334966 then goto T57_2; + else goto T57_3; + +T57_2: + response = -0.0119151319; + goto D57; + +T57_3: + response = -0.0040477723; + goto D57; + +N57_4: + if attribute(user_followers_count) < 519.5000000000 then goto N57_5; + else goto N57_15; + +N57_5: + if term(2).significance < 0.9943574667 then goto N57_6; + else goto N57_14; + +N57_6: + if age(created_at) < 1830.0000000000 then goto N57_7; + else goto N57_11; + +N57_7: + if attribute(user_statuses_count) < 1256.0000000000 then goto T57_4; + else goto N57_8; + +T57_4: + response = 0.0034757752; + goto D57; + +N57_8: + if attribute(user_statuses_count) < 1397.0000000000 then goto T57_5; + else goto N57_9; + +T57_5: + response = -0.0279955298; + goto D57; + +N57_9: + if fieldMatch(text).tail < 
15.5000000000 then goto T57_6; + else goto N57_10; + +T57_6: + response = -0.0046990807; + goto D57; + +N57_10: + if match < 0.9350855350 then goto T57_7; + else goto T57_8; + +T57_7: + response = -0.0040285710; + goto D57; + +T57_8: + response = 0.0289488138; + goto D57; + +N57_11: + if term(0).significance < 0.9971770048 then goto N57_12; + else goto N57_13; + +N57_12: + if term(1).significance < 0.9985420108 then goto T57_9; + else goto T57_10; + +T57_9: + response = -0.0058304095; + goto D57; + +T57_10: + response = -0.0172909110; + goto D57; + +N57_13: + if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto T57_11; + else goto T57_12; + +T57_11: + response = 0.0053683293; + goto D57; + +T57_12: + response = -0.0057448395; + goto D57; + +N57_14: + if attribute(yst_reply_auth) < 236.5000000000 then goto T57_13; + else goto T57_14; + +T57_13: + response = 0.0018130071; + goto D57; + +T57_14: + response = -0.0097022524; + goto D57; + +N57_15: + if fieldMatch(text).importance < 0.4989485145 then goto N57_16; + else goto N57_17; + +N57_16: + if fieldMatch(text).importance < 0.4986034930 then goto T57_15; + else goto T57_16; + +T57_15: + response = -0.0030858773; + goto D57; + +T57_16: + response = -0.0594499645; + goto D57; + +N57_17: + if fieldTermMatch(text,0).firstPosition < 13.5000000000 then goto T57_17; + else goto T57_18; + +T57_17: + response = 0.0046047026; + goto D57; + +T57_18: + response = -0.0041554082; + goto D57; + +D57: + +tnscore = tnscore + response; + +/* Tree 59 of 80 */ +N58_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N58_2; + else goto N58_12; + +N58_2: + if fieldMatch(text).earliness < 0.6381819844 then goto N58_3; + else goto N58_10; + +N58_3: + if term(0).significance < 0.9928579926 then goto N58_4; + else goto N58_6; + +N58_4: + if attribute(yst_reply_auth) < 3.5000000000 then goto N58_5; + else goto T58_3; + +N58_5: + if attribute(user_followers_count) < 18.5000000000 then goto T58_1; + else goto T58_2; + 
+T58_1: + response = -0.0266701398; + goto D58; + +T58_2: + response = 0.0020537489; + goto D58; + +T58_3: + response = -0.0226862335; + goto D58; + +N58_6: + if fieldMatch(text).importance < 0.4997234941 then goto N58_7; + else goto N58_8; + +N58_7: + if attribute(yst_link_array_size) < 0.0005030000 then goto T58_4; + else goto T58_5; + +T58_4: + response = 0.0166948856; + goto D58; + +T58_5: + response = -0.0034104232; + goto D58; + +N58_8: + if age(created_at) < 270.0000000000 then goto T58_6; + else goto N58_9; + +T58_6: + response = 0.0057923479; + goto D58; + +N58_9: + if fieldMatch(text).importance < 0.4997584820 then goto T58_7; + else goto T58_8; + +T58_7: + response = -0.0291936745; + goto D58; + +T58_8: + response = -0.0059528701; + goto D58; + +N58_10: + if term(0).significance < 0.8512874842 then goto T58_9; + else goto N58_11; + +T58_9: + response = -0.0436401448; + goto D58; + +N58_11: + if age(created_at) < 1830.0000000000 then goto T58_10; + else goto T58_11; + +T58_10: + response = 0.0039537575; + goto D58; + +T58_11: + response = -0.0025333564; + goto D58; + +N58_12: + if age(created_at) < 1830.0000000000 then goto N58_13; + else goto N58_17; + +N58_13: + if attribute(yst_reply_auth) < 158.5000000000 then goto N58_14; + else goto T58_16; + +N58_14: + if attribute(yst_reply_auth) < 149.5000000000 then goto N58_15; + else goto T58_15; + +N58_15: + if term(2).significance < 0.9389865398 then goto T58_12; + else goto N58_16; + +T58_12: + response = -0.0024084291; + goto D58; + +N58_16: + if term(2).significance < 0.9853805304 then goto T58_13; + else goto T58_14; + +T58_13: + response = 0.0179593679; + goto D58; + +T58_14: + response = 0.0053348502; + goto D58; + +T58_15: + response = 0.0511133688; + goto D58; + +T58_16: + response = -0.0040868819; + goto D58; + +N58_17: + if attribute(user_followers_count) < 172.5000000000 then goto T58_17; + else goto T58_18; + +T58_17: + response = -0.0015268821; + goto D58; + +T58_18: + response = 0.0018115080; + 
goto D58; + +D58: + +tnscore = tnscore + response; + +/* Tree 60 of 80 */ +N59_1: + if fieldMatch(text) < 0.4593589902 then goto N59_2; + else goto N59_6; + +N59_2: + if term(0).significance < 0.9975925088 then goto N59_3; + else goto T59_5; + +N59_3: + if fieldMatch(text).occurrence < 0.1188234985 then goto N59_4; + else goto T59_4; + +N59_4: + if fieldMatch(text).earliness < 0.9486839771 then goto N59_5; + else goto T59_3; + +N59_5: + if term(0).significance < 0.9817185402 then goto T59_1; + else goto T59_2; + +T59_1: + response = -0.0335831380; + goto D59; + +T59_2: + response = -0.0111131767; + goto D59; + +T59_3: + response = 0.0027474033; + goto D59; + +T59_4: + response = -0.0042505836; + goto D59; + +T59_5: + response = -0.0003239219; + goto D59; + +N59_6: + if fieldLength(text) < 9.5000000000 then goto N59_7; + else goto N59_9; + +N59_7: + if term(1).significance < 0.9965380430 then goto N59_8; + else goto T59_8; + +N59_8: + if fieldMatch(text).importance < 0.6660010219 then goto T59_6; + else goto T59_7; + +T59_6: + response = 0.0283916092; + goto D59; + +T59_7: + response = -0.0014717607; + goto D59; + +T59_8: + response = -0.0092449117; + goto D59; + +N59_9: + if age(created_at) < 1770.0000000000 then goto N59_10; + else goto N59_16; + +N59_10: + if attribute(user_statuses_count) < 5.5000000000 then goto N59_11; + else goto N59_12; + +N59_11: + if attribute(yst_link_array_size) < 0.0000120000 then goto T59_9; + else goto T59_10; + +T59_9: + response = -0.0060483091; + goto D59; + +T59_10: + response = -0.0952850231; + goto D59; + +N59_12: + if term(2).significance < 0.9389865398 then goto N59_13; + else goto T59_15; + +N59_13: + if attribute(user_statuses_count) < 27302.5000000000 then goto N59_14; + else goto N59_15; + +N59_14: + if term(0).significance < 0.9943234921 then goto T59_11; + else goto T59_12; + +T59_11: + response = -0.0055188147; + goto D59; + +T59_12: + response = 0.0033202683; + goto D59; + +N59_15: + if attribute(yst_reply_auth) < 
19.5000000000 then goto T59_13; + else goto T59_14; + +T59_13: + response = 0.0086451663; + goto D59; + +T59_14: + response = -0.0362288139; + goto D59; + +T59_15: + response = 0.0064556248; + goto D59; + +N59_16: + if fieldMatch(text).occurrence < 0.0784614980 then goto T59_16; + else goto N59_17; + +T59_16: + response = -0.0044655704; + goto D59; + +N59_17: + if fieldTermMatch(text,0).firstPosition < 0.5000000000 then goto T59_17; + else goto T59_18; + +T59_17: + response = 0.0052075545; + goto D59; + +T59_18: + response = 0.0000792364; + goto D59; + +D59: + +tnscore = tnscore + response; + +/* Tree 61 of 80 */ +N60_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N60_2; + else goto N60_9; + +N60_2: + if term(0).significance < 0.8512874842 then goto N60_3; + else goto N60_4; + +N60_3: + if term(2).significance < 0.9936410189 then goto T60_1; + else goto T60_2; + +T60_1: + response = -0.0904883380; + goto D60; + +T60_2: + response = -0.0089960419; + goto D60; + +N60_4: + if fieldMatch(text).importance < 0.4999005198 then goto N60_5; + else goto N60_6; + +N60_5: + if attribute(user_friends_count) < 7.5000000000 then goto T60_3; + else goto T60_4; + +T60_3: + response = 0.0105675969; + goto D60; + +T60_4: + response = -0.0002271753; + goto D60; + +N60_6: + if fieldMatch(text).importance < 0.4999030232 then goto T60_5; + else goto N60_7; + +T60_5: + response = -0.0345873832; + goto D60; + +N60_7: + if fieldMatch(text).earliness < 0.7165180445 then goto N60_8; + else goto T60_8; + +N60_8: + if term(1).significance < 0.9967604876 then goto T60_6; + else goto T60_7; + +T60_6: + response = -0.0115456455; + goto D60; + +T60_7: + response = -0.0042372928; + goto D60; + +T60_8: + response = -0.0012917255; + goto D60; + +N60_9: + if age(created_at) < 1830.0000000000 then goto N60_10; + else goto N60_16; + +N60_10: + if attribute(yst_reply_auth) < 26.5000000000 then goto T60_9; + else goto N60_11; + +T60_9: + response = 0.0072943729; + goto D60; + +N60_11: + 
if attribute(user_followers_count) < 3340.0000000000 then goto N60_12; + else goto T60_15; + +N60_12: + if fieldMatch(text).importance < 0.6665514708 then goto N60_13; + else goto N60_14; + +N60_13: + if fieldMatch(text).significance < 0.6661305428 then goto T60_10; + else goto T60_11; + +T60_10: + response = -0.0013967805; + goto D60; + +T60_11: + response = -0.0212892006; + goto D60; + +N60_14: + if attribute(yst_reply_auth) < 48.5000000000 then goto T60_12; + else goto N60_15; + +T60_12: + response = -0.0095375798; + goto D60; + +N60_15: + if fieldMatch(text).importance < 0.7498970032 then goto T60_13; + else goto T60_14; + +T60_13: + response = 0.0155022730; + goto D60; + +T60_14: + response = -0.0026741211; + goto D60; + +T60_15: + response = 0.0188293335; + goto D60; + +N60_16: + if attribute(user_followers_count) < 72.5000000000 then goto T60_16; + else goto N60_17; + +T60_16: + response = -0.0024566452; + goto D60; + +N60_17: + if attribute(yst_reply_auth) < 50.5000000000 then goto T60_17; + else goto T60_18; + +T60_17: + response = 0.0027297795; + goto D60; + +T60_18: + response = -0.0018218561; + goto D60; + +D60: + +tnscore = tnscore + response; + +/* Tree 62 of 80 */ +N61_1: + if fieldMatch(text).earliness < 0.9298025370 then goto N61_2; + else goto N61_15; + +N61_2: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N61_3; + else goto N61_9; + +N61_3: + if fieldMatch(user_name).importance < 0.3325359821 then goto N61_4; + else goto N61_8; + +N61_4: + if term(1).significance < 0.9972745180 then goto T61_1; + else goto N61_5; + +T61_1: + response = -0.0074074056; + goto D61; + +N61_5: + if attribute(user_statuses_count) < 5.5000000000 then goto N61_6; + else goto N61_7; + +N61_6: + if fieldMatch(text) < 0.8386005163 then goto T61_2; + else goto T61_3; + +T61_2: + response = 0.0032054405; + goto D61; + +T61_3: + response = -0.0402836718; + goto D61; + +N61_7: + if fieldMatch(text).earliness < 0.6381819844 then goto T61_4; + else goto T61_5; + 
+T61_4: + response = -0.0037883000; + goto D61; + +T61_5: + response = 0.0008924796; + goto D61; + +N61_8: + if attribute(user_statuses_count) < 3055.5000000000 then goto T61_6; + else goto T61_7; + +T61_6: + response = 0.0281949081; + goto D61; + +T61_7: + response = -0.0048301755; + goto D61; + +N61_9: + if age(created_at) < 1770.0000000000 then goto T61_8; + else goto N61_10; + +T61_8: + response = 0.0037513988; + goto D61; + +N61_10: + if attribute(yst_reply_auth) < 71.5000000000 then goto N61_11; + else goto T61_14; + +N61_11: + if attribute(user_followers_count) < 68.5000000000 then goto T61_9; + else goto N61_12; + +T61_9: + response = -0.0032099164; + goto D61; + +N61_12: + if fieldMatch(text).weightedOccurrence < 0.0510035008 then goto N61_13; + else goto N61_14; + +N61_13: + if term(2).significance < 0.9982124567 then goto T61_10; + else goto T61_11; + +T61_10: + response = -0.0067952208; + goto D61; + +T61_11: + response = 0.0019990379; + goto D61; + +N61_14: + if fieldTermMatch(text,1).firstPosition < 3.5000000000 then goto T61_12; + else goto T61_13; + +T61_12: + response = -0.0014223313; + goto D61; + +T61_13: + response = 0.0056390354; + goto D61; + +T61_14: + response = -0.0052017914; + goto D61; + +N61_15: + if fieldMatch(text).tail < 8.5000000000 then goto N61_16; + else goto N61_17; + +N61_16: + if fieldTermMatch(text,1).firstPosition < 21.5000000000 then goto T61_15; + else goto T61_16; + +T61_15: + response = 0.0000776589; + goto D61; + +T61_16: + response = -0.0251022513; + goto D61; + +N61_17: + if age(created_at) < 1770.0000000000 then goto T61_17; + else goto T61_18; + +T61_17: + response = 0.0107898472; + goto D61; + +T61_18: + response = 0.0035281034; + goto D61; + +D61: + +tnscore = tnscore + response; + +/* Tree 63 of 80 */ +N62_1: + if fieldMatch(text) < 0.4136639833 then goto N62_2; + else goto N62_6; + +N62_2: + if fieldMatch(text).earliness < 0.6228449941 then goto T62_1; + else goto N62_3; + +T62_1: + response = -0.0108022756; + 
goto D62; + +N62_3: + if term(0).significance < 0.9806225300 then goto N62_4; + else goto T62_5; + +N62_4: + if fieldMatch(text).importance < 0.7393674850 then goto N62_5; + else goto T62_4; + +N62_5: + if term(1).significance < 0.9957709908 then goto T62_2; + else goto T62_3; + +T62_2: + response = -0.0755310801; + goto D62; + +T62_3: + response = -0.0197330906; + goto D62; + +T62_4: + response = -0.0036348641; + goto D62; + +T62_5: + response = -0.0013106391; + goto D62; + +N62_6: + if fieldLength(text) < 9.5000000000 then goto N62_7; + else goto N62_8; + +N62_7: + if term(1).significance < 0.9965360165 then goto T62_6; + else goto T62_7; + +T62_6: + response = 0.0028564297; + goto D62; + +T62_7: + response = -0.0093397644; + goto D62; + +N62_8: + if fieldMatch(text).earliness < 0.9321835041 then goto N62_9; + else goto T62_18; + +N62_9: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N62_10; + else goto T62_17; + +N62_10: + if age(created_at) < 1830.0000000000 then goto N62_11; + else goto N62_16; + +N62_11: + if attribute(user_followers_count) < 40.5000000000 then goto N62_12; + else goto N62_14; + +N62_12: + if fieldMatch(text).importance < 0.7464824915 then goto N62_13; + else goto T62_10; + +N62_13: + if fieldMatch(text).importance < 0.7373905182 then goto T62_8; + else goto T62_9; + +T62_8: + response = -0.0053736193; + goto D62; + +T62_9: + response = -0.0642881769; + goto D62; + +T62_10: + response = 0.0068053454; + goto D62; + +N62_14: + if term(0).significance < 0.9995554686 then goto N62_15; + else goto T62_13; + +N62_15: + if fieldMatch(text).tail < 21.5000000000 then goto T62_11; + else goto T62_12; + +T62_11: + response = 0.0051189016; + goto D62; + +T62_12: + response = -0.0221854397; + goto D62; + +T62_13: + response = -0.0053750670; + goto D62; + +N62_16: + if term(2).significance < 0.9946069717 then goto T62_14; + else goto N62_17; + +T62_14: + response = -0.0032296610; + goto D62; + +N62_17: + if attribute(yst_link_array_size) < 
0.0122835003 then goto T62_15; + else goto T62_16; + +T62_15: + response = 0.0019118484; + goto D62; + +T62_16: + response = -0.0049101186; + goto D62; + +T62_17: + response = -0.0109713480; + goto D62; + +T62_18: + response = 0.0042358084; + goto D62; + +D62: + +tnscore = tnscore + response; + +/* Tree 64 of 80 */ +N63_1: + if attribute(ythl) < 0.5000000000 then goto N63_2; + else goto N63_12; + +N63_2: + if age(created_at) < 1830.0000000000 then goto N63_3; + else goto N63_9; + +N63_3: + if term(1).significance < 0.7788045406 then goto T63_1; + else goto N63_4; + +T63_1: + response = 0.0501539771; + goto D63; + +N63_4: + if attribute(user_statuses_count) < 8152.0000000000 then goto N63_5; + else goto T63_7; + +N63_5: + if attribute(user_followers_count) < 443.5000000000 then goto T63_2; + else goto N63_6; + +T63_2: + response = -0.0007685040; + goto D63; + +N63_6: + if attribute(yst_link_array_size) < 0.0250005014 then goto N63_7; + else goto N63_8; + +N63_7: + if fieldMatch(text).importance < 0.6649650335 then goto T63_3; + else goto T63_4; + +T63_3: + response = -0.0040469549; + goto D63; + +T63_4: + response = 0.0165434132; + goto D63; + +N63_8: + if attribute(user_followers_count) < 1371.0000000000 then goto T63_5; + else goto T63_6; + +T63_5: + response = 0.0529050928; + goto D63; + +T63_6: + response = 0.0097057892; + goto D63; + +T63_7: + response = -0.0074806913; + goto D63; + +N63_9: + if fieldLength(text) < 14.5000000000 then goto T63_8; + else goto N63_10; + +T63_8: + response = -0.0075857569; + goto D63; + +N63_10: + if fieldMatch(text).significantOccurrence < 0.0476144999 then goto T63_9; + else goto N63_11; + +T63_9: + response = -0.0052872985; + goto D63; + +N63_11: + if attribute(yst_reply_auth) < 33.5000000000 then goto T63_10; + else goto T63_11; + +T63_10: + response = 0.0021247688; + goto D63; + +T63_11: + response = -0.0043264990; + goto D63; + +N63_12: + if fieldTermMatch(text,1).firstPosition < 12.5000000000 then goto N63_13; + else goto 
N63_14; + +N63_13: + if fieldMatch(text) < 0.5566675067 then goto T63_12; + else goto T63_13; + +T63_12: + response = -0.0012160193; + goto D63; + +T63_13: + response = 0.0034883449; + goto D63; + +N63_14: + if attribute(yst_reply_auth) < 20.5000000000 then goto N63_15; + else goto N63_16; + +N63_15: + if attribute(user_followers_count) < 213.5000000000 then goto T63_14; + else goto T63_15; + +T63_14: + response = -0.0026673084; + goto D63; + +T63_15: + response = 0.0029249608; + goto D63; + +N63_16: + if attribute(user_statuses_count) < 7554.0000000000 then goto T63_16; + else goto N63_17; + +T63_16: + response = -0.0021650101; + goto D63; + +N63_17: + if term(0).significance < 0.9956585169 then goto T63_17; + else goto T63_18; + +T63_17: + response = -0.0326924993; + goto D63; + +T63_18: + response = -0.0081739014; + goto D63; + +D63: + +tnscore = tnscore + response; + +/* Tree 65 of 80 */ +N64_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N64_2; + else goto N64_13; + +N64_2: + if term(0).significance < 0.8512874842 then goto N64_3; + else goto N64_4; + +N64_3: + if term(2).significance < 0.9981595278 then goto T64_1; + else goto T64_2; + +T64_1: + response = -0.0839195878; + goto D64; + +T64_2: + response = -0.0112331884; + goto D64; + +N64_4: + if fieldMatch(text).importance < 0.4998250008 then goto N64_5; + else goto N64_10; + +N64_5: + if term(0).significance < 0.9983350039 then goto N64_6; + else goto T64_8; + +N64_6: + if fieldMatch(text).importance < 0.4989485145 then goto N64_7; + else goto N64_8; + +N64_7: + if fieldMatch(text).importance < 0.4988874793 then goto T64_3; + else goto T64_4; + +T64_3: + response = -0.0037133544; + goto D64; + +T64_4: + response = -0.1248149534; + goto D64; + +N64_8: + if fieldMatch(text).importance < 0.4997234941 then goto T64_5; + else goto N64_9; + +T64_5: + response = 0.0058967543; + goto D64; + +N64_9: + if attribute(yst_reply_auth) < 0.5000000000 then goto T64_6; + else goto T64_7; + +T64_6: + 
response = -0.0271620138; + goto D64; + +T64_7: + response = 0.0002819878; + goto D64; + +T64_8: + response = 0.0163052773; + goto D64; + +N64_10: + if fieldTermMatch(text,0).firstPosition < 5.5000000000 then goto T64_9; + else goto N64_11; + +T64_9: + response = -0.0009410187; + goto D64; + +N64_11: + if fieldMatch(text).importance < 0.6664544940 then goto T64_10; + else goto N64_12; + +T64_10: + response = -0.0070269578; + goto D64; + +N64_12: + if term(2).significance < 0.9931030273 then goto T64_11; + else goto T64_12; + +T64_11: + response = -0.0096096659; + goto D64; + +T64_12: + response = 0.0015413452; + goto D64; + +N64_13: + if attribute(user_followers_count) < 520.5000000000 then goto N64_14; + else goto N64_17; + +N64_14: + if attribute(yst_reply_auth) < 8.5000000000 then goto N64_15; + else goto T64_16; + +N64_15: + if fieldMatch(text).significantOccurrence < 0.0363755003 then goto T64_13; + else goto N64_16; + +T64_13: + response = -0.0183942755; + goto D64; + +N64_16: + if age(created_at) < 1710.0000000000 then goto T64_14; + else goto T64_15; + +T64_14: + response = 0.0052904688; + goto D64; + +T64_15: + response = 0.0003556613; + goto D64; + +T64_16: + response = -0.0034358951; + goto D64; + +N64_17: + if attribute(user_followers_count) < 534.5000000000 then goto T64_17; + else goto T64_18; + +T64_17: + response = 0.0246325003; + goto D64; + +T64_18: + response = 0.0032783956; + goto D64; + +D64: + +tnscore = tnscore + response; + +/* Tree 66 of 80 */ +N65_1: + if attribute(ythl) < 0.5000000000 then goto N65_2; + else goto N65_7; + +N65_2: + if fieldMatch(text) < 0.4860935211 then goto N65_3; + else goto N65_4; + +N65_3: + if term(1).significance < 0.9882720113 then goto T65_1; + else goto T65_2; + +T65_1: + response = -0.0113150549; + goto D65; + +T65_2: + response = -0.0035132590; + goto D65; + +N65_4: + if attribute(user_statuses_count) < 14534.0000000000 then goto N65_5; + else goto T65_6; + +N65_5: + if attribute(user_followers_count) < 
457.5000000000 then goto T65_3; + else goto N65_6; + +T65_3: + response = -0.0013098462; + goto D65; + +N65_6: + if term(4).significance < 0.9912315011 then goto T65_4; + else goto T65_5; + +T65_4: + response = 0.0071257515; + goto D65; + +T65_5: + response = -0.0107882556; + goto D65; + +T65_6: + response = -0.0080098717; + goto D65; + +N65_7: + if fieldMatch(text).occurrence < 0.1348485053 then goto N65_8; + else goto N65_12; + +N65_8: + if term(1).significance < 0.9926555157 then goto N65_9; + else goto N65_11; + +N65_9: + if attribute(yst_link_array_size) < 0.0000615000 then goto N65_10; + else goto T65_9; + +N65_10: + if fieldTermMatch(text,0).firstPosition < 7.5000000000 then goto T65_7; + else goto T65_8; + +T65_7: + response = -0.0054509513; + goto D65; + +T65_8: + response = -0.0267164116; + goto D65; + +T65_9: + response = -0.0002251203; + goto D65; + +N65_11: + if fieldMatch(text).completeness < 0.9520415068 then goto T65_10; + else goto T65_11; + +T65_10: + response = -0.0121998182; + goto D65; + +T65_11: + response = 0.0006584783; + goto D65; + +N65_12: + if age(created_at) < 1770.0000000000 then goto T65_12; + else goto N65_13; + +T65_12: + response = 0.0078526654; + goto D65; + +N65_13: + if fieldTermMatch(text,3).firstPosition < 2.5000000000 then goto T65_13; + else goto N65_14; + +T65_13: + response = 0.0108153740; + goto D65; + +N65_14: + if term(2).significance < 0.9929159880 then goto N65_15; + else goto N65_17; + +N65_15: + if term(2).significance < 0.9923814535 then goto T65_14; + else goto N65_16; + +T65_14: + response = -0.0011187617; + goto D65; + +N65_16: + if fieldMatch(text).completeness < 0.9579474926 then goto T65_15; + else goto T65_16; + +T65_15: + response = -0.0550616595; + goto D65; + +T65_16: + response = -0.0034789409; + goto D65; + +N65_17: + if attribute(yst_tweet_adult_score) < 0.5000000000 then goto T65_17; + else goto T65_18; + +T65_17: + response = 0.0029410626; + goto D65; + +T65_18: + response = -0.0116671785; + goto 
D65; + +D65: + +tnscore = tnscore + response; + +/* Tree 67 of 80 */ +N66_1: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N66_2; + else goto T66_18; + +N66_2: + if age(created_at) < 1830.0000000000 then goto N66_3; + else goto N66_13; + +N66_3: + if term(0).significance < 0.9725670218 then goto N66_4; + else goto N66_5; + +N66_4: + if fieldMatch(text) < 0.6660829782 then goto T66_1; + else goto T66_2; + +T66_1: + response = -0.0173866153; + goto D66; + +T66_2: + response = -0.0010174380; + goto D66; + +N66_5: + if attribute(yst_reply_auth) < 355.5000000000 then goto N66_6; + else goto N66_12; + +N66_6: + if term(0).significance < 0.9816665053 then goto T66_3; + else goto N66_7; + +T66_3: + response = 0.0179367183; + goto D66; + +N66_7: + if fieldMatch(text).importance < 0.7493820190 then goto N66_8; + else goto T66_9; + +N66_8: + if fieldMatch(text).tail < 6.5000000000 then goto N66_9; + else goto T66_8; + +N66_9: + if attribute(user_friends_count) < 560.5000000000 then goto N66_10; + else goto N66_11; + +N66_10: + if fieldMatch(text) < 0.8736619949 then goto T66_4; + else goto T66_5; + +T66_4: + response = 0.0023509846; + goto D66; + +T66_5: + response = -0.0157312448; + goto D66; + +N66_11: + if fieldMatch(text) < 0.1418584883 then goto T66_6; + else goto T66_7; + +T66_6: + response = -0.0659559738; + goto D66; + +T66_7: + response = -0.0072510736; + goto D66; + +T66_8: + response = 0.0044544430; + goto D66; + +T66_9: + response = 0.0083195610; + goto D66; + +N66_12: + if attribute(yst_link_array_size) < 0.0586175025 then goto T66_10; + else goto T66_11; + +T66_10: + response = -0.0091289813; + goto D66; + +T66_11: + response = 0.0114658081; + goto D66; + +N66_13: + if fieldMatch(text).importance < 0.6664544940 then goto N66_14; + else goto N66_15; + +N66_14: + if fieldMatch(user_name).fieldCompleteness < 0.5833334923 then goto T66_12; + else goto T66_13; + +T66_12: + response = -0.0041143634; + goto D66; + +T66_13: + response = 0.0401025109; + 
goto D66; + +N66_15: + if term(2).significance < 0.9991624951 then goto N66_16; + else goto N66_17; + +N66_16: + if fieldMatch(text) < 0.5540195107 then goto T66_14; + else goto T66_15; + +T66_14: + response = -0.0037827224; + goto D66; + +T66_15: + response = 0.0001685363; + goto D66; + +N66_17: + if fieldLength(text) < 8.5000000000 then goto T66_16; + else goto T66_17; + +T66_16: + response = -0.0122620665; + goto D66; + +T66_17: + response = 0.0037138353; + goto D66; + +T66_18: + response = -0.0101985628; + goto D66; + +D66: + +tnscore = tnscore + response; + +/* Tree 68 of 80 */ +N67_1: + if age(created_at) < 1830.0000000000 then goto N67_2; + else goto N67_12; + +N67_2: + if fieldMatch(text).importance < 0.4989485145 then goto N67_3; + else goto N67_5; + +N67_3: + if fieldMatch(text).importance < 0.4988874793 then goto N67_4; + else goto T67_3; + +N67_4: + if age(created_at) < 690.0000000000 then goto T67_1; + else goto T67_2; + +T67_1: + response = -0.0256320594; + goto D67; + +T67_2: + response = 0.0079996205; + goto D67; + +T67_3: + response = -0.1224294269; + goto D67; + +N67_5: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N67_6; + else goto T67_11; + +N67_6: + if fieldMatch(text).importance < 0.7491755486 then goto N67_7; + else goto T67_10; + +N67_7: + if term(0).significance < 0.9184160233 then goto N67_8; + else goto N67_9; + +N67_8: + if term(2).significance < 0.9980159998 then goto T67_4; + else goto T67_5; + +T67_4: + response = -0.0525545375; + goto D67; + +T67_5: + response = 0.0023785461; + goto D67; + +N67_9: + if fieldMatch(text).importance < 0.7490389943 then goto N67_10; + else goto T67_9; + +N67_10: + if attribute(user_followers_count) < 787.5000000000 then goto T67_6; + else goto N67_11; + +T67_6: + response = 0.0010999135; + goto D67; + +N67_11: + if fieldMatch(text).importance < 0.4998484850 then goto T67_7; + else goto T67_8; + +T67_7: + response = 0.0162069505; + goto D67; + +T67_8: + response = 0.0035170311; + goto 
D67; + +T67_9: + response = -0.0287539558; + goto D67; + +T67_10: + response = 0.0072807979; + goto D67; + +T67_11: + response = -0.0106468395; + goto D67; + +N67_12: + if fieldMatch(text).importance < 0.6664534807 then goto T67_12; + else goto N67_13; + +T67_12: + response = -0.0041764714; + goto D67; + +N67_13: + if term(0).significance < 0.9954395294 then goto N67_14; + else goto N67_16; + +N67_14: + if fieldMatch(user_name).significantOccurrence < 0.2916665077 then goto N67_15; + else goto T67_15; + +N67_15: + if fieldMatch(text).importance < 0.8318179846 then goto T67_13; + else goto T67_14; + +T67_13: + response = -0.0042118878; + goto D67; + +T67_14: + response = 0.0043649147; + goto D67; + +T67_15: + response = 0.0158696258; + goto D67; + +N67_16: + if term(1).significance < 0.9986659884 then goto N67_17; + else goto T67_18; + +N67_17: + if fieldMatch(text).tail < 1.5000000000 then goto T67_16; + else goto T67_17; + +T67_16: + response = -0.0046863462; + goto D67; + +T67_17: + response = 0.0042419546; + goto D67; + +T67_18: + response = -0.0008828310; + goto D67; + +D67: + +tnscore = tnscore + response; + +/* Tree 69 of 80 */ +N68_1: + if age(created_at) < 1830.0000000000 then goto N68_2; + else goto N68_14; + +N68_2: + if attribute(user_statuses_count) < 3.5000000000 then goto T68_1; + else goto N68_3; + +T68_1: + response = -0.0248705295; + goto D68; + +N68_3: + if attribute(yst_reply_auth) < 247.5000000000 then goto N68_4; + else goto N68_13; + +N68_4: + if attribute(user_followers_count) < 97.5000000000 then goto N68_5; + else goto N68_12; + +N68_5: + if attribute(yst_link_array_size) < 0.0007835000 then goto N68_6; + else goto N68_10; + +N68_6: + if fieldLength(text) < 27.5000000000 then goto N68_7; + else goto N68_9; + +N68_7: + if fieldMatch(text).significantOccurrence < 0.0591179989 then goto T68_2; + else goto N68_8; + +T68_2: + response = 0.0070832320; + goto D68; + +N68_8: + if fieldTermMatch(text,1).firstPosition < 1.5000000000 then goto T68_3; 
+ else goto T68_4; + +T68_3: + response = 0.0146283297; + goto D68; + +T68_4: + response = -0.0028201578; + goto D68; + +N68_9: + if attribute(user_followers_count) < 23.5000000000 then goto T68_5; + else goto T68_6; + +T68_5: + response = -0.0472201281; + goto D68; + +T68_6: + response = 0.0008078028; + goto D68; + +N68_10: + if fieldTermMatch(text,1).firstPosition < 4.5000000000 then goto T68_7; + else goto N68_11; + +T68_7: + response = 0.0067052369; + goto D68; + +N68_11: + if fieldMatch(text).earliness < 0.8221344948 then goto T68_8; + else goto T68_9; + +T68_8: + response = -0.0076362760; + goto D68; + +T68_9: + response = -0.0324826734; + goto D68; + +N68_12: + if fieldLength(text) < 10.5000000000 then goto T68_10; + else goto T68_11; + +T68_10: + response = -0.0052857206; + goto D68; + +T68_11: + response = 0.0059849079; + goto D68; + +N68_13: + if attribute(user_followers_count) < 1692.5000000000 then goto T68_12; + else goto T68_13; + +T68_12: + response = -0.0091360049; + goto D68; + +T68_13: + response = 0.0091123239; + goto D68; + +N68_14: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N68_15; + else goto T68_18; + +N68_15: + if fieldMatch(text).earliness < 0.6825754642 then goto N68_16; + else goto N68_17; + +N68_16: + if term(0).significance < 0.9944700003 then goto T68_14; + else goto T68_15; + +T68_14: + response = -0.0148020950; + goto D68; + +T68_15: + response = -0.0053042509; + goto D68; + +N68_17: + if fieldMatch(text).significantOccurrence < 0.1249970049 then goto T68_16; + else goto T68_17; + +T68_16: + response = -0.0014558335; + goto D68; + +T68_17: + response = -0.0200174998; + goto D68; + +T68_18: + response = -0.0000872652; + goto D68; + +D68: + +tnscore = tnscore + response; + +/* Tree 70 of 80 */ +N69_1: + if attribute(yst_reply_auth) < 26.5000000000 then goto N69_2; + else goto N69_14; + +N69_2: + if attribute(user_followers_count) < 86.5000000000 then goto N69_3; + else goto N69_4; + +N69_3: + if 
fieldMatch(text).significantOccurrence < 0.1216785014 then goto T69_1; + else goto T69_2; + +T69_1: + response = -0.0006414838; + goto D69; + +T69_2: + response = -0.0099978879; + goto D69; + +N69_4: + if fieldMatch(text) < 0.8439915180 then goto N69_5; + else goto N69_9; + +N69_5: + if fieldMatch(text).absoluteOccurrence < 0.0126785003 then goto N69_6; + else goto N69_8; + +N69_6: + if fieldMatch(text).importance < 0.6664454937 then goto N69_7; + else goto T69_5; + +N69_7: + if age(created_at) < 5400.0000000000 then goto T69_3; + else goto T69_4; + +T69_3: + response = -0.0009786234; + goto D69; + +T69_4: + response = -0.0108807960; + goto D69; + +T69_5: + response = 0.0004683724; + goto D69; + +N69_8: + if fieldTermMatch(text,1).firstPosition < 3.5000000000 then goto T69_6; + else goto T69_7; + +T69_6: + response = -0.0013491196; + goto D69; + +T69_7: + response = 0.0100488776; + goto D69; + +N69_9: + if fieldMatch(text).importance < 0.4989485145 then goto T69_8; + else goto N69_10; + +T69_8: + response = -0.0180389590; + goto D69; + +N69_10: + if age(created_at) < 1770.0000000000 then goto N69_11; + else goto N69_13; + +N69_11: + if term(0).significance < 0.9986659884 then goto N69_12; + else goto T69_11; + +N69_12: + if fieldMatch(text).earliness < 0.6099034548 then goto T69_9; + else goto T69_10; + +T69_9: + response = 0.0188221360; + goto D69; + +T69_10: + response = 0.0069414922; + goto D69; + +T69_11: + response = 0.0010695341; + goto D69; + +N69_13: + if attribute(user_statuses_count) < 728.0000000000 then goto T69_12; + else goto T69_13; + +T69_12: + response = 0.0078644585; + goto D69; + +T69_13: + response = 0.0013385568; + goto D69; + +N69_14: + if attribute(user_followers_count) < 1995.0000000000 then goto T69_14; + else goto N69_15; + +T69_14: + response = -0.0037600829; + goto D69; + +N69_15: + if term(0).significance < 0.9989764690 then goto N69_16; + else goto T69_18; + +N69_16: + if age(created_at) < 2310.0000000000 then goto N69_17; + else goto 
T69_17; + +N69_17: + if attribute(yst_reply_auth) < 38.5000000000 then goto T69_15; + else goto T69_16; + +T69_15: + response = -0.0117528259; + goto D69; + +T69_16: + response = 0.0193329084; + goto D69; + +T69_17: + response = 0.0028282077; + goto D69; + +T69_18: + response = -0.0027098020; + goto D69; + +D69: + +tnscore = tnscore + response; + +/* Tree 71 of 80 */ +N70_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N70_2; + else goto N70_12; + +N70_2: + if term(0).significance < 0.8512874842 then goto N70_3; + else goto N70_4; + +N70_3: + if term(2).significance < 0.9981595278 then goto T70_1; + else goto T70_2; + +T70_1: + response = -0.0893712611; + goto D70; + +T70_2: + response = 0.0139821391; + goto D70; + +N70_4: + if fieldMatch(text).absoluteOccurrence < 0.0129164997 then goto N70_5; + else goto T70_11; + +N70_5: + if fieldMatch(user_name).fieldCompleteness < 0.5833334923 then goto N70_6; + else goto T70_10; + +N70_6: + if fieldMatch(text).importance < 0.4999005198 then goto N70_7; + else goto N70_9; + +N70_7: + if fieldMatch(text).importance < 0.4992579818 then goto T70_3; + else goto N70_8; + +T70_3: + response = -0.0088299338; + goto D70; + +N70_8: + if fieldLength(text) < 24.5000000000 then goto T70_4; + else goto T70_5; + +T70_4: + response = 0.0031375211; + goto D70; + +T70_5: + response = -0.0078301854; + goto D70; + +N70_9: + if fieldMatch(text).importance < 0.4999030232 then goto T70_6; + else goto N70_10; + +T70_6: + response = -0.0514023475; + goto D70; + +N70_10: + if fieldMatch(text).earliness < 0.6079194546 then goto T70_7; + else goto N70_11; + +T70_7: + response = -0.0064381419; + goto D70; + +N70_11: + if fieldMatch(text) < 0.8824554682 then goto T70_8; + else goto T70_9; + +T70_8: + response = -0.0013041030; + goto D70; + +T70_9: + response = -0.0302990737; + goto D70; + +T70_10: + response = 0.0378075574; + goto D70; + +T70_11: + response = 0.0037149737; + goto D70; + +N70_12: + if attribute(yst_tweet_language) < 
3587.5000000000 then goto N70_13; + else goto T70_18; + +N70_13: + if fieldMatch(text).absoluteProximity < 0.0573749989 then goto T70_12; + else goto N70_14; + +T70_12: + response = -0.0037219953; + goto D70; + +N70_14: + if attribute(user_followers_count) < 719.5000000000 then goto N70_15; + else goto T70_17; + +N70_15: + if fieldMatch(text).importance < 0.6660234928 then goto N70_16; + else goto N70_17; + +N70_16: + if term(1).significance < 0.9864724874 then goto T70_13; + else goto T70_14; + +T70_13: + response = -0.0057017615; + goto D70; + +T70_14: + response = 0.0142744959; + goto D70; + +N70_17: + if fieldMatch(text).importance < 0.6664794683 then goto T70_15; + else goto T70_16; + +T70_15: + response = -0.0033669884; + goto D70; + +T70_16: + response = 0.0011748423; + goto D70; + +T70_17: + response = 0.0048833724; + goto D70; + +T70_18: + response = -0.0235368129; + goto D70; + +D70: + +tnscore = tnscore + response; + +/* Tree 72 of 80 */ +N71_1: + if fieldMatch(text).tail < 3.5000000000 then goto N71_2; + else goto N71_5; + +N71_2: + if term(0).significance < 0.9849029779 then goto T71_1; + else goto N71_3; + +T71_1: + response = -0.0096533539; + goto D71; + +N71_3: + if age(created_at) < 210.0000000000 then goto T71_2; + else goto N71_4; + +T71_2: + response = 0.0086447306; + goto D71; + +N71_4: + if term(3).significance < 0.9972594976 then goto T71_3; + else goto T71_4; + +T71_3: + response = -0.0049080669; + goto D71; + +T71_4: + response = 0.0019732467; + goto D71; + +N71_5: + if attribute(user_followers_count) < 682.5000000000 then goto N71_6; + else goto N71_17; + +N71_6: + if attribute(yst_reply_auth) < 91.5000000000 then goto N71_7; + else goto N71_15; + +N71_7: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N71_8; + else goto T71_13; + +N71_8: + if term(4).significance < 0.9962199926 then goto N71_9; + else goto N71_14; + +N71_9: + if fieldMatch(text).tail < 11.5000000000 then goto N71_10; + else goto N71_13; + +N71_10: + if 
term(1).significance < 0.9847429991 then goto N71_11; + else goto T71_8; + +N71_11: + if fieldMatch(text).importance < 0.7463389635 then goto N71_12; + else goto T71_7; + +N71_12: + if term(0).significance < 0.9929184914 then goto T71_5; + else goto T71_6; + +T71_5: + response = -0.0337962464; + goto D71; + +T71_6: + response = -0.0042731663; + goto D71; + +T71_7: + response = -0.0018179748; + goto D71; + +T71_8: + response = -0.0005816172; + goto D71; + +N71_13: + if age(created_at) < 1710.0000000000 then goto T71_9; + else goto T71_10; + +T71_9: + response = 0.0058304594; + goto D71; + +T71_10: + response = -0.0001954122; + goto D71; + +N71_14: + if term(1).significance < 0.8054080009 then goto T71_11; + else goto T71_12; + +T71_11: + response = 0.0206959410; + goto D71; + +T71_12: + response = 0.0029922212; + goto D71; + +T71_13: + response = -0.0103917040; + goto D71; + +N71_15: + if attribute(yst_link_array_size) < 0.0094430000 then goto T71_14; + else goto N71_16; + +T71_14: + response = -0.0102022704; + goto D71; + +N71_16: + if attribute(user_friends_count) < 89.0000000000 then goto T71_15; + else goto T71_16; + +T71_15: + response = 0.0214038413; + goto D71; + +T71_16: + response = -0.0030227020; + goto D71; + +N71_17: + if fieldMatch(text).significantOccurrence < 0.0556650013 then goto T71_17; + else goto T71_18; + +T71_17: + response = 0.0009656227; + goto D71; + +T71_18: + response = 0.0055771237; + goto D71; + +D71: + +tnscore = tnscore + response; + +/* Tree 73 of 80 */ +N72_1: + if attribute(ythl) < 0.5000000000 then goto N72_2; + else goto N72_12; + +N72_2: + if fieldMatch(text).importance < 0.4997234941 then goto N72_3; + else goto N72_7; + +N72_3: + if fieldMatch(text).importance < 0.4988809824 then goto T72_1; + else goto N72_4; + +T72_1: + response = -0.0156475694; + goto D72; + +N72_4: + if fieldMatch(text).importance < 0.4988874793 then goto T72_2; + else goto N72_5; + +T72_2: + response = 0.0871791947; + goto D72; + +N72_5: + if 
fieldMatch(text).importance < 0.4997065067 then goto T72_3; + else goto N72_6; + +T72_3: + response = 0.0054545590; + goto D72; + +N72_6: + if term(0).significance < 0.9976885319 then goto T72_4; + else goto T72_5; + +T72_4: + response = 0.1581759963; + goto D72; + +T72_5: + response = 0.0057478578; + goto D72; + +N72_7: + if fieldMatch(text).importance < 0.4997634888 then goto T72_6; + else goto N72_8; + +T72_6: + response = -0.0264810886; + goto D72; + +N72_8: + if fieldMatch(text).importance < 0.4998250008 then goto T72_7; + else goto N72_9; + +T72_7: + response = 0.0106354371; + goto D72; + +N72_9: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N72_10; + else goto N72_11; + +N72_10: + if fieldMatch(text).earliness < 0.7018519640 then goto T72_8; + else goto T72_9; + +T72_8: + response = -0.0083100057; + goto D72; + +T72_9: + response = -0.0018931553; + goto D72; + +N72_11: + if attribute(user_followers_count) < 701.0000000000 then goto T72_10; + else goto T72_11; + +T72_10: + response = -0.0020476113; + goto D72; + +T72_11: + response = 0.0053824373; + goto D72; + +N72_12: + if fieldMatch(text) < 0.4141010046 then goto N72_13; + else goto N72_17; + +N72_13: + if fieldMatch(user_name) < 0.3179910183 then goto N72_14; + else goto T72_16; + +N72_14: + if term(0).significance < 0.9793410301 then goto N72_15; + else goto T72_15; + +N72_15: + if term(2).significance < 0.9970530272 then goto T72_12; + else goto N72_16; + +T72_12: + response = -0.0054897109; + goto D72; + +N72_16: + if fieldMatch(text).importance < 0.6620055437 then goto T72_13; + else goto T72_14; + +T72_13: + response = 0.0060602187; + goto D72; + +T72_14: + response = -0.0517823718; + goto D72; + +T72_15: + response = -0.0021590151; + goto D72; + +T72_16: + response = 0.0298259094; + goto D72; + +N72_17: + if fieldMatch(text).significantOccurrence < 0.0392310023 then goto T72_17; + else goto T72_18; + +T72_17: + response = -0.0063084285; + goto D72; + +T72_18: + response = 
0.0020013986; + goto D72; + +D72: + +tnscore = tnscore + response; + +/* Tree 74 of 80 */ +N73_1: + if attribute(yst_reply_auth) < 236.5000000000 then goto N73_2; + else goto T73_18; + +N73_2: + if age(created_at) < 1830.0000000000 then goto N73_3; + else goto N73_16; + +N73_3: + if term(1).significance < 0.7788045406 then goto T73_1; + else goto N73_4; + +T73_1: + response = 0.0479141837; + goto D73; + +N73_4: + if term(0).significance < 0.9139549732 then goto N73_5; + else goto N73_6; + +N73_5: + if fieldMatch(text).longestSequenceRatio < 0.4166665077 then goto T73_2; + else goto T73_3; + +T73_2: + response = -0.0732771007; + goto D73; + +T73_3: + response = -0.0090569203; + goto D73; + +N73_6: + if fieldMatch(text).importance < 0.4989485145 then goto N73_7; + else goto N73_8; + +N73_7: + if fieldMatch(text).importance < 0.4988874793 then goto T73_4; + else goto T73_5; + +T73_4: + response = -0.0042825791; + goto D73; + +T73_5: + response = -0.1098205261; + goto D73; + +N73_8: + if attribute(user_followers_count) < 98.5000000000 then goto N73_9; + else goto N73_12; + +N73_9: + if attribute(user_statuses_count) < 1260.5000000000 then goto N73_10; + else goto T73_9; + +N73_10: + if match < 0.6491410136 then goto T73_6; + else goto N73_11; + +T73_6: + response = -0.0110819570; + goto D73; + +N73_11: + if term(0).significance < 0.9986954927 then goto T73_7; + else goto T73_8; + +T73_7: + response = 0.0051994695; + goto D73; + +T73_8: + response = -0.0019674695; + goto D73; + +T73_9: + response = -0.0076605248; + goto D73; + +N73_12: + if attribute(user_statuses_count) < 5.5000000000 then goto T73_10; + else goto N73_13; + +T73_10: + response = -0.1055265232; + goto D73; + +N73_13: + if fieldMatch(text).earliness < 0.3726850152 then goto T73_11; + else goto N73_14; + +T73_11: + response = -0.0007649567; + goto D73; + +N73_14: + if fieldMatch(text).occurrence < 0.1519230008 then goto T73_12; + else goto N73_15; + +T73_12: + response = 0.0078900808; + goto D73; + 
+N73_15: + if attribute(user_followers_count) < 583.5000000000 then goto T73_13; + else goto T73_14; + +T73_13: + response = -0.0022611347; + goto D73; + +T73_14: + response = 0.0078521247; + goto D73; + +N73_16: + if fieldMatch(text).importance < 0.6664505005 then goto T73_15; + else goto N73_17; + +T73_15: + response = -0.0037578539; + goto D73; + +N73_17: + if fieldMatch(text).significantOccurrence < 0.0386575013 then goto T73_16; + else goto T73_17; + +T73_16: + response = -0.0059854357; + goto D73; + +T73_17: + response = 0.0010571345; + goto D73; + +T73_18: + response = -0.0046469325; + goto D73; + +D73: + +tnscore = tnscore + response; + +/* Tree 75 of 80 */ +N74_1: + if age(created_at) < 1830.0000000000 then goto N74_2; + else goto N74_12; + +N74_2: + if fieldMatch(text).importance < 0.7467460036 then goto N74_3; + else goto N74_11; + +N74_3: + if fieldMatch(text).tail < 7.5000000000 then goto N74_4; + else goto N74_10; + +N74_4: + if term(0).significance < 0.8547105193 then goto N74_5; + else goto N74_6; + +N74_5: + if term(2).significance < 0.9980159998 then goto T74_1; + else goto T74_2; + +T74_1: + response = -0.0945680172; + goto D74; + +T74_2: + response = 0.0133542000; + goto D74; + +N74_6: + if term(0).significance < 0.9995139837 then goto T74_3; + else goto N74_7; + +T74_3: + response = -0.0000091115; + goto D74; + +N74_7: + if term(0).significance < 0.9996379614 then goto N74_8; + else goto T74_7; + +N74_8: + if fieldMatch(text).tail < 6.5000000000 then goto N74_9; + else goto T74_6; + +N74_9: + if attribute(yst_link_array_size) < 0.0028985001 then goto T74_4; + else goto T74_5; + +T74_4: + response = -0.0033284425; + goto D74; + +T74_5: + response = -0.0414096377; + goto D74; + +T74_6: + response = -0.0690490860; + goto D74; + +T74_7: + response = 0.0066873893; + goto D74; + +N74_10: + if attribute(user_friends_count) < 179.5000000000 then goto T74_8; + else goto T74_9; + +T74_8: + response = 0.0013706096; + goto D74; + +T74_9: + response = 
0.0065517887; + goto D74; + +N74_11: + if attribute(yst_tweet_language) < 3243.5000000000 then goto T74_10; + else goto T74_11; + +T74_10: + response = 0.0068726824; + goto D74; + +T74_11: + response = -0.0363530800; + goto D74; + +N74_12: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N74_13; + else goto N74_16; + +N74_13: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N74_14; + else goto N74_15; + +N74_14: + if fieldMatch(user_name).importance < 0.4999729991 then goto T74_12; + else goto T74_13; + +T74_12: + response = -0.0053854995; + goto D74; + +T74_13: + response = 0.0353914791; + goto D74; + +N74_15: + if attribute(user_friends_count) < 4181.0000000000 then goto T74_14; + else goto T74_15; + +T74_14: + response = 0.0041747763; + goto D74; + +T74_15: + response = -0.0313684077; + goto D74; + +N74_16: + if fieldMatch(text) < 0.5549424887 then goto T74_16; + else goto N74_17; + +T74_16: + response = -0.0034986093; + goto D74; + +N74_17: + if match < 0.7940984964 then goto T74_17; + else goto T74_18; + +T74_17: + response = 0.0068157141; + goto D74; + +T74_18: + response = 0.0005808310; + goto D74; + +D74: + +tnscore = tnscore + response; + +/* Tree 76 of 80 */ +N75_1: + if fieldMatch(text).importance < 0.4989485145 then goto N75_2; + else goto N75_5; + +N75_2: + if fieldMatch(text).importance < 0.4988874793 then goto N75_3; + else goto T75_4; + +N75_3: + if term(0).significance < 0.9910864830 then goto N75_4; + else goto T75_3; + +N75_4: + if fieldMatch(text).importance < 0.4985739887 then goto T75_1; + else goto T75_2; + +T75_1: + response = -0.0080603652; + goto D75; + +T75_2: + response = -0.0525354118; + goto D75; + +T75_3: + response = 0.0309921110; + goto D75; + +T75_4: + response = -0.1167737387; + goto D75; + +N75_5: + if age(created_at) < 1830.0000000000 then goto N75_6; + else goto N75_16; + +N75_6: + if term(0).significance < 0.9731230140 then goto N75_7; + else goto N75_11; + +N75_7: + if term(1).significance < 
0.9992040396 then goto N75_8; + else goto T75_9; + +N75_8: + if term(1).significance < 0.9977560043 then goto N75_9; + else goto T75_8; + +N75_9: + if fieldMatch(text) < 0.5470744967 then goto N75_10; + else goto T75_7; + +N75_10: + if fieldMatch(text).importance < 0.7365344763 then goto T75_5; + else goto T75_6; + +T75_5: + response = -0.0755739420; + goto D75; + +T75_6: + response = -0.0140588177; + goto D75; + +T75_7: + response = -0.0021536645; + goto D75; + +T75_8: + response = 0.0179729341; + goto D75; + +T75_9: + response = -0.0499824746; + goto D75; + +N75_11: + if term(0).significance < 0.9751809835 then goto T75_10; + else goto N75_12; + +T75_10: + response = 0.0490509939; + goto D75; + +N75_12: + if attribute(yst_reply_auth) < 20.5000000000 then goto N75_13; + else goto N75_15; + +N75_13: + if attribute(user_followers_count) < 97.5000000000 then goto T75_11; + else goto N75_14; + +T75_11: + response = 0.0015097125; + goto D75; + +N75_14: + if attribute(user_statuses_count) < 5.5000000000 then goto T75_12; + else goto T75_13; + +T75_12: + response = -0.0948351800; + goto D75; + +T75_13: + response = 0.0071205807; + goto D75; + +N75_15: + if attribute(yst_link_array_size) < 0.0028940002 then goto T75_14; + else goto T75_15; + +T75_14: + response = -0.0084126298; + goto D75; + +T75_15: + response = 0.0022449562; + goto D75; + +N75_16: + if fieldMatch(text).importance < 0.6664534807 then goto T75_16; + else goto N75_17; + +T75_16: + response = -0.0042838031; + goto D75; + +N75_17: + if term(0).significance < 0.9982124567 then goto T75_17; + else goto T75_18; + +T75_17: + response = -0.0016253125; + goto D75; + +T75_18: + response = 0.0026475634; + goto D75; + +D75: + +tnscore = tnscore + response; + +/* Tree 77 of 80 */ +N76_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N76_2; + else goto N76_13; + +N76_2: + if fieldTermMatch(text,0).firstPosition < 2.5000000000 then goto N76_3; + else goto N76_4; + +N76_3: + if 
fieldMatch(text).importance < 0.4989809990 then goto T76_1; + else goto T76_2; + +T76_1: + response = -0.0234510876; + goto D76; + +T76_2: + response = 0.0020809399; + goto D76; + +N76_4: + if attribute(yst_reply_auth) < 302.5000000000 then goto N76_5; + else goto T76_12; + +N76_5: + if fieldMatch(text).importance < 0.6664544940 then goto N76_6; + else goto N76_12; + +N76_6: + if fieldMatch(text).importance < 0.4998220205 then goto N76_7; + else goto N76_8; + +N76_7: + if attribute(yst_reply_auth) < 42.5000000000 then goto T76_3; + else goto T76_4; + +T76_3: + response = 0.0050751752; + goto D76; + +T76_4: + response = -0.0083413352; + goto D76; + +N76_8: + if fieldMatch(text).absoluteOccurrence < 0.0124999993 then goto N76_9; + else goto T76_9; + +N76_9: + if age(created_at) < 30600.0000000000 then goto N76_10; + else goto T76_8; + +N76_10: + if fieldMatch(text).absoluteProximity < 0.0125000002 then goto T76_5; + else goto N76_11; + +T76_5: + response = 0.0047520251; + goto D76; + +N76_11: + if term(1).significance < 0.9889299870 then goto T76_6; + else goto T76_7; + +T76_6: + response = -0.0182384871; + goto D76; + +T76_7: + response = -0.0050169041; + goto D76; + +T76_8: + response = -0.0135494985; + goto D76; + +T76_9: + response = 0.0043484195; + goto D76; + +N76_12: + if term(0).significance < 0.9941140413 then goto T76_10; + else goto T76_11; + +T76_10: + response = -0.0052054760; + goto D76; + +T76_11: + response = 0.0033577205; + goto D76; + +T76_12: + response = -0.0100142174; + goto D76; + +N76_13: + if age(created_at) < 1770.0000000000 then goto N76_14; + else goto N76_15; + +N76_14: + if fieldMatch(text) < 0.8529180288 then goto T76_13; + else goto T76_14; + +T76_13: + response = 0.0005433753; + goto D76; + +T76_14: + response = 0.0060936539; + goto D76; + +N76_15: + if fieldMatch(text).earliness < 0.9298025370 then goto N76_16; + else goto N76_17; + +N76_16: + if fieldLength(text) < 29.5000000000 then goto T76_15; + else goto T76_16; + +T76_15: + 
response = -0.0005482093; + goto D76; + +T76_16: + response = -0.0264980545; + goto D76; + +N76_17: + if fieldMatch(text).occurrence < 0.2290209979 then goto T76_17; + else goto T76_18; + +T76_17: + response = 0.0068625219; + goto D76; + +T76_18: + response = -0.0005992389; + goto D76; + +D76: + +tnscore = tnscore + response; + +/* Tree 78 of 80 */ +N77_1: + if fieldTermMatch(text,2).firstPosition < 13.5000000000 then goto N77_2; + else goto N77_8; + +N77_2: + if term(2).significance < 0.9519284964 then goto T77_1; + else goto N77_3; + +T77_1: + response = -0.0041815526; + goto D77; + +N77_3: + if age(created_at) < 5400.0000000000 then goto N77_4; + else goto N77_6; + +N77_4: + if fieldMatch(text).significance < 0.7492735386 then goto N77_5; + else goto T77_4; + +N77_5: + if fieldMatch(text).tail < 20.5000000000 then goto T77_2; + else goto T77_3; + +T77_2: + response = 0.0019880057; + goto D77; + +T77_3: + response = 0.0345569075; + goto D77; + +T77_4: + response = 0.0087822370; + goto D77; + +N77_6: + if term(2).significance < 0.9934439659 then goto N77_7; + else goto T77_7; + +N77_7: + if term(2).significance < 0.9921829700 then goto T77_5; + else goto T77_6; + +T77_5: + response = 0.0004470985; + goto D77; + +T77_6: + response = -0.0109745339; + goto D77; + +T77_7: + response = 0.0025543252; + goto D77; + +N77_8: + if attribute(yst_reply_auth) < 22.5000000000 then goto N77_9; + else goto N77_14; + +N77_9: + if term(1).significance < 0.9951915145 then goto N77_10; + else goto N77_11; + +N77_10: + if fieldMatch(text).fieldCompleteness < 0.0816664994 then goto T77_8; + else goto T77_9; + +T77_8: + response = -0.0140486512; + goto D77; + +T77_9: + response = -0.0020108004; + goto D77; + +N77_11: + if fieldMatch(user_name).head < 0.5000000000 then goto N77_12; + else goto T77_13; + +N77_12: + if attribute(yst_tweet_language) < 3243.5000000000 then goto N77_13; + else goto T77_12; + +N77_13: + if fieldLength(text) < 8.5000000000 then goto T77_10; + else goto T77_11; 
+ +T77_10: + response = -0.0088342224; + goto D77; + +T77_11: + response = 0.0012278464; + goto D77; + +T77_12: + response = -0.0093805182; + goto D77; + +T77_13: + response = 0.0236651466; + goto D77; + +N77_14: + if attribute(user_followers_count) < 125.5000000000 then goto N77_15; + else goto N77_17; + +N77_15: + if age(created_at) < 270.0000000000 then goto N77_16; + else goto T77_16; + +N77_16: + if term(0).significance < 0.9985035062 then goto T77_14; + else goto T77_15; + +T77_14: + response = 0.0238630955; + goto D77; + +T77_15: + response = -0.0187495440; + goto D77; + +T77_16: + response = -0.0107232535; + goto D77; + +N77_17: + if attribute(yst_reply_auth) < 813.0000000000 then goto T77_17; + else goto T77_18; + +T77_17: + response = -0.0012233981; + goto D77; + +T77_18: + response = -0.0114833082; + goto D77; + +D77: + +tnscore = tnscore + response; + +/* Tree 79 of 80 */ +N78_1: + if fieldMatch(text).absoluteOccurrence < 0.0122500006 then goto N78_2; + else goto N78_14; + +N78_2: + if fieldMatch(text).earliness < 0.8596060276 then goto N78_3; + else goto N78_12; + +N78_3: + if attribute(yst_reply_auth) < 302.5000000000 then goto N78_4; + else goto N78_11; + +N78_4: + if fieldMatch(user_name).completeness < 0.9791665077 then goto N78_5; + else goto T78_8; + +N78_5: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N78_6; + else goto N78_10; + +N78_6: + if age(created_at) < 1830.0000000000 then goto T78_1; + else goto N78_7; + +T78_1: + response = -0.0008288048; + goto D78; + +N78_7: + if fieldMatch(text).importance < 0.6664404869 then goto N78_8; + else goto N78_9; + +N78_8: + if fieldMatch(text).fieldCompleteness < 0.0976189971 then goto T78_2; + else goto T78_3; + +T78_2: + response = -0.0053357392; + goto D78; + +T78_3: + response = -0.0137936880; + goto D78; + +N78_9: + if fieldMatch(text).occurrence < 0.1558704972 then goto T78_4; + else goto T78_5; + +T78_4: + response = 0.0024922240; + goto D78; + +T78_5: + response = -0.0089043788; 
+ goto D78; + +N78_10: + if fieldMatch(text).weightedOccurrence < 0.0385860018 then goto T78_6; + else goto T78_7; + +T78_6: + response = -0.0071451431; + goto D78; + +T78_7: + response = 0.0005167991; + goto D78; + +T78_8: + response = 0.0407753689; + goto D78; + +N78_11: + if attribute(user_followers_count) < 1721.5000000000 then goto T78_9; + else goto T78_10; + +T78_9: + response = -0.0105255162; + goto D78; + +T78_10: + response = 0.0015551667; + goto D78; + +N78_12: + if fieldLength(text) < 8.5000000000 then goto T78_11; + else goto N78_13; + +T78_11: + response = -0.0088276495; + goto D78; + +N78_13: + if fieldTermMatch(text,0).firstPosition < 15.5000000000 then goto T78_12; + else goto T78_13; + +T78_12: + response = 0.0024878063; + goto D78; + +T78_13: + response = -0.0103987822; + goto D78; + +N78_14: + if attribute(user_followers_count) < 27.5000000000 then goto N78_15; + else goto N78_17; + +N78_15: + if fieldTermMatch(text,1).firstPosition < 17.5000000000 then goto N78_16; + else goto T78_16; + +N78_16: + if term(0).significance < 0.9853284955 then goto T78_14; + else goto T78_15; + +T78_14: + response = -0.0219723254; + goto D78; + +T78_15: + response = -0.0024166886; + goto D78; + +T78_16: + response = 0.0189239860; + goto D78; + +N78_17: + if attribute(user_statuses_count) < 93.5000000000 then goto T78_17; + else goto T78_18; + +T78_17: + response = 0.0212835416; + goto D78; + +T78_18: + response = 0.0045698024; + goto D78; + +D78: + +tnscore = tnscore + response; + +/* Tree 80 of 80 */ +N79_1: + if fieldMatch(text).longestSequence < 1.5000000000 then goto N79_2; + else goto N79_14; + +N79_2: + if term(0).significance < 0.8509274721 then goto N79_3; + else goto N79_4; + +N79_3: + if term(2).significance < 0.9981595278 then goto T79_1; + else goto T79_2; + +T79_1: + response = -0.0629348018; + goto D79; + +T79_2: + response = -0.0067077117; + goto D79; + +N79_4: + if fieldMatch(text).significance < 0.7493325472 then goto N79_5; + else goto T79_13; + 
+N79_5: + if fieldMatch(text).importance < 0.4999189973 then goto N79_6; + else goto N79_11; + +N79_6: + if fieldMatch(text).importance < 0.4999135137 then goto N79_7; + else goto T79_8; + +N79_7: + if fieldMatch(text).importance < 0.4999005198 then goto N79_8; + else goto T79_7; + +N79_8: + if fieldMatch(text).importance < 0.4997529984 then goto T79_3; + else goto N79_9; + +T79_3: + response = -0.0025494044; + goto D79; + +N79_9: + if fieldMatch(text).importance < 0.4998250008 then goto T79_4; + else goto N79_10; + +T79_4: + response = 0.0105391880; + goto D79; + +N79_10: + if fieldMatch(text).occurrence < 0.0425724983 then goto T79_5; + else goto T79_6; + +T79_5: + response = -0.0160413841; + goto D79; + +T79_6: + response = 0.0020384205; + goto D79; + +T79_7: + response = -0.0118667123; + goto D79; + +T79_8: + response = 0.0199742140; + goto D79; + +N79_11: + if fieldMatch(text) < 0.8826240301 then goto N79_12; + else goto T79_12; + +N79_12: + if fieldMatch(text).absoluteOccurrence < 0.0116665000 then goto N79_13; + else goto T79_11; + +N79_13: + if fieldMatch(text).earliness < 0.6554945111 then goto T79_9; + else goto T79_10; + +T79_9: + response = -0.0060372501; + goto D79; + +T79_10: + response = -0.0016907417; + goto D79; + +T79_11: + response = 0.0022069234; + goto D79; + +T79_12: + response = -0.0320846717; + goto D79; + +T79_13: + response = 0.0077067193; + goto D79; + +N79_14: + if term(1).significance < 0.8159549832 then goto T79_14; + else goto N79_15; + +T79_14: + response = 0.0094136687; + goto D79; + +N79_15: + if attribute(yst_tweet_language) < 3587.5000000000 then goto N79_16; + else goto T79_18; + +N79_16: + if fieldMatch(text).earliness < 0.9298025370 then goto T79_15; + else goto N79_17; + +T79_15: + response = 0.0003536090; + goto D79; + +N79_17: + if fieldMatch(text).fieldCompleteness < 0.1519230008 then goto T79_16; + else goto T79_17; + +T79_16: + response = 0.0079910001; + goto D79; + +T79_17: + response = 0.0009053355; + goto D79; + 
+T79_18: + response = -0.0202098115; + goto D79; + +D79: + +tnscore = tnscore + response; + +N80: + if age(created_at) < 60 then goto T80_1; + else goto T80_2 ; + +T80_1: + response = 0.05 ; + goto D80; + +T80_2: + response = 0 ; + goto D80; + +D80: + +tnscore = tnscore + response; + +N81: + if age(created_at) < 120 then goto T81_1; + else goto T81_2 ; + +T81_1: + response = 0.0125 ; + goto D81; + +T81_2: + response = 0 ; + goto D81; + +D81: + +tnscore = tnscore + response; + +N82: + if age(created_at) < 240 then goto T82_1; + else goto T82_2 ; + +T82_1: + response = 0.0125 ; + goto D82; + +T82_2: + response = 0 ; + goto D82; + +D82: + +tnscore = tnscore + response; + +N83: + if age(created_at) < 360 then goto T83_1; + else goto T83_2 ; + +T83_1: + response = 0.0125 ; + goto D83; + +T83_2: + response = 0 ; + goto D83; + +D83: + +tnscore = tnscore + response; + + +N84: + if age(created_at) < 480 then goto T84_1; + else goto T84_2 ; + +T84_1: + response = 0.0125 ; + goto D84; + +T84_2: + response = 0 ; + goto D84; + +D84: + +tnscore = tnscore + response; + +N85: + if age(created_at) < 600 then goto T85_1; + else goto T85_2 ; + +T85_1: + response = 0.017 ; + goto D85; + +T85_2: + response = 0 ; + goto D85; + +D85: + +tnscore = tnscore + response; + + +N86: + if age(created_at) < 1200 then goto T86_1; + else goto T86_2 ; + +T86_1: + response = 0.017 ; + goto D86; + +T86_2: + response = 0 ; + goto D86; + +D86: + +tnscore = tnscore + response; + +N87: + if age(created_at) < 2400 then goto T87_1; + else goto T87_2 ; + +T87_1: + response = 0.017 ; + goto D87; + +T87_2: + response = 0 ; + goto D87; + +D87: + +tnscore = tnscore + response; + +N88: + if age(created_at) < 3600 then goto T88_1; + else goto T88_2 ; + +T88_1: + response = 0.025 ; + goto D88; + +T88_2: + response = 0 ; + goto D88; + +D88: + +tnscore = tnscore + response; + + +N89: + if age(created_at) < 7200 then goto T89_1; + else goto T89_2 ; + +T89_1: + response = 0.025 ; + goto D89; + +T89_2: + response = 0 ; 
+ goto D89; + +D89: + +tnscore = tnscore + response; + + +return; diff --git a/searchlib/src/test/files/treenet03.model b/searchlib/src/test/files/treenet03.model new file mode 100644 index 00000000000..dd84c120685 --- /dev/null +++ b/searchlib/src/test/files/treenet03.model @@ -0,0 +1,5880 @@ + +/* Data Dictionary, Number Of Variables = 33 */ +/* Name = NUM_WORDS, Type = continuous. */ +/* Name = DAY_HITS, Type = continuous. */ +/* Name = DAY_HITS_FRAC, Type = continuous. */ +/* Name = PREV_DAY_HITS, Type = continuous. */ +/* Name = DAY_PD_HITS_RATIO, Type = continuous. */ +/* Name = DAY_PREV_DAY_HITS_FRAC, Type = continuous. */ +/* Name = LW_DAY_HITS, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */ +/* Name = WEEKAVG, Type = continuous. */ +/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */ +/* Name = ISTITLE_AVG, Type = continuous. */ +/* Name = ISABSTRACT_AVG, Type = continuous. */ +/* Name = SUPERDUPER_AVG, Type = continuous. */ +/* Name = PUB_TODAY_AVG, Type = continuous. */ +/* Name = BUSINESS, Type = continuous. */ +/* Name = ENTERTAINMENT, Type = continuous. */ +/* Name = HEALTH, Type = continuous. */ +/* Name = INTLNEWS, Type = continuous. */ +/* Name = LAW, Type = continuous. */ +/* Name = LIFESTYLE, Type = continuous. */ +/* Name = LOCALNEWS, Type = continuous. */ +/* Name = MISC, Type = continuous. */ +/* Name = NATIONALNEWS, Type = continuous. */ +/* Name = POLITICS, Type = continuous. */ +/* Name = REGIONALNEWS, Type = continuous. */ +/* Name = SCIENCE, Type = continuous. */ +/* Name = SPORTS, Type = continuous. */ +/* Name = TOPSTORY, Type = continuous. */ +/* Name = AVG_RANK, Type = continuous. */ +/* Name = MAX_RANK, Type = continuous. */ +/* Name = MIN_RANK, Type = continuous. */ +/* Name = MAX_MIN_RANK, Type = continuous. */ +/* Name = MAX_SCORE, Type = continuous. 
*/ + +MODELBEGIN: + +/* CART version: 5.0.9.156 */ +/* TreeNet: TreeNet20071008155525 */ +/* Grove: /home/rparekh/lb/lb_features_all_days.grv */ +/* N trees: 97 */ + +link TN0; +pred = tnscore; /* predicted value for IY_CTR */ + + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +/* Tree 1 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +tnscore = 0.0; + +N0_1: + if MAX_SCORE < 270055 then goto N0_2; + else goto N0_4; + +N0_2: + if MAX_SCORE < 241174 then goto T0_1; + else goto N0_3; + +T0_1: + response = 0.234534; + goto D0; + +N0_3: + if ISABSTRACT_AVG < 0.105 then goto T0_2; + else goto T0_3; + +T0_2: + response = 0.248214; + goto D0; + +T0_3: + response = 0.239032; + goto D0; + +N0_4: + if ISABSTRACT_AVG < 0.13 then goto N0_5; + else goto T0_7; + +N0_5: + if DAY_HITS_FRAC < 0.765 then goto N0_6; + else goto T0_6; + +N0_6: + if MAX_SCORE < 347793 then goto T0_4; + else goto T0_5; + +T0_4: + response = 0.258244; + goto D0; + +T0_5: + response = 0.268225; + goto D0; + +T0_6: + response = 0.271744; + goto D0; + +T0_7: + response = 0.247728; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N1_1: + if MAX_SCORE < 270055 then goto N1_2; + else goto N1_3; + +N1_2: + if MAX_SCORE < 252585 then goto T1_1; + else goto T1_2; + +T1_1: + response = -0.0118809; + goto D1; + +T1_2: + response = -0.00253128; + goto D1; + +N1_3: + if ISABSTRACT_AVG < 0.21 then goto N1_4; + else goto N1_6; + +N1_4: + if DAY_LW_DAY_HITS_RATIO < 4.345 then goto N1_5; + else goto T1_5; + +N1_5: + if MAX_SCORE < 354461 then goto T1_3; + else goto T1_4; + +T1_3: + response = 0.00546628; + goto D1; + +T1_4: + response = 0.0164708; + goto D1; + +T1_5: + response = 0.0188771; + goto D1; + +N1_6: + if DAY_PREV_DAY_HITS_FRAC < 0.805 then goto T1_6; + else goto T1_7; + +T1_6: + response = -0.0092059; + goto D1; + +T1_7: + response = 0.00324753; + goto D1; + +D1: + +tnscore = tnscore + 
response; + +/* Tree 3 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N2_1: + if MAX_SCORE < 270290 then goto N2_2; + else goto N2_5; + +N2_2: + if MAX_SCORE < 236242 then goto T2_1; + else goto N2_3; + +T2_1: + response = -0.0121516; + goto D2; + +N2_3: + if DAY_LW_DAY_HITS_RATIO < 3.45 then goto T2_2; + else goto N2_4; + +T2_2: + response = -0.00767489; + goto D2; + +N2_4: + if ISABSTRACT_AVG < 0.12 then goto T2_3; + else goto T2_4; + +T2_3: + response = 0.00622939; + goto D2; + +T2_4: + response = -0.00488712; + goto D2; + +N2_5: + if ISABSTRACT_AVG < 0.105 then goto N2_6; + else goto T2_7; + +N2_6: + if WEEKAVG < 0.5 then goto T2_5; + else goto T2_6; + +T2_5: + response = 0.00766953; + goto D2; + +T2_6: + response = 0.0156887; + goto D2; + +T2_7: + response = -0.000773677; + goto D2; + +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N3_1: + if MAX_SCORE < 270061 then goto N3_2; + else goto N3_4; + +N3_2: + if MAX_SCORE < 238942 then goto T3_1; + else goto N3_3; + +T3_1: + response = -0.0111281; + goto D3; + +N3_3: + if DAY_LW_DAY_HITS_RATIO < 3.9 then goto T3_2; + else goto T3_3; + +T3_2: + response = -0.00750282; + goto D3; + +T3_3: + response = 0.00220298; + goto D3; + +N3_4: + if ISABSTRACT_AVG < 0.105 then goto N3_5; + else goto T3_7; + +N3_5: + if DAY_HITS_FRAC < 0.795 then goto N3_6; + else goto T3_6; + +N3_6: + if MAX_SCORE < 348364 then goto T3_4; + else goto T3_5; + +T3_4: + response = 0.00374845; + goto D3; + +T3_5: + response = 0.0131108; + goto D3; + +T3_6: + response = 0.0161683; + goto D3; + +T3_7: + response = -0.00111039; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* Tree 5 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N4_1: + if MAX_SCORE < 270289 then goto N4_2; + else goto N4_4; + +N4_2: + if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T4_1; + else goto N4_3; + +T4_1: + response = -0.0106179; + goto D4; + +N4_3: + if ISTITLE_AVG < 0.705 then goto T4_2; + else goto 
T4_3; + +T4_2: + response = 0.00251111; + goto D4; + +T4_3: + response = -0.0060076; + goto D4; + +N4_4: + if ISABSTRACT_AVG < 0.21 then goto N4_5; + else goto T4_7; + +N4_5: + if PUB_TODAY_AVG < 0.13 then goto T4_4; + else goto N4_6; + +T4_4: + response = 0.00368181; + goto D4; + +N4_6: + if ISTITLE_AVG < 0.845 then goto T4_5; + else goto T4_6; + +T4_5: + response = 0.0126785; + goto D4; + +T4_6: + response = 0.00345445; + goto D4; + +T4_7: + response = -0.00136004; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N5_1: + if MAX_SCORE < 271459 then goto N5_2; + else goto N5_5; + +N5_2: + if MAX_SCORE < 238606 then goto T5_1; + else goto N5_3; + +T5_1: + response = -0.00948395; + goto D5; + +N5_3: + if ISABSTRACT_AVG < 0.105 then goto N5_4; + else goto T5_4; + +N5_4: + if DAY_LW_DAY_HITS_RATIO < 2.805 then goto T5_2; + else goto T5_3; + +T5_2: + response = -0.00477034; + goto D5; + +T5_3: + response = 0.00664777; + goto D5; + +T5_4: + response = -0.00676399; + goto D5; + +N5_5: + if ISABSTRACT_AVG < 0.13 then goto N5_6; + else goto T5_7; + +N5_6: + if DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T5_5; + else goto T5_6; + +T5_5: + response = 0.00489261; + goto D5; + +T5_6: + response = 0.0122925; + goto D5; + +T5_7: + response = -0.000920098; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N6_1: + if MAX_SCORE < 271407 then goto N6_2; + else goto N6_5; + +N6_2: + if DAY_LW_DAY_HITS_RATIO < 3.485 then goto T6_1; + else goto N6_3; + +T6_1: + response = -0.00827195; + goto D6; + +N6_3: + if NATIONALNEWS < 0.185 then goto T6_2; + else goto N6_4; + +T6_2: + response = -0.00376713; + goto D6; + +N6_4: + if MAX_SCORE < 245976 then goto T6_3; + else goto T6_4; + +T6_3: + response = 0.000352932; + goto D6; + +T6_4: + response = 0.0160415; + goto D6; + +N6_5: + if ISTITLE_AVG < 0.705 then goto N6_6; + else goto T6_7; + +N6_6: + if 
DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T6_5; + else goto T6_6; + +T6_5: + response = 0.00314056; + goto D6; + +T6_6: + response = 0.0112222; + goto D6; + +T6_7: + response = 0.000924328; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N7_1: + if MAX_SCORE < 253367 then goto N7_2; + else goto N7_3; + +N7_2: + if MAX_SCORE < 177746 then goto T7_1; + else goto T7_2; + +T7_1: + response = -0.0118279; + goto D7; + +T7_2: + response = -0.00453188; + goto D7; + +N7_3: + if ISABSTRACT_AVG < 0.105 then goto N7_4; + else goto N7_6; + +N7_4: + if DAY_LW_DAY_HITS_RATIO < 4.25 then goto N7_5; + else goto T7_5; + +N7_5: + if MAX_SCORE < 354461 then goto T7_3; + else goto T7_4; + +T7_3: + response = -0.000720492; + goto D7; + +T7_4: + response = 0.00994136; + goto D7; + +T7_5: + response = 0.0104822; + goto D7; + +N7_6: + if DAY_PREV_DAY_HITS_FRAC < 0.915 then goto T7_6; + else goto T7_7; + +T7_6: + response = -0.00613264; + goto D7; + +T7_7: + response = 0.00119411; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N8_1: + if MAX_SCORE < 270055 then goto N8_2; + else goto N8_4; + +N8_2: + if DAY_LW_DAY_HITS_RATIO < 4.635 then goto N8_3; + else goto T8_3; + +N8_3: + if MAX_SCORE < 221962 then goto T8_1; + else goto T8_2; + +T8_1: + response = -0.00963481; + goto D8; + +T8_2: + response = -0.00428119; + goto D8; + +T8_3: + response = -0.000341413; + goto D8; + +N8_4: + if ISTITLE_AVG < 0.565 then goto N8_5; + else goto T8_7; + +N8_5: + if MAX_SCORE < 354542 then goto N8_6; + else goto T8_6; + +N8_6: + if DAY_HITS < 1.5 then goto T8_4; + else goto T8_5; + +T8_4: + response = -0.00205135; + goto D8; + +T8_5: + response = 0.00751225; + goto D8; + +T8_6: + response = 0.0111239; + goto D8; + +T8_7: + response = 0.00022935; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N9_1: + if 
MAX_SCORE < 263726 then goto N9_2; + else goto N9_4; + +N9_2: + if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T9_1; + else goto N9_3; + +T9_1: + response = -0.00728388; + goto D9; + +N9_3: + if ISTITLE_AVG < 0.73 then goto T9_2; + else goto T9_3; + +T9_2: + response = 0.00311214; + goto D9; + +T9_3: + response = -0.00320301; + goto D9; + +N9_4: + if ISABSTRACT_AVG < 0.105 then goto N9_5; + else goto T9_7; + +N9_5: + if WEEKAVG < 0.215 then goto T9_4; + else goto N9_6; + +T9_4: + response = -0.00472856; + goto D9; + +N9_6: + if DAY_LW_DAY_HITS_RATIO < 47 then goto T9_5; + else goto T9_6; + +T9_5: + response = 0.00641873; + goto D9; + +T9_6: + response = 0.0215092; + goto D9; + +T9_7: + response = -0.00106176; + goto D9; + +D9: + +tnscore = tnscore + response; + +/* Tree 11 of 97 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N10_1: + if MAX_SCORE < 263734 then goto N10_2; + else goto N10_4; + +N10_2: + if DAY_LW_DAY_HITS_RATIO < 3.635 then goto T10_1; + else goto N10_3; + +T10_1: + response = -0.0061738; + goto D10; + +N10_3: + if ISTITLE_AVG < 0.05 then goto T10_2; + else goto T10_3; + +T10_2: + response = 0.00678624; + goto D10; + +T10_3: + response = -0.0034547; + goto D10; + +N10_4: + if ISABSTRACT_AVG < 0.105 then goto N10_5; + else goto N10_6; + +N10_5: + if LOCALNEWS < 0.105 then goto T10_4; + else goto T10_5; + +T10_4: + response = 0.00405055; + goto D10; + +T10_5: + response = 0.00975544; + goto D10; + +N10_6: + if DAY_PREV_DAY_HITS_FRAC < 0.905 then goto T10_6; + else goto T10_7; + +T10_6: + response = -0.00538249; + goto D10; + +T10_7: + response = 0.00274471; + goto D10; + +D10: + +tnscore = tnscore + response; + +/* Tree 12 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N11_1: + if MAX_SCORE < 252459 then goto T11_1; + else goto N11_2; + +T11_1: + response = -0.00466436; + goto D11; + +N11_2: + if ISABSTRACT_AVG < 0.21 then goto N11_3; + else goto T11_7; + +N11_3: + if SUPERDUPER_AVG < 0.115 then goto N11_4; + else goto N11_5; + +N11_4: + if 
MAX_SCORE < 254916 then goto T11_2; + else goto T11_3; + +T11_2: + response = 0.0300376; + goto D11; + +T11_3: + response = 0.00749701; + goto D11; + +N11_5: + if PUB_TODAY_AVG < 0.105 then goto N11_6; + else goto T11_6; + +N11_6: + if DAY_PD_HITS_RATIO < 0.085 then goto T11_4; + else goto T11_5; + +T11_4: + response = 0.0121226; + goto D11; + +T11_5: + response = -0.00799009; + goto D11; + +T11_6: + response = 0.00453286; + goto D11; + +T11_7: + response = -0.00140668; + goto D11; + +D11: + +tnscore = tnscore + response; + +/* Tree 13 of 97 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N12_1: + if MAX_SCORE < 264515 then goto N12_2; + else goto N12_4; + +N12_2: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto T12_1; + else goto N12_3; + +T12_1: + response = -0.00554549; + goto D12; + +N12_3: + if DAY_HITS < 30.5 then goto T12_2; + else goto T12_3; + +T12_2: + response = -0.0016496; + goto D12; + +T12_3: + response = 0.0125357; + goto D12; + +N12_4: + if ISTITLE_AVG < 0.585 then goto N12_5; + else goto N12_6; + +N12_5: + if WEEKAVG < 0.5 then goto T12_4; + else goto T12_5; + +T12_4: + response = 0.00111467; + goto D12; + +T12_5: + response = 0.0073944; + goto D12; + +N12_6: + if MAX_SCORE < 356177 then goto T12_6; + else goto T12_7; + +T12_6: + response = -0.00212114; + goto D12; + +T12_7: + response = 0.00636485; + goto D12; + +D12: + +tnscore = tnscore + response; + +/* Tree 14 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N13_1: + if DAY_PREV_DAY_HITS_FRAC < 0.825 then goto N13_2; + else goto N13_3; + +N13_2: + if PREV_DAY_HITS < 17.5 then goto T13_1; + else goto T13_2; + +T13_1: + response = -0.00398853; + goto D13; + +T13_2: + response = 0.00429611; + goto D13; + +N13_3: + if ISTITLE_AVG < 0.73 then goto N13_4; + else goto N13_6; + +N13_4: + if MIN_RANK < 9 then goto N13_5; + else goto T13_5; + +N13_5: + if DAY_HITS < 46 then goto T13_3; + else goto T13_4; + +T13_3: + response = 0.00610951; + goto D13; + +T13_4: + response = 0.0271326; + goto D13; + 
+T13_5: + response = -0.00242626; + goto D13; + +N13_6: + if MAX_SCORE < 374204 then goto T13_6; + else goto T13_7; + +T13_6: + response = -0.00141569; + goto D13; + +T13_7: + response = 0.00890749; + goto D13; + +D13: + +tnscore = tnscore + response; + +/* Tree 15 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N14_1: + if MAX_SCORE < 249898 then goto T14_1; + else goto N14_2; + +T14_1: + response = -0.00399302; + goto D14; + +N14_2: + if BUSINESS < 0.315 then goto N14_3; + else goto T14_7; + +N14_3: + if ISTITLE_AVG < 0.73 then goto N14_4; + else goto T14_6; + +N14_4: + if SUPERDUPER_AVG < 0.105 then goto T14_2; + else goto N14_5; + +T14_2: + response = 0.00904674; + goto D14; + +N14_5: + if WEEKAVG < 5.5 then goto N14_6; + else goto T14_5; + +N14_6: + if PUB_TODAY_AVG < 0.13 then goto T14_3; + else goto T14_4; + +T14_3: + response = -0.00769757; + goto D14; + +T14_4: + response = 0.00217607; + goto D14; + +T14_5: + response = 0.0110208; + goto D14; + +T14_6: + response = -0.000593102; + goto D14; + +T14_7: + response = -0.00318209; + goto D14; + +D14: + +tnscore = tnscore + response; + +/* Tree 16 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N15_1: + if MAX_SCORE < 276408 then goto N15_2; + else goto N15_4; + +N15_2: + if DAY_LW_DAY_HITS_RATIO < 4.535 then goto T15_1; + else goto N15_3; + +T15_1: + response = -0.00344589; + goto D15; + +N15_3: + if LOCALNEWS < 0.53 then goto T15_2; + else goto T15_3; + +T15_2: + response = -6.70599e-05; + goto D15; + +T15_3: + response = 0.0175562; + goto D15; + +N15_4: + if ISABSTRACT_AVG < 0.685 then goto N15_5; + else goto T15_7; + +N15_5: + if DAY_LW_DAY_HITS_RATIO < 33.5 then goto N15_6; + else goto T15_6; + +N15_6: + if LOCALNEWS < 0.115 then goto T15_4; + else goto T15_5; + +T15_4: + response = 0.00202221; + goto D15; + +T15_5: + response = 0.00726641; + goto D15; + +T15_6: + response = 0.0142841; + goto D15; + +T15_7: + response = -0.00307504; + goto D15; + +D15: + +tnscore = tnscore + response; + +/* Tree 
17 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N16_1: + if MAX_SCORE < 348857 then goto N16_2; + else goto T16_7; + +N16_2: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto T16_1; + else goto N16_3; + +T16_1: + response = -0.00419409; + goto D16; + +N16_3: + if PUB_TODAY_AVG < 0.185 then goto T16_2; + else goto N16_4; + +T16_2: + response = -0.00386261; + goto D16; + +N16_4: + if ISTITLE_AVG < 0.705 then goto N16_5; + else goto T16_6; + +N16_5: + if BUSINESS < 0.21 then goto T16_3; + else goto N16_6; + +T16_3: + response = 0.0063503; + goto D16; + +N16_6: + if DAY_HITS_FRAC < 0.555 then goto T16_4; + else goto T16_5; + +T16_4: + response = -0.0102176; + goto D16; + +T16_5: + response = 0.00356215; + goto D16; + +T16_6: + response = -0.000478923; + goto D16; + +T16_7: + response = 0.00498293; + goto D16; + +D16: + +tnscore = tnscore + response; + +/* Tree 18 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N17_1: + if MAX_SCORE < 286123 then goto N17_2; + else goto T17_7; + +N17_2: + if DAY_PD_HITS_RATIO < 48 then goto N17_3; + else goto T17_6; + +N17_3: + if ISTITLE_AVG < 0.61 then goto N17_4; + else goto T17_5; + +N17_4: + if MAX_RANK < 9 then goto N17_5; + else goto N17_6; + +N17_5: + if MAX_SCORE < 226208 then goto T17_1; + else goto T17_2; + +T17_1: + response = -0.00741311; + goto D17; + +T17_2: + response = 0.0138247; + goto D17; + +N17_6: + if AVG_RANK < 9.635 then goto T17_3; + else goto T17_4; + +T17_3: + response = 0.000360768; + goto D17; + +T17_4: + response = -0.00785446; + goto D17; + +T17_5: + response = -0.00329639; + goto D17; + +T17_6: + response = 0.0225017; + goto D17; + +T17_7: + response = 0.00337188; + goto D17; + +D17: + +tnscore = tnscore + response; + +/* Tree 19 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N18_1: + if DAY_LW_DAY_HITS_RATIO < 7.25 then goto N18_2; + else goto T18_7; + +N18_2: + if BUSINESS < 0.05 then goto N18_3; + else goto N18_5; + +N18_3: + if ISTITLE_AVG < 0.895 then goto N18_4; + else goto T18_3; + 
+N18_4: + if MIN_RANK < 1 then goto T18_1; + else goto T18_2; + +T18_1: + response = 0.0175483; + goto D18; + +T18_2: + response = 0.00215143; + goto D18; + +T18_3: + response = -0.00158754; + goto D18; + +N18_5: + if DAY_WEEK_AVG_RATIO < 0.325 then goto T18_4; + else goto N18_6; + +T18_4: + response = 0.0165492; + goto D18; + +N18_6: + if MAX_SCORE < 448185 then goto T18_5; + else goto T18_6; + +T18_5: + response = -0.00386364; + goto D18; + +T18_6: + response = 0.0131047; + goto D18; + +T18_7: + response = 0.00394983; + goto D18; + +D18: + +tnscore = tnscore + response; + +/* Tree 20 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N19_1: + if MAX_SCORE < 271407 then goto N19_2; + else goto N19_4; + +N19_2: + if MAX_SCORE < 177474 then goto T19_1; + else goto N19_3; + +T19_1: + response = -0.00525936; + goto D19; + +N19_3: + if SPORTS < 0.645 then goto T19_2; + else goto T19_3; + +T19_2: + response = -0.00170921; + goto D19; + +T19_3: + response = 0.00426429; + goto D19; + +N19_4: + if DAY_PD_HITS_RATIO < 0.085 then goto T19_4; + else goto N19_5; + +T19_4: + response = 0.0151019; + goto D19; + +N19_5: + if BUSINESS < 0.645 then goto N19_6; + else goto T19_7; + +N19_6: + if LW_DAY_HITS < 2.5 then goto T19_5; + else goto T19_6; + +T19_5: + response = 0.00244345; + goto D19; + +T19_6: + response = -0.0099429; + goto D19; + +T19_7: + response = -0.00501617; + goto D19; + +D19: + +tnscore = tnscore + response; + +/* Tree 21 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N20_1: + if DAY_PREV_DAY_HITS_FRAC < 0.945 then goto N20_2; + else goto N20_4; + +N20_2: + if ISTITLE_AVG < 0.95 then goto N20_3; + else goto T20_3; + +N20_3: + if BUSINESS < 0.235 then goto T20_1; + else goto T20_2; + +T20_1: + response = 0.0017119; + goto D20; + +T20_2: + response = -0.00331729; + goto D20; + +T20_3: + response = -0.00374611; + goto D20; + +N20_4: + if WEEKAVG < 0.215 then goto T20_4; + else goto N20_5; + +T20_4: + response = -0.004784; + goto D20; + +N20_5: + if 
DAY_PD_HITS_RATIO < 0.145 then goto T20_5; + else goto N20_6; + +T20_5: + response = -0.00631232; + goto D20; + +N20_6: + if BUSINESS < 0.685 then goto T20_6; + else goto T20_7; + +T20_6: + response = 0.00413018; + goto D20; + +T20_7: + response = -0.00259307; + goto D20; + +D20: + +tnscore = tnscore + response; + +/* Tree 22 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N21_1: + if PUB_TODAY_AVG < 0.87 then goto N21_2; + else goto N21_6; + +N21_2: + if BUSINESS < 0.235 then goto N21_3; + else goto T21_5; + +N21_3: + if ISTITLE_AVG < 0.39 then goto N21_4; + else goto T21_4; + +N21_4: + if WEEKAVG < 7.855 then goto N21_5; + else goto T21_3; + +N21_5: + if SUPERDUPER_AVG < 0.115 then goto T21_1; + else goto T21_2; + +T21_1: + response = 0.00505563; + goto D21; + +T21_2: + response = -0.000398588; + goto D21; + +T21_3: + response = 0.017327; + goto D21; + +T21_4: + response = -0.00158729; + goto D21; + +T21_5: + response = -0.00349104; + goto D21; + +N21_6: + if WEEKAVG < 0.36 then goto T21_6; + else goto T21_7; + +T21_6: + response = -0.00122032; + goto D21; + +T21_7: + response = 0.00412986; + goto D21; + +D21: + +tnscore = tnscore + response; + +/* Tree 23 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N22_1: + if MAX_SCORE < 235342 then goto T22_1; + else goto N22_2; + +T22_1: + response = -0.00255699; + goto D22; + +N22_2: + if ISABSTRACT_AVG < 0.13 then goto N22_3; + else goto T22_7; + +N22_3: + if WEEKAVG < 0.215 then goto T22_2; + else goto N22_4; + +T22_2: + response = -0.00736871; + goto D22; + +N22_4: + if LOCALNEWS < 0.775 then goto N22_5; + else goto T22_6; + +N22_5: + if SUPERDUPER_AVG < 0.315 then goto T22_3; + else goto N22_6; + +T22_3: + response = 0.00356548; + goto D22; + +N22_6: + if WEEKAVG < 7.07 then goto T22_4; + else goto T22_5; + +T22_4: + response = -0.00254331; + goto D22; + +T22_5: + response = 0.00782112; + goto D22; + +T22_6: + response = 0.0187086; + goto D22; + +T22_7: + response = -0.0015245; + goto D22; + +D22: + 
+tnscore = tnscore + response; + +/* Tree 24 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N23_1: + if DAY_PREV_DAY_HITS_FRAC < 0.825 then goto T23_1; + else goto N23_2; + +T23_1: + response = -0.0017775; + goto D23; + +N23_2: + if WEEKAVG < 0.36 then goto T23_2; + else goto N23_3; + +T23_2: + response = -0.00236106; + goto D23; + +N23_3: + if MAX_MIN_RANK < 3 then goto N23_4; + else goto T23_7; + +N23_4: + if PUB_TODAY_AVG < 0.27 then goto T23_3; + else goto N23_5; + +T23_3: + response = -0.00108329; + goto D23; + +N23_5: + if DAY_WEEK_AVG_RATIO < 2.615 then goto N23_6; + else goto T23_6; + +N23_6: + if MAX_SCORE < 248412 then goto T23_4; + else goto T23_5; + +T23_4: + response = 0.00662755; + goto D23; + +T23_5: + response = 0.0252786; + goto D23; + +T23_6: + response = 0.00570542; + goto D23; + +T23_7: + response = 0.00183161; + goto D23; + +D23: + +tnscore = tnscore + response; + +/* Tree 25 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N24_1: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N24_2; + else goto N24_4; + +N24_2: + if MAX_SCORE < 453346 then goto N24_3; + else goto T24_3; + +N24_3: + if PREV_DAY_HITS < 15.5 then goto T24_1; + else goto T24_2; + +T24_1: + response = -0.00263045; + goto D24; + +T24_2: + response = 0.00362606; + goto D24; + +T24_3: + response = 0.0113911; + goto D24; + +N24_4: + if DAY_LW_DAY_HITS_RATIO < 47 then goto N24_5; + else goto T24_7; + +N24_5: + if MAX_SCORE < 214610 then goto T24_4; + else goto N24_6; + +T24_4: + response = -0.00305392; + goto D24; + +N24_6: + if PREV_DAY_HITS < 46.5 then goto T24_5; + else goto T24_6; + +T24_5: + response = 0.00171595; + goto D24; + +T24_6: + response = 0.0157708; + goto D24; + +T24_7: + response = 0.0123294; + goto D24; + +D24: + +tnscore = tnscore + response; + +/* Tree 26 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N25_1: + if PUB_TODAY_AVG < 0.815 then goto N25_2; + else goto N25_5; + +N25_2: + if BUSINESS < 0.05 then goto N25_3; + else goto T25_4; + +N25_3: + if 
PUB_TODAY_AVG < 0.155 then goto T25_1; + else goto N25_4; + +T25_1: + response = -0.00239543; + goto D25; + +N25_4: + if PREV_DAY_HITS < 0.5 then goto T25_2; + else goto T25_3; + +T25_2: + response = -0.00219916; + goto D25; + +T25_3: + response = 0.00267906; + goto D25; + +T25_4: + response = -0.00274426; + goto D25; + +N25_5: + if ISTITLE_AVG < 0.95 then goto N25_6; + else goto T25_7; + +N25_6: + if DAY_PD_HITS_RATIO < 1.445 then goto T25_5; + else goto T25_6; + +T25_5: + response = -0.012251; + goto D25; + +T25_6: + response = 0.00474059; + goto D25; + +T25_7: + response = -0.000650252; + goto D25; + +D25: + +tnscore = tnscore + response; + +/* Tree 27 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N26_1: + if ISABSTRACT_AVG < 0.105 then goto N26_2; + else goto T26_7; + +N26_2: + if MAX_SCORE < 235080 then goto T26_1; + else goto N26_3; + +T26_1: + response = -0.00337944; + goto D26; + +N26_3: + if SUPERDUPER_AVG < 0.105 then goto N26_4; + else goto N26_6; + +N26_4: + if BUSINESS < 0.435 then goto N26_5; + else goto T26_4; + +N26_5: + if MAX_SCORE < 293262 then goto T26_2; + else goto T26_3; + +T26_2: + response = 0.00942708; + goto D26; + +T26_3: + response = 0.00296784; + goto D26; + +T26_4: + response = -0.00165307; + goto D26; + +N26_6: + if MAX_SCORE < 262829 then goto T26_5; + else goto T26_6; + +T26_5: + response = -0.00745914; + goto D26; + +T26_6: + response = 0.0011197; + goto D26; + +T26_7: + response = -0.0017808; + goto D26; + +D26: + +tnscore = tnscore + response; + +/* Tree 28 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N27_1: + if MAX_SCORE < 347080 then goto N27_2; + else goto T27_7; + +N27_2: + if DAY_LW_DAY_HITS_RATIO < 4.31 then goto N27_3; + else goto N27_4; + +N27_3: + if NATIONALNEWS < 0.295 then goto T27_1; + else goto T27_2; + +T27_1: + response = -0.00181733; + goto D27; + +T27_2: + response = 0.00242649; + goto D27; + +N27_4: + if MAX_SCORE < 313528 then goto N27_5; + else goto T27_6; + +N27_5: + if LOCALNEWS < 0.53 then 
goto N27_6; + else goto T27_5; + +N27_6: + if TOPSTORY < 0.355 then goto T27_3; + else goto T27_4; + +T27_3: + response = 0.00109569; + goto D27; + +T27_4: + response = 0.00947164; + goto D27; + +T27_5: + response = 0.0165664; + goto D27; + +T27_6: + response = -0.00846682; + goto D27; + +T27_7: + response = 0.00293581; + goto D27; + +D27: + +tnscore = tnscore + response; + +/* Tree 29 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N28_1: + if MAX_SCORE < 177806 then goto T28_1; + else goto N28_2; + +T28_1: + response = -0.00360187; + goto D28; + +N28_2: + if TOPSTORY < 0.295 then goto N28_3; + else goto N28_5; + +N28_3: + if LOCALNEWS < 0.765 then goto T28_2; + else goto N28_4; + +T28_2: + response = 4.80638e-06; + goto D28; + +N28_4: + if ISTITLE_AVG < 0.29 then goto T28_3; + else goto T28_4; + +T28_3: + response = 0.0164568; + goto D28; + +T28_4: + response = 0.00112041; + goto D28; + +N28_5: + if INTLNEWS < 0.355 then goto T28_5; + else goto N28_6; + +T28_5: + response = 0.00153933; + goto D28; + +N28_6: + if WEEKAVG < 0.36 then goto T28_6; + else goto T28_7; + +T28_6: + response = -0.00129083; + goto D28; + +T28_7: + response = 0.0150131; + goto D28; + +D28: + +tnscore = tnscore + response; + +/* Tree 30 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N29_1: + if ISTITLE_AVG < 0.73 then goto N29_2; + else goto T29_7; + +N29_2: + if BUSINESS < 0.27 then goto N29_3; + else goto N29_6; + +N29_3: + if MAX_MIN_RANK < 9 then goto N29_4; + else goto T29_4; + +N29_4: + if SUPERDUPER_AVG < 0.315 then goto N29_5; + else goto T29_3; + +N29_5: + if PREV_DAY_HITS < 17.5 then goto T29_1; + else goto T29_2; + +T29_1: + response = 0.00272769; + goto D29; + +T29_2: + response = 0.0136338; + goto D29; + +T29_3: + response = -0.000341266; + goto D29; + +T29_4: + response = 0.0154743; + goto D29; + +N29_6: + if NATIONALNEWS < 0.21 then goto T29_5; + else goto T29_6; + +T29_5: + response = -0.0029607; + goto D29; + +T29_6: + response = 0.0128593; + goto D29; + +T29_7: + 
response = -0.00131249; + goto D29; + +D29: + +tnscore = tnscore + response; + +/* Tree 31 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N30_1: + if DAY_LW_DAY_HITS_RATIO < 7.585 then goto N30_2; + else goto T30_7; + +N30_2: + if MAX_SCORE < 424137 then goto N30_3; + else goto N30_4; + +N30_3: + if DAY_WEEK_AVG_RATIO < 4.78 then goto T30_1; + else goto T30_2; + +T30_1: + response = -0.000874675; + goto D30; + +T30_2: + response = -0.0111332; + goto D30; + +N30_4: + if ENTERTAINMENT < 0.12 then goto N30_5; + else goto T30_6; + +N30_5: + if DAY_LW_DAY_HITS_RATIO < 2.5 then goto N30_6; + else goto T30_5; + +N30_6: + if DAY_WEEK_AVG_RATIO < 0.74 then goto T30_3; + else goto T30_4; + +T30_3: + response = 0.00976536; + goto D30; + +T30_4: + response = 0.0306272; + goto D30; + +T30_5: + response = 0.00520021; + goto D30; + +T30_6: + response = 8.67293e-05; + goto D30; + +T30_7: + response = 0.00333736; + goto D30; + +D30: + +tnscore = tnscore + response; + +/* Tree 32 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N31_1: + if DAY_PD_HITS_RATIO < 0.085 then goto T31_1; + else goto N31_2; + +T31_1: + response = 0.00972107; + goto D31; + +N31_2: + if SPORTS < 0.845 then goto N31_3; + else goto T31_7; + +N31_3: + if PUB_TODAY_AVG < 0.95 then goto N31_4; + else goto N31_5; + +N31_4: + if PREV_DAY_HITS < 0.5 then goto T31_2; + else goto T31_3; + +T31_2: + response = -0.00391231; + goto D31; + +T31_3: + response = -0.000254135; + goto D31; + +N31_5: + if MAX_MIN_RANK < 7 then goto N31_6; + else goto T31_6; + +N31_6: + if DAY_LW_DAY_HITS_RATIO < 19.5 then goto T31_4; + else goto T31_5; + +T31_4: + response = -0.000175771; + goto D31; + +T31_5: + response = 0.00523989; + goto D31; + +T31_6: + response = 0.0109531; + goto D31; + +T31_7: + response = 0.00548354; + goto D31; + +D31: + +tnscore = tnscore + response; + +/* Tree 33 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N32_1: + if MAX_SCORE < 466894 then goto N32_2; + else goto T32_7; + +N32_2: + if 
NATIONALNEWS < 0.21 then goto N32_3; + else goto N32_6; + +N32_3: + if DAY_PD_HITS_RATIO < 0.055 then goto T32_1; + else goto N32_4; + +T32_1: + response = 0.0159556; + goto D32; + +N32_4: + if REGIONALNEWS < 0.05 then goto T32_2; + else goto N32_5; + +T32_2: + response = -0.00112302; + goto D32; + +N32_5: + if DAY_PD_HITS_RATIO < 1.105 then goto T32_3; + else goto T32_4; + +T32_3: + response = 0.0140125; + goto D32; + +T32_4: + response = -0.000724566; + goto D32; + +N32_6: + if AVG_RANK < 8.1 then goto T32_5; + else goto T32_6; + +T32_5: + response = -0.000273744; + goto D32; + +T32_6: + response = 0.00546871; + goto D32; + +T32_7: + response = 0.00555251; + goto D32; + +D32: + +tnscore = tnscore + response; + +/* Tree 34 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N33_1: + if MAX_SCORE < 286123 then goto T33_1; + else goto N33_2; + +T33_1: + response = -0.000823047; + goto D33; + +N33_2: + if DAY_PREV_DAY_HITS_FRAC < 0.435 then goto N33_3; + else goto N33_4; + +N33_3: + if DAY_WEEK_AVG_RATIO < 1.555 then goto T33_2; + else goto T33_3; + +T33_2: + response = 0.0222361; + goto D33; + +T33_3: + response = 0.000189447; + goto D33; + +N33_4: + if MAX_MIN_RANK < 7 then goto N33_5; + else goto N33_6; + +N33_5: + if DAY_PREV_DAY_HITS_FRAC < 0.755 then goto T33_4; + else goto T33_5; + +T33_4: + response = -0.00264164; + goto D33; + +T33_5: + response = 0.00200226; + goto D33; + +N33_6: + if WEEKAVG < 1.07 then goto T33_6; + else goto T33_7; + +T33_6: + response = 0.0151659; + goto D33; + +T33_7: + response = 0.00171852; + goto D33; + +D33: + +tnscore = tnscore + response; + +/* Tree 35 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N34_1: + if BUSINESS < 0.05 then goto N34_2; + else goto T34_7; + +N34_2: + if SUPERDUPER_AVG < 0.115 then goto N34_3; + else goto N34_4; + +N34_3: + if ISTITLE_AVG < 0.895 then goto T34_1; + else goto T34_2; + +T34_1: + response = 0.00536839; + goto D34; + +T34_2: + response = 7.53571e-05; + goto D34; + +N34_4: + if AVG_RANK < 
8.21 then goto N34_5; + else goto T34_6; + +N34_5: + if PUB_TODAY_AVG < 0.13 then goto T34_3; + else goto N34_6; + +T34_3: + response = -0.00865216; + goto D34; + +N34_6: + if DAY_HITS_FRAC < 0.115 then goto T34_4; + else goto T34_5; + +T34_4: + response = 0.0146316; + goto D34; + +T34_5: + response = -0.00249; + goto D34; + +T34_6: + response = 0.00159523; + goto D34; + +T34_7: + response = -0.00131884; + goto D34; + +D34: + +tnscore = tnscore + response; + +/* Tree 36 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N35_1: + if DAY_LW_DAY_HITS_RATIO < 33.5 then goto N35_2; + else goto T35_7; + +N35_2: + if LIFESTYLE < 0.05 then goto N35_3; + else goto T35_6; + +N35_3: + if DAY_PD_HITS_RATIO < 0.065 then goto T35_1; + else goto N35_4; + +T35_1: + response = 0.0130228; + goto D35; + +N35_4: + if SPORTS < 0.39 then goto N35_5; + else goto N35_6; + +N35_5: + if NATIONALNEWS < 0.05 then goto T35_2; + else goto T35_3; + +T35_2: + response = -0.00128992; + goto D35; + +T35_3: + response = 0.00127302; + goto D35; + +N35_6: + if DAY_PD_HITS_RATIO < 13.5 then goto T35_4; + else goto T35_5; + +T35_4: + response = 0.00304904; + goto D35; + +T35_5: + response = -0.0168329; + goto D35; + +T35_6: + response = -0.00545277; + goto D35; + +T35_7: + response = 0.00512552; + goto D35; + +D35: + +tnscore = tnscore + response; + +/* Tree 37 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N36_1: + if DAY_HITS_FRAC < 0.765 then goto T36_1; + else goto N36_2; + +T36_1: + response = -0.000527346; + goto D36; + +N36_2: + if PUB_TODAY_AVG < 0.355 then goto T36_2; + else goto N36_3; + +T36_2: + response = -0.0153305; + goto D36; + +N36_3: + if DAY_HITS < 46.5 then goto N36_4; + else goto T36_7; + +N36_4: + if DAY_PD_HITS_RATIO < 29.5 then goto N36_5; + else goto T36_6; + +N36_5: + if NATIONALNEWS < 0.105 then goto T36_3; + else goto N36_6; + +T36_3: + response = 0.00073747; + goto D36; + +N36_6: + if DAY_WEEK_AVG_RATIO < 8.47 then goto T36_4; + else goto T36_5; + +T36_4: + response 
= 0.00769293; + goto D36; + +T36_5: + response = -0.0125825; + goto D36; + +T36_6: + response = -0.0108761; + goto D36; + +T36_7: + response = 0.00977691; + goto D36; + +D36: + +tnscore = tnscore + response; + +/* Tree 38 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N37_1: + if MAX_SCORE < 177732 then goto T37_1; + else goto N37_2; + +T37_1: + response = -0.00260643; + goto D37; + +N37_2: + if BUSINESS < 0.05 then goto N37_3; + else goto T37_7; + +N37_3: + if WEEKAVG < 0.215 then goto T37_2; + else goto N37_4; + +T37_2: + response = -0.00327106; + goto D37; + +N37_4: + if AVG_RANK < 8.635 then goto N37_5; + else goto T37_6; + +N37_5: + if SUPERDUPER_AVG < 0.235 then goto N37_6; + else goto T37_5; + +N37_6: + if ISABSTRACT_AVG < 0.415 then goto T37_3; + else goto T37_4; + +T37_3: + response = 0.00414333; + goto D37; + +T37_4: + response = -0.00152725; + goto D37; + +T37_5: + response = -0.00286672; + goto D37; + +T37_6: + response = 0.00429432; + goto D37; + +T37_7: + response = -0.000407557; + goto D37; + +D37: + +tnscore = tnscore + response; + +/* Tree 39 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N38_1: + if WEEKAVG < 0.64 then goto N38_2; + else goto N38_3; + +N38_2: + if SUPERDUPER_AVG < 0.29 then goto T38_1; + else goto T38_2; + +T38_1: + response = -0.00013784; + goto D38; + +T38_2: + response = -0.00368109; + goto D38; + +N38_3: + if MAX_SCORE < 271407 then goto N38_4; + else goto T38_7; + +N38_4: + if MAX_MIN_RANK < 5 then goto T38_3; + else goto N38_5; + +T38_3: + response = 0.000985637; + goto D38; + +N38_5: + if SUPERDUPER_AVG < 0.115 then goto N38_6; + else goto T38_6; + +N38_6: + if DAY_LW_DAY_HITS_RATIO < 4.415 then goto T38_4; + else goto T38_5; + +T38_4: + response = -0.00258674; + goto D38; + +T38_5: + response = 0.00694569; + goto D38; + +T38_6: + response = -0.00593057; + goto D38; + +T38_7: + response = 0.00237623; + goto D38; + +D38: + +tnscore = tnscore + response; + +/* Tree 40 of 97 */ +/* N terminal nodes = 7, Depth = 6 
*/ + + +N39_1: + if MAX_SCORE < 177732 then goto T39_1; + else goto N39_2; + +T39_1: + response = -0.00248172; + goto D39; + +N39_2: + if LIFESTYLE < 0.13 then goto N39_3; + else goto T39_7; + +N39_3: + if PUB_TODAY_AVG < 0.105 then goto N39_4; + else goto N39_5; + +N39_4: + if DAY_HITS < 3.5 then goto T39_2; + else goto T39_3; + +T39_2: + response = -0.00072429; + goto D39; + +T39_3: + response = -0.0150678; + goto D39; + +N39_5: + if DAY_HITS_FRAC < 0.075 then goto T39_4; + else goto N39_6; + +T39_4: + response = 0.0156611; + goto D39; + +N39_6: + if BUSINESS < 0.05 then goto T39_5; + else goto T39_6; + +T39_5: + response = 0.00219968; + goto D39; + +T39_6: + response = -0.000365826; + goto D39; + +T39_7: + response = -0.00592673; + goto D39; + +D39: + +tnscore = tnscore + response; + +/* Tree 41 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N40_1: + if WEEKAVG < 0.215 then goto T40_1; + else goto N40_2; + +T40_1: + response = -0.0047613; + goto D40; + +N40_2: + if SPORTS < 0.355 then goto N40_3; + else goto N40_6; + +N40_3: + if MIN_RANK < 5 then goto N40_4; + else goto T40_5; + +N40_4: + if MAX_SCORE < 467877 then goto T40_2; + else goto N40_5; + +T40_2: + response = -0.0025312; + goto D40; + +N40_5: + if MAX_SCORE < 576366 then goto T40_3; + else goto T40_4; + +T40_3: + response = 0.0134173; + goto D40; + +T40_4: + response = -0.00903108; + goto D40; + +T40_5: + response = 0.00030441; + goto D40; + +N40_6: + if WEEKAVG < 5.07 then goto T40_6; + else goto T40_7; + +T40_6: + response = 0.00170865; + goto D40; + +T40_7: + response = 0.0116233; + goto D40; + +D40: + +tnscore = tnscore + response; + +/* Tree 42 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N41_1: + if WEEKAVG < 0.215 then goto T41_1; + else goto N41_2; + +T41_1: + response = -0.00445856; + goto D41; + +N41_2: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N41_3; + else goto N41_4; + +N41_3: + if MAX_SCORE < 459781 then goto T41_2; + else goto T41_3; + +T41_2: + response = -0.00110273; 
+ goto D41; + +T41_3: + response = 0.01224; + goto D41; + +N41_4: + if SPORTS < 0.87 then goto N41_5; + else goto T41_7; + +N41_5: + if NATIONALNEWS < 0.05 then goto T41_4; + else goto N41_6; + +T41_4: + response = 0.000176374; + goto D41; + +N41_6: + if BUSINESS < 0.185 then goto T41_5; + else goto T41_6; + +T41_5: + response = 0.00101462; + goto D41; + +T41_6: + response = 0.0103262; + goto D41; + +T41_7: + response = 0.00758848; + goto D41; + +D41: + +tnscore = tnscore + response; + +/* Tree 43 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N42_1: + if MAX_SCORE < 588664 then goto N42_2; + else goto T42_7; + +N42_2: + if MAX_SCORE < 453568 then goto N42_3; + else goto N42_5; + +N42_3: + if PREV_DAY_HITS < 26.5 then goto T42_1; + else goto N42_4; + +T42_1: + response = -7.786e-05; + goto D42; + +N42_4: + if WEEKAVG < 9.215 then goto T42_2; + else goto T42_3; + +T42_2: + response = -0.012221; + goto D42; + +T42_3: + response = -0.00126183; + goto D42; + +N42_5: + if DAY_PREV_DAY_HITS_FRAC < 0.555 then goto T42_4; + else goto N42_6; + +T42_4: + response = 0.0175351; + goto D42; + +N42_6: + if AVG_RANK < 9.7 then goto T42_5; + else goto T42_6; + +T42_5: + response = 7.47189e-05; + goto D42; + +T42_6: + response = 0.0152525; + goto D42; + +T42_7: + response = -0.0113374; + goto D42; + +D42: + +tnscore = tnscore + response; + +/* Tree 44 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N43_1: + if TOPSTORY < 0.295 then goto T43_1; + else goto N43_2; + +T43_1: + response = -0.000312071; + goto D43; + +N43_2: + if MAX_MIN_RANK < 7 then goto N43_3; + else goto T43_7; + +N43_3: + if ISTITLE_AVG < 0.185 then goto N43_4; + else goto N43_6; + +N43_4: + if MAX_SCORE < 378124 then goto T43_2; + else goto N43_5; + +T43_2: + response = 0.00111897; + goto D43; + +N43_5: + if MAX_SCORE < 408027 then goto T43_3; + else goto T43_4; + +T43_3: + response = -0.0203516; + goto D43; + +T43_4: + response = 0.0012991; + goto D43; + +N43_6: + if INTLNEWS < 0.13 then goto T43_5; + 
else goto T43_6; + +T43_5: + response = 0.000774937; + goto D43; + +T43_6: + response = 0.00732047; + goto D43; + +T43_7: + response = 0.0117253; + goto D43; + +D43: + +tnscore = tnscore + response; + +/* Tree 45 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N44_1: + if MAX_SCORE < 178085 then goto T44_1; + else goto N44_2; + +T44_1: + response = -0.00220705; + goto D44; + +N44_2: + if WEEKAVG < 6.64 then goto N44_3; + else goto T44_7; + +N44_3: + if SUPERDUPER_AVG < 0.105 then goto N44_4; + else goto N44_5; + +N44_4: + if DAY_PD_HITS_RATIO < 0.115 then goto T44_2; + else goto T44_3; + +T44_2: + response = 0.0149532; + goto D44; + +T44_3: + response = 0.00106296; + goto D44; + +N44_5: + if AVG_RANK < 7.73 then goto N44_6; + else goto T44_6; + +N44_6: + if PUB_TODAY_AVG < 0.13 then goto T44_4; + else goto T44_5; + +T44_4: + response = -0.0104993; + goto D44; + +T44_5: + response = -0.00177497; + goto D44; + +T44_6: + response = 2.5899e-07; + goto D44; + +T44_7: + response = 0.00418893; + goto D44; + +D44: + +tnscore = tnscore + response; + +/* Tree 46 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N45_1: + if ISTITLE_AVG < 0.585 then goto N45_2; + else goto T45_7; + +N45_2: + if AVG_RANK < 8.47 then goto T45_1; + else goto N45_3; + +T45_1: + response = 0.00253086; + goto D45; + +N45_3: + if DAY_HITS_FRAC < 0.885 then goto N45_4; + else goto N45_5; + +N45_4: + if LOCALNEWS < 0.13 then goto T45_2; + else goto T45_3; + +T45_2: + response = -0.000894801; + goto D45; + +T45_3: + response = -0.00988189; + goto D45; + +N45_5: + if NATIONALNEWS < 0.11 then goto N45_6; + else goto T45_6; + +N45_6: + if MAX_SCORE < 282066 then goto T45_4; + else goto T45_5; + +T45_4: + response = 0.00689219; + goto D45; + +T45_5: + response = -0.00300841; + goto D45; + +T45_6: + response = 0.0150141; + goto D45; + +T45_7: + response = -0.00102603; + goto D45; + +D45: + +tnscore = tnscore + response; + +/* Tree 47 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N46_1: + if 
DAY_PREV_DAY_HITS_FRAC < 0.985 then goto T46_1; + else goto N46_2; + +T46_1: + response = -0.000843826; + goto D46; + +N46_2: + if MIN_RANK < 9 then goto N46_3; + else goto T46_7; + +N46_3: + if AVG_RANK < 8.71 then goto T46_2; + else goto N46_4; + +T46_2: + response = 0.000451436; + goto D46; + +N46_4: + if SUPERDUPER_AVG < 0.27 then goto N46_5; + else goto N46_6; + +N46_5: + if DAY_WEEK_AVG_RATIO < 5.05 then goto T46_3; + else goto T46_4; + +T46_3: + response = 0.00639888; + goto D46; + +T46_4: + response = 0.020614; + goto D46; + +N46_6: + if NATIONALNEWS < 0.185 then goto T46_5; + else goto T46_6; + +T46_5: + response = -0.00149465; + goto D46; + +T46_6: + response = 0.0118779; + goto D46; + +T46_7: + response = -0.00241922; + goto D46; + +D46: + +tnscore = tnscore + response; + +/* Tree 48 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N47_1: + if HEALTH < 0.105 then goto N47_2; + else goto T47_7; + +N47_2: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N47_3; + else goto N47_5; + +N47_3: + if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N47_4; + else goto T47_3; + +N47_4: + if INTLNEWS < 0.315 then goto T47_1; + else goto T47_2; + +T47_1: + response = 0.00140618; + goto D47; + +T47_2: + response = 0.0145332; + goto D47; + +T47_3: + response = -0.00130877; + goto D47; + +N47_5: + if WEEKAVG < 0.5 then goto T47_4; + else goto N47_6; + +T47_4: + response = -0.000696011; + goto D47; + +N47_6: + if MAX_MIN_RANK < 5 then goto T47_5; + else goto T47_6; + +T47_5: + response = 0.0028215; + goto D47; + +T47_6: + response = -0.00090855; + goto D47; + +T47_7: + response = -0.0059993; + goto D47; + +D47: + +tnscore = tnscore + response; + +/* Tree 49 of 97 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N48_1: + if SPORTS < 0.585 then goto N48_2; + else goto N48_4; + +N48_2: + if TOPSTORY < 0.295 then goto T48_1; + else goto N48_3; + +T48_1: + response = -0.000394764; + goto D48; + +N48_3: + if ENTERTAINMENT < 0.05 then goto T48_2; + else goto T48_3; + +T48_2: + response 
= 0.00143724; + goto D48; + +T48_3: + response = 0.00930005; + goto D48; + +N48_4: + if AVG_RANK < 5.55 then goto N48_5; + else goto N48_6; + +N48_5: + if DAY_WEEK_AVG_RATIO < 1.955 then goto T48_4; + else goto T48_5; + +T48_4: + response = 0.00377635; + goto D48; + +T48_5: + response = 0.0210534; + goto D48; + +N48_6: + if MAX_SCORE < 389202 then goto T48_6; + else goto T48_7; + +T48_6: + response = 0.00246072; + goto D48; + +T48_7: + response = -0.012129; + goto D48; + +D48: + +tnscore = tnscore + response; + +/* Tree 50 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N49_1: + if MAX_SCORE < 406793 then goto N49_2; + else goto N49_6; + +N49_2: + if DAY_PD_HITS_RATIO < 0.075 then goto T49_1; + else goto N49_3; + +T49_1: + response = 0.0102381; + goto D49; + +N49_3: + if PUB_TODAY_AVG < 0.295 then goto T49_2; + else goto N49_4; + +T49_2: + response = -0.00209613; + goto D49; + +N49_4: + if MAX_SCORE < 305867 then goto T49_3; + else goto N49_5; + +T49_3: + response = 0.000938554; + goto D49; + +N49_5: + if MAX_SCORE < 347812 then goto T49_4; + else goto T49_5; + +T49_4: + response = -0.00625349; + goto D49; + +T49_5: + response = -3.2361e-05; + goto D49; + +N49_6: + if PREV_DAY_HITS < 17.5 then goto T49_6; + else goto T49_7; + +T49_6: + response = 0.00426042; + goto D49; + +T49_7: + response = -0.0139803; + goto D49; + +D49: + +tnscore = tnscore + response; + +/* Tree 51 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N50_1: + if MAX_SCORE < 187757 then goto T50_1; + else goto N50_2; + +T50_1: + response = -0.00190196; + goto D50; + +N50_2: + if NATIONALNEWS < 0.185 then goto T50_2; + else goto N50_3; + +T50_2: + response = 1.08423e-05; + goto D50; + +N50_3: + if PREV_DAY_HITS < 4.5 then goto N50_4; + else goto N50_5; + +N50_4: + if ISTITLE_AVG < 0.585 then goto T50_3; + else goto T50_4; + +T50_3: + response = 0.00687766; + goto D50; + +T50_4: + response = 0.00142303; + goto D50; + +N50_5: + if SUPERDUPER_AVG < 0.275 then goto T50_5; + else goto N50_6; + 
+T50_5: + response = -0.00581088; + goto D50; + +N50_6: + if INTLNEWS < 0.315 then goto T50_6; + else goto T50_7; + +T50_6: + response = 0.0130163; + goto D50; + +T50_7: + response = -0.00562813; + goto D50; + +D50: + +tnscore = tnscore + response; + +/* Tree 52 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N51_1: + if MAX_SCORE < 423724 then goto N51_2; + else goto N51_3; + +N51_2: + if MAX_SCORE < 408911 then goto T51_1; + else goto T51_2; + +T51_1: + response = -0.000303869; + goto D51; + +T51_2: + response = -0.00754368; + goto D51; + +N51_3: + if MAX_SCORE < 435668 then goto T51_3; + else goto N51_4; + +T51_3: + response = 0.0194021; + goto D51; + +N51_4: + if DAY_HITS < 5.5 then goto N51_5; + else goto N51_6; + +N51_5: + if AVG_RANK < 9.265 then goto T51_4; + else goto T51_5; + +T51_4: + response = 0.00209562; + goto D51; + +T51_5: + response = 0.0171146; + goto D51; + +N51_6: + if MAX_SCORE < 466889 then goto T51_6; + else goto T51_7; + +T51_6: + response = -0.0147582; + goto D51; + +T51_7: + response = 0.00191369; + goto D51; + +D51: + +tnscore = tnscore + response; + +/* Tree 53 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N52_1: + if PREV_DAY_HITS < 26.5 then goto N52_2; + else goto N52_4; + +N52_2: + if PREV_DAY_HITS < 19.5 then goto T52_1; + else goto N52_3; + +T52_1: + response = 4.07731e-06; + goto D52; + +N52_3: + if ISTITLE_AVG < 0.7 then goto T52_2; + else goto T52_3; + +T52_2: + response = 0.0180989; + goto D52; + +T52_3: + response = 0.0014322; + goto D52; + +N52_4: + if MAX_SCORE < 378124 then goto N52_5; + else goto T52_7; + +N52_5: + if INTLNEWS < 0.25 then goto T52_4; + else goto N52_6; + +T52_4: + response = -0.00926901; + goto D52; + +N52_6: + if ISTITLE_AVG < 0.15 then goto T52_5; + else goto T52_6; + +T52_5: + response = 0.00951019; + goto D52; + +T52_6: + response = -0.00389496; + goto D52; + +T52_7: + response = -0.0168153; + goto D52; + +D52: + +tnscore = tnscore + response; + +/* Tree 54 of 97 */ +/* N terminal nodes = 
7, Depth = 4 */ + + +N53_1: + if ISABSTRACT_AVG < 0.815 then goto N53_2; + else goto N53_5; + +N53_2: + if PUB_TODAY_AVG < 0.05 then goto N53_3; + else goto N53_4; + +N53_3: + if PREV_DAY_HITS < 16.5 then goto T53_1; + else goto T53_2; + +T53_1: + response = -0.00256108; + goto D53; + +T53_2: + response = 0.010687; + goto D53; + +N53_4: + if BUSINESS < 0.05 then goto T53_3; + else goto T53_4; + +T53_3: + response = 0.00107951; + goto D53; + +T53_4: + response = -0.00114831; + goto D53; + +N53_5: + if AVG_RANK < 8.31 then goto T53_5; + else goto N53_6; + +T53_5: + response = -0.000490289; + goto D53; + +N53_6: + if SPORTS < 0.315 then goto T53_6; + else goto T53_7; + +T53_6: + response = 0.00273855; + goto D53; + +T53_7: + response = 0.0123011; + goto D53; + +D53: + +tnscore = tnscore + response; + +/* Tree 55 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N54_1: + if SUPERDUPER_AVG < 0.115 then goto N54_2; + else goto N54_3; + +N54_2: + if DAY_PD_HITS_RATIO < 0.115 then goto T54_1; + else goto T54_2; + +T54_1: + response = 0.0119548; + goto D54; + +T54_2: + response = 0.000425021; + goto D54; + +N54_3: + if INTLNEWS < 0.155 then goto N54_4; + else goto N54_6; + +N54_4: + if ISTITLE_AVG < 0.185 then goto N54_5; + else goto T54_5; + +N54_5: + if INTLNEWS < 0.05 then goto T54_3; + else goto T54_4; + +T54_3: + response = -0.00395117; + goto D54; + +T54_4: + response = -0.0145832; + goto D54; + +T54_5: + response = -0.00135759; + goto D54; + +N54_6: + if TOPSTORY < 0.295 then goto T54_6; + else goto T54_7; + +T54_6: + response = -0.00119962; + goto D54; + +T54_7: + response = 0.00380053; + goto D54; + +D54: + +tnscore = tnscore + response; + +/* Tree 56 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N55_1: + if MAX_SCORE < 187608 then goto T55_1; + else goto N55_2; + +T55_1: + response = -0.00129909; + goto D55; + +N55_2: + if DAY_WEEK_AVG_RATIO < 10.06 then goto N55_3; + else goto T55_7; + +N55_3: + if DAY_WEEK_AVG_RATIO < 9.235 then goto N55_4; + else goto 
T55_6; + +N55_4: + if DAY_PD_HITS_RATIO < 0.055 then goto T55_2; + else goto N55_5; + +T55_2: + response = 0.0114518; + goto D55; + +N55_5: + if PREV_DAY_HITS < 26.5 then goto T55_3; + else goto N55_6; + +T55_3: + response = 0.000965212; + goto D55; + +N55_6: + if LOCALNEWS < 0.05 then goto T55_4; + else goto T55_5; + +T55_4: + response = -0.00805593; + goto D55; + +T55_5: + response = 0.00585007; + goto D55; + +T55_6: + response = -0.0101744; + goto D55; + +T55_7: + response = 0.010206; + goto D55; + +D55: + +tnscore = tnscore + response; + +/* Tree 57 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N56_1: + if DAY_LW_DAY_HITS_RATIO < 4.71 then goto N56_2; + else goto N56_3; + +N56_2: + if SUPERDUPER_AVG < 0.315 then goto T56_1; + else goto T56_2; + +T56_1: + response = -0.000236511; + goto D56; + +T56_2: + response = -0.00312389; + goto D56; + +N56_3: + if DAY_PD_HITS_RATIO < 13.5 then goto N56_4; + else goto N56_5; + +N56_4: + if MAX_SCORE < 253372 then goto T56_3; + else goto T56_4; + +T56_3: + response = -0.00118965; + goto D56; + +T56_4: + response = 0.00291415; + goto D56; + +N56_5: + if ENTERTAINMENT < 0.05 then goto N56_6; + else goto T56_7; + +N56_6: + if SPORTS < 0.315 then goto T56_5; + else goto T56_6; + +T56_5: + response = -0.00292663; + goto D56; + +T56_6: + response = -0.0194296; + goto D56; + +T56_7: + response = 0.00658386; + goto D56; + +D56: + +tnscore = tnscore + response; + +/* Tree 58 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N57_1: + if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N57_2; + else goto N57_3; + +N57_2: + if WEEKAVG < 0.5 then goto T57_1; + else goto T57_2; + +T57_1: + response = 0.0176531; + goto D57; + +T57_2: + response = 0.000911096; + goto D57; + +N57_3: + if SPORTS < 0.39 then goto T57_3; + else goto N57_4; + +T57_3: + response = -0.000682618; + goto D57; + +N57_4: + if DAY_PD_HITS_RATIO < 11.5 then goto N57_5; + else goto T57_7; + +N57_5: + if DAY_LW_DAY_HITS_RATIO < 4.75 then goto T57_4; + else goto N57_6; + 
+T57_4: + response = 0.000516855; + goto D57; + +N57_6: + if DAY_HITS_FRAC < 0.41 then goto T57_5; + else goto T57_6; + +T57_5: + response = 0.0159972; + goto D57; + +T57_6: + response = 0.00324363; + goto D57; + +T57_7: + response = -0.0108331; + goto D57; + +D57: + +tnscore = tnscore + response; + +/* Tree 59 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N58_1: + if WEEKAVG < 0.36 then goto T58_1; + else goto N58_2; + +T58_1: + response = -0.00257521; + goto D58; + +N58_2: + if TOPSTORY < 0.635 then goto N58_3; + else goto T58_7; + +N58_3: + if PUB_TODAY_AVG < 0.79 then goto N58_4; + else goto T58_6; + +N58_4: + if DAY_PD_HITS_RATIO < 2.185 then goto N58_5; + else goto N58_6; + +N58_5: + if DAY_PREV_DAY_HITS_FRAC < 0.415 then goto T58_2; + else goto T58_3; + +T58_2: + response = 0.00607155; + goto D58; + +T58_3: + response = -0.000126015; + goto D58; + +N58_6: + if ISTITLE_AVG < 0.13 then goto T58_4; + else goto T58_5; + +T58_4: + response = -0.00728662; + goto D58; + +T58_5: + response = -0.000928754; + goto D58; + +T58_6: + response = 0.00147343; + goto D58; + +T58_7: + response = 0.014873; + goto D58; + +D58: + +tnscore = tnscore + response; + +/* Tree 60 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N59_1: + if TOPSTORY < 0.185 then goto T59_1; + else goto N59_2; + +T59_1: + response = -0.000297667; + goto D59; + +N59_2: + if DAY_LW_DAY_HITS_RATIO < 6.3 then goto N59_3; + else goto T59_7; + +N59_3: + if PREV_DAY_HITS < 19.5 then goto N59_4; + else goto N59_6; + +N59_4: + if DAY_PD_HITS_RATIO < 0.13 then goto T59_2; + else goto N59_5; + +T59_2: + response = -0.00702476; + goto D59; + +N59_5: + if LOCALNEWS < 0.05 then goto T59_3; + else goto T59_4; + +T59_3: + response = 0.00592136; + goto D59; + +T59_4: + response = -0.000783801; + goto D59; + +N59_6: + if ISABSTRACT_AVG < 0.15 then goto T59_5; + else goto T59_6; + +T59_5: + response = 0.023326; + goto D59; + +T59_6: + response = 0.000803551; + goto D59; + +T59_7: + response = -0.00103664; + 
goto D59; + +D59: + +tnscore = tnscore + response; + +/* Tree 61 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N60_1: + if WEEKAVG < 0.215 then goto T60_1; + else goto N60_2; + +T60_1: + response = -0.00379646; + goto D60; + +N60_2: + if MAX_MIN_RANK < 3 then goto N60_3; + else goto T60_7; + +N60_3: + if DAY_HITS < 1.5 then goto T60_2; + else goto N60_4; + +T60_2: + response = -0.00199037; + goto D60; + +N60_4: + if DAY_PD_HITS_RATIO < 1.125 then goto N60_5; + else goto T60_6; + +N60_5: + if DAY_LW_DAY_HITS_RATIO < 4.375 then goto T60_3; + else goto N60_6; + +T60_3: + response = 0.00535447; + goto D60; + +N60_6: + if DAY_PD_HITS_RATIO < 0.825 then goto T60_4; + else goto T60_5; + +T60_4: + response = 0.00562457; + goto D60; + +T60_5: + response = 0.0330072; + goto D60; + +T60_6: + response = 0.00138881; + goto D60; + +T60_7: + response = -0.000758841; + goto D60; + +D60: + +tnscore = tnscore + response; + +/* Tree 62 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N61_1: + if DAY_HITS_FRAC < 0.435 then goto N61_2; + else goto N61_6; + +N61_2: + if NATIONALNEWS < 0.685 then goto N61_3; + else goto T61_5; + +N61_3: + if INTLNEWS < 0.47 then goto T61_1; + else goto N61_4; + +T61_1: + response = -0.000235511; + goto D61; + +N61_4: + if MAX_SCORE < 290762 then goto T61_2; + else goto N61_5; + +T61_2: + response = 0.000619978; + goto D61; + +N61_5: + if SUPERDUPER_AVG < 0.155 then goto T61_3; + else goto T61_4; + +T61_3: + response = 0.0182407; + goto D61; + +T61_4: + response = 0.00521312; + goto D61; + +T61_5: + response = 0.0140779; + goto D61; + +N61_6: + if MAX_SCORE < 484643 then goto T61_6; + else goto T61_7; + +T61_6: + response = -0.000518234; + goto D61; + +T61_7: + response = -0.00804112; + goto D61; + +D61: + +tnscore = tnscore + response; + +/* Tree 63 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N62_1: + if DAY_PD_HITS_RATIO < 0.055 then goto T62_1; + else goto N62_2; + +T62_1: + response = 0.0111333; + goto D62; + +N62_2: + if 
DAY_LW_DAY_HITS_RATIO < 0.355 then goto T62_2; + else goto N62_3; + +T62_2: + response = -0.00829529; + goto D62; + +N62_3: + if PUB_TODAY_AVG < 0.95 then goto N62_4; + else goto N62_6; + +N62_4: + if DAY_PD_HITS_RATIO < 1.74 then goto T62_3; + else goto N62_5; + +T62_3: + response = 0.000529497; + goto D62; + +N62_5: + if NATIONALNEWS < 0.415 then goto T62_4; + else goto T62_5; + +T62_4: + response = -0.00200727; + goto D62; + +T62_5: + response = 0.0081622; + goto D62; + +N62_6: + if INTLNEWS < 0.47 then goto T62_6; + else goto T62_7; + +T62_6: + response = 0.00260098; + goto D62; + +T62_7: + response = -0.001284; + goto D62; + +D62: + +tnscore = tnscore + response; + +/* Tree 64 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N63_1: + if MAX_MIN_RANK < 7 then goto N63_2; + else goto N63_4; + +N63_2: + if MAX_MIN_RANK < 5 then goto T63_1; + else goto N63_3; + +T63_1: + response = 0.000381058; + goto D63; + +N63_3: + if SUPERDUPER_AVG < 0.13 then goto T63_2; + else goto T63_3; + +T63_2: + response = 0.000329065; + goto D63; + +T63_3: + response = -0.00386397; + goto D63; + +N63_4: + if MAX_SCORE < 266105 then goto T63_4; + else goto N63_5; + +T63_4: + response = -0.000580382; + goto D63; + +N63_5: + if MAX_SCORE < 322321 then goto N63_6; + else goto T63_7; + +N63_6: + if DAY_WEEK_AVG_RATIO < 5 then goto T63_5; + else goto T63_6; + +T63_5: + response = 0.0158417; + goto D63; + +T63_6: + response = 0.000253264; + goto D63; + +T63_7: + response = 0.00216101; + goto D63; + +D63: + +tnscore = tnscore + response; + +/* Tree 65 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N64_1: + if WEEKAVG < 0.215 then goto N64_2; + else goto N64_4; + +N64_2: + if SUPERDUPER_AVG < 0.5 then goto N64_3; + else goto T64_3; + +N64_3: + if LOCALNEWS < 0.5 then goto T64_1; + else goto T64_2; + +T64_1: + response = -0.00995113; + goto D64; + +T64_2: + response = 0.00292683; + goto D64; + +T64_3: + response = 0.00105182; + goto D64; + +N64_4: + if DAY_PREV_DAY_HITS_FRAC < 0.725 
then goto T64_4; + else goto N64_5; + +T64_4: + response = -0.000575584; + goto D64; + +N64_5: + if TOPSTORY < 0.585 then goto N64_6; + else goto T64_7; + +N64_6: + if SPORTS < 0.87 then goto T64_5; + else goto T64_6; + +T64_5: + response = 0.000783846; + goto D64; + +T64_6: + response = 0.00745576; + goto D64; + +T64_7: + response = 0.0129932; + goto D64; + +D64: + +tnscore = tnscore + response; + +/* Tree 66 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N65_1: + if HEALTH < 0.115 then goto N65_2; + else goto N65_6; + +N65_2: + if DAY_PD_HITS_RATIO < 25.165 then goto N65_3; + else goto N65_5; + +N65_3: + if DAY_WEEK_AVG_RATIO < 10.115 then goto N65_4; + else goto T65_3; + +N65_4: + if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto T65_1; + else goto T65_2; + +T65_1: + response = 0.00322116; + goto D65; + +T65_2: + response = -0.00014541; + goto D65; + +T65_3: + response = 0.00878821; + goto D65; + +N65_5: + if DAY_PREV_DAY_HITS_FRAC < 0.975 then goto T65_4; + else goto T65_5; + +T65_4: + response = -0.0149181; + goto D65; + +T65_5: + response = -0.00209673; + goto D65; + +N65_6: + if MAX_SCORE < 286434 then goto T65_6; + else goto T65_7; + +T65_6: + response = -0.00861656; + goto D65; + +T65_7: + response = 0.00142851; + goto D65; + +D65: + +tnscore = tnscore + response; + +/* Tree 67 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N66_1: + if LIFESTYLE < 0.185 then goto N66_2; + else goto N66_6; + +N66_2: + if MISC < 0.105 then goto N66_3; + else goto T66_5; + +N66_3: + if DAY_LW_DAY_HITS_RATIO < 0.925 then goto N66_4; + else goto T66_4; + +N66_4: + if MAX_SCORE < 273352 then goto N66_5; + else goto T66_3; + +N66_5: + if ISTITLE_AVG < 0.39 then goto T66_1; + else goto T66_2; + +T66_1: + response = 0.00606893; + goto D66; + +T66_2: + response = -0.00394074; + goto D66; + +T66_3: + response = -0.012762; + goto D66; + +T66_4: + response = -8.2932e-05; + goto D66; + +T66_5: + response = 0.00878689; + goto D66; + +N66_6: + if MAX_SCORE < 250603 then goto T66_6; 
+ else goto T66_7; + +T66_6: + response = -0.00131893; + goto D66; + +T66_7: + response = -0.0107682; + goto D66; + +D66: + +tnscore = tnscore + response; + +/* Tree 68 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N67_1: + if DAY_HITS < 13.5 then goto T67_1; + else goto N67_2; + +T67_1: + response = 0.000103863; + goto D67; + +N67_2: + if ENTERTAINMENT < 0.415 then goto N67_3; + else goto T67_7; + +N67_3: + if MIN_RANK < 7 then goto N67_4; + else goto N67_6; + +N67_4: + if DAY_PREV_DAY_HITS_FRAC < 0.875 then goto N67_5; + else goto T67_4; + +N67_5: + if MAX_SCORE < 261175 then goto T67_2; + else goto T67_3; + +T67_2: + response = -0.00601924; + goto D67; + +T67_3: + response = 0.00517774; + goto D67; + +T67_4: + response = -0.00731704; + goto D67; + +N67_6: + if ISABSTRACT_AVG < 0.685 then goto T67_5; + else goto T67_6; + +T67_5: + response = -0.00243371; + goto D67; + +T67_6: + response = 0.0102497; + goto D67; + +T67_7: + response = 0.0109447; + goto D67; + +D67: + +tnscore = tnscore + response; + +/* Tree 69 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N68_1: + if DAY_WEEK_AVG_RATIO < 4.855 then goto N68_2; + else goto N68_3; + +N68_2: + if DAY_WEEK_AVG_RATIO < 4.625 then goto T68_1; + else goto T68_2; + +T68_1: + response = 0.000335357; + goto D68; + +T68_2: + response = 0.00823829; + goto D68; + +N68_3: + if NATIONALNEWS < 0.39 then goto N68_4; + else goto T68_7; + +N68_4: + if ISABSTRACT_AVG < 0.295 then goto N68_5; + else goto T68_6; + +N68_5: + if INTLNEWS < 0.95 then goto N68_6; + else goto T68_5; + +N68_6: + if NATIONALNEWS < 0.155 then goto T68_3; + else goto T68_4; + +T68_3: + response = -0.00576638; + goto D68; + +T68_4: + response = 0.00314375; + goto D68; + +T68_5: + response = 0.0086362; + goto D68; + +T68_6: + response = 0.00191928; + goto D68; + +T68_7: + response = -0.0129199; + goto D68; + +D68: + +tnscore = tnscore + response; + +/* Tree 70 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N69_1: + if WEEKAVG < 1.07 then goto 
T69_1; + else goto N69_2; + +T69_1: + response = -0.000688613; + goto D69; + +N69_2: + if INTLNEWS < 0.355 then goto N69_3; + else goto N69_5; + +N69_3: + if POLITICS < 0.05 then goto N69_4; + else goto T69_4; + +N69_4: + if PREV_DAY_HITS < 33.5 then goto T69_2; + else goto T69_3; + +T69_2: + response = 0.000833826; + goto D69; + +T69_3: + response = -0.0106428; + goto D69; + +T69_4: + response = -0.0100621; + goto D69; + +N69_5: + if DAY_HITS < 5.5 then goto N69_6; + else goto T69_7; + +N69_6: + if DAY_PD_HITS_RATIO < 0.105 then goto T69_5; + else goto T69_6; + +T69_5: + response = -0.00557824; + goto D69; + +T69_6: + response = 0.0173808; + goto D69; + +T69_7: + response = 0.00181211; + goto D69; + +D69: + +tnscore = tnscore + response; + +/* Tree 71 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N70_1: + if PUB_TODAY_AVG < 0.815 then goto T70_1; + else goto N70_2; + +T70_1: + response = -0.000647154; + goto D70; + +N70_2: + if DAY_PD_HITS_RATIO < 1.53 then goto T70_2; + else goto N70_3; + +T70_2: + response = -0.00676558; + goto D70; + +N70_3: + if SCIENCE < 0.05 then goto N70_4; + else goto T70_7; + +N70_4: + if PREV_DAY_HITS < 5.5 then goto N70_5; + else goto T70_6; + +N70_5: + if SUPERDUPER_AVG < 0.27 then goto N70_6; + else goto T70_5; + +N70_6: + if ISTITLE_AVG < 0.95 then goto T70_3; + else goto T70_4; + +T70_3: + response = 0.00474205; + goto D70; + +T70_4: + response = -0.00112826; + goto D70; + +T70_5: + response = -0.00194945; + goto D70; + +T70_6: + response = 0.00633626; + goto D70; + +T70_7: + response = 0.0126675; + goto D70; + +D70: + +tnscore = tnscore + response; + +/* Tree 72 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N71_1: + if MAX_SCORE < 347896 then goto N71_2; + else goto N71_3; + +N71_2: + if NUM_WORDS < 2.5 then goto T71_1; + else goto T71_2; + +T71_1: + response = -8.59477e-05; + goto D71; + +T71_2: + response = -0.00464466; + goto D71; + +N71_3: + if LOCALNEWS < 0.105 then goto N71_4; + else goto N71_5; + +N71_4: + if 
PREV_DAY_HITS < 17.5 then goto T71_3; + else goto T71_4; + +T71_3: + response = 0.000861947; + goto D71; + +T71_4: + response = -0.00908692; + goto D71; + +N71_5: + if SUPERDUPER_AVG < 0.415 then goto N71_6; + else goto T71_7; + +N71_6: + if SPORTS < 0.125 then goto T71_5; + else goto T71_6; + +T71_5: + response = 0.00451276; + goto D71; + +T71_6: + response = 0.0182081; + goto D71; + +T71_7: + response = -0.0128104; + goto D71; + +D71: + +tnscore = tnscore + response; + +/* Tree 73 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N72_1: + if SCIENCE < 0.365 then goto N72_2; + else goto T72_7; + +N72_2: + if MAX_SCORE < 588664 then goto N72_3; + else goto T72_6; + +N72_3: + if SUPERDUPER_AVG < 0.115 then goto T72_1; + else goto N72_4; + +T72_1: + response = 0.000473474; + goto D72; + +N72_4: + if MAX_SCORE < 282998 then goto T72_2; + else goto N72_5; + +T72_2: + response = -0.00203992; + goto D72; + +N72_5: + if SCIENCE < 0.105 then goto N72_6; + else goto T72_5; + +N72_6: + if SPORTS < 0.465 then goto T72_3; + else goto T72_4; + +T72_3: + response = 0.00173095; + goto D72; + +T72_4: + response = -0.00632811; + goto D72; + +T72_5: + response = -0.013829; + goto D72; + +T72_6: + response = -0.0095913; + goto D72; + +T72_7: + response = -0.00990551; + goto D72; + +D72: + +tnscore = tnscore + response; + +/* Tree 74 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N73_1: + if NATIONALNEWS < 0.105 then goto T73_1; + else goto N73_2; + +T73_1: + response = -0.00062577; + goto D73; + +N73_2: + if SPORTS < 0.13 then goto N73_3; + else goto N73_5; + +N73_3: + if DAY_WEEK_AVG_RATIO < 9.235 then goto N73_4; + else goto T73_4; + +N73_4: + if DAY_WEEK_AVG_RATIO < 0.505 then goto T73_2; + else goto T73_3; + +T73_2: + response = 0.00990844; + goto D73; + +T73_3: + response = 0.00024663; + goto D73; + +T73_4: + response = -0.0117063; + goto D73; + +N73_5: + if MAX_SCORE < 277259 then goto N73_6; + else goto T73_7; + +N73_6: + if DAY_WEEK_AVG_RATIO < 1.955 then goto T73_5; 
+ else goto T73_6; + +T73_5: + response = -0.00262119; + goto D73; + +T73_6: + response = 0.0102735; + goto D73; + +T73_7: + response = 0.0198781; + goto D73; + +D73: + +tnscore = tnscore + response; + +/* Tree 75 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N74_1: + if MAX_SCORE < 382346 then goto N74_2; + else goto N74_3; + +N74_2: + if MAX_SCORE < 378950 then goto T74_1; + else goto T74_2; + +T74_1: + response = -0.000201382; + goto D74; + +T74_2: + response = -0.0116932; + goto D74; + +N74_3: + if MAX_SCORE < 385719 then goto T74_3; + else goto N74_4; + +T74_3: + response = 0.0202474; + goto D74; + +N74_4: + if AVG_RANK < 8.27 then goto N74_5; + else goto N74_6; + +N74_5: + if PREV_DAY_HITS < 9.5 then goto T74_4; + else goto T74_5; + +T74_4: + response = 0.000303439; + goto D74; + +T74_5: + response = -0.0119779; + goto D74; + +N74_6: + if ENTERTAINMENT < 0.315 then goto T74_6; + else goto T74_7; + +T74_6: + response = 0.00225595; + goto D74; + +T74_7: + response = 0.0201995; + goto D74; + +D74: + +tnscore = tnscore + response; + +/* Tree 76 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N75_1: + if DAY_PD_HITS_RATIO < 47 then goto N75_2; + else goto T75_7; + +N75_2: + if DAY_PD_HITS_RATIO < 27.25 then goto N75_3; + else goto T75_6; + +N75_3: + if DAY_LW_DAY_HITS_RATIO < 31.5 then goto N75_4; + else goto N75_5; + +N75_4: + if LOCALNEWS < 0.765 then goto T75_1; + else goto T75_2; + +T75_1: + response = -0.000399432; + goto D75; + +T75_2: + response = 0.00362509; + goto D75; + +N75_5: + if DAY_WEEK_AVG_RATIO < 3.98 then goto T75_3; + else goto N75_6; + +T75_3: + response = -0.00769823; + goto D75; + +N75_6: + if DAY_WEEK_AVG_RATIO < 5.4 then goto T75_4; + else goto T75_5; + +T75_4: + response = 0.018687; + goto D75; + +T75_5: + response = 0.00240302; + goto D75; + +T75_6: + response = -0.00932172; + goto D75; + +T75_7: + response = 0.00902439; + goto D75; + +D75: + +tnscore = tnscore + response; + +/* Tree 77 of 97 */ +/* N terminal nodes = 7, Depth 
= 6 */ + + +N76_1: + if LW_DAY_HITS < 2.5 then goto N76_2; + else goto T76_7; + +N76_2: + if MIN_RANK < 7 then goto T76_1; + else goto N76_3; + +T76_1: + response = -0.000157235; + goto D76; + +N76_3: + if DAY_PREV_DAY_HITS_FRAC < 0.555 then goto N76_4; + else goto N76_5; + +N76_4: + if INTLNEWS < 0.225 then goto T76_2; + else goto T76_3; + +T76_2: + response = 0.00402381; + goto D76; + +T76_3: + response = 0.0196079; + goto D76; + +N76_5: + if DAY_WEEK_AVG_RATIO < 6.01 then goto N76_6; + else goto T76_6; + +N76_6: + if DAY_WEEK_AVG_RATIO < 3.805 then goto T76_4; + else goto T76_5; + +T76_4: + response = 0.000236934; + goto D76; + +T76_5: + response = 0.00609122; + goto D76; + +T76_6: + response = -0.0040371; + goto D76; + +T76_7: + response = -0.00301446; + goto D76; + +D76: + +tnscore = tnscore + response; + +/* Tree 78 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N77_1: + if DAY_PD_HITS_RATIO < 0.085 then goto N77_2; + else goto N77_3; + +N77_2: + if AVG_RANK < 8.9 then goto T77_1; + else goto T77_2; + +T77_1: + response = 0.0124264; + goto D77; + +T77_2: + response = 0.00018696; + goto D77; + +N77_3: + if PREV_DAY_HITS < 26.5 then goto N77_4; + else goto N77_6; + +N77_4: + if PREV_DAY_HITS < 19.5 then goto T77_3; + else goto N77_5; + +T77_3: + response = -3.01051e-05; + goto D77; + +N77_5: + if MAX_SCORE < 294152 then goto T77_4; + else goto T77_5; + +T77_4: + response = 0.010709; + goto D77; + +T77_5: + response = -0.00331498; + goto D77; + +N77_6: + if WEEKAVG < 10.785 then goto T77_6; + else goto T77_7; + +T77_6: + response = -0.0102542; + goto D77; + +T77_7: + response = -0.00013417; + goto D77; + +D77: + +tnscore = tnscore + response; + +/* Tree 79 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N78_1: + if DAY_WEEK_AVG_RATIO < 10.06 then goto N78_2; + else goto T78_7; + +N78_2: + if DAY_WEEK_AVG_RATIO < 4.835 then goto T78_1; + else goto N78_3; + +T78_1: + response = -7.25075e-05; + goto D78; + +N78_3: + if ENTERTAINMENT < 0.05 then goto 
N78_4; + else goto T78_6; + +N78_4: + if ISABSTRACT_AVG < 0.83 then goto N78_5; + else goto T78_5; + +N78_5: + if INTLNEWS < 0.635 then goto T78_2; + else goto N78_6; + +T78_2: + response = -0.00614185; + goto D78; + +N78_6: + if MAX_MIN_RANK < 1 then goto T78_3; + else goto T78_4; + +T78_3: + response = -0.00349054; + goto D78; + +T78_4: + response = 0.0109974; + goto D78; + +T78_5: + response = 0.0043811; + goto D78; + +T78_6: + response = 0.00439437; + goto D78; + +T78_7: + response = 0.00656011; + goto D78; + +D78: + +tnscore = tnscore + response; + +/* Tree 80 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N79_1: + if INTLNEWS < 0.185 then goto N79_2; + else goto T79_7; + +N79_2: + if INTLNEWS < 0.115 then goto N79_3; + else goto T79_6; + +N79_3: + if DAY_PD_HITS_RATIO < 0.105 then goto T79_1; + else goto N79_4; + +T79_1: + response = 0.016428; + goto D79; + +N79_4: + if AVG_RANK < 8.365 then goto N79_5; + else goto T79_5; + +N79_5: + if ISTITLE_AVG < 0.435 then goto N79_6; + else goto T79_4; + +N79_6: + if DAY_PD_HITS_RATIO < 11.25 then goto T79_2; + else goto T79_3; + +T79_2: + response = 0.00358374; + goto D79; + +T79_3: + response = -0.0162655; + goto D79; + +T79_4: + response = -0.000799475; + goto D79; + +T79_5: + response = -0.00198299; + goto D79; + +T79_6: + response = -0.00662532; + goto D79; + +T79_7: + response = 0.00087188; + goto D79; + +D79: + +tnscore = tnscore + response; + +/* Tree 81 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N80_1: + if WEEKAVG < 0.64 then goto T80_1; + else goto N80_2; + +T80_1: + response = -0.00047051; + goto D80; + +N80_2: + if DAY_PREV_DAY_HITS_FRAC < 0.965 then goto T80_2; + else goto N80_3; + +T80_2: + response = 0.000224471; + goto D80; + +N80_3: + if BUSINESS < 0.53 then goto N80_4; + else goto N80_6; + +N80_4: + if LOCALNEWS < 0.27 then goto T80_3; + else goto N80_5; + +T80_3: + response = 0.00103714; + goto D80; + +N80_5: + if NATIONALNEWS < 0.155 then goto T80_4; + else goto T80_5; + +T80_4: + 
response = 0.0140465; + goto D80; + +T80_5: + response = -0.00245531; + goto D80; + +N80_6: + if BUSINESS < 0.645 then goto T80_6; + else goto T80_7; + +T80_6: + response = 0.0237968; + goto D80; + +T80_7: + response = 0.00736313; + goto D80; + +D80: + +tnscore = tnscore + response; + +/* Tree 82 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N81_1: + if LOCALNEWS < 0.05 then goto T81_1; + else goto N81_2; + +T81_1: + response = -0.000573509; + goto D81; + +N81_2: + if MAX_SCORE < 253515 then goto T81_2; + else goto N81_3; + +T81_2: + response = -0.00114612; + goto D81; + +N81_3: + if SUPERDUPER_AVG < 0.315 then goto N81_4; + else goto T81_7; + +N81_4: + if MAX_SCORE < 255248 then goto T81_3; + else goto N81_5; + +T81_3: + response = 0.0174812; + goto D81; + +N81_5: + if PUB_TODAY_AVG < 0.05 then goto T81_4; + else goto N81_6; + +T81_4: + response = -0.00327708; + goto D81; + +N81_6: + if DAY_PD_HITS_RATIO < 0.425 then goto T81_5; + else goto T81_6; + +T81_5: + response = 0.0118621; + goto D81; + +T81_6: + response = 0.0033546; + goto D81; + +T81_7: + response = -0.00154643; + goto D81; + +D81: + +tnscore = tnscore + response; + +/* Tree 83 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N82_1: + if MISC < 0.105 then goto N82_2; + else goto T82_7; + +N82_2: + if DAY_WEEK_AVG_RATIO < 10.06 then goto N82_3; + else goto T82_6; + +N82_3: + if DAY_WEEK_AVG_RATIO < 9.235 then goto N82_4; + else goto T82_5; + +N82_4: + if MISC < 0.05 then goto N82_5; + else goto T82_4; + +N82_5: + if WEEKAVG < 0.215 then goto T82_1; + else goto N82_6; + +T82_1: + response = -0.00242466; + goto D82; + +N82_6: + if DAY_PREV_DAY_HITS_FRAC < 0.985 then goto T82_2; + else goto T82_3; + +T82_2: + response = -9.73666e-05; + goto D82; + +T82_3: + response = 0.00142133; + goto D82; + +T82_4: + response = -0.00753159; + goto D82; + +T82_5: + response = -0.00863543; + goto D82; + +T82_6: + response = 0.0086817; + goto D82; + +T82_7: + response = 0.0102311; + goto D82; + +D82: + +tnscore = 
tnscore + response; + +/* Tree 84 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N83_1: + if DAY_PD_HITS_RATIO < 47.5 then goto N83_2; + else goto N83_6; + +N83_2: + if DAY_PD_HITS_RATIO < 32.5 then goto N83_3; + else goto T83_5; + +N83_3: + if DAY_PREV_DAY_HITS_FRAC < 0.405 then goto N83_4; + else goto T83_4; + +N83_4: + if PUB_TODAY_AVG < 0.155 then goto N83_5; + else goto T83_3; + +N83_5: + if DAY_WEEK_AVG_RATIO < 0.67 then goto T83_1; + else goto T83_2; + +T83_1: + response = -0.00300426; + goto D83; + +T83_2: + response = 0.0155993; + goto D83; + +T83_3: + response = -0.000160259; + goto D83; + +T83_4: + response = -0.000324256; + goto D83; + +T83_5: + response = -0.0118989; + goto D83; + +N83_6: + if MAX_SCORE < 286380 then goto T83_6; + else goto T83_7; + +T83_6: + response = 0.0153486; + goto D83; + +T83_7: + response = 0.00067452; + goto D83; + +D83: + +tnscore = tnscore + response; + +/* Tree 85 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N84_1: + if SUPERDUPER_AVG < 0.105 then goto T84_1; + else goto N84_2; + +T84_1: + response = 0.00094466; + goto D84; + +N84_2: + if MAX_SCORE < 277301 then goto N84_3; + else goto T84_7; + +N84_3: + if ISABSTRACT_AVG < 0.39 then goto N84_4; + else goto T84_6; + +N84_4: + if DAY_PREV_DAY_HITS_FRAC < 0.795 then goto T84_2; + else goto N84_5; + +T84_2: + response = -0.000924515; + goto D84; + +N84_5: + if SPORTS < 0.275 then goto N84_6; + else goto T84_5; + +N84_6: + if ENTERTAINMENT < 0.05 then goto T84_3; + else goto T84_4; + +T84_3: + response = -0.00695203; + goto D84; + +T84_4: + response = 0.00896934; + goto D84; + +T84_5: + response = -0.0197272; + goto D84; + +T84_6: + response = 0.000167123; + goto D84; + +T84_7: + response = 0.00057116; + goto D84; + +D84: + +tnscore = tnscore + response; + +/* Tree 86 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N85_1: + if DAY_WEEK_AVG_RATIO < 3.635 then goto N85_2; + else goto N85_5; + +N85_2: + if DAY_LW_DAY_HITS_RATIO < 21.25 then goto N85_3; + else 
goto T85_4; + +N85_3: + if PREV_DAY_HITS < 19.5 then goto T85_1; + else goto N85_4; + +T85_1: + response = -0.000199036; + goto D85; + +N85_4: + if INTLNEWS < 0.25 then goto T85_2; + else goto T85_3; + +T85_2: + response = -0.00106649; + goto D85; + +T85_3: + response = 0.0081603; + goto D85; + +T85_4: + response = -0.0104178; + goto D85; + +N85_5: + if DAY_WEEK_AVG_RATIO < 3.845 then goto N85_6; + else goto T85_7; + +N85_6: + if DAY_HITS_FRAC < 0.94 then goto T85_5; + else goto T85_6; + +T85_5: + response = 0.00340684; + goto D85; + +T85_6: + response = 0.0209321; + goto D85; + +T85_7: + response = 0.00113853; + goto D85; + +D85: + +tnscore = tnscore + response; + +/* Tree 87 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N86_1: + if REGIONALNEWS < 0.275 then goto N86_2; + else goto T86_7; + +N86_2: + if LAW < 0.105 then goto N86_3; + else goto T86_6; + +N86_3: + if DAY_WEEK_AVG_RATIO < 6.01 then goto T86_1; + else goto N86_4; + +T86_1: + response = 0.000136175; + goto D86; + +N86_4: + if NATIONALNEWS < 0.31 then goto N86_5; + else goto T86_5; + +N86_5: + if DAY_HITS_FRAC < 0.895 then goto T86_2; + else goto N86_6; + +T86_2: + response = 0.00575266; + goto D86; + +N86_6: + if AVG_RANK < 7.53 then goto T86_3; + else goto T86_4; + +T86_3: + response = -0.0119194; + goto D86; + +T86_4: + response = -0.00112094; + goto D86; + +T86_5: + response = -0.0166441; + goto D86; + +T86_6: + response = 0.00794833; + goto D86; + +T86_7: + response = -0.0103064; + goto D86; + +D86: + +tnscore = tnscore + response; + +/* Tree 88 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N87_1: + if TOPSTORY < 0.315 then goto T87_1; + else goto N87_2; + +T87_1: + response = -0.000718271; + goto D87; + +N87_2: + if DAY_PD_HITS_RATIO < 0.13 then goto T87_2; + else goto N87_3; + +T87_2: + response = -0.0105571; + goto D87; + +N87_3: + if DAY_WEEK_AVG_RATIO < 5.82 then goto N87_4; + else goto N87_6; + +N87_4: + if DAY_WEEK_AVG_RATIO < 4.82 then goto N87_5; + else goto T87_5; + +N87_5: 
+ if DAY_LW_DAY_HITS_RATIO < 11.5 then goto T87_3; + else goto T87_4; + +T87_3: + response = 0.00414548; + goto D87; + +T87_4: + response = -0.010294; + goto D87; + +T87_5: + response = 0.0157636; + goto D87; + +N87_6: + if ISABSTRACT_AVG < 0.135 then goto T87_6; + else goto T87_7; + +T87_6: + response = -0.0110257; + goto D87; + +T87_7: + response = 0.00663564; + goto D87; + +D87: + +tnscore = tnscore + response; + +/* Tree 89 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N88_1: + if MAX_SCORE < 362776 then goto N88_2; + else goto N88_3; + +N88_2: + if MAX_SCORE < 361504 then goto T88_1; + else goto T88_2; + +T88_1: + response = 0.000123708; + goto D88; + +T88_2: + response = 0.0215766; + goto D88; + +N88_3: + if INTLNEWS < 0.155 then goto N88_4; + else goto N88_5; + +N88_4: + if WEEKAVG < 2.36 then goto T88_3; + else goto T88_4; + +T88_3: + response = -0.00603082; + goto D88; + +T88_4: + response = 0.00760337; + goto D88; + +N88_5: + if TOPSTORY < 0.275 then goto T88_5; + else goto N88_6; + +T88_5: + response = -0.000704669; + goto D88; + +N88_6: + if PUB_TODAY_AVG < 0.86 then goto T88_6; + else goto T88_7; + +T88_6: + response = 0.0139844; + goto D88; + +T88_7: + response = -0.00288551; + goto D88; + +D88: + +tnscore = tnscore + response; + +/* Tree 90 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N89_1: + if REGIONALNEWS < 0.115 then goto N89_2; + else goto N89_5; + +N89_2: + if DAY_PREV_DAY_HITS_FRAC < 0.725 then goto N89_3; + else goto T89_4; + +N89_3: + if DAY_LW_DAY_HITS_RATIO < 21.25 then goto N89_4; + else goto T89_3; + +N89_4: + if MAX_SCORE < 242944 then goto T89_1; + else goto T89_2; + +T89_1: + response = 0.000482612; + goto D89; + +T89_2: + response = -0.00179648; + goto D89; + +T89_3: + response = -0.0123423; + goto D89; + +T89_4: + response = 0.000623923; + goto D89; + +N89_5: + if DAY_HITS < 4.5 then goto N89_6; + else goto T89_7; + +N89_6: + if DAY_PREV_DAY_HITS_FRAC < 0.87 then goto T89_5; + else goto T89_6; + +T89_5: + response = 
0.00798437; + goto D89; + +T89_6: + response = -0.0109132; + goto D89; + +T89_7: + response = 0.0140617; + goto D89; + +D89: + +tnscore = tnscore + response; + +/* Tree 91 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N90_1: + if MAX_SCORE < 322221 then goto T90_1; + else goto N90_2; + +T90_1: + response = 0.000409287; + goto D90; + +N90_2: + if MAX_SCORE < 334601 then goto T90_2; + else goto N90_3; + +T90_2: + response = -0.00880555; + goto D90; + +N90_3: + if AVG_RANK < 7.58 then goto N90_4; + else goto N90_5; + +N90_4: + if SUPERDUPER_AVG < 0.295 then goto T90_3; + else goto T90_4; + +T90_3: + response = -0.00215568; + goto D90; + +T90_4: + response = -0.0124233; + goto D90; + +N90_5: + if NATIONALNEWS < 0.21 then goto T90_5; + else goto N90_6; + +T90_5: + response = 0.00160963; + goto D90; + +N90_6: + if ISABSTRACT_AVG < 0.185 then goto T90_6; + else goto T90_7; + +T90_6: + response = -0.00703363; + goto D90; + +T90_7: + response = 0.00777402; + goto D90; + +D90: + +tnscore = tnscore + response; + +/* Tree 92 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N91_1: + if ENTERTAINMENT < 0.21 then goto T91_1; + else goto N91_2; + +T91_1: + response = -0.00019687; + goto D91; + +N91_2: + if DAY_PD_HITS_RATIO < 15.75 then goto N91_3; + else goto T91_7; + +N91_3: + if SUPERDUPER_AVG < 0.415 then goto N91_4; + else goto T91_6; + +N91_4: + if MAX_MIN_RANK < 3 then goto T91_2; + else goto N91_5; + +T91_2: + response = -0.00268736; + goto D91; + +N91_5: + if MAX_MIN_RANK < 5 then goto T91_3; + else goto N91_6; + +T91_3: + response = 0.0078927; + goto D91; + +N91_6: + if DAY_PD_HITS_RATIO < 2.835 then goto T91_4; + else goto T91_5; + +T91_4: + response = 0.00387939; + goto D91; + +T91_5: + response = -0.00410318; + goto D91; + +T91_6: + response = -0.00538175; + goto D91; + +T91_7: + response = 0.0142121; + goto D91; + +D91: + +tnscore = tnscore + response; + +/* Tree 93 of 97 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N92_1: + if SPORTS < 0.415 then goto 
T92_1; + else goto N92_2; + +T92_1: + response = -3.44051e-05; + goto D92; + +N92_2: + if MAX_MIN_RANK < 1 then goto T92_2; + else goto N92_3; + +T92_2: + response = -0.00501256; + goto D92; + +N92_3: + if SUPERDUPER_AVG < 0.05 then goto N92_4; + else goto T92_7; + +N92_4: + if MAX_SCORE < 229196 then goto T92_3; + else goto N92_5; + +T92_3: + response = -0.00015078; + goto D92; + +N92_5: + if MAX_SCORE < 258856 then goto N92_6; + else goto T92_6; + +N92_6: + if ISTITLE_AVG < 0.355 then goto T92_4; + else goto T92_5; + +T92_4: + response = 0.0314869; + goto D92; + +T92_5: + response = 0.00734956; + goto D92; + +T92_6: + response = 0.00421683; + goto D92; + +T92_7: + response = 0.00084287; + goto D92; + +D92: + +tnscore = tnscore + response; + +/* Tree 94 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N93_1: + if MAX_MIN_RANK < 9 then goto N93_2; + else goto N93_5; + +N93_2: + if MAX_SCORE < 382719 then goto N93_3; + else goto T93_4; + +N93_3: + if MAX_SCORE < 362503 then goto T93_1; + else goto N93_4; + +T93_1: + response = 3.58027e-05; + goto D93; + +N93_4: + if MAX_SCORE < 364403 then goto T93_2; + else goto T93_3; + +T93_2: + response = -0.0154942; + goto D93; + +T93_3: + response = -0.00276027; + goto D93; + +T93_4: + response = 0.00210644; + goto D93; + +N93_5: + if WEEKAVG < 1.36 then goto N93_6; + else goto T93_7; + +N93_6: + if MAX_SCORE < 269970 then goto T93_5; + else goto T93_6; + +T93_5: + response = -0.00118638; + goto D93; + +T93_6: + response = 0.0203373; + goto D93; + +T93_7: + response = -0.00169747; + goto D93; + +D93: + +tnscore = tnscore + response; + +/* Tree 95 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N94_1: + if PREV_DAY_HITS < 26.5 then goto N94_2; + else goto N94_3; + +N94_2: + if MAX_SCORE < 187757 then goto T94_1; + else goto T94_2; + +T94_1: + response = -0.00124276; + goto D94; + +T94_2: + response = 0.000403197; + goto D94; + +N94_3: + if DAY_PREV_DAY_HITS_FRAC < 0.795 then goto N94_4; + else goto N94_5; + +N94_4: + 
if DAY_PREV_DAY_HITS_FRAC < 0.675 then goto T94_3; + else goto T94_4; + +T94_3: + response = -0.00288805; + goto D94; + +T94_4: + response = 0.00935152; + goto D94; + +N94_5: + if WEEKAVG < 11.93 then goto N94_6; + else goto T94_7; + +N94_6: + if DAY_PD_HITS_RATIO < 0.27 then goto T94_5; + else goto T94_6; + +T94_5: + response = -0.00585669; + goto D94; + +T94_6: + response = -0.0173104; + goto D94; + +T94_7: + response = 0.003142; + goto D94; + +D94: + +tnscore = tnscore + response; + +/* Tree 96 of 97 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N95_1: + if MAX_MIN_RANK < 7 then goto T95_1; + else goto N95_2; + +T95_1: + response = -0.000568858; + goto D95; + +N95_2: + if PUB_TODAY_AVG < 0.885 then goto N95_3; + else goto N95_6; + +N95_3: + if TOPSTORY < 0.27 then goto N95_4; + else goto T95_5; + +N95_4: + if INTLNEWS < 0.365 then goto T95_2; + else goto N95_5; + +T95_2: + response = -0.0025302; + goto D95; + +N95_5: + if AVG_RANK < 6.9 then goto T95_3; + else goto T95_4; + +T95_3: + response = -0.00341549; + goto D95; + +T95_4: + response = 0.00979915; + goto D95; + +T95_5: + response = 0.0137197; + goto D95; + +N95_6: + if DAY_HITS < 10.5 then goto T95_6; + else goto T95_7; + +T95_6: + response = 0.0179211; + goto D95; + +T95_7: + response = -0.000230543; + goto D95; + +D95: + +tnscore = tnscore + response; + +/* Tree 97 of 97 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N96_1: + if MIN_RANK < 7 then goto T96_1; + else goto N96_2; + +T96_1: + response = -0.00062463; + goto D96; + +N96_2: + if DAY_PREV_DAY_HITS_FRAC < 0.475 then goto N96_3; + else goto N96_4; + +N96_3: + if AVG_RANK < 9.745 then goto T96_2; + else goto T96_3; + +T96_2: + response = 0.0142049; + goto D96; + +T96_3: + response = -0.000373764; + goto D96; + +N96_4: + if PUB_TODAY_AVG < 0.27 then goto N96_5; + else goto N96_6; + +N96_5: + if MAX_SCORE < 276860 then goto T96_4; + else goto T96_5; + +T96_4: + response = -0.00522975; + goto D96; + +T96_5: + response = 0.00307189; + goto D96; + 
+N96_6: + if PREV_DAY_HITS < 1.5 then goto T96_6; + else goto T96_7; + +T96_6: + response = 0.00025413; + goto D96; + +T96_7: + response = 0.00490254; + goto D96; + +D96: + +tnscore = tnscore + response; + +return; diff --git a/searchlib/src/test/files/treenet04.model b/searchlib/src/test/files/treenet04.model new file mode 100644 index 00000000000..64082ca5ca7 --- /dev/null +++ b/searchlib/src/test/files/treenet04.model @@ -0,0 +1,6247 @@ + +/* Data Dictionary, Number Of Variables = 40 */ +/* Name = NUM_WORDS, Type = continuous. */ +/* Name = DAY_HITS, Type = continuous. */ +/* Name = PREV_DAY_HITS, Type = continuous. */ +/* Name = DAY_PD_HITS_RATIO, Type = continuous. */ +/* Name = DAY_PD_HITS_DERIV, Type = continuous. */ +/* Name = LW_DAY_HITS, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */ +/* Name = WEEKAVG, Type = continuous. */ +/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */ +/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */ +/* Name = ISTITLE_AVG, Type = continuous. */ +/* Name = ISABSTRACT_AVG, Type = continuous. */ +/* Name = SUPERDUPER_AVG, Type = continuous. */ +/* Name = PUB_TODAY_AVG, Type = continuous. */ +/* Name = BUSINESS, Type = continuous. */ +/* Name = ENTERTAINMENT, Type = continuous. */ +/* Name = HEALTH, Type = continuous. */ +/* Name = INTLNEWS, Type = continuous. */ +/* Name = LIFESTYLE, Type = continuous. */ +/* Name = LOCALNEWS, Type = continuous. */ +/* Name = MISC, Type = continuous. */ +/* Name = NATIONALNEWS, Type = continuous. */ +/* Name = POLITICS, Type = continuous. */ +/* Name = REGIONALNEWS, Type = continuous. */ +/* Name = SPORTS, Type = continuous. */ +/* Name = TOPSTORY, Type = continuous. */ +/* Name = AVG_RANK, Type = continuous. */ +/* Name = MAX_RANK, Type = continuous. */ +/* Name = MIN_RANK, Type = continuous. */ +/* Name = MAX_MIN_RANK, Type = continuous. */ +/* Name = AVG_SCORE, Type = continuous. 
*/ +/* Name = MAX_SCORE, Type = continuous. */ +/* Name = MIN_SCORE, Type = continuous. */ +/* Name = MAX_MIN_SCORE, Type = continuous. */ +/* Name = FOUR_HOUR_WF, Type = continuous. */ +/* Name = EIGHT_HOUR_WF, Type = continuous. */ +/* Name = TWELVE_HOUR_WF, Type = continuous. */ +/* Name = ONE_DAY_WF, Type = continuous. */ +/* Name = TWO_DAY_WF, Type = continuous. */ + +MODELBEGIN: + +/* CART version: 5.0.9.156 */ +/* TreeNet: TreeNet20071016172855 */ +/* Grove: /home/rparekh/lb/lb_title_hourly/lb_title_hourly.grv */ +/* N trees: 103 */ + +link TN0; +pred = tnscore; /* predicted value for IY_CTR */ + + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +/* Tree 1 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +tnscore = 0.0; + +N0_1: + if AVG_SCORE < 236210 then goto N0_2; + else goto N0_3; + +N0_2: + if AVG_SCORE < 151678 then goto T0_1; + else goto T0_2; + +T0_1: + response = 0.205803; + goto D0; + +T0_2: + response = 0.214904; + goto D0; + +N0_3: + if ISABSTRACT_AVG < 0.155 then goto N0_4; + else goto T0_7; + +N0_4: + if WEEKAVG < 0.93 then goto N0_5; + else goto N0_6; + +N0_5: + if TOPSTORY < 0.365 then goto T0_3; + else goto T0_4; + +T0_3: + response = 0.234927; + goto D0; + +T0_4: + response = 0.262252; + goto D0; + +N0_6: + if MAX_SCORE < 271454 then goto T0_5; + else goto T0_6; + +T0_5: + response = 0.236303; + goto D0; + +T0_6: + response = 0.251477; + goto D0; + +T0_7: + response = 0.224603; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N1_1: + if AVG_SCORE < 240282 then goto N1_2; + else goto N1_3; + +N1_2: + if AVG_SCORE < 153656 then goto T1_1; + else goto T1_2; + +T1_1: + response = -0.0157043; + goto D1; + +T1_2: + response = -0.00743688; + goto D1; + +N1_3: + if ISTITLE_AVG < 0.705 then goto N1_4; + else goto T1_7; + +N1_4: + if WEEKAVG < 0.93 then goto T1_3; + else goto N1_5; + +T1_3: + response = 0.0116703; + goto D1; + 
+N1_5: + if MAX_SCORE < 266499 then goto T1_4; + else goto N1_6; + +T1_4: + response = 0.00213746; + goto D1; + +N1_6: + if TWO_DAY_WF < 0.826656 then goto T1_5; + else goto T1_6; + +T1_5: + response = 0.0214705; + goto D1; + +T1_6: + response = 0.0350738; + goto D1; + +T1_7: + response = 0.00284844; + goto D1; + +D1: + +tnscore = tnscore + response; + +/* Tree 3 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N2_1: + if AVG_SCORE < 239849 then goto N2_2; + else goto N2_3; + +N2_2: + if AVG_SCORE < 230612 then goto T2_1; + else goto T2_2; + +T2_1: + response = -0.0105243; + goto D2; + +T2_2: + response = -0.000207603; + goto D2; + +N2_3: + if ISTITLE_AVG < 0.95 then goto N2_4; + else goto T2_7; + +N2_4: + if MAX_MIN_SCORE < 36505.8 then goto N2_5; + else goto N2_6; + +N2_5: + if WEEKAVG < 10.925 then goto T2_3; + else goto T2_4; + +T2_3: + response = 0.0103073; + goto D2; + +T2_4: + response = 0.0445006; + goto D2; + +N2_6: + if MAX_SCORE < 267687 then goto T2_5; + else goto T2_6; + +T2_5: + response = 0.00115576; + goto D2; + +T2_6: + response = 0.023751; + goto D2; + +T2_7: + response = 0.00109943; + goto D2; + +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N3_1: + if AVG_SCORE < 242149 then goto N3_2; + else goto N3_4; + +N3_2: + if AVG_SCORE < 153383 then goto T3_1; + else goto N3_3; + +T3_1: + response = -0.0131014; + goto D3; + +N3_3: + if WEEKAVG < 1.5 then goto T3_2; + else goto T3_3; + +T3_2: + response = -0.00720755; + goto D3; + +T3_3: + response = -0.000377073; + goto D3; + +N3_4: + if ISTITLE_AVG < 0.705 then goto N3_5; + else goto T3_7; + +N3_5: + if MAX_MIN_SCORE < 36505 then goto N3_6; + else goto T3_6; + +N3_6: + if BUSINESS < 0.13 then goto T3_4; + else goto T3_5; + +T3_4: + response = 0.0164936; + goto D3; + +T3_5: + response = 0.00560036; + goto D3; + +T3_6: + response = 0.0218971; + goto D3; + +T3_7: + response = 0.00392608; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* 
Tree 5 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N4_1: + if MAX_SCORE < 264139 then goto N4_2; + else goto N4_3; + +N4_2: + if MIN_SCORE < 222136 then goto T4_1; + else goto T4_2; + +T4_1: + response = -0.0079708; + goto D4; + +T4_2: + response = 0.00140823; + goto D4; + +N4_3: + if ISABSTRACT_AVG < 0.315 then goto N4_4; + else goto N4_6; + +N4_4: + if DAY_WEEK_AVG_DERIV < 10.5 then goto N4_5; + else goto T4_5; + +N4_5: + if BUSINESS < 0.105 then goto T4_3; + else goto T4_4; + +T4_3: + response = 0.016512; + goto D4; + +T4_4: + response = 0.00726199; + goto D4; + +T4_5: + response = 0.0306897; + goto D4; + +N4_6: + if SPORTS < 0.42 then goto T4_6; + else goto T4_7; + +T4_6: + response = -0.000976569; + goto D4; + +T4_7: + response = 0.0183973; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N5_1: + if AVG_SCORE < 231394 then goto T5_1; + else goto N5_2; + +T5_1: + response = -0.00698348; + goto D5; + +N5_2: + if ISTITLE_AVG < 0.645 then goto N5_3; + else goto N5_5; + +N5_3: + if MAX_SCORE < 271880 then goto T5_2; + else goto N5_4; + +T5_2: + response = 0.00669893; + goto D5; + +N5_4: + if AVG_SCORE < 281369 then goto T5_3; + else goto T5_4; + +T5_3: + response = 0.0209096; + goto D5; + +T5_4: + response = 0.0117951; + goto D5; + +N5_5: + if MIN_SCORE < 318875 then goto N5_6; + else goto T5_7; + +N5_6: + if WEEKAVG < 1.5 then goto T5_5; + else goto T5_6; + +T5_5: + response = -0.00428011; + goto D5; + +T5_6: + response = 0.00596324; + goto D5; + +T5_7: + response = 0.0116652; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N6_1: + if MIN_SCORE < 222028 then goto N6_2; + else goto N6_4; + +N6_2: + if SUPERDUPER_AVG < 0.27 then goto T6_1; + else goto N6_3; + +T6_1: + response = -0.00761706; + goto D6; + +N6_3: + if INTLNEWS < 0.535 then goto T6_2; + else goto T6_3; + +T6_2: + response = -0.00274344; + goto D6; + +T6_3: 
+ response = 0.0225782; + goto D6; + +N6_4: + if WEEKAVG < 0.93 then goto N6_5; + else goto N6_6; + +N6_5: + if ISTITLE_AVG < 0.71 then goto T6_4; + else goto T6_5; + +T6_4: + response = 0.00689051; + goto D6; + +T6_5: + response = -0.00500438; + goto D6; + +N6_6: + if MIN_SCORE < 319119 then goto T6_6; + else goto T6_7; + +T6_6: + response = 0.00977814; + goto D6; + +T6_7: + response = 0.0200288; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N7_1: + if MIN_SCORE < 222028 then goto N7_2; + else goto N7_5; + +N7_2: + if AVG_SCORE < 158974 then goto T7_1; + else goto N7_3; + +T7_1: + response = -0.00918892; + goto D7; + +N7_3: + if NUM_WORDS < 1.5 then goto N7_4; + else goto T7_4; + +N7_4: + if WEEKAVG < 2.93 then goto T7_2; + else goto T7_3; + +T7_2: + response = -0.00158808; + goto D7; + +T7_3: + response = 0.0119896; + goto D7; + +T7_4: + response = -0.00568155; + goto D7; + +N7_5: + if ISTITLE_AVG < 0.95 then goto N7_6; + else goto T7_7; + +N7_6: + if ISABSTRACT_AVG < 0.155 then goto T7_5; + else goto T7_6; + +T7_5: + response = 0.0116413; + goto D7; + +T7_6: + response = 0.00150493; + goto D7; + +T7_7: + response = -0.00110515; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N8_1: + if AVG_SCORE < 241264 then goto N8_2; + else goto N8_4; + +N8_2: + if MIN_SCORE < 132718 then goto T8_1; + else goto N8_3; + +T8_1: + response = -0.00978209; + goto D8; + +N8_3: + if WEEKAVG < 0.93 then goto T8_2; + else goto T8_3; + +T8_2: + response = -0.00610293; + goto D8; + +T8_3: + response = -0.000295273; + goto D8; + +N8_4: + if ISABSTRACT_AVG < 0.185 then goto N8_5; + else goto N8_6; + +N8_5: + if DAY_LW_DAY_HITS_RATIO < 11.835 then goto T8_4; + else goto T8_5; + +T8_4: + response = 0.00900634; + goto D8; + +T8_5: + response = 0.0221056; + goto D8; + +N8_6: + if WEEKAVG < 1.07 then goto T8_6; + else goto T8_7; + +T8_6: + response = 
-0.00392509; + goto D8; + +T8_7: + response = 0.00615921; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N9_1: + if AVG_SCORE < 233949 then goto T9_1; + else goto N9_2; + +T9_1: + response = -0.00503156; + goto D9; + +N9_2: + if ISTITLE_AVG < 0.645 then goto N9_3; + else goto N9_6; + +N9_3: + if TOPSTORY < 0.05 then goto N9_4; + else goto T9_5; + +N9_4: + if PUB_TODAY_AVG < 0.105 then goto T9_2; + else goto N9_5; + +T9_2: + response = -0.00286006; + goto D9; + +N9_5: + if DAY_PD_HITS_RATIO < 0.65 then goto T9_3; + else goto T9_4; + +T9_3: + response = 0.0275142; + goto D9; + +T9_4: + response = 0.00616295; + goto D9; + +T9_5: + response = 0.0129407; + goto D9; + +N9_6: + if DAY_LW_DAY_HITS_DERIV < 7.5 then goto T9_6; + else goto T9_7; + +T9_6: + response = -0.00186065; + goto D9; + +T9_7: + response = 0.00771893; + goto D9; + +D9: + +tnscore = tnscore + response; + +/* Tree 11 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N10_1: + if AVG_SCORE < 241955 then goto N10_2; + else goto N10_3; + +N10_2: + if MAX_SCORE < 170767 then goto T10_1; + else goto T10_2; + +T10_1: + response = -0.00748858; + goto D10; + +T10_2: + response = -0.00266952; + goto D10; + +N10_3: + if MIN_SCORE < 321219 then goto N10_4; + else goto N10_6; + +N10_4: + if TOPSTORY < 0.05 then goto T10_3; + else goto N10_5; + +T10_3: + response = -0.00143781; + goto D10; + +N10_5: + if AVG_RANK < 9.735 then goto T10_4; + else goto T10_5; + +T10_4: + response = 0.00571239; + goto D10; + +T10_5: + response = 0.0197833; + goto D10; + +N10_6: + if WEEKAVG < 0.93 then goto T10_6; + else goto T10_7; + +T10_6: + response = 0.00690536; + goto D10; + +T10_7: + response = 0.017048; + goto D10; + +D10: + +tnscore = tnscore + response; + +/* Tree 12 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N11_1: + if AVG_SCORE < 245333 then goto N11_2; + else goto N11_3; + +N11_2: + if TOPSTORY < 0.355 then goto T11_1; + else goto T11_2; + 
+T11_1: + response = -0.00376047; + goto D11; + +T11_2: + response = 0.00963479; + goto D11; + +N11_3: + if WEEKAVG < 0.93 then goto T11_3; + else goto N11_4; + +T11_3: + response = 0.000735298; + goto D11; + +N11_4: + if ISABSTRACT_AVG < 0.705 then goto N11_5; + else goto T11_7; + +N11_5: + if TWO_DAY_WF < 0.872534 then goto N11_6; + else goto T11_6; + +N11_6: + if MAX_MIN_SCORE < 52145.2 then goto T11_4; + else goto T11_5; + +T11_4: + response = 0.00973324; + goto D11; + +T11_5: + response = -0.00849394; + goto D11; + +T11_6: + response = 0.0177153; + goto D11; + +T11_7: + response = 0.000337073; + goto D11; + +D11: + +tnscore = tnscore + response; + +/* Tree 13 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N12_1: + if MIN_SCORE < 219800 then goto T12_1; + else goto N12_2; + +T12_1: + response = -0.00352861; + goto D12; + +N12_2: + if ISTITLE_AVG < 0.73 then goto N12_3; + else goto N12_6; + +N12_3: + if BUSINESS < 0.05 then goto N12_4; + else goto T12_5; + +N12_4: + if PREV_DAY_HITS < 7.5 then goto N12_5; + else goto T12_4; + +N12_5: + if PUB_TODAY_AVG < 0.11 then goto T12_2; + else goto T12_3; + +T12_2: + response = 0.00174614; + goto D12; + +T12_3: + response = 0.0120552; + goto D12; + +T12_4: + response = 0.0184693; + goto D12; + +T12_5: + response = 0.00396387; + goto D12; + +N12_6: + if DAY_LW_DAY_HITS_DERIV < 7.5 then goto T12_6; + else goto T12_7; + +T12_6: + response = -0.00165207; + goto D12; + +T12_7: + response = 0.00838864; + goto D12; + +D12: + +tnscore = tnscore + response; + +/* Tree 14 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N13_1: + if AVG_SCORE < 332830 then goto N13_2; + else goto T13_7; + +N13_2: + if AVG_SCORE < 221523 then goto N13_3; + else goto N13_5; + +N13_3: + if ISABSTRACT_AVG < 0.845 then goto T13_1; + else goto N13_4; + +T13_1: + response = -0.00518819; + goto D13; + +N13_4: + if MAX_MIN_SCORE < 41116.5 then goto T13_2; + else goto T13_3; + +T13_2: + response = -0.0022638; + goto D13; + +T13_3: + response = 
0.010561; + goto D13; + +N13_5: + if MIN_SCORE < 275020 then goto N13_6; + else goto T13_6; + +N13_6: + if ISABSTRACT_AVG < 0.13 then goto T13_4; + else goto T13_5; + +T13_4: + response = 0.00532774; + goto D13; + +T13_5: + response = -0.00140785; + goto D13; + +T13_6: + response = -0.00673306; + goto D13; + +T13_7: + response = 0.0089962; + goto D13; + +D13: + +tnscore = tnscore + response; + +/* Tree 15 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N14_1: + if AVG_SCORE < 230217 then goto T14_1; + else goto N14_2; + +T14_1: + response = -0.00305127; + goto D14; + +N14_2: + if WEEKAVG < 0.93 then goto T14_2; + else goto N14_3; + +T14_2: + response = 0.000779225; + goto D14; + +N14_3: + if BUSINESS < 0.05 then goto N14_4; + else goto N14_6; + +N14_4: + if LOCALNEWS < 0.47 then goto N14_5; + else goto T14_5; + +N14_5: + if MAX_MIN_SCORE < 28240.8 then goto T14_3; + else goto T14_4; + +T14_3: + response = 0.0179967; + goto D14; + +T14_4: + response = 0.00806848; + goto D14; + +T14_5: + response = -0.00831202; + goto D14; + +N14_6: + if AVG_SCORE < 340223 then goto T14_6; + else goto T14_7; + +T14_6: + response = 0.000231883; + goto D14; + +T14_7: + response = 0.0140119; + goto D14; + +D14: + +tnscore = tnscore + response; + +/* Tree 16 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N15_1: + if AVG_SCORE < 245333 then goto T15_1; + else goto N15_2; + +T15_1: + response = -0.00229594; + goto D15; + +N15_2: + if ISABSTRACT_AVG < 0.315 then goto N15_3; + else goto T15_7; + +N15_3: + if LOCALNEWS < 0.05 then goto N15_4; + else goto N15_5; + +N15_4: + if DAY_LW_DAY_HITS_RATIO < 1.75 then goto T15_2; + else goto T15_3; + +T15_2: + response = -0.0039683; + goto D15; + +T15_3: + response = 0.00569577; + goto D15; + +N15_5: + if PREV_DAY_HITS < 8.5 then goto T15_4; + else goto N15_6; + +T15_4: + response = 0.007769; + goto D15; + +N15_6: + if DAY_HITS < 2.5 then goto T15_5; + else goto T15_6; + +T15_5: + response = 0.0344185; + goto D15; + +T15_6: + response = 
0.0117709; + goto D15; + +T15_7: + response = -0.000676423; + goto D15; + +D15: + +tnscore = tnscore + response; + +/* Tree 17 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N16_1: + if MAX_SCORE < 249988 then goto N16_2; + else goto N16_5; + +N16_2: + if INTLNEWS < 0.105 then goto T16_1; + else goto N16_3; + +T16_1: + response = -0.00430418; + goto D16; + +N16_3: + if AVG_SCORE < 158414 then goto T16_2; + else goto N16_4; + +T16_2: + response = -0.00416318; + goto D16; + +N16_4: + if MAX_SCORE < 242790 then goto T16_3; + else goto T16_4; + +T16_3: + response = 0.00543383; + goto D16; + +T16_4: + response = -0.00915253; + goto D16; + +N16_5: + if ISABSTRACT_AVG < 0.155 then goto T16_5; + else goto N16_6; + +T16_5: + response = 0.00464962; + goto D16; + +N16_6: + if SPORTS < 0.365 then goto T16_6; + else goto T16_7; + +T16_6: + response = -0.00278462; + goto D16; + +T16_7: + response = 0.0111898; + goto D16; + +D16: + +tnscore = tnscore + response; + +/* Tree 18 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N17_1: + if AVG_SCORE < 249330 then goto T17_1; + else goto N17_2; + +T17_1: + response = -0.00159136; + goto D17; + +N17_2: + if WEEKAVG < 1.07 then goto N17_3; + else goto N17_4; + +N17_3: + if TOPSTORY < 0.07 then goto T17_2; + else goto T17_3; + +T17_2: + response = -0.00243507; + goto D17; + +T17_3: + response = 0.00585214; + goto D17; + +N17_4: + if TWO_DAY_WF < 0.9518 then goto N17_5; + else goto T17_7; + +N17_5: + if EIGHT_HOUR_WF < 0.108586 then goto N17_6; + else goto T17_6; + +N17_6: + if INTLNEWS < 0.42 then goto T17_4; + else goto T17_5; + +T17_4: + response = 0.00435459; + goto D17; + +T17_5: + response = 0.0191599; + goto D17; + +T17_6: + response = -0.00770634; + goto D17; + +T17_7: + response = 0.013571; + goto D17; + +D17: + +tnscore = tnscore + response; + +/* Tree 19 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N18_1: + if AVG_SCORE < 332253 then goto N18_2; + else goto N18_4; + +N18_2: + if TOPSTORY < 0.355 then goto 
N18_3; + else goto T18_3; + +N18_3: + if BUSINESS < 0.05 then goto T18_1; + else goto T18_2; + +T18_1: + response = 0.000641958; + goto D18; + +T18_2: + response = -0.00274201; + goto D18; + +T18_3: + response = 0.00886024; + goto D18; + +N18_4: + if DAY_PD_HITS_DERIV < 1.5 then goto N18_5; + else goto T18_7; + +N18_5: + if AVG_SCORE < 336554 then goto T18_4; + else goto N18_6; + +T18_4: + response = 0.0191918; + goto D18; + +N18_6: + if SUPERDUPER_AVG < 0.415 then goto T18_5; + else goto T18_6; + +T18_5: + response = -0.00116436; + goto D18; + +T18_6: + response = 0.0183934; + goto D18; + +T18_7: + response = 0.0116471; + goto D18; + +D18: + +tnscore = tnscore + response; + +/* Tree 20 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N19_1: + if MAX_SCORE < 249072 then goto N19_2; + else goto N19_6; + +N19_2: + if INTLNEWS < 0.185 then goto T19_1; + else goto N19_3; + +T19_1: + response = -0.00383726; + goto D19; + +N19_3: + if ISABSTRACT_AVG < 0.61 then goto T19_2; + else goto N19_4; + +T19_2: + response = -0.00202529; + goto D19; + +N19_4: + if WEEKAVG < 0.785 then goto T19_3; + else goto N19_5; + +T19_3: + response = -0.0038571; + goto D19; + +N19_5: + if AVG_SCORE < 169471 then goto T19_4; + else goto T19_5; + +T19_4: + response = 0.00474293; + goto D19; + +T19_5: + response = 0.0278332; + goto D19; + +N19_6: + if BUSINESS < 0.05 then goto T19_6; + else goto T19_7; + +T19_6: + response = 0.00491784; + goto D19; + +T19_7: + response = -0.000728088; + goto D19; + +D19: + +tnscore = tnscore + response; + +/* Tree 21 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N20_1: + if AVG_SCORE < 223608 then goto T20_1; + else goto N20_2; + +T20_1: + response = -0.00242896; + goto D20; + +N20_2: + if PREV_DAY_HITS < 7.5 then goto N20_3; + else goto N20_5; + +N20_3: + if ISABSTRACT_AVG < 0.05 then goto N20_4; + else goto T20_4; + +N20_4: + if MAX_MIN_RANK < 7 then goto T20_2; + else goto T20_3; + +T20_2: + response = 0.00151785; + goto D20; + +T20_3: + response = 
0.0118374; + goto D20; + +T20_4: + response = -0.00165444; + goto D20; + +N20_5: + if SPORTS < 0.34 then goto N20_6; + else goto T20_7; + +N20_6: + if DAY_WEEK_AVG_DERIV < -1.93 then goto T20_5; + else goto T20_6; + +T20_5: + response = -0.00307953; + goto D20; + +T20_6: + response = 0.00717407; + goto D20; + +T20_7: + response = 0.0154963; + goto D20; + +D20: + +tnscore = tnscore + response; + +/* Tree 22 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N21_1: + if MIN_SCORE < 319241 then goto N21_2; + else goto N21_6; + +N21_2: + if INTLNEWS < 0.73 then goto N21_3; + else goto T21_5; + +N21_3: + if TOPSTORY < 0.355 then goto N21_4; + else goto T21_4; + +N21_4: + if NUM_WORDS < 2.5 then goto N21_5; + else goto T21_3; + +N21_5: + if PREV_DAY_HITS < 3.5 then goto T21_1; + else goto T21_2; + +T21_1: + response = -0.00228523; + goto D21; + +T21_2: + response = 0.00146239; + goto D21; + +T21_3: + response = -0.00850081; + goto D21; + +T21_4: + response = 0.00776825; + goto D21; + +T21_5: + response = 0.0160753; + goto D21; + +N21_6: + if DAY_LW_DAY_HITS_DERIV < 12.5 then goto T21_6; + else goto T21_7; + +T21_6: + response = 0.00439757; + goto D21; + +T21_7: + response = 0.0197836; + goto D21; + +D21: + +tnscore = tnscore + response; + +/* Tree 23 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N22_1: + if INTLNEWS < 0.705 then goto N22_2; + else goto T22_7; + +N22_2: + if TOPSTORY < 0.355 then goto N22_3; + else goto T22_6; + +N22_3: + if MIN_SCORE < 323992 then goto N22_4; + else goto N22_6; + +N22_4: + if LIFESTYLE < 0.13 then goto N22_5; + else goto T22_3; + +N22_5: + if LOCALNEWS < 0.315 then goto T22_1; + else goto T22_2; + +T22_1: + response = -0.000291455; + goto D22; + +T22_2: + response = -0.00459663; + goto D22; + +T22_3: + response = -0.00868291; + goto D22; + +N22_6: + if MIN_SCORE < 325835 then goto T22_4; + else goto T22_5; + +T22_4: + response = 0.0255955; + goto D22; + +T22_5: + response = 0.00222024; + goto D22; + +T22_6: + response = 
0.00689548; + goto D22; + +T22_7: + response = 0.0147049; + goto D22; + +D22: + +tnscore = tnscore + response; + +/* Tree 24 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N23_1: + if DAY_WEEK_AVG_DERIV < 41.5 then goto N23_2; + else goto T23_7; + +N23_2: + if AVG_SCORE < 222620 then goto T23_1; + else goto N23_3; + +T23_1: + response = -0.00230434; + goto D23; + +N23_3: + if ISTITLE_AVG < 0.95 then goto N23_4; + else goto T23_6; + +N23_4: + if DAY_LW_DAY_HITS_RATIO < 2.9 then goto N23_5; + else goto N23_6; + +N23_5: + if ISABSTRACT_AVG < 0.685 then goto T23_2; + else goto T23_3; + +T23_2: + response = -0.00098145; + goto D23; + +T23_3: + response = 0.0175646; + goto D23; + +N23_6: + if FOUR_HOUR_WF < 0.0415469 then goto T23_4; + else goto T23_5; + +T23_4: + response = 0.00693887; + goto D23; + +T23_5: + response = 0.000352143; + goto D23; + +T23_6: + response = -0.00149738; + goto D23; + +T23_7: + response = 0.0156711; + goto D23; + +D23: + +tnscore = tnscore + response; + +/* Tree 25 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N24_1: + if BUSINESS < 0.105 then goto N24_2; + else goto T24_7; + +N24_2: + if DAY_WEEK_AVG_RATIO < 5.705 then goto N24_3; + else goto T24_6; + +N24_3: + if AVG_SCORE < 155902 then goto T24_1; + else goto N24_4; + +T24_1: + response = -0.0033031; + goto D24; + +N24_4: + if WEEKAVG < 0.64 then goto N24_5; + else goto T24_5; + +N24_5: + if MAX_SCORE < 363895 then goto T24_2; + else goto N24_6; + +T24_2: + response = -0.00281287; + goto D24; + +N24_6: + if MAX_MIN_SCORE < 19200.5 then goto T24_3; + else goto T24_4; + +T24_3: + response = -0.00201482; + goto D24; + +T24_4: + response = 0.0209412; + goto D24; + +T24_5: + response = 0.00313704; + goto D24; + +T24_6: + response = 0.0198315; + goto D24; + +T24_7: + response = -0.0020926; + goto D24; + +D24: + +tnscore = tnscore + response; + +/* Tree 26 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N25_1: + if DAY_PD_HITS_DERIV < -8.5 then goto N25_2; + else goto N25_4; + 
+N25_2: + if SPORTS < 0.42 then goto N25_3; + else goto T25_3; + +N25_3: + if TOPSTORY < 0.05 then goto T25_1; + else goto T25_2; + +T25_1: + response = -0.00256178; + goto D25; + +T25_2: + response = 0.0069554; + goto D25; + +T25_3: + response = 0.0189865; + goto D25; + +N25_4: + if MAX_SCORE < 455757 then goto N25_5; + else goto T25_7; + +N25_5: + if LIFESTYLE < 0.13 then goto N25_6; + else goto T25_6; + +N25_6: + if DAY_WEEK_AVG_RATIO < 4.535 then goto T25_4; + else goto T25_5; + +T25_4: + response = -0.00125806; + goto D25; + +T25_5: + response = 0.00573954; + goto D25; + +T25_6: + response = -0.00869664; + goto D25; + +T25_7: + response = 0.00982766; + goto D25; + +D25: + +tnscore = tnscore + response; + +/* Tree 27 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N26_1: + if AVG_SCORE < 158740 then goto T26_1; + else goto N26_2; + +T26_1: + response = -0.00306382; + goto D26; + +N26_2: + if WEEKAVG < 0.93 then goto N26_3; + else goto N26_4; + +N26_3: + if TOPSTORY < 0.365 then goto T26_2; + else goto T26_3; + +T26_2: + response = -0.00140654; + goto D26; + +T26_3: + response = 0.00834836; + goto D26; + +N26_4: + if BUSINESS < 0.05 then goto N26_5; + else goto T26_7; + +N26_5: + if MAX_MIN_SCORE < 52064.2 then goto N26_6; + else goto T26_6; + +N26_6: + if MAX_MIN_RANK < 7 then goto T26_4; + else goto T26_5; + +T26_4: + response = 0.00487329; + goto D26; + +T26_5: + response = 0.0143334; + goto D26; + +T26_6: + response = -0.00637212; + goto D26; + +T26_7: + response = -0.000162153; + goto D26; + +D26: + +tnscore = tnscore + response; + +/* Tree 28 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N27_1: + if DAY_PD_HITS_DERIV < -4.5 then goto N27_2; + else goto N27_6; + +N27_2: + if LOCALNEWS < 0.355 then goto N27_3; + else goto T27_5; + +N27_3: + if HEALTH < 0.05 then goto N27_4; + else goto T27_4; + +N27_4: + if MAX_MIN_SCORE < 42320.2 then goto T27_1; + else goto N27_5; + +T27_1: + response = 0.000165828; + goto D27; + +N27_5: + if TWELVE_HOUR_WF < 
0.0923295 then goto T27_2; + else goto T27_3; + +T27_2: + response = 0.00978237; + goto D27; + +T27_3: + response = -0.00925785; + goto D27; + +T27_4: + response = 0.0176032; + goto D27; + +T27_5: + response = -0.00980315; + goto D27; + +N27_6: + if DAY_WEEK_AVG_DERIV < 65.215 then goto T27_6; + else goto T27_7; + +T27_6: + response = -0.000940015; + goto D27; + +T27_7: + response = 0.0153051; + goto D27; + +D27: + +tnscore = tnscore + response; + +/* Tree 29 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N28_1: + if INTLNEWS < 0.53 then goto N28_2; + else goto N28_6; + +N28_2: + if DAY_PD_HITS_RATIO < 0.305 then goto N28_3; + else goto T28_5; + +N28_3: + if SPORTS < 0.115 then goto T28_1; + else goto N28_4; + +T28_1: + response = 0.000204707; + goto D28; + +N28_4: + if MAX_SCORE < 258205 then goto T28_2; + else goto N28_5; + +T28_2: + response = 0.00170055; + goto D28; + +N28_5: + if AVG_SCORE < 263393 then goto T28_3; + else goto T28_4; + +T28_3: + response = 0.0247726; + goto D28; + +T28_4: + response = 0.00690842; + goto D28; + +T28_5: + response = -0.00116708; + goto D28; + +N28_6: + if DAY_LW_DAY_HITS_DERIV < 6.5 then goto T28_6; + else goto T28_7; + +T28_6: + response = -5.66203e-05; + goto D28; + +T28_7: + response = 0.0136829; + goto D28; + +D28: + +tnscore = tnscore + response; + +/* Tree 30 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N29_1: + if TOPSTORY < 0.355 then goto N29_2; + else goto N29_6; + +N29_2: + if MAX_SCORE < 455757 then goto N29_3; + else goto N29_5; + +N29_3: + if PREV_DAY_HITS < 59.5 then goto N29_4; + else goto T29_3; + +N29_4: + if MIN_SCORE < 132399 then goto T29_1; + else goto T29_2; + +T29_1: + response = -0.00370024; + goto D29; + +T29_2: + response = -2.34946e-05; + goto D29; + +T29_3: + response = 0.0131047; + goto D29; + +N29_5: + if SUPERDUPER_AVG < 0.105 then goto T29_4; + else goto T29_5; + +T29_4: + response = -0.00138025; + goto D29; + +T29_5: + response = 0.0159936; + goto D29; + +N29_6: + if AVG_RANK < 
9.55 then goto T29_6; + else goto T29_7; + +T29_6: + response = 0.00325951; + goto D29; + +T29_7: + response = 0.0248619; + goto D29; + +D29: + +tnscore = tnscore + response; + +/* Tree 31 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N30_1: + if TOPSTORY < 0.21 then goto N30_2; + else goto N30_5; + +N30_2: + if PREV_DAY_HITS < 40.5 then goto N30_3; + else goto T30_4; + +N30_3: + if DAY_WEEK_AVG_RATIO < 2.665 then goto T30_1; + else goto N30_4; + +T30_1: + response = -0.00132885; + goto D30; + +N30_4: + if AVG_SCORE < 321396 then goto T30_2; + else goto T30_3; + +T30_2: + response = 0.000375419; + goto D30; + +T30_3: + response = 0.0087578; + goto D30; + +T30_4: + response = 0.0103933; + goto D30; + +N30_5: + if MAX_SCORE < 258688 then goto T30_5; + else goto N30_6; + +T30_5: + response = -0.00128842; + goto D30; + +N30_6: + if DAY_LW_DAY_HITS_RATIO < 10.5 then goto T30_6; + else goto T30_7; + +T30_6: + response = 0.00789361; + goto D30; + +T30_7: + response = -0.00472212; + goto D30; + +D30: + +tnscore = tnscore + response; + +/* Tree 32 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N31_1: + if LIFESTYLE < 0.13 then goto N31_2; + else goto T31_7; + +N31_2: + if MAX_SCORE < 170767 then goto T31_1; + else goto N31_3; + +T31_1: + response = -0.00265193; + goto D31; + +N31_3: + if REGIONALNEWS < 0.225 then goto N31_4; + else goto T31_6; + +N31_4: + if INTLNEWS < 0.73 then goto N31_5; + else goto T31_5; + +N31_5: + if AVG_SCORE < 446461 then goto N31_6; + else goto T31_4; + +N31_6: + if DAY_LW_DAY_HITS_RATIO < 11.835 then goto T31_2; + else goto T31_3; + +T31_2: + response = 0.000302165; + goto D31; + +T31_3: + response = 0.00420729; + goto D31; + +T31_4: + response = 0.0104384; + goto D31; + +T31_5: + response = 0.0112014; + goto D31; + +T31_6: + response = -0.0150576; + goto D31; + +T31_7: + response = -0.00724807; + goto D31; + +D31: + +tnscore = tnscore + response; + +/* Tree 33 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N32_1: + if 
TOPSTORY < 0.21 then goto N32_2; + else goto N32_3; + +N32_2: + if LW_DAY_HITS < 0.5 then goto T32_1; + else goto T32_2; + +T32_1: + response = -0.000272826; + goto D32; + +T32_2: + response = -0.0037519; + goto D32; + +N32_3: + if MAX_SCORE < 249540 then goto T32_3; + else goto N32_4; + +T32_3: + response = -0.00257574; + goto D32; + +N32_4: + if DAY_WEEK_AVG_DERIV < 3.285 then goto T32_4; + else goto N32_5; + +T32_4: + response = 0.00890149; + goto D32; + +N32_5: + if BUSINESS < 0.05 then goto N32_6; + else goto T32_7; + +N32_6: + if EIGHT_HOUR_WF < 0.108586 then goto T32_5; + else goto T32_6; + +T32_5: + response = -0.00485603; + goto D32; + +T32_6: + response = 0.0137625; + goto D32; + +T32_7: + response = -0.0117843; + goto D32; + +D32: + +tnscore = tnscore + response; + +/* Tree 34 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N33_1: + if BUSINESS < 0.05 then goto N33_2; + else goto N33_4; + +N33_2: + if PREV_DAY_HITS < 2.5 then goto N33_3; + else goto T33_3; + +N33_3: + if MAX_MIN_RANK < 9 then goto T33_1; + else goto T33_2; + +T33_1: + response = -0.000115002; + goto D33; + +T33_2: + response = 0.013627; + goto D33; + +T33_3: + response = 0.00426589; + goto D33; + +N33_4: + if SPORTS < 0.05 then goto N33_5; + else goto N33_6; + +N33_5: + if WEEKAVG < 1.07 then goto T33_4; + else goto T33_5; + +T33_4: + response = -0.00209775; + goto D33; + +T33_5: + response = 0.00207151; + goto D33; + +N33_6: + if MAX_SCORE < 282458 then goto T33_6; + else goto T33_7; + +T33_6: + response = -0.00363773; + goto D33; + +T33_7: + response = -0.0170095; + goto D33; + +D33: + +tnscore = tnscore + response; + +/* Tree 35 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N34_1: + if PREV_DAY_HITS < 6.5 then goto N34_2; + else goto N34_3; + +N34_2: + if INTLNEWS < 0.73 then goto T34_1; + else goto T34_2; + +T34_1: + response = -0.000840229; + goto D34; + +T34_2: + response = 0.0123079; + goto D34; + +N34_3: + if TWO_DAY_WF < 0.647854 then goto T34_3; + else goto N34_4; 
+ +T34_3: + response = -0.00158583; + goto D34; + +N34_4: + if DAY_WEEK_AVG_RATIO < 0.525 then goto T34_4; + else goto N34_5; + +T34_4: + response = -0.00426295; + goto D34; + +N34_5: + if TWELVE_HOUR_WF < 0.0863095 then goto T34_5; + else goto N34_6; + +T34_5: + response = 0.010427; + goto D34; + +N34_6: + if WEEKAVG < 2.5 then goto T34_6; + else goto T34_7; + +T34_6: + response = -0.00797465; + goto D34; + +T34_7: + response = 0.00511912; + goto D34; + +D34: + +tnscore = tnscore + response; + +/* Tree 36 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N35_1: + if DAY_WEEK_AVG_DERIV < 43.215 then goto N35_2; + else goto T35_7; + +N35_2: + if MAX_SCORE < 171575 then goto T35_1; + else goto N35_3; + +T35_1: + response = -0.00279218; + goto D35; + +N35_3: + if INTLNEWS < 0.73 then goto N35_4; + else goto T35_6; + +N35_4: + if LIFESTYLE < 0.13 then goto N35_5; + else goto T35_5; + +N35_5: + if AVG_RANK < 5.29 then goto N35_6; + else goto T35_4; + +N35_6: + if AVG_RANK < 4.145 then goto T35_2; + else goto T35_3; + +T35_2: + response = -0.00292507; + goto D35; + +T35_3: + response = 0.0109271; + goto D35; + +T35_4: + response = 0.000253288; + goto D35; + +T35_5: + response = -0.00584756; + goto D35; + +T35_6: + response = 0.0132182; + goto D35; + +T35_7: + response = 0.00991648; + goto D35; + +D35: + +tnscore = tnscore + response; + +/* Tree 37 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N36_1: + if DAY_WEEK_AVG_RATIO < 0.305 then goto T36_1; + else goto N36_2; + +T36_1: + response = -0.00885189; + goto D36; + +N36_2: + if NATIONALNEWS < 0.105 then goto T36_2; + else goto N36_3; + +T36_2: + response = -0.000531735; + goto D36; + +N36_3: + if TWELVE_HOUR_WF < 0.685185 then goto N36_4; + else goto T36_7; + +N36_4: + if SPORTS < 0.465 then goto N36_5; + else goto T36_6; + +N36_5: + if ISTITLE_AVG < 0.105 then goto N36_6; + else goto T36_5; + +N36_6: + if DAY_PD_HITS_RATIO < 0.365 then goto T36_3; + else goto T36_4; + +T36_3: + response = -0.00269593; + goto 
D36; + +T36_4: + response = 0.0112221; + goto D36; + +T36_5: + response = 0.000819631; + goto D36; + +T36_6: + response = 0.0143634; + goto D36; + +T36_7: + response = -0.00725209; + goto D36; + +D36: + +tnscore = tnscore + response; + +/* Tree 38 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N37_1: + if AVG_SCORE < 340384 then goto N37_2; + else goto N37_3; + +N37_2: + if AVG_SCORE < 336375 then goto T37_1; + else goto T37_2; + +T37_1: + response = -0.000508552; + goto D37; + +T37_2: + response = -0.0113797; + goto D37; + +N37_3: + if MIN_SCORE < 326287 then goto T37_3; + else goto N37_4; + +T37_3: + response = 0.0209452; + goto D37; + +N37_4: + if ONE_DAY_WF < 0.567629 then goto T37_4; + else goto N37_5; + +T37_4: + response = -0.00150548; + goto D37; + +N37_5: + if ENTERTAINMENT < 0.05 then goto N37_6; + else goto T37_7; + +N37_6: + if AVG_SCORE < 375038 then goto T37_5; + else goto T37_6; + +T37_5: + response = 0.0210937; + goto D37; + +T37_6: + response = 0.00330692; + goto D37; + +T37_7: + response = -0.00420613; + goto D37; + +D37: + +tnscore = tnscore + response; + +/* Tree 39 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N38_1: + if AVG_SCORE < 245150 then goto T38_1; + else goto N38_2; + +T38_1: + response = -0.000756654; + goto D38; + +N38_2: + if FOUR_HOUR_WF < 0.436508 then goto N38_3; + else goto T38_7; + +N38_3: + if TWO_DAY_WF < 0.876894 then goto N38_4; + else goto N38_5; + +N38_4: + if DAY_HITS < 4.5 then goto T38_2; + else goto T38_3; + +T38_2: + response = 0.00184962; + goto D38; + +T38_3: + response = -0.00446764; + goto D38; + +N38_5: + if AVG_SCORE < 247846 then goto T38_4; + else goto N38_6; + +T38_4: + response = 0.0195391; + goto D38; + +N38_6: + if MAX_SCORE < 264008 then goto T38_5; + else goto T38_6; + +T38_5: + response = -0.0119329; + goto D38; + +T38_6: + response = 0.0045953; + goto D38; + +T38_7: + response = -0.0101769; + goto D38; + +D38: + +tnscore = tnscore + response; + +/* Tree 40 of 103 */ +/* N terminal 
nodes = 7, Depth = 5 */ + + +N39_1: + if TOPSTORY < 0.39 then goto N39_2; + else goto N39_4; + +N39_2: + if SPORTS < 0.73 then goto T39_1; + else goto N39_3; + +T39_1: + response = -0.000167518; + goto D39; + +N39_3: + if WEEKAVG < 0.785 then goto T39_2; + else goto T39_3; + +T39_2: + response = 9.19437e-05; + goto D39; + +T39_3: + response = 0.00941928; + goto D39; + +N39_4: + if AVG_RANK < 9.55 then goto N39_5; + else goto T39_7; + +N39_5: + if AVG_RANK < 8.9 then goto N39_6; + else goto T39_6; + +N39_6: + if MAX_SCORE < 270912 then goto T39_4; + else goto T39_5; + +T39_4: + response = 0.0142439; + goto D39; + +T39_5: + response = -0.000762818; + goto D39; + +T39_6: + response = -0.00563315; + goto D39; + +T39_7: + response = 0.019371; + goto D39; + +D39: + +tnscore = tnscore + response; + +/* Tree 41 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N40_1: + if MAX_MIN_SCORE < 16050.8 then goto T40_1; + else goto N40_2; + +T40_1: + response = -0.00187676; + goto D40; + +N40_2: + if LW_DAY_HITS < 3.5 then goto N40_3; + else goto T40_7; + +N40_3: + if MAX_SCORE < 178349 then goto T40_2; + else goto N40_4; + +T40_2: + response = -0.00168833; + goto D40; + +N40_4: + if WEEKAVG < 0.93 then goto T40_3; + else goto N40_5; + +T40_3: + response = -9.59413e-05; + goto D40; + +N40_5: + if FOUR_HOUR_WF < 0.0415469 then goto T40_4; + else goto N40_6; + +T40_4: + response = 0.00437212; + goto D40; + +N40_6: + if MAX_MIN_SCORE < 26173.2 then goto T40_5; + else goto T40_6; + +T40_5: + response = 0.013711; + goto D40; + +T40_6: + response = -0.00373247; + goto D40; + +T40_7: + response = -0.00746144; + goto D40; + +D40: + +tnscore = tnscore + response; + +/* Tree 42 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N41_1: + if SPORTS < 0.73 then goto N41_2; + else goto N41_5; + +N41_2: + if INTLNEWS < 0.53 then goto T41_1; + else goto N41_3; + +T41_1: + response = -0.000662401; + goto D41; + +N41_3: + if TWO_DAY_WF < 0.564784 then goto T41_2; + else goto N41_4; + +T41_2: 
+ response = 0.0155579; + goto D41; + +N41_4: + if DAY_WEEK_AVG_RATIO < 4.08 then goto T41_3; + else goto T41_4; + +T41_3: + response = -0.00298146; + goto D41; + +T41_4: + response = 0.015513; + goto D41; + +N41_5: + if DAY_PD_HITS_RATIO < 0.31 then goto T41_5; + else goto N41_6; + +T41_5: + response = 0.0153445; + goto D41; + +N41_6: + if SUPERDUPER_AVG < 0.155 then goto T41_6; + else goto T41_7; + +T41_6: + response = 0.00486013; + goto D41; + +T41_7: + response = -0.00508073; + goto D41; + +D41: + +tnscore = tnscore + response; + +/* Tree 43 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N42_1: + if MAX_MIN_RANK < 7 then goto T42_1; + else goto N42_2; + +T42_1: + response = -0.000214923; + goto D42; + +N42_2: + if ISTITLE_AVG < 0.55 then goto N42_3; + else goto N42_5; + +N42_3: + if MAX_MIN_SCORE < 41838 then goto N42_4; + else goto T42_4; + +N42_4: + if MAX_SCORE < 235701 then goto T42_2; + else goto T42_3; + +T42_2: + response = 0.00139705; + goto D42; + +T42_3: + response = 0.0257242; + goto D42; + +T42_4: + response = 0.00119704; + goto D42; + +N42_5: + if NATIONALNEWS < 0.115 then goto T42_5; + else goto N42_6; + +T42_5: + response = -0.00295678; + goto D42; + +N42_6: + if MIN_SCORE < 211652 then goto T42_6; + else goto T42_7; + +T42_6: + response = 0.00049411; + goto D42; + +T42_7: + response = 0.0246476; + goto D42; + +D42: + +tnscore = tnscore + response; + +/* Tree 44 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N43_1: + if SPORTS < 0.47 then goto N43_2; + else goto N43_3; + +N43_2: + if SPORTS < 0.05 then goto T43_1; + else goto T43_2; + +T43_1: + response = 0.000269559; + goto D43; + +T43_2: + response = -0.00314174; + goto D43; + +N43_3: + if MAX_SCORE < 187840 then goto T43_3; + else goto N43_4; + +T43_3: + response = -0.00191667; + goto D43; + +N43_4: + if ISABSTRACT_AVG < 0.415 then goto N43_5; + else goto N43_6; + +N43_5: + if MAX_MIN_RANK < 5 then goto T43_4; + else goto T43_5; + +T43_4: + response = -0.00316349; + goto D43; + 
+T43_5: + response = 0.00674809; + goto D43; + +N43_6: + if PREV_DAY_HITS < 2.5 then goto T43_6; + else goto T43_7; + +T43_6: + response = 0.00653246; + goto D43; + +T43_7: + response = 0.0230973; + goto D43; + +D43: + +tnscore = tnscore + response; + +/* Tree 45 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N44_1: + if NATIONALNEWS < 0.105 then goto T44_1; + else goto N44_2; + +T44_1: + response = -0.000806543; + goto D44; + +N44_2: + if DAY_PD_HITS_RATIO < 6.75 then goto N44_3; + else goto N44_6; + +N44_3: + if DAY_WEEK_AVG_RATIO < 3.505 then goto N44_4; + else goto T44_5; + +N44_4: + if MIN_RANK < 1 then goto T44_2; + else goto N44_5; + +T44_2: + response = 0.0183563; + goto D44; + +N44_5: + if NATIONALNEWS < 0.13 then goto T44_3; + else goto T44_4; + +T44_3: + response = 0.0095701; + goto D44; + +T44_4: + response = 0.00111755; + goto D44; + +T44_5: + response = 0.0178329; + goto D44; + +N44_6: + if MAX_MIN_SCORE < 42048.8 then goto T44_6; + else goto T44_7; + +T44_6: + response = 0.00161585; + goto D44; + +T44_7: + response = -0.0115306; + goto D44; + +D44: + +tnscore = tnscore + response; + +/* Tree 46 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N45_1: + if DAY_LW_DAY_HITS_RATIO < 2.225 then goto N45_2; + else goto N45_4; + +N45_2: + if ONE_DAY_WF < 0.370833 then goto N45_3; + else goto T45_3; + +N45_3: + if ENTERTAINMENT < 0.415 then goto T45_1; + else goto T45_2; + +T45_1: + response = -0.00110057; + goto D45; + +T45_2: + response = 0.00599021; + goto D45; + +T45_3: + response = -0.0100266; + goto D45; + +N45_4: + if LOCALNEWS < 0.645 then goto N45_5; + else goto N45_6; + +N45_5: + if FOUR_HOUR_WF < 0.0402559 then goto T45_4; + else goto T45_5; + +T45_4: + response = 0.00156752; + goto D45; + +T45_5: + response = -0.00187101; + goto D45; + +N45_6: + if TWELVE_HOUR_WF < 0.183333 then goto T45_6; + else goto T45_7; + +T45_6: + response = -0.000756081; + goto D45; + +T45_7: + response = 0.0221542; + goto D45; + +D45: + +tnscore = tnscore + 
response; + +/* Tree 47 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N46_1: + if DAY_WEEK_AVG_DERIV < -0.5 then goto N46_2; + else goto N46_6; + +N46_2: + if TOPSTORY < 0.185 then goto N46_3; + else goto T46_5; + +N46_3: + if SPORTS < 0.775 then goto N46_4; + else goto T46_4; + +N46_4: + if INTLNEWS < 0.435 then goto T46_1; + else goto N46_5; + +T46_1: + response = -0.000858072; + goto D46; + +N46_5: + if TWO_DAY_WF < 0.607692 then goto T46_2; + else goto T46_3; + +T46_2: + response = 0.0133903; + goto D46; + +T46_3: + response = -0.000714716; + goto D46; + +T46_4: + response = 0.00949831; + goto D46; + +T46_5: + response = 0.00678994; + goto D46; + +N46_6: + if LIFESTYLE < 0.13 then goto T46_6; + else goto T46_7; + +T46_6: + response = -0.00020835; + goto D46; + +T46_7: + response = -0.00685168; + goto D46; + +D46: + +tnscore = tnscore + response; + +/* Tree 48 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N47_1: + if MAX_MIN_RANK < 3 then goto N47_2; + else goto N47_6; + +N47_2: + if TOPSTORY < 0.365 then goto N47_3; + else goto T47_5; + +N47_3: + if INTLNEWS < 0.585 then goto N47_4; + else goto T47_4; + +N47_4: + if DAY_WEEK_AVG_DERIV < 3.36 then goto T47_1; + else goto N47_5; + +T47_1: + response = 0.00152356; + goto D47; + +N47_5: + if MAX_MIN_SCORE < 41565 then goto T47_2; + else goto T47_3; + +T47_2: + response = -0.00224511; + goto D47; + +T47_3: + response = -0.0154069; + goto D47; + +T47_4: + response = 0.00771362; + goto D47; + +T47_5: + response = 0.0141704; + goto D47; + +N47_6: + if POLITICS < 0.27 then goto T47_6; + else goto T47_7; + +T47_6: + response = -0.000741127; + goto D47; + +T47_7: + response = 0.0123356; + goto D47; + +D47: + +tnscore = tnscore + response; + +/* Tree 49 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N48_1: + if REGIONALNEWS < 0.21 then goto N48_2; + else goto T48_7; + +N48_2: + if INTLNEWS < 0.415 then goto N48_3; + else goto N48_5; + +N48_3: + if NATIONALNEWS < 0.105 then goto T48_1; + else goto 
N48_4; + +T48_1: + response = -0.000772108; + goto D48; + +N48_4: + if TWO_DAY_WF < 0.587963 then goto T48_2; + else goto T48_3; + +T48_2: + response = -0.00126111; + goto D48; + +T48_3: + response = 0.00367856; + goto D48; + +N48_5: + if MAX_MIN_SCORE < 41771 then goto T48_4; + else goto N48_6; + +T48_4: + response = 0.000662317; + goto D48; + +N48_6: + if MAX_MIN_SCORE < 45378.8 then goto T48_5; + else goto T48_6; + +T48_5: + response = 0.0229089; + goto D48; + +T48_6: + response = 0.00361364; + goto D48; + +T48_7: + response = -0.0100665; + goto D48; + +D48: + +tnscore = tnscore + response; + +/* Tree 50 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N49_1: + if MAX_MIN_SCORE < 46045.5 then goto N49_2; + else goto N49_6; + +N49_2: + if ISTITLE_AVG < 0.415 then goto N49_3; + else goto N49_5; + +N49_3: + if ONE_DAY_WF < 0.0658009 then goto T49_1; + else goto N49_4; + +T49_1: + response = -0.00148948; + goto D49; + +N49_4: + if MIN_SCORE < 226178 then goto T49_2; + else goto T49_3; + +T49_2: + response = -0.000291172; + goto D49; + +T49_3: + response = 0.00500722; + goto D49; + +N49_5: + if MAX_MIN_SCORE < 45527.5 then goto T49_4; + else goto T49_5; + +T49_4: + response = -0.000953746; + goto D49; + +T49_5: + response = 0.00916764; + goto D49; + +N49_6: + if EIGHT_HOUR_WF < 0.0267094 then goto T49_6; + else goto T49_7; + +T49_6: + response = -0.000542017; + goto D49; + +T49_7: + response = -0.00644438; + goto D49; + +D49: + +tnscore = tnscore + response; + +/* Tree 51 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N50_1: + if DAY_PD_HITS_DERIV < -4.5 then goto N50_2; + else goto T50_7; + +N50_2: + if DAY_PD_HITS_RATIO < 0.16 then goto T50_1; + else goto N50_3; + +T50_1: + response = -0.00116809; + goto D50; + +N50_3: + if MAX_SCORE < 178149 then goto T50_2; + else goto N50_4; + +T50_2: + response = -0.00327374; + goto D50; + +N50_4: + if MIN_SCORE < 172046 then goto T50_3; + else goto N50_5; + +T50_3: + response = 0.017586; + goto D50; + +N50_5: + if 
MIN_SCORE < 221890 then goto T50_4; + else goto N50_6; + +T50_4: + response = -0.00260826; + goto D50; + +N50_6: + if MIN_SCORE < 227242 then goto T50_5; + else goto T50_6; + +T50_5: + response = 0.0209327; + goto D50; + +T50_6: + response = 0.00475773; + goto D50; + +T50_7: + response = -0.000492544; + goto D50; + +D50: + +tnscore = tnscore + response; + +/* Tree 52 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N51_1: + if DAY_LW_DAY_HITS_RATIO < 0.83 then goto N51_2; + else goto N51_3; + +N51_2: + if AVG_SCORE < 237778 then goto T51_1; + else goto T51_2; + +T51_1: + response = -0.00220195; + goto D51; + +T51_2: + response = -0.0171686; + goto D51; + +N51_3: + if SPORTS < 0.79 then goto N51_4; + else goto T51_7; + +N51_4: + if SPORTS < 0.05 then goto T51_3; + else goto N51_5; + +T51_3: + response = 0.000236122; + goto D51; + +N51_5: + if MAX_MIN_SCORE < 46031.8 then goto N51_6; + else goto T51_6; + +N51_6: + if MAX_MIN_SCORE < 7978 then goto T51_4; + else goto T51_5; + +T51_4: + response = -0.011323; + goto D51; + +T51_5: + response = -0.000609338; + goto D51; + +T51_6: + response = -0.0077154; + goto D51; + +T51_7: + response = 0.00365925; + goto D51; + +D51: + +tnscore = tnscore + response; + +/* Tree 53 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N52_1: + if DAY_WEEK_AVG_DERIV < -3.93 then goto T52_1; + else goto N52_2; + +T52_1: + response = 0.00918467; + goto D52; + +N52_2: + if NATIONALNEWS < 0.105 then goto T52_2; + else goto N52_3; + +T52_2: + response = -0.00058362; + goto D52; + +N52_3: + if EIGHT_HOUR_WF < 0.480769 then goto N52_4; + else goto T52_7; + +N52_4: + if ISTITLE_AVG < 0.155 then goto N52_5; + else goto T52_6; + +N52_5: + if DAY_PD_HITS_RATIO < 0.39 then goto T52_3; + else goto N52_6; + +T52_3: + response = -0.00366457; + goto D52; + +N52_6: + if DAY_HITS < 5.5 then goto T52_4; + else goto T52_5; + +T52_4: + response = 0.0162937; + goto D52; + +T52_5: + response = -0.00117921; + goto D52; + +T52_6: + response = 0.001006; + 
goto D52; + +T52_7: + response = -0.00659191; + goto D52; + +D52: + +tnscore = tnscore + response; + +/* Tree 54 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N53_1: + if SPORTS < 0.705 then goto T53_1; + else goto N53_2; + +T53_1: + response = -0.000556203; + goto D53; + +N53_2: + if MAX_SCORE < 165481 then goto T53_2; + else goto N53_3; + +T53_2: + response = -0.00498224; + goto D53; + +N53_3: + if SUPERDUPER_AVG < 0.315 then goto N53_4; + else goto T53_7; + +N53_4: + if DAY_WEEK_AVG_DERIV < 0.36 then goto N53_5; + else goto N53_6; + +N53_5: + if AVG_RANK < 5.73 then goto T53_3; + else goto T53_4; + +T53_3: + response = 0.00571545; + goto D53; + +T53_4: + response = 0.0237979; + goto D53; + +N53_6: + if DAY_PD_HITS_DERIV < 3.5 then goto T53_5; + else goto T53_6; + +T53_5: + response = -0.000591932; + goto D53; + +T53_6: + response = 0.011012; + goto D53; + +T53_7: + response = -0.00679759; + goto D53; + +D53: + +tnscore = tnscore + response; + +/* Tree 55 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N54_1: + if REGIONALNEWS < 0.21 then goto N54_2; + else goto T54_7; + +N54_2: + if MISC < 0.105 then goto N54_3; + else goto T54_6; + +N54_3: + if TWO_DAY_WF < 0.492284 then goto N54_4; + else goto T54_5; + +N54_4: + if FOUR_HOUR_WF < 0.00462963 then goto N54_5; + else goto N54_6; + +N54_5: + if MAX_MIN_SCORE < 37344.8 then goto T54_1; + else goto T54_2; + +T54_1: + response = -0.00252831; + goto D54; + +T54_2: + response = 0.00347229; + goto D54; + +N54_6: + if MIN_SCORE < 216377 then goto T54_3; + else goto T54_4; + +T54_3: + response = -0.00279429; + goto D54; + +T54_4: + response = -0.0169557; + goto D54; + +T54_5: + response = 0.00040157; + goto D54; + +T54_6: + response = 0.00880965; + goto D54; + +T54_7: + response = -0.010143; + goto D54; + +D54: + +tnscore = tnscore + response; + +/* Tree 56 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N55_1: + if DAY_LW_DAY_HITS_RATIO < 0.645 then goto T55_1; + else goto N55_2; + +T55_1: + response 
= -0.00704886; + goto D55; + +N55_2: + if AVG_SCORE < 291527 then goto N55_3; + else goto N55_5; + +N55_3: + if MAX_SCORE < 287802 then goto T55_2; + else goto N55_4; + +T55_2: + response = 0.000191564; + goto D55; + +N55_4: + if ISTITLE_AVG < 0.95 then goto T55_3; + else goto T55_4; + +T55_3: + response = 0.0133402; + goto D55; + +T55_4: + response = 0.000516175; + goto D55; + +N55_5: + if AVG_SCORE < 317516 then goto T55_5; + else goto N55_6; + +T55_5: + response = -0.0121501; + goto D55; + +N55_6: + if SPORTS < 0.315 then goto T55_6; + else goto T55_7; + +T55_6: + response = -0.00154239; + goto D55; + +T55_7: + response = 0.00704715; + goto D55; + +D55: + +tnscore = tnscore + response; + +/* Tree 57 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N56_1: + if INTLNEWS < 0.705 then goto N56_2; + else goto T56_7; + +N56_2: + if DAY_PD_HITS_RATIO < 5.045 then goto N56_3; + else goto N56_5; + +N56_3: + if WEEKAVG < 0.785 then goto T56_1; + else goto N56_4; + +T56_1: + response = -0.00094492; + goto D56; + +N56_4: + if ONE_DAY_WF < 0.644009 then goto T56_2; + else goto T56_3; + +T56_2: + response = 0.000962924; + goto D56; + +T56_3: + response = 0.00910092; + goto D56; + +N56_5: + if LOCALNEWS < 0.295 then goto N56_6; + else goto T56_6; + +N56_6: + if ISTITLE_AVG < 0.185 then goto T56_4; + else goto T56_5; + +T56_4: + response = -0.0103214; + goto D56; + +T56_5: + response = -0.0014478; + goto D56; + +T56_6: + response = 0.0144627; + goto D56; + +T56_7: + response = 0.00968091; + goto D56; + +D56: + +tnscore = tnscore + response; + +/* Tree 58 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N57_1: + if ONE_DAY_WF < 0.328096 then goto N57_2; + else goto T57_7; + +N57_2: + if ONE_DAY_WF < 0.246773 then goto N57_3; + else goto N57_4; + +N57_3: + if TOPSTORY < 0.47 then goto T57_1; + else goto T57_2; + +T57_1: + response = 0.000196656; + goto D57; + +T57_2: + response = 0.0166177; + goto D57; + +N57_4: + if MIN_SCORE < 224862 then goto T57_3; + else goto 
N57_5; + +T57_3: + response = 0.00164784; + goto D57; + +N57_5: + if AVG_SCORE < 264251 then goto T57_4; + else goto N57_6; + +T57_4: + response = 0.0251889; + goto D57; + +N57_6: + if AVG_SCORE < 339362 then goto T57_5; + else goto T57_6; + +T57_5: + response = -0.00430853; + goto D57; + +T57_6: + response = 0.0156826; + goto D57; + +T57_7: + response = -0.000883446; + goto D57; + +D57: + +tnscore = tnscore + response; + +/* Tree 59 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N58_1: + if MISC < 0.105 then goto N58_2; + else goto T58_7; + +N58_2: + if MIN_SCORE < 445730 then goto N58_3; + else goto T58_6; + +N58_3: + if MIN_SCORE < 371741 then goto N58_4; + else goto N58_5; + +N58_4: + if REGIONALNEWS < 0.21 then goto T58_1; + else goto T58_2; + +T58_1: + response = 0.000514634; + goto D58; + +T58_2: + response = -0.0078218; + goto D58; + +N58_5: + if INTLNEWS < 0.145 then goto N58_6; + else goto T58_5; + +N58_6: + if ISTITLE_AVG < 0.105 then goto T58_3; + else goto T58_4; + +T58_3: + response = 0.00884708; + goto D58; + +T58_4: + response = -0.00706471; + goto D58; + +T58_5: + response = -0.0155934; + goto D58; + +T58_6: + response = 0.00871224; + goto D58; + +T58_7: + response = 0.0109724; + goto D58; + +D58: + +tnscore = tnscore + response; + +/* Tree 60 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N59_1: + if AVG_RANK < 5.07 then goto N59_2; + else goto N59_3; + +N59_2: + if MIN_SCORE < 237760 then goto T59_1; + else goto T59_2; + +T59_1: + response = -8.82014e-05; + goto D59; + +T59_2: + response = 0.0155208; + goto D59; + +N59_3: + if ENTERTAINMENT < 0.05 then goto N59_4; + else goto T59_7; + +N59_4: + if MAX_MIN_SCORE < 14449.2 then goto N59_5; + else goto N59_6; + +N59_5: + if TOPSTORY < 0.225 then goto T59_3; + else goto T59_4; + +T59_3: + response = -0.00152853; + goto D59; + +T59_4: + response = -0.0123906; + goto D59; + +N59_6: + if TOPSTORY < 0.365 then goto T59_5; + else goto T59_6; + +T59_5: + response = 0.000330374; + goto D59; + 
+T59_6: + response = 0.00622094; + goto D59; + +T59_7: + response = -0.0026428; + goto D59; + +D59: + +tnscore = tnscore + response; + +/* Tree 61 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N60_1: + if LW_DAY_HITS < 0.5 then goto N60_2; + else goto N60_3; + +N60_2: + if MISC < 0.105 then goto T60_1; + else goto T60_2; + +T60_1: + response = 0.000226343; + goto D60; + +T60_2: + response = 0.00938518; + goto D60; + +N60_3: + if MAX_SCORE < 254898 then goto N60_4; + else goto N60_5; + +N60_4: + if MAX_SCORE < 249948 then goto T60_3; + else goto T60_4; + +T60_3: + response = -0.0010896; + goto D60; + +T60_4: + response = 0.0106866; + goto D60; + +N60_5: + if ONE_DAY_WF < 0.537727 then goto N60_6; + else goto T60_7; + +N60_6: + if MAX_MIN_RANK < 5 then goto T60_5; + else goto T60_6; + +T60_5: + response = -0.000668387; + goto D60; + +T60_6: + response = -0.0113993; + goto D60; + +T60_7: + response = -0.0159024; + goto D60; + +D60: + +tnscore = tnscore + response; + +/* Tree 62 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N61_1: + if WEEKAVG < 0.93 then goto N61_2; + else goto N61_3; + +N61_2: + if MIN_RANK < 1 then goto T61_1; + else goto T61_2; + +T61_1: + response = 0.0105569; + goto D61; + +T61_2: + response = -0.00122424; + goto D61; + +N61_3: + if HEALTH < 0.105 then goto N61_4; + else goto N61_6; + +N61_4: + if SPORTS < 0.47 then goto N61_5; + else goto T61_5; + +N61_5: + if NATIONALNEWS < 0.105 then goto T61_3; + else goto T61_4; + +T61_3: + response = -0.00127666; + goto D61; + +T61_4: + response = 0.00259145; + goto D61; + +T61_5: + response = 0.00361046; + goto D61; + +N61_6: + if PREV_DAY_HITS < 4.5 then goto T61_6; + else goto T61_7; + +T61_6: + response = -0.00160398; + goto D61; + +T61_7: + response = 0.0210051; + goto D61; + +D61: + +tnscore = tnscore + response; + +/* Tree 63 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N62_1: + if MAX_MIN_SCORE < 62647.2 then goto N62_2; + else goto T62_7; + +N62_2: + if ISTITLE_AVG < 0.05 
then goto N62_3; + else goto T62_6; + +N62_3: + if MAX_MIN_SCORE < 45894.2 then goto N62_4; + else goto T62_5; + +N62_4: + if DAY_PD_HITS_RATIO < 0.675 then goto N62_5; + else goto T62_4; + +N62_5: + if MAX_MIN_SCORE < 41917 then goto N62_6; + else goto T62_3; + +N62_6: + if MIN_SCORE < 227128 then goto T62_1; + else goto T62_2; + +T62_1: + response = 0.00157123; + goto D62; + +T62_2: + response = -0.0091657; + goto D62; + +T62_3: + response = 0.00650689; + goto D62; + +T62_4: + response = 0.000734575; + goto D62; + +T62_5: + response = -0.00919073; + goto D62; + +T62_6: + response = -0.000254308; + goto D62; + +T62_7: + response = 0.0114536; + goto D62; + +D62: + +tnscore = tnscore + response; + +/* Tree 64 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N63_1: + if BUSINESS < 0.05 then goto N63_2; + else goto T63_7; + +N63_2: + if DAY_WEEK_AVG_RATIO < 0.505 then goto N63_3; + else goto T63_6; + +N63_3: + if LOCALNEWS < 0.115 then goto N63_4; + else goto T63_5; + +N63_4: + if DAY_WEEK_AVG_RATIO < 0.36 then goto N63_5; + else goto N63_6; + +N63_5: + if DAY_WEEK_AVG_RATIO < 0.275 then goto T63_1; + else goto T63_2; + +T63_1: + response = 0.0111617; + goto D63; + +T63_2: + response = -0.00883723; + goto D63; + +N63_6: + if DAY_PD_HITS_RATIO < 0.185 then goto T63_3; + else goto T63_4; + +T63_3: + response = 0.0229969; + goto D63; + +T63_4: + response = -0.000984798; + goto D63; + +T63_5: + response = -0.00222586; + goto D63; + +T63_6: + response = 0.000255018; + goto D63; + +T63_7: + response = -0.00124642; + goto D63; + +D63: + +tnscore = tnscore + response; + +/* Tree 65 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N64_1: + if DAY_LW_DAY_HITS_RATIO < 0.645 then goto T64_1; + else goto N64_2; + +T64_1: + response = -0.0067218; + goto D64; + +N64_2: + if WEEKAVG < 26.715 then goto N64_3; + else goto T64_7; + +N64_3: + if AVG_RANK < 9.225 then goto T64_2; + else goto N64_4; + +T64_2: + response = 0.000158773; + goto D64; + +N64_4: + if TWO_DAY_WF < 
0.976136 then goto T64_3; + else goto N64_5; + +T64_3: + response = 0.00457414; + goto D64; + +N64_5: + if DAY_WEEK_AVG_DERIV < 5.785 then goto T64_4; + else goto N64_6; + +T64_4: + response = -0.00480445; + goto D64; + +N64_6: + if DAY_LW_DAY_HITS_DERIV < 27.5 then goto T64_5; + else goto T64_6; + +T64_5: + response = 0.016426; + goto D64; + +T64_6: + response = -0.00308634; + goto D64; + +T64_7: + response = -0.00901968; + goto D64; + +D64: + +tnscore = tnscore + response; + +/* Tree 66 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N65_1: + if LOCALNEWS < 0.295 then goto N65_2; + else goto N65_6; + +N65_2: + if LOCALNEWS < 0.155 then goto T65_1; + else goto N65_3; + +T65_1: + response = -0.000303017; + goto D65; + +N65_3: + if MIN_SCORE < 222112 then goto T65_2; + else goto N65_4; + +T65_2: + response = 0.000210471; + goto D65; + +N65_4: + if ENTERTAINMENT < 0.05 then goto N65_5; + else goto T65_5; + +N65_5: + if ISTITLE_AVG < 0.185 then goto T65_3; + else goto T65_4; + +T65_3: + response = 0.0201204; + goto D65; + +T65_4: + response = 0.00543219; + goto D65; + +T65_5: + response = -0.00443157; + goto D65; + +N65_6: + if DAY_PD_HITS_DERIV < 6.5 then goto T65_6; + else goto T65_7; + +T65_6: + response = -0.00312853; + goto D65; + +T65_7: + response = 0.0100428; + goto D65; + +D65: + +tnscore = tnscore + response; + +/* Tree 67 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N66_1: + if DAY_PD_HITS_RATIO < 32.5 then goto N66_2; + else goto T66_7; + +N66_2: + if DAY_PD_HITS_DERIV < 20.5 then goto N66_3; + else goto T66_6; + +N66_3: + if MAX_MIN_RANK < 7 then goto T66_1; + else goto N66_4; + +T66_1: + response = -0.000109244; + goto D66; + +N66_4: + if DAY_LW_DAY_HITS_DERIV < 9.5 then goto N66_5; + else goto T66_5; + +N66_5: + if MIN_SCORE < 215422 then goto T66_2; + else goto N66_6; + +T66_2: + response = 0.000454762; + goto D66; + +N66_6: + if ISABSTRACT_AVG < 0.225 then goto T66_3; + else goto T66_4; + +T66_3: + response = 0.0146831; + goto D66; + 
+T66_4: + response = -0.000790241; + goto D66; + +T66_5: + response = -0.00837207; + goto D66; + +T66_6: + response = -0.00476979; + goto D66; + +T66_7: + response = 0.0102445; + goto D66; + +D66: + +tnscore = tnscore + response; + +/* Tree 68 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N67_1: + if MAX_MIN_SCORE < 45353.5 then goto N67_2; + else goto N67_4; + +N67_2: + if MAX_MIN_SCORE < 44594.8 then goto N67_3; + else goto T67_3; + +N67_3: + if MISC < 0.105 then goto T67_1; + else goto T67_2; + +T67_1: + response = -9.71934e-05; + goto D67; + +T67_2: + response = 0.00961124; + goto D67; + +T67_3: + response = 0.00683718; + goto D67; + +N67_4: + if TWELVE_HOUR_WF < 0.0451153 then goto N67_5; + else goto T67_7; + +N67_5: + if TOPSTORY < 0.05 then goto N67_6; + else goto T67_6; + +N67_6: + if ISTITLE_AVG < 0.79 then goto T67_4; + else goto T67_5; + +T67_4: + response = -0.0112723; + goto D67; + +T67_5: + response = 0.00207341; + goto D67; + +T67_6: + response = 0.00460994; + goto D67; + +T67_7: + response = -0.00460433; + goto D67; + +D67: + +tnscore = tnscore + response; + +/* Tree 69 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N68_1: + if SPORTS < 0.47 then goto N68_2; + else goto T68_7; + +N68_2: + if DAY_PD_HITS_RATIO < 4.725 then goto N68_3; + else goto N68_6; + +N68_3: + if DAY_WEEK_AVG_RATIO < 2.74 then goto T68_1; + else goto N68_4; + +T68_1: + response = -0.000301001; + goto D68; + +N68_4: + if INTLNEWS < 0.415 then goto N68_5; + else goto T68_4; + +N68_5: + if MIN_SCORE < 337522 then goto T68_2; + else goto T68_3; + +T68_2: + response = 0.0052542; + goto D68; + +T68_3: + response = 0.0240026; + goto D68; + +T68_4: + response = -0.00664368; + goto D68; + +N68_6: + if DAY_WEEK_AVG_RATIO < 4.15 then goto T68_5; + else goto T68_6; + +T68_5: + response = -0.00601665; + goto D68; + +T68_6: + response = 0.00254873; + goto D68; + +T68_7: + response = 0.00244646; + goto D68; + +D68: + +tnscore = tnscore + response; + +/* Tree 70 of 103 */ +/* N 
terminal nodes = 7, Depth = 5 */ + + +N69_1: + if TOPSTORY < 0.47 then goto N69_2; + else goto N69_6; + +N69_2: + if LW_DAY_HITS < 0.5 then goto T69_1; + else goto N69_3; + +T69_1: + response = 0.00051525; + goto D69; + +N69_3: + if ONE_DAY_WF < 0.398413 then goto N69_4; + else goto N69_5; + +N69_4: + if DAY_PD_HITS_RATIO < 0.61 then goto T69_2; + else goto T69_3; + +T69_2: + response = -0.00298415; + goto D69; + +T69_3: + response = 0.00362271; + goto D69; + +N69_5: + if AVG_SCORE < 242552 then goto T69_4; + else goto T69_5; + +T69_4: + response = -0.004241; + goto D69; + +T69_5: + response = -0.0152224; + goto D69; + +N69_6: + if MAX_SCORE < 264598 then goto T69_6; + else goto T69_7; + +T69_6: + response = 0.0026109; + goto D69; + +T69_7: + response = -0.013849; + goto D69; + +D69: + +tnscore = tnscore + response; + +/* Tree 71 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N70_1: + if PREV_DAY_HITS < 59.5 then goto N70_2; + else goto T70_7; + +N70_2: + if MAX_SCORE < 455608 then goto N70_3; + else goto N70_6; + +N70_3: + if DAY_PD_HITS_DERIV < 64 then goto N70_4; + else goto T70_4; + +N70_4: + if SUPERDUPER_AVG < 0.725 then goto N70_5; + else goto T70_3; + +N70_5: + if MIN_SCORE < 132886 then goto T70_1; + else goto T70_2; + +T70_1: + response = -0.0021355; + goto D70; + +T70_2: + response = 0.000253917; + goto D70; + +T70_3: + response = -0.0118492; + goto D70; + +T70_4: + response = 0.00977612; + goto D70; + +N70_6: + if INTLNEWS < 0.145 then goto T70_5; + else goto T70_6; + +T70_5: + response = 0.0130923; + goto D70; + +T70_6: + response = -0.00429049; + goto D70; + +T70_7: + response = 0.0115797; + goto D70; + +D70: + +tnscore = tnscore + response; + +/* Tree 72 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N71_1: + if DAY_PD_HITS_RATIO < 0.115 then goto T71_1; + else goto N71_2; + +T71_1: + response = -0.00460369; + goto D71; + +N71_2: + if DAY_WEEK_AVG_RATIO < 0.455 then goto T71_2; + else goto N71_3; + +T71_2: + response = 0.00654726; + 
goto D71; + +N71_3: + if ISTITLE_AVG < 0.565 then goto N71_4; + else goto N71_6; + +N71_4: + if DAY_PD_HITS_DERIV < 38 then goto N71_5; + else goto T71_5; + +N71_5: + if DAY_WEEK_AVG_DERIV < -0.785 then goto T71_3; + else goto T71_4; + +T71_3: + response = -0.00572704; + goto D71; + +T71_4: + response = 0.00211848; + goto D71; + +T71_5: + response = -0.0128642; + goto D71; + +N71_6: + if MAX_SCORE < 261066 then goto T71_6; + else goto T71_7; + +T71_6: + response = 0.000164546; + goto D71; + +T71_7: + response = -0.00330215; + goto D71; + +D71: + +tnscore = tnscore + response; + +/* Tree 73 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N72_1: + if DAY_WEEK_AVG_DERIV < 8.36 then goto N72_2; + else goto N72_5; + +N72_2: + if AVG_SCORE < 266020 then goto T72_1; + else goto N72_3; + +T72_1: + response = 0.000544955; + goto D72; + +N72_3: + if TOPSTORY < 0.21 then goto N72_4; + else goto T72_4; + +N72_4: + if MAX_SCORE < 343351 then goto T72_2; + else goto T72_3; + +T72_2: + response = -0.00739666; + goto D72; + +T72_3: + response = -0.00159552; + goto D72; + +T72_4: + response = 0.00316353; + goto D72; + +N72_5: + if EIGHT_HOUR_WF < 0.117802 then goto N72_6; + else goto T72_7; + +N72_6: + if AVG_SCORE < 264897 then goto T72_5; + else goto T72_6; + +T72_5: + response = 0.00293963; + goto D72; + +T72_6: + response = 0.016815; + goto D72; + +T72_7: + response = -0.000481606; + goto D72; + +D72: + +tnscore = tnscore + response; + +/* Tree 74 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N73_1: + if AVG_SCORE < 446571 then goto N73_2; + else goto T73_7; + +N73_2: + if DAY_WEEK_AVG_RATIO < 4.59 then goto N73_3; + else goto N73_6; + +N73_3: + if MAX_SCORE < 390560 then goto T73_1; + else goto N73_4; + +T73_1: + response = -8.80057e-06; + goto D73; + +N73_4: + if PREV_DAY_HITS < 2.5 then goto N73_5; + else goto T73_4; + +N73_5: + if INTLNEWS < 0.135 then goto T73_2; + else goto T73_3; + +T73_2: + response = -0.00440415; + goto D73; + +T73_3: + response = 
-0.0221297; + goto D73; + +T73_4: + response = 0.000810032; + goto D73; + +N73_6: + if ENTERTAINMENT < 0.15 then goto T73_5; + else goto T73_6; + +T73_5: + response = 0.00319307; + goto D73; + +T73_6: + response = 0.0160496; + goto D73; + +T73_7: + response = 0.00840475; + goto D73; + +D73: + +tnscore = tnscore + response; + +/* Tree 75 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N74_1: + if MAX_SCORE < 390244 then goto N74_2; + else goto N74_5; + +N74_2: + if AVG_SCORE < 360833 then goto N74_3; + else goto T74_4; + +N74_3: + if AVG_SCORE < 352194 then goto T74_1; + else goto N74_4; + +T74_1: + response = 0.000175857; + goto D74; + +N74_4: + if DAY_WEEK_AVG_DERIV < 0.855 then goto T74_2; + else goto T74_3; + +T74_2: + response = -0.0187021; + goto D74; + +T74_3: + response = 0.00356778; + goto D74; + +T74_4: + response = 0.00780008; + goto D74; + +N74_5: + if INTLNEWS < 0.145 then goto N74_6; + else goto T74_7; + +N74_6: + if BUSINESS < 0.135 then goto T74_5; + else goto T74_6; + +T74_5: + response = 0.00421198; + goto D74; + +T74_6: + response = -0.0102414; + goto D74; + +T74_7: + response = -0.00922112; + goto D74; + +D74: + +tnscore = tnscore + response; + +/* Tree 76 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N75_1: + if ONE_DAY_WF < 0.605556 then goto T75_1; + else goto N75_2; + +T75_1: + response = -0.000461057; + goto D75; + +N75_2: + if MIN_SCORE < 332098 then goto N75_3; + else goto N75_5; + +N75_3: + if MAX_SCORE < 355711 then goto N75_4; + else goto T75_4; + +N75_4: + if WEEKAVG < 1.5 then goto T75_2; + else goto T75_3; + +T75_2: + response = 2.20435e-05; + goto D75; + +T75_3: + response = 0.00562666; + goto D75; + +T75_4: + response = -0.0153817; + goto D75; + +N75_5: + if INTLNEWS < 0.145 then goto T75_5; + else goto N75_6; + +T75_5: + response = 0.018971; + goto D75; + +N75_6: + if SUPERDUPER_AVG < 0.275 then goto T75_6; + else goto T75_7; + +T75_6: + response = -0.00766663; + goto D75; + +T75_7: + response = 0.0121696; + goto 
D75; + +D75: + +tnscore = tnscore + response; + +/* Tree 77 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N76_1: + if ENTERTAINMENT < 0.05 then goto T76_1; + else goto N76_2; + +T76_1: + response = 2.07331e-05; + goto D76; + +N76_2: + if ENTERTAINMENT < 0.415 then goto N76_3; + else goto N76_4; + +N76_3: + if AVG_SCORE < 237084 then goto T76_2; + else goto T76_3; + +T76_2: + response = -0.00148349; + goto D76; + +T76_3: + response = -0.00781033; + goto D76; + +N76_4: + if AVG_SCORE < 340606 then goto N76_5; + else goto N76_6; + +N76_5: + if ONE_DAY_WF < 0.015625 then goto T76_4; + else goto T76_5; + +T76_4: + response = 0.0143289; + goto D76; + +T76_5: + response = 0.00105025; + goto D76; + +N76_6: + if WEEKAVG < 0.785 then goto T76_6; + else goto T76_7; + +T76_6: + response = 0.00610972; + goto D76; + +T76_7: + response = -0.020138; + goto D76; + +D76: + +tnscore = tnscore + response; + +/* Tree 78 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N77_1: + if DAY_LW_DAY_HITS_DERIV < 17.5 then goto N77_2; + else goto N77_3; + +N77_2: + if DAY_LW_DAY_HITS_RATIO < 16.5 then goto T77_1; + else goto T77_2; + +T77_1: + response = 0.000188349; + goto D77; + +T77_2: + response = 0.0147086; + goto D77; + +N77_3: + if TWO_DAY_WF < 0.743223 then goto N77_4; + else goto N77_5; + +N77_4: + if MIN_SCORE < 212511 then goto T77_3; + else goto T77_4; + +T77_3: + response = -0.00155285; + goto D77; + +T77_4: + response = -0.0125926; + goto D77; + +N77_5: + if TWO_DAY_WF < 0.980566 then goto T77_5; + else goto N77_6; + +T77_5: + response = 0.00635603; + goto D77; + +N77_6: + if ISTITLE_AVG < 0.15 then goto T77_6; + else goto T77_7; + +T77_6: + response = -0.0163438; + goto D77; + +T77_7: + response = -0.00197531; + goto D77; + +D77: + +tnscore = tnscore + response; + +/* Tree 79 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N78_1: + if AVG_SCORE < 281850 then goto N78_2; + else goto N78_4; + +N78_2: + if MAX_SCORE < 288032 then goto T78_1; + else goto N78_3; + 
+T78_1: + response = 0.000335293; + goto D78; + +N78_3: + if TWELVE_HOUR_WF < 0.358289 then goto T78_2; + else goto T78_3; + +T78_2: + response = 0.00952171; + goto D78; + +T78_3: + response = -0.00693432; + goto D78; + +N78_4: + if EIGHT_HOUR_WF < 0.584928 then goto N78_5; + else goto T78_7; + +N78_5: + if MAX_MIN_SCORE < 2471.25 then goto T78_4; + else goto N78_6; + +T78_4: + response = -0.0141419; + goto D78; + +N78_6: + if MAX_MIN_SCORE < 6867.75 then goto T78_5; + else goto T78_6; + +T78_5: + response = 0.010146; + goto D78; + +T78_6: + response = -0.00291703; + goto D78; + +T78_7: + response = 0.00765541; + goto D78; + +D78: + +tnscore = tnscore + response; + +/* Tree 80 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N79_1: + if PREV_DAY_HITS < 3.5 then goto T79_1; + else goto N79_2; + +T79_1: + response = -0.000747677; + goto D79; + +N79_2: + if EIGHT_HOUR_WF < 0.147108 then goto N79_3; + else goto N79_6; + +N79_3: + if AVG_RANK < 7.69 then goto T79_2; + else goto N79_4; + +T79_2: + response = -0.000812926; + goto D79; + +N79_4: + if ISABSTRACT_AVG < 0.95 then goto T79_3; + else goto N79_5; + +T79_3: + response = 0.00246382; + goto D79; + +N79_5: + if SUPERDUPER_AVG < 0.21 then goto T79_4; + else goto T79_5; + +T79_4: + response = 0.00464639; + goto D79; + +T79_5: + response = 0.0245523; + goto D79; + +N79_6: + if ISTITLE_AVG < 0.05 then goto T79_6; + else goto T79_7; + +T79_6: + response = -0.0184693; + goto D79; + +T79_7: + response = -0.000890194; + goto D79; + +D79: + +tnscore = tnscore + response; + +/* Tree 81 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N80_1: + if HEALTH < 0.27 then goto N80_2; + else goto N80_6; + +N80_2: + if DAY_PD_HITS_RATIO < 5.045 then goto T80_1; + else goto N80_3; + +T80_1: + response = 3.07572e-05; + goto D80; + +N80_3: + if ISTITLE_AVG < 0.125 then goto N80_4; + else goto N80_5; + +N80_4: + if PUB_TODAY_AVG < 0.95 then goto T80_2; + else goto T80_3; + +T80_2: + response = -0.0154824; + goto D80; + +T80_3: + 
response = -0.00294081; + goto D80; + +N80_5: + if TWELVE_HOUR_WF < 0.226496 then goto T80_4; + else goto T80_5; + +T80_4: + response = -0.00622506; + goto D80; + +T80_5: + response = 0.00166554; + goto D80; + +N80_6: + if ISABSTRACT_AVG < 0.17 then goto T80_6; + else goto T80_7; + +T80_6: + response = 0.0165858; + goto D80; + +T80_7: + response = -0.00330948; + goto D80; + +D80: + +tnscore = tnscore + response; + +/* Tree 82 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N81_1: + if DAY_WEEK_AVG_DERIV < 44.86 then goto N81_2; + else goto T81_7; + +N81_2: + if SUPERDUPER_AVG < 0.39 then goto T81_1; + else goto N81_3; + +T81_1: + response = 0.000333898; + goto D81; + +N81_3: + if TWO_DAY_WF < 0.825226 then goto N81_4; + else goto T81_6; + +N81_4: + if MAX_MIN_SCORE < 42418.5 then goto T81_2; + else goto N81_5; + +T81_2: + response = -0.00881739; + goto D81; + +N81_5: + if EIGHT_HOUR_WF < 0.0825189 then goto N81_6; + else goto T81_5; + +N81_6: + if BUSINESS < 0.05 then goto T81_3; + else goto T81_4; + +T81_3: + response = 0.0117745; + goto D81; + +T81_4: + response = -0.00306987; + goto D81; + +T81_5: + response = -0.0111028; + goto D81; + +T81_6: + response = 0.00218894; + goto D81; + +T81_7: + response = 0.00834421; + goto D81; + +D81: + +tnscore = tnscore + response; + +/* Tree 83 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N82_1: + if MIN_SCORE < 230160 then goto N82_2; + else goto N82_3; + +N82_2: + if MIN_SCORE < 229092 then goto T82_1; + else goto T82_2; + +T82_1: + response = -0.000807888; + goto D82; + +T82_2: + response = -0.0133966; + goto D82; + +N82_3: + if TWO_DAY_WF < 0.518064 then goto N82_4; + else goto N82_6; + +N82_4: + if WEEKAVG < 1.5 then goto N82_5; + else goto T82_5; + +N82_5: + if AVG_SCORE < 359388 then goto T82_3; + else goto T82_4; + +T82_3: + response = 0.00892968; + goto D82; + +T82_4: + response = -0.00763611; + goto D82; + +T82_5: + response = -0.00138809; + goto D82; + +N82_6: + if MIN_SCORE < 232622 then goto T82_6; 
+ else goto T82_7; + +T82_6: + response = 0.0102781; + goto D82; + +T82_7: + response = -0.000634821; + goto D82; + +D82: + +tnscore = tnscore + response; + +/* Tree 84 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N83_1: + if MAX_MIN_RANK < 7 then goto T83_1; + else goto N83_2; + +T83_1: + response = -2.44784e-05; + goto D83; + +N83_2: + if ISTITLE_AVG < 0.55 then goto N83_3; + else goto N83_6; + +N83_3: + if DAY_WEEK_AVG_DERIV < 4.57 then goto N83_4; + else goto T83_5; + +N83_4: + if DAY_WEEK_AVG_RATIO < 0.76 then goto T83_2; + else goto N83_5; + +T83_2: + response = -0.000894147; + goto D83; + +N83_5: + if MIN_SCORE < 215272 then goto T83_3; + else goto T83_4; + +T83_3: + response = 0.00546979; + goto D83; + +T83_4: + response = 0.0273153; + goto D83; + +T83_5: + response = -0.00734683; + goto D83; + +N83_6: + if NATIONALNEWS < 0.21 then goto T83_6; + else goto T83_7; + +T83_6: + response = -0.00138435; + goto D83; + +T83_7: + response = 0.00911761; + goto D83; + +D83: + +tnscore = tnscore + response; + +/* Tree 85 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N84_1: + if DAY_WEEK_AVG_RATIO < 3.83 then goto N84_2; + else goto N84_4; + +N84_2: + if ENTERTAINMENT < 0.53 then goto T84_1; + else goto N84_3; + +T84_1: + response = -0.000283843; + goto D84; + +N84_3: + if ISABSTRACT_AVG < 0.21 then goto T84_2; + else goto T84_3; + +T84_2: + response = 0.00786177; + goto D84; + +T84_3: + response = -0.00075151; + goto D84; + +N84_4: + if AVG_RANK < 9.465 then goto N84_5; + else goto N84_6; + +N84_5: + if INTLNEWS < 0.21 then goto T84_4; + else goto T84_5; + +T84_4: + response = 0.00394069; + goto D84; + +T84_5: + response = -0.00383803; + goto D84; + +N84_6: + if AVG_SCORE < 258669 then goto T84_6; + else goto T84_7; + +T84_6: + response = 0.0034867; + goto D84; + +T84_7: + response = 0.0179637; + goto D84; + +D84: + +tnscore = tnscore + response; + +/* Tree 86 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N85_1: + if SUPERDUPER_AVG < 0.725 then 
goto N85_2; + else goto N85_6; + +N85_2: + if SPORTS < 0.685 then goto T85_1; + else goto N85_3; + +T85_1: + response = -0.00023907; + goto D85; + +N85_3: + if AVG_SCORE < 264275 then goto N85_4; + else goto T85_5; + +N85_4: + if MIN_SCORE < 219502 then goto T85_2; + else goto N85_5; + +T85_2: + response = -0.000438684; + goto D85; + +N85_5: + if MAX_RANK < 9 then goto T85_3; + else goto T85_4; + +T85_3: + response = 0.00337648; + goto D85; + +T85_4: + response = 0.0167784; + goto D85; + +T85_5: + response = -0.00334013; + goto D85; + +N85_6: + if WEEKAVG < 12.785 then goto T85_6; + else goto T85_7; + +T85_6: + response = -0.0162992; + goto D85; + +T85_7: + response = 2.52385e-05; + goto D85; + +D85: + +tnscore = tnscore + response; + +/* Tree 87 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N86_1: + if ISABSTRACT_AVG < 0.895 then goto T86_1; + else goto N86_2; + +T86_1: + response = -0.000210253; + goto D86; + +N86_2: + if AVG_SCORE < 247839 then goto N86_3; + else goto T86_7; + +N86_3: + if PREV_DAY_HITS < 1.5 then goto T86_2; + else goto N86_4; + +T86_2: + response = -0.000398583; + goto D86; + +N86_4: + if TWO_DAY_WF < 0.551797 then goto T86_3; + else goto N86_5; + +T86_3: + response = -0.000635903; + goto D86; + +N86_5: + if MIN_SCORE < 141715 then goto T86_4; + else goto N86_6; + +T86_4: + response = 0.00351871; + goto D86; + +N86_6: + if INTLNEWS < 0.105 then goto T86_5; + else goto T86_6; + +T86_5: + response = 0.00858437; + goto D86; + +T86_6: + response = 0.0254582; + goto D86; + +T86_7: + response = -0.00273198; + goto D86; + +D86: + +tnscore = tnscore + response; + +/* Tree 88 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N87_1: + if ONE_DAY_WF < 0.605556 then goto N87_2; + else goto N87_3; + +N87_2: + if EIGHT_HOUR_WF < 0.0411953 then goto T87_1; + else goto T87_2; + +T87_1: + response = 0.000498775; + goto D87; + +T87_2: + response = -0.00236496; + goto D87; + +N87_3: + if AVG_SCORE < 342691 then goto N87_4; + else goto N87_6; + 
+N87_4: + if MAX_MIN_SCORE < 6080.75 then goto N87_5; + else goto T87_5; + +N87_5: + if MIN_SCORE < 236879 then goto T87_3; + else goto T87_4; + +T87_3: + response = 0.00101981; + goto D87; + +T87_4: + response = -0.0190995; + goto D87; + +T87_5: + response = 0.00142291; + goto D87; + +N87_6: + if INTLNEWS < 0.275 then goto T87_6; + else goto T87_7; + +T87_6: + response = 0.0147214; + goto D87; + +T87_7: + response = -0.000475944; + goto D87; + +D87: + +tnscore = tnscore + response; + +/* Tree 89 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N88_1: + if DAY_PD_HITS_DERIV < -13.5 then goto N88_2; + else goto N88_6; + +N88_2: + if WEEKAVG < 3.785 then goto T88_1; + else goto N88_3; + +T88_1: + response = -0.00749366; + goto D88; + +N88_3: + if PREV_DAY_HITS < 22.5 then goto T88_2; + else goto N88_4; + +T88_2: + response = 0.0146922; + goto D88; + +N88_4: + if TWO_DAY_WF < 0.822683 then goto N88_5; + else goto T88_5; + +N88_5: + if PUB_TODAY_AVG < 0.45 then goto T88_3; + else goto T88_4; + +T88_3: + response = -0.00577822; + goto D88; + +T88_4: + response = 0.00590076; + goto D88; + +T88_5: + response = 0.00846642; + goto D88; + +N88_6: + if MISC < 0.105 then goto T88_6; + else goto T88_7; + +T88_6: + response = -0.000196119; + goto D88; + +T88_7: + response = 0.0069636; + goto D88; + +D88: + +tnscore = tnscore + response; + +/* Tree 90 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N89_1: + if DAY_WEEK_AVG_DERIV < -3.93 then goto T89_1; + else goto N89_2; + +T89_1: + response = 0.00797481; + goto D89; + +N89_2: + if DAY_WEEK_AVG_RATIO < 3.865 then goto N89_3; + else goto N89_5; + +N89_3: + if DAY_WEEK_AVG_RATIO < 3.61 then goto N89_4; + else goto T89_4; + +N89_4: + if DAY_WEEK_AVG_RATIO < 3.245 then goto T89_2; + else goto T89_3; + +T89_2: + response = -0.000472322; + goto D89; + +T89_3: + response = 0.00505862; + goto D89; + +T89_4: + response = -0.00837491; + goto D89; + +N89_5: + if TWELVE_HOUR_WF < 0.202675 then goto T89_5; + else goto N89_6; + 
+T89_5: + response = -0.00493061; + goto D89; + +N89_6: + if TWELVE_HOUR_WF < 0.36039 then goto T89_6; + else goto T89_7; + +T89_6: + response = 0.0124758; + goto D89; + +T89_7: + response = 0.00250066; + goto D89; + +D89: + +tnscore = tnscore + response; + +/* Tree 91 of 103 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N90_1: + if MAX_MIN_SCORE < 60845.5 then goto N90_2; + else goto N90_5; + +N90_2: + if MAX_MIN_SCORE < 52128 then goto N90_3; + else goto N90_4; + +N90_3: + if MAX_MIN_SCORE < 51264.2 then goto T90_1; + else goto T90_2; + +T90_1: + response = -0.000132387; + goto D90; + +T90_2: + response = 0.0106899; + goto D90; + +N90_4: + if MIN_SCORE < 218318 then goto T90_3; + else goto T90_4; + +T90_3: + response = -0.00277432; + goto D90; + +T90_4: + response = -0.0140369; + goto D90; + +N90_5: + if ISABSTRACT_AVG < 0.05 then goto T90_5; + else goto N90_6; + +T90_5: + response = 0.0138962; + goto D90; + +N90_6: + if MAX_MIN_SCORE < 61542.5 then goto T90_6; + else goto T90_7; + +T90_6: + response = 0.00445669; + goto D90; + +T90_7: + response = -0.00606652; + goto D90; + +D90: + +tnscore = tnscore + response; + +/* Tree 92 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N91_1: + if BUSINESS < 0.315 then goto T91_1; + else goto N91_2; + +T91_1: + response = -0.000666016; + goto D91; + +N91_2: + if AVG_RANK < 8.535 then goto N91_3; + else goto N91_6; + +N91_3: + if MAX_MIN_SCORE < 7884.5 then goto T91_2; + else goto N91_4; + +T91_2: + response = 0.0134214; + goto D91; + +N91_4: + if ISTITLE_AVG < 0.465 then goto N91_5; + else goto T91_5; + +N91_5: + if MIN_SCORE < 217640 then goto T91_3; + else goto T91_4; + +T91_3: + response = -0.000967523; + goto D91; + +T91_4: + response = 0.0135374; + goto D91; + +T91_5: + response = 0.000147833; + goto D91; + +N91_6: + if DAY_PD_HITS_RATIO < 0.235 then goto T91_6; + else goto T91_7; + +T91_6: + response = 0.00921797; + goto D91; + +T91_7: + response = -0.00182698; + goto D91; + +D91: + +tnscore = tnscore + 
response; + +/* Tree 93 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N92_1: + if AVG_RANK < 5.29 then goto N92_2; + else goto N92_3; + +N92_2: + if AVG_SCORE < 318378 then goto T92_1; + else goto T92_2; + +T92_1: + response = 0.00224509; + goto D92; + +T92_2: + response = 0.0161861; + goto D92; + +N92_3: + if MAX_MIN_SCORE < 51537 then goto N92_4; + else goto N92_6; + +N92_4: + if MAX_MIN_SCORE < 50910.2 then goto N92_5; + else goto T92_5; + +N92_5: + if DAY_LW_DAY_HITS_DERIV < 71 then goto T92_3; + else goto T92_4; + +T92_3: + response = -0.000273537; + goto D92; + +T92_4: + response = -0.00811121; + goto D92; + +T92_5: + response = 0.0109085; + goto D92; + +N92_6: + if SPORTS < 0.415 then goto T92_6; + else goto T92_7; + +T92_6: + response = -0.00253066; + goto D92; + +T92_7: + response = -0.0129268; + goto D92; + +D92: + +tnscore = tnscore + response; + +/* Tree 94 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N93_1: + if LOCALNEWS < 0.61 then goto N93_2; + else goto N93_5; + +N93_2: + if WEEKAVG < 0.5 then goto T93_1; + else goto N93_3; + +T93_1: + response = -0.00262523; + goto D93; + +N93_3: + if NATIONALNEWS < 0.105 then goto T93_2; + else goto N93_4; + +T93_2: + response = -0.000635021; + goto D93; + +N93_4: + if ISTITLE_AVG < 0.155 then goto T93_3; + else goto T93_4; + +T93_3: + response = 0.005402; + goto D93; + +T93_4: + response = 6.7829e-05; + goto D93; + +N93_5: + if TWELVE_HOUR_WF < 0.133929 then goto T93_5; + else goto N93_6; + +T93_5: + response = -0.00168908; + goto D93; + +N93_6: + if PUB_TODAY_AVG < 0.535 then goto T93_6; + else goto T93_7; + +T93_6: + response = 0.00693807; + goto D93; + +T93_7: + response = 0.0227961; + goto D93; + +D93: + +tnscore = tnscore + response; + +/* Tree 95 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N94_1: + if DAY_WEEK_AVG_DERIV < -0.785 then goto T94_1; + else goto N94_2; + +T94_1: + response = -0.0021874; + goto D94; + +N94_2: + if ENTERTAINMENT < 0.05 then goto N94_3; + else goto 
T94_7; + +N94_3: + if MAX_SCORE < 363930 then goto T94_2; + else goto N94_4; + +T94_2: + response = 0.000468954; + goto D94; + +N94_4: + if MAX_SCORE < 384272 then goto N94_5; + else goto N94_6; + +N94_5: + if DAY_PD_HITS_RATIO < 0.495 then goto T94_3; + else goto T94_4; + +T94_3: + response = 0.00125669; + goto D94; + +T94_4: + response = 0.0177669; + goto D94; + +N94_6: + if INTLNEWS < 0.21 then goto T94_5; + else goto T94_6; + +T94_5: + response = 0.0060623; + goto D94; + +T94_6: + response = -0.00814847; + goto D94; + +T94_7: + response = -0.00164111; + goto D94; + +D94: + +tnscore = tnscore + response; + +/* Tree 96 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N95_1: + if MIN_SCORE < 334353 then goto N95_2; + else goto N95_5; + +N95_2: + if NUM_WORDS < 2.5 then goto T95_1; + else goto N95_3; + +T95_1: + response = 5.19901e-05; + goto D95; + +N95_3: + if AVG_SCORE < 316903 then goto N95_4; + else goto T95_4; + +N95_4: + if ISTITLE_AVG < 0.05 then goto T95_2; + else goto T95_3; + +T95_2: + response = -0.0220661; + goto D95; + +T95_3: + response = -0.00683671; + goto D95; + +T95_4: + response = -0.00100144; + goto D95; + +N95_5: + if INTLNEWS < 0.13 then goto T95_5; + else goto N95_6; + +T95_5: + response = 0.00532815; + goto D95; + +N95_6: + if MIN_SCORE < 357183 then goto T95_6; + else goto T95_7; + +T95_6: + response = 0.0067984; + goto D95; + +T95_7: + response = -0.00805372; + goto D95; + +D95: + +tnscore = tnscore + response; + +/* Tree 97 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N96_1: + if INTLNEWS < 0.53 then goto T96_1; + else goto N96_2; + +T96_1: + response = -4.58708e-05; + goto D96; + +N96_2: + if TWELVE_HOUR_WF < 0.21385 then goto N96_3; + else goto N96_6; + +N96_3: + if LOCALNEWS < 0.05 then goto N96_4; + else goto T96_5; + +N96_4: + if TWO_DAY_WF < 0.585356 then goto T96_2; + else goto N96_5; + +T96_2: + response = 0.00547473; + goto D96; + +N96_5: + if MAX_SCORE < 244158 then goto T96_3; + else goto T96_4; + +T96_3: + 
response = 0.00333793; + goto D96; + +T96_4: + response = -0.00880659; + goto D96; + +T96_5: + response = 0.00966211; + goto D96; + +N96_6: + if FOUR_HOUR_WF < 0.0240968 then goto T96_6; + else goto T96_7; + +T96_6: + response = 0.019307; + goto D96; + +T96_7: + response = 0.00126046; + goto D96; + +D96: + +tnscore = tnscore + response; + +/* Tree 98 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N97_1: + if BUSINESS < 0.05 then goto N97_2; + else goto T97_7; + +N97_2: + if DAY_PD_HITS_DERIV < 18.5 then goto N97_3; + else goto N97_6; + +N97_3: + if DAY_WEEK_AVG_DERIV < 12.93 then goto N97_4; + else goto T97_4; + +N97_4: + if INTLNEWS < 0.73 then goto N97_5; + else goto T97_3; + +N97_5: + if INTLNEWS < 0.315 then goto T97_1; + else goto T97_2; + +T97_1: + response = 0.00145589; + goto D97; + +T97_2: + response = -0.00205678; + goto D97; + +T97_3: + response = 0.0114136; + goto D97; + +T97_4: + response = 0.0135475; + goto D97; + +N97_6: + if MIN_RANK < 7 then goto T97_5; + else goto T97_6; + +T97_5: + response = -0.0106123; + goto D97; + +T97_6: + response = 0.00101067; + goto D97; + +T97_7: + response = -0.000965519; + goto D97; + +D97: + +tnscore = tnscore + response; + +/* Tree 99 of 103 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N98_1: + if PREV_DAY_HITS < 6.5 then goto T98_1; + else goto N98_2; + +T98_1: + response = -0.00050304; + goto D98; + +N98_2: + if TWO_DAY_WF < 0.825345 then goto N98_3; + else goto N98_4; + +N98_3: + if ONE_DAY_WF < 0.275028 then goto T98_2; + else goto T98_3; + +T98_2: + response = -0.00134104; + goto D98; + +T98_3: + response = 0.00516485; + goto D98; + +N98_4: + if TWO_DAY_WF < 0.861643 then goto T98_4; + else goto N98_5; + +T98_4: + response = 0.0172774; + goto D98; + +N98_5: + if LOCALNEWS < 0.05 then goto T98_5; + else goto N98_6; + +T98_5: + response = -0.00143161; + goto D98; + +N98_6: + if INTLNEWS < 0.21 then goto T98_6; + else goto T98_7; + +T98_6: + response = 0.0022491; + goto D98; + +T98_7: + response = 
0.0202453; + goto D98; + +D98: + +tnscore = tnscore + response; + +/* Tree 100 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N99_1: + if TWO_DAY_WF < 0.477226 then goto N99_2; + else goto T99_7; + +N99_2: + if BUSINESS < 0.685 then goto N99_3; + else goto N99_6; + +N99_3: + if MAX_MIN_SCORE < 41352.5 then goto N99_4; + else goto N99_5; + +N99_4: + if SUPERDUPER_AVG < 0.315 then goto T99_1; + else goto T99_2; + +T99_1: + response = -0.00389642; + goto D99; + +T99_2: + response = -0.0130707; + goto D99; + +N99_5: + if AVG_SCORE < 253118 then goto T99_3; + else goto T99_4; + +T99_3: + response = -0.00411848; + goto D99; + +T99_4: + response = 0.0126594; + goto D99; + +N99_6: + if DAY_LW_DAY_HITS_RATIO < 1.71 then goto T99_5; + else goto T99_6; + +T99_5: + response = 0.0118462; + goto D99; + +T99_6: + response = -0.00195941; + goto D99; + +T99_7: + response = 0.000656261; + goto D99; + +D99: + +tnscore = tnscore + response; + +/* Tree 101 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N100_1: + if POLITICS < 0.235 then goto N100_2; + else goto N100_6; + +N100_2: + if MAX_MIN_SCORE < 8349.75 then goto N100_3; + else goto N100_4; + +N100_3: + if DAY_LW_DAY_HITS_RATIO < 5.335 then goto T100_1; + else goto T100_2; + +T100_1: + response = 0.00316005; + goto D100; + +T100_2: + response = -0.00643477; + goto D100; + +N100_4: + if MAX_MIN_SCORE < 16062.8 then goto T100_3; + else goto N100_5; + +T100_3: + response = -0.00319606; + goto D100; + +N100_5: + if MAX_MIN_SCORE < 16303.8 then goto T100_4; + else goto T100_5; + +T100_4: + response = 0.0107361; + goto D100; + +T100_5: + response = -3.21466e-05; + goto D100; + +N100_6: + if ISABSTRACT_AVG < 0.05 then goto T100_6; + else goto T100_7; + +T100_6: + response = 0.0148344; + goto D100; + +T100_7: + response = -0.00402193; + goto D100; + +D100: + +tnscore = tnscore + response; + +/* Tree 102 of 103 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N101_1: + if ENTERTAINMENT < 0.585 then goto N101_2; + else goto 
N101_3; + +N101_2: + if MAX_MIN_SCORE < 36987.5 then goto T101_1; + else goto T101_2; + +T101_1: + response = -0.00141265; + goto D101; + +T101_2: + response = 0.000338741; + goto D101; + +N101_3: + if PUB_TODAY_AVG < 0.235 then goto N101_4; + else goto N101_5; + +N101_4: + if MAX_MIN_SCORE < 20990 then goto T101_3; + else goto T101_4; + +T101_3: + response = 0.00745281; + goto D101; + +T101_4: + response = -0.0127174; + goto D101; + +N101_5: + if AVG_SCORE < 239671 then goto T101_5; + else goto N101_6; + +T101_5: + response = 0.000361332; + goto D101; + +N101_6: + if ISTITLE_AVG < 0.735 then goto T101_6; + else goto T101_7; + +T101_6: + response = 0.0208577; + goto D101; + +T101_7: + response = 0.00530017; + goto D101; + +D101: + +tnscore = tnscore + response; + +/* Tree 103 of 103 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N102_1: + if DAY_PD_HITS_DERIV < -3.5 then goto N102_2; + else goto T102_7; + +N102_2: + if HEALTH < 0.105 then goto N102_3; + else goto T102_6; + +N102_3: + if DAY_PD_HITS_RATIO < 0.165 then goto T102_1; + else goto N102_4; + +T102_1: + response = -0.00212795; + goto D102; + +N102_4: + if LW_DAY_HITS < 3.5 then goto N102_5; + else goto T102_5; + +N102_5: + if AVG_SCORE < 258650 then goto N102_6; + else goto T102_4; + +N102_6: + if ISABSTRACT_AVG < 0.225 then goto T102_2; + else goto T102_3; + +T102_2: + response = -0.00378895; + goto D102; + +T102_3: + response = 0.00511293; + goto D102; + +T102_4: + response = 0.0070848; + goto D102; + +T102_5: + response = -0.0101628; + goto D102; + +T102_6: + response = 0.0144615; + goto D102; + +T102_7: + response = -0.000510098; + goto D102; + +D102: + +tnscore = tnscore + response; + +return; diff --git a/searchlib/src/test/files/treenet05.model b/searchlib/src/test/files/treenet05.model new file mode 100644 index 00000000000..523b9de42f9 --- /dev/null +++ b/searchlib/src/test/files/treenet05.model @@ -0,0 +1,4684 @@ + +/* Data Dictionary, Number Of Variables = 37 */ +/* Name = DAY_HITS, Type = 
continuous. */ +/* Name = PREV_DAY_HITS, Type = continuous. */ +/* Name = DAY_PD_HITS_RATIO, Type = continuous. */ +/* Name = DAY_PD_HITS_DERIV, Type = continuous. */ +/* Name = LW_DAY_HITS, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */ +/* Name = WEEKAVG, Type = continuous. */ +/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */ +/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */ +/* Name = ISTITLE_AVG, Type = continuous. */ +/* Name = ISABSTRACT_AVG, Type = continuous. */ +/* Name = SUPERDUPER_AVG, Type = continuous. */ +/* Name = PUB_TODAY_AVG, Type = continuous. */ +/* Name = BUSINESS, Type = continuous. */ +/* Name = ENTERTAINMENT, Type = continuous. */ +/* Name = INTLNEWS, Type = continuous. */ +/* Name = LAW, Type = continuous. */ +/* Name = LIFESTYLE, Type = continuous. */ +/* Name = LOCALNEWS, Type = continuous. */ +/* Name = NATIONALNEWS, Type = continuous. */ +/* Name = POLITICS, Type = continuous. */ +/* Name = SPORTS, Type = continuous. */ +/* Name = TOPSTORY, Type = continuous. */ +/* Name = AVG_RANK, Type = continuous. */ +/* Name = MAX_RANK, Type = continuous. */ +/* Name = MIN_RANK, Type = continuous. */ +/* Name = MAX_MIN_RANK, Type = continuous. */ +/* Name = AVG_SCORE, Type = continuous. */ +/* Name = MAX_SCORE, Type = continuous. */ +/* Name = MIN_SCORE, Type = continuous. */ +/* Name = MAX_MIN_SCORE, Type = continuous. */ +/* Name = FOUR_HOUR_WF, Type = continuous. */ +/* Name = EIGHT_HOUR_WF, Type = continuous. */ +/* Name = TWELVE_HOUR_WF, Type = continuous. */ +/* Name = ONE_DAY_WF, Type = continuous. */ +/* Name = TWO_DAY_WF, Type = continuous. 
*/ + +MODELBEGIN: + +/* CART version: 5.0.9.156 */ +/* TreeNet: TreeNet20071016174833 */ +/* Grove: /home/rparekh/lb/lb_titleabstract_hourly/lb_titleabstract_hourly.grv */ +/* N trees: 77 */ + +link TN0; +pred = tnscore; /* predicted value for IY_CTR */ + + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +/* Tree 1 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +tnscore = 0.0; + +N0_1: + if AVG_SCORE < 240274 then goto N0_2; + else goto N0_3; + +N0_2: + if AVG_SCORE < 152115 then goto T0_1; + else goto T0_2; + +T0_1: + response = 0.222147; + goto D0; + +T0_2: + response = 0.231999; + goto D0; + +N0_3: + if ISABSTRACT_AVG < 0.13 then goto N0_4; + else goto N0_6; + +N0_4: + if WEEKAVG < 1.785 then goto T0_3; + else goto N0_5; + +T0_3: + response = 0.254209; + goto D0; + +N0_5: + if TWO_DAY_WF < 0.849242 then goto T0_4; + else goto T0_5; + +T0_4: + response = 0.260625; + goto D0; + +T0_5: + response = 0.274218; + goto D0; + +N0_6: + if MIN_SCORE < 328158 then goto T0_6; + else goto T0_7; + +T0_6: + response = 0.240699; + goto D0; + +T0_7: + response = 0.25683; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 77 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N1_1: + if MIN_SCORE < 224388 then goto N1_2; + else goto N1_4; + +N1_2: + if AVG_SCORE < 229835 then goto N1_3; + else goto T1_3; + +N1_3: + if MAX_SCORE < 171144 then goto T1_1; + else goto T1_2; + +T1_1: + response = -0.0167726; + goto D1; + +T1_2: + response = -0.0105451; + goto D1; + +T1_3: + response = -0.00242754; + goto D1; + +N1_4: + if ISABSTRACT_AVG < 0.13 then goto N1_5; + else goto N1_6; + +N1_5: + if WEEKAVG < 1.36 then goto T1_4; + else goto T1_5; + +T1_4: + response = 0.0097257; + goto D1; + +T1_5: + response = 0.0198661; + goto D1; + +N1_6: + if MIN_SCORE < 330678 then goto T1_6; + else goto T1_7; + +T1_6: + response = -0.00030123; + goto D1; + +T1_7: + response = 0.0145117; + goto D1; + +D1: + +tnscore = tnscore + response; 
+ +/* Tree 3 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N2_1: + if AVG_SCORE < 240820 then goto N2_2; + else goto N2_3; + +N2_2: + if AVG_SCORE < 159292 then goto T2_1; + else goto T2_2; + +T2_1: + response = -0.0146681; + goto D2; + +T2_2: + response = -0.00755839; + goto D2; + +N2_3: + if ISABSTRACT_AVG < 0.21 then goto N2_4; + else goto N2_6; + +N2_4: + if WEEKAVG < 1.64 then goto T2_3; + else goto N2_5; + +T2_3: + response = 0.0094277; + goto D2; + +N2_5: + if AVG_RANK < 7.1 then goto T2_4; + else goto T2_5; + +T2_4: + response = 0.00789525; + goto D2; + +T2_5: + response = 0.020449; + goto D2; + +N2_6: + if TOPSTORY < 0.05 then goto T2_6; + else goto T2_7; + +T2_6: + response = -0.00410248; + goto D2; + +T2_7: + response = 0.00584918; + goto D2; + +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 77 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N3_1: + if MIN_SCORE < 226846 then goto N3_2; + else goto N3_4; + +N3_2: + if AVG_SCORE < 221526 then goto T3_1; + else goto N3_3; + +T3_1: + response = -0.0110986; + goto D3; + +N3_3: + if TOPSTORY < 0.365 then goto T3_2; + else goto T3_3; + +T3_2: + response = -0.00479654; + goto D3; + +T3_3: + response = 0.020019; + goto D3; + +N3_4: + if WEEKAVG < 1.07 then goto N3_5; + else goto N3_6; + +N3_5: + if ISTITLE_AVG < 0.93 then goto T3_4; + else goto T3_5; + +T3_4: + response = 0.0050835; + goto D3; + +T3_5: + response = -0.00497491; + goto D3; + +N3_6: + if ISABSTRACT_AVG < 0.05 then goto T3_6; + else goto T3_7; + +T3_6: + response = 0.0175386; + goto D3; + +T3_7: + response = 0.00636247; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* Tree 5 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N4_1: + if MIN_SCORE < 226373 then goto N4_2; + else goto N4_5; + +N4_2: + if AVG_SCORE < 151768 then goto T4_1; + else goto N4_3; + +T4_1: + response = -0.0132135; + goto D4; + +N4_3: + if WEEKAVG < 2.5 then goto T4_2; + else goto N4_4; + +T4_2: + response = -0.00784849; + goto D4; + +N4_4: + if 
ISABSTRACT_AVG < 0.95 then goto T4_3; + else goto T4_4; + +T4_3: + response = -0.00100834; + goto D4; + +T4_4: + response = 0.0234278; + goto D4; + +N4_5: + if ISABSTRACT_AVG < 0.235 then goto N4_6; + else goto T4_7; + +N4_6: + if MAX_MIN_SCORE < 20325 then goto T4_5; + else goto T4_6; + +T4_5: + response = 0.00102421; + goto D4; + +T4_6: + response = 0.0145312; + goto D4; + +T4_7: + response = 0.000803179; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N5_1: + if AVG_SCORE < 234937 then goto T5_1; + else goto N5_2; + +T5_1: + response = -0.00791142; + goto D5; + +N5_2: + if ISABSTRACT_AVG < 0.13 then goto N5_3; + else goto N5_6; + +N5_3: + if WEEKAVG < 1.5 then goto T5_2; + else goto N5_4; + +T5_2: + response = 0.00645581; + goto D5; + +N5_4: + if TWO_DAY_WF < 0.925548 then goto T5_3; + else goto N5_5; + +T5_3: + response = 0.0121753; + goto D5; + +N5_5: + if LOCALNEWS < 0.05 then goto T5_4; + else goto T5_5; + +T5_4: + response = 0.013493; + goto D5; + +T5_5: + response = 0.0339803; + goto D5; + +N5_6: + if AVG_SCORE < 492653 then goto T5_6; + else goto T5_7; + +T5_6: + response = -0.00104991; + goto D5; + +T5_7: + response = 0.0192918; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N6_1: + if MIN_SCORE < 231118 then goto N6_2; + else goto N6_4; + +N6_2: + if AVG_SCORE < 223663 then goto T6_1; + else goto N6_3; + +T6_1: + response = -0.00824326; + goto D6; + +N6_3: + if ISABSTRACT_AVG < 0.05 then goto T6_2; + else goto T6_3; + +T6_2: + response = 0.00253809; + goto D6; + +T6_3: + response = -0.00637355; + goto D6; + +N6_4: + if ISTITLE_AVG < 0.885 then goto N6_5; + else goto T6_7; + +N6_5: + if DAY_LW_DAY_HITS_RATIO < 4.045 then goto N6_6; + else goto T6_6; + +N6_6: + if LW_DAY_HITS < 0.5 then goto T6_4; + else goto T6_5; + +T6_4: + response = 0.00769331; + goto D6; + +T6_5: + response = -0.00570545; + goto D6; + +T6_6: + 
response = 0.0136625; + goto D6; + +T6_7: + response = -0.000566847; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N7_1: + if MIN_SCORE < 222204 then goto T7_1; + else goto N7_2; + +T7_1: + response = -0.00655529; + goto D7; + +N7_2: + if ISTITLE_AVG < 0.95 then goto N7_3; + else goto T7_7; + +N7_3: + if DAY_LW_DAY_HITS_RATIO < 15.5 then goto N7_4; + else goto T7_6; + +N7_4: + if TOPSTORY < 0.185 then goto N7_5; + else goto T7_5; + +N7_5: + if INTLNEWS < 0.39 then goto T7_2; + else goto N7_6; + +T7_2: + response = 0.00204263; + goto D7; + +N7_6: + if FOUR_HOUR_WF < 0.004 then goto T7_3; + else goto T7_4; + +T7_3: + response = 0.0157981; + goto D7; + +T7_4: + response = -0.00641486; + goto D7; + +T7_5: + response = 0.0124789; + goto D7; + +T7_6: + response = 0.0166668; + goto D7; + +T7_7: + response = -0.00299979; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N8_1: + if MIN_SCORE < 224388 then goto T8_1; + else goto N8_2; + +T8_1: + response = -0.00568771; + goto D8; + +N8_2: + if WEEKAVG < 1.07 then goto N8_3; + else goto N8_4; + +N8_3: + if ISTITLE_AVG < 0.845 then goto T8_2; + else goto T8_3; + +T8_2: + response = 0.00360536; + goto D8; + +T8_3: + response = -0.00514951; + goto D8; + +N8_4: + if BUSINESS < 0.05 then goto N8_5; + else goto N8_6; + +N8_5: + if MAX_MIN_RANK < 3 then goto T8_4; + else goto T8_5; + +T8_4: + response = 0.0231505; + goto D8; + +T8_5: + response = 0.0105904; + goto D8; + +N8_6: + if MIN_SCORE < 400082 then goto T8_6; + else goto T8_7; + +T8_6: + response = 0.00196019; + goto D8; + +T8_7: + response = 0.0152236; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N9_1: + if MAX_SCORE < 264920 then goto N9_2; + else goto N9_3; + +N9_2: + if AVG_SCORE < 159289 then goto T9_1; + else goto T9_2; + +T9_1: + response = -0.00812678; + goto 
D9; + +T9_2: + response = -0.003174; + goto D9; + +N9_3: + if DAY_WEEK_AVG_DERIV < 30.715 then goto N9_4; + else goto T9_7; + +N9_4: + if ISABSTRACT_AVG < 0.115 then goto N9_5; + else goto T9_6; + +N9_5: + if MAX_MIN_SCORE < 163787 then goto T9_3; + else goto N9_6; + +T9_3: + response = 0.0097262; + goto D9; + +N9_6: + if AVG_SCORE < 400330 then goto T9_4; + else goto T9_5; + +T9_4: + response = -0.00390127; + goto D9; + +T9_5: + response = 0.00706031; + goto D9; + +T9_6: + response = 5.49425e-05; + goto D9; + +T9_7: + response = 0.0204424; + goto D9; + +D9: + +tnscore = tnscore + response; + +/* Tree 11 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N10_1: + if AVG_SCORE < 241590 then goto T10_1; + else goto N10_2; + +T10_1: + response = -0.00459592; + goto D10; + +N10_2: + if TOPSTORY < 0.05 then goto N10_3; + else goto N10_4; + +N10_3: + if DAY_WEEK_AVG_RATIO < 4.205 then goto T10_2; + else goto T10_3; + +T10_2: + response = -0.000126418; + goto D10; + +T10_3: + response = 0.0204507; + goto D10; + +N10_4: + if TWO_DAY_WF < 0.86039 then goto N10_5; + else goto T10_7; + +N10_5: + if ISABSTRACT_AVG < 0.585 then goto N10_6; + else goto T10_6; + +N10_6: + if TOPSTORY < 0.365 then goto T10_4; + else goto T10_5; + +T10_4: + response = 0.00566686; + goto D10; + +T10_5: + response = 0.0196157; + goto D10; + +T10_6: + response = -0.00511988; + goto D10; + +T10_7: + response = 0.0124928; + goto D10; + +D10: + +tnscore = tnscore + response; + +/* Tree 12 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N11_1: + if MIN_SCORE < 222204 then goto N11_2; + else goto N11_3; + +N11_2: + if PREV_DAY_HITS < 4.5 then goto T11_1; + else goto T11_2; + +T11_1: + response = -0.00589519; + goto D11; + +T11_2: + response = -0.000631753; + goto D11; + +N11_3: + if ISABSTRACT_AVG < 0.235 then goto N11_4; + else goto T11_7; + +N11_4: + if MAX_MIN_SCORE < 171496 then goto N11_5; + else goto T11_6; + +N11_5: + if MAX_SCORE < 558130 then goto N11_6; + else goto T11_5; + +N11_6: + if 
AVG_RANK < 7.125 then goto T11_3; + else goto T11_4; + +T11_3: + response = 0.00086138; + goto D11; + +T11_4: + response = 0.00952768; + goto D11; + +T11_5: + response = 0.0206013; + goto D11; + +T11_6: + response = 0.00133279; + goto D11; + +T11_7: + response = -0.00112547; + goto D11; + +D11: + +tnscore = tnscore + response; + +/* Tree 13 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N12_1: + if MIN_SCORE < 222204 then goto N12_2; + else goto N12_4; + +N12_2: + if DAY_PD_HITS_DERIV < -12.5 then goto T12_1; + else goto N12_3; + +T12_1: + response = 0.00847214; + goto D12; + +N12_3: + if DAY_WEEK_AVG_DERIV < 36.785 then goto T12_2; + else goto T12_3; + +T12_2: + response = -0.00450293; + goto D12; + +T12_3: + response = 0.0134303; + goto D12; + +N12_4: + if ISABSTRACT_AVG < 0.05 then goto N12_5; + else goto T12_7; + +N12_5: + if DAY_WEEK_AVG_RATIO < 4.83 then goto N12_6; + else goto T12_6; + +N12_6: + if NATIONALNEWS < 0.355 then goto T12_4; + else goto T12_5; + +T12_4: + response = 0.00488766; + goto D12; + +T12_5: + response = 0.0169425; + goto D12; + +T12_6: + response = 0.0204287; + goto D12; + +T12_7: + response = -0.000132037; + goto D12; + +D12: + +tnscore = tnscore + response; + +/* Tree 14 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N13_1: + if MAX_SCORE < 250058 then goto N13_2; + else goto N13_3; + +N13_2: + if INTLNEWS < 0.105 then goto T13_1; + else goto T13_2; + +T13_1: + response = -0.0059595; + goto D13; + +T13_2: + response = 0.000221029; + goto D13; + +N13_3: + if TOPSTORY < 0.355 then goto N13_4; + else goto T13_7; + +N13_4: + if MIN_SCORE < 385241 then goto N13_5; + else goto N13_6; + +N13_5: + if BUSINESS < 0.05 then goto T13_3; + else goto T13_4; + +T13_3: + response = 0.00367059; + goto D13; + +T13_4: + response = -0.00223683; + goto D13; + +N13_6: + if INTLNEWS < 0.365 then goto T13_5; + else goto T13_6; + +T13_5: + response = 0.00575046; + goto D13; + +T13_6: + response = 0.0237395; + goto D13; + +T13_7: + response = 
0.0174135; + goto D13; + +D13: + +tnscore = tnscore + response; + +/* Tree 15 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N14_1: + if MAX_SCORE < 265638 then goto T14_1; + else goto N14_2; + +T14_1: + response = -0.00287962; + goto D14; + +N14_2: + if ISABSTRACT_AVG < 0.235 then goto N14_3; + else goto T14_7; + +N14_3: + if DAY_PD_HITS_RATIO < 18.75 then goto N14_4; + else goto T14_6; + +N14_4: + if MAX_MIN_SCORE < 67687.2 then goto N14_5; + else goto N14_6; + +N14_5: + if PREV_DAY_HITS < 6.5 then goto T14_2; + else goto T14_3; + +T14_2: + response = 0.00469885; + goto D14; + +T14_3: + response = 0.0145573; + goto D14; + +N14_6: + if AVG_SCORE < 399037 then goto T14_4; + else goto T14_5; + +T14_4: + response = -0.00312833; + goto D14; + +T14_5: + response = 0.00680591; + goto D14; + +T14_6: + response = 0.0238803; + goto D14; + +T14_7: + response = -0.00158812; + goto D14; + +D14: + +tnscore = tnscore + response; + +/* Tree 16 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N15_1: + if MIN_SCORE < 222204 then goto N15_2; + else goto N15_3; + +N15_2: + if TOPSTORY < 0.355 then goto T15_1; + else goto T15_2; + +T15_1: + response = -0.00377546; + goto D15; + +T15_2: + response = 0.0099145; + goto D15; + +N15_3: + if ISTITLE_AVG < 0.885 then goto N15_4; + else goto T15_7; + +N15_4: + if MAX_MIN_SCORE < 57965.2 then goto N15_5; + else goto N15_6; + +N15_5: + if MIN_SCORE < 223217 then goto T15_3; + else goto T15_4; + +T15_3: + response = 0.0291906; + goto D15; + +T15_4: + response = 0.00802385; + goto D15; + +N15_6: + if AVG_SCORE < 402324 then goto T15_5; + else goto T15_6; + +T15_5: + response = -0.00259188; + goto D15; + +T15_6: + response = 0.00560142; + goto D15; + +T15_7: + response = -0.0015883; + goto D15; + +D15: + +tnscore = tnscore + response; + +/* Tree 17 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N16_1: + if MAX_SCORE < 252015 then goto T16_1; + else goto N16_2; + +T16_1: + response = -0.00312417; + goto D16; + +N16_2: + if 
DAY_WEEK_AVG_RATIO < 5.91 then goto N16_3; + else goto T16_7; + +N16_3: + if TOPSTORY < 0.185 then goto N16_4; + else goto T16_6; + +N16_4: + if MAX_MIN_SCORE < 123158 then goto N16_5; + else goto T16_5; + +N16_5: + if ISABSTRACT_AVG < 0.13 then goto T16_2; + else goto N16_6; + +T16_2: + response = 0.0054303; + goto D16; + +N16_6: + if PREV_DAY_HITS < 7.5 then goto T16_3; + else goto T16_4; + +T16_3: + response = -0.00350664; + goto D16; + +T16_4: + response = 0.0115054; + goto D16; + +T16_5: + response = -0.00200056; + goto D16; + +T16_6: + response = 0.00612929; + goto D16; + +T16_7: + response = 0.0248479; + goto D16; + +D16: + +tnscore = tnscore + response; + +/* Tree 18 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N17_1: + if MIN_SCORE < 232158 then goto N17_2; + else goto N17_5; + +N17_2: + if DAY_PD_HITS_DERIV < -13.5 then goto T17_1; + else goto N17_3; + +T17_1: + response = 0.012118; + goto D17; + +N17_3: + if SPORTS < 0.685 then goto T17_2; + else goto N17_4; + +T17_2: + response = -0.00337721; + goto D17; + +N17_4: + if MAX_SCORE < 165958 then goto T17_3; + else goto T17_4; + +T17_3: + response = -0.00648055; + goto D17; + +T17_4: + response = 0.00734207; + goto D17; + +N17_5: + if ISABSTRACT_AVG < 0.635 then goto N17_6; + else goto T17_7; + +N17_6: + if EIGHT_HOUR_WF < 0.493902 then goto T17_5; + else goto T17_6; + +T17_5: + response = 0.00519362; + goto D17; + +T17_6: + response = -0.00533505; + goto D17; + +T17_7: + response = -0.00220591; + goto D17; + +D17: + +tnscore = tnscore + response; + +/* Tree 19 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N18_1: + if AVG_SCORE < 387415 then goto N18_2; + else goto N18_4; + +N18_2: + if PREV_DAY_HITS < 2.5 then goto T18_1; + else goto N18_3; + +T18_1: + response = -0.00321038; + goto D18; + +N18_3: + if BUSINESS < 0.05 then goto T18_2; + else goto T18_3; + +T18_2: + response = 0.00353532; + goto D18; + +T18_3: + response = -0.0020425; + goto D18; + +N18_4: + if TWO_DAY_WF < 0.979149 then goto 
T18_4; + else goto N18_5; + +T18_4: + response = 0.00271552; + goto D18; + +N18_5: + if TWELVE_HOUR_WF < 0.104418 then goto T18_5; + else goto N18_6; + +T18_5: + response = 0.00180155; + goto D18; + +N18_6: + if MIN_SCORE < 350308 then goto T18_6; + else goto T18_7; + +T18_6: + response = 0.0370742; + goto D18; + +T18_7: + response = 0.0145313; + goto D18; + +D18: + +tnscore = tnscore + response; + +/* Tree 20 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N19_1: + if MAX_SCORE < 248824 then goto N19_2; + else goto N19_3; + +N19_2: + if INTLNEWS < 0.185 then goto T19_1; + else goto T19_2; + +T19_1: + response = -0.00381799; + goto D19; + +T19_2: + response = 0.00109643; + goto D19; + +N19_3: + if TOPSTORY < 0.185 then goto N19_4; + else goto N19_6; + +N19_4: + if TWO_DAY_WF < 0.779514 then goto T19_3; + else goto N19_5; + +T19_3: + response = -0.0015664; + goto D19; + +N19_5: + if WEEKAVG < 4.07 then goto T19_4; + else goto T19_5; + +T19_4: + response = 0.00171319; + goto D19; + +T19_5: + response = 0.0126131; + goto D19; + +N19_6: + if MAX_MIN_RANK < 7 then goto T19_6; + else goto T19_7; + +T19_6: + response = 0.00411675; + goto D19; + +T19_7: + response = 0.0149353; + goto D19; + +D19: + +tnscore = tnscore + response; + +/* Tree 21 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N20_1: + if MIN_SCORE < 233311 then goto T20_1; + else goto N20_2; + +T20_1: + response = -0.00183471; + goto D20; + +N20_2: + if LW_DAY_HITS < 0.5 then goto N20_3; + else goto T20_7; + +N20_3: + if SUPERDUPER_AVG < 0.21 then goto N20_4; + else goto N20_5; + +N20_4: + if MIN_RANK < 1 then goto T20_2; + else goto T20_3; + +T20_2: + response = 0.0173917; + goto D20; + +T20_3: + response = 0.000643665; + goto D20; + +N20_5: + if LOCALNEWS < 0.185 then goto N20_6; + else goto T20_6; + +N20_6: + if DAY_PD_HITS_RATIO < 8.795 then goto T20_4; + else goto T20_5; + +T20_4: + response = 0.00308276; + goto D20; + +T20_5: + response = 0.0169982; + goto D20; + +T20_6: + response = 
0.0159792; + goto D20; + +T20_7: + response = -0.00499866; + goto D20; + +D20: + +tnscore = tnscore + response; + +/* Tree 22 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N21_1: + if TOPSTORY < 0.39 then goto N21_2; + else goto T21_7; + +N21_2: + if MAX_SCORE < 176763 then goto T21_1; + else goto N21_3; + +T21_1: + response = -0.00448387; + goto D21; + +N21_3: + if INTLNEWS < 0.415 then goto N21_4; + else goto T21_6; + +N21_4: + if BUSINESS < 0.05 then goto N21_5; + else goto T21_5; + +N21_5: + if MAX_MIN_SCORE < 20408.8 then goto T21_2; + else goto N21_6; + +T21_2: + response = -0.00328596; + goto D21; + +N21_6: + if TWO_DAY_WF < 0.512854 then goto T21_3; + else goto T21_4; + +T21_3: + response = -0.00211998; + goto D21; + +T21_4: + response = 0.00522867; + goto D21; + +T21_5: + response = -0.00226038; + goto D21; + +T21_6: + response = 0.00574748; + goto D21; + +T21_7: + response = 0.00900215; + goto D21; + +D21: + +tnscore = tnscore + response; + +/* Tree 23 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N22_1: + if TWO_DAY_WF < 0.75074 then goto N22_2; + else goto N22_6; + +N22_2: + if BUSINESS < 0.05 then goto N22_3; + else goto T22_5; + +N22_3: + if FOUR_HOUR_WF < 0.0149554 then goto N22_4; + else goto T22_4; + +N22_4: + if WEEKAVG < 0.785 then goto T22_1; + else goto N22_5; + +T22_1: + response = -0.00184131; + goto D22; + +N22_5: + if AVG_SCORE < 167616 then goto T22_2; + else goto T22_3; + +T22_2: + response = -0.00305123; + goto D22; + +T22_3: + response = 0.00685803; + goto D22; + +T22_4: + response = -0.00470139; + goto D22; + +T22_5: + response = -0.003457; + goto D22; + +N22_6: + if MAX_SCORE < 504246 then goto T22_6; + else goto T22_7; + +T22_6: + response = 0.000878955; + goto D22; + +T22_7: + response = 0.00850264; + goto D22; + +D22: + +tnscore = tnscore + response; + +/* Tree 24 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N23_1: + if BUSINESS < 0.105 then goto N23_2; + else goto N23_6; + +N23_2: + if AVG_SCORE < 160899 then 
goto T23_1; + else goto N23_3; + +T23_1: + response = -0.00270644; + goto D23; + +N23_3: + if AVG_SCORE < 194764 then goto N23_4; + else goto N23_5; + +N23_4: + if ISABSTRACT_AVG < 0.315 then goto T23_2; + else goto T23_3; + +T23_2: + response = -0.00800918; + goto D23; + +T23_3: + response = 0.012943; + goto D23; + +N23_5: + if NATIONALNEWS < 0.355 then goto T23_4; + else goto T23_5; + +T23_4: + response = 0.000901868; + goto D23; + +T23_5: + response = 0.0112161; + goto D23; + +N23_6: + if INTLNEWS < 0.39 then goto T23_6; + else goto T23_7; + +T23_6: + response = -0.00269415; + goto D23; + +T23_7: + response = 0.00725021; + goto D23; + +D23: + +tnscore = tnscore + response; + +/* Tree 25 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N24_1: + if BUSINESS < 0.105 then goto N24_2; + else goto T24_7; + +N24_2: + if MAX_SCORE < 188088 then goto T24_1; + else goto N24_3; + +T24_1: + response = -0.00298371; + goto D24; + +N24_3: + if AVG_SCORE < 190784 then goto N24_4; + else goto N24_5; + +N24_4: + if ISABSTRACT_AVG < 0.55 then goto T24_2; + else goto T24_3; + +T24_2: + response = -0.00171064; + goto D24; + +T24_3: + response = 0.0237327; + goto D24; + +N24_5: + if AVG_RANK < 9.755 then goto T24_4; + else goto N24_6; + +T24_4: + response = 0.00131049; + goto D24; + +N24_6: + if WEEKAVG < 0.93 then goto T24_5; + else goto T24_6; + +T24_5: + response = -0.00199335; + goto D24; + +T24_6: + response = 0.020099; + goto D24; + +T24_7: + response = -0.00222399; + goto D24; + +D24: + +tnscore = tnscore + response; + +/* Tree 26 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N25_1: + if NATIONALNEWS < 0.115 then goto N25_2; + else goto T25_7; + +N25_2: + if PREV_DAY_HITS < 27.5 then goto N25_3; + else goto N25_5; + +N25_3: + if INTLNEWS < 0.725 then goto N25_4; + else goto T25_3; + +N25_4: + if AVG_SCORE < 629440 then goto T25_1; + else goto T25_2; + +T25_1: + response = -0.00184197; + goto D25; + +T25_2: + response = 0.0166573; + goto D25; + +T25_3: + response = 
0.0148512; + goto D25; + +N25_5: + if TWO_DAY_WF < 0.773805 then goto N25_6; + else goto T25_6; + +N25_6: + if TWELVE_HOUR_WF < 0.114144 then goto T25_4; + else goto T25_5; + +T25_4: + response = 0.00583361; + goto D25; + +T25_5: + response = -0.012718; + goto D25; + +T25_6: + response = 0.0149618; + goto D25; + +T25_7: + response = 0.00280466; + goto D25; + +D25: + +tnscore = tnscore + response; + +/* Tree 27 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N26_1: + if TOPSTORY < 0.355 then goto N26_2; + else goto N26_4; + +N26_2: + if DAY_PD_HITS_DERIV < -4.5 then goto T26_1; + else goto N26_3; + +T26_1: + response = 0.00287102; + goto D26; + +N26_3: + if SPORTS < 0.73 then goto T26_2; + else goto T26_3; + +T26_2: + response = -0.00185575; + goto D26; + +T26_3: + response = 0.00272133; + goto D26; + +N26_4: + if AVG_RANK < 9.55 then goto N26_5; + else goto T26_7; + +N26_5: + if DAY_PD_HITS_RATIO < 0.405 then goto T26_4; + else goto N26_6; + +T26_4: + response = -0.00518413; + goto D26; + +N26_6: + if MAX_MIN_SCORE < 115612 then goto T26_5; + else goto T26_6; + +T26_5: + response = 0.00438781; + goto D26; + +T26_6: + response = 0.0211867; + goto D26; + +T26_7: + response = 0.0209324; + goto D26; + +D26: + +tnscore = tnscore + response; + +/* Tree 28 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N27_1: + if AVG_SCORE < 147623 then goto T27_1; + else goto N27_2; + +T27_1: + response = -0.00405691; + goto D27; + +N27_2: + if DAY_PD_HITS_DERIV < -23.5 then goto T27_2; + else goto N27_3; + +T27_2: + response = 0.00920672; + goto D27; + +N27_3: + if INTLNEWS < 0.725 then goto N27_4; + else goto T27_7; + +N27_4: + if TOPSTORY < 0.39 then goto T27_3; + else goto N27_5; + +T27_3: + response = 7.21159e-05; + goto D27; + +N27_5: + if INTLNEWS < 0.05 then goto N27_6; + else goto T27_6; + +N27_6: + if SUPERDUPER_AVG < 0.155 then goto T27_4; + else goto T27_5; + +T27_4: + response = 0.00462984; + goto D27; + +T27_5: + response = 0.0231233; + goto D27; + +T27_6: + 
response = 0.000562082; + goto D27; + +T27_7: + response = 0.0141075; + goto D27; + +D27: + +tnscore = tnscore + response; + +/* Tree 29 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N28_1: + if AVG_SCORE < 159075 then goto T28_1; + else goto N28_2; + +T28_1: + response = -0.00305707; + goto D28; + +N28_2: + if TOPSTORY < 0.05 then goto N28_3; + else goto N28_5; + +N28_3: + if SPORTS < 0.73 then goto T28_2; + else goto N28_4; + +T28_2: + response = -0.000935589; + goto D28; + +N28_4: + if AVG_RANK < 5.635 then goto T28_3; + else goto T28_4; + +T28_3: + response = -0.00405106; + goto D28; + +T28_4: + response = 0.0119584; + goto D28; + +N28_5: + if LW_DAY_HITS < 0.5 then goto T28_5; + else goto N28_6; + +T28_5: + response = 0.0045483; + goto D28; + +N28_6: + if PREV_DAY_HITS < 30 then goto T28_6; + else goto T28_7; + +T28_6: + response = -0.00540909; + goto D28; + +T28_7: + response = 0.00895866; + goto D28; + +D28: + +tnscore = tnscore + response; + +/* Tree 30 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N29_1: + if MAX_SCORE < 507014 then goto N29_2; + else goto N29_4; + +N29_2: + if AVG_RANK < 6.775 then goto T29_1; + else goto N29_3; + +T29_1: + response = -0.00328147; + goto D29; + +N29_3: + if MAX_MIN_SCORE < 150474 then goto T29_2; + else goto T29_3; + +T29_2: + response = 0.000393348; + goto D29; + +T29_3: + response = -0.00536951; + goto D29; + +N29_4: + if DAY_PD_HITS_RATIO < 7.885 then goto N29_5; + else goto T29_7; + +N29_5: + if ENTERTAINMENT < 0.05 then goto N29_6; + else goto T29_6; + +N29_6: + if MAX_SCORE < 516938 then goto T29_4; + else goto T29_5; + +T29_4: + response = 0.0171772; + goto D29; + +T29_5: + response = 0.00382646; + goto D29; + +T29_6: + response = -0.00447429; + goto D29; + +T29_7: + response = 0.0153178; + goto D29; + +D29: + +tnscore = tnscore + response; + +/* Tree 31 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N30_1: + if DAY_WEEK_AVG_RATIO < 5.905 then goto N30_2; + else goto T30_7; + +N30_2: + if 
NATIONALNEWS < 0.105 then goto T30_1; + else goto N30_3; + +T30_1: + response = -0.000832529; + goto D30; + +N30_3: + if MAX_MIN_RANK < 7 then goto N30_4; + else goto N30_5; + +N30_4: + if NATIONALNEWS < 0.13 then goto T30_2; + else goto T30_3; + +T30_2: + response = 0.0108634; + goto D30; + +T30_3: + response = 0.000313874; + goto D30; + +N30_5: + if AVG_SCORE < 231880 then goto T30_4; + else goto N30_6; + +T30_4: + response = -0.00104106; + goto D30; + +N30_6: + if PREV_DAY_HITS < 5.5 then goto T30_5; + else goto T30_6; + +T30_5: + response = 0.0263191; + goto D30; + +T30_6: + response = 0.00601508; + goto D30; + +T30_7: + response = 0.0149012; + goto D30; + +D30: + +tnscore = tnscore + response; + +/* Tree 32 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N31_1: + if PREV_DAY_HITS < 59.5 then goto N31_2; + else goto T31_7; + +N31_2: + if NATIONALNEWS < 0.05 then goto N31_3; + else goto N31_6; + +N31_3: + if DAY_PD_HITS_DERIV < -6.5 then goto N31_4; + else goto T31_4; + +N31_4: + if DAY_WEEK_AVG_RATIO < 1.285 then goto N31_5; + else goto T31_3; + +N31_5: + if MAX_MIN_SCORE < 160894 then goto T31_1; + else goto T31_2; + +T31_1: + response = 0.00449479; + goto D31; + +T31_2: + response = -0.00886993; + goto D31; + +T31_3: + response = 0.0153285; + goto D31; + +T31_4: + response = -0.00178263; + goto D31; + +N31_6: + if POLITICS < 0.05 then goto T31_5; + else goto T31_6; + +T31_5: + response = 0.00198329; + goto D31; + +T31_6: + response = -0.00586162; + goto D31; + +T31_7: + response = 0.00935161; + goto D31; + +D31: + +tnscore = tnscore + response; + +/* Tree 33 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N32_1: + if MIN_SCORE < 132626 then goto T32_1; + else goto N32_2; + +T32_1: + response = -0.00387076; + goto D32; + +N32_2: + if WEEKAVG < 0.93 then goto T32_2; + else goto N32_3; + +T32_2: + response = -0.00135437; + goto D32; + +N32_3: + if MAX_MIN_SCORE < 46712 then goto T32_3; + else goto N32_4; + +T32_3: + response = 0.00347721; + goto D32; + 
+N32_4: + if AVG_SCORE < 404994 then goto N32_5; + else goto T32_7; + +N32_5: + if MIN_SCORE < 241776 then goto N32_6; + else goto T32_6; + +N32_6: + if SPORTS < 0.79 then goto T32_4; + else goto T32_5; + +T32_4: + response = -0.00180685; + goto D32; + +T32_5: + response = 0.0168028; + goto D32; + +T32_6: + response = -0.00853053; + goto D32; + +T32_7: + response = 0.00228774; + goto D32; + +D32: + +tnscore = tnscore + response; + +/* Tree 34 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N33_1: + if LW_DAY_HITS < 0.5 then goto N33_2; + else goto N33_6; + +N33_2: + if AVG_SCORE < 159292 then goto T33_1; + else goto N33_3; + +T33_1: + response = -0.00244777; + goto D33; + +N33_3: + if LIFESTYLE < 0.155 then goto N33_4; + else goto T33_5; + +N33_4: + if DAY_PD_HITS_DERIV < -3.5 then goto T33_2; + else goto N33_5; + +T33_2: + response = 0.00412328; + goto D33; + +N33_5: + if TOPSTORY < 0.39 then goto T33_3; + else goto T33_4; + +T33_3: + response = 0.000416163; + goto D33; + +T33_4: + response = 0.0104883; + goto D33; + +T33_5: + response = -0.00615481; + goto D33; + +N33_6: + if FOUR_HOUR_WF < 0.158004 then goto T33_6; + else goto T33_7; + +T33_6: + response = -0.00212154; + goto D33; + +T33_7: + response = -0.0150848; + goto D33; + +D33: + +tnscore = tnscore + response; + +/* Tree 35 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N34_1: + if DAY_PD_HITS_RATIO < 43 then goto N34_2; + else goto T34_7; + +N34_2: + if LW_DAY_HITS < 0.5 then goto N34_3; + else goto N34_6; + +N34_3: + if DAY_PD_HITS_DERIV < -5.5 then goto N34_4; + else goto T34_4; + +N34_4: + if AVG_RANK < 9.265 then goto N34_5; + else goto T34_3; + +N34_5: + if TOPSTORY < 0.05 then goto T34_1; + else goto T34_2; + +T34_1: + response = -0.00313951; + goto D34; + +T34_2: + response = 0.00432897; + goto D34; + +T34_3: + response = 0.0117073; + goto D34; + +T34_4: + response = -0.000692; + goto D34; + +N34_6: + if MAX_MIN_SCORE < 120702 then goto T34_5; + else goto T34_6; + +T34_5: + response = 
-0.00138028; + goto D34; + +T34_6: + response = -0.00945152; + goto D34; + +T34_7: + response = 0.0160989; + goto D34; + +D34: + +tnscore = tnscore + response; + +/* Tree 36 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N35_1: + if LW_DAY_HITS < 0.5 then goto N35_2; + else goto N35_5; + +N35_2: + if MAX_SCORE < 507008 then goto N35_3; + else goto T35_4; + +N35_3: + if MAX_SCORE < 339502 then goto N35_4; + else goto T35_3; + +N35_4: + if ENTERTAINMENT < 0.415 then goto T35_1; + else goto T35_2; + +T35_1: + response = 0.00030327; + goto D35; + +T35_2: + response = 0.00803638; + goto D35; + +T35_3: + response = -0.0034615; + goto D35; + +T35_4: + response = 0.00344157; + goto D35; + +N35_5: + if FOUR_HOUR_WF < 0.101282 then goto N35_6; + else goto T35_7; + +N35_6: + if DAY_WEEK_AVG_DERIV < 17.5 then goto T35_5; + else goto T35_6; + +T35_5: + response = -0.00192815; + goto D35; + +T35_6: + response = 0.00914257; + goto D35; + +T35_7: + response = -0.0127954; + goto D35; + +D35: + +tnscore = tnscore + response; + +/* Tree 37 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N36_1: + if DAY_WEEK_AVG_RATIO < 0.255 then goto T36_1; + else goto N36_2; + +T36_1: + response = 0.0131801; + goto D36; + +N36_2: + if MAX_MIN_SCORE < 312687 then goto N36_3; + else goto T36_7; + +N36_3: + if MAX_MIN_SCORE < 296243 then goto N36_4; + else goto T36_6; + +N36_4: + if NATIONALNEWS < 0.105 then goto T36_2; + else goto N36_5; + +T36_2: + response = -0.000609993; + goto D36; + +N36_5: + if MAX_MIN_RANK < 7 then goto T36_3; + else goto N36_6; + +T36_3: + response = 0.000898274; + goto D36; + +N36_6: + if MAX_SCORE < 234190 then goto T36_4; + else goto T36_5; + +T36_4: + response = 0.000967677; + goto D36; + +T36_5: + response = 0.0163215; + goto D36; + +T36_6: + response = 0.0161496; + goto D36; + +T36_7: + response = -0.0112906; + goto D36; + +D36: + +tnscore = tnscore + response; + +/* Tree 38 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N37_1: + if TOPSTORY < 0.39 then 
goto N37_2; + else goto T37_7; + +N37_2: + if MIN_SCORE < 220684 then goto T37_1; + else goto N37_3; + +T37_1: + response = -0.00143961; + goto D37; + +N37_3: + if ISABSTRACT_AVG < 0.05 then goto N37_4; + else goto T37_6; + +N37_4: + if AVG_SCORE < 277398 then goto N37_5; + else goto N37_6; + +N37_5: + if DAY_LW_DAY_HITS_RATIO < 1.75 then goto T37_2; + else goto T37_3; + +T37_2: + response = -0.000951177; + goto D37; + +T37_3: + response = 0.00924989; + goto D37; + +N37_6: + if BUSINESS < 0.39 then goto T37_4; + else goto T37_5; + +T37_4: + response = -0.00128495; + goto D37; + +T37_5: + response = 0.0103605; + goto D37; + +T37_6: + response = -0.00106493; + goto D37; + +T37_7: + response = 0.00610485; + goto D37; + +D37: + +tnscore = tnscore + response; + +/* Tree 39 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N38_1: + if AVG_SCORE < 500951 then goto N38_2; + else goto N38_5; + +N38_2: + if DAY_HITS < 42.5 then goto T38_1; + else goto N38_3; + +T38_1: + response = -0.000676917; + goto D38; + +N38_3: + if INTLNEWS < 0.45 then goto N38_4; + else goto T38_4; + +N38_4: + if ISTITLE_AVG < 0.05 then goto T38_2; + else goto T38_3; + +T38_2: + response = -0.0122069; + goto D38; + +T38_3: + response = 0.00752268; + goto D38; + +T38_4: + response = 0.0144731; + goto D38; + +N38_5: + if MIN_SCORE < 362007 then goto T38_5; + else goto N38_6; + +T38_5: + response = 0.0202143; + goto D38; + +N38_6: + if PUB_TODAY_AVG < 0.05 then goto T38_6; + else goto T38_7; + +T38_6: + response = -0.0107444; + goto D38; + +T38_7: + response = 0.00512166; + goto D38; + +D38: + +tnscore = tnscore + response; + +/* Tree 40 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N39_1: + if WEEKAVG < 1.07 then goto T39_1; + else goto N39_2; + +T39_1: + response = -0.00167316; + goto D39; + +N39_2: + if DAY_WEEK_AVG_RATIO < 6.14 then goto N39_3; + else goto T39_7; + +N39_3: + if BUSINESS < 0.05 then goto N39_4; + else goto T39_6; + +N39_4: + if PREV_DAY_HITS < 59.5 then goto N39_5; + else 
goto T39_5; + +N39_5: + if AVG_RANK < 9.225 then goto N39_6; + else goto T39_4; + +N39_6: + if DAY_HITS < 30.5 then goto T39_2; + else goto T39_3; + +T39_2: + response = 0.00155636; + goto D39; + +T39_3: + response = -0.0130867; + goto D39; + +T39_4: + response = 0.0105919; + goto D39; + +T39_5: + response = 0.0218533; + goto D39; + +T39_6: + response = -0.000802313; + goto D39; + +T39_7: + response = 0.0145597; + goto D39; + +D39: + +tnscore = tnscore + response; + +/* Tree 41 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N40_1: + if LIFESTYLE < 0.05 then goto N40_2; + else goto T40_7; + +N40_2: + if MAX_MIN_RANK < 7 then goto T40_1; + else goto N40_3; + +T40_1: + response = 0.000220029; + goto D40; + +N40_3: + if ISABSTRACT_AVG < 0.115 then goto N40_4; + else goto N40_6; + +N40_4: + if TWO_DAY_WF < 0.580973 then goto T40_2; + else goto N40_5; + +T40_2: + response = -0.000179904; + goto D40; + +N40_5: + if EIGHT_HOUR_WF < 0.0125776 then goto T40_3; + else goto T40_4; + +T40_3: + response = 0.0222343; + goto D40; + +T40_4: + response = 0.00659678; + goto D40; + +N40_6: + if DAY_WEEK_AVG_DERIV < -0.93 then goto T40_5; + else goto T40_6; + +T40_5: + response = 0.0146586; + goto D40; + +T40_6: + response = -0.0018679; + goto D40; + +T40_7: + response = -0.0043182; + goto D40; + +D40: + +tnscore = tnscore + response; + +/* Tree 42 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N41_1: + if AVG_SCORE < 500853 then goto N41_2; + else goto N41_4; + +N41_2: + if MIN_SCORE < 435034 then goto N41_3; + else goto T41_3; + +N41_3: + if DAY_WEEK_AVG_RATIO < 4.15 then goto T41_1; + else goto T41_2; + +T41_1: + response = -0.000600797; + goto D41; + +T41_2: + response = 0.00413062; + goto D41; + +T41_3: + response = -0.0152667; + goto D41; + +N41_4: + if MAX_SCORE < 660352 then goto N41_5; + else goto T41_7; + +N41_5: + if TWO_DAY_WF < 0.744565 then goto T41_4; + else goto N41_6; + +T41_4: + response = 0.0172406; + goto D41; + +N41_6: + if MAX_SCORE < 596568 then goto 
T41_5; + else goto T41_6; + +T41_5: + response = -0.0069398; + goto D41; + +T41_6: + response = 0.0163258; + goto D41; + +T41_7: + response = -0.00228486; + goto D41; + +D41: + +tnscore = tnscore + response; + +/* Tree 43 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N42_1: + if SPORTS < 0.685 then goto T42_1; + else goto N42_2; + +T42_1: + response = -0.000328185; + goto D42; + +N42_2: + if AVG_SCORE < 446734 then goto N42_3; + else goto T42_7; + +N42_3: + if MAX_SCORE < 500264 then goto N42_4; + else goto T42_6; + +N42_4: + if MAX_SCORE < 450904 then goto N42_5; + else goto T42_5; + +N42_5: + if MIN_SCORE < 254311 then goto N42_6; + else goto T42_4; + +N42_6: + if WEEKAVG < 0.785 then goto T42_2; + else goto T42_3; + +T42_2: + response = -0.00158584; + goto D42; + +T42_3: + response = 0.0075942; + goto D42; + +T42_4: + response = -0.0103296; + goto D42; + +T42_5: + response = 0.0212781; + goto D42; + +T42_6: + response = -0.0121229; + goto D42; + +T42_7: + response = 0.0182724; + goto D42; + +D42: + +tnscore = tnscore + response; + +/* Tree 44 of 77 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N43_1: + if TOPSTORY < 0.39 then goto N43_2; + else goto N43_4; + +N43_2: + if PREV_DAY_HITS < 59.5 then goto T43_1; + else goto N43_3; + +T43_1: + response = -5.72966e-05; + goto D43; + +N43_3: + if NATIONALNEWS < 0.05 then goto T43_2; + else goto T43_3; + +T43_2: + response = 0.0144398; + goto D43; + +T43_3: + response = -0.00316385; + goto D43; + +N43_4: + if FOUR_HOUR_WF < 0.0201025 then goto N43_5; + else goto N43_6; + +N43_5: + if TWELVE_HOUR_WF < 0.163978 then goto T43_4; + else goto T43_5; + +T43_4: + response = 0.00366064; + goto D43; + +T43_5: + response = 0.0227011; + goto D43; + +N43_6: + if ONE_DAY_WF < 0.658333 then goto T43_6; + else goto T43_7; + +T43_6: + response = -0.0114776; + goto D43; + +T43_7: + response = 0.00740238; + goto D43; + +D43: + +tnscore = tnscore + response; + +/* Tree 45 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N44_1: 
+ if TOPSTORY < 0.585 then goto N44_2; + else goto T44_7; + +N44_2: + if ENTERTAINMENT < 0.05 then goto N44_3; + else goto N44_6; + +N44_3: + if DAY_WEEK_AVG_DERIV < 43.145 then goto T44_1; + else goto N44_4; + +T44_1: + response = 0.000486446; + goto D44; + +N44_4: + if DAY_HITS < 78.5 then goto T44_2; + else goto N44_5; + +T44_2: + response = 0.0210513; + goto D44; + +N44_5: + if SUPERDUPER_AVG < 0.65 then goto T44_3; + else goto T44_4; + +T44_3: + response = -0.00387695; + goto D44; + +T44_4: + response = 0.013128; + goto D44; + +N44_6: + if AVG_RANK < 5.465 then goto T44_5; + else goto T44_6; + +T44_5: + response = 0.00674178; + goto D44; + +T44_6: + response = -0.00228932; + goto D44; + +T44_7: + response = -0.0121137; + goto D44; + +D44: + +tnscore = tnscore + response; + +/* Tree 46 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N45_1: + if TWO_DAY_WF < 0.488162 then goto T45_1; + else goto N45_2; + +T45_1: + response = -0.00237763; + goto D45; + +N45_2: + if WEEKAVG < 1.215 then goto T45_2; + else goto N45_3; + +T45_2: + response = -0.000773205; + goto D45; + +N45_3: + if EIGHT_HOUR_WF < 0.0444065 then goto N45_4; + else goto N45_5; + +N45_4: + if DAY_HITS < 19.5 then goto T45_3; + else goto T45_4; + +T45_3: + response = 0.00278939; + goto D45; + +T45_4: + response = 0.0115461; + goto D45; + +N45_5: + if NATIONALNEWS < 0.155 then goto T45_5; + else goto N45_6; + +T45_5: + response = -0.00189416; + goto D45; + +N45_6: + if SPORTS < 0.105 then goto T45_6; + else goto T45_7; + +T45_6: + response = 0.0072781; + goto D45; + +T45_7: + response = -0.00903706; + goto D45; + +D45: + +tnscore = tnscore + response; + +/* Tree 47 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N46_1: + if SPORTS < 0.47 then goto N46_2; + else goto N46_3; + +N46_2: + if SPORTS < 0.105 then goto T46_1; + else goto T46_2; + +T46_1: + response = -4.47312e-05; + goto D46; + +T46_2: + response = -0.00348966; + goto D46; + +N46_3: + if MAX_RANK < 9 then goto T46_3; + else goto 
N46_4; + +T46_3: + response = -0.0016478; + goto D46; + +N46_4: + if EIGHT_HOUR_WF < 0.0459777 then goto N46_5; + else goto T46_7; + +N46_5: + if TWO_DAY_WF < 0.539394 then goto N46_6; + else goto T46_6; + +N46_6: + if WEEKAVG < 1.07 then goto T46_4; + else goto T46_5; + +T46_4: + response = -0.00892999; + goto D46; + +T46_5: + response = 0.00865732; + goto D46; + +T46_6: + response = 0.0121605; + goto D46; + +T46_7: + response = 0.00131641; + goto D46; + +D46: + +tnscore = tnscore + response; + +/* Tree 48 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N47_1: + if INTLNEWS < 0.725 then goto N47_2; + else goto T47_7; + +N47_2: + if DAY_LW_DAY_HITS_RATIO < 124.5 then goto N47_3; + else goto T47_6; + +N47_3: + if AVG_SCORE < 628258 then goto N47_4; + else goto T47_5; + +N47_4: + if DAY_HITS < 55.5 then goto T47_1; + else goto N47_5; + +T47_1: + response = -0.000193067; + goto D47; + +N47_5: + if TWELVE_HOUR_WF < 0.117879 then goto T47_2; + else goto N47_6; + +T47_2: + response = 0.0187097; + goto D47; + +N47_6: + if TWELVE_HOUR_WF < 0.350814 then goto T47_3; + else goto T47_4; + +T47_3: + response = -0.00734127; + goto D47; + +T47_4: + response = 0.0131678; + goto D47; + +T47_5: + response = 0.00987754; + goto D47; + +T47_6: + response = -0.0156063; + goto D47; + +T47_7: + response = 0.00929408; + goto D47; + +D47: + +tnscore = tnscore + response; + +/* Tree 49 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N48_1: + if DAY_WEEK_AVG_DERIV < -3.36 then goto T48_1; + else goto N48_2; + +T48_1: + response = -0.00956624; + goto D48; + +N48_2: + if TOPSTORY < 0.39 then goto T48_2; + else goto N48_3; + +T48_2: + response = -0.000216336; + goto D48; + +N48_3: + if AVG_RANK < 9.55 then goto N48_4; + else goto T48_7; + +N48_4: + if AVG_RANK < 8.735 then goto N48_5; + else goto T48_6; + +N48_5: + if TWO_DAY_WF < 0.531551 then goto T48_3; + else goto N48_6; + +T48_3: + response = -0.00490451; + goto D48; + +N48_6: + if FOUR_HOUR_WF < 0.0142857 then goto T48_4; + else 
goto T48_5; + +T48_4: + response = 0.0229256; + goto D48; + +T48_5: + response = 0.000312813; + goto D48; + +T48_6: + response = -0.00418916; + goto D48; + +T48_7: + response = 0.0189348; + goto D48; + +D48: + +tnscore = tnscore + response; + +/* Tree 50 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N49_1: + if AVG_SCORE < 625182 then goto N49_2; + else goto T49_7; + +N49_2: + if DAY_LW_DAY_HITS_DERIV < 55.5 then goto N49_3; + else goto T49_6; + +N49_3: + if DAY_WEEK_AVG_DERIV < 19.36 then goto N49_4; + else goto N49_5; + +N49_4: + if DAY_WEEK_AVG_DERIV < 13.5 then goto T49_1; + else goto T49_2; + +T49_1: + response = -0.000195177; + goto D49; + +T49_2: + response = 0.00629794; + goto D49; + +N49_5: + if ISTITLE_AVG < 0.05 then goto T49_3; + else goto N49_6; + +T49_3: + response = -0.0149349; + goto D49; + +N49_6: + if TWELVE_HOUR_WF < 0.383204 then goto T49_4; + else goto T49_5; + +T49_4: + response = -0.00516327; + goto D49; + +T49_5: + response = 0.00921651; + goto D49; + +T49_6: + response = 0.00647785; + goto D49; + +T49_7: + response = 0.0102664; + goto D49; + +D49: + +tnscore = tnscore + response; + +/* Tree 51 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N50_1: + if DAY_WEEK_AVG_RATIO < 0.385 then goto T50_1; + else goto N50_2; + +T50_1: + response = -0.00586045; + goto D50; + +N50_2: + if LIFESTYLE < 0.155 then goto N50_3; + else goto T50_7; + +N50_3: + if MAX_MIN_SCORE < 16288 then goto T50_2; + else goto N50_4; + +T50_2: + response = -0.0016458; + goto D50; + +N50_4: + if MAX_MIN_SCORE < 45875.2 then goto N50_5; + else goto T50_6; + +N50_5: + if MAX_MIN_SCORE < 45537.5 then goto N50_6; + else goto T50_5; + +N50_6: + if AVG_SCORE < 229848 then goto T50_3; + else goto T50_4; + +T50_3: + response = -0.000297351; + goto D50; + +T50_4: + response = 0.00475294; + goto D50; + +T50_5: + response = 0.0181171; + goto D50; + +T50_6: + response = -0.000574173; + goto D50; + +T50_7: + response = -0.00499598; + goto D50; + +D50: + +tnscore = tnscore + 
response; + +/* Tree 52 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N51_1: + if SPORTS < 0.815 then goto N51_2; + else goto N51_5; + +N51_2: + if TWO_DAY_WF < 0.460499 then goto N51_3; + else goto T51_4; + +N51_3: + if INTLNEWS < 0.365 then goto T51_1; + else goto N51_4; + +T51_1: + response = -0.00422695; + goto D51; + +N51_4: + if DAY_HITS < 4.5 then goto T51_2; + else goto T51_3; + +T51_2: + response = 0.011483; + goto D51; + +T51_3: + response = -0.00407438; + goto D51; + +T51_4: + response = -4.78506e-06; + goto D51; + +N51_5: + if DAY_HITS < 1.5 then goto T51_5; + else goto N51_6; + +T51_5: + response = 0.00980267; + goto D51; + +N51_6: + if EIGHT_HOUR_WF < 0.301948 then goto T51_6; + else goto T51_7; + +T51_6: + response = -0.00316423; + goto D51; + +T51_7: + response = 0.0125528; + goto D51; + +D51: + +tnscore = tnscore + response; + +/* Tree 53 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N52_1: + if PREV_DAY_HITS < 0.5 then goto T52_1; + else goto N52_2; + +T52_1: + response = -0.00190281; + goto D52; + +N52_2: + if LAW < 0.05 then goto N52_3; + else goto T52_7; + +N52_3: + if PUB_TODAY_AVG < 0.05 then goto T52_2; + else goto N52_4; + +T52_2: + response = -0.00103893; + goto D52; + +N52_4: + if EIGHT_HOUR_WF < 0.0492709 then goto N52_5; + else goto N52_6; + +N52_5: + if ISABSTRACT_AVG < 0.05 then goto T52_3; + else goto T52_4; + +T52_3: + response = 0.0053372; + goto D52; + +T52_4: + response = 0.000962476; + goto D52; + +N52_6: + if NATIONALNEWS < 0.13 then goto T52_5; + else goto T52_6; + +T52_5: + response = -0.00161984; + goto D52; + +T52_6: + response = 0.005538; + goto D52; + +T52_7: + response = -0.00741284; + goto D52; + +D52: + +tnscore = tnscore + response; + +/* Tree 54 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N53_1: + if DAY_LW_DAY_HITS_RATIO < 0.69 then goto N53_2; + else goto N53_3; + +N53_2: + if AVG_SCORE < 229191 then goto T53_1; + else goto T53_2; + +T53_1: + response = -0.00217119; + goto D53; + +T53_2: + 
response = -0.0135186; + goto D53; + +N53_3: + if DAY_LW_DAY_HITS_RATIO < 125.5 then goto N53_4; + else goto T53_7; + +N53_4: + if ISABSTRACT_AVG < 0.05 then goto N53_5; + else goto T53_6; + +N53_5: + if AVG_SCORE < 218595 then goto T53_3; + else goto N53_6; + +T53_3: + response = -0.00358076; + goto D53; + +N53_6: + if BUSINESS < 0.685 then goto T53_4; + else goto T53_5; + +T53_4: + response = 0.00139556; + goto D53; + +T53_5: + response = 0.0140572; + goto D53; + +T53_6: + response = -0.000571652; + goto D53; + +T53_7: + response = -0.0128262; + goto D53; + +D53: + +tnscore = tnscore + response; + +/* Tree 55 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N54_1: + if WEEKAVG < 1.5 then goto N54_2; + else goto N54_5; + +N54_2: + if DAY_WEEK_AVG_DERIV < 3.5 then goto N54_3; + else goto T54_4; + +N54_3: + if SUPERDUPER_AVG < 0.355 then goto N54_4; + else goto T54_3; + +N54_4: + if LW_DAY_HITS < 0.5 then goto T54_1; + else goto T54_2; + +T54_1: + response = 0.000521639; + goto D54; + +T54_2: + response = -0.00451687; + goto D54; + +T54_3: + response = -0.00637359; + goto D54; + +T54_4: + response = -0.00562351; + goto D54; + +N54_5: + if TWO_DAY_WF < 0.829824 then goto T54_5; + else goto N54_6; + +T54_5: + response = 2.82632e-05; + goto D54; + +N54_6: + if TWELVE_HOUR_WF < 0.940588 then goto T54_6; + else goto T54_7; + +T54_6: + response = 0.00527366; + goto D54; + +T54_7: + response = -0.011917; + goto D54; + +D54: + +tnscore = tnscore + response; + +/* Tree 56 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N55_1: + if TWELVE_HOUR_WF < 0.742581 then goto N55_2; + else goto N55_5; + +N55_2: + if TOPSTORY < 0.355 then goto T55_1; + else goto N55_3; + +T55_1: + response = -0.00012321; + goto D55; + +N55_3: + if FOUR_HOUR_WF < 0.026084 then goto N55_4; + else goto T55_4; + +N55_4: + if MIN_SCORE < 356232 then goto T55_2; + else goto T55_3; + +T55_2: + response = 0.0111342; + goto D55; + +T55_3: + response = -0.00292376; + goto D55; + +T55_4: + response = 
-0.00479873; + goto D55; + +N55_5: + if ISABSTRACT_AVG < 0.185 then goto N55_6; + else goto T55_7; + +N55_6: + if MIN_RANK < 5 then goto T55_5; + else goto T55_6; + +T55_5: + response = -0.000125896; + goto D55; + +T55_6: + response = -0.0115332; + goto D55; + +T55_7: + response = -0.000763903; + goto D55; + +D55: + +tnscore = tnscore + response; + +/* Tree 57 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N56_1: + if NATIONALNEWS < 0.27 then goto T56_1; + else goto N56_2; + +T56_1: + response = -0.000328182; + goto D56; + +N56_2: + if ISABSTRACT_AVG < 0.05 then goto N56_3; + else goto T56_7; + +N56_3: + if AVG_SCORE < 225658 then goto T56_2; + else goto N56_4; + +T56_2: + response = -0.00305383; + goto D56; + +N56_4: + if MIN_SCORE < 231962 then goto T56_3; + else goto N56_5; + +T56_3: + response = 0.0181265; + goto D56; + +N56_5: + if MAX_MIN_SCORE < 33119.5 then goto T56_4; + else goto N56_6; + +T56_4: + response = -0.00486977; + goto D56; + +N56_6: + if WEEKAVG < 1.785 then goto T56_5; + else goto T56_6; + +T56_5: + response = 0.0229851; + goto D56; + +T56_6: + response = 0.00588037; + goto D56; + +T56_7: + response = -0.000667257; + goto D56; + +D56: + +tnscore = tnscore + response; + +/* Tree 58 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N57_1: + if MAX_MIN_SCORE < 312575 then goto N57_2; + else goto T57_7; + +N57_2: + if TWO_DAY_WF < 0.531754 then goto T57_1; + else goto N57_3; + +T57_1: + response = -0.00112552; + goto D57; + +N57_3: + if MAX_SCORE < 669432 then goto N57_4; + else goto T57_6; + +N57_4: + if BUSINESS < 0.05 then goto N57_5; + else goto N57_6; + +N57_5: + if PREV_DAY_HITS < 41.5 then goto T57_2; + else goto T57_3; + +T57_2: + response = 0.00191169; + goto D57; + +T57_3: + response = 0.0126963; + goto D57; + +N57_6: + if DAY_LW_DAY_HITS_RATIO < 37.5 then goto T57_4; + else goto T57_5; + +T57_4: + response = -0.00101754; + goto D57; + +T57_5: + response = 0.00846513; + goto D57; + +T57_6: + response = -0.00792694; + goto D57; + 
+T57_7: + response = -0.00834756; + goto D57; + +D57: + +tnscore = tnscore + response; + +/* Tree 59 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N58_1: + if PUB_TODAY_AVG < 0.05 then goto N58_2; + else goto N58_6; + +N58_2: + if SPORTS < 0.645 then goto N58_3; + else goto N58_5; + +N58_3: + if AVG_SCORE < 395268 then goto N58_4; + else goto T58_3; + +N58_4: + if MAX_SCORE < 460268 then goto T58_1; + else goto T58_2; + +T58_1: + response = -0.00227942; + goto D58; + +T58_2: + response = 0.00899341; + goto D58; + +T58_3: + response = -0.00823655; + goto D58; + +N58_5: + if MIN_RANK < 5 then goto T58_4; + else goto T58_5; + +T58_4: + response = -0.00122777; + goto D58; + +T58_5: + response = 0.0163908; + goto D58; + +N58_6: + if TWO_DAY_WF < 0.95119 then goto T58_6; + else goto T58_7; + +T58_6: + response = -0.00018789; + goto D58; + +T58_7: + response = 0.0021492; + goto D58; + +D58: + +tnscore = tnscore + response; + +/* Tree 60 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N59_1: + if DAY_WEEK_AVG_RATIO < 3.985 then goto T59_1; + else goto N59_2; + +T59_1: + response = -0.000226985; + goto D59; + +N59_2: + if DAY_WEEK_AVG_RATIO < 5.525 then goto N59_3; + else goto T59_7; + +N59_3: + if DAY_WEEK_AVG_RATIO < 4.95 then goto N59_4; + else goto T59_6; + +N59_4: + if AVG_SCORE < 373867 then goto N59_5; + else goto N59_6; + +N59_5: + if ENTERTAINMENT < 0.05 then goto T59_2; + else goto T59_3; + +T59_2: + response = 0.00254281; + goto D59; + +T59_3: + response = -0.0107653; + goto D59; + +N59_6: + if PREV_DAY_HITS < 3 then goto T59_4; + else goto T59_5; + +T59_4: + response = 0.0220568; + goto D59; + +T59_5: + response = 0.00220059; + goto D59; + +T59_6: + response = 0.0155791; + goto D59; + +T59_7: + response = -0.00294274; + goto D59; + +D59: + +tnscore = tnscore + response; + +/* Tree 61 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N60_1: + if EIGHT_HOUR_WF < 0.349537 then goto N60_2; + else goto N60_6; + +N60_2: + if LOCALNEWS < 0.315 then goto 
T60_1; + else goto N60_3; + +T60_1: + response = 0.00100629; + goto D60; + +N60_3: + if TWELVE_HOUR_WF < 0.324561 then goto N60_4; + else goto N60_5; + +N60_4: + if MAX_SCORE < 547636 then goto T60_2; + else goto T60_3; + +T60_2: + response = -0.00365503; + goto D60; + +T60_3: + response = 0.00844103; + goto D60; + +N60_5: + if MAX_SCORE < 249971 then goto T60_4; + else goto T60_5; + +T60_4: + response = -0.00217393; + goto D60; + +T60_5: + response = 0.020902; + goto D60; + +N60_6: + if ISTITLE_AVG < 0.05 then goto T60_6; + else goto T60_7; + +T60_6: + response = -0.00679711; + goto D60; + +T60_7: + response = -2.87657e-06; + goto D60; + +D60: + +tnscore = tnscore + response; + +/* Tree 62 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N61_1: + if DAY_WEEK_AVG_DERIV < 60.285 then goto N61_2; + else goto T61_7; + +N61_2: + if DAY_WEEK_AVG_DERIV < -4.07 then goto N61_3; + else goto N61_4; + +N61_3: + if DAY_WEEK_AVG_DERIV < -5.785 then goto T61_1; + else goto T61_2; + +T61_1: + response = 0.000889976; + goto D61; + +T61_2: + response = -0.016703; + goto D61; + +N61_4: + if DAY_WEEK_AVG_RATIO < 0.285 then goto T61_3; + else goto N61_5; + +T61_3: + response = 0.0108868; + goto D61; + +N61_5: + if DAY_WEEK_AVG_RATIO < 0.34 then goto T61_4; + else goto N61_6; + +T61_4: + response = -0.0115452; + goto D61; + +N61_6: + if DAY_PD_HITS_DERIV < -24.5 then goto T61_5; + else goto T61_6; + +T61_5: + response = 0.00709642; + goto D61; + +T61_6: + response = 5.85454e-05; + goto D61; + +T61_7: + response = 0.00724335; + goto D61; + +D61: + +tnscore = tnscore + response; + +/* Tree 63 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N62_1: + if WEEKAVG < 31.07 then goto N62_2; + else goto N62_6; + +N62_2: + if DAY_PD_HITS_DERIV < -8.5 then goto N62_3; + else goto T62_5; + +N62_3: + if ONE_DAY_WF < 0.209914 then goto N62_4; + else goto N62_5; + +N62_4: + if TWO_DAY_WF < 0.537088 then goto T62_1; + else goto T62_2; + +T62_1: + response = 0.00770858; + goto D62; + +T62_2: + 
response = -0.00166542; + goto D62; + +N62_5: + if FOUR_HOUR_WF < 0.00547982 then goto T62_3; + else goto T62_4; + +T62_3: + response = 0.0185133; + goto D62; + +T62_4: + response = 0.000303571; + goto D62; + +T62_5: + response = -0.00031074; + goto D62; + +N62_6: + if MIN_SCORE < 398722 then goto T62_6; + else goto T62_7; + +T62_6: + response = -0.0135078; + goto D62; + +T62_7: + response = 0.00113129; + goto D62; + +D62: + +tnscore = tnscore + response; + +/* Tree 64 of 77 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N63_1: + if AVG_RANK < 9.53 then goto N63_2; + else goto N63_4; + +N63_2: + if INTLNEWS < 0.73 then goto N63_3; + else goto T63_3; + +N63_3: + if SUPERDUPER_AVG < 0.61 then goto T63_1; + else goto T63_2; + +T63_1: + response = -0.000409752; + goto D63; + +T63_2: + response = -0.00974984; + goto D63; + +T63_3: + response = 0.0133732; + goto D63; + +N63_4: + if SPORTS < 0.05 then goto N63_5; + else goto N63_6; + +N63_5: + if TOPSTORY < 0.315 then goto T63_4; + else goto T63_5; + +T63_4: + response = -0.00110238; + goto D63; + +T63_5: + response = 0.0155814; + goto D63; + +N63_6: + if AVG_SCORE < 258098 then goto T63_6; + else goto T63_7; + +T63_6: + response = 0.0025561; + goto D63; + +T63_7: + response = 0.0218633; + goto D63; + +D63: + +tnscore = tnscore + response; + +/* Tree 65 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N64_1: + if WEEKAVG < 4.215 then goto T64_1; + else goto N64_2; + +T64_1: + response = 0.00064852; + goto D64; + +N64_2: + if MAX_MIN_SCORE < 163619 then goto N64_3; + else goto N64_4; + +N64_3: + if TWO_DAY_WF < 0.463325 then goto T64_2; + else goto T64_3; + +T64_2: + response = -0.00769416; + goto D64; + +T64_3: + response = 0.00053643; + goto D64; + +N64_4: + if FOUR_HOUR_WF < 0.060024 then goto N64_5; + else goto T64_7; + +N64_5: + if TWELVE_HOUR_WF < 0.0127518 then goto T64_4; + else goto N64_6; + +T64_4: + response = -0.0161574; + goto D64; + +N64_6: + if LOCALNEWS < 0.05 then goto T64_5; + else goto T64_6; + 
+T64_5: + response = 0.00613049; + goto D64; + +T64_6: + response = -0.00855688; + goto D64; + +T64_7: + response = -0.0167968; + goto D64; + +D64: + +tnscore = tnscore + response; + +/* Tree 66 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N65_1: + if DAY_WEEK_AVG_RATIO < 0.255 then goto T65_1; + else goto N65_2; + +T65_1: + response = 0.00985333; + goto D65; + +N65_2: + if DAY_WEEK_AVG_RATIO < 0.335 then goto T65_2; + else goto N65_3; + +T65_2: + response = -0.00951258; + goto D65; + +N65_3: + if MAX_MIN_SCORE < 307460 then goto N65_4; + else goto T65_7; + +N65_4: + if MAX_SCORE < 517912 then goto N65_5; + else goto T65_6; + +N65_5: + if MAX_MIN_SCORE < 61870.2 then goto N65_6; + else goto T65_5; + +N65_6: + if DAY_PD_HITS_RATIO < 2.865 then goto T65_3; + else goto T65_4; + +T65_3: + response = 0.00152167; + goto D65; + +T65_4: + response = -0.0017641; + goto D65; + +T65_5: + response = -0.00258099; + goto D65; + +T65_6: + response = 0.00333836; + goto D65; + +T65_7: + response = -0.00739588; + goto D65; + +D65: + +tnscore = tnscore + response; + +/* Tree 67 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N66_1: + if WEEKAVG < 12.785 then goto N66_2; + else goto N66_3; + +N66_2: + if AVG_SCORE < 629228 then goto T66_1; + else goto T66_2; + +T66_1: + response = 3.70823e-06; + goto D66; + +T66_2: + response = 0.0113262; + goto D66; + +N66_3: + if INTLNEWS < 0.25 then goto T66_3; + else goto N66_4; + +T66_3: + response = -0.00996717; + goto D66; + +N66_4: + if EIGHT_HOUR_WF < 0.205476 then goto N66_5; + else goto T66_7; + +N66_5: + if MAX_MIN_SCORE < 105786 then goto N66_6; + else goto T66_6; + +N66_6: + if FOUR_HOUR_WF < 0.00848006 then goto T66_4; + else goto T66_5; + +T66_4: + response = 0.0151832; + goto D66; + +T66_5: + response = 0.000857643; + goto D66; + +T66_6: + response = -0.00763046; + goto D66; + +T66_7: + response = -0.015098; + goto D66; + +D66: + +tnscore = tnscore + response; + +/* Tree 68 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ 
+ + +N67_1: + if SPORTS < 0.47 then goto T67_1; + else goto N67_2; + +T67_1: + response = -0.000209978; + goto D67; + +N67_2: + if DAY_PD_HITS_RATIO < 9.5 then goto N67_3; + else goto T67_7; + +N67_3: + if MAX_RANK < 9 then goto T67_2; + else goto N67_4; + +T67_2: + response = -0.00197712; + goto D67; + +N67_4: + if MAX_SCORE < 188360 then goto T67_3; + else goto N67_5; + +T67_3: + response = -0.000464352; + goto D67; + +N67_5: + if MAX_MIN_SCORE < 45863.5 then goto N67_6; + else goto T67_6; + +N67_6: + if DAY_PD_HITS_RATIO < 0.31 then goto T67_4; + else goto T67_5; + +T67_4: + response = 0.029664; + goto D67; + +T67_5: + response = 0.00977088; + goto D67; + +T67_6: + response = 0.00333723; + goto D67; + +T67_7: + response = -0.012958; + goto D67; + +D67: + +tnscore = tnscore + response; + +/* Tree 69 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N68_1: + if EIGHT_HOUR_WF < 0.397041 then goto N68_2; + else goto N68_6; + +N68_2: + if TWELVE_HOUR_WF < 0.327106 then goto T68_1; + else goto N68_3; + +T68_1: + response = 6.40846e-05; + goto D68; + +N68_3: + if MAX_MIN_SCORE < 129263 then goto T68_2; + else goto N68_4; + +T68_2: + response = 0.00114373; + goto D68; + +N68_4: + if SUPERDUPER_AVG < 0.105 then goto N68_5; + else goto T68_5; + +N68_5: + if AVG_SCORE < 390145 then goto T68_3; + else goto T68_4; + +T68_3: + response = 0.00590447; + goto D68; + +T68_4: + response = 0.0270599; + goto D68; + +T68_5: + response = 0.00116164; + goto D68; + +N68_6: + if DAY_LW_DAY_HITS_RATIO < 27.5 then goto T68_6; + else goto T68_7; + +T68_6: + response = -0.00395192; + goto D68; + +T68_7: + response = 0.00937395; + goto D68; + +D68: + +tnscore = tnscore + response; + +/* Tree 70 of 77 */ +/* N terminal nodes = 7, Depth = 7 */ + + +N69_1: + if LW_DAY_HITS < 3.5 then goto N69_2; + else goto T69_7; + +N69_2: + if ENTERTAINMENT < 0.845 then goto N69_3; + else goto T69_6; + +N69_3: + if LIFESTYLE < 0.115 then goto N69_4; + else goto T69_5; + +N69_4: + if AVG_RANK < 4.45 then 
goto T69_1; + else goto N69_5; + +T69_1: + response = -0.00817106; + goto D69; + +N69_5: + if AVG_RANK < 5.225 then goto N69_6; + else goto T69_4; + +N69_6: + if ISABSTRACT_AVG < 0.55 then goto T69_2; + else goto T69_3; + +T69_2: + response = 0.00926281; + goto D69; + +T69_3: + response = -0.00505226; + goto D69; + +T69_4: + response = 0.000181535; + goto D69; + +T69_5: + response = -0.00449952; + goto D69; + +T69_6: + response = 0.00814113; + goto D69; + +T69_7: + response = -0.00517456; + goto D69; + +D69: + +tnscore = tnscore + response; + +/* Tree 71 of 77 */ +/* N terminal nodes = 7, Depth = 4 */ + + +N70_1: + if TWO_DAY_WF < 0.439697 then goto N70_2; + else goto N70_4; + +N70_2: + if AVG_RANK < 8.7 then goto T70_1; + else goto N70_3; + +T70_1: + response = -0.00429929; + goto D70; + +N70_3: + if MAX_MIN_SCORE < 47973.5 then goto T70_2; + else goto T70_3; + +T70_2: + response = -0.000903138; + goto D70; + +T70_3: + response = 0.0125022; + goto D70; + +N70_4: + if AVG_RANK < 8.635 then goto N70_5; + else goto N70_6; + +N70_5: + if DAY_WEEK_AVG_DERIV < 29.5 then goto T70_4; + else goto T70_5; + +T70_4: + response = 0.000800681; + goto D70; + +T70_5: + response = 0.0125881; + goto D70; + +N70_6: + if ISTITLE_AVG < 0.05 then goto T70_6; + else goto T70_7; + +T70_6: + response = -0.00468934; + goto D70; + +T70_7: + response = 0.00031406; + goto D70; + +D70: + +tnscore = tnscore + response; + +/* Tree 72 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N71_1: + if LW_DAY_HITS < 0.5 then goto N71_2; + else goto N71_3; + +N71_2: + if WEEKAVG < 39.855 then goto T71_1; + else goto T71_2; + +T71_1: + response = 0.000277242; + goto D71; + +T71_2: + response = -0.0132349; + goto D71; + +N71_3: + if MIN_SCORE < 234431 then goto N71_4; + else goto N71_5; + +N71_4: + if MIN_SCORE < 225952 then goto T71_3; + else goto T71_4; + +T71_3: + response = -0.00160465; + goto D71; + +T71_4: + response = 0.01256; + goto D71; + +N71_5: + if PREV_DAY_HITS < 19.5 then goto T71_5; + 
else goto N71_6; + +T71_5: + response = -0.0107505; + goto D71; + +N71_6: + if WEEKAVG < 12.575 then goto T71_6; + else goto T71_7; + +T71_6: + response = 0.0119228; + goto D71; + +T71_7: + response = -0.00600679; + goto D71; + +D71: + +tnscore = tnscore + response; + +/* Tree 73 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N72_1: + if WEEKAVG < 26.93 then goto N72_2; + else goto N72_6; + +N72_2: + if TOPSTORY < 0.39 then goto T72_1; + else goto N72_3; + +T72_1: + response = -0.000140614; + goto D72; + +N72_3: + if AVG_RANK < 9.55 then goto N72_4; + else goto T72_5; + +N72_4: + if AVG_RANK < 7.755 then goto T72_2; + else goto N72_5; + +T72_2: + response = 0.0151495; + goto D72; + +N72_5: + if TOPSTORY < 0.45 then goto T72_3; + else goto T72_4; + +T72_3: + response = 0.0043054; + goto D72; + +T72_4: + response = -0.00734039; + goto D72; + +T72_5: + response = 0.0204375; + goto D72; + +N72_6: + if SUPERDUPER_AVG < 0.55 then goto T72_6; + else goto T72_7; + +T72_6: + response = -0.0146963; + goto D72; + +T72_7: + response = -0.00012832; + goto D72; + +D72: + +tnscore = tnscore + response; + +/* Tree 74 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N73_1: + if TOPSTORY < 0.55 then goto N73_2; + else goto T73_7; + +N73_2: + if WEEKAVG < 0.5 then goto N73_3; + else goto N73_5; + +N73_3: + if MAX_MIN_SCORE < 165073 then goto N73_4; + else goto T73_3; + +N73_4: + if MIN_SCORE < 215208 then goto T73_1; + else goto T73_2; + +T73_1: + response = 0.000170833; + goto D73; + +T73_2: + response = -0.0051217; + goto D73; + +T73_3: + response = 0.0104792; + goto D73; + +N73_5: + if WEEKAVG < 10.93 then goto T73_4; + else goto N73_6; + +T73_4: + response = 0.000781356; + goto D73; + +N73_6: + if EIGHT_HOUR_WF < 0.00663439 then goto T73_5; + else goto T73_6; + +T73_5: + response = 0.00783355; + goto D73; + +T73_6: + response = -0.00393311; + goto D73; + +T73_7: + response = -0.0097211; + goto D73; + +D73: + +tnscore = tnscore + response; + +/* Tree 75 of 77 */ +/* N 
terminal nodes = 7, Depth = 7 */ + + +N74_1: + if DAY_WEEK_AVG_RATIO < 0.225 then goto T74_1; + else goto N74_2; + +T74_1: + response = 0.0105222; + goto D74; + +N74_2: + if SPORTS < 0.73 then goto T74_2; + else goto N74_3; + +T74_2: + response = -0.000688094; + goto D74; + +N74_3: + if MAX_MIN_RANK < 5 then goto T74_3; + else goto N74_4; + +T74_3: + response = -0.00146174; + goto D74; + +N74_4: + if PREV_DAY_HITS < 6.5 then goto N74_5; + else goto T74_7; + +N74_5: + if MIN_SCORE < 144075 then goto T74_4; + else goto N74_6; + +T74_4: + response = -0.00415946; + goto D74; + +N74_6: + if PREV_DAY_HITS < 2.5 then goto T74_5; + else goto T74_6; + +T74_5: + response = 0.010148; + goto D74; + +T74_6: + response = 0.0262199; + goto D74; + +T74_7: + response = -0.00602654; + goto D74; + +D74: + +tnscore = tnscore + response; + +/* Tree 76 of 77 */ +/* N terminal nodes = 7, Depth = 5 */ + + +N75_1: + if DAY_WEEK_AVG_DERIV < -3.5 then goto N75_2; + else goto N75_4; + +N75_2: + if TWO_DAY_WF < 0.635642 then goto N75_3; + else goto T75_3; + +N75_3: + if DAY_WEEK_AVG_DERIV < -5.93 then goto T75_1; + else goto T75_2; + +T75_1: + response = 0.00406292; + goto D75; + +T75_2: + response = -0.0105257; + goto D75; + +T75_3: + response = -0.0144987; + goto D75; + +N75_4: + if DAY_PD_HITS_RATIO < 43 then goto N75_5; + else goto T75_7; + +N75_5: + if DAY_WEEK_AVG_RATIO < 5.55 then goto T75_4; + else goto N75_6; + +T75_4: + response = -6.55387e-05; + goto D75; + +N75_6: + if ISTITLE_AVG < 0.05 then goto T75_5; + else goto T75_6; + +T75_5: + response = -0.0189073; + goto D75; + +T75_6: + response = 0.00177271; + goto D75; + +T75_7: + response = 0.0120068; + goto D75; + +D75: + +tnscore = tnscore + response; + +/* Tree 77 of 77 */ +/* N terminal nodes = 7, Depth = 6 */ + + +N76_1: + if DAY_WEEK_AVG_RATIO < 0.235 then goto T76_1; + else goto N76_2; + +T76_1: + response = 0.0112125; + goto D76; + +N76_2: + if DAY_WEEK_AVG_RATIO < 0.345 then goto T76_2; + else goto N76_3; + +T76_2: + response 
= -0.00610693; + goto D76; + +N76_3: + if WEEKAVG < 0.93 then goto T76_3; + else goto N76_4; + +T76_3: + response = -0.00104389; + goto D76; + +N76_4: + if WEEKAVG < 4.215 then goto N76_5; + else goto N76_6; + +N76_5: + if TOPSTORY < 0.13 then goto T76_4; + else goto T76_5; + +T76_4: + response = 0.000513026; + goto D76; + +T76_5: + response = 0.00509033; + goto D76; + +N76_6: + if MAX_MIN_SCORE < 206736 then goto T76_6; + else goto T76_7; + +T76_6: + response = -0.00041877; + goto D76; + +T76_7: + response = -0.0108199; + goto D76; + +D76: + +tnscore = tnscore + response; + +return; diff --git a/searchlib/src/test/files/treenet06.model b/searchlib/src/test/files/treenet06.model new file mode 100644 index 00000000000..43555fb698a --- /dev/null +++ b/searchlib/src/test/files/treenet06.model @@ -0,0 +1,3799 @@ + +/* Data Dictionary, Number Of Variables = 32 */ +/* Name = NUM_WORDS, Type = continuous. */ +/* Name = DAY_HITS, Type = continuous. */ +/* Name = DAY_HITS_FRAC, Type = continuous. */ +/* Name = PREV_DAY_HITS, Type = continuous. */ +/* Name = DAY_PD_HITS_RATIO, Type = continuous. */ +/* Name = DAY_PD_HITS_DERIV, Type = continuous. */ +/* Name = DAY_PREV_DAY_HITS_FRAC, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_RATIO, Type = continuous. */ +/* Name = DAY_LW_DAY_HITS_DERIV, Type = continuous. */ +/* Name = WEEKAVG, Type = continuous. */ +/* Name = DAY_WEEK_AVG_RATIO, Type = continuous. */ +/* Name = DAY_WEEK_AVG_DERIV, Type = continuous. */ +/* Name = ISTITLE_AVG, Type = continuous. */ +/* Name = ISABSTRACT_AVG, Type = continuous. */ +/* Name = SUPERDUPER_AVG, Type = continuous. */ +/* Name = BUSINESS, Type = continuous. */ +/* Name = ENTERTAINMENT, Type = continuous. */ +/* Name = HEALTH, Type = continuous. */ +/* Name = INTLNEWS, Type = continuous. */ +/* Name = LOCALNEWS, Type = continuous. */ +/* Name = NATIONALNEWS, Type = continuous. */ +/* Name = POLITICS, Type = continuous. */ +/* Name = REGIONALNEWS, Type = continuous. 
*/ +/* Name = SPORTS, Type = continuous. */ +/* Name = TOPSTORY, Type = continuous. */ +/* Name = AVG_RANK, Type = continuous. */ +/* Name = MAX_RANK, Type = continuous. */ +/* Name = MIN_RANK, Type = continuous. */ +/* Name = AVG_SCORE, Type = continuous. */ +/* Name = MAX_SCORE, Type = continuous. */ +/* Name = MIN_SCORE, Type = continuous. */ +/* Name = MAX_MIN_SCORE, Type = continuous. */ + +MODELBEGIN: + +/* CART version: 5.0.9.156 */ +/* TreeNet: TreeNet20070830184428 */ +/* Grove: /home/rparekh/lb/lb_treenet.grv */ +/* N trees: 85 */ + +link TN0; +pred = tnscore; /* predicted value for IY_CTR */ + + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +/* Tree 1 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +tnscore = 0.0; + +N0_1: + if MAX_SCORE < 364352 then goto N0_2; + else goto N0_4; + +N0_2: + if NUM_WORDS < 1.5 then goto T0_1; + else goto N0_3; + +T0_1: + response = 0.106529; + goto D0; + +N0_3: + if WEEKAVG < 0.665 then goto T0_2; + else goto T0_3; + +T0_2: + response = 0.113339; + goto D0; + +T0_3: + response = 0.129744; + goto D0; + +N0_4: + if WEEKAVG < 0.35 then goto T0_4; + else goto T0_5; + +T0_4: + response = 0.125401; + goto D0; + +T0_5: + response = 0.148456; + goto D0; + +D0: + +tnscore = tnscore + response; + +/* Tree 2 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N1_1: + if MAX_SCORE < 386454 then goto N1_2; + else goto N1_4; + +N1_2: + if NUM_WORDS < 2.5 then goto N1_3; + else goto T1_3; + +N1_3: + if MAX_SCORE < 266558 then goto T1_1; + else goto T1_2; + +T1_1: + response = -0.00435683; + goto D1; + +T1_2: + response = 0.00232626; + goto D1; + +T1_3: + response = 0.00527105; + goto D1; + +N1_4: + if DAY_LW_DAY_HITS_RATIO < 3.75 then goto T1_4; + else goto T1_5; + +T1_4: + response = 0.0125759; + goto D1; + +T1_5: + response = 0.0415964; + goto D1; + +D1: + +tnscore = tnscore + response; + +/* Tree 3 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N2_1: + if MAX_SCORE < 
285564 then goto N2_2; + else goto N2_3; + +N2_2: + if NUM_WORDS < 3.5 then goto T2_1; + else goto T2_2; + +T2_1: + response = -0.00312935; + goto D2; + +T2_2: + response = 0.0139702; + goto D2; + +N2_3: + if DAY_LW_DAY_HITS_RATIO < 4.645 then goto T2_3; + else goto N2_4; + +T2_3: + response = 0.00510366; + goto D2; + +N2_4: + if ISABSTRACT_AVG < 0.225 then goto T2_4; + else goto T2_5; + +T2_4: + response = 0.0376987; + goto D2; + +T2_5: + response = 0.00704226; + goto D2; + +D2: + +tnscore = tnscore + response; + +/* Tree 4 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N3_1: + if MAX_SCORE < 354060 then goto N3_2; + else goto N3_4; + +N3_2: + if NUM_WORDS < 1.5 then goto T3_1; + else goto N3_3; + +T3_1: + response = -0.00557684; + goto D3; + +N3_3: + if DAY_LW_DAY_HITS_RATIO < 7.25 then goto T3_2; + else goto T3_3; + +T3_2: + response = -0.000411611; + goto D3; + +T3_3: + response = 0.0176971; + goto D3; + +N3_4: + if ISTITLE_AVG < 0.845 then goto T3_4; + else goto T3_5; + +T3_4: + response = 0.0209172; + goto D3; + +T3_5: + response = 0.00437892; + goto D3; + +D3: + +tnscore = tnscore + response; + +/* Tree 5 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N4_1: + if MAX_SCORE < 357048 then goto N4_2; + else goto N4_4; + +N4_2: + if NUM_WORDS < 3.5 then goto N4_3; + else goto T4_3; + +N4_3: + if DAY_LW_DAY_HITS_RATIO < 6.75 then goto T4_1; + else goto T4_2; + +T4_1: + response = -0.00214246; + goto D4; + +T4_2: + response = 0.00909381; + goto D4; + +T4_3: + response = 0.0132498; + goto D4; + +N4_4: + if DAY_WEEK_AVG_DERIV < 2.785 then goto T4_4; + else goto T4_5; + +T4_4: + response = 0.00781954; + goto D4; + +T4_5: + response = 0.0325808; + goto D4; + +D4: + +tnscore = tnscore + response; + +/* Tree 6 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N5_1: + if MAX_SCORE < 391984 then goto N5_2; + else goto N5_4; + +N5_2: + if NUM_WORDS < 1.5 then goto T5_1; + else goto N5_3; + +T5_1: + response = -0.00479641; + goto D5; + +N5_3: + if WEEKAVG < 
0.805 then goto T5_2; + else goto T5_3; + +T5_2: + response = 0.000314606; + goto D5; + +T5_3: + response = 0.0174789; + goto D5; + +N5_4: + if DAY_WEEK_AVG_DERIV < 2.5 then goto T5_4; + else goto T5_5; + +T5_4: + response = 0.0100076; + goto D5; + +T5_5: + response = 0.0303617; + goto D5; + +D5: + +tnscore = tnscore + response; + +/* Tree 7 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N6_1: + if NUM_WORDS < 2.5 then goto N6_2; + else goto N6_4; + +N6_2: + if MAX_SCORE < 273725 then goto T6_1; + else goto N6_3; + +T6_1: + response = -0.0031024; + goto D6; + +N6_3: + if ISTITLE_AVG < 0.955 then goto T6_2; + else goto T6_3; + +T6_2: + response = 0.0093897; + goto D6; + +T6_3: + response = -0.00177918; + goto D6; + +N6_4: + if WEEKAVG < 0.35 then goto T6_4; + else goto T6_5; + +T6_4: + response = 0.00506228; + goto D6; + +T6_5: + response = 0.0238941; + goto D6; + +D6: + +tnscore = tnscore + response; + +/* Tree 8 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N7_1: + if NUM_WORDS < 3.5 then goto N7_2; + else goto T7_5; + +N7_2: + if DAY_LW_DAY_HITS_RATIO < 7.835 then goto N7_3; + else goto N7_4; + +N7_3: + if NUM_WORDS < 2.5 then goto T7_1; + else goto T7_2; + +T7_1: + response = -0.00222361; + goto D7; + +T7_2: + response = 0.00275911; + goto D7; + +N7_4: + if ISABSTRACT_AVG < 0.185 then goto T7_3; + else goto T7_4; + +T7_3: + response = 0.0286851; + goto D7; + +T7_4: + response = 0.0025611; + goto D7; + +T7_5: + response = 0.0150946; + goto D7; + +D7: + +tnscore = tnscore + response; + +/* Tree 9 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N8_1: + if MAX_SCORE < 463634 then goto N8_2; + else goto T8_5; + +N8_2: + if DAY_LW_DAY_HITS_RATIO < 14.36 then goto N8_3; + else goto N8_4; + +N8_3: + if NUM_WORDS < 2.5 then goto T8_1; + else goto T8_2; + +T8_1: + response = -0.00168161; + goto D8; + +T8_2: + response = 0.00306928; + goto D8; + +N8_4: + if ISABSTRACT_AVG < 0.05 then goto T8_3; + else goto T8_4; + +T8_3: + response = 0.03626; + goto D8; + 
+T8_4: + response = 0.00702238; + goto D8; + +T8_5: + response = 0.018646; + goto D8; + +D8: + +tnscore = tnscore + response; + +/* Tree 10 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N9_1: + if MAX_SCORE < 291384 then goto T9_1; + else goto N9_2; + +T9_1: + response = -0.00120841; + goto D9; + +N9_2: + if ISTITLE_AVG < 0.845 then goto N9_3; + else goto T9_5; + +N9_3: + if WEEKAVG < 0.325 then goto T9_2; + else goto N9_4; + +T9_2: + response = 0.0070091; + goto D9; + +N9_4: + if DAY_WEEK_AVG_DERIV < 27.5 then goto T9_3; + else goto T9_4; + +T9_3: + response = 0.021833; + goto D9; + +T9_4: + response = 0.0670236; + goto D9; + +T9_5: + response = -0.000239127; + goto D9; + +D9: + +tnscore = tnscore + response; + +/* Tree 11 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N10_1: + if MAX_SCORE < 392716 then goto N10_2; + else goto T10_5; + +N10_2: + if WEEKAVG < 0.915 then goto N10_3; + else goto N10_4; + +N10_3: + if NUM_WORDS < 1.5 then goto T10_1; + else goto T10_2; + +T10_1: + response = -0.00408665; + goto D10; + +T10_2: + response = 1.2681e-05; + goto D10; + +N10_4: + if ISABSTRACT_AVG < 0.05 then goto T10_3; + else goto T10_4; + +T10_3: + response = 0.0315009; + goto D10; + +T10_4: + response = 0.00309315; + goto D10; + +T10_5: + response = 0.0101865; + goto D10; + +D10: + +tnscore = tnscore + response; + +/* Tree 12 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N11_1: + if NUM_WORDS < 3.5 then goto N11_2; + else goto T11_5; + +N11_2: + if DAY_LW_DAY_HITS_RATIO < 4.9 then goto N11_3; + else goto N11_4; + +N11_3: + if NUM_WORDS < 1.5 then goto T11_1; + else goto T11_2; + +T11_1: + response = -0.00378383; + goto D11; + +T11_2: + response = -0.000155068; + goto D11; + +N11_4: + if ISTITLE_AVG < 0.915 then goto T11_3; + else goto T11_4; + +T11_3: + response = 0.0164901; + goto D11; + +T11_4: + response = 0.00035458; + goto D11; + +T11_5: + response = 0.0111533; + goto D11; + +D11: + +tnscore = tnscore + response; + +/* Tree 13 of 85 */ +/* N 
terminal nodes = 5, Depth = 4 */ + + +N12_1: + if NUM_WORDS < 2.5 then goto N12_2; + else goto N12_4; + +N12_2: + if DAY_LW_DAY_HITS_RATIO < 12.165 then goto T12_1; + else goto N12_3; + +T12_1: + response = -0.00137589; + goto D12; + +N12_3: + if ISTITLE_AVG < 0.73 then goto T12_2; + else goto T12_3; + +T12_2: + response = 0.0299723; + goto D12; + +T12_3: + response = 0.00442332; + goto D12; + +N12_4: + if DAY_HITS < 2.125 then goto T12_4; + else goto T12_5; + +T12_4: + response = 0.00279729; + goto D12; + +T12_5: + response = 0.0157199; + goto D12; + +D12: + +tnscore = tnscore + response; + +/* Tree 14 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N13_1: + if MAX_SCORE < 391997 then goto N13_2; + else goto N13_4; + +N13_2: + if NUM_WORDS < 1.5 then goto T13_1; + else goto N13_3; + +T13_1: + response = -0.00289017; + goto D13; + +N13_3: + if PREV_DAY_HITS < 6.33333 then goto T13_2; + else goto T13_3; + +T13_2: + response = 0.000153177; + goto D13; + +T13_3: + response = 0.0114408; + goto D13; + +N13_4: + if PREV_DAY_HITS < 9.5 then goto T13_4; + else goto T13_5; + +T13_4: + response = 0.00746655; + goto D13; + +T13_5: + response = 0.040233; + goto D13; + +D13: + +tnscore = tnscore + response; + +/* Tree 15 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N14_1: + if NUM_WORDS < 3.5 then goto N14_2; + else goto T14_5; + +N14_2: + if DAY_LW_DAY_HITS_RATIO < 4.9 then goto T14_1; + else goto N14_3; + +T14_1: + response = -0.00103084; + goto D14; + +N14_3: + if ISTITLE_AVG < 0.915 then goto N14_4; + else goto T14_4; + +N14_4: + if DAY_HITS_FRAC < 0.645 then goto T14_2; + else goto T14_3; + +T14_2: + response = 0.0230528; + goto D14; + +T14_3: + response = 0.00568694; + goto D14; + +T14_4: + response = 0.000615028; + goto D14; + +T14_5: + response = 0.00901386; + goto D14; + +D14: + +tnscore = tnscore + response; + +/* Tree 16 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N15_1: + if MAX_SCORE < 291385 then goto T15_1; + else goto N15_2; + +T15_1: + 
response = -0.000944169; + goto D15; + +N15_2: + if ISTITLE_AVG < 0.72 then goto N15_3; + else goto T15_5; + +N15_3: + if DAY_WEEK_AVG_DERIV < 28.855 then goto N15_4; + else goto T15_4; + +N15_4: + if NATIONALNEWS < 0.355 then goto T15_2; + else goto T15_3; + +T15_2: + response = 0.00617921; + goto D15; + +T15_3: + response = 0.0271174; + goto D15; + +T15_4: + response = 0.0534392; + goto D15; + +T15_5: + response = -0.000921153; + goto D15; + +D15: + +tnscore = tnscore + response; + +/* Tree 17 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N16_1: + if NUM_WORDS < 3.5 then goto N16_2; + else goto T16_5; + +N16_2: + if PREV_DAY_HITS < 6.16667 then goto T16_1; + else goto N16_3; + +T16_1: + response = -0.000682897; + goto D16; + +N16_3: + if MIN_SCORE < 254342 then goto T16_2; + else goto N16_4; + +T16_2: + response = 0.00193942; + goto D16; + +N16_4: + if NATIONALNEWS < 0.21 then goto T16_3; + else goto T16_4; + +T16_3: + response = 0.0131534; + goto D16; + +T16_4: + response = 0.0605109; + goto D16; + +T16_5: + response = 0.00697463; + goto D16; + +D16: + +tnscore = tnscore + response; + +/* Tree 18 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N17_1: + if NUM_WORDS < 2.5 then goto T17_1; + else goto N17_2; + +T17_1: + response = -0.000931934; + goto D17; + +N17_2: + if DAY_WEEK_AVG_RATIO < 3.17 then goto N17_3; + else goto T17_5; + +N17_3: + if ISTITLE_AVG < 0.685 then goto N17_4; + else goto T17_4; + +N17_4: + if NATIONALNEWS < 0.225 then goto T17_2; + else goto T17_3; + +T17_2: + response = 0.00281994; + goto D17; + +T17_3: + response = 0.0214747; + goto D17; + +T17_4: + response = -0.00300096; + goto D17; + +T17_5: + response = 0.0138056; + goto D17; + +D17: + +tnscore = tnscore + response; + +/* Tree 19 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N18_1: + if NUM_WORDS < 3.5 then goto N18_2; + else goto T18_5; + +N18_2: + if DAY_LW_DAY_HITS_RATIO < 4.9 then goto T18_1; + else goto N18_3; + +T18_1: + response = -0.000881061; + goto D18; + 
+N18_3: + if AVG_SCORE < 268471 then goto T18_2; + else goto N18_4; + +T18_2: + response = -8.77741e-05; + goto D18; + +N18_4: + if ISABSTRACT_AVG < 0.105 then goto T18_3; + else goto T18_4; + +T18_3: + response = 0.0164307; + goto D18; + +T18_4: + response = 0.000985136; + goto D18; + +T18_5: + response = 0.00654057; + goto D18; + +D18: + +tnscore = tnscore + response; + +/* Tree 20 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N19_1: + if TOPSTORY < 0.03 then goto N19_2; + else goto N19_4; + +N19_2: + if ISTITLE_AVG < 0.62 then goto N19_3; + else goto T19_3; + +N19_3: + if AVG_SCORE < 268819 then goto T19_1; + else goto T19_2; + +T19_1: + response = -0.000315955; + goto D19; + +T19_2: + response = 0.00518778; + goto D19; + +T19_3: + response = -0.00177677; + goto D19; + +N19_4: + if ISTITLE_AVG < 0.72 then goto T19_4; + else goto T19_5; + +T19_4: + response = 0.0116413; + goto D19; + +T19_5: + response = 0.00139452; + goto D19; + +D19: + +tnscore = tnscore + response; + +/* Tree 21 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N20_1: + if MAX_SCORE < 472738 then goto N20_2; + else goto T20_5; + +N20_2: + if ISTITLE_AVG < 0.63 then goto N20_3; + else goto T20_4; + +N20_3: + if DAY_WEEK_AVG_DERIV < 1.825 then goto T20_1; + else goto N20_4; + +T20_1: + response = 0.000494339; + goto D20; + +N20_4: + if MIN_SCORE < 250779 then goto T20_2; + else goto T20_3; + +T20_2: + response = -1.72329e-05; + goto D20; + +T20_3: + response = 0.012004; + goto D20; + +T20_4: + response = -0.00134588; + goto D20; + +T20_5: + response = 0.0100001; + goto D20; + +D20: + +tnscore = tnscore + response; + +/* Tree 22 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N21_1: + if NUM_WORDS < 3.5 then goto N21_2; + else goto T21_5; + +N21_2: + if DAY_LW_DAY_HITS_RATIO < 24.9 then goto N21_3; + else goto N21_4; + +N21_3: + if PREV_DAY_HITS < 14.8333 then goto T21_1; + else goto T21_2; + +T21_1: + response = -0.000501675; + goto D21; + +T21_2: + response = 0.00773172; + goto 
D21; + +N21_4: + if LOCALNEWS < 0.11 then goto T21_3; + else goto T21_4; + +T21_3: + response = 0.00984187; + goto D21; + +T21_4: + response = 0.0382478; + goto D21; + +T21_5: + response = 0.00673426; + goto D21; + +D21: + +tnscore = tnscore + response; + +/* Tree 23 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N22_1: + if PREV_DAY_HITS < 4.35 then goto N22_2; + else goto N22_3; + +N22_2: + if NUM_WORDS < 1.5 then goto T22_1; + else goto T22_2; + +T22_1: + response = -0.00227953; + goto D22; + +T22_2: + response = 8.50381e-05; + goto D22; + +N22_3: + if ISTITLE_AVG < 0.905 then goto N22_4; + else goto T22_5; + +N22_4: + if DAY_LW_DAY_HITS_RATIO < 1.6 then goto T22_3; + else goto T22_4; + +T22_3: + response = -0.00121777; + goto D22; + +T22_4: + response = 0.0155982; + goto D22; + +T22_5: + response = 0.00116876; + goto D22; + +D22: + +tnscore = tnscore + response; + +/* Tree 24 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N23_1: + if DAY_LW_DAY_HITS_RATIO < 4.73 then goto T23_1; + else goto N23_2; + +T23_1: + response = -0.000272614; + goto D23; + +N23_2: + if NATIONALNEWS < 0.58 then goto N23_3; + else goto T23_5; + +N23_3: + if DAY_PD_HITS_RATIO < 0.63 then goto N23_4; + else goto T23_4; + +N23_4: + if DAY_HITS_FRAC < 0.265 then goto T23_2; + else goto T23_3; + +T23_2: + response = 0.0099063; + goto D23; + +T23_3: + response = 0.0510568; + goto D23; + +T23_4: + response = 0.00250323; + goto D23; + +T23_5: + response = 0.0470183; + goto D23; + +D23: + +tnscore = tnscore + response; + +/* Tree 25 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N24_1: + if DAY_LW_DAY_HITS_RATIO < 5.915 then goto T24_1; + else goto N24_2; + +T24_1: + response = -0.000336118; + goto D24; + +N24_2: + if DAY_PD_HITS_DERIV < -11.5 then goto N24_3; + else goto T24_5; + +N24_3: + if DAY_WEEK_AVG_RATIO < 1.735 then goto T24_2; + else goto N24_4; + +T24_2: + response = 0.000924298; + goto D24; + +N24_4: + if DAY_HITS_FRAC < 0.165 then goto T24_3; + else goto T24_4; + 
+T24_3: + response = 0.0161363; + goto D24; + +T24_4: + response = 0.0802279; + goto D24; + +T24_5: + response = 0.00327439; + goto D24; + +D24: + +tnscore = tnscore + response; + +/* Tree 26 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N25_1: + if NUM_WORDS < 2.5 then goto T25_1; + else goto N25_2; + +T25_1: + response = -0.000656912; + goto D25; + +N25_2: + if BUSINESS < 0.275 then goto N25_3; + else goto T25_5; + +N25_3: + if MAX_MIN_SCORE < 50.25 then goto T25_2; + else goto N25_4; + +T25_2: + response = 0.00224658; + goto D25; + +N25_4: + if ISABSTRACT_AVG < 0.415 then goto T25_3; + else goto T25_4; + +T25_3: + response = 0.013094; + goto D25; + +T25_4: + response = -0.0054932; + goto D25; + +T25_5: + response = -0.00808819; + goto D25; + +D25: + +tnscore = tnscore + response; + +/* Tree 27 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N26_1: + if NATIONALNEWS < 0.135 then goto T26_1; + else goto N26_2; + +T26_1: + response = -0.000140405; + goto D26; + +N26_2: + if AVG_SCORE < 263507 then goto T26_2; + else goto N26_3; + +T26_2: + response = -0.000119297; + goto D26; + +N26_3: + if ISTITLE_AVG < 0.73 then goto N26_4; + else goto T26_5; + +N26_4: + if ENTERTAINMENT < 0.05 then goto T26_3; + else goto T26_4; + +T26_3: + response = 0.0220643; + goto D26; + +T26_4: + response = -0.00416695; + goto D26; + +T26_5: + response = 0.00371154; + goto D26; + +D26: + +tnscore = tnscore + response; + +/* Tree 28 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N27_1: + if DAY_WEEK_AVG_RATIO < 14.28 then goto N27_2; + else goto T27_5; + +N27_2: + if NUM_WORDS < 4.5 then goto N27_3; + else goto T27_4; + +N27_3: + if MIN_SCORE < 245866 then goto T27_1; + else goto N27_4; + +T27_1: + response = -0.000984768; + goto D27; + +N27_4: + if ISTITLE_AVG < 0.72 then goto T27_2; + else goto T27_3; + +T27_2: + response = 0.00341093; + goto D27; + +T27_3: + response = -0.000973418; + goto D27; + +T27_4: + response = 0.0106439; + goto D27; + +T27_5: + response = 
0.0360619; + goto D27; + +D27: + +tnscore = tnscore + response; + +/* Tree 29 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N28_1: + if MIN_SCORE < 472207 then goto T28_1; + else goto N28_2; + +T28_1: + response = -0.000200627; + goto D28; + +N28_2: + if WEEKAVG < 0.325 then goto T28_2; + else goto N28_3; + +T28_2: + response = 0.00409488; + goto D28; + +N28_3: + if AVG_SCORE < 531893 then goto T28_3; + else goto N28_4; + +T28_3: + response = 0.0518209; + goto D28; + +N28_4: + if MAX_SCORE < 602809 then goto T28_4; + else goto T28_5; + +T28_4: + response = -0.0080393; + goto D28; + +T28_5: + response = 0.0383655; + goto D28; + +D28: + +tnscore = tnscore + response; + +/* Tree 30 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N29_1: + if ISTITLE_AVG < 0.72 then goto N29_2; + else goto T29_5; + +N29_2: + if AVG_SCORE < 268824 then goto T29_1; + else goto N29_3; + +T29_1: + response = -0.000131907; + goto D29; + +N29_3: + if MAX_MIN_SCORE < 7909.75 then goto T29_2; + else goto N29_4; + +T29_2: + response = 0.00173958; + goto D29; + +N29_4: + if NATIONALNEWS < 0.39 then goto T29_3; + else goto T29_4; + +T29_3: + response = 0.010916; + goto D29; + +T29_4: + response = 0.0343348; + goto D29; + +T29_5: + response = -0.00113192; + goto D29; + +D29: + +tnscore = tnscore + response; + +/* Tree 31 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N30_1: + if NUM_WORDS < 2.5 then goto T30_1; + else goto N30_2; + +T30_1: + response = -0.000626265; + goto D30; + +N30_2: + if BUSINESS < 0.115 then goto N30_3; + else goto T30_5; + +N30_3: + if MAX_MIN_SCORE < 15489.8 then goto T30_2; + else goto N30_4; + +T30_2: + response = 0.00192349; + goto D30; + +N30_4: + if MAX_MIN_SCORE < 35950.8 then goto T30_3; + else goto T30_4; + +T30_3: + response = 0.0188263; + goto D30; + +T30_4: + response = 0.00372838; + goto D30; + +T30_5: + response = -0.00528885; + goto D30; + +D30: + +tnscore = tnscore + response; + +/* Tree 32 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + 
+N31_1: + if MAX_SCORE < 468155 then goto T31_1; + else goto N31_2; + +T31_1: + response = -0.000113066; + goto D31; + +N31_2: + if ENTERTAINMENT < 0.235 then goto N31_3; + else goto N31_4; + +N31_3: + if TOPSTORY < 0.22 then goto T31_2; + else goto T31_3; + +T31_2: + response = -0.000322423; + goto D31; + +T31_3: + response = 0.0188811; + goto D31; + +N31_4: + if AVG_RANK < 8.365 then goto T31_4; + else goto T31_5; + +T31_4: + response = 0.00856273; + goto D31; + +T31_5: + response = 0.064677; + goto D31; + +D31: + +tnscore = tnscore + response; + +/* Tree 33 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N32_1: + if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N32_2; + else goto T32_5; + +N32_2: + if AVG_SCORE < 259970 then goto T32_1; + else goto N32_3; + +T32_1: + response = -0.000611764; + goto D32; + +N32_3: + if INTLNEWS < 0.045 then goto T32_2; + else goto N32_4; + +T32_2: + response = 0.000128558; + goto D32; + +N32_4: + if LOCALNEWS < 0.28 then goto T32_3; + else goto T32_4; + +T32_3: + response = 0.00350635; + goto D32; + +T32_4: + response = 0.0165708; + goto D32; + +T32_5: + response = 0.018775; + goto D32; + +D32: + +tnscore = tnscore + response; + +/* Tree 34 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N33_1: + if DAY_LW_DAY_HITS_RATIO < 28.5 then goto N33_2; + else goto T33_5; + +N33_2: + if ISTITLE_AVG < 0.585 then goto N33_3; + else goto T33_4; + +N33_3: + if MAX_RANK < 9 then goto T33_1; + else goto N33_4; + +T33_1: + response = 0.000172066; + goto D33; + +N33_4: + if AVG_SCORE < 269329 then goto T33_2; + else goto T33_3; + +T33_2: + response = -0.000102726; + goto D33; + +T33_3: + response = 0.00688386; + goto D33; + +T33_4: + response = -0.00101067; + goto D33; + +T33_5: + response = 0.0149278; + goto D33; + +D33: + +tnscore = tnscore + response; + +/* Tree 35 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N34_1: + if DAY_LW_DAY_HITS_DERIV < 14.5 then goto T34_1; + else goto N34_2; + +T34_1: + response = -0.000172386; + goto D34; 
+ +N34_2: + if DAY_PD_HITS_DERIV < -11.5 then goto N34_3; + else goto N34_4; + +N34_3: + if DAY_WEEK_AVG_RATIO < 1.82 then goto T34_2; + else goto T34_3; + +T34_2: + response = 0.00314713; + goto D34; + +T34_3: + response = 0.044771; + goto D34; + +N34_4: + if LOCALNEWS < 0.115 then goto T34_4; + else goto T34_5; + +T34_4: + response = 0.00112935; + goto D34; + +T34_5: + response = 0.0165557; + goto D34; + +D34: + +tnscore = tnscore + response; + +/* Tree 36 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N35_1: + if DAY_PD_HITS_RATIO < 33.75 then goto N35_2; + else goto T35_5; + +N35_2: + if AVG_SCORE < 597646 then goto N35_3; + else goto T35_4; + +N35_3: + if DAY_PD_HITS_DERIV < -4.9 then goto N35_4; + else goto T35_3; + +N35_4: + if DAY_PREV_DAY_HITS_FRAC < 0.845 then goto T35_1; + else goto T35_2; + +T35_1: + response = -0.00115559; + goto D35; + +T35_2: + response = 0.0101514; + goto D35; + +T35_3: + response = -0.000188064; + goto D35; + +T35_4: + response = 0.0159463; + goto D35; + +T35_5: + response = 0.024791; + goto D35; + +D35: + +tnscore = tnscore + response; + +/* Tree 37 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N36_1: + if MIN_SCORE < 481598 then goto T36_1; + else goto N36_2; + +T36_1: + response = -4.83069e-05; + goto D36; + +N36_2: + if MIN_SCORE < 512815 then goto N36_3; + else goto N36_4; + +N36_3: + if MAX_SCORE < 507654 then goto T36_2; + else goto T36_3; + +T36_2: + response = 0.00563943; + goto D36; + +T36_3: + response = 0.0345982; + goto D36; + +N36_4: + if MAX_SCORE < 584112 then goto T36_4; + else goto T36_5; + +T36_4: + response = -0.00935941; + goto D36; + +T36_5: + response = 0.0104819; + goto D36; + +D36: + +tnscore = tnscore + response; + +/* Tree 38 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N37_1: + if TOPSTORY < 0.105 then goto T37_1; + else goto N37_2; + +T37_1: + response = -0.000322897; + goto D37; + +N37_2: + if PREV_DAY_HITS < 1.45833 then goto T37_2; + else goto N37_3; + +T37_2: + response = 
-0.000616703; + goto D37; + +N37_3: + if MIN_SCORE < 253414 then goto T37_3; + else goto N37_4; + +T37_3: + response = 0.00194629; + goto D37; + +N37_4: + if MIN_SCORE < 255748 then goto T37_4; + else goto T37_5; + +T37_4: + response = 0.0480784; + goto D37; + +T37_5: + response = 0.00955667; + goto D37; + +D37: + +tnscore = tnscore + response; + +/* Tree 39 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N38_1: + if NUM_WORDS < 3.5 then goto N38_2; + else goto N38_4; + +N38_2: + if NATIONALNEWS < 0.39 then goto T38_1; + else goto N38_3; + +T38_1: + response = -0.000285208; + goto D38; + +N38_3: + if PREV_DAY_HITS < 6.83333 then goto T38_2; + else goto T38_3; + +T38_2: + response = 0.00277459; + goto D38; + +T38_3: + response = 0.0334432; + goto D38; + +N38_4: + if BUSINESS < 0.77 then goto T38_4; + else goto T38_5; + +T38_4: + response = 0.00558387; + goto D38; + +T38_5: + response = -0.0192348; + goto D38; + +D38: + +tnscore = tnscore + response; + +/* Tree 40 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N39_1: + if BUSINESS < 0.19 then goto N39_2; + else goto T39_5; + +N39_2: + if INTLNEWS < 0.095 then goto T39_1; + else goto N39_3; + +T39_1: + response = 0.000107539; + goto D39; + +N39_3: + if TOPSTORY < 0.03 then goto T39_2; + else goto N39_4; + +T39_2: + response = 0.00158516; + goto D39; + +N39_4: + if NUM_WORDS < 3.5 then goto T39_3; + else goto T39_4; + +T39_3: + response = 0.00653366; + goto D39; + +T39_4: + response = 0.0388007; + goto D39; + +T39_5: + response = -0.00186321; + goto D39; + +D39: + +tnscore = tnscore + response; + +/* Tree 41 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N40_1: + if DAY_LW_DAY_HITS_RATIO < 3.635 then goto T40_1; + else goto N40_2; + +T40_1: + response = -0.000380753; + goto D40; + +N40_2: + if LOCALNEWS < 0.185 then goto T40_2; + else goto N40_3; + +T40_2: + response = 0.000567701; + goto D40; + +N40_3: + if ISTITLE_AVG < 0.585 then goto N40_4; + else goto T40_5; + +N40_4: + if MAX_RANK < 9 then goto 
T40_3; + else goto T40_4; + +T40_3: + response = -0.00212514; + goto D40; + +T40_4: + response = 0.0247626; + goto D40; + +T40_5: + response = 0.00163409; + goto D40; + +D40: + +tnscore = tnscore + response; + +/* Tree 42 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N41_1: + if DAY_WEEK_AVG_RATIO < 14.28 then goto N41_2; + else goto T41_5; + +N41_2: + if TOPSTORY < 0.115 then goto T41_1; + else goto N41_3; + +T41_1: + response = -0.000369615; + goto D41; + +N41_3: + if DAY_LW_DAY_HITS_RATIO < 4.875 then goto T41_2; + else goto N41_4; + +T41_2: + response = -0.000456219; + goto D41; + +N41_4: + if DAY_PD_HITS_RATIO < 2.275 then goto T41_3; + else goto T41_4; + +T41_3: + response = 0.0169104; + goto D41; + +T41_4: + response = 0.0021639; + goto D41; + +T41_5: + response = 0.0238394; + goto D41; + +D41: + +tnscore = tnscore + response; + +/* Tree 43 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N42_1: + if NUM_WORDS < 1.5 then goto T42_1; + else goto N42_2; + +T42_1: + response = -0.00139356; + goto D42; + +N42_2: + if BUSINESS < 0.13 then goto N42_3; + else goto T42_5; + +N42_3: + if BUSINESS < 0.05 then goto T42_2; + else goto N42_4; + +T42_2: + response = 0.00103638; + goto D42; + +N42_4: + if DAY_LW_DAY_HITS_RATIO < 9.5 then goto T42_3; + else goto T42_4; + +T42_3: + response = 0.0441388; + goto D42; + +T42_4: + response = -0.00313189; + goto D42; + +T42_5: + response = -0.00195899; + goto D42; + +D42: + +tnscore = tnscore + response; + +/* Tree 44 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N43_1: + if NATIONALNEWS < 0.13 then goto T43_1; + else goto N43_2; + +T43_1: + response = -2.4886e-05; + goto D43; + +N43_2: + if HEALTH < 0.105 then goto N43_3; + else goto N43_4; + +N43_3: + if WEEKAVG < 0.93 then goto T43_2; + else goto T43_3; + +T43_2: + response = 0.00135398; + goto D43; + +T43_3: + response = 0.0138431; + goto D43; + +N43_4: + if MIN_RANK < 3 then goto T43_4; + else goto T43_5; + +T43_4: + response = 0.0513615; + goto D43; + 
+T43_5: + response = -0.00585742; + goto D43; + +D43: + +tnscore = tnscore + response; + +/* Tree 45 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N44_1: + if NATIONALNEWS < 0.225 then goto T44_1; + else goto N44_2; + +T44_1: + response = -8.48873e-05; + goto D44; + +N44_2: + if MIN_SCORE < 259062 then goto T44_2; + else goto N44_3; + +T44_2: + response = -0.00036897; + goto D44; + +N44_3: + if ISTITLE_AVG < 0.71 then goto N44_4; + else goto T44_5; + +N44_4: + if ISTITLE_AVG < 0.45 then goto T44_3; + else goto T44_4; + +T44_3: + response = 0.0126383; + goto D44; + +T44_4: + response = 0.0410443; + goto D44; + +T44_5: + response = 0.00420061; + goto D44; + +D44: + +tnscore = tnscore + response; + +/* Tree 46 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N45_1: + if DAY_HITS < 15.25 then goto T45_1; + else goto N45_2; + +T45_1: + response = -0.000322532; + goto D45; + +N45_2: + if LOCALNEWS < 0.13 then goto T45_2; + else goto N45_3; + +T45_2: + response = 0.00109495; + goto D45; + +N45_3: + if WEEKAVG < 6.715 then goto N45_4; + else goto T45_5; + +N45_4: + if MAX_MIN_SCORE < 42695.8 then goto T45_3; + else goto T45_4; + +T45_3: + response = 0.0489508; + goto D45; + +T45_4: + response = 0.00942793; + goto D45; + +T45_5: + response = -0.00595868; + goto D45; + +D45: + +tnscore = tnscore + response; + +/* Tree 47 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N46_1: + if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N46_2; + else goto N46_3; + +N46_2: + if NUM_WORDS < 2.5 then goto T46_1; + else goto T46_2; + +T46_1: + response = -0.000253621; + goto D46; + +T46_2: + response = 0.00124598; + goto D46; + +N46_3: + if DAY_WEEK_AVG_DERIV < 49.715 then goto N46_4; + else goto T46_5; + +N46_4: + if ISTITLE_AVG < 0.74 then goto T46_3; + else goto T46_4; + +T46_3: + response = 0.0495711; + goto D46; + +T46_4: + response = 0.00323737; + goto D46; + +T46_5: + response = -0.00771975; + goto D46; + +D46: + +tnscore = tnscore + response; + +/* Tree 48 of 85 */ +/* N 
terminal nodes = 5, Depth = 5 */ + + +N47_1: + if NUM_WORDS < 1.5 then goto T47_1; + else goto N47_2; + +T47_1: + response = -0.001351; + goto D47; + +N47_2: + if DAY_WEEK_AVG_RATIO < 13.99 then goto N47_3; + else goto T47_5; + +N47_3: + if BUSINESS < 0.105 then goto N47_4; + else goto T47_4; + +N47_4: + if DAY_PD_HITS_DERIV < -10.5 then goto T47_2; + else goto T47_3; + +T47_2: + response = 0.0106695; + goto D47; + +T47_3: + response = 0.000527774; + goto D47; + +T47_4: + response = -0.00157676; + goto D47; + +T47_5: + response = 0.0239454; + goto D47; + +D47: + +tnscore = tnscore + response; + +/* Tree 49 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N48_1: + if DAY_LW_DAY_HITS_RATIO < 24.9 then goto T48_1; + else goto N48_2; + +T48_1: + response = -6.42273e-05; + goto D48; + +N48_2: + if MIN_SCORE < 247776 then goto N48_3; + else goto N48_4; + +N48_3: + if TOPSTORY < 0.05 then goto T48_2; + else goto T48_3; + +T48_2: + response = -0.0160384; + goto D48; + +T48_3: + response = 0.00605178; + goto D48; + +N48_4: + if DAY_PREV_DAY_HITS_FRAC < 0.715 then goto T48_4; + else goto T48_5; + +T48_4: + response = -0.00225007; + goto D48; + +T48_5: + response = 0.0290955; + goto D48; + +D48: + +tnscore = tnscore + response; + +/* Tree 50 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N49_1: + if PREV_DAY_HITS < 1.46429 then goto T49_1; + else goto N49_2; + +T49_1: + response = -0.000357361; + goto D49; + +N49_2: + if AVG_SCORE < 242369 then goto T49_2; + else goto N49_3; + +T49_2: + response = -0.000884977; + goto D49; + +N49_3: + if WEEKAVG < 5.975 then goto N49_4; + else goto T49_5; + +N49_4: + if DAY_HITS_FRAC < 0.135 then goto T49_3; + else goto T49_4; + +T49_3: + response = 0.00160961; + goto D49; + +T49_4: + response = 0.00805956; + goto D49; + +T49_5: + response = -0.0114471; + goto D49; + +D49: + +tnscore = tnscore + response; + +/* Tree 51 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N50_1: + if PREV_DAY_HITS < 26.5 then goto N50_2; + else goto 
N50_3; + +N50_2: + if ISABSTRACT_AVG < 0.155 then goto T50_1; + else goto T50_2; + +T50_1: + response = 0.000196561; + goto D50; + +T50_2: + response = -0.00121068; + goto D50; + +N50_3: + if DAY_HITS_FRAC < 0.265 then goto N50_4; + else goto T50_5; + +N50_4: + if DAY_WEEK_AVG_RATIO < 2.12 then goto T50_3; + else goto T50_4; + +T50_3: + response = -0.00523257; + goto D50; + +T50_4: + response = 0.0187339; + goto D50; + +T50_5: + response = 0.0345852; + goto D50; + +D50: + +tnscore = tnscore + response; + +/* Tree 52 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N51_1: + if NUM_WORDS < 4.5 then goto N51_2; + else goto N51_3; + +N51_2: + if MAX_MIN_SCORE < 55839.5 then goto T51_1; + else goto T51_2; + +T51_1: + response = -0.000141945; + goto D51; + +T51_2: + response = -0.00394864; + goto D51; + +N51_3: + if DAY_WEEK_AVG_RATIO < 3.505 then goto N51_4; + else goto T51_5; + +N51_4: + if AVG_RANK < 8.395 then goto T51_3; + else goto T51_4; + +T51_3: + response = 0.0095287; + goto D51; + +T51_4: + response = -0.0143254; + goto D51; + +T51_5: + response = 0.0273452; + goto D51; + +D51: + +tnscore = tnscore + response; + +/* Tree 53 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N52_1: + if PREV_DAY_HITS < 16.5 then goto T52_1; + else goto N52_2; + +T52_1: + response = 0.000180791; + goto D52; + +N52_2: + if AVG_SCORE < 312154 then goto N52_3; + else goto T52_5; + +N52_3: + if DAY_HITS_FRAC < 0.325 then goto N52_4; + else goto T52_4; + +N52_4: + if MIN_SCORE < 254301 then goto T52_2; + else goto T52_3; + +T52_2: + response = -0.00274788; + goto D52; + +T52_3: + response = 0.0174896; + goto D52; + +T52_4: + response = -0.0248862; + goto D52; + +T52_5: + response = -0.023664; + goto D52; + +D52: + +tnscore = tnscore + response; + +/* Tree 54 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N53_1: + if MIN_SCORE < 245164 then goto T53_1; + else goto N53_2; + +T53_1: + response = -0.000561547; + goto D53; + +N53_2: + if DAY_PREV_DAY_HITS_FRAC < 0.395 then 
goto T53_2; + else goto N53_3; + +T53_2: + response = -0.00410237; + goto D53; + +N53_3: + if MAX_RANK < 9 then goto T53_3; + else goto N53_4; + +T53_3: + response = -7.35691e-05; + goto D53; + +N53_4: + if MIN_SCORE < 560324 then goto T53_4; + else goto T53_5; + +T53_4: + response = 0.00230962; + goto D53; + +T53_5: + response = 0.0217268; + goto D53; + +D53: + +tnscore = tnscore + response; + +/* Tree 55 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N54_1: + if DAY_WEEK_AVG_RATIO < 0.925 then goto N54_2; + else goto N54_3; + +N54_2: + if MAX_SCORE < 405533 then goto T54_1; + else goto T54_2; + +T54_1: + response = -0.000583987; + goto D54; + +T54_2: + response = -0.00549206; + goto D54; + +N54_3: + if MIN_SCORE < 479344 then goto T54_3; + else goto N54_4; + +T54_3: + response = 0.000303644; + goto D54; + +N54_4: + if MIN_SCORE < 489462 then goto T54_4; + else goto T54_5; + +T54_4: + response = 0.038018; + goto D54; + +T54_5: + response = 0.00297502; + goto D54; + +D54: + +tnscore = tnscore + response; + +/* Tree 56 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N55_1: + if MIN_SCORE < 475038 then goto N55_2; + else goto N55_3; + +N55_2: + if MAX_SCORE < 498633 then goto T55_1; + else goto T55_2; + +T55_1: + response = -4.41489e-05; + goto D55; + +T55_2: + response = -0.0267606; + goto D55; + +N55_3: + if ISABSTRACT_AVG < 0.1 then goto N55_4; + else goto T55_5; + +N55_4: + if ENTERTAINMENT < 0.31 then goto T55_3; + else goto T55_4; + +T55_3: + response = 0.00455624; + goto D55; + +T55_4: + response = 0.0204099; + goto D55; + +T55_5: + response = -0.00647491; + goto D55; + +D55: + +tnscore = tnscore + response; + +/* Tree 57 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N56_1: + if DAY_PD_HITS_RATIO < 0.115 then goto T56_1; + else goto N56_2; + +T56_1: + response = -0.00668954; + goto D56; + +N56_2: + if DAY_PD_HITS_DERIV < -11.5 then goto N56_3; + else goto T56_5; + +N56_3: + if DAY_PREV_DAY_HITS_FRAC < 0.975 then goto N56_4; + else goto T56_4; 
+ +N56_4: + if DAY_PD_HITS_RATIO < 0.515 then goto T56_2; + else goto T56_3; + +T56_2: + response = -0.00146208; + goto D56; + +T56_3: + response = 0.0238106; + goto D56; + +T56_4: + response = 0.0377246; + goto D56; + +T56_5: + response = -9.33641e-05; + goto D56; + +D56: + +tnscore = tnscore + response; + +/* Tree 58 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N57_1: + if DAY_WEEK_AVG_RATIO < 10.84 then goto N57_2; + else goto N57_4; + +N57_2: + if DAY_WEEK_AVG_RATIO < 6.845 then goto T57_1; + else goto N57_3; + +T57_1: + response = 0.000128893; + goto D57; + +N57_3: + if MIN_SCORE < 367047 then goto T57_2; + else goto T57_3; + +T57_2: + response = -0.00823593; + goto D57; + +T57_3: + response = 0.0102315; + goto D57; + +N57_4: + if AVG_SCORE < 279315 then goto T57_4; + else goto T57_5; + +T57_4: + response = -0.00477584; + goto D57; + +T57_5: + response = 0.0265572; + goto D57; + +D57: + +tnscore = tnscore + response; + +/* Tree 59 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N58_1: + if PREV_DAY_HITS < 9.75 then goto T58_1; + else goto N58_2; + +T58_1: + response = -0.000266304; + goto D58; + +N58_2: + if DAY_PREV_DAY_HITS_FRAC < 0.925 then goto T58_2; + else goto N58_3; + +T58_2: + response = -0.000269406; + goto D58; + +N58_3: + if AVG_RANK < 8.45 then goto N58_4; + else goto T58_5; + +N58_4: + if INTLNEWS < 0.295 then goto T58_3; + else goto T58_4; + +T58_3: + response = 0.0146136; + goto D58; + +T58_4: + response = -0.0235187; + goto D58; + +T58_5: + response = 0.0313855; + goto D58; + +D58: + +tnscore = tnscore + response; + +/* Tree 60 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N59_1: + if MIN_SCORE < 483511 then goto T59_1; + else goto N59_2; + +T59_1: + response = -0.000181558; + goto D59; + +N59_2: + if MIN_SCORE < 498030 then goto N59_3; + else goto N59_4; + +N59_3: + if DAY_WEEK_AVG_RATIO < 1.68 then goto T59_2; + else goto T59_3; + +T59_2: + response = 0.00293744; + goto D59; + +T59_3: + response = 0.0371557; + goto D59; + 
+N59_4: + if SUPERDUPER_AVG < 0.53 then goto T59_4; + else goto T59_5; + +T59_4: + response = 0.00413503; + goto D59; + +T59_5: + response = -0.0112815; + goto D59; + +D59: + +tnscore = tnscore + response; + +/* Tree 61 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N60_1: + if DAY_PD_HITS_RATIO < 26.5 then goto N60_2; + else goto N60_4; + +N60_2: + if DAY_PD_HITS_DERIV < -11.5 then goto N60_3; + else goto T60_3; + +N60_3: + if DAY_WEEK_AVG_RATIO < 1.735 then goto T60_1; + else goto T60_2; + +T60_1: + response = 0.00051364; + goto D60; + +T60_2: + response = 0.0186441; + goto D60; + +T60_3: + response = -7.75501e-05; + goto D60; + +N60_4: + if WEEKAVG < 5.57 then goto T60_4; + else goto T60_5; + +T60_4: + response = 0.0278366; + goto D60; + +T60_5: + response = -0.00263107; + goto D60; + +D60: + +tnscore = tnscore + response; + +/* Tree 62 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N61_1: + if NUM_WORDS < 1.5 then goto T61_1; + else goto N61_2; + +T61_1: + response = -0.0008926; + goto D61; + +N61_2: + if DAY_WEEK_AVG_DERIV < 47.86 then goto N61_3; + else goto T61_5; + +N61_3: + if DAY_LW_DAY_HITS_RATIO < 38.5 then goto N61_4; + else goto T61_4; + +N61_4: + if ISABSTRACT_AVG < 0.235 then goto T61_2; + else goto T61_3; + +T61_2: + response = 0.000972798; + goto D61; + +T61_3: + response = -0.00127979; + goto D61; + +T61_4: + response = 0.0250611; + goto D61; + +T61_5: + response = -0.0239326; + goto D61; + +D61: + +tnscore = tnscore + response; + +/* Tree 63 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N62_1: + if MAX_SCORE < 407652 then goto N62_2; + else goto T62_5; + +N62_2: + if MAX_SCORE < 395501 then goto T62_1; + else goto N62_3; + +T62_1: + response = 1.49872e-05; + goto D62; + +N62_3: + if DAY_PD_HITS_RATIO < 0.285 then goto T62_2; + else goto N62_4; + +T62_2: + response = 0.0434173; + goto D62; + +N62_4: + if BUSINESS < 0.05 then goto T62_3; + else goto T62_4; + +T62_3: + response = 0.0102549; + goto D62; + +T62_4: + response = 
-0.010691; + goto D62; + +T62_5: + response = -0.00277705; + goto D62; + +D62: + +tnscore = tnscore + response; + +/* Tree 64 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N63_1: + if DAY_WEEK_AVG_RATIO < 6.355 then goto T63_1; + else goto N63_2; + +T63_1: + response = 0.00013418; + goto D63; + +N63_2: + if MIN_SCORE < 405020 then goto N63_3; + else goto T63_5; + +N63_3: + if AVG_SCORE < 356693 then goto N63_4; + else goto T63_4; + +N63_4: + if MAX_SCORE < 327611 then goto T63_2; + else goto T63_3; + +T63_2: + response = -0.00546237; + goto D63; + +T63_3: + response = 0.0146496; + goto D63; + +T63_4: + response = -0.0196891; + goto D63; + +T63_5: + response = 0.0208141; + goto D63; + +D63: + +tnscore = tnscore + response; + +/* Tree 65 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N64_1: + if AVG_SCORE < 526352 then goto N64_2; + else goto N64_3; + +N64_2: + if MAX_SCORE < 521635 then goto T64_1; + else goto T64_2; + +T64_1: + response = -6.26311e-05; + goto D64; + +T64_2: + response = 0.0210967; + goto D64; + +N64_3: + if MAX_SCORE < 550983 then goto T64_3; + else goto N64_4; + +T64_3: + response = -0.0232122; + goto D64; + +N64_4: + if AVG_RANK < 5.5 then goto T64_4; + else goto T64_5; + +T64_4: + response = -0.0243343; + goto D64; + +T64_5: + response = -0.00151995; + goto D64; + +D64: + +tnscore = tnscore + response; + +/* Tree 66 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N65_1: + if DAY_PD_HITS_RATIO < 26.5 then goto N65_2; + else goto N65_3; + +N65_2: + if DAY_PD_HITS_RATIO < 20.625 then goto T65_1; + else goto T65_2; + +T65_1: + response = -5.89198e-05; + goto D65; + +T65_2: + response = -0.0216644; + goto D65; + +N65_3: + if MAX_SCORE < 200640 then goto T65_3; + else goto N65_4; + +T65_3: + response = -0.011139; + goto D65; + +N65_4: + if AVG_RANK < 8.55 then goto T65_4; + else goto T65_5; + +T65_4: + response = 0.0390014; + goto D65; + +T65_5: + response = 0.00966164; + goto D65; + +D65: + +tnscore = tnscore + response; + +/* Tree 
67 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N66_1: + if NATIONALNEWS < 0.27 then goto T66_1; + else goto N66_2; + +T66_1: + response = -0.000174062; + goto D66; + +N66_2: + if HEALTH < 0.05 then goto N66_3; + else goto T66_5; + +N66_3: + if AVG_SCORE < 342310 then goto T66_2; + else goto N66_4; + +T66_2: + response = 0.000835476; + goto D66; + +N66_4: + if MIN_SCORE < 347780 then goto T66_3; + else goto T66_4; + +T66_3: + response = 0.0334442; + goto D66; + +T66_4: + response = 0.00624751; + goto D66; + +T66_5: + response = 0.025545; + goto D66; + +D66: + +tnscore = tnscore + response; + +/* Tree 68 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N67_1: + if ISTITLE_AVG < 0.72 then goto N67_2; + else goto T67_5; + +N67_2: + if MAX_MIN_SCORE < 43995.2 then goto T67_1; + else goto N67_3; + +T67_1: + response = 0.000396726; + goto D67; + +N67_3: + if DAY_PREV_DAY_HITS_FRAC < 0.935 then goto T67_2; + else goto N67_4; + +T67_2: + response = 0.00123996; + goto D67; + +N67_4: + if MAX_MIN_SCORE < 56002.2 then goto T67_3; + else goto T67_4; + +T67_3: + response = 0.0235285; + goto D67; + +T67_4: + response = -0.00154573; + goto D67; + +T67_5: + response = -0.000478464; + goto D67; + +D67: + +tnscore = tnscore + response; + +/* Tree 69 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N68_1: + if BUSINESS < 0.21 then goto N68_2; + else goto T68_5; + +N68_2: + if DAY_LW_DAY_HITS_DERIV < 14.5 then goto T68_1; + else goto N68_3; + +T68_1: + response = 2.41495e-05; + goto D68; + +N68_3: + if NATIONALNEWS < 0.225 then goto N68_4; + else goto T68_4; + +N68_4: + if LOCALNEWS < 0.035 then goto T68_2; + else goto T68_3; + +T68_2: + response = -0.00142155; + goto D68; + +T68_3: + response = 0.0129645; + goto D68; + +T68_4: + response = 0.0297085; + goto D68; + +T68_5: + response = -0.0014897; + goto D68; + +D68: + +tnscore = tnscore + response; + +/* Tree 70 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N69_1: + if SPORTS < 0.585 then goto T69_1; + else 
goto N69_2; + +T69_1: + response = -0.000304907; + goto D69; + +N69_2: + if MAX_SCORE < 285618 then goto T69_2; + else goto N69_3; + +T69_2: + response = 0.000221636; + goto D69; + +N69_3: + if ISTITLE_AVG < 0.7 then goto N69_4; + else goto T69_5; + +N69_4: + if MIN_SCORE < 269093 then goto T69_3; + else goto T69_4; + +T69_3: + response = 0.0417159; + goto D69; + +T69_4: + response = 0.00987586; + goto D69; + +T69_5: + response = 0.00129559; + goto D69; + +D69: + +tnscore = tnscore + response; + +/* Tree 71 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N70_1: + if DAY_PD_HITS_RATIO < 12.28 then goto T70_1; + else goto N70_2; + +T70_1: + response = -5.73419e-05; + goto D70; + +N70_2: + if LOCALNEWS < 0.03 then goto T70_2; + else goto N70_3; + +T70_2: + response = -0.00224701; + goto D70; + +N70_3: + if WEEKAVG < 5.57 then goto N70_4; + else goto T70_5; + +N70_4: + if AVG_RANK < 8.1 then goto T70_3; + else goto T70_4; + +T70_3: + response = 0.0150017; + goto D70; + +T70_4: + response = 0.0490061; + goto D70; + +T70_5: + response = 0.000899967; + goto D70; + +D70: + +tnscore = tnscore + response; + +/* Tree 72 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N71_1: + if NATIONALNEWS < 0.28 then goto T71_1; + else goto N71_2; + +T71_1: + response = -0.000202096; + goto D71; + +N71_2: + if MIN_SCORE < 259050 then goto T71_2; + else goto N71_3; + +T71_2: + response = -0.000461524; + goto D71; + +N71_3: + if PREV_DAY_HITS < 5.5 then goto N71_4; + else goto T71_5; + +N71_4: + if ISTITLE_AVG < 0.085 then goto T71_3; + else goto T71_4; + +T71_3: + response = 0.0107478; + goto D71; + +T71_4: + response = 0.000658206; + goto D71; + +T71_5: + response = 0.039025; + goto D71; + +D71: + +tnscore = tnscore + response; + +/* Tree 73 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N72_1: + if DAY_WEEK_AVG_RATIO < 0.885 then goto T72_1; + else goto N72_2; + +T72_1: + response = -0.000898287; + goto D72; + +N72_2: + if MIN_SCORE < 482615 then goto T72_2; + else goto 
N72_3; + +T72_2: + response = 0.000137426; + goto D72; + +N72_3: + if AVG_SCORE < 506793 then goto N72_4; + else goto T72_5; + +N72_4: + if AVG_SCORE < 493340 then goto T72_3; + else goto T72_4; + +T72_3: + response = 0.011503; + goto D72; + +T72_4: + response = 0.0451903; + goto D72; + +T72_5: + response = 1.36945e-05; + goto D72; + +D72: + +tnscore = tnscore + response; + +/* Tree 74 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N73_1: + if DAY_PD_HITS_DERIV < -4.5 then goto N73_2; + else goto T73_5; + +N73_2: + if HEALTH < 0.13 then goto N73_3; + else goto T73_4; + +N73_3: + if BUSINESS < 0.96 then goto T73_1; + else goto N73_4; + +T73_1: + response = 0.00144328; + goto D73; + +N73_4: + if MAX_SCORE < 239157 then goto T73_2; + else goto T73_3; + +T73_2: + response = -4.31323e-05; + goto D73; + +T73_3: + response = 0.0302083; + goto D73; + +T73_4: + response = 0.0263586; + goto D73; + +T73_5: + response = 3.93517e-05; + goto D73; + +D73: + +tnscore = tnscore + response; + +/* Tree 75 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N74_1: + if DAY_LW_DAY_HITS_DERIV < 1.91 then goto T74_1; + else goto N74_2; + +T74_1: + response = -0.000330312; + goto D74; + +N74_2: + if MIN_SCORE < 254252 then goto T74_2; + else goto N74_3; + +T74_2: + response = -0.000287448; + goto D74; + +N74_3: + if WEEKAVG < 4.5 then goto N74_4; + else goto T74_5; + +N74_4: + if WEEKAVG < 0.93 then goto T74_3; + else goto T74_4; + +T74_3: + response = 0.00341942; + goto D74; + +T74_4: + response = 0.0180965; + goto D74; + +T74_5: + response = -0.0144877; + goto D74; + +D74: + +tnscore = tnscore + response; + +/* Tree 76 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N75_1: + if NUM_WORDS < 4.5 then goto N75_2; + else goto N75_4; + +N75_2: + if MAX_MIN_SCORE < 56141.5 then goto T75_1; + else goto N75_3; + +T75_1: + response = 3.55635e-05; + goto D75; + +N75_3: + if ISTITLE_AVG < 0.69 then goto T75_2; + else goto T75_3; + +T75_2: + response = -0.0120653; + goto D75; + +T75_3: 
+ response = -0.00193295; + goto D75; + +N75_4: + if DAY_HITS_FRAC < 0.585 then goto T75_4; + else goto T75_5; + +T75_4: + response = 0.0109657; + goto D75; + +T75_5: + response = -0.00562292; + goto D75; + +D75: + +tnscore = tnscore + response; + +/* Tree 77 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N76_1: + if MAX_SCORE < 597411 then goto N76_2; + else goto T76_5; + +N76_2: + if AVG_SCORE < 525986 then goto N76_3; + else goto T76_4; + +N76_3: + if AVG_SCORE < 504944 then goto T76_1; + else goto N76_4; + +T76_1: + response = 6.12611e-05; + goto D76; + +N76_4: + if AVG_SCORE < 512650 then goto T76_2; + else goto T76_3; + +T76_2: + response = 0.0310299; + goto D76; + +T76_3: + response = 0.000664858; + goto D76; + +T76_4: + response = -0.010433; + goto D76; + +T76_5: + response = 0.00965011; + goto D76; + +D76: + +tnscore = tnscore + response; + +/* Tree 78 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N77_1: + if DAY_PD_HITS_RATIO < 5.945 then goto N77_2; + else goto N77_3; + +N77_2: + if MAX_SCORE < 629654 then goto T77_1; + else goto T77_2; + +T77_1: + response = 0.000234339; + goto D77; + +T77_2: + response = -0.01439; + goto D77; + +N77_3: + if MAX_MIN_SCORE < 63226.5 then goto N77_4; + else goto T77_5; + +N77_4: + if LOCALNEWS < 0.28 then goto T77_3; + else goto T77_4; + +T77_3: + response = -0.00423293; + goto D77; + +T77_4: + response = 0.00606695; + goto D77; + +T77_5: + response = 0.0188983; + goto D77; + +D77: + +tnscore = tnscore + response; + +/* Tree 79 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N78_1: + if MIN_SCORE < 670535 then goto N78_2; + else goto T78_5; + +N78_2: + if DAY_PD_HITS_RATIO < 5.845 then goto N78_3; + else goto T78_4; + +N78_3: + if DAY_LW_DAY_HITS_RATIO < 5.47 then goto T78_1; + else goto N78_4; + +T78_1: + response = -8.49912e-05; + goto D78; + +N78_4: + if TOPSTORY < 0.105 then goto T78_2; + else goto T78_3; + +T78_2: + response = -0.00034055; + goto D78; + +T78_3: + response = 0.0101604; + goto D78; + 
+T78_4: + response = -0.00330677; + goto D78; + +T78_5: + response = 0.0174593; + goto D78; + +D78: + +tnscore = tnscore + response; + +/* Tree 80 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N79_1: + if DAY_PD_HITS_RATIO < 33.75 then goto N79_2; + else goto T79_5; + +N79_2: + if WEEKAVG < 4.46 then goto T79_1; + else goto N79_3; + +T79_1: + response = 3.99921e-05; + goto D79; + +N79_3: + if DAY_PREV_DAY_HITS_FRAC < 0.945 then goto N79_4; + else goto T79_4; + +N79_4: + if DAY_LW_DAY_HITS_RATIO < 48.5 then goto T79_2; + else goto T79_3; + +T79_2: + response = -0.00420023; + goto D79; + +T79_3: + response = 0.0191669; + goto D79; + +T79_4: + response = -0.0241434; + goto D79; + +T79_5: + response = 0.0157146; + goto D79; + +D79: + +tnscore = tnscore + response; + +/* Tree 81 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N80_1: + if DAY_WEEK_AVG_RATIO < 6.3 then goto T80_1; + else goto N80_2; + +T80_1: + response = 0.00024645; + goto D80; + +N80_2: + if DAY_WEEK_AVG_DERIV < 13.785 then goto N80_3; + else goto N80_4; + +N80_3: + if MIN_SCORE < 397526 then goto T80_2; + else goto T80_3; + +T80_2: + response = -0.00908083; + goto D80; + +T80_3: + response = 0.00977666; + goto D80; + +N80_4: + if AVG_SCORE < 289007 then goto T80_4; + else goto T80_5; + +T80_4: + response = -0.00132101; + goto D80; + +T80_5: + response = 0.0196639; + goto D80; + +D80: + +tnscore = tnscore + response; + +/* Tree 82 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N81_1: + if MIN_SCORE < 672810 then goto N81_2; + else goto T81_5; + +N81_2: + if MIN_SCORE < 631089 then goto N81_3; + else goto T81_4; + +N81_3: + if MAX_SCORE < 611207 then goto N81_4; + else goto T81_3; + +N81_4: + if MIN_SCORE < 512782 then goto T81_1; + else goto T81_2; + +T81_1: + response = -3.64401e-05; + goto D81; + +T81_2: + response = -0.00741622; + goto D81; + +T81_3: + response = 0.0190309; + goto D81; + +T81_4: + response = -0.0227335; + goto D81; + +T81_5: + response = 0.0167703; + goto D81; + 
+D81: + +tnscore = tnscore + response; + +/* Tree 83 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N82_1: + if MAX_SCORE < 439769 then goto T82_1; + else goto N82_2; + +T82_1: + response = 4.38016e-05; + goto D82; + +N82_2: + if TOPSTORY < 0.22 then goto T82_2; + else goto N82_3; + +T82_2: + response = -0.00593521; + goto D82; + +N82_3: + if SUPERDUPER_AVG < 0.45 then goto N82_4; + else goto T82_5; + +N82_4: + if DAY_PD_HITS_DERIV < -1.5 then goto T82_3; + else goto T82_4; + +T82_3: + response = 0.0429213; + goto D82; + +T82_4: + response = 0.0049244; + goto D82; + +T82_5: + response = -0.0152763; + goto D82; + +D82: + +tnscore = tnscore + response; + +/* Tree 84 of 85 */ +/* N terminal nodes = 5, Depth = 4 */ + + +N83_1: + if REGIONALNEWS < 0.105 then goto N83_2; + else goto N83_3; + +N83_2: + if POLITICS < 0.29 then goto T83_1; + else goto T83_2; + +T83_1: + response = -8.83284e-05; + goto D83; + +T83_2: + response = -0.00708574; + goto D83; + +N83_3: + if MAX_SCORE < 291999 then goto T83_3; + else goto N83_4; + +T83_3: + response = 0.000387947; + goto D83; + +N83_4: + if DAY_WEEK_AVG_DERIV < 2.145 then goto T83_4; + else goto T83_5; + +T83_4: + response = -0.00777391; + goto D83; + +T83_5: + response = -0.0310452; + goto D83; + +D83: + +tnscore = tnscore + response; + +/* Tree 85 of 85 */ +/* N terminal nodes = 5, Depth = 5 */ + + +N84_1: + if MAX_MIN_SCORE < 61554.2 then goto T84_1; + else goto N84_2; + +T84_1: + response = -2.35487e-05; + goto D84; + +N84_2: + if NUM_WORDS < 2.5 then goto N84_3; + else goto T84_5; + +N84_3: + if MAX_MIN_SCORE < 88657.5 then goto T84_2; + else goto N84_4; + +T84_2: + response = -0.00672369; + goto D84; + +N84_4: + if DAY_PREV_DAY_HITS_FRAC < 0.295 then goto T84_3; + else goto T84_4; + +T84_3: + response = -0.0026578; + goto D84; + +T84_4: + response = 0.0151957; + goto D84; + +T84_5: + response = -0.0135855; + goto D84; + +D84: + +tnscore = tnscore + response; + +return; diff --git 
a/searchlib/src/test/files/treenet07.model b/searchlib/src/test/files/treenet07.model new file mode 100644 index 00000000000..98059bec273 --- /dev/null +++ b/searchlib/src/test/files/treenet07.model @@ -0,0 +1,13275 @@ +MODELBEGIN: + + /* N trees: 200 */ + +link TN0; +pred = tnscore; /* predicted value for GRADE */ + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + + +TN0: + +tnscore = 0.0; + +N0_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100400142, 100200054, 100300073, 100200192, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200176, 100300200, 100200028, 100300076, 100200232) then goto N0_2; + else goto N0_8; + +N0_2: + if attribute(catid) in (100200186, 100200068, 100300121, 100300019, 100200176, 100300200, 100200028, 100300076) then goto N0_3; + else goto N0_4; + +N0_3: + if attribute(catid) in (100200068, 100300019, 100200176, 100300200) then goto T0_1; + else goto T0_2; + +T0_1: + response = -0.0249999798; + goto D0; + +T0_2: + response = 0.0022099815; + goto D0; + +N0_4: + if attribute(catid) in (0, 100300011, 100300014, 100300077, 100200034, 100400141, 100300165, 100300005, 100200172, 100300008, 100300027, 100200053, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080) then goto N0_5; + else goto N0_7; + +N0_5: + if attribute(catid) in (100300011, 100300165, 100300005, 100300027, 100200192, 100300209, 100400079, 100400080) then goto T0_3; + else goto N0_6; + +T0_3: + response = 0.0131601540; + goto D0; + +N0_6: + if attribute(catid) in (100300014, 100200034, 100400141, 100200172, 100300008, 100200053, 100200170, 100300169) then goto T0_4; + else goto T0_5; + +T0_4: + response = 0.0191030525; + goto D0; + +T0_5: + response = 0.0217254140; + goto D0; + +N0_7: + if attribute(catid) in 
(100200130, 100400142, 100200054, 100300073) then goto T0_6; + else goto T0_7; + +T0_6: + response = 0.0270836867; + goto D0; + +T0_7: + response = 0.0305748922; + goto D0; + +N0_8: + if attribute(catid) in (100300058, 100300166, 100300102, 100400037, 100400038, 100300065, 100300127, 100200087, 100300066, 100300006) then goto T0_8; + else goto N0_9; + +T0_8: + response = 0.0410066553; + goto D0; + +N0_9: + if attribute(catid) in (100300093, 100200234, 100300126, 100200193, 100300122, 100300074) then goto T0_9; + else goto T0_10; + +T0_9: + response = 0.0557829172; + goto D0; + +T0_10: + response = 0.0704327304; + goto D0; + +D0: + +tnscore = tnscore + response; + + /* Tree 2 of 200 */ +N1_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100300076, 100200055, 100200232, 100300214) then goto N1_2; + else goto N1_7; + +N1_2: + if attribute(catid) in (100300011, 100300014, 100300058, 100300005, 100200068, 100300019, 100300209, 100400079, 100200170, 100400080, 100200176, 100300076, 100200055, 100200232, 100300214) then goto N1_3; + else goto N1_4; + +N1_3: + if attribute(catid) in (100200068, 100300019, 100200055, 100200232, 100300214) then goto T1_1; + else goto T1_2; + +T1_1: + response = -0.0359908300; + goto D1; + +T1_2: + response = -0.0027644159; + goto D1; + +N1_4: + if attribute(catid) in (100200171, 100200034, 100200186, 100300008, 100300116, 100300073, 100400038, 100200192, 100300127, 100300169, 100200087, 100300200) then goto N1_5; + else goto N1_6; + +N1_5: + if attribute(catid) in (100200171, 100200034, 100200186, 100300008, 100300116, 100200192) then goto T1_3; + else goto T1_4; + +T1_3: + response = 0.0113307000; + 
goto D1; + +T1_4: + response = 0.0164266261; + goto D1; + +N1_6: + if attribute(catid) in (0, 100400141, 100200052, 100300102, 100300027, 100300121, 100300065) then goto T1_5; + else goto T1_6; + +T1_5: + response = 0.0212550280; + goto D1; + +T1_6: + response = 0.0272380704; + goto D1; + +N1_7: + if attribute(catid) in (100200130, 100300013, 100300166, 100300004, 100200054, 100200193, 100300212, 100300074, 100300066) then goto N1_8; + else goto N1_9; + +N1_8: + if attribute(catid) in (100200130, 100300166) then goto T1_7; + else goto T1_8; + +T1_7: + response = 0.0328865429; + goto D1; + +T1_8: + response = 0.0399735491; + goto D1; + +N1_9: + if attribute(catid) in (100300165, 100300093, 100400142, 100300122, 100300006, 100300146) then goto T1_9; + else goto T1_10; + +T1_9: + response = 0.0477513417; + goto D1; + +T1_10: + response = 0.0587510469; + goto D1; + +D1: + +tnscore = tnscore + response; + + /* Tree 3 of 200 */ +N2_1: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300065, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100300076, 100300006, 100200232, 100300146) then goto N2_2; + else goto N2_6; + +N2_2: + if attribute(catid) in (100300014, 100300058, 100200034, 100200186, 100300008, 100200068, 100300019, 100300212, 100200232) then goto N2_3; + else goto N2_4; + +N2_3: + if attribute(catid) in (100300008, 100200068, 100300019, 100200232) then goto T2_1; + else goto T2_2; + +T2_1: + response = -0.0260716807; + goto D2; + +T2_2: + response = -0.0008004775; + goto D2; + +N2_4: + if attribute(catid) in (100300165, 100300005, 100300073, 100400079, 100200170, 100400080, 100200087, 100300146) then goto T2_3; + else goto N2_5; + +T2_3: + response = 0.0126841581; + goto D2; + +N2_5: + if attribute(catid) in (0, 100200171, 100300077, 
100200172, 100300065, 100300006) then goto T2_4; + else goto T2_5; + +T2_4: + response = 0.0228814610; + goto D2; + +T2_5: + response = 0.0298499891; + goto D2; + +N2_6: + if attribute(catid) in (100300011, 100200130, 100300013, 100300166, 100200052, 100300102, 100300116, 100200234, 100400142, 100200054, 100300209, 100300127, 100300074, 100300066, 100200176, 100200028) then goto N2_7; + else goto N2_8; + +N2_7: + if attribute(catid) in (100200130, 100200052, 100300102, 100300116, 100200234, 100200054, 100300209) then goto T2_6; + else goto T2_7; + +T2_6: + response = 0.0393021257; + goto D2; + +T2_7: + response = 0.0475085975; + goto D2; + +N2_8: + if attribute(catid) in (100400037, 100300122, 100200067) then goto T2_8; + else goto T2_9; + +T2_8: + response = 0.0575085503; + goto D2; + +T2_9: + response = 0.0751742626; + goto D2; + +D2: + +tnscore = tnscore + response; + + /* Tree 4 of 200 */ +N3_1: + if attribute(catid) in (0, 100300011, 100300014, 100300077, 100200186, 100400141, 100300165, 100300005, 100300008, 100200068, 100300032, 100300027, 100300121, 100300019, 100300126, 100300073, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200067, 100200055, 100300006) then goto N3_2; + else goto N3_7; + +N3_2: + if attribute(catid) in (100300005, 100300008, 100200068, 100300032, 100300209, 100400080, 100200028, 100200067, 100200055) then goto N3_3; + else goto N3_4; + +N3_3: + if attribute(catid) in (100300005, 100300008, 100300032, 100300209, 100200067, 100200055) then goto T3_1; + else goto T3_2; + +T3_1: + response = -0.0365460976; + goto D3; + +T3_2: + response = -0.0109180769; + goto D3; + +N3_4: + if attribute(catid) in (100300014, 100300073, 100200192, 100300212, 100400079, 100300074, 100200176) then goto T3_3; + else goto N3_5; + +T3_3: + response = 0.0093762436; + goto D3; + +N3_5: + if attribute(catid) in (100200186, 100300165, 100300126, 100200170, 
100300169, 100300200, 100300076) then goto T3_4; + else goto N3_6; + +T3_4: + response = 0.0193739138; + goto D3; + +N3_6: + if attribute(catid) in (0, 100300077, 100200087) then goto T3_5; + else goto T3_6; + +T3_5: + response = 0.0231180054; + goto D3; + +T3_6: + response = 0.0274056462; + goto D3; + +N3_7: + if attribute(catid) in (100200171, 100200130, 100300058, 100200034, 100200052, 100200172, 100300116, 100200053, 100400142, 100200054, 100300066) then goto N3_8; + else goto N3_9; + +N3_8: + if attribute(catid) in (100200171, 100200130, 100300058, 100300116, 100200054, 100300066) then goto T3_7; + else goto T3_8; + +T3_7: + response = 0.0339904435; + goto D3; + +T3_8: + response = 0.0402629873; + goto D3; + +N3_9: + if attribute(catid) in (100300013, 100300166, 100300102, 100200234, 100300004, 100400038, 100300122, 100300127, 100200185) then goto T3_9; + else goto T3_10; + +T3_9: + response = 0.0471640537; + goto D3; + +T3_10: + response = 0.0679501752; + goto D3; + +D3: + +tnscore = tnscore + response; + + /* Tree 5 of 200 */ +N4_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300004, 100300073, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N4_2; + else goto N4_7; + +N4_2: + if attribute(catid) in (100300014, 100200034, 100200186, 100400141, 100300005, 100300008, 100200068, 100300032, 100300121, 100300019, 100300004, 100200192, 100300212, 100300209, 100400079, 100200170, 100400080, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N4_3; + else goto N4_5; + +N4_3: + if attribute(catid) in (100300014, 100200034, 100200068, 100300032, 100300019, 100300212, 100300209, 100200170, 100300200, 100200028, 100200067) then 
goto N4_4; + else goto T4_3; + +N4_4: + if attribute(catid) in (100300032, 100300019, 100300212, 100300209, 100300200, 100200028, 100200067) then goto T4_1; + else goto T4_2; + +T4_1: + response = -0.0252149649; + goto D4; + +T4_2: + response = 0.0005982331; + goto D4; + +T4_3: + response = 0.0109551118; + goto D4; + +N4_5: + if attribute(catid) in (0, 100200171, 100200172, 100300027, 100300073, 100300065, 100300169, 100200087, 100300074) then goto N4_6; + else goto T4_6; + +N4_6: + if attribute(catid) in (100200171, 100300073, 100300169, 100200087, 100300074) then goto T4_4; + else goto T4_5; + +T4_4: + response = 0.0192764204; + goto D4; + +T4_5: + response = 0.0239324010; + goto D4; + +T4_6: + response = 0.0295724103; + goto D4; + +N4_7: + if attribute(catid) in (100300011, 100300077, 100300166, 100200052, 100200234, 100200053, 100400142, 100400038, 100300122, 100300127, 100300066) then goto N4_8; + else goto N4_9; + +N4_8: + if attribute(catid) in (100300011, 100300077, 100300166, 100200052, 100200234, 100400038) then goto T4_7; + else goto T4_8; + +T4_7: + response = 0.0362646736; + goto D4; + +T4_8: + response = 0.0458985070; + goto D4; + +N4_9: + if attribute(catid) in (100300143, 100300093, 100300102, 100300126, 100200193, 100300006) then goto T4_9; + else goto T4_10; + +T4_9: + response = 0.0576959337; + goto D4; + +T4_10: + response = 0.0940124464; + goto D4; + +D4: + +tnscore = tnscore + response; + + /* Tree 6 of 200 */ +N5_1: + if attribute(catid) in (0, 100200171, 100300014, 100300013, 100300077, 100200034, 100200186, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300200, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N5_2; + else goto N5_8; + +N5_2: + if attribute(catid) in (100300014, 100300102, 100300005, 100300008, 100200068, 
100300032, 100300027, 100300212, 100400079, 100300076, 100300214, 100300146) then goto N5_3; + else goto N5_5; + +N5_3: + if attribute(catid) in (100300008, 100200068, 100300032, 100300212, 100300214) then goto N5_4; + else goto T5_3; + +N5_4: + if attribute(catid) in (100300008, 100300032, 100300214) then goto T5_1; + else goto T5_2; + +T5_1: + response = -0.0524432898; + goto D5; + +T5_2: + response = -0.0132279367; + goto D5; + +T5_3: + response = 0.0034801390; + goto D5; + +N5_5: + if attribute(catid) in (100200171, 100300013, 100200034, 100200186, 100200052, 100300073, 100400038, 100200192, 100300169, 100400080, 100300074, 100300066, 100300200, 100200067, 100300006) then goto N5_6; + else goto N5_7; + +N5_6: + if attribute(catid) in (100300013, 100200186, 100200052, 100300073, 100400038, 100300169, 100400080, 100300074, 100200067, 100300006) then goto T5_4; + else goto T5_5; + +T5_4: + response = 0.0179759830; + goto D5; + +T5_5: + response = 0.0212068067; + goto D5; + +N5_7: + if attribute(catid) in (100300077, 100300165, 100300209, 100200170, 100200176) then goto T5_6; + else goto T5_7; + +T5_6: + response = 0.0229665861; + goto D5; + +T5_7: + response = 0.0258231076; + goto D5; + +N5_8: + if attribute(catid) in (100300011, 100200130, 100300058, 100300166, 100300143, 100400141, 100300093, 100300116, 100300121, 100200053, 100300004, 100400142, 100200054, 100300122, 100300127, 100200087, 100200232) then goto N5_9; + else goto N5_10; + +N5_9: + if attribute(catid) in (100300011, 100200130, 100300058, 100300166, 100300143, 100300093, 100300116, 100200053, 100200054, 100300122, 100200087) then goto T5_8; + else goto T5_9; + +T5_8: + response = 0.0353581654; + goto D5; + +T5_9: + response = 0.0430524781; + goto D5; + +N5_10: + if attribute(catid) in (100200234, 100300019, 100400037, 100200028) then goto T5_10; + else goto T5_11; + +T5_10: + response = 0.0542526213; + goto D5; + +T5_11: + response = 0.0961212144; + goto D5; + +D5: + +tnscore = tnscore + response; + 
+ /* Tree 7 of 200 */ +N6_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100200054, 100300073, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214, 100300146) then goto N6_2; + else goto N6_6; + +N6_2: + if attribute(catid) in (100200171, 100300011, 100300014, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100200192, 100300209, 100400079, 100400080, 100200087, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N6_3; + else goto N6_4; + +N6_3: + if attribute(catid) in (100300008, 100200068, 100300032, 100300209, 100300214) then goto T6_1; + else goto T6_2; + +T6_1: + response = -0.0256804569; + goto D6; + +T6_2: + response = 0.0046816048; + goto D6; + +N6_4: + if attribute(catid) in (100300058, 100200186, 100400141, 100300121, 100300019, 100200170, 100300169, 100300200) then goto T6_3; + else goto N6_5; + +T6_3: + response = 0.0160713107; + goto D6; + +N6_5: + if attribute(catid) in (0, 100200034, 100200052, 100200232) then goto T6_4; + else goto T6_5; + +T6_4: + response = 0.0221538810; + goto D6; + +T6_5: + response = 0.0250017744; + goto D6; + +N6_6: + if attribute(catid) in (100300166, 100300143, 100300165, 100300093, 100200172, 100200234, 100300004, 100300126, 100400142, 100400038, 100300065, 100300127, 100300074, 100300066, 100200185) then goto N6_7; + else goto N6_9; + +N6_7: + if attribute(catid) in (100300143, 100300165, 100300093, 100200172, 100200234, 100300126, 100400142, 100400038, 100300065, 100300066, 100200185) then goto N6_8; + else goto T6_8; + +N6_8: + if attribute(catid) in (100300093, 100200234, 100400038, 100300065, 100300066) then goto T6_6; + else goto T6_7; + +T6_6: + response = 0.0315719603; + goto D6; + 
+T6_7: + response = 0.0353792385; + goto D6; + +T6_8: + response = 0.0430233685; + goto D6; + +N6_9: + if attribute(catid) in (100200193, 100300122) then goto T6_9; + else goto T6_10; + +T6_9: + response = 0.0518243263; + goto D6; + +T6_10: + response = 0.0744220771; + goto D6; + +D6: + +tnscore = tnscore + response; + + /* Tree 8 of 200 */ +N7_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300073, 100400037, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100300076, 100200067, 100300006, 100300214, 100300146) then goto N7_2; + else goto N7_8; + +N7_2: + if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100400141, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100200192, 100400079, 100400080, 100200176, 100300200, 100300214, 100300146) then goto N7_3; + else goto N7_5; + +N7_3: + if attribute(catid) in (100300011, 100300102, 100300008, 100200068, 100300032, 100300200, 100300214) then goto T7_1; + else goto N7_4; + +T7_1: + response = -0.0241441823; + goto D7; + +N7_4: + if attribute(catid) in (100300014, 100400141, 100300005, 100200192, 100400080, 100200176) then goto T7_2; + else goto T7_3; + +T7_2: + response = 0.0020142953; + goto D7; + +T7_3: + response = 0.0081257199; + goto D7; + +N7_5: + if attribute(catid) in (100300058, 100300077, 100200052, 100300093, 100300073, 100400037, 100300065, 100200170, 100200087, 100200067) then goto T7_4; + else goto N7_6; + +T7_4: + response = 0.0141239460; + goto D7; + +N7_6: + if attribute(catid) in (0, 100200171, 100300165, 100300121, 100300019, 100300169) then goto N7_7; + else goto T7_7; + +N7_7: + if attribute(catid) in (0) then goto T7_5; + else goto T7_6; + +T7_5: + response = 
0.0217711535; + goto D7; + +T7_6: + response = 0.0235348760; + goto D7; + +T7_7: + response = 0.0294010162; + goto D7; + +N7_8: + if attribute(catid) in (100200130, 100300143, 100200172, 100200234, 100200053, 100400142, 100400038, 100300212, 100300209, 100300066) then goto N7_9; + else goto N7_10; + +N7_9: + if attribute(catid) in (100200130, 100200172, 100200053, 100400142, 100400038, 100300212, 100300209) then goto T7_8; + else goto T7_9; + +T7_8: + response = 0.0358343100; + goto D7; + +T7_9: + response = 0.0447717702; + goto D7; + +N7_10: + if attribute(catid) in (100300004, 100300126, 100300074, 100300007, 100300045, 100200028, 100200185, 100200232) then goto T7_10; + else goto T7_11; + +T7_10: + response = 0.0622909986; + goto D7; + +T7_11: + response = 0.0942393297; + goto D7; + +D7: + +tnscore = tnscore + response; + + /* Tree 9 of 200 */ +N8_1: + if attribute(catid) in (100300014, 100200034, 100300102, 100300005, 100200068, 100300019, 100200054, 100300209, 100400079, 100200170, 100400080, 100300200, 100200028, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N8_2; + else goto N8_3; + +N8_2: + if attribute(catid) in (100300102, 100300005, 100200068, 100300209, 100200028, 100200067, 100200232, 100300214) then goto T8_1; + else goto T8_2; + +T8_1: + response = -0.0222756779; + goto D8; + +T8_2: + response = -0.0032979771; + goto D8; + +N8_3: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100300121, 100200053, 100400142, 100300073, 100200192, 100300065, 100300212, 100300122, 100300127, 100300169, 100200087, 100300074, 100300006) then goto N8_4; + else goto N8_8; + +N8_4: + if attribute(catid) in (100300058, 100300077, 100200186, 100400141, 100200052, 100300008, 100300032, 100300073, 100200192, 100300212, 100300169, 100200087) then goto N8_5; + else goto N8_6; + +N8_5: + if attribute(catid) in 
(100300077, 100200186, 100400141, 100300032, 100200192, 100300212) then goto T8_3; + else goto T8_4; + +T8_3: + response = 0.0114479050; + goto D8; + +T8_4: + response = 0.0165377861; + goto D8; + +N8_6: + if attribute(catid) in (100200171, 100300165, 100300093, 100200172, 100300121) then goto T8_5; + else goto N8_7; + +T8_5: + response = 0.0209845722; + goto D8; + +N8_7: + if attribute(catid) in (0, 100200130) then goto T8_6; + else goto T8_7; + +T8_6: + response = 0.0242667474; + goto D8; + +T8_7: + response = 0.0268049425; + goto D8; + +N8_8: + if attribute(catid) in (100300011, 100300166, 100200234, 100300004, 100400037, 100400038, 100300066, 100200176) then goto T8_8; + else goto T8_9; + +T8_8: + response = 0.0447283469; + goto D8; + +T8_9: + response = 0.0603545392; + goto D8; + +D8: + +tnscore = tnscore + response; + + /* Tree 10 of 200 */ +N9_1: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200053, 100200054, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300200, 100300076, 100200067, 100200055, 100200232, 100300214) then goto N9_2; + else goto N9_6; + +N9_2: + if attribute(catid) in (100300005, 100300008, 100200068, 100300032, 100300121, 100300212, 100400079, 100400080, 100300200, 100300076, 100200055, 100300214) then goto N9_3; + else goto N9_4; + +N9_3: + if attribute(catid) in (100200068, 100300076, 100200055, 100300214) then goto T9_1; + else goto T9_2; + +T9_1: + response = -0.0265329011; + goto D9; + +T9_2: + response = -0.0038518940; + goto D9; + +N9_4: + if attribute(catid) in (100200171, 100300014, 100400141, 100300027, 100200054, 100300073, 100200192, 100200087, 100300074, 100200067) then goto T9_3; + else goto N9_5; + +T9_3: + response = 0.0107802387; + goto D9; + +N9_5: + if 
attribute(catid) in (0, 100300058, 100300077, 100200186, 100300093, 100200053, 100200170, 100300169, 100200232) then goto T9_4; + else goto T9_5; + +T9_4: + response = 0.0212053257; + goto D9; + +T9_5: + response = 0.0251822224; + goto D9; + +N9_6: + if attribute(catid) in (100300011, 100200130, 100300013, 100300166, 100300143, 100300102, 100300116, 100200234, 100300004, 100400142, 100200193, 100300122, 100300127, 100300066, 100200176, 100200028, 100300006) then goto N9_7; + else goto T9_9; + +N9_7: + if attribute(catid) in (100200130, 100300116, 100200234, 100300006) then goto T9_6; + else goto N9_8; + +T9_6: + response = 0.0314457190; + goto D9; + +N9_8: + if attribute(catid) in (100300166, 100400142, 100300122, 100300127, 100300066, 100200028) then goto T9_7; + else goto T9_8; + +T9_7: + response = 0.0391757711; + goto D9; + +T9_8: + response = 0.0454843261; + goto D9; + +T9_9: + response = 0.0873814277; + goto D9; + +D9: + +tnscore = tnscore + response; + + /* Tree 11 of 200 */ +N10_1: + if attribute(catid) in (0, 100300014, 100300058, 100300013, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100300073, 100400038, 100200192, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200028, 100300076, 100300006, 100200232) then goto N10_2; + else goto N10_7; + +N10_2: + if attribute(catid) in (100300013, 100200186, 100300165, 100200052, 100300102, 100300005, 100200068, 100300116, 100200234, 100300073, 100400079, 100300169, 100400080, 100200087, 100300200, 100300076) then goto N10_3; + else goto N10_6; + +N10_3: + if attribute(catid) in (100300102, 100200068, 100200234, 100400080, 100200087, 100300200) then goto N10_4; + else goto N10_5; + +N10_4: + if attribute(catid) in (100200068) then goto T10_1; + else goto T10_2; + +T10_1: + response = -0.0151909005; + goto D10; + +T10_2: + response = -0.0021225032; + goto 
D10; + +N10_5: + if attribute(catid) in (100300005, 100300073, 100400079, 100300169, 100300076) then goto T10_3; + else goto T10_4; + +T10_3: + response = 0.0086835438; + goto D10; + +T10_4: + response = 0.0120329553; + goto D10; + +N10_6: + if attribute(catid) in (100300058, 100400141, 100300008, 100300032, 100300027, 100300121, 100400038, 100200192, 100300212, 100200170, 100200028, 100300006) then goto T10_5; + else goto T10_6; + +T10_5: + response = 0.0171461073; + goto D10; + +T10_6: + response = 0.0218015413; + goto D10; + +N10_7: + if attribute(catid) in (100200171, 100300011, 100200130, 100300077, 100300166, 100200034, 100200053, 100300019, 100400142, 100200054, 100400037, 100300065, 100300122, 100300127, 100200176) then goto N10_8; + else goto N10_9; + +N10_8: + if attribute(catid) in (100200171, 100300011, 100200034, 100200053, 100300019, 100200054, 100300065) then goto T10_7; + else goto T10_8; + +T10_7: + response = 0.0280408356; + goto D10; + +T10_8: + response = 0.0355357753; + goto D10; + +N10_9: + if attribute(catid) in (100300093, 100300004, 100300126, 100200185) then goto T10_9; + else goto T10_10; + +T10_9: + response = 0.0584272687; + goto D10; + +T10_10: + response = 0.0854108429; + goto D10; + +D10: + +tnscore = tnscore + response; + + /* Tree 12 of 200 */ +N11_1: + if attribute(catid) in (100300011, 100300143, 100200034, 100300093, 100300005, 100300008, 100200068, 100300019, 100300073, 100400079, 100200170, 100400080, 100200087, 100200176, 100300200, 100300076, 100200055, 100200185, 100300006) then goto N11_2; + else goto N11_3; + +N11_2: + if attribute(catid) in (100300008, 100200068, 100400080, 100200176, 100200055) then goto T11_1; + else goto T11_2; + +T11_1: + response = -0.0169257508; + goto D11; + +T11_2: + response = 0.0096089202; + goto D11; + +N11_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100300102, 100200172, 100300032, 100300027, 
100300121, 100200053, 100300004, 100400142, 100200054, 100400037, 100200192, 100300065, 100300209, 100300122, 100300127, 100300169, 100300074) then goto N11_4; + else goto N11_7; + +N11_4: + if attribute(catid) in (0, 100300014, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100300032, 100300121, 100200053, 100400037, 100200192, 100300065, 100300169) then goto N11_5; + else goto T11_6; + +N11_5: + if attribute(catid) in (100300077, 100200186, 100200052, 100300032, 100400037, 100200192) then goto T11_3; + else goto N11_6; + +T11_3: + response = 0.0188605145; + goto D11; + +N11_6: + if attribute(catid) in (0, 100300014, 100200053) then goto T11_4; + else goto T11_5; + +T11_4: + response = 0.0230703185; + goto D11; + +T11_5: + response = 0.0248762385; + goto D11; + +T11_6: + response = 0.0312398602; + goto D11; + +N11_7: + if attribute(catid) in (100300116, 100400038, 100300212, 100300066, 100200232) then goto T11_7; + else goto T11_8; + +T11_7: + response = 0.0379114379; + goto D11; + +T11_8: + response = 0.0540119608; + goto D11; + +D11: + +tnscore = tnscore + response; + + /* Tree 13 of 200 */ +N12_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100200186, 100400141, 100300102, 100300005, 100200068, 100300032, 100300121, 100300073, 100200192, 100300209, 100400079, 100300169, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055, 100200232) then goto N12_2; + else goto N12_5; + +N12_2: + if attribute(catid) in (100300011, 100300014, 100300058, 100300005, 100200068, 100300209, 100400080, 100200087, 100200028, 100200067, 100200055, 100200232) then goto N12_3; + else goto N12_4; + +N12_3: + if attribute(catid) in (100300209, 100200087, 100200028, 100200067, 100200055, 100200232) then goto T12_1; + else goto T12_2; + +T12_1: + response = -0.0225817796; + goto D12; + +T12_2: + response = -0.0008082327; + goto D12; + +N12_4: + if attribute(catid) in (100300102, 100300121, 100300073, 100200192, 100400079) then goto T12_3; + else 
goto T12_4; + +T12_3: + response = 0.0088591799; + goto D12; + +T12_4: + response = 0.0138162711; + goto D12; + +N12_5: + if attribute(catid) in (0, 100200171, 100300013, 100200034, 100300165, 100200172, 100300027, 100300116, 100200234, 100300004, 100400142, 100200054, 100300065, 100300122, 100300127, 100200170, 100300006) then goto N12_6; + else goto N12_8; + +N12_6: + if attribute(catid) in (0, 100200034, 100200172, 100300027, 100300116, 100200054, 100300065, 100300127) then goto N12_7; + else goto T12_7; + +N12_7: + if attribute(catid) in (100200034, 100200172, 100300027, 100300116, 100200054, 100300065) then goto T12_5; + else goto T12_6; + +T12_5: + response = 0.0185182017; + goto D12; + +T12_6: + response = 0.0221653757; + goto D12; + +T12_7: + response = 0.0258671547; + goto D12; + +N12_8: + if attribute(catid) in (100300166, 100200052, 100300093, 100200053, 100400037, 100300076) then goto T12_8; + else goto N12_9; + +T12_8: + response = 0.0319314298; + goto D12; + +N12_9: + if attribute(catid) in (100200130, 100300143, 100300008, 100400038, 100300074, 100300066) then goto T12_9; + else goto T12_10; + +T12_9: + response = 0.0399544136; + goto D12; + +T12_10: + response = 0.0491124971; + goto D12; + +D12: + +tnscore = tnscore + response; + + /* Tree 14 of 200 */ +N13_1: + if attribute(catid) in (0, 100300011, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300073, 100400038, 100300065, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N13_2; + else goto N13_6; + +N13_2: + if attribute(catid) in (100300011, 100300005, 100300008, 100300032, 100400080, 100200087, 100300076, 100300214) then goto N13_3; + else goto N13_4; + +N13_3: + if attribute(catid) in (100300008, 100300032, 100200087, 100300076, 100300214) then goto T13_1; + else goto 
T13_2; + +T13_1: + response = -0.0316835796; + goto D13; + +T13_2: + response = -0.0075185917; + goto D13; + +N13_4: + if attribute(catid) in (100300077, 100200034, 100400141, 100300165, 100200068, 100300027, 100300121, 100300209, 100200176, 100300200, 100200067, 100300006) then goto T13_3; + else goto N13_5; + +T13_3: + response = 0.0093589722; + goto D13; + +N13_5: + if attribute(catid) in (0, 100200052, 100300073, 100400079, 100200232) then goto T13_4; + else goto T13_5; + +T13_4: + response = 0.0185920468; + goto D13; + +T13_5: + response = 0.0231228547; + goto D13; + +N13_6: + if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300166, 100300093, 100300102, 100200172, 100200234, 100200193, 100200192, 100300122, 100300127, 100300074, 100300066) then goto N13_7; + else goto N13_9; + +N13_7: + if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300102, 100200172, 100200234, 100200192, 100300127) then goto N13_8; + else goto T13_8; + +N13_8: + if attribute(catid) in (100200130, 100200234, 100200192) then goto T13_6; + else goto T13_7; + +T13_6: + response = 0.0282114001; + goto D13; + +T13_7: + response = 0.0319414987; + goto D13; + +T13_8: + response = 0.0377741997; + goto D13; + +N13_9: + if attribute(catid) in (100200053, 100300004, 100400142, 100200054, 100400037, 100200185) then goto T13_9; + else goto T13_10; + +T13_9: + response = 0.0450431326; + goto D13; + +T13_10: + response = 0.0654935018; + goto D13; + +D13: + +tnscore = tnscore + response; + + /* Tree 15 of 200 */ +N14_1: + if attribute(catid) in (100300011, 100300013, 100200034, 100200186, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300019, 100300126, 100300073, 100400037, 100200192, 100300065, 100300209, 100400079, 100200170, 100400080, 100200087, 100300074, 100300200, 100200067, 100300006) then goto N14_2; + else goto N14_4; + +N14_2: + if attribute(catid) in (100300005, 100300008, 100300032, 100300019, 100300209, 100200067) 
then goto T14_1; + else goto N14_3; + +T14_1: + response = -0.0352996105; + goto D14; + +N14_3: + if attribute(catid) in (100300011, 100200034, 100200186, 100200068, 100200192, 100200170, 100200087, 100300074) then goto T14_2; + else goto T14_3; + +T14_2: + response = 0.0029355359; + goto D14; + +T14_3: + response = 0.0110257031; + goto D14; + +N14_4: + if attribute(catid) in (0, 100200171, 100300014, 100300165, 100200052, 100200172, 100300121, 100200234, 100200053, 100400142, 100200054, 100300122, 100300127, 100300169, 100300066) then goto N14_5; + else goto N14_7; + +N14_5: + if attribute(catid) in (100300165, 100300121, 100200053, 100400142, 100300127, 100300169) then goto T14_4; + else goto N14_6; + +T14_4: + response = 0.0200666023; + goto D14; + +N14_6: + if attribute(catid) in (0, 100200171, 100200052, 100200172, 100200054) then goto T14_5; + else goto T14_6; + +T14_5: + response = 0.0258497457; + goto D14; + +T14_6: + response = 0.0331869782; + goto D14; + +N14_7: + if attribute(catid) in (100200130, 100300077, 100300166, 100400141, 100300093, 100300004, 100300007, 100200028) then goto T14_7; + else goto N14_8; + +T14_7: + response = 0.0426763778; + goto D14; + +N14_8: + if attribute(catid) in (100300143, 100400038, 100200176) then goto T14_8; + else goto T14_9; + +T14_8: + response = 0.0551482574; + goto D14; + +T14_9: + response = 0.0805987774; + goto D14; + +D14: + +tnscore = tnscore + response; + + /* Tree 16 of 200 */ +N15_1: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100200052, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100300073, 100400037, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200067, 100300146) then goto N15_2; + else goto N15_6; + +N15_2: + if attribute(catid) in (100300014, 100200034, 100300005, 
100200068, 100300032, 100200192, 100300212, 100300209, 100200176, 100300200, 100200028, 100200067, 100300146) then goto N15_3; + else goto N15_4; + +N15_3: + if attribute(catid) in (100200068, 100300032, 100200176, 100200028, 100200067, 100300146) then goto T15_1; + else goto T15_2; + +T15_1: + response = -0.0188052149; + goto D15; + +T15_2: + response = -0.0014384095; + goto D15; + +N15_4: + if attribute(catid) in (100300058, 100300102, 100200053, 100300073, 100300065, 100400079, 100300169, 100400080, 100200087, 100300076) then goto T15_3; + else goto N15_5; + +T15_3: + response = 0.0138476724; + goto D15; + +N15_5: + if attribute(catid) in (0, 100200171, 100200186, 100200052, 100400038, 100200170) then goto T15_4; + else goto T15_5; + +T15_4: + response = 0.0196068633; + goto D15; + +T15_5: + response = 0.0248333768; + goto D15; + +N15_6: + if attribute(catid) in (100300011, 100200130, 100300143, 100400141, 100300165, 100300093, 100300027, 100200234, 100300019, 100300004, 100400142, 100200193, 100300074, 100300066, 100200232) then goto N15_7; + else goto T15_8; + +N15_7: + if attribute(catid) in (100200130, 100400141, 100300165, 100300027, 100300019, 100300004, 100300074, 100300066) then goto T15_6; + else goto T15_7; + +T15_6: + response = 0.0337546327; + goto D15; + +T15_7: + response = 0.0412000578; + goto D15; + +T15_8: + response = 0.0666143289; + goto D15; + +D15: + +tnscore = tnscore + response; + + /* Tree 17 of 200 */ +N16_1: + if attribute(catid) in (100200171, 100300011, 100300077, 100200034, 100200186, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300019, 100300073, 100400038, 100300209, 100400079, 100400080, 100300074, 100300200, 100200067, 100200055, 100300006) then goto N16_2; + else goto N16_5; + +N16_2: + if attribute(catid) in (100200034, 100200186, 100300005, 100300008, 100200068, 100300019, 100300200, 100200067, 100200055) then goto N16_3; + else goto N16_4; + +N16_3: + if attribute(catid) in 
(100300008, 100200068, 100200067, 100200055) then goto T16_1; + else goto T16_2; + +T16_1: + response = -0.0193944486; + goto D16; + +T16_2: + response = -0.0039850146; + goto D16; + +N16_4: + if attribute(catid) in (100300011, 100300102, 100300027, 100300116, 100300121, 100400038, 100300209, 100400080, 100300074) then goto T16_3; + else goto T16_4; + +T16_3: + response = 0.0038699264; + goto D16; + +T16_4: + response = 0.0111071757; + goto D16; + +N16_5: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300013, 100300166, 100300143, 100200052, 100200172, 100200054, 100200192, 100300065, 100300127, 100200170, 100300169, 100200087, 100200176, 100200028, 100300076, 100300146) then goto N16_6; + else goto N16_8; + +N16_6: + if attribute(catid) in (0, 100200130, 100300058, 100300143, 100200172, 100200054, 100200192, 100200170, 100300169, 100200087, 100200176, 100200028, 100300076) then goto N16_7; + else goto T16_7; + +N16_7: + if attribute(catid) in (100200130, 100300143, 100200192, 100300169, 100200176, 100200028) then goto T16_5; + else goto T16_6; + +T16_5: + response = 0.0184644801; + goto D16; + +T16_6: + response = 0.0213606360; + goto D16; + +T16_7: + response = 0.0266245188; + goto D16; + +N16_8: + if attribute(catid) in (100400141, 100300165, 100200053, 100300004, 100300126, 100300212, 100300122, 100300066) then goto N16_9; + else goto T16_10; + +N16_9: + if attribute(catid) in (100200053, 100300004, 100300126, 100300122) then goto T16_8; + else goto T16_9; + +T16_8: + response = 0.0334635662; + goto D16; + +T16_9: + response = 0.0386077462; + goto D16; + +T16_10: + response = 0.0470519595; + goto D16; + +D16: + +tnscore = tnscore + response; + + /* Tree 18 of 200 */ +N17_1: + if attribute(catid) in (0, 100300011, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200053, 100400142, 100300073, 100200192, 100400079, 100200170, 
100300169, 100400080, 100200087, 100300074, 100300200, 100200028, 100200055, 100200232, 100300146) then goto N17_2; + else goto N17_6; + +N17_2: + if attribute(catid) in (100300011, 100300014, 100200034, 100300008, 100200068, 100300121, 100200192, 100200170, 100300074, 100300200, 100200055, 100200232, 100300146) then goto N17_3; + else goto N17_4; + +N17_3: + if attribute(catid) in (100200034, 100300008, 100300121, 100300200, 100200055, 100200232, 100300146) then goto T17_1; + else goto T17_2; + +T17_1: + response = -0.0069202095; + goto D17; + +T17_2: + response = 0.0039000323; + goto D17; + +N17_4: + if attribute(catid) in (100300058, 100400141, 100300093, 100300102, 100300005, 100200172, 100400142, 100300073, 100400080, 100200087) then goto T17_3; + else goto N17_5; + +T17_3: + response = 0.0156946965; + goto D17; + +N17_5: + if attribute(catid) in (0) then goto T17_4; + else goto T17_5; + +T17_4: + response = 0.0175514273; + goto D17; + +T17_5: + response = 0.0195153127; + goto D17; + +N17_6: + if attribute(catid) in (100200171, 100200130, 100300166, 100300165, 100200052, 100300032, 100300116, 100200234, 100300004, 100300126, 100400038, 100300065, 100300209, 100300066, 100300007, 100200176, 100300076) then goto N17_7; + else goto N17_9; + +N17_7: + if attribute(catid) in (100300166, 100300165, 100300004, 100300126, 100300065, 100300209) then goto T17_6; + else goto N17_8; + +T17_6: + response = 0.0260422255; + goto D17; + +N17_8: + if attribute(catid) in (100200171, 100200130, 100300032, 100400038, 100300066, 100300076) then goto T17_7; + else goto T17_8; + +T17_7: + response = 0.0288416138; + goto D17; + +T17_8: + response = 0.0331073272; + goto D17; + +N17_9: + if attribute(catid) in (100200054, 100200193, 100300122, 100300127, 100300045, 100200067) then goto T17_9; + else goto T17_10; + +T17_9: + response = 0.0443969439; + goto D17; + +T17_10: + response = 0.0673805882; + goto D17; + +D17: + +tnscore = tnscore + response; + + /* Tree 19 of 200 */ +N18_1: + 
if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100400142, 100300073, 100200192, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300200, 100200028, 100300076, 100200055, 100200232) then goto N18_2; + else goto N18_7; + +N18_2: + if attribute(catid) in (100200034, 100200068, 100300209, 100200170, 100300200, 100200028, 100200055, 100200232) then goto N18_3; + else goto N18_4; + +N18_3: + if attribute(catid) in (100200034, 100300209, 100300200, 100200028, 100200055, 100200232) then goto T18_1; + else goto T18_2; + +T18_1: + response = -0.0248522225; + goto D18; + +T18_2: + response = -0.0018897827; + goto D18; + +N18_4: + if attribute(catid) in (100200171, 100300013, 100300077, 100200186, 100400141, 100300093, 100300027, 100300116, 100400142, 100400079, 100400080) then goto N18_5; + else goto N18_6; + +N18_5: + if attribute(catid) in (100200171, 100300077, 100200186, 100300116, 100400079) then goto T18_3; + else goto T18_4; + +T18_3: + response = 0.0098977390; + goto D18; + +T18_4: + response = 0.0135323202; + goto D18; + +N18_6: + if attribute(catid) in (100300058, 100300165, 100200052, 100200172, 100300169, 100200087, 100300076) then goto T18_5; + else goto T18_6; + +T18_5: + response = 0.0178483129; + goto D18; + +T18_6: + response = 0.0206390742; + goto D18; + +N18_7: + if attribute(catid) in (100200130, 100300166, 100300008, 100200234, 100300004, 100300126, 100400037, 100400038, 100300065, 100300122, 100300074, 100300066, 100300006, 100300146) then goto N18_8; + else goto N18_9; + +N18_8: + if attribute(catid) in (100200130, 100300166, 100200234, 100300065, 100300146) then goto T18_7; + else goto T18_8; + +T18_7: + response = 0.0310277032; + goto D18; + +T18_8: + response = 0.0370699377; + goto D18; + +N18_9: + if attribute(catid) in (100300121, 100200053, 100300212, 
100300127, 100200176, 100200185) then goto T18_9; + else goto T18_10; + +T18_9: + response = 0.0485097295; + goto D18; + +T18_10: + response = 0.0645157682; + goto D18; + +D18: + +tnscore = tnscore + response; + + /* Tree 20 of 200 */ +N19_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200055, 100300006, 100200232, 100300214) then goto N19_2; + else goto N19_7; + +N19_2: + if attribute(catid) in (100200171, 100300011, 100300077, 100200034, 100200186, 100300005, 100200068, 100200053, 100300019, 100300004, 100300073, 100400038, 100200192, 100300209, 100400079, 100200087, 100300074, 100200055, 100300006, 100300214) then goto N19_3; + else goto N19_5; + +N19_3: + if attribute(catid) in (100300005, 100200068, 100300019, 100300209, 100200087, 100200055, 100300006, 100300214) then goto T19_1; + else goto N19_4; + +T19_1: + response = -0.0244019521; + goto D19; + +N19_4: + if attribute(catid) in (100200186, 100400038, 100400079, 100300074) then goto T19_2; + else goto T19_3; + +T19_2: + response = 0.0034399160; + goto D19; + +T19_3: + response = 0.0084132649; + goto D19; + +N19_5: + if attribute(catid) in (100200130, 100400141, 100200052, 100200172, 100300008, 100300027, 100300116, 100300065, 100300169, 100400080, 100300200) then goto N19_6; + else goto T19_6; + +N19_6: + if attribute(catid) in (100400141, 100200052, 100200172, 100300116, 100300065, 100400080, 100300200) then goto T19_4; + else goto T19_5; + +T19_4: + response = 0.0146253305; + goto D19; + +T19_5: + response = 0.0185737842; + goto D19; + +T19_6: + response = 0.0224432378; + goto D19; + +N19_7: + if attribute(catid) in (100300166, 100300165, 100300032, 100400142, 
100300122, 100200170, 100300007, 100200028) then goto N19_8; + else goto N19_9; + +N19_8: + if attribute(catid) in (100300166, 100300032, 100400142, 100200170) then goto T19_7; + else goto T19_8; + +T19_7: + response = 0.0312540362; + goto D19; + +T19_8: + response = 0.0367389808; + goto D19; + +N19_9: + if attribute(catid) in (100300014, 100200234, 100400037, 100200193, 100200176, 100200067, 100200185) then goto T19_9; + else goto T19_10; + +T19_9: + response = 0.0515240946; + goto D19; + +T19_10: + response = 0.0623565161; + goto D19; + +D19: + +tnscore = tnscore + response; + + /* Tree 21 of 200 */ +N20_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200234, 100300019, 100300004, 100200054, 100300073, 100400037, 100400038, 100300212, 100300209, 100400079, 100300169, 100400080, 100200087, 100300066, 100300200, 100200028, 100300076, 100200067, 100300006, 100300214) then goto N20_2; + else goto N20_7; + +N20_2: + if attribute(catid) in (100300013, 100300093, 100300008, 100200068, 100300019, 100300073, 100300212, 100300209, 100400080, 100200087, 100300200, 100200028, 100300076, 100200067, 100300214) then goto N20_3; + else goto N20_4; + +N20_3: + if attribute(catid) in (100300008, 100200068, 100300019, 100300212, 100400080, 100200067, 100300214) then goto T20_1; + else goto T20_2; + +T20_1: + response = -0.0143906523; + goto D20; + +T20_2: + response = 0.0034452824; + goto D20; + +N20_4: + if attribute(catid) in (100300058, 100200186, 100400141, 100300165, 100200052, 100300005, 100300032, 100300027, 100200234, 100200054, 100400038, 100400079, 100300169, 100300006) then goto N20_5; + else goto N20_6; + +N20_5: + if attribute(catid) in (100300058, 100300005, 100300027, 100200234, 100200054, 100300169, 100300006) then goto T20_3; + else goto T20_4; + +T20_3: + response = 
0.0099743393; + goto D20; + +T20_4: + response = 0.0144610757; + goto D20; + +N20_6: + if attribute(catid) in (0, 100300011, 100300014, 100300102, 100200172, 100300004, 100400037) then goto T20_5; + else goto T20_6; + +T20_5: + response = 0.0194162110; + goto D20; + +T20_6: + response = 0.0220846421; + goto D20; + +N20_7: + if attribute(catid) in (100200130, 100300166, 100200034, 100200053, 100200192, 100300065, 100200170, 100200176) then goto T20_7; + else goto N20_8; + +T20_7: + response = 0.0303840891; + goto D20; + +N20_8: + if attribute(catid) in (100300116, 100400142, 100300122, 100300127, 100300074, 100300045) then goto T20_8; + else goto T20_9; + +T20_8: + response = 0.0417668157; + goto D20; + +T20_9: + response = 0.0552431545; + goto D20; + +D20: + +tnscore = tnscore + response; + + /* Tree 22 of 200 */ +N21_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300126, 100200054, 100300073, 100400038, 100200192, 100300209, 100400079, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N21_2; + else goto N21_7; + +N21_2: + if attribute(catid) in (100300011, 100300058, 100200186, 100300165, 100300005, 100200068, 100300032, 100300116, 100300121, 100200234, 100300126, 100200054, 100400038, 100200192, 100300209, 100400079, 100200176, 100200028, 100200055, 100200232) then goto N21_3; + else goto N21_5; + +N21_3: + if attribute(catid) in (100300058, 100300005, 100200068, 100300032, 100300209, 100200028, 100200055, 100200232) then goto N21_4; + else goto T21_3; + +N21_4: + if attribute(catid) in (100300058, 100300005, 100300032, 100300209, 100200055) then goto T21_1; + else goto T21_2; + +T21_1: + response = -0.0199572721; + goto D21; + +T21_2: + response = -0.0000134782; + goto D21; + +T21_3: + response = 0.0095039400; 
+ goto D21; + +N21_5: + if attribute(catid) in (100200171, 100300077, 100400141, 100300073, 100400080, 100200087, 100300200) then goto T21_4; + else goto N21_6; + +T21_4: + response = 0.0170204672; + goto D21; + +N21_6: + if attribute(catid) in (0, 100200130, 100200053, 100300006) then goto T21_5; + else goto T21_6; + +T21_5: + response = 0.0199906818; + goto D21; + +T21_6: + response = 0.0230038494; + goto D21; + +N21_7: + if attribute(catid) in (100300014, 100300013, 100300166, 100200034, 100200052, 100300093, 100300008, 100400142, 100200193, 100300065, 100300122, 100300127, 100200170, 100300074, 100300066, 100300045) then goto N21_8; + else goto T21_9; + +N21_8: + if attribute(catid) in (100300013, 100300166, 100200034, 100300093, 100300008, 100400142, 100300127) then goto T21_7; + else goto T21_8; + +T21_7: + response = 0.0272410205; + goto D21; + +T21_8: + response = 0.0353850420; + goto D21; + +T21_9: + response = 0.0546059415; + goto D21; + +D21: + +tnscore = tnscore + response; + + /* Tree 23 of 200 */ +N22_1: + if attribute(catid) in (100300014, 100300013, 100200186, 100300165, 100300093, 100300102, 100300005, 100300008, 100200068, 100300027, 100300116, 100200234, 100300073, 100400038, 100200192, 100300212, 100400079, 100300169, 100400080, 100300074, 100300200, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N22_2; + else goto N22_5; + +N22_2: + if attribute(catid) in (100300008, 100200068, 100300027, 100300212, 100400080, 100300074, 100300200, 100200067, 100200232, 100300214) then goto N22_3; + else goto N22_4; + +N22_3: + if attribute(catid) in (100300008, 100200068, 100300212, 100300200, 100200067, 100200232, 100300214) then goto T22_1; + else goto T22_2; + +T22_1: + response = -0.0257347618; + goto D22; + +T22_2: + response = -0.0087401374; + goto D22; + +N22_4: + if attribute(catid) in (100300014, 100300165, 100300102, 100300073, 100200192, 100400079, 100300076) then goto T22_3; + else goto T22_4; + +T22_3: + response = 0.0079479453; 
+ goto D22; + +T22_4: + response = 0.0122270306; + goto D22; + +N22_5: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100200052, 100300121, 100300004, 100300126, 100400142, 100200054, 100400037, 100300065, 100300122, 100300127, 100200170, 100200176) then goto N22_6; + else goto N22_8; + +N22_6: + if attribute(catid) in (0, 100200130, 100200052, 100300121, 100300004, 100300126, 100200054, 100300065, 100300122, 100200176) then goto N22_7; + else goto T22_7; + +N22_7: + if attribute(catid) in (100200130, 100200052, 100300121, 100300004, 100300065, 100300122) then goto T22_5; + else goto T22_6; + +T22_5: + response = 0.0190487090; + goto D22; + +T22_6: + response = 0.0215394009; + goto D22; + +T22_7: + response = 0.0276338957; + goto D22; + +N22_8: + if attribute(catid) in (100200034, 100400141, 100200172, 100300032, 100200053, 100300209, 100300066, 100200185) then goto T22_8; + else goto T22_9; + +T22_8: + response = 0.0397536732; + goto D22; + +T22_9: + response = 0.0689753704; + goto D22; + +D22: + +tnscore = tnscore + response; + + /* Tree 24 of 200 */ +N23_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300121, 100200234, 100300019, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100200067, 100200055, 100300006, 100300214) then goto N23_2; + else goto N23_6; + +N23_2: + if attribute(catid) in (100200171, 100300011, 100300077, 100200186, 100200052, 100300093, 100300005, 100200068, 100300027, 100300121, 100200234, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100400080, 100200087, 100300200, 100200067, 100200055, 100300214) then goto N23_3; + else goto N23_5; + +N23_3: + if attribute(catid) in (100200186, 100300005, 100200068, 
100300212, 100200170, 100200087, 100300200, 100200055, 100300214) then goto T23_1; + else goto N23_4; + +T23_1: + response = -0.0052715451; + goto D23; + +N23_4: + if attribute(catid) in (100200171, 100300011, 100300093, 100300027, 100200234, 100300127, 100400079, 100200067) then goto T23_2; + else goto T23_3; + +T23_2: + response = 0.0075908988; + goto D23; + +T23_3: + response = 0.0114788963; + goto D23; + +N23_5: + if attribute(catid) in (100300019, 100300073, 100200193, 100300169, 100300074, 100300066, 100300006) then goto T23_4; + else goto T23_5; + +T23_4: + response = 0.0146049077; + goto D23; + +T23_5: + response = 0.0198627318; + goto D23; + +N23_6: + if attribute(catid) in (100300165, 100300008, 100300032, 100300116, 100200053, 100400142, 100400037, 100300122) then goto T23_6; + else goto N23_7; + +T23_6: + response = 0.0286281196; + goto D23; + +N23_7: + if attribute(catid) in (100200130, 100300013, 100300166, 100300004, 100300126, 100200054, 100300007, 100200028) then goto T23_7; + else goto T23_8; + +T23_7: + response = 0.0377626212; + goto D23; + +T23_8: + response = 0.0646214069; + goto D23; + +D23: + +tnscore = tnscore + response; + + /* Tree 25 of 200 */ +N24_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300121, 100200234, 100300019, 100400142, 100300073, 100200193, 100200192, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100200028, 100300006, 100200232) then goto N24_2; + else goto N24_6; + +N24_2: + if attribute(catid) in (100300014, 100300058, 100200186, 100400141, 100300093, 100300005, 100300032, 100200234, 100300019, 100300073, 100200192, 100300169, 100200087, 100300074, 100300200, 100200232) then goto N24_3; + else goto N24_4; + +N24_3: + if attribute(catid) in (100300014, 100300058, 100200186, 100300032, 100300019, 
100300074, 100300200, 100200232) then goto T24_1; + else goto T24_2; + +T24_1: + response = -0.0076214570; + goto D24; + +T24_2: + response = 0.0077673481; + goto D24; + +N24_4: + if attribute(catid) in (0, 100200171, 100300011, 100300077, 100300165, 100200068, 100300121, 100200193, 100300209, 100300122, 100400079, 100400080, 100300066, 100200028, 100300006) then goto N24_5; + else goto T24_5; + +N24_5: + if attribute(catid) in (100200171, 100300011, 100300165, 100400079, 100300006) then goto T24_3; + else goto T24_4; + +T24_3: + response = 0.0143096613; + goto D24; + +T24_4: + response = 0.0162758268; + goto D24; + +T24_5: + response = 0.0237528500; + goto D24; + +N24_6: + if attribute(catid) in (100200130, 100300166, 100200034, 100300116, 100200053, 100300004, 100200054, 100400038, 100300065, 100300212, 100300007, 100200176, 100300045, 100300076) then goto N24_7; + else goto N24_8; + +N24_7: + if attribute(catid) in (100200130, 100300166, 100200034, 100300116, 100200053) then goto T24_6; + else goto T24_7; + +T24_6: + response = 0.0297411208; + goto D24; + +T24_7: + response = 0.0387614885; + goto D24; + +N24_8: + if attribute(catid) in (100300143, 100300126, 100400037, 100200185) then goto T24_8; + else goto T24_9; + +T24_8: + response = 0.0600165302; + goto D24; + +T24_9: + response = 0.0961472130; + goto D24; + +D24: + +tnscore = tnscore + response; + + /* Tree 26 of 200 */ +N25_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100300004, 100300073, 100400038, 100200192, 100300212, 100300209, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100200028, 100200067, 100200055, 100200232, 100300214, 100300146) then goto N25_2; + else goto N25_5; + +N25_2: + if attribute(catid) in (100200034, 100300005, 100300008, 100200068, 100300116, 100300212, 100300209, 100400079, 
100400080, 100300200, 100200067, 100200055, 100200232, 100300214, 100300146) then goto N25_3; + else goto N25_4; + +N25_3: + if attribute(catid) in (100200034, 100300008, 100300212, 100300209, 100200055, 100200232, 100300214, 100300146) then goto T25_1; + else goto T25_2; + +T25_1: + response = -0.0143064261; + goto D25; + +T25_2: + response = 0.0061747257; + goto D25; + +N25_4: + if attribute(catid) in (100200171, 100300058, 100200186, 100400141, 100200172, 100300032, 100300027, 100300121, 100300004, 100400038, 100200170, 100200087, 100300074, 100200028) then goto T25_3; + else goto T25_4; + +T25_3: + response = 0.0140796593; + goto D25; + +T25_4: + response = 0.0185037483; + goto D25; + +N25_5: + if attribute(catid) in (100300011, 100300014, 100300013, 100300166, 100300143, 100300165, 100300093, 100300102, 100400142, 100400037, 100300065, 100300122, 100300127, 100300169, 100300006) then goto N25_6; + else goto T25_7; + +N25_6: + if attribute(catid) in (100300011, 100300014, 100300013, 100300166, 100300165, 100300102, 100400142, 100400037, 100300169) then goto T25_5; + else goto T25_6; + +T25_5: + response = 0.0279089674; + goto D25; + +T25_6: + response = 0.0344726516; + goto D25; + +T25_7: + response = 0.0515666225; + goto D25; + +D25: + +tnscore = tnscore + response; + + /* Tree 27 of 200 */ +N26_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300121, 100200053, 100400142, 100200054, 100300073, 100400037, 100200192, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100200176, 100300200, 100200028, 100300076, 100200185) then goto N26_2; + else goto N26_6; + +N26_2: + if attribute(catid) in (100300011, 100300014, 100200068, 100300032, 100200053, 100200192, 100300209, 100300122, 100200170, 100400080, 100200176, 100200028, 100300076, 100200185) then goto 
N26_3; + else goto N26_4; + +N26_3: + if attribute(catid) in (100300014, 100200068, 100300032, 100300209, 100400080, 100200176, 100200028, 100300076, 100200185) then goto T26_1; + else goto T26_2; + +T26_1: + response = -0.0100026799; + goto D26; + +T26_2: + response = 0.0069768979; + goto D26; + +N26_4: + if attribute(catid) in (100300077, 100300143, 100400141, 100200052, 100300005, 100300008, 100300121, 100400079, 100300169, 100200087) then goto T26_3; + else goto N26_5; + +T26_3: + response = 0.0149447853; + goto D26; + +N26_5: + if attribute(catid) in (0, 100300166, 100200034, 100200186, 100200172, 100400037, 100300066, 100300200) then goto T26_4; + else goto T26_5; + +T26_4: + response = 0.0207339117; + goto D26; + +T26_5: + response = 0.0250275322; + goto D26; + +N26_6: + if attribute(catid) in (100200130, 100300058, 100300165, 100300102, 100300116, 100200234, 100300004, 100300126, 100200193, 100400038, 100300065, 100300127, 100300074, 100300045, 100300006) then goto N26_7; + else goto T26_8; + +N26_7: + if attribute(catid) in (100300058, 100300102, 100300116, 100200234, 100300126, 100200193, 100400038, 100300127, 100300006) then goto T26_6; + else goto T26_7; + +T26_6: + response = 0.0305216411; + goto D26; + +T26_7: + response = 0.0392374586; + goto D26; + +T26_8: + response = 0.0735120500; + goto D26; + +D26: + +tnscore = tnscore + response; + + /* Tree 28 of 200 */ +N27_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300166, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100300066, 100200176, 100300076, 100200067, 100200055, 100300006, 100300214, 100300146) then goto N27_2; + else goto N27_8; + +N27_2: + if attribute(catid) in (100200171, 100300011, 
100300014, 100400141, 100300102, 100300005, 100300008, 100300121, 100300019, 100200193, 100200192, 100300209, 100200170, 100400080, 100200067, 100200055, 100300214) then goto N27_3; + else goto N27_5; + +N27_3: + if attribute(catid) in (100300102, 100300008, 100300019, 100200193, 100300209, 100200067, 100200055, 100300214) then goto T27_1; + else goto N27_4; + +T27_1: + response = -0.0452597976; + goto D27; + +N27_4: + if attribute(catid) in (100400141, 100300005, 100200192, 100400080) then goto T27_2; + else goto T27_3; + +T27_2: + response = 0.0025798730; + goto D27; + +T27_3: + response = 0.0085058714; + goto D27; + +N27_5: + if attribute(catid) in (100200130, 100200186, 100300165, 100200172, 100200068, 100300027, 100200234, 100400142, 100300066, 100200176, 100300076, 100300146) then goto N27_6; + else goto N27_7; + +N27_6: + if attribute(catid) in (100200130, 100200186, 100200068, 100300066, 100200176, 100300076, 100300146) then goto T27_4; + else goto T27_5; + +T27_4: + response = 0.0134972332; + goto D27; + +T27_5: + response = 0.0161598104; + goto D27; + +N27_7: + if attribute(catid) in (0, 100200053, 100300073) then goto T27_6; + else goto T27_7; + +T27_6: + response = 0.0201733337; + goto D27; + +T27_7: + response = 0.0242718101; + goto D27; + +N27_8: + if attribute(catid) in (100300013, 100300077, 100300143, 100200034, 100300093, 100300116, 100300127, 100200087, 100300074, 100300007, 100300200, 100300045) then goto T27_8; + else goto T27_9; + +T27_8: + response = 0.0376364024; + goto D27; + +T27_9: + response = 0.0722294524; + goto D27; + +D27: + +tnscore = tnscore + response; + + /* Tree 29 of 200 */ +N28_1: + if attribute(catid) in (100300014, 100400141, 100300102, 100300008, 100200068, 100300032, 100200234, 100300019, 100400038, 100300212, 100400080, 100200176, 100300200, 100200028, 100200055, 100200185, 100300006, 100300214) then goto N28_2; + else goto N28_4; + +N28_2: + if attribute(catid) in (100300014, 100300102, 100300008, 100200068, 100300032, 
100300019, 100300212, 100300200, 100200055, 100300214) then goto N28_3; + else goto T28_3; + +N28_3: + if attribute(catid) in (100300102, 100300008, 100300032, 100300019, 100300212, 100300200, 100200055, 100300214) then goto T28_1; + else goto T28_2; + +T28_1: + response = -0.0330162432; + goto D28; + +T28_2: + response = -0.0099054066; + goto D28; + +T28_3: + response = 0.0037184723; + goto D28; + +N28_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100300143, 100200034, 100200186, 100300165, 100200052, 100300093, 100300005, 100200172, 100300027, 100200053, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100300074, 100300076) then goto N28_5; + else goto N28_8; + +N28_5: + if attribute(catid) in (100200171, 100300166, 100200034, 100200186, 100300165, 100300093, 100300005, 100300027, 100200053, 100400079, 100300169, 100300074) then goto N28_6; + else goto N28_7; + +N28_6: + if attribute(catid) in (100200171, 100300166, 100200186, 100300005, 100300027, 100200053, 100300169) then goto T28_4; + else goto T28_5; + +T28_4: + response = 0.0128773968; + goto D28; + +T28_5: + response = 0.0170605503; + goto D28; + +N28_7: + if attribute(catid) in (0, 100300143, 100200192, 100300065, 100200170, 100300076) then goto T28_6; + else goto T28_7; + +T28_6: + response = 0.0199592353; + goto D28; + +T28_7: + response = 0.0237865531; + goto D28; + +N28_8: + if attribute(catid) in (100300077, 100300121, 100400142, 100300073, 100200193, 100200087, 100300066, 100300007) then goto T28_8; + else goto T28_9; + +T28_8: + response = 0.0288156047; + goto D28; + +T28_9: + response = 0.0451598089; + goto D28; + +D28: + +tnscore = tnscore + response; + + /* Tree 30 of 200 */ +N29_1: + if attribute(catid) in (100200171, 100300011, 100300013, 100200034, 100200186, 100200052, 100300102, 100300008, 100200068, 100300027, 100300121, 100200053, 100300019, 100300004, 100300073, 100200193, 100400038, 100200192, 100300212, 100300127, 100400079, 100200170, 
100400080, 100200087, 100300074, 100300007, 100300214, 100300146) then goto N29_2; + else goto N29_5; + +N29_2: + if attribute(catid) in (100300013, 100200034, 100200068, 100300121, 100300019, 100200170, 100200087, 100300214, 100300146) then goto N29_3; + else goto N29_4; + +N29_3: + if attribute(catid) in (100300013, 100200034, 100300121, 100300019, 100300214, 100300146) then goto T29_1; + else goto T29_2; + +T29_1: + response = -0.0200374966; + goto D29; + +T29_2: + response = -0.0056497245; + goto D29; + +N29_4: + if attribute(catid) in (100300011, 100200186, 100300102, 100300008, 100300004, 100200192, 100300212, 100400079, 100400080, 100300074) then goto T29_3; + else goto T29_4; + +T29_3: + response = 0.0036157343; + goto D29; + +T29_4: + response = 0.0117177746; + goto D29; + +N29_5: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300166, 100300143, 100400141, 100300165, 100300093, 100300005, 100200172, 100300032, 100300116, 100300126, 100400142, 100300065, 100300122, 100300169, 100300066, 100300200, 100200028, 100300006) then goto N29_6; + else goto N29_8; + +N29_6: + if attribute(catid) in (0, 100200130, 100300166, 100400141, 100300165, 100200172, 100300032, 100300066, 100300006) then goto N29_7; + else goto T29_7; + +N29_7: + if attribute(catid) in (100200130, 100400141, 100300165, 100200172, 100300032, 100300006) then goto T29_5; + else goto T29_6; + +T29_5: + response = 0.0173296173; + goto D29; + +T29_6: + response = 0.0209361475; + goto D29; + +T29_7: + response = 0.0268947656; + goto D29; + +N29_8: + if attribute(catid) in (100300077, 100400037, 100300209, 100200176, 100300045, 100300076, 100200185) then goto T29_8; + else goto T29_9; + +T29_8: + response = 0.0431779718; + goto D29; + +T29_9: + response = 0.0596202146; + goto D29; + +D29: + +tnscore = tnscore + response; + + /* Tree 31 of 200 */ +N30_1: + if attribute(catid) in (100300011, 100300014, 100200034, 100300102, 100300005, 100200068, 100300032, 100300027, 100300116, 
100200234, 100300073, 100400038, 100200192, 100400079, 100200170, 100400080, 100300200, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N30_2; + else goto N30_4; + +N30_2: + if attribute(catid) in (100300011, 100300102, 100200068, 100300032, 100300027, 100400038, 100300200, 100200067, 100200055) then goto T30_1; + else goto N30_3; + +T30_1: + response = -0.0105115826; + goto D30; + +N30_3: + if attribute(catid) in (100300014, 100200234, 100200028, 100300006, 100200232) then goto T30_2; + else goto T30_3; + +T30_2: + response = 0.0007402621; + goto D30; + +T30_3: + response = 0.0078629039; + goto D30; + +N30_4: + if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300143, 100200186, 100400141, 100300165, 100200052, 100200172, 100300121, 100200053, 100300019, 100400142, 100300122, 100300127, 100300169, 100200087, 100200176) then goto N30_5; + else goto N30_7; + +N30_5: + if attribute(catid) in (100200171, 100200130, 100200186, 100200052, 100200172, 100200053, 100300122, 100200176) then goto T30_4; + else goto N30_6; + +T30_4: + response = 0.0148540934; + goto D30; + +N30_6: + if attribute(catid) in (0, 100300077, 100300143, 100400141, 100300019, 100400142, 100300127) then goto T30_5; + else goto T30_6; + +T30_5: + response = 0.0193734454; + goto D30; + +T30_6: + response = 0.0217320370; + goto D30; + +N30_7: + if attribute(catid) in (100300166, 100300126, 100200054, 100200193, 100300065, 100300212, 100300066) then goto T30_7; + else goto T30_8; + +T30_7: + response = 0.0305394508; + goto D30; + +T30_8: + response = 0.0503395698; + goto D30; + +D30: + +tnscore = tnscore + response; + + /* Tree 32 of 200 */ +N31_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100300126, 100200054, 100300073, 100400038, 100200192, 
100300065, 100300212, 100300209, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N31_2; + else goto N31_7; + +N31_2: + if attribute(catid) in (100300014, 100300013, 100300077, 100400141, 100300165, 100300005, 100300008, 100300116, 100200053, 100300126, 100400038, 100300212, 100300209, 100400079, 100200170, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232) then goto N31_3; + else goto N31_5; + +N31_3: + if attribute(catid) in (100300005, 100300126, 100300209, 100200087, 100300200, 100200055) then goto T31_1; + else goto N31_4; + +T31_1: + response = -0.0155099848; + goto D31; + +N31_4: + if attribute(catid) in (100300014, 100300013, 100400141, 100400038, 100300212, 100400079, 100200170, 100200028, 100300076, 100200232) then goto T31_2; + else goto T31_3; + +T31_2: + response = 0.0052367005; + goto D31; + +T31_3: + response = 0.0111703118; + goto D31; + +N31_5: + if attribute(catid) in (0, 100200034, 100300102, 100200068, 100300027, 100300121, 100300073, 100200192, 100300169, 100400080, 100300074) then goto N31_6; + else goto T31_6; + +N31_6: + if attribute(catid) in (100200034, 100200068, 100300027, 100300121, 100300073, 100200192, 100300169, 100400080, 100300074) then goto T31_4; + else goto T31_5; + +T31_4: + response = 0.0150887568; + goto D31; + +T31_5: + response = 0.0184571681; + goto D31; + +T31_6: + response = 0.0221098064; + goto D31; + +N31_7: + if attribute(catid) in (100300011, 100300166, 100300093, 100300004, 100400142, 100300127, 100300066, 100300045) then goto T31_7; + else goto N31_8; + +T31_7: + response = 0.0294845237; + goto D31; + +N31_8: + if attribute(catid) in (100300058, 100300143, 100200193, 100300007) then goto T31_8; + else goto T31_9; + +T31_8: + response = 0.0412156817; + goto D31; + +T31_9: + response = 0.0581097149; + goto D31; + +D31: + +tnscore = tnscore + response; + + /* Tree 33 
of 200 */ +N32_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300126, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100300007, 100200176, 100300200, 100300076, 100200067, 100200055, 100200185, 100300214) then goto N32_2; + else goto N32_7; + +N32_2: + if attribute(catid) in (100300011, 100300014, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100300019, 100400037, 100300209, 100200176, 100200055, 100200185, 100300214) then goto N32_3; + else goto N32_4; + +N32_3: + if attribute(catid) in (100300008, 100200068, 100300032, 100200055, 100200185, 100300214) then goto T32_1; + else goto T32_2; + +T32_1: + response = -0.0168814696; + goto D32; + +T32_2: + response = -0.0007128433; + goto D32; + +N32_4: + if attribute(catid) in (100200171, 100300058, 100300077, 100200034, 100400141, 100300102, 100300005, 100300126, 100200054, 100200193, 100400038, 100200192, 100300212, 100200170, 100300169, 100400080, 100200087, 100300076) then goto N32_5; + else goto N32_6; + +N32_5: + if attribute(catid) in (100300058, 100200034, 100200193, 100400038, 100200170, 100300169, 100400080, 100300076) then goto T32_3; + else goto T32_4; + +T32_3: + response = 0.0104471779; + goto D32; + +T32_4: + response = 0.0151303026; + goto D32; + +N32_6: + if attribute(catid) in (0, 100200186, 100300093, 100300121, 100200053, 100300073, 100300200) then goto T32_5; + else goto T32_6; + +T32_5: + response = 0.0192366025; + goto D32; + +T32_6: + response = 0.0235706074; + goto D32; + +N32_7: + if attribute(catid) in (100300143, 100300165, 100200172, 100300004, 100400142, 100300122, 100300074, 100200232) then goto N32_8; + 
else goto T32_9; + +N32_8: + if attribute(catid) in (100200172, 100400142, 100300122, 100200232) then goto T32_7; + else goto T32_8; + +T32_7: + response = 0.0313391652; + goto D32; + +T32_8: + response = 0.0410697301; + goto D32; + +T32_9: + response = 0.0691824633; + goto D32; + +D32: + +tnscore = tnscore + response; + + /* Tree 34 of 200 */ +N33_1: + if attribute(catid) in (100300013, 100200186, 100400141, 100300005, 100200068, 100300032, 100300116, 100200234, 100300004, 100200192, 100300212, 100300209, 100200170, 100400080, 100300074, 100300200, 100200028, 100200055, 100300146) then goto N33_2; + else goto N33_4; + +N33_2: + if attribute(catid) in (100300032, 100300212, 100300209, 100200028, 100200055, 100300146) then goto T33_1; + else goto N33_3; + +T33_1: + response = -0.0313637219; + goto D33; + +N33_3: + if attribute(catid) in (100300013, 100200186, 100400141, 100200068, 100200234, 100300004, 100400080, 100300200) then goto T33_2; + else goto T33_3; + +T33_2: + response = 0.0050115322; + goto D33; + +T33_3: + response = 0.0108163539; + goto D33; + +N33_4: + if attribute(catid) in (0, 100200171, 100200130, 100300077, 100200034, 100300165, 100300102, 100200172, 100300008, 100300027, 100300121, 100200053, 100300126, 100400142, 100300073, 100400038, 100300065, 100300127, 100400079, 100300169, 100200087, 100300066, 100300007, 100300076, 100300006) then goto N33_5; + else goto N33_7; + +N33_5: + if attribute(catid) in (0, 100300077, 100200034, 100300165, 100200172, 100300008, 100300121, 100200053, 100300126, 100400142, 100300073, 100400038, 100300065, 100400079, 100200087) then goto N33_6; + else goto T33_6; + +N33_6: + if attribute(catid) in (100300077, 100300165, 100200172, 100300008, 100300121, 100300073, 100400038, 100200087) then goto T33_4; + else goto T33_5; + +T33_4: + response = 0.0169532751; + goto D33; + +T33_5: + response = 0.0197771960; + goto D33; + +T33_6: + response = 0.0229019262; + goto D33; + +N33_7: + if attribute(catid) in (100300014, 
100300058, 100300166, 100300143, 100200052, 100300093, 100200054, 100300122) then goto T33_7; + else goto T33_8; + +T33_7: + response = 0.0275740075; + goto D33; + +T33_8: + response = 0.0402576409; + goto D33; + +D33: + +tnscore = tnscore + response; + + /* Tree 35 of 200 */ +N34_1: + if attribute(catid) in (0, 100200171, 100300011, 100300058, 100300013, 100300077, 100200186, 100300165, 100200052, 100300102, 100300005, 100200068, 100300032, 100300027, 100300116, 100200234, 100300019, 100300126, 100400142, 100300073, 100200192, 100400079, 100300169, 100400080, 100300074, 100300200, 100300045, 100200028, 100300076, 100200185, 100200232, 100300214) then goto N34_2; + else goto N34_5; + +N34_2: + if attribute(catid) in (100300011, 100300013, 100200052, 100300102, 100200068, 100300032, 100300116, 100300019, 100300126, 100300076, 100200185, 100200232, 100300214) then goto N34_3; + else goto N34_4; + +N34_3: + if attribute(catid) in (100300013, 100200068, 100300032, 100300019, 100300076, 100200185, 100200232, 100300214) then goto T34_1; + else goto T34_2; + +T34_1: + response = -0.0132508399; + goto D34; + +T34_2: + response = 0.0002145632; + goto D34; + +N34_4: + if attribute(catid) in (0, 100300058, 100300077, 100200186, 100300165, 100300005, 100300027, 100300073, 100200192, 100300169, 100400080) then goto T34_3; + else goto T34_4; + +T34_3: + response = 0.0144506818; + goto D34; + +T34_4: + response = 0.0181232118; + goto D34; + +N34_5: + if attribute(catid) in (100200130, 100300014, 100300166, 100300143, 100200034, 100400141, 100300093, 100200172, 100300008, 100300121, 100200053, 100300004, 100300065, 100300212, 100300209, 100300122, 100300127, 100200170, 100200087, 100300006) then goto N34_6; + else goto T34_7; + +N34_6: + if attribute(catid) in (100200130, 100200034, 100400141, 100300093, 100300121, 100300004, 100300065, 100300212, 100300127, 100200170, 100200087, 100300006) then goto T34_5; + else goto T34_6; + +T34_5: + response = 0.0237514530; + goto D34; + 
+T34_6: + response = 0.0281193568; + goto D34; + +T34_7: + response = 0.0394520537; + goto D34; + +D34: + +tnscore = tnscore + response; + + /* Tree 36 of 200 */ +N35_1: + if attribute(catid) in (100300011, 100300013, 100300077, 100200034, 100200186, 100300102, 100300005, 100200068, 100300116, 100300121, 100400079, 100200170, 100400080, 100300074, 100300200, 100300076, 100200055, 100300214, 100300146) then goto N35_2; + else goto N35_3; + +N35_2: + if attribute(catid) in (100300011, 100300013, 100200034, 100200186, 100300005, 100200170, 100300076, 100200055, 100300214) then goto T35_1; + else goto T35_2; + +T35_1: + response = -0.0113273120; + goto D35; + +T35_2: + response = 0.0081802635; + goto D35; + +N35_3: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100400141, 100300165, 100200172, 100300008, 100300027, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100400037, 100400038, 100200192, 100300065, 100300122, 100300169, 100300066, 100300045, 100200028, 100200067, 100300006) then goto N35_4; + else goto N35_6; + +N35_4: + if attribute(catid) in (100200130, 100300058, 100300165, 100200172, 100300027, 100200234, 100300073) then goto T35_3; + else goto N35_5; + +T35_3: + response = 0.0154377299; + goto D35; + +N35_5: + if attribute(catid) in (0, 100400141, 100300008, 100400037, 100400038, 100200192, 100300169, 100300045, 100200067) then goto T35_4; + else goto T35_5; + +T35_4: + response = 0.0188466465; + goto D35; + +T35_5: + response = 0.0219373268; + goto D35; + +N35_6: + if attribute(catid) in (100300014, 100200052, 100300093, 100300032, 100300004, 100300127, 100200087, 100200176, 100200185) then goto T35_6; + else goto T35_7; + +T35_6: + response = 0.0332492867; + goto D35; + +T35_7: + response = 0.0538118306; + goto D35; + +D35: + +tnscore = tnscore + response; + + /* Tree 37 of 200 */ +N36_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300166, 100300143, 100200186, 
100300165, 100200052, 100300093, 100300008, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100400038, 100200192, 100300212, 100300122, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100200067, 100200055, 100300214, 100300146) then goto N36_2; + else goto N36_6; + +N36_2: + if attribute(catid) in (100200171, 100300014, 100300013, 100300008, 100200068, 100300116, 100300121, 100200054, 100300073, 100200192, 100300212, 100400080, 100200087, 100300200, 100200055, 100300214, 100300146) then goto N36_3; + else goto N36_4; + +N36_3: + if attribute(catid) in (100300014, 100300008, 100200068, 100200054, 100300212, 100300200, 100200055, 100300214) then goto T36_1; + else goto T36_2; + +T36_1: + response = -0.0068335973; + goto D36; + +T36_2: + response = 0.0078647534; + goto D36; + +N36_4: + if attribute(catid) in (100300165, 100200052, 100200053, 100300019, 100300004, 100300126, 100300122, 100200170, 100300169, 100300074) then goto T36_3; + else goto N36_5; + +T36_3: + response = 0.0135025323; + goto D36; + +N36_5: + if attribute(catid) in (0, 100300143, 100300027, 100400142, 100400038) then goto T36_4; + else goto T36_5; + +T36_4: + response = 0.0176344289; + goto D36; + +T36_5: + response = 0.0196235950; + goto D36; + +N36_6: + if attribute(catid) in (100300058, 100200034, 100400141, 100200193, 100300065, 100300209, 100300127, 100400079, 100300066, 100300045, 100200028, 100300076, 100300006) then goto T36_6; + else goto N36_7; + +T36_6: + response = 0.0257873841; + goto D36; + +N36_7: + if attribute(catid) in (100200172, 100300032, 100200176) then goto T36_7; + else goto T36_8; + +T36_7: + response = 0.0338144700; + goto D36; + +T36_8: + response = 0.0425972117; + goto D36; + +D36: + +tnscore = tnscore + response; + + /* Tree 38 of 200 */ +N37_1: + if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 
100300165, 100200052, 100300102, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300126, 100200054, 100300073, 100400037, 100200192, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200028, 100300076) then goto N37_2; + else goto N37_5; + +N37_2: + if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100300102, 100300008, 100200068, 100300027, 100300116, 100300073, 100400037, 100200192, 100300209, 100400079, 100200087, 100300074, 100200176, 100200028, 100300076) then goto N37_3; + else goto N37_4; + +N37_3: + if attribute(catid) in (100300102, 100300008, 100400037, 100200087, 100200176, 100300076) then goto T37_1; + else goto T37_2; + +T37_1: + response = -0.0165719048; + goto D37; + +T37_2: + response = 0.0081977488; + goto D37; + +N37_4: + if attribute(catid) in (100300013, 100300165, 100200052, 100200172, 100200053, 100200054, 100300169) then goto T37_3; + else goto T37_4; + +T37_3: + response = 0.0136130091; + goto D37; + +T37_4: + response = 0.0180110506; + goto D37; + +N37_5: + if attribute(catid) in (100200171, 100200130, 100300058, 100300093, 100300032, 100300121, 100400142, 100200170, 100300200, 100300006, 100200232) then goto T37_5; + else goto N37_6; + +T37_5: + response = 0.0236852926; + goto D37; + +N37_6: + if attribute(catid) in (100300143, 100300005, 100200193, 100300065, 100300122, 100200067) then goto T37_6; + else goto T37_7; + +T37_6: + response = 0.0327973275; + goto D37; + +T37_7: + response = 0.0538361793; + goto D37; + +D37: + +tnscore = tnscore + response; + + /* Tree 39 of 200 */ +N38_1: + if attribute(catid) in (100200171, 100300011, 100300014, 100300013, 100300143, 100200034, 100200186, 100300005, 100300008, 100200068, 100300116, 100200053, 100300019, 100400037, 100200192, 100200170, 100300074, 100300007, 100300200, 100200028, 100200055, 100300214, 100300146) then goto N38_2; + else goto N38_5; + +N38_2: + if attribute(catid) in 
(100300011, 100300143, 100200186, 100300005, 100300008, 100200068, 100300019, 100200055, 100300214, 100300146) then goto N38_3; + else goto N38_4; + +N38_3: + if attribute(catid) in (100300011, 100300143, 100300005, 100300019, 100200055, 100300214, 100300146) then goto T38_1; + else goto T38_2; + +T38_1: + response = -0.0264171514; + goto D38; + +T38_2: + response = -0.0055670691; + goto D38; + +N38_4: + if attribute(catid) in (100300014, 100300013, 100300116, 100400037, 100200192, 100200170, 100300007) then goto T38_3; + else goto T38_4; + +T38_3: + response = 0.0026101595; + goto D38; + +T38_4: + response = 0.0084034666; + goto D38; + +N38_5: + if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300166, 100400141, 100200052, 100300093, 100300027, 100300121, 100200234, 100300004, 100300126, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100200176, 100300045, 100300076, 100200067, 100300006, 100200232) then goto N38_6; + else goto N38_9; + +N38_6: + if attribute(catid) in (0, 100300058, 100300077, 100200052, 100300093, 100300027, 100200234, 100200054, 100300073, 100300169, 100300045, 100200067, 100200232) then goto N38_7; + else goto N38_8; + +N38_7: + if attribute(catid) in (100300077, 100300027, 100200234, 100200054, 100300073, 100300169) then goto T38_5; + else goto T38_6; + +T38_5: + response = 0.0136319750; + goto D38; + +T38_6: + response = 0.0160752676; + goto D38; + +N38_8: + if attribute(catid) in (100200130, 100400141, 100300121, 100300127, 100400079, 100200176, 100300076) then goto T38_7; + else goto T38_8; + +T38_7: + response = 0.0182663338; + goto D38; + +T38_8: + response = 0.0220047542; + goto D38; + +N38_9: + if attribute(catid) in (100300102, 100200172, 100400142, 100300065, 100300066, 100200185) then goto T38_9; + else goto N38_10; + +T38_9: + response = 0.0265330650; + goto D38; + +N38_10: + if attribute(catid) in (100300165, 100400038) then goto T38_10; + else goto T38_11; + +T38_10: + 
response = 0.0364634573; + goto D38; + +T38_11: + response = 0.0690252268; + goto D38; + +D38: + +tnscore = tnscore + response; + + /* Tree 40 of 200 */ +N39_1: + if attribute(catid) in (0, 100200171, 100300077, 100300166, 100200034, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300032, 100300027, 100200234, 100200053, 100300004, 100300126, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300076, 100200067, 100200055, 100300006, 100300214) then goto N39_2; + else goto N39_7; + +N39_2: + if attribute(catid) in (100400141, 100300165, 100300032, 100300004, 100300126, 100200193, 100300007, 100200176, 100200067, 100200055, 100300006, 100300214) then goto N39_3; + else goto N39_4; + +N39_3: + if attribute(catid) in (100300032, 100300126, 100300007, 100200176, 100200067, 100200055, 100300006, 100300214) then goto T39_1; + else goto T39_2; + +T39_1: + response = -0.0053479534; + goto D39; + +T39_2: + response = 0.0054148332; + goto D39; + +N39_4: + if attribute(catid) in (0, 100300077, 100200068, 100300027, 100200234, 100200053, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300076) then goto N39_5; + else goto N39_6; + +N39_5: + if attribute(catid) in (100200068, 100200053, 100300065, 100300209, 100200170, 100300169, 100300076) then goto T39_3; + else goto T39_4; + +T39_3: + response = 0.0124266534; + goto D39; + +T39_4: + response = 0.0155941575; + goto D39; + +N39_6: + if attribute(catid) in (100300166, 100200034, 100300093, 100200172, 100300073) then goto T39_5; + else goto T39_6; + +T39_5: + response = 0.0190612693; + goto D39; + +T39_6: + response = 0.0217608552; + goto D39; + +N39_7: + if attribute(catid) in (100200130, 100300014, 100300058, 100300013, 100200186, 100300005, 100300116, 100300121, 100300019, 100400142, 100300045, 100200028, 100200185) 
then goto N39_8; + else goto T39_9; + +N39_8: + if attribute(catid) in (100300058, 100300013, 100200186, 100300005, 100300116, 100300121, 100300019, 100300045) then goto T39_7; + else goto T39_8; + +T39_7: + response = 0.0276398014; + goto D39; + +T39_8: + response = 0.0339388499; + goto D39; + +T39_9: + response = 0.0448102783; + goto D39; + +D39: + +tnscore = tnscore + response; + + /* Tree 41 of 200 */ +N40_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100200052, 100300005, 100300008, 100200068, 100300032, 100200234, 100200054, 100400037, 100300209, 100300127, 100300169, 100300074, 100300007, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N40_2; + else goto N40_5; + +N40_2: + if attribute(catid) in (100300014, 100300058, 100300005, 100300008, 100200068, 100300032, 100200234, 100300209, 100200067, 100200232, 100300214) then goto N40_3; + else goto N40_4; + +N40_3: + if attribute(catid) in (100300014, 100300005, 100300032, 100300209, 100200067, 100200232, 100300214) then goto T40_1; + else goto T40_2; + +T40_1: + response = -0.0271870843; + goto D40; + +T40_2: + response = -0.0066979774; + goto D40; + +N40_4: + if attribute(catid) in (100300011, 100300143, 100400037, 100300127, 100300007, 100300076, 100300006) then goto T40_3; + else goto T40_4; + +T40_3: + response = 0.0003261718; + goto D40; + +T40_4: + response = 0.0072958932; + goto D40; + +N40_5: + if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300166, 100200186, 100200172, 100300121, 100200053, 100400142, 100300073, 100200192, 100400079, 100400080, 100300066) then goto N40_6; + else goto N40_8; + +N40_6: + if attribute(catid) in (0, 100200130, 100300077, 100200186, 100200172, 100300121, 100300073, 100400079, 100400080) then goto N40_7; + else goto T40_7; + +N40_7: + if attribute(catid) in (100200130, 100200172, 100300121, 100300073, 100400079, 100400080) then goto T40_5; + else goto T40_6; + +T40_5: + response = 0.0130456694; + goto D40; + +T40_6: 
+ response = 0.0171286061; + goto D40; + +T40_7: + response = 0.0214322678; + goto D40; + +N40_8: + if attribute(catid) in (100400141, 100300165, 100300093, 100300102, 100300027, 100300116, 100300126, 100400038, 100300065, 100300122, 100200087, 100300045, 100200028) then goto N40_9; + else goto T40_10; + +N40_9: + if attribute(catid) in (100400141, 100300165, 100300102, 100300027, 100200087, 100300045) then goto T40_8; + else goto T40_9; + +T40_8: + response = 0.0264983702; + goto D40; + +T40_9: + response = 0.0310587203; + goto D40; + +T40_10: + response = 0.0435590971; + goto D40; + +D40: + +tnscore = tnscore + response; + + /* Tree 42 of 200 */ +N41_1: + if attribute(catid) in (100300013, 100200034, 100300093, 100300102, 100300008, 100200068, 100300032, 100200234, 100300019, 100300004, 100300212, 100300209, 100400079, 100200170, 100300169, 100300200, 100200028, 100300076, 100200067, 100200055, 100200185, 100300006) then goto N41_2; + else goto N41_4; + +N41_2: + if attribute(catid) in (100300013, 100300102, 100300032, 100300019, 100300212, 100300209, 100300200, 100200067, 100200055) then goto T41_1; + else goto N41_3; + +T41_1: + response = -0.0292043593; + goto D41; + +N41_3: + if attribute(catid) in (100300093, 100300008, 100200234, 100300004, 100300076, 100200185, 100300006) then goto T41_2; + else goto T41_3; + +T41_2: + response = -0.0009351701; + goto D41; + +T41_3: + response = 0.0074356232; + goto D41; + +N41_4: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300143, 100200186, 100300027, 100300116, 100200053, 100400142, 100300073, 100200193, 100400038, 100200192, 100300122, 100400080, 100200087, 100300074, 100300066, 100200176, 100300045) then goto N41_5; + else goto N41_6; + +N41_5: + if attribute(catid) in (0, 100300058, 100200186, 100300027, 100300116, 100200087, 100200176) then goto T41_4; + else goto T41_5; + +T41_4: + response = 0.0139006166; + goto D41; + +T41_5: + response = 0.0181039982; + goto D41; + +N41_6: + if 
attribute(catid) in (100300014, 100300077, 100400141, 100300165, 100200052, 100300005, 100200172, 100300121, 100300126, 100300065, 100300007) then goto T41_6; + else goto T41_7; + +T41_6: + response = 0.0233872084; + goto D41; + +T41_7: + response = 0.0318278949; + goto D41; + +D41: + +tnscore = tnscore + response; + + /* Tree 43 of 200 */ +N42_1: + if attribute(catid) in (100300013, 100300008, 100200068, 100300121, 100300019, 100300004, 100200054, 100400038, 100300212, 100200028, 100200185, 100200232) then goto N42_2; + else goto N42_3; + +N42_2: + if attribute(catid) in (100300013, 100300008, 100300019, 100300212, 100200185) then goto T42_1; + else goto T42_2; + +T42_1: + response = -0.0407457887; + goto D42; + +T42_2: + response = -0.0116755527; + goto D42; + +N42_3: + if attribute(catid) in (0, 100200171, 100300058, 100300166, 100200034, 100200186, 100200052, 100300005, 100300027, 100300116, 100200053, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300209, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300006) then goto N42_4; + else goto N42_7; + +N42_4: + if attribute(catid) in (100200171, 100200034, 100200052, 100300005, 100200053, 100300073, 100400037, 100300209, 100400079, 100400080, 100300074) then goto N42_5; + else goto N42_6; + +N42_5: + if attribute(catid) in (100200171, 100200034, 100300005, 100200053, 100300209, 100400079) then goto T42_3; + else goto T42_4; + +T42_3: + response = 0.0049667166; + goto D42; + +T42_4: + response = 0.0103313635; + goto D42; + +N42_6: + if attribute(catid) in (100200186, 100400142, 100300065, 100300169, 100200087, 100300066, 100200176) then goto T42_5; + else goto T42_6; + +T42_5: + response = 0.0145292773; + goto D42; + +T42_6: + response = 0.0169648891; + goto D42; + +N42_7: + if attribute(catid) in (100200130, 100300014, 100300077, 100300143, 100400141, 100300165, 100200172, 100200193, 100300122, 100300127, 100300200) then goto N42_8; + else goto T42_9; + +N42_8: 
+ if attribute(catid) in (100200130, 100300143, 100400141, 100300122, 100300127) then goto T42_7; + else goto T42_8; + +T42_7: + response = 0.0211036464; + goto D42; + +T42_8: + response = 0.0257964434; + goto D42; + +T42_9: + response = 0.0412799006; + goto D42; + +D42: + +tnscore = tnscore + response; + + /* Tree 44 of 200 */ +N43_1: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300143, 100200034, 100200186, 100400141, 100300093, 100300005, 100200172, 100300008, 100300027, 100300121, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100200028, 100200185, 100300006) then goto N43_2; + else goto N43_6; + +N43_2: + if attribute(catid) in (100300143, 100300093, 100300005, 100300008, 100300019, 100300212, 100200028, 100200185, 100300006) then goto T43_1; + else goto N43_3; + +T43_1: + response = -0.0120071553; + goto D43; + +N43_3: + if attribute(catid) in (100200171, 100300166, 100200186, 100400141, 100200172, 100300027, 100300121, 100300004, 100200054, 100300073, 100400079, 100200170, 100200087, 100300074) then goto N43_4; + else goto N43_5; + +N43_4: + if attribute(catid) in (100200186, 100300004, 100200054, 100300073, 100200087) then goto T43_2; + else goto T43_3; + +T43_2: + response = 0.0078585148; + goto D43; + +T43_3: + response = 0.0109817855; + goto D43; + +N43_5: + if attribute(catid) in (0, 100200130, 100300065, 100400080) then goto T43_4; + else goto T43_5; + +T43_4: + response = 0.0142642384; + goto D43; + +T43_5: + response = 0.0175222293; + goto D43; + +N43_6: + if attribute(catid) in (100300011, 100300014, 100300013, 100200052, 100200068, 100200234, 100200053, 100400038, 100300122, 100300127, 100300066, 100200176, 100300045, 100300076) then goto N43_7; + else goto N43_8; + +N43_7: + if attribute(catid) in (100300014, 100300013, 100200068, 100200234, 100200053, 100300127, 100200176, 100300076) then 
goto T43_6; + else goto T43_7; + +T43_6: + response = 0.0221804998; + goto D43; + +T43_7: + response = 0.0265637670; + goto D43; + +N43_8: + if attribute(catid) in (100300077, 100300165, 100400142, 100200232) then goto T43_8; + else goto T43_9; + +T43_8: + response = 0.0309690505; + goto D43; + +T43_9: + response = 0.0459150714; + goto D43; + +D43: + +tnscore = tnscore + response; + + /* Tree 45 of 200 */ +N44_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300166, 100300143, 100200186, 100300165, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300200, 100300076, 100200232, 100300146) then goto N44_2; + else goto N44_7; + +N44_2: + if attribute(catid) in (100300014, 100300102, 100200068, 100300032, 100300121, 100300019, 100300004, 100400142, 100200193, 100400038, 100300212, 100300127, 100400079, 100200170, 100300074, 100200176, 100300200, 100300076, 100200232, 100300146) then goto N44_3; + else goto N44_4; + +N44_3: + if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100300212, 100200176, 100200232) then goto T44_1; + else goto T44_2; + +T44_1: + response = -0.0148052713; + goto D44; + +T44_2: + response = 0.0048766529; + goto D44; + +N44_4: + if attribute(catid) in (100200171, 100300165, 100200052, 100300005, 100200172, 100300116, 100200234, 100200192, 100300169) then goto N44_5; + else goto N44_6; + +N44_5: + if attribute(catid) in (100200052, 100300116, 100200234, 100200192, 100300169) then goto T44_3; + else goto T44_4; + +T44_3: + response = 0.0099622919; + goto D44; + +T44_4: + response = 0.0138380378; + goto D44; + +N44_6: + if attribute(catid) in (0, 100300027, 100300126, 100300066) then goto T44_5; + else goto T44_6; + +T44_5: + 
response = 0.0153805374; + goto D44; + +T44_6: + response = 0.0183919749; + goto D44; + +N44_7: + if attribute(catid) in (100300011, 100300077, 100400141, 100300093, 100300073, 100200087, 100200028, 100200185) then goto T44_7; + else goto T44_8; + +T44_7: + response = 0.0247957566; + goto D44; + +T44_8: + response = 0.0395124104; + goto D44; + +D44: + +tnscore = tnscore + response; + + /* Tree 46 of 200 */ +N45_1: + if attribute(catid) in (100300011, 100200130, 100300014, 100300058, 100200034, 100200186, 100300008, 100200068, 100300073, 100400038, 100300065, 100300127, 100400079, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055) then goto N45_2; + else goto N45_4; + +N45_2: + if attribute(catid) in (100300014, 100300058, 100200034, 100300008, 100200068, 100200087, 100200028, 100200067, 100200055) then goto T45_1; + else goto N45_3; + +T45_1: + response = -0.0102104476; + goto D45; + +N45_3: + if attribute(catid) in (100300011, 100200186, 100400038, 100300127) then goto T45_2; + else goto T45_3; + +T45_2: + response = 0.0006233907; + goto D45; + +T45_3: + response = 0.0069244113; + goto D45; + +N45_4: + if attribute(catid) in (0, 100200171, 100300077, 100300166, 100300143, 100400141, 100300165, 100300093, 100300005, 100200172, 100300032, 100300027, 100300116, 100300121, 100200053, 100400142, 100200193, 100200192, 100200170, 100300169, 100300074, 100300066, 100300007, 100300200, 100300045, 100300076) then goto N45_5; + else goto N45_8; + +N45_5: + if attribute(catid) in (0, 100300077, 100300166, 100300143, 100300165, 100300005, 100200172, 100300032, 100300027, 100200053, 100200192, 100300045, 100300076) then goto N45_6; + else goto N45_7; + +N45_6: + if attribute(catid) in (100300077, 100300143, 100300165, 100200053, 100200192, 100300045) then goto T45_4; + else goto T45_5; + +T45_4: + response = 0.0146529601; + goto D45; + +T45_5: + response = 0.0167435205; + goto D45; + +N45_7: + if attribute(catid) in (100200171, 100400141, 100300116, 100200170, 
100300074, 100300007, 100300200) then goto T45_6; + else goto T45_7; + +T45_6: + response = 0.0216207477; + goto D45; + +T45_7: + response = 0.0252995150; + goto D45; + +N45_8: + if attribute(catid) in (100200052, 100300102, 100200234, 100300004, 100300126, 100300122, 100200185, 100300006, 100200232) then goto T45_8; + else goto T45_9; + +T45_8: + response = 0.0342788593; + goto D45; + +T45_9: + response = 0.0559275992; + goto D45; + +D45: + +tnscore = tnscore + response; + + /* Tree 47 of 200 */ +N46_1: + if attribute(catid) in (100200171, 100300011, 100200130, 100300013, 100400141, 100200052, 100300102, 100300008, 100200068, 100300027, 100300116, 100200234, 100200054, 100300073, 100400037, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100300074, 100200176, 100200185, 100300214) then goto N46_2; + else goto N46_4; + +N46_2: + if attribute(catid) in (100300008, 100300027, 100400037, 100300074, 100200176, 100200185, 100300214) then goto T46_1; + else goto N46_3; + +T46_1: + response = -0.0102961911; + goto D46; + +N46_3: + if attribute(catid) in (100200171, 100300011, 100200052, 100300102, 100200068, 100300116, 100200234, 100300065, 100300209, 100400079, 100300169) then goto T46_2; + else goto T46_3; + +T46_2: + response = 0.0058614005; + goto D46; + +T46_3: + response = 0.0117994941; + goto D46; + +N46_4: + if attribute(catid) in (0, 100300014, 100300166, 100200186, 100300165, 100300005, 100300032, 100300004, 100300126, 100400142, 100200192, 100400080, 100200087, 100300200, 100200028, 100200067, 100300006) then goto N46_5; + else goto N46_6; + +N46_5: + if attribute(catid) in (0, 100300166, 100200186, 100300005, 100300032, 100300126, 100400142, 100200067) then goto T46_4; + else goto T46_5; + +T46_4: + response = 0.0176102969; + goto D46; + +T46_5: + response = 0.0199567396; + goto D46; + +N46_6: + if attribute(catid) in (100300058, 100300077, 100300143, 100200034, 100300093, 100200172, 100300121, 100300019, 100400038, 100300212, 
100300066, 100300076, 100200232) then goto N46_7; + else goto T46_8; + +N46_7: + if attribute(catid) in (100300077, 100300093, 100200172, 100300019, 100300066, 100300076, 100200232) then goto T46_6; + else goto T46_7; + +T46_6: + response = 0.0280698840; + goto D46; + +T46_7: + response = 0.0349016561; + goto D46; + +T46_8: + response = 0.0778635274; + goto D46; + +D46: + +tnscore = tnscore + response; + + /* Tree 48 of 200 */ +N47_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300013, 100300077, 100300166, 100300143, 100200034, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100300073, 100200193, 100400038, 100200192, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300200, 100200028, 100300076, 100200055, 100200185, 100200232, 100300146) then goto N47_2; + else goto N47_6; + +N47_2: + if attribute(catid) in (100200171, 100300011, 100400141, 100300102, 100300005, 100300008, 100200068, 100300116, 100200053, 100300019, 100300004, 100400038, 100300212, 100300209, 100400079, 100200176, 100300200, 100200028, 100300076, 100200055, 100200185, 100300146) then goto N47_3; + else goto N47_4; + +N47_3: + if attribute(catid) in (100300011, 100300019, 100300209, 100300200, 100200028, 100300076, 100200055, 100200185) then goto T47_1; + else goto T47_2; + +T47_1: + response = -0.0255215536; + goto D47; + +T47_2: + response = 0.0049014532; + goto D47; + +N47_4: + if attribute(catid) in (0, 100300013, 100300166, 100300143, 100200034, 100300121, 100300126, 100200192, 100300122, 100300127, 100300169, 100300074, 100300066) then goto N47_5; + else goto T47_5; + +N47_5: + if attribute(catid) in (100300013, 100300143, 100200192, 100300127, 100300074, 100300066) then goto T47_3; + else goto T47_4; + +T47_3: + response = 0.0096441943; + goto D47; + +T47_4: + response = 0.0132971959; + goto 
D47; + +T47_5: + response = 0.0172127947; + goto D47; + +N47_6: + if attribute(catid) in (100300014, 100200186, 100300093, 100300027, 100400142, 100400037, 100200087, 100300006) then goto T47_6; + else goto N47_7; + +T47_6: + response = 0.0253813497; + goto D47; + +N47_7: + if attribute(catid) in (100300165, 100300007, 100300045) then goto T47_7; + else goto T47_8; + +T47_7: + response = 0.0318318618; + goto D47; + +T47_8: + response = 0.0425817751; + goto D47; + +D47: + +tnscore = tnscore + response; + + /* Tree 49 of 200 */ +N48_1: + if attribute(catid) in (100300058, 100300013, 100300077, 100200186, 100400141, 100300165, 100200052, 100300005, 100200068, 100300116, 100200234, 100300019, 100300126, 100200054, 100400038, 100300212, 100300169, 100300074, 100300066, 100300007, 100300200, 100300006, 100200232, 100300214, 100300146) then goto N48_2; + else goto N48_5; + +N48_2: + if attribute(catid) in (100300013, 100400141, 100300005, 100300116, 100200234, 100300019, 100300212, 100300200, 100200232, 100300214, 100300146) then goto N48_3; + else goto N48_4; + +N48_3: + if attribute(catid) in (100300013, 100300005, 100300019, 100300200, 100200232, 100300214, 100300146) then goto T48_1; + else goto T48_2; + +T48_1: + response = -0.0378281153; + goto D48; + +T48_2: + response = -0.0106433322; + goto D48; + +N48_4: + if attribute(catid) in (100300165, 100200068, 100300126, 100400038, 100300074, 100300007) then goto T48_3; + else goto T48_4; + +T48_3: + response = 0.0013709167; + goto D48; + +T48_4: + response = 0.0079886834; + goto D48; + +N48_5: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100300102, 100200172, 100300008, 100300027, 100300121, 100200053, 100300004, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100200028, 100300076) then goto N48_6; + else goto N48_8; + +N48_6: + if attribute(catid) in (100200130, 100300014, 100200034, 100300008, 100300027, 100300121, 100200053, 
100400142, 100300073, 100200192, 100300122, 100300127, 100200170, 100400080, 100200087) then goto T48_5; + else goto N48_7; + +T48_5: + response = 0.0145425948; + goto D48; + +N48_7: + if attribute(catid) in (0, 100300004) then goto T48_6; + else goto T48_7; + +T48_6: + response = 0.0195574674; + goto D48; + +T48_7: + response = 0.0232919623; + goto D48; + +N48_8: + if attribute(catid) in (100300011, 100300166, 100200176, 100300045, 100200185) then goto T48_8; + else goto T48_9; + +T48_8: + response = 0.0300271939; + goto D48; + +T48_9: + response = 0.0494949990; + goto D48; + +D48: + +tnscore = tnscore + response; + + /* Tree 50 of 200 */ +N49_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100300214, 100300146) then goto N49_2; + else goto N49_7; + +N49_2: + if attribute(catid) in (100300014, 100300058, 100300077, 100400141, 100300165, 100300102, 100200068, 100300032, 100300116, 100300019, 100300004, 100400142, 100400037, 100300212, 100300127, 100200170, 100400080, 100300007, 100300045, 100200028, 100200055, 100300214, 100300146) then goto N49_3; + else goto N49_5; + +N49_3: + if attribute(catid) in (100300102, 100300032, 100300019, 100300004, 100400037, 100300212, 100200055, 100300214, 100300146) then goto T49_1; + else goto N49_4; + +T49_1: + response = -0.0237347370; + goto D49; + +N49_4: + if attribute(catid) in (100300014, 100300058, 100300165, 100200068, 100300116, 100300127, 100400080, 100300007, 100200028) then goto T49_2; + else goto T49_3; + +T49_2: + response = 0.0013606160; + goto 
D49; + +T49_3: + response = 0.0065228229; + goto D49; + +N49_5: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200034, 100300005, 100200172, 100300121, 100200193, 100200192, 100400079, 100300169, 100300200) then goto N49_6; + else goto T49_6; + +N49_6: + if attribute(catid) in (0, 100200034, 100200172, 100200193, 100200192, 100300169) then goto T49_4; + else goto T49_5; + +T49_4: + response = 0.0131275051; + goto D49; + +T49_5: + response = 0.0156511717; + goto D49; + +T49_6: + response = 0.0216393464; + goto D49; + +N49_7: + if attribute(catid) in (100300011, 100300143, 100300008, 100300027, 100300126, 100200054, 100300065, 100300209, 100200176, 100200232) then goto T49_7; + else goto T49_8; + +T49_7: + response = 0.0329759178; + goto D49; + +T49_8: + response = 0.0612562214; + goto D49; + +D49: + +tnscore = tnscore + response; + + /* Tree 51 of 200 */ +N50_1: + if attribute(catid) in (100300014, 100300058, 100300077, 100300166, 100400141, 100300165, 100200052, 100300093, 100200172, 100300027, 100300116, 100300121, 100200053, 100300126, 100200054, 100300073, 100200193, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100300200, 100300076, 100200185, 100300214, 100300146) then goto N50_2; + else goto N50_5; + +N50_2: + if attribute(catid) in (100300014, 100300166, 100200052, 100300093, 100300116, 100300121, 100200193, 100300212, 100300209, 100400080, 100300007, 100200185, 100300214) then goto N50_3; + else goto N50_4; + +N50_3: + if attribute(catid) in (100300014, 100300121, 100300212, 100300209, 100200185, 100300214) then goto T50_1; + else goto T50_2; + +T50_1: + response = -0.0165119187; + goto D50; + +T50_2: + response = 0.0001429856; + goto D50; + +N50_4: + if attribute(catid) in (100300058, 100300077, 100300165, 100200172, 100200053, 100300126, 100200054, 100200170, 100300074, 100300200, 100300146) then goto T50_3; + else goto T50_4; + +T50_3: + response = 0.0055114285; 
+ goto D50; + +T50_4: + response = 0.0104192793; + goto D50; + +N50_5: + if attribute(catid) in (0, 100200171, 100200130, 100200034, 100200186, 100300005, 100300008, 100200068, 100200234, 100400142, 100400038, 100300065, 100300045, 100300006) then goto N50_6; + else goto N50_7; + +N50_6: + if attribute(catid) in (0, 100200130, 100200034, 100300005, 100400142, 100300065, 100300045, 100300006) then goto T50_5; + else goto T50_6; + +T50_5: + response = 0.0161701482; + goto D50; + +T50_6: + response = 0.0225059966; + goto D50; + +N50_7: + if attribute(catid) in (100300013, 100300143, 100300004, 100300122, 100200087) then goto T50_7; + else goto T50_8; + +T50_7: + response = 0.0319347909; + goto D50; + +T50_8: + response = 0.0503173002; + goto D50; + +D50: + +tnscore = tnscore + response; + + /* Tree 52 of 200 */ +N51_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100200053, 100300019, 100300004, 100200054, 100300073, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200028, 100200067, 100200232) then goto N51_2; + else goto N51_6; + +N51_2: + if attribute(catid) in (100200171, 100300013, 100300143, 100200034, 100300102, 100300005, 100300008, 100200068, 100300116, 100300019, 100200054, 100400038, 100300212, 100300122, 100300169, 100200087, 100300074, 100300045, 100200067, 100200232) then goto N51_3; + else goto N51_5; + +N51_3: + if attribute(catid) in (100300143, 100200034, 100300102, 100300008, 100300019, 100300212, 100200067) then goto T51_1; + else goto N51_4; + +T51_1: + response = -0.0135611192; + goto D51; + +N51_4: + if attribute(catid) in (100300013, 100200068, 100300116, 100200054, 100300122) then goto T51_2; + else goto T51_3; + +T51_2: + 
response = 0.0015450909; + goto D51; + +T51_3: + response = 0.0074784769; + goto D51; + +N51_5: + if attribute(catid) in (100200130, 100300014, 100300058, 100200172, 100300027, 100200053, 100300004, 100300073, 100200192, 100200028) then goto T51_4; + else goto T51_5; + +T51_4: + response = 0.0109788633; + goto D51; + +T51_5: + response = 0.0166053500; + goto D51; + +N51_6: + if attribute(catid) in (100300165, 100200234, 100400142, 100300065, 100400080, 100300076, 100200185, 100300006) then goto N51_7; + else goto T51_8; + +N51_7: + if attribute(catid) in (100200234, 100300065, 100400080, 100300076) then goto T51_6; + else goto T51_7; + +T51_6: + response = 0.0245937925; + goto D51; + +T51_7: + response = 0.0299316682; + goto D51; + +T51_8: + response = 0.0434718302; + goto D51; + +D51: + +tnscore = tnscore + response; + + /* Tree 53 of 200 */ +N52_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100200186, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300074, 100300066, 100300200, 100300045, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N52_2; + else goto N52_7; + +N52_2: + if attribute(catid) in (100300011, 100300014, 100300077, 100200186, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100300073, 100400037, 100300212, 100400079, 100300200, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N52_3; + else goto N52_5; + +N52_3: + if attribute(catid) in (100200234, 100300019, 100300212, 100300200, 100200067, 100200055, 100200232, 100300214) then goto T52_1; + else goto N52_4; + +T52_1: + response = -0.0132138062; + goto D52; + +N52_4: + if attribute(catid) in (100300011, 100300014, 100200186, 100200068, 100300027, 100300116, 100300121, 100400037, 100300006, 
100300146) then goto T52_2; + else goto T52_3; + +T52_2: + response = 0.0019211021; + goto D52; + +T52_3: + response = 0.0071648202; + goto D52; + +N52_5: + if attribute(catid) in (0, 100200171, 100200130, 100200052, 100300093, 100300005, 100200054, 100200192, 100300065, 100300209, 100200170, 100300066, 100300045) then goto N52_6; + else goto T52_6; + +N52_6: + if attribute(catid) in (100200171, 100200130, 100200052, 100300093, 100300005, 100300209, 100300066, 100300045) then goto T52_4; + else goto T52_5; + +T52_4: + response = 0.0114878654; + goto D52; + +T52_5: + response = 0.0141002634; + goto D52; + +T52_6: + response = 0.0189217722; + goto D52; + +N52_7: + if attribute(catid) in (100300143, 100300165, 100300102, 100300008, 100200053, 100400142, 100200193, 100400038, 100300122, 100300127, 100300169, 100400080, 100200087, 100200176, 100200185) then goto N52_8; + else goto N52_9; + +N52_8: + if attribute(catid) in (100300143, 100300165, 100300102, 100200053, 100400142, 100300127, 100200176) then goto T52_7; + else goto T52_8; + +T52_7: + response = 0.0249929297; + goto D52; + +T52_8: + response = 0.0287505728; + goto D52; + +N52_9: + if attribute(catid) in (100300058, 100200034, 100400141, 100200028) then goto T52_9; + else goto T52_10; + +T52_9: + response = 0.0405244074; + goto D52; + +T52_10: + response = 0.0625787358; + goto D52; + +D52: + +tnscore = tnscore + response; + + /* Tree 54 of 200 */ +N53_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100200052, 100300093, 100300102, 100300005, 100300008, 100300019, 100300004, 100300126, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100200170, 100300169, 100300066, 100300200, 100200028, 100200067, 100200232, 100300214) then goto N53_2; + else goto N53_5; + +N53_2: + if attribute(catid) in (100300014, 100200034, 100300093, 100300102, 100300008, 100300019, 100300004, 100300126, 100200054, 100300212, 100300209, 100300200, 100200028, 100200067, 
100200232, 100300214) then goto N53_3; + else goto N53_4; + +N53_3: + if attribute(catid) in (100300014, 100300008, 100300019, 100200054, 100300209, 100200067, 100200232, 100300214) then goto T53_1; + else goto T53_2; + +T53_1: + response = -0.0251474123; + goto D53; + +T53_2: + response = -0.0003413433; + goto D53; + +N53_4: + if attribute(catid) in (100200130, 100200052, 100300073, 100200192, 100200170, 100300169, 100300066) then goto T53_3; + else goto T53_4; + +T53_3: + response = 0.0080094607; + goto D53; + +T53_4: + response = 0.0126070285; + goto D53; + +N53_5: + if attribute(catid) in (100300011, 100300077, 100300166, 100300143, 100200186, 100400141, 100200068, 100300116, 100300121, 100200053, 100400142, 100400037, 100400079, 100400080, 100200087, 100300074, 100300045) then goto N53_6; + else goto N53_7; + +N53_6: + if attribute(catid) in (100300011, 100300077, 100300143, 100200186, 100400141, 100200053, 100400142, 100400079, 100400080, 100200087, 100300045) then goto T53_5; + else goto T53_6; + +T53_5: + response = 0.0176471308; + goto D53; + +T53_6: + response = 0.0208465659; + goto D53; + +N53_7: + if attribute(catid) in (100300165, 100200172, 100200234, 100300122, 100200176) then goto T53_7; + else goto T53_8; + +T53_7: + response = 0.0268188222; + goto D53; + +T53_8: + response = 0.0367255273; + goto D53; + +D53: + +tnscore = tnscore + response; + + /* Tree 55 of 200 */ +N54_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100300093, 100300102, 100200172, 100300008, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100300073, 100400037, 100200193, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N54_2; + else goto N54_6; + +N54_2: + if attribute(catid) in (100300014, 
100300093, 100300102, 100300008, 100200053, 100300019, 100300004, 100300073, 100300212, 100300127, 100300200, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N54_3; + else goto N54_4; + +N54_3: + if attribute(catid) in (100300102, 100300008, 100300019, 100200067, 100200055, 100200232, 100300214) then goto T54_1; + else goto T54_2; + +T54_1: + response = -0.0254633193; + goto D54; + +T54_2: + response = 0.0030878168; + goto D54; + +N54_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300027, 100300121, 100200234, 100200193, 100200170, 100300045) then goto N54_5; + else goto T54_5; + +N54_5: + if attribute(catid) in (100200171, 100200130, 100300058, 100300027, 100300121, 100200234, 100200193, 100300045) then goto T54_3; + else goto T54_4; + +T54_3: + response = 0.0097072082; + goto D54; + +T54_4: + response = 0.0114229146; + goto D54; + +T54_5: + response = 0.0156613592; + goto D54; + +N54_6: + if attribute(catid) in (100200034, 100200052, 100300005, 100200068, 100300032, 100300116, 100400142, 100200054, 100200192, 100300122) then goto T54_6; + else goto T54_7; + +T54_6: + response = 0.0227531664; + goto D54; + +T54_7: + response = 0.0305920398; + goto D54; + +D54: + +tnscore = tnscore + response; + + /* Tree 56 of 200 */ +N55_1: + if attribute(catid) in (100300011, 100200034, 100300102, 100300005, 100200068, 100300032, 100300116, 100300019, 100300004, 100200193, 100300212, 100300007, 100300200, 100200028, 100200067, 100200055, 100300006, 100300214, 100300146) then goto N55_2; + else goto N55_3; + +N55_2: + if attribute(catid) in (100300011, 100300102, 100300005, 100300032, 100300019, 100300004, 100200193, 100300200, 100200067, 100200055, 100300214, 100300146) then goto T55_1; + else goto T55_2; + +T55_1: + response = -0.0181634396; + goto D55; + +T55_2: + response = -0.0014727477; + goto D55; + +N55_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300077, 
100200186, 100400141, 100200052, 100200172, 100300008, 100200234, 100200053, 100400142, 100300073, 100400038, 100200192, 100300065, 100300122, 100200170, 100300169, 100200087, 100300074, 100200176, 100300045, 100300076) then goto N55_4; + else goto N55_6; + +N55_4: + if attribute(catid) in (100300014, 100200186, 100400141, 100200052, 100200234, 100200053, 100400038, 100200192, 100300065, 100300169, 100300074, 100200176, 100300045) then goto T55_3; + else goto N55_5; + +T55_3: + response = 0.0098907776; + goto D55; + +N55_5: + if attribute(catid) in (0, 100200130, 100300013, 100300077, 100200172, 100300008, 100400142, 100300122, 100200170) then goto T55_4; + else goto T55_5; + +T55_4: + response = 0.0138164577; + goto D55; + +T55_5: + response = 0.0170925410; + goto D55; + +N55_6: + if attribute(catid) in (100300166, 100300143, 100300165, 100300093, 100300027, 100300127, 100400079, 100400080, 100300066) then goto T55_6; + else goto T55_7; + +T55_6: + response = 0.0219244924; + goto D55; + +T55_7: + response = 0.0405698900; + goto D55; + +D55: + +tnscore = tnscore + response; + + /* Tree 57 of 200 */ +N56_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100200053, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100200193, 100400038, 100200192, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100300200, 100200028, 100300076, 100200055, 100300006, 100200232, 100300146) then goto N56_2; + else goto N56_7; + +N56_2: + if attribute(catid) in (100300143, 100300093, 100300102, 100300116, 100300019, 100300004, 100200193, 100400038, 100300212, 100400080, 100200028, 100200055, 100300006, 100200232, 100300146) then goto N56_3; + else goto N56_4; + +N56_3: + if attribute(catid) in (100300019, 100400038, 100200028, 100200055, 100200232, 100300146) then goto 
T56_1; + else goto T56_2; + +T56_1: + response = -0.0376899039; + goto D56; + +T56_2: + response = -0.0084354615; + goto D56; + +N56_4: + if attribute(catid) in (100200034, 100200186, 100400141, 100300027, 100300073, 100300122, 100300127, 100400079, 100200170, 100200087, 100300200) then goto T56_3; + else goto N56_5; + +T56_3: + response = 0.0049554661; + goto D56; + +N56_5: + if attribute(catid) in (0, 100200171, 100300058, 100300165, 100300005, 100200172, 100200053, 100300126, 100400142, 100200192, 100300169, 100200176) then goto N56_6; + else goto T56_6; + +N56_6: + if attribute(catid) in (0, 100300058, 100200172, 100200053, 100300126, 100400142, 100200192) then goto T56_4; + else goto T56_5; + +T56_4: + response = 0.0134283205; + goto D56; + +T56_5: + response = 0.0149300488; + goto D56; + +T56_6: + response = 0.0182669992; + goto D56; + +N56_7: + if attribute(catid) in (100300014, 100300077, 100300121, 100200234, 100200054, 100300074, 100300066, 100200185) then goto T56_7; + else goto N56_8; + +T56_7: + response = 0.0260635269; + goto D56; + +N56_8: + if attribute(catid) in (100300011, 100300166) then goto T56_8; + else goto T56_9; + +T56_8: + response = 0.0344146236; + goto D56; + +T56_9: + response = 0.0489908315; + goto D56; + +D56: + +tnscore = tnscore + response; + + /* Tree 58 of 200 */ +N57_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100300008, 100300116, 100200234, 100300126, 100400142, 100300073, 100200192, 100300065, 100300209, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300146) then goto N57_2; + else goto N57_5; + +N57_2: + if attribute(catid) in (100300011, 100300014, 100200034, 100400141, 100300165, 100300008, 100300116, 100200234, 100200192, 100300065, 100400079, 100300169, 100200087, 100300200, 100200028, 100300076, 100200055, 100300146) then goto N57_3; + 
else goto N57_4; + +N57_3: + if attribute(catid) in (100300014, 100300008, 100400079, 100300200, 100300076, 100200055) then goto T57_1; + else goto T57_2; + +T57_1: + response = -0.0047600269; + goto D57; + +T57_2: + response = 0.0060145343; + goto D57; + +N57_4: + if attribute(catid) in (0, 100300073, 100300209, 100400080, 100300066) then goto T57_3; + else goto T57_4; + +T57_3: + response = 0.0120012047; + goto D57; + +T57_4: + response = 0.0143999679; + goto D57; + +N57_5: + if attribute(catid) in (100200130, 100300058, 100300143, 100200052, 100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100300004, 100200054, 100400037, 100400038, 100300212, 100300122, 100300127, 100300007, 100200176) then goto N57_6; + else goto T57_7; + +N57_6: + if attribute(catid) in (100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100400037, 100300122, 100300127, 100200176) then goto T57_5; + else goto T57_6; + +T57_5: + response = 0.0205502481; + goto D57; + +T57_6: + response = 0.0247979152; + goto D57; + +T57_7: + response = 0.0392012352; + goto D57; + +D57: + +tnscore = tnscore + response; + + /* Tree 59 of 200 */ +N58_1: + if attribute(catid) in (100300011, 100200130, 100300077, 100400141, 100300165, 100300093, 100300005, 100300008, 100300116, 100200053, 100200193, 100400038, 100200192, 100300209, 100300122, 100200087, 100300074, 100300066, 100200176, 100300045, 100200028, 100200067) then goto N58_2; + else goto N58_4; + +N58_2: + if attribute(catid) in (100300011, 100400141, 100300005, 100300008, 100200193, 100300209, 100300122, 100200087, 100300074, 100200176, 100200067) then goto N58_3; + else goto T58_3; + +N58_3: + if attribute(catid) in (100300011, 100200193, 100300209, 100200087, 100200176, 100200067) then goto T58_1; + else goto T58_2; + +T58_1: + response = -0.0143613312; + goto D58; + +T58_2: + response = -0.0022635925; + goto D58; + +T58_3: + response = 0.0062470659; + goto D58; + +N58_4: + if attribute(catid) in (0, 100300058, 100300166, 
100200034, 100200186, 100200052, 100200172, 100200068, 100300027, 100300121, 100200234, 100400142, 100300073, 100400037, 100300127, 100400079, 100200170, 100300169, 100300076, 100300006, 100200232) then goto N58_5; + else goto N58_7; + +N58_5: + if attribute(catid) in (0, 100300166, 100200034, 100200186, 100200052, 100200234, 100400142, 100200170, 100300169, 100300076, 100300006) then goto N58_6; + else goto T58_6; + +N58_6: + if attribute(catid) in (100200034, 100200186, 100200234, 100200170, 100300169, 100300076, 100300006) then goto T58_4; + else goto T58_5; + +T58_4: + response = 0.0121738240; + goto D58; + +T58_5: + response = 0.0142409856; + goto D58; + +T58_6: + response = 0.0181125356; + goto D58; + +N58_7: + if attribute(catid) in (100200171, 100300014, 100300013, 100300019, 100300004, 100300126, 100300065, 100300212, 100400080, 100300007, 100300200) then goto T58_7; + else goto T58_8; + +T58_7: + response = 0.0273051157; + goto D58; + +T58_8: + response = 0.0468409464; + goto D58; + +D58: + +tnscore = tnscore + response; + + /* Tree 60 of 200 */ +N59_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300143, 100200034, 100200186, 100400141, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100200028, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N59_2; + else goto N59_6; + +N59_2: + if attribute(catid) in (100300077, 100300143, 100200034, 100300116, 100300019, 100300004, 100400037, 100300212, 100300209, 100300074, 100200028, 100200185, 100300006, 100300214, 100300146) then goto N59_3; + else goto N59_4; + +N59_3: + if attribute(catid) in (100300143, 100300019, 100300004, 100400037, 100300212, 100300209, 100300214, 100300146) then goto T59_1; + else goto T59_2; + +T59_1: + 
response = -0.0293561273; + goto D59; + +T59_2: + response = -0.0014538622; + goto D59; + +N59_4: + if attribute(catid) in (100200186, 100400141, 100300093, 100300005, 100200172, 100300121, 100200054, 100200192, 100300065, 100200232) then goto T59_3; + else goto N59_5; + +T59_3: + response = 0.0075212689; + goto D59; + +N59_5: + if attribute(catid) in (0, 100200171, 100300058, 100300032, 100200053, 100400079, 100300169) then goto T59_4; + else goto T59_5; + +T59_4: + response = 0.0113283464; + goto D59; + +T59_5: + response = 0.0146230806; + goto D59; + +N59_6: + if attribute(catid) in (100300013, 100300166, 100300165, 100300008, 100200234, 100400142, 100200193, 100400038, 100300122, 100200087, 100300007, 100300076) then goto T59_6; + else goto T59_7; + +T59_6: + response = 0.0245202493; + goto D59; + +T59_7: + response = 0.0398629845; + goto D59; + +D59: + +tnscore = tnscore + response; + + /* Tree 61 of 200 */ +N60_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100200034, 100200186, 100300165, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200192, 100300212, 100300209, 100300127, 100400079, 100300169, 100200087, 100300074, 100300007, 100300045, 100200028, 100200067, 100300006) then goto N60_2; + else goto N60_6; + +N60_2: + if attribute(catid) in (100300011, 100300008, 100200234, 100300019, 100300004, 100400142, 100400037, 100300209, 100200087, 100200028, 100200067, 100300006) then goto N60_3; + else goto N60_4; + +N60_3: + if attribute(catid) in (100300011, 100300008, 100300019, 100300209, 100200087, 100200028, 100200067, 100300006) then goto T60_1; + else goto T60_2; + +T60_1: + response = -0.0125504180; + goto D60; + +T60_2: + response = 0.0005157971; + goto D60; + +N60_4: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300165, 100300102, 100300116, 100200053, 100400079, 100300169, 100300074, 
100300007, 100300045) then goto N60_5; + else goto T60_5; + +N60_5: + if attribute(catid) in (100200171, 100200130, 100300014, 100300102, 100300116, 100400079, 100300169, 100300074, 100300007, 100300045) then goto T60_3; + else goto T60_4; + +T60_3: + response = 0.0073987391; + goto D60; + +T60_4: + response = 0.0089320166; + goto D60; + +T60_5: + response = 0.0143250256; + goto D60; + +N60_6: + if attribute(catid) in (100300166, 100400141, 100300005, 100200068, 100300027, 100300121, 100300126, 100200054, 100400038, 100300065, 100300122, 100200170, 100400080, 100300066, 100200176, 100300076, 100200185) then goto N60_7; + else goto T60_8; + +N60_7: + if attribute(catid) in (100400141, 100300027, 100300121, 100300126, 100300065, 100200170, 100400080, 100300066) then goto T60_6; + else goto T60_7; + +T60_6: + response = 0.0203552723; + goto D60; + +T60_7: + response = 0.0282273054; + goto D60; + +T60_8: + response = 0.0491434915; + goto D60; + +D60: + +tnscore = tnscore + response; + + /* Tree 62 of 200 */ +N61_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300143, 100200034, 100200186, 100400141, 100200052, 100300005, 100300008, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200176, 100200067, 100300214, 100300146) then goto N61_2; + else goto N61_5; + +N61_2: + if attribute(catid) in (100200130, 100300058, 100300143, 100400141, 100200052, 100300005, 100300008, 100300032, 100300027, 100300121, 100200234, 100300126, 100200054, 100300073, 100400038, 100300212, 100300209, 100300127, 100200170, 100400080, 100200087, 100200176, 100200067, 100300214, 100300146) then goto N61_3; + else goto N61_4; + +N61_3: + if attribute(catid) in (100300058, 100300143, 100300005, 100300008, 100300032, 100300121, 100200054, 100300212, 100300209, 100200087, 100200176, 100200067, 
100300214) then goto T61_1; + else goto T61_2; + +T61_1: + response = -0.0099342652; + goto D61; + +T61_2: + response = 0.0039806749; + goto D61; + +N61_4: + if attribute(catid) in (0, 100400142, 100200193, 100400079, 100300169) then goto T61_3; + else goto T61_4; + +T61_3: + response = 0.0115553152; + goto D61; + +T61_4: + response = 0.0135427680; + goto D61; + +N61_5: + if attribute(catid) in (100300014, 100300077, 100300166, 100200172, 100200068, 100300065, 100300122, 100300074, 100300066, 100300007, 100300200, 100300006) then goto N61_6; + else goto N61_7; + +N61_6: + if attribute(catid) in (100300077, 100200172, 100300065, 100300066, 100300200) then goto T61_5; + else goto T61_6; + +T61_5: + response = 0.0169304303; + goto D61; + +T61_6: + response = 0.0205751961; + goto D61; + +N61_7: + if attribute(catid) in (100300013, 100300165, 100300004, 100300076) then goto T61_7; + else goto T61_8; + +T61_7: + response = 0.0276389874; + goto D61; + +T61_8: + response = 0.0441753863; + goto D61; + +D61: + +tnscore = tnscore + response; + + /* Tree 63 of 200 */ +N62_1: + if attribute(catid) in (100300011, 100300014, 100300166, 100200034, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100300027, 100200053, 100300019, 100400142, 100400037, 100200193, 100200170, 100300074, 100300007, 100300200, 100200028, 100200067, 100300006) then goto N62_2; + else goto N62_5; + +N62_2: + if attribute(catid) in (100200034, 100300102, 100300005, 100300008, 100200068, 100300032, 100300019, 100400037, 100300007, 100300200, 100200067, 100300006) then goto N62_3; + else goto N62_4; + +N62_3: + if attribute(catid) in (100300005, 100300008, 100300032, 100300019, 100400037, 100200067) then goto T62_1; + else goto T62_2; + +T62_1: + response = -0.0322662364; + goto D62; + +T62_2: + response = -0.0081793105; + goto D62; + +N62_4: + if attribute(catid) in (100300011, 100200186, 100200052, 100400142, 100200193, 100300074, 100200028) then goto T62_3; 
+ else goto T62_4; + +T62_3: + response = 0.0017199453; + goto D62; + +T62_4: + response = 0.0060178344; + goto D62; + +N62_5: + if attribute(catid) in (0, 100300013, 100300077, 100300143, 100300165, 100300116, 100300121, 100200234, 100300004, 100300073, 100400038, 100200192, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066, 100200176) then goto N62_6; + else goto N62_8; + +N62_6: + if attribute(catid) in (100300013, 100300077, 100300143, 100300165, 100300116, 100300121, 100200234, 100300073, 100300065, 100300169, 100400080, 100300066) then goto N62_7; + else goto T62_7; + +N62_7: + if attribute(catid) in (100300013, 100300143, 100300165, 100300116, 100300169) then goto T62_5; + else goto T62_6; + +T62_5: + response = 0.0103393155; + goto D62; + +T62_6: + response = 0.0134250404; + goto D62; + +T62_7: + response = 0.0166558979; + goto D62; + +N62_8: + if attribute(catid) in (100200171, 100200130, 100200087, 100300076, 100200232) then goto T62_8; + else goto T62_9; + +T62_8: + response = 0.0241920527; + goto D62; + +T62_9: + response = 0.0369765147; + goto D62; + +D62: + +tnscore = tnscore + response; + + /* Tree 64 of 200 */ +N63_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300077, 100300143, 100200034, 100400141, 100300093, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100300073, 100400037, 100400038, 100300065, 100300212, 100300209, 100300122, 100400079, 100200170, 100300169, 100200087, 100300074, 100300007, 100200176, 100300200, 100200028, 100200185, 100300214, 100300146) then goto N63_2; + else goto N63_6; + +N63_2: + if attribute(catid) in (100300143, 100300008, 100300019, 100400037, 100300212, 100300209, 100300007, 100300200, 100200185, 100300214) then goto T63_1; + else goto N63_3; + +T63_1: + response = -0.0243180335; + goto D63; + +N63_3: + if attribute(catid) in (100300014, 100300013, 100200034, 100400141, 100200172, 
100200068, 100400142, 100200054, 100400079, 100200170, 100300169, 100200087, 100300146) then goto N63_4; + else goto N63_5; + +N63_4: + if attribute(catid) in (100300014, 100300013, 100200034, 100200068, 100400142, 100200054, 100300146) then goto T63_2; + else goto T63_3; + +T63_2: + response = 0.0025036422; + goto D63; + +T63_3: + response = 0.0060784676; + goto D63; + +N63_5: + if attribute(catid) in (0, 100200171, 100200130, 100200234, 100200053, 100300004, 100300073, 100400038, 100200028) then goto T63_4; + else goto T63_5; + +T63_4: + response = 0.0109550470; + goto D63; + +T63_5: + response = 0.0136122663; + goto D63; + +N63_6: + if attribute(catid) in (100300011, 100300166, 100200186, 100300165, 100200052, 100300102, 100200192, 100300127, 100400080, 100300066, 100300045, 100300076) then goto N63_7; + else goto T63_8; + +N63_7: + if attribute(catid) in (100300011, 100200186, 100200052, 100300102, 100200192, 100400080, 100300066, 100300045, 100300076) then goto T63_6; + else goto T63_7; + +T63_6: + response = 0.0188007232; + goto D63; + +T63_7: + response = 0.0246438709; + goto D63; + +T63_8: + response = 0.0423545435; + goto D63; + +D63: + +tnscore = tnscore + response; + + /* Tree 65 of 200 */ +N64_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100200186, 100400141, 100300093, 100300102, 100300032, 100300121, 100200234, 100300126, 100400142, 100300073, 100400037, 100200193, 100200192, 100300122, 100200087, 100300074, 100300066, 100200176, 100300200, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N64_2; + else goto N64_5; + +N64_2: + if attribute(catid) in (100300093, 100300032, 100200234, 100300126, 100400037, 100200176, 100300200, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto T64_1; + else goto N64_3; + +T64_1: + response = -0.0157352941; + goto D64; + +N64_3: + if attribute(catid) in (100200171, 100300011, 100300058, 
100300102, 100200193, 100200192, 100300122, 100300074, 100200028) then goto T64_2; + else goto N64_4; + +T64_2: + response = 0.0030409464; + goto D64; + +N64_4: + if attribute(catid) in (0, 100200186, 100400141, 100300121, 100400142) then goto T64_3; + else goto T64_4; + +T64_3: + response = 0.0080637180; + goto D64; + +T64_4: + response = 0.0107191663; + goto D64; + +N64_5: + if attribute(catid) in (100200130, 100300013, 100300166, 100200034, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100200054, 100400038, 100300127, 100400079, 100200170, 100300169, 100400080, 100300007, 100300076) then goto N64_6; + else goto N64_8; + +N64_6: + if attribute(catid) in (100200130, 100300013, 100300165, 100200172, 100300027, 100200054, 100400079, 100200170, 100300169, 100400080) then goto N64_7; + else goto T64_7; + +N64_7: + if attribute(catid) in (100300013, 100200172, 100300027, 100400079, 100200170, 100400080) then goto T64_5; + else goto T64_6; + +T64_5: + response = 0.0154977719; + goto D64; + +T64_6: + response = 0.0195741488; + goto D64; + +T64_7: + response = 0.0236445967; + goto D64; + +N64_8: + if attribute(catid) in (100300116, 100200053, 100300065) then goto T64_8; + else goto T64_9; + +T64_8: + response = 0.0324449764; + goto D64; + +T64_9: + response = 0.0541648949; + goto D64; + +D64: + +tnscore = tnscore + response; + + /* Tree 66 of 200 */ +N65_1: + if attribute(catid) in (100300011, 100300058, 100300143, 100200052, 100300093, 100200068, 100300032, 100300027, 100200054, 100300073, 100400037, 100200193, 100200192, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100300200, 100300045, 100200055, 100200185, 100300214, 100300146) then goto N65_2; + else goto N65_4; + +N65_2: + if attribute(catid) in (100300143, 100200068, 100300032, 100300209, 100200170, 100200055, 100300214, 100300146) then goto T65_1; + else goto N65_3; + +T65_1: + response = -0.0241523508; + goto D65; + +N65_3: + if attribute(catid) in (100300011, 
100300058, 100200054, 100200192, 100300127, 100400080, 100300200, 100300045, 100200185) then goto T65_2; + else goto T65_3; + +T65_2: + response = -0.0009657677; + goto D65; + +T65_3: + response = 0.0051241150; + goto D65; + +N65_4: + if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300165, 100300102, 100300116, 100300121, 100200234, 100300126, 100400142, 100400038, 100300065, 100300169, 100300066, 100300006) then goto N65_5; + else goto N65_7; + +N65_5: + if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100200186, 100400141, 100300102, 100300116, 100300126, 100300006) then goto N65_6; + else goto T65_6; + +N65_6: + if attribute(catid) in (100300013, 100300077, 100200034, 100200186, 100400141) then goto T65_4; + else goto T65_5; + +T65_4: + response = 0.0090067376; + goto D65; + +T65_5: + response = 0.0127908297; + goto D65; + +T65_6: + response = 0.0164902475; + goto D65; + +N65_7: + if attribute(catid) in (100200171, 100200130, 100300166, 100200172, 100200053, 100200087, 100200176, 100300076) then goto T65_7; + else goto T65_8; + +T65_7: + response = 0.0228127126; + goto D65; + +T65_8: + response = 0.0418200655; + goto D65; + +D65: + +tnscore = tnscore + response; + + /* Tree 67 of 200 */ +N66_1: + if attribute(catid) in (100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100200172, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400038, 100300212, 100300209, 100400079, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100200028, 100200067, 100300006, 100300146) then goto N66_2; + else goto N66_6; + +N66_2: + if attribute(catid) in (100300013, 100400141, 100300032, 100200053, 100300019, 100300004, 100300212, 100300209, 100400080, 100300007, 100200176, 100200028, 100200067) then goto N66_3; + else goto N66_4; + +N66_3: + if attribute(catid) in 
(100300032, 100300019, 100300212, 100300209, 100200176, 100200028, 100200067) then goto T66_1; + else goto T66_2; + +T66_1: + response = -0.0386082590; + goto D66; + +T66_2: + response = -0.0063382264; + goto D66; + +N66_4: + if attribute(catid) in (100300014, 100300077, 100200186, 100300165, 100300008, 100200234, 100400142, 100300073, 100400038, 100300169, 100300074, 100300200, 100300146) then goto N66_5; + else goto T66_5; + +N66_5: + if attribute(catid) in (100300077, 100300008, 100300073, 100400038, 100300074, 100300146) then goto T66_3; + else goto T66_4; + +T66_3: + response = 0.0014081125; + goto D66; + +T66_4: + response = 0.0048469355; + goto D66; + +T66_5: + response = 0.0085143275; + goto D66; + +N66_6: + if attribute(catid) in (0, 100300058, 100200034, 100300121, 100300126, 100200054, 100200193, 100200192, 100300065, 100300122, 100300127, 100200170, 100200087, 100300066, 100300045) then goto N66_7; + else goto T66_8; + +N66_7: + if attribute(catid) in (0, 100300058, 100200193, 100200192, 100300122, 100300127, 100200170, 100200087) then goto T66_6; + else goto T66_7; + +T66_6: + response = 0.0154377122; + goto D66; + +T66_7: + response = 0.0222690511; + goto D66; + +T66_8: + response = 0.0445329146; + goto D66; + +D66: + +tnscore = tnscore + response; + + /* Tree 68 of 200 */ +N67_1: + if attribute(catid) in (100300011, 100300058, 100300013, 100200186, 100300165, 100300102, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100300019, 100200054, 100300212, 100300127, 100400079, 100200087, 100300074, 100300007, 100200176, 100300076, 100200055, 100300006, 100200232, 100300214) then goto N67_2; + else goto N67_4; + +N67_2: + if attribute(catid) in (100300011, 100300165, 100300102, 100300027, 100300121, 100300019, 100200087, 100300074, 100200176, 100200055, 100300214) then goto T67_1; + else goto N67_3; + +T67_1: + response = -0.0084086451; + goto D67; + +N67_3: + if attribute(catid) in (100300013, 100200186, 100300008, 100200234, 100200054, 
100300212, 100300127, 100300006) then goto T67_2; + else goto T67_3; + +T67_2: + response = 0.0005019617; + goto D67; + +T67_3: + response = 0.0061914097; + goto D67; + +N67_4: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200052, 100300093, 100300005, 100200172, 100200053, 100300004, 100300126, 100400142, 100300073, 100400038, 100200192, 100300122, 100200170, 100300169, 100300066, 100300200, 100300045) then goto N67_5; + else goto N67_7; + +N67_5: + if attribute(catid) in (100200130, 100300014, 100300093, 100300005, 100300004, 100300126, 100400038, 100200192, 100300169, 100300200) then goto T67_4; + else goto N67_6; + +T67_4: + response = 0.0114280621; + goto D67; + +N67_6: + if attribute(catid) in (0, 100200171, 100300077, 100200052, 100300073, 100300066) then goto T67_5; + else goto T67_6; + +T67_5: + response = 0.0141959004; + goto D67; + +T67_6: + response = 0.0160019821; + goto D67; + +N67_7: + if attribute(catid) in (100300143, 100400141, 100300032, 100300065, 100300209, 100200067) then goto T67_7; + else goto T67_8; + +T67_7: + response = 0.0245106044; + goto D67; + +T67_8: + response = 0.0334093506; + goto D67; + +D67: + +tnscore = tnscore + response; + + /* Tree 69 of 200 */ +N68_1: + if attribute(catid) in (100300011, 100200130, 100300014, 100300143, 100400141, 100300165, 100200052, 100300005, 100300027, 100300116, 100200053, 100300019, 100300004, 100300073, 100400037, 100300209, 100400079, 100200170, 100300169, 100300074, 100300007, 100200176, 100300045, 100300076, 100200067, 100200055, 100200185, 100300214) then goto N68_2; + else goto N68_4; + +N68_2: + if attribute(catid) in (100300011, 100300005, 100300019, 100400037, 100300209, 100200176, 100200055, 100200185, 100300214) then goto T68_1; + else goto N68_3; + +T68_1: + response = -0.0208490757; + goto D68; + +N68_3: + if attribute(catid) in (100300143, 100400141, 100300116, 100200053, 100300004, 100300073, 100400079, 100300074) then goto T68_2; + else goto 
T68_3; + +T68_2: + response = 0.0000297283; + goto D68; + +T68_3: + response = 0.0064274847; + goto D68; + +N68_4: + if attribute(catid) in (0, 100200171, 100300058, 100300077, 100300166, 100200034, 100200186, 100200172, 100200068, 100200234, 100300126, 100200192, 100300065, 100300122, 100300127, 100400080, 100200087, 100300066, 100200028, 100300006) then goto N68_5; + else goto N68_7; + +N68_5: + if attribute(catid) in (0, 100300077, 100300166, 100200186, 100200172, 100300122, 100200087, 100200028, 100300006) then goto N68_6; + else goto T68_6; + +N68_6: + if attribute(catid) in (0, 100300166, 100300122, 100200028) then goto T68_4; + else goto T68_5; + +T68_4: + response = 0.0124031076; + goto D68; + +T68_5: + response = 0.0150987823; + goto D68; + +T68_6: + response = 0.0188364733; + goto D68; + +N68_7: + if attribute(catid) in (100300093, 100300102, 100300008, 100300121, 100400142, 100200054, 100300200, 100200232) then goto T68_7; + else goto T68_8; + +T68_7: + response = 0.0293407993; + goto D68; + +T68_8: + response = 0.0513888162; + goto D68; + +D68: + +tnscore = tnscore + response; + + /* Tree 70 of 200 */ +N69_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100400141, 100300165, 100200052, 100300008, 100200068, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100200054, 100300073, 100400038, 100200192, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300200, 100300076, 100200067, 100200185, 100200232) then goto N69_2; + else goto N69_6; + +N69_2: + if attribute(catid) in (100300014, 100300008, 100300121, 100200234, 100200053, 100300019, 100300212, 100400080, 100200176, 100300200, 100200067, 100200185, 100200232) then goto N69_3; + else goto N69_4; + +N69_3: + if attribute(catid) in (100300008, 100300019, 100300212, 100200176, 100300200, 100200067, 100200185, 100200232) then goto T69_1; + else goto T69_2; + +T69_1: + response = -0.0190877492; + goto D69; + 
+T69_2: + response = -0.0007557548; + goto D69; + +N69_4: + if attribute(catid) in (100400141, 100200052, 100300126, 100200054, 100300065, 100400079, 100200170, 100300169) then goto T69_3; + else goto N69_5; + +T69_3: + response = 0.0071693422; + goto D69; + +N69_5: + if attribute(catid) in (0, 100200130, 100300073, 100400038, 100200192, 100300127, 100300076) then goto T69_4; + else goto T69_5; + +T69_4: + response = 0.0119746374; + goto D69; + +T69_5: + response = 0.0136797362; + goto D69; + +N69_6: + if attribute(catid) in (100300011, 100300058, 100300166, 100200034, 100200186, 100300093, 100300005, 100200172, 100300027, 100400142, 100300122, 100200087, 100300066, 100300045, 100300006) then goto N69_7; + else goto T69_8; + +N69_7: + if attribute(catid) in (100300058, 100200186, 100300093, 100300005, 100200172, 100300027, 100400142, 100300122, 100200087, 100300045, 100300006) then goto T69_6; + else goto T69_7; + +T69_6: + response = 0.0192765099; + goto D69; + +T69_7: + response = 0.0227594602; + goto D69; + +T69_8: + response = 0.0348341149; + goto D69; + +D69: + +tnscore = tnscore + response; + + /* Tree 71 of 200 */ +N70_1: + if attribute(catid) in (100300011, 100300013, 100300143, 100300008, 100200068, 100300019, 100200054, 100400079, 100200170, 100400080, 100300066, 100200176, 100300200, 100200028, 100300006, 100300146) then goto N70_2; + else goto N70_3; + +N70_2: + if attribute(catid) in (100300011, 100300013, 100300008, 100300019, 100200176, 100200028) then goto T70_1; + else goto T70_2; + +T70_1: + response = -0.0154031193; + goto D70; + +T70_2: + response = -0.0007651129; + goto D70; + +N70_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300102, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100300127, 100300169, 100200087, 100300074, 100300007, 100300045, 100300076, 100200067) then 
goto N70_4; + else goto N70_7; + +N70_4: + if attribute(catid) in (100200171, 100200130, 100300077, 100200034, 100400141, 100300102, 100200172, 100300027, 100300004, 100400142, 100400037, 100300127, 100200087, 100300074, 100300007, 100300045) then goto N70_5; + else goto N70_6; + +N70_5: + if attribute(catid) in (100200130, 100400141, 100300102, 100200172, 100300027, 100300004, 100400037, 100300127, 100300074, 100300007) then goto T70_3; + else goto T70_4; + +T70_3: + response = 0.0066677335; + goto D70; + +T70_4: + response = 0.0095565475; + goto D70; + +N70_6: + if attribute(catid) in (0, 100200052, 100300065, 100300169, 100300076, 100200067) then goto T70_5; + else goto T70_6; + +T70_5: + response = 0.0113431678; + goto D70; + +T70_6: + response = 0.0156844830; + goto D70; + +N70_7: + if attribute(catid) in (100300165, 100300116, 100200234, 100200053, 100300126, 100400038, 100200185, 100200232) then goto T70_7; + else goto T70_8; + +T70_7: + response = 0.0231181080; + goto D70; + +T70_8: + response = 0.0499183157; + goto D70; + +D70: + +tnscore = tnscore + response; + + /* Tree 72 of 200 */ +N71_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100300032, 100300027, 100300121, 100200234, 100300004, 100300126, 100400142, 100300073, 100400037, 100200193, 100300065, 100300209, 100200170, 100300169, 100400080, 100300066, 100300200, 100300045, 100300076, 100200055, 100300006, 100200232, 100300146) then goto N71_2; + else goto N71_6; + +N71_2: + if attribute(catid) in (100200171, 100200130, 100300058, 100200034, 100400141, 100300093, 100300102, 100300005, 100300032, 100300027, 100300073, 100400037, 100300209, 100400080, 100300200, 100200055, 100200232, 100300146) then goto N71_3; + else goto N71_5; + +N71_3: + if attribute(catid) in (100300102, 100300032, 100300027, 100400037, 100300209, 100400080, 100200055, 100300146) 
then goto T71_1; + else goto N71_4; + +T71_1: + response = -0.0210007071; + goto D71; + +N71_4: + if attribute(catid) in (100200171, 100300058, 100200034) then goto T71_2; + else goto T71_3; + +T71_2: + response = -0.0023338437; + goto D71; + +T71_3: + response = 0.0034194175; + goto D71; + +N71_5: + if attribute(catid) in (0, 100300077, 100200186, 100200234, 100300126, 100400142, 100200193, 100300065, 100200170, 100300169, 100300066, 100300045) then goto T71_4; + else goto T71_5; + +T71_4: + response = 0.0096608445; + goto D71; + +T71_5: + response = 0.0144728932; + goto D71; + +N71_6: + if attribute(catid) in (100300014, 100300165, 100300008, 100200068, 100200192, 100300122, 100300127, 100400079, 100200087, 100300074, 100200028, 100200185) then goto N71_7; + else goto T71_8; + +N71_7: + if attribute(catid) in (100300014, 100300165, 100200068, 100200192, 100400079, 100200087) then goto T71_6; + else goto T71_7; + +T71_6: + response = 0.0193246792; + goto D71; + +T71_7: + response = 0.0247702235; + goto D71; + +T71_8: + response = 0.0360951958; + goto D71; + +D71: + +tnscore = tnscore + response; + + /* Tree 73 of 200 */ +N72_1: + if attribute(catid) in (100300011, 100300143, 100200186, 100400141, 100300165, 100300008, 100300032, 100300116, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100400038, 100200192, 100300212, 100300122, 100300169, 100400080, 100300066, 100300007, 100300076, 100200067, 100200055, 100200185, 100300146) then goto N72_2; + else goto N72_4; + +N72_2: + if attribute(catid) in (100300143, 100300032, 100300019, 100300126, 100400038, 100200067, 100200055, 100200185) then goto T72_1; + else goto N72_3; + +T72_1: + response = -0.0215257824; + goto D72; + +N72_3: + if attribute(catid) in (100300011, 100400141, 100300008, 100300004, 100400142, 100300073, 100400037, 100200192, 100300066, 100300076, 100300146) then goto T72_2; + else goto T72_3; + +T72_2: + response = 0.0024762462; + goto D72; + +T72_3: + response = 0.0073887199; + 
goto D72; + +N72_4: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100300093, 100200172, 100200068, 100300027, 100200053, 100300065, 100300209, 100300127, 100400079, 100200170, 100200087, 100300074, 100300200) then goto N72_5; + else goto N72_6; + +N72_5: + if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200034, 100300093, 100200172, 100300027, 100200053, 100300209, 100400079, 100300074, 100300200) then goto T72_4; + else goto T72_5; + +T72_4: + response = 0.0122813047; + goto D72; + +T72_5: + response = 0.0177730971; + goto D72; + +N72_6: + if attribute(catid) in (100200130, 100300121, 100200234, 100200193, 100200176, 100200028) then goto T72_6; + else goto T72_7; + +T72_6: + response = 0.0248372595; + goto D72; + +T72_7: + response = 0.0378712543; + goto D72; + +D72: + +tnscore = tnscore + response; + + /* Tree 74 of 200 */ +N73_1: + if attribute(catid) in (100200171, 100200186, 100300165, 100300102, 100300005, 100200068, 100300032, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100400038, 100300212, 100300122, 100400079, 100400080, 100200087, 100200067, 100200055, 100200232) then goto N73_2; + else goto N73_5; + +N73_2: + if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100300126, 100400038, 100300212, 100200067, 100200055, 100200232) then goto N73_3; + else goto N73_4; + +N73_3: + if attribute(catid) in (100300102, 100200068, 100300032, 100300019, 100200067, 100200055, 100200232) then goto T73_1; + else goto T73_2; + +T73_1: + response = -0.0209289749; + goto D73; + +T73_2: + response = -0.0056438478; + goto D73; + +N73_4: + if attribute(catid) in (100200186, 100300165, 100300121, 100200053, 100300004, 100400079, 100400080, 100200087) then goto T73_3; + else goto T73_4; + +T73_3: + response = 0.0024863738; + goto D73; + +T73_4: + response = 0.0054172149; + goto D73; + +N73_5: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 
100300058, 100300166, 100400141, 100200052, 100200172, 100300027, 100300073, 100200192, 100300127, 100200170, 100300169, 100300066, 100300200, 100200028) then goto N73_6; + else goto N73_8; + +N73_6: + if attribute(catid) in (0, 100300014, 100300058, 100300166, 100400141, 100200052, 100200192, 100300066) then goto N73_7; + else goto T73_7; + +N73_7: + if attribute(catid) in (100300014, 100300058, 100300166, 100400141, 100200192) then goto T73_5; + else goto T73_6; + +T73_5: + response = 0.0098208012; + goto D73; + +T73_6: + response = 0.0120469551; + goto D73; + +T73_7: + response = 0.0162551324; + goto D73; + +N73_8: + if attribute(catid) in (100300077, 100200034, 100300093, 100300008, 100300116, 100200054, 100300065, 100300074, 100300007, 100200176) then goto T73_8; + else goto T73_9; + +T73_8: + response = 0.0250861627; + goto D73; + +T73_9: + response = 0.0414462132; + goto D73; + +D73: + +tnscore = tnscore + response; + + /* Tree 75 of 200 */ +N74_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100200034, 100200186, 100200052, 100300093, 100300102, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100200054, 100300073, 100400037, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100300076, 100200067, 100300006, 100300214) then goto N74_2; + else goto N74_6; + +N74_2: + if attribute(catid) in (100200052, 100200068, 100200234, 100300019, 100300004, 100300212, 100400079, 100300169, 100300074, 100200176, 100300076, 100200067, 100300214) then goto N74_3; + else goto N74_4; + +N74_3: + if attribute(catid) in (100200234, 100300019, 100300004, 100300212, 100300214) then goto T74_1; + else goto T74_2; + +T74_1: + response = -0.0247332845; + goto D74; + +T74_2: + response = -0.0060255621; + goto D74; + +N74_4: + if attribute(catid) in (0, 100300058, 100300077, 100200034, 100200186, 100300093, 100300102, 100300027, 
100300121, 100200054, 100300073, 100400037, 100200192, 100300122, 100300127, 100300006) then goto N74_5; + else goto T74_5; + +N74_5: + if attribute(catid) in (100300058, 100200186, 100300093, 100300121, 100300073, 100400037, 100200192, 100300127) then goto T74_3; + else goto T74_4; + +T74_3: + response = 0.0043627132; + goto D74; + +T74_4: + response = 0.0088967157; + goto D74; + +T74_5: + response = 0.0143359261; + goto D74; + +N74_6: + if attribute(catid) in (100300143, 100200172, 100400038, 100200087, 100300200, 100200028, 100200185) then goto T74_6; + else goto N74_7; + +T74_6: + response = 0.0202781940; + goto D74; + +N74_7: + if attribute(catid) in (100300014, 100300166, 100400141, 100300165, 100300005, 100300116, 100300126, 100400142) then goto T74_7; + else goto T74_8; + +T74_7: + response = 0.0288593151; + goto D74; + +T74_8: + response = 0.0450652060; + goto D74; + +D74: + +tnscore = tnscore + response; + + /* Tree 76 of 200 */ +N75_1: + if attribute(catid) in (100200171, 100300011, 100300058, 100300013, 100300077, 100300166, 100200186, 100300093, 100300005, 100300008, 100300027, 100300126, 100400142, 100200054, 100300073, 100400037, 100400038, 100200192, 100300209, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100200176, 100200028, 100200185) then goto N75_2; + else goto N75_5; + +N75_2: + if attribute(catid) in (100300011, 100300013, 100300008, 100300027, 100300126, 100200054, 100400038, 100300209, 100300127, 100400080, 100200185) then goto N75_3; + else goto N75_4; + +N75_3: + if attribute(catid) in (100300011, 100300013, 100300008, 100300126, 100200054, 100200185) then goto T75_1; + else goto T75_2; + +T75_1: + response = -0.0134142246; + goto D75; + +T75_2: + response = -0.0021611460; + goto D75; + +N75_4: + if attribute(catid) in (100200171, 100200186, 100300093, 100400037, 100400079, 100200087, 100300074, 100200176, 100200028) then goto T75_3; + else goto T75_4; + +T75_3: + response = 0.0042397431; + goto D75; + +T75_4: + 
response = 0.0082573117; + goto D75; + +N75_5: + if attribute(catid) in (0, 100200130, 100300143, 100400141, 100300165, 100200052, 100200172, 100200068, 100300032, 100300116, 100200234, 100200053, 100300004, 100300065, 100300122, 100200170, 100300066, 100300076, 100300006) then goto N75_6; + else goto T75_8; + +N75_6: + if attribute(catid) in (0, 100200130, 100400141, 100300116, 100300004, 100300076, 100300006) then goto T75_5; + else goto N75_7; + +T75_5: + response = 0.0125577031; + goto D75; + +N75_7: + if attribute(catid) in (100300143, 100300165, 100200052, 100200172, 100300032, 100300065) then goto T75_6; + else goto T75_7; + +T75_6: + response = 0.0151707760; + goto D75; + +T75_7: + response = 0.0177925563; + goto D75; + +T75_8: + response = 0.0282720969; + goto D75; + +D75: + +tnscore = tnscore + response; + + /* Tree 77 of 200 */ +N76_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100300073, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N76_2; + else goto N76_7; + +N76_2: + if attribute(catid) in (100200171, 100300013, 100300093, 100300005, 100200068, 100300121, 100200234, 100300019, 100300212, 100300209, 100200170, 100200028, 100200067, 100200055, 100300006, 100300146) then goto N76_3; + else goto N76_4; + +N76_3: + if attribute(catid) in (100300013, 100300005, 100300121, 100300212, 100200067, 100200055) then goto T76_1; + else goto T76_2; + +T76_1: + response = -0.0177817471; + goto D76; + +T76_2: + response = -0.0032656602; + goto D76; + +N76_4: + if attribute(catid) in (0, 100200130, 100300014, 
100300077, 100200034, 100200186, 100300165, 100200052, 100200172, 100300032, 100300027, 100300004, 100200192, 100300065, 100300122, 100300127, 100400079, 100300074, 100300066, 100300076) then goto N76_5; + else goto T76_6; + +N76_5: + if attribute(catid) in (100300014, 100200186, 100200052, 100300122, 100300066, 100300076) then goto T76_3; + else goto N76_6; + +T76_3: + response = 0.0049749252; + goto D76; + +N76_6: + if attribute(catid) in (100300077, 100200034, 100300165, 100200172, 100300027, 100300004, 100200192, 100400079, 100300074) then goto T76_4; + else goto T76_5; + +T76_4: + response = 0.0100440563; + goto D76; + +T76_5: + response = 0.0117011752; + goto D76; + +T76_6: + response = 0.0178896771; + goto D76; + +N76_7: + if attribute(catid) in (100300058, 100200054, 100400037, 100200193, 100300169, 100300200) then goto T76_7; + else goto T76_8; + +T76_7: + response = 0.0319705253; + goto D76; + +T76_8: + response = 0.0522115674; + goto D76; + +D76: + +tnscore = tnscore + response; + + /* Tree 78 of 200 */ +N77_1: + if attribute(catid) in (100300058, 100200034, 100400141, 100300093, 100300102, 100300005, 100300008, 100300004, 100200054, 100400038, 100300212, 100300122, 100200087, 100300074, 100200176, 100200055, 100300214) then goto N77_2; + else goto N77_4; + +N77_2: + if attribute(catid) in (100300005, 100300008, 100300212, 100200055, 100300214) then goto T77_1; + else goto N77_3; + +T77_1: + response = -0.0403440609; + goto D77; + +N77_3: + if attribute(catid) in (100300058, 100200034, 100300102, 100300004, 100200054, 100400038, 100200176) then goto T77_2; + else goto T77_3; + +T77_2: + response = -0.0114254470; + goto D77; + +T77_3: + response = -0.0039460534; + goto D77; + +N77_4: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300077, 100200052, 100200172, 100400142, 100300073, 100200193, 100200192, 100300065, 100300127, 100400079, 100200170, 100300045, 100200028, 100300006, 100200232) then goto N77_5; + else goto N77_7; + +N77_5: + 
if attribute(catid) in (0, 100300077, 100400142, 100300073, 100300065, 100300127, 100200170, 100300045, 100200028, 100300006) then goto N77_6; + else goto T77_6; + +N77_6: + if attribute(catid) in (100300077, 100300065, 100300127, 100200170, 100300006) then goto T77_4; + else goto T77_5; + +T77_4: + response = 0.0038995654; + goto D77; + +T77_5: + response = 0.0096106726; + goto D77; + +T77_6: + response = 0.0136466804; + goto D77; + +N77_7: + if attribute(catid) in (100300014, 100300013, 100300166, 100300165, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300126, 100400080, 100300066, 100300007, 100300200) then goto N77_8; + else goto T77_9; + +N77_8: + if attribute(catid) in (100300013, 100300166, 100200068, 100300027, 100300121, 100200234, 100200053, 100300126, 100400080, 100300007, 100300200) then goto T77_7; + else goto T77_8; + +T77_7: + response = 0.0192859199; + goto D77; + +T77_8: + response = 0.0245003908; + goto D77; + +T77_9: + response = 0.0334815162; + goto D77; + +D77: + +tnscore = tnscore + response; + + /* Tree 79 of 200 */ +N78_1: + if attribute(catid) in (100200171, 100200130, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100300165, 100200052, 100300093, 100300008, 100200068, 100300032, 100300027, 100300116, 100200053, 100300019, 100300126, 100300073, 100200193, 100300209, 100400079, 100200170, 100300066, 100200176, 100300076, 100200067, 100200055, 100300214) then goto N78_2; + else goto N78_5; + +N78_2: + if attribute(catid) in (100200052, 100300008, 100300032, 100300019, 100300126, 100300209, 100200176, 100300076, 100200067, 100200055, 100300214) then goto N78_3; + else goto N78_4; + +N78_3: + if attribute(catid) in (100300008, 100300032, 100300209, 100200176, 100300076, 100200067, 100200055, 100300214) then goto T78_1; + else goto T78_2; + +T78_1: + response = -0.0379621177; + goto D78; + +T78_2: + response = -0.0077279547; + goto D78; + +N78_4: + if attribute(catid) in (100200171, 
100300058, 100300013, 100300077, 100200034, 100300027, 100200053) then goto T78_3; + else goto T78_4; + +T78_3: + response = 0.0040069447; + goto D78; + +T78_4: + response = 0.0080600184; + goto D78; + +N78_5: + if attribute(catid) in (0, 100300011, 100300014, 100400141, 100300102, 100300005, 100200172, 100300121, 100200234, 100400037, 100400038, 100200192, 100300065, 100400080, 100300074, 100300006, 100200232) then goto N78_6; + else goto N78_7; + +N78_6: + if attribute(catid) in (0, 100300011, 100300014, 100400141, 100300102, 100300005, 100200172, 100400038) then goto T78_5; + else goto T78_6; + +T78_5: + response = 0.0135824088; + goto D78; + +T78_6: + response = 0.0165574836; + goto D78; + +N78_7: + if attribute(catid) in (100300004, 100400142, 100300127, 100300169, 100200087, 100300007, 100200185) then goto T78_7; + else goto T78_8; + +T78_7: + response = 0.0214892901; + goto D78; + +T78_8: + response = 0.0327877321; + goto D78; + +D78: + +tnscore = tnscore + response; + + /* Tree 80 of 200 */ +N79_1: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300143, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200068, 100300032, 100300121, 100200234, 100200053, 100300004, 100300126, 100400142, 100300073, 100200193, 100200192, 100300065, 100300212, 100400079, 100300169, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214) then goto N79_2; + else goto N79_5; + +N79_2: + if attribute(catid) in (100200130, 100300058, 100300143, 100200186, 100200052, 100300093, 100300102, 100300005, 100300121, 100200234, 100300004, 100300126, 100200193, 100300212, 100200087, 100300007, 100200176, 100300045, 100300076, 100200067, 100200055, 100300214) then goto N79_3; + else goto N79_4; + +N79_3: + if attribute(catid) in (100300058, 100300143, 100300102, 100300005, 100200234, 100200193, 100200087, 100200176, 100300045, 100300076, 100200067, 100200055, 100300214) then goto 
T79_1; + else goto T79_2; + +T79_1: + response = -0.0107053090; + goto D79; + +T79_2: + response = 0.0003635835; + goto D79; + +N79_4: + if attribute(catid) in (100300014, 100300165, 100200068, 100200053, 100400142, 100300073, 100300065) then goto T79_3; + else goto T79_4; + +T79_3: + response = 0.0065823776; + goto D79; + +T79_4: + response = 0.0100610854; + goto D79; + +N79_5: + if attribute(catid) in (100300011, 100300077, 100300166, 100200172, 100400037, 100300122, 100300127) then goto T79_5; + else goto N79_6; + +T79_5: + response = 0.0175946260; + goto D79; + +N79_6: + if attribute(catid) in (100200171, 100200034, 100300027, 100300116, 100300209, 100200170, 100400080, 100200028, 100200185) then goto T79_6; + else goto T79_7; + +T79_6: + response = 0.0254523278; + goto D79; + +T79_7: + response = 0.0397162435; + goto D79; + +D79: + +tnscore = tnscore + response; + + /* Tree 81 of 200 */ +N80_1: + if attribute(catid) in (100300058, 100300013, 100300077, 100200186, 100300165, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100300121, 100200234, 100300019, 100400142, 100300073, 100200193, 100300212, 100400079, 100200087, 100300066, 100200176, 100300200, 100300045, 100200028, 100300076, 100200055, 100300006) then goto N80_2; + else goto N80_5; + +N80_2: + if attribute(catid) in (100300058, 100300013, 100200186, 100300008, 100300032, 100300027, 100300019, 100400142, 100300212, 100200087, 100200176, 100300200, 100300045, 100200055) then goto N80_3; + else goto N80_4; + +N80_3: + if attribute(catid) in (100300008, 100300032, 100300019, 100200176, 100300200, 100200055) then goto T80_1; + else goto T80_2; + +T80_1: + response = -0.0294108915; + goto D80; + +T80_2: + response = -0.0054927303; + goto D80; + +N80_4: + if attribute(catid) in (100300093, 100300116, 100300121, 100200234, 100300073, 100200193, 100400079, 100300066, 100300076, 100300006) then goto T80_3; + else goto T80_4; + +T80_3: + response = 0.0042404411; + goto D80; + +T80_4: + response 
= 0.0073750844; + goto D80; + +N80_5: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300166, 100200034, 100400141, 100300102, 100200068, 100200053, 100300004, 100200054, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080) then goto N80_6; + else goto N80_8; + +N80_6: + if attribute(catid) in (0, 100200171, 100300166, 100200034, 100300102, 100200053, 100300004, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080) then goto N80_7; + else goto T80_7; + +N80_7: + if attribute(catid) in (100200171, 100300166, 100300004, 100300065, 100300127, 100300169) then goto T80_5; + else goto T80_6; + +T80_5: + response = 0.0124777156; + goto D80; + +T80_6: + response = 0.0143158121; + goto D80; + +T80_7: + response = 0.0177029723; + goto D80; + +N80_8: + if attribute(catid) in (100300011, 100300143, 100200052, 100300126, 100300074, 100200067) then goto T80_8; + else goto T80_9; + +T80_8: + response = 0.0257271811; + goto D80; + +T80_9: + response = 0.0375978662; + goto D80; + +D80: + +tnscore = tnscore + response; + + /* Tree 82 of 200 */ +N81_1: + if attribute(catid) in (100300011, 100300058, 100300013, 100300143, 100200034, 100400141, 100300008, 100400038, 100300169, 100300200, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N81_2; + else goto N81_3; + +N81_2: + if attribute(catid) in (100300011, 100300013, 100300143, 100200034, 100300008, 100300200, 100200028, 100200067, 100200055, 100200232, 100300146) then goto T81_1; + else goto T81_2; + +T81_1: + response = -0.0179030459; + goto D81; + +T81_2: + response = -0.0025885619; + goto D81; + +N81_3: + if attribute(catid) in (0, 100300077, 100200186, 100300165, 100300102, 100300005, 100200068, 100300027, 100300116, 100300126, 100400142, 100300073, 100200192, 100300122, 100400079, 100200170, 100300074, 100300066) then goto N81_4; + else goto N81_5; + +N81_4: + if attribute(catid) in (0, 100200186, 100300165, 100300005, 100300027, 100300116, 100300126, 
100400079) then goto T81_3; + else goto T81_4; + +T81_3: + response = 0.0078435164; + goto D81; + +T81_4: + response = 0.0118007064; + goto D81; + +N81_5: + if attribute(catid) in (100200171, 100200130, 100300014, 100300166, 100200172, 100300121, 100300004, 100200054, 100300065, 100300127, 100400080, 100200087, 100200185) then goto T81_5; + else goto N81_6; + +T81_5: + response = 0.0172799995; + goto D81; + +N81_6: + if attribute(catid) in (100200052, 100200234, 100200053, 100400037, 100200193, 100200176) then goto T81_6; + else goto T81_7; + +T81_6: + response = 0.0276688136; + goto D81; + +T81_7: + response = 0.0491582153; + goto D81; + +D81: + +tnscore = tnscore + response; + + /* Tree 83 of 200 */ +N82_1: + if attribute(catid) in (100200130, 100300014, 100300058, 100300077, 100200034, 100200186, 100400141, 100300165, 100300005, 100200172, 100300032, 100300116, 100200234, 100300004, 100400142, 100300073, 100200193, 100400038, 100200192, 100300212, 100300127, 100400079, 100200170, 100400080, 100200087, 100300007, 100200176, 100200028, 100300076, 100200055, 100200232, 100300214) then goto N82_2; + else goto N82_5; + +N82_2: + if attribute(catid) in (100200186, 100300032, 100300116, 100300073, 100400038, 100300212, 100400079, 100200087, 100300007, 100300076, 100200055, 100200232, 100300214) then goto N82_3; + else goto N82_4; + +N82_3: + if attribute(catid) in (100300032, 100300212, 100300076, 100200055, 100200232, 100300214) then goto T82_1; + else goto T82_2; + +T82_1: + response = -0.0300814303; + goto D82; + +T82_2: + response = -0.0044598873; + goto D82; + +N82_4: + if attribute(catid) in (100300058, 100300077, 100200034, 100400141, 100300005, 100300004, 100400142, 100200193, 100200192, 100300127, 100200170, 100400080, 100200176) then goto T82_3; + else goto T82_4; + +T82_3: + response = 0.0033831149; + goto D82; + +T82_4: + response = 0.0061763311; + goto D82; + +N82_5: + if attribute(catid) in (0, 100200171, 100300166, 100300143, 100200052, 100300102, 
100200068, 100300027, 100200053, 100400037, 100300065, 100300209, 100300122) then goto N82_6; + else goto N82_7; + +N82_6: + if attribute(catid) in (0, 100300102, 100200053, 100400037, 100300209, 100300122) then goto T82_5; + else goto T82_6; + +T82_5: + response = 0.0108688948; + goto D82; + +T82_6: + response = 0.0140670577; + goto D82; + +N82_7: + if attribute(catid) in (100300121, 100200054, 100300169, 100300074, 100300066, 100300200, 100200185) then goto T82_7; + else goto T82_8; + +T82_7: + response = 0.0219282043; + goto D82; + +T82_8: + response = 0.0332033624; + goto D82; + +D82: + +tnscore = tnscore + response; + + /* Tree 84 of 200 */ +N83_1: + if attribute(catid) in (0, 100300011, 100300058, 100300013, 100300077, 100300166, 100400141, 100200052, 100300093, 100300005, 100200068, 100300032, 100300027, 100200053, 100200054, 100400037, 100200193, 100400038, 100300209, 100300122, 100300127, 100400079, 100200170, 100300066, 100300045, 100200185, 100300214, 100300146) then goto N83_2; + else goto N83_6; + +N83_2: + if attribute(catid) in (100300011, 100300013, 100300005, 100300032, 100300027, 100200193, 100300209, 100300122, 100200170, 100200185, 100300214, 100300146) then goto N83_3; + else goto N83_4; + +N83_3: + if attribute(catid) in (100300011, 100300005, 100300032, 100200193, 100300209, 100200185, 100300214, 100300146) then goto T83_1; + else goto T83_2; + +T83_1: + response = -0.0217026454; + goto D83; + +T83_2: + response = -0.0034858812; + goto D83; + +N83_4: + if attribute(catid) in (100300058, 100300077, 100400141, 100200068, 100200053, 100400079, 100300066) then goto T83_3; + else goto N83_5; + +T83_3: + response = 0.0030692700; + goto D83; + +N83_5: + if attribute(catid) in (0, 100400037) then goto T83_4; + else goto T83_5; + +T83_4: + response = 0.0062304681; + goto D83; + +T83_5: + response = 0.0091439421; + goto D83; + +N83_6: + if attribute(catid) in (100200171, 100200130, 100200034, 100200186, 100300165, 100200172, 100300116, 100200234, 
100300073, 100200192, 100300065, 100300169, 100400080, 100200087, 100200028, 100300076, 100200232) then goto N83_7; + else goto N83_8; + +N83_7: + if attribute(catid) in (100200130, 100300165, 100200172, 100300116, 100200234, 100300073, 100200192, 100300169, 100200232) then goto T83_6; + else goto T83_7; + +T83_6: + response = 0.0117914211; + goto D83; + +T83_7: + response = 0.0156586974; + goto D83; + +N83_8: + if attribute(catid) in (100300121, 100300126, 100400142, 100300074, 100300006) then goto T83_8; + else goto T83_9; + +T83_8: + response = 0.0236611361; + goto D83; + +T83_9: + response = 0.0326251935; + goto D83; + +D83: + +tnscore = tnscore + response; + + /* Tree 85 of 200 */ +N84_1: + if attribute(catid) in (100300013, 100300008, 100300032, 100300121, 100200234, 100300004, 100300126, 100200054, 100200193, 100300212, 100300122, 100300127, 100400079, 100200087, 100200176, 100300045, 100200028, 100300076, 100200067, 100300006) then goto N84_2; + else goto N84_3; + +N84_2: + if attribute(catid) in (100300121, 100200234, 100300004, 100200054, 100300212, 100200087, 100300076, 100200067) then goto T84_1; + else goto T84_2; + +T84_1: + response = -0.0242668043; + goto D84; + +T84_2: + response = -0.0057454024; + goto D84; + +N84_3: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100300143, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100400142, 100300073, 100400037, 100400038, 100200192, 100300065, 100200170, 100300169, 100400080, 100300074, 100300007) then goto N84_4; + else goto N84_7; + +N84_4: + if attribute(catid) in (100200130, 100300166, 100300143, 100400141, 100300165, 100300093, 100200172, 100300027, 100300116, 100300073, 100400038, 100200170, 100300169, 100400080, 100300074) then goto N84_5; + else goto N84_6; + +N84_5: + if attribute(catid) in (100300143, 100400141, 100300093, 100300027, 100300116, 100300073, 100400080) then goto T84_3; + else goto T84_4; + 
+T84_3: + response = 0.0044087377; + goto D84; + +T84_4: + response = 0.0084781159; + goto D84; + +N84_6: + if attribute(catid) in (0, 100200068, 100400142, 100300065) then goto T84_5; + else goto T84_6; + +T84_5: + response = 0.0107211296; + goto D84; + +T84_6: + response = 0.0151177012; + goto D84; + +N84_7: + if attribute(catid) in (100300011, 100300014, 100300077, 100300102, 100200053, 100300209) then goto T84_7; + else goto T84_8; + +T84_7: + response = 0.0227209620; + goto D84; + +T84_8: + response = 0.0333058662; + goto D84; + +D84: + +tnscore = tnscore + response; + + /* Tree 86 of 200 */ +N85_1: + if attribute(catid) in (100300011, 100300014, 100300143, 100200052, 100300102, 100300005, 100300116, 100200053, 100300126, 100400038, 100200192, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300214) then goto N85_2; + else goto N85_3; + +N85_2: + if attribute(catid) in (100300143, 100300102, 100300005, 100300116, 100200053, 100300126, 100400038, 100300007, 100200028, 100300214) then goto T85_1; + else goto T85_2; + +T85_1: + response = -0.0112761132; + goto D85; + +T85_2: + response = 0.0005180964; + goto D85; + +N85_3: + if attribute(catid) in (0, 100200130, 100300166, 100300093, 100200172, 100300121, 100200234, 100400142, 100200054, 100300073, 100400037, 100300122, 100300127, 100400079, 100300169, 100400080, 100300076, 100200067) then goto N85_4; + else goto N85_6; + +N85_4: + if attribute(catid) in (0, 100300093, 100200172, 100300121, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100300076) then goto N85_5; + else goto T85_5; + +N85_5: + if attribute(catid) in (100300093, 100200172, 100200054, 100300073, 100300122, 100300127, 100400079, 100300169, 100300076) then goto T85_3; + else goto T85_4; + +T85_3: + response = 0.0062969186; + goto D85; + +T85_4: + response = 0.0079944471; + goto D85; + +T85_5: + response = 0.0107843133; + goto D85; + +N85_6: + if attribute(catid) in (100200171, 100300058, 100300013, 100300077, 
100200034, 100200186, 100400141, 100300165, 100200068, 100300027, 100300065, 100300212, 100200170, 100200087, 100300006) then goto T85_6; + else goto T85_7; + +T85_6: + response = 0.0156933768; + goto D85; + +T85_7: + response = 0.0301329171; + goto D85; + +D85: + +tnscore = tnscore + response; + + /* Tree 87 of 200 */ +N86_1: + if attribute(catid) in (100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100200034, 100400141, 100200068, 100300116, 100200234, 100300019, 100400037, 100200193, 100400038, 100200192, 100300212, 100300209, 100300127, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100200067, 100200185, 100300006, 100300214) then goto N86_2; + else goto N86_4; + +N86_2: + if attribute(catid) in (100300011, 100300013, 100300116, 100400038, 100300212, 100300209, 100200087, 100200176, 100200067, 100200185, 100300214) then goto T86_1; + else goto N86_3; + +T86_1: + response = -0.0153646097; + goto D86; + +N86_3: + if attribute(catid) in (100200171, 100300014, 100300058, 100200034, 100200068, 100200234, 100300019, 100200193, 100400080, 100300066, 100300007) then goto T86_2; + else goto T86_3; + +T86_2: + response = -0.0021431391; + goto D86; + +T86_3: + response = 0.0027978033; + goto D86; + +N86_4: + if attribute(catid) in (0, 100200130, 100300166, 100200186, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300121, 100200053, 100300004, 100300126, 100400142, 100200054, 100300073, 100300065, 100300122, 100400079, 100200170, 100300045, 100200028, 100300076) then goto N86_5; + else goto T86_7; + +N86_5: + if attribute(catid) in (0, 100200130, 100200186, 100300165, 100300102, 100300005, 100200172, 100300027, 100300121, 100200053, 100300073, 100300045, 100200028) then goto N86_6; + else goto T86_6; + +N86_6: + if attribute(catid) in (100200130, 100300165, 100300102, 100300027, 100300121, 100200053, 100300073, 100300045, 100200028) then goto T86_4; + else goto T86_5; + +T86_4: + response = 
0.0092257166; + goto D86; + +T86_5: + response = 0.0114031662; + goto D86; + +T86_6: + response = 0.0178845925; + goto D86; + +T86_7: + response = 0.0516414799; + goto D86; + +D86: + +tnscore = tnscore + response; + + /* Tree 88 of 200 */ +N87_1: + if attribute(catid) in (100300032, 100200234, 100300212, 100300209, 100300045, 100200055, 100300146) then goto T87_1; + else goto N87_2; + +T87_1: + response = -0.0281888364; + goto D87; + +N87_2: + if attribute(catid) in (0, 100200130, 100300058, 100300013, 100300077, 100300166, 100300165, 100200052, 100200172, 100200068, 100300027, 100300019, 100300004, 100400142, 100400037, 100200192, 100300122, 100400079, 100200170, 100300169, 100400080, 100300074, 100200176, 100200028, 100300076, 100300006) then goto N87_3; + else goto N87_6; + +N87_3: + if attribute(catid) in (100300058, 100300077, 100300165, 100200068, 100300019, 100400037, 100200192, 100300122, 100400079, 100200170, 100300169, 100400080, 100200028, 100300006) then goto N87_4; + else goto N87_5; + +N87_4: + if attribute(catid) in (100300058, 100200068, 100300019, 100400037, 100200192, 100400079, 100200170, 100200028, 100300006) then goto T87_2; + else goto T87_3; + +T87_2: + response = -0.0005203242; + goto D87; + +T87_3: + response = 0.0053481381; + goto D87; + +N87_5: + if attribute(catid) in (100200130, 100200052, 100200172, 100300074) then goto T87_4; + else goto T87_5; + +T87_4: + response = 0.0070360348; + goto D87; + +T87_5: + response = 0.0095044104; + goto D87; + +N87_6: + if attribute(catid) in (100200171, 100400141, 100300093, 100300008, 100300121, 100300126, 100300073, 100300127, 100200087, 100300066) then goto T87_6; + else goto N87_7; + +T87_6: + response = 0.0150338406; + goto D87; + +N87_7: + if attribute(catid) in (100300011, 100300014, 100200186, 100300116, 100200053, 100400038, 100300065, 100300200, 100200185, 100200232) then goto T87_7; + else goto T87_8; + +T87_7: + response = 0.0219656474; + goto D87; + +T87_8: + response = 0.0303343362; + 
goto D87; + +D87: + +tnscore = tnscore + response; + + /* Tree 89 of 200 */ +N88_1: + if attribute(catid) in (100300011, 100300058, 100200034, 100300165, 100300093, 100300102, 100300032, 100300027, 100300121, 100300019, 100300073, 100400037, 100200193, 100300127, 100300076, 100200232, 100300214) then goto N88_2; + else goto N88_3; + +N88_2: + if attribute(catid) in (100300011, 100300058, 100200034, 100300093, 100300102, 100300032, 100200193, 100300076, 100200232, 100300214) then goto T88_1; + else goto T88_2; + +T88_1: + response = -0.0146956932; + goto D88; + +T88_2: + response = -0.0021492979; + goto D88; + +N88_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100200068, 100300116, 100300004, 100400142, 100200054, 100400038, 100200192, 100300065, 100300122, 100400079, 100200170, 100300169, 100400080, 100200087, 100200185, 100300006) then goto N88_4; + else goto N88_7; + +N88_4: + if attribute(catid) in (100200130, 100300166, 100200052, 100200172, 100200068, 100300004, 100400142, 100400038, 100300065, 100400079, 100400080, 100200087) then goto N88_5; + else goto N88_6; + +N88_5: + if attribute(catid) in (100200130, 100200172, 100200068, 100400142, 100400079, 100400080) then goto T88_3; + else goto T88_4; + +T88_3: + response = 0.0060613479; + goto D88; + +T88_4: + response = 0.0086309145; + goto D88; + +N88_6: + if attribute(catid) in (0, 100200186, 100400141, 100300116, 100200054, 100300122, 100300169) then goto T88_5; + else goto T88_6; + +T88_5: + response = 0.0111211317; + goto D88; + +T88_6: + response = 0.0167951946; + goto D88; + +N88_7: + if attribute(catid) in (100300143, 100300005, 100200234, 100200053, 100300126, 100300212, 100300074, 100300066, 100300200, 100200028, 100200067) then goto N88_8; + else goto T88_9; + +N88_8: + if attribute(catid) in (100200053, 100300126, 100300074, 100300066, 100200028) then goto T88_7; + else goto T88_8; + +T88_7: + response = 
0.0204993878; + goto D88; + +T88_8: + response = 0.0274572562; + goto D88; + +T88_9: + response = 0.0495878122; + goto D88; + +D88: + +tnscore = tnscore + response; + + /* Tree 90 of 200 */ +N89_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100300166, 100300143, 100200034, 100300165, 100200052, 100300093, 100200172, 100300032, 100300116, 100200053, 100300019, 100400142, 100400037, 100400038, 100200192, 100300209, 100300122, 100300127, 100400079, 100300074, 100200176, 100300200, 100200028, 100200232) then goto N89_2; + else goto N89_5; + +N89_2: + if attribute(catid) in (100300011, 100200034, 100300032, 100300019, 100400037, 100300127, 100400079, 100300074, 100200176, 100300200, 100200028, 100200232) then goto N89_3; + else goto N89_4; + +N89_3: + if attribute(catid) in (100300032, 100300019, 100400037, 100300200, 100200028, 100200232) then goto T89_1; + else goto T89_2; + +T89_1: + response = -0.0422634866; + goto D89; + +T89_2: + response = -0.0088083561; + goto D89; + +N89_4: + if attribute(catid) in (100300014, 100300143, 100300165, 100300093, 100200172, 100200192, 100300209, 100300122) then goto T89_3; + else goto T89_4; + +T89_3: + response = -0.0004084485; + goto D89; + +T89_4: + response = 0.0040562959; + goto D89; + +N89_5: + if attribute(catid) in (0, 100200130, 100300077, 100400141, 100300005, 100300027, 100200234, 100300004, 100300126, 100300073, 100200193, 100300065, 100300212, 100200170, 100300169, 100200087, 100300066, 100300045, 100300076) then goto N89_6; + else goto N89_8; + +N89_6: + if attribute(catid) in (100200130, 100400141, 100300005, 100300065, 100300212, 100200170, 100300169, 100300045) then goto T89_5; + else goto N89_7; + +T89_5: + response = 0.0081251015; + goto D89; + +N89_7: + if attribute(catid) in (0, 100200234) then goto T89_6; + else goto T89_7; + +T89_6: + response = 0.0119875946; + goto D89; + +T89_7: + response = 0.0148183346; + goto D89; + +N89_8: + if attribute(catid) in (100200171, 100200186, 100200068, 
100300121, 100300006) then goto T89_8; + else goto T89_9; + +T89_8: + response = 0.0204518722; + goto D89; + +T89_9: + response = 0.0367144755; + goto D89; + +D89: + +tnscore = tnscore + response; + + /* Tree 91 of 200 */ +N90_1: + if attribute(catid) in (100300058, 100300013, 100200034, 100300165, 100300093, 100300005, 100200172, 100300027, 100200053, 100300019, 100300126, 100300073, 100300212, 100300209, 100300169, 100400080, 100200087, 100300007, 100300045, 100200067, 100200055, 100200232, 100300214) then goto N90_2; + else goto N90_5; + +N90_2: + if attribute(catid) in (100300013, 100200034, 100300005, 100300027, 100300019, 100300212, 100200067, 100200055, 100200232, 100300214) then goto N90_3; + else goto N90_4; + +N90_3: + if attribute(catid) in (100300013, 100300005, 100300019, 100300212, 100200055, 100200232, 100300214) then goto T90_1; + else goto T90_2; + +T90_1: + response = -0.0410131690; + goto D90; + +T90_2: + response = -0.0176924609; + goto D90; + +N90_4: + if attribute(catid) in (100300058, 100300165, 100300093, 100200053, 100300126, 100300073, 100300209, 100300007, 100300045) then goto T90_3; + else goto T90_4; + +T90_3: + response = -0.0030372433; + goto D90; + +T90_4: + response = 0.0030417758; + goto D90; + +N90_5: + if attribute(catid) in (0, 100200171, 100300077, 100300166, 100200186, 100400141, 100200052, 100200068, 100300116, 100300121, 100200054, 100200192, 100300065, 100300122, 100300127, 100200170, 100300066, 100200028, 100300076, 100300006) then goto N90_6; + else goto N90_8; + +N90_6: + if attribute(catid) in (0, 100300077, 100300166, 100200186, 100200052, 100300121, 100300065, 100300122, 100300066, 100300076, 100300006) then goto N90_7; + else goto T90_7; + +N90_7: + if attribute(catid) in (100300077, 100300166, 100300121, 100300122, 100300066, 100300076) then goto T90_5; + else goto T90_6; + +T90_5: + response = 0.0066574572; + goto D90; + +T90_6: + response = 0.0093092556; + goto D90; + +T90_7: + response = 0.0138817683; + goto D90; 
+ +N90_8: + if attribute(catid) in (100300011, 100300102, 100300032, 100200234, 100300004, 100400142, 100400079, 100300074, 100200176, 100300200) then goto T90_8; + else goto N90_9; + +T90_8: + response = 0.0211738270; + goto D90; + +N90_9: + if attribute(catid) in (100200130, 100300143, 100200193, 100400038) then goto T90_9; + else goto T90_10; + +T90_9: + response = 0.0270451990; + goto D90; + +T90_10: + response = 0.0447659217; + goto D90; + +D90: + +tnscore = tnscore + response; + + /* Tree 92 of 200 */ +N91_1: + if attribute(catid) in (100300014, 100300013, 100300008, 100200068, 100200234, 100300004, 100200193, 100300212, 100300209, 100400080, 100200087, 100200176, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N91_2; + else goto N91_4; + +N91_2: + if attribute(catid) in (100300013, 100300008, 100200068, 100300212, 100300209, 100200028, 100200067, 100200055, 100200185) then goto N91_3; + else goto T91_3; + +N91_3: + if attribute(catid) in (100300013, 100300008, 100300209, 100200028, 100200067, 100200055) then goto T91_1; + else goto T91_2; + +T91_1: + response = -0.0410751689; + goto D91; + +T91_2: + response = -0.0211139959; + goto D91; + +T91_3: + response = -0.0060525832; + goto D91; + +N91_4: + if attribute(catid) in (100200171, 100200130, 100300077, 100300143, 100400141, 100200052, 100300102, 100200172, 100300073, 100400037, 100300122, 100300074, 100300200, 100300045) then goto N91_5; + else goto N91_6; + +N91_5: + if attribute(catid) in (100200171, 100300077, 100300143, 100200052, 100300102, 100200172, 100400037, 100300045) then goto T91_4; + else goto T91_5; + +T91_4: + response = 0.0022476773; + goto D91; + +T91_5: + response = 0.0072496833; + goto D91; + +N91_6: + if attribute(catid) in (0, 100300011, 100300166, 100200186, 100300165, 100300005, 100300027, 100300116, 100200053, 100300126, 100300127, 100400079, 100200170, 100300169, 100300066, 100300076) then goto N91_7; + else goto N91_9; + +N91_7: + if attribute(catid) in 
(0, 100300166, 100300165, 100300005, 100200053, 100400079, 100300169) then goto N91_8; + else goto T91_8; + +N91_8: + if attribute(catid) in (100300166, 100300165, 100300005, 100200053, 100400079, 100300169) then goto T91_6; + else goto T91_7; + +T91_6: + response = 0.0108065489; + goto D91; + +T91_7: + response = 0.0121894583; + goto D91; + +T91_8: + response = 0.0150333423; + goto D91; + +N91_9: + if attribute(catid) in (100200034, 100300032, 100400142, 100400038, 100200192, 100300065) then goto T91_9; + else goto T91_10; + +T91_9: + response = 0.0204411972; + goto D91; + +T91_10: + response = 0.0355402269; + goto D91; + +D91: + +tnscore = tnscore + response; + + /* Tree 93 of 200 */ +N92_1: + if attribute(catid) in (100300058, 100300143, 100200034, 100200186, 100400141, 100300093, 100300102, 100300027, 100300004, 100200054, 100200193, 100400038, 100300065, 100300212, 100300209, 100300007, 100200028, 100200067, 100200185, 100300146) then goto N92_2; + else goto N92_3; + +N92_2: + if attribute(catid) in (100300058, 100400038, 100300065, 100300209, 100200028, 100200067, 100200185) then goto T92_1; + else goto T92_2; + +T92_1: + response = -0.0271979436; + goto D92; + +T92_2: + response = -0.0038611614; + goto D92; + +N92_3: + if attribute(catid) in (0, 100300014, 100200052, 100200172, 100400142, 100200192, 100300122, 100300127, 100400079, 100200170, 100300074, 100300076) then goto N92_4; + else goto N92_5; + +N92_4: + if attribute(catid) in (0, 100200172, 100400142, 100200192, 100300122, 100200170, 100300076) then goto T92_3; + else goto T92_4; + +T92_3: + response = 0.0073159372; + goto D92; + +T92_4: + response = 0.0115841741; + goto D92; + +N92_5: + if attribute(catid) in (100200171, 100200130, 100300013, 100300166, 100300008, 100200068, 100300116, 100300121, 100200053, 100300126, 100300073, 100300169, 100400080, 100200087, 100300066, 100200176) then goto N92_6; + else goto T92_7; + +N92_6: + if attribute(catid) in (100200130, 100300013, 100200068, 100300121, 
100200053, 100300073, 100300169) then goto T92_5; + else goto T92_6; + +T92_5: + response = 0.0135762410; + goto D92; + +T92_6: + response = 0.0164515309; + goto D92; + +T92_7: + response = 0.0222390758; + goto D92; + +D92: + +tnscore = tnscore + response; + + /* Tree 94 of 200 */ +N93_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300166, 100300143, 100400141, 100200052, 100300102, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300209, 100300122, 100200170, 100200087, 100300074, 100300007, 100200176, 100300200, 100300045, 100200028, 100300076, 100200067, 100300006, 100300214) then goto N93_2; + else goto N93_6; + +N93_2: + if attribute(catid) in (100300011, 100300166, 100300143, 100300008, 100400142, 100300073, 100200193, 100300209, 100300007, 100200176, 100300214) then goto N93_3; + else goto N93_4; + +N93_3: + if attribute(catid) in (100300143, 100200193, 100300007, 100200176, 100300214) then goto T93_1; + else goto T93_2; + +T93_1: + response = -0.0297063352; + goto D93; + +T93_2: + response = -0.0033584809; + goto D93; + +N93_4: + if attribute(catid) in (0, 100200130, 100400141, 100200052, 100200172, 100300027, 100300116, 100200234, 100200053, 100200054, 100400038, 100200192, 100300122, 100200170, 100300076, 100300006) then goto N93_5; + else goto T93_5; + +N93_5: + if attribute(catid) in (100200130, 100200052, 100200172, 100300027, 100300116, 100200234, 100200053, 100200054, 100400038, 100200192, 100200170, 100300006) then goto T93_3; + else goto T93_4; + +T93_3: + response = 0.0038378464; + goto D93; + +T93_4: + response = 0.0065133022; + goto D93; + +T93_5: + response = 0.0116394129; + goto D93; + +N93_6: + if attribute(catid) in (100300014, 100300077, 100200034, 100200186, 100300165, 100300121, 100300004, 100300126, 100300065, 100300127, 100300169, 100400080, 100300066, 100200185, 100200232) then goto N93_7; + else goto T93_8; 
+ +N93_7: + if attribute(catid) in (100300014, 100300077, 100300004, 100300127, 100300169, 100300066, 100200185, 100200232) then goto T93_6; + else goto T93_7; + +T93_6: + response = 0.0160520754; + goto D93; + +T93_7: + response = 0.0218263304; + goto D93; + +T93_8: + response = 0.0403414109; + goto D93; + +D93: + +tnscore = tnscore + response; + + /* Tree 95 of 200 */ +N94_1: + if attribute(catid) in (100300077, 100300143, 100200034, 100300102, 100300005, 100300008, 100300116, 100300121, 100300004, 100300126, 100300073, 100400038, 100300212, 100300209, 100300007, 100200067, 100200055, 100300214, 100300146) then goto N94_2; + else goto N94_4; + +N94_2: + if attribute(catid) in (100300143, 100200034, 100300005, 100300209, 100200067, 100200055, 100300214) then goto T94_1; + else goto N94_3; + +T94_1: + response = -0.0407918257; + goto D94; + +N94_3: + if attribute(catid) in (100300102, 100300008, 100300116, 100300004, 100400038, 100300007, 100300146) then goto T94_2; + else goto T94_3; + +T94_2: + response = -0.0072483912; + goto D94; + +T94_3: + response = 0.0008912521; + goto D94; + +N94_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100400141, 100200052, 100300093, 100200172, 100200068, 100300032, 100300027, 100200053, 100400142, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100200176, 100200028, 100300076) then goto N94_5; + else goto N94_8; + +N94_5: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100300093, 100200172, 100200053, 100300122, 100300127, 100400079, 100300169, 100200176, 100200028) then goto N94_6; + else goto N94_7; + +N94_6: + if attribute(catid) in (100200171, 100300166, 100200186, 100300093, 100200053, 100300127, 100300169, 100200176, 100200028) then goto T94_4; + else goto T94_5; + +T94_4: + response = 0.0062732454; + goto D94; + +T94_5: + response = 0.0100875564; + goto D94; + +N94_7: + if 
attribute(catid) in (100200068, 100400142, 100300065, 100400080, 100300074) then goto T94_6; + else goto T94_7; + +T94_6: + response = 0.0120096679; + goto D94; + +T94_7: + response = 0.0160208786; + goto D94; + +N94_8: + if attribute(catid) in (100300014, 100300165, 100200234, 100200054, 100200193, 100200087, 100300200, 100200185) then goto T94_8; + else goto T94_9; + +T94_8: + response = 0.0234816349; + goto D94; + +T94_9: + response = 0.0499780329; + goto D94; + +D94: + +tnscore = tnscore + response; + + /* Tree 96 of 200 */ +N95_1: + if attribute(catid) in (100300014, 100400141, 100300093, 100300102, 100300008, 100200068, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100400038, 100300127, 100200087, 100300074, 100200176, 100300200, 100300045, 100200028, 100200067, 100300006, 100200232, 100300214) then goto N95_2; + else goto N95_4; + +N95_2: + if attribute(catid) in (100300102, 100300008, 100200068, 100300019, 100300004, 100400038, 100300127, 100200176, 100300200, 100300045, 100200067, 100300006, 100200232, 100300214) then goto T95_1; + else goto N95_3; + +T95_1: + response = -0.0139227136; + goto D95; + +N95_3: + if attribute(catid) in (100400142, 100300074) then goto T95_2; + else goto T95_3; + +T95_2: + response = -0.0037315997; + goto D95; + +T95_3: + response = 0.0024425812; + goto D95; + +N95_4: + if attribute(catid) in (0, 100200171, 100200130, 100300077, 100300166, 100300165, 100200052, 100300073, 100300065, 100300212, 100400079, 100300169, 100300007) then goto N95_5; + else goto N95_6; + +N95_5: + if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100300165, 100200052, 100300212, 100400079) then goto T95_4; + else goto T95_5; + +T95_4: + response = 0.0062742131; + goto D95; + +T95_5: + response = 0.0107218180; + goto D95; + +N95_6: + if attribute(catid) in (100300058, 100300013, 100300143, 100200034, 100200186, 100300005, 100200172, 100300027, 100300121, 100200192, 100300209, 100300122, 100200170, 100400080, 
100300066) then goto T95_6; + else goto T95_7; + +T95_6: + response = 0.0153086152; + goto D95; + +T95_7: + response = 0.0249190643; + goto D95; + +D95: + +tnscore = tnscore + response; + + /* Tree 97 of 200 */ +N96_1: + if attribute(catid) in (100300143, 100200186, 100200052, 100300093, 100300102, 100300008, 100300121, 100200234, 100200053, 100300019, 100300004, 100400038, 100300065, 100300212, 100200087, 100300200, 100200028, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N96_2; + else goto N96_3; + +N96_2: + if attribute(catid) in (100300143, 100300102, 100300008, 100300121, 100300019, 100300004, 100400038, 100200067, 100200055, 100300006, 100300214, 100300146) then goto T96_1; + else goto T96_2; + +T96_1: + response = -0.0160264772; + goto D96; + +T96_2: + response = -0.0023817409; + goto D96; + +N96_3: + if attribute(catid) in (0, 100300013, 100300166, 100200034, 100300032, 100400142, 100300073, 100200193, 100200192, 100400079, 100200170, 100300066) then goto N96_4; + else goto N96_5; + +N96_4: + if attribute(catid) in (0, 100300013, 100300073, 100200192, 100400079, 100300066) then goto T96_3; + else goto T96_4; + +T96_3: + response = 0.0078781947; + goto D96; + +T96_4: + response = 0.0111361463; + goto D96; + +N96_5: + if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300077, 100300165, 100200172, 100200068, 100300027, 100300126, 100200054, 100300209, 100300122, 100300169, 100400080, 100300074, 100200176) then goto N96_6; + else goto T96_7; + +N96_6: + if attribute(catid) in (100300011, 100200130, 100300014, 100300077, 100300165, 100200054, 100300209, 100300122, 100300169, 100200176) then goto T96_5; + else goto T96_6; + +T96_5: + response = 0.0143761703; + goto D96; + +T96_6: + response = 0.0180005310; + goto D96; + +T96_7: + response = 0.0280056529; + goto D96; + +D96: + +tnscore = tnscore + response; + + /* Tree 98 of 200 */ +N97_1: + if attribute(catid) in (100300011, 100300013, 100300166, 100200034, 
100200052, 100300008, 100200068, 100300116, 100300121, 100300019, 100200193, 100200192, 100300065, 100300209, 100300127, 100300074, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214, 100300146) then goto N97_2; + else goto N97_3; + +N97_2: + if attribute(catid) in (100300011, 100200034, 100200068, 100300019, 100300209, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100300214, 100300146) then goto T97_1; + else goto T97_2; + +T97_1: + response = -0.0143446006; + goto D97; + +T97_2: + response = -0.0005591090; + goto D97; + +N97_3: + if attribute(catid) in (0, 100200171, 100300014, 100300077, 100300143, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100300027, 100300004, 100300126, 100400142, 100200054, 100300073, 100400038, 100400079, 100200170, 100300169, 100400080, 100300066, 100200028, 100200185, 100300006) then goto N97_4; + else goto N97_6; + +N97_4: + if attribute(catid) in (0, 100200171, 100200186, 100400141, 100300165, 100300102, 100300005, 100200172, 100300027, 100200054, 100300073, 100400038, 100300169, 100400080, 100300066, 100200028) then goto N97_5; + else goto T97_5; + +N97_5: + if attribute(catid) in (100200186, 100400141, 100300005, 100200172, 100200054, 100300073, 100300169, 100300066) then goto T97_3; + else goto T97_4; + +T97_3: + response = 0.0063901469; + goto D97; + +T97_4: + response = 0.0098655154; + goto D97; + +T97_5: + response = 0.0150886566; + goto D97; + +N97_6: + if attribute(catid) in (100200130, 100300058, 100300093, 100300032, 100300122) then goto T97_6; + else goto T97_7; + +T97_6: + response = 0.0218922437; + goto D97; + +T97_7: + response = 0.0331326520; + goto D97; + +D97: + +tnscore = tnscore + response; + + /* Tree 99 of 200 */ +N98_1: + if attribute(catid) in (100200171, 100300011, 100300013, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300027, 100300116, 100200054, 100300073, 100400037, 100200193, 
100200170, 100300066, 100300007, 100300200, 100300045, 100300076, 100200055) then goto N98_2; + else goto N98_3; + +N98_2: + if attribute(catid) in (100300011, 100200186, 100300102, 100300008, 100400037, 100200193, 100200170, 100300007, 100300200, 100200055) then goto T98_1; + else goto T98_2; + +T98_1: + response = -0.0100694371; + goto D98; + +T98_2: + response = -0.0013265371; + goto D98; + +N98_3: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300166, 100300165, 100300005, 100300121, 100200234, 100300004, 100300126, 100400142, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100300074, 100200028, 100200067, 100300006) then goto N98_4; + else goto N98_5; + +N98_4: + if attribute(catid) in (0, 100300058, 100300005, 100300121, 100200234, 100300065, 100300212, 100300127) then goto T98_3; + else goto T98_4; + +T98_3: + response = 0.0083645817; + goto D98; + +T98_4: + response = 0.0130866864; + goto D98; + +N98_5: + if attribute(catid) in (100300077, 100200068, 100200053, 100400080, 100200087, 100200176) then goto T98_5; + else goto T98_6; + +T98_5: + response = 0.0197130039; + goto D98; + +T98_6: + response = 0.0369209199; + goto D98; + +D98: + +tnscore = tnscore + response; + + /* Tree 100 of 200 */ +N99_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100300027, 100200234, 100200053, 100300004, 100200054, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100400080, 100200087, 100300074, 100300007, 100300045, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N99_2; + else goto N99_6; + +N99_2: + if attribute(catid) in (100300011, 100300143, 100200052, 100300093, 100200054, 100200193, 100300209, 100300074, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N99_3; + else goto N99_4; + +N99_3: + if attribute(catid) in (100300143, 
100300093, 100200193, 100300209, 100300074, 100200055, 100200185, 100300214) then goto T99_1; + else goto T99_2; + +T99_1: + response = -0.0161446372; + goto D99; + +T99_2: + response = -0.0058395053; + goto D99; + +N99_4: + if attribute(catid) in (0, 100300014, 100200186, 100400141, 100300102, 100200234, 100300004, 100300073, 100400037, 100300212, 100300122, 100300127, 100300045, 100300076) then goto N99_5; + else goto T99_5; + +N99_5: + if attribute(catid) in (100300014, 100200186, 100400141, 100300102, 100200234, 100300004, 100300073, 100400037, 100300212, 100300045) then goto T99_3; + else goto T99_4; + +T99_3: + response = 0.0018639770; + goto D99; + +T99_4: + response = 0.0041634423; + goto D99; + +T99_5: + response = 0.0073459177; + goto D99; + +N99_6: + if attribute(catid) in (100200130, 100300058, 100200068, 100300032, 100300116, 100300121, 100300126, 100400142, 100300065, 100400079, 100200170, 100300169, 100300066) then goto T99_6; + else goto N99_7; + +T99_6: + response = 0.0148443276; + goto D99; + +N99_7: + if attribute(catid) in (100200034, 100300165, 100200172) then goto T99_7; + else goto T99_8; + +T99_7: + response = 0.0223682677; + goto D99; + +T99_8: + response = 0.0294343337; + goto D99; + +D99: + +tnscore = tnscore + response; + + /* Tree 101 of 200 */ +N100_1: + if attribute(catid) in (100300011, 100300143, 100300102, 100200234, 100300019, 100400037, 100300074, 100300066, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214) then goto N100_2; + else goto N100_3; + +N100_2: + if attribute(catid) in (100300143, 100200234, 100300019, 100200055, 100300214) then goto T100_1; + else goto T100_2; + +T100_1: + response = -0.0247867880; + goto D100; + +T100_2: + response = -0.0075505833; + goto D100; + +N100_3: + if attribute(catid) in (0, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300027, 100300116, 100200053, 100400142, 100200054, 100300073, 
100400038, 100200192, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300007, 100300076) then goto N100_4; + else goto N100_7; + +N100_4: + if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200034, 100300165, 100200052, 100300093, 100300027, 100300116, 100200053, 100300073, 100400079, 100400080, 100200087, 100300076) then goto N100_5; + else goto N100_6; + +N100_5: + if attribute(catid) in (100200130, 100300077, 100300093, 100300027, 100200053, 100400079, 100200087, 100300076) then goto T100_3; + else goto T100_4; + +T100_3: + response = 0.0019910708; + goto D100; + +T100_4: + response = 0.0076733873; + goto D100; + +N100_6: + if attribute(catid) in (100300014, 100400141, 100200172, 100400142, 100300065, 100300122, 100300007) then goto T100_5; + else goto T100_6; + +T100_5: + response = 0.0110065874; + goto D100; + +T100_6: + response = 0.0140312744; + goto D100; + +N100_7: + if attribute(catid) in (100200171, 100300121, 100300004, 100300126, 100200193, 100300209, 100300127, 100200170, 100200176, 100200028) then goto T100_7; + else goto T100_8; + +T100_7: + response = 0.0200792046; + goto D100; + +T100_8: + response = 0.0371851273; + goto D100; + +D100: + +tnscore = tnscore + response; + + /* Tree 102 of 200 */ +N101_1: + if attribute(catid) in (100300011, 100300014, 100300077, 100300165, 100300093, 100300005, 100200068, 100300032, 100300121, 100200234, 100300126, 100200054, 100400037, 100200193, 100400038, 100300122, 100200087, 100200176, 100300200, 100300076, 100200067, 100200232, 100300214) then goto N101_2; + else goto N101_4; + +N101_2: + if attribute(catid) in (100300014, 100300165, 100300093, 100200068, 100300032, 100200193, 100200087, 100200176, 100200067, 100200232, 100300214) then goto N101_3; + else goto T101_3; + +N101_3: + if attribute(catid) in (100300014, 100300093, 100300032, 100200176, 100200067, 100200232, 100300214) then goto T101_1; + else goto T101_2; + +T101_1: + response = -0.0238697728; + goto D101; + 
+T101_2: + response = -0.0068920318; + goto D101; + +T101_3: + response = -0.0008192848; + goto D101; + +N101_4: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200034, 100200186, 100400141, 100200052, 100200172, 100300027, 100300116, 100200053, 100400142, 100300073, 100200192, 100300212, 100300127, 100200170, 100300074, 100300066, 100300007, 100200028) then goto N101_5; + else goto N101_7; + +N101_5: + if attribute(catid) in (100200130, 100400141, 100200052, 100300027, 100400142, 100200192, 100300127, 100200170, 100300074, 100300066) then goto T101_4; + else goto N101_6; + +T101_4: + response = 0.0063630817; + goto D101; + +N101_6: + if attribute(catid) in (100200171, 100300166, 100200034, 100200186, 100200053, 100300073, 100300007) then goto T101_5; + else goto T101_6; + +T101_5: + response = 0.0102002983; + goto D101; + +T101_6: + response = 0.0117073770; + goto D101; + +N101_7: + if attribute(catid) in (100300008, 100300004, 100300065, 100400079, 100300169, 100400080, 100200185, 100300006) then goto T101_7; + else goto T101_8; + +T101_7: + response = 0.0199785583; + goto D101; + +T101_8: + response = 0.0332135569; + goto D101; + +D101: + +tnscore = tnscore + response; + + /* Tree 103 of 200 */ +N102_1: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300166, 100400141, 100300165, 100300093, 100300005, 100200172, 100200068, 100200053, 100300004, 100400142, 100200192, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232) then goto N102_2; + else goto N102_6; + +N102_2: + if attribute(catid) in (100300011, 100300014, 100300004, 100400142, 100300209, 100400080, 100200087, 100300007, 100300200, 100200067, 100200055, 100300006, 100200232) then goto N102_3; + else goto N102_4; + +N102_3: + if attribute(catid) in (100300011, 100300014, 100300004, 100300209, 100400080, 100300007, 100300200, 
100200067, 100200055, 100200232) then goto T102_1; + else goto T102_2; + +T102_1: + response = -0.0226068659; + goto D102; + +T102_2: + response = -0.0049292005; + goto D102; + +N102_4: + if attribute(catid) in (0, 100200130, 100300077, 100300165, 100200053, 100300122, 100400079, 100300169, 100300074, 100300066, 100300045, 100200028) then goto N102_5; + else goto T102_5; + +N102_5: + if attribute(catid) in (100200130, 100300077, 100300165, 100400079, 100300066, 100300045, 100200028) then goto T102_3; + else goto T102_4; + +T102_3: + response = 0.0024587834; + goto D102; + +T102_4: + response = 0.0048477347; + goto D102; + +T102_5: + response = 0.0100107901; + goto D102; + +N102_6: + if attribute(catid) in (100200171, 100300058, 100300013, 100200034, 100200186, 100200052, 100300027, 100300116, 100200234, 100300073, 100400037, 100200193, 100400038, 100300065, 100200170, 100300076) then goto N102_7; + else goto N102_8; + +N102_7: + if attribute(catid) in (100300013, 100200186, 100300116, 100200234, 100300073, 100200193, 100400038, 100300065) then goto T102_6; + else goto T102_7; + +T102_6: + response = 0.0134952529; + goto D102; + +T102_7: + response = 0.0171978597; + goto D102; + +N102_8: + if attribute(catid) in (100300143, 100300121, 100200176) then goto T102_8; + else goto T102_9; + +T102_8: + response = 0.0268710783; + goto D102; + +T102_9: + response = 0.0389950015; + goto D102; + +D102: + +tnscore = tnscore + response; + + /* Tree 104 of 200 */ +N103_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200186, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300121, 100200053, 100300004, 100300126, 100200054, 100400037, 100200193, 100400038, 100200192, 100300122, 100300127, 100400079, 100200170, 100300169, 100200087, 100300074, 100300007, 100300045, 100200028, 100300076, 100300006, 100200232, 100300214, 100300146) then goto N103_2; + else goto N103_7; 
+ +N103_2: + if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300166, 100200186, 100300165, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100200053, 100300004, 100200054, 100400037, 100400038, 100200192, 100300122, 100400079, 100200170, 100200028, 100300076, 100300006, 100300214, 100300146) then goto N103_3; + else goto N103_6; + +N103_3: + if attribute(catid) in (100300011, 100300058, 100300013, 100300008, 100300004, 100200054, 100400038, 100200170, 100200028, 100300076, 100300214, 100300146) then goto N103_4; + else goto N103_5; + +N103_4: + if attribute(catid) in (100300058, 100300013, 100200054, 100400038, 100300214, 100300146) then goto T103_1; + else goto T103_2; + +T103_1: + response = -0.0127825599; + goto D103; + +T103_2: + response = -0.0040621004; + goto D103; + +N103_5: + if attribute(catid) in (100300165, 100300102, 100300005, 100200068, 100200053, 100400037, 100300122, 100300006) then goto T103_3; + else goto T103_4; + +T103_3: + response = 0.0003831181; + goto D103; + +T103_4: + response = 0.0035617568; + goto D103; + +N103_6: + if attribute(catid) in (100300077, 100300169, 100300074, 100300045) then goto T103_5; + else goto T103_6; + +T103_5: + response = 0.0064465003; + goto D103; + +T103_6: + response = 0.0097160619; + goto D103; + +N103_7: + if attribute(catid) in (100300143, 100400141, 100300032, 100300027, 100300116, 100200234, 100400142, 100300073, 100300065, 100400080, 100300066, 100300200, 100200185) then goto N103_8; + else goto T103_9; + +N103_8: + if attribute(catid) in (100300143, 100300032, 100300116, 100400142, 100400080, 100300066, 100200185) then goto T103_7; + else goto T103_8; + +T103_7: + response = 0.0176560057; + goto D103; + +T103_8: + response = 0.0221678704; + goto D103; + +T103_9: + response = 0.0407063066; + goto D103; + +D103: + +tnscore = tnscore + response; + + /* Tree 105 of 200 */ +N104_1: + if attribute(catid) in (100300011, 100300143, 100300102, 
100300008, 100200068, 100300116, 100300121, 100200234, 100200054, 100300073, 100400037, 100200193, 100300212, 100300209, 100300066, 100300200, 100200028, 100300076, 100200067, 100300006, 100200232) then goto N104_2; + else goto N104_4; + +N104_2: + if attribute(catid) in (100300011, 100300143, 100300102, 100300116, 100200234, 100200054, 100400037, 100300212, 100300209, 100300066, 100300200, 100200067, 100300006, 100200232) then goto N104_3; + else goto T104_3; + +N104_3: + if attribute(catid) in (100300143, 100300102, 100200054, 100300212, 100300209, 100300200, 100200067, 100200232) then goto T104_1; + else goto T104_2; + +T104_1: + response = -0.0313895030; + goto D104; + +T104_2: + response = -0.0136037814; + goto D104; + +T104_3: + response = -0.0032608717; + goto D104; + +N104_4: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100200172, 100300032, 100300027, 100200053, 100300004, 100300126, 100400038, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300045, 100200185) then goto N104_5; + else goto N104_7; + +N104_5: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300166, 100200052, 100300032, 100200053, 100300126, 100200192, 100300065, 100300122, 100300127, 100400079, 100200176, 100300045) then goto N104_6; + else goto T104_6; + +N104_6: + if attribute(catid) in (100200130, 100300014, 100300166, 100200052, 100200192, 100300065, 100200176) then goto T104_4; + else goto T104_5; + +T104_4: + response = 0.0042937725; + goto D104; + +T104_5: + response = 0.0066960091; + goto D104; + +T104_6: + response = 0.0115205820; + goto D104; + +N104_7: + if attribute(catid) in (100200034, 100300165, 100300093, 100300005, 100200087) then goto T104_7; + else goto T104_8; + +T104_7: + response = 0.0194805523; + goto D104; + +T104_8: + response = 0.0346934783; + goto D104; + +D104: + +tnscore = tnscore + response; + + /* 
Tree 106 of 200 */ +N105_1: + if attribute(catid) in (100300013, 100300077, 100300143, 100200186, 100300008, 100300027, 100300116, 100300019, 100300004, 100300126, 100300073, 100400037, 100200192, 100300209, 100300122, 100200170, 100300169, 100300066, 100300007, 100300076, 100200055, 100300146) then goto N105_2; + else goto N105_4; + +N105_2: + if attribute(catid) in (100300143, 100300008, 100300019, 100400037, 100300122, 100200055, 100300146) then goto T105_1; + else goto N105_3; + +T105_1: + response = -0.0346820188; + goto D105; + +N105_3: + if attribute(catid) in (100300013, 100300077, 100300027, 100300126, 100300169, 100300007, 100300076) then goto T105_2; + else goto T105_3; + +T105_2: + response = -0.0068202013; + goto D105; + +T105_3: + response = -0.0006852405; + goto D105; + +N105_4: + if attribute(catid) in (0, 100200171, 100300011, 100300058, 100300166, 100400141, 100300165, 100200172, 100200068, 100300032, 100300121, 100200234, 100200053, 100200054, 100300127, 100400079, 100400080, 100200087, 100300074, 100300045, 100200185) then goto N105_5; + else goto N105_7; + +N105_5: + if attribute(catid) in (0, 100200171, 100300058, 100300166, 100400141, 100300121, 100200234, 100200054, 100400079, 100200087, 100300045) then goto N105_6; + else goto T105_6; + +N105_6: + if attribute(catid) in (100300058, 100300166, 100300121, 100200234, 100400079, 100200087) then goto T105_4; + else goto T105_5; + +T105_4: + response = 0.0054906177; + goto D105; + +T105_5: + response = 0.0089382511; + goto D105; + +T105_6: + response = 0.0137638882; + goto D105; + +N105_7: + if attribute(catid) in (100200130, 100200052, 100300093, 100400142, 100200193, 100400038, 100300065, 100200028) then goto T105_7; + else goto T105_8; + +T105_7: + response = 0.0231575087; + goto D105; + +T105_8: + response = 0.0350257823; + goto D105; + +D105: + +tnscore = tnscore + response; + + /* Tree 107 of 200 */ +N106_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300166, 
100300143, 100200186, 100400141, 100300102, 100200172, 100300008, 100300032, 100300027, 100300121, 100200234, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300045, 100300076, 100200055, 100300006, 100300146) then goto N106_2; + else goto N106_7; + +N106_2: + if attribute(catid) in (100200186, 100300102, 100300008, 100300032, 100200234, 100200193, 100300212, 100300209, 100300122, 100200087, 100300076, 100200055, 100300146) then goto N106_3; + else goto N106_4; + +N106_3: + if attribute(catid) in (100300102, 100200234, 100300212, 100300209, 100300076, 100200055) then goto T106_1; + else goto T106_2; + +T106_1: + response = -0.0272486986; + goto D106; + +T106_2: + response = -0.0121921962; + goto D106; + +N106_4: + if attribute(catid) in (100200171, 100200130, 100300058, 100300143, 100400141, 100300121, 100400142, 100400037, 100200192, 100400080, 100300074, 100300045, 100300006) then goto N106_5; + else goto N106_6; + +N106_5: + if attribute(catid) in (100200171, 100200192, 100400080, 100300074, 100300045, 100300006) then goto T106_3; + else goto T106_4; + +T106_3: + response = -0.0003106606; + goto D106; + +T106_4: + response = 0.0031181748; + goto D106; + +N106_6: + if attribute(catid) in (0, 100300013, 100200172, 100300027, 100400079, 100300066) then goto T106_5; + else goto T106_6; + +T106_5: + response = 0.0075863782; + goto D106; + +T106_6: + response = 0.0110589536; + goto D106; + +N106_7: + if attribute(catid) in (100300011, 100300077, 100200034, 100300165, 100200052, 100300093, 100300005, 100200068, 100300126, 100200054, 100300065, 100300127, 100200170, 100200185, 100200232) then goto N106_8; + else goto T106_9; + +N106_8: + if attribute(catid) in (100300165, 100200052, 100300093, 100300126, 100200170, 100200185) then goto T106_7; + else goto T106_8; + +T106_7: + response = 0.0158814592; + goto D106; + +T106_8: + 
response = 0.0213240490; + goto D106; + +T106_9: + response = 0.0395357198; + goto D106; + +D106: + +tnscore = tnscore + response; + + /* Tree 108 of 200 */ +N107_1: + if attribute(catid) in (100300058, 100300013, 100300077, 100300166, 100200034, 100300165, 100200052, 100300093, 100300005, 100200172, 100300008, 100200068, 100300019, 100300004, 100200054, 100400038, 100300212, 100400079, 100200170, 100300074, 100300066, 100200176, 100300200, 100200028, 100300076, 100200055, 100200232, 100300146) then goto N107_2; + else goto N107_4; + +N107_2: + if attribute(catid) in (100300013, 100300008, 100200068, 100300019, 100400038, 100200055, 100200232, 100300146) then goto T107_1; + else goto N107_3; + +T107_1: + response = -0.0281903745; + goto D107; + +N107_3: + if attribute(catid) in (100300058, 100200034, 100300165, 100200052, 100300005, 100300004, 100300212, 100400079, 100200170, 100300074, 100200176, 100300200, 100200028) then goto T107_2; + else goto T107_3; + +T107_2: + response = -0.0038124936; + goto D107; + +T107_3: + response = 0.0031637671; + goto D107; + +N107_4: + if attribute(catid) in (0, 100200171, 100200130, 100300143, 100200186, 100400141, 100300027, 100300116, 100200234, 100200053, 100300126, 100400142, 100300073, 100200193, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080, 100300045, 100300006) then goto N107_5; + else goto T107_7; + +N107_5: + if attribute(catid) in (0, 100200171, 100200130, 100300143, 100200186, 100400141, 100300027, 100300116, 100200234, 100300126, 100400142, 100200193, 100200192, 100300122, 100300045, 100300006) then goto N107_6; + else goto T107_6; + +N107_6: + if attribute(catid) in (100200171, 100200130, 100400141, 100300027, 100300116, 100200234, 100400142, 100200193, 100300122, 100300045, 100300006) then goto T107_4; + else goto T107_5; + +T107_4: + response = 0.0097327734; + goto D107; + +T107_5: + response = 0.0117883652; + goto D107; + +T107_6: + response = 0.0163276141; + goto D107; + +T107_7: + response = 
0.0378076890; + goto D107; + +D107: + +tnscore = tnscore + response; + + /* Tree 109 of 200 */ +N108_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300166, 100300143, 100200186, 100400141, 100300165, 100300093, 100300102, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300126, 100400142, 100300073, 100200193, 100200192, 100300212, 100300127, 100400079, 100400080, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100200055, 100200185, 100300006) then goto N108_2; + else goto N108_6; + +N108_2: + if attribute(catid) in (100200171, 100300011, 100300013, 100300102, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100300126, 100300127, 100400079, 100400080, 100200176, 100300045, 100200028, 100200055, 100200185) then goto N108_3; + else goto N108_4; + +N108_3: + if attribute(catid) in (100300011, 100300013, 100300102, 100300008, 100300032, 100300116, 100300121, 100300126, 100300127, 100200176, 100200055, 100200185) then goto T108_1; + else goto T108_2; + +T108_1: + response = -0.0183851919; + goto D108; + +T108_2: + response = -0.0026357282; + goto D108; + +N108_4: + if attribute(catid) in (100200130, 100300166, 100200186, 100400141, 100400142) then goto T108_3; + else goto N108_5; + +T108_3: + response = 0.0029870563; + goto D108; + +N108_5: + if attribute(catid) in (100300143, 100200053, 100300073, 100200193, 100200192, 100200087, 100300066, 100300200) then goto T108_4; + else goto T108_5; + +T108_4: + response = 0.0070772264; + goto D108; + +T108_5: + response = 0.0091929568; + goto D108; + +N108_6: + if attribute(catid) in (100300058, 100300077, 100200052, 100300005, 100200172, 100300027, 100300019, 100300004, 100400037, 100400038, 100300065, 100200170, 100300169, 100300076, 100200067) then goto T108_6; + else goto T108_7; + +T108_6: + response = 0.0192905696; + goto D108; + +T108_7: + response = 0.0373801828; + goto D108; + +D108: + +tnscore = 
tnscore + response; + + /* Tree 110 of 200 */ +N109_1: + if attribute(catid) in (100200171, 100200130, 100300014, 100300013, 100200186, 100300165, 100200052, 100300102, 100300008, 100200068, 100300027, 100300116, 100300004, 100200192, 100300065, 100300212, 100300122, 100300127, 100200170, 100300074, 100200176, 100300045, 100300076, 100200055, 100200185, 100300214) then goto N109_2; + else goto N109_4; + +N109_2: + if attribute(catid) in (100300102, 100300008, 100300212, 100200170, 100300074, 100200055, 100300214) then goto T109_1; + else goto N109_3; + +T109_1: + response = -0.0234366968; + goto D109; + +N109_3: + if attribute(catid) in (100300014, 100300013, 100200186, 100300165, 100200068, 100300116, 100300127, 100300076, 100200185) then goto T109_2; + else goto T109_3; + +T109_2: + response = -0.0041768475; + goto D109; + +T109_3: + response = 0.0012159251; + goto D109; + +N109_4: + if attribute(catid) in (0, 100300011, 100300058, 100300077, 100200034, 100400141, 100200172, 100300032, 100300121, 100200234, 100300126, 100400142, 100300073, 100400037, 100400079, 100300169, 100200087, 100300200, 100200028, 100300006) then goto N109_5; + else goto N109_7; + +N109_5: + if attribute(catid) in (0, 100300077, 100200034, 100200172, 100200234, 100300073, 100400037, 100300169, 100300200, 100200028) then goto N109_6; + else goto T109_6; + +N109_6: + if attribute(catid) in (100300077, 100200172, 100200234, 100400037, 100300169, 100300200) then goto T109_4; + else goto T109_5; + +T109_4: + response = 0.0073379486; + goto D109; + +T109_5: + response = 0.0101628542; + goto D109; + +T109_6: + response = 0.0145361756; + goto D109; + +N109_7: + if attribute(catid) in (100300166, 100300093, 100300005, 100400080, 100300066, 100200232) then goto T109_7; + else goto T109_8; + +T109_7: + response = 0.0211367281; + goto D109; + +T109_8: + response = 0.0344431588; + goto D109; + +D109: + +tnscore = tnscore + response; + + /* Tree 111 of 200 */ +N110_1: + if attribute(catid) in (0, 
100300011, 100200130, 100300014, 100300077, 100300143, 100200034, 100400141, 100300165, 100300093, 100200172, 100300032, 100300027, 100300116, 100200053, 100300004, 100300126, 100200054, 100400037, 100200193, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N110_2; + else goto N110_6; + +N110_2: + if attribute(catid) in (100300011, 100300014, 100300143, 100300093, 100200053, 100200054, 100400037, 100300007, 100200176, 100200067, 100200055, 100200185, 100300214, 100300146) then goto N110_3; + else goto N110_4; + +N110_3: + if attribute(catid) in (100300011, 100300093, 100400037, 100300007, 100200176, 100200067, 100200055, 100300214, 100300146) then goto T110_1; + else goto T110_2; + +T110_1: + response = -0.0290230654; + goto D110; + +T110_2: + response = -0.0106383395; + goto D110; + +N110_4: + if attribute(catid) in (100200034, 100300165, 100200172, 100300116, 100300126, 100400079, 100200170, 100400080, 100300074, 100300045, 100300006) then goto T110_3; + else goto N110_5; + +T110_3: + response = -0.0013386003; + goto D110; + +N110_5: + if attribute(catid) in (0, 100300027, 100200087, 100300066) then goto T110_4; + else goto T110_5; + +T110_4: + response = 0.0034896642; + goto D110; + +T110_5: + response = 0.0060074974; + goto D110; + +N110_6: + if attribute(catid) in (100200171, 100300058, 100300166, 100200186, 100200052, 100300005, 100200068, 100300121, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100300169, 100300200) then goto N110_7; + else goto N110_8; + +N110_7: + if attribute(catid) in (100200186, 100200052, 100300005, 100400142, 100300073, 100300122, 100300169) then goto T110_6; + else goto T110_7; + +T110_6: + response = 0.0118090198; + goto D110; + +T110_7: + response = 0.0185977150; + goto D110; + +N110_8: + if attribute(catid) in (100300008, 100200234, 100400038) then goto T110_8; + else goto T110_9; + 
+T110_8: + response = 0.0272065563; + goto D110; + +T110_9: + response = 0.0476478756; + goto D110; + +D110: + +tnscore = tnscore + response; + + /* Tree 112 of 200 */ +N111_1: + if attribute(catid) in (100300011, 100300013, 100200034, 100400141, 100200052, 100300005, 100300008, 100200068, 100300027, 100300116, 100200234, 100300019, 100300004, 100400142, 100300073, 100400038, 100300209, 100300127, 100400079, 100200170, 100300066, 100300007, 100200176, 100300045, 100300076, 100200055, 100300214, 100300146) then goto N111_2; + else goto N111_5; + +N111_2: + if attribute(catid) in (100300013, 100300008, 100300019, 100300209, 100200176, 100200055, 100300214, 100300146) then goto T111_1; + else goto N111_3; + +T111_1: + response = -0.0323268404; + goto D111; + +N111_3: + if attribute(catid) in (100300011, 100200052, 100300005, 100200068, 100300004, 100300066, 100300007) then goto T111_2; + else goto N111_4; + +T111_2: + response = -0.0057248097; + goto D111; + +N111_4: + if attribute(catid) in (100200234, 100400142, 100300073, 100400079, 100300076) then goto T111_3; + else goto T111_4; + +T111_3: + response = -0.0010770901; + goto D111; + +T111_4: + response = 0.0026132947; + goto D111; + +N111_5: + if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200186, 100300165, 100200172, 100300121, 100200053, 100300126, 100200193, 100200192, 100300065, 100300122, 100300169, 100400080, 100300074) then goto N111_6; + else goto N111_7; + +N111_6: + if attribute(catid) in (100200130, 100300166, 100200186, 100300165, 100200172, 100200053, 100300126, 100300169, 100300074) then goto T111_5; + else goto T111_6; + +T111_5: + response = 0.0070056177; + goto D111; + +T111_6: + response = 0.0111605097; + goto D111; + +N111_7: + if attribute(catid) in (100200171, 100300014, 100300143, 100200054, 100200087, 100300200, 100200028) then goto T111_7; + else goto T111_8; + +T111_7: + response = 0.0189654625; + goto D111; + +T111_8: + response = 0.0388492541; + goto D111; + +D111: + 
+tnscore = tnscore + response; + + /* Tree 113 of 200 */ +N112_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300077, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300116, 100300121, 100200053, 100300019, 100400142, 100300073, 100400037, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100300045, 100200028, 100200067, 100200185, 100200232, 100300146) then goto N112_2; + else goto N112_7; + +N112_2: + if attribute(catid) in (100300011, 100300058, 100300102, 100300005, 100300027, 100200053, 100300019, 100300066, 100300007, 100200028, 100200067, 100200232, 100300146) then goto N112_3; + else goto N112_4; + +N112_3: + if attribute(catid) in (100300011, 100300102, 100300005, 100300019, 100300007, 100200028, 100200067, 100200232, 100300146) then goto T112_1; + else goto T112_2; + +T112_1: + response = -0.0351424027; + goto D112; + +T112_2: + response = -0.0101917869; + goto D112; + +N112_4: + if attribute(catid) in (0, 100300014, 100300165, 100300093, 100300116, 100300073, 100300127, 100200170, 100300169, 100400080, 100300045, 100200185) then goto N112_5; + else goto N112_6; + +N112_5: + if attribute(catid) in (0, 100300093, 100300116, 100300073, 100200170, 100400080, 100300045, 100200185) then goto T112_3; + else goto T112_4; + +T112_3: + response = 0.0023576953; + goto D112; + +T112_4: + response = 0.0043646024; + goto D112; + +N112_6: + if attribute(catid) in (100200171, 100200052, 100200172, 100400142) then goto T112_5; + else goto T112_6; + +T112_5: + response = 0.0064763216; + goto D112; + +T112_6: + response = 0.0093043399; + goto D112; + +N112_7: + if attribute(catid) in (100200130, 100300013, 100300166, 100300143, 100200034, 100200186, 100300008, 100300032, 100200234, 100300004, 100300126, 100200054, 100400038, 100300122, 100200176, 100300200, 100300006) then goto N112_8; + else goto T112_9; + +N112_8: + if attribute(catid) in 
(100200130, 100300166, 100300143, 100300032, 100300004, 100300126, 100200054, 100400038, 100300122, 100200176, 100300200) then goto T112_7; + else goto T112_8; + +T112_7: + response = 0.0153614150; + goto D112; + +T112_8: + response = 0.0203450573; + goto D112; + +T112_9: + response = 0.0370146837; + goto D112; + +D112: + +tnscore = tnscore + response; + + /* Tree 114 of 200 */ +N113_1: + if attribute(catid) in (100200171, 100300011, 100200034, 100200186, 100300093, 100300102, 100300008, 100300032, 100300121, 100300126, 100300122, 100200087, 100300200, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N113_2; + else goto N113_4; + +N113_2: + if attribute(catid) in (100300093, 100300008, 100300032, 100300076, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T113_1; + else goto N113_3; + +T113_1: + response = -0.0283721448; + goto D113; + +N113_3: + if attribute(catid) in (100200171, 100300011, 100200034, 100200186, 100300102, 100300200, 100300006) then goto T113_2; + else goto T113_3; + +T113_2: + response = -0.0097302345; + goto D113; + +T113_3: + response = -0.0016235117; + goto D113; + +N113_4: + if attribute(catid) in (0, 100200130, 100300058, 100300013, 100300143, 100400141, 100300165, 100200052, 100300005, 100200068, 100300027, 100200053, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300212, 100300127, 100200170, 100300169, 100300074, 100300066, 100300045, 100200028, 100200232) then goto N113_5; + else goto N113_7; + +N113_5: + if attribute(catid) in (0, 100300058, 100300013, 100300143, 100400141, 100200052, 100200068, 100200053, 100300073, 100400037, 100200193, 100200192, 100300127, 100200170, 100300074, 100300045) then goto N113_6; + else goto T113_6; + +N113_6: + if attribute(catid) in (100300058, 100200052, 100200068, 100200053, 100300073, 100400037, 100200193, 100200192, 100300127, 100200170, 100300074, 100300045) then goto T113_4; + else goto T113_5; + +T113_4: + response = 
0.0055504107; + goto D113; + +T113_5: + response = 0.0071774828; + goto D113; + +T113_6: + response = 0.0090674985; + goto D113; + +N113_7: + if attribute(catid) in (100300014, 100300077, 100300166, 100200054, 100400038, 100300065) then goto T113_7; + else goto N113_8; + +T113_7: + response = 0.0138239288; + goto D113; + +N113_8: + if attribute(catid) in (100200172, 100200234, 100400079, 100400080) then goto T113_8; + else goto T113_9; + +T113_8: + response = 0.0189710965; + goto D113; + +T113_9: + response = 0.0320267193; + goto D113; + +D113: + +tnscore = tnscore + response; + + /* Tree 115 of 200 */ +N114_1: + if attribute(catid) in (100200171, 100300014, 100300058, 100300013, 100300166, 100400141, 100200172, 100300008, 100200068, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400037, 100200192, 100300212, 100300209, 100300127, 100400080, 100200087, 100300074, 100200176, 100300076, 100200055, 100300214) then goto N114_2; + else goto N114_5; + +N114_2: + if attribute(catid) in (100300014, 100300058, 100300013, 100300008, 100200068, 100200053, 100300019, 100300212, 100300209, 100300127, 100400080, 100200087, 100200055, 100300214) then goto N114_3; + else goto N114_4; + +N114_3: + if attribute(catid) in (100300013, 100300008, 100300019, 100300212, 100200087, 100200055, 100300214) then goto T114_1; + else goto T114_2; + +T114_1: + response = -0.0279752223; + goto D114; + +T114_2: + response = -0.0097745433; + goto D114; + +N114_4: + if attribute(catid) in (100200171, 100400141, 100200234, 100300004, 100400037, 100300074, 100200176, 100300076) then goto T114_3; + else goto T114_4; + +T114_3: + response = -0.0015465151; + goto D114; + +T114_4: + response = 0.0023515763; + goto D114; + +N114_5: + if attribute(catid) in (0, 100200130, 100300143, 100200034, 100200052, 100300027, 100400142, 100200054, 100300073, 100200193, 100400038, 100300065, 100400079, 100300169, 100300045, 100200028) then goto N114_6; + else goto N114_7; + +N114_6: + if 
attribute(catid) in (100200130, 100300143, 100200034, 100300027, 100400142, 100200054, 100300073, 100400038, 100300065, 100400079, 100300169) then goto T114_5; + else goto T114_6; + +T114_5: + response = 0.0070041173; + goto D114; + +T114_6: + response = 0.0113800549; + goto D114; + +N114_7: + if attribute(catid) in (100300077, 100200186, 100300165, 100300102, 100300116, 100300122, 100200170, 100300066, 100200185, 100200232) then goto T114_7; + else goto T114_8; + +T114_7: + response = 0.0215968499; + goto D114; + +T114_8: + response = 0.0448418659; + goto D114; + +D114: + +tnscore = tnscore + response; + + /* Tree 116 of 200 */ +N115_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200186, 100300165, 100300093, 100300102, 100300008, 100300032, 100300027, 100300116, 100200234, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100400080, 100300007, 100300200, 100300045, 100200028, 100300076, 100200185, 100300006, 100200232) then goto N115_2; + else goto N115_5; + +N115_2: + if attribute(catid) in (100300011, 100200130, 100300014, 100300058, 100300013, 100300032, 100300065, 100300007, 100300200, 100300045, 100200028, 100300076, 100200185, 100300006) then goto T115_1; + else goto N115_3; + +T115_1: + response = -0.0074030560; + goto D115; + +N115_3: + if attribute(catid) in (0, 100200186, 100300165, 100300102, 100300008, 100300027, 100300116, 100200234, 100400037, 100200192, 100300122, 100400080) then goto N115_4; + else goto T115_4; + +N115_4: + if attribute(catid) in (100300165, 100300008, 100300027, 100200234, 100400037, 100200192, 100300122, 100400080) then goto T115_2; + else goto T115_3; + +T115_2: + response = 0.0018774622; + goto D115; + +T115_3: + response = 0.0037801837; + goto D115; + +T115_4: + response = 0.0085665512; + goto D115; + +N115_5: + if attribute(catid) in (100200034, 100400141, 100200052, 100200172, 100200054, 100400038, 
100400079, 100200170, 100300169, 100200087, 100200067) then goto N115_6; + else goto N115_7; + +N115_6: + if attribute(catid) in (100200034, 100400141, 100400038, 100400079, 100200087, 100200067) then goto T115_5; + else goto T115_6; + +T115_5: + response = 0.0118341371; + goto D115; + +T115_6: + response = 0.0154556164; + goto D115; + +N115_7: + if attribute(catid) in (100200068, 100200053, 100300126, 100300212, 100300074, 100300066) then goto T115_7; + else goto T115_8; + +T115_7: + response = 0.0234779795; + goto D115; + +T115_8: + response = 0.0334980927; + goto D115; + +D115: + +tnscore = tnscore + response; + + /* Tree 117 of 200 */ +N116_1: + if attribute(catid) in (100200171, 100300058, 100300013, 100300143, 100200186, 100200052, 100300102, 100300008, 100300116, 100300121, 100200234, 100300019, 100300004, 100200054, 100200193, 100400038, 100300212, 100300209, 100300127, 100200170, 100200176, 100200067, 100200055) then goto N116_2; + else goto N116_5; + +N116_2: + if attribute(catid) in (100300013, 100300102, 100200234, 100400038, 100300212, 100300209, 100200176, 100200067, 100200055) then goto N116_3; + else goto N116_4; + +N116_3: + if attribute(catid) in (100300013, 100300102, 100400038, 100200176, 100200067, 100200055) then goto T116_1; + else goto T116_2; + +T116_1: + response = -0.0390121048; + goto D116; + +T116_2: + response = -0.0143867192; + goto D116; + +N116_4: + if attribute(catid) in (100300143, 100200186, 100300008, 100300116, 100300121, 100300004, 100200054, 100200193) then goto T116_3; + else goto T116_4; + +T116_3: + response = -0.0044006932; + goto D116; + +T116_4: + response = 0.0015511826; + goto D116; + +N116_5: + if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200034, 100400141, 100300165, 100300093, 100300005, 100200172, 100200068, 100300027, 100200053, 100400142, 100300073, 100200192, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100300200, 100200028, 100300076, 
100200185, 100200232) then goto N116_6; + else goto T116_9; + +N116_6: + if attribute(catid) in (100200130, 100300077, 100300166, 100200034, 100400141, 100300165, 100300005, 100200172, 100200068, 100200053, 100400142, 100300073, 100300065, 100400079, 100300169, 100400080, 100200087, 100300074, 100200185, 100200232) then goto N116_7; + else goto N116_8; + +N116_7: + if attribute(catid) in (100200130, 100400141, 100200068, 100400142, 100300073, 100300065, 100400079, 100300169, 100400080, 100300074, 100200185) then goto T116_5; + else goto T116_6; + +T116_5: + response = 0.0060636247; + goto D116; + +T116_6: + response = 0.0088886465; + goto D116; + +N116_8: + if attribute(catid) in (0, 100300093, 100300007) then goto T116_7; + else goto T116_8; + +T116_7: + response = 0.0120697556; + goto D116; + +T116_8: + response = 0.0177577497; + goto D116; + +T116_9: + response = 0.0377473017; + goto D116; + +D116: + +tnscore = tnscore + response; + + /* Tree 118 of 200 */ +N117_1: + if attribute(catid) in (100300014, 100300077, 100200034, 100300165, 100300093, 100300005, 100300008, 100200053, 100300004, 100300126, 100400142, 100300212, 100300209, 100300169, 100300200, 100300076, 100200067, 100200185, 100300146) then goto N117_2; + else goto N117_3; + +N117_2: + if attribute(catid) in (100200034, 100300093, 100300005, 100300004, 100300126, 100400142, 100300209, 100300076, 100200067, 100200185, 100300146) then goto T117_1; + else goto T117_2; + +T117_1: + response = -0.0137318062; + goto D117; + +T117_2: + response = -0.0014925810; + goto D117; + +N117_3: + if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300166, 100200186, 100400141, 100200068, 100300027, 100200234, 100200054, 100300073, 100200193, 100200192, 100300065, 100300122, 100400079, 100200170, 100400080, 100300074, 100200028) then goto N117_4; + else goto N117_5; + +N117_4: + if attribute(catid) in (0, 100300058, 100200186, 100400141, 100200193, 100200192, 100400080, 100200028) then goto T117_3; + else 
goto T117_4; + +T117_3: + response = 0.0060199161; + goto D117; + +T117_4: + response = 0.0102196174; + goto D117; + +N117_5: + if attribute(catid) in (100200171, 100200052, 100200172, 100300032, 100300116, 100400038, 100200087, 100300066) then goto T117_5; + else goto T117_6; + +T117_5: + response = 0.0151193734; + goto D117; + +T117_6: + response = 0.0263768697; + goto D117; + +D117: + +tnscore = tnscore + response; + + /* Tree 119 of 200 */ +N118_1: + if attribute(catid) in (0, 100300058, 100200034, 100200052, 100300093, 100300102, 100300005, 100300008, 100300027, 100300004, 100300126, 100400142, 100300073, 100300212, 100400079, 100400080, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100300146) then goto N118_2; + else goto N118_5; + +N118_2: + if attribute(catid) in (100200034, 100200052, 100300102, 100300008, 100300027, 100300126, 100300007, 100200176, 100200067, 100200185, 100300146) then goto N118_3; + else goto N118_4; + +N118_3: + if attribute(catid) in (100300102, 100300008, 100300027, 100300126, 100200067, 100200185, 100300146) then goto T118_1; + else goto T118_2; + +T118_1: + response = -0.0197844476; + goto D118; + +T118_2: + response = -0.0061589619; + goto D118; + +N118_4: + if attribute(catid) in (100300058, 100400142, 100300212, 100400079, 100400080, 100300200, 100300045, 100300076) then goto T118_3; + else goto T118_4; + +T118_3: + response = -0.0009327577; + goto D118; + +T118_4: + response = 0.0041966425; + goto D118; + +N118_5: + if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100300143, 100200186, 100400141, 100300165, 100200172, 100200068, 100300032, 100300116, 100300121, 100200053, 100400038, 100200192, 100300065, 100200170, 100300169, 100300074, 100300066, 100200028, 100300006) then goto N118_6; + else goto N118_7; + +N118_6: + if attribute(catid) in (100200171, 100200130, 100300077, 100400141, 100300165, 100200172, 100300032, 100300116, 100300121, 100300065, 100200170) then goto T118_5; 
+ else goto T118_6; + +T118_5: + response = 0.0090658634; + goto D118; + +T118_6: + response = 0.0131790639; + goto D118; + +N118_7: + if attribute(catid) in (100300011, 100300013, 100400037, 100300122, 100300127, 100200087) then goto T118_7; + else goto T118_8; + +T118_7: + response = 0.0204665481; + goto D118; + +T118_8: + response = 0.0306124846; + goto D118; + +D118: + +tnscore = tnscore + response; + + /* Tree 120 of 200 */ +N119_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300077, 100300166, 100200052, 100300005, 100200172, 100300008, 100300116, 100200234, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100200170, 100300169, 100300074, 100300007, 100200176, 100300200, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N119_2; + else goto N119_6; + +N119_2: + if attribute(catid) in (100200130, 100300013, 100300077, 100300005, 100300008, 100300116, 100200234, 100200054, 100300073, 100200193, 100300212, 100300209, 100300122, 100200170, 100300007, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N119_3; + else goto N119_4; + +N119_3: + if attribute(catid) in (100300013, 100300116, 100200234, 100200054, 100200193, 100300212, 100300209, 100300007, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto T119_1; + else goto T119_2; + +T119_1: + response = -0.0108827313; + goto D119; + +T119_2: + response = -0.0005017785; + goto D119; + +N119_4: + if attribute(catid) in (0, 100300058, 100300166, 100200172, 100400037, 100200176) then goto N119_5; + else goto T119_5; + +N119_5: + if attribute(catid) in (100300058, 100300166, 100200172, 100400037) then goto T119_3; + else goto T119_4; + +T119_3: + response = 0.0023128525; + goto D119; + +T119_4: + response = 0.0053288841; + goto D119; + +T119_5: + response = 0.0080470055; + goto D119; + +N119_6: + if attribute(catid) in (100300014, 
100300143, 100200034, 100200186, 100400141, 100300165, 100200068, 100300032, 100300027, 100300121, 100200053, 100300004, 100300065, 100400079, 100200087, 100300066) then goto N119_7; + else goto N119_8; + +N119_7: + if attribute(catid) in (100300014, 100300143, 100200034, 100400141, 100300165, 100300032, 100200053, 100300004, 100300065, 100400079, 100300066) then goto T119_6; + else goto T119_7; + +T119_6: + response = 0.0118541533; + goto D119; + +T119_7: + response = 0.0171167195; + goto D119; + +N119_8: + if attribute(catid) in (100300093, 100300126, 100400038, 100400080) then goto T119_8; + else goto T119_9; + +T119_8: + response = 0.0252859922; + goto D119; + +T119_9: + response = 0.0427399285; + goto D119; + +D119: + +tnscore = tnscore + response; + + /* Tree 121 of 200 */ +N120_1: + if attribute(catid) in (100200171, 100300011, 100200130, 100300058, 100300166, 100200034, 100200186, 100400141, 100300093, 100300005, 100300008, 100200068, 100300116, 100200053, 100300126, 100400142, 100200054, 100300073, 100200193, 100300065, 100400079, 100300169, 100400080, 100300074, 100300007, 100200176, 100300045, 100200028, 100200067, 100300006, 100300214, 100300146) then goto N120_2; + else goto N120_5; + +N120_2: + if attribute(catid) in (100300011, 100300058, 100200034, 100200186, 100300005, 100300008, 100200054, 100400080, 100300074, 100300007, 100200176, 100200067, 100300214, 100300146) then goto N120_3; + else goto N120_4; + +N120_3: + if attribute(catid) in (100200034, 100200054, 100400080, 100300074, 100300007, 100200067, 100300214, 100300146) then goto T120_1; + else goto T120_2; + +T120_1: + response = -0.0139396834; + goto D120; + +T120_2: + response = -0.0053561842; + goto D120; + +N120_4: + if attribute(catid) in (100200171, 100200130, 100300093, 100200068, 100200053, 100300126, 100400142, 100400079, 100300045, 100300006) then goto T120_3; + else goto T120_4; + +T120_3: + response = -0.0012208885; + goto D120; + +T120_4: + response = 0.0034171063; + goto D120; 
+ +N120_5: + if attribute(catid) in (0, 100300014, 100300013, 100300143, 100300165, 100200052, 100300102, 100200172, 100300032, 100300121, 100400037, 100400038, 100200192, 100300122, 100300127, 100200170, 100200087, 100300066, 100300200) then goto N120_6; + else goto T120_8; + +N120_6: + if attribute(catid) in (0, 100300143, 100300165, 100200052, 100300102, 100300032, 100300121, 100400037, 100400038, 100300122, 100300127, 100200170) then goto N120_7; + else goto T120_7; + +N120_7: + if attribute(catid) in (100300143, 100200052, 100300102, 100300032, 100400037, 100400038, 100200170) then goto T120_5; + else goto T120_6; + +T120_5: + response = 0.0071786267; + goto D120; + +T120_6: + response = 0.0105454236; + goto D120; + +T120_7: + response = 0.0151332724; + goto D120; + +T120_8: + response = 0.0271687303; + goto D120; + +D120: + +tnscore = tnscore + response; + + /* Tree 122 of 200 */ +N121_1: + if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200052, 100200172, 100300032, 100300121, 100300019, 100400142, 100300073, 100200193, 100400038, 100300065, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300007, 100200176, 100200028, 100300076, 100200067, 100300006) then goto N121_2; + else goto N121_5; + +N121_2: + if attribute(catid) in (100300011, 100200034, 100300019, 100200193, 100400038, 100300209, 100300122, 100300007, 100200176, 100200028, 100200067, 100300006) then goto T121_1; + else goto N121_3; + +T121_1: + response = -0.0107750803; + goto D121; + +N121_3: + if attribute(catid) in (100400142, 100300073, 100400079, 100400080, 100300076) then goto T121_2; + else goto N121_4; + +T121_2: + response = 0.0000414432; + goto D121; + +N121_4: + if attribute(catid) in (0, 100200130, 100300077, 100300166, 100300143, 100200172) then goto T121_3; + else goto T121_4; + +T121_3: + response = 0.0037799336; + goto D121; + +T121_4: + response = 0.0062373044; + goto D121; + 
+N121_5: + if attribute(catid) in (100200171, 100200186, 100300165, 100300102, 100300005, 100300027, 100300116, 100200234, 100300004, 100300126, 100200054, 100400037, 100200170, 100300074, 100300066) then goto N121_6; + else goto N121_7; + +N121_6: + if attribute(catid) in (100200171, 100200186, 100300165, 100200234, 100300074) then goto T121_5; + else goto T121_6; + +T121_5: + response = 0.0105527030; + goto D121; + +T121_6: + response = 0.0153207486; + goto D121; + +N121_7: + if attribute(catid) in (100300014, 100300093, 100200068, 100200053, 100200192, 100300200, 100200232) then goto T121_7; + else goto T121_8; + +T121_7: + response = 0.0240361458; + goto D121; + +T121_8: + response = 0.0348297568; + goto D121; + +D121: + +tnscore = tnscore + response; + + /* Tree 123 of 200 */ +N122_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200053, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N122_2; + else goto N122_8; + +N122_2: + if attribute(catid) in (100200130, 100300014, 100300058, 100200034, 100300165, 100200068, 100200054, 100300209, 100200170, 100400080, 100300074, 100300007, 100200176, 100200055, 100300006, 100200232, 100300146) then goto N122_3; + else goto N122_5; + +N122_3: + if attribute(catid) in (100300058, 100200068, 100200054, 100300209, 100200176, 100200055, 100200232, 100300146) then goto T122_1; + else goto N122_4; + +T122_1: + response = -0.0154543001; + goto D122; + +N122_4: + if attribute(catid) in (100200034, 100300165, 100200170, 100300007, 100300006) then goto T122_2; + 
else goto T122_3; + +T122_2: + response = -0.0053315859; + goto D122; + +T122_3: + response = -0.0008526404; + goto D122; + +N122_5: + if attribute(catid) in (0, 100300011, 100300013, 100400141, 100200052, 100200172, 100300121, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100400079, 100300066, 100300200, 100300045, 100200028, 100300076) then goto N122_6; + else goto N122_7; + +N122_6: + if attribute(catid) in (100300011, 100300013, 100400141, 100300121, 100400142, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127) then goto T122_4; + else goto T122_5; + +T122_4: + response = 0.0029708190; + goto D122; + +T122_5: + response = 0.0066974843; + goto D122; + +N122_7: + if attribute(catid) in (100200171, 100300027, 100200053, 100300169) then goto T122_6; + else goto T122_7; + +T122_6: + response = 0.0109293572; + goto D122; + +T122_7: + response = 0.0151670383; + goto D122; + +N122_8: + if attribute(catid) in (100300077, 100300093, 100300102, 100300005, 100300008, 100200087) then goto T122_8; + else goto T122_9; + +T122_8: + response = 0.0226972124; + goto D122; + +T122_9: + response = 0.0401505411; + goto D122; + +D122: + +tnscore = tnscore + response; + + /* Tree 124 of 200 */ +N123_1: + if attribute(catid) in (100200130, 100300014, 100300077, 100200034, 100300165, 100300102, 100300032, 100300116, 100300121, 100200234, 100200053, 100400038, 100300212, 100300127, 100400080, 100200087, 100300074, 100300007, 100200028, 100200067, 100200055, 100200185, 100300006) then goto N123_2; + else goto N123_4; + +N123_2: + if attribute(catid) in (100300014, 100300032, 100400038, 100300212, 100200087, 100300007, 100200055, 100200185) then goto T123_1; + else goto N123_3; + +T123_1: + response = -0.0294546465; + goto D123; + +N123_3: + if attribute(catid) in (100300165, 100300121, 100200234, 100300127, 100400080, 100200028, 100200067, 100300006) then goto T123_2; + else goto T123_3; + +T123_2: + response = 
-0.0041076181; + goto D123; + +T123_3: + response = -0.0003323120; + goto D123; + +N123_4: + if attribute(catid) in (0, 100300011, 100300058, 100300166, 100200186, 100300093, 100300005, 100200172, 100300008, 100200068, 100300073, 100400037, 100200192, 100300122, 100400079, 100300169, 100300066, 100200176, 100300076) then goto N123_5; + else goto N123_7; + +N123_5: + if attribute(catid) in (0, 100300011, 100300058, 100300093, 100300005, 100200172, 100200068, 100300073, 100200192, 100400079, 100300169, 100300076) then goto N123_6; + else goto T123_6; + +N123_6: + if attribute(catid) in (100300011, 100300058, 100300093, 100300005, 100200172, 100200068, 100300073, 100200192, 100400079, 100300169) then goto T123_4; + else goto T123_5; + +T123_4: + response = 0.0060463670; + goto D123; + +T123_5: + response = 0.0075872041; + goto D123; + +T123_6: + response = 0.0099717066; + goto D123; + +N123_7: + if attribute(catid) in (100200171, 100400141, 100200052, 100300004, 100300126, 100400142, 100200054, 100300065, 100200170) then goto T123_7; + else goto T123_8; + +T123_7: + response = 0.0147808083; + goto D123; + +T123_8: + response = 0.0271833064; + goto D123; + +D123: + +tnscore = tnscore + response; + + /* Tree 125 of 200 */ +N124_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100200172, 100300008, 100300032, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100300073, 100400037, 100400038, 100200192, 100300065, 100300212, 100300122, 100300127, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100200028, 100200067) then goto N124_2; + else goto N124_6; + +N124_2: + if attribute(catid) in (100300014, 100300013, 100300077, 100300143, 100200186, 100300093, 100300102, 100300008, 100300032, 100200053, 100300019, 100400037, 100400038, 100300212, 100300007, 100200028, 
100200067) then goto N124_3; + else goto N124_4; + +N124_3: + if attribute(catid) in (100200186, 100300032, 100300019, 100300212, 100200067) then goto T124_1; + else goto T124_2; + +T124_1: + response = -0.0346775988; + goto D124; + +T124_2: + response = -0.0084286391; + goto D124; + +N124_4: + if attribute(catid) in (100200034, 100400141, 100200052, 100200172, 100200234, 100300073, 100300065, 100200170, 100200087, 100300074, 100200176) then goto T124_3; + else goto N124_5; + +T124_3: + response = 0.0021428485; + goto D124; + +N124_5: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300165, 100300027, 100200192, 100300122, 100300169, 100300066, 100300200) then goto T124_4; + else goto T124_5; + +T124_4: + response = 0.0069464030; + goto D124; + +T124_5: + response = 0.0110205277; + goto D124; + +N124_6: + if attribute(catid) in (100300058, 100200068, 100300116, 100300126, 100400142) then goto T124_6; + else goto T124_7; + +T124_6: + response = 0.0228118568; + goto D124; + +T124_7: + response = 0.0343825826; + goto D124; + +D124: + +tnscore = tnscore + response; + + /* Tree 126 of 200 */ +N125_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300143, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232) then goto N125_2; + else goto N125_6; + +N125_2: + if attribute(catid) in (100300011, 100200186, 100300102, 100300005, 100200068, 100300032, 100300121, 100200053, 100300126, 100400037, 100300209, 100300007, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006) then goto N125_3; + else goto N125_4; + +N125_3: + if attribute(catid) in (100300011, 100300102, 100300005, 
100300121, 100300126, 100400037, 100300209, 100300076, 100200055, 100200185, 100300006) then goto T125_1; + else goto T125_2; + +T125_1: + response = -0.0217391763; + goto D125; + +T125_2: + response = -0.0085505926; + goto D125; + +N125_4: + if attribute(catid) in (100200171, 100200130, 100300014, 100300143, 100400141, 100200052, 100300027, 100200234, 100300073, 100200192, 100300127, 100200170, 100200087, 100300074, 100300066, 100200176) then goto N125_5; + else goto T125_5; + +N125_5: + if attribute(catid) in (100200171, 100200130, 100300143, 100200052, 100200170, 100300074, 100200176) then goto T125_3; + else goto T125_4; + +T125_3: + response = 0.0001581819; + goto D125; + +T125_4: + response = 0.0028254989; + goto D125; + +T125_5: + response = 0.0069841416; + goto D125; + +N125_6: + if attribute(catid) in (100300013, 100300166, 100300165, 100200172, 100300008, 100300116, 100300004, 100200054, 100400038, 100300212, 100300122, 100300169, 100300200, 100200067) then goto N125_7; + else goto T125_8; + +N125_7: + if attribute(catid) in (100200172, 100300116, 100300004, 100300122, 100300169, 100300200) then goto T125_6; + else goto T125_7; + +T125_6: + response = 0.0144252893; + goto D125; + +T125_7: + response = 0.0197908458; + goto D125; + +T125_8: + response = 0.0428441900; + goto D125; + +D125: + +tnscore = tnscore + response; + + /* Tree 127 of 200 */ +N126_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100300019, 100300126, 100300073, 100400037, 100200193, 100200192, 100300209, 100300127, 100400079, 100200170, 100300169, 100300066, 100300007, 100200176, 100300200, 100200028, 100300076, 100200185, 100300006, 100300146) then goto N126_2; + else goto N126_7; + +N126_2: + if attribute(catid) in (100300011, 100300014, 100300143, 100300005, 100300027, 100200234, 100300126, 100300209, 
100300066, 100300007, 100200176, 100300200, 100200185, 100300146) then goto N126_3; + else goto N126_4; + +N126_3: + if attribute(catid) in (100300011, 100300005, 100300126, 100300209, 100300007, 100200176, 100200185) then goto T126_1; + else goto T126_2; + +T126_1: + response = -0.0278752616; + goto D126; + +T126_2: + response = -0.0114872053; + goto D126; + +N126_4: + if attribute(catid) in (100200171, 100300058, 100300077, 100300166, 100200172, 100300008, 100200068, 100300116, 100200193, 100400079) then goto N126_5; + else goto N126_6; + +N126_5: + if attribute(catid) in (100300058, 100200172, 100200068, 100300116, 100400079) then goto T126_3; + else goto T126_4; + +T126_3: + response = -0.0025940117; + goto D126; + +T126_4: + response = 0.0005790855; + goto D126; + +N126_6: + if attribute(catid) in (0, 100400141, 100300019, 100300073, 100200192, 100300076) then goto T126_5; + else goto T126_6; + +T126_5: + response = 0.0050971880; + goto D126; + +T126_6: + response = 0.0074013229; + goto D126; + +N126_7: + if attribute(catid) in (100200034, 100300093, 100300121, 100200053, 100300004, 100400142, 100200054, 100400038, 100300065, 100300122, 100400080, 100300074) then goto N126_8; + else goto N126_9; + +N126_8: + if attribute(catid) in (100300093, 100300121, 100200053, 100300004, 100400142, 100300065, 100400080) then goto T126_7; + else goto T126_8; + +T126_7: + response = 0.0112491051; + goto D126; + +T126_8: + response = 0.0165554655; + goto D126; + +N126_9: + if attribute(catid) in (100300165, 100200087, 100200232) then goto T126_9; + else goto T126_10; + +T126_9: + response = 0.0252638080; + goto D126; + +T126_10: + response = 0.0395197280; + goto D126; + +D126: + +tnscore = tnscore + response; + + /* Tree 128 of 200 */ +N127_1: + if attribute(catid) in (0, 100200130, 100300058, 100300013, 100200034, 100400141, 100300165, 100300093, 100300102, 100200172, 100300008, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300004, 100300126, 
100400142, 100300073, 100200193, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100200087, 100300066, 100300200, 100300045, 100200028, 100300076, 100200067, 100200055) then goto N127_2; + else goto N127_5; + +N127_2: + if attribute(catid) in (100300093, 100300008, 100300032, 100300121, 100200234, 100300004, 100300212, 100300169, 100200087, 100300045, 100200067, 100200055) then goto N127_3; + else goto N127_4; + +N127_3: + if attribute(catid) in (100300032, 100200234, 100300212, 100200087, 100200055) then goto T127_1; + else goto T127_2; + +T127_1: + response = -0.0204195189; + goto D127; + +T127_2: + response = -0.0097651462; + goto D127; + +N127_4: + if attribute(catid) in (0, 100200130, 100300058, 100300165, 100300126, 100400142, 100200192, 100400079, 100300066, 100300200, 100200028, 100300076) then goto T127_3; + else goto T127_4; + +T127_3: + response = 0.0029821664; + goto D127; + +T127_4: + response = 0.0074556490; + goto D127; + +N127_5: + if attribute(catid) in (100200171, 100300077, 100300166, 100200186, 100300027, 100200054, 100400038, 100300065, 100300122, 100300007, 100200232) then goto T127_5; + else goto N127_6; + +T127_5: + response = 0.0123063395; + goto D127; + +N127_6: + if attribute(catid) in (100300014, 100200052, 100300005, 100400037, 100400080) then goto T127_6; + else goto T127_7; + +T127_6: + response = 0.0219573650; + goto D127; + +T127_7: + response = 0.0390618799; + goto D127; + +D127: + +tnscore = tnscore + response; + + /* Tree 129 of 200 */ +N128_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200053, 100300019, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300122, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 
100200028, 100200067, 100300006, 100200232, 100300214) then goto N128_2; + else goto N128_5; + +N128_2: + if attribute(catid) in (100300011, 100300166, 100300143, 100400141, 100300093, 100300005, 100200172, 100200068, 100300116, 100300121, 100300019, 100300126, 100200054, 100300073, 100200193, 100200087, 100300074, 100300007, 100300200, 100300045, 100200028, 100200067, 100200232, 100300214) then goto N128_3; + else goto N128_4; + +N128_3: + if attribute(catid) in (100300143, 100400141, 100300116, 100300019, 100300126, 100200054, 100300074, 100200067, 100200232, 100300214) then goto T128_1; + else goto T128_2; + +T128_1: + response = -0.0084514959; + goto D128; + +T128_2: + response = -0.0026252948; + goto D128; + +N128_4: + if attribute(catid) in (0, 100200130, 100300013, 100200034, 100300027, 100200053, 100400037, 100200192, 100200170, 100300169, 100300006) then goto T128_3; + else goto T128_4; + +T128_3: + response = 0.0047022442; + goto D128; + +T128_4: + response = 0.0075241336; + goto D128; + +N128_5: + if attribute(catid) in (100300032, 100200234, 100300004, 100400142, 100400038, 100300065, 100300212, 100300127, 100400080, 100300076) then goto T128_5; + else goto N128_6; + +T128_5: + response = 0.0167624654; + goto D128; + +N128_6: + if attribute(catid) in (100300165) then goto T128_6; + else goto T128_7; + +T128_6: + response = 0.0213460841; + goto D128; + +T128_7: + response = 0.0385220165; + goto D128; + +D128: + +tnscore = tnscore + response; + + /* Tree 130 of 200 */ +N129_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100200234, 100200053, 100300126, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300066, 100300007, 100200176, 100300045, 100200028, 100200055, 
100300006) then goto N129_2; + else goto N129_6; + +N129_2: + if attribute(catid) in (100300011, 100300166, 100300143, 100300093, 100200068, 100300032, 100200054, 100400038, 100300209, 100300127, 100400079, 100200087, 100200176, 100300045, 100200028, 100200055, 100300006) then goto N129_3; + else goto N129_4; + +N129_3: + if attribute(catid) in (100200068, 100300032, 100300209, 100200176, 100200028, 100200055) then goto T129_1; + else goto T129_2; + +T129_1: + response = -0.0269726448; + goto D129; + +T129_2: + response = -0.0065892436; + goto D129; + +N129_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300005, 100200172, 100300027, 100300116, 100200053, 100300073, 100200193, 100200192, 100300065, 100300122, 100300169, 100400080, 100300066, 100300007) then goto N129_5; + else goto T129_5; + +N129_5: + if attribute(catid) in (100200171, 100200130, 100300005, 100200172, 100300027, 100300073, 100200193, 100200192, 100300122, 100300169, 100300007) then goto T129_3; + else goto T129_4; + +T129_3: + response = 0.0012062164; + goto D129; + +T129_4: + response = 0.0058063938; + goto D129; + +T129_5: + response = 0.0115917030; + goto D129; + +N129_6: + if attribute(catid) in (100300014, 100300121, 100300004, 100400142, 100200170, 100300074, 100300076, 100200067, 100300146) then goto T129_6; + else goto T129_7; + +T129_6: + response = 0.0227008484; + goto D129; + +T129_7: + response = 0.0502957296; + goto D129; + +D129: + +tnscore = tnscore + response; + + /* Tree 131 of 200 */ +N130_1: + if attribute(catid) in (0, 100200171, 100300013, 100300166, 100200186, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100200234, 100300004, 100400142, 100200193, 100400038, 100300212, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300066, 100300007, 100300200, 100300045, 100200028, 100300006, 100200232) then goto N130_2; + else goto N130_5; + +N130_2: + if attribute(catid) in (100300013, 100300032, 100200193, 
100300212, 100300127, 100300007) then goto T130_1; + else goto N130_3; + +T130_1: + response = -0.0319002490; + goto D130; + +N130_3: + if attribute(catid) in (100200171, 100300166, 100300093, 100200172, 100300008, 100200234, 100300004, 100400142, 100400038, 100300169, 100200087, 100300066, 100300200, 100300045, 100300006, 100200232) then goto N130_4; + else goto T130_4; + +N130_4: + if attribute(catid) in (100300166, 100300093, 100300008, 100200234, 100300004, 100400142, 100200087, 100300066, 100300200, 100300045, 100300006) then goto T130_2; + else goto T130_3; + +T130_2: + response = -0.0040077306; + goto D130; + +T130_3: + response = 0.0005434632; + goto D130; + +T130_4: + response = 0.0044295890; + goto D130; + +N130_5: + if attribute(catid) in (100200130, 100300058, 100300077, 100300143, 100400141, 100300165, 100300102, 100300005, 100300121, 100200054, 100300073, 100200192, 100300065, 100300209, 100200176, 100300076, 100200185) then goto N130_6; + else goto N130_7; + +N130_6: + if attribute(catid) in (100300058, 100300077, 100300102, 100300005, 100300073, 100300065, 100300209) then goto T130_5; + else goto T130_6; + +T130_5: + response = 0.0088697865; + goto D130; + +T130_6: + response = 0.0120553652; + goto D130; + +N130_7: + if attribute(catid) in (100200034, 100200052, 100200068, 100200053, 100300126) then goto T130_7; + else goto T130_8; + +T130_7: + response = 0.0200329832; + goto D130; + +T130_8: + response = 0.0315067216; + goto D130; + +D130: + +tnscore = tnscore + response; + + /* Tree 132 of 200 */ +N131_1: + if attribute(catid) in (100300011, 100200130, 100300058, 100300013, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300027, 100300116, 100200234, 100200053, 100300126, 100400142, 100200054, 100300073, 100400037, 100300122, 100300127, 100200170, 100300169, 100300074, 100300007, 100300045, 100200028, 100200185, 100300146) then goto N131_2; + else goto N131_5; + +N131_2: + if attribute(catid) in (100300011, 100300058, 
100300013, 100300165, 100200052, 100300008, 100200054, 100300074, 100300007, 100200185, 100300146) then goto N131_3; + else goto N131_4; + +N131_3: + if attribute(catid) in (100300011, 100300013, 100300008, 100200054, 100300007, 100300146) then goto T131_1; + else goto T131_2; + +T131_1: + response = -0.0282159404; + goto D131; + +T131_2: + response = -0.0102832725; + goto D131; + +N131_4: + if attribute(catid) in (100200068, 100300116, 100200234, 100300126, 100300073, 100400037, 100300122, 100300127, 100300045, 100200028) then goto T131_3; + else goto T131_4; + +T131_3: + response = -0.0021058619; + goto D131; + +T131_4: + response = 0.0025428121; + goto D131; + +N131_5: + if attribute(catid) in (0, 100200171, 100300014, 100300077, 100300166, 100300143, 100200186, 100400141, 100300032, 100300121, 100300004, 100400038, 100300065, 100400079, 100300066, 100200067, 100300006) then goto N131_6; + else goto N131_7; + +N131_6: + if attribute(catid) in (0, 100300014, 100300077, 100200186, 100300032, 100300121, 100300004, 100400038, 100300066, 100300006) then goto T131_5; + else goto T131_6; + +T131_5: + response = 0.0091363630; + goto D131; + +T131_6: + response = 0.0165605827; + goto D131; + +N131_7: + if attribute(catid) in (100200034, 100200192, 100200087, 100200176, 100300200) then goto T131_7; + else goto T131_8; + +T131_7: + response = 0.0258553552; + goto D131; + +T131_8: + response = 0.0438879554; + goto D131; + +D131: + +tnscore = tnscore + response; + + /* Tree 133 of 200 */ +N132_1: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300077, 100300166, 100300143, 100200034, 100400141, 100200052, 100300093, 100300102, 100300005, 100300008, 100300121, 100200053, 100300004, 100300126, 100400142, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100200170, 100400080, 100300066, 100200176, 100200028, 100200067, 100200055, 100300146) then goto N132_2; + else goto N132_5; + +N132_2: + if attribute(catid) in 
(100300011, 100200130, 100300014, 100300143, 100200034, 100300093, 100300008, 100300121, 100200053, 100300004, 100200193, 100400038, 100200192, 100300212, 100400079, 100200170, 100200176, 100200067, 100200055, 100300146) then goto N132_3; + else goto N132_4; + +N132_3: + if attribute(catid) in (100300011, 100300143, 100300121, 100300212, 100200067, 100200055, 100300146) then goto T132_1; + else goto T132_2; + +T132_1: + response = -0.0232281903; + goto D132; + +T132_2: + response = -0.0044814242; + goto D132; + +N132_4: + if attribute(catid) in (100300166, 100200052, 100300126, 100400142, 100400037, 100300122, 100400080, 100200028) then goto T132_3; + else goto T132_4; + +T132_3: + response = 0.0018401795; + goto D132; + +T132_4: + response = 0.0038857825; + goto D132; + +N132_5: + if attribute(catid) in (100200171, 100200186, 100300165, 100200172, 100200068, 100300027, 100300116, 100200234, 100300019, 100300073, 100300169, 100200087, 100300074, 100300200, 100300076, 100200232) then goto N132_6; + else goto T132_7; + +N132_6: + if attribute(catid) in (100200186, 100300165, 100200172, 100200068, 100300027, 100200234, 100300073, 100200087, 100300200) then goto T132_5; + else goto T132_6; + +T132_5: + response = 0.0097089357; + goto D132; + +T132_6: + response = 0.0155015911; + goto D132; + +T132_7: + response = 0.0416491361; + goto D132; + +D132: + +tnscore = tnscore + response; + + /* Tree 134 of 200 */ +N133_1: + if attribute(catid) in (100300011, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 100200172, 100300032, 100300027, 100300121, 100200053, 100300019, 100300004, 100200054, 100200193, 100400038, 100300127, 100300169, 100300074, 100300007, 100200176, 100200028, 100200232, 100300214, 100300146) then goto N133_2; + else goto N133_4; + +N133_2: + if attribute(catid) in (100300011, 100200186, 100300093, 100300005, 100300027, 100300019, 100200054, 100400038, 100300007, 100200028, 100200232, 100300214, 100300146) then goto T133_1; + else goto 
N133_3; + +T133_1: + response = -0.0086481469; + goto D133; + +N133_3: + if attribute(catid) in (100300165, 100200172, 100300121, 100200193, 100300074) then goto T133_2; + else goto T133_3; + +T133_2: + response = -0.0026659712; + goto D133; + +T133_3: + response = 0.0001319612; + goto D133; + +N133_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100300166, 100300143, 100200034, 100200068, 100300116, 100200234, 100300126, 100400142, 100300073, 100400037, 100200192, 100300065, 100300212, 100300122, 100400079, 100200170, 100400080, 100300066, 100300200, 100300006) then goto N133_5; + else goto T133_6; + +N133_5: + if attribute(catid) in (0, 100300058, 100300077, 100200034, 100200234, 100300073, 100400037, 100200192, 100300065, 100300122, 100400079, 100400080, 100300066) then goto T133_4; + else goto T133_5; + +T133_4: + response = 0.0058612042; + goto D133; + +T133_5: + response = 0.0117793447; + goto D133; + +T133_6: + response = 0.0334049996; + goto D133; + +D133: + +tnscore = tnscore + response; + + /* Tree 135 of 200 */ +N134_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100200172, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N134_2; + else goto N134_6; + +N134_2: + if attribute(catid) in (100300014, 100300166, 100300143, 100400141, 100200052, 100300102, 100200068, 100300019, 100300004, 100200054, 100400037, 100400038, 100300212, 100300122, 100200170, 100200087, 100200067, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N134_3; + else goto N134_4; + +N134_3: + 
if attribute(catid) in (100300143, 100300102, 100300019, 100200054, 100400038, 100300212, 100200067, 100200055, 100200185, 100300214) then goto T134_1; + else goto T134_2; + +T134_1: + response = -0.0256497270; + goto D134; + +T134_2: + response = -0.0043110057; + goto D134; + +N134_4: + if attribute(catid) in (0, 100300011, 100200130, 100200034, 100300165, 100200172, 100300032, 100300121, 100200053, 100300073, 100200193, 100200192, 100300209, 100400079, 100300169, 100300074, 100300066, 100300007) then goto N134_5; + else goto T134_5; + +N134_5: + if attribute(catid) in (100300011, 100200130, 100200034, 100200053, 100300073, 100300169, 100300066, 100300007) then goto T134_3; + else goto T134_4; + +T134_3: + response = 0.0027424259; + goto D134; + +T134_4: + response = 0.0069640136; + goto D134; + +T134_5: + response = 0.0123173309; + goto D134; + +N134_6: + if attribute(catid) in (100300058, 100300077, 100300005, 100300116, 100300126, 100300200, 100200028, 100300076) then goto T134_6; + else goto T134_7; + +T134_6: + response = 0.0267885216; + goto D134; + +T134_7: + response = 0.0575708735; + goto D134; + +D134: + +tnscore = tnscore + response; + + /* Tree 136 of 200 */ +N135_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300116, 100300121, 100200234, 100200053, 100400142, 100300073, 100400038, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100200028, 100200055, 100200185, 100300006, 100200232) then goto N135_2; + else goto N135_6; + +N135_2: + if attribute(catid) in (100300011, 100300143, 100200186, 100200068, 100200234, 100200053, 100300209, 100200087, 100300066, 100300045, 100200055, 100200185, 100300006) then goto N135_3; + else goto N135_4; + +N135_3: + if attribute(catid) in (100300011, 100200234, 
100300209, 100300066, 100200055, 100200185) then goto T135_1; + else goto T135_2; + +T135_1: + response = -0.0209712583; + goto D135; + +T135_2: + response = -0.0093038797; + goto D135; + +N135_4: + if attribute(catid) in (100300077, 100200052, 100300102, 100200172, 100300121, 100400142, 100300122, 100200170, 100300074, 100300007, 100200176, 100200232) then goto T135_3; + else goto N135_5; + +T135_3: + response = 0.0001606661; + goto D135; + +N135_5: + if attribute(catid) in (0, 100200130, 100300166, 100300005, 100300073, 100300065, 100400079, 100300200) then goto T135_4; + else goto T135_5; + +T135_4: + response = 0.0052908982; + goto D135; + +T135_5: + response = 0.0074959812; + goto D135; + +N135_6: + if attribute(catid) in (100300165, 100300093, 100400037, 100200192, 100300169, 100400080, 100300076) then goto T135_6; + else goto T135_7; + +T135_6: + response = 0.0146939559; + goto D135; + +T135_7: + response = 0.0295044733; + goto D135; + +D135: + +tnscore = tnscore + response; + + /* Tree 137 of 200 */ +N136_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300013, 100300077, 100200034, 100200186, 100400141, 100300093, 100300005, 100300008, 100300027, 100300116, 100300121, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100300045, 100300076, 100200055, 100300214) then goto N136_2; + else goto N136_5; + +N136_2: + if attribute(catid) in (100300013, 100300093, 100300005, 100200234, 100300004, 100200193, 100400038, 100300212, 100300122, 100300169, 100200087, 100300074, 100300200, 100300045, 100300076, 100200055, 100300214) then goto N136_3; + else goto N136_4; + +N136_3: + if attribute(catid) in (100200234, 100300004, 100300212, 100300200, 100200055, 100300214) then goto T136_1; + else goto T136_2; + +T136_1: + response = -0.0225234294; + goto D136; + +T136_2: + 
response = -0.0073428723; + goto D136; + +N136_4: + if attribute(catid) in (100200130, 100300077, 100400141, 100300027, 100300126, 100200054, 100300073, 100400037, 100300065, 100400080, 100200176) then goto T136_3; + else goto T136_4; + +T136_3: + response = -0.0007043255; + goto D136; + +T136_4: + response = 0.0046211972; + goto D136; + +N136_5: + if attribute(catid) in (100300058, 100300166, 100300143, 100200052, 100200172, 100300032, 100200192, 100300209, 100200170, 100200232, 100300146) then goto T136_5; + else goto N136_6; + +T136_5: + response = 0.0127794955; + goto D136; + +N136_6: + if attribute(catid) in (100300165, 100200053, 100300019) then goto T136_6; + else goto T136_7; + +T136_6: + response = 0.0212698998; + goto D136; + +T136_7: + response = 0.0283454496; + goto D136; + +D136: + +tnscore = tnscore + response; + + /* Tree 138 of 200 */ +N137_1: + if attribute(catid) in (0, 100300014, 100300013, 100300077, 100200186, 100400141, 100200172, 100300116, 100300004, 100300126, 100400142, 100200054, 100200192, 100300212, 100300209, 100400079, 100200170, 100300169, 100400080, 100300045, 100200028, 100300076, 100200067, 100300006, 100300146) then goto N137_2; + else goto N137_5; + +N137_2: + if attribute(catid) in (100300126, 100400142, 100200054, 100300212, 100300045, 100200067, 100300146) then goto N137_3; + else goto N137_4; + +N137_3: + if attribute(catid) in (100300126, 100300212, 100200067, 100300146) then goto T137_1; + else goto T137_2; + +T137_1: + response = -0.0423375801; + goto D137; + +T137_2: + response = -0.0094881961; + goto D137; + +N137_4: + if attribute(catid) in (0, 100300014, 100300013, 100200186, 100400141, 100300116, 100300004, 100300209, 100400079, 100200028, 100300076, 100300006) then goto T137_3; + else goto T137_4; + +T137_3: + response = -0.0010068479; + goto D137; + +T137_4: + response = 0.0043349463; + goto D137; + +N137_5: + if attribute(catid) in (100200171, 100200130, 100300058, 100300166, 100200034, 100300165, 100300093, 
100300005, 100200068, 100300032, 100300027, 100300121, 100200234, 100200053, 100300073, 100300065, 100300122, 100300127, 100200176) then goto N137_6; + else goto N137_8; + +N137_6: + if attribute(catid) in (100200130, 100300166, 100300165, 100300093, 100300121, 100200053, 100300073, 100300065, 100300122, 100300127) then goto N137_7; + else goto T137_7; + +N137_7: + if attribute(catid) in (100300166, 100300121, 100300073, 100300065, 100300122, 100300127) then goto T137_5; + else goto T137_6; + +T137_5: + response = 0.0078139636; + goto D137; + +T137_6: + response = 0.0103953208; + goto D137; + +T137_7: + response = 0.0132055533; + goto D137; + +N137_8: + if attribute(catid) in (100300011, 100200052, 100300102, 100300008, 100400037, 100200193, 100400038, 100200087, 100300074) then goto T137_8; + else goto T137_9; + +T137_8: + response = 0.0206601511; + goto D137; + +T137_9: + response = 0.0355590743; + goto D137; + +D137: + +tnscore = tnscore + response; + + /* Tree 139 of 200 */ +N138_1: + if attribute(catid) in (100200171, 100200130, 100300166, 100300143, 100200034, 100400141, 100300093, 100300102, 100300005, 100300008, 100200068, 100300116, 100300073, 100200193, 100200192, 100300209, 100300122, 100300169, 100400080, 100200087, 100300074, 100300066, 100300045, 100200028, 100200067) then goto N138_2; + else goto N138_4; + +N138_2: + if attribute(catid) in (100400141, 100300102, 100300008, 100200193, 100300209, 100400080, 100300066, 100300045, 100200028, 100200067) then goto T138_1; + else goto N138_3; + +T138_1: + response = -0.0141770941; + goto D138; + +N138_3: + if attribute(catid) in (100200171, 100200130, 100300166, 100200034, 100300005, 100200068, 100200192, 100200087, 100300074) then goto T138_2; + else goto T138_3; + +T138_2: + response = -0.0034615278; + goto D138; + +T138_3: + response = 0.0015215338; + goto D138; + +N138_4: + if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300077, 100300165, 100200052, 100200172, 100300032, 100300121, 
100200053, 100300004, 100300126, 100400142, 100200054, 100400038, 100300065, 100400079, 100200176, 100300076, 100200185, 100300006) then goto N138_5; + else goto T138_7; + +N138_5: + if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300165, 100300126, 100200054, 100400079, 100300076, 100200185, 100300006) then goto N138_6; + else goto T138_6; + +N138_6: + if attribute(catid) in (100300014, 100300013, 100300165, 100300126, 100200054, 100400079, 100300076, 100200185, 100300006) then goto T138_4; + else goto T138_5; + +T138_4: + response = 0.0055004027; + goto D138; + +T138_5: + response = 0.0079799037; + goto D138; + +T138_6: + response = 0.0113299928; + goto D138; + +T138_7: + response = 0.0216285145; + goto D138; + +D138: + +tnscore = tnscore + response; + + /* Tree 140 of 200 */ +N139_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300143, 100200034, 100400141, 100200052, 100300093, 100300102, 100200172, 100200068, 100300027, 100300121, 100200234, 100200053, 100300019, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300122, 100300127, 100400079, 100300169, 100400080, 100300007, 100200176, 100300200, 100300045, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N139_2; + else goto N139_6; + +N139_2: + if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300143, 100200034, 100200052, 100300093, 100300102, 100200053, 100300019, 100200054, 100200193, 100400080, 100300007, 100300200, 100300045, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N139_3; + else goto N139_4; + +N139_3: + if attribute(catid) in (100300011, 100300143, 100300093, 100300102, 100300019, 100200193, 100300007, 100200055, 100300006, 100200232, 100300214, 100300146) then goto T139_1; + else goto T139_2; + +T139_1: + response = -0.0214500230; + goto D139; + +T139_2: + response = -0.0079398906; + goto D139; + +N139_4: + if 
attribute(catid) in (0, 100300077, 100400141, 100200068, 100300027, 100300121, 100300126, 100400142, 100300073, 100400037, 100300065, 100300212, 100300127, 100400079, 100300169) then goto N139_5; + else goto T139_5; + +N139_5: + if attribute(catid) in (100300077, 100200068, 100300027, 100300121, 100300073, 100300127, 100400079) then goto T139_3; + else goto T139_4; + +T139_3: + response = 0.0017634542; + goto D139; + +T139_4: + response = 0.0044214643; + goto D139; + +T139_5: + response = 0.0078134161; + goto D139; + +N139_6: + if attribute(catid) in (100200130, 100300166, 100200186, 100300032, 100400038, 100300209, 100200170, 100200087) then goto T139_6; + else goto T139_7; + +T139_6: + response = 0.0133924680; + goto D139; + +T139_7: + response = 0.0239045512; + goto D139; + +D139: + +tnscore = tnscore + response; + + /* Tree 141 of 200 */ +N140_1: + if attribute(catid) in (100300013, 100300165, 100300102, 100300008, 100200068, 100300116, 100300121, 100200053, 100200054, 100200192, 100300209, 100300122, 100200170, 100400080, 100200087, 100300066, 100200176, 100300200, 100300045, 100300076, 100200067, 100300214) then goto N140_2; + else goto N140_3; + +N140_2: + if attribute(catid) in (100300013, 100300102, 100300008, 100300116, 100300121, 100200176, 100300200, 100300076, 100200067, 100300214) then goto T140_1; + else goto T140_2; + +T140_1: + response = -0.0229996772; + goto D140; + +T140_2: + response = -0.0059777362; + goto D140; + +N140_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200052, 100300027, 100300004, 100300126, 100300073, 100200193, 100300065, 100300212, 100300127, 100400079, 100300169, 100300074, 100200028) then goto N140_4; + else goto N140_6; + +N140_4: + if attribute(catid) in (100200130, 100300014, 100300166, 100300004, 100300065, 100300212, 100400079, 100300169, 100300074) then goto T140_3; + else goto N140_5; + +T140_3: + response = 0.0030774960; + goto D140; + +N140_5: + if 
attribute(catid) in (0, 100300027, 100300126, 100300127) then goto T140_4; + else goto T140_5; + +T140_4: + response = 0.0063783302; + goto D140; + +T140_5: + response = 0.0094067973; + goto D140; + +N140_6: + if attribute(catid) in (100200186, 100400141, 100200172, 100300032, 100200234, 100300019, 100400142, 100400037, 100300006, 100300146) then goto T140_6; + else goto T140_7; + +T140_6: + response = 0.0137033492; + goto D140; + +T140_7: + response = 0.0253895093; + goto D140; + +D140: + +tnscore = tnscore + response; + + /* Tree 142 of 200 */ +N141_1: + if attribute(catid) in (0, 100300014, 100300058, 100300013, 100300166, 100300143, 100300165, 100200172, 100200068, 100300116, 100300121, 100200053, 100300004, 100400038, 100300065, 100300212, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300045, 100300214) then goto N141_2; + else goto N141_6; + +N141_2: + if attribute(catid) in (100300013, 100300121, 100400038, 100300212, 100200087, 100300045, 100300214) then goto N141_3; + else goto N141_4; + +N141_3: + if attribute(catid) in (100300013, 100300212, 100300214) then goto T141_1; + else goto T141_2; + +T141_1: + response = -0.0545034219; + goto D141; + +T141_2: + response = -0.0112857727; + goto D141; + +N141_4: + if attribute(catid) in (100300014, 100300143, 100300165, 100200068, 100200053, 100300065, 100200170, 100300074, 100300007) then goto T141_3; + else goto N141_5; + +T141_3: + response = -0.0011777575; + goto D141; + +N141_5: + if attribute(catid) in (0, 100200172, 100300116, 100200176) then goto T141_4; + else goto T141_5; + +T141_4: + response = 0.0034598533; + goto D141; + +T141_5: + response = 0.0071870285; + goto D141; + +N141_6: + if attribute(catid) in (100200171, 100300011, 100200130, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300027, 100200234, 100300126, 100400142, 100200054, 100300073, 100400037, 100200192, 100300122, 100300127, 100400079, 100300200, 100200028) 
then goto N141_7; + else goto T141_9; + +N141_7: + if attribute(catid) in (100200130, 100300077, 100200186, 100400141, 100300102, 100200234, 100300126, 100400142, 100200054, 100300073, 100200192, 100300122, 100400079, 100300200, 100200028) then goto T141_6; + else goto N141_8; + +T141_6: + response = 0.0113718649; + goto D141; + +N141_8: + if attribute(catid) in (100200171, 100300011, 100300027, 100400037) then goto T141_7; + else goto T141_8; + +T141_7: + response = 0.0157280324; + goto D141; + +T141_8: + response = 0.0222069557; + goto D141; + +T141_9: + response = 0.0489348021; + goto D141; + +D141: + +tnscore = tnscore + response; + + /* Tree 143 of 200 */ +N142_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300166, 100200034, 100400141, 100300165, 100200052, 100300093, 100300102, 100300008, 100300027, 100200053, 100300019, 100300126, 100400142, 100300073, 100400038, 100300209, 100200170, 100400080, 100200087, 100300007, 100300200, 100300076, 100200067, 100200055, 100300146) then goto N142_2; + else goto N142_4; + +N142_2: + if attribute(catid) in (100300008, 100300019, 100300126, 100300007, 100300200, 100200055, 100300146) then goto T142_1; + else goto N142_3; + +T142_1: + response = -0.0393976544; + goto D142; + +N142_3: + if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300166, 100200034, 100300093, 100300102, 100400038, 100300209, 100200170, 100400080, 100300076, 100200067) then goto T142_2; + else goto T142_3; + +T142_2: + response = -0.0070993241; + goto D142; + +T142_3: + response = -0.0007351884; + goto D142; + +N142_4: + if attribute(catid) in (0, 100200171, 100200130, 100300077, 100200186, 100200172, 100200234, 100200054, 100400037, 100300122, 100300127, 100400079, 100300074, 100300066, 100200028, 100200232) then goto N142_5; + else goto N142_6; + +N142_5: + if attribute(catid) in (100200171, 100300077, 100200234, 100400079, 100300066, 100200028) then goto T142_4; + else goto T142_5; + +T142_4: + 
response = 0.0040677589; + goto D142; + +T142_5: + response = 0.0073363935; + goto D142; + +N142_6: + if attribute(catid) in (100300143, 100300005, 100200068, 100300116, 100200192, 100300065, 100300169, 100200176) then goto T142_6; + else goto T142_7; + +T142_6: + response = 0.0152680762; + goto D142; + +T142_7: + response = 0.0386360428; + goto D142; + +D142: + +tnscore = tnscore + response; + + /* Tree 144 of 200 */ +N143_1: + if attribute(catid) in (100200171, 100300143, 100400141, 100300093, 100300005, 100300008, 100200068, 100300027, 100300121, 100300126, 100200054, 100400038, 100200192, 100300212, 100300122, 100300007, 100200176, 100300045, 100200028, 100300076, 100200067, 100300006, 100200232, 100300214) then goto N143_2; + else goto N143_4; + +N143_2: + if attribute(catid) in (100300093, 100300027, 100300212, 100200176, 100200067, 100200232, 100300214) then goto T143_1; + else goto N143_3; + +T143_1: + response = -0.0439825609; + goto D143; + +N143_3: + if attribute(catid) in (100300143, 100400141, 100300005, 100300008, 100300121, 100300126, 100200192, 100300007, 100300045, 100300076, 100300006) then goto T143_2; + else goto T143_3; + +T143_2: + response = -0.0078490055; + goto D143; + +T143_3: + response = -0.0029548934; + goto D143; + +N143_4: + if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100300166, 100200034, 100200186, 100200052, 100200172, 100300032, 100300116, 100200234, 100200053, 100300004, 100400142, 100300073, 100200193, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066, 100300200) then goto N143_5; + else goto N143_7; + +N143_5: + if attribute(catid) in (0, 100300011, 100300014, 100300013, 100300077, 100200034, 100200172, 100300116, 100200053, 100400142, 100200193, 100300065, 100300127, 100400079, 100300169, 100400080, 100300066) then goto N143_6; + else goto T143_6; + +N143_6: + if attribute(catid) in (100300014, 100200172, 100300116, 100400142, 100200193, 100300065, 100400079) then goto T143_4; + else 
goto T143_5; + +T143_4: + response = 0.0037085174; + goto D143; + +T143_5: + response = 0.0067213111; + goto D143; + +T143_6: + response = 0.0103041294; + goto D143; + +N143_7: + if attribute(catid) in (100200130, 100300165, 100300209, 100200087) then goto T143_7; + else goto T143_8; + +T143_7: + response = 0.0193058409; + goto D143; + +T143_8: + response = 0.0282268747; + goto D143; + +D143: + +tnscore = tnscore + response; + + /* Tree 145 of 200 */ +N144_1: + if attribute(catid) in (100300011, 100200130, 100300058, 100300143, 100300165, 100200052, 100300093, 100300102, 100300005, 100200068, 100300027, 100300116, 100200053, 100300019, 100300004, 100400037, 100200193, 100300209, 100200170, 100300074, 100300007, 100200176, 100300045, 100200055, 100200185, 100200232, 100300214) then goto N144_2; + else goto N144_4; + +N144_2: + if attribute(catid) in (100300102, 100300005, 100300116, 100300019, 100300209, 100200176, 100200055, 100200185, 100200232, 100300214) then goto T144_1; + else goto N144_3; + +T144_1: + response = -0.0293243609; + goto D144; + +N144_3: + if attribute(catid) in (100200052, 100300093, 100200068, 100300004, 100200193, 100300074, 100300045) then goto T144_2; + else goto T144_3; + +T144_2: + response = -0.0070078206; + goto D144; + +T144_3: + response = -0.0009152377; + goto D144; + +N144_4: + if attribute(catid) in (0, 100200171, 100300013, 100300077, 100200186, 100400141, 100300126, 100200054, 100300073, 100300065, 100300122, 100400079, 100300169, 100400080, 100200087, 100300066) then goto N144_5; + else goto N144_6; + +N144_5: + if attribute(catid) in (100200171, 100300013, 100200186, 100300073, 100300065, 100300122, 100300169) then goto T144_4; + else goto T144_5; + +T144_4: + response = 0.0041690469; + goto D144; + +T144_5: + response = 0.0070675916; + goto D144; + +N144_6: + if attribute(catid) in (100300166, 100200034, 100300032, 100400142, 100200192, 100300076, 100200067) then goto T144_6; + else goto T144_7; + +T144_6: + response = 
0.0123326309; + goto D144; + +T144_7: + response = 0.0192701631; + goto D144; + +D144: + +tnscore = tnscore + response; + + /* Tree 146 of 200 */ +N145_1: + if attribute(catid) in (100400141, 100300093, 100300102, 100200068, 100300032, 100300116, 100200234, 100300004, 100200193, 100400038, 100300212, 100300209, 100400079, 100300066, 100200176, 100300200, 100300045, 100300076, 100300214, 100300146) then goto N145_2; + else goto N145_3; + +N145_2: + if attribute(catid) in (100300093, 100300102, 100200068, 100300032, 100200234, 100400038, 100300212, 100300214, 100300146) then goto T145_1; + else goto T145_2; + +T145_1: + response = -0.0199825802; + goto D145; + +T145_2: + response = -0.0078667369; + goto D145; + +N145_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100200186, 100300165, 100200052, 100300005, 100200172, 100300027, 100200053, 100400142, 100300073, 100200192, 100300122, 100200170, 100300169, 100400080, 100200087, 100300074, 100300007, 100300006) then goto N145_4; + else goto N145_6; + +N145_4: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300077, 100300166, 100300005, 100200172, 100300027, 100200053, 100300073, 100200170, 100400080, 100200087, 100300007) then goto N145_5; + else goto T145_5; + +N145_5: + if attribute(catid) in (100200171, 100300014, 100300077, 100300166, 100300005, 100200172, 100300027, 100200170, 100400080, 100300007) then goto T145_3; + else goto T145_4; + +T145_3: + response = 0.0025693654; + goto D145; + +T145_4: + response = 0.0050450725; + goto D145; + +T145_5: + response = 0.0104631035; + goto D145; + +N145_6: + if attribute(catid) in (100300011, 100300143, 100300121, 100300126, 100400037, 100300065, 100300127, 100200028) then goto T145_6; + else goto T145_7; + +T145_6: + response = 0.0209253237; + goto D145; + +T145_7: + response = 0.0358125311; + goto D145; + +D145: + +tnscore = tnscore + response; + + /* Tree 147 of 200 */ +N146_1: + if attribute(catid) in 
(100200171, 100300058, 100300077, 100200034, 100200186, 100300005, 100300008, 100200068, 100300121, 100200053, 100300019, 100300126, 100200193, 100200192, 100300065, 100300212, 100300209, 100400079, 100200087, 100300045, 100200028, 100300076, 100200067, 100200055, 100300006, 100300146) then goto N146_2; + else goto N146_4; + +N146_2: + if attribute(catid) in (100300008, 100300121, 100300019, 100300126, 100300212, 100300209, 100200028, 100200055, 100300146) then goto T146_1; + else goto N146_3; + +T146_1: + response = -0.0326978382; + goto D146; + +N146_3: + if attribute(catid) in (100300077, 100200034, 100200186, 100300005, 100200068, 100200053, 100300045, 100300076, 100200067) then goto T146_2; + else goto T146_3; + +T146_2: + response = -0.0102205963; + goto D146; + +T146_3: + response = -0.0022839975; + goto D146; + +N146_4: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300013, 100400141, 100300165, 100200052, 100300027, 100300116, 100300004, 100400142, 100300073, 100400037, 100400038, 100300122, 100200170, 100300169, 100400080, 100300066, 100200176, 100300200, 100200185) then goto N146_5; + else goto N146_7; + +N146_5: + if attribute(catid) in (100200130, 100300014, 100300165, 100200052, 100300116, 100300004, 100400142, 100300073, 100400038, 100300122, 100200170, 100300169, 100300200) then goto T146_4; + else goto N146_6; + +T146_4: + response = 0.0024615075; + goto D146; + +N146_6: + if attribute(catid) in (0, 100300013, 100300027) then goto T146_5; + else goto T146_6; + +T146_5: + response = 0.0070973911; + goto D146; + +T146_6: + response = 0.0098426968; + goto D146; + +N146_7: + if attribute(catid) in (100300166, 100300093, 100200172, 100200234, 100200054, 100300127, 100300074, 100300007, 100200232) then goto T146_7; + else goto T146_8; + +T146_7: + response = 0.0164836278; + goto D146; + +T146_8: + response = 0.0507175593; + goto D146; + +D146: + +tnscore = tnscore + response; + + /* Tree 148 of 200 */ +N147_1: + if attribute(catid) in 
(0, 100300011, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100300008, 100200068, 100300032, 100300121, 100200234, 100200053, 100300004, 100300126, 100400037, 100300065, 100300122, 100300127, 100400080, 100200087, 100300066, 100300007, 100200176, 100200067, 100300006, 100300214) then goto N147_2; + else goto N147_4; + +N147_2: + if attribute(catid) in (100300008, 100300032, 100300004, 100300126, 100400037, 100200176, 100300214) then goto T147_1; + else goto N147_3; + +T147_1: + response = -0.0321717738; + goto D147; + +N147_3: + if attribute(catid) in (100300011, 100200130, 100300014, 100300077, 100200052, 100200234, 100200053, 100300122, 100300127, 100400080, 100200067, 100300006) then goto T147_2; + else goto T147_3; + +T147_2: + response = -0.0016525958; + goto D147; + +T147_3: + response = 0.0046522992; + goto D147; + +N147_4: + if attribute(catid) in (100200171, 100300143, 100300165, 100200172, 100300027, 100300116, 100300019, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100400079, 100200170, 100300169, 100300200, 100300076) then goto N147_5; + else goto T147_6; + +N147_5: + if attribute(catid) in (100200171, 100200172, 100200054, 100300073, 100200192, 100400079, 100200170, 100300169) then goto T147_4; + else goto T147_5; + +T147_4: + response = 0.0111420250; + goto D147; + +T147_5: + response = 0.0154339977; + goto D147; + +T147_6: + response = 0.0284170367; + goto D147; + +D147: + +tnscore = tnscore + response; + + /* Tree 149 of 200 */ +N148_1: + if attribute(catid) in (100200130, 100300058, 100300077, 100200034, 100400141, 100300102, 100300005, 100300008, 100200053, 100300004, 100400037, 100300212, 100300122, 100400079, 100400080, 100300074, 100300007, 100200176, 100200185, 100300006, 100200232) then goto N148_2; + else goto N148_3; + +N148_2: + if attribute(catid) in (100200034, 100300102, 100300005, 100300008, 100400037, 100300074, 100300007, 100200185, 100300006) then 
goto T148_1; + else goto T148_2; + +T148_1: + response = -0.0179586062; + goto D148; + +T148_2: + response = -0.0035242201; + goto D148; + +N148_3: + if attribute(catid) in (0, 100300014, 100300013, 100300166, 100200186, 100300165, 100200052, 100300093, 100200172, 100200068, 100300027, 100200234, 100400142, 100200054, 100300073, 100400038, 100200192, 100300065, 100300127, 100300169, 100300200, 100300076) then goto N148_4; + else goto N148_6; + +N148_4: + if attribute(catid) in (100300013, 100200052, 100400142, 100300073, 100400038, 100300065, 100300076) then goto T148_3; + else goto N148_5; + +T148_3: + response = 0.0026855380; + goto D148; + +N148_5: + if attribute(catid) in (0, 100300165, 100300093, 100200172, 100200234, 100200054, 100200192) then goto T148_4; + else goto T148_5; + +T148_4: + response = 0.0064693900; + goto D148; + +T148_5: + response = 0.0086837084; + goto D148; + +N148_6: + if attribute(catid) in (100200171, 100300143, 100300032, 100300121, 100300126, 100200170, 100200087, 100300066, 100200028) then goto T148_6; + else goto T148_7; + +T148_6: + response = 0.0141461740; + goto D148; + +T148_7: + response = 0.0293888308; + goto D148; + +D148: + +tnscore = tnscore + response; + + /* Tree 150 of 200 */ +N149_1: + if attribute(catid) in (100200171, 100300011, 100200130, 100300014, 100300058, 100300166, 100300143, 100200186, 100300165, 100300008, 100200068, 100300032, 100300027, 100300116, 100200234, 100300126, 100200054, 100400037, 100200193, 100400038, 100300209, 100300127, 100200170, 100400080, 100300074, 100300007, 100300045, 100200028, 100300076, 100200067, 100200185, 100300006, 100200232, 100300146) then goto N149_2; + else goto N149_5; + +N149_2: + if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100200186, 100300165, 100300032, 100300116, 100200234, 100300126, 100200193, 100400038, 100300209, 100200170, 100300007, 100300045, 100200028, 100200067, 100200232, 100300146) then goto N149_3; + else goto N149_4; + +N149_3: + if 
attribute(catid) in (100300014, 100300032, 100300116, 100200234, 100300209, 100300007, 100300045, 100200028, 100200067, 100200232, 100300146) then goto T149_1; + else goto T149_2; + +T149_1: + response = -0.0140634241; + goto D149; + +T149_2: + response = -0.0065657437; + goto D149; + +N149_4: + if attribute(catid) in (100200171, 100200068, 100200054, 100400080, 100300074, 100300076, 100300006) then goto T149_3; + else goto T149_4; + +T149_3: + response = -0.0013766512; + goto D149; + +T149_4: + response = 0.0012665197; + goto D149; + +N149_5: + if attribute(catid) in (0, 100300077, 100200052, 100300005, 100300121, 100200053, 100300004, 100300073, 100200192, 100300122, 100300169, 100200087, 100200176, 100300200) then goto N149_6; + else goto N149_7; + +N149_6: + if attribute(catid) in (100200052, 100300005, 100300073, 100200192) then goto T149_5; + else goto T149_6; + +T149_5: + response = 0.0044075628; + goto D149; + +T149_6: + response = 0.0068551736; + goto D149; + +N149_7: + if attribute(catid) in (100200172, 100300065, 100400079, 100300066) then goto T149_7; + else goto T149_8; + +T149_7: + response = 0.0142734060; + goto D149; + +T149_8: + response = 0.0204258682; + goto D149; + +D149: + +tnscore = tnscore + response; + + /* Tree 151 of 200 */ +N150_1: + if attribute(catid) in (100300014, 100300166, 100300143, 100200068, 100300032, 100300116, 100200234, 100300126, 100400037, 100200193, 100400038, 100300212, 100300127, 100200087, 100300200, 100200055, 100300214, 100300146) then goto N150_2; + else goto N150_3; + +N150_2: + if attribute(catid) in (100300143, 100200234, 100400037, 100200193, 100300212, 100300200, 100200055, 100300214, 100300146) then goto T150_1; + else goto T150_2; + +T150_1: + response = -0.0253090838; + goto D150; + +T150_2: + response = -0.0066897329; + goto D150; + +N150_3: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300121, 
100200053, 100300004, 100400142, 100300073, 100300065, 100300122, 100200170, 100300169, 100400080, 100300074, 100300066, 100300007, 100200176, 100300076, 100200067, 100200185) then goto N150_4; + else goto N150_7; + +N150_4: + if attribute(catid) in (100200171, 100200130, 100300077, 100200186, 100200053, 100300004, 100300073, 100300169, 100300074, 100300066, 100200176, 100300076) then goto N150_5; + else goto N150_6; + +N150_5: + if attribute(catid) in (100200171, 100300004, 100300073, 100300169, 100300066, 100300076) then goto T150_3; + else goto T150_4; + +T150_3: + response = 0.0001455877; + goto D150; + +T150_4: + response = 0.0029189153; + goto D150; + +N150_6: + if attribute(catid) in (0, 100200034, 100400141, 100300005, 100400142, 100300065, 100300007, 100200185) then goto T150_5; + else goto T150_6; + +T150_5: + response = 0.0054511445; + goto D150; + +T150_6: + response = 0.0085988325; + goto D150; + +N150_7: + if attribute(catid) in (100300013, 100300165, 100300102, 100300027, 100200192, 100400079, 100200028, 100300006, 100200232) then goto T150_7; + else goto T150_8; + +T150_7: + response = 0.0180003070; + goto D150; + +T150_8: + response = 0.0331227663; + goto D150; + +D150: + +tnscore = tnscore + response; + + /* Tree 152 of 200 */ +N151_1: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300077, 100200034, 100400141, 100300165, 100300093, 100200172, 100300008, 100300027, 100300116, 100300121, 100200234, 100300004, 100400142, 100300073, 100400037, 100200193, 100300212, 100300127, 100400080, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100300076, 100300006, 100300146) then goto N151_2; + else goto N151_6; + +N151_2: + if attribute(catid) in (100300014, 100300058, 100300077, 100300093, 100300008, 100200234, 100400142, 100400037, 100300074, 100300200, 100300045, 100200028, 100300076, 100300006, 100300146) then goto N151_3; + else goto N151_4; + +N151_3: + if attribute(catid) in (100300014, 100300058, 100300093, 
100400142, 100300200, 100300045, 100300076, 100300006, 100300146) then goto T151_1; + else goto T151_2; + +T151_1: + response = -0.0129979670; + goto D151; + +T151_2: + response = -0.0044598258; + goto D151; + +N151_4: + if attribute(catid) in (0, 100400141, 100200172, 100300116, 100300004, 100200193, 100300212, 100300127, 100400080, 100300066) then goto N151_5; + else goto T151_5; + +N151_5: + if attribute(catid) in (100400141, 100200172, 100300116, 100300212, 100300127, 100400080) then goto T151_3; + else goto T151_4; + +T151_3: + response = -0.0003302269; + goto D151; + +T151_4: + response = 0.0020272308; + goto D151; + +T151_5: + response = 0.0056490673; + goto D151; + +N151_6: + if attribute(catid) in (100200171, 100300011, 100300166, 100200186, 100200052, 100300102, 100300005, 100200068, 100300032, 100200053, 100400038, 100200192, 100300065, 100300122, 100400079, 100200170, 100300169, 100200176, 100200232) then goto N151_7; + else goto T151_9; + +N151_7: + if attribute(catid) in (100200171, 100300011, 100200052, 100300005, 100200068, 100200053, 100400038, 100300065, 100300122, 100400079, 100200170, 100300169, 100200232) then goto N151_8; + else goto T151_8; + +N151_8: + if attribute(catid) in (100200052, 100300005, 100200053, 100400038, 100400079, 100200170, 100300169) then goto T151_6; + else goto T151_7; + +T151_6: + response = 0.0089176032; + goto D151; + +T151_7: + response = 0.0121994068; + goto D151; + +T151_8: + response = 0.0173516652; + goto D151; + +T151_9: + response = 0.0325017104; + goto D151; + +D151: + +tnscore = tnscore + response; + + /* Tree 153 of 200 */ +N152_1: + if attribute(catid) in (0, 100200171, 100300014, 100300013, 100300077, 100300166, 100200186, 100400141, 100300102, 100200172, 100300008, 100300116, 100200234, 100200053, 100300019, 100300004, 100400142, 100400037, 100200193, 100300212, 100300209, 100300122, 100300127, 100200170, 100200087, 100200176, 100300200, 100300076, 100200055, 100300006, 100200232, 100300214, 100300146) 
then goto N152_2; + else goto N152_5; + +N152_2: + if attribute(catid) in (100300013, 100300166, 100300102, 100300008, 100300116, 100200234, 100300019, 100300004, 100400142, 100300212, 100300209, 100200176, 100300200, 100200055, 100300006, 100200232, 100300214, 100300146) then goto N152_3; + else goto N152_4; + +N152_3: + if attribute(catid) in (100300013, 100300166, 100300019, 100300212, 100300200, 100200055, 100300214, 100300146) then goto T152_1; + else goto T152_2; + +T152_1: + response = -0.0132054608; + goto D152; + +T152_2: + response = -0.0064396815; + goto D152; + +N152_4: + if attribute(catid) in (0, 100300014, 100300077, 100200172, 100400037, 100200193) then goto T152_3; + else goto T152_4; + +T152_3: + response = 0.0018800662; + goto D152; + +T152_4: + response = 0.0043219093; + goto D152; + +N152_5: + if attribute(catid) in (100200130, 100300058, 100200034, 100300165, 100200052, 100200068, 100300032, 100300121, 100300126, 100300073, 100200192, 100300065, 100400079, 100300169, 100400080, 100300074, 100300066, 100200028) then goto N152_6; + else goto T152_7; + +N152_6: + if attribute(catid) in (100300058, 100300165, 100300121, 100300126, 100300073, 100400079, 100300066, 100200028) then goto T152_5; + else goto T152_6; + +T152_5: + response = 0.0086112093; + goto D152; + +T152_6: + response = 0.0133895272; + goto D152; + +T152_7: + response = 0.0254777637; + goto D152; + +D152: + +tnscore = tnscore + response; + + /* Tree 154 of 200 */ +N153_1: + if attribute(catid) in (0, 100200171, 100300013, 100300077, 100300143, 100200186, 100400141, 100200052, 100300102, 100200172, 100300027, 100300116, 100200053, 100300004, 100400142, 100200054, 100200192, 100300065, 100300212, 100300209, 100300169, 100400080, 100200087, 100300066, 100200176, 100300200, 100300045, 100300076, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N153_2; + else goto N153_5; + +N153_2: + if attribute(catid) in (100300013, 100300077, 100300102, 100200053, 
100300004, 100400142, 100200054, 100300065, 100300209, 100300066, 100300200, 100300045, 100200067, 100200055, 100200185, 100300006, 100300214, 100300146) then goto N153_3; + else goto N153_4; + +N153_3: + if attribute(catid) in (100300077, 100300102, 100300066, 100300200, 100300045, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T153_1; + else goto T153_2; + +T153_1: + response = -0.0199968300; + goto D153; + +T153_2: + response = -0.0066258033; + goto D153; + +N153_4: + if attribute(catid) in (100200171, 100400141, 100200052, 100200172, 100300027, 100200192, 100400080, 100200087, 100200176, 100300076) then goto T153_3; + else goto T153_4; + +T153_3: + response = 0.0002616813; + goto D153; + +T153_4: + response = 0.0029401657; + goto D153; + +N153_5: + if attribute(catid) in (100200130, 100300014, 100300166, 100200034, 100300165, 100200068, 100200234, 100300073, 100400037, 100300122, 100400079, 100200170, 100300074, 100200028) then goto N153_6; + else goto T153_7; + +N153_6: + if attribute(catid) in (100300166, 100200034, 100300165, 100200068, 100300122, 100400079, 100200170) then goto T153_5; + else goto T153_6; + +T153_5: + response = 0.0100029130; + goto D153; + +T153_6: + response = 0.0150210552; + goto D153; + +T153_7: + response = 0.0282741486; + goto D153; + +D153: + +tnscore = tnscore + response; + + /* Tree 155 of 200 */ +N154_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300013, 100300166, 100200034, 100400141, 100300165, 100200052, 100300102, 100200172, 100300032, 100300027, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200176, 100300045, 100200028, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N154_2; + else goto N154_7; + +N154_2: + if attribute(catid) in (100300014, 100300058, 100300166, 100200034, 100300165, 100300102, 100300032, 
100200053, 100300019, 100300004, 100300212, 100300209, 100300127, 100200170, 100300169, 100400080, 100200176, 100300045, 100200067, 100200055, 100200185, 100300006, 100200232) then goto N154_3; + else goto N154_5; + +N154_3: + if attribute(catid) in (100300032, 100300019, 100300212, 100300209, 100200176, 100200067, 100200055, 100200185, 100200232) then goto T154_1; + else goto N154_4; + +T154_1: + response = -0.0365575410; + goto D154; + +N154_4: + if attribute(catid) in (100300166, 100300102, 100200053, 100300004, 100300169, 100400080, 100300006) then goto T154_2; + else goto T154_3; + +T154_2: + response = -0.0063417149; + goto D154; + +T154_3: + response = -0.0028552545; + goto D154; + +N154_5: + if attribute(catid) in (0, 100200130, 100400141, 100200234, 100400142, 100300073, 100200193, 100300065, 100300122, 100400079, 100200028) then goto N154_6; + else goto T154_6; + +N154_6: + if attribute(catid) in (100200234, 100400142, 100300065, 100300122, 100400079, 100200028) then goto T154_4; + else goto T154_5; + +T154_4: + response = 0.0013434898; + goto D154; + +T154_5: + response = 0.0041326482; + goto D154; + +T154_6: + response = 0.0076971051; + goto D154; + +N154_7: + if attribute(catid) in (100300011, 100300143, 100200186, 100300005, 100200068, 100400038, 100200087, 100300066, 100300007) then goto T154_7; + else goto N154_8; + +T154_7: + response = 0.0168824118; + goto D154; + +N154_8: + if attribute(catid) in (100300077, 100300116, 100300121, 100200054, 100300074) then goto T154_8; + else goto T154_9; + +T154_8: + response = 0.0265247041; + goto D154; + +T154_9: + response = 0.0418405954; + goto D154; + +D154: + +tnscore = tnscore + response; + + /* Tree 156 of 200 */ +N155_1: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100300093, 100300005, 100200068, 100300032, 100300027, 100300116, 100300004, 100300126, 100400142, 100200054, 100300073, 
100400037, 100400038, 100200192, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100400080, 100300074, 100300066, 100300007, 100300045, 100300076, 100300006, 100200232, 100300214) then goto N155_2; + else goto N155_5; + +N155_2: + if attribute(catid) in (100200171, 100200034, 100300093, 100300005, 100200068, 100300027, 100300116, 100300004, 100300126, 100200054, 100400037, 100400038, 100300212, 100300209, 100400079, 100300074, 100300214) then goto N155_3; + else goto N155_4; + +N155_3: + if attribute(catid) in (100300005, 100300027, 100300116, 100200054, 100400037, 100300209, 100300074, 100300214) then goto T155_1; + else goto T155_2; + +T155_1: + response = -0.0185907409; + goto D155; + +T155_2: + response = -0.0037649772; + goto D155; + +N155_4: + if attribute(catid) in (100300014, 100300058, 100300013, 100300077, 100300166, 100300066, 100300007, 100300076, 100300006) then goto T155_3; + else goto T155_4; + +T155_3: + response = 0.0007932193; + goto D155; + +T155_4: + response = 0.0048541117; + goto D155; + +N155_5: + if attribute(catid) in (100200130, 100200052, 100300102, 100200172, 100200193, 100300169, 100200176, 100200028, 100300146) then goto T155_5; + else goto N155_6; + +T155_5: + response = 0.0114452275; + goto D155; + +N155_6: + if attribute(catid) in (100300008, 100200234, 100300065, 100200087) then goto T155_6; + else goto T155_7; + +T155_6: + response = 0.0212529341; + goto D155; + +T155_7: + response = 0.0435817724; + goto D155; + +D155: + +tnscore = tnscore + response; + + /* Tree 157 of 200 */ +N156_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100300027, 100300116, 100300121, 100200053, 100300004, 100400142, 100300073, 100400037, 100400038, 100200192, 100300212, 100300122, 100300127, 100300169, 100400080, 100300007, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 
100300146) then goto N156_2; + else goto N156_6; + +N156_2: + if attribute(catid) in (100300014, 100300058, 100300143, 100400141, 100300093, 100300102, 100300116, 100400142, 100400037, 100400038, 100300122, 100300007, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N156_3; + else goto N156_4; + +N156_3: + if attribute(catid) in (100300143, 100300093, 100300102, 100400037, 100300007, 100200028, 100200055, 100200185, 100300006, 100200232, 100300146) then goto T156_1; + else goto T156_2; + +T156_1: + response = -0.0240406920; + goto D156; + +T156_2: + response = -0.0039451648; + goto D156; + +N156_4: + if attribute(catid) in (0, 100300077, 100300166, 100200172, 100300027, 100300004, 100200192, 100300212, 100300127) then goto N156_5; + else goto T156_5; + +N156_5: + if attribute(catid) in (100300077, 100300166, 100200172, 100300027, 100200192, 100300212) then goto T156_3; + else goto T156_4; + +T156_3: + response = 0.0007340608; + goto D156; + +T156_4: + response = 0.0030962230; + goto D156; + +T156_5: + response = 0.0055831787; + goto D156; + +N156_6: + if attribute(catid) in (100300011, 100300165, 100300032, 100200234, 100300126, 100200193, 100300065, 100400079, 100200170, 100200087, 100300074, 100300066, 100300200) then goto N156_7; + else goto T156_8; + +N156_7: + if attribute(catid) in (100300165, 100300032, 100200234, 100300126, 100200193, 100400079, 100200170, 100300074, 100300066, 100300200) then goto T156_6; + else goto T156_7; + +T156_6: + response = 0.0106987137; + goto D156; + +T156_7: + response = 0.0159048063; + goto D156; + +T156_8: + response = 0.0286231943; + goto D156; + +D156: + +tnscore = tnscore + response; + + /* Tree 158 of 200 */ +N157_1: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300143, 100200034, 100200186, 100400141, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100300121, 100200234, 100200053, 100300004, 100400142, 100200054, 100400037, 
100200193, 100400038, 100200192, 100300122, 100300169, 100400080, 100200087, 100300074, 100300066, 100300200, 100300045, 100300076, 100200055, 100200232) then goto N157_2; + else goto N157_5; + +N157_2: + if attribute(catid) in (100300011, 100300014, 100200034, 100400141, 100300102, 100300005, 100200172, 100200234, 100200193, 100400080, 100300066, 100300200, 100300045, 100300076, 100200055, 100200232) then goto N157_3; + else goto N157_4; + +N157_3: + if attribute(catid) in (100300011, 100200034, 100300102, 100200193, 100400080, 100200055, 100200232) then goto T157_1; + else goto T157_2; + +T157_1: + response = -0.0213247733; + goto D157; + +T157_2: + response = -0.0035116900; + goto D157; + +N157_4: + if attribute(catid) in (100200130, 100200052, 100200068, 100300121, 100200053, 100300004, 100200054, 100300169) then goto T157_3; + else goto T157_4; + +T157_3: + response = 0.0006568155; + goto D157; + +T157_4: + response = 0.0043360634; + goto D157; + +N157_5: + if attribute(catid) in (100200171, 100300077, 100300166, 100300165, 100300032, 100300073, 100300065, 100300209, 100300127, 100400079, 100200170, 100200028, 100300146) then goto N157_6; + else goto T157_7; + +N157_6: + if attribute(catid) in (100200171, 100300077, 100300165, 100300032, 100300073, 100300209, 100300127, 100200170) then goto T157_5; + else goto T157_6; + +T157_5: + response = 0.0086409598; + goto D157; + +T157_6: + response = 0.0123374521; + goto D157; + +T157_7: + response = 0.0275853300; + goto D157; + +D157: + +tnscore = tnscore + response; + + /* Tree 159 of 200 */ +N158_1: + if attribute(catid) in (100300013, 100200034, 100400141, 100300165, 100300093, 100300102, 100300008, 100300032, 100300116, 100300121, 100200234, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100200232) then goto N158_2; + else 
goto N158_5; + +N158_2: + if attribute(catid) in (100300013, 100200034, 100400141, 100300102, 100300008, 100300032, 100300116, 100200234, 100300019, 100300004, 100300126, 100300212, 100300209, 100300200, 100200067, 100200185, 100200232) then goto N158_3; + else goto N158_4; + +N158_3: + if attribute(catid) in (100300102, 100300032, 100300019, 100300212, 100300209, 100200067, 100200185, 100200232) then goto T158_1; + else goto T158_2; + +T158_1: + response = -0.0524906600; + goto D158; + +T158_2: + response = -0.0117164184; + goto D158; + +N158_4: + if attribute(catid) in (100300093, 100200054, 100200192, 100400079, 100200170, 100300066, 100300045, 100300076) then goto T158_3; + else goto T158_4; + +T158_3: + response = -0.0050797102; + goto D158; + +T158_4: + response = -0.0000182540; + goto D158; + +N158_5: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100200186, 100300005, 100200172, 100200068, 100300027, 100200053, 100400142, 100300065, 100300122, 100400080, 100200028, 100300006, 100300146) then goto N158_6; + else goto N158_8; + +N158_6: + if attribute(catid) in (0, 100200171, 100300166, 100200186, 100300005, 100400142, 100300065, 100300122, 100200028) then goto N158_7; + else goto T158_7; + +N158_7: + if attribute(catid) in (100200171, 100300166, 100300005, 100300065, 100300122, 100200028) then goto T158_5; + else goto T158_6; + +T158_5: + response = 0.0045192638; + goto D158; + +T158_6: + response = 0.0060994023; + goto D158; + +T158_7: + response = 0.0117841342; + goto D158; + +N158_8: + if attribute(catid) in (100300077, 100300143, 100200052, 100300074) then goto T158_8; + else goto T158_9; + +T158_8: + response = 0.0191236363; + goto D158; + +T158_9: + response = 0.0376099520; + goto D158; + +D158: + +tnscore = tnscore + response; + + /* Tree 160 of 200 */ +N159_1: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300013, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300005, 
100200172, 100300027, 100300121, 100300019, 100300004, 100400142, 100300073, 100400037, 100300065, 100300212, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100200176, 100300045, 100200067, 100200185) then goto N159_2; + else goto N159_7; + +N159_2: + if attribute(catid) in (100200171, 100300013, 100300019, 100400037, 100300212, 100200087, 100200067, 100200185) then goto N159_3; + else goto N159_4; + +N159_3: + if attribute(catid) in (100300013, 100300019, 100300212, 100200185) then goto T159_1; + else goto T159_2; + +T159_1: + response = -0.0408187739; + goto D159; + +T159_2: + response = -0.0118328301; + goto D159; + +N159_4: + if attribute(catid) in (0, 100200130, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100300005, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100300065, 100400079, 100300169, 100300074, 100200176, 100300045) then goto N159_5; + else goto T159_6; + +N159_5: + if attribute(catid) in (100200034, 100200186, 100300093, 100300005, 100200172, 100300027, 100300121, 100300004, 100400142, 100300073, 100400079, 100300169, 100300074, 100200176) then goto N159_6; + else goto T159_5; + +N159_6: + if attribute(catid) in (100200034, 100300093, 100300005, 100300027, 100400079, 100300169, 100300074, 100200176) then goto T159_3; + else goto T159_4; + +T159_3: + response = -0.0023114463; + goto D159; + +T159_4: + response = 0.0006956308; + goto D159; + +T159_5: + response = 0.0033093887; + goto D159; + +T159_6: + response = 0.0090068346; + goto D159; + +N159_7: + if attribute(catid) in (100300011, 100300014, 100300077, 100300116, 100200053, 100300126, 100200193, 100400038, 100200192, 100300127, 100200170, 100300066, 100200028, 100300076, 100200232, 100300146) then goto T159_7; + else goto T159_8; + +T159_7: + response = 0.0157546690; + goto D159; + +T159_8: + response = 0.0276821855; + goto D159; + +D159: + +tnscore = tnscore + response; + + /* Tree 161 of 200 */ +N160_1: + if attribute(catid) in (0, 
100200171, 100200130, 100300014, 100300058, 100300166, 100300143, 100200186, 100300165, 100200052, 100300093, 100300005, 100200068, 100300032, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100300127, 100400079, 100200170, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N160_2; + else goto N160_6; + +N160_2: + if attribute(catid) in (100200052, 100200068, 100300121, 100200053, 100400142, 100400038, 100200192, 100300212, 100300209, 100200170, 100300200, 100300045, 100200028, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N160_3; + else goto N160_4; + +N160_3: + if attribute(catid) in (100300121, 100300212, 100300209, 100200028, 100200067, 100200055, 100200232, 100300146) then goto T160_1; + else goto T160_2; + +T160_1: + response = -0.0318529122; + goto D160; + +T160_2: + response = -0.0076204316; + goto D160; + +N160_4: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100200186, 100300165, 100300004, 100300073, 100200193, 100300065, 100300127, 100400079, 100300074, 100300007, 100200176) then goto N160_5; + else goto T160_5; + +N160_5: + if attribute(catid) in (100300014, 100300058, 100200186, 100300165, 100300004, 100200193, 100300065, 100300127, 100400079, 100300074, 100300007) then goto T160_3; + else goto T160_4; + +T160_3: + response = 0.0024278683; + goto D160; + +T160_4: + response = 0.0048565045; + goto D160; + +T160_5: + response = 0.0078949518; + goto D160; + +N160_6: + if attribute(catid) in (100300077, 100200034, 100400141, 100200172, 100300027, 100300122, 100200087) then goto T160_6; + else goto T160_7; + +T160_6: + response = 0.0144109170; + goto D160; + +T160_7: + response = 0.0330128168; + goto D160; + +D160: + +tnscore = tnscore + response; + + /* Tree 162 of 200 */ +N161_1: + if 
attribute(catid) in (100300077, 100300166, 100300143, 100200034, 100200186, 100300032, 100300121, 100300019, 100300004, 100300126, 100400142, 100300073, 100400037, 100400038, 100300065, 100300212, 100300209, 100300127, 100400079, 100200087, 100200176, 100300200, 100300076, 100200067, 100200055, 100300214) then goto N161_2; + else goto N161_4; + +N161_2: + if attribute(catid) in (100300143, 100300032, 100300126, 100300212, 100300209, 100200176, 100200055, 100300214) then goto T161_1; + else goto N161_3; + +T161_1: + response = -0.0309719186; + goto D161; + +N161_3: + if attribute(catid) in (100300019, 100300004, 100400142, 100400037, 100400038, 100300065, 100200087, 100300200, 100200067) then goto T161_2; + else goto T161_3; + +T161_2: + response = -0.0082013563; + goto D161; + +T161_3: + response = -0.0020664794; + goto D161; + +N161_4: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300013, 100400141, 100300165, 100200172, 100300008, 100300027, 100300116, 100200053, 100200192, 100300122, 100200170, 100300169, 100400080, 100300074, 100300045, 100200028) then goto N161_5; + else goto N161_7; + +N161_5: + if attribute(catid) in (0, 100200171, 100300014, 100300058, 100300008, 100300116, 100200053, 100200192, 100300169, 100300074, 100300045, 100200028) then goto N161_6; + else goto T161_6; + +N161_6: + if attribute(catid) in (100200171, 100300014, 100300058, 100300008, 100300116, 100200053, 100200192, 100300074, 100200028) then goto T161_4; + else goto T161_5; + +T161_4: + response = 0.0024438226; + goto D161; + +T161_5: + response = 0.0050777724; + goto D161; + +T161_6: + response = 0.0078352283; + goto D161; + +N161_7: + if attribute(catid) in (100300011, 100200130, 100200052, 100200054, 100200193, 100300066, 100200232) then goto T161_7; + else goto T161_8; + +T161_7: + response = 0.0123902667; + goto D161; + +T161_8: + response = 0.0178145861; + goto D161; + +D161: + +tnscore = tnscore + response; + + /* Tree 163 of 200 */ +N162_1: + if 
attribute(catid) in (0, 100200171, 100300011, 100200130, 100300058, 100300077, 100300166, 100300143, 100200052, 100300093, 100200172, 100300008, 100300032, 100300027, 100300116, 100200234, 100400142, 100200054, 100200193, 100400038, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300074, 100300066, 100300007, 100300200, 100300076, 100200055, 100300006) then goto N162_2; + else goto N162_7; + +N162_2: + if attribute(catid) in (100300058, 100300143, 100300032, 100200054, 100300074, 100300007, 100300076, 100200055, 100300006) then goto N162_3; + else goto N162_4; + +N162_3: + if attribute(catid) in (100300058, 100300143, 100300032, 100300076, 100200055, 100300006) then goto T162_1; + else goto T162_2; + +T162_1: + response = -0.0392140951; + goto D162; + +T162_2: + response = -0.0178716478; + goto D162; + +N162_4: + if attribute(catid) in (100300011, 100200130, 100200052, 100300093, 100300008, 100300027, 100300116, 100200234, 100400142, 100200193, 100400038, 100300209, 100300127, 100400079, 100200170, 100200087, 100300066, 100300200) then goto N162_5; + else goto N162_6; + +N162_5: + if attribute(catid) in (100300011, 100200052, 100300008, 100300027, 100200234, 100400038, 100300209, 100200170) then goto T162_3; + else goto T162_4; + +T162_3: + response = -0.0066755608; + goto D162; + +T162_4: + response = -0.0018428992; + goto D162; + +N162_6: + if attribute(catid) in (0, 100200171, 100300077, 100300166) then goto T162_5; + else goto T162_6; + +T162_5: + response = 0.0036371154; + goto D162; + +T162_6: + response = 0.0064922752; + goto D162; + +N162_7: + if attribute(catid) in (100300013, 100200034, 100200186, 100400141, 100300165, 100300005, 100200068, 100200053, 100300073, 100300169, 100400080, 100200067, 100200185, 100300146) then goto T162_7; + else goto N162_8; + +T162_7: + response = 0.0121567368; + goto D162; + +N162_8: + if attribute(catid) in (100300014, 100300121, 100300004, 100400037, 100200176, 100200028) then 
goto T162_8; + else goto T162_9; + +T162_8: + response = 0.0238269627; + goto D162; + +T162_9: + response = 0.0454004741; + goto D162; + +D162: + +tnscore = tnscore + response; + + /* Tree 164 of 200 */ +N163_1: + if attribute(catid) in (0, 100300011, 100300013, 100200034, 100400141, 100200052, 100300093, 100300102, 100200172, 100300008, 100200068, 100300116, 100300004, 100300126, 100400142, 100400038, 100300209, 100300127, 100400079, 100300169, 100300200, 100200055, 100200185, 100300006, 100200232, 100300146) then goto N163_2; + else goto N163_5; + +N163_2: + if attribute(catid) in (100300011, 100300013, 100300008, 100300126, 100300200, 100200055, 100200185, 100300006, 100200232, 100300146) then goto T163_1; + else goto N163_3; + +T163_1: + response = -0.0236401207; + goto D163; + +N163_3: + if attribute(catid) in (100200034, 100200052, 100300093, 100300102, 100200068, 100300127) then goto T163_2; + else goto N163_4; + +T163_2: + response = -0.0040925434; + goto D163; + +N163_4: + if attribute(catid) in (0, 100400142, 100400079) then goto T163_3; + else goto T163_4; + +T163_3: + response = 0.0014214596; + goto D163; + +T163_4: + response = 0.0034589578; + goto D163; + +N163_5: + if attribute(catid) in (100200171, 100200130, 100300058, 100300077, 100300166, 100200186, 100300165, 100300027, 100300121, 100200234, 100200053, 100300073, 100200192, 100300065, 100300122, 100400080, 100300074) then goto N163_6; + else goto N163_7; + +N163_6: + if attribute(catid) in (100300077, 100300166, 100300165, 100300027, 100200192, 100300065, 100300122, 100400080) then goto T163_5; + else goto T163_6; + +T163_5: + response = 0.0069931850; + goto D163; + +T163_6: + response = 0.0098596774; + goto D163; + +N163_7: + if attribute(catid) in (100300005, 100300032, 100400037, 100200193, 100200170, 100300066, 100200176, 100200028) then goto T163_7; + else goto T163_8; + +T163_7: + response = 0.0175470785; + goto D163; + +T163_8: + response = 0.0289653859; + goto D163; + +D163: + +tnscore = 
tnscore + response; + + /* Tree 165 of 200 */ +N164_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200034, 100200186, 100400141, 100200052, 100300093, 100200172, 100300116, 100200234, 100200053, 100400142, 100200054, 100200193, 100400038, 100300065, 100300212, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300200, 100300045, 100300076, 100200055, 100300006, 100300214) then goto N164_2; + else goto N164_6; + +N164_2: + if attribute(catid) in (100300014, 100200034, 100200052, 100300093, 100300116, 100200053, 100200193, 100300212, 100300127, 100200176, 100300200, 100200055, 100300214) then goto N164_3; + else goto N164_4; + +N164_3: + if attribute(catid) in (100300014, 100200053, 100300212, 100300200, 100200055, 100300214) then goto T164_1; + else goto T164_2; + +T164_1: + response = -0.0378968222; + goto D164; + +T164_2: + response = -0.0078372609; + goto D164; + +N164_4: + if attribute(catid) in (0, 100200171, 100300077, 100200186, 100400142, 100200054, 100400038, 100300065, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100300045) then goto N164_5; + else goto T164_5; + +N164_5: + if attribute(catid) in (100200171, 100400142, 100200054, 100400038, 100300065, 100200170, 100300169, 100400080, 100300066) then goto T164_3; + else goto T164_4; + +T164_3: + response = -0.0001641778; + goto D164; + +T164_4: + response = 0.0015184867; + goto D164; + +T164_5: + response = 0.0061033364; + goto D164; + +N164_6: + if attribute(catid) in (100300058, 100300143, 100200068, 100300032, 100300121, 100300004, 100300126, 100300073, 100400037, 100200192) then goto T164_6; + else goto N164_7; + +T164_6: + response = 0.0125327069; + goto D164; + +N164_7: + if attribute(catid) in (100300011, 100300013, 100300165, 100300102, 100300005, 100300027, 100300122) then goto T164_7; + else goto T164_8; + +T164_7: + response = 0.0195931033; + goto D164; + +T164_8: + 
response = 0.0422132813; + goto D164; + +D164: + +tnscore = tnscore + response; + + /* Tree 166 of 200 */ +N165_1: + if attribute(catid) in (0, 100200171, 100300014, 100300077, 100200186, 100300165, 100300102, 100300005, 100200172, 100300008, 100200068, 100300116, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100200193, 100200192, 100300065, 100300212, 100200170, 100200087, 100300074, 100300066, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300214, 100300146) then goto N165_2; + else goto N165_5; + +N165_2: + if attribute(catid) in (100300014, 100300102, 100300116, 100200234, 100400142, 100200054, 100200087, 100300200, 100300045, 100200055, 100200185, 100300214, 100300146) then goto T165_1; + else goto N165_3; + +T165_1: + response = -0.0134771000; + goto D165; + +N165_3: + if attribute(catid) in (0, 100300165, 100300005, 100300008, 100200068, 100200193, 100300212, 100300066, 100200028, 100300076) then goto N165_4; + else goto T165_4; + +N165_4: + if attribute(catid) in (100300165, 100300005, 100300008, 100300066, 100300076) then goto T165_2; + else goto T165_3; + +T165_2: + response = -0.0004632359; + goto D165; + +T165_3: + response = 0.0017931695; + goto D165; + +T165_4: + response = 0.0047361213; + goto D165; + +N165_5: + if attribute(catid) in (100300011, 100200130, 100300166, 100400141, 100200052, 100300032, 100300027, 100300121, 100200053, 100400038, 100300209, 100300122, 100400079, 100300169, 100400080, 100300007, 100200176) then goto N165_6; + else goto T165_7; + +N165_6: + if attribute(catid) in (100200130, 100400141, 100200052, 100300032, 100300027, 100200053, 100300209, 100300122, 100400079) then goto T165_5; + else goto T165_6; + +T165_5: + response = 0.0106878879; + goto D165; + +T165_6: + response = 0.0162088762; + goto D165; + +T165_7: + response = 0.0270716952; + goto D165; + +D165: + +tnscore = tnscore + response; + + /* Tree 167 of 200 */ +N166_1: + if attribute(catid) in (100300011, 100200130, 
100300058, 100300013, 100400141, 100300165, 100300093, 100300005, 100300027, 100300116, 100200234, 100300004, 100300126, 100300073, 100200193, 100300209, 100300122, 100300007, 100200176, 100200028, 100200055, 100200185, 100300006) then goto N166_2; + else goto N166_5; + +N166_2: + if attribute(catid) in (100300011, 100300058, 100400141, 100300005, 100200234, 100300004, 100200193, 100300209, 100300122, 100200028, 100200055, 100200185) then goto N166_3; + else goto N166_4; + +N166_3: + if attribute(catid) in (100300011, 100200193, 100300209, 100200028, 100200055) then goto T166_1; + else goto T166_2; + +T166_1: + response = -0.0335395120; + goto D166; + +T166_2: + response = -0.0118989268; + goto D166; + +N166_4: + if attribute(catid) in (100300013, 100300165, 100300126, 100200176) then goto T166_3; + else goto T166_4; + +T166_3: + response = -0.0067126195; + goto D166; + +T166_4: + response = -0.0029610222; + goto D166; + +N166_5: + if attribute(catid) in (0, 100300014, 100300166, 100200034, 100200186, 100200052, 100200172, 100200068, 100200053, 100300019, 100200054, 100400038, 100200192, 100300065, 100300127, 100400079, 100200170, 100400080, 100300074, 100300066, 100300076, 100300146) then goto N166_6; + else goto N166_8; + +N166_6: + if attribute(catid) in (0, 100300166, 100200186, 100200172, 100200053, 100300019, 100400038, 100200192, 100300065, 100200170, 100400080, 100300074, 100300076) then goto N166_7; + else goto T166_7; + +N166_7: + if attribute(catid) in (100200186, 100200172, 100200053, 100300019, 100400038, 100200170, 100400080) then goto T166_5; + else goto T166_6; + +T166_5: + response = 0.0031153692; + goto D166; + +T166_6: + response = 0.0055175700; + goto D166; + +T166_7: + response = 0.0106121829; + goto D166; + +N166_8: + if attribute(catid) in (100200171, 100300077, 100300008, 100400142, 100300169, 100200087, 100300200) then goto T166_8; + else goto T166_9; + +T166_8: + response = 0.0168125614; + goto D166; + +T166_9: + response = 0.0398137842; + 
goto D166; + +D166: + +tnscore = tnscore + response; + + /* Tree 168 of 200 */ +N167_1: + if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100400141, 100300165, 100300102, 100200068, 100300032, 100300121, 100200234, 100300019, 100300004, 100300126, 100300073, 100400037, 100200193, 100300212, 100300127, 100400079, 100200170, 100300066, 100300045, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N167_2; + else goto N167_4; + +N167_2: + if attribute(catid) in (100300102, 100300032, 100300121, 100300019, 100400037, 100200193, 100300212, 100200170, 100300045, 100300076, 100200067, 100200185, 100300214, 100300146) then goto T167_1; + else goto N167_3; + +T167_1: + response = -0.0158766850; + goto D167; + +N167_3: + if attribute(catid) in (100300166, 100300165, 100200068, 100300004, 100300126, 100300073, 100300127, 100300066, 100200232) then goto T167_2; + else goto T167_3; + +T167_2: + response = -0.0044887193; + goto D167; + +T167_3: + response = -0.0003188875; + goto D167; + +N167_4: + if attribute(catid) in (0, 100300014, 100300143, 100200186, 100200053, 100400142, 100200054, 100200192, 100300065, 100300122, 100300169, 100300074, 100300200) then goto N167_5; + else goto N167_6; + +N167_5: + if attribute(catid) in (0, 100300014, 100300143, 100200186, 100200053, 100200192, 100300065, 100300074) then goto T167_4; + else goto T167_5; + +T167_4: + response = 0.0053430945; + goto D167; + +T167_5: + response = 0.0090007568; + goto D167; + +N167_6: + if attribute(catid) in (100200171, 100200052, 100300093, 100300005, 100200172, 100300027, 100300116, 100400080, 100200087, 100300007) then goto T167_6; + else goto T167_7; + +T167_6: + response = 0.0122908466; + goto D167; + +T167_7: + response = 0.0204822127; + goto D167; + +D167: + +tnscore = tnscore + response; + + /* Tree 169 of 200 */ +N168_1: + if attribute(catid) in (100200130, 100300077, 100300143, 100200034, 100300005, 100300032, 100300116, 100300121, 100300019, 100200054, 
100200192, 100300212, 100300209, 100300127, 100400079, 100200170, 100400080, 100300074, 100300007, 100300200, 100300045, 100200028, 100300076, 100200055, 100200232) then goto N168_2; + else goto N168_4; + +N168_2: + if attribute(catid) in (100300143, 100300032, 100300121, 100300019, 100300212, 100300200, 100200028, 100200055) then goto T168_1; + else goto N168_3; + +T168_1: + response = -0.0260564211; + goto D168; + +N168_3: + if attribute(catid) in (100200034, 100300005, 100200054, 100300209, 100400080, 100300074, 100300045) then goto T168_2; + else goto T168_3; + +T168_2: + response = -0.0098627619; + goto D168; + +T168_3: + response = -0.0040846106; + goto D168; + +N168_4: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300013, 100300166, 100200186, 100400141, 100300165, 100200052, 100200172, 100300027, 100200053, 100300004, 100400142, 100300073, 100400037, 100200193, 100300065, 100300122, 100300169, 100300066) then goto N168_5; + else goto N168_8; + +N168_5: + if attribute(catid) in (100200171, 100300011, 100300013, 100300166, 100300165, 100200172, 100400142, 100300073, 100400037, 100300122, 100300169, 100300066) then goto N168_6; + else goto N168_7; + +N168_6: + if attribute(catid) in (100300011, 100300013, 100300166, 100300165, 100300073, 100400037, 100300066) then goto T168_4; + else goto T168_5; + +T168_4: + response = 0.0011147417; + goto D168; + +T168_5: + response = 0.0042983630; + goto D168; + +N168_7: + if attribute(catid) in (0, 100300014, 100200053, 100300004, 100300065) then goto T168_6; + else goto T168_7; + +T168_6: + response = 0.0064436133; + goto D168; + +T168_7: + response = 0.0100503455; + goto D168; + +N168_8: + if attribute(catid) in (100300058, 100300093, 100300008, 100200234, 100200087, 100200176) then goto T168_8; + else goto T168_9; + +T168_8: + response = 0.0146658078; + goto D168; + +T168_9: + response = 0.0247793114; + goto D168; + +D168: + +tnscore = tnscore + response; + + /* Tree 170 of 200 */ +N169_1: + if 
attribute(catid) in (100300013, 100300165, 100300102, 100300005, 100200068, 100300027, 100300121, 100400037, 100200193, 100300212, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100200028, 100200067) then goto N169_2; + else goto N169_3; + +N169_2: + if attribute(catid) in (100300013, 100300165, 100300102, 100300005, 100200068, 100400037, 100200193, 100300007, 100300200, 100200067) then goto T169_1; + else goto T169_2; + +T169_1: + response = -0.0164905100; + goto D169; + +T169_2: + response = -0.0065097756; + goto D169; + +N169_3: + if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300166, 100200186, 100400141, 100200052, 100300093, 100200172, 100300116, 100200053, 100300019, 100300004, 100300126, 100300073, 100300065, 100300209, 100300122, 100400079, 100200170, 100300076) then goto N169_4; + else goto N169_6; + +N169_4: + if attribute(catid) in (0, 100200130, 100300058, 100200186, 100300093, 100200172, 100300004, 100300073, 100300209, 100300122, 100200170, 100300076) then goto N169_5; + else goto T169_5; + +N169_5: + if attribute(catid) in (100200130, 100200186, 100200172, 100300073, 100200170, 100300076) then goto T169_3; + else goto T169_4; + +T169_3: + response = 0.0025078440; + goto D169; + +T169_4: + response = 0.0047210186; + goto D169; + +T169_5: + response = 0.0076570114; + goto D169; + +N169_6: + if attribute(catid) in (100200171, 100300014, 100300077, 100200034, 100300008, 100300032, 100200234, 100200054, 100400038, 100200192, 100300127, 100400080, 100300006, 100300146) then goto T169_6; + else goto T169_7; + +T169_6: + response = 0.0139365828; + goto D169; + +T169_7: + response = 0.0234469942; + goto D169; + +D169: + +tnscore = tnscore + response; + + /* Tree 171 of 200 */ +N170_1: + if attribute(catid) in (100300013, 100300077, 100300165, 100200052, 100300102, 100200068, 100300032, 100300116, 100300121, 100200234, 100300004, 100300126, 100400142, 100200054, 100300073, 100200192, 100300065, 100300212, 
100200170, 100400080, 100300074, 100300007, 100200176, 100300045, 100200067, 100200055, 100200185, 100200232, 100300214, 100300146) then goto N170_2; + else goto N170_4; + +N170_2: + if attribute(catid) in (100300032, 100300121, 100300212, 100300007, 100200176, 100200067, 100200055, 100200185, 100300214, 100300146) then goto T170_1; + else goto N170_3; + +T170_1: + response = -0.0389470287; + goto D170; + +N170_3: + if attribute(catid) in (100200052, 100300102, 100200068, 100300116, 100200234, 100300126, 100200054, 100300065, 100200170, 100300074) then goto T170_2; + else goto T170_3; + +T170_2: + response = -0.0087219876; + goto D170; + +T170_3: + response = -0.0029990733; + goto D170; + +N170_4: + if attribute(catid) in (0, 100200130, 100300014, 100300143, 100200034, 100200186, 100400141, 100300005, 100200172, 100200053, 100400038, 100300122, 100300127, 100400079, 100300169, 100300066, 100300076, 100300006) then goto N170_5; + else goto N170_6; + +N170_5: + if attribute(catid) in (0, 100200130, 100300014, 100300143, 100400141, 100400038, 100300122, 100300127, 100400079, 100300066, 100300076, 100300006) then goto T170_4; + else goto T170_5; + +T170_4: + response = 0.0052739356; + goto D170; + +T170_5: + response = 0.0099085929; + goto D170; + +N170_6: + if attribute(catid) in (100200171, 100300011, 100300166, 100300093, 100300027, 100200193, 100300209, 100200087, 100300200) then goto N170_7; + else goto T170_8; + +N170_7: + if attribute(catid) in (100200171, 100200193, 100200087) then goto T170_6; + else goto T170_7; + +T170_6: + response = 0.0138275479; + goto D170; + +T170_7: + response = 0.0190545276; + goto D170; + +T170_8: + response = 0.0389964998; + goto D170; + +D170: + +tnscore = tnscore + response; + + /* Tree 172 of 200 */ +N171_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100300143, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 
100200172, 100300008, 100200068, 100300027, 100300116, 100300121, 100200234, 100200053, 100300019, 100300004, 100400142, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100300066, 100200176, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N171_2; + else goto N171_7; + +N171_2: + if attribute(catid) in (100300011, 100300058, 100300077, 100300005, 100300008, 100200068, 100400142, 100300065, 100300122, 100400079, 100400080, 100200087, 100300045, 100200055, 100200185, 100200232, 100300214) then goto N171_3; + else goto N171_4; + +N171_3: + if attribute(catid) in (100300058, 100300005, 100400142, 100200087, 100300045, 100200055, 100200185, 100200232, 100300214) then goto T171_1; + else goto T171_2; + +T171_1: + response = -0.0193646710; + goto D171; + +T171_2: + response = -0.0065149978; + goto D171; + +N171_4: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200034, 100200186, 100300093, 100300102, 100300116, 100300121, 100200234, 100200053, 100300019, 100300073, 100200193, 100200192, 100300127, 100300169, 100300074, 100300006) then goto N171_5; + else goto N171_6; + +N171_5: + if attribute(catid) in (0, 100200171, 100300014, 100200186, 100300102, 100200193, 100300127, 100300074, 100300006) then goto T171_3; + else goto T171_4; + +T171_3: + response = 0.0016997077; + goto D171; + +T171_4: + response = 0.0042301416; + goto D171; + +N171_6: + if attribute(catid) in (100300166, 100400141, 100200172, 100300027, 100300004, 100300209, 100200170, 100300066) then goto T171_5; + else goto T171_6; + +T171_5: + response = 0.0083225080; + goto D171; + +T171_6: + response = 0.0106242146; + goto D171; + +N171_7: + if attribute(catid) in (100300126, 100200054, 100300212, 100300200, 100300076) then goto T171_7; + else goto T171_8; + +T171_7: + response = 0.0274475014; + goto D171; + +T171_8: + response = 0.0436714177; + goto 
D171; + +D171: + +tnscore = tnscore + response; + + /* Tree 173 of 200 */ +N172_1: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100200034, 100200186, 100300165, 100200052, 100300093, 100300102, 100300027, 100300121, 100200234, 100300019, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100300169, 100200087, 100300074, 100300066, 100300007, 100300200, 100300045, 100300076, 100200067, 100200185) then goto N172_2; + else goto N172_5; + +N172_2: + if attribute(catid) in (100300014, 100200034, 100200186, 100300121, 100300004, 100400142, 100300073, 100400037, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200087, 100300074, 100200067, 100200185) then goto N172_3; + else goto N172_4; + +N172_3: + if attribute(catid) in (100300014, 100300121, 100400037, 100300212, 100300209, 100200087, 100200067, 100200185) then goto T172_1; + else goto T172_2; + +T172_1: + response = -0.0261969274; + goto D172; + +T172_2: + response = -0.0052677773; + goto D172; + +N172_4: + if attribute(catid) in (100200130, 100300058, 100300165, 100300019, 100300126, 100200193, 100300169, 100300066, 100300007, 100300200, 100300076) then goto T172_3; + else goto T172_4; + +T172_3: + response = 0.0006535877; + goto D172; + +T172_4: + response = 0.0044410765; + goto D172; + +N172_5: + if attribute(catid) in (100200171, 100300011, 100300013, 100300166, 100400141, 100300005, 100200172, 100200068, 100300032, 100300116, 100200192, 100200170, 100300006, 100200232, 100300146) then goto N172_6; + else goto N172_7; + +N172_6: + if attribute(catid) in (100200171, 100300005, 100200172, 100300006, 100200232, 100300146) then goto T172_5; + else goto T172_6; + +T172_5: + response = 0.0098796141; + goto D172; + +T172_6: + response = 0.0147635144; + goto D172; + +N172_7: + if attribute(catid) in (100300077, 100200053, 100400080, 100200028) then goto T172_7; + else goto T172_8; + 
+T172_7: + response = 0.0226086570; + goto D172; + +T172_8: + response = 0.0384965531; + goto D172; + +D172: + +tnscore = tnscore + response; + + /* Tree 174 of 200 */ +N173_1: + if attribute(catid) in (100200130, 100200052, 100300093, 100300102, 100300005, 100200172, 100300008, 100200068, 100300032, 100200234, 100200053, 100300004, 100300126, 100400142, 100200054, 100200193, 100300209, 100300074, 100300066, 100300007, 100300045, 100200028, 100200055, 100200232) then goto N173_2; + else goto N173_4; + +N173_2: + if attribute(catid) in (100300093, 100300102, 100300008, 100400142, 100200054, 100300209, 100300074, 100200028, 100200055) then goto T173_1; + else goto N173_3; + +T173_1: + response = -0.0206111829; + goto D173; + +N173_3: + if attribute(catid) in (100200052, 100300005, 100200172, 100200234, 100300004, 100300126, 100300066) then goto T173_2; + else goto T173_3; + +T173_2: + response = -0.0077046600; + goto D173; + +T173_3: + response = -0.0024157474; + goto D173; + +N173_4: + if attribute(catid) in (0, 100200171, 100300011, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100400141, 100300027, 100300121, 100300073, 100400038, 100200192, 100300065, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100200067) then goto N173_5; + else goto N173_7; + +N173_5: + if attribute(catid) in (0, 100300013, 100400141, 100300027, 100300121, 100400038, 100200192, 100300127, 100400079, 100300169, 100200087, 100200067) then goto N173_6; + else goto T173_6; + +N173_6: + if attribute(catid) in (100300013, 100400141, 100200192, 100300127, 100400079, 100300169, 100200087) then goto T173_4; + else goto T173_5; + +T173_4: + response = 0.0028937370; + goto D173; + +T173_5: + response = 0.0056818850; + goto D173; + +T173_6: + response = 0.0099814265; + goto D173; + +N173_7: + if attribute(catid) in (100200186, 100300165, 100300212, 100300122, 100300076) then goto T173_7; + else goto T173_8; + +T173_7: + response = 0.0162181832; + goto D173; + 
+T173_8: + response = 0.0314914649; + goto D173; + +D173: + +tnscore = tnscore + response; + + /* Tree 175 of 200 */ +N174_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300058, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100300027, 100300116, 100200053, 100300019, 100300126, 100200054, 100300073, 100200193, 100200192, 100300212, 100300209, 100300122, 100300127, 100300169, 100400080, 100200087, 100300074, 100200176, 100300200, 100300045, 100200067, 100300006, 100200232, 100300214, 100300146) then goto N174_2; + else goto N174_5; + +N174_2: + if attribute(catid) in (100200171, 100300058, 100200034, 100400141, 100300093, 100300005, 100300027, 100300116, 100200053, 100300126, 100200054, 100200193, 100200192, 100300212, 100300209, 100300122, 100300074, 100200176, 100200067, 100200232, 100300214, 100300146) then goto N174_3; + else goto N174_4; + +N174_3: + if attribute(catid) in (100200034, 100300126, 100200193, 100200192, 100300209, 100300122, 100200176, 100200067, 100200232, 100300214, 100300146) then goto T174_1; + else goto T174_2; + +T174_1: + response = -0.0183821122; + goto D174; + +T174_2: + response = -0.0073843870; + goto D174; + +N174_4: + if attribute(catid) in (100300011, 100300014, 100300077, 100300166, 100200052, 100300102, 100300019, 100300073, 100300127, 100400080, 100200087, 100300200, 100300006) then goto T174_3; + else goto T174_4; + +T174_3: + response = -0.0006604841; + goto D174; + +T174_4: + response = 0.0030934288; + goto D174; + +N174_5: + if attribute(catid) in (100300013, 100200172, 100300008, 100200068, 100200234, 100300004, 100400142, 100300065, 100400079, 100200170, 100200185) then goto N174_6; + else goto T174_7; + +N174_6: + if attribute(catid) in (100300013, 100200172, 100300008, 100300004, 100400142, 100300065, 100400079, 100200185) then goto T174_5; + else goto T174_6; + +T174_5: + response = 0.0088954013; + goto D174; + +T174_6: + 
response = 0.0132240377; + goto D174; + +T174_7: + response = 0.0239626156; + goto D174; + +D174: + +tnscore = tnscore + response; + + /* Tree 176 of 200 */ +N175_1: + if attribute(catid) in (100200171, 100300014, 100300143, 100300165, 100300102, 100300008, 100300032, 100300027, 100300121, 100200053, 100400037, 100300122, 100300127, 100300007, 100200176, 100200028, 100200067, 100200232, 100300146) then goto N175_2; + else goto N175_3; + +N175_2: + if attribute(catid) in (100300143, 100300008, 100400037, 100300122, 100300007, 100200176, 100200028, 100200067, 100200232, 100300146) then goto T175_1; + else goto T175_2; + +T175_1: + response = -0.0238688087; + goto D175; + +T175_2: + response = -0.0050071269; + goto D175; + +N175_3: + if attribute(catid) in (0, 100300011, 100200130, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100300116, 100200234, 100300019, 100300004, 100400142, 100200054, 100300073, 100200193, 100200192, 100300212, 100400079, 100200170, 100300169, 100200087, 100300074, 100300066, 100300200, 100300006) then goto N175_4; + else goto N175_5; + +N175_4: + if attribute(catid) in (100300058, 100300077, 100300166, 100200052, 100300116, 100200234, 100400142, 100200054, 100300073, 100200192, 100300212, 100400079, 100300074, 100300066, 100300200, 100300006) then goto T175_3; + else goto T175_4; + +T175_3: + response = 0.0013911393; + goto D175; + +T175_4: + response = 0.0050101023; + goto D175; + +N175_5: + if attribute(catid) in (100200034, 100300005, 100200172, 100200068, 100300126, 100400038, 100300065) then goto T175_5; + else goto T175_6; + +T175_5: + response = 0.0131280626; + goto D175; + +T175_6: + response = 0.0264913220; + goto D175; + +D175: + +tnscore = tnscore + response; + + /* Tree 177 of 200 */ +N176_1: + if attribute(catid) in (100300011, 100300166, 100300165, 100300102, 100300008, 100300032, 100200234, 100400037, 100300209, 100300169, 100300200, 100300045, 100200028, 100300076, 100200055, 100200185, 100300006, 100200232, 
100300146) then goto N176_2; + else goto N176_4; + +N176_2: + if attribute(catid) in (100300011, 100300102, 100300008, 100200234, 100400037, 100300209, 100300076, 100200055, 100300006, 100200232, 100300146) then goto T176_1; + else goto N176_3; + +T176_1: + response = -0.0278018549; + goto D176; + +N176_3: + if attribute(catid) in (100300165, 100200185) then goto T176_2; + else goto T176_3; + +T176_2: + response = -0.0124509833; + goto D176; + +T176_3: + response = -0.0065030338; + goto D176; + +N176_4: + if attribute(catid) in (0, 100200171, 100300013, 100300077, 100200186, 100400141, 100200052, 100300093, 100200068, 100300027, 100200053, 100300004, 100400142, 100200054, 100300073, 100200193, 100300122, 100300127, 100400079, 100200170, 100400080, 100200087, 100300074, 100300066) then goto N176_5; + else goto N176_7; + +N176_5: + if attribute(catid) in (100300077, 100400141, 100300027, 100300004, 100400142, 100200054, 100200193, 100300122, 100300127, 100200087, 100300074) then goto T176_4; + else goto N176_6; + +T176_4: + response = -0.0006424011; + goto D176; + +N176_6: + if attribute(catid) in (0, 100200171, 100200186, 100200068, 100200053, 100400079, 100400080) then goto T176_5; + else goto T176_6; + +T176_5: + response = 0.0042262873; + goto D176; + +T176_6: + response = 0.0083942658; + goto D176; + +N176_7: + if attribute(catid) in (100200130, 100300058, 100200034, 100300005, 100200172, 100300121, 100200192, 100300065, 100300212, 100300007) then goto N176_8; + else goto T176_9; + +N176_8: + if attribute(catid) in (100300058, 100300005, 100200172, 100200192) then goto T176_7; + else goto T176_8; + +T176_7: + response = 0.0108729295; + goto D176; + +T176_8: + response = 0.0157560823; + goto D176; + +T176_9: + response = 0.0252591937; + goto D176; + +D176: + +tnscore = tnscore + response; + + /* Tree 178 of 200 */ +N177_1: + if attribute(catid) in (100200171, 100200130, 100300014, 100300058, 100300013, 100300166, 100300143, 100300093, 100300102, 100300008, 
100300027, 100300019, 100300004, 100200054, 100300073, 100400037, 100200193, 100300212, 100300209, 100300127, 100200170, 100400080, 100300066, 100200176, 100300200, 100200067, 100300146) then goto N177_2; + else goto N177_4; + +N177_2: + if attribute(catid) in (100300143, 100300093, 100300102, 100300008, 100300019, 100200054, 100400037, 100300212, 100300209, 100200176, 100300146) then goto T177_1; + else goto N177_3; + +T177_1: + response = -0.0224536708; + goto D177; + +N177_3: + if attribute(catid) in (100200171, 100300014, 100300013, 100300027, 100300004, 100300066, 100200067) then goto T177_2; + else goto T177_3; + +T177_2: + response = -0.0094554624; + goto D177; + +T177_3: + response = -0.0036901881; + goto D177; + +N177_4: + if attribute(catid) in (0, 100300077, 100200034, 100200186, 100400141, 100200052, 100300005, 100200172, 100300116, 100300121, 100200053, 100400038, 100200192, 100300065, 100300122, 100400079, 100300169, 100200087, 100200028) then goto N177_5; + else goto N177_6; + +N177_5: + if attribute(catid) in (0, 100200186, 100200052, 100200172, 100300121, 100200192) then goto T177_4; + else goto T177_5; + +T177_4: + response = 0.0020254456; + goto D177; + +T177_5: + response = 0.0072521361; + goto D177; + +N177_6: + if attribute(catid) in (100300165, 100300126, 100400142, 100300074, 100300007, 100300006) then goto T177_6; + else goto T177_7; + +T177_6: + response = 0.0116654091; + goto D177; + +T177_7: + response = 0.0258802787; + goto D177; + +D177: + +tnscore = tnscore + response; + + /* Tree 179 of 200 */ +N178_1: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300166, 100200186, 100400141, 100300165, 100200052, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300116, 100300121, 100300019, 100300004, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300212, 100300209, 100300122, 100300127, 100400079, 100200170, 100200087, 100300066, 100300007, 100200176, 100300200, 
100300045, 100200028, 100300076, 100200067, 100200232) then goto N178_2; + else goto N178_6; + +N178_2: + if attribute(catid) in (100200171, 100200130, 100300166, 100200186, 100300093, 100300102, 100300005, 100200172, 100200068, 100300116, 100300019, 100300126, 100200054, 100200193, 100200192, 100300212, 100300209, 100300127, 100200170, 100200087, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200232) then goto N178_3; + else goto N178_4; + +N178_3: + if attribute(catid) in (100200130, 100200186, 100300102, 100300005, 100300116, 100300019, 100200054, 100200193, 100200192, 100300212, 100300209, 100300127, 100200087, 100300066, 100200176, 100300200, 100300045, 100200067) then goto T178_1; + else goto T178_2; + +T178_1: + response = -0.0063942493; + goto D178; + +T178_2: + response = -0.0019096403; + goto D178; + +N178_4: + if attribute(catid) in (100300014, 100300077, 100300165, 100300004, 100300073, 100400037, 100300122, 100400079, 100200028) then goto T178_3; + else goto N178_5; + +T178_3: + response = 0.0026034959; + goto D178; + +N178_5: + if attribute(catid) in (0, 100300032) then goto T178_4; + else goto T178_5; + +T178_4: + response = 0.0043048063; + goto D178; + +T178_5: + response = 0.0068527373; + goto D178; + +N178_6: + if attribute(catid) in (100300011, 100300058, 100200034, 100300027, 100200234, 100200053, 100400142, 100300169, 100400080, 100300146) then goto T178_6; + else goto T178_7; + +T178_6: + response = 0.0134919535; + goto D178; + +T178_7: + response = 0.0274589028; + goto D178; + +D178: + +tnscore = tnscore + response; + + /* Tree 180 of 200 */ +N179_1: + if attribute(catid) in (0, 100200130, 100300014, 100300058, 100300013, 100300077, 100300166, 100200034, 100200186, 100300165, 100200052, 100300093, 100300005, 100200172, 100200068, 100300027, 100300116, 100300004, 100300126, 100400142, 100200054, 100300073, 100400037, 100200193, 100400038, 100200192, 100300065, 100300212, 100300209, 100400079, 100200170, 
100300169, 100400080, 100300074, 100300066, 100300200, 100300045, 100300076, 100200185, 100300006, 100200232, 100300214, 100300146) then goto N179_2; + else goto N179_6; + +N179_2: + if attribute(catid) in (100300013, 100300165, 100200052, 100300093, 100200068, 100300126, 100400142, 100200054, 100200193, 100300209, 100300200, 100300045, 100200185, 100300214, 100300146) then goto N179_3; + else goto N179_4; + +N179_3: + if attribute(catid) in (100300013, 100400142, 100200054, 100300209, 100300200, 100200185, 100300214, 100300146) then goto T179_1; + else goto T179_2; + +T179_1: + response = -0.0220987103; + goto D179; + +T179_2: + response = -0.0066619739; + goto D179; + +N179_4: + if attribute(catid) in (100200130, 100300014, 100300058, 100300166, 100200034, 100200186, 100300005, 100200172, 100300027, 100400038, 100200170, 100300074, 100300076, 100200232) then goto T179_3; + else goto N179_5; + +T179_3: + response = -0.0009424528; + goto D179; + +N179_5: + if attribute(catid) in (0, 100300004, 100300212, 100400079) then goto T179_4; + else goto T179_5; + +T179_4: + response = 0.0034943052; + goto D179; + +T179_5: + response = 0.0064497198; + goto D179; + +N179_6: + if attribute(catid) in (100200171, 100300011, 100300143, 100400141, 100300102, 100300032, 100300121, 100200053, 100300019, 100300127, 100200067) then goto T179_6; + else goto T179_7; + +T179_6: + response = 0.0127972052; + goto D179; + +T179_7: + response = 0.0287759999; + goto D179; + +D179: + +tnscore = tnscore + response; + + /* Tree 181 of 200 */ +N180_1: + if attribute(catid) in (100200171, 100300011, 100300014, 100300058, 100300143, 100200034, 100300093, 100300102, 100300005, 100300008, 100200068, 100300032, 100300027, 100300116, 100300019, 100300126, 100200054, 100400038, 100200192, 100300209, 100300122, 100300169, 100400080, 100300007, 100200176, 100300045, 100200185, 100200232, 100300146) then goto N180_2; + else goto N180_4; + +N180_2: + if attribute(catid) in (100300102, 100300008, 100300032, 
100300116, 100300019, 100300126, 100300209, 100400080, 100200176, 100200185, 100200232, 100300146) then goto T180_1; + else goto N180_3; + +T180_1: + response = -0.0303496242; + goto D180; + +N180_3: + if attribute(catid) in (100300011, 100300143, 100200034, 100300093, 100200068, 100300027, 100200054, 100300045) then goto T180_2; + else goto T180_3; + +T180_2: + response = -0.0083664582; + goto D180; + +T180_3: + response = -0.0038598802; + goto D180; + +N180_4: + if attribute(catid) in (0, 100200130, 100300077, 100300166, 100200186, 100400141, 100200052, 100200234, 100200053, 100400142, 100300073, 100300065, 100300127, 100400079, 100200170, 100300066, 100300076, 100300006) then goto N180_5; + else goto N180_6; + +N180_5: + if attribute(catid) in (100200130, 100300077, 100300166, 100200186, 100200053, 100300073, 100300065, 100300127, 100400079, 100200170, 100300066) then goto T180_4; + else goto T180_5; + +T180_4: + response = 0.0012616080; + goto D180; + +T180_5: + response = 0.0050477397; + goto D180; + +N180_6: + if attribute(catid) in (100300165, 100200172, 100200193, 100300212) then goto T180_6; + else goto T180_7; + +T180_6: + response = 0.0108120161; + goto D180; + +T180_7: + response = 0.0235525620; + goto D180; + +D180: + +tnscore = tnscore + response; + + /* Tree 182 of 200 */ +N181_1: + if attribute(catid) in (100300011, 100300058, 100300013, 100300077, 100200186, 100300102, 100300005, 100200172, 100300008, 100300116, 100300121, 100300126, 100200054, 100400038, 100200170, 100300200, 100200055, 100200185, 100200232) then goto N181_2; + else goto N181_3; + +N181_2: + if attribute(catid) in (100300011, 100300008, 100200054, 100400038, 100300200, 100200055, 100200185, 100200232) then goto T181_1; + else goto T181_2; + +T181_1: + response = -0.0268832718; + goto D181; + +T181_2: + response = -0.0068696426; + goto D181; + +N181_3: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100300143, 100200034, 100400141, 100300165, 100200052, 100200053, 
100300004, 100400142, 100400037, 100200193, 100200192, 100300065, 100300122, 100300127, 100300169, 100400080, 100300074, 100300066, 100200176, 100300045, 100200028, 100300006) then goto N181_4; + else goto N181_7; + +N181_4: + if attribute(catid) in (0, 100200034, 100400141, 100300165, 100400142, 100200193, 100200192, 100300122, 100300127, 100400080, 100300045, 100200028, 100300006) then goto N181_5; + else goto N181_6; + +N181_5: + if attribute(catid) in (100200034, 100400141, 100300165, 100400142, 100200193, 100300122, 100200028) then goto T181_3; + else goto T181_4; + +T181_3: + response = -0.0000690536; + goto D181; + +T181_4: + response = 0.0031806080; + goto D181; + +N181_6: + if attribute(catid) in (100200171, 100200130, 100300143, 100200053, 100400037, 100300074, 100200176) then goto T181_5; + else goto T181_6; + +T181_5: + response = 0.0046312438; + goto D181; + +T181_6: + response = 0.0072930454; + goto D181; + +N181_7: + if attribute(catid) in (100300014, 100300093, 100200068, 100300032, 100300027, 100300019, 100300073, 100400079, 100300007, 100300146) then goto T181_7; + else goto T181_8; + +T181_7: + response = 0.0137098872; + goto D181; + +T181_8: + response = 0.0263591456; + goto D181; + +D181: + +tnscore = tnscore + response; + + /* Tree 183 of 200 */ +N182_1: + if attribute(catid) in (0, 100300011, 100200130, 100300014, 100300058, 100300166, 100200034, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100300032, 100300027, 100200053, 100300019, 100400142, 100300073, 100400038, 100200192, 100300065, 100300209, 100300122, 100400079, 100300169, 100400080, 100200087, 100300074, 100300007, 100300200, 100300045, 100200055, 100200185, 100300006, 100200232, 100300214) then goto N182_2; + else goto N182_6; + +N182_2: + if attribute(catid) in (100300011, 100300014, 100300058, 100300165, 100200052, 100200172, 100300008, 100300032, 100200053, 100300019, 100400142, 100300073, 100400038, 100300209, 100300122, 100400080, 100300074, 100300007, 
100300200, 100200055, 100200185, 100300006, 100300214) then goto N182_3; + else goto N182_5; + +N182_3: + if attribute(catid) in (100300008, 100300032, 100300019, 100400038, 100300209, 100300122, 100200055, 100200185, 100300006, 100300214) then goto T182_1; + else goto N182_4; + +T182_1: + response = -0.0161057659; + goto D182; + +N182_4: + if attribute(catid) in (100300014, 100300058, 100300165, 100200053, 100400142, 100300074) then goto T182_2; + else goto T182_3; + +T182_2: + response = -0.0064541439; + goto D182; + +T182_3: + response = -0.0026860316; + goto D182; + +N182_5: + if attribute(catid) in (0, 100200034, 100300005, 100300065, 100400079, 100300169) then goto T182_4; + else goto T182_5; + +T182_4: + response = 0.0019364980; + goto D182; + +T182_5: + response = 0.0039988711; + goto D182; + +N182_6: + if attribute(catid) in (100200171, 100300077, 100300143, 100200186, 100200068, 100300116, 100200234, 100300126, 100200193, 100200170, 100300066, 100200176, 100300076) then goto T182_6; + else goto N182_7; + +T182_6: + response = 0.0130783191; + goto D182; + +N182_7: + if attribute(catid) in (100300093, 100300121, 100200054, 100400037, 100300127, 100200028, 100200067) then goto T182_7; + else goto T182_8; + +T182_7: + response = 0.0218038927; + goto D182; + +T182_8: + response = 0.0414721313; + goto D182; + +D182: + +tnscore = tnscore + response; + + /* Tree 184 of 200 */ +N183_1: + if attribute(catid) in (100200171, 100300014, 100300058, 100300077, 100200186, 100300093, 100300008, 100200068, 100300032, 100300027, 100300019, 100300004, 100300126, 100200054, 100400037, 100400038, 100300209, 100400079, 100200170, 100300169, 100200087, 100300007, 100300200, 100200055, 100200185, 100300214, 100300146) then goto N183_2; + else goto N183_4; + +N183_2: + if attribute(catid) in (100300032, 100300027, 100300019, 100300004, 100400038, 100200055, 100200185, 100300214, 100300146) then goto T183_1; + else goto N183_3; + +T183_1: + response = -0.0294531155; + goto D183; + 
+N183_3: + if attribute(catid) in (100300058, 100300008, 100200068, 100300126, 100200054, 100400037, 100300007, 100300200) then goto T183_2; + else goto T183_3; + +T183_2: + response = -0.0064558393; + goto D183; + +T183_3: + response = -0.0017589508; + goto D183; + +N183_4: + if attribute(catid) in (0, 100200130, 100300013, 100300166, 100200052, 100300102, 100300121, 100200234, 100400142, 100300073, 100300065, 100300122, 100300066, 100200028) then goto N183_5; + else goto N183_6; + +N183_5: + if attribute(catid) in (100200130, 100300166, 100200052, 100300102, 100400142, 100300065) then goto T183_4; + else goto T183_5; + +T183_4: + response = 0.0028476082; + goto D183; + +T183_5: + response = 0.0049351263; + goto D183; + +N183_6: + if attribute(catid) in (100300143, 100200034, 100400141, 100300165, 100200172, 100200193, 100200192, 100300074, 100300006) then goto T183_6; + else goto T183_7; + +T183_6: + response = 0.0122604781; + goto D183; + +T183_7: + response = 0.0164705118; + goto D183; + +D183: + +tnscore = tnscore + response; + + /* Tree 185 of 200 */ +N184_1: + if attribute(catid) in (0, 100200171, 100300011, 100300077, 100300166, 100200034, 100400141, 100300165, 100300102, 100200172, 100300032, 100300027, 100300116, 100200053, 100300019, 100400038, 100200192, 100300212, 100300122, 100400079, 100300169, 100200087, 100300200, 100300045, 100200028, 100300076) then goto N184_2; + else goto N184_6; + +N184_2: + if attribute(catid) in (100300077, 100300102, 100300032, 100200053, 100300019, 100400038, 100300122, 100300169, 100200087, 100200028, 100300076) then goto N184_3; + else goto N184_4; + +N184_3: + if attribute(catid) in (100300032, 100300019, 100200087, 100200028, 100300076) then goto T184_1; + else goto T184_2; + +T184_1: + response = -0.0267943136; + goto D184; + +T184_2: + response = -0.0073590500; + goto D184; + +N184_4: + if attribute(catid) in (0, 100200171, 100300011, 100200034, 100400141, 100200172, 100300027, 100300116, 100200192, 100300212, 
100300045) then goto N184_5; + else goto T184_5; + +N184_5: + if attribute(catid) in (100200171, 100200034, 100400141, 100200172, 100300027, 100300212, 100300045) then goto T184_3; + else goto T184_4; + +T184_3: + response = -0.0027994712; + goto D184; + +T184_4: + response = -0.0009574051; + goto D184; + +T184_5: + response = 0.0035088242; + goto D184; + +N184_6: + if attribute(catid) in (100200130, 100300014, 100300058, 100300013, 100300143, 100200186, 100200052, 100200068, 100300121, 100200234, 100300126, 100400142, 100300073, 100200193, 100300065, 100300209, 100300127, 100200170, 100400080, 100300066, 100200176, 100300006, 100200232, 100300146) then goto N184_7; + else goto T184_8; + +N184_7: + if attribute(catid) in (100200130, 100300143, 100200186, 100200234, 100300073, 100300065, 100300127, 100400080, 100300066) then goto T184_6; + else goto T184_7; + +T184_6: + response = 0.0073203788; + goto D184; + +T184_7: + response = 0.0143066526; + goto D184; + +T184_8: + response = 0.0363911505; + goto D184; + +D184: + +tnscore = tnscore + response; + + /* Tree 186 of 200 */ +N185_1: + if attribute(catid) in (100300014, 100300058, 100300166, 100200034, 100300093, 100300102, 100300005, 100200172, 100200068, 100300032, 100300027, 100300019, 100300004, 100400038, 100300212, 100300209, 100200170, 100300169, 100200087, 100300074, 100300066, 100200028, 100300006, 100200232, 100300214, 100300146) then goto N185_2; + else goto N185_5; + +N185_2: + if attribute(catid) in (100300102, 100300005, 100300032, 100300019, 100300004, 100300209, 100300074, 100300066, 100200232, 100300214, 100300146) then goto N185_3; + else goto N185_4; + +N185_3: + if attribute(catid) in (100300005, 100300032, 100300019, 100300209, 100300066, 100200232, 100300214) then goto T185_1; + else goto T185_2; + +T185_1: + response = -0.0402234424; + goto D185; + +T185_2: + response = -0.0155707935; + goto D185; + +N185_4: + if attribute(catid) in (100300014, 100300166, 100200068, 100300027, 100300212, 
100200028, 100300006) then goto T185_3; + else goto T185_4; + +T185_3: + response = -0.0067321936; + goto D185; + +T185_4: + response = -0.0015157833; + goto D185; + +N185_5: + if attribute(catid) in (0, 100200171, 100300013, 100300077, 100300143, 100200186, 100200052, 100300121, 100200054, 100300073, 100200192, 100300065, 100400079, 100300045, 100200185) then goto N185_6; + else goto N185_8; + +N185_6: + if attribute(catid) in (0, 100200171, 100300077, 100200186, 100300121, 100200054, 100400079, 100300045, 100200185) then goto N185_7; + else goto T185_7; + +N185_7: + if attribute(catid) in (100200171, 100300077, 100200186, 100300121, 100200054, 100400079, 100300045) then goto T185_5; + else goto T185_6; + +T185_5: + response = 0.0027009397; + goto D185; + +T185_6: + response = 0.0049127750; + goto D185; + +T185_7: + response = 0.0074818877; + goto D185; + +N185_8: + if attribute(catid) in (100300011, 100200130, 100400141, 100200234, 100200053, 100400142, 100400037, 100200193, 100300122, 100300127, 100400080) then goto T185_8; + else goto T185_9; + +T185_8: + response = 0.0114687451; + goto D185; + +T185_9: + response = 0.0184788462; + goto D185; + +D185: + +tnscore = tnscore + response; + + /* Tree 187 of 200 */ +N186_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100300143, 100300165, 100300093, 100300102, 100300008, 100300116, 100200234, 100300019, 100400142, 100200193, 100400038, 100300065, 100300127, 100300169, 100400080, 100300074, 100300007, 100200176, 100300200, 100300076, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N186_2; + else goto N186_5; + +N186_2: + if attribute(catid) in (100300011, 100300014, 100300165, 100300008, 100300116, 100300019, 100400038, 100300007, 100200176, 100200067, 100200185, 100200232, 100300214, 100300146) then goto N186_3; + else goto N186_4; + +N186_3: + if attribute(catid) in (100300014, 100300019, 100400038, 100300007, 100200067, 100200185, 100200232, 100300214) then goto T186_1; + else goto 
T186_2; + +T186_1: + response = -0.0315173226; + goto D186; + +T186_2: + response = -0.0166807619; + goto D186; + +N186_4: + if attribute(catid) in (100300058, 100300143, 100300102, 100200234, 100200193, 100400080, 100300076) then goto T186_3; + else goto T186_4; + +T186_3: + response = -0.0097877493; + goto D186; + +T186_4: + response = -0.0031922320; + goto D186; + +N186_5: + if attribute(catid) in (0, 100300013, 100400141, 100200052, 100200172, 100200053, 100300004, 100300073, 100400037, 100200192, 100300122, 100400079, 100300066, 100200028, 100300006) then goto N186_6; + else goto N186_7; + +N186_6: + if attribute(catid) in (100200172, 100300004, 100300073, 100400037, 100300122, 100200028, 100300006) then goto T186_5; + else goto T186_6; + +T186_5: + response = 0.0016110349; + goto D186; + +T186_6: + response = 0.0058680637; + goto D186; + +N186_7: + if attribute(catid) in (100200171, 100200130, 100300077, 100300166, 100200186, 100300005, 100200068, 100300126, 100200170) then goto T186_7; + else goto T186_8; + +T186_7: + response = 0.0122208670; + goto D186; + +T186_8: + response = 0.0260145679; + goto D186; + +D186: + +tnscore = tnscore + response; + + /* Tree 188 of 200 */ +N187_1: + if attribute(catid) in (0, 100300011, 100300014, 100300058, 100300013, 100300077, 100300143, 100200186, 100300165, 100300005, 100300121, 100200053, 100300004, 100300126, 100400038, 100200192, 100300122, 100300169, 100200087, 100300074, 100300200, 100300045, 100200028, 100300006) then goto N187_2; + else goto N187_5; + +N187_2: + if attribute(catid) in (100300011, 100300013, 100300143, 100300005, 100300126, 100400038, 100200087, 100300200, 100200028) then goto N187_3; + else goto N187_4; + +N187_3: + if attribute(catid) in (100300013, 100300143, 100300005, 100300126) then goto T187_1; + else goto T187_2; + +T187_1: + response = -0.0362895954; + goto D187; + +T187_2: + response = -0.0192422418; + goto D187; + +N187_4: + if attribute(catid) in (100300058, 100300121, 100200053, 
100300169, 100300074, 100300006) then goto T187_3; + else goto T187_4; + +T187_3: + response = -0.0047104781; + goto D187; + +T187_4: + response = 0.0002148509; + goto D187; + +N187_5: + if attribute(catid) in (100200171, 100200130, 100300166, 100200034, 100400141, 100200052, 100300093, 100200172, 100300027, 100300116, 100400142, 100300073, 100400037, 100200193, 100300065, 100300209, 100300127, 100400079, 100400080, 100300066, 100300007) then goto N187_6; + else goto N187_8; + +N187_6: + if attribute(catid) in (100200171, 100200130, 100200052, 100300093, 100300073, 100400037, 100200193, 100300209, 100300066) then goto T187_5; + else goto N187_7; + +T187_5: + response = 0.0059481372; + goto D187; + +N187_7: + if attribute(catid) in (100300166, 100200034, 100300027, 100300116, 100300065) then goto T187_6; + else goto T187_7; + +T187_6: + response = 0.0086482206; + goto D187; + +T187_7: + response = 0.0113173904; + goto D187; + +N187_8: + if attribute(catid) in (100300102, 100200068, 100300212, 100200170, 100300076, 100200067, 100200232, 100300146) then goto T187_8; + else goto T187_9; + +T187_8: + response = 0.0189016022; + goto D187; + +T187_9: + response = 0.0294237004; + goto D187; + +D187: + +tnscore = tnscore + response; + + /* Tree 189 of 200 */ +N188_1: + if attribute(catid) in (100300011, 100300014, 100200034, 100200186, 100300165, 100200172, 100300008, 100200068, 100300121, 100200234, 100300126, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300209, 100300122, 100200170, 100400080, 100300007, 100200176, 100300045, 100200028, 100300076, 100200185, 100300146) then goto N188_2; + else goto N188_5; + +N188_2: + if attribute(catid) in (100300011, 100200034, 100300165, 100300008, 100200234, 100300126, 100200054, 100400037, 100300209, 100400080, 100300007, 100200176, 100200028, 100200185, 100300146) then goto N188_3; + else goto N188_4; + +N188_3: + if attribute(catid) in (100300011, 100200034, 100200234, 100300126, 100400037, 100300209, 
100300007, 100200176, 100200185, 100300146) then goto T188_1; + else goto T188_2; + +T188_1: + response = -0.0183846087; + goto D188; + +T188_2: + response = -0.0112640996; + goto D188; + +N188_4: + if attribute(catid) in (100200068, 100300121, 100300073, 100300065, 100200170, 100300045, 100300076) then goto T188_3; + else goto T188_4; + +T188_3: + response = -0.0047781445; + goto D188; + +T188_4: + response = -0.0011633168; + goto D188; + +N188_5: + if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300093, 100300005, 100300027, 100300127, 100400079, 100200087, 100300074, 100300066) then goto N188_6; + else goto N188_7; + +N188_6: + if attribute(catid) in (0, 100200130, 100300027, 100300127, 100400079, 100200087) then goto T188_5; + else goto T188_6; + +T188_5: + response = 0.0039708336; + goto D188; + +T188_6: + response = 0.0079025406; + goto D188; + +N188_7: + if attribute(catid) in (100200171, 100300166, 100400141, 100200052, 100300032, 100300116, 100200053, 100300004, 100400142, 100300169, 100300200, 100300006) then goto N188_8; + else goto T188_9; + +N188_8: + if attribute(catid) in (100300166, 100300032, 100200053, 100400142, 100300169, 100300006) then goto T188_7; + else goto T188_8; + +T188_7: + response = 0.0121783231; + goto D188; + +T188_8: + response = 0.0159340797; + goto D188; + +T188_9: + response = 0.0363585815; + goto D188; + +D188: + +tnscore = tnscore + response; + + /* Tree 190 of 200 */ +N189_1: + if attribute(catid) in (100300011, 100300013, 100300077, 100300143, 100200034, 100200186, 100300165, 100300102, 100300008, 100200053, 100300019, 100400142, 100200054, 100400037, 100300127, 100400079, 100400080, 100200087, 100300007, 100300200, 100300045, 100200067, 100200055, 100200185, 100300006, 100300146) then goto N189_2; + else goto N189_4; + +N189_2: + if attribute(catid) in (100300013, 100300143, 100300102, 100200053, 100300019, 100300200, 100200067, 100200055, 100200185, 100300006, 100300146) then goto T189_1; + else goto N189_3; 
+ +T189_1: + response = -0.0269176309; + goto D189; + +N189_3: + if attribute(catid) in (100300011, 100200186, 100300008, 100200054, 100400037, 100400079, 100400080, 100300045) then goto T189_2; + else goto T189_3; + +T189_2: + response = -0.0082753604; + goto D189; + +T189_3: + response = -0.0027168619; + goto D189; + +N189_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300166, 100400141, 100200172, 100200068, 100300121, 100300073, 100200193, 100200192, 100300065, 100200170, 100300169, 100300066, 100200028) then goto N189_5; + else goto N189_6; + +N189_5: + if attribute(catid) in (0, 100200130, 100200172, 100300121, 100300073, 100200192, 100300065, 100300169, 100200028) then goto T189_4; + else goto T189_5; + +T189_4: + response = 0.0030863813; + goto D189; + +T189_5: + response = 0.0082511598; + goto D189; + +N189_6: + if attribute(catid) in (100300014, 100200052, 100300027, 100300116, 100200234, 100300004, 100400038, 100300122, 100300074) then goto T189_6; + else goto T189_7; + +T189_6: + response = 0.0142405946; + goto D189; + +T189_7: + response = 0.0282071621; + goto D189; + +D189: + +tnscore = tnscore + response; + + /* Tree 191 of 200 */ +N190_1: + if attribute(catid) in (100300058, 100300143, 100300093, 100300005, 100300116, 100300121, 100300019, 100300004, 100200193, 100400038, 100300200, 100300045, 100300076, 100200055) then goto N190_2; + else goto N190_3; + +N190_2: + if attribute(catid) in (100300143, 100300121, 100300019, 100200193, 100300076, 100200055) then goto T190_1; + else goto T190_2; + +T190_1: + response = -0.0314321220; + goto D190; + +T190_2: + response = -0.0099657936; + goto D190; + +N190_3: + if attribute(catid) in (0, 100200171, 100200130, 100300166, 100200186, 100400141, 100300165, 100200052, 100200053, 100400142, 100300073, 100200192, 100300065, 100300122, 100300127, 100400079, 100200170, 100300169, 100400080, 100200087, 100300074, 100200176, 100200028, 100200067, 100300006) then goto N190_4; + else goto N190_6; 
+ +N190_4: + if attribute(catid) in (100400141, 100200052, 100200192, 100300122, 100300127, 100200087, 100300074, 100300006) then goto T190_3; + else goto N190_5; + +T190_3: + response = -0.0018808186; + goto D190; + +N190_5: + if attribute(catid) in (0, 100200171, 100200186, 100300165, 100200053, 100400142, 100300065, 100300169, 100200028, 100200067) then goto T190_4; + else goto T190_5; + +T190_4: + response = 0.0034558143; + goto D190; + +T190_5: + response = 0.0071443084; + goto D190; + +N190_6: + if attribute(catid) in (100300011, 100300014, 100300013, 100300077, 100200034, 100200172, 100200068, 100300027, 100200234, 100200054, 100300212, 100300066, 100300007, 100300146) then goto T190_6; + else goto T190_7; + +T190_6: + response = 0.0113747240; + goto D190; + +T190_7: + response = 0.0365678279; + goto D190; + +D190: + +tnscore = tnscore + response; + + /* Tree 192 of 200 */ +N191_1: + if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100200034, 100400141, 100300165, 100200172, 100200068, 100300004, 100400142, 100200054, 100400037, 100300212, 100300127, 100400079, 100400080, 100300074, 100300066, 100200028, 100200067, 100200055, 100300006) then goto N191_2; + else goto N191_5; + +N191_2: + if attribute(catid) in (100200034, 100300165, 100300004, 100200054, 100300127, 100200028, 100200067, 100200055, 100300006) then goto N191_3; + else goto N191_4; + +N191_3: + if attribute(catid) in (100200034, 100300004, 100300127, 100200028, 100200067, 100200055) then goto T191_1; + else goto T191_2; + +T191_1: + response = -0.0176698057; + goto D191; + +T191_2: + response = -0.0096668582; + goto D191; + +N191_4: + if attribute(catid) in (100200130, 100300013, 100300077, 100300166, 100400142, 100400037, 100300212, 100400079) then goto T191_3; + else goto T191_4; + +T191_3: + response = -0.0070606716; + goto D191; + +T191_4: + response = -0.0037781209; + goto D191; + +N191_5: + if attribute(catid) in (0, 100200171, 100300058, 100200186, 100200052, 100300102, 
100300027, 100200053, 100300073, 100200193, 100200192, 100300065, 100300122, 100300169, 100200087, 100200176, 100300076, 100200232) then goto N191_6; + else goto N191_7; + +N191_6: + if attribute(catid) in (0, 100300058, 100200052, 100300102, 100300027, 100200192, 100300065, 100300122, 100200232) then goto T191_5; + else goto T191_6; + +T191_5: + response = 0.0055156165; + goto D191; + +T191_6: + response = 0.0073414677; + goto D191; + +N191_7: + if attribute(catid) in (100300005, 100300008, 100300032, 100300121, 100200234, 100300126, 100400038, 100200170, 100300200, 100200185) then goto T191_7; + else goto T191_8; + +T191_7: + response = 0.0138952295; + goto D191; + +T191_8: + response = 0.0286522384; + goto D191; + +D191: + +tnscore = tnscore + response; + + /* Tree 193 of 200 */ +N192_1: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100300143, 100200186, 100300165, 100300093, 100300102, 100300005, 100200172, 100300008, 100300027, 100300116, 100300121, 100200234, 100200053, 100300004, 100200054, 100400038, 100200192, 100400079, 100300169, 100400080, 100200087, 100300007, 100200176, 100300200, 100300045, 100200055, 100300006, 100300214) then goto N192_2; + else goto N192_5; + +N192_2: + if attribute(catid) in (100300011, 100300014, 100300143, 100300093, 100300102, 100200172, 100200054, 100400079, 100200087, 100300007, 100300200, 100300045, 100200055, 100300006, 100300214) then goto N192_3; + else goto N192_4; + +N192_3: + if attribute(catid) in (100300014, 100300093, 100300102, 100200054, 100300007, 100300200, 100200055, 100300006, 100300214) then goto T192_1; + else goto T192_2; + +T192_1: + response = -0.0266674639; + goto D192; + +T192_2: + response = -0.0102733938; + goto D192; + +N192_4: + if attribute(catid) in (0, 100200171, 100300077, 100300027, 100200053, 100300004, 100200192, 100300169, 100400080, 100200176) then goto T192_3; + else goto T192_4; + +T192_3: + response = -0.0008805496; + goto D192; + 
+T192_4: + response = 0.0030248341; + goto D192; + +N192_5: + if attribute(catid) in (100300058, 100200034, 100400141, 100200052, 100300126, 100300073, 100300065, 100300212, 100300122, 100300127, 100200170, 100300076, 100200185, 100200232, 100300146) then goto N192_6; + else goto N192_7; + +N192_6: + if attribute(catid) in (100300058, 100400141, 100300126, 100300073, 100300065, 100300212, 100300122, 100200185) then goto T192_5; + else goto T192_6; + +T192_5: + response = 0.0088319892; + goto D192; + +T192_6: + response = 0.0145181522; + goto D192; + +N192_7: + if attribute(catid) in (100200068, 100400142, 100200193, 100300074, 100300066) then goto T192_7; + else goto T192_8; + +T192_7: + response = 0.0203803500; + goto D192; + +T192_8: + response = 0.0412794221; + goto D192; + +D192: + +tnscore = tnscore + response; + + /* Tree 194 of 200 */ +N193_1: + if attribute(catid) in (100300011, 100300014, 100300013, 100300077, 100300143, 100200186, 100300093, 100300005, 100300008, 100300032, 100300121, 100200054, 100200192, 100300122, 100300007, 100300200, 100200067, 100200055, 100200185, 100200232) then goto N193_2; + else goto N193_3; + +N193_2: + if attribute(catid) in (100300143, 100300005, 100300008, 100300032, 100200054, 100300007, 100200067, 100200055, 100200232) then goto T193_1; + else goto T193_2; + +T193_1: + response = -0.0313843116; + goto D193; + +T193_2: + response = -0.0090786448; + goto D193; + +N193_3: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100400141, 100300165, 100200052, 100300102, 100200172, 100200068, 100300116, 100300004, 100400142, 100300073, 100200193, 100400038, 100300065, 100300127, 100400079, 100300169, 100300066, 100200176, 100300045, 100200028, 100300006) then goto N193_4; + else goto N193_5; + +N193_4: + if attribute(catid) in (0, 100200171, 100200130, 100200052, 100300116, 100400038, 100400079, 100300169, 100300045, 100200028) then goto T193_3; + else goto T193_4; + +T193_3: + response = 0.0019399364; + goto D193; + 
+T193_4: + response = 0.0057587234; + goto D193; + +N193_5: + if attribute(catid) in (100300166, 100300027, 100200234, 100200053, 100200170, 100200087, 100300074, 100300076) then goto T193_5; + else goto T193_6; + +T193_5: + response = 0.0118065270; + goto D193; + +T193_6: + response = 0.0240753548; + goto D193; + +D193: + +tnscore = tnscore + response; + + /* Tree 195 of 200 */ +N194_1: + if attribute(catid) in (100300011, 100300014, 100200034, 100300165, 100300093, 100300032, 100300121, 100300019, 100300004, 100400142, 100400038, 100300122, 100300127, 100200170, 100300074, 100200176, 100300200, 100200067, 100200055, 100300006, 100200232, 100300146) then goto N194_2; + else goto N194_4; + +N194_2: + if attribute(catid) in (100300011, 100200034, 100300093, 100300032, 100300019, 100400038, 100200176, 100200067, 100200055) then goto T194_1; + else goto N194_3; + +T194_1: + response = -0.0297490120; + goto D194; + +N194_3: + if attribute(catid) in (100300165, 100300004, 100300200, 100300006, 100200232, 100300146) then goto T194_2; + else goto T194_3; + +T194_2: + response = -0.0129734582; + goto D194; + +T194_3: + response = -0.0048159123; + goto D194; + +N194_4: + if attribute(catid) in (0, 100200171, 100200130, 100300058, 100300077, 100300166, 100200186, 100400141, 100200052, 100300102, 100300005, 100200172, 100200068, 100300027, 100300116, 100200234, 100200053, 100200054, 100300073, 100400037, 100200193, 100200192, 100300065, 100300169, 100400080, 100300066, 100300045, 100200028, 100300076, 100200185) then goto N194_5; + else goto N194_8; + +N194_5: + if attribute(catid) in (0, 100200130, 100300058, 100300077, 100300166, 100200186, 100300102, 100300116, 100200053, 100200192, 100300065, 100300045, 100200028) then goto N194_6; + else goto N194_7; + +N194_6: + if attribute(catid) in (100300077, 100300166, 100200186, 100300116, 100200192) then goto T194_4; + else goto T194_5; + +T194_4: + response = -0.0004494225; + goto D194; + +T194_5: + response = 0.0030538822; + 
goto D194; + +N194_7: + if attribute(catid) in (100200171, 100400141, 100200234, 100300073, 100400037, 100300169, 100300066) then goto T194_6; + else goto T194_7; + +T194_6: + response = 0.0068726028; + goto D194; + +T194_7: + response = 0.0116359714; + goto D194; + +N194_8: + if attribute(catid) in (100300143, 100300126, 100400079, 100200087) then goto T194_8; + else goto T194_9; + +T194_8: + response = 0.0222596119; + goto D194; + +T194_9: + response = 0.0442934684; + goto D194; + +D194: + +tnscore = tnscore + response; + + /* Tree 196 of 200 */ +N195_1: + if attribute(catid) in (100300011, 100300077, 100200186, 100400141, 100300165, 100200052, 100300005, 100200172, 100300008, 100200068, 100300032, 100300116, 100300019, 100400142, 100200054, 100200193, 100400038, 100300209, 100300127, 100200170, 100300169, 100300074, 100200176, 100300045, 100200067, 100200055, 100200232, 100300146) then goto N195_2; + else goto N195_5; + +N195_2: + if attribute(catid) in (100300011, 100300008, 100200068, 100300032, 100300019, 100200193, 100300074, 100200176, 100200067, 100200055, 100200232, 100300146) then goto N195_3; + else goto N195_4; + +N195_3: + if attribute(catid) in (100300032, 100300019, 100200193, 100300074, 100200176, 100200067, 100200055, 100200232, 100300146) then goto T195_1; + else goto T195_2; + +T195_1: + response = -0.0304543117; + goto D195; + +T195_2: + response = -0.0136959974; + goto D195; + +N195_4: + if attribute(catid) in (100400141, 100400142, 100300127, 100200170) then goto T195_3; + else goto T195_4; + +T195_3: + response = -0.0090390793; + goto D195; + +T195_4: + response = -0.0027839113; + goto D195; + +N195_5: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300058, 100300166, 100200034, 100300027, 100300121, 100200234, 100200053, 100300004, 100300073, 100200192, 100300122, 100400079, 100400080, 100200087, 100300066, 100300200, 100200028, 100300076) then goto N195_6; + else goto N195_7; + +N195_6: + if attribute(catid) in (0, 
100300058, 100300027, 100200234, 100200053, 100300004, 100300073, 100200192, 100400080, 100300066, 100300200) then goto T195_5; + else goto T195_6; + +T195_5: + response = 0.0035175195; + goto D195; + +T195_6: + response = 0.0082798864; + goto D195; + +N195_7: + if attribute(catid) in (100300093, 100300126, 100300065) then goto T195_7; + else goto T195_8; + +T195_7: + response = 0.0167800289; + goto D195; + +T195_8: + response = 0.0355577197; + goto D195; + +D195: + +tnscore = tnscore + response; + + /* Tree 197 of 200 */ +N196_1: + if attribute(catid) in (0, 100200130, 100300013, 100300077, 100300166, 100300143, 100200186, 100200052, 100300005, 100200172, 100200234, 100200053, 100300019, 100300004, 100400142, 100200054, 100400037, 100200193, 100300212, 100300127, 100400079, 100300169, 100400080, 100200087, 100300074, 100300066, 100300007, 100200176, 100300200, 100300045, 100300076, 100200067, 100200185, 100300006, 100200232, 100300146) then goto N196_2; + else goto N196_6; + +N196_2: + if attribute(catid) in (100300013, 100200186, 100200234, 100300019, 100300004, 100200054, 100200193, 100300212, 100400080, 100200087, 100200176, 100300200, 100200067, 100200185, 100200232, 100300146) then goto N196_3; + else goto N196_4; + +N196_3: + if attribute(catid) in (100300013, 100300019, 100200193, 100300212, 100200176, 100200067, 100200185, 100200232) then goto T196_1; + else goto T196_2; + +T196_1: + response = -0.0351013956; + goto D196; + +T196_2: + response = -0.0146983415; + goto D196; + +N196_4: + if attribute(catid) in (0, 100300077, 100300143, 100300005, 100200053, 100300169, 100300074, 100300066, 100300007, 100300045, 100300076, 100300006) then goto N196_5; + else goto T196_5; + +N196_5: + if attribute(catid) in (100300077, 100300143, 100300169, 100300074, 100300007, 100300045) then goto T196_3; + else goto T196_4; + +T196_3: + response = -0.0036169246; + goto D196; + +T196_4: + response = 0.0006508121; + goto D196; + +T196_5: + response = 0.0039348871; + goto 
D196; + +N196_6: + if attribute(catid) in (100200171, 100300011, 100300058, 100400141, 100300165, 100300093, 100200068, 100300032, 100300027, 100300121, 100300126, 100300073, 100400038, 100200192, 100300065, 100200170) then goto N196_7; + else goto N196_8; + +N196_7: + if attribute(catid) in (100300058, 100400141, 100300165, 100300027, 100300121, 100300065, 100200170) then goto T196_6; + else goto T196_7; + +T196_6: + response = 0.0092359739; + goto D196; + +T196_7: + response = 0.0139160873; + goto D196; + +N196_8: + if attribute(catid) in (100300014, 100200034, 100300122, 100200028) then goto T196_8; + else goto T196_9; + +T196_8: + response = 0.0220286224; + goto D196; + +T196_9: + response = 0.0419934945; + goto D196; + +D196: + +tnscore = tnscore + response; + + /* Tree 198 of 200 */ +N197_1: + if attribute(catid) in (100300013, 100300143, 100300005, 100300116, 100300019, 100300004, 100200054, 100300065, 100300212, 100300209, 100200087, 100200232, 100300214) then goto N197_2; + else goto N197_3; + +N197_2: + if attribute(catid) in (100300143, 100300019, 100300065, 100300209, 100200232, 100300214) then goto T197_1; + else goto T197_2; + +T197_1: + response = -0.0342436123; + goto D197; + +T197_2: + response = -0.0130043453; + goto D197; + +N197_3: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100200186, 100400141, 100200052, 100200172, 100200068, 100200053, 100400142, 100300073, 100400037, 100200193, 100400038, 100300122, 100400079, 100200170, 100300007, 100300045, 100200185, 100300006) then goto N197_4; + else goto N197_6; + +N197_4: + if attribute(catid) in (100200130, 100300014, 100400141, 100200172, 100200068, 100400142, 100400037, 100200193, 100300007, 100300006) then goto T197_3; + else goto N197_5; + +T197_3: + response = -0.0025589925; + goto D197; + +N197_5: + if attribute(catid) in (100200171, 100300073, 100400038) then goto T197_4; + else goto T197_5; + +T197_4: + response = 0.0013996109; + goto D197; + +T197_5: + response = 
0.0034393713; + goto D197; + +N197_6: + if attribute(catid) in (100300011, 100300077, 100300166, 100200034, 100300165, 100300093, 100300027, 100200192, 100300169, 100300066, 100300200, 100200028, 100300076) then goto N197_7; + else goto T197_8; + +N197_7: + if attribute(catid) in (100300077, 100300166, 100200192, 100300169, 100300066, 100200028) then goto T197_6; + else goto T197_7; + +T197_6: + response = 0.0071225815; + goto D197; + +T197_7: + response = 0.0104333907; + goto D197; + +T197_8: + response = 0.0168963783; + goto D197; + +D197: + +tnscore = tnscore + response; + + /* Tree 199 of 200 */ +N198_1: + if attribute(catid) in (100300011, 100300014, 100300058, 100300013, 100300143, 100300027, 100200053, 100300019, 100300126, 100400037, 100300169, 100400080, 100200176, 100300076, 100200067, 100200055, 100200185, 100300214, 100300146) then goto N198_2; + else goto N198_3; + +N198_2: + if attribute(catid) in (100300011, 100300058, 100300013, 100300143, 100300019, 100400037, 100200176, 100200067, 100200055, 100200185, 100300214) then goto T198_1; + else goto T198_2; + +T198_1: + response = -0.0290173523; + goto D198; + +T198_2: + response = -0.0086034947; + goto D198; + +N198_3: + if attribute(catid) in (0, 100300077, 100300166, 100200034, 100200186, 100400141, 100300165, 100200052, 100300093, 100200172, 100200068, 100300116, 100200234, 100300004, 100400142, 100300073, 100400038, 100200192, 100300065, 100300209, 100300127, 100400079, 100200170, 100200087, 100300074, 100300045, 100200232) then goto N198_4; + else goto N198_6; + +N198_4: + if attribute(catid) in (100300077, 100200186, 100400141, 100300093, 100200172, 100200068, 100300004, 100300065, 100300074) then goto T198_3; + else goto N198_5; + +T198_3: + response = -0.0007979247; + goto D198; + +N198_5: + if attribute(catid) in (0, 100300166, 100300165, 100200052, 100300045) then goto T198_4; + else goto T198_5; + +T198_4: + response = 0.0031637733; + goto D198; + +T198_5: + response = 0.0058373245; + goto 
D198; + +N198_6: + if attribute(catid) in (100200171, 100200130, 100300005, 100300008, 100300121, 100300066, 100300007, 100200028, 100300006) then goto T198_6; + else goto N198_7; + +T198_6: + response = 0.0142240118; + goto D198; + +N198_7: + if attribute(catid) in (100200054, 100200193, 100300122) then goto T198_7; + else goto T198_8; + +T198_7: + response = 0.0228053439; + goto D198; + +T198_8: + response = 0.0584841669; + goto D198; + +D198: + +tnscore = tnscore + response; + + /* Tree 200 of 200 */ +N199_1: + if attribute(catid) in (100300143, 100300165, 100300093, 100300008, 100300116, 100200234, 100300004, 100300126, 100200054, 100400037, 100200193, 100400038, 100300212, 100300122, 100200170, 100300066, 100300007, 100200055, 100200232, 100300146) then goto N199_2; + else goto N199_4; + +N199_2: + if attribute(catid) in (100300008, 100300116, 100300126, 100300212, 100300007, 100200055, 100200232, 100300146) then goto T199_1; + else goto N199_3; + +T199_1: + response = -0.0246224156; + goto D199; + +N199_3: + if attribute(catid) in (100300143, 100300165, 100200234, 100300004, 100400037, 100300122) then goto T199_2; + else goto T199_3; + +T199_2: + response = -0.0104770173; + goto D199; + +T199_3: + response = -0.0059185929; + goto D199; + +N199_4: + if attribute(catid) in (0, 100200171, 100300011, 100200130, 100300014, 100300077, 100300166, 100200172, 100300121, 100200053, 100300073, 100300127, 100400079, 100200028) then goto N199_5; + else goto N199_7; + +N199_5: + if attribute(catid) in (0, 100200171, 100200130, 100300014, 100300077, 100300121, 100200053, 100300073) then goto N199_6; + else goto T199_6; + +N199_6: + if attribute(catid) in (100200171, 100200130, 100300077, 100200053, 100300073) then goto T199_4; + else goto T199_5; + +T199_4: + response = 0.0010797418; + goto D199; + +T199_5: + response = 0.0024531718; + goto D199; + +T199_6: + response = 0.0064407369; + goto D199; + +N199_7: + if attribute(catid) in (100200186, 100400141, 100200052, 
100300102, 100200068, 100300027, 100300019, 100400142, 100200192, 100300065, 100300169, 100400080, 100300200, 100300076, 100300006) then goto T199_7; + else goto N199_8; + +T199_7: + response = 0.0120768393; + goto D199; + +N199_8: + if attribute(catid) in (100300058, 100200034, 100300209, 100200087, 100300074) then goto T199_8; + else goto T199_9; + +T199_8: + response = 0.0185835130; + goto D199; + +T199_9: + response = 0.0301892716; + goto D199; + +D199: + +tnscore = tnscore + response; + +return; diff --git a/searchlib/src/test/files/treenet08.model b/searchlib/src/test/files/treenet08.model new file mode 100644 index 00000000000..3edd278d432 --- /dev/null +++ b/searchlib/src/test/files/treenet08.model @@ -0,0 +1,227 @@ +/********************************************************** + * The following C source code was automatically generated + * by the new DTREE version: 1.x + **********************************************************/ + +#include <string.h> /* for strcmp() */ + +/******************************************* + * APPLICATION DEPENDENT MISSING VALUE CODES + *******************************************/ + +const double DBL_MISSING_VALUE = 0.0; +const int INT_MISSING_VALUE = 0; + +/************ + * PREDICTORS + ************/ +double CT$, QPSCOREFOR_KG PEOPLE, SDSF_LOCAL, SDSF_WEB; + +/*************************************************************** + * Here come the trees in the treenet. A shell for calling them + * appears at the end of this source file. + ***************************************************************/ +/* Data Dictionary, Number Of Variables = 4*/ +/* Name = CT$, Type = categorical. */ +/* Name = QPSCOREFOR_KG PEOPLE, Type = continuous. */ +/* Name = SDSF_LOCAL, Type = continuous. */ +/* Name = SDSF_WEB, Type = continuous. 
*/ + +MODELBEGIN: + + /* N trees: 5 */ + +link TN0; +pred = tnscore; /* predicted value for GRADE */ + +/*********************/ +/* Model is complete */ +/*********************/ + +return; + + +tnscore = 0.0; + +TN0: + + /* Tree 1 of 5 */ +N0_1: + if CT$ in (Wiki, Web, Image, Video, Finance) then goto N0_2; + else goto T0_4; + +N0_2: + if SDSF_LOCAL < 0.6359952986 then goto N0_3; + else goto T0_3; + +N0_3: + if CT$ in (Image, Video) then goto T0_1; + else goto T0_2; + +T0_1: + response = -0.1846455351; + goto D0; + +T0_2: + response = -0.0057844764; + goto D0; + +T0_3: + response = -0.4039473684; + goto D0; + +T0_4: + response = 0.2900655347; + goto D0; + +D0: + +tnscore = tnscore + response; + + /* Tree 2 of 5 */ +N1_1: + if CT$ in (Wiki, Web, Image, Video, KG Movie, Finance, Timezone) then goto T1_1; + else goto N1_2; + +T1_1: + response = -0.0790797330; + goto D1; + +N1_2: + if CT$ in (Local, Q2A) then goto N1_3; + else goto T1_4; + +N1_3: + if SDSF_LOCAL < 0.5348491371 then goto T1_2; + else goto T1_3; + +T1_2: + response = -0.0304336373; + goto D1; + +T1_3: + response = 0.2401947405; + goto D1; + +T1_4: + response = 0.3739991530; + goto D1; + +D1: + +tnscore = tnscore + response; + + /* Tree 3 of 5 */ +N2_1: + if CT$ in (Web, Image, Video, Timezone) then goto T2_1; + else goto N2_2; + +T2_1: + response = -0.0572267897; + goto D2; + +N2_2: + if CT$ in (Wiki, Local, KG Movie) then goto N2_3; + else goto T2_4; + +N2_3: + if SDSF_LOCAL < 0.4078139514 then goto T2_2; + else goto T2_3; + +T2_2: + response = -0.0295648159; + goto D2; + +T2_3: + response = 0.1601345785; + goto D2; + +T2_4: + response = 0.2612064355; + goto D2; + +D2: + +tnscore = tnscore + response; + + /* Tree 4 of 5 */ +N3_1: + if CT$ in (Image, Video, Timezone) then goto T3_1; + else goto N3_2; + +T3_1: + response = -0.1103244788; + goto D3; + +N3_2: + if CT$ in (Wiki, Web, Local, KG Movie) then goto N3_3; + else goto T3_4; + +N3_3: + if QPSCOREFOR_KG_PEOPLE < 0.9930000007 then goto T3_2; + else 
goto T3_3; + +T3_2: + response = 0.0194079789; + goto D3; + +T3_3: + response = -0.2056829336; + goto D3; + +T3_4: + response = 0.1987635246; + goto D3; + +D3: + +tnscore = tnscore + response; + + /* Tree 5 of 5 */ +N4_1: + if CT$ in (Image, Video, Event, Timezone) then goto N4_2; + else goto N4_3; + +N4_2: + if SDSF_WEB < 0.3725785315 then goto T4_1; + else goto T4_2; + +T4_1: + response = -0.0680975953; + goto D4; + +T4_2: + response = -0.2264832978; + goto D4; + +N4_3: + if CT$ in (Wiki, Web, Local, Q2A) then goto T4_3; + else goto T4_4; + +T4_3: + response = 0.0105928220; + goto D4; + +T4_4: + response = 0.1366891795; + goto D4; + +D4: + +tnscore = tnscore + response; + +return; +/****************************** + * CALL EACH TREENET EXPLICITLY + ******************************/ + +void grove(void) +{ + int terminal_node_number; + + + return; +} + + ___FINISHED___ + diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java new file mode 100755 index 00000000000..2f271ec84db --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/AggregationTestCase.java @@ -0,0 +1,346 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.document.DocumentId; +import com.yahoo.document.GlobalId; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.searchlib.expression.*; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectOperation; +import com.yahoo.vespa.objects.ObjectPredicate; +import junit.framework.TestCase; + +/** + * @author Henning Baldersheim + */ +public class AggregationTestCase extends TestCase { + + public void testSumAggregationResult() { + SumAggregationResult a = new SumAggregationResult(); + a.setExpression(new AttributeNode("attributeA")); + a.setSum(new IntegerResultNode(7)); + assertEquals(a.getSum().getInteger(), 7); + SumAggregationResult b = (SumAggregationResult)serializeDeserialize(a); + assertEquals(b.getSum().getInteger(), 7); + b.merge(a); + assertEquals(b.getSum().getInteger(), 14); + } + + public void testXorAggregationResult() { + XorAggregationResult a = new XorAggregationResult(6); + a.setExpression(new AttributeNode("attributeA")); + assertEquals(a.getXor(), 6); + a.setXor(7); + assertEquals(a.getXor(), 7); + XorAggregationResult b = (XorAggregationResult)serializeDeserialize(a); + assertEquals(b.getXor(), 7); + b.merge(a); + assertEquals(b.getXor(), 0); + } + + public void testCountAggregationResult() { + CountAggregationResult a = new CountAggregationResult(6); + a.setExpression(new AttributeNode("attributeA")); + assertEquals(a.getCount(), 6); + a.setCount(7); + assertEquals(a.getCount(), 7); + CountAggregationResult b = (CountAggregationResult)serializeDeserialize(a); + assertEquals(b.getCount(), 7); + b.merge(a); + assertEquals(b.getCount(), 14); + } + + public void testMinAggregationResult() { + MinAggregationResult a = new MinAggregationResult(new IntegerResultNode(6)); + a.setExpression(new AttributeNode("attributeA")); + assertEquals(a.getMin().getInteger(), 6); + a.setMin(new IntegerResultNode(7)); + 
assertEquals(a.getMin().getInteger(), 7); + MinAggregationResult b = (MinAggregationResult)serializeDeserialize(a); + a.setMin(new IntegerResultNode(6)); + assertEquals(b.getMin().getInteger(), 7); + b.merge(a); + assertEquals(b.getMin().getInteger(), 6); + } + + public void testMaxAggregationResult() { + MaxAggregationResult a = new MaxAggregationResult(new IntegerResultNode(6)); + a.setExpression(new AttributeNode("attributeA")); + assertEquals(a.getMax().getInteger(), 6); + a.setMax(new IntegerResultNode(7)); + assertEquals(a.getMax().getInteger(), 7); + MaxAggregationResult b = (MaxAggregationResult)serializeDeserialize(a); + a.setMax(new IntegerResultNode(6)); + assertEquals(b.getMax().getInteger(), 7); + b.merge(a); + assertEquals(b.getMax().getInteger(), 7); + } + + public void testAverageAggregationResult() { + AverageAggregationResult a = new AverageAggregationResult(new FloatResultNode(72), 6); + a.setExpression(new AttributeNode("attributeA")); + assertEquals(a.getCount(), 6); + a.setCount(8); + assertEquals(a.getCount(), 8); + AverageAggregationResult b = (AverageAggregationResult)serializeDeserialize(a); + assertEquals(b.getCount(), 8); + a.setCount(6); + b.merge(a); + assertEquals(b.getCount(), 14); + assertEquals(b.getSum().getInteger(), 144); + } + + private static boolean equals(Object a, Object b) { + return a.equals(b); + } + + private GlobalId createGlobalId(int docId) { + return new GlobalId((new DocumentId("doc:test:" + docId)).getGlobalId()); + } + + public void testFs4HitsAggregationResult() { + double rank1 = 1; + double rank2 = 2; + assertEquals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(1), rank1)); + assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(2, createGlobalId(1), rank1))); + assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(2), rank1))); + assertFalse(equals(new FS4Hit(1, createGlobalId(1), rank1), new FS4Hit(1, createGlobalId(1), rank2))); + + 
HitsAggregationResult a = new HitsAggregationResult(5); + assertEquals(5, a.getMaxHits()); + assertEquals(0, a.getHits().size()); + a.setExpression(new AttributeNode("attributeA")); + a.addHit(new FS4Hit(1, createGlobalId(2), rank1)); + a.addHit(new FS4Hit(5, createGlobalId(7), rank2)); + assertEquals(2, a.getHits().size()); + HitsAggregationResult b = (HitsAggregationResult)serializeDeserialize(a); + assertEquals(a, b); + a.postMerge(); + assertEquals(2, a.getHits().size()); + assertEquals(2.0, a.getHits().get(0).getRank()); + a.setMaxHits(1).postMerge(); + assertEquals(1, a.getHits().size()); + assertEquals(2.0, a.getHits().get(0).getRank()); + + HitsAggregationResult hits = new HitsAggregationResult(3) + .addHit(new FS4Hit(1, createGlobalId(3), 1)) + .addHit(new FS4Hit(2, createGlobalId(2), 2)) + .addHit(new FS4Hit(3, createGlobalId(1), 3)); + Grouping request = new Grouping() + .setRoot(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()))) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()))))); + assertFS4Hits(request, 0, 0, 3); + assertFS4Hits(request, 1, 1, 6); + assertFS4Hits(request, 2, 2, 6); + assertFS4Hits(request, 3, 3, 3); + assertFS4Hits(request, 4, 4, 0); + + assertFS4Hits(request, 0, 1, 9); + assertFS4Hits(request, 0, 2, 15); + assertFS4Hits(request, 0, 3, 18); + assertFS4Hits(request, 0, 4, 18); + assertFS4Hits(request, 1, 4, 15); + assertFS4Hits(request, 2, 4, 9); + assertFS4Hits(request, 3, 4, 3); + + assertFS4Hits(request, 1, 2, 12); + assertFS4Hits(request, 2, 3, 9); + assertFS4Hits(request, 3, 4, 3); + assertFS4Hits(request, 4, 5, 0); + } + + public void testVdsHitsAggregationResult() { + double rank1 = 1; + double rank2 = 2; + byte [] s1 = {'a','b','c'}; + byte [] s2 = {'n','o','e'}; + byte 
[] s3 = {'n','o','3'}; + assertEquals(new VdsHit("1", s1, rank1), new VdsHit("1", s1, rank1)); + assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("2", s1, rank1))); + assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("1", s2, rank1))); + assertFalse(equals(new VdsHit("1", s1, rank1), new VdsHit("1", s1, rank2))); + + HitsAggregationResult a = new HitsAggregationResult(5); + assertEquals(5, a.getMaxHits()); + assertEquals(0, a.getHits().size()); + a.setExpression(new AttributeNode("attributeA")); + a.addHit(new VdsHit("1", s2, rank1)); +// a.addHit(new VdsHit("5", s7, rank2)); +// assertEquals(2, a.getHits().size()); + HitsAggregationResult b = (HitsAggregationResult)serializeDeserialize(a); + assertEquals(a, b); + + HitsAggregationResult hits = new HitsAggregationResult(3) + .addHit(new VdsHit("1", s3, 1)) + .addHit(new VdsHit("2", s2, 2)) + .addHit(new VdsHit("3", s1, 3)); + Grouping request = new Grouping() + .setRoot(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()))) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()) + .addChild(new Group() + .addAggregationResult(hits.clone()))))); + assertVdsHits(request, 0, 0, 3); + assertVdsHits(request, 1, 1, 6); + assertVdsHits(request, 2, 2, 6); + assertVdsHits(request, 3, 3, 3); + assertVdsHits(request, 4, 4, 0); + + assertVdsHits(request, 0, 1, 9); + assertVdsHits(request, 0, 2, 15); + assertVdsHits(request, 0, 3, 18); + assertVdsHits(request, 0, 4, 18); + assertVdsHits(request, 1, 4, 15); + assertVdsHits(request, 2, 4, 9); + assertVdsHits(request, 3, 4, 3); + + assertVdsHits(request, 1, 2, 12); + assertVdsHits(request, 2, 3, 9); + assertVdsHits(request, 3, 4, 3); + assertVdsHits(request, 4, 5, 0); + } + + + private void assertFS4Hits(Grouping request, int firstLevel, int lastLevel, int expected) { + 
CountFS4Hits obj = new CountFS4Hits(); + request.setFirstLevel(firstLevel); + request.setLastLevel(lastLevel); + request.select(obj, obj); + assertEquals(expected, obj.count); + } + + private void assertVdsHits(Grouping request, int firstLevel, int lastLevel, int expected) { + CountVdsHits obj = new CountVdsHits(); + request.setFirstLevel(firstLevel); + request.setLastLevel(lastLevel); + request.select(obj, obj); + assertEquals(expected, obj.count); + } + + private class CountFS4Hits implements ObjectPredicate, ObjectOperation { + int count; + public boolean check(Object obj) { + return obj instanceof FS4Hit; + } + public void execute(Object obj) { + ++count; + } + } + + private class CountVdsHits implements ObjectPredicate, ObjectOperation { + int count; + public boolean check(Object obj) { + return obj instanceof VdsHit; + } + public void execute(Object obj) { + ++count; + } + } + + public void testGroup() { + Group a = new Group(); + a.setId(new IntegerResultNode(17)); + a.addAggregationResult(new XorAggregationResult()); + serializeDeserialize1(a); + } + + public void testGrouping() { + Grouping a = new Grouping(); + GroupingLevel level = new GroupingLevel(); + level.setExpression(new AttributeNode("folder")); + + XorAggregationResult xor = new XorAggregationResult(); + xor.setExpression(new MD5BitFunctionNode(new AttributeNode("docid"), 64)); + level.getGroupPrototype().addAggregationResult(xor); + + SumAggregationResult sum = new SumAggregationResult(); + MinFunctionNode min = new MinFunctionNode(); + min.addArg(new AttributeNode("attribute1")); + min.addArg(new AttributeNode("attribute2")); + sum.setExpression(min); + level.getGroupPrototype().addAggregationResult(sum); + + CatFunctionNode cat = new CatFunctionNode(); + cat.addArg(new GetDocIdNamespaceSpecificFunctionNode()); + cat.addArg(new DocumentFieldNode("folder")); + cat.addArg(new DocumentFieldNode("flags")); + XorAggregationResult xor2 = new XorAggregationResult(); + xor2.setExpression(new 
XorBitFunctionNode(cat, 64)); + level.getGroupPrototype().addAggregationResult(xor2); + a.addLevel(level); + + Group g = new Group(); + g.setId(new IntegerResultNode(17)); + g.addAggregationResult(xor); // XXX: this is BAD + a.getRoot().addChild(g); + serializeDeserialize1(a); + + + Grouping foo = new Grouping(); + foo.addLevel(level); + int hashBefore = foo.hashCode(); + foo.setFirstLevel(66); + assertEquals(hashBefore, foo.hashCode()); + foo.setFirstLevel(99); + assertEquals(hashBefore, foo.hashCode()); + foo.setLastLevel(66); + assertEquals(hashBefore, foo.hashCode()); + foo.setLastLevel(99); + assertEquals(hashBefore, foo.hashCode()); + foo.getRoot().addChild(g); + assertEquals(hashBefore, foo.hashCode()); + } + + // -------------------------------------------------------------------------------- + // + // Everything below this point is helper functions. + // + // -------------------------------------------------------------------------------- + private static Identifiable serializeDeserialize1(Identifiable a) { + BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer()); + a.serializeWithId(buf); + buf.flip(); + Identifiable b = Identifiable.create(buf); + assertEquals(a.getClass(), b.getClass()); + assertEquals(buf.getBuf().hasRemaining(), false); + Identifiable c = b.clone(); + assertEquals(b.getClass(), c.getClass()); + BufferSerializer bb = new BufferSerializer(new GrowableByteBuffer()); + BufferSerializer cb = new BufferSerializer(new GrowableByteBuffer()); + b.serializeWithId(bb); + c.serializeWithId(cb); + assertEquals(bb.getBuf().limit(), cb.getBuf().limit()); + assertEquals(bb.position(), cb.position()); + bb.getBuf().flip(); + cb.getBuf().flip(); + for (int i = 0; i < bb.getBuf().limit(); i++) { + assertEquals(bb.getBuf().get(), cb.getBuf().get()); + } + + return b; + } + + private static AggregationResult serializeDeserialize(AggregationResult a) { + AggregationResult b = (AggregationResult)serializeDeserialize1(a); + 
assertEquals(a.getExpression().getClass(), b.getExpression().getClass()); + return b; + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java new file mode 100644 index 00000000000..0d7c4c8bca1 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ExpressionCountAggregationResultTest.java @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.aggregation.hll.*; +import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author bjorncs + */ +public class ExpressionCountAggregationResultTest { + + @Test + public void requireThatSketchesAreMerged() { + ExpressionCountAggregationResult aggr1 = createAggregationWithSparseSketch(42); + ExpressionCountAggregationResult aggr2 = createAggregationWithSparseSketch(1337); + + // Merge performs union of the underlying data of the sparse sketch. + aggr1.onMerge(aggr2); + + SparseSketch sketch = (SparseSketch) aggr1.getSketch(); + SketchUtils.assertSparseSketchContains(sketch, 42, 1337); + } + + @Test + public void requireThatEstimateIsCorrect() { + ExpressionCountAggregationResult aggr = createAggregationWithSparseSketch(42); + assertEquals(1, aggr.getEstimatedUniqueCount()); // assertEquals reports the actual estimate on failure + } + + @Test + public void requireThatPostMergeUpdatesEstimate() { + ExpressionCountAggregationResult aggr = createAggregationWithSparseSketch(1337); + assertEquals(1, aggr.getEstimatedUniqueCount()); + // Merge performs union of the underlying data of the sparse sketch.
+ aggr.onMerge(createAggregationWithSparseSketch(9001)); + assertEquals(2, aggr.getEstimatedUniqueCount()); + } + + @Test + public void requireThatSerializationDeserializationMatchSparseSketch() { + ExpressionCountAggregationResult from = createAggregationWithSparseSketch(42); + ExpressionCountAggregationResult to = createAggregationWithSparseSketch(1337); + testSerialization(from, to); + } + + @Test + public void requireThatSerializationDeserializationMatchNormalSketch() { + ExpressionCountAggregationResult from = createAggregationWithNormalSketch(42); + ExpressionCountAggregationResult to = createAggregationWithNormalSketch(1337); + testSerialization(from, to); + } + + private void testSerialization(ExpressionCountAggregationResult from, ExpressionCountAggregationResult to) { + BufferSerializer buffer = new BufferSerializer(); + from.serialize(buffer); + buffer.flip(); + to.deserialize(buffer); + + assertEquals(from.getSketch(), to.getSketch()); + } + + private static ExpressionCountAggregationResult createAggregationWithSparseSketch(int sketchValue) { + SparseSketch initialSketch = SketchUtils.createSparseSketch(sketchValue); + return new ExpressionCountAggregationResult( + initialSketch, + sketch -> ((SparseSketch) sketch).size() + ); + } + + private static ExpressionCountAggregationResult createAggregationWithNormalSketch(int sketchValue) { + NormalSketch initialSketch = SketchUtils.createNormalSketch(sketchValue); + return new ExpressionCountAggregationResult( + initialSketch, + sketch -> 42 + ); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java new file mode 100755 index 00000000000..ee7d50f33cb --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/ForceLoadTestCase.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +public class ForceLoadTestCase extends junit.framework.TestCase { + + public ForceLoadTestCase(String name) { + super(name); + } + + public void testLoadClasses() { + try { + new com.yahoo.searchlib.aggregation.ForceLoad(); + assertTrue(com.yahoo.searchlib.aggregation.ForceLoad.forceLoad()); + } catch (com.yahoo.system.ForceLoadError e) { + e.printStackTrace(); + assertTrue(false); + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java new file mode 100644 index 00000000000..1852f292a48 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupTestCase.java @@ -0,0 +1,229 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation; + +import com.yahoo.searchlib.expression.*; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.Identifiable; +import org.junit.Test; + +import java.util.Arrays; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen + */ +public class GroupTestCase { + + @Test + public void requireThatAggregationResultsCanBeAdded() { + Group group = new Group(); + AggregationResult res = new AverageAggregationResult(); + group.addAggregationResult(res); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAggregationResultListIsNotImmutable() { + Group group = new Group(); + group.getAggregationResults().add(new AverageAggregationResult()); + } + + @Test + public void requireThatOrderByExpressionsCanBeAdded() { + Group group = new Group(); + ExpressionNode foo = new ConstantNode(new IntegerResultNode(6)); + group.addOrderBy(foo, true); + assertEquals(1, group.getOrderByExpressions().size()); + assertSame(foo, 
group.getOrderByExpressions().get(0)); + assertEquals(Arrays.asList(1), group.getOrderByIndexes()); + + ExpressionNode bar = new ConstantNode(new IntegerResultNode(9)); + group.addOrderBy(bar, false); + assertEquals(2, group.getOrderByExpressions().size()); + assertSame(bar, group.getOrderByExpressions().get(1)); + assertEquals(Arrays.asList(1, -2), group.getOrderByIndexes()); + } + + @Test + public void requireThatOrderByListsAreImmutable() { + Group group = new Group(); + try { + group.getOrderByExpressions().add(new ConstantNode(new IntegerResultNode(69))); + fail(); + } catch (UnsupportedOperationException e) { + + } + try { + group.getOrderByIndexes().add(69); + fail(); + } catch (UnsupportedOperationException e) { + + } + } + + @Test + public void requireThatAddOrderByAddsAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(res, true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAddOrderByDoesNotAddDuplicateAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addAggregationResult(res); + group.addOrderBy(res, true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAddOrderByIgnoresAggregationResultTagWhenMatching() { + Group group = new Group(); + AggregationResult foo = new MinAggregationResult(); + foo.setTag(6); + group.addAggregationResult(foo); + AggregationResult bar = new MinAggregationResult(); + bar.setTag(9); + group.addOrderBy(bar, true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(foo, group.getAggregationResults().get(0)); + assertEquals(6, foo.getTag()); + } + + @Test + public void requireThatAddOrderByDoesNotModifyTagOfNewAggregationResult() { + Group group = new Group(); + 
AggregationResult foo = new MinAggregationResult(); + foo.setTag(6); + group.addAggregationResult(foo); + AggregationResult bar = new MaxAggregationResult(); + bar.setTag(9); + group.addOrderBy(bar, true); + assertEquals(2, group.getAggregationResults().size()); + assertSame(foo, group.getAggregationResults().get(0)); + assertEquals(6, foo.getTag()); + assertSame(bar, group.getAggregationResults().get(1)); + assertEquals(9, bar.getTag()); + } + + @Test + public void requireThatAddOrderByAddsReferencedAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new AggregationRefNode(res), true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAddOrderByDoesNotAddDuplicateReferencedAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addAggregationResult(res); + group.addOrderBy(new AggregationRefNode(res), true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAddOrderByAddsDeepReferencedAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void requireThatAddOrderByDoesNotAddDuplicateDeepReferencedAggregationResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addAggregationResult(res); + group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true); + assertEquals(1, group.getAggregationResults().size()); + assertSame(res, group.getAggregationResults().get(0)); + } + + @Test + public void 
requireThatAddOrderByResolvesReferenceIndex() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addAggregationResult(res); + group.addOrderBy(new AggregationRefNode(res), true); + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0); + assertEquals(0, ref.getIndex()); + assertSame(res, ref.getExpression()); + } + + @Test + public void requireThatAddOrderByResolvesDeepReferenceIndex() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addAggregationResult(res); + group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true); + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)((NegateFunctionNode)group.getOrderByExpressions().get(0)).getArg(); + assertEquals(0, ref.getIndex()); + assertSame(res, ref.getExpression()); + } + + @Test + public void requireThatAddOrderByResolvesReferenceResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new AggregationRefNode(res), true); + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0); + assertEquals(0, ref.getIndex()); + assertSame(res, ref.getExpression()); + } + + @Test + public void requireThatAddOrderByResolvesDeepReferenceResult() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new NegateFunctionNode(new AggregationRefNode(res)), true); + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)((NegateFunctionNode)group.getOrderByExpressions().get(0)).getArg(); + assertEquals(0, ref.getIndex()); + assertSame(res, ref.getExpression()); + } + + @Test + public void requireThatCloneResolvesAggregationRef() { + Group group = new Group(); + 
AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new AggregationRefNode(res), true); + group = group.clone(); + + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0); + assertEquals(0, ref.getIndex()); + assertEquals(res, ref.getExpression()); + assertNotSame(res, ref.getExpression()); + } + + @Test + public void requireThatDeserializeResolvesAggregationRef() { + Group group = new Group(); + AggregationResult res = new MinAggregationResult(); + group.addOrderBy(new AggregationRefNode(res), true); + BufferSerializer buf = new BufferSerializer(); + group.serializeWithId(buf); + buf.flip(); + group = (Group)Identifiable.create(buf); + + assertEquals(1, group.getOrderByExpressions().size()); + AggregationRefNode ref = (AggregationRefNode)group.getOrderByExpressions().get(0); + assertEquals(0, ref.getIndex()); + assertEquals(res, ref.getExpression()); + assertNotSame(res, ref.getExpression()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java new file mode 100644 index 00000000000..a9926f7c0e2 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingSerializationTest.java @@ -0,0 +1,387 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation; + +import com.yahoo.document.DocumentId; +import com.yahoo.document.GlobalId; +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.searchlib.aggregation.hll.SparseSketch; +import com.yahoo.searchlib.expression.*; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.Identifiable; +import com.yahoo.vespa.objects.ObjectDumper; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.*; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; + +import static org.junit.Assert.fail; + +/** + * Tests serialization compatibility across Java and C++. The comparison is performed by comparing serialized Java + * object graphs with the content of specific binary files. C++ unit tests serialize + * identical data structures into these files. + * Note: This test relies heavily on proper implementation of {@link Object#equals(Object)}! + */ +public class GroupingSerializationTest { + + @BeforeClass + public static void forceLoadingOfSerializableClasses() { + com.yahoo.searchlib.aggregation.ForceLoad.forceLoad(); + com.yahoo.searchlib.expression.ForceLoad.forceLoad(); + } + + @Test + public void testResultTypes() throws IOException { + try (SerializationTester t = new SerializationTester("testResultTypes")) { + t.assertMatch(new IntegerResultNode(7)); + t.assertMatch(new FloatResultNode(7.3)); + t.assertMatch(new StringResultNode("7.3")); + t.assertMatch(new StringResultNode( + new String(new byte[]{(byte)0xe5, (byte)0xa6, (byte)0x82, (byte)0xe6, (byte)0x9e, (byte)0x9c}, java.nio.charset.StandardCharsets.UTF_8))); // these bytes are UTF-8; decode explicitly rather than with the platform default charset + t.assertMatch(new RawResultNode(new byte[]{'7', '.', '4'})); + t.assertMatch(new IntegerBucketResultNode()); + t.assertMatch(new FloatBucketResultNode()); + t.assertMatch(new IntegerBucketResultNode(10, 20)); + t.assertMatch(new FloatBucketResultNode(10, 20)); + t.assertMatch(new StringBucketResultNode("10.0", "20.0")); + t.assertMatch(new RawBucketResultNode( + new RawResultNode(new
byte[]{1, 0, 0}), + new RawResultNode(new byte[]{1, 1, 0}))); + t.assertMatch(new IntegerBucketResultNodeVector() + .add(new IntegerBucketResultNode(878, 3246823))); + t.assertMatch(new FloatBucketResultNodeVector() + .add(new FloatBucketResultNode(878, 3246823))); + t.assertMatch(new StringBucketResultNodeVector() + .add(new StringBucketResultNode("878", "3246823"))); + t.assertMatch(new RawBucketResultNodeVector() + .add(new RawBucketResultNode( + new RawResultNode(new byte[]{1, 0, 0}), + new RawResultNode(new byte[]{1, 1, 0})))); + } + + } + + @Test + public void testSpecialNodes() throws IOException { + try (SerializationTester t = new SerializationTester("testSpecialNodes")) { + t.assertMatch(new AttributeNode("testattribute")); + t.assertMatch(new DocumentFieldNode("testdocumentfield")); + t.assertMatch(new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7))); + t.assertMatch(new GetYMUMChecksumFunctionNode()); + } + } + + @Test + public void testFunctionNodes() throws IOException { + try (SerializationTester t = new SerializationTester("testFunctionNodes")) { + t.assertMatch(new AddFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new XorFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new MultiplyFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new DivideFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new ModuloFunctionNode() + .addArg(new ConstantNode(new 
IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new MinFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new MaxFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new TimeStampFunctionNode(new ConstantNode(new IntegerResultNode(7)), + TimeStampFunctionNode.TimePart.Hour, true)); + t.assertMatch(new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)), + ZCurveFunctionNode.Dimension.X)); + t.assertMatch(new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)), + ZCurveFunctionNode.Dimension.Y)); + t.assertMatch(new NegateFunctionNode(new ConstantNode(new IntegerResultNode(7)))); + t.assertMatch(new SortFunctionNode(new ConstantNode(new IntegerResultNode(7)))); + t.assertMatch(new NormalizeSubjectFunctionNode(new ConstantNode( + new StringResultNode("foo")))); + t.assertMatch(new ReverseFunctionNode(new ConstantNode(new IntegerResultNode(7)))); + t.assertMatch(new MD5BitFunctionNode(new ConstantNode(new IntegerResultNode(7)), 64)); + t.assertMatch(new XorBitFunctionNode(new ConstantNode(new IntegerResultNode(7)), 64)); + t.assertMatch(new CatFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(7))) + .addArg(new ConstantNode(new IntegerResultNode(8))) + .addArg(new ConstantNode(new IntegerResultNode(9)))); + t.assertMatch(new FixedWidthBucketFunctionNode()); + t.assertMatch(new FixedWidthBucketFunctionNode().addArg(new AttributeNode("foo"))); + t.assertMatch(new FixedWidthBucketFunctionNode(new IntegerResultNode(10), new AttributeNode("foo"))); + t.assertMatch(new FixedWidthBucketFunctionNode(new FloatResultNode(10.0), new AttributeNode("foo"))); + 
t.assertMatch(new RangeBucketPreDefFunctionNode()); + t.assertMatch(new RangeBucketPreDefFunctionNode().addArg(new AttributeNode("foo"))); + t.assertMatch(new DebugWaitFunctionNode(new ConstantNode(new IntegerResultNode(5)), + 3.3, false)); + } + + } + + @Test + public void testAggregatorResults() throws IOException { + try (SerializationTester t = new SerializationTester("testAggregatorResults")) { + t.assertMatch(new SumAggregationResult(new IntegerResultNode(7)) + .setExpression(new AttributeNode("attributeA"))); + t.assertMatch(new XorAggregationResult() + .setXor(7) + .setExpression(new AttributeNode("attributeA"))); + t.assertMatch(new CountAggregationResult() + .setCount(7) + .setExpression(new AttributeNode("attributeA"))); + t.assertMatch(new MinAggregationResult(new IntegerResultNode(7)) + .setExpression(new AttributeNode("attributeA"))); + t.assertMatch(new MaxAggregationResult(new IntegerResultNode(7)) + .setExpression(new AttributeNode("attributeA"))); + t.assertMatch(new AverageAggregationResult(new IntegerResultNode(7), 0) + .setExpression(new AttributeNode("attributeA"))); + SparseSketch sketch = new SparseSketch(); + sketch.aggregate(1955583074); + t.assertMatch(new ExpressionCountAggregationResult(sketch, s -> 42) + .setExpression(new ConstantNode(new IntegerResultNode(67)))); + } + } + + @Test + public void testHitCollection() throws IOException { + try (SerializationTester t = new SerializationTester("testHitCollection")) { + t.assertMatch(new FS4Hit(0, new GlobalId(new byte[GlobalId.LENGTH]), 0, -1)); + t.assertMatch(new FS4Hit(0, createGlobalId(100), 50.0, -1)); + t.assertMatch(new VdsHit()); + //TODO Verify the two structures below + t.assertMatch(new VdsHit("100", new byte[0], 50.0)); + t.assertMatch(new VdsHit("100", "rawsummary".getBytes(), 50.0)); + t.assertMatch(new HitsAggregationResult()); + t.assertMatch(new HitsAggregationResult() + .setMaxHits(5) + .addHit(new FS4Hit(0, createGlobalId(10), 1.0, -1)) + .addHit(new FS4Hit(0, 
createGlobalId(20), 2.0, -1)) + .addHit(new FS4Hit(0, createGlobalId(30), 3.0, -1)) + .addHit(new FS4Hit(0, createGlobalId(40), 4.0, -1)) + .addHit(new FS4Hit(0, createGlobalId(50), 5.0, -1)) + .setExpression(new ConstantNode(new IntegerResultNode(5)))); + t.assertMatch(new HitsAggregationResult() + .setMaxHits(3) + .addHit(new FS4Hit(0, createGlobalId(10), 1.0, 100)) + .addHit(new FS4Hit(0, createGlobalId(20), 2.0, 200)) + .addHit(new FS4Hit(0, createGlobalId(30), 3.0, 300)) + .setExpression(new ConstantNode(new IntegerResultNode(5)))); + //TODO Verify content + t.assertMatch(new HitsAggregationResult() + .setMaxHits(3) + .addHit(new VdsHit("10", "100".getBytes(), 1.0)) + .addHit(new VdsHit("20", "200".getBytes(), 2.0)) + .addHit(new VdsHit("30", "300".getBytes(), 3.0)) + .setExpression(new ConstantNode(new IntegerResultNode(5)))); + } + } + + @Test + public void testGroupingLevel() throws IOException { + try (SerializationTester t = new SerializationTester("testGroupingLevel")) { + GroupingLevel groupingLevel = new GroupingLevel(); + groupingLevel.setMaxGroups(100) + .setExpression(createDummyExpression()) + .getGroupPrototype() + .addAggregationResult( + new SumAggregationResult() + .setExpression(createDummyExpression())); + t.assertMatch(groupingLevel); + } + } + + @Test + public void testGroup() throws IOException { + try (SerializationTester t = new SerializationTester("testGroup")) { + t.assertMatch(new Group()); + t.assertMatch(new Group().setId(new IntegerResultNode(50)) + .setRank(10)); + t.assertMatch(new Group().setId(new IntegerResultNode(100)) + .addChild(new Group().setId(new IntegerResultNode(110))) + .addChild(new Group().setId(new IntegerResultNode(120)) + .setRank(20.5) + .addAggregationResult(new SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult(new SumAggregationResult() + .setExpression(createDummyExpression()))) + .addChild(new Group().setId(new IntegerResultNode(130)) + .addChild(new Group().setId(new 
IntegerResultNode(131))))); + } + } + + @Test + public void testGrouping() throws IOException { + try (SerializationTester t = new SerializationTester("testGrouping")) { + t.assertMatch(new Grouping()); + + GroupingLevel level1 = new GroupingLevel(); + level1.setMaxGroups(100) + .setExpression(createDummyExpression()) + .getGroupPrototype() + .addAggregationResult( + new SumAggregationResult() + .setExpression(createDummyExpression())); + GroupingLevel level2 = new GroupingLevel(); + level2.setMaxGroups(10) + .setExpression(createDummyExpression()) + .getGroupPrototype() + .addAggregationResult( + new SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult( + new SumAggregationResult() + .setExpression(createDummyExpression())); + t.assertMatch(new Grouping() + .addLevel(level1) + .addLevel(level2)); + + GroupingLevel level3 = new GroupingLevel(); + level3.setExpression(new AttributeNode("folder")) + .getGroupPrototype() + .addAggregationResult( + new XorAggregationResult() + .setExpression(new MD5BitFunctionNode(new AttributeNode("docid"), 64))) + .addAggregationResult( + new SumAggregationResult() + .setExpression(new MinFunctionNode() + .addArg(new AttributeNode("attribute1")) + .addArg(new AttributeNode("attribute2")))) + .addAggregationResult( + new XorAggregationResult() + .setExpression( + new XorBitFunctionNode(new CatFunctionNode() + .addArg(new GetDocIdNamespaceSpecificFunctionNode(new StringResultNode(""))) + .addArg(new DocumentFieldNode("folder")) + .addArg(new DocumentFieldNode("flags")), 64))); + t.assertMatch(new Grouping() + .addLevel(level3)); + } + } + + + private static GlobalId createGlobalId(int docId) { + return new GlobalId( + new DocumentId(String.format("doc:test:%d", docId)).getGlobalId()); + } + + private static ExpressionNode createDummyExpression() { + return new AddFunctionNode() + .addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(2))); + } + + 
private static class SerializationTester implements AutoCloseable { + + private static final String FILE_PATH = "src/test/files"; + + private final DataInputStream in; + private final String fileName; + + public SerializationTester(String fileName) throws IOException { + this.fileName = fileName; + this.in = new DataInputStream( + new BufferedInputStream( + new FileInputStream( + new File(FILE_PATH, fileName)))); + } + + public SerializationTester assertMatch(Identifiable expectedObject) throws IOException { + int length = readLittleEndianInt(in); + byte[] originalData = new byte[length]; + in.readFully(originalData); + Identifiable deserializedObject = Identifiable.create(new BufferSerializer(originalData)); + + if (!deserializedObject.equals(expectedObject)) { + fail(String.format("Serialized object in file '%s' does not equal expected values.\n" + + "==================================================\n" + + "Expected:\n" + + "==================================================\n" + + "%s\n" + + "==================================================\n" + + "Actual:\n" + + "==================================================\n" + + "%s\n" + + "==================================================\n", + fileName, dumpObject(expectedObject), dumpObject(deserializedObject))); + } + GrowableByteBuffer buffer = new GrowableByteBuffer(1024 * 8); + BufferSerializer serializer = new BufferSerializer(buffer); + deserializedObject.serializeWithId(serializer); + buffer.flip(); + + byte[] newData = new byte[buffer.limit()]; + buffer.get(newData); + if (!Arrays.equals(newData, originalData)) { + fail(String.format("Serialized object data does not match the original serialized data from file.\n" + + "==================================================\n" + + "Original:\n" + + "==================================================\n" + + "%s\n" + + "==================================================\n" + + "Serialized:\n" + + "==================================================\n" + + "%s\n" 
+ + "==================================================\n", + toHexString(originalData), toHexString(newData))); + } + return this; + } + + private static int readLittleEndianInt(DataInputStream in) throws IOException { + byte[] data = new byte[4]; + in.readFully(data); + ByteBuffer buffer = ByteBuffer.wrap(data); + buffer.order(ByteOrder.LITTLE_ENDIAN); + return buffer.getInt(); + } + + private static String dumpObject(Identifiable obj) { + ObjectDumper dumper = new ObjectDumper(); + obj.visitMembers(dumper); + return dumper.toString(); + } + + @Override + public void close() throws IOException { + boolean moreDataAvailable = in.read() != -1; + in.close(); + if (moreDataAvailable) { + fail("The file was not fully consumed. Did you forget to deserialize an object on Java side?"); + } + } + + private static String toHexString(byte[] data) { + char[] table = "0123456789ABCDEF".toCharArray(); + StringBuilder builder = new StringBuilder(); + builder.append("(" + data.length + " bytes)"); + for (int i = 0; i < data.length; i++) { + if (i % 16 == 0) { + builder.append("\n"); + } + builder.append(table[(data[i] >> 4) & 0xf]); + builder.append(table[data[i] & 0xf]); + builder.append(" "); + } + return builder.toString(); + } + + + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java new file mode 100644 index 00000000000..f4ae62265d7 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/GroupingTestCase.java @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.searchlib.expression.NullResultNode;
+import com.yahoo.searchlib.expression.StringBucketResultNode;
+import com.yahoo.vespa.objects.BufferSerializer;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import static org.junit.Assert.*;
+
+/**
+ * Unit tests for {@link Grouping}: accessor round trips, equals/hashCode, clone,
+ * serialization round trips, the single-pass decisions and unifyNull.
+ *
+ * @author Simon Thoresen
+ */
+public class GroupingTestCase {
+
+    // Offset into the serialized Grouping of the byte that requireThatValidAccessorsWork
+    // expects to be 1, and clears in order to obtain an invalid Grouping on deserialization.
+    private static final int VALID_BYTE_INDEX = 8;
+
+    // The id defaults to 0, can be given in the constructor, and takes part in equals().
+    @Test
+    public void requireThatIdAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(0, grouping.getId());
+
+        grouping = new Grouping(6);
+        assertEquals(6, grouping.getId());
+        grouping.setId(9);
+        assertEquals(9, grouping.getId());
+
+        Grouping other = new Grouping(6);
+        assertFalse(grouping.equals(other));
+        other.setId(9);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // The "all" flag defaults to false and takes part in equals().
+    @Test
+    public void requireThatAllAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertFalse(grouping.getAll());
+        grouping.setAll(true);
+        assertTrue(grouping.getAll());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.setAll(true);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // topN defaults to -1 and takes part in equals().
+    @Test
+    public void requireThatTopNAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(-1, grouping.getTopN());
+        grouping.setTopN(69);
+        assertEquals(69, grouping.getTopN());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.setTopN(69);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // firstLevel defaults to 0 and takes part in equals().
+    @Test
+    public void requireThatFirstLevelAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(0, grouping.getFirstLevel());
+        grouping.setFirstLevel(69);
+        assertEquals(69, grouping.getFirstLevel());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.setFirstLevel(69);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // lastLevel defaults to 0 and takes part in equals().
+    @Test
+    public void requireThatLastLevelAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(0, grouping.getLastLevel());
+        grouping.setLastLevel(69);
+        assertEquals(69, grouping.getLastLevel());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.setLastLevel(69);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // The root defaults to an empty Group, rejects null, and takes part in equals().
+    @Test
+    public void requireThatRootAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(new Group(), grouping.getRoot());
+        try {
+            grouping.setRoot(null);
+            fail();
+        } catch (NullPointerException e) {
+            // expected: a null root must be rejected
+        }
+        Group root = new Group().setRank(6.9);
+        grouping.setRoot(root);
+        assertEquals(root, grouping.getRoot());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.setRoot(root);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // The level list starts out empty, rejects null, and takes part in equals().
+    @Test
+    public void requireThatLevelAccessorsWork() {
+        Grouping grouping = new Grouping();
+        assertEquals(Collections.emptyList(), grouping.getLevels());
+        try {
+            grouping.addLevel(null);
+            fail();
+        } catch (NullPointerException e) {
+            // expected: a null level must be rejected
+        }
+        GroupingLevel level = new GroupingLevel();
+        grouping.addLevel(level);
+        assertEquals(Arrays.asList(level), grouping.getLevels());
+
+        Grouping other = new Grouping();
+        assertFalse(grouping.equals(other));
+        other.addLevel(level);
+        assertEquals(grouping, other);
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // Two default-constructed instances must produce the same hash code.
+    @Test
+    public void requireThatHashCodeIsImplemented() {
+        assertEquals(new Grouping().hashCode(), new Grouping().hashCode());
+    }
+
+    // equals() must reject unrelated types and accept another default-constructed instance.
+    @Test
+    public void requireThatEqualsIsImplemented() {
+        assertFalse(new Grouping().equals(new Object()));
+        assertTrue(new Grouping().equals(new Grouping()));
+    }
+
+    // Serializes a Grouping into a caller-owned array, clears the byte at VALID_BYTE_INDEX
+    // and checks that the Grouping deserialized from the patched bytes reports valid() == false.
+    @Test
+    public void requireThatValidAccessorsWork() {
+        byte[] arr = new byte[1024];
+        BufferSerializer buf = new BufferSerializer(arr);
+        Grouping grouping = new Grouping();
+        grouping.serializeWithId(buf);
+        buf.flip();
+        assertEquals(1, arr[VALID_BYTE_INDEX]);
+        arr[VALID_BYTE_INDEX] = 0;
+
+        Grouping other = (Grouping)Grouping.create(buf);
+        assertFalse(other.valid());
+
+        assertEquals(grouping, grouping.clone());
+        assertSerialize(grouping);
+    }
+
+    // forceSinglePass defaults to false and reflects whatever was last set.
+    @Test
+    public void requireThatSetForceSinglePassWorks() {
+        assertFalse(new Grouping().getForceSinglePass());
+        assertFalse(new Grouping().setForceSinglePass(false).getForceSinglePass());
+        assertTrue(new Grouping().setForceSinglePass(true).getForceSinglePass());
+    }
+
+    // A level whose group prototype has an order-by clause requires deep result collection;
+    // a level with a plain prototype does not.
+    @Test
+    public void requireThatNeedDeepResultCollectionWorks() {
+        assertFalse(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group())).needDeepResultCollection());
+        assertTrue(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group().addOrderBy(new CountAggregationResult(9), true))).needDeepResultCollection());
+    }
+
+    // Single pass is used when forced, or when a level's group prototype has an order-by clause.
+    @Test
+    public void requireThatUseSinglePassWorks() {
+        assertFalse(new Grouping().useSinglePass());
+        assertFalse(new Grouping().setForceSinglePass(false).useSinglePass());
+        assertTrue(new Grouping().setForceSinglePass(true).useSinglePass());
+        assertFalse(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group())).useSinglePass());
+        assertTrue(new Grouping().addLevel(new GroupingLevel().setGroupPrototype(new Group().addOrderBy(new CountAggregationResult(9), true))).useSinglePass());
+    }
+
+    // A child whose id is a default-constructed StringBucketResultNode gets a NullResultNode id.
+    @Test
+    public void requireThatUnifyNullReplacesEmptyBucketIds() {
+        Grouping grouping = new Grouping();
+        grouping.getRoot().addChild(new Group().setId(new StringBucketResultNode()));
+        grouping.setLastLevel(1); // otherwise unifyNull will not operate on it
+        grouping.unifyNull();
+        assertEquals(NullResultNode.class, grouping.getRoot().getChildren().get(0).getId().getClass());
+    }
+
+    // A child whose bucket id was constructed with explicit bounds keeps its id type.
+    @Test
+    public void requireThatUnifyNullDoesNotReplaceNonEmptyBucketIds() {
+        Grouping grouping = new Grouping();
+        grouping.getRoot().addChild(new Group().setId(new StringBucketResultNode("6", "9")));
+        grouping.setLastLevel(1); // otherwise unifyNull will not operate on it
+        grouping.unifyNull();
+        assertEquals(StringBucketResultNode.class, grouping.getRoot().getChildren().get(0).getId().getClass());
+    }
+
+    // Serializes the grouping into a fresh buffer, reads it back and asserts that the copy equals the original.
+    private static void assertSerialize(Grouping grouping) {
+        BufferSerializer buf = new BufferSerializer();
+        grouping.serializeWithId(buf);
+
+        buf.flip();
+        Grouping other = (Grouping)Grouping.create(buf);
+        assertEquals(grouping, other);
+    }
+}
+
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java
new file mode 100755
index 00000000000..67361048773
--- /dev/null
+++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/MergeTestCase.java
@@ -0,0 +1,735 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.searchlib.aggregation;
+
+import com.yahoo.document.DocumentId;
+import com.yahoo.document.GlobalId;
+import com.yahoo.searchlib.expression.*;
+import junit.framework.TestCase;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests merging of grouping results.
+ *
+ * @author Simon Thoresen
+ */
+public class MergeTestCase extends TestCase {
+
+    // Builds the global id of the document "doc:test:<docId>".
+    private GlobalId createGlobalId(int docId) {
+        return new GlobalId((new DocumentId("doc:test:" + docId)).getGlobalId());
+    }
+
+    // Test merging of hits.
+    // Three groups of three hits each are merged in every order; with maxHits 5 the expected result
+    // keeps the hits whose last constructor argument (presumably the rank - TODO confirm) is
+    // 30, 20, 10, 9 and 8.
+    public void testMergeHits() {
+        Grouping request = new Grouping()
+            .setFirstLevel(0)
+            .setLastLevel(1)
+            .addLevel(new GroupingLevel().setMaxGroups(69));
+
+        Group expect = new Group()
+            .addAggregationResult(new HitsAggregationResult()
+                                  .setMaxHits(5)
+                                  .addHit(new FS4Hit(30, createGlobalId(30), 30))
+                                  .addHit(new FS4Hit(20, createGlobalId(20), 20))
+                                  .addHit(new FS4Hit(10, createGlobalId(10), 10))
+                                  .addHit(new FS4Hit(5, createGlobalId(9), 9))
+                                  .addHit(new FS4Hit(6, createGlobalId(8), 8))
+                                  .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+        Group a = new Group()
+            .addAggregationResult(new HitsAggregationResult()
+                                  .setMaxHits(5)
+                                  .addHit(new FS4Hit(10, createGlobalId(10), 10))
+                                  .addHit(new FS4Hit(1, createGlobalId(5), 5))
+                                  .addHit(new FS4Hit(2, createGlobalId(4), 4))
+                                  .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+        Group b = new Group()
+            .addAggregationResult(new HitsAggregationResult()
+                                  .setMaxHits(5)
+                                  .addHit(new FS4Hit(20, createGlobalId(20), 20))
+                                  .addHit(new FS4Hit(3, createGlobalId(7), 7))
+                                  .addHit(new FS4Hit(4, createGlobalId(6), 6))
+                                  .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+        Group c = new Group()
+            .addAggregationResult(new HitsAggregationResult()
+                                  .setMaxHits(5)
+                                  .addHit(new FS4Hit(30, createGlobalId(30), 30))
+                                  .addHit(new FS4Hit(5, createGlobalId(9), 9))
+                                  .addHit(new FS4Hit(6, createGlobalId(8), 8))
+                                  .setExpression(new ConstantNode( new IntegerResultNode(0))));
+
+        // Every permutation of the three inputs must give the same result.
+        assertMerge(request, a, b, c, expect);
+        assertMerge(request, a, c, b, expect);
+        assertMerge(request, b, a, c, expect);
+        assertMerge(request, c, a, b, expect);
+        assertMerge(request, b, c, a, expect);
+        assertMerge(request, c, b, a, expect);
+    }
+
+    // Test merging the sum of the values from a single attribute vector that was collected directly into the root node.
+ public void testMergeSimpleSum() { + Grouping lhs = new Grouping() + .setRoot(new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("foo")))); + + Grouping rhs = new Grouping() + .setRoot(new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("foo")))); + + Group expect = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(50)) + .setExpression(new AttributeNode("foo"))); + + assertMerge(lhs, rhs, expect); + } + + // Test merging of the value from a single attribute vector in level 1. + public void testMergeSingleChild() { + Grouping lhs = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(new Group().addChild(new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("foo"))))); + + Grouping rhs = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(new Group().addChild(new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("foo"))))); + + Group expect = new Group().addChild(new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(50)) + .setExpression(new AttributeNode("foo")))); + + assertMerge(lhs, rhs, expect); + } + + // Test merging of the value from a multiple attribute vectors in level 1. 
+ public void testMergeMultiChild() { + Grouping lhs = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(new Group() + .addChild(new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("foo")))) + .addChild(new Group() + .setId(new StringResultNode("bar")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("foo"))))); + + Grouping rhs = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(new Group() + .addChild(new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("foo")))) + .addChild(new Group() + .setId(new StringResultNode("baz")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("foo"))))); + + Group expect = new Group().addChild( + new Group() + .setId(new StringResultNode("foo")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(50)) + .setExpression(new AttributeNode("foo")))) + .addChild(new Group() + .setId(new StringResultNode("bar")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("foo")))) + .addChild(new Group() + .setId(new StringResultNode("baz")) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("foo")))); + + assertMerge(lhs, rhs, expect); + } + + // Verify that frozen levels are not touched during merge. 
+ public void testMergeLevels() { + Grouping request = new Grouping() + .addLevel(new GroupingLevel() + .setExpression(new AttributeNode("c1")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s1"))))) + .addLevel(new GroupingLevel() + .setExpression(new AttributeNode("c2")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s2"))))) + .addLevel(new GroupingLevel() + .setExpression(new AttributeNode("c3")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s3"))))); + + Group lhs = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(15)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("s3")))))); + + Group rhs = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(15)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new 
IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("s3")))))); + + Group expectAll = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("s3")))))); + + Group expect0 = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("s3")))))); + + Group expect1 = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s1"))) + 
.addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(30)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("s3")))))); + + Group expect2 = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(15)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(40)) + .setExpression(new AttributeNode("s3")))))); + + Group expect3 = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(5)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(10)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(15)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(30)) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(20)) + .setExpression(new AttributeNode("s3")))))); + + request.setFirstLevel(0).setLastLevel(3); + assertMerge(request, lhs, rhs, expectAll); + request.setFirstLevel(1).setLastLevel(3); + 
assertMerge(request, lhs, rhs, expect0); + request.setFirstLevel(2).setLastLevel(5); + assertMerge(request, lhs, rhs, expect1); + request.setFirstLevel(3).setLastLevel(5); + assertMerge(request, lhs, rhs, expect2); + request.setFirstLevel(4).setLastLevel(4); + assertMerge(request, lhs, rhs, expect3); + } + + // Verify that the number of groups for a level is pruned down to maxGroups, that the remaining groups are the + // highest ranked ones, and that they are sorted by group id. + public void testMergeGroups() { + Grouping request = new Grouping() + .addLevel(new GroupingLevel() + .setExpression(new AttributeNode("attr"))); + Group lhs = new Group() + .addChild(new Group().setId(new IntegerResultNode(5)).setRank(5)) + .addChild(new Group().setId(new IntegerResultNode(10)).setRank(5)) + .addChild(new Group().setId(new IntegerResultNode(15)).setRank(15)) + .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100)) + .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30)); + + Group rhs = new Group() + .addChild(new Group().setId(new IntegerResultNode(0)).setRank(10)) + .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50)) + .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25)) + .addChild(new Group().setId(new IntegerResultNode(40)).setRank(10)) + .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20)); + + Group expect3 = new Group() + .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50)) + .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100)) + .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30)); + + Group expect5 = new Group() + .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50)) + .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25)) + .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100)) + .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20)) + .addChild(new Group().setId(new 
IntegerResultNode(50)).setRank(30)); + + Group expectAll = new Group() + .addChild(new Group().setId(new IntegerResultNode(0)).setRank(10)) + .addChild(new Group().setId(new IntegerResultNode(5)).setRank(5)) + .addChild(new Group().setId(new IntegerResultNode(10)).setRank(50)) + .addChild(new Group().setId(new IntegerResultNode(15)).setRank(15)) + .addChild(new Group().setId(new IntegerResultNode(20)).setRank(25)) + .addChild(new Group().setId(new IntegerResultNode(40)).setRank(100)) + .addChild(new Group().setId(new IntegerResultNode(45)).setRank(20)) + .addChild(new Group().setId(new IntegerResultNode(50)).setRank(30)); + + request.getLevels().get(0).setMaxGroups(3); + assertMerge(request, lhs, rhs, expect3); + assertMerge(request, rhs, lhs, expect3); + + request.getLevels().get(0).setMaxGroups(5); + assertMerge(request, lhs, rhs, expect5); + assertMerge(request, rhs, lhs, expect5); + + request.getLevels().get(0).setMaxGroups(-1); + assertMerge(request, lhs, rhs, expectAll); + assertMerge(request, rhs, lhs, expectAll); + } + + public void testMergeBuckets() { + Grouping lhs = new Grouping() + .setRoot(new Group().setTag(0) + .addChild(new Group().setId(new FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4)) + .addAggregationResult(new CountAggregationResult().setCount(1)) + .setTag(1)) + .addChild(new Group().setId(new FloatBucketResultNode(0, 0)) + .addAggregationResult(new CountAggregationResult().setCount(12)) + .setTag(1))); + + Grouping rhs = new Grouping() + .setRoot(new Group().setTag(0) + .addChild(new Group().setId(new FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4)) + .addAggregationResult(new CountAggregationResult().setCount(0)) + .setTag(1)) + .addChild(new Group().setId(new FloatBucketResultNode(0, 0)) + .addAggregationResult(new CountAggregationResult().setCount(15)) + .setTag(1))); + + Group expected = new Group().setTag(0) + .addChild(new Group().setId(new 
FloatBucketResultNode(FloatResultNode.getNegativeInfinity().getFloat(), 0.4)) + .addAggregationResult(new CountAggregationResult().setCount(1)) + .setTag(1)) + .addChild(new Group().setId(new FloatBucketResultNode(0, 0)) + .addAggregationResult(new CountAggregationResult().setCount(27)) + .setTag(1)); + assertMerge(lhs, rhs, expected); + } + + // Merge two trees that are ordered by an expression, and verify that the resulting order after merge is correct. + public void testMergeExpressions() { + Grouping a = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(new GroupingLevel().setMaxGroups(1)) + .setRoot(new Group() + .addChild(new Group().setId(new StringResultNode("aa")) + .addAggregationResult(new MaxAggregationResult().setMax(new IntegerResultNode(9))) + .addAggregationResult(new CountAggregationResult().setCount(2)) + .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0)) + .addArg(new AggregationRefNode(1)), true))); + Grouping b = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(new GroupingLevel().setMaxGroups(1)) + .setRoot(new Group() + .addChild(new Group().setId(new StringResultNode("ab")) + .addAggregationResult(new MaxAggregationResult().setMax( + new IntegerResultNode(12))) + .addAggregationResult(new CountAggregationResult().setCount(1)) + .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0)) + .addArg(new AggregationRefNode(1)), true))); + + Grouping expected = new Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(new GroupingLevel().setMaxGroups(1)) + .setRoot(new Group() + .addChild(new Group().setId(new StringResultNode("ab")) + .addAggregationResult(new MaxAggregationResult().setMax( + new IntegerResultNode(12))) + .addAggregationResult(new CountAggregationResult().setCount(1)) + .addOrderBy(new MultiplyFunctionNode().addArg(new AggregationRefNode(0)) + .addArg(new AggregationRefNode(1)), true))); + expected.postMerge(); + + a.merge(b); + a.postMerge(); + 
assertEquals(expected.toString(), a.toString()); + } + + // Merge two relatively complex tree structures and verify that the end result is as expected. + public void testMergeTrees() { + Grouping request = new Grouping() + .addLevel(new GroupingLevel() + .setMaxGroups(3) + .setExpression(new AttributeNode("c1")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s1"))))) + .addLevel(new GroupingLevel() + .setMaxGroups(2) + .setExpression(new AttributeNode("c2")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s2"))))) + .addLevel(new GroupingLevel() + .setMaxGroups(1) + .setExpression(new AttributeNode("c3")) + .setGroupPrototype(new Group().addAggregationResult( + new SumAggregationResult().setExpression(new AttributeNode("s3"))))); + + Group lhs = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(5) // merged with 200 rank node + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(500) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))) + .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10)) + .addChild(new Group() + 
.setId(new IntegerResultNode(10)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + // dummy child would be picked up here + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))) + .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))))); + + Group rhs = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group().setId(new IntegerResultNode(4)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(400) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new 
Group().setId(new IntegerResultNode(9)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))) + .addChild(new Group().setId(new IntegerResultNode(9)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + // dummy child would be picked up here + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))))) + .addChild(new Group().setId(new IntegerResultNode(14)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(5) // merged with 300 rank node + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .setRank(5) // merged with 100 rank node + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group().setId(new IntegerResultNode(19)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .setRank(500) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3"))))) + .addChild(new Group().setId(new IntegerResultNode(24)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(25)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group().setId(new 
IntegerResultNode(24)).setRank(10)) + .addChild(new Group() + .setId(new IntegerResultNode(25)) + .setRank(400) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))); + + Group expect = new Group() + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s0"))) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(500) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(5)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3"))))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(400) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))) + .addChild(new Group() + .setId(new IntegerResultNode(10)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(200) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + 
.setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))) + .addChild(new Group() + .setId(new IntegerResultNode(15)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s1"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .setRank(100) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(200)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(20)) + .setRank(500) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3"))))) + .addChild(new Group() + .setId(new IntegerResultNode(25)) + .setRank(300) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s2"))) + .addChild(new Group() + .setId(new IntegerResultNode(25)) + .setRank(400) + .addAggregationResult(new SumAggregationResult() + .setSum(new IntegerResultNode(100)) + .setExpression(new AttributeNode("s3")))))); + + assertMerge(request, lhs, rhs, expect); + assertMerge(request, rhs, lhs, expect); + } + + private static void assertMerge(Grouping request, Group lhs, Group rhs, Group expect) { + assertMerge(Arrays.asList(request.clone().setRoot(lhs.clone()), + request.clone().setRoot(rhs.clone())), + expect); + } + + private static void assertMerge(Grouping request, Group a, Group b, Group c, Group expect) { + assertMerge(Arrays.asList(request.clone().setRoot(a.clone()), + request.clone().setRoot(b.clone()), + request.clone().setRoot(c.clone())), + expect); + } + + private static void assertMerge(Grouping lhs, Grouping rhs, Group expect) { + assertMerge(Arrays.asList(lhs, rhs), expect); + } + + private static void assertMerge(List<Grouping> groupingList, Group expect) { // merges every later grouping into a clone of the first, then compares the merged root with expect + Grouping tmp = groupingList.get(0).clone(); + for (int i = 1; i < groupingList.size(); ++i) { + 
tmp.merge(groupingList.get(i)); + } + tmp.postMerge(); + assertEquals(expect.toString(), tmp.getRoot().toString()); + assertEquals(expect, tmp.getRoot()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java new file mode 100644 index 00000000000..307214d8c1c --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/BiasEstimatorTest.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class BiasEstimatorTest { + + @Test + public void requireThatExactValueIsReturnedIfAvailable() { + BiasEstimator biasEstimator = new BiasEstimator(10); + // Index 0 in biasData/rawEstimateData + assertEstimateEquals(737.1256, 738.1256, biasEstimator); + // Index 10 in biasData/rawEstimateData + assertEstimateEquals(612.1992, 868.1992, biasEstimator); + // Index 199 (last) in biasData/rawEstimateData + assertEstimateEquals(-9.81720000000041, 5084.1828, biasEstimator); + } + + @Test + public void requireThatBiasEstimatorHandlesAllValidPrecisions() { + // Index 0 values for biasData/rawEstimateData for each precision + double[][] testValuesForPrecision = new double[][] { + {11, 10}, + {23, 22}, + {46, 45}, + {92, 91}, + {184.2152, 183.2152}, + {369, 368}, + {738.1256, 737.1256}, + {1477, 1476}, + {2954, 2953}, + {5908.5052, 5907.5052}, + {11817.475, 11816.475}, + {23635.0036, 23634.0036}, + {47271, 47270}, + {94542, 94541}, + {189084, 189083} + }; + for (int p = 4; p <= 18; p++) { + assertEstimateEquals(testValuesForPrecision[p - 4][1], testValuesForPrecision[p - 4][0], new BiasEstimator(p)); + } + } + + @Test + public void requireThatEdgeCasesAreCorrect() { + BiasEstimator estimator = new BiasEstimator(10); + // Test with 
a raw estimate less than first element of rawEstimateData + assertEstimateEquals(737.1256, 7, estimator); + // Test with a raw estimate larger than last element of rawEstimateData + assertEstimateEquals(-9.81720000000041, 9001, estimator); + } + + @Test + public void requireThatLinearInterpolationIsCorrect() { + BiasEstimator estimator = new BiasEstimator(10); + double rawEstimate = (738.1256 + 750.4234) / 2; // average of two first elements + double expectedBias = (737.1256 + 724.4234) / 2; + assertEstimateEquals(expectedBias, rawEstimate, estimator); + + rawEstimate = 3 * 854.7864 / 4 + 868.1992 / 4; // weighted average of element 10 and 11 + expectedBias = 3 * 623.7864 / 4 + 612.1992 / 4; + assertEstimateEquals(expectedBias, rawEstimate, estimator); + } + + private static void assertEstimateEquals(double expected, double rawEstimate, BiasEstimator biasEstimator) { + assertEquals(expected, biasEstimator.estimateBias(rawEstimate), 0.00000001); + } +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java new file mode 100644 index 00000000000..1ba4a71d102 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogEstimatorTest.java @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation.hll; + +import net.jpountz.xxhash.XXHash32; +import net.jpountz.xxhash.XXHashFactory; +import org.junit.Test; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class HyperLogLogEstimatorTest { + + private XXHash32 hashGenerator = XXHashFactory.safeInstance().hash32(); + + @Test + public void requireThatEstimateInRangeForSmallValueSetUsingNormalSketch() { + testEstimateUsingNormalSketch(15, 1337); + } + + @Test + public void requireThatEstimateInRangeForLargeValueSetUsingNormalSketch() { + testEstimateUsingNormalSketch(1_000_000, 1337); + } + + @Test + public void requireThatEstimateIsReasonableForFullNormalSketch() { + HyperLogLogEstimator estimator = new HyperLogLogEstimator(10); + NormalSketch sketch = new NormalSketch(10); + // Fill sketch with 23 - highest possible zero prefix for precision 10. + Arrays.fill(sketch.data(), (byte) 23); + long estimate = estimator.estimateCount(sketch); + assertTrue(estimate > 6_000_000_000l); + } + + @Test + public void requireThatEstimateIsCorrectForSparseSketch() { + SparseSketch sketch = new SparseSketch(); + HyperLogLogEstimator estimator = new HyperLogLogEstimator(10); + long estimate = estimator.estimateCount(sketch); + assertEquals(0, estimate); + + // Check that estimate is correct for every possible sketch size up to threshold + for (int i = 1; i <= HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD; i++) { + sketch.aggregate(i); + estimate = estimator.estimateCount(sketch); + assertEquals(i, estimate); + } + } + + private void testEstimateUsingNormalSketch(int nValues, int seed) { + for (int precision = 4; precision <= 16; precision++) { + HyperLogLogEstimator estimator = new HyperLogLogEstimator(precision); + + long uniqueCount = new Random(seed) + .ints(nValues) + .map(this::makeHash) + .distinct() + .count(); + + Iterable hashValues = () -> + new 
Random(seed) + .ints(nValues) + .map(this::makeHash) + .iterator(); + + NormalSketch sketch = new NormalSketch(precision); + sketch.aggregate(hashValues); + long estimate = estimator.estimateCount(sketch); + double standardError = standardErrorForPrecision(precision); + assertTrue(estimate > uniqueCount * (1 - standardError) * 0.9); + assertTrue(estimate < uniqueCount * (1 + standardError) * 1.1); + } + } + + private static double standardErrorForPrecision(int precision) { + return 1.04 / Math.sqrt(1 << precision); // HLL standard error + } + + + private int makeHash(int value) { + final int seed = 42424242; + byte[] bytes = ByteBuffer.allocate(4).putInt(value).array(); + return hashGenerator.hash(bytes, 0, 4, seed); + } +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java new file mode 100644 index 00000000000..5dba5e48578 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/HyperLogLogPrecisionBenchmark.java @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import net.jpountz.xxhash.XXHash32; +import net.jpountz.xxhash.XXHashFactory; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * This benchmark performs a series of unique counting tests to analyse the HyperLogLog accuracy. 
+ */ +public class HyperLogLogPrecisionBenchmark { + + private static final int MAX_VAL = 256_000; + private static final int MAX_ITERATION = 1000; + + private static final XXHash32 hashGenerator = XXHashFactory.safeInstance().hash32(); + private static final HyperLogLogEstimator estimator = new HyperLogLogEstimator(); + private static final Random random = new Random(424242); + + + public static void main(String[] args) { + System.out.println("Unique count; Average estimated unique count; Normalized standard error; Standard error; Min; Max"); + for (int val = 1; val <= MAX_VAL; val *= 2) { + List samples = new ArrayList<>(); + long sumEstimates = 0; + for (int iteration = 0; iteration < MAX_ITERATION; iteration++) { + long sample = estimateUniqueCount(val); + samples.add(sample); + sumEstimates += sample; + } + double average = sumEstimates / (double) MAX_ITERATION; + long min = samples.stream().min(Long::compare).get(); + long max = samples.stream().max(Long::compare).get(); + double standardDeviation = getStandardDeviation(samples, average); + System.out.printf("%d; %.2f; %.4f; %.4f; %d; %d\n", val, average, standardDeviation / average, standardDeviation, min, max); + } + } + + private static double getStandardDeviation(List samples, double average) { + double sumSquared = 0; + for (long sample : samples) { + sumSquared += Math.pow(sample - average, 2); + } + return Math.sqrt(sumSquared / samples.size()); + } + + private static long estimateUniqueCount(int nValues) { + SparseSketch sparse = new SparseSketch(); + while (sparse.size() < nValues) { + sparse.aggregate(makeHash(random.nextInt())); + } + if (sparse.size() > HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD) { + NormalSketch normal = new NormalSketch(); + normal.aggregate(sparse.data()); + return estimator.estimateCount(normal); + } else { + return estimator.estimateCount(sparse); + } + } + + private static int makeHash(int value) { + final int seed = 1333337; + byte[] bytes = 
ByteBuffer.allocate(4).putInt(value).array(); + return hashGenerator.hash(bytes, 0, 4, seed); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java new file mode 100644 index 00000000000..3b0a584f37b --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/NormalSketchTest.java @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + + +public class NormalSketchTest { + + @Test + public void requireThatSerializationIsCorrectForCompressibleData() { + testSerializationForPrecision(16); + } + + @Test + public void requireThatSerializationIsCorrectForIncompressibleData() { + // A sketch of precision 1 contains only two elements and will therefore not be compressible. 
+ testSerializationForPrecision(1); + } + + private static void testSerializationForPrecision(int precision) { + NormalSketch from = new NormalSketch(precision); // precision p => 2^p bytes + for (int i = 0; i < from.size(); i++) { + from.data()[i] = (byte) i; + } + NormalSketch to = new NormalSketch(precision); + + BufferSerializer buffer = new BufferSerializer(); + from.serialize(buffer); + buffer.flip(); + to.deserialize(buffer); + + assertEquals(from, to); + } + + @Test + public void requireThatMergeDoesElementWiseMax() { + NormalSketch s1 = new NormalSketch(2); + setSketchValues(s1, 0, 1, 1, 3); + NormalSketch s2 = new NormalSketch(2); + setSketchValues(s2, 2, 1, 1, 0); + s1.merge(s2); + + assertBucketEquals(s1, 0, 2); + assertBucketEquals(s1, 1, 1); + assertBucketEquals(s1, 2, 1); + assertBucketEquals(s1, 3, 3); + } + + @Test(expected = IllegalArgumentException.class) + public void requireThatMergingFailsForSketchesOfDifferentSize() { + NormalSketch s1 = new NormalSketch(2); + NormalSketch s2 = new NormalSketch(3); + s1.merge(s2); + } + + @Test + public void requireThatEqualsIsCorrect() { + NormalSketch s1 = new NormalSketch(1); + setSketchValues(s1, 42, 127); + NormalSketch s2 = new NormalSketch(1); + setSketchValues(s2, 42, 127); + assertEquals(s1, s2); + } + + @Test + public void requireThatSketchBucketsAreCorrectForSingleValues() { + + testSingleValueAggregation(0, 0, 23); + testSingleValueAggregation(1, 1, 23); + testSingleValueAggregation(-1, 1023, 1); + testSingleValueAggregation(Integer.MAX_VALUE, 1023, 2); + testSingleValueAggregation(Integer.MIN_VALUE, 0, 1); + testSingleValueAggregation(42, 42, 23); + testSingleValueAggregation(0b00000011_00000000_00000000_11000011, 0b11000011, 7); + } + + private static void testSingleValueAggregation(int hashValue, int bucketIndex, int expectedValue) { + NormalSketch sketch = new NormalSketch(10); + sketch.aggregate(hashValue); + assertBucketEquals(sketch, bucketIndex, expectedValue); + for (int i = 0; i < 
sketch.size(); i++) { + if (i == bucketIndex) { + continue; + } + assertBucketEquals(sketch, i, 0); + } + } + + @Test + public void requireThatSketchBucketsAreCorrectForMultipleValues() { + NormalSketch sketch = new NormalSketch(10); + + // Aggregate multiple values + sketch.aggregate(Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); + for (int i = 0; i < 10; i++) { + assertBucketEquals(sketch, i, 23); + } + // Check that the other values are zero. + for (int i = 10; i < 1024; i++) { + assertBucketEquals(sketch, i, 0); + } + } + + private static void assertBucketEquals(NormalSketch sketch, int index, int expectedValue) { + assertEquals(expectedValue, sketch.data()[index]); + } + + private static void setSketchValues(NormalSketch sketch, Integer... values) { + for (int i = 0; i < values.length; i++) { + sketch.data()[i] = values[i].byteValue(); + } + } + +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java new file mode 100644 index 00000000000..07488d21fd3 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchMergerTest.java @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.aggregation.hll; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class SketchMergerTest { + + private final SketchMerger merger = new SketchMerger(); + + @Test + public void requireThatMergingTwoSmallSparseSketchesReturnsSparseSketch() { + SparseSketch s1 = SketchUtils.createSparseSketch(1); + SparseSketch s2 = SketchUtils.createSparseSketch(2); + + Sketch result = merger.merge(s1, s2); + assertEquals(SparseSketch.class, result.getClass()); + assertTrue("Should return the instance given by first argument.", result == s1); + SketchUtils.assertSketchContains(result, 1, 2); + } + + @Test + public void requireThatMergingTwoThresholdSizeSparseSketchesReturnsNormalSketch() { + SparseSketch s1 = SketchUtils.createSparseSketch(); + SparseSketch s2 = SketchUtils.createSparseSketch(); + + // Fill sketches with disjoint data. + for (int i = 0; i < HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD; i++) { + s1.aggregate(i); + s2.aggregate(i + HyperLogLog.SPARSE_SKETCH_CONVERSION_THRESHOLD); + } + + Sketch result = merger.merge(s1, s2); + assertEquals(NormalSketch.class, result.getClass()); + + List<Integer> unionOfSketchData = new ArrayList<>(); // s1 and s2 hold disjoint values, so appending both yields the expected union + unionOfSketchData.addAll(s1.data()); + unionOfSketchData.addAll(s2.data()); + Integer[] expectedValues = unionOfSketchData.toArray(new Integer[unionOfSketchData.size()]); + SketchUtils.assertSketchContains(result, expectedValues); + } + + @Test + public void requireThatMergingTwoNormalSketchesReturnsNormalSketch() { + NormalSketch s1 = SketchUtils.createNormalSketch(1); + NormalSketch s2 = SketchUtils.createNormalSketch(2); + + Sketch result = merger.merge(s1, s2); + assertEquals(NormalSketch.class, result.getClass()); + assertTrue("Should return the instance given by first argument.", result == s1); + SketchUtils.assertSketchContains(result, 1, 2); + } + + @Test + public void 
requireThatMergingNormalAndSparseSketchReturnsNormalSketch() { + SparseSketch s1 = SketchUtils.createSparseSketch(1); + NormalSketch s2 = SketchUtils.createNormalSketch(2); + + Sketch result = merger.merge(s1, s2); + assertEquals(NormalSketch.class, result.getClass()); + assertTrue("Should return the NormalSketch instance given by the arguments.", result == s2); + SketchUtils.assertSketchContains(result, 1, 2); + } +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java new file mode 100644 index 00000000000..90098f8c950 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SketchUtils.java @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; + +/** + * Utility class for creating sketches and comparing their content. + * + * @author bjorncs + */ +public class SketchUtils { + + private SketchUtils() {} + + public static SparseSketch createSparseSketch(Integer... values) { + SparseSketch sketch = new SparseSketch(); + sketch.aggregate(Arrays.asList(values)); + return sketch; + } + + public static NormalSketch createNormalSketch(Integer... values) { + NormalSketch sketch = new NormalSketch(); + sketch.aggregate(Arrays.asList(values)); + return sketch; + } + + public static void assertSketchContains(Sketch sketch, Integer... values) { + if (sketch instanceof SparseSketch) { + assertSparseSketchContains((SparseSketch) sketch, values); + } else { + assertNormalSketchContains((NormalSketch) sketch, values); + } + } + + public static void assertNormalSketchContains(NormalSketch sketch, Integer... 
values) { + NormalSketch expectedSketch = createNormalSketch(values); + assertEquals(expectedSketch, sketch); + } + + public static void assertSparseSketchContains(SparseSketch sketch, Integer... values) { + SparseSketch expectedSketch = createSparseSketch(values); + assertEquals(expectedSketch, sketch); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java new file mode 100644 index 00000000000..4be0f89514d --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/aggregation/hll/SparseSketchTest.java @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.aggregation.hll; + +import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Test; + +import java.util.HashSet; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class SparseSketchTest { + + @Test + public void requireThatMergeDoesSetUnion() { + SparseSketch s1 = new SparseSketch(); + s1.aggregate(42); + s1.aggregate(9001); + + SparseSketch s2 = new SparseSketch(); + s2.aggregate(1337); + s2.aggregate(9001); + + s1.merge(s2); + + HashSet<Integer> data = s1.data(); + assertEquals(3, s1.size()); // 9001 is present in both inputs and must be counted once + assertTrue(data.contains(42)); + assertTrue(data.contains(1337)); + assertTrue(data.contains(9001)); + } + + + @Test + public void requireThatSerializationRetainAllData() { + SparseSketch from = new SparseSketch(); + from.aggregate(42); + from.aggregate(1337); + + SparseSketch to = new SparseSketch(); + + BufferSerializer buffer = new BufferSerializer(); + from.serialize(buffer); + buffer.flip(); + to.deserialize(buffer); + + assertEquals(from, to); + } + + @Test + public void requireThatEqualsComparesDataContent() { + SparseSketch s1 = new SparseSketch(); + s1.aggregate(1337); + s1.aggregate(42); + + SparseSketch s2 = new 
SparseSketch(); + s2.aggregate(42); + s2.aggregate(1337); + + assertEquals(s1.data(), s2.data()); + } +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java new file mode 100755 index 00000000000..2c5e65c03e4 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ExpressionTestCase.java @@ -0,0 +1,932 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.io.GrowableByteBuffer; +import com.yahoo.text.Utf8; +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.Identifiable; +import junit.framework.TestCase; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * @author Henning Baldersheim + */ +public class ExpressionTestCase extends TestCase { + + public void testRangeBucketPreDefFunctionNode() { + assertMultiArgFunctionNode(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo"))); + assertEquals(new RangeBucketPreDefFunctionNode(), new RangeBucketPreDefFunctionNode()); + assertEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")), + new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo"))); + assertNotEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("foo")), + new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "21")), new AttributeNode("foo"))); + assertNotEquals(new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new 
StringBucketResultNode("10", "20")), new AttributeNode("foo")), + new RangeBucketPreDefFunctionNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), new AttributeNode("bar"))); + } + + public void testFixedWidthBucketFunctionNode() { + assertMultiArgFunctionNode(new FixedWidthBucketFunctionNode()); + assertEquals(new FixedWidthBucketFunctionNode(), new FixedWidthBucketFunctionNode()); + assertEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")), + new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo"))); + assertNotEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")), + new FixedWidthBucketFunctionNode(new IntegerResultNode(6), new AttributeNode("foo"))); + assertNotEquals(new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo")), + new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("bar"))); + } + + public void testIntegerBucketResultNodeVector() { + assertResultNode(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20))); + assertEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)), + new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20))); + assertNotEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)), + new IntegerBucketResultNodeVector()); + assertNotEquals(new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(10, 20)), + new IntegerBucketResultNodeVector().add(new IntegerBucketResultNode(11, 20))); + } + + public void testFloatBucketResultNodeVector() { + assertResultNode(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20))); + assertEquals(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)), + new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20))); + assertNotEquals(new 
FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)), + new FloatBucketResultNodeVector()); + assertNotEquals(new FloatBucketResultNodeVector().add(new FloatBucketResultNode(10, 20)), + new FloatBucketResultNodeVector().add(new FloatBucketResultNode(11, 20))); + } + + public void testStringBucketResultNodeVector() { + assertResultNode(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20"))); + assertEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), + new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20"))); + assertNotEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), + new StringBucketResultNodeVector()); + assertNotEquals(new StringBucketResultNodeVector().add(new StringBucketResultNode("10", "20")), + new StringBucketResultNodeVector().add(new StringBucketResultNode("11", "20"))); + } + + public void testIntegerBucketResultNode() { + assertResultNode(new IntegerBucketResultNode(10, 20)); + assertEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(10, 20)); + assertNotEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(11, 20)); + assertNotEquals(new IntegerBucketResultNode(10, 20), new IntegerBucketResultNode(10, 21)); + } + + public void testFloatBucketResultNode() { + assertResultNode(new FloatBucketResultNode(10.0, 20.0)); + assertEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(10.0, 20.0)); + assertNotEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(11.0, 20.0)); + assertNotEquals(new FloatBucketResultNode(10.0, 20.0), new FloatBucketResultNode(10.0, 21.0)); + } + + public void testStringBucketResultNode() { + assertResultNode(new StringBucketResultNode("10.0", "20.0")); + assertEquals(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "20.0")); + assertNotEquals(new StringBucketResultNode("10.0", "20.0"), new 
StringBucketResultNode("11.0", "20.0")); + assertNotEquals(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0")); + compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0"), new StringBucketResultNode("10.0", "22.0")); + compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0")); + compare(new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("20.0")), + new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0")); + compare(new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("20.0")), + new StringBucketResultNode(StringResultNode.getNegativeInfinity(), new StringResultNode("21.0")), + new StringBucketResultNode("11.0", "20.0")); + compare(new StringBucketResultNode("10.0", "20.0"), new StringBucketResultNode("10.0", "21.0"), + new StringBucketResultNode(new StringResultNode("10.0"), StringResultNode.getPositiveInfinity())); + compare(new StringBucketResultNode(new StringResultNode("10.0"), StringResultNode.getPositiveInfinity()), + new StringBucketResultNode("11.0", "19.0"), new StringBucketResultNode("11.0", "20.0")); + } + + public void testPositiveInfinity() { + PositiveInfinityResultNode inf = new PositiveInfinityResultNode(); + PositiveInfinityResultNode inf2 = new PositiveInfinityResultNode(); + assertResultNode(inf); + assertEquals(inf, inf2); + } + + public void testAddFunctionNode() { + assertMultiArgFunctionNode(new AddFunctionNode()); + assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(3))), + 5, 5.0, "5", longAsRaw(5)); + assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.0))) + .addArg(new ConstantNode(new IntegerResultNode(2))), + 5, 5.0, "5.0", doubleAsRaw(5.0)); + 
assertFunctionNode(new AddFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode(new FloatResultNode(2.0))), + 5, 5.0, "5.0", doubleAsRaw(5.0)); + } + + public void testAndFunctionNode() { + assertMultiArgFunctionNode(new AndFunctionNode()); + assertFunctionNode(new AndFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode(new IntegerResultNode(7))), + 3, 3.0, "3", longAsRaw(3)); + } + + public void testZCurveFunctionNode() { + assertMultiArgFunctionNode( + new ZCurveFunctionNode(new ConstantNode(new IntegerResultNode(7)), ZCurveFunctionNode.Dimension.Y)); + } + + public void testTimeStampFunctionNode() { + assertMultiArgFunctionNode(new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true)); + assertEquals(new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true), + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, true)); + assertNotEquals( + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, + true), + new TimeStampFunctionNode(new AttributeNode("testattributt"), TimeStampFunctionNode.TimePart.Hour, + true)); + assertNotEquals( + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, + true), + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Year, + true)); + assertNotEquals( + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, + true), + new TimeStampFunctionNode(new AttributeNode("testattribute"), TimeStampFunctionNode.TimePart.Hour, + false)); + } + + public void testExpressionRefNode() { + AggregationRefNode ref = new AggregationRefNode(3); + assertEquals(3, ref.getIndex()); + } + + public void testAttributeNode() { + try { + new AttributeNode(null); + 
fail("Should not be able to set null attribute name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new AttributeNode().setAttributeName(null); + fail("Should not be able to set null attribute name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new AttributeNode().prepare(); + fail("Should not be possible to prepare or execute attribute node"); + } catch (RuntimeException e) { + // expected + } + try { + new AttributeNode().execute(); + fail("Should not be possible to prepare or execute attribute node"); + } catch (RuntimeException e) { + // expected + } + AttributeNode a = new AttributeNode("testattribute"); + assertEquals("testattribute", a.getAttributeName()); + AttributeNode b = (AttributeNode)assertSerialize(a); + assertEquals("testattribute", b.getAttributeName()); + AttributeNode c = new AttributeNode("testattribute"); + assertEquals(b, c); + c.setAttributeName("fail"); + assertFalse(b.equals(c)); + } + + public void testInterpolatedLookupNode() { + ExpressionNode argA = new ConstantNode(new FloatResultNode(2.71828182846)); + ExpressionNode argB = new ConstantNode(new FloatResultNode(3.14159265359)); + try { + new InterpolatedLookupNode(null, argA); + fail("Should not be able to set null attribute name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new InterpolatedLookupNode().setAttributeName(null); + fail("Should not be able to set null attribute name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new InterpolatedLookupNode().prepare(); + fail("Should not be possible to prepare or execute interpolatedlookup node"); + } catch (RuntimeException e) { + // expected + } + try { + new InterpolatedLookupNode().execute(); + fail("Should not be possible to prepare or execute interpolatedlookup node"); + } catch (RuntimeException e) { + // expected + } + ExpressionNode a1 = new InterpolatedLookupNode().setAttributeName("foo").addArg(argA); + InterpolatedLookupNode 
a2 = new InterpolatedLookupNode("foo", argA); + assertEquals("foo", ((InterpolatedLookupNode)a1).getAttributeName()); + assertEquals("foo", a2.getAttributeName()); + assertEquals(argA, ((InterpolatedLookupNode)a1).getArg()); + assertEquals(argA, a2.getArg()); + assertEquals(a1, a2); + InterpolatedLookupNode b1 = new InterpolatedLookupNode("foo", argB); + InterpolatedLookupNode b2 = new InterpolatedLookupNode("bar", argA); + assertFalse(a1.equals(b1)); + assertFalse(a1.equals(b2)); + assertFalse(a2.equals(b1)); + assertFalse(a2.equals(b2)); + a2.setAttributeName("fail"); + assertFalse(a1.equals(a2)); + } + + public void testCatFunctionNode() { + assertMultiArgFunctionNode(new CatFunctionNode()); + assertFunctionNode(new CatFunctionNode().addArg(new ConstantNode(new RawResultNode(asRaw('1', '2')))) + .addArg(new ConstantNode(new RawResultNode(asRaw('3', '4')))), + 0, 0.0, "1234", asRaw('1', '2', '3', '4')); + } + + public void testStrCatFunctionNode() { + assertMultiArgFunctionNode(new StrCatFunctionNode()); + assertFunctionNode(new StrCatFunctionNode().addArg(new ConstantNode(new StringResultNode("foo"))) + .addArg(new ConstantNode(new StringResultNode("bar"))), + 0, 0.0, "foobar", stringAsRaw("foobar")); + } + + public void testDivideFunctionNode() { + assertMultiArgFunctionNode(new DivideFunctionNode()); + assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(10))) + .addArg(new ConstantNode(new IntegerResultNode(2))), + 5, 5.0, "5", longAsRaw(5)); + assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(6))) + .addArg(new ConstantNode(new FloatResultNode(2.0))), + 3, 3.0, "3.0", doubleAsRaw(3.0)); + assertFunctionNode(new DivideFunctionNode().addArg(new ConstantNode(new IntegerResultNode(6))) + .addArg(new ConstantNode(new FloatResultNode(12.0))), + 1, 0.5, "0.5", doubleAsRaw(0.5)); + } + + public void testDocumentFieldNode() { + try { + new DocumentFieldNode(null); + fail("Should not be able 
to set null field name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new DocumentFieldNode().setDocumentFieldName(null); + fail("Should not be able to set null field name."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new DocumentFieldNode("foo").prepare(); + fail("Should not be able to prepare documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + try { + new DocumentFieldNode("foo").execute(); + fail("Should not be able to execute documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + DocumentFieldNode a = new DocumentFieldNode("testdocumentfield"); + assertEquals("testdocumentfield", a.getDocumentFieldName()); + DocumentFieldNode b = (DocumentFieldNode)assertSerialize(a); + assertEquals("testdocumentfield", b.getDocumentFieldName()); + DocumentFieldNode c = new DocumentFieldNode("testdocumentfield"); + assertEquals(b, c); + c.setDocumentFieldName("fail"); + assertFalse(b.equals(c)); + } + + public void testFloatResultNode() { + FloatResultNode a = new FloatResultNode(7.3); + assertEquals(a.getInteger(), 7); + assertEquals(a.getFloat(), 7.3); + assertEquals(a.getString(), "7.3"); + assertEquals(a.getNumber(), new Double(7.3)); + byte[] raw = a.getRaw(); + assertEquals(raw.length, 8); + assertResultNode(a); + compare(new FloatResultNode(-1), new FloatResultNode(0), new FloatResultNode(1)); + a.set(new FloatResultNode(4)); + assertResultNode(a); + + FloatResultNode b = new FloatResultNode(7.5); + assertEquals(b.getInteger(), 8); + assertEquals(b.getFloat(), 7.5); + assertEquals(b.getString(), "7.5"); + assertEquals(b.getNumber(), new Double(7.5)); + } + + public void testGetDocIdNamespaceSpecificFunctionNode() { + GetDocIdNamespaceSpecificFunctionNode a = new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)); + assertTrue(a.getResult() instanceof IntegerResultNode); + GetDocIdNamespaceSpecificFunctionNode b = 
(GetDocIdNamespaceSpecificFunctionNode)assertSerialize(a); + assertTrue(b.getResult() instanceof IntegerResultNode); + assertEquals(7, b.getResult().getInteger()); + GetDocIdNamespaceSpecificFunctionNode c = new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)); + assertEquals(b, c); + try { + new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)).prepare(); + fail("Should not be able to prepare documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + try { + new GetDocIdNamespaceSpecificFunctionNode(new IntegerResultNode(7)).execute(); + fail("Should not be able to execute documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + } + + public void testGetYMUMChecksumFunctionNode() { + GetYMUMChecksumFunctionNode a = new GetYMUMChecksumFunctionNode(); + assertTrue(a.getResult() instanceof IntegerResultNode); + assertSerialize(a); + try { + new GetYMUMChecksumFunctionNode().prepare(); + fail("Should not be able to prepare documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + try { + new GetYMUMChecksumFunctionNode().execute(); + fail("Should not be able to execute documentfieldnode"); + } catch (RuntimeException e) { + // expected + } + } + + public void testIntegerResultNode() { + IntegerResultNode a = new IntegerResultNode(7); + assertEquals(a.getInteger(), 7); + assertEquals(a.getFloat(), 7.0); + assertEquals(a.getString(), "7"); + assertEquals(a.getNumber(), new Long(7)); + byte[] raw = a.getRaw(); + assertEquals(raw.length, 8); + assertResultNode(a); + compare(new IntegerResultNode(-1), new IntegerResultNode(0), new IntegerResultNode(1)); + compare(new IntegerResultNode(-1), new IntegerResultNode(0), new IntegerResultNode(0x80000000L)); + } + + public void testMaxFunctionNode() { + assertMultiArgFunctionNode(new MaxFunctionNode()); + assertFunctionNode(new MaxFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 5, 
5.0, "5", longAsRaw(5)); + assertFunctionNode(new MaxFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 5, 5.0, "5.0", doubleAsRaw(5.0)); + } + + public void testMD5BitFunctionNode() { + try { + new MD5BitFunctionNode(null, 64); + fail("Should not be able to set null argument."); + } catch (NullPointerException e) { + // expected + } + try { + new MD5BitFunctionNode().prepare(); + fail("Should not be able to run prepare."); + } catch (RuntimeException e) { + // expected + } + try { + new MD5BitFunctionNode().execute(); + fail("Should not be able to run execute."); + } catch (RuntimeException e) { + // expected + } + assertUnaryBitFunctionNode(new MD5BitFunctionNode()); + } + + public void testMinFunctionNode() { + assertMultiArgFunctionNode(new MinFunctionNode()); + assertFunctionNode(new MinFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 3, 3.0, "3", longAsRaw(3)); + assertFunctionNode(new MinFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 5, 4.9999999, "4.9999999", doubleAsRaw(4.9999999)); + } + + public void testModuloFunctionNode() { + assertMultiArgFunctionNode(new ModuloFunctionNode()); + assertFunctionNode(new ModuloFunctionNode().addArg(new ConstantNode(new IntegerResultNode(13))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 3, 3.0, "3", longAsRaw(3)); + assertFunctionNode(new ModuloFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.9999999))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 5, 4.9999999, "4.9999999", doubleAsRaw(4.9999999)); + } + + public void testMultiplyFunctionNode() { + assertMultiArgFunctionNode(new MultiplyFunctionNode()); + assertFunctionNode(new MultiplyFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode(new IntegerResultNode(5))), 
+ 15, 15.0, "15", longAsRaw(15)); + assertFunctionNode(new MultiplyFunctionNode().addArg(new ConstantNode(new FloatResultNode(4.5))) + .addArg(new ConstantNode(new IntegerResultNode(5))), + 23, 22.5, "22.5", doubleAsRaw(22.5)); + } + + public void testNegateFunctionNode() { + assertMultiArgFunctionNode(new NegateFunctionNode()); + assertFunctionNode(new NegateFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))), + -3, -3.0, "-3", longAsRaw(-3)); + assertFunctionNode(new NegateFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.0))), + -3, -3.0, "-3.0", doubleAsRaw(-3.0)); + } + + public void testSortFunctionNode() { + assertMultiArgFunctionNode(new SortFunctionNode()); + + } + + public void testReverseFunctionNode() { + assertMultiArgFunctionNode(new ReverseFunctionNode()); + } + + public void testToIntFunctionNode() { + assertMultiArgFunctionNode(new ToIntFunctionNode()); + assertFunctionNode(new ToIntFunctionNode().addArg(new ConstantNode(new StringResultNode("1337"))), + 1337, 1337.0, "1337", longAsRaw(1337)); + } + + public void testToFloatFunctionNode() { + assertMultiArgFunctionNode(new ToFloatFunctionNode()); + assertFunctionNode(new ToFloatFunctionNode().addArg(new ConstantNode(new FloatResultNode(3.14))), + 3, 3.14, "3.14", doubleAsRaw(3.14)); + } + + public void testMathFunctionNode() { + assertMultiArgFunctionNode(new MathFunctionNode(MathFunctionNode.Function.LOG10)); + assertFunctionNode(new MathFunctionNode(MathFunctionNode.Function.LOG10).addArg(new ConstantNode(new IntegerResultNode(100000))), + 5, 5.0, "5.0", doubleAsRaw(5.0)); + } + + public void testStrLenFunctionNode() { + assertMultiArgFunctionNode(new StrLenFunctionNode()); + assertFunctionNode(new StrLenFunctionNode().addArg(new ConstantNode(new StringResultNode("foo"))), + 3, 3.0, "3", longAsRaw(3)); + } + + public void testNormalizeSubjectFunctionNode() { + assertMultiArgFunctionNode(new NormalizeSubjectFunctionNode()); + assertFunctionNode(new 
NormalizeSubjectFunctionNode().addArg(new ConstantNode(new StringResultNode("Re: Your mail"))), + 0, 0, "Your mail", stringAsRaw("Your mail")); + } + + public void testNormalizeSubjectFunctionNode2() { + assertMultiArgFunctionNode(new NormalizeSubjectFunctionNode()); + assertFunctionNode(new NormalizeSubjectFunctionNode().addArg(new ConstantNode(new StringResultNode("Your mail"))), + 0, 0, "Your mail", stringAsRaw("Your mail")); + } + + public void testNumElemFunctionNode() { + assertMultiArgFunctionNode(new NumElemFunctionNode()); + assertFunctionNode(new NumElemFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))), + 1, 1.0, "1", longAsRaw(1)); + } + + public void testToStringFunctionNode() { + assertMultiArgFunctionNode(new ToStringFunctionNode()); + assertFunctionNode(new ToStringFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))), + 1337, 1337.0, "1337", stringAsRaw("1337")); + } + + public void testToRawFunctionNode() { + assertMultiArgFunctionNode(new ToRawFunctionNode()); + assertFunctionNode(new ToRawFunctionNode().addArg(new ConstantNode(new IntegerResultNode(1337))), + 1337, 1337.0, "1337", longAsRaw(1337)); + } + + public void testNullResultNode() { + // TODO: Implement. 
+ } + + public void testOrFunctionNode() { + assertMultiArgFunctionNode(new OrFunctionNode()); + assertFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(4))), + 6, 6.0, "6", longAsRaw(6)); + } + + public void testDebugWaitFunctionNode() { + assertFunctionNode( + new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(4))), + 0.01, + true), + 6, 6.0, "6", longAsRaw(6)); + DebugWaitFunctionNode n = new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(4))), + 0.3, + false); + n.prepare(); + long start = System.currentTimeMillis(); + n.execute(); + long end = System.currentTimeMillis(); + assertTrue(end - start > 250); + + DebugWaitFunctionNode n2 = new DebugWaitFunctionNode(new OrFunctionNode().addArg(new ConstantNode(new IntegerResultNode(2))) + .addArg(new ConstantNode(new IntegerResultNode(4))), + 0.5, + true); + n2.prepare(); + start = System.currentTimeMillis(); + n2.execute(); + end = System.currentTimeMillis(); + assertTrue(end - start > 450); + } + + public void testRawResultNode() { + try { + new RawResultNode(null); + fail("Should not be able to set null value."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new RawResultNode().setValue(null); + fail("Should not be able to set null value."); + } catch (IllegalArgumentException e) { + // expected + } + byte[] b = { '7', '.', '4' }; + RawResultNode a = new RawResultNode(b); + byte[] raw = a.getRaw(); + assertEquals(raw.length, 3); + assertEquals(raw[0], '7'); + assertEquals(raw[1], '.'); + assertEquals(raw[2], '4'); + assertEquals(a.getInteger(), 0); + assertEquals(a.getFloat(), 0.0); + assertEquals(a.getString(), "7.4"); + assertResultNode(a); + compare(new RawResultNode(), new RawResultNode(new byte [] {'z'}), new 
RawResultNode(new byte [] {'z', 'z'})); + compare(new RawResultNode(new byte [] {'z'}), new RawResultNode(new byte [] {'z', 'z'}), new RawResultNode(new byte [] {'z','z','z'})); + compare(new RawResultNode(new byte [] {'z'}), new RawResultNode(new byte [] {'z','z'}), new PositiveInfinityResultNode()); + byte [] b1 = {0x00}; + byte [] b2 = {0x07}; + byte [] b3 = {0x7f}; + byte [] b4 = {(byte)0x80}; + byte [] b5 = {(byte)0xb1}; + byte [] b6 = {(byte)0xff}; + + assertEquals(0x00, b1[0]); + assertEquals(0x07, b2[0]); + assertEquals(0x7f, b3[0]); + assertEquals(0x80, ((int)b4[0]) & 0xff); + assertEquals(0xb1, ((int)b5[0]) & 0xff); + assertEquals(0xff, ((int)b6[0]) & 0xff); + + RawResultNode r1 = new RawResultNode(b1); + RawResultNode r2 = new RawResultNode(b2); + RawResultNode r3 = new RawResultNode(b3); + RawResultNode r4 = new RawResultNode(b4); + RawResultNode r5 = new RawResultNode(b5); + RawResultNode r6 = new RawResultNode(b6); + + assertTrue(r1.compareTo(r1) == 0); + assertTrue(r1.compareTo(r2) < 0); + assertTrue(r1.compareTo(r3) < 0); + assertTrue(r1.compareTo(r4) < 0); + assertTrue(r1.compareTo(r5) < 0); + assertTrue(r1.compareTo(r6) < 0); + + assertTrue(r2.compareTo(r1) > 0); + assertTrue(r2.compareTo(r2) == 0); + assertTrue(r2.compareTo(r3) < 0); + assertTrue(r2.compareTo(r4) < 0); + assertTrue(r2.compareTo(r5) < 0); + assertTrue(r2.compareTo(r6) < 0); + + assertTrue(r3.compareTo(r1) > 0); + assertTrue(r3.compareTo(r2) > 0); + assertTrue(r3.compareTo(r3) == 0); + assertTrue(r3.compareTo(r4) < 0); + assertTrue(r3.compareTo(r5) < 0); + assertTrue(r3.compareTo(r6) < 0); + + assertTrue(r4.compareTo(r1) > 0); + assertTrue(r4.compareTo(r2) > 0); + assertTrue(r4.compareTo(r3) > 0); + assertTrue(r4.compareTo(r4) == 0); + assertTrue(r4.compareTo(r5) < 0); + assertTrue(r4.compareTo(r6) < 0); + + assertTrue(r5.compareTo(r1) > 0); + assertTrue(r5.compareTo(r2) > 0); + assertTrue(r5.compareTo(r3) > 0); + assertTrue(r5.compareTo(r4) > 0); + assertTrue(r5.compareTo(r5) == 
0); + assertTrue(r5.compareTo(r6) < 0); + + assertTrue(r6.compareTo(r1) > 0); + assertTrue(r6.compareTo(r2) > 0); + assertTrue(r6.compareTo(r3) > 0); + assertTrue(r6.compareTo(r4) > 0); + assertTrue(r6.compareTo(r5) > 0); + assertTrue(r6.compareTo(r6) == 0); + + } + + private void compare(ResultNode small, ResultNode medium, ResultNode large) { + assertTrue(small.compareTo(medium) < 0); + assertTrue(small.compareTo(large) < 0); + assertTrue(medium.compareTo(large) < 0); + assertTrue(medium.compareTo(small) > 0); + assertTrue(large.compareTo(small) > 0); + assertTrue(large.compareTo(medium) > 0); + assertEquals(0, small.compareTo(small)); + assertEquals(0, medium.compareTo(medium)); + assertEquals(0, large.compareTo(large)); + } + + public void testStringResultNode() { + try { + new StringResultNode(null); + fail("Should not be able to set null value."); + } catch (IllegalArgumentException e) { + // expected + } + try { + new StringResultNode().setValue(null); + fail("Should not be able to set null value."); + } catch (IllegalArgumentException e) { + // expected + } + StringResultNode a = new StringResultNode("7.3"); + assertEquals(a.getInteger(), 0); + assertEquals(a.getFloat(), 7.3); + assertEquals(a.getString(), "7.3"); + byte[] raw = a.getRaw(); + assertEquals(raw.length, 3); + assertResultNode(a); + compare(new StringResultNode(), new StringResultNode("z"), new StringResultNode("zz")); + compare(new StringResultNode("z"), new StringResultNode("zz"), new StringResultNode("zzz")); + compare(new StringResultNode("a"), new StringResultNode("zz"), new PositiveInfinityResultNode()); + } + + public void testXorBitFunctionNode() { + try { + new XorBitFunctionNode(null, 64); + fail("Should not be able to set null argument."); + } catch (NullPointerException e) { + // expected + } + try { + new XorBitFunctionNode().prepare(); + fail("Should not be able to run prepare."); + } catch (RuntimeException e) { + // expected + } + try { + new XorBitFunctionNode().execute(); + 
fail("Should not be able to run execute."); + } catch (RuntimeException e) { + // expected + } + assertUnaryBitFunctionNode(new XorBitFunctionNode()); + } + + public void testUcaFunctionNode() { + try { + new UcaFunctionNode(null, "foo"); + fail("Should not be able to set null argument."); + } catch (NullPointerException e) { + // expected + } + try { + new UcaFunctionNode().prepare(); + fail("Should not be able to run prepare."); + } catch (RuntimeException e) { + // expected + } + try { + new UcaFunctionNode().execute(); + fail("Should not be able to run execute."); + } catch (RuntimeException e) { + // expected + } + assertUcaFunctionNode(new UcaFunctionNode(new ConstantNode(new IntegerResultNode(1337)), "foo", "bar")); + } + + public void testNestedFunctions() { + assertFunctionNode(new AddFunctionNode() + .addArg(new MultiplyFunctionNode().addArg(new ConstantNode(new IntegerResultNode(3))) + .addArg(new ConstantNode( + new StringResultNode("4")))) + .addArg(new ConstantNode(new FloatResultNode(2.0))), + 14, 14.0, "14.0", doubleAsRaw(14.0)); + } + + public void testArithmeticNodes() { + ExpressionNode i1 = new ConstantNode(new IntegerResultNode(1)); + ExpressionNode i2 = new ConstantNode(new IntegerResultNode(2)); + ExpressionNode f2 = new ConstantNode(new FloatResultNode(9.9)); + ExpressionNode s2 = new ConstantNode(new StringResultNode("2")); + ExpressionNode r2 = new ConstantNode(new RawResultNode(asRaw(2))); + + AddFunctionNode add1 = new AddFunctionNode(); + add1.addArg(i1).addArg(i2); + ExpressionNode exp1 = add1; + exp1.prepare(); + assertTrue(exp1.getResult() instanceof IntegerResultNode); + assertTrue(exp1.execute()); + assertEquals(exp1.getResult().getInteger(), 3); + assertTrue(exp1.execute()); + assertEquals(exp1.getResult().getInteger(), 3); + + AddFunctionNode add2 = new AddFunctionNode(); + add2.addArg(i1); + add2.addArg(f2); + add2.prepare(); + assertTrue(add2.getResult() instanceof FloatResultNode); + + AddFunctionNode add3 = new 
AddFunctionNode(); + add3.addArg(i1); + add3.addArg(s2); + add3.prepare(); + assertTrue(add3.getResult() instanceof IntegerResultNode); + + AddFunctionNode add4 = new AddFunctionNode(); + add4.addArg(i1); + add4.addArg(r2); + add4.prepare(); + assertTrue(add4.getResult() instanceof IntegerResultNode); + } + + public void testArithmeticOperations() { + ExpressionNode i1 = new ConstantNode(new IntegerResultNode(1793253241)); + ExpressionNode i2 = new ConstantNode(new IntegerResultNode(1676521321)); + ExpressionNode f1 = new ConstantNode(new FloatResultNode(1.1109876)); + ExpressionNode f2 = new ConstantNode(new FloatResultNode(9.767681239)); + + assertAdd(i1, i2, 3469774562l, 3469774562l); + assertAdd(i1, f2, 1793253251l, 1793253250.767681239); + assertAdd(f1, f2, 11, 10.878668839); + assertMultiply(i1, i2, 3006427292488851361l, 3006427292488851361l); + assertMultiply(i1, f2, 17515926039l, 1793253241.0 * 9.767681239); + assertMultiply(f1, f2, 11, 10.8517727372816364); + } + + // -------------------------------------------------------------------------------- + // + // Everything below this point is helper functions. + // + // -------------------------------------------------------------------------------- + private static void assertNotEquals(Object lhs, Object rhs) { + assertFalse(lhs.equals(rhs)); + } + + private static void assertUcaFunctionNode(UcaFunctionNode node) { + UcaFunctionNode obj = node.clone(); + assertEquals(obj, node); + assertMultiArgFunctionNode((UcaFunctionNode)Identifiable.createFromId(node.getClassId())); + } + + public byte[] asRaw(int ... 
extra) { + byte[] mybytes = new byte[extra.length]; + for (int i = 0; i < mybytes.length; i++) { + mybytes[i] = (byte)extra[i]; + } + return mybytes; + } + + public byte[] longAsRaw(long value) { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + public byte[] doubleAsRaw(double value) { + return ByteBuffer.allocate(8).putDouble(value).array(); + } + + public byte[] stringAsRaw(String value) { + return Utf8.toBytes(value); + } + + private static void assertUnaryBitFunctionNode(UnaryBitFunctionNode node) { + UnaryBitFunctionNode obj = (UnaryBitFunctionNode)node.clone(); + assertEquals(obj, node); + + obj.setNumBits(obj.getNumBits() + 1); + assertFalse(obj.equals(node)); + + assertMultiArgFunctionNode((UnaryBitFunctionNode)Identifiable.createFromId(node.getClassId())); + } + + private static void assertMultiArgFunctionNode(MultiArgFunctionNode node) { + try { + node.addArg(null); + fail("Should not be able to add a null argument."); + } catch (NullPointerException e) { + // expected + } + int initialSz = node.getNumArgs(); + node.addArg(new ConstantNode(new IntegerResultNode(69))); + assertEquals(1+initialSz, node.getNumArgs()); + node.addArg(new ConstantNode(new IntegerResultNode(6699))); + assertEquals(2+initialSz, node.getNumArgs()); + node.addArg(new ConstantNode(new IntegerResultNode(666999))); + assertEquals(3+initialSz, node.getNumArgs()); + + MultiArgFunctionNode obj = (MultiArgFunctionNode)assertSerialize(node); + assertEquals(node, obj); + assertEquals(node.getNumArgs(), obj.getNumArgs()); + for (int i = 0, len = node.getNumArgs(); i < len; i++) { + assertEquals(node.getArg(i), obj.getArg(i)); + } + + obj.addArg(new ConstantNode(new IntegerResultNode(69))); + assertFalse(node.equals(obj)); + } + + public void assertAdd(ExpressionNode arg1, ExpressionNode arg2, long lexpected, double dexpected) { + assertArith(new AddFunctionNode(), arg1, arg2, lexpected, dexpected); + } + + public void assertMultiply(ExpressionNode arg1, ExpressionNode arg2, 
long lexpected, double dexpected) { + assertArith(new MultiplyFunctionNode(), arg1, arg2, lexpected, dexpected); + } + + public void assertArith(MultiArgFunctionNode node, ExpressionNode arg1, ExpressionNode arg2, long lexpected, double dexpected) { + node.addArg(arg1); + node.addArg(arg2); + node.prepare(); + node.execute(); + assertEquals(lexpected, node.getResult().getInteger()); + assertEquals(dexpected, node.getResult().getFloat()); + } + + public void assertFunctionNode(FunctionNode node, long lexpected, double dexpected, String sexpected, byte[] rexpected) { + node.prepare(); + node.execute(); + assertEquals(lexpected, node.getResult().getInteger()); + assertEquals(dexpected, node.getResult().getFloat()); + assertEquals(sexpected, node.getResult().getString()); + assertTrue(Arrays.equals(rexpected, node.getResult().getRaw())); + } + + private static void assertResultNode(ResultNode node) { + BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer()); + long oldInteger = node.getInteger(); + double oldFloat = node.getFloat(); + String oldString = node.getString(); + byte[] oldRaw = node.getRaw(); + node.serialize(buf); + buf.flip(); + node.deserialize(buf); + assertEquals(oldInteger, node.getInteger()); + assertEquals(oldFloat, node.getFloat()); + assertEquals(oldString, node.getString()); + assertEquals(oldRaw.length, node.getRaw().length); + + buf = new BufferSerializer(new GrowableByteBuffer()); + node.serializeWithId(buf); + buf.flip(); + node.deserializeWithId(buf); + assertEquals(oldInteger, node.getInteger()); + assertEquals(oldFloat, node.getFloat()); + assertEquals(oldString, node.getString()); + assertEquals(oldRaw.length, node.getRaw().length); + + buf = new BufferSerializer(new GrowableByteBuffer()); + node.serializeWithId(buf); + buf.flip(); + ResultNode obj = (ResultNode)Identifiable.create(buf); + assertEquals(oldInteger, obj.getInteger()); + assertEquals(oldFloat, obj.getFloat()); + assertEquals(oldString, obj.getString()); + 
assertEquals(oldRaw.length, obj.getRaw().length); + + assertSerialize(node); + } + + private static Identifiable assertSerialize(Identifiable node) { + BufferSerializer buf = new BufferSerializer(new GrowableByteBuffer()); + node.serializeWithId(buf); + buf.flip(); + Identifiable created = Identifiable.create(buf); + assertEquals(node, created); + assertEquals(buf.getBuf().hasRemaining(), false); + Identifiable cloned = created.clone(); + assertEquals(node, cloned); + BufferSerializer createdBuffer = new BufferSerializer(new GrowableByteBuffer()); + BufferSerializer clonedBuffer = new BufferSerializer(new GrowableByteBuffer()); + created.serializeWithId(createdBuffer); + cloned.serializeWithId(clonedBuffer); + assertEquals(createdBuffer.getBuf().limit(), clonedBuffer.getBuf().limit()); + assertEquals(createdBuffer.position(), clonedBuffer.position()); + createdBuffer.getBuf().flip(); + clonedBuffer.getBuf().flip(); + for (int i = 0; i < createdBuffer.getBuf().limit(); i++) { + assertEquals(createdBuffer.getBuf().get(), clonedBuffer.getBuf().get()); + } + return created; + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java new file mode 100644 index 00000000000..4836c9c05d2 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/FixedWidthBucketFunctionTestCase.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.junit.Assert.assertSame; + +/** + * @author Simon Thoresen + */ +public class FixedWidthBucketFunctionTestCase { + + @Test + public void requireThatAccessorsWork() { + ExpressionNode arg = new AttributeNode("foo"); + NumericResultNode width = new IntegerResultNode(69L); + FixedWidthBucketFunctionNode node = new FixedWidthBucketFunctionNode(width, arg); + assertSame(arg, node.getArg()); + assertSame(width, node.getWidth()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java new file mode 100644 index 00000000000..a1255db4536 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/FloatBucketResultNodeTestCase.java @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.*; + +/** + * @author lulf + * @since 5.1 + */ +public class FloatBucketResultNodeTestCase extends ResultNodeTest { + @Test + public void testEmpty() { + final double val = 3.14; + final FloatBucketResultNode node = createNode(val, val); + assertTrue(node.empty()); + assertCorrectSerialization(node, new FloatBucketResultNode()); + } + + @Test + public void testRange() { + FloatBucketResultNode bucket = createNode(3.14, 6.9); + assertFalse(bucket.empty()); + assertEquals(bucket.getFrom(), 3.14, 0.01); + assertEquals(bucket.getTo(), 6.9, 0.01); + assertCorrectSerialization(bucket, new FloatBucketResultNode()); + assertTrue(dumpNode(bucket).contains("from: 3.14")); + assertTrue(dumpNode(bucket).contains("to: 6.9")); + } + + private FloatBucketResultNode createNode(double from, double to) { + return new FloatBucketResultNode(from, to); + } + + @Test + public void testCmp() { + assertOrder(createNode(6, 9), createNode(7, 9), createNode(8, 9)); + assertOrder(createNode(6, 7), createNode(6, 8), createNode(6, 9)); + assertOrder(createNode(6, 3), createNode(7, 2), createNode(8, 1)); + assertTrue(createNode(6, 8).onCmp(new NullResultNode()) != 0); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java new file mode 100755 index 00000000000..e1bfe321619 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ForceLoadTestCase.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +public class ForceLoadTestCase extends junit.framework.TestCase { + + public ForceLoadTestCase(String name) { + super(name); + } + + public void testLoadClasses() { + try { + new com.yahoo.searchlib.expression.ForceLoad(); + assertTrue(com.yahoo.searchlib.expression.ForceLoad.forceLoad()); + } catch (com.yahoo.system.ForceLoadError e) { + e.printStackTrace(); + assertTrue(false); + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java new file mode 100644 index 00000000000..a7517952703 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerBucketResultNodeTestCase.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Test; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class IntegerBucketResultNodeTestCase extends ResultNodeTest { + + @Test + public void testEmptyRange() { + IntegerBucketResultNode bucket = new IntegerBucketResultNode(4, 4); + assertTrue(bucket.empty()); + assertCorrectSerialization(bucket, new IntegerBucketResultNode()); + } + + @Test + public void testRange() { + IntegerBucketResultNode bucket = new IntegerBucketResultNode(4, 10); + assertThat(bucket.getFrom(), is(4l)); + assertThat(bucket.getTo(), is(10l)); + assertFalse(bucket.empty()); + assertTrue(dumpNode(bucket).contains("from: 4")); + assertTrue(dumpNode(bucket).contains("to: 10")); + assertCorrectSerialization(bucket, new IntegerBucketResultNode()); + } +} diff --git 
a/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java new file mode 100644 index 00000000000..07c88464958 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/IntegerResultNodeTestCase.java @@ -0,0 +1,118 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.ObjectDumper; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class IntegerResultNodeTestCase extends ResultNodeTest { + + List getResultNodes(long startvalue) { + return Arrays.asList(new Int8ResultNode((byte)startvalue), + new Int16ResultNode((short)startvalue), + new Int32ResultNode((int)startvalue), + new IntegerResultNode(startvalue)); + } + + @Test + public void testClassId() { + assertThat(new Int8ResultNode().getClassId(), is(Int8ResultNode.classId)); + assertThat(new Int16ResultNode().getClassId(), is(Int16ResultNode.classId)); + assertThat(new Int32ResultNode().getClassId(), is(Int32ResultNode.classId)); + assertThat(new IntegerResultNode().getClassId(), is(IntegerResultNode.classId)); + + } + + @Test + public void testTypeConversion() { + for (NumericResultNode node : getResultNodes(3)) { + assertThat(node.getInteger(), is(3l)); + assertEquals(node.getFloat(), 3.0, 0.01); + assertThat(node.getRaw(), is(new byte[]{0, 0, 0, 0, 0, 0, 0, (byte) 3})); + assertThat(node.getString(), is("3")); + assertThat(node.getNumber().toString(), is("3")); + } + } + + @Test + public void testMath() { + for (NumericResultNode node : 
getResultNodes(5)) { + assertThat(node.getInteger(), is(5l)); + node.negate(); + assertThat(node.getInteger(), is(-5l)); + node.multiply(new Int32ResultNode(3)); + assertThat(node.getInteger(), is(-15l)); + node.add(new Int32ResultNode(1)); + assertThat(node.getInteger(), is(-14l)); + node.divide(new Int32ResultNode(2)); + assertThat(node.getInteger(), is(-7l)); + node.modulo(new Int32ResultNode(3)); + assertThat(node.getInteger(), is(-1l)); + node.min(new Int32ResultNode(2)); + assertThat(node.getInteger(), is(-1l)); + node.min(new Int32ResultNode(-2)); + assertThat(node.getInteger(), is(-2l)); + node.max(new Int32ResultNode(-4)); + assertThat(node.getInteger(), is(-2l)); + node.max(new Int32ResultNode(4)); + assertThat(node.getInteger(), is(4l)); + assertThat(node.onCmp(new Int32ResultNode(3)), is(1)); + assertThat(node.onCmp(new Int32ResultNode(4)), is(0)); + assertThat(node.onCmp(new Int32ResultNode(5)), is(-1)); + node.set(new Int32ResultNode(8)); + assertThat(node.getInteger(), is(8l)); + assertThat(node.hashCode(), is((int)(8 + node.getClassId()))); + assertTrue(dumpNode(node).contains("value: 8")); + } + } + + @Test + public void testInt8() { + Int8ResultNode node = new Int8ResultNode(); + node.setValue((byte) 5); + assertThat(node.getInteger(), is(5l)); + } + + @Test + public void testInt16() { + Int16ResultNode node = new Int16ResultNode(); + node.setValue((short)5); + assertThat(node.getInteger(), is(5l)); + } + + @Test + public void testInt32() { + Int32ResultNode node = new Int32ResultNode(); + node.setValue(5); + assertThat(node.getInteger(), is(5l)); + } + + @Test + public void testLong() { + IntegerResultNode node = new IntegerResultNode(); + node.setValue(5); + assertThat(node.getInteger(), is(5l)); + } + + @Test + public void testSerialization() throws IllegalAccessException, InstantiationException { + for (NumericResultNode node : getResultNodes(8)) { + assertThat(node.getInteger(), is(8l)); + NumericResultNode out = 
node.getClass().newInstance(); + assertCorrectSerialization(node, out); + assertThat(out.getInteger(), is(node.getInteger())); + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java new file mode 100644 index 00000000000..9eb4ee4fea7 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/NullResultNodeTestCase.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.ObjectDumper; +import org.junit.Test; + +import java.util.regex.Pattern; + +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsNot.not; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class NullResultNodeTestCase { + @Test + public void testNullResultNode() { + NullResultNode nullRes = new NullResultNode(); + assertThat(nullRes.onGetClassId(), is(NullResultNode.classId)); + assertThat(nullRes.getInteger(), is(0l)); + assertThat(nullRes.getString(), is("")); + assertThat(nullRes.getRaw(), is(new byte[0])); + assertEquals(nullRes.getFloat(), 0.0, 0.01); + assertThat(nullRes.onCmp(new NullResultNode()), is(0)); + assertThat(nullRes.onCmp(new IntegerResultNode(0)), is(not(0))); + ObjectDumper dumper = new ObjectDumper(); + nullRes.visitMembers(dumper); + assertTrue(dumper.toString().contains("result: ")); + nullRes.set(new IntegerResultNode(3)); + assertThat(nullRes.onCmp(new IntegerResultNode(3)), is(not(0))); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java new file mode 100755 index 
00000000000..2924ee945e5 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ObjectVisitorTestCase.java @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.ObjectDumper; +import com.yahoo.searchlib.expression.FixedWidthBucketFunctionNode; +import com.yahoo.searchlib.expression.IntegerResultNode; +import com.yahoo.searchlib.expression.AttributeNode; +import junit.framework.TestCase; + +import java.util.Arrays; + +/** + * Checks the text produced by ObjectDumper.visit for a null reference, an integer, a string, a list, a string array and two expression nodes. + * + * NOTE(review): the expected strings are compared exactly. The empty values after "test: ", "[2]: " and "result: " and the single-space indentation inside the literals may be artifacts of how this copy was extracted - confirm against the real ObjectDumper output before relying on them. + * + * @author Simon Thoresen + */ +public class ObjectVisitorTestCase extends TestCase { + + /** Dumps each sample value under the name "test" and compares the result with the expected text. */ + public void testObjectDumper() { + assertDump("test: \n", null); + assertDump("test: 1\n", 1); + assertDump("test: 'foo'\n", "foo"); + assertDump("test: List {\n" + + " [0]: 'foo'\n" + + " [1]: 69\n" + + " [2]: \n" + + "}\n", + Arrays.asList("foo", 69, null)); + assertDump("test: String[] {\n" + + " [0]: 'foo'\n" + + " [1]: 'bar'\n" + + " [2]: 'baz'\n" + + "}\n", + new String[] { "foo", "bar", "baz" }); + assertDump("test: IntegerResultNode {\n" + + " classId: 16491\n" + + " value: 5\n" + + "}\n", + new IntegerResultNode(5)); + assertDump("test: FixedWidthBucketFunctionNode {\n" + + " classId: 16461\n" + + " result: \n" + + " args: List {\n" + + " [0]: AttributeNode {\n" + + " classId: 16439\n" + + " result: \n" + + " attribute: 'foo'\n" + + " }\n" + + " }\n" + + " width: IntegerResultNode {\n" + + " classId: 16491\n" + + " value: 5\n" + + " }\n" + + "}\n", + new FixedWidthBucketFunctionNode(new IntegerResultNode(5), new AttributeNode("foo"))); + } + + /** + * Visits obj under the name "test" with a new ObjectDumper and asserts that the dumper's toString() equals expected. + */ + private void assertDump(String expected, Object obj) { + ObjectDumper dump = new ObjectDumper(); + dump.visit("test", obj); + assertEquals(expected, dump.toString()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java 
b/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java new file mode 100644 index 00000000000..d2db697c743 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/RangeBucketPreDefFunctionTestCase.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.junit.Assert.assertSame; + +/** + * @author Simon Thoresen + */ +public class RangeBucketPreDefFunctionTestCase { + + @Test + public void requireThatAccessorsWork() { + ResultNodeVector bucketList = new IntegerResultNodeVector(); + ExpressionNode arg = new AttributeNode("foo"); + RangeBucketPreDefFunctionNode node = new RangeBucketPreDefFunctionNode(bucketList, arg); + assertSame(bucketList, node.getBucketList()); + assertSame(arg, node.getArg()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java new file mode 100644 index 00000000000..83a36445294 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/RawBucketResultNodeTestCase.java @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class RawBucketResultNodeTestCase extends ResultNodeTest { + @Test + public void testEmpty() { + RawBucketResultNode bucket = new RawBucketResultNode(new RawResultNode(new byte[]{6, 9}), new RawResultNode(new byte[]{6, 9})); + assertTrue(bucket.empty()); + assertCorrectSerialization(bucket, new RawBucketResultNode()); + } + + @Test + public void testRange() { + RawBucketResultNode bucket = new RawBucketResultNode(new RawResultNode(new byte[]{6, 9}), new RawResultNode(new byte[]{9, 6})); + assertFalse(bucket.empty()); + assertThat(bucket.getFrom(), is(new byte[]{6, 9})); + assertThat(bucket.getTo(), is(new byte[]{9, 6})); + assertCorrectSerialization(bucket, new RawBucketResultNode()); + assertTrue(dumpNode(bucket).contains("value: RawData(data = [6, 9])")); + assertTrue(dumpNode(bucket).contains("value: RawData(data = [9, 6])")); + } + + private RawBucketResultNode createNode(int from, int to) { + return new RawBucketResultNode(new RawResultNode(new byte[]{(byte)from}), + new RawResultNode(new byte[]{(byte)to})); + } + + @Test + public void testCmp() { + assertOrder(createNode(6, 9), createNode(7, 9), createNode(8, 9)); + assertOrder(createNode(6, 7), createNode(6, 8), createNode(6, 9)); + assertOrder(createNode(6, 3), createNode(7, 2), createNode(8, 1)); + assertTrue(createNode(6, 8).onCmp(new NullResultNode()) != 0); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java new file mode 100644 index 00000000000..17744db7edb --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeTest.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo 
Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.BufferSerializer; +import com.yahoo.vespa.objects.ObjectDumper; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class ResultNodeTest { + public String dumpNode(ResultNode node) { + ObjectDumper dump = new ObjectDumper(); + node.visitMembers(dump); + return dump.toString(); + } + + public void assertCorrectSerialization(ResultNode from, ResultNode to) { + BufferSerializer buffer = new BufferSerializer(); + from.serialize(buffer); + buffer.flip(); + to.deserialize(buffer); + assertThat(from.onCmp(to), is(0)); + } + + public void assertOrder(ResultNode a, ResultNode b, ResultNode c) { + assertTrue(a.onCmp(a) == 0); + assertTrue(a.onCmp(b) < 0); + assertTrue(a.onCmp(c) < 0); + + assertTrue(b.onCmp(a) > 0); + assertTrue(b.onCmp(b) == 0); + assertTrue(b.onCmp(c) < 0); + + assertTrue(c.onCmp(a) > 0); + assertTrue(c.onCmp(b) > 0); + assertTrue(c.onCmp(c) == 0); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java new file mode 100644 index 00000000000..ba306099a80 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ResultNodeVectorTestCase.java @@ -0,0 +1,167 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import com.yahoo.vespa.objects.BufferSerializer; +import org.junit.Test; + +import java.util.List; + +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsNot.not; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class ResultNodeVectorTestCase extends ResultNodeTest { + @Test + public void testClassId() { + assertThat(new IntegerResultNodeVector().getClassId(), is(IntegerResultNodeVector.classId)); + assertThat(new Int32ResultNodeVector().getClassId(), is(Int32ResultNodeVector.classId)); + assertThat(new Int16ResultNodeVector().getClassId(), is(Int16ResultNodeVector.classId)); + assertThat(new Int8ResultNodeVector().getClassId(), is(Int8ResultNodeVector.classId)); + assertThat(new FloatResultNodeVector().getClassId(), is(FloatResultNodeVector.classId)); + } + + @Test + public void testVectorAdd() { + Int8ResultNodeVector i8 = new Int8ResultNodeVector(); + i8.add(new Int8ResultNode((byte)9)); + i8.add(new Int8ResultNode((byte)2)); + i8.add((ResultNode)new Int8ResultNode((byte)5)); + assertThat(i8.getVector().size(), is(3)); + + Int16ResultNodeVector i16 = new Int16ResultNodeVector(); + i16.add(new Int16ResultNode((short)9)); + i16.add(new Int16ResultNode((short)2)); + i16.add((ResultNode)new Int16ResultNode((short)5)); + assertThat(i16.getVector().size(), is(3)); + + Int32ResultNodeVector i32 = new Int32ResultNodeVector(); + i32.add(new Int32ResultNode(9)); + i32.add(new Int32ResultNode(2)); + i32.add((ResultNode)new Int32ResultNode(5)); + assertThat(i32.getVector().size(), is(3)); + + IntegerResultNodeVector ieger = new IntegerResultNodeVector(); + ieger.add(new IntegerResultNode(9)); + ieger.add(new IntegerResultNode(2)); + ieger.add((ResultNode)new IntegerResultNode(5)); + assertThat(ieger.getVector().size(), is(3)); + + FloatResultNodeVector floatvec = new FloatResultNodeVector(); + floatvec.add(new 
FloatResultNode(3.3)); + floatvec.add(new FloatResultNode(3.4)); + floatvec.add((ResultNode)new FloatResultNode(3.5)); + assertThat(floatvec.getVector().size(), is(3)); + } + + @Test + public void testCmp() { + ResultNodeVector int8vec = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 2)); + ResultNodeVector int8veclarge = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 2)).add(new Int8ResultNode((byte) 5)); + ResultNodeVector int8vecsmall = new Int8ResultNodeVector().add(new Int8ResultNode((byte) 1)); + + ResultNodeVector int16vec = new Int16ResultNodeVector().add(new Int16ResultNode((short) 2)); + ResultNodeVector int16veclarge = new Int16ResultNodeVector().add(new Int16ResultNode((short) 2)).add(new Int16ResultNode((short) 5)); + ResultNodeVector int16vecsmall = new Int16ResultNodeVector().add(new Int16ResultNode((short) 1)); + + ResultNodeVector int32vec = new Int32ResultNodeVector().add(new Int32ResultNode(2)); + ResultNodeVector int32veclarge = new Int32ResultNodeVector().add(new Int32ResultNode(2)).add(new Int32ResultNode(5)); + ResultNodeVector int32vecsmall = new Int32ResultNodeVector().add(new Int32ResultNode(1)); + + ResultNodeVector intvec = new IntegerResultNodeVector().add(new IntegerResultNode(2)); + ResultNodeVector intveclarge = new IntegerResultNodeVector().add(new IntegerResultNode(2)).add(new IntegerResultNode(5)); + ResultNodeVector intvecsmall = new IntegerResultNodeVector().add(new IntegerResultNode(1)); + + FloatResultNodeVector floatvec = new FloatResultNodeVector().add(new FloatResultNode(2.2)); + FloatResultNodeVector floatveclarge = new FloatResultNodeVector().add(new FloatResultNode(2.2)).add(new FloatResultNode(5.5)); + FloatResultNodeVector floatvecsmall = new FloatResultNodeVector().add(new FloatResultNode(1.2)); + + StringResultNodeVector strvec = new StringResultNodeVector().add(new StringResultNode("foo")); + StringResultNodeVector strveclarge = new StringResultNodeVector().add(new StringResultNode("foolio")); + 
StringResultNodeVector strvecsmall = new StringResultNodeVector().add(new StringResultNode("bario")); + + RawResultNodeVector rawvec = new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 9})); + RawResultNodeVector rawveclarge = new RawResultNodeVector().add(new RawResultNode(new byte[]{9, 6})); + RawResultNodeVector rawvecsmall = new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 6})); + + assertClassCmp(int8vec); + assertClassCmp(int16vec); + assertClassCmp(int32vec); + assertClassCmp(intvec); + assertClassCmp(floatvec); + assertClassCmp(strvec); + assertClassCmp(rawvec); + + assertVecEqual(int8vec, int8vec); + assertVecLt(int8vec, int8veclarge); + assertVecGt(int8veclarge, int8vec); + assertVecGt(int8vec, int8vecsmall); + assertVecLt(int8vecsmall, int8vec); + + assertVecEqual(int16vec, int16vec); + assertVecLt(int16vec, int16veclarge); + assertVecGt(int16veclarge, int16vec); + assertVecGt(int16vec, int16vecsmall); + assertVecLt(int16vecsmall, int16vec); + + assertVecEqual(int32vec, int32vec); + assertVecLt(int32vec, int32veclarge); + assertVecGt(int32veclarge, int32vec); + assertVecGt(int32vec, int32vecsmall); + assertVecLt(int32vecsmall, int32vec); + + assertVecEqual(intvec, intvec); + assertVecLt(intvec, intveclarge); + assertVecGt(intveclarge, intvec); + assertVecGt(intvec, intvecsmall); + assertVecLt(intvecsmall, intvec); + + assertVecEqual(floatvec, floatvec); + assertVecLt(floatvec, floatveclarge); + assertVecGt(floatveclarge, floatvec); + assertVecGt(floatvec, floatvecsmall); + assertVecLt(floatvecsmall, floatvec); + + assertVecEqual(strvec, strvec); + assertVecLt(strvec, strveclarge); + assertVecGt(strveclarge, strvec); + assertVecGt(strvec, strvecsmall); + assertVecLt(strvecsmall, strvec); + + assertVecEqual(rawvec, rawvec); + assertVecLt(rawvec, rawveclarge); + assertVecGt(rawveclarge, rawvec); + assertVecGt(rawvec, rawvecsmall); + assertVecLt(rawvecsmall, rawvec); + } + + private void assertVecLt(ResultNodeVector vec1, 
ResultNodeVector vec2) { + assertTrue(vec1.onCmp(vec2) < 0); + } + + private void assertVecGt(ResultNodeVector vec1, ResultNodeVector vec2) { + assertTrue(vec1.onCmp(vec2) > 0); + } + + private void assertVecEqual(ResultNodeVector vec1, ResultNodeVector vec2) { + assertThat(vec1.onCmp(vec2), is(0)); + } + + private void assertClassCmp(ResultNodeVector add) { + assertThat(add.onCmp(new NullResultNode()), is(not(0))); + } + + @Test + public void testSerialize() throws InstantiationException, IllegalAccessException { + assertCorrectSerialization(new FloatResultNodeVector().add(new FloatResultNode(1.1)).add(new FloatResultNode(3.3)), new FloatResultNodeVector()); + assertCorrectSerialization(new IntegerResultNodeVector().add(new IntegerResultNode(1)).add(new IntegerResultNode(3)), new IntegerResultNodeVector()); + assertCorrectSerialization(new Int16ResultNodeVector().add(new Int16ResultNode((short) 1)).add(new Int16ResultNode((short) 3)), new Int16ResultNodeVector()); + assertCorrectSerialization(new Int8ResultNodeVector().add(new Int8ResultNode((byte) 1)).add(new Int8ResultNode((byte) 3)), new Int8ResultNodeVector()); + assertCorrectSerialization(new StringResultNodeVector().add(new StringResultNode("foo")).add(new StringResultNode("bar")), new StringResultNodeVector()); + assertCorrectSerialization(new RawResultNodeVector().add(new RawResultNode(new byte[]{6, 9})).add(new RawResultNode(new byte[]{9, 6})), new RawResultNodeVector()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java new file mode 100644 index 00000000000..b82c7a34048 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/StringBucketResultNodeTestCase.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.hamcrest.core.Is.is; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +/** + * @author lulf + * @since 5.1 + */ +public class StringBucketResultNodeTestCase extends ResultNodeTest { + @Test + public void testEmpty() { + StringBucketResultNode bucket = new StringBucketResultNode("aaa", "aaa"); + assertTrue(bucket.empty()); + assertCorrectSerialization(bucket, new StringBucketResultNode()); + } + + @Test + public void testRange() { + StringBucketResultNode bucket = new StringBucketResultNode("a", "d"); + assertThat(bucket.getFrom(), is("a")); + assertThat(bucket.getTo(), is("d")); + assertTrue(dumpNode(bucket).contains("value: 'a'")); + assertTrue(dumpNode(bucket).contains("value: 'd'")); + assertCorrectSerialization(bucket, new StringBucketResultNode()); + } + + @Test + public void testCmp() { + StringBucketResultNode b1 = new StringBucketResultNode("a", "d"); + StringBucketResultNode b2 = new StringBucketResultNode("d", "h"); + StringBucketResultNode b3 = new StringBucketResultNode("h", "u"); + assertTrue(b1.onCmp(b1) == 0); + assertTrue(b1.onCmp(b2) < 0); + assertTrue(b1.onCmp(b3) < 0); + + assertTrue(b2.onCmp(b1) > 0); + assertTrue(b2.onCmp(b2) == 0); + assertTrue(b2.onCmp(b3) < 0); + + assertTrue(b3.onCmp(b1) > 0); + assertTrue(b3.onCmp(b2) > 0); + assertTrue(b3.onCmp(b3) == 0); + + b2 = new StringBucketResultNode("a", "b"); + assertTrue(b1.onCmp(b2) > 0); + b2 = new StringBucketResultNode("a", "f"); + assertTrue(b1.onCmp(b2) < 0); + b2 = new StringBucketResultNode("k", "a"); + assertTrue(b1.onCmp(b2) < 0); + assertTrue(b1.onCmp(new NullResultNode()) != 0); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java new file mode 100644 index 00000000000..4d591843321 --- /dev/null +++ 
b/searchlib/src/test/java/com/yahoo/searchlib/expression/TimeStampFunctionTestCase.java @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; + +/** + * @author Simon Thoresen + */ +public class TimeStampFunctionTestCase { + + @Test + public void requireThatAccessorsWork() { + ExpressionNode arg = new AttributeNode("foo"); + for (TimeStampFunctionNode.TimePart part : TimeStampFunctionNode.TimePart.values()) { + for (Boolean gmt : Arrays.asList(true, false)) { + TimeStampFunctionNode node = new TimeStampFunctionNode(arg, part, gmt); + assertSame(arg, node.getArg()); + assertEquals(part, node.getTimePart()); + assertEquals(gmt, node.isGmt()); + assertEquals(!gmt, node.isLocal()); + } + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java new file mode 100644 index 00000000000..899e4e28a20 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/expression/ZCurveFunctionTestCase.java @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.expression; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; + +/** + * @author Simon Thoresen + */ +public class ZCurveFunctionTestCase { + + @Test + public void requireThatAccessorsWork() { + ExpressionNode arg = new AttributeNode("foo"); + ZCurveFunctionNode node = new ZCurveFunctionNode(arg, ZCurveFunctionNode.Dimension.X); + assertSame(arg, node.getArg()); + assertEquals(ZCurveFunctionNode.Dimension.X, node.getDimension()); + + node = new ZCurveFunctionNode(arg, ZCurveFunctionNode.Dimension.Y); + assertSame(arg, node.getArg()); + assertEquals(ZCurveFunctionNode.Dimension.Y, node.getDimension()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java new file mode 100644 index 00000000000..fc21b3496f9 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtConverterTestCase.java @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.io.UnsupportedEncodingException; +import java.security.Permission; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +/** + * @author Simon Thoresen Hult + */ +public class GbdtConverterTestCase { + + @Before + public void enableSecurityManager() { + System.setSecurityManager(new NoExitSecurityManager()); + } + + @After + public void disableSecurityManager() { + System.setSecurityManager(null); + } + + @Test + public void testOnlyOneArgumentIsAccepted() throws UnsupportedEncodingException { + assertError("Usage: GbdtConverter \n", new String[0]); + assertError("Usage: GbdtConverter \n", new String[] { "foo", "bar" }); + } + + @Test + public void testFileIsFound() throws UnsupportedEncodingException { + assertError("Could not find file 'not.found'.\n", new String[] { "not.found" }); + } + + @Test + public void testFileParsingExceptionIsCaught() throws UnsupportedEncodingException { + assertError("An error occurred while parsing the content of file 'src/test/files/gbdt_err.xml': " + + "Node 'Unknown' has no 'DecisionTree' children.\n", + new String[] { "src/test/files/gbdt_err.xml" }); + } + + @Test + public void testEmptyTreesAreIgnored() throws Exception { + assertConvert("src/test/files/gbdt_empty_tree.xml", + "if (INFD_SCORE < 3.2105989, if (GMP_SCORE < 0.013873, if (INFD_SCORE < 1.8138845, 0.0018257, if (GMP_SCORE < 0.006184, 0.0034753, 0.0062119)), if (INFD_SCORE < 1.5684295, if (GMP_SCORE < 0.0217475, 0.0043064, 0.0082065), 0.0110743)), if (GMP_SCORE < 0.010012, if (INFD_SCORE < 5.5982456, if (GMP_SCORE < 0.0052305, 0.0060169, 0.0094888), 0.0119292), 
0.017415))\n" + + "\n"); + } + + @Test + public void testTreesMayContainAResponse() throws Exception { + assertConvert("src/test/files/gbdt_tree_response.xml", + "if (INFD_SCORE < 2.128036, -1.12E-5, 8.71E-5) +\n" + + "if (value(0) < 1.0, 2.8E-6, 0.0) +\n" + + "if (GMP_SCORE < 0.016798, if (INFD_SCORE < 3.9760852, if (INFD_SCORE < 0.1266405, -5.98E-5, 2.25E-5), -1.383E-4), 1.529E-4)\n" + + "\n"); + } + + @Test + public void testConvertedModelIsPrintedToSystemOut() throws Exception { + assertConvert("src/test/files/gbdt.xml", + "if (F55 < 2.0932798, if (F42 < 1.7252731, if (F33 < 0.5, if (F38 < 1.5367546, 1.7333333, 1.3255814), if (F37 < 0.675922, 1.9014085, 1.0)), if (F109 < 0.5, if (F116 < 5.25, if (F111 < 0.0521445, 1.0, 1.9090909), if (F38 < 4.0740733, 0.8, if (F38 < 6.6152048, 1.7142857, 0.625))), 1.5945946)), if (F109 < 0.5, if (F113 < 0.7835808, if (F110 < 491.0, if (F56 < 2.5423126, if (F108 < 243.5, 1.375, 0.78), 0.5), 2.0), if (F103 < 0.9918365, 1.6, 0.3333333)), if (F59 < 0.9207, if (F30 < 0.86, 1.5890411, 0.625), if (F100 < 5.9548216, 1.0, 0.0)))) +\n" + + "if (F55 < 59.5480576, if (F42 < 1.8308522, if (F100 < 5.9549484, if (F107 < 0.5, -0.3406279, if (F56 < 1.7057916, if (F36 < 3.778285, if (F103 < 0.5600199, 0.047108, if (F36 < 1.2203553, if (F102 < 1.5, 0.0460316, -0.473794), -0.9825869)), -0.8848045), if (F47 < 15.5, 0.348047, -1.0890411))), 1.75), if (F113 < 0.8389627, if (F110 < 7.5, -0.5778378, if (F111 < 0.8596972, if (F114 < 831.5, if (F113 < 0.3807178, 0.0497646, if (F110 < 63.0, 0.6549377, 0.2486999)), if (F39 < 8.9685574, 0.3222195, -0.1690968)), 1.0381818)), if (F58 < 0.889763, -0.0702703, -1.6))), if (F102 < 3.5, -0.3059684, -1.5890411)) +\n" + + "if (F55 < 119.6311035, if (F55 < 90.895813, if (F39 < 12.162282, if (F35 < 1.1213787, if (F55 < 34.9389648, if (F45 < 3.5, if (F51 < 0.0502058, if (F103 < 0.8550526, if (F55 < 4.96804, 0.048519, 0.6596588), if (F38 < 1.3808891, -0.7416763, 0.0176633)), 0.4502234), -0.6811898), 0.5572351), if (F100 
< 3.3971992, if (F39 < 7.0869236, if (F43 < 5.5100875, if (F46 < 4.5, -0.1702421, -0.9797453), -1.5426025), 0.0774408), if (F52 < 22.3562355, if (F35 < 4.4263992, 0.4011598, -0.3898472), -1.75))), if (F39 < 14.5762558, if (F109 < 0.5, 1.6616928, 0.4001626), if (F100 < 3.0519419, 0.616491, -0.1808479))), -1.2135522), 0.5535716) +\n" + + "if (F43 < 9.272151, if (F36 < 9.0613861, if (F115 < 36.5, if (F34 < 1.4407213, if (F41 < 10.4713802, if (F34 < 1.2610778, if (F105 < 8.2159586, if (F46 < 88.5, 0.0075843, -0.6358738), if (F105 < 9.5308332, 1.4464284, -0.0895592)), 0.3532708), -1.8289603), if (F45 < 24.5, if (F111 < 0.9095335, if (F113 < 0.0529755, -0.6272416, if (F50 < 34.2163391, if (F113 < 0.0813664, 0.3683843, if (F34 < 1.6283135, -0.6334628, -0.1610307)), 1.5559684)), -1.7492068), 1.5060212)), if (F49 < 23.5787125, if (F100 < 6.5115452, if (F37 < 0.8601408, if (F57 < 6.5, 0.0547747, 1.193346), 0.6402962), 1.7395205), 2.5559684)), -3.1016318), 1.8657542) +\n" + + "if (F55 < 764.9404297, if (F34 < 23.2379246, if (F36 < 9.2296076, if (F114 < 116.0, if (F108 < 13.5, if (F108 < 12.5, -0.2736142, -1.7384173), if (F110 < 10.5, 0.0794336, -0.2171646)), if (F114 < 129.0, if (F109 < 0.5, 1.4407836, -0.1458547), if (F111 < 0.9703438, if (F47 < 18.5, if (F32 < 3.5, 0.0708936, if (F118 < 0.6794872, if (F119 < 3.8533711, if (F34 < 0.1213822, -2.0046196, -8.566E-4), -0.9490828), 0.0790339)), if (F113 < 0.3637481, 0.1161088, -0.9997786)), 1.3003114))), if (F111 < 0.2438112, -2.0582902, 0.6918949)), if (F115 < 95.0, -2.8602383, -0.0063699)), if (F101 < 0.9411763, -2.0253283, -0.6417007)) +\n" + + "if (F114 < 516.0, if (F49 < 8.9197922, if (F48 < 3.5, if (F36 < 1.3889931, if (F43 < 0.9699799, if (F34 < 9.6113167, if (F106 < 8.5, if (F108 < 153.5, if (F110 < 130.5, 0.180242, 2.545163), if (F108 < 161.5, -2.2253985, if (F55 < 31.4965668, -0.0122572, 0.7364454))), -0.2596613), 0.7247348), if (F111 < 0.2817393, -0.6409092, 0.2100071)), if (F116 < 18.75, 0.511352, -0.1093323)), 
0.9379161), 0.3603908), if (F46 < 32.5, if (F46 < 5.5, if (F39 < 11.7440758, if (F115 < 774.0, -0.0433343, -1.7439904), -0.3662575), 0.5413771), if (F110 < 67.0, if (F46 < 34.5, -2.6581287, -0.9399502), 0.075664))) +\n" + + "if (F42 < 24.3080139, if (F118 < 0.8452381, if (F119 < 6.2847767, if (F100 < 3.2778931, if (F46 < 30.0, if (F43 < 1.2712233, if (F104 < 3.5, 0.1365837, 0.5592712), if (F39 < 0.6294491, -0.8729556, -0.0123421)), 3.7677864), if (F111 < 0.6580936, if (F103 < 0.9319581, -0.2822538, if (F107 < 1.5, -0.3983539, if (F104 < 5.5, 0.0792465, 0.7273864))), if (F104 < 3.5, -1.1550477, 0.0490706))), 1.4735778), if (F111 < 0.3724709, if (F51 < 16.0989189, if (F114 < 154.0, if (F108 < 57.5, -0.0675733, -0.3994327), -0.0250285), -1.4871782), if (F34 < 2.1943491, 0.0229469, if (F108 < 1527.0, 1.4706301, 0.0285333)))), 3.489949) +\n" + + "if (F34 < 30.3465347, if (F103 < 0.9996098, if (F38 < 0.558669, if (F105 < 3.6287756, if (F104 < 3.5, if (F31 < 0.86, 0.1121421, 1.8153648), -0.8281607), if (F55 < 37.6819153, 0.9656266, 0.1585065)), if (F113 < 0.840385, if (F38 < 9.6623116, if (F46 < 136.0, if (F53 < 0.5548913, if (F38 < 8.4469957, if (F34 < 3.1969421, if (F114 < 20.0, -0.2944335, 0.03499), if (F34 < 3.4671984, -1.3154796, -0.1742507)), 0.4071658), if (F105 < 2.315434, if (F110 < 59.5, -0.1713032, -1.420465), -0.1456236)), 0.5520287), if (F108 < 12156.5, if (F111 < 0.3892631, -0.16285, -0.9015614), -2.6391831)), 0.2011691)), -3.073049), -3.2461861) +\n" + + "if (F55 < 28.4668102, if (F34 < 0.4929269, if (F30 < 0.86, if (F37 < 0.8360082, -0.0815482, -0.7898247), -0.5144471), if (F108 < 20498.0, if (F44 < 1.1856511, if (F56 < 1.0706565, if (F39 < 8.377079, if (F59 < 0.5604, 0.0429508, if (F34 < 0.7287493, -1.0264078, 0.6052195)), -0.4814408), if (F119 < 3.7530813, if (F115 < 8.5, 0.4916013, 0.0457533), if (F114 < 1093.5, 1.1673864, 0.3411176))), -0.6176305), if (F100 < 3.151973, 2.6908011, 0.3835885))), if (F116 < 62.0, if (F114 < 562.0, -0.415543, if (F103 < 
0.9826763, -0.1169933, if (F104 < 0.5, -0.0665763, 1.0238317))), if (F100 < 5.8046961, -3.2954836, 0.2781039))) +\n" + + "if (F34 < 26.9548168, if (F35 < 18.4714928, if (F115 < 698.0, if (F116 < 41.5, if (F38 < 1.1138718, if (F46 < 9.0, if (F31 < 0.86, 0.1059075, -0.2995292), if (F46 < 25.5, if (F46 < 13.0, 0.6297316, 1.8451736), 0.2079161)), if (F38 < 19.3839836, if (F49 < 29.9797497, if (F46 < 235.5, if (F38 < 1.2626771, -0.5165347, if (F35 < 10.3027954, if (F50 < 0.2823648, -0.0424489, if (F113 < 0.0776736, 0.7495954, -0.2948665)), 0.3229146)), -1.0711968), 0.3153474), if (F116 < 5.2182379, 2.8017734, 0.3444192))), if (F113 < 0.5691726, 1.7530511, 0.3534861)), -2.4915219), if (F103 < 0.9680555, -2.1724317, 0.2143739)), 3.1712332)\n" + + "\n"); + } + + @Test + public void testSetTestsWork() throws Exception { + assertConvert("src/test/files/gbdt_set_inclusion_test.xml", + "if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.25, 0.125), if (AGE_GROUP$ in [1.0], 0.125, 0.25)) +\n" + + "if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.2189117, -0.0), if (EDUCATION_LEVEL$ in [0.0], 0.1094559, 0.2343953)) +\n" + + "if (AGE_GROUP$ in [2.0], -0.0962185, if (EDUCATION_LEVEL$ in [0.0], if (AGE_GROUP$ in [1.0], 0.0, 0.2055456), 0.205553)) +\n" + + "if (EDUCATION_LEVEL$ in [0.0], 0.0905977, 0.1812016) +\n" + + "if (EDUCATION_LEVEL$ in [0.0, 1.0], if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [0.0], -0.191772, -0.0), if (AGE_GROUP$ in [1.0], if (EDUCATION_LEVEL$ in [0.0], 0.0, 0.1608304), 0.1708644)), 0.1923393) +\n" + + "if (EDUCATION_LEVEL$ in [\"foo\", \"bar\"], if (AGE_GROUP$ in [2.0], if (EDUCATION_LEVEL$ in [\"baz\"], -0.1696624, -0.0), if (AGE_GROUP$ in [1.0], if (EDUCATION_LEVEL$ in [0.0], 0.0, 0.1438091), 0.1521967)), 0.2003772) +\n" + + "if (value(0) < 1.0, -0.0108278, 0.0) +\n" + + "if (EDUCATION_LEVEL$ in [0.0], -0.1500528, if (GENDER$ in [1.0], 0.0652894, 0.1543407)) +\n" + + "if (AGE_GROUP$ in [1.0], 0.0, 0.1569706) +\n" + + "if (AGE_GROUP$ 
in [1.0], 0.0, if (EDUCATION_LEVEL$ in [1.0], 0.0, 0.1405829))\n" + + "\n"); + } + + @Test + public void testExtModelCausesBranchProbabilitiesToBeUsed() throws Exception { + assertConvert("src/test/files/gbdt.ext.xml", + "if (F4 < 0.6972222, if (F1 < 0.7928572, if (F54 < 0.9166666, 0.1145211, if (F111 < 1105.0, 0.3115265, 1.6772487, 0.77256316), 0.89193755), 1.493617, 0.970347), if (F111 < 85.5, 1.1202186, 2.5111421, 0.33763838), 0.93598676) +\n" + + "if (F1 < 0.8875, if (F1 < 0.0634921, 0.4755052, if (F111 < 8765.0, -0.0572274, 0.542222, 0.983461), 0.04500549), if (F114 < 55.0, -0.2409815, if (F54 < 0.55, 0.2211539, 1.3125561, 0.29620853), 0.21268657), 0.9683477) +\n" + + "if (F4 < 0.6972222, if (F3 < 0.9285715, if (F8 < 0.0540936, -0.007629, 0.322873, 0.95869595), if (F1 < 0.8166667, 0.843579, 0.1053924, 0.57522124), 0.97148263), if (F4 < 0.7619048, -0.5500016, 0.0274784, 0.5784133), 0.93598676) +\n" + + "if (F74 < 0.875, if (F54 < 0.8452381, -0.0031926, if (F111 < 141.5, -0.1402742, if (F4 < 0.5871212, 1.2691849, 0.2681826, 0.35703), 0.47206005), 0.92346483), if (F111 < 1105.0, -0.0588169, -0.7294473, 0.7697161), 0.92512107) +\n" + + "if (F1 < 0.7619048, 0.0089472, if (F3 < 0.9285715, if (F114 < 36.5, -1.1389426, if (F97 < 0.0468557, if (F6 < 0.5357143, 0.5614127, -0.2162048, 0.32456142), -0.8289478, 0.742671), 0.21483377), 0.0168442, 0.3867458), 0.9402976) +\n" + + "if (F1 < 0.6583333, -0.0187975, if (F74 < 0.2104235, 0.1951745, if (F68 < 0.8158333, if (F68 < 0.7616667, -0.0701389, -1.908711, 0.8685714), if (F91 < 0.9516667, 0.2880719, 0.0202404, 0.08918849), 0.043402776), 0.12821622), 0.72688085) +\n" + + "if (F97 < 0.0104738, if (F4 < 0.6833333, -0.1119661, -0.7331711, 0.795539), if (F111 < 1.5, -0.0487729, if (F54 < 0.0294118, if (F6 < 0.225, 0.3140816, 0.0241852, 0.44444445), 0.0063921, 0.077068806), 0.20816082), 0.015885202) +\n" + + "if (F8 < 0.0488095, if (F97 < 0.0196587, -0.037317, if (F4 < 0.5527778, 0.0085123, if (F111 < 4064.5, if (F111 < 109.5, 
0.2020749, -0.1841633, 0.5994437), 0.4359319, 0.8789731), 0.86483806), 0.24595065), -0.1090751, 0.94791543) +\n" + + "if (F111 < 7801.5, 0.005243, if (F4 < 0.5444444, -0.4434354, if (F4 < 0.725, if (F111 < 86382.5, if (F77 < 0.0250039, 0.9485625, 0.1099304, 0.2840909), -1.5740248, 0.9361702), -0.2924902, 0.48205128), 0.47580644), 0.97803235) +\n" + + "if (F4 < 0.9270834, if (F1 < 0.8166667, 0.0033574, if (F4 < 0.7071428, -0.2470163, 0.0482702, 0.5796915), 0.9535162), if (F54 < 0.5833334, 0.8142192, if (F1 < 0.95, 1.2211719, -0.0357525, 0.07643312), 0.20304568), 0.9883666) +\n" + + "if (F113 < 37.5050011, if (F111 < 252.5, -0.0110506, if (F4 < 0.69375, if (F5 < 0.9, 0.0488562, 0.3987899, 0.9362022), if (F74 < 0.75, -0.2113237, 0.3806402, 0.8606272), 0.8527072), 0.7694356), -0.5899943, 0.9981103) +\n" + + "if (F3 < 0.4365079, -0.0192181, if (F77 < 0.1715686, if (F111 < 1187.5, 0.016142, if (F112 < 467.5, if (F68 < 0.855, 0.9831077, 0.227789, 0.12048193), 0.0345274, 0.36617646), 0.89238805), 0.7605657, 0.9962163), 0.62542814) +\n" + + "if (F5 < 0.6125, if (F4 < 0.7928572, 0.0063205, 1.68561, 0.99925923), if (F113 < 1.6900001, if (F113 < 1.635, -0.0275853, 1.1438084, 0.99412453), if (F97 < 0.0363399, -0.0843354, -0.346791, 0.552356), 0.8166987), 0.876934) +\n" + + "if (F8 < 0.1396104, -0.001079, if (F54 < 0.55, if (F111 < 513.5, if (F77 < 0.0380987, -0.1117221, 0.9370234, 0.6551724), 1.654114, 0.7631579), if (F113 < 1.0700001, 0.1069487, -1.0835573, 0.8292683), 0.48101267), 0.9953348) +\n" + + "if (F6 < 0.7321429, 0.0033418, if (F111 < 74.5, if (F4 < 0.6708333, if (F1 < 0.5435606, 0.5229282, -0.451666, 0.11594203), 0.253665, 0.3270142), if (F113 < 2.47, -0.2267124, 0.2586769, 0.8803419), 0.4741573), 0.947443)\n" + + "\n"); + } + + private static void assertConvert(String gbdtModelFile, String expectedExpression) + throws ParseException, UnsupportedEncodingException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + System.setOut(new PrintStream(out)); + 
GbdtConverter.main(new String[] { gbdtModelFile }); + String actualExpression = out.toString("UTF-8"); + assertEquals(expectedExpression, actualExpression); + assertNotNull(new RankingExpression(actualExpression)); + } + + private static void assertError(String expected, String[] args) throws UnsupportedEncodingException { + ByteArrayOutputStream err = new ByteArrayOutputStream(); + System.setErr(new PrintStream(err)); + try { + GbdtConverter.main(args); + fail(); + } catch (ExitException e) { + assertEquals(1, e.status); + assertEquals(expected, err.toString("UTF-8")); + } + } + + private static class NoExitSecurityManager extends SecurityManager { + + @Override + public void checkPermission(Permission perm) { + // allow anything + } + + @Override + public void checkPermission(Permission perm, Object context) { + // allow anything + } + + @Override + public void checkExit(int status) { + throw new ExitException(status); + } + } + + private static class ExitException extends SecurityException { + + final int status; + + ExitException(int status) { + this.status = status; + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java new file mode 100644 index 00000000000..0561fb8ac7f --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/GbdtModelTestCase.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import org.junit.Test; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen + */ +public class GbdtModelTestCase { + + @Test + public void requireThatFactoryMethodWorks() throws Exception { + GbdtModel model = GbdtModel.fromXmlFile("src/test/files/gbdt.xml"); + assertEquals(10, model.trees().size()); + String exp = model.toRankingExpression(); + assertEquals(readFile("src/test/files/gbdt.expression").trim(), exp.trim()); + assertNotNull(new RankingExpression(exp)); + } + + @Test + public void requireThatIllegalXmlThrowsException() throws Exception { + assertIllegalXml(""); + assertIllegalXml(""); + assertIllegalXml("" + + " " + + ""); + assertIllegalXml("" + + " " + + ""); + assertIllegalXml("" + + " " + + " " + + " " + + ""); + } + + private static void assertIllegalXml(String xml) throws Exception { + try { + GbdtModel.fromXml(xml); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + private static String readFile(String file) throws IOException { + StringBuilder ret = new StringBuilder(); + BufferedReader in = new BufferedReader(new FileReader(file)); + while (true) { + String str = in.readLine(); + if (str == null) { + break; + } + ret.append(str).append("\n"); + } + return ret.toString(); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java new file mode 100644 index 00000000000..6b4e075b769 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ReferenceNodeTestCase.java @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import org.junit.Test; + +import java.util.Optional; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen Hult + */ +public class ReferenceNodeTestCase { + + @Test + public void requireThatAccessorsWork() { + TreeNode lhs = new ResponseNode(6.0, null); + TreeNode rhs = new ResponseNode(9.0, null); + NumericFeatureNode node = new NumericFeatureNode("foo", new DoubleValue(6.9), null, lhs, rhs); + assertEquals("foo", node.feature()); + assertEquals(6.9, node.value().asDouble(), 1E-6); + assertSame(lhs, node.left()); + assertSame(rhs, node.right()); + } + + @Test + public void requireThatRankingExpressionCanBeGenerated() { + assertExpression("if (a < 0.0, b, c)", new NumericFeatureNode("a", new DoubleValue(0), null, new MyNode("b"), new MyNode("c"))); + assertExpression("if (d < 1.0, e, f)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e"), new MyNode("f"))); + assertExpression("if (d < 1.0, e, f, 0.5)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e", 1), new MyNode("f", 1))); + assertExpression("if (d < 1.0, e, f, 0.75)", new NumericFeatureNode("d", new DoubleValue(1), null, new MyNode("e", 3), new MyNode("f", 1))); + } + + @Test + public void requireThatNodeCanBeGeneratedFromDomNode() throws Exception { + String xml = "\n" + + " \n" + + " \n" + + "\n"; + NumericFeatureNode node = (NumericFeatureNode)FeatureNode.fromDom(XmlHelper.parseXml(xml)); + assertEquals("a", node.feature()); + assertEquals(1, node.value().asDouble(), 1E-6); + assertTrue(node.left() instanceof ResponseNode); + assertEquals(2, ((ResponseNode)node.left()).value(), 1E-6); + assertTrue(node.right() instanceof ResponseNode); + assertEquals(4, ((ResponseNode)node.right()).value(), 1E-6); + } + + @Test + public void requireThatUnknownNodeThrowsException() throws Exception { + String xml = "\n" + + " \n" + + "\n"; + try { + 
TreeNode.fromDom(XmlHelper.parseXml(xml)); + fail(); + } catch (IllegalArgumentException e) { + + } + xml = "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + try { + TreeNode.fromDom(XmlHelper.parseXml(xml)); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + private static void assertExpression(String expected, TreeNode node) { + assertEquals(expected, node.toRankingExpression()); + } + + private static class MyNode extends TreeNode { + + final String str; + + MyNode(String str) { + this(str, Optional.empty()); + } + + MyNode(String str, int samples) { + super(Optional.of(samples)); + this.str = str; + } + + MyNode(String str, Optional samples) { + super(samples); + this.str = str; + } + + @Override + public String toRankingExpression() { + return str; + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java new file mode 100644 index 00000000000..7d6022fa304 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/ResponseNodeTestCase.java @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import org.junit.Test; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +/** + * @author Simon Thoresen Hult + */ +public class ResponseNodeTestCase { + + @Test + public void requireThatAccessorsWork() { + ResponseNode node = new ResponseNode(6.9, null); + assertEquals(6.9, node.value(), 1E-6); + } + + @Test + public void requireThatRankingExpressionCanBeGenerated() { + assertExpression("0.0", new ResponseNode(0, null)); + assertExpression("1.0", new ResponseNode(1, null)); + } + + @Test + public void requireThatNodeCanBeGeneratedFromDomNode() throws ParserConfigurationException, IOException, SAXException { + String xml = "\n"; + ResponseNode node = ResponseNode.fromDom(XmlHelper.parseXml(xml)); + assertEquals(1, node.value(), 1E-6); + } + + private static void assertExpression(String expected, TreeNode node) { + assertEquals(expected, node.toRankingExpression()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java new file mode 100644 index 00000000000..572bd2d8c11 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/TreeNodeTestCase.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.gbdt; + +import org.junit.Test; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen Hult + */ +public class TreeNodeTestCase { + + @Test + public void requireThatFeatureNodeCanBeGeneratedFromDomNode() + throws ParserConfigurationException, IOException, SAXException + { + String xml = "\n" + + " \n" + + " \n" + + "\n"; + TreeNode obj = TreeNode.fromDom(XmlHelper.parseXml(xml)); + assertTrue(obj instanceof FeatureNode); + NumericFeatureNode node = (NumericFeatureNode)obj; + assertEquals("a", node.feature()); + assertEquals(1, node.value().asDouble(), 1E-6); + assertTrue(node.left() instanceof ResponseNode); + assertEquals(2, ((ResponseNode)node.left()).value(), 1E-6); + assertTrue(node.right() instanceof ResponseNode); + assertEquals(4, ((ResponseNode)node.right()).value(), 1E-6); + } + + @Test + public void requireThatResponseNodeCanBeGeneratedFromDomNode() + throws ParserConfigurationException, IOException, SAXException + { + String xml = "\n"; + TreeNode obj = TreeNode.fromDom(XmlHelper.parseXml(xml)); + assertTrue(obj instanceof ResponseNode); + assertEquals(1, ((ResponseNode)obj).value(), 1E-6); + } + + @Test + public void requireThatUnknownNodeThrowsException() + throws ParserConfigurationException, IOException, SAXException + { + try { + TreeNode.fromDom(XmlHelper.parseXml("")); + fail(); + } catch (UnsupportedOperationException e) { + assertEquals("Unknown", e.getMessage()); + } + } +} \ No newline at end of file diff --git a/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java new file mode 100644 index 00000000000..7dc7c42f590 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/gbdt/XmlHelperTestCase.java @@ -0,0 +1,153 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.gbdt; + +import org.junit.Test; +import org.w3c.dom.Element; + +import java.util.List; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen Hult + */ +public class XmlHelperTestCase { + + @Test + public void requireThatAttributeTextCanBeRetrieved() throws Exception { + Element node = XmlHelper.parseXml(""); + assertEquals("v1", XmlHelper.getAttributeText(node, "a1")); + assertEquals("v2", XmlHelper.getAttributeText(node, "a2")); + } + + @Test + public void requireThatMissingAttributeTextThrowsIllegalArgument() throws Exception { + try { + XmlHelper.getAttributeText(XmlHelper.parseXml(""), "a1"); + fail(); + } catch (IllegalArgumentException e) { + + } + try { + XmlHelper.getAttributeText(XmlHelper.parseXml(""), "a1"); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + @Test + public void requireThatSingleElementCanBeRetrieved() throws Exception { + String xml = "" + + " " + + ""; + Element element = XmlHelper.getSingleElement(XmlHelper.parseXml(xml), null); + assertNotNull(element); + assertEquals("a", XmlHelper.getAttributeText(element, "id")); + } + + @Test + public void requireThatNamedSingleElementCanBeRetrieved() throws Exception { + String xml = "" + + " " + + " " + + " " + + ""; + Element element = XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child"); + assertNotNull(element); + assertEquals("b", XmlHelper.getAttributeText(element, "id")); + } + + @Test + public void requireThatMissingSingleElementThrowsIllegalArgument() throws Exception { + try { + XmlHelper.getSingleElement(XmlHelper.parseXml(""), null); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + @Test + public void requireThatMissingNamedSingleElementThrowsIllegalArgument() throws Exception { + String xml = "" + + " " + + ""; + try { + XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child"); + fail(); + } catch 
(IllegalArgumentException e) { + + } + } + + @Test + public void requireThatAmbigousSingleElementThrowsIllegalArgument() throws Exception { + String xml = "" + + " " + + " " + + ""; + try { + XmlHelper.getSingleElement(XmlHelper.parseXml(xml), null); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + @Test + public void requireThatAmbigousNamedSingleElementThrowsIllegalArgument() throws Exception { + String xml = "" + + " " + + " " + + " " + + ""; + try { + XmlHelper.getSingleElement(XmlHelper.parseXml(xml), "child"); + fail(); + } catch (IllegalArgumentException e) { + + } + } + + @Test + public void requireThatChildElementsCanBeRetrieved() throws Exception { + String xml = "" + + " " + + " " + + ""; + List lst = XmlHelper.getChildElements(XmlHelper.parseXml(xml), null); + assertNotNull(lst); + assertEquals(2, lst.size()); + assertEquals("a", XmlHelper.getAttributeText(lst.get(0), "id")); + assertEquals("b", XmlHelper.getAttributeText(lst.get(1), "id")); + } + + @Test + public void requireThatNamedChildElementsCanBeRetrieved() throws Exception { + String xml = "" + + " " + + " " + + " " + + ""; + List lst = XmlHelper.getChildElements(XmlHelper.parseXml(xml), "child"); + assertNotNull(lst); + assertEquals(2, lst.size()); + assertEquals("a", XmlHelper.getAttributeText(lst.get(0), "id")); + assertEquals("c", XmlHelper.getAttributeText(lst.get(1), "id")); + } + + @Test + public void requireThatChildElementsAreNeverNull() throws Exception { + List lst = XmlHelper.getChildElements(XmlHelper.parseXml(""), null); + assertNotNull(lst); + assertTrue(lst.isEmpty()); + } + + @Test + public void requireThatNamedChildElementsAreNeverNull() throws Exception { + List lst = XmlHelper.getChildElements(XmlHelper.parseXml(""), "child"); + assertNotNull(lst); + assertTrue(lst.isEmpty()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java 
b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java new file mode 100644 index 00000000000..e95af6ad61d --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/CsvFileCaseListTestCase.java @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga.test; + +import com.yahoo.searchlib.mlr.ga.TrainingParameters; +import com.yahoo.searchlib.mlr.ga.caselist.CsvFileCaseList; +import com.yahoo.yolean.Exceptions; +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * @author Jon Bratseth + */ +public class CsvFileCaseListTestCase { + + private static final double delta = 0.000001; + + @Test + public void testLegalFile() { + CsvFileCaseList list = new CsvFileCaseList("src/test/files/mlr/cases.csv"); + + assertEquals(3,list.cases().size()); + { + TrainingSet.Case case1 = list.cases().get(0); + assertEquals(1.0, case1.targetValue(), delta); + assertEquals(2, case1.arguments().names().size()); + assertEquals(2.0, case1.arguments().get("arg1").asDouble(),delta); + assertEquals(-1.3, case1.arguments().get("arg2").asDouble(),delta); + } + + { + TrainingSet.Case case2 = list.cases().get(1); + assertEquals(-1.003, case2.targetValue(), delta); + assertEquals(1, case2.arguments().names().size()); + assertEquals(500007, case2.arguments().get("arg1").asDouble(),delta); + } + + { + TrainingSet.Case case3 = list.cases().get(2); + assertEquals(0, case3.targetValue(), delta); + assertEquals(1, case3.arguments().names().size()); + assertEquals(1.0, case3.arguments().get("arg2").asDouble(),delta); + } + + TrainingSet trainingSet = new TrainingSet(list, new TrainingParameters()); + assertEquals(2, trainingSet.argumentNames().size()); + assertTrue(trainingSet.argumentNames().contains("arg1")); + assertTrue(trainingSet.argumentNames().contains("arg2")); + } + + @Test + 
public void testNonExistingFile() { + try { + new CsvFileCaseList("nosuchfile"); + } + catch (IllegalArgumentException e) { + assertEquals("Could not create a case list from file 'nosuchfile': nosuchfile (No such file or directory)", Exceptions.toMessageString(e)); + } + } + + @Test + public void testInvalidFile1() { + try { + new CsvFileCaseList("src/test/files/mlr/cases-illegal1.csv"); + } + catch (IllegalArgumentException e) { + assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal1.csv': At line 5, element 3: Expected argument on the form 'identifier:double', got ' arg2:'", Exceptions.toMessageString(e)); + } + } + + @Test + public void testInvalidFile2() { + try { + new CsvFileCaseList("src/test/files/mlr/cases-illegal2.csv"); + } + catch (IllegalArgumentException e) { + assertEquals("Could not create a case list from file 'src/test/files/mlr/cases-illegal2.csv': At line 2: Expected a target value double at the start of the line, got '5db'", Exceptions.toMessageString(e)); + } + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java new file mode 100644 index 00000000000..fc834181f53 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/ExampleLearningSessions.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga.test; + +import com.yahoo.searchlib.mlr.ga.PrintingTracker; +import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; +import com.yahoo.searchlib.mlr.ga.Trainer; +import com.yahoo.searchlib.mlr.ga.TrainingParameters; +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +import java.util.ArrayList; +import java.util.List; + +/** + * Main class - drives a learning session from the command line. + * + * @author Jon Bratseth + */ +public class ExampleLearningSessions { + + public static void main(String[] args) throws ParseException { + test3(); + } + + // Always learnt precisely in less than a second + private static void test1() throws ParseException { + TrainingParameters parameters = new TrainingParameters(); + + RankingExpression target = new RankingExpression("2*x"); + List arguments = new ArrayList<>(); + arguments.add(MapContext.fromString("x:0").freeze()); + arguments.add(MapContext.fromString("x:1").freeze()); + arguments.add(MapContext.fromString("x:2").freeze()); + TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); + + Trainer trainer = new Trainer(trainingSet); + + System.out.println("Learning ..."); + RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); + } + + // Solved well in a few seconds at most. Slow going thereafter. + private static void test2() throws ParseException { + TrainingParameters parameters = new TrainingParameters(); + parameters.setSpeciesLifespan(100); // Shorter lifespan is faster? 
+ + RankingExpression target = new RankingExpression("5*x*x + 2*x + 13"); + List arguments = new ArrayList<>(); + arguments.add(MapContext.fromString("x:0").freeze()); + arguments.add(MapContext.fromString("x:1").freeze()); + arguments.add(MapContext.fromString("x:2").freeze()); + arguments.add(MapContext.fromString("x:3").freeze()); + arguments.add(MapContext.fromString("x:4").freeze()); + arguments.add(MapContext.fromString("x:5").freeze()); + arguments.add(MapContext.fromString("x:6").freeze()); + arguments.add(MapContext.fromString("x:7").freeze()); + arguments.add(MapContext.fromString("x:8").freeze()); + arguments.add(MapContext.fromString("x:9").freeze()); + arguments.add(MapContext.fromString("x:10").freeze()); + arguments.add(MapContext.fromString("x:50").freeze()); + arguments.add(MapContext.fromString("x:500").freeze()); + arguments.add(MapContext.fromString("x:5000").freeze()); + arguments.add(MapContext.fromString("x:50000").freeze()); + TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); + + Trainer trainer = new Trainer(trainingSet); + + System.out.println("Learning ..."); + RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); + } + + // Solved well in at most a few minutes + private static void test3() throws ParseException { + TrainingParameters parameters = new TrainingParameters(); + parameters.setAllowConditions(false); // disallow non-smooth functions: Speeds up learning of smooth ones greatly + + RankingExpression target = new RankingExpression("-2.7*x*x*x + 5*x*x + 2*x + 13"); + List arguments = new ArrayList<>(); + arguments.add(MapContext.fromString("x:-50000").freeze()); + arguments.add(MapContext.fromString("x:-5000").freeze()); + arguments.add(MapContext.fromString("x:-500").freeze()); + arguments.add(MapContext.fromString("x:-50").freeze()); + arguments.add(MapContext.fromString("x:-10").freeze()); + 
arguments.add(MapContext.fromString("x:0").freeze()); + arguments.add(MapContext.fromString("x:1").freeze()); + arguments.add(MapContext.fromString("x:2").freeze()); + arguments.add(MapContext.fromString("x:3").freeze()); + arguments.add(MapContext.fromString("x:4").freeze()); + arguments.add(MapContext.fromString("x:5").freeze()); + arguments.add(MapContext.fromString("x:6").freeze()); + arguments.add(MapContext.fromString("x:7").freeze()); + arguments.add(MapContext.fromString("x:8").freeze()); + arguments.add(MapContext.fromString("x:9").freeze()); + arguments.add(MapContext.fromString("x:10").freeze()); + arguments.add(MapContext.fromString("x:50").freeze()); + arguments.add(MapContext.fromString("x:500").freeze()); + arguments.add(MapContext.fromString("x:5000").freeze()); + arguments.add(MapContext.fromString("x:50000").freeze()); + TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), parameters); + + Trainer trainer = new Trainer(trainingSet); + + System.out.println("Learning ..."); + RankingExpression learntExpression = trainer.train(parameters, new PrintingTracker()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java new file mode 100644 index 00000000000..51460855983 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MainTestCase.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga.test; + +import com.yahoo.searchlib.mlr.ga.Evolvable; +import com.yahoo.searchlib.mlr.ga.Main; +import com.yahoo.searchlib.mlr.ga.PrintingTracker; +import com.yahoo.searchlib.mlr.ga.Species; +import com.yahoo.searchlib.mlr.ga.Tracker; +import com.yahoo.searchlib.mlr.ga.TrainingParameters; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import org.junit.Test; +import static org.junit.Assert.*; + +import java.util.List; + +/** + * Tests the main class used from the command line + * + * @author Jon Bratseth + */ +public class MainTestCase { + + /** Tests that an extremely simple function expressed as cases in a file is learnt perfectly. */ + @Test + public void testMain() { + SilentTestTracker tracker = new SilentTestTracker(); + new Main(new String[] { "src/test/files/mlr/cases-linear.csv"}, tracker); + assertTrue(Double.isInfinite(tracker.winner.getFitness())); + } + + private static class SilentTestTracker implements Tracker { + + public Evolvable winner; + + @Override + public void newSpecies(Species predecessor, int initialSize, List genePool) { + } + + @Override + public void newSpeciesCreated(Species predecessor) { + } + + @Override + public void speciesCompleted(Species predecessor) { + } + + @Override + public void iteration(Species species, int generation) { + } + + @Override + public void result(Evolvable winner) { + this.winner = winner; + } + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java new file mode 100644 index 00000000000..ab1d5c362b8 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/MockTrainingSetTestCase.java @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.mlr.ga.test; + +import com.yahoo.searchlib.mlr.ga.RankingExpressionCaseList; +import com.yahoo.searchlib.mlr.ga.TrainingParameters; +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import org.junit.Test; +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author Jon Bratseth + */ +public class MockTrainingSetTestCase { + + @Test + public void testMockTrainingSet() throws ParseException { + RankingExpression target = new RankingExpression("2*x"); + List arguments = new ArrayList<>(); + arguments.add(MapContext.fromString("x:0")); + arguments.add(MapContext.fromString("x:1")); + arguments.add(MapContext.fromString("x:2")); + TrainingSet trainingSet = new TrainingSet(new RankingExpressionCaseList(arguments, target), new TrainingParameters()); + assertTrue(Double.isInfinite(trainingSet.evaluate(new RankingExpression("2*x")))); + assertEquals(4.0, trainingSet.evaluate(new RankingExpression("x")), 0.001); + assertEquals(0.0, trainingSet.evaluate(new RankingExpression("x/x")), 0.001); + } + + @Test + public void testEvaluation() throws ParseException { + // with freezing + assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001); + assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2").freeze()).asDouble(),0.0001); + + // without freezing + assertEquals(16.0,new RankingExpression("2*x*x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001); + assertEquals(8.0,new RankingExpression("x*x+x*x").evaluate(MapContext.fromString("x:2")).asDouble(),0.0001); + } + +} diff --git 
a/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java new file mode 100644 index 00000000000..9c3e514ddad --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/ga/test/TripAdvisorFileCaseList.java @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.ga.test; + +import com.yahoo.searchlib.mlr.ga.CaseList; +import com.yahoo.searchlib.mlr.ga.TrainingSet; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.*; + +/** + * Reads a tripadvisor Kaggle challenge training set + * + * @author Jon Bratseth + */ +public class TripAdvisorFileCaseList implements CaseList { + + private List cases = new ArrayList<>(); + private Map columnNames = new HashMap<>(); + + /** + * Reads a case list from file. 
+ * + * @throws IllegalArgumentException if the file could not be found or opened + */ + public TripAdvisorFileCaseList(String fileName) throws IllegalArgumentException { + System.out.print("Reading training data "); + try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) { + String line; + readColumnNames(reader.readLine()); + int lineNumber=1; + while (null != (line=reader.readLine())) { + lineNumber++; + line = line.trim(); + if (line.startsWith("#")) continue; + if (line.isEmpty()) continue; + cases.add(lineToCase(line, lineNumber)); + } + } + catch (IOException | IllegalArgumentException e) { + throw new IllegalArgumentException("Could not create a case list from file '" + fileName + "'", e); + } + System.out.println("done"); + } + + private void readColumnNames(String line) { + int columnNumber = 0; + for (String columnName : line.split(",")) + columnNames.put(columnNumber++, columnName); + } + + protected TrainingSet.Case lineToCase(String line, int lineNumber) { + if ((lineNumber % 10000) ==0) + System.out.print("."); + + Map columnValues = readColumns(line); + + double targetValue = columnValues.get("click_bool") + columnValues.get("booking_bool")*5; + + Context context = new MapContext(); + for (Map.Entry value : columnValues.entrySet()) { + if (value.getKey().equals("click_bool")) continue; + if (value.getKey().equals("gross_bookings_usd")) continue; + if (value.getKey().equals("booking_bool")) continue; + context.put(value.getKey(),value.getValue()); + } + return new TrainingSet.Case(context, targetValue); + } + + private Map readColumns(String line) { + Map columnValues = new LinkedHashMap<>(); + int columnNumber = 0; + for (String valueString : line.split(",")) { + String columnName = columnNames.get(columnNumber++); + if (columnName.equals("date_time")) continue; + Double columnValue; + if (valueString.equals("NULL")) { + columnValue = 0.0; + } + else { + try { + columnValue = Double.parseDouble(valueString); + } + catch 
(NumberFormatException e) { + throw new IllegalArgumentException("Could not parse column '" + columnName + "'",e); + } + } + columnValues.put(columnName, columnValue); + } + return columnValues; + } + + @Override + public List cases() { return Collections.unmodifiableList(cases); } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java b/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java new file mode 100644 index 00000000000..301fdfcd4f2 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/mlr/gbdt/ExpressionAnalysisRunner.java @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.mlr.gbdt; + +import org.junit.Ignore; +import org.junit.Test; + +/** + * Run an expression analyser without having to muck with classpath. + * + * @author bratseth + */ +public class ExpressionAnalysisRunner { + + @Test @Ignore + public void runAnalysis() { + ExpressionAnalysis.main(new String[] { "/Users/bratseth/Downloads/getty_mlr_001.expression"}); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java new file mode 100644 index 00000000000..804f34ccce8 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/ElementCompletenessTestCase.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features; + +import static org.junit.Assert.*; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author bratseth + */ +public class ElementCompletenessTestCase { + + private static final double delta = 0.0001; + + @Test + public void testElementCompleteness1() { + Map query = createQuery(); + ElementCompleteness.Item[] items = createField(1); + + Features f = ElementCompleteness.compute(query, items); + assertEquals(0.26111111111111107, f.get("completeness").asDouble(), delta); + assertEquals(1.0, f.get("fieldCompleteness").asDouble(), delta); + assertEquals(0.2222222222222222, f.get("queryCompleteness").asDouble(), delta); + assertEquals(3.0, f.get("elementWeight").asDouble(), delta); + } + + @Test + public void testElementCompleteness2() { + Map query = createQuery(); + ElementCompleteness.Item[] items = createField(2); + + Features f = ElementCompleteness.compute(query, items); + assertEquals(0.975, f.get("completeness").asDouble(), delta); + assertEquals(0.5, f.get("fieldCompleteness").asDouble(), delta); + assertEquals(1.0, f.get("queryCompleteness").asDouble(), delta); + assertEquals(4.0, f.get("elementWeight").asDouble(), delta); + } + + @Test + public void testElementCompleteness3() { + Map query = createQuery(); + ElementCompleteness.Item[] items = createField(3); + + Features f = ElementCompleteness.compute(query, items); + assertEquals(1.0, f.get("completeness").asDouble(), delta); + assertEquals(1.0, f.get("fieldCompleteness").asDouble(), delta); + assertEquals(1.0, f.get("queryCompleteness").asDouble(), delta); + assertEquals(5.0, f.get("elementWeight").asDouble(), delta); + } + + @Test + public void testElementCompletenessNoMatches() { + ElementCompleteness.Item[] items = createField(3); + + Features f = ElementCompleteness.compute(new HashMap(), items); + assertEquals(0.0, f.get("completeness").asDouble(), delta); + assertEquals(0.0, f.get("fieldCompleteness").asDouble(), 
delta); + assertEquals(0.0, f.get("queryCompleteness").asDouble(), delta); + assertEquals(0.0, f.get("elementWeight").asDouble(), delta); + } + + private Map createQuery() { + Map query = new HashMap<>(); + query.put("a", 100); + query.put("b", 150); + query.put("c", 200); + return query; + } + + private ElementCompleteness.Item[] createField(int size) { + ElementCompleteness.Item[] items = new ElementCompleteness.Item[size]; + if (size > 0) items[0] = new ElementCompleteness.Item("a", 3); // qc: 100/450=0.22, fc: 1.0, c: 0.611 + if (size > 1) items[1] = new ElementCompleteness.Item("a b c d e f", 4); // qc: 1.0, fc: 0.5, c: 0.75 + if (size > 2) items[2] = new ElementCompleteness.Item("a b c", 5); + return items; + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java new file mode 100644 index 00000000000..61c313956c5 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/FieldTermMatchTestCase.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features; + +import static org.junit.Assert.*; + +import org.junit.Test; + +/** + * @author bratseth + */ +public class FieldTermMatchTestCase { + + private static final double delta = 0.0001; + + @Test + public void testFieldTermMatch() { + assertEquals(1.0, FieldTermMatch.compute("a", "a b c").get("occurrences").asDouble(), delta); + assertEquals(0.0, FieldTermMatch.compute("a", "a b c").get("firstPosition").asDouble(), delta); + + assertEquals(3.0, FieldTermMatch.compute("a", "a a a").get("occurrences").asDouble(), delta); + assertEquals(0.0, FieldTermMatch.compute("a", "a a a").get("firstPosition").asDouble(), delta); + + assertEquals(0.0, FieldTermMatch.compute("d", "a b c").get("occurrences").asDouble(), delta); + assertEquals(1000000.0, FieldTermMatch.compute("d", "a b c").get("firstPosition").asDouble(), delta); + + assertEquals(0.0, FieldTermMatch.compute("d", "").get("occurrences").asDouble(), delta); + assertEquals(1000000, FieldTermMatch.compute("d", "").get("firstPosition").asDouble(), delta); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java new file mode 100644 index 00000000000..14ea58961ba --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/SemanticDistanceTestCase.java @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch; + +import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsComputer; + +import java.util.Set; +import java.util.HashSet; + +/** + * The "semantic distance" refers to the non-continuous distance from a token + * to the next token used by the string match metrics algorithm. 
This class + * tests invariants which must hold for any such distance metric as well as specifics + * for the currently used distance metric. + * + * @author bratseth + */ +public class SemanticDistanceTestCase extends junit.framework.TestCase { + + FieldMatchMetricsComputer c; + + public SemanticDistanceTestCase(String name) { + super(name); + } + + public @Override void setUp() { + c=new FieldMatchMetricsComputer(); + StringBuilder field=new StringBuilder(); + for (int i=0; i<150; i++) + field.append("t" + i + " "); + c.compute("query",field.toString()); // Just to set the field value + } + + /** Must be true using any semantic distance function */ + public void testBothWayConversionProducesOriginalValue() { + assertBothWayConversionProducesOriginalValue(50); + assertBothWayConversionProducesOriginalValue(10); + assertBothWayConversionProducesOriginalValue(5); + assertBothWayConversionProducesOriginalValue(0); + assertBothWayConversionProducesOriginalValue(140); + assertBothWayConversionProducesOriginalValue(145); + assertBothWayConversionProducesOriginalValue(149); + } + + /** Must be true using any semantic distance function */ + public void testFunctionsAreOneToOne() { + assertFunctionsAreOneToOne(50); + assertFunctionsAreOneToOne(10); + assertFunctionsAreOneToOne(5); + assertFunctionsAreOneToOne(0); + assertFunctionsAreOneToOne(140); + assertFunctionsAreOneToOne(145); + assertFunctionsAreOneToOne(149); + } + + /** Specific to this particular distance function */ + public void testFunction() { + int zeroJ=50; + assertFunction(50,0,zeroJ); + assertFunction(59,9,zeroJ); + assertFunction(49,10,zeroJ); + assertFunction(40,19,zeroJ); + assertFunction(60,20,zeroJ); + assertFunction(149,109,zeroJ); + assertFunction(39,110,zeroJ); + assertFunction(0,149,zeroJ); + + zeroJ=0; + assertFunction(0,0,zeroJ); + assertFunction(10,10,zeroJ); + assertFunction(20,20,zeroJ); + assertFunction(149,149,zeroJ); + + zeroJ=5; + assertFunction(5,0,zeroJ); + assertFunction(10,5,zeroJ); + 
assertFunction(14,9,zeroJ); + assertFunction(4,10,zeroJ); + assertFunction(0,14,zeroJ); + assertFunction(15,15,zeroJ); + assertFunction(25,25,zeroJ); + assertFunction(149,149,zeroJ); + + zeroJ=149; + assertFunction(149,0,zeroJ); + assertFunction(140,9,zeroJ); + assertFunction(130,19,zeroJ); + assertFunction(0,149,zeroJ); + + zeroJ=145; + assertFunction(145,0,zeroJ); + assertFunction(149,4,zeroJ); + assertFunction(144,5,zeroJ); + assertFunction(140,9,zeroJ); + assertFunction(135,14,zeroJ); + assertFunction(125,24,zeroJ); + assertFunction(0,149,zeroJ); + } + + /** Hits both limits at once */ + public void testSmallField() { + c=new FieldMatchMetricsComputer(); + c.compute("query","my field value four"); // Just to set the field value + assertBothWayConversionProducesOriginalValue(2); + assertBothWayConversionProducesOriginalValue(0); + assertBothWayConversionProducesOriginalValue(3); + assertFunctionsAreOneToOne(2); + assertFunctionsAreOneToOne(0); + assertFunctionsAreOneToOne(3); + + int zeroJ=2; + assertFunction(2,0,zeroJ); + assertFunction(3,1,zeroJ); + assertFunction(1,2,zeroJ); + assertFunction(0,3,zeroJ); + } + + private void assertBothWayConversionProducesOriginalValue(int zeroJ) { + // Starting point in the middle + for (int j=0; j=0", semanticDistance >= 0); + int backConvertedJ=c.semanticDistanceToFieldIndex(semanticDistance,zeroJ); + assertEquals("Using zeroJ=" + zeroJ + ": " + j + "->" + semanticDistance + "->" + backConvertedJ,j, backConvertedJ); + } + } + + private void assertFunctionsAreOneToOne(int zeroJ) { + Set distances=new HashSet(); + for (int j=0; j" + semanticDistance + " is unique", ! 
distances.contains(semanticDistance)); + distances.add(semanticDistance); + } + } + + private void assertFunction(int j,int semanticDistance,int zeroJ) { + assertEquals(j + "->" + semanticDistance,semanticDistance,c.fieldIndexToSemanticDistance(j,zeroJ)); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java new file mode 100644 index 00000000000..272ca98d7c4 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/OptimalStringAlignmentDistance.java @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch.reference; + +import java.util.Arrays; + +/** + * Implementation of optimal string alignment distance which also retains the four subdistances + * and which uses 2*query length memory rather than field length*query length. + * This class is not thread safe. 
+ * + * @author Jon Bratseth + */ +public class OptimalStringAlignmentDistance { + + /** The cell containg the last calculated edit distance */ + private Cell value=new Cell(0,0,0,0); + + // Temporary variables + private Cell[] thisRow, previousRow, previousPreviousRow; + + private String[] query, field; + + private boolean printTable=false; + + public void calculate(String queryString,String fieldString) { + this.query=queryString.split(" "); + this.field=fieldString.split(" "); + + thisRow=new Cell[query.length+1]; + previousRow=new Cell[query.length+1]; + previousPreviousRow=new Cell[query.length+1]; + + for(int i=0; i<=query.length; i++) { + thisRow[i]=new Cell(i+1,0,0,0); + previousRow[i]=new Cell(i,0,0,0); + previousPreviousRow[i]=new Cell(i-1,0,0,0); + } + + print(previousRow); + + for(int j=1;j<=field.length; j++) { + thisRow[0].setTo(0,j,0,0); + for(int i=1; i<=query.length; i++) { + setCell(i,j); + } + + print(thisRow); + + // Shift round thisRow -> previousRow -> previousPreviousRow -> thisRow + Cell[] temporaryRow=thisRow; + thisRow=previousPreviousRow; + previousPreviousRow=previousRow; + previousRow=temporaryRow; + } + value=previousRow[query.length]; + } + + private void setCell(int i,int j) { + Cell thisCell=thisRow[i]; + Cell left=thisRow[i-1]; + Cell above=previousRow[i]; + Cell leftAbove=previousRow[i-1]; + + boolean substitution=!query[i-1].equals(field[j-1]); + + int leftCost=left.getTotal()+1; + int aboveCost=above.getTotal()+1; + int leftAboveCost=leftAbove.getTotal() + ( substitution ? 
1 : 0 ); + + if (leftCost<=aboveCost && leftCost<=leftAboveCost) { + thisCell.setTo(left); + thisCell.addDeletion(); + } + else if (aboveCost<=leftCost && aboveCost<=leftAboveCost) { + thisCell.setTo(above); + thisCell.addInsertion(); + } + else { + thisCell.setTo(leftAbove); + if (substitution) + thisCell.addSubstitution(); + } + + if (i>1 && j>1 && query[i-1].equals(field[j-2]) && query[i-2].equals(field[j-1]) ) { + Cell twoAboveAndLeft=previousPreviousRow[i-2]; + int transpositionCost= + ( substitution ? 1 : 0); + if (transpositionCost 1 and j > 1 and str1[i] = str2[j-1] and str1[i-1] = str2[j]) then + d[i, j] := minimum( + d[i, j], + d[i-2, j-2] + cost // transposition + ) + */ + } + + public float getTotal() { return value.getTotal(); } + public float getSubstitutions() { return value.getSubstitutions(); } + public float getDeletions() { return value.getDeletions(); } + public float getInsertions() { return value.getInsertions(); } + public float getTranspositions() { return value.getTranspositions(); } + + /** Print the calculated edit distance table as we go */ + public void setPrintTable(boolean printTable) { + this.printTable=printTable; + } + + private void print(Cell[] row) { + if (!printTable) return; + for (Cell cell : row) { + System.out.print(cell.toShortString()); + System.out.print(" "); + } + System.out.println(); + } + + /** Returns the current state as a string */ + public String toString() { + StringBuffer b=new StringBuffer(); + b.append("Query: " + Arrays.toString(query) + "\n"); + b.append("Field: " + Arrays.toString(field) + "\n"); + b.append(value); + return b.toString(); + } + + /** An edit distance table cell */ + public static final class Cell { + + private int deletions, insertions, substitutions, transpositions; + + public Cell(int deletions,int insertions,int substitutions,int transpositions) { + setTo(deletions,insertions,substitutions,transpositions); + } + + public void setTo(Cell cell) { + this.deletions=cell.deletions; + 
this.insertions=cell.insertions; + this.substitutions=cell.substitutions; + this.transpositions=cell.transpositions; + } + + public void setTo(int deletions,int insertions,int substitutions,int transpositions) { + this.deletions=deletions; + this.insertions=insertions; + this.substitutions=substitutions; + this.transpositions=transpositions; + } + + public int getTotal() { + return deletions+insertions+substitutions+transpositions; + } + + public void addDeletion() { deletions++; } + public void addInsertion() { insertions++; } + public void addSubstitution() { substitutions++; } + public void addTransposition() { transpositions++; } + + public int getDeletions() { return deletions; } + public int getInsertions() { return insertions; } + public int getSubstitutions() { return substitutions; } + public int getTranspositions() { return transpositions; } + + public String toString() { + return "Total: " + getTotal() + ", substitutions: " + substitutions + ", deletions: " + + deletions + ", insertions: " + insertions + ", transpositions: " + transpositions + "\n"; + } + + public String toShortString() { + return "(" + substitutions + "," + deletions + "," + insertions + "," + transpositions + ")"; + } + + + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java new file mode 100644 index 00000000000..5ad3449a9d3 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/TextbookLevenshteinDistance.java @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features.fieldmatch.reference; + +/** + * Textbook implementation from + * + * Wikipedia algorithms + * Licensed under the Creative Commons Attribution-ShareAlike License + */ +public class TextbookLevenshteinDistance { + + private static int minimum(int a, int b, int c){ + if (a<=b && a<=c) + return a; + if (b<=a && b<=c) + return b; + return c; + } + + public static int computeLevenshteinDistance(char[] str1, char[] str2) { + int[][] distance = new int[str1.length+1][]; + + for(int i=0; i<=str1.length; i++){ + distance[i] = new int[str2.length+1]; + distance[i][0] = i; + } + for(int j=0; j<=str2.length; j++) + distance[0][j]=j; + + for(int i=1; i<=str1.length; i++) + for(int j=1;j<=str2.length; j++) + distance[i][j]= minimum(distance[i-1][j]+1, distance[i][j-1]+1, + distance[i-1][j-1]+((str1[i-1]==str2[j-1])?0:1)); + + return distance[str1.length][str2.length]; + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java new file mode 100644 index 00000000000..398c4e70fb7 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/reference/test/OptimalStringAlignmentTestCase.java @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.ranking.features.fieldmatch.reference.test; + +import com.yahoo.searchlib.ranking.features.fieldmatch.reference.OptimalStringAlignmentDistance; + +/** + * Tests of the edit operation counts calculated by OptimalStringAlignmentDistance. + * + * @author Jon Bratseth + */ +public class OptimalStringAlignmentTestCase extends junit.framework.TestCase { + + public OptimalStringAlignmentTestCase(String name) { + super(name); + } + /** Checks the count of each edit operation. The expectations count whole words: niels to bohr is one substitution. */ + public void testEditDistance() { + // Argument order: total edit distance, substitutions, deletions, insertions, transpositions, query, field, print? + + boolean print=false; + assertEditDistance(0,0,0,0,0,"niels bohr","niels bohr",print); + assertEditDistance(1,1,0,0,0,"niels","bohr",print); + assertEditDistance(1,0,0,1,0,"niels","niels bohr",print); + assertEditDistance(1,0,1,0,0,"niels bohr","bohr",print); + assertEditDistance(1,0,0,0,1,"niels bohr","bohr niels",print); + assertEditDistance(1,0,0,1,0,"niels bohr","niels henrik bohr",print); + assertEditDistance(2,0,0,1,1,"niels bohr","bohr niels henrik",print); + assertEditDistance(4,1,0,3,0,"niels bohr","niels henrik bor i kopenhagen",print); + assertEditDistance(3,2,0,1,0,"niels bohr i kopenhagen","niels henrik bor i stockholm",print); + } + /** Distances for fields where the query words are spread out, reordered or missing */ + public void testEditDistanceAsRelevance() { + boolean print=false; + assertEditDistance(2,0,0,2,0,"niels bohr","niels blah blah bohr",print); + assertEditDistance(4,0,1,3,0,"niels bohr","bohr blah blah niels",print); // Not desired + assertEditDistance(4,2,0,2,0,"niels bohr","koko blah blah bahia",print); + } + /** Same as the nine argument version, with printTable set to false */ + private void assertEditDistance(int total,int substitution,int deletion,int insertion,int transposition,String query,String field,boolean printResult) { + assertEditDistance(total,substitution,deletion,insertion,transposition,query,field,printResult,false); + } + /** Calculates the distance between query and field and asserts the count of each edit operation and the total. When printResult is true the result is also written to standard out; printTable is handed to setPrintTable. */ + private void assertEditDistance(int total,int substitution,int deletion,int insertion,int transposition,String query,String field,boolean printResult,boolean printTable) { + OptimalStringAlignmentDistance e=new OptimalStringAlignmentDistance(); +
e.setPrintTable(printTable); + e.calculate(query,field); + + if (printResult) { + System.out.print(e.toString()); + System.out.println(); + } + /* NOTE(review): the expected counts are cast to float, presumably to match the return type of the getters - confirm against OptimalStringAlignmentDistance */ + assertEquals("Substitutions",(float)substitution,e.getSubstitutions()); + assertEquals("Deletions",(float)deletion,e.getDeletions()); + assertEquals("Insertions",(float)insertion,e.getInsertions()); + assertEquals("Transpositions",(float)transposition,e.getTranspositions()); + assertEquals("Total",(float)total,e.getTotal()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java new file mode 100644 index 00000000000..ef8daec2b73 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/ranking/features/fieldmatch/test/FieldMatchMetricsTestCase.java @@ -0,0 +1,757 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.ranking.features.fieldmatch.test; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.ranking.features.fieldmatch.Field; +import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetrics; +import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsComputer; +import com.yahoo.searchlib.ranking.features.fieldmatch.FieldMatchMetricsParameters; +import com.yahoo.searchlib.ranking.features.fieldmatch.QueryTerm; +import com.yahoo.searchlib.ranking.features.fieldmatch.Query; + +import java.util.List; + +/** + * Tests of calculation of all the string match metrics. + * Add true as the fourth parameter to assertMetrics to see a trace of what the test is doing.
+ * + * @author bratseth + */ +public class FieldMatchMetricsTestCase extends junit.framework.TestCase { + + public FieldMatchMetricsTestCase(String name) { + super(name); + } + + public void testOutOfOrder() { + assertMetrics("outOfOrder:0","a","a"); + assertMetrics("outOfOrder:0","a b c","a b c"); + assertMetrics("outOfOrder:1","a b c","a c b"); + assertMetrics("outOfOrder:2","a b c","c b a"); + assertMetrics("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertMetrics("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertMetrics("outOfOrder:2", "a b c d e", "c x a b x x x x x e x x d"); + } + + public void testSegments() { + assertMetrics("segments:1","a","a"); + assertMetrics("segments:1","a b c","a b c"); + assertMetrics("segments:1","a b c","a x x b c"); + assertMetrics("segments:2","a b c","a x x x x x x x x x x x x x x x x x x x b c"); + assertMetrics("segments:2","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertMetrics("segments:2 gaps:1","a b c","x x x a x x x x x x x x x x x x x x x x x x x b x x c x x"); + assertMetrics("segments:2 gaps:0 outOfOrder:0","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertMetrics("segments:2 gaps:1","a b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); + assertMetrics("segments:2 gaps:1","a y y b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); + } + + public void testGaps() { + assertMetrics("gaps:0","a","a"); + assertMetrics("gaps:0","x�a","a"); + assertMetrics("gaps:0 gapLength:0","a b c","a b c"); + assertMetrics("gaps:1 gapLength:1","a b","b a"); + assertMetrics("gaps:1 gapLength:1","a b c","a x b c"); + assertMetrics("gaps:1 gapLength:3","a b c","a x X Xb c"); + assertMetrics("gaps:2 gapLength:2 outOfOrder:1","a b c","a c b"); + assertMetrics("gaps:2 gapLength:2 outOfOrder:0","a b c","a x b x c"); + assertMetrics("gaps:2 gapLength:5 outOfOrder:1","a b c","a x c x b"); + assertMetrics("gaps:3 outOfOrder:2 segments:1","a b c d e","x d x 
x b c x x a e"); + assertMetrics("gaps:0","y a b c","a b c x"); + } + + public void testHead() { + assertMetrics("head:0","a","a"); + assertMetrics("head:0","y","a"); + assertMetrics("head:1","a","x a"); + assertMetrics("head:2","a b c","x x a b c"); + assertMetrics("head:2","a b c","x x c x x a b"); + assertMetrics("head:2", "a b c", "x x c x x x x x x x x x x x x x x x a b"); + } + + public void testTail() { + assertMetrics("tail:0","a","a"); + assertMetrics("tail:0","y","a"); + assertMetrics("tail:1","a","a x"); + assertMetrics("tail:2","a b c","a b c x x"); + assertMetrics("tail:2","a b c","x x x c x x x x a b x x"); + assertMetrics("tail:0","a b c","x x c x x x x x x x x x x x x x x x a b"); + } + + public void testLongestSequence() { + assertMetrics("longestSequence:1","a","a"); + assertMetrics("longestSequence:1","a","a b c"); + assertMetrics("longestSequence:1","b","a b c"); + assertMetrics("longestSequence:3","a b c","x x a b c x x a b x"); + assertMetrics("longestSequence:3 segments:1","a b c","x x a b x x a b c x"); + assertMetrics("longestSequence:2","a b c d","x x c d x x a b x"); + assertMetrics("longestSequence:2","a b c d","x x a b x c d x x"); + assertMetrics("longestSequence:2","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x"); + assertMetrics("longestSequence:4 segments:1","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x a b c d"); + } + + public void testMatches() { + assertMetrics("matches:1 queryCompleteness:1 fieldCompleteness:1", "a","a"); + assertMetrics("matches:3 queryCompleteness:1 fieldCompleteness:1", "a b c","a b c"); + assertMetrics("matches:3 queryCompleteness:1 fieldCompleteness:0.5", "a b c","a b c a b d"); + assertMetrics("matches:3 queryCompleteness:0.5 fieldCompleteness:0.25","a y y b c y","a x x b c x a x a b x x"); + } + + public void testCompleteness() { + assertMetrics("completeness:1 queryCompleteness:1 fieldCompleteness:1", "a","a"); + assertMetrics("completeness:0 queryCompleteness:0 
fieldCompleteness:0", "a","x"); + assertMetrics("completeness:0 queryCompleteness:0 fieldCompleteness:0", "y","a"); + assertMetrics("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a","a a"); + assertMetrics("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1", "a a","a"); + assertMetrics("completeness:1 queryCompleteness:1 fieldCompleteness:1", "a b c","a b c"); + assertMetrics("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1", "a b c d","a b"); + assertMetrics("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a b","a b c d"); + assertMetrics("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b c d e"); + assertMetrics("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b b b b"); + } + + public void testOrderness() { + assertMetrics("orderness:1", "a","a"); + assertMetrics("orderness:1", "a","x"); + assertMetrics("orderness:0", "a a a","a"); // Oh well... + assertMetrics("orderness:1", "a","a a a"); + assertMetrics("orderness:0", "a b","b a"); + assertMetrics("orderness:0.5","a b c","b a c"); + assertMetrics("orderness:0.5","a b c d","c b d x x x x x x x x x x x x x x x x x x x x x a"); + assertMetrics("orderness:1", "a b","b x x x x x x x x x x x x x x x x x x x x x a"); + } + + public void testRelatedness() { + assertMetrics("relatedness:1", "a","a"); + assertMetrics("relatedness:0", "a","x"); + assertMetrics("relatedness:1", "a b","a b"); + assertMetrics("relatedness:1", "a b c","a b c"); + assertMetrics("relatedness:0.5","a b c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); + assertMetrics("relatedness:0.5","a y b y y y c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); + } + + public void testLongestSequenceRatio() { + assertMetrics("longestSequenceRatio:1", "a","a"); + assertMetrics("longestSequenceRatio:0", "a","x"); + assertMetrics("longestSequenceRatio:1", "a a","a"); + assertMetrics("longestSequenceRatio:1", "a","a a"); + 
assertMetrics("longestSequenceRatio:1", "a b","a b"); + assertMetrics("longestSequenceRatio:1", "a y"," a x"); + assertMetrics("longestSequenceRatio:0.5","a b","a x b"); + assertMetrics("longestSequenceRatio:0.75","a b c d","x x a b x a x c d a b c x d x"); + } + + public void testEarliness() { + assertMetrics("earliness:1", "a","a"); + assertMetrics("earliness:0", "a","x"); + assertMetrics("earliness:1", "a","a a a"); + assertMetrics("earliness:1", "a a a","a"); + assertMetrics("earliness:0.8", "b","a b c"); + assertMetrics("earliness:0.8", "b","a b"); + assertMetrics("earliness:0.9091","a b c","x b c x x x x x a x x x"); + assertMetrics("earliness:0.2", "a b c","x b c a x x x x a x x x x x x x a b c x x"); + } + + public void testWeight() { + assertMetrics("weight:1", "a","a"); + assertMetrics("weight:0", "y","a"); + assertMetrics("weight:0.3333","a a a","a"); + assertMetrics("weight:1", "a","a a a"); + assertMetrics("weight:1", "a b c","a b c"); + assertMetrics("weight:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertMetrics("weight:0.3333","a b c","a"); + assertMetrics("weight:0.6667","a b c","a b"); + + assertMetrics("weight:1", "a b c!200","a b c"); // Best + assertMetrics("weight:0.75","a b c!200","b c"); // Middle + assertMetrics("weight:0.5", "a b c!200","a b"); // Worst + + assertMetrics("weight:1","a!300 b c!200","a b c"); // Best too + + assertMetrics("weight:1", "a b c!50","a b c"); // Best + assertMetrics("weight:0.6","a b c!50","b c"); // Worse + assertMetrics("weight:0.4","a b c!50","b"); // Worse + assertMetrics("weight:0.2","a b c!50","c"); // Worst + assertMetrics("weight:0.8","a b c!50","a b"); // Middle + + assertMetrics("weight:1", "a b c!0","a b c"); // Best + assertMetrics("weight:0.5","a b c!0","b c"); // Worst + assertMetrics("weight:1", "a b c!0","a b"); // As good as best + assertMetrics("weight:0", "a b c!0","c"); // No contribution + + assertMetrics("weight:0","a!0 b!0","a b"); + assertMetrics("weight:0","a!0 b!0",""); + + // 
The query also has other terms having a total weight of 300 + // so we add a weight parameter which is the sum of the weights of this query terms + 300 + assertMetrics("weight:0.25", "a","a",400); + assertMetrics("weight:0", "y","a",400); + assertMetrics("weight:0.1667","a a a","a",600); + assertMetrics("weight:0.25", "a","a a a",400); + assertMetrics("weight:0.5", "a b c","a b c",600); + assertMetrics("weight:0.5", "a b c","x x a b x a x c x x a b x c c x",600); + + assertMetrics("weight:0.1667","a b c","a",600); + assertMetrics("weight:0.3333","a b c","a b",600); + + assertMetrics("weight:0.5714","a b c!200","a b c",700); // Best + assertMetrics("weight:0.4286","a b c!200","b c",700); // Middle + assertMetrics("weight:0.2857","a b c!200","a b",700); // Worst + + assertMetrics("weight:0.6667","a!300 b c!200","a b c",900); // Better than best + + assertMetrics("weight:0.4545","a b c!50","a b c",550); // Best + assertMetrics("weight:0.2727","a b c!50","b c",550); // Worse + assertMetrics("weight:0.1818","a b c!50","b",550); // Worse + assertMetrics("weight:0.0909","a b c!50","c",550); // Worst + assertMetrics("weight:0.3636","a b c!50","a b",550); // Middle + + assertMetrics("weight:0.4","a b c!0","a b c",500); // Best + assertMetrics("weight:0.2","a b c!0","b c",500); // Worst + assertMetrics("weight:0.4","a b c!0","a b",500); // As good as best + assertMetrics("weight:0", "a b c!0","c",500); // No contribution + + assertMetrics("weight:0","a!0 b!0","a b",300); + assertMetrics("weight:0","a!0 b!0","",300); + } + + /** Calculated the same way as weight */ + public void testSignificance() { + assertMetrics("significance:1", "a","a"); + assertMetrics("significance:0", "a","x"); + assertMetrics("significance:0.3333","a a a","a"); + assertMetrics("significance:1", "a","a a a"); + assertMetrics("significance:1", "a b c","a b c"); + assertMetrics("significance:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertMetrics("significance:0.3333","a b c","a"); + 
assertMetrics("significance:0.6667","a b c","a b"); + + assertMetrics("significance:1", "a b c%0.2","a b c"); // Best + assertMetrics("significance:0.75","a b c%0.2","b c"); // Middle + assertMetrics("significance:0.5", "a b c%0.2","a b"); // Worst + + assertMetrics("significance:1","a%0.3 b c%0.2","a b c"); // Best too + + assertMetrics("significance:1", "a b c%0.05","a b c"); // Best + assertMetrics("significance:0.6","a b c%0.05","b c"); // Worse + assertMetrics("significance:0.4","a b c%0.05","b"); // Worse + assertMetrics("significance:0.2","a b c%0.05","c"); // Worst + assertMetrics("significance:0.8","a b c%0.05","a b"); // Middle + + assertMetrics("significance:1", "a b c%0","a b c"); // Best + assertMetrics("significance:0.5","a b c%0","b c"); // Worst + assertMetrics("significance:1", "a b c%0","a b"); // As good as best + assertMetrics("significance:0", "a b c%0","c"); // No contribution + + assertMetrics("significance:0","a%0 b%0","a b"); + assertMetrics("significance:0","a%0 b%0",""); + + // The query also has other terms having a total significance of 0.3 + // so we add a significance parameter which is the sum of the significances of this query terms + 0.3 + assertMetrics("significance:0.25", "a","a",0.4f); + assertMetrics("significance:0", "y","a",0.4f); + assertMetrics("significance:0.1667","a a a","a",0.6f); + assertMetrics("significance:0.25", "a","a a a",0.4f); + assertMetrics("significance:0.5", "a b c","a b c",0.6f); + assertMetrics("significance:0.5", "a b c","x x a b x a x c x x a b x c c x",0.6f); + + assertMetrics("significance:0.1667","a b c","a",0.6f); + assertMetrics("significance:0.3333","a b c","a b",0.6f); + + assertMetrics("significance:0.5714","a b c%0.2","a b c",0.7f); // Best + assertMetrics("significance:0.4286","a b c%0.2","b c",0.7f); // Middle + assertMetrics("significance:0.2857","a b c%0.2","a b",0.7f); // Worst + + assertMetrics("significance:0.6667","a%0.3 b c%0.2","a b c",0.9f); // Better than best + + 
assertMetrics("significance:0.4545","a b c%0.05","a b c",0.55f); // Best + assertMetrics("significance:0.2727","a b c%0.05","b c",0.55f); // Worse + assertMetrics("significance:0.1818","a b c%0.05","b",0.55f); // Worse + assertMetrics("significance:0.0909","a b c%0.05","c",0.55f); // Worst + assertMetrics("significance:0.3636","a b c%0.05","a b",0.55f); // Middle + + assertMetrics("significance:0.4","a b c%0","a b c",0.5f); // Best + assertMetrics("significance:0.2","a b c%0","b c",0.5f); // Worst + assertMetrics("significance:0.4","a b c%0","a b",0.5f); // As good as best + assertMetrics("significance:0", "a b c%0","c",0.5f); // No contribution + + assertMetrics("significance:0","a%0 b%0","a b",0.3f); + assertMetrics("significance:0","a%0 b%0","",0.3f); + } + + public void testImportance() { + assertMetrics("importance:0.75","a b c", "a x x b x c c c",600); + assertMetrics("importance:0.85","a b!500 c","a x x b x c c c",1000); + + // Twice as common - twice as weighty, but total weight has the extra 300 - less than the previous + assertMetrics("importance:0.7857","a b!200%0.05 c","a x x b x c c c",700); + // Here higher importancy exactly offsets the lowered uniqueness + assertMetrics("importance:0.85","a b!500%0.5 c","a x x b x c c c",1000); + } + + public void testOccurrence() { + assertMetrics("occurrence:0","a","x"); + assertMetrics("occurrence:1","a","a"); + assertMetrics("occurrence:0","a a a","x"); + assertMetrics("occurrence:1","a a a","a"); + assertMetrics("occurrence:1","a a a","a a a"); + assertMetrics("occurrence:1","a a a","a a a a"); + assertMetrics("occurrence:0.3571","a","x x x a x x a x a x x x a a"); + assertMetrics("occurrence:1","a","a a a a a a a a a a a a a a"); + assertMetrics("occurrence:1","a b","a b b a a a a a b a a b a a"); + + // tests going beyond the occurrence limit + FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters(); + parameters.setMaxOccurrences(10); + parameters.freeze(); + FieldMatchMetricsComputer c=new 
FieldMatchMetricsComputer(parameters); + assertMetrics("occurrence:1", "a b","a a a a a a a a a a b b",false,c); + assertMetrics("occurrence:0.9231","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("occurrence:0.6", "a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("occurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("occurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + } + + public void testAbsoluteOccurrence() { + assertMetrics("absoluteOccurrence:0", "a","x"); + assertMetrics("absoluteOccurrence:0.01","a","a"); + assertMetrics("absoluteOccurrence:0","a a a","x"); + assertMetrics("absoluteOccurrence:0.01", "a a a","a"); + assertMetrics("absoluteOccurrence:0.03", "a a a","a a a"); + assertMetrics("absoluteOccurrence:0.04", "a a a","a a a a"); + assertMetrics("absoluteOccurrence:0.05","a","x x x a x x a x a x x x a a"); + assertMetrics("absoluteOccurrence:0.14","a","a a a a a a a a a a a a a a"); + assertMetrics("absoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a"); + + // tests going beyond the occurrence limit + FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters(); + parameters.setMaxOccurrences(10); + parameters.freeze(); + FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters); + assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a b b",false,c); + assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("absoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("absoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("absoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b 
b",false,c); // Field is too large to consider field length + } + + public void testWeightedOccurrence() { + assertMetrics("weightedOccurrence:0","a!200","x"); + assertMetrics("weightedOccurrence:1","a!200","a"); + assertMetrics("weightedOccurrence:0","a!200 a a","x"); + assertMetrics("weightedOccurrence:1","a!200 a a","a"); + assertMetrics("weightedOccurrence:1","a a a","a a a"); + assertMetrics("weightedOccurrence:1","a!200 a a","a a a a"); + assertMetrics("weightedOccurrence:0.3571","a!200","x x x a x x a x a x x x a a"); + assertMetrics("weightedOccurrence:1","a!200","a a a a a a a a a a a a a a"); + assertMetrics("weightedOccurrence:0.5","a b","a b b a a a a a b a a b a a"); + + assertMetrics("weightedOccurrence:0.5714","a!200 b","a b b a a a a a b a a b a a"); + assertMetrics("weightedOccurrence:0.6753","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertMetrics("weightedOccurrence:0.4286","a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertMetrics("weightedOccurrence:0.3061","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertMetrics("weightedOccurrence:0.30","a b", "a a b b b b x x x x"); + assertMetrics("weightedOccurrence:0.3333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertMetrics("weightedOccurrence:0.2667","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertMetrics("weightedOccurrence:0.2667","a b!50", "a a b b b b x x x x"); // Same relative + + assertMetrics("weightedOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters(); + parameters.setMaxOccurrences(10); + parameters.freeze(); + FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters); + assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a b b",false,c); + assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a a b 
b",false,c); // Starting to cut off + assertMetrics("weightedOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("weightedOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("weightedOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + + assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a b b",false,c); + assertMetrics("weightedOccurrence:0.4667","a b!200","a a a a a a a a a a b b",false,c); + assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("weightedOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("weightedOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + } + + public void testWeightedAbsoluteOccurrence() { + assertMetrics("weightedAbsoluteOccurrence:0", "a!200","x"); + assertMetrics("weightedAbsoluteOccurrence:0.01", "a!200","a"); + assertMetrics("weightedAbsoluteOccurrence:0", "a!200 a a","x"); + assertMetrics("weightedAbsoluteOccurrence:0.01", "a!200 a a","a"); + assertMetrics("weightedAbsoluteOccurrence:0.03", "a a a","a a a"); + assertMetrics("weightedAbsoluteOccurrence:0.04", "a!200 a a","a a a a"); + assertMetrics("weightedAbsoluteOccurrence:0.05", "a!200","x x x a x x a x a x x x a a"); + assertMetrics("weightedAbsoluteOccurrence:0.14", "a!200","a a a a a a a a a a a a a a"); + assertMetrics("weightedAbsoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a"); + + assertMetrics("weightedAbsoluteOccurrence:0.08", "a!200 b","a b b a a a a a b a a b a a"); + 
assertMetrics("weightedAbsoluteOccurrence:0.0945","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertMetrics("weightedAbsoluteOccurrence:0.06", "a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertMetrics("weightedAbsoluteOccurrence:0.0429","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertMetrics("weightedAbsoluteOccurrence:0.03", "a b", "a a b b b b x x x x"); + assertMetrics("weightedAbsoluteOccurrence:0.0333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertMetrics("weightedAbsoluteOccurrence:0.0267","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertMetrics("weightedAbsoluteOccurrence:0.0267","a b!50", "a a b b b b x x x x"); // Same relative + + assertMetrics("weightedAbsoluteOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters(); + parameters.setMaxOccurrences(10); + parameters.freeze(); + FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters); + assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a b b",false,c); + assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + + assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a b b",false,c); + assertMetrics("weightedAbsoluteOccurrence:0.4667","a b!200","a a a a a a a a a a b b",false,c); + 
assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + } + + public void testSignificantOccurrence() { + assertMetrics("significantOccurrence:0","a%0.2","x"); + assertMetrics("significantOccurrence:1","a%0.2","a"); + assertMetrics("significantOccurrence:0","a%0.2 a a","x"); + assertMetrics("significantOccurrence:1","a%0.2 a a","a"); + assertMetrics("significantOccurrence:1","a a a","a a a"); + assertMetrics("significantOccurrence:1","a%0.2 a a","a a a a"); + assertMetrics("significantOccurrence:0.3571","a%0.2","x x x a x x a x a x x x a a"); + assertMetrics("significantOccurrence:1","a%0.2","a a a a a a a a a a a a a a"); + assertMetrics("significantOccurrence:0.5","a b","a b b a a a a a b a a b a a"); + + assertMetrics("significantOccurrence:0.5714","a%0.2 b","a b b a a a a a b a a b a a"); + assertMetrics("significantOccurrence:0.6753","a%1 b","a b b a a a a a b a a b a a"); // Should be higher + assertMetrics("significantOccurrence:0.4286","a b%0.2","a b b a a a a a b a a b a a"); // Should be lower + assertMetrics("significantOccurrence:0.3247","a b%1","a b b a a a a a b a a b a a"); // Should be even lower + + assertMetrics("significantOccurrence:0.30","a b", "a a b b b b x x x x"); + assertMetrics("significantOccurrence:0.3333","a b%0.2","a a b b b b x x x x"); // More frequent is more important - higher + assertMetrics("significantOccurrence:0.2667","a%0.2 b","a a b b b b x x x x"); // Less frequent is more important - lower + 
assertMetrics("significantOccurrence:0.2667","a b%0.05", "a a b b b b x x x x"); // Same relative + + assertMetrics("significantOccurrence:0","a%0 b%0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + FieldMatchMetricsParameters parameters=new FieldMatchMetricsParameters(); + parameters.setMaxOccurrences(10); + parameters.freeze(); + FieldMatchMetricsComputer c=new FieldMatchMetricsComputer(parameters); + assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a b b",false,c); + assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("significantOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("significantOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("significantOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + + assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a b b",false,c); + assertMetrics("significantOccurrence:0.4667","a b%0.2","a a a a a a a a a a b b",false,c); + assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a b b",false,c); // Starting to cut off + assertMetrics("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a a a a a a a a a a a b b",false,c); // Way beyond cutoff for a + assertMetrics("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a b b b b b b b b b b",false,c); // Exactly no cutoff + assertMetrics("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a a b b b b b b b b b b b",false,c); // Field is too large to consider field length + } + + public void testUnweightedProximity() { + assertMetrics("unweightedProximity:1", "a","a"); + assertMetrics("unweightedProximity:1", "a b c","a b c"); + assertMetrics("unweightedProximity:1", "a b c","a b c x"); + 
assertMetrics("unweightedProximity:1", "y a b c","a b c x"); + assertMetrics("unweightedProximity:1", "y a b c", "a b c x"); + assertMetrics("unweightedProximity:0.855", "y a b c", "a b x c x"); + assertMetrics("unweightedProximity:0.750","y a b c","a b x x c x"); + assertMetrics("unweightedProximity:0.71", "y a b c","a x b x c x"); // Should be slightly worse than the previous one + assertMetrics("unweightedProximity:0.605","y a b c","a x b x x c x"); + assertMetrics("unweightedProximity:0.53", "y a b c","a x b x x x c x"); + assertMetrics("unweightedProximity:0.5", "y a b c","a x x b x x c x"); + } + + public void testReverseProximity() { + assertMetrics("unweightedProximity:0.33", "a b","b a"); + assertMetrics("unweightedProximity:0.62", "a b c","c a b"); + assertMetrics("unweightedProximity:0.585", "y a b c","c x a b"); + assertMetrics("unweightedProximity:0.33", "a b c","c b a"); + assertMetrics("unweightedProximity:0.6875","a b c d e","a b d c e"); + assertMetrics("unweightedProximity:0.9275","a b c d e","a b x c d e"); + } + + public void testProximity() { + assertMetrics("absoluteProximity:0.1 proximity:1", "a b","a b"); + assertMetrics("absoluteProximity:0.3 proximity:1", "a 0.3:b","a b"); + assertMetrics("absoluteProximity:0.1 proximity:1", "a 0.0:b","a b"); + assertMetrics("absoluteProximity:1 proximity:1", "a 1.0:b","a b"); + assertMetrics("absoluteProximity:0.033 proximity:0.33", "a b","b a"); + assertMetrics("absoluteProximity:0.0108 proximity:0.0359","a 0.3:b","b a"); // Should be worse than the previous one + assertMetrics("absoluteProximity:0.1 proximity:1", "a 0.0:b","b a"); + assertMetrics("absoluteProximity:0 proximity:0", "a 1.0:b","b a"); + + // proximity with connectedness + assertMetrics("absoluteProximity:0.0605 proximity:0.605", "a b c","a x b x x c"); + assertMetrics("absoluteProximity:0.0701 proximity:0.2003","a 0.5:b 0.2:c","a x b x x c"); // Most important is close, less important is far: Better +
assertMetrics("absoluteProximity:0.0605 proximity:0.605", "a b c","a x x b x c"); + assertMetrics("absoluteProximity:0.0582 proximity:0.1663","a 0.5:b 0.2:c","a x x b x c"); // Most important is far, less important is close: Worse + + assertMetrics("absoluteProximity:0.0727 proximity:0.7267","a b c d","a b x x x x x c d"); + assertMetrics("absoluteProximity:0.1 proximity:1", "a b 0:c d","a b x x x x x c d"); // Should be better because the gap is unimportant + } + + /** + * Tests exactness (using field exactness only - nothing additional of interest to test with query exactness + * as that is just another number multiplied with the term exactness) + */ + public void testExactness() { + assertMetrics("exactness:1", "a b c","a x b x x c"); + assertMetrics("exactness:0.9", "a b c","a x b:0.7 x x c"); + assertMetrics("exactness:0.7", "a b c","a x b:0.6 x x c:0.5"); + assertMetrics("exactness:0.775", "a!200 b c","a x b:0.6 x x c:0.5"); + assertMetrics("exactness:0.65", "a b c!200","a x b:0.6 x x c:0.5"); + } + + public void testMultiSegmentProximity() { + assertMetrics("absoluteProximity:0.1 proximity:1", "a b c", "a b x x x x x x x x x x x x x x x x x x x x x x c"); + assertMetrics("absoluteProximity:0.05 proximity:0.5","a b c", "a x x b x x x x x x x x x x x x x x x x x x x x x x c"); + assertMetrics("absoluteProximity:0.075 proximity:0.75","a b c d","a x x b x x x x x x x x x x x x x x x x x x x x x x c d"); + } + + public void testSegmentDistance() { + assertMetrics("segmentDistance:13 absoluteProximity:0.1", "a b c","a b x x x x x x x x x x c"); + assertMetrics("segmentDistance:13 absoluteProximity:0.5", "a 0.5:b c","a b x x x x x x x x x x c"); + assertMetrics("segmentDistance:13 absoluteProximity:0.1", "a b c","b c x x x x x x x x x x a"); + assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","b x x x x x x x x x x x a x x x x x x x x x x c"); + assertMetrics("segmentDistance:13 absoluteProximity:0.006","a b c","a x x x x x x x x x x x b x x x x x x 
x x c"); + assertMetrics("segmentDistance:24 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x c"); + assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x x c"); + assertMetrics("segmentDistance:25 absoluteProximity:0.1", "a b c","c x x x x x x x x x x x b x x x x x x x x x x a"); + } + + public void testSegmentProximity() { + assertMetrics("segmentProximity:1", "a","a"); + assertMetrics("segmentProximity:0", "a","x"); + assertMetrics("segmentProximity:1", "a","a x"); + assertMetrics("segmentProximity:0", "a b","a x x x x x x x x x x x x x x x x x x x x x x x b"); + assertMetrics("segmentProximity:0.4","a b","a x x x x x x x x x x x x x x x x x x x x x x b x x x x x x x x x x x x x x x x"); + assertMetrics("segmentProximity:0", "a b c","a b x x x x x x x x x x x x x x x x x x x x x c"); + assertMetrics("segmentProximity:0.4","a b c","a b x x x x x x x x x x x x x x x x x x x x x c x x x x x x x x x x x x x x x x"); + assertMetrics("segmentProximity:0.4","a b c","b c x x x x x x x x x x x x x x x x x x x x x a x x x x x x x x x x x x x x x x"); + } + + /** Test cases where we choose between multiple different segmentations */ + public void testSegmentSelection() { + assertMetrics("segments:2 absoluteProximity:0.1 proximity:1 segmentStarts:19,41", + "a b c d e","x a b x c x x x x x x x x x x x x x x a b c x x x x x x x x x e x d x c d x x x c d e"); + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 + // 0 1 2 3 4 + // Should choose - - - - - + + // Same as above but best matching segment have too low exactness + assertMetrics("segments:2 absoluteProximity:0.0903 proximity:0.9033 segmentStarts:1,41", + "a b c d e","x a b x c x x x x x x x x x x x x x x a:0.2 b:0.3 c:0.4 x x x x x x x x x e x d x c d x x x c d e"); + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 + // 0 1 2 3 4 + // Should choose - - - - 
- + + assertMetrics("segments:1 absoluteProximity:0.0778 proximity:0.778","a b c d e f","x x a b b b c f e d a b c d x e x x x x x f d e f a b c a a b b c c d d e e f f"); + + // Prefer one segment with ok proximity over two segments with great proximity + assertMetrics("segments:1 segmentStarts:0","a b c d","a b x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + assertMetrics("segments:1 segmentStarts:0","a b c d","a b x x x x x x x x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + } + + public void testMoreThanASegmentLengthOfUnmatchedQuery() { + assertMetrics("absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y","a b"); + assertMetrics("segments:2 absoluteProximity:0.1 proximity:1","a b c d y y y y y y y y y y y y y y y","a b x x x x x x x x x x x x x x x x x x c d"); + assertMetrics("segments:2 absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y c d","a b x x x x x x x x x x x x x x x x x x c d"); + } + + public void testQueryRepeats() { + // Not really handled perfectly, but good enough + assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0", "a a a","a"); + assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a a b c c"); + assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a b c"); + assertMetrics("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a b a b","a b a b"); + assertMetrics("absoluteProximity:0.0903 proximity:0.9033 head:0 tail:0 gapLength:1","a b a b","a b x a b"); + // Both terms take the same segment: + assertMetrics("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:18","a a","x x x a x x x x x x x x x x x x x x a x x x"); + // But not when the second is preferable + assertMetrics("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:3","a b b a","x x x a b x x x x x x x x x x x x x x b a x x x"); + + assertMetrics("matches:2 fieldCompleteness:1","a b b 
b","a b"); + } + + public void testZeroCases() { + assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","y","a"); + assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","a","x"); + assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","","x"); + assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","y",""); + assertMetrics("absoluteProximity:0.1 proximity:1 matches:0 exactness:0","",""); + } + + public void testExceedingIterationLimit() { + + { // Segments found: a x x b and c d + FieldMatchMetricsParameters p=new FieldMatchMetricsParameters(); + p.setMaxAlternativeSegmentations(0); + FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p); + assertMetrics("matches:4 tail:0 proximity:0.75 absoluteProximity:0.075","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m); + } + + { // Segments found: a x b and c d + FieldMatchMetricsParameters p=new FieldMatchMetricsParameters(); + p.setMaxAlternativeSegmentations(1); + FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p); + assertMetrics("matches:4 tail:0 proximity:0.855 absoluteProximity:0.0855","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m); + } + + { // Segments found: a b and c d + FieldMatchMetricsParameters p=new FieldMatchMetricsParameters(); + p.setMaxAlternativeSegmentations(2); + FieldMatchMetricsComputer m=new FieldMatchMetricsComputer(p); + assertMetrics("matches:4 tail:0 proximity:1 absoluteProximity:0.1","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d",false,m); + } + } + + public void testMatch() { + // Ordered by decreasing match score per query + assertMetrics("match:1", "a","a"); + assertMetrics("match:0.9339","a","a x"); + assertMetrics("match:0", "a","x"); + assertMetrics("match:0.9243","a","x a"); + assertMetrics("match:0.9025","a","x a x"); + + assertMetrics("match:1", "a b","a b"); + 
assertMetrics("match:0.9558","a b","a b x"); + assertMetrics("match:0.9463","a b","x a b"); + assertMetrics("match:0.1296","a b","a x x x x x x x x x x x x x x x x x x x x x x b"); + assertMetrics("match:0.1288","a b","a x x x x x x x x x x x x x x x x x x x x x x x x x x x b"); + + assertMetrics("match:0.8647","a b c","x x a x b x x x x x x x x a b c x x x x x x x x c x x"); + assertMetrics("match:0.861", "a b c","x x a x b x x x x x x x x x x a b c x x x x x x c x x"); + assertMetrics("match:0.4869","a b c","a b x x x x x x x x x x x x x x x x x x x x x x c x x"); + assertMetrics("match:0.4853","a b c","x x a x b x x x x x x x x x x b a c x x x x x x c x x"); + assertMetrics("match:0.3621","a b c","a x b x x x x x x x x x x x x x x x x x x x x x c x x"); + assertMetrics("match:0.3619","a b c","x x a x b x x x x x x x x x x x x x x x x x x x c x x"); + assertMetrics("match:0.3584","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x c"); + assertMetrics("match:0.3474","a b c","x x a x b x x x x x x x x x x x x x x b x x x b x b x"); + assertMetrics("match:0.3421","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x x"); + assertMetrics("match:0.305" ,"a b c","x x a x b:0.7 x x x x x x x x x x x x x x x x x x x x x x"); + assertMetrics("match:0.2927","a b!200 c","x x a x b:0.7 x x x x x x x x x x x x x x x x x x x x x x"); + } + + public void testRepeatedMatch() { + // gap==1 caused by finding two possible segments due to repeated matching + assertMetrics("fieldCompleteness:1 queryCompleteness:0.6667 segments:1 earliness:1 gaps:1", + "pizza hut pizza","pizza hut"); + } + + /** Three segments - improving the score on the first should impact the last */ + public void testNestedAlternatives() { + assertMetrics("segmentStarts:6,19,32 proximity:1", + "a b c d e f", + "a x b x x x a b x x x x x x x x x x x c d x x x x x x x x x x x e f"); + assertMetrics("segmentStarts:6,19,47 proximity:1", + "a b c d e f", + "a x b x x x a b x x x x x x x x x x x c d x x 
x x x x x x x x x e x f x x x x x x x x x x x x e f"); + } + + /** Nice demonstration of the limitations of this algorithm: Segment end points are determined greedily */ + public void testSegmentationGreedyness() { + assertMetrics("match:0.3717","a b c","a x b x x x x x x x x b c"); + assertMetrics("match:0.4981","a b c","a x z x x x x x x x x b c"); + } + + protected void assertMetrics(String correctSpec, String query, String field) { + assertMetrics(correctSpec, query, field, false); + } + + protected void assertMetrics(String correctSpec, String queryString, String field, int totalTermWeight) { + Query query=toQuery(queryString); + query.setTotalTermWeight(totalTermWeight); + assertMetrics(correctSpec, query, toField(field), false, new FieldMatchMetricsComputer()); + } + + protected void assertMetrics(String correctSpec, String queryString, String field, float totalSignificance) { + Query query=toQuery(queryString); + query.setTotalSignificance(totalSignificance); + assertMetrics(correctSpec, query, toField(field), false, new FieldMatchMetricsComputer()); + } + + protected void assertMetrics(String correctSpec,String query,String field,boolean printTrace) { + assertMetrics(correctSpec,query,field,printTrace,new FieldMatchMetricsComputer()); + } + + protected void assertMetrics(String correctSpec,String query,String field,boolean printTrace,FieldMatchMetricsComputer m) { + assertMetrics(correctSpec, toQuery(query), toField(field), printTrace, m); + } + + protected void assertMetrics(String correctSpec, Query query, Field field, boolean printTrace, FieldMatchMetricsComputer m) { + FieldMatchMetrics metrics = m.compute(query, field, printTrace); + if (printTrace) + System.out.println(metrics.trace()); + + if (printTrace) + System.out.println(metrics.toStringDump()); + + for (String correctValueSpec: correctSpec.split(" ")) { + if (correctValueSpec.trim().equals("")) continue; + String metricName=correctValueSpec.split(":")[0]; + String 
correctValueString=correctValueSpec.split(":")[1]; + if (metricName.equals("segmentStarts")) { + String[] correctSegmentStarts=correctValueString.split(","); + List segmentStarts=metrics.getSegmentStarts(); + assertEquals("Segment start count",correctSegmentStarts.length,segmentStarts.size()); + for (int i=0; i1) + query[i]=new QueryTerm(colonSplit[1],Float.parseFloat(colonSplit[0])); + else + query[i]=new QueryTerm(colonSplit[0]); + + if (bangSplit.length>1) + query[i].setWeight(Integer.parseInt(bangSplit[1])); + if (percentSplit.length>1) + query[i].setSignificance(Float.parseFloat(percentSplit[1])); + } + return new Query(query); + } + + private Field toField(String fieldString) { + if (fieldString.length() == 0) return new Field(ImmutableList.of()); + + ImmutableList.Builder terms = new ImmutableList.Builder<>(); + for (String termString : fieldString.split(" ")) { + String[] colonSplit = termString.split(":"); + if (colonSplit.length > 1) + terms.add(new Field.Term(colonSplit[0], Float.parseFloat(colonSplit[1]))); + else + terms.add(new Field.Term(colonSplit[0])); + } + return new Field(terms.build()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java new file mode 100755 index 00000000000..7399088ac1c --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/FeatureListTestCase.java @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression; + +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author Simon Thoresen + */ +public class FeatureListTestCase { + + @Test + public void requireThatFeatureListFromStringWorks() throws ParseException { + assertFromString("attribute(foo).out", + Arrays.asList("attribute(foo).out")); + assertFromString("attribute(foo).out attribute ( bar ) . out", + Arrays.asList("attribute(foo).out", "attribute(bar).out")); + assertFromString("foo\n bar\n \t \t \n baz \n", + Arrays.asList("foo", "bar", "baz")); + assertFromString("attribute attribute(foo) attribute(foo).out attribute(bar).out.out", + Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out")); + } + + @Test + public void requireThatFeatureListFromReaderWorks() throws ParseException { + assertFromReader(new StringReader("attribute(foo).out"), + Arrays.asList("attribute(foo).out")); + assertFromReader(new StringReader("attribute(foo).out attribute ( bar ) . 
out"), + Arrays.asList("attribute(foo).out", "attribute(bar).out")); + assertFromReader(new StringReader("foo\n bar\n \t \t \n baz \n"), + Arrays.asList("foo", "bar", "baz")); + assertFromReader(new StringReader("attribute attribute(foo) attribute(foo).out attribute(bar).out.out"), + Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out")); + } + + @Test + public void requireThatFeatureListFromFileWorks() throws ParseException, FileNotFoundException { + assertFromFile(new File("src/test/files/features01.expression"), + Arrays.asList("attribute(foo).out")); + assertFromFile(new File("src/test/files/features02.expression"), + Arrays.asList("attribute(foo).out", "attribute(bar).out")); + assertFromFile(new File("src/test/files/features03.expression"), + Arrays.asList("foo", "bar", "baz")); + assertFromFile(new File("src/test/files/features04.expression"), + Arrays.asList("attribute", "attribute(foo)", "attribute(foo).out", "attribute(bar).out.out")); + } + + public void assertFromString(String input, List expected) throws ParseException { + assertFeatureList(new FeatureList(input), expected); + } + + public void assertFromReader(Reader input, List expected) throws ParseException { + assertFeatureList(new FeatureList(input), expected); + } + + public void assertFromFile(File input, List expected) throws ParseException, FileNotFoundException { + assertFeatureList(new FeatureList(input), expected); + } + + public void assertFeatureList(FeatureList features, List expected) throws ParseException { + assertEquals(expected.size(), features.size()); + for (int i = 0; i < features.size(); ++i) { + assertTrue(features.get(i) != null); + assertEquals(expected.get(i), features.get(i).toString()); + } + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java new file mode 100755 index 
00000000000..24d7c82235c --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/RankingExpressionTestCase.java @@ -0,0 +1,281 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression; + +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.IfNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.FunctionNode; +import junit.framework.TestCase; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.*; + +/** + * @author Simon Thoresen + */ +public class RankingExpressionTestCase extends TestCase { + + public void testParamInFeature() throws ParseException { + assertParse("if (1 > 2, dotProduct(allparentid,query(cate1_parentid)), 2)", + "if ( 1 > 2,\n" + + "dotProduct(allparentid, query(cate1_parentid)),\n" + + "2\n" + + ")"); + } + + public void testDollarShorthand() throws ParseException { + assertParse("query(var1)", " $var1"); + assertParse("query(var1)", " $var1 "); + assertParse("query(var1) + query(var2)", " $var1 + $var2 "); + assertParse("query(var1) + query(var2) - query(var3)", " $var1 + $var2 - $var3 "); + assertParse("query(var1) + query(var2) - query(var3) * query(var4) / query(var5)", " $var1 + $var2 - $var3 * $var4 / $var5 "); + assertParse("(query(var1) + query(var2)) - query(var3) * query(var4) / query(var5)", "($var1 + $var2)- $var3 * $var4 / $var5 "); + assertParse("query(var1) + (query(var2) - query(var3)) * query(var4) / query(var5)", " $var1 +($var2 - $var3)* $var4 / $var5 "); + assertParse("query(var1) + query(var2) - 
(query(var3) * query(var4)) / query(var5)", " $var1 + $var2 -($var3 * $var4)/ $var5 "); + assertParse("query(var1) + query(var2) - query(var3) * (query(var4) / query(var5))", " $var1 + $var2 - $var3 *($var4 / $var5)"); + assertParse("if (if (f1.out < query(p1), 0, 1) < if (f2.out < query(p2), 0, 1), f3.out, query(p3))", "if(if(f1.out<$p1,0,1) future = exec.submit(new Callable() { + @Override + public Boolean call() { + try { + new RankingExpression("if (fieldMatch(title) < 0.316316, if (now < 1.218627E9, if (now < 1.217667E9, if (now < 1.217244E9, if (rankBoost < 100050.0, 0.1424368, if (match < 0.284921, if (now < 1.217238E9, 0.1528184, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if 
(now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, if (now < 1.217238E9, 0.1, 0.1493261))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))), 0.1646852)), 0.1850886), if (match < 0.308468, if (firstPhase < 5891.5, 0.08424015, 0.1167076), if (rankBoost < 120050.0, 0.111576, 0.1370456))), if (match < 0.31644, 0.1543837, 0.1727403)), if (now < 1.218088E9, if (now < 1.217244E9, if (fieldMatch(metakeywords).significance < 0.1425405, if (match.totalWeight < 450.0, 0.1712793, 0.1632426), 0.1774488), 0.1895567), if (now < 1.218361E9, if (fieldTermMatch(keywords_1).firstPosition < 1.5, 0.1530005, 0.1370894), 0.1790079)))"); + return Boolean.TRUE; + } catch (ParseException e) { + return Boolean.FALSE; + } + } + }); + assertTrue(future.get(60, TimeUnit.SECONDS)); + } + + public void testSelfRecursionScript() throws ParseException { + List macros = new ArrayList<>(); + macros.add(new ExpressionFunction("foo", null, new RankingExpression("foo"))); + + RankingExpression exp = new RankingExpression("foo"); + try { + exp.getRankProperties(macros); + } catch (RuntimeException e) { + assertEquals("Cycle in ranking expression function: [foo[]]", e.getMessage()); + } + } + + public void testMacroCycleScript() throws ParseException { + List macros = new ArrayList<>(); + macros.add(new ExpressionFunction("foo", null, new RankingExpression("bar"))); + macros.add(new ExpressionFunction("bar", null, new RankingExpression("foo"))); + + RankingExpression exp = new RankingExpression("foo"); + try { + exp.getRankProperties(macros); + } catch (RuntimeException e) { + assertEquals("Cycle in ranking expression function: [foo[], bar[]]", e.getMessage()); + } + } + + public void testScript() throws ParseException { + List macros = new ArrayList<>(); 
+ macros.add(new ExpressionFunction("foo", Arrays.asList("arg1", "arg2"), new RankingExpression("min(arg1, pow(arg2, 2))"))); + macros.add(new ExpressionFunction("bar", Arrays.asList("arg1", "arg2"), new RankingExpression("arg1 * arg1 + 2 * arg1 * arg2 + arg2 * arg2"))); + macros.add(new ExpressionFunction("baz", Arrays.asList("arg1", "arg2"), new RankingExpression("foo(1, 2) / bar(arg1, arg2)"))); + macros.add(new ExpressionFunction("cox", null, new RankingExpression("10 + 08 * 1977"))); + + assertScript("foo(1,2) + foo(3,4) * foo(5, foo(foo(6, 7), 8))", macros, + Arrays.asList( + "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) + rankingExpression(foo@af74e3fd9070bd18.a368ed0a5ba3a5d0) * rankingExpression(foo@dbab346efdad5362.e5c39e42ebd91c30)", + "min(5,pow(rankingExpression(foo@d1d1417259cdc651.573bbcd4be18f379),2))", + "min(6,pow(7,2))", + "min(1,pow(2,2))", + "min(3,pow(4,2))", + "min(rankingExpression(foo@84951be88255b0ec.d0303e061b36fab8),pow(8,2))" + )); + assertScript("foo(1, 2) + bar(3, 4)", macros, + Arrays.asList( + "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) + rankingExpression(bar@af74e3fd9070bd18.a368ed0a5ba3a5d0)", + "min(1,pow(2,2))", + "3 * 3 + 2 * 3 * 4 + 4 * 4" + )); + assertScript("baz(1, 2)", macros, + Arrays.asList( + "rankingExpression(baz@e2dc17a89864aed0.12232eb692c6c502)", + "min(1,pow(2,2))", + "rankingExpression(foo@e2dc17a89864aed0.12232eb692c6c502) / rankingExpression(bar@e2dc17a89864aed0.12232eb692c6c502)", + "1 * 1 + 2 * 1 * 2 + 2 * 2" + )); + assertScript("cox", macros, + Arrays.asList( + "rankingExpression(cox)", + "10 + 08 * 1977" + )); + } + + public void testBug3464208() throws ParseException { + List macros = new ArrayList<>(); + macros.add(new ExpressionFunction("log10tweetage", null, new RankingExpression("69"))); + + String lhs = "log10(0.01+attribute(user_followers_count)) * log10(socialratio) * " + + "log10(userage/(0.01+attribute(user_statuses_count)))"; + String rhs = "(log10tweetage * 
log10tweetage * log10tweetage) + 5.0 * " + + "attribute(ythl)"; + + String expLhs = "log10(0.01 + attribute(user_followers_count)) * log10(socialratio) * " + + "log10(userage / (0.01 + attribute(user_statuses_count)))"; + String expRhs = "(rankingExpression(log10tweetage) * rankingExpression(log10tweetage) * " + + "rankingExpression(log10tweetage)) + 5.0 * attribute(ythl)"; + + assertScript(lhs + " + " + rhs, macros, + Arrays.asList( + expLhs + " + " + expRhs, + "69" + )); + assertScript(lhs + " - " + rhs, macros, + Arrays.asList( + expLhs + " - " + expRhs, + "69" + )); + } + + public void testParse() throws ParseException, IOException { + BufferedReader reader = new BufferedReader(new FileReader("src/tests/rankingexpression/rankingexpressionlist")); + String line; + int lineNumber = 0; + while ((line = reader.readLine()) != null) { + lineNumber++; + if (line.length() == 0 || line.charAt(0) == '#') { + continue; + } + String[] parts = line.split(";"); + // System.out.println("Parsing '" + parts[0].trim() + "'.."); + RankingExpression expression = new RankingExpression(parts[0].trim()); + + String out = expression.toString(); + if (parts.length == 1) { + assertEquals(parts[0].trim(), out); + } else { + boolean ok = false; + String err = "Expression '" + out + "' not present in { "; + for (int i = 1; i < parts.length && !ok; ++i) { + err += "'" + parts[i].trim() + "'"; + if (parts[i].trim().equals(out)) { + ok = true; + } + if (i < parts.length - 1) { + err += ", "; + } + } + err += " }."; + assertTrue("At line " + lineNumber + ": " + err, ok); + } + } + } + + public void testIssue() throws ParseException { + assertEquals("feature.0", new RankingExpression("feature.0").toString()); + assertEquals("if (1 > 2, 3, 4) + feature(arg1).out.out", + new RankingExpression("if ( 1 > 2 , 3 , 4 ) + feature ( arg1 ) . 
out.out").toString()); + } + + public void testNegativeConstantArgument() throws ParseException { + assertEquals("foo(-1.2)", new RankingExpression("foo(-1.2)").toString()); + } + + public void testNaming() throws ParseException { + RankingExpression test = new RankingExpression("a+b"); + test.setName("test"); + assertEquals("test: a + b", test.toString()); + } + + public void testCondition() throws ParseException { + RankingExpression expression = new RankingExpression("if(1<2,3,4)"); + assertTrue(expression.getRoot() instanceof IfNode); + } + + public void testFileImporting() throws ParseException { + RankingExpression expression = new RankingExpression(new File("src/test/files/simple.expression")); + assertEquals("simple: a + b", expression.toString()); + } + + public void testNonCanonicalLegalStrings() throws ParseException { + assertParse("a * b + c * d", "a* (b) + \nc*d"); + } + + public void testEquality() throws ParseException { + assertEquals(new RankingExpression("if ( attribute(foo)==\"BAR\",log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"), + new RankingExpression("if(attribute(foo)==\"BAR\", log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)")); + + assertFalse(new RankingExpression("if ( attribute(foo)==\"BAR\",log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)").equals( + new RankingExpression("if(attribute(foo)==\"BAR\", log(attribute(popularity)+5),log(fieldMatch(title).earliness) * fieldMatch(title).completeness)"))); + } + + public void testSetMembershipConditions() throws ParseException { + assertEquals(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"), + new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * 
fieldMatch(title).completeness)")); + + assertFalse(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)").equals( + new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).earliness) * fieldMatch(title).completeness)"))); + + assertEquals(new RankingExpression("if ( attribute(foo) in [attribute(category), \"BAR\"],log(attribute(popularity)+5),log(fieldMatch(title).proximity)*fieldMatch(title).completeness)"), + new RankingExpression("if(attribute(foo) in [attribute(category),\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)")); + assertEquals(new RankingExpression("if (GENDER$ in [-1.0, 1.0], 1, 0)"), new RankingExpression("if (GENDER$ in [-1.0, 1.0], 1, 0)")); + } + + public void testComments() throws ParseException { + assertEquals(new RankingExpression("if ( attribute(foo) in [\"FOO\", \"BAR\"],\n" + + "# a comment\n" + + "log(attribute(popularity)+5),log(fieldMatch(title).proximity)*" + + "# a multiline \n" + + " # comment\n" + + "fieldMatch(title).completeness)"), + new RankingExpression("if(attribute(foo) in [\"FOO\",\"BAR\"], log(attribute(popularity)+5),log(fieldMatch(title).proximity) * fieldMatch(title).completeness)")); + } + + public void testIsNan() throws ParseException { + String strExpr = "if (isNan(attribute(foo)) == 1.0, 1.0, attribute(foo))"; + RankingExpression expr = new RankingExpression(strExpr); + CompositeNode root = (CompositeNode)expr.getRoot(); + CompositeNode comparison = (CompositeNode)root.children().get(0); + ExpressionNode isNan = comparison.children().get(0); + assertTrue(isNan instanceof FunctionNode); + assertEquals("isNan(attribute(foo))", isNan.toString()); + } + + protected static void assertParse(String expected, String expression) throws ParseException { + assertEquals(expected, new 
RankingExpression(expression).toString()); + } + + private void assertScript(String expression, List macros, List expectedScripts) + throws ParseException { + boolean print = false; + if (print) + System.out.println("Parsing expression '" + expression + "'."); + + RankingExpression exp = new RankingExpression(expression); + Map scripts = exp.getRankProperties(macros); + if (print) { + for (String key : scripts.keySet()) { + System.out.println("Script '" + key + "': " + scripts.get(key)); + } + } + + for (Map.Entry m : scripts.entrySet()) + System.out.println(m); + for (int i = 0; i < expectedScripts.size();) { + String val = expectedScripts.get(i++); + assertTrue("Script contains " + val, scripts.containsValue(val)); + } + if (print) + System.out.println(""); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java new file mode 100644 index 00000000000..7690efb1112 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/Benchmark.java @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation;
+
+import com.yahoo.searchlib.rankingexpression.RankingExpression;
+import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer;
+import com.yahoo.searchlib.rankingexpression.parser.ParseException;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Command line benchmark which reads a ranking expression from a file and evaluates it
+ * in three configurations: without optimization, with the expression optimizer run while
+ * {@link GBDTForestOptimizer} is disabled, and with the expression optimizer run as is.
+ * The three results are required to agree to within 1e-6, and the time spent by each
+ * configuration is printed.
+ *
+ * @author Simon Thoresen
+ */
+public final class Benchmark {
+
+    /**
+     * Runs the benchmark.
+     *
+     * @param args args[0] is the file containing the expression; the optional args[1] is the
+     *             number of evaluations per configuration (1000 when not given)
+     */
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            System.err.println("Usage: Benchmark <filename> [<numRuns>]");
+            System.exit(1);
+        }
+        int numRuns = 1000;
+        if (args.length == 2) {
+            try {
+                numRuns = Integer.parseInt(args[1]);
+            } catch (NumberFormatException e) {
+                System.err.println("Illegal number of runs '" + args[1] + "': " + e);
+                System.exit(1);
+            }
+        }
+        List<Result> res = new ArrayList<Result>();
+        try {
+            String exp = readExpression(args[0]);
+            res.add(evaluateTree(exp, numRuns));
+            res.add(evaluateTreeOptimized(exp, numRuns));
+            res.add(evaluateForestOptimized(exp, numRuns));
+        } catch (IOException e) {
+            System.err.println("An error occured while reading the content of file '" + args[0] + "': " + e);
+            System.exit(1);
+        } catch (ParseException e) {
+            System.err.println("An error occured while parsing the content of file '" + args[0] + "': " + e);
+            System.exit(1);
+        }
+        for (Result lhs : res) {
+            // Every configuration must produce the same value as every other one.
+            for (Result rhs : res) {
+                if (lhs.res < rhs.res - 1e-6 || lhs.res > rhs.res + 1e-6) {
+                    System.err.println("Evaluation of '" + lhs.name + "' and '" + rhs.name + "' disagree on result; " +
+                                       "expected " + lhs.res + ", got " + rhs.res + ".");
+                    System.exit(1);
+                }
+            }
+            // The last column is the time of the first (unoptimized) result divided by the time of this one.
+            System.out.format("%1$-16s : %2$8.04f ms (%3$-6.04f)\n",
+                              lhs.name, lhs.millis, res.get(0).millis / lhs.millis);
+        }
+    }
+
+    /**
+     * Reads the given file and returns its lines concatenated, without any line separators.
+     * The reader is closed whether or not reading succeeds.
+     *
+     * @param fileName the file to read
+     * @return the concatenated lines of the file
+     * @throws IOException if the file cannot be opened or read
+     */
+    private static String readExpression(String fileName) throws IOException {
+        BufferedReader in = new BufferedReader(new FileReader(fileName));
+        try {
+            StringBuilder str = new StringBuilder();
+            String line;
+            while ((line = in.readLine()) != null) {
+                str.append(line);
+            }
+            return str.toString();
+        } finally {
+            in.close();
+        }
+    }
+
+    /** Benchmarks the expression as parsed, using a {@link MapContext}. */
+    private static Result evaluateTree(String str, int numRuns) throws ParseException {
+        Result ret = new Result();
+        ret.name = "Unoptimized";
+
+        RankingExpression exp = new RankingExpression(str);
+        List<String> vars = new LinkedList<String>();
+        getFeatures(exp.getRoot(), vars);
+
+        benchmark(exp, vars, new MapContext(), numRuns, ret);
+        return ret;
+    }
+
+    /** Benchmarks the expression after optimizing it with {@link GBDTForestOptimizer} disabled. */
+    private static Result evaluateTreeOptimized(String str, int numRuns) throws ParseException {
+        Result ret = new Result();
+        ret.name = "Optimized tree";
+
+        RankingExpression exp = new RankingExpression(str);
+        List<String> vars = new LinkedList<String>();
+        getFeatures(exp.getRoot(), vars);
+
+        ArrayContext ctx = new ArrayContext(exp);
+        ExpressionOptimizer optimizer = new ExpressionOptimizer();
+        optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false);
+        optimizer.optimize(exp, ctx);
+
+        benchmark(exp, vars, ctx, numRuns, ret);
+        return ret;
+    }
+
+    /** Benchmarks the expression after optimizing it with an {@link ExpressionOptimizer} whose settings are left untouched. */
+    private static Result evaluateForestOptimized(String str, int numRuns) throws ParseException {
+        Result ret = new Result();
+        ret.name = "Optimized forest";
+
+        RankingExpression exp = new RankingExpression(str);
+        List<String> vars = new LinkedList<String>();
+        getFeatures(exp.getRoot(), vars);
+
+        ArrayContext ctx = new ArrayContext(exp);
+        ExpressionOptimizer optimizer = new ExpressionOptimizer();
+        optimizer.optimize(exp, ctx);
+
+        benchmark(exp, vars, ctx, numRuns, ret);
+        return ret;
+    }
+
+    /**
+     * Binds a value to each feature, evaluates the expression numRuns times without timing,
+     * then evaluates it numRuns times more while measuring the elapsed time.
+     *
+     * @param exp     the expression to evaluate
+     * @param vars    the feature names to bind; feature number i of n is bound to i/n
+     * @param ctx     the context to bind the features in and evaluate against
+     * @param numRuns the number of evaluations in each of the two passes
+     * @param out     receives the last evaluation result and the duration of the timed pass
+     */
+    private static void benchmark(RankingExpression exp, List<String> vars, Context ctx, int numRuns, Result out) {
+        int len = vars.size();
+        int index = 0;
+        // Iterate rather than calling get(i), as the callers pass linked lists.
+        for (String var : vars) {
+            ctx.put(var, index / (double)len);
+            ++index;
+        }
+        // Untimed pass, run before the clock is started.
+        for (int i = 0; i < numRuns; ++i) {
+            out.res = exp.evaluate(ctx).asDouble();
+        }
+        long begin = System.nanoTime();
+        for (int i = 0; i < numRuns; ++i) {
+            out.res = exp.evaluate(ctx).asDouble();
+        }
+        long end = System.nanoTime();
+
+        out.millis = (end - begin) / (1000.0 * 1000.0);
+    }
+
+    /**
+     * Collects the names of all reference nodes at or below the given node, depth first,
+     * adding each distinct name once in the order it is first encountered.
+     *
+     * @param node the root of the (sub)expression to traverse
+     * @param out  the list to add feature names to
+     */
+    private static void getFeatures(ExpressionNode node, List<String> out) {
+        if (node instanceof ReferenceNode) {
+            String feature = ((ReferenceNode)node).getName();
+            if (!out.contains(feature)) {
+                out.add(feature);
+            }
+        } else if (node instanceof CompositeNode) {
+            CompositeNode cNode = (CompositeNode)node;
+            for (ExpressionNode child : cNode.children()) {
+                getFeatures(child, out);
+            }
+        }
+    }
+
+    /** The outcome of benchmarking one configuration. */
+    private static class Result {
+        /** The label printed for this configuration. */
+        String name = "anonymous";
+        /** The duration of the timed pass, in milliseconds. */
+        double millis = Double.MAX_VALUE;
+        /** The value produced by the last evaluation. */
+        double res = 0;
+    }
+}
diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java new file mode 100644 index 00000000000..708235647e6 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationBenchmark.java @@ -0,0 +1,474 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.io.IOUtils; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; + +/** + * Two small benchmarks of ranking expression evaluation + * + * @author bratseth + */ +public class EvaluationBenchmark { + + public void run() { + try { + //runNativeComparison(100*1000*1000); + + // benchmark with a large gbdt: Expected tree and forest speedup: 2x, 4x + runGBDT(1000*1000, gbdt); + + // benchmark with a large gbdt using set membership tests (on integers) extensively + // we simplify the attribute name to make it work with the map context implementation. 
+ // Expected tree and forest speedup: 3x, 4x + // runGBDT(100*1000, readFile("src/test/files/ranking07.expression").replace("attribute(catid)","catid")); + } + catch (ParseException e) { + throw new RuntimeException("Benchmarking failed",e); + } + } + + private String readFile(String file) { + try { + return IOUtils.readFile(new File(file)); + } catch (IOException e) { + throw new AssertionError(e); + } + } + + public void runNativeComparison(int iterations) { + oul("Running native expression..."); + MapContext arguments=new MapContext(); + arguments.put("one",1d); + + out(" warming up..."); + double nativeTotal=0; + for (int i=0; i35) { + if (context.get("i").asDouble()>context.get("one").asDouble()) { + if (context.get("i").asDouble()>=670) + r=4; + else + r=8; + } + else { + if (context.get("i").asDouble()>8000) + r=5; + else + r=3; + } + } + else { + if (context.get("i").asDouble()==478) + r=90; + else + r=91; + } + return r*10; + } + + private void runGBDT(int iterations, String gbdtString) throws ParseException { + + // Unoptimized............... + double total = benchmark(new RankingExpression(gbdtString), new MapContext(), iterations, "Unoptimized"); + System.out.println("-----------------------------------------------------------------------------------------------------"); + + // Tree optimized................... + RankingExpression treeOptimized = new RankingExpression(gbdtString); + ArrayContext treeContext = new ArrayContext(treeOptimized, true); + ExpressionOptimizer optimizer = new ExpressionOptimizer(); + optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false); + System.out.print("Tree optimizing ... 
"); + OptimizationReport treeOptimizationReport = optimizer.optimize(treeOptimized, treeContext); + System.out.println("done"); + System.out.println(treeOptimizationReport); + double treeTotal = benchmark(treeOptimized, treeContext, iterations, "Tree optimized"); + assertEqualish(total, treeTotal); + System.out.println("-----------------------------------------------------------------------------------------------------"); + + // Forest optimized................... + RankingExpression forestOptimized=new RankingExpression(gbdtString); + DoubleOnlyArrayContext forestContext = new DoubleOnlyArrayContext(forestOptimized, true); + System.out.print("Forest optimizing ... "); + OptimizationReport forestOptimizationReport=new ExpressionOptimizer().optimize(forestOptimized, forestContext); + System.out.println("done"); + System.out.println(forestOptimizationReport); + double forestTotal=benchmark(forestOptimized,forestContext,iterations,"Forest optimized"); + assertEqualish(total,forestTotal); + System.out.println("-----------------------------------------------------------------------------------------------------"); + } + + private double benchmark(RankingExpression gbdt, Context context, int iterations, String description) { + oul("Running '" + description + "':"); + out(" Warming up ..."); + double total=0; + total+=benchmarkIterations(gbdt,context,iterations/5); + oul("done"); + + out(" Running " + iterations + " of '" + description + "' ..."); + long tStartTime=System.currentTimeMillis(); + total+=benchmarkIterations(gbdt,context,iterations); + long totalTime=System.currentTimeMillis()-tStartTime; + oul("done"); + oul(" Total time running '" + description + "': " + totalTime + " ms (" + totalTime*1000/iterations + " microseconds/expression)"); + return total; + } + + private double benchmarkIterations(RankingExpression gbdt, Context contextPrototype, int iterations) { + // This tries to simulate realistic use: The array context can be reused for a series of 
evaluations in a thread + // but each evaluation binds a new set of values. + double total=0; + Context context = copyForEvaluation(contextPrototype); + for (int i=0; i= Math.abs((a+b)/100000000) ) + throw new RuntimeException("Expected value " + a + " but optimized evaluation produced " + b); + } + + private final String gbdt = + "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS < 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.72503, -0.00239817, if (LW_USERS < 0.0977572, if (ISABSTRACT_AVG < 0.04, 0.041602, 0.0157381), if (LW_USERS < 0.602112, 0.0118004, 7.92829E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.53348, -0.00227065, if (LW_USERS < 0.0613667, 0.0345214, if (NUM_WORDS < 1.5, -9.25274E-4, if (BIDDED_SEARCHES < 0.538873, 0.0207086, 0.00549622)))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.50465, -0.00206609, if (LW_USERS < 0.183424, if (NUM_WORDS < 1.5, 0.00203703, if (BIDDED_SEARCHES < 0.0686975, 0.0412142, 0.0219894)), 0.00246537)) + \n" + + "if (NEWS_USERS < 0.0737993, -0.00298889, if (LW_USERS < 0.212577, if (NUM_WORDS < 1.5, 0.00385669, 0.0260773), if (NUM_WORDS < 1.5, -0.00141889, 0.00565858))) + \n" + + "if (NEWS_USERS < 0.0737993, -0.0026984, if (BIDDED_SEARCHES < 0.202548, if (NUM_WORDS < 1.5, 0.00356601, 0.026572), if (SUGG_OVERLAP < 34.5, 0.00642933, -8.83847E-4))) + \n" + + "if 
(NEWS_MAIN_SEARCHES_RATIO < 8.47575, if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.0031992, if (ISTITLE_AVG < 0.315, 0.0106735, 1.98748E-4)), 0.00717291), 0.0216488) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.79697, if (NEWS_CTR < 0.659695, -0.0018297, 0.0062345), if (BIDDED_SEARCHES < 0.148816, if (NUM_WORDS < 1.5, 0.00397494, 0.0282706), 0.00287526)) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.81978, if (NUM_WORDS < 2.5, -0.00183825, 0.00447334), if (SUGG_OVERLAP < 8.5, if (SEARCHES < 0.0692601, 0.0319928, 0.0121653), 0.0010403)) + \n" + + "if (NEWS_CTR < 0.660025, if (PREV_DAY_NEWS_CTR_RATIO < 0.502543, if (SEARCHES < 0.245402, 0.0193446, 9.09694E-4), -0.00160176), if (NEWS_MAIN_SEARCHES_RATIO < 1.64873, 0.00264489, 0.0177375)) + \n" + + "if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.00238821, if (LW_USERS < 0.0143922, 0.0188957, 8.0445E-4)), if (LW_NEWS_SEARCHES_RATIO < 1.32846, 0.00349568, 0.015966)) + \n" + + "if (NUM_WORDS < 2.5, if (NEWS_USERS < 0.0737993, -0.002169, if (ISTITLE_AVG < 0.625, 0.00906748, -2.5122E-4)), if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.69164, 0.0039487, 0.0174816)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 8.66642, if (NUM_WORDS < 2.5, -8.59968E-4, if (NEWS_CTR < 0.632914, 0.00287223, 0.0148924)), if (SEARCHES < 0.0237478, 0.033539, 0.0071663)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 1.26315, -0.00130179, if (NEWS_CTR < 0.628621, if (PREV_DAY_NEWS_CTR_RATIO < 0.525166, if (SUGG_OVERLAP < 9.5, 0.0171556, 2.36297E-4), 2.29746E-4), 0.0123793)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 1.88252, if (NEWS_USERS < 0.0737993, -0.00207461, 6.60118E-4), if (NEWS_USERS < 0.0737993, 9.39125E-4, if (SEARCHES < 0.0248661, 0.0272446, 0.00973038))) + \n" + + "if (NUM_WORDS < 1.5, -0.0018842, if (NEWS_USERS < 0.0737993, -5.44658E-4, if (PREV_DAY_USERS < 0.43141, if (PREV_DAY_NEWS_CTR < 0.447268, 4.25375E-4, 0.0152695), 0.00230817))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 2.6946, -7.37738E-4, if (NEWS_CTR < 0.618656, if 
(PREV_DAY_NEWS_CTR_RATIO < 0.522617, if (ISTITLE_AVG < 0.21, 0.0202984, 0.00221158), 8.26792E-4), 0.0131518)) + \n" + + "if (NUM_WORDS < 3.5, if (NEWS_CTR < 0.660239, if (PREV_DAY_NEWS_CTR_RATIO < 0.505308, 0.00214801, -0.00113168), if (NEWS_MAIN_SEARCHES_RATIO < 0.9266, 1.28813E-4, 0.0090932)), 0.0111807) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 1.27238, -9.46325E-4, if (NEWS_USERS < 0.0737993, 2.20417E-4, if (ISTITLE_AVG < 0.435, 0.0143694, if (MIN_SCORE < 243538.0, 1.76879E-4, 0.00682761)))) + \n" + + "if (NUM_WORDS < 3.5, if (NUM_WORDS < 1.5, -0.00153422, if (NEWS_USERS < 0.0737993, -6.54983E-4, if (PREV_DAY_NEWS_CTR < 0.55636, -4.40154E-4, 0.00666305))), 0.00961529) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 1.88316, -6.18023E-4, if (NEWS_USERS < 0.0737993, if (NUM_WORDS < 2.5, -4.22107E-4, 0.00583448), if (SEARCHES < 0.0202227, 0.0218746, 0.0061446))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 1.91611, if (NEWS_MAIN_SEARCHES_RATIO < 0.384315, -0.0015553, 2.57266E-4), if (NEWS_CTR < 0.659281, if (NUM_WORDS < 2.5, 2.40504E-4, 0.00572176), 0.0105389)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 2.68704, -5.65225E-4, if (NEWS_CTR < 0.782417, if (PREV_DAY_NEWS_CTR_RATIO < 0.990517, if (NEWS_SEARCHES < 0.339382, 0.0135414, 0.00113811), 5.21526E-4), 0.0112535)) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, 0.00560086, if (NUM_WORDS < 1.5, -0.00130462, if (NEWS_USERS < 0.0737993, -7.52446E-4, if (BIDDED_SEARCHES < 1.29452, 0.00626868, 1.75195E-4)))) + \n" + + "if (NUM_WORDS < 3.5, if (NUM_WORDS < 1.5, -0.00114958, if (NEWS_USERS < 0.0737993, -5.00434E-4, if (PREV_DAY_NEWS_CTR < 0.563721, -6.96671E-4, 0.00517722))), 0.00807433) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 0.382901, -0.00122923, if (NEWS_USERS < 0.0737993, -4.15058E-4, if (ISABSTRACT_AVG < 0.095, if (PREV_DAY_NEWS_CTR < 0.557042, 8.71338E-4, 0.00994663), 1.56446E-4))) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, if (MAX_SCORE < 379805.0, 0.00362486, 0.0132902), if (NEWS_CTR < 0.913345, -3.53901E-4, if (NEWS_USERS 
< 2.48409, 0.00191813, 0.013908))) + \n" + + "if (HAS_NEWS_QC == 0.0, if (NUM_WORDS < 3.5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.90333, -6.26897E-4, if (ISTITLE_AVG < 0.355, 0.00723851, -2.62543E-5)), 0.0058211), 0.00433763) + \n" + + "if (NUM_WORDS < 2.5, if (NEWS_USERS < 2.28805, -5.10768E-4, 0.00255996), if (LW_MAIN_SEARCHES_RATIO < 1.84597, 3.31329E-4, if (DAY_WEEK_AVG_RATIO < 2.655, 0.00434755, 0.0196317))) + \n" + + "if (HAS_NEWS_QC == 0.0, if (BIDDED_SEARCHES < 0.0119577, if (PREV_DAY_NEWS_CTR_RATIO < 0.928266, 0.0111871, 0.00198432), -3.24627E-4), if (NEWS_MAIN_SEARCHES_RATIO < 2.71304, 0.00196875, 0.00945297)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 1.82872, -4.20354E-4, if (DAY_PD_HITS_RATIO < 3.61, if (NEWS_MAIN_SEARCHES_RATIO < 12.766, 7.51735E-4, if (LW_NEWS_SEARCHES_RATIO < 6.15807, 0.0147332, -0.0135118)), 0.010677)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 0.327632, -0.00102446, if (NEWS_USERS < 0.0737993, -3.80041E-4, if (ISABSTRACT_AVG < 0.105, if (NEWS_SEARCHES < 0.286926, 0.00928139, 0.00265099), 8.96147E-5))) + \n" + + "if (ALGO_CTR < 1.05585, if (HAS_NEWS_QC == 0.0, -4.34462E-4, 0.00319487), if (PREV_DAY_NEWS_CTR_RATIO < 0.541632, if (DAY_PD_HITS_RATIO < 5.75, 0.00845667, 0.0571546), 0.00162096)) + \n" + + "if (NUM_WORDS < 3.5, if (LW_NEWS_CTR < 0.59494, -3.29593E-4, if (NEWS_MAIN_SEARCHES_RATIO < 1.24936, 3.83584E-4, if (MAX_SCORE < 263568.0, 0.00219784, 0.0104741))), 0.00532617) + \n" + + "if (NUM_WORDS < 3.5, if (MAX_SCORE < 268176.0, -5.00757E-4, if (NEWS_MAIN_SEARCHES_RATIO < 0.812821, -3.72572E-4, if (NEWS_CTR < 0.898792, 0.0017999, 0.00908918))), 0.00538528) + \n" + + "if (ISTITLE_AVG < 0.705, if (NEWS_USERS < 0.0737993, 2.51012E-5, if (BIDDED_SEARCHES < 1.61095, if (YSM_N_ALGO_CTR_RATIO < 6.42257E-4, 0.0804317, 0.00586482), -4.26664E-4)), -4.79119E-4) + \n" + + "if (NUM_WORDS < 3.5, if (HAS_NEWS_QC == 0.0, -1.93562E-4, if (LW_MAIN_SEARCHES_RATIO < 1.72448, 0.00109732, 0.00738421)), if (NEWS_MAIN_SEARCHES_RATIO < 0.406201, -0.00263026, 
0.00733129)) + \n" + + "if (BIDDED_SEARCHES < 0.0120163, 0.00278665, if (NEWS_USERS < 2.75198, -3.22197E-4, if (NEWS_MAIN_CTR_RATIO < 1.4679, 0.00148229, if (PREV_DAY_USERS < 0.117185, 0.0517723, 0.010204)))) + \n" + + "if (LW_NEWS_CTR < 0.597955, if (SUGG_OVERLAP < 0.5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.79767, 6.24799E-4, 0.0051004), -5.51886E-4), if (NEWS_MAIN_SEARCHES_RATIO < 0.660064, 2.21724E-4, 0.00474931)) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, 0.0030367, if (NEWS_USERS < 2.65484, -3.02764E-4, if (LW_MAIN_SEARCHES_RATIO < 1.39539, 6.36888E-4, if (NEWS_MAIN_CTR_RATIO < 2.18629, 0.00661051, 0.0228632)))) + \n" + + "if (LW_NEWS_CTR < 0.619817, if (LW_USERS < 0.0143922, 0.0012313, -4.11044E-4), if (NEWS_MAIN_SEARCHES_RATIO < 1.63866, 6.94464E-4, if (LW_MAIN_SEARCHES_RATIO < 2.79335, 0.00448877, 0.0171177))) + \n" + + "if (HAS_NEWS_QC == 0.0, if (ALGO_CTR < 1.1644, -2.80479E-4, 0.002092), if (NUM_WORDS < 2.5, 9.21741E-4, if (LW_MAIN_CTR_RATIO < 0.771928, 0.018042, 0.00519068))) + \n" + + "if (MAX_SCORE < 270938.0, -3.72001E-4, if (NEWS_MAIN_SEARCHES_RATIO < 0.382818, -8.43057E-4, if (NEWS_USERS < 0.0737993, 2.74749E-4, if (ISABSTRACT_AVG < 0.355, 0.00699732, 9.68093E-4)))) + \n" + + "if (NEWS_CTR < 0.187967, -0.00236148, if (LW_NEWS_CTR_RATIO < 0.501045, if (ISABSTRACT_AVG < 0.065, if (USERS < 0.79806, 0.00751647, 5.67897E-4), -1.95953E-4), -1.28664E-4)) + \n" + + "if (NEWS_CTR < 0.916156, if (NEWS_CTR < 0.131787, -0.00260812, -2.96076E-6), if (LW_MAIN_SEARCHES_RATIO < 1.7079, if (LW_NEWS_CTR < 0.827357, -0.00103106, 0.00752405), 0.00712343)) + \n" + + "if (ALGO_CTR < 1.11796, -9.56953E-5, if (LW_NEWS_CTR_RATIO < 0.965768, if (PREV_DAY_NEWS_CTR_RATIO < 0.318964, -0.0068748, if (DAY_PD_HITS_RATIO < 5.9, 0.00781228, 0.0430918)), 0.0010225)) + \n" + + "if (ISTITLE_AVG < 0.785, if (PREV_DAY_NEWS_CTR_RATIO < 0.937235, if (BIDDED_SEARCHES < 0.549316, 0.00782989, 5.1726E-4), if (LW_MAIN_SEARCHES_RATIO < 14.3819, -7.98452E-5, 0.00931358)), -3.44667E-4) + \n" + + 
"if (NUM_WORDS < 4.5, if (HAS_NEWS_QC == 0.0, -1.1162E-4, if (LW_NEWS_CTR < 0.625492, 0.00137801, if (NEWS_MAIN_SEARCHES_RATIO < 3.2392, 0.00481811, 0.0203582))), 0.00957663) + \n" + + "if (NUM_WORDS < 4.5, if (NEWS_MAIN_SEARCHES_RATIO < 12.878, -7.973E-5, if (SUGG_LW < 0.5, 0.0113112, if (PREV_DAY_NEWS_USERS < 1.63248, -0.0093633, 0.0081117))), 0.00891687) + \n" + + "if (NEWS_CTR < 0.260948, -0.00146919, if (PREV_DAY_NEWS_CTR_RATIO < 0.949304, if (NEWS_MAIN_SEARCHES_RATIO < 0.305788, -5.28063E-4, if (MIN_SCORE < 199600.0, 8.23835E-4, 0.00533948)), -1.59293E-4)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 0.116451, -0.00113111, if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, if (NEWS_SEARCHES < 0.30129, if (ISTITLE_AVG < 0.61, 0.00769846, 0.00162987), -2.39796E-4), -1.20795E-4)) + \n" + + "if (NEWS_USERS < 2.75198, -1.04934E-4, if (NEWS_CTR < 0.504788, -3.87773E-4, if (BIDDED_SEARCHES < 3.77166, if (LW_MAIN_SEARCHES_RATIO < 1.76307, 0.00639344, 0.0180493), 0.00240808))) + \n" + + "if (NUM_WORDS < 4.5, if (LW_NEWS_CTR < 0.789202, -2.11327E-4, if (NEWS_USERS < 0.312345, -4.52231E-4, if (SCIENCE < 0.535, 0.00367411, 0.0491292))), 0.00847389) + \n" + + "if (NEWS_CTR < 0.182514, -0.00177053, if (LW_NEWS_CTR_RATIO < 0.501045, if (USERS < 1.36009, if (MIN_SCORE < 187234.0, 3.6643E-4, 0.0055156), -0.0011557), -8.54842E-5)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.32584, if (NEWS_CTR < 1.19657, 0.00362961, if (PREV_DAY_NEWS_CTR_RATIO < 2.37995, if (NEWS_MAIN_SEARCHES_RATIO < 2.07684, 0.0176304, 0.0773353), 0.00489339)), -2.00322E-5) + \n" + + "if (AVG_SCORE < 354962.0, -1.53495E-4, if (NEWS_CTR < 0.596437, if (LW_SEARCHES < 0.0532569, 0.00410978, -0.00116517), if (LW_MAIN_CTR_RATIO < 0.779754, 0.0149197, 0.00348209))) + \n" + + "if (PREV_DAY_NEWS_USERS < 14.0861, if (BIDDED_SEARCHES < 3.24749, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.63285, -8.28682E-5, if (NEWS_SEARCHES < 0.317829, 0.00348768, -6.08623E-4)), -0.00114994), 0.00458862) + \n" + + "if (ISABSTRACT_AVG < 0.295, if 
(NEWS_USERS < 0.0737993, -1.36945E-4, if (MIN_SCORE < 233429.0, 2.59393E-5, if (NEWS_MAIN_SEARCHES_RATIO < 0.221135, -7.57098E-4, 0.00463699))), -4.62083E-4) + \n" + + "if (ALGO_CTR < 1.01522, -1.09825E-4, if (LW_NEWS_CTR_RATIO < 0.55285, if (LW_MAIN_SEARCHES_RATIO < 5.11061, if (NEWS_SEARCHES < 1.02345, 0.00847552, -0.00437523), -0.0112885), 6.61898E-4)) + \n" + + "if (NEWS_USERS < 4.05804, if (LW_NEWS_SEARCHES_RATIO < 6.67644, -1.03466E-5, if (USERS < 0.101853, -0.0245653, -0.00297792)), if (NEWS_MAIN_CTR_RATIO < 1.09325, 6.6298E-4, 0.00723109)) + \n" + + "if (NUM_WORDS < 4.5, if (LW_NEWS_USERS < 31.8516, -4.91517E-5, 0.00701562), if (ALGO_CLICKS < 0.012133, 0.020461, if (DAY_WEEK_AVG_RATIO < 2.93, 8.3867E-4, 0.0326788))) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.9286, if (NEWS_MAIN_SEARCHES_RATIO < 60.9048, 6.59836E-5, 0.0391173), if (NEWS_USERS < 0.223578, -0.0109831, if (NEWS_MAIN_SEARCHES_RATIO < 36.1125, -9.18296E-4, -0.0321067))) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.92945, if (NEWS_MAIN_SEARCHES_RATIO < 12.878, 3.89745E-5, if (PREV_DAY_NEWS_CTR < 0.537022, -0.00162034, 0.0079279)), if (NEWS_USERS < 0.245347, -0.0101132, -0.00126814)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.480833, if (NEWS_USERS < 0.0737993, 9.57273E-5, if (SUGG_LW < 12.5, if (PUB_TODAY_AVG < 0.355, 0.0161319, -0.00334364), 0.00260343)), -7.52983E-5) + \n" + + "if (PREV_DAY_NEWS_USERS < 38.5221, if (BIDDED_SEARCHES < 3.7973, if (PREV_DAY_NEWS_CTR_RATIO < 0.999247, if (ISABSTRACT_AVG < 0.075, 0.00272842, -3.86777E-5), -1.51219E-4), -0.00100249), 0.00670928) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.77887, 9.37848E-5, if (NEWS_USERS < 0.245347, if (SEARCHES < 0.013024, if (ENTERTAINMENT_QC == 0.0, 0.0110759, 0.0905384), -0.00681271), -6.6913E-4)) + \n" + + "if (NEWS_CTR < 0.916322, if (LW_NEWS_SEARCHES_RATIO < 5.23703, 2.81507E-5, if (SEARCHES < 0.233024, -0.0177547, -0.00220902)), if (NEWS_USERS < 2.30165, 0.00110318, 0.00810944)) + \n" + + "if (HAS_NEWS_QC == 0.0, 
-1.08882E-4, if (MAX_SCORE < 137730.0, if (ALGO_CTR < 0.489733, 0.0199541, 0.0026349), if (NEWS_USERS < 2.20454, -3.16208E-4, 0.00699663))) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, if (LW_NEWS_USERS < 1.81124, 0.00173624, if (PREV_DAY_USERS < 1.36892, 0.0405308, -0.00100716)), if (NEWS_MAIN_SEARCHES_RATIO < 58.9771, -1.26569E-4, 0.0286363)) + \n" + + "if (LW_NEWS_CTR < 0.621598, -1.10247E-4, if (LW_MAIN_SEARCHES_RATIO < 0.317173, 0.0110308, if (ALGO_CTR < 1.26031, 9.13964E-4, if (ALGO_CTR < 1.27034, 0.0667268, 0.00722662)))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 25.7554, -6.12962E-6, if (LW_NEWS_SEARCHES < 0.765878, if (DAY_WEEK_AVG_RATIO < 1.475, if (PREV_DAY_NEWS_SEARCHES < 0.285188, 0.00389095, -0.0350617), -0.0440429), -7.44561E-4)) + \n" + + "if (DAY_PD_HITS_RATIO < 16.25, -5.78971E-5, if (INTLNEWS < 0.235, if (BIDDED_SEARCHES < 0.401931, if (PREV_DAY_MAIN_CTR_RATIO < 0.852642, 0.00517, 0.0517763), 0.00726245), 0.00172079)) + \n" + + "if (DAY_PD_HITS_RATIO < 18.89, -9.58573E-5, if (NEWS_MAIN_CTR_RATIO < 4.42646, if (LW_MAIN_SEARCHES_RATIO < 1.64955, -0.00540243, if (PREV_DAY_CTR < 0.823034, 0.0147119, -0.00456252)), 0.0476969)) + \n" + + "if (LW_CTR < 1.01377, -9.34648E-5, if (NEWS_USERS < 0.0737993, -6.338E-5, if (MIN_SCORE < 376483.0, 0.00251265, if (LW_MAIN_SEARCHES_RATIO < 0.683623, 0.0350855, 0.00794114)))) + \n" + + "if (ISABSTRACT_AVG < 0.315, if (NEWS_USERS < 0.0737993, -1.37636E-4, if (LW_MAIN_SEARCHES_RATIO < 0.661526, if (SUGG_LW < 3.5, 0.0168399, 0.00323338), 9.73973E-4)), -4.12741E-4) + \n" + + "if (LW_CTR < 1.01683, -1.32017E-4, if (LW_NEWS_CTR_RATIO < 0.500058, if (SCIENCE < 0.55, 0.0039965, 0.0428649), if (NEWS_CTR < 0.594088, 3.24961E-6, 0.00367602))) + \n" + + "if (LW_NEWS_CTR < 0.856244, -1.10246E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 10.6833, if (LW_MAIN_SEARCHES_RATIO < 0.31726, if (LW_NEWS_CTR_RATIO < 1.23633, 0.00906872, 0.0473513), 0.00134361), 0.041372)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 6.69974, -1.86907E-5, if 
(NEWS_MAIN_CTR_RATIO < 1.46029, if (LW_NEWS_SEARCHES_RATIO < 6.53657, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.316051, 0.0332713, 0.00117973), -0.010984), 0.00761193)) + \n" + + "if (NEWS_CTR < 0.237839, if (USERS < 0.0168938, 0.0267063, if (LW_USERS < 0.0827926, if (PREV_DAY_NEWS_CTR < 1.08233, -0.0138873, 0.0330313), -8.56477E-4)), 1.37177E-4) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 7.02911, 5.45191E-5, if (USERS < 0.118739, -0.0243638, if (NEWS_MAIN_CTR_RATIO < 1.63574, if (SEARCHES < 0.478602, -0.0123115, -0.00225071), 0.0054502))) + \n" + + "if (BIDDED_SEARCHES < 3.7973, if (NEWS_USERS < 2.20454, 8.53898E-5, if (NEWS_MAIN_CTR_RATIO < 1.9298, 0.00163898, if (SUGG_OVERLAP < 34.0, 0.0222897, 0.00356636))), -8.81981E-4) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, if (MIN_SCORE < 253612.0, -5.12189E-4, if (MAX_MIN_SCORE < 35925.0, 0.00252377, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.610935, 0.0432434, 0.00906418))), -1.01198E-4) + \n" + + "if (DAY_PD_HITS_RATIO < 24.585, if (ALGO_CTR < 3.15833, -2.12884E-5, 0.0175937), if (PREV_DAY_CTR < 0.824546, if (LW_NEWS_CTR < 0.651434, 0.011673, 0.0567104), -0.00676867)) + \n" + + "if (LW_CTR < 1.551, if (LW_NEWS_USERS < 3.59178, -1.29153E-4, if (SUGG_LW < 46.5, 0.00702818, 2.27956E-4)), if (NEWS_MAIN_SEARCHES_RATIO < 8.86382, 0.0028952, 0.0366156)) + \n" + + "if (DAY_PD_HITS_RATIO < 18.89, -5.51307E-6, if (YSM_CTR < 0.0178362, if (ALGO_CLICKS < 0.127132, 0.0471277, if (SUGG_TW < 0.975545, 0.0048341, 0.0335537)), -0.00344397)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 8.21211, -5.10935E-5, if (DAY_WEEK_AVG_RATIO < 1.205, -4.84709E-4, if (NEWS_MAIN_SEARCHES_RATIO < 2.63328, if (LW_NEWS_SEARCHES_RATIO < 1.83743, 0.0125448, -0.00162932), 0.0144536))) + \n" + + "if (ALGO_CTR < 1.01463, -1.17159E-4, if (PREV_DAY_NEWS_CTR_RATIO < 0.780396, if (USERS < 0.614133, if (MAX_MIN_SCORE < 54869.8, 0.00624085, 0.0337856), 7.62548E-4), 3.62126E-4)) + \n" + + "if (NUM_WORDS < 3.5, -1.00136E-5, if (PREV_DAY_NEWS_CTR_RATIO < 0.958905, if (PREV_DAY_USERS 
< 0.377834, if (YSM_N_ALGO_CTR_RATIO < 0.189731, 0.0259994, -0.0142924), 4.37294E-4), 9.62911E-4)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 92.7164, if (LW_NEWS_CTR < 0.822371, -4.99393E-5, if (PREV_DAY_MAIN_SEARCHES_RATIO < 13.0501, if (NEWS_USERS < 0.309237, -8.38369E-4, 0.00312145), 0.043612)), -0.00674822) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.51597, 1.01649E-4, if (SEARCHES < 0.0202227, if (PREV_DAY_MAIN_CTR_RATIO < 1.20113, 0.00953861, 0.0583575), if (USERS < 0.295073, -0.00536031, -4.99861E-4))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.146655, 0.00684325, if (LW_CTR < 1.43439, -5.31424E-5, if (NEWS_MAIN_SEARCHES_RATIO < 11.7367, if (PREV_DAY_NEWS_CTR_RATIO < 0.541013, 0.0101571, 0.0013804), 0.0362471))) + \n" + + "if (LW_NEWS_SEARCHES < 5.77429, -9.91104E-5, if (NEWS_CTR < 1.71804, if (SUGG_OVERLAP < 32.5, if (HAS_NEWS_QC == 0.0, 0.00333027, 0.0179206), 4.42358E-4), 0.0445137)) + \n" + + "if (ISABSTRACT_AVG < 0.435, if (NEWS_USERS < 0.158915, -2.22842E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, 0.00311367, if (USERS < 0.119577, -0.00919024, 7.29693E-4))), -3.98811E-4) + \n" + + "if (ALGO_CLICKS < 4.04596, if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.452288, 3.21367E-5, -0.00726485), if (LOCAL_QC == 1.0, -0.00144797, 0.00132603)), -9.1988E-4) + \n" + + "if (NEWS_CTR < 0.25921, -8.87978E-4, if (PREV_DAY_NEWS_CTR_RATIO < 0.530395, if (USERS < 0.710459, if (MAX_MIN_SCORE < 758.5, 0.00626933, 9.79114E-4), -3.43207E-4), -7.62231E-5)) + \n" + + "if (SUGG_TW < 0.0623373, if (LW_NEWS_SEARCHES_RATIO < 6.68433, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.89603, 1.96789E-4, if (LW_MAIN_SEARCHES_RATIO < 0.719144, 0.013244, 0.00182593)), -0.00570262), -2.16189E-4) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.07246, if (NEWS_MAIN_SEARCHES_RATIO < 53.2676, 8.99313E-5, 0.0338743), if (LW_SEARCHES < 0.216881, -0.00282376, if (PREV_DAY_SEARCHES < 0.0712414, 0.0484119, -3.84987E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.51974, 9.68801E-5, if (ALGO_CTR < 1.86978, if (LW_USERS < 
0.0798854, if (NEWS_MAIN_CTR_RATIO < 0.42837, -0.0141747, -0.00244278), -4.47252E-4), 0.0201717)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 10.0121, -1.42949E-5, if (PREV_DAY_MAIN_CTR_RATIO < 1.47714, 9.66134E-4, if (BIDDED_SEARCHES < 0.0585926, if (WEEKAVG < 0.36, 0.00997522, 0.0530748), 0.00387354))) + \n" + + "if (SUGG_TW < 0.984769, -3.34988E-5, if (PREV_DAY_NEWS_CTR < 1.13129, 0.0013372, if (BUSINESS < 0.05, 0.00681273, if (LOCAL_QC == 0.0, 0.0221056, 0.13305)))) + \n" + + "if (LW_CTR < 1.63323, -1.51312E-5, if (LW_NEWS_SEARCHES_RATIO < 1.28425, 0.00114219, if (ELECTRONICS_QC == 0.0, if (PREV_DAY_MAIN_CTR_RATIO < 0.530832, 0.0312363, 0.00679683), 0.0640472))) + \n" + + "if (PREV_DAY_NEWS_USERS < 4.25111, -4.70532E-5, if (PREV_DAY_MAIN_CTR_RATIO < 2.58573, if (YSM_NCTR < 0.00660392, if (NEWS_MAIN_SEARCHES_RATIO < 1.27373, -1.99449E-4, 0.00625635), -5.22971E-4), 0.0405083)) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 377.799, if (LW_NEWS_SEARCHES_RATIO < 6.67644, 1.17654E-5, if (PUB_TODAY_AVG < 0.0050, -0.00565339, if (NATIONALNEWS < 0.55, 2.61588E-4, 0.0318784))), 0.0238311) + \n" + + "if (PREV_DAY_CTR < 1.16424, -7.76883E-5, if (LW_NEWS_SEARCHES_RATIO < 8.68994, 0.00182771, if (NEWS_SEARCHES < 7.1215, -0.013084, if (NEWS_MAIN_SEARCHES_RATIO < 3.58161, -0.00835768, 0.0377434)))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 26.7481, 4.45294E-5, if (LW_NEWS_SEARCHES_RATIO < 1.57387, if (LW_NEWS_SEARCHES_RATIO < 1.3782, if (LW_CTR < 0.34851, 0.0177335, -0.00964832), 0.024959), -0.016879)) + \n" + + "if (LOCAL_QC == 1.0, if (NEWS_USERS < 0.0737993, 1.57459E-4, if (ISTITLE_AVG < 0.515, -0.00580773, if (PREV_DAY_MAIN_SEARCHES_RATIO < 4.81114, -0.00140636, 0.0204618))), 1.02083E-4) + \n" + + "if (HAS_NEWS_QC == 0.0, -3.53931E-5, if (ALGO_CTR < 0.5969, if (MIN_SCORE < 30200.0, if (NEWS_CTR < 0.713517, 0.0124535, 0.049838), 0.00304798), -2.6664E-4)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (NEWS_MAIN_SEARCHES_RATIO < 46.9165, if (PREV_DAY_NEWS_SEARCHES_RATIO < 
48.6166, -8.91528E-6, 0.0156096), -0.0194015), if (INTLNEWS < 0.275, 0.0391563, 2.94525E-5)) + \n" + + "if (ALGO_CTR < 3.09161, if (NEWS_MAIN_SEARCHES_RATIO < 71.6642, -7.16141E-5, 0.0245016), if (NEWS_MAIN_SEARCHES_RATIO < 5.48496, if (ELECTRONICS_QC == 0.0, 3.80175E-4, 0.134021), 0.0467547)) + \n" + + "if (LOCAL_QC == 1.0, if (PREV_DAY_NEWS_CTR_RATIO < 0.55814, if (LW_USERS < 0.179284, -0.0110475, -0.00187986), if (LW_NEWS_SEARCHES_RATIO < 11.9839, -4.62166E-4, 0.0120886)), 4.16986E-5) + \n" + + "if (LW_NEWS_USERS < 48.703, if (LW_MAIN_SEARCHES_RATIO < 104.672, -1.11529E-5, if (PUB_TODAY_AVG < 0.645, -0.0109524, if (LW_MAIN_CTR_RATIO < 0.820426, 0.0173264, -0.00598908))), 0.00642443) + \n" + + "if (NEWS_USERS < 26.8033, if (USERS < 2.70898, if (NEWS_USERS < 0.212247, if (NEWS_SEARCHES < 0.312345, 1.94111E-5, -0.00494194), 9.66727E-4), -7.27397E-4), 0.00366377) + \n" + + "if (PREV_DAY_NEWS_CTR_RATIO < 0.948678, if (ISTITLE_AVG < 0.565, if (PREV_DAY_MAIN_CTR_RATIO < 1.53864, 0.00145357, if (YSM_N_ALGO_CTR_RATIO < 0.00279164, 0.053982, 0.0096231)), 1.01252E-4), -9.24301E-5) + \n" + + "if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, 5.03044E-4, if (LW_MAIN_SEARCHES_RATIO < 11.8351, -2.19647E-4, if (DAY_WEEK_AVG_RATIO < 2.785, 0.00174311, if (ISABSTRACT_AVG < 0.73, 0.020265, -0.00658421)))) + \n" + + "if (SUGG_OVERLAP < 0.5, if (BIDDED_SEARCHES < 0.00581527, if (SUGG_LW < 8.5, 0.00316453, if (ELECTRONICS_QC == 0.0, 0.0240488, 0.285332)), 2.9583E-4), -1.0113E-4) + \n" + + "if (ALGO_CTR < 1.15516, -9.02219E-5, if (LW_NEWS_CTR_RATIO < 0.131516, 0.0416615, if (NEWS_CTR < 0.841155, 5.45051E-4, if (ALGO_CLICKS < 0.0703111, 0.0508979, 0.00584922)))) + \n" + + "if (ENTERTAINMENT < 0.305, if (ALGO_CTR < 1.53687, -1.42467E-4, if (PREV_DAY_NEWS_SEARCHES_RATIO < 2.43692, 0.00172748, if (LW_NEWS_CTR_RATIO < 1.09767, 0.0382724, 3.85821E-4))), 9.95127E-4) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.61514, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.904, -6.72591E-5, if (USERS < 1.06349, 
0.00243637, -8.96343E-4)), if (NEWS_USERS < 0.179867, -0.00813249, -0.0012514)) + \n" + + "if (PREV_DAY_NEWS_USERS < 13.0067, -3.50928E-5, if (PREV_DAY_NEWS_CTR < 0.714421, 7.97227E-4, if (USERS < 3.56693, if (YSM_NCTR < 0.036612, 0.0297616, -0.00692722), 0.00476212))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.51803, 5.8313E-5, if (PREV_DAY_MAIN_CTR_RATIO < 2.34354, -0.00134957, if (LW_USERS < 0.0410895, if (AVG_SCORE < 284173.0, 0.046743, 0.00519612), 2.52E-4))) + \n" + + "if (YSM_CTR < 0.106731, -1.71864E-4, if (NEWS_MAIN_SEARCHES_RATIO < 9.26668, 5.48603E-4, if (USERS < 0.0145216, if (MAX_SCORE < 273414.0, 0.0139875, -0.0068697), -0.00914662))) + \n" + + "if (LW_CTR < 2.10467, if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.452288, -4.92949E-5, -0.00633483), 4.52239E-4), if (MAX_MIN_SCORE < 36225.0, 0.00340485, 0.0295635)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 27.2801, -3.29416E-5, if (LW_NEWS_USERS < 0.516988, -0.0205183, if (AVG_RANK < 9.5, -0.00354209, if (POLITICS_QC == 0.0, 0.0108605, 0.0656188)))) + \n" + + "if (LW_NEWS_CTR_RATIO < 0.130813, if (LW_USERS < 0.0675101, -0.0246242, -0.00263751), if (LW_NEWS_CTR_RATIO < 0.132702, 0.0418786, if (LW_MAIN_SEARCHES_RATIO < 0.4981, 0.0014675, -1.14374E-5))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 110.393, if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.725, -1.42702E-6, if (NEWS_MAIN_SEARCHES_RATIO < 3.54764, if (DAY_PD_HITS_RATIO < 5.165, -0.00858847, 0.0288169), 0.0673668)), -0.00630045) + \n" + + "if (SUGG_TW < 0.0905167, if (LW_NEWS_SEARCHES_RATIO < 5.38735, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.82076, 1.83185E-4, if (PREV_DAY_USERS < 0.729889, 0.00456522, -6.55502E-4)), -0.00337268), -1.85203E-4) + \n" + + "if (SUGG_TW < 0.985223, -7.3888E-5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.131265, 0.0410781, if (NEWS_USERS < 0.688593, 6.1809E-4, if (NEWS_USERS < 0.999268, 0.0215243, 0.00200864)))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.164771, if (NEWS_CTR < 0.581094, 0.001345, if (NEWS_MAIN_SEARCHES_RATIO < 4.47209, 
0.00479447, 0.0485025)), if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.759041, 6.85213E-4, -1.09858E-4)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.480853, if (NEWS_USERS < 0.0737993, -2.87273E-4, if (SUGG_TW < 0.0811122, if (PREV_DAY_NEWS_SEARCHES_RATIO < 3.02247, 0.00952516, 0.0353053), 0.00133144)), 4.44055E-6) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 10.0035, 1.25006E-5, if (NEWS_CTR < 0.530131, if (SEARCHES < 0.261805, if (ENTERTAINMENT_QC == 0.0, -0.00352081, 0.040869), -0.0108829), 0.00398639)) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.21358, -1.96787E-4, if (ALGO_CTR < 3.09691, if (NEWS_SEARCHES < 0.542027, 8.12659E-4, if (NEWS_USERS < 0.193619, -0.00715108, -2.37342E-4)), 0.0334561)) + \n" + + "if (LW_NEWS_CTR < 0.598979, -7.53961E-5, if (PREV_DAY_CTR < 1.09221, if (SCIENCE < 0.55, 4.60354E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 1.34224, 0.00248627, 0.0781891)), 0.00584066)) + \n" + + "if (PREV_DAY_NEWS_USERS < 27.8368, if (BIDDED_SEARCHES < 5.85314, 6.36817E-5, -9.02941E-4), if (PREV_DAY_NEWS_CTR < 0.773569, 0.00156477, if (SEARCHES < 4.08125, 0.0385004, 0.00774733))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 109.125, if (DAY_PD_HITS_RATIO < 18.75, -2.83223E-5, if (YSM_CTR < 0.0174757, if (PREV_DAY_USERS < 0.117185, 0.0405892, 0.0056735), -0.00114963)), -0.00632407) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 67.6563, if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.127, 3.92781E-5, if (NEWS_CTR < 0.920256, -0.00180273, if (AVG_RANK < 8.9, 0.0401539, -0.0106621))), 0.0199772) + \n" + + "if (YSM_CTR < 0.0299671, if (PREV_DAY_NEWS_USERS < 1.57751, if (NEWS_MAIN_SEARCHES_RATIO < 9.09345, -3.3303E-4, if (PREV_DAY_HITS < 0.5, -0.0132526, 0.00213574)), 0.00135158), 2.86346E-4) + \n" + + "if (HAS_NEWS_QC == 0.0, -4.23535E-5, if (NUM_WORDS < 2.5, 1.89805E-4, if (MAX_MIN_SCORE < 34177.2, 0.00313415, if (MAX_MIN_SCORE < 38154.2, 0.0393482, 0.00649343)))) + \n" + + "if (NUM_WORDS < 4.5, -1.5123E-5, if (NEWS_MAIN_CTR_RATIO < 0.333385, 0.0256599, if (MIN_SCORE < 261595.0, if (LOCAL_QC == 
0.0, 0.0096315, 0.0775677), -8.03435E-4))) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 4.7637, 4.65171E-5, if (SEARCHES < 0.273091, if (PREV_DAY_NEWS_SEARCHES_RATIO < 11.1511, if (PREV_DAY_NEWS_CTR_RATIO < 1.59426, -0.0164486, -6.06353E-4), 0.0148189), -7.27497E-4)) + \n" + + "if (NEWS_USERS < 1.78862, -9.34176E-5, if (NEWS_CTR < 0.503725, -3.05424E-4, if (SUGG_LW < 92.5, if (ALGO_CTR < 0.866144, 0.00709828, 2.29334E-4), -2.83122E-4))) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 31.9116, 1.62331E-5, if (SEARCHES < 0.437086, 0.05257, if (NEWS_MAIN_CTR_RATIO < 1.327, -0.00681457, if (PUB_TODAY_AVG < 0.365, -0.00897547, 0.0268691)))) + \n" + + "if (DAY_PD_HITS_RATIO < 79.5, if (BUSINESS < 0.195, 1.77773E-4, if (LW_MAIN_SEARCHES_RATIO < 59.4737, if (LW_MAIN_SEARCHES_RATIO < 50.3613, -3.2627E-4, 0.0335433), -0.0156041)), -0.0188673) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 20.9996, 2.5689E-5, if (PREV_DAY_MAIN_CTR_RATIO < 0.692676, if (DAY_WEEK_AVG_RATIO < 2.95, -0.00554275, 0.0235987), if (LW_NEWS_SEARCHES_RATIO < 1.70492, 0.00485286, -0.0165676))) + \n" + + "if (PREV_DAY_NEWS_CTR < 0.239879, if (NEWS_MAIN_SEARCHES_RATIO < 7.73296, -5.33314E-4, -0.00970318), if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.312345, 3.91472E-5, -0.00407912), 6.05168E-4)) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 2.51675, 1.51048E-5, if (LW_USERS < 0.228893, if (PREV_DAY_MAIN_CTR_RATIO < 3.25636, -0.00414565, if (PREV_DAY_USERS < 0.085979, -0.00665102, 0.0314653)), -1.93031E-4)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 11.633, -3.54622E-5, if (NEWS_CTR < 0.341241, if (SEARCHES < 0.263899, if (DUDE < 0.121324, 0.0225604, -0.023524), -0.0147208), 0.0032981)) + \n" + + "if (DAY_WEEK_AVG_RATIO < 14.225, if (YSM_CTR < 0.0816637, -2.14327E-4, if (LW_USERS < 3.05964, if (NEWS_USERS < 0.0737993, 2.02599E-5, 0.00173562), -7.80059E-4)), 0.0180608) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.743695, if (PREV_DAY_NEWS_CTR < 0.855411, 3.7646E-4, if (PREV_DAY_CTR < 1.27851, if 
(PREV_DAY_MAIN_SEARCHES_RATIO < 0.633963, 0.0103505, 0.0012006), -0.0123434)), -4.6606E-5) + \n" + + "if (DAY_WEEK_AVG_RATIO < 14.225, if (DAY_WEEK_AVG_RATIO < 6.985, -1.74518E-7, if (NEWS_USERS < 0.0737993, 0.00502863, -0.00831447)), if (LW_MAIN_SEARCHES_RATIO < 3.92095, -0.00421267, 0.0344091)) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.413706, if (PREV_DAY_NEWS_CTR < 0.874719, if (LW_MAIN_SEARCHES_RATIO < 11.2056, 0.00144004, -0.014018), if (ELECTRONICS_QC == 0.0, 0.0123258, 0.119451)), -3.38696E-5) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.77722, 4.48781E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, if (SUGG_TW < 0.054069, 0.00321739, -5.83152E-4), if (SEARCHES < 0.120921, -0.0131926, -0.0011303))) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 6.83205, 1.51597E-5, if (LW_CTR < 0.831409, -7.90698E-4, if (USERS < 0.249336, -0.0239581, if (PREV_DAY_MAIN_CTR_RATIO < 0.604761, 0.0179905, -0.00651038)))) + \n" + + "if (SUGG_TW < 0.967398, -6.92965E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 0.135403, 0.0310525, if (DUDE < 0.567766, if (SPORTS < 0.73, 0.00175153, 0.00962597), -0.00111687))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 8.3318, 4.08033E-5, if (NEWS_CTR < 0.445404, if (USERS < 0.179365, if (TOPSTORY < 0.155, -0.00102, 0.0450773), -0.0100005), 0.00289212)) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.18832, -1.47618E-4, if (ISABSTRACT_AVG < 0.03, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.21666, if (LW_MAIN_SEARCHES_RATIO < 0.747696, 0.024334, 0.00586202), 8.905E-4), -1.50915E-4)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 108.679, -8.69379E-6, if (PREV_DAY_CTR < 1.00967, if (USERS < 1.24806, -0.0260056, if (NATIONALNEWS < 0.225, -0.00279946, 0.0163558)), -0.0151386)) + \n" + + "if (LW_CTR < 0.968595, -9.48294E-5, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.904, 2.43976E-4, if (ISTITLE_AVG < 0.37, if (WEEKAVG < 3.5, 0.00702639, 0.0745663), 8.32579E-4))) + \n" + + "if (POLITICS < 0.145, if (PREV_DAY_NEWS_CTR_RATIO < 0.999206, 4.19546E-4, -1.41595E-4), if (LW_NEWS_SEARCHES_RATIO 
< 2.08879, -0.00110749, if (PREV_DAY_SEARCHES < 0.108517, -0.0335177, -0.00494023))) + \n" + + "if (ENTERTAINMENT < 0.315, -5.15746E-5, if (NEWS_MAIN_SEARCHES_RATIO < 8.67009, 6.82081E-4, if (SUGG_TW < 0.0215705, if (NEWS_MAIN_SEARCHES_RATIO < 10.1884, 0.0604503, 0.0100963), -0.00450777))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 66.4948, if (NUM_WORDS < 4.5, -1.49344E-5, if (DAY_WEEK_AVG_RATIO < 0.885, -0.00241279, if (NEWS_MAIN_SEARCHES_RATIO < 0.440079, -0.00883573, 0.0129644))), 0.0201648) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 168.382, if (NEWS_USERS < 1.4626, -9.7013E-5, 6.89976E-4), if (NEWS_MAIN_CTR_RATIO < 0.919145, -0.0160583, if (PREV_DAY_NEWS_SEARCHES_RATIO < 42.1812, -0.00586574, 0.0127268))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 69.7876, if (LW_CTR < 2.10467, -2.15592E-5, if (PREV_DAY_NEWS_CTR < 0.64905, if (SUGG_LW < 1.5, 0.011141, -0.00207262), 0.0231231)), -0.01887) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 28.9579, -1.03443E-5, if (NATIONALNEWS < 0.315, -0.00440798, if (ISTITLE_AVG < 0.685, if (SEARCHES < 0.599257, 0.0596869, 0.0120629), -0.0170673))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 25.791, 1.18225E-5, if (DAY_LW_DAY_HITS_RATIO < 1.96, if (POLITICS_QC == 0.0, if (AVG_SCORE < 377554.0, -0.00736807, 0.0228459), 0.0215737), -0.0192701)) + \n" + + "if (BIDDED_SEARCHES < 0.00581527, if (MAX_SCORE < 304693.0, -1.37144E-4, if (WEEKAVG < 0.325, 9.10176E-4, if (NEWS_USERS < 0.737974, 0.0207108, -0.0108051))), -4.27638E-5) + \n" + + "if (NATIONALNEWS < 0.215, -4.53121E-5, if (LW_NEWS_CTR < 1.22057, 6.18227E-4, if (NEWS_MAIN_SEARCHES_RATIO < 4.57054, if (MIN_SCORE < 241439.0, -0.00108969, 0.0178961), 0.0489683))) + \n" + + "if (LOCAL_QC == 1.0, if (LW_NEWS_CTR_RATIO < 0.592627, if (LW_SEARCHES < 0.101433, -0.0152231, if (PREV_DAY_USERS < 0.0833142, 0.0217818, -0.00211607)), -3.0503E-4), 7.1333E-5) + \n" + + "if (PREV_DAY_CTR < 1.27104, -1.52119E-5, if (DAY_PD_HITS_RATIO < 4.25, if (LW_NEWS_CTR_RATIO < 0.659092, if (PREV_DAY_NEWS_CTR_RATIO < 
0.316981, -0.00815248, 0.00978334), 3.99397E-5), 0.0164301)) + \n" + + "if (YSM_CTR < 0.0209264, if (PREV_DAY_NEWS_USERS < 1.61612, if (NEWS_MAIN_SEARCHES_RATIO < 2.83652, -2.92569E-4, if (USERS < 0.0435647, -0.0269406, -0.00312742)), 0.00154452), 2.03401E-4) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 29.1019, -4.51494E-5, if (NATIONALNEWS < 0.58, if (NEWS_USERS < 0.0737993, 0.00750159, -0.00562872), if (YSM_NCTR < 0.0734117, -0.00843834, 0.0454542))) + \n" + + "if (PREV_DAY_NEWS_SEARCHES < 127.689, if (NEWS_MAIN_SEARCHES_RATIO < 20.0978, -5.5152E-5, if (ALGO_CTR < 1.31726, if (SPORTS < 0.55, -0.00726806, 0.0277824), 0.0380699)), 0.00603891) + \n" + + "if (NEWS_MAIN_CTR_RATIO < 0.118028, if (MIN_SCORE < 208142.0, 3.24283E-4, if (PREV_DAY_USERS < 0.364978, if (NEWS_SEARCHES < 0.405894, -0.00227332, -0.0219515), -0.00291436)), 7.25864E-5) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (DAY_PD_HITS_RATIO < 20.625, 2.84167E-5, if (PREV_DAY_CTR < 0.822381, 0.00954114, -0.00760958)), if (POLITICS_QC == 0.0, 0.00278641, 0.063001)) + \n" + + "if (MIN_RANK < 5.0, if (ALGO_CTR < 1.02929, 1.47687E-4, if (LW_NEWS_CTR_RATIO < 0.131516, 0.0453262, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.88827, 7.66045E-4, 0.00609536))), -2.16222E-4) + \n" + + "if (MIN_SCORE < 730226.0, if (MAX_SCORE < 633968.0, 1.63517E-5, if (ISTITLE_AVG < 0.27, if (SUPERDUPER_AVG < 0.11, 0.00752064, 0.0466919), 3.46353E-4)), -0.0132458) + \n" + + "if (HAS_NEWS_QC == 0.0, -4.23514E-5, if (ALGO_CLICKS < 0.0117185, if (SUGG_OVERLAP < 0.5, if (MIN_RANK < 3.0, 0.0232483, 0.00659311), -0.00265598), 4.52324E-4)) + \n" + + "if (LW_SEARCHES < 0.189865, if (LW_NEWS_SEARCHES_RATIO < 2.08206, if (NEWS_MAIN_SEARCHES_RATIO < 38.5995, -1.13521E-4, 0.04933), if (LW_NEWS_CTR < 0.873417, -0.00342462, 0.0176586)), 1.50845E-4) + \n" + + "if (ALGO_CTR < 0.298421, -5.53583E-4, if (LW_MAIN_CTR_RATIO < 0.511342, if (NEWS_MAIN_SEARCHES_RATIO < 8.48751, if (NUM_WORDS < 2.5, -1.33754E-5, 0.00448769), 0.00836265), -6.6781E-6)) + \n" + + 
"if (BIDDED_SEARCHES < 2.76167, if (LW_NEWS_CTR_RATIO < 0.932077, if (LW_MAIN_SEARCHES_RATIO < 5.40525, if (SUGG_OVERLAP < 16.5, 0.00258433, 2.00886E-4), -0.00529792), -8.74994E-5), -5.14235E-4) + \n" + + "if (NEWS_CTR < 4.12971, if (NEWS_MAIN_SEARCHES_RATIO < 8.67009, 1.65575E-5, if (ALGO_CTR < 0.691525, if (PREV_DAY_NEWS_CTR_RATIO < 0.0958437, 0.0404329, 1.86524E-4), -0.00627032)), 0.0202226) + \n" + + "if (DAY_PD_HITS_RATIO < 79.5, if (DAY_PD_HITS_RATIO < 6.655, -3.12406E-5, if (USERS < 0.0351556, if (DAY_WEEK_AVG_RATIO < 3.82, 0.045008, 0.00842323), 0.00104995)), -0.0188449) + \n" + + "if (NEWS_CTR < 4.12578, if (PREV_DAY_NEWS_USERS < 14.0861, -2.21302E-5, if (LW_USERS < 0.956063, if (NEWS_USERS < 14.236, 0.0231446, 1.33354E-4), 0.00122231)), 0.0216971) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 11.8368, -5.34645E-5, if (TOPSTORY < 0.39, 9.98011E-4, if (DUDE < 0.0709353, 0.00678128, if (DUDE < 1.97964, 0.0552834, -0.00176926)))) + \n" + + "if (AVG_RANK < 8.325, 1.49825E-4, if (LW_NEWS_SEARCHES_RATIO < 1.73427, -6.98057E-5, if (LW_MAIN_SEARCHES_RATIO < 0.662255, if (ALGO_CTR < 1.04359, 0.001813, 0.0309613), -0.00160574))) + \n" + + "if (NEWS_USERS < 1.4626, if (NEWS_SEARCHES < 1.12712, 1.58784E-5, -0.00187785), if (ALGO_CTR < 1.42277, 7.02003E-4, if (ALGO_CLICKS < 3.32631, 0.048031, -0.003279))) + \n" + + "if (POLITICS < 0.24, if (NEWS_MAIN_SEARCHES_RATIO < 35.8437, 5.18183E-5, if (POLITICS_QC == 0.0, -0.0123086, 0.0199861)), if (NEWS_USERS < 0.0737993, -6.26382E-4, -0.00549246)) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 71.6642, if (NEWS_MAIN_SEARCHES_RATIO < 15.8372, 6.81273E-7, if (AVG_SCORE < 363153.0, -0.00465733, if (ALGO_CTR < 0.695395, 0.0265484, -0.00829536))), 0.025954) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 11.8213, -4.37253E-6, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.405918, -0.0124363, if (NEWS_MAIN_SEARCHES_RATIO < 1.5753, if (LW_NEWS_SEARCHES_RATIO < 1.76106, 0.00532177, -0.00463922), 0.00671844))) + \n" + + "if (SUGG_TW < 0.999491, -2.8076E-5, if 
(PREV_DAY_MAIN_SEARCHES_RATIO < 0.216777, -0.0226515, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.268164, 0.0268326, if (LW_NEWS_SEARCHES < 3.78249, 0.00120701, 0.0234219)))) + \n" + + "if (LIFESTYLE < 0.26, -7.02645E-5, if (LW_NEWS_SEARCHES_RATIO < 1.51723, 2.96639E-4, if (USERS < 0.723483, if (LW_MAIN_SEARCHES_RATIO < 1.39783, 0.016844, -0.00205856), 4.40619E-4))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 65.0244, if (BIDDED_SEARCHES < 7.80003, if (NEWS_USERS < 1.78862, 3.10706E-5, if (NEWS_CTR < 1.16946, 0.00108557, 0.00916809)), -7.32103E-4), -0.0167254) + \n" + + "if (MIN_SCORE < 706927.0, if (NEWS_MAIN_SEARCHES_RATIO < 34.6252, 2.64245E-7, if (PREV_DAY_USERS < 0.0768826, -0.018824, if (MAX_SCORE < 266368.0, -0.00725209, 0.0196337))), -0.0123073) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 6.50524, 1.03884E-5, if (PREV_DAY_MAIN_CTR_RATIO < 3.78226, if (SUGG_LW < 20.5, -0.00464095, if (LW_MAIN_SEARCHES_RATIO < 1.80191, -0.00156348, 0.00548999)), 0.0381174)) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.759041, if (PREV_DAY_NEWS_CTR < 0.863395, 2.90623E-4, if (PREV_DAY_NEWS_CTR < 0.888089, if (ELECTRONICS_QC == 0.0, 0.0295237, 0.234098), 0.00434771)), -1.29379E-4) + \n" + + "if (NUM_WORDS < 4.5, -1.45879E-5, if (LW_MAIN_SEARCHES_RATIO < 0.500824, 0.0301862, if (PREV_DAY_CTR < 0.774918, -0.00862254, if (NEWS_MAIN_CTR_RATIO < 0.379925, 0.0202843, 0.00289057)))) + \n" + + "if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.368987, -2.11848E-5, if (SUGG_OVERLAP < 27.5, if (SUGG_LW < 0.5, -1.33621E-4, -0.0115821), -9.66948E-4)), 4.18664E-4) + \n" + + "if (PREV_DAY_NEWS_CTR < 2.62668, if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, 2.95226E-5, if (POLITICS_QC == 0.0, 0.00292095, 0.0790473)), if (NEWS_MAIN_SEARCHES_RATIO < 1.59073, -0.00436975, -0.0244164)) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 7.02911, 6.98898E-5, if (USERS < 0.407094, if (PREV_DAY_CTR < 0.374015, if (SUGG_OVERLAP < 2.5, 0.0472886, -0.00535839), -0.0102684), -3.2528E-4)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 
29.7446, 3.85782E-5, if (LW_NEWS_SEARCHES_RATIO < 1.21834, 0.0126136, if (NEWS_SEARCHES < 0.476744, -0.0168822, if (NEWS_SEARCHES < 0.555398, 0.020656, -0.00228292)))) + \n" + + "if (NEWS_USERS < 0.223578, -1.43802E-4, if (PREV_DAY_NEWS_USERS < 0.150071, if (BIDDED_SEARCHES < 0.867205, if (YSM_CTR < 0.297271, 0.00337525, 0.0192626), 3.30042E-4), -5.02865E-5)) + \n" + + "if (DAY_PD_HITS_RATIO < 24.585, -3.60812E-5, if (LW_NEWS_CTR < 0.657508, if (NEWS_SEARCHES < 0.504412, if (PREV_DAY_MAIN_CTR_RATIO < 1.04252, -0.00553346, 0.0408869), -0.0021064), 0.0343193)) + \n" + + "if (LW_NEWS_USERS < 53.7995, if (DAY_LW_DAY_HITS_RATIO < 32.5, -3.9933E-5, 0.0108883), if (LW_CTR < 0.355044, if (PREV_DAY_HITS < 50.0, 0.00394348, 0.0296827), 0.0024317)) + \n" + + "if (MAX_SCORE < 533059.0, if (NUM_WORDS < 4.5, 5.09566E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.45664, if (USERS < 0.0408067, 0.0119362, -0.00850073), 0.0335548)), -0.00280973) + \n" + + "if (ENTERTAINMENT < 0.385, -7.00557E-5, if (LW_NEWS_CTR_RATIO < 0.0964147, 0.0237102, if (DAY_LW_DAY_HITS_RATIO < 0.32, if (PREV_DAY_NEWS_CTR_RATIO < 0.357356, 0.0317688, 0.00391061), 4.78597E-4))) + \n" + + "if (YSM_CTR < 0.0466685, if (LW_NEWS_SEARCHES_RATIO < 1.77997, -3.31782E-5, if (USERS < 0.0159575, -0.0241302, if (LW_SEARCHES < 0.10844, -0.00372346, -5.03317E-4))), 2.60865E-4) + \n" + + "if (LW_NEWS_CTR_RATIO < 0.983428, if (PREV_DAY_MAIN_CTR_RATIO < 1.22783, 3.17497E-5, if (BIDDED_SEARCHES < 0.0585926, if (ALGO_CTR < 0.685543, 0.0286017, 0.00458196), 0.00141704)), -1.42453E-4) + \n" + + "if (DAY_WEEK_AVG_RATIO < 14.465, if (LW_NEWS_SEARCHES_RATIO < 2.48779, 5.43157E-5, if (LW_USERS < 0.0728421, if (LW_SEARCHES < 0.0462011, -0.00122403, -0.0108271), -1.06335E-4)), 0.0132433) + \n" + + "if (POLITICS < 0.24, if (BIDDED_SEARCHES < 7.93667, if (NEWS_MAIN_CTR_RATIO < 0.0682826, -0.003825, 8.27348E-5), -8.14933E-4), if (NEWS_MAIN_SEARCHES_RATIO < 13.1906, -0.00193925, -0.0207467)) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 31.9116, 
-2.99465E-5, if (SEARCHES < 0.43648, 0.044537, if (LW_MAIN_CTR_RATIO < 1.03393, 0.00821746, if (PREV_DAY_NEWS_CTR_RATIO < 1.72904, 9.38031E-5, -0.0300209)))) + \n" + + "if (ALGO_CLICKS < 3.95494, if (LW_NEWS_USERS < 3.59178, 9.55661E-5, if (PREV_DAY_MAIN_SEARCHES_RATIO < 1.45041, if (SUGG_OVERLAP < 19.5, 0.00838423, 2.60441E-4), 0.0134882)), -5.72155E-4) + \n" + + "if (LW_MAIN_CTR_RATIO < 0.44235, -8.26482E-4, if (LW_MAIN_CTR_RATIO < 0.572619, if (HAS_NEWS_QC == 0.0, 6.82192E-4, if (MIN_RANK < 3.0, 0.0197753, 0.00309895)), -2.28104E-5)) + \n" + + "if (PREV_DAY_NEWS_SEARCHES_RATIO < 1.82043, -4.7561E-5, if (MAX_MIN_SCORE < 99.25, if (PREV_DAY_MAIN_CTR_RATIO < 1.23087, 0.00100234, if (LW_NEWS_CTR < 1.55817, 0.00609794, 0.0443022)), -2.12393E-4)) + \n" + + "if (PREV_DAY_CTR < 1.25038, -5.38648E-6, if (DAY_WEEK_AVG_RATIO < 3.56, 9.65443E-4, if (ISABSTRACT_AVG < 0.17, if (INTLNEWS < 0.185, 0.0537142, 0.00762876), -0.00561531))) + \n" + + "if (NEWS_USERS < 2.76691, -1.93104E-5, if (DUDE < 1.53391, if (PREV_DAY_CTR < 0.546773, if (PREV_DAY_MAIN_CTR_RATIO < 0.500807, 0.0447824, 0.00516532), 0.00151797), -8.85898E-4)) + \n" + + "if (LW_NEWS_CTR < 1.82543, if (NUM_WORDS < 4.5, -9.66944E-6, if (ISTITLE_AVG < 0.225, if (NEWS_MAIN_CTR_RATIO < 0.784694, 0.00125644, 0.0174436), -0.00293329)), -0.00426485) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 32.8094, -6.29023E-5, if (MAX_MIN_SCORE < 29576.2, if (LW_MAIN_SEARCHES_RATIO < 10.4119, 0.0302981, -0.00444216), if (ALGO_CLICKS < 0.867944, 0.0333789, 0.00409533))) + \n" + + "if (LW_CTR < 1.02078, -3.94933E-5, if (LW_NEWS_CTR_RATIO < 0.144121, if (NEWS_CTR < 0.1349, -6.3538E-4, 0.0621699), if (NEWS_USERS < 0.158915, -3.31917E-5, 0.00191984))) + \n" + + "if (LW_USERS < 0.154237, if (SUGG_OVERLAP < 0.5, if (PREV_DAY_NEWS_CTR < 1.792, if (PREV_DAY_NEWS_CTR_RATIO < 0.922571, 0.00370056, -1.34817E-5), -0.0231586), -7.99336E-4), 1.05768E-4) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 38.9947, if (LW_MAIN_SEARCHES_RATIO < 0.855833, 
4.19968E-4, -1.20742E-4), if (POLITICS_QC == 0.0, if (MAX_MIN_SCORE < 18453.8, 0.0234502, -0.00779192), 0.0350307)) + \n" + + "if (INTLNEWS < 0.575, 4.64842E-5, if (NEWS_CTR < 0.642032, -1.79661E-4, if (LW_USERS < 0.254706, if (PREV_DAY_MAIN_SEARCHES_RATIO < 13.2143, -0.0124018, 0.0123916), -0.00150583))) + \n" + + "if (LOCAL_QC == 1.0, if (NEWS_USERS < 0.0737993, 2.98545E-4, if (ALGO_CTR < 1.47845, if (ISTITLE_AVG < 0.515, -0.00529041, -0.00103532), 0.018429)), 6.65685E-5) + \n" + + "if (NEWS_USERS < 0.223578, if (NEWS_SEARCHES < 0.258474, -3.069E-6, if (SEARCHES < 0.0136022, 0.0176599, if (MIN_RANK < 1.0, -0.0134598, -0.00250337))), 3.60727E-4) + \n" + + "if (DAY_WEEK_AVG_RATIO < 11.945, if (LW_NEWS_USERS < 48.4961, -4.7398E-5, if (ALGO_CLICKS < 1.87723, 0.0178789, 0.00240131)), if (LW_NEWS_CTR_RATIO < 1.85865, -0.0164323, 0.00680461)) + \n" + + "if (PREV_DAY_NEWS_CTR_RATIO < 0.216219, if (PREV_DAY_USERS < 0.0840105, if (LW_NEWS_SEARCHES_RATIO < 1.96746, 0.00527106, -0.0232653), if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.533542, 0.0105354, -0.00147978)), 1.96032E-5) + \n" + + "if (LW_CTR < 2.10467, -2.34181E-5, if (ENTERTAINMENT < 0.055, if (LW_MAIN_CTR_RATIO < 0.545111, -0.00186289, if (NEWS_MAIN_SEARCHES_RATIO < 1.20214, -4.63276E-4, 0.0235053)), 0.0256623)) + \n" + + "if (NATIONALNEWS < 0.215, -9.95943E-5, if (LW_NEWS_CTR < 1.26504, 5.92433E-4, if (NEWS_MAIN_SEARCHES_RATIO < 4.23613, if (YSM_CTR < 0.00285117, 0.0324961, 0.00265865), 0.0435376))) + \n" + + "if (YSM_CTR < 0.0395997, if (LW_NEWS_SEARCHES_RATIO < 1.73969, if (LW_MAIN_SEARCHES_RATIO < 20.0256, -7.93453E-5, if (ALGO_CTR < 1.07219, 0.00480972, 0.0310903)), -9.97104E-4), 2.33769E-4) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.761868, if (PREV_DAY_NEWS_CTR_RATIO < 0.980053, if (AVG_SCORE < 400606.0, 0.00185437, if (LW_SEARCHES < 0.918927, 0.0198896, -0.0028412)), 6.98434E-6), -6.12361E-5) + \n" + + "if (NEWS_CTR < 1.31473, -1.47001E-5, if (LW_MAIN_SEARCHES_RATIO < 0.392613, if (PREV_DAY_NEWS_CTR < 
0.855967, 0.00656145, 0.0531645), if (LW_MAIN_CTR_RATIO < 0.6064, 0.0105008, 2.09583E-4))) + \n" + + "if (ALGO_CTR < 3.3716, if (NEWS_CTR < 2.18598, 1.88911E-5, if (LW_NEWS_SEARCHES_RATIO < 2.3084, 8.27546E-4, -0.0121609)), if (NEWS_MAIN_SEARCHES_RATIO < 5.23803, -0.00440315, 0.0413186)) + \n" + + "if (DAY_PD_HITS_RATIO < 79.5, if (NEWS_MAIN_SEARCHES_RATIO < 72.49, if (PREV_DAY_NEWS_SEARCHES_RATIO < 11.2643, 3.08728E-5, if (DAY_HITS < 40.5, -0.00439766, 0.0200117)), -0.0203816), -0.0250875) + \n" + + "if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.422088, if (LW_MAIN_SEARCHES_RATIO < 15.8595, if (PREV_DAY_NEWS_CTR < 0.567401, 5.47179E-4, 0.0106651), if (BIDDED_SEARCHES < 0.77063, -0.0254713, 0.00761775)), -4.33956E-5) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 8.67546, -1.7051E-6, if (SEARCHES < 0.0118739, 0.0135766, if (ALGO_CTR < 0.697676, -2.48861E-4, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.480888, -0.0243516, -0.00457767)))) + \n" + + "if (LOCAL_QC == 1.0, if (NEWS_CTR < 0.346638, if (LW_USERS < 0.160728, if (POLITICS_QC == 0.0, -0.0103889, 0.0692409), -0.00150749), 1.47672E-4), 7.59478E-5) + \n" + + "if (BIDDED_SEARCHES < 7.79863, if (NEWS_USERS < 0.212247, -7.88991E-5, if (AVG_SCORE < 176423.0, -2.66651E-4, if (LW_MAIN_SEARCHES_RATIO < 0.662064, 0.00587605, 7.70017E-4))), -6.54564E-4) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 27.2801, 3.4831E-5, if (BIDDED_SEARCHES < 0.0164564, if (ENTERTAINMENT_QC == 0.0, -0.0319078, 0.0305272), if (ENTERTAINMENT_QC == 1.0, -0.0309537, 4.40641E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 9.60088, -2.84775E-6, if (MAX_MIN_RANK < 5.0, if (ALGO_CTR < 1.25608, -0.00440719, 0.00573622), if (NEWS_CTR < 1.49071, 0.00109495, 0.0298901))) + \n" + + "if (DAY_PD_HITS_RATIO < 80.5, if (LW_NEWS_SEARCHES_RATIO < 45.9165, -1.89532E-5, if (DAY_WEEK_AVG_RATIO < 0.595, 0.0244208, if (DAY_WEEK_AVG_RATIO < 1.15, -0.00810109, 0.00531513))), -0.016232) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 65.0244, if (NEWS_CTR < 2.21055, 1.4463E-5, if 
(PREV_DAY_MAIN_CTR_RATIO < 1.39998, if (USERS < 0.0902444, -0.0288837, -0.00520806), 0.0146236)), -0.0186142) + \n" + + "if (DAY_LW_DAY_HITS_RATIO < 33.75, if (AVG_SCORE < 297290.0, 7.35472E-5, -5.61732E-4), if (MAX_SCORE < 200514.0, -0.0113399, if (ALGO_CLICKS < 0.990362, 0.0451975, 0.0062547))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.77794, 7.00143E-5, if (PREV_DAY_NEWS_USERS < 0.0737993, if (MAX_MIN_SCORE < 0.75, 0.00257668, -8.8479E-4), if (SEARCHES < 0.156212, -0.0110319, -9.0476E-4))) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 0.146655, if (NEWS_MAIN_SEARCHES_RATIO < 3.25715, -2.57748E-5, if (USERS < 0.0468741, 4.50433E-4, if (YSM_NCTR < 0.0110028, 0.00633345, 0.0451608))), 4.31141E-5) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 29.7446, 8.17879E-6, if (LW_MAIN_SEARCHES_RATIO < 41.136, -0.0118399, if (NEWS_MAIN_SEARCHES_RATIO < 9.18546, if (ALGO_CLICKS < 0.578097, 0.0106049, -0.0015147), -0.0281154))) + \n" + + "if (NEWS_MAIN_SEARCHES_RATIO < 59.3594, if (LW_MAIN_SEARCHES_RATIO < 16.1013, -3.93323E-5, if (PREV_DAY_MAIN_CTR_RATIO < 1.64641, 5.96626E-4, 0.0120328)), if (POLITICS_QC == 0.0, 0.00481287, 0.0549552)) + \n" + + "if (DAY_PD_HITS_RATIO < 38.5, 1.00083E-5, if (ISTITLE_AVG < 0.25, if (POLITICS_QC == 0.0, if (HAS_NEWS_QC == 0.0, 0.0110884, 0.0787268), 0.165545), -4.90381E-5)) + \n" + + "if (DAY_PD_HITS_RATIO < 52.405, if (LW_MAIN_SEARCHES_RATIO < 21.0036, -2.31432E-6, if (LW_NEWS_SEARCHES_RATIO < 1.10007, 0.00655827, -0.00322249)), if (DUDE < 0.0213316, 0.00165459, 0.0267117)) + \n" + + "if (NEWS_CTR < 2.36159, 3.01421E-5, if (PUB_TODAY_AVG < 0.535, if (BIDDED_SEARCHES < 0.0756912, -0.0325427, if (SUGG_LW < 3.5, 0.0120055, -0.00841523)), 0.013018)) + \n" + + "if (PREV_DAY_MAIN_CTR_RATIO < 14.1009, if (PREV_DAY_MAIN_SEARCHES_RATIO < 293.154, if (PREV_DAY_MAIN_SEARCHES_RATIO < 106.761, 2.29918E-6, if (PREV_DAY_NEWS_SEARCHES_RATIO < 13.676, -0.0258432, 3.98425E-5)), 0.0146062), 0.0145195) + \n" + + "if (ISABSTRACT_AVG < 0.435, if (NEWS_CTR < 0.778514, 
6.28755E-5, if (PREV_DAY_MAIN_CTR_RATIO < 0.325327, 0.0171532, 0.00156784)), if (NUM_WORDS < 2.5, -8.6343E-5, -0.00307349)) + \n" + + "if (PREV_DAY_NEWS_CTR_RATIO < 0.922298, if (SEARCHES < 0.0120163, if (DAY_PD_HITS_RATIO < 1.25, 0.0163074, -0.0180352), 2.42644E-4), if (LW_NEWS_SEARCHES_RATIO < 1.74561, 9.23406E-7, -0.00112041)) + \n" + + "if (PREV_DAY_CTR < 1.06609, -5.66011E-5, if (NEWS_SEARCHES < 1.12712, if (NEWS_USERS < 0.158915, 2.4137E-4, if (SUGG_TW < 0.0829887, 0.00752324, 7.62554E-4)), -0.00259993)) + \n" + + "if (NATIONALNEWS < 0.105, -4.51166E-5, if (AVG_SCORE < 359807.0, 3.6945E-4, if (ISTITLE_AVG < 0.885, if (MIN_SCORE < 346564.0, 0.041974, 0.0097136), -9.19818E-4))) + \n" + + "if (DAY_LW_DAY_HITS_RATIO < 57.5, if (NEWS_MAIN_SEARCHES_RATIO < 7.52403, -8.69476E-5, if (NEWS_CTR < 1.28406, 4.93978E-4, if (LW_MAIN_CTR_RATIO < 1.5554, 0.00922772, 0.0449952))), 0.0178316) + \n" + + "if (NEWS_CTR < 2.81183, -2.30122E-5, if (PEOPLE_QC == 0.0, if (DAY_WEEK_AVG_RATIO < 1.715, if (PREV_DAY_MAIN_SEARCHES_RATIO < 0.906796, 0.0145094, -0.00780837), 0.0292031), 0.166154)) + \n" + + "if (LW_MAIN_SEARCHES_RATIO < 19.6323, -9.07867E-6, if (LW_MAIN_CTR_RATIO < 1.34263, if (MIN_SCORE < 229623.0, -0.00765222, 0.00226572), if (LW_NEWS_SEARCHES_RATIO < 1.08241, 0.0130526, -0.0101225))) + \n" + + "if (DAY_PD_HITS_RATIO < 4.205, -4.64359E-5, if (LW_NEWS_CTR_RATIO < 0.13101, 0.0518657, if (LW_MAIN_SEARCHES_RATIO < 1.77864, -0.00106588, if (DAY_PD_HITS_RATIO < 4.55, 0.0407926, 0.00454191))))"; + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java new file mode 100644 index 00000000000..19948cad9f2 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/EvaluationTestCase.java @@ -0,0 +1,399 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.javacc.UnicodeUtilities; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.tensor.MapTensor; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.*; +import org.junit.Test; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Tests expression evaluation + * + * @author Jon Bratseth + */ +public class EvaluationTestCase extends junit.framework.TestCase { + + private Context defaultContext; + + @Override + protected void setUp() { + Map bindings = new HashMap(); + bindings.put("zero", DoubleValue.frozen(0d)); + bindings.put("one", DoubleValue.frozen(1d)); + bindings.put("one_half", DoubleValue.frozen(0.5d)); + bindings.put("a_quarter", DoubleValue.frozen(0.25d)); + bindings.put("foo", StringValue.frozen("foo")); + defaultContext = new MapContext(bindings); + } + + public void testEvaluation() { + assertEvaluates(0.5, "0.5"); + assertEvaluates(-0.5, "-0.5"); + assertEvaluates(0.5, "one_half"); + assertEvaluates(-0.5, "-one_half"); + assertEvaluates(0, "nonexisting"); + assertEvaluates(0.75, "0.5 + 0.25"); + assertEvaluates(0.75, "one_half + a_quarter"); + assertEvaluates(1.25, "0.5 - 0.25 + one"); + + // String + assertEvaluates(1, "if(\"a\"==\"a\",1,0)"); + + // Precedence + assertEvaluates(26, "2*3+4*5"); + assertEvaluates(1, "2/6+4/6"); + assertEvaluates(2 * 3 * 4 + 3 * 4 * 5 - 4 * 200 / 10, "2*3*4+3*4*5-4*200/10"); + + // Conditionals + assertEvaluates(2 * (3 * 4 + 3) * (4 * 5 - 4 * 200) / 10, "2*(3*4+3)*(4*5-4*200)/10"); + assertEvaluates(0.5, "if( 2<3, one_half, one_quarter)"); + assertEvaluates(0.25,"if( 2>3, one_half, a_quarter)"); + assertEvaluates(0.5, "if( 1==1, one_half, a_quarter)"); + assertEvaluates(0.5, "if( 1<=1, one_half, a_quarter)"); + assertEvaluates(0.5, "if( 1<=1.1, one_half, 
a_quarter)"); + assertEvaluates(0.25,"if( 1>=1.1, one_half, a_quarter)"); + assertEvaluates(0.5, "if( 0.33333333333333333333~=1/3, one_half, a_quarter)"); + assertEvaluates(0.25,"if( 0.33333333333333333333~=1/35, one_half, a_quarter)"); + assertEvaluates(5.5, "if(one_half in [one_quarter,one_half], one_half+5,log(one_quarter) * one_quarter)"); + assertEvaluates(0.5, "if( 1 in [1,2 , 3], one_half, a_quarter)"); + assertEvaluates(0.25,"if( 1 in [ 2,3,4], one_half, a_quarter)"); + assertEvaluates(0.5, "if( \"foo\" in [\"foo\",\"bar\"], one_half, a_quarter)"); + assertEvaluates(0.5, "if( foo in [\"foo\",\"bar\"], one_half, a_quarter)"); + assertEvaluates(0.5, "if( \"foo\" in [foo,\"bar\"], one_half, a_quarter)"); + assertEvaluates(0.5, "if( foo in [foo,\"bar\"], one_half, a_quarter)"); + assertEvaluates(0.25,"if( \"foo\" in [\"baz\",\"boz\"], one_half, a_quarter)"); + assertEvaluates(0.5, "if( one in [0, 1, 2], one_half, a_quarter)"); + assertEvaluates(0.25,"if( one in [2], one_half, a_quarter)"); + assertEvaluates(2.5, "if(1.0, 2.5, 3.5)"); + assertEvaluates(3.5, "if(0.0, 2.5, 3.5)"); + assertEvaluates(2.5, "if(1.0-1.1, 2.5, 3.5)"); + assertEvaluates(3.5, "if(1.0-1.0, 2.5, 3.5)"); + + // Conditionals with branch probabilities + RankingExpression e = assertEvaluates(3.5, "if(1.0-1.0, 2.5, 3.5, 0.3)"); + assertEquals(0.3, ((IfNode) e.getRoot()).getTrueProbability()); + + // Conditionals as expressions + assertEvaluates(new BooleanValue(true), "2<3"); + assertEvaluates(new BooleanValue(false), "2>3"); + assertEvaluates(new BooleanValue(false), "if (3>2, 2>3, 5.0)"); + assertEvaluates(new BooleanValue(true), "2>3<1"); // The result of 2>3 is converted to 0, which is <1 + assertEvaluates(2.5, "if(2>3<1, 2.5, 3.5)"); + assertEvaluates(2.5, "if(1+1>3<1+0, 2.5, 3.5)"); + + // Functions + assertEvaluates(0, "sin(0)"); + assertEvaluates(1, "cos(0)"); + assertEvaluates(8, "pow(4/2,min(cos(0)*3,5))"); + + // Combined + assertEvaluates(1.25, "5*if(1>=1.1, one_half, if(min(1,2) 
toSet(String values) { + Set set = new HashSet<>(); + for (String value : values.split(",")) + set.add(value.trim()); + return set; + } + + private static class StructuredTestContext extends MapContext { + + @Override + public Value get(String name, Arguments arguments, String output) { + if (!name.equals("average")) { + throw new IllegalArgumentException("Unknown operation '" + name + "'"); + } + if (arguments.expressions().size() != 2) { + throw new IllegalArgumentException("'average' takes 2 arguments"); + } + if (output != null && !output.equals("timesten")) { + throw new IllegalArgumentException("Unknown 'average' output '" + output + "'"); + } + + Value result = evaluateStringAsExpression(0, arguments).add(evaluateStringAsExpression(1, arguments)).divide(new DoubleValue(2)); + if ("timesten".equals(output)) { + result = result.multiply(new DoubleValue(10)); + } + return result; + } + + private Value evaluateStringAsExpression(int index, Arguments arguments) { + try { + ExpressionNode e = arguments.expressions().get(index); + if (e instanceof ConstantNode) { + return new DoubleValue(new RankingExpression(UnicodeUtilities.unquote(((ConstantNode)e).sourceString())).evaluate(this).asDouble()); + } + return e.evaluate(this); + } + catch (ParseException e) { + throw new RuntimeException("Could not evaluate argument '" + index + "'", e); + } + } + + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java new file mode 100644 index 00000000000..95c4402a612 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/NeuralNetEvaluationTestCase.java @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.tensor.MapTensor; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +/** + * Tests evaluating neural nets expressed as tensors + * + * @author bratseth + */ +public class NeuralNetEvaluationTestCase { + + /** "XOR" neural network, separate expression per layer */ + @Test + public void testPerLayerExpression() { + String input = "{ {x:1}:0, {x:2}:1 }"; + + String firstLayerWeights = "{ {x:1,h:1}:1, {x:1,h:2}:1, {x:2,h:1}:1, {x:2,h:2}:1 }"; + String firstLayerBias = "{ {h:1}:-0.5, {h:2}:-1.5 }"; + String firstLayerInput = "sum(" + input + "*" + firstLayerWeights + ", x) + " + firstLayerBias; + String firstLayerOutput = "min(1.0, max(0.0, 0.5 + " + firstLayerInput + "))"; // non-linearity, "poor man's sigmoid" + assertEvaluates("{ {h:1}:1.0, {h:2}:0.0} }", firstLayerOutput); + String secondLayerWeights = "{ {h:1,y:1}:1, {h:2,y:1}:-1 }"; + String secondLayerBias = "{ {y:1}:-0.5 }"; + String secondLayerInput = "sum(" + firstLayerOutput + "*" + secondLayerWeights + ", h) + " + secondLayerBias; + String secondLayerOutput = "min(1.0, max(0.0, 0.5 + " + secondLayerInput + "))"; // non-linearity, "poor man's sigmoid" + assertEvaluates("{ {y:1}:1 }", secondLayerOutput); + } + + private RankingExpression assertEvaluates(String tensorValue, String expressionString) { + return assertEvaluates(new TensorValue(MapTensor.from(tensorValue)), expressionString, new MapContext()); + } + + private RankingExpression assertEvaluates(Value value, String expressionString, Context context) { + try { + RankingExpression expression = new RankingExpression(expressionString); + assertEquals(expression.toString(), value, expression.evaluate(context)); + return expression; + } + catch (ParseException e) { + throw new RuntimeException(e); + } + } + +} diff --git 
a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java new file mode 100644 index 00000000000..837c7401813 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/StreamEvaluationBenchmark.java @@ -0,0 +1,160 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation; + +import com.yahoo.io.IOUtils; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization.GBDTForestOptimizer; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Two small benchmarks of ranking expression evaluation + * + * @author bratseth + */ +public class StreamEvaluationBenchmark { + + public void run() { + try { + List> features = readFeatures("/Users/bratseth/development/data/stream/gbdtFeatures"); + String streamExpression = readFile("/Users/bratseth/development/data/stream/stream.expression"); + run(streamExpression, features, 10); + } + catch (ParseException e) { + throw new RuntimeException("Benchmarking failed", e); + } + } + + private String readFile(String file) { + try { + return IOUtils.readFile(new File(file)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** Read an ad-hoc file format with some similarly ad hoc code */ + private List> readFeatures(String fileName) { + try (BufferedReader reader = IOUtils.createReader(fileName)) { + List> featureItems = new ArrayList<>(); + String line; + Map featureItem = null; + while (null != 
(line = reader.readLine())) { + if (line.trim().equals("Printing Feature Set")) { // new feature item + featureItem = new HashMap<>(); + featureItems.add(featureItem); + } + else { // a feature + line = line.replace("Feature key is ", ""); + line = line.replace(" Feature Value is ", "="); + // now we have featurekey=featurevalue + String[] keyValue = line.split("="); + if (keyValue.length != 2) + System.err.println("Skipping invalid feature line '" + line + "'"); + else + featureItem.put(keyValue[0], Double.parseDouble(keyValue[1])); + } + } + System.out.println("Read " + featureItems.size() + " feature items"); + return featureItems; + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void run(String expressionString, List> features, int iterations) throws ParseException { + // Optimize + RankingExpression expression = new RankingExpression(expressionString); + DoubleOnlyArrayContext contextPrototype = new DoubleOnlyArrayContext(expression, true); + OptimizationReport forestOptimizationReport = new ExpressionOptimizer().optimize(expression, contextPrototype); + System.out.println(forestOptimizationReport); + System.out.println("Optimized expression: " + expression.getRoot()); + + // Warm up + out("Warming up ..."); + double total = 0; + total += benchmarkIterations(expression , contextPrototype, features, Math.max(iterations/5, 1)); + oul("done"); + + // Benchmark + out("Running " + iterations + " of 'stream' ..."); + long tStartTime = System.currentTimeMillis(); + total += benchmarkIterations(expression, contextPrototype, features, iterations); + long totalTime = System.currentTimeMillis() - tStartTime; + oul("done"); + oul(" Total time running 'stream': " + totalTime + + " ms (" + totalTime*1000/(iterations*features.size()) + " microseconds/expression)"); + } + + private double benchmarkIterations(RankingExpression gbdt, Context contextPrototype, + List> features, int iterations) { + // Simulate realistic use: The array context can 
be reused for a series of evaluations in a thread + // but each evaluation binds a new set of values. + double total=0; + Context context = copyForEvaluation(contextPrototype); + long totalNanoTime = 0; + for (int i=0; i featureItem : features) { + long startTime = System.nanoTime(); + bindStreamingFeatures(featureItem, context); + total += gbdt.evaluate(context).asDouble(); + totalNanoTime += System.nanoTime() - startTime; + blowCaches(); + } + } + System.out.println("Total time fine-grain measured: " + totalNanoTime/(1000 * iterations * features.size())); + return total; + } + + private double blowCaches() { + List list = new ArrayList<>(); + for (int i = 0; i < 1000 * 1000; i++) { + list.add(i); + } + double total = 0; + for (Integer i : list) + total += i; + return total; + } + + private Context copyForEvaluation(Context contextPrototype) { + if (contextPrototype instanceof AbstractArrayContext) // optimized - contains name to index map + return ((AbstractArrayContext)contextPrototype).clone(); + else if (contextPrototype instanceof MapContext) // Unoptimized - nothing to keep + return new MapContext(); + else + throw new RuntimeException("Unknown context type " + contextPrototype.getClass()); + } + + private void out(String s) { + System.out.print(s); + } + + private void oul(String s) { + System.out.println(s); + } + + public static void main(String[] args) { + new StreamEvaluationBenchmark().run(); + } + + private void assertEqualish(double a,double b) { + if (Math.abs(a-b) >= Math.abs((a+b)/100000000) ) + throw new RuntimeException("Expected value " + a + " but optimized evaluation produced " + b); + } + + private void bindStreamingFeatures(Map featureItem, Context context) { + for (Map.Entry feature : featureItem.entrySet()) + context.put(feature.getKey(), feature.getValue()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java 
b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java new file mode 100644 index 00000000000..998f25b943a --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/ContextReuseTestCase.java @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.io.IOUtils; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext; +import com.yahoo.searchlib.rankingexpression.evaluation.ExpressionOptimizer; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +import java.io.File; +import java.io.IOException; + +/** + * This tests reuse of a optimized context which is not initialized with + * all values referenced in the expression. 
+ * + * @author bratseth + */ +public class ContextReuseTestCase extends junit.framework.TestCase { + + private String contextString= + "CONCEPTTYPE = 0.0\n" + + "REGEXTYPE = 0.0\n" + + "POS_18 = 0.0\n" + + "POS_19 = 0.0\n" + + "ORDER_IN_CLUSTER = 2.0\n" + + "GOOD_SYNTAX = 1.0\n" + + "POS_20 = 0.0\n" + + "POS_11 = 0.0\n" + + "POS_10 = 0.0\n" + + "CHUNKTYPE = 0.0\n" + + "POS_13 = 0.0\n" + + "STOP_WORD_1 = 0.0\n" + + "TERM_CASE_2 = 0.0\n" + + "TERM_CASE_3 = 0.0\n" + + "STOP_WORD_3 = 0.0\n" + + "POS_15 = 0.0\n" + + "TERM_CASE_1 = 0.0\n" + + "STOP_WORD_2 = 0.0\n" + + "POS_1 = 0.0\n" + + "TERM_CASE_4 = 1.0\n" + + "LENGTH = 6.0\n" + + "EXTENDEDTYPE = 0.0\n" + + "ENTITYPLACETYPE = 0.0\n"; + + public void testIt() throws ParseException, IOException { + // Prepare + RankingExpression expression=new RankingExpression(IOUtils.readFile(new File("src/test/files/s-expression.vre"))); + ArrayContext contextPrototype=new ArrayContext(expression); + new ExpressionOptimizer().optimize(expression,contextPrototype); + + // Execute + ArrayContext context=contextPrototype.clone(); + for (String contextValueString : contextString.split("\n")) { + String[] contextValueParts = contextValueString.split("="); + context.put(contextValueParts[0].trim(), Double.valueOf(contextValueParts[1].trim())); + } + assertEquals(-2.3450294999999994, expression.evaluate(context).asDouble()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java new file mode 100644 index 00000000000..7058e909ef1 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTForestOptimizerTestCase.java @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.*; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +/** + * @author bratseth + */ +public class GBDTForestOptimizerTestCase extends junit.framework.TestCase { + + public void testForestOptimization() throws ParseException { + String gbdtString = + "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))"; + RankingExpression gbdt = new RankingExpression(gbdtString); + + // Regular evaluation + MapContext arguments = new MapContext(); + arguments.put("LW_NEWS_SEARCHES_RATIO", 1d); + arguments.put("SUGG_OVERLAP", 17d); + double result1 = gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + arguments.put("SUGG_OVERLAP", 20d); + double result2 = gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + arguments.put("SUGG_OVERLAP", 40d); + double result3 = gbdt.evaluate(arguments).asDouble(); + + // Optimized evaluation + ArrayContext fArguments = new ArrayContext(gbdt); + ExpressionOptimizer optimizer = new ExpressionOptimizer(); + OptimizationReport report = optimizer.optimize(gbdt, fArguments); + assertEquals(4, 
report.getMetric("Optimized GDBT trees")); + assertEquals(4, report.getMetric("GBDT trees optimized to forests")); + assertEquals(1, report.getMetric("Number of forests")); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 1d); + fArguments.put("SUGG_OVERLAP", 17d); + double oResult1 = gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + fArguments.put("SUGG_OVERLAP", 20d); + double oResult2 = gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + fArguments.put("SUGG_OVERLAP", 40d); + double oResult3 = gbdt.evaluate(fArguments).asDouble(); + + // Assert the same results are produced + // (adding linearly to one double does not produce exactly the same double + // as adding up a tree of stack frames though) + assertEqualish(result1, oResult1); + assertEqualish(result2, oResult2); + assertEqualish(result3, oResult3); + } + + public void testForestOptimizationWithSetMembershipConditions() throws ParseException { + String gbdtString = + "if (MYSTRING in [\"string 1\",\"string 2\"], 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (MYSTRING in [\"string 2\"], 0.0897622, 0.0756903))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))"; + RankingExpression gbdt = new RankingExpression(gbdtString); + + // Regular evaluation + MapContext arguments = new MapContext(); + arguments.put("MYSTRING", new StringValue("string 1")); + arguments.put("LW_NEWS_SEARCHES_RATIO", 1d); + 
arguments.put("SUGG_OVERLAP", 17d); + double result1 = gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + arguments.put("SUGG_OVERLAP", 20d); + double result2 = gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + arguments.put("SUGG_OVERLAP", 40d); + double result3 = gbdt.evaluate(arguments).asDouble(); + + // Optimized evaluation + ArrayContext fArguments = new ArrayContext(gbdt); + ExpressionOptimizer optimizer = new ExpressionOptimizer(); + OptimizationReport report = optimizer.optimize(gbdt, fArguments); + assertEquals(4, report.getMetric("Optimized GDBT trees")); + assertEquals(4, report.getMetric("GBDT trees optimized to forests")); + assertEquals(1, report.getMetric("Number of forests")); + fArguments.put("MYSTRING", new StringValue("string 1")); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 1d); + fArguments.put("SUGG_OVERLAP", 17d); + double oResult1 = gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + fArguments.put("SUGG_OVERLAP", 20d); + double oResult2 = gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO", 2d); + fArguments.put("SUGG_OVERLAP", 40d); + double oResult3 = gbdt.evaluate(fArguments).asDouble(); + + // Assert the same results are produced + // (adding linearly to one double does not produce exactly the same double + // as adding up a tree of stack frames though) + assertEqualish(result1, oResult1); + assertEqualish(result2, oResult2); + assertEqualish(result3, oResult3); + } + + private void assertEqualish(double a, double b) { + assertTrue("Almost equal to " + a + ": " + b, Math.abs(a - b) < ((a + b) / 100000000)); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java new file mode 100644 index 
00000000000..993262b1241 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/evaluation/gbdtoptimization/GBDTOptimizerTestCase.java @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.rankingexpression.evaluation.gbdtoptimization; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.ArrayContext; +import com.yahoo.searchlib.rankingexpression.evaluation.ExpressionOptimizer; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.evaluation.OptimizationReport; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +/** + * @author bratseth + */ +public class GBDTOptimizerTestCase extends junit.framework.TestCase { + + public void testSimpleNodeOptimization() throws ParseException { + RankingExpression gbdt=new RankingExpression("if (a < 2, if (b < 2, 5, 6), 4) + if (a < 3, 7, 8)"); + + // Optimized evaluation + ArrayContext arguments=new ArrayContext(gbdt); + ExpressionOptimizer optimizer=new ExpressionOptimizer(); + optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false); + OptimizationReport report=optimizer.optimize(gbdt,arguments); + assertEquals(2,report.getMetric("Optimized GDBT trees")); + arguments.put("a",1d); + arguments.put("b",2d); + assertEquals(13.0,gbdt.evaluate(arguments).asDouble()); + } + + public void testNodeOptimization() throws ParseException { + String gbdtString= + "if (LW_NEWS_SEARCHES_RATIO < 1.72971, 0.0697159, if (LW_USERS < 0.10496, if (SEARCHES < 0.0329127, 0.151257, 0.117501), if (SUGG_OVERLAP < 18.5, 0.0897622, 0.0756903))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.73156, if (NEWS_USERS < 0.0737993, -0.00481646, 0.00110018), if (LW_USERS < 0.0844616, 0.0488919, if (SUGG_OVERLAP < 32.5, 0.0136917, 9.85328E-4))) + \n" + + "if 
(LW_NEWS_SEARCHES_RATIO < 1.74451, -0.00298257, if (LW_USERS < 0.116207, if (SEARCHES < 0.0329127, 0.0676105, 0.0340198), if (NUM_WORDS < 1.5, -8.55514E-5, 0.0112406))) + \n" + + "if (LW_NEWS_SEARCHES_RATIO < 1.72995, if (NEWS_USERS < 0.0737993, -0.00407515, 0.00139088), if (LW_USERS == 0.0509035, 0.0439466, if (LW_USERS < 0.325818, 0.0187156, 0.00236949)))"; + RankingExpression gbdt=new RankingExpression(gbdtString); + + // Regular evaluation + MapContext arguments=new MapContext(); + arguments.put("LW_NEWS_SEARCHES_RATIO",1d); + arguments.put("SUGG_OVERLAP",17d); + double result1=gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO",2d); + arguments.put("SUGG_OVERLAP",20d); + double result2=gbdt.evaluate(arguments).asDouble(); + arguments.put("LW_NEWS_SEARCHES_RATIO",2d); + arguments.put("SUGG_OVERLAP",40d); + double result3=gbdt.evaluate(arguments).asDouble(); + + // Optimized evaluation + ArrayContext fArguments=new ArrayContext(gbdt); + ExpressionOptimizer optimizer=new ExpressionOptimizer(); + optimizer.getOptimizer(GBDTForestOptimizer.class).setEnabled(false); + OptimizationReport report=optimizer.optimize(gbdt,fArguments); + assertEquals(4,report.getMetric("Optimized GDBT trees")); + fArguments.put("LW_NEWS_SEARCHES_RATIO",1d); + fArguments.put("SUGG_OVERLAP",17d); + double oResult1=gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO",2d); + fArguments.put("SUGG_OVERLAP",20d); + double oResult2=gbdt.evaluate(fArguments).asDouble(); + fArguments.put("LW_NEWS_SEARCHES_RATIO",2d); + fArguments.put("SUGG_OVERLAP",40d); + double oResult3=gbdt.evaluate(fArguments).asDouble(); + + // Assert the same results are produced + assertEquals(result1,oResult1); + assertEquals(result2,oResult2); + assertEquals(result3,oResult3); + } + + public void testFeatureNamesWithDots() throws ParseException { + String gbdtString= + "if (a.b < 1.72971, 0.0697159, if (a.b.c < 0.10496, if (a.c < 0.0329127, 0.151257, 0.117501), if (a < 
18.5, 0.0897622, 0.0756903))) + 1"; + RankingExpression gbdt=new RankingExpression(gbdtString); + + // Regular evaluation + MapContext arguments=new MapContext(); + arguments.put("a.b",1d); + arguments.put("a.b.c",0.1d); + arguments.put("a.c",0.01d); + arguments.put("a",19d); + double result=gbdt.evaluate(arguments).asDouble(); + + // Optimized evaluation + ArrayContext fArguments=new ArrayContext(gbdt); + OptimizationReport report=new OptimizationReport(); + new GBDTOptimizer().optimize(gbdt,fArguments,report); + assertEquals("Optimization result is as expected:\n" + report,1,report.getMetric("Optimized GDBT trees")); + fArguments.put("a.b",1d); + fArguments.put("a.b.c",0.1d); + fArguments.put("a.c",0.01d); + fArguments.put("a",19d); + double oResult=gbdt.evaluate(fArguments).asDouble(); + + // Assert the same results are produced + assertEquals(result,oResult); + } + + public void testBug4009433() throws ParseException { + RankingExpression exp = new RankingExpression("10*if(two>35,if(two>one,if(two>=670,4,8),if(two>8000,5,3)),if(two==478,90,91))"); + new GBDTOptimizer().optimize(exp, new ArrayContext(exp), new OptimizationReport()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java new file mode 100644 index 00000000000..5402935697d --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ArgumentsTestCase.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +import static org.junit.Assert.*; + +/** + * @author Simon Thoresen + */ +public class ArgumentsTestCase { + + @Test + public void requireThatAccessorsWork() { + Arguments args = new Arguments(null); + assertTrue(args.expressions().isEmpty()); + + args = new Arguments(Collections.emptyList()); + assertTrue(args.expressions().isEmpty()); + + NameNode foo = new NameNode("foo"); + NameNode bar = new NameNode("bar"); + args = new Arguments(Arrays.asList(foo, bar)); + assertEquals(2, args.expressions().size()); + assertSame(foo, args.expressions().get(0)); + assertSame(bar, args.expressions().get(1)); + } + + @Test + public void requireThatHashCodeAndEqualsWork() { + Arguments arg1 = new Arguments(Arrays.asList(new NameNode("foo"), new NameNode("bar"))); + Arguments arg2 = new Arguments(Arrays.asList(new NameNode("foo"), new NameNode("bar"))); + Arguments arg3 = new Arguments(Arrays.asList(new NameNode("foo"))); + + assertEquals(arg1.hashCode(), arg2.hashCode()); + assertTrue(arg1.equals(arg2)); + assertFalse(arg2.equals(arg3)); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java new file mode 100644 index 00000000000..6070a3805c6 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/rule/ReferenceNodeTestCase.java @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.rule; + +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author Simon Thoresen + */ +public class ReferenceNodeTestCase { + + @Test + public void requireThatAccessorsWork() { + ReferenceNode node = new ReferenceNode("foo", Arrays.asList(new NameNode("bar"), new NameNode("baz")), "cox"); + assertEquals("foo", node.getName()); + List args = node.getArguments().expressions(); + assertEquals(2, args.size()); + assertEquals(new NameNode("bar"), args.get(0)); + assertEquals(new NameNode("baz"), args.get(1)); + assertEquals("cox", node.getOutput()); + + node = node.setArguments(Arrays.asList(new NameNode("bar'"))); + assertEquals(new NameNode("bar'"), node.getArguments().expressions().get(0)); + + node = node.setArguments(Arrays.asList(new NameNode("baz'"))); + assertEquals(new NameNode("baz'"), node.getArguments().expressions().get(0)); + + node = node.setOutput("cox'"); + assertEquals("cox'", node.getOutput()); + } +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java new file mode 100644 index 00000000000..9fbaddaab1e --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/ConstantDereferencerTestCase.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import org.junit.Test; +import static org.junit.Assert.*; + +import java.util.HashMap; +import java.util.Map; + +/** + * @author Jon Bratseth + */ +public class ConstantDereferencerTestCase { + + @Test + public void testConstantDereferencer() throws ParseException { + Map constants = new HashMap<>(); + constants.put("a", Value.parse("1.0")); + constants.put("b", Value.parse("2")); + constants.put("c", Value.parse("3.5")); + ConstantDereferencer c = new ConstantDereferencer(constants); + + assertEquals("1.0 + 2.0 + 3.5", c.transform(new RankingExpression("a + b + c")).toString()); + assertEquals("myMacro(1.0,2.0)", c.transform(new RankingExpression("myMacro(a, b)")).toString()); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java new file mode 100644 index 00000000000..69ec3a914d1 --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/rankingexpression/transform/SimplifierTestCase.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.searchlib.rankingexpression.transform; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.evaluation.Context; +import com.yahoo.searchlib.rankingexpression.evaluation.MapContext; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * @author Jon Bratseth + */ +public class SimplifierTestCase { + + @Test + public void testSimplify() throws ParseException { + Simplifier s = new Simplifier(); + assertEquals("a + b", s.transform(new RankingExpression("a + b")).toString()); + assertEquals("6.5", s.transform(new RankingExpression("1.0 + 2.0 + 3.5")).toString()); + assertEquals("6.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 )")).toString()); + assertEquals("6.5", s.transform(new RankingExpression("( 1.0 + 2.0 ) + 3.5 ")).toString()); + assertEquals("6.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 )")).toString()); + assertEquals("7.5", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 ) + 1")).toString()); + assertEquals("6.5 + a", s.transform(new RankingExpression("1.0 + ( 2.0 + 3.5 ) + a")).toString()); + assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * 0.0")).toString()); + assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * (0.0)")).toString()); + assertEquals("7.5", s.transform(new RankingExpression("7.5 + ( 2.0 + 3.5 ) * (1.0 - 1.0)")).toString()); + assertEquals("7.5", s.transform(new RankingExpression("if (2 > 0, 3.5 * 2 + 0.5, a *3 )")).toString()); + assertEquals("0.0", s.transform(new RankingExpression("0.0 * (1.3 + 7.0)")).toString()); + assertEquals("6.4", s.transform(new RankingExpression("max(0, 10.0-2.0)*(1-fabs(0.0-0.2))")).toString()); + assertEquals("(query(d) + query(b) - query(a)) * query(c) / query(e)", s.transform(new 
RankingExpression("(query(d) + query(b) - query(a)) * query(c) / query(e)")).toString()); + assertEquals("14.0", s.transform(new RankingExpression("5 + (2 + 3) + 4")).toString()); + assertEquals("28.0 + bar", s.transform(new RankingExpression("7.0 + 12.0 + 9.0 + bar")).toString()); + assertEquals("1.0 - 0.001 * attribute(number)", s.transform(new RankingExpression("1.0 - 0.001*attribute(number)")).toString()); + assertEquals("attribute(number) * 1.5 - 0.001 * attribute(number)", s.transform(new RankingExpression("attribute(number) * 1.5 - 0.001 * attribute(number)")).toString()); + } + + // A black box test verifying we are not screwing up real expressions + @Test + public void testSimplifyComplexExpression() throws ParseException { + RankingExpression initial = new RankingExpression("sqrt(if (if (INFERRED * 0.9 < INFERRED, GMP, (1 + 1.1) * INFERRED) < INFERRED * INFERRED - INFERRED, if (GMP < 85.80799542793133 * GMP, INFERRED, if (GMP < GMP, tanh(INFERRED), log(76.89956221113943))), tanh(tanh(INFERRED))) * sqrt(sqrt(GMP + INFERRED)) * GMP ) + 13.5 * (1 - GMP) * pow(GMP * 0.1, 2 + 1.1 * 0)"); + RankingExpression simplified = new Simplifier().transform(initial); + + Context context = new MapContext(); + context.put("INFERRED", 0.5); + context.put("GMP", 80.0); + context.put("value", 50.0); + assertEquals(initial.evaluate(context), simplified.evaluate(context)); + context.put("INFERRED", 38.0); + context.put("GMP", 80.0); + context.put("value", 50.0); + assertEquals(initial.evaluate(context), simplified.evaluate(context)); + context.put("INFERRED", 38.0); + context.put("GMP", 90.0); + context.put("value", 100.0); + assertEquals(initial.evaluate(context), simplified.evaluate(context)); + context.put("INFERRED", 500.0); + context.put("GMP", 90.0); + context.put("value", 100.0); + assertEquals(initial.evaluate(context), simplified.evaluate(context)); + } + + @Test + public void testParenthesisPreservation() throws ParseException { + Simplifier s = new Simplifier(); + 
CompositeNode transformed = (CompositeNode)s.transform(new RankingExpression("a + (b + c) / 100000000.0")).getRoot(); + assertEquals("a + (b + c) / 100000000.0", transformed.toString()); + } + + @Test + public void testSimplificationWithTensorConstants() throws ParseException { + new Simplifier().transform(new RankingExpression( + "sum(sum((tensorFromWeightedSet(query(wset_query),x)+" + + " tensorFromWeightedSet(attribute(wset),x)) * " + + " {{x:0,y:0}:54, {x:0,y:1} :69, {x:1,y:0} :72, {x:1,y:1} :93},x))")); + } + +} diff --git a/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java b/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java new file mode 100755 index 00000000000..0e27d53338a --- /dev/null +++ b/searchlib/src/test/java/com/yahoo/searchlib/treenet/TreeNetParserTestCase.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.searchlib.treenet; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.treenet.parser.ParseException; +import com.yahoo.searchlib.treenet.parser.TreeNetParser; +import junit.framework.TestCase; + +import java.io.*; + +/** + * @author Simon Thoresen + */ +public class TreeNetParserTestCase extends TestCase { + + private static final boolean WRITE_FILES = false; + + public void testRankingExpression() { + for (int i = 1; i <= 8; ++i) { + String inputFile = String.format("src/test/files/treenet%02d.model", i); + String outputFile = String.format("src/test/files/ranking%02d.expression", i); + String input = readFile(inputFile); + String expression = convertModel(inputFile, input); + if (WRITE_FILES) { + writeFile(outputFile, expression); + } + else { + String output = readFile(outputFile); + assertParseable(output, outputFile); + assertEquals(output.trim(), expression); + } + } + } + + private void assertParseable(String 
rankingExpressionString,String fileName) { + try { + new RankingExpression(rankingExpressionString); + } + catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) { + throw new RuntimeException("Could not parse ranking expression in '" + fileName + "'",e); + } + } + + private String convertModel(String modelFile, String model) { + try { + TreeNetParser parser = new TreeNetParser(new StringReader(model)); + return parser.treeNet().toRankingExpression(); + } catch (ParseException e) { + throw new AssertionError("In model " + modelFile + ": " + e.getMessage(), e); + } + } + + private String readFile(String file) { + try { + StringBuilder ret = new StringBuilder(); + BufferedReader in = new BufferedReader(new FileReader(file)); + while (true) { + String str = in.readLine(); + if (str == null) { + break; + } + ret.append(str).append("\n"); + } + return ret.toString(); + } catch (IOException e) { + throw new AssertionError(e); + } + } + + private void writeFile(String file, String content) { + try { + FileWriter out = new FileWriter(file); + out.write(content); + out.close(); + } catch (IOException e) { + throw new AssertionError(e); + } + } +} diff --git a/searchlib/src/testlist.txt b/searchlib/src/testlist.txt new file mode 100644 index 00000000000..a5f728cdbae --- /dev/null +++ b/searchlib/src/testlist.txt @@ -0,0 +1,137 @@ +?tests/groupingengine +?tests/sort +tests/aggregator +tests/alignment +tests/attribute +tests/attribute/attributefilewriter +tests/attribute/attributemanager +tests/attribute/bitvector +tests/attribute/comparator +tests/attribute/document_weight_iterator +tests/attribute/enumeratedsave +tests/attribute/enumstore +tests/attribute/extendattributes +tests/attribute/multivaluemapping +tests/attribute/postinglist +tests/attribute/postinglistattribute +tests/attribute/searchable +tests/attribute/searchcontext +tests/attribute/sourceselector +tests/attribute/stringattribute +tests/attribute/tensorattribute +tests/bitcompression/expgolomb 
+tests/bitvector +tests/btree +tests/bytecomplens +tests/common/bitvector +tests/common/foregroundtaskexecutor +tests/common/location +tests/common/packets +tests/common/rcuvector +tests/common/resultset +tests/common/sequencedtaskexecutor +tests/common/summaryfeatures +tests/datastore +tests/diskindex/bitvector +tests/diskindex/diskindex +tests/diskindex/fieldwriter +tests/diskindex/fusion +tests/diskindex/pagedict4 +tests/document_store +tests/document_store/visitor +tests/engine/docsumapi +tests/engine/monitorapi +tests/engine/searchapi +tests/engine/transportserver +tests/features +tests/features/beta +tests/features/element_completeness +tests/features/element_similarity_feature +tests/features/euclidean_distance +tests/features/item_raw_score +tests/features/native_dot_product +tests/features/ranking_expression +tests/features/raw_score +tests/features/subqueries +tests/features/tensor +tests/features/tensor_from_labels +tests/features/tensor_from_weighted_set +tests/features/text_similarity_feature +tests/features/util +tests/fef +tests/fef/attributecontent +tests/fef/featurenamebuilder +tests/fef/featurenameparser +tests/fef/featureoverride +tests/fef/object_passing +tests/fef/parameter +tests/fef/phrasesplitter +tests/fef/properties +tests/fef/rank_program +tests/fef/resolver +tests/fef/table +tests/fef/termfieldmodel +tests/fef/termmatchdatamerger +tests/fileheaderinspect +tests/fileheadertk +tests/forcelink +tests/grouping +tests/hitcollector +tests/index/docbuilder +tests/index/doctypebuilder +tests/indexmetainfo +tests/ld-library-path +tests/memoryindex/btree +tests/memoryindex/compact_document_words_store +tests/memoryindex/datastore +tests/memoryindex/dictionary +tests/memoryindex/document_remover +tests/memoryindex/documentinverter +tests/memoryindex/fieldinverter +tests/memoryindex/memoryindex +tests/memoryindex/urlfieldinverter +tests/memorytub +tests/nativerank +tests/nearsearch +tests/postinglistbm +tests/predicate +tests/prettyfloat 
+tests/query +tests/queryeval +tests/queryeval/blueprint +tests/queryeval/booleanmatchiteratorwrapper +tests/queryeval/dot_product +tests/queryeval/equiv +tests/queryeval/fake_searchable +tests/queryeval/getnodeweight +tests/queryeval/monitoring_search_iterator +tests/queryeval/multibitvectoriterator +tests/queryeval/parallel_weak_and +tests/queryeval/predicate +tests/queryeval/simple_phrase +tests/queryeval/sourceblender +tests/queryeval/sparse_vector_benchmark +tests/queryeval/termwise_eval +tests/queryeval/weak_and +tests/queryeval/weak_and_heap +tests/queryeval/weak_and_scorers +tests/queryeval/weighted_set_term +tests/rankingexpression/feature_name_extractor +tests/ranksetup +tests/ranksetup/verify_feature +tests/sortresults +tests/sortspec +tests/stackdumpiterator +tests/stringenum +tests/transactionlog +tests/transactionlogstress +tests/true +tests/url +tests/util +tests/util/bufferwriter +tests/util/ioerrorhandler +tests/util/searchable_stats +tests/util/sigbushandler +tests/util/slime_output_raw_buf_adapter +tests/util/statebuf +tests/util/statefile diff --git a/searchlib/src/tests/.gitignore b/searchlib/src/tests/.gitignore new file mode 100644 index 00000000000..a3e9c375723 --- /dev/null +++ b/searchlib/src/tests/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +*_test diff --git a/searchlib/src/tests/aggregator/.gitignore b/searchlib/src/tests/aggregator/.gitignore new file mode 100644 index 00000000000..fed1175d7cd --- /dev/null +++ b/searchlib/src/tests/aggregator/.gitignore @@ -0,0 +1,7 @@ +*.dat +.depend +Makefile +aggregator_test +perdocexpr_test +searchlib_attr_test_app +searchlib_perdocexpr_test_app diff --git a/searchlib/src/tests/aggregator/CMakeLists.txt b/searchlib/src/tests/aggregator/CMakeLists.txt new file mode 100644 index 00000000000..1cc750a8fac --- /dev/null +++ b/searchlib/src/tests/aggregator/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_perdocexpr_test_app + SOURCES + perdocexpr.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_perdocexpr_test_app COMMAND searchlib_perdocexpr_test_app) +vespa_add_executable(searchlib_attr_test_app + SOURCES + attr_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attr_test_app COMMAND searchlib_attr_test_app) diff --git a/searchlib/src/tests/aggregator/DESC b/searchlib/src/tests/aggregator/DESC new file mode 100644 index 00000000000..74bbb4a99fe --- /dev/null +++ b/searchlib/src/tests/aggregator/DESC @@ -0,0 +1 @@ +This is a test of the aggregator manager interface. diff --git a/searchlib/src/tests/aggregator/FILES b/searchlib/src/tests/aggregator/FILES new file mode 100644 index 00000000000..2d49a798a26 --- /dev/null +++ b/searchlib/src/tests/aggregator/FILES @@ -0,0 +1 @@ +aggregator.cpp diff --git a/searchlib/src/tests/aggregator/attr_test.cpp b/searchlib/src/tests/aggregator/attr_test.cpp new file mode 100644 index 00000000000..5184f61b573 --- /dev/null +++ b/searchlib/src/tests/aggregator/attr_test.cpp @@ -0,0 +1,285 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include + +#include +#include +#include +#include + +using namespace search; +using namespace search::expression; +using namespace vespalib; + + +struct AttributeFixture { + + AttributeGuard guard; + + const double doc0attr[11] = { + 0.1428571428571428, + 0.2539682539682539, + 0.3448773448773448, + 0.4218004218004217, + 0.4884670884670883, + 0.5472906178788530, + 0.5999221968262214, + 0.6475412444452690, + 0.6910195053148342, + 0.7310195053148342, + 0.7680565423518712 + }; + const double doc1attr[11] = { + 0.1408450704225352, + 0.2507351803126450, + 0.3408252704027350, + 0.4171611482653304, + 0.4833863138282443, + 0.5418658459919869, + 0.5942218669343952, + 0.6416152318633051, + 0.6849052751533483, + 0.7247459126035475, + 0.7616462816072375 + }; + + AttributeFixture() : guard() + { + MultiFloatExtAttribute *attr = new MultiFloatExtAttribute("sortedArrayAttr"); + DocId d = 0; + + attr->addDoc(d); + for (double val : doc0attr) { + attr->add(val); + } + attr->addDoc(d); + for (double val : doc1attr) { + attr->add(val); + } + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + +struct IntAttrFixture { + AttributeGuard guard; + + const int64_t doc0attr[11] = { + 1, + 333, + 88888888L, + -17 + }; + const double doc1attr[11] = { + 2, + -42, + 4444, + 999999999L + }; + + IntAttrFixture() : guard() + { + MultiIntegerExtAttribute *attr = new MultiIntegerExtAttribute("sortedArrayAttr"); + DocId d = 0; + attr->addDoc(d); + for (int64_t val : doc0attr) { + attr->add(val); + } + attr->addDoc(d); + for (int64_t val : doc1attr) { + attr->add(val); + } + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + +struct StringAttrFixture { + AttributeGuard guard; + StringAttrFixture() : guard() + { + MultiStringExtAttribute *attr = new MultiStringExtAttribute("sortedArrayAttr"); + DocId d = 0; + attr->addDoc(d); + attr->add("1"); + attr->add("333"); + attr->add("88888888"); + attr->addDoc(d); + attr->add("2"); + attr->add("4444"); + 
attr->add("999999999"); + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + + +TEST_F("testArrayAt", AttributeFixture()) { + for (int i = 0; i < 11; i++) { + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i))); + ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn)); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), f1.doc0attr[i]); + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), f1.doc1attr[i]); + } +} + +TEST_F("testArrayAtInt", IntAttrFixture()) { + for (int i = 0; i < 3; i++) { + ExpressionNode::CP othercn(new ConstantNode(new Int64ResultNode(4567))); + ArrayAtLookup *x = new ArrayAtLookup(*f1.guard, othercn); + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i))); + ArrayAtLookup *y = new ArrayAtLookup(*f1.guard, cn); + *x = *y; + delete y; + ExpressionNode::CP ln(x); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + EXPECT_TRUE(et.getResult().getClass().inherits(IntegerResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getInteger(), f1.doc0attr[i]); + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getInteger(), f1.doc1attr[i]); + } +} + + +TEST_F("testArrayAtString", StringAttrFixture()) { + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(1))); + ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn)); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + EXPECT_TRUE(et.getResult().getClass().inherits(StringResultNode::classId)); + + char mem[64]; + ResultNode::BufferRef buf(&mem, sizeof(mem)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getString(buf).c_str(), 
std::string("333")); + + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getString(buf).c_str(), std::string("4444")); +} + +struct ArrayAtExpressionFixture : + public AttributeFixture +{ + ExpressionNode::CP cn; + ExpressionNode::CP ln; + ExpressionTree et; + + ArrayAtExpressionFixture(int i) : + AttributeFixture(), + cn(new ConstantNode(new Int64ResultNode(i))), + ln(new ArrayAtLookup(*guard, cn)), + et(ln) + { + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + } +}; + + +TEST_F("testArrayAtBelowRange", ArrayAtExpressionFixture(-1)) { + EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(f1.et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[0]); + EXPECT_TRUE(f1.et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[0]); +} + +TEST_F("testArrayAtAboveRange", ArrayAtExpressionFixture(17)) { + EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(f1.et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[10]); + EXPECT_TRUE(f1.et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[10]); +} + +TEST_F("testInterpolatedLookup", AttributeFixture()) { + + ExpressionNode::CP c1(new ConstantNode(new FloatResultNode(f1.doc0attr[2]))); + ExpressionNode::CP l1(new InterpolatedLookup(*f1.guard, c1)); + + ExpressionTree et(l1); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.053082175617388); +} + +TEST_F("testWithRelevance", AttributeFixture()) { + + ExpressionNode::CP r1(new RelevanceNode()); + ExpressionNode::CP l1(new 
InterpolatedLookup(*f1.guard, r1)); + + ExpressionTree et(l1); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + // docid 0 + double expect0[] = { 0.0, 0.0, 0.0, + + 0.514285714285715012, + 1.506349206349207659, + 2.716594516594518005, + + 4.19605949605949835, + 6.001633866649353166, + 8.224512367129145574, + + 10.0, 10.0, 10.0 }; + + for (int i = 0; i < 12; i++) { + double r = i-1; + r *= 0.1; + TEST_STATE(vespalib::make_string("i=%d", i).c_str()); + EXPECT_TRUE(et.execute(0, HitRank(r))); + EXPECT_EQUAL(expect0[i], et.getResult().getFloat()); + } + + EXPECT_TRUE(et.execute(0, HitRank(f1.doc0attr[2]))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + // docid 1 + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0] - 0.001))); + EXPECT_EQUAL(et.getResult().getFloat(), 0.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0]))); + EXPECT_EQUAL(et.getResult().getFloat(), 0.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[2]))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[4]))); + EXPECT_EQUAL(et.getResult().getFloat(), 4.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10]))); + EXPECT_EQUAL(et.getResult().getFloat(), 10.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10] + 0.01))); + EXPECT_EQUAL(et.getResult().getFloat(), 10.0); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/aggregator/perdocexpr.cpp b/searchlib/src/tests/aggregator/perdocexpr.cpp new file mode 100644 index 00000000000..8f073187cce --- /dev/null +++ b/searchlib/src/tests/aggregator/perdocexpr.cpp @@ -0,0 +1,1693 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::expression; +using namespace search::aggregation; +using namespace vespalib; + +struct AggrGetter { + virtual ~AggrGetter() { } + virtual const ResultNode &operator()(const AggregationResult &r) const = 0; +}; + +AttributeGuard createInt64Attribute(); +AttributeGuard createInt32Attribute(); +AttributeGuard createInt16Attribute(); +AttributeGuard createInt8Attribute(); +template +void testCmp(const T & small, const T & medium, const T & large); + +void testMin(const ResultNode & a, const ResultNode & b) { + ASSERT_TRUE(a.cmp(b) < 0); + MinFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + ASSERT_TRUE(func.getResult().cmp(a) == 0); + + MinFunctionNode funcR; + funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false) + .execute(); + ASSERT_TRUE(funcR.getResult().cmp(a) == 0); +} + +TEST("testMin") { + testMin(Int64ResultNode(67), Int64ResultNode(68)); + testMin(FloatResultNode(67), FloatResultNode(68)); + testMin(StringResultNode("67"), StringResultNode("68")); + testMin(RawResultNode("67", 2), RawResultNode("68", 2)); + testMin(RawResultNode("-67", 2), RawResultNode("68", 2)); +} + +void testMax(const ResultNode & a, const ResultNode & b) { + ASSERT_TRUE(a.cmp(b) < 0); + MaxFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + ASSERT_TRUE(func.getResult().cmp(b) == 0); + + MaxFunctionNode funcR; + funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false) + .execute(); + ASSERT_TRUE(funcR.getResult().cmp(b) == 0); +} + +TEST("testMax") { + testMax(Int64ResultNode(67), Int64ResultNode(68)); + testMax(FloatResultNode(67), FloatResultNode(68)); + testMax(StringResultNode("67"), StringResultNode("68")); + testMax(RawResultNode("67", 2), 
RawResultNode("68", 2)); + testMax(RawResultNode("-67", 2), RawResultNode("68", 2)); +} + +ExpressionCountAggregationResult getExpressionCountWithNormalSketch() { + nbostream stream; + stream << (uint32_t)ExpressionCountAggregationResult::classId + << (char)0 << (uint32_t)0 + << (uint32_t)NormalSketch<>::classId + << NormalSketch<>::BUCKET_COUNT << NormalSketch<>::BUCKET_COUNT; + for (size_t i = 0; i < NormalSketch<>::BUCKET_COUNT; ++i) { + stream << static_cast(0); + } + NBOSerializer serializer(stream); + ExpressionCountAggregationResult result; + serializer >> result; + EXPECT_EQUAL(0u, stream.size()); + EXPECT_EQUAL(NormalSketch<>(), result.getSketch()); + return result; +} + +void testExpressionCount(const ResultNode &a, uint32_t bucket, uint8_t val) { + ExpressionCountAggregationResult func = + getExpressionCountWithNormalSketch(); + func.setExpression(ConstantNode(a)); + func.aggregate(DocId(42), HitRank(21)); + + const auto &sketch = func.getSketch(); + auto normal = dynamic_cast&>(sketch); + for (uint32_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + TEST_STATE(make_string("Bucket %u. Expected bucket %u=%u", + i, bucket, val).c_str()); + EXPECT_EQUAL(i == bucket? 
val : 0, (int) normal.bucket[i]); + } +} + +TEST("require that expression count can operate on different results") { + testExpressionCount(Int64ResultNode(67), 98, 2); + testExpressionCount(FloatResultNode(67), 545, 1); + testExpressionCount(StringResultNode("67"), 243, 1); + testExpressionCount(RawResultNode("67", 2), 243, 1); + testExpressionCount(RawResultNode("-67", 2), 434, 1); +} + +TEST("require that expression counts can be merged") { + ExpressionCountAggregationResult func1 = + getExpressionCountWithNormalSketch(); + func1.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + ExpressionCountAggregationResult func2 = + getExpressionCountWithNormalSketch(); + func2.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + + EXPECT_EQUAL(2, func1.getRank().getInteger()); + func1.merge(func2); + EXPECT_EQUAL(3, func1.getRank().getInteger()); + const auto &sketch = func1.getSketch(); + auto normal = dynamic_cast&>(sketch); + EXPECT_EQUAL(2, normal.bucket[98]); // from func1 + EXPECT_EQUAL(1, normal.bucket[545]); // from func2 +} + +TEST("require that expression counts can be serialized") { + ExpressionCountAggregationResult func; + func.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + func.setExpression(ConstantNode(Int64ResultNode(68))) + .aggregate(DocId(42), HitRank(21)); + + nbostream os; + NBOSerializer nos(os); + nos << func; + Identifiable::UP obj = Identifiable::create(nos); + auto *func2 = dynamic_cast(obj.get()); + ASSERT_TRUE(func2); + EXPECT_EQUAL(func.getSketch(), func2->getSketch()); +} + +TEST("require that expression count estimates rank") { + ExpressionCountAggregationResult func = + getExpressionCountWithNormalSketch(); + EXPECT_EQUAL(0, func.getRank().getInteger()); + func.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(2, func.getRank().getInteger()); + 
func.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(3, func.getRank().getInteger()); + func.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(3, func.getRank().getInteger()); +} + +void testAdd(const ResultNode &a, const ResultNode &b, const ResultNode &c) { + AddFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testAdd") { + testAdd(Int64ResultNode(67), Int64ResultNode(68), Int64ResultNode(67+68)); + testAdd(FloatResultNode(67), FloatResultNode(68), FloatResultNode(67+68)); + testAdd(StringResultNode("67"), StringResultNode("68"), + StringResultNode("lo")); + testAdd(RawResultNode("67", 2), RawResultNode("68", 2), + RawResultNode("lo", 2)); +} + +void testDivide(const ResultNode &a, const ResultNode &b, + const ResultNode &c) { + DivideFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat()); + EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testDivide") { + testDivide(Int64ResultNode(6), FloatResultNode(12.0), + FloatResultNode(0.5)); + testDivide(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(6)); + testDivide(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0)); +} + +void testModulo(const ResultNode &a, const ResultNode &b, + const ResultNode &c) { + ModuloFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat()); + 
EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testModulo") { + testModulo(Int64ResultNode(0), Int64ResultNode(6), Int64ResultNode(0)); + testModulo(Int64ResultNode(1), Int64ResultNode(6), Int64ResultNode(1)); + testModulo(Int64ResultNode(2), Int64ResultNode(6), Int64ResultNode(2)); + testModulo(Int64ResultNode(3), Int64ResultNode(6), Int64ResultNode(3)); + testModulo(Int64ResultNode(4), Int64ResultNode(6), Int64ResultNode(4)); + testModulo(Int64ResultNode(5), Int64ResultNode(6), Int64ResultNode(5)); + testModulo(Int64ResultNode(6), Int64ResultNode(6), Int64ResultNode(0)); + + testModulo(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(0)); + testModulo(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0)); + + testModulo(FloatResultNode(2), Int64ResultNode(6), FloatResultNode(2)); + testModulo(Int64ResultNode(3), FloatResultNode(6), FloatResultNode(3)); +} + +void testNegate(const ResultNode & a, const ResultNode & b) { + NegateFunctionNode func; + func.appendArg(ConstantNode(a)).prepare(false).execute(); + EXPECT_EQUAL(func.getResult().asString(), b.asString()); + EXPECT_EQUAL(func.getResult().cmp(b), 0); + EXPECT_EQUAL(b.cmp(func.getResult()), 0); +} + +TEST("testNegate") { + testNegate(Int64ResultNode(67), Int64ResultNode(-67)); + testNegate(FloatResultNode(67.0), FloatResultNode(-67.0)); + + char strnorm[4] = { 102, 111, 111, 0 }; + char strneg[4] = { -102, -111, -111, 0 }; + testNegate(StringResultNode(strnorm), StringResultNode(strneg)); + testNegate(RawResultNode(strnorm, 3), RawResultNode(strneg, 3)); +} + +template +void testBuckets(const T * b) { + EXPECT_TRUE(b[0].cmp(b[1]) < 0); + EXPECT_TRUE(b[1].cmp(b[2]) < 0); + EXPECT_TRUE(b[2].cmp(b[3]) < 0); + EXPECT_TRUE(b[3].cmp(b[4]) < 0); + EXPECT_TRUE(b[4].cmp(b[5]) < 0); + + EXPECT_TRUE(b[1].cmp(b[0]) > 0); + EXPECT_TRUE(b[2].cmp(b[1]) > 0); + EXPECT_TRUE(b[3].cmp(b[2]) > 0); + EXPECT_TRUE(b[4].cmp(b[3]) > 0); + EXPECT_TRUE(b[5].cmp(b[4]) > 0); 
+ + EXPECT_TRUE(b[1].cmp(b[1]) == 0); + EXPECT_TRUE(b[2].cmp(b[2]) == 0); + EXPECT_TRUE(b[3].cmp(b[3]) == 0); + EXPECT_TRUE(b[4].cmp(b[4]) == 0); + EXPECT_TRUE(b[5].cmp(b[5]) == 0); + + EXPECT_TRUE(b[0].contains(b[1]) < 0); + EXPECT_TRUE(b[1].contains(b[2]) < 0); + EXPECT_TRUE(b[2].contains(b[3]) == 0); + EXPECT_TRUE(b[3].contains(b[4]) < 0); + EXPECT_TRUE(b[4].contains(b[5]) < 0); + + EXPECT_TRUE(b[1].contains(b[0]) > 0); + EXPECT_TRUE(b[2].contains(b[1]) > 0); + EXPECT_TRUE(b[3].contains(b[2]) == 0); + EXPECT_TRUE(b[4].contains(b[3]) > 0); + EXPECT_TRUE(b[5].contains(b[4]) > 0); + + EXPECT_TRUE(b[1].contains(b[1]) == 0); + EXPECT_TRUE(b[2].contains(b[2]) == 0); + EXPECT_TRUE(b[3].contains(b[3]) == 0); + EXPECT_TRUE(b[4].contains(b[4]) == 0); + EXPECT_TRUE(b[5].contains(b[5]) == 0); +} + +TEST("testBuckets") { + IntegerBucketResultNodeVector iv; + IntegerBucketResultNodeVector::Vector & ib = iv.getVector(); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + ib.resize(1); + ib[0] = IntegerBucketResultNode(7, 9); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL); + + ib.resize(6); + ib[0] = IntegerBucketResultNode(7, 9); + ib[1] = IntegerBucketResultNode(13, 17); + ib[2] = IntegerBucketResultNode(15, 30); + ib[3] = IntegerBucketResultNode(19, 27); + ib[4] = IntegerBucketResultNode(20, 33); + ib[5] = IntegerBucketResultNode(50, 50); + testBuckets(&ib[0]); + iv.sort(); + testBuckets(&ib[0]); + EXPECT_TRUE(ib[0].contains(6) > 0); + EXPECT_TRUE(ib[0].contains(7) == 0); + EXPECT_TRUE(ib[0].contains(8) == 0); + EXPECT_TRUE(ib[0].contains(9) < 0); + EXPECT_TRUE(ib[0].contains(10) < 0); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL); + 
EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(14)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(27)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(32)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(33)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(50)) == NULL); + + FloatBucketResultNodeVector fv; + FloatBucketResultNodeVector::Vector & fb = fv.getVector(); + fb.resize(6); + fb[0] = FloatBucketResultNode(7, 9); + fb[1] = FloatBucketResultNode(13, 17); + fb[2] = FloatBucketResultNode(15, 30); + fb[3] = FloatBucketResultNode(19, 27); + fb[4] = FloatBucketResultNode(20, 33); + fb[5] = FloatBucketResultNode(50, 50); + testBuckets(&fb[0]); + fv.sort(); + testBuckets(&fb[0]); + EXPECT_TRUE(fb[0].contains(6) > 0); + EXPECT_TRUE(fb[0].contains(7) == 0); + EXPECT_TRUE(fb[0].contains(8) == 0); + EXPECT_TRUE(fb[0].contains(9) < 0); + EXPECT_TRUE(fb[0].contains(10) < 0); + EXPECT_TRUE(fv.find(FloatResultNode(6)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(7)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(8)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(9)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(10)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(14)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(27)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(32)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(33)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(50)) == NULL); + + StringBucketResultNodeVector sv; + StringBucketResultNodeVector::Vector & sb = sv.getVector(); + sb.resize(6); + sb[0] = StringBucketResultNode("07", "09"); + sb[1] = StringBucketResultNode("13", "17"); + sb[2] = StringBucketResultNode("15", "30"); + sb[3] = StringBucketResultNode("19", "27"); + sb[4] = StringBucketResultNode("20", "33"); + sb[5] = StringBucketResultNode("50", "50"); + testBuckets(&sb[0]); + sv.sort(); + testBuckets(&sb[0]); + EXPECT_TRUE(sb[0].contains("06") > 0); + 
EXPECT_TRUE(sb[0].contains("07") == 0); + EXPECT_TRUE(sb[0].contains("08") == 0); + EXPECT_TRUE(sb[0].contains("09") < 0); + EXPECT_TRUE(sb[0].contains("10") < 0); + EXPECT_TRUE(sv.find(StringResultNode("06")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("07")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("08")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("09")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("10")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("14")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("27")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("32")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("33")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("50")) == NULL); +} + +template +void testCmp(const T & small, const T & medium, const T & large) { + EXPECT_TRUE(small.cmp(medium) < 0); + EXPECT_TRUE(small.cmp(large) < 0); + EXPECT_TRUE(medium.cmp(large) < 0); + EXPECT_TRUE(medium.cmp(small) > 0); + EXPECT_TRUE(large.cmp(small) > 0); + EXPECT_TRUE(large.cmp(medium) > 0); +} + +TEST("testResultNodes") { + Int64ResultNode i(89); + char mem[64]; + ResultNode::BufferRef buf(&mem, sizeof(mem)); + EXPECT_EQUAL(i.getInteger(), 89); + EXPECT_EQUAL(i.getFloat(), 89.0); + EXPECT_EQUAL(i.getString(buf).c_str(), std::string("89")); + FloatResultNode f(2165.798); + EXPECT_EQUAL(f.getInteger(), 2166); + EXPECT_EQUAL(f.getFloat(), 2165.798); + EXPECT_EQUAL(f.getString(buf).c_str(), std::string("2165.8")); + StringResultNode s("17.89hjkljly"); + EXPECT_EQUAL(s.getInteger(), 17); + EXPECT_EQUAL(s.getFloat(), 17.89); + EXPECT_EQUAL(s.getString(buf).c_str(), std::string("17.89hjkljly")); + RawResultNode r("hjgasfdg", 9); + EXPECT_EQUAL(r.getString(buf).c_str(), std::string("hjgasfdg")); + int64_t j(789); + double d(786324.78); + nbostream os; + os << j << d; + RawResultNode r1(os.c_str(), sizeof(j)); + EXPECT_EQUAL(r1.getInteger(), 789); + RawResultNode r2(os.c_str() + sizeof(j), sizeof(d)); + EXPECT_EQUAL(r2.getFloat(), 786324.78); + + 
StringResultNode s1, s2("a"), s3("a"), s4("b"), s5("bb"); + EXPECT_EQUAL(s1.cmp(s1), 0); + EXPECT_EQUAL(s2.cmp(s3), 0); + EXPECT_EQUAL(s4.cmp(s4), 0); + EXPECT_EQUAL(s5.cmp(s5), 0); + testCmp(s1, s2, s4); + testCmp(s1, s2, s5); + testCmp(s2, s4, s5); + + { + Int64ResultNode i1(-1), i2(0), i3(1), i4(0x80000000lu); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(i3.cmp(i3), 0); + testCmp(i1, i2, i3); + testCmp(i1, i2, i4); + } + + { + FloatResultNode i1(-1), i2(0), i3(1), notanumber(nan("")), + minusInf(-INFINITY), plussInf(INFINITY); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(i3.cmp(i3), 0); + EXPECT_EQUAL(minusInf.cmp(minusInf), 0); + EXPECT_EQUAL(plussInf.cmp(plussInf), 0); + EXPECT_EQUAL(notanumber.cmp(notanumber), 0); + testCmp(i1, i2, i3); + testCmp(minusInf, i1, plussInf); + testCmp(minusInf, i2, plussInf); + testCmp(minusInf, i3, plussInf); + testCmp(notanumber, i2, i3); + testCmp(notanumber, i2, plussInf); + testCmp(notanumber, minusInf, plussInf); + } + { + FloatBucketResultNode + i1(-1, 3), i2(188000, 188500), i3(1630000, 1630500), + notanumber(-nan(""), nan("")), inf(-INFINITY, INFINITY); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(notanumber.cmp(notanumber), 0); + EXPECT_EQUAL(inf.cmp(inf), 0); + + testCmp(i1, i2, i3); + testCmp(inf, i1, i2); + testCmp(notanumber, i2, i3); + testCmp(notanumber, i1, i2); + testCmp(notanumber, inf, i1); + } +} + +void testStreaming(const Identifiable &v) { + nbostream os; + NBOSerializer nos(os); + nos << v; + Identifiable::UP s = Identifiable::create(nos); + ASSERT_TRUE(s.get() != NULL); + ASSERT_TRUE(v.cmp(*s) == 0); + nbostream os2, os3; + NBOSerializer nos2(os2), nos3(os3); + nos2 << v; + nos3 << *s; + + EXPECT_EQUAL(os2.size(), os3.size()); + ASSERT_TRUE(os2.size() == os3.size()); + EXPECT_EQUAL(0, memcmp(os2.c_str(), os3.c_str(), os3.size())); +} + +TEST("testTimeStamp") { + TimeStampFunctionNode t1; + testStreaming(t1); 
+} + +namespace { + +std::string +getVespaChecksumV2( + const std::string& ymumid, + int fid, + const std::string& flags_str) +{ + if (fid == 6 || fid == 0 || fid == 5) { + return 0; + } + + std::list flags_list; + flags_list.clear(); + for (unsigned int i = 0; i< flags_str.length();i++) + if (isalpha(flags_str[i])) + flags_list.push_back(flags_str[i]); + flags_list.sort(); + + std::string new_flags_str =""; + std::list::iterator it; + for (it = flags_list.begin();it!=flags_list.end();it++) + new_flags_str += *it; + + uint32_t networkFid = htonl(fid); + + int length = ymumid.length()+ + sizeof(networkFid)+ + new_flags_str.length(); + + unsigned char buffer[length]; + memset(buffer, 0x00, length); + memcpy(buffer, ymumid.c_str(), ymumid.length()); + memcpy(buffer + ymumid.length(), + (const char*)&networkFid, sizeof(networkFid)); + memcpy(buffer+ymumid.length()+sizeof(networkFid), new_flags_str.c_str(), + new_flags_str.length()); + + return std::string((char*)buffer, length); +} +} // namespace + +TEST("testMailChecksumExpression") { + document::TestDocMan testDocMan; + + int folder = 32; + std::string flags = "RWA"; + std::string ymumid = "barmuda"; + + document::Document::UP doc = + testDocMan.createDocument("foo", "userdoc:footype:1234:" + ymumid); + document::WeightedSetFieldValue + ws(doc->getField("byteweightedset").getDataType()); + + for (uint32_t i = 0; i < flags.size(); i++) { + ws.add(document::ByteFieldValue(flags[i])); + } + doc->setValue("headerval", document::IntFieldValue(folder)); + doc->setValue("byteweightedset", ws); + + CatFunctionNode e; + + // YMUMID + GetDocIdNamespaceSpecificFunctionNode* ns = + new GetDocIdNamespaceSpecificFunctionNode( + ResultNode::UP(new StringResultNode)); + e.appendArg(ExpressionNode::CP(ns)); + + // Folder + e.appendArg(DocumentFieldNode("headerval")); + + // Flags + e.appendArg(SortFunctionNode(DocumentFieldNode("byteweightedset"))); + + MD5BitFunctionNode node(e, 32); + + CatFunctionNode &cfn = + 
static_cast(*node.expressionNodeVector()[0]); + MultiArgFunctionNode::ExpressionNodeVector &xe = + cfn.expressionNodeVector(); + + for (uint32_t i = 0; i < xe.size(); i++) { + DocumentAccessorNode* rf = + dynamic_cast(xe[i].get()); + if (rf) { + rf->setDocType(doc->getType()); + rf->prepare(true); + rf->setDoc(*doc); + } else { + MultiArgFunctionNode * mf = + dynamic_cast(xe[i].get()); + MultiArgFunctionNode::ExpressionNodeVector& se = + mf->expressionNodeVector(); + for (uint32_t j = 0; j < se.size(); j++) { + DocumentAccessorNode* tf = + dynamic_cast(se[j].get()); + tf->setDocType(doc->getType()); + tf->prepare(true); + tf->setDoc(*doc); + } + } + } + // SortFunctionNode & sfn = static_cast(*xe[1]); + // sfn.prepare(false); + cfn.prepare(false); + + cfn.execute(); + ConstBufferRef ref = + static_cast(cfn.getResult()).get(); + + std::string cmp = getVespaChecksumV2(ymumid, folder, flags); + + EXPECT_EQUAL(ref.size(), 14u); + EXPECT_EQUAL(cmp.size(), ref.size()); + + for (uint32_t i = 0; i < ref.size(); i++) { + std::cerr << i << ": " << (int)ref.c_str()[i] << "/" << (int)cmp[i] + << "\n"; + } + + EXPECT_TRUE(memcmp(cmp.c_str(), ref.c_str(), cmp.size()) == 0); + + node.prepare(true); + node.execute(); + + ConstBufferRef ref2 = + static_cast(node.getResult()).get(); + + for (uint32_t i = 0; i < ref2.size(); i++) { + std::cerr << i << ": " << (int)ref2.c_str()[i] << "\n"; + } +} + +TEST("testDebugFunction") { + { + AddFunctionNode add; + add.appendArg(ConstantNode(Int64ResultNode(3))); + add.appendArg(ConstantNode(Int64ResultNode(4))); + DebugWaitFunctionNode n(add, 1.3, false); + n.prepare(false); + + FastOS_Time time; + time.SetNow(); + n.execute(); + EXPECT_TRUE(time.MilliSecsToNow() > 1000.0); + EXPECT_EQUAL(static_cast(n.getResult()).get(), + 7); + } + { + AddFunctionNode add; + add.appendArg(ConstantNode(Int64ResultNode(3))); + add.appendArg(ConstantNode(Int64ResultNode(4))); + DebugWaitFunctionNode n(add, 1.3, true); + n.prepare(false); + + FastOS_Time time; + 
time.SetNow(); + n.execute(); + EXPECT_TRUE(time.MilliSecsToNow() > 1000.0); + EXPECT_EQUAL(static_cast(n.getResult()).get(), + 7); + } +} + +TEST("testDivExpressions") { + { + StrLenFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(static_cast(e.getResult()).get(), + 6); + } + { + NormalizeSubjectFunctionNode + e(ConstantNode(StringResultNode("Re: Your mail"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get(), + "Your mail"); + } + { + NormalizeSubjectFunctionNode + e(ConstantNode(StringResultNode("Your mail"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get(), + "Your mail"); + } + { + StrCatFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.appendArg(ConstantNode(StringResultNode("ARG 2"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get(), + "238686ARG 2"); + } + + { + ToStringFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(strcmp(static_cast( + e.getResult()).get().c_str(), "238686"), 0); + } + + { + ToRawFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(strcmp(static_cast( + e.getResult()).get().c_str(), "238686"), 0); + } + + { + CatFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 8u); + } + { + CatFunctionNode e(ConstantNode(Int32ResultNode(23886))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 4u); + } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + MD5BitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 16*8); + e.prepare(false); + e.execute(); + ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId)); + const RawResultNode & + r(static_cast(e.getResult())); + EXPECT_EQUAL(r.get().size(), 16u); 
+ } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + MD5BitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 2u); + } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + XorBitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 1*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 1u); + EXPECT_EQUAL(static_cast( + e.getResult()).get().c_str()[0], + 0x7); + } + { + const uint8_t buf[4] = { 6, 0, 7, 7 }; + XorBitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 2u); + EXPECT_EQUAL((int)static_cast( + e.getResult()).get().c_str()[0], + 0x1); + EXPECT_EQUAL((int)static_cast( + e.getResult()).get().c_str()[1], + 0x7); + } + { + const uint8_t wantedBuf[14] = + { 98, 97, 114, 109, 117, 100, 97, 0, 0, 0, 32, 65, 82, 87 }; + const uint8_t md5facit[16] = + { 0x22, 0x5, 0x22, 0x1c, 0x49, 0xff, 0x90, 0x25, 0xad, 0xbf, + 0x4e, 0x51, 0xdb, 0xca, 0x2a, 0xc5 }; + const uint8_t thomasBuf[22] = + { 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0, 0, 0, 32, 0, + 0, 0, 3, 65, 82, 87 }; + const uint8_t currentBuf[26] = + { 0, 0, 0, 22, 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0, + 0, 0, 32, 0 , 0, 0, 3, 65, 82, 87 }; + + MD5BitFunctionNode + e(ConstantNode(RawResultNode(wantedBuf, sizeof(wantedBuf))), 16*8); + e.prepare(false); + e.execute(); + ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId)); + const RawResultNode & + r(static_cast(e.getResult())); + EXPECT_EQUAL(r.get().size(), 16u); + uint8_t md5[16]; + fastc_md5sum(currentBuf, sizeof(currentBuf), md5); + EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0); + fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5); + EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) == 0); + fastc_md5sum(thomasBuf, sizeof(thomasBuf), md5); + 
EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0); + + MD5BitFunctionNode + finalCheck( + CatFunctionNode(ConstantNode(StringResultNode("barmuda"))) + .appendArg(ConstantNode(Int32ResultNode(32))) + .appendArg(SortFunctionNode( + ConstantNode(Int8ResultNodeVector() + .push_back(Int8ResultNode(87)) + .push_back(Int8ResultNode(65)) + .push_back(Int8ResultNode(82)) + ) + ) + ), 32); + finalCheck.prepare(false); + finalCheck.execute(); + const RawResultNode & + rr(static_cast(finalCheck.getResult())); + EXPECT_EQUAL(rr.get().size(), 4u); + fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5); + EXPECT_TRUE(memcmp(md5facit, md5, sizeof(md5)) == 0); + EXPECT_TRUE(memcmp(rr.get().data(), md5, rr.get().size()) == 0); + } + { + CatFunctionNode e(ConstantNode(Int16ResultNode(23886))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 2u); + } + { + CatFunctionNode + e(ConstantNode(Int8ResultNodeVector().push_back(Int8ResultNode(86)) + .push_back(Int8ResultNode(14)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 1*2u); + } + { + CatFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast(e.getResult()).get().size(), + 4*2u); + } + { + NumElemFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 1); + } + { + NumElemFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 2); + } + { + NumElemFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 2); + } 
+} + +bool test1MultivalueExpression(const MultiArgFunctionNode &exprConst, + const ExpressionNode::CP &mv, + const ResultNode & expected) { + MultiArgFunctionNode & expr(const_cast(exprConst)); + expr.appendArg(mv); + expr.prepare(false); + bool ok = EXPECT_TRUE(expr.execute()) && + EXPECT_EQUAL(0, expr.getResult().cmp(expected)); + if (!ok) { + std::cerr << "Expected:" << expected.asString() << std::endl + << "Got: " << expr.getResult().asString() << std::endl; + } + return ok; +} + +bool test1MultivalueExpressionException(const MultiArgFunctionNode & exprConst, + const ExpressionNode::CP & mv, + const char * expected) { + try { + test1MultivalueExpression(exprConst, mv, NullResultNode()); + return EXPECT_TRUE(false); + } catch (std::runtime_error & e) { + return EXPECT_TRUE(std::string(e.what()).find(expected) + != std::string::npos); + } +} + +TEST("testMultivalueExpression") { + IntegerResultNodeVector iv; + iv.push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)).push_back(Int64ResultNode(117)); + ExpressionNode::CP mv(new ConstantNode(iv)); + + EXPECT_TRUE(test1MultivalueExpression(AddFunctionNode(), mv, + Int64ResultNode(7 + 17 + 117))); + EXPECT_TRUE(test1MultivalueExpression(MultiplyFunctionNode(), mv, + Int64ResultNode(7 * 17 * 117))); + EXPECT_TRUE(test1MultivalueExpressionException(DivideFunctionNode(), mv, + "DivideFunctionNode")); + EXPECT_TRUE(test1MultivalueExpressionException(ModuloFunctionNode(), mv, + "ModuloFunctionNode")); + EXPECT_TRUE(test1MultivalueExpression(MinFunctionNode(), mv, + Int64ResultNode(7))); + EXPECT_TRUE(test1MultivalueExpression(MaxFunctionNode(), mv, + Int64ResultNode(117))); + + EXPECT_TRUE( + test1MultivalueExpression( + FixedWidthBucketFunctionNode() + .setWidth(Int64ResultNode(1)), mv, + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(7,8)) + .push_back(IntegerBucketResultNode(17,18)) + .push_back(IntegerBucketResultNode(117,118)))); + + EXPECT_TRUE( + test1MultivalueExpression( + 
RangeBucketPreDefFunctionNode() + .setBucketList( + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(0,10)) + .push_back(IntegerBucketResultNode(20,30)) + .push_back(IntegerBucketResultNode(100,120))), + mv, + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(0,10)) + .push_back(IntegerBucketResultNode(0,0)) + .push_back(IntegerBucketResultNode(100,120)))); + + EXPECT_TRUE( + test1MultivalueExpression( + TimeStampFunctionNode() + .setTimePart(TimeStampFunctionNode::Second), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117%60)))); + + EXPECT_TRUE( + test1MultivalueExpression(NegateFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(-7)) + .push_back(Int64ResultNode(-17)) + .push_back(Int64ResultNode(-117)))); + EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117)))); + EXPECT_TRUE(test1MultivalueExpression(ReverseFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(117)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(7)))); + EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(), + ReverseFunctionNode(mv), + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117)))); + EXPECT_TRUE(test1MultivalueExpression(AndFunctionNode(), mv, + Int64ResultNode(7 & 17 & 117))); + EXPECT_TRUE(test1MultivalueExpression(OrFunctionNode(), mv, + Int64ResultNode(7 | 17 | 117))); + EXPECT_TRUE(test1MultivalueExpression(XorFunctionNode(), mv, + Int64ResultNode(7 ^ 17 ^ 117))); +} + +TEST("testArithmeticNodes") { + AttributeGuard attr1 = createInt64Attribute(); + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1))); + ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(2))); + 
ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1))); + ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.9))); + ExpressionNode::CP s1(new ConstantNode(new StringResultNode("1"))); + ExpressionNode::CP s2(new ConstantNode(new StringResultNode("2"))); + ExpressionNode::CP r1(new ConstantNode(new RawResultNode("1", 1))); + ExpressionNode::CP r2(new ConstantNode(new RawResultNode("2", 1))); + ExpressionNode::CP a1(new AttributeNode(*attr1)); + ExpressionNode::CP a2(new AttributeNode(*attr1)); + AddFunctionNode add1; + add1.appendArg(i1); + add1.appendArg(i2); + ExpressionTree et(add1); + + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE( + et.getResult().getClass().inherits(IntegerResultNode::classId)); + EXPECT_TRUE(et.ExpressionNode::execute()); + EXPECT_EQUAL(et.getResult().getInteger(), 3); + EXPECT_TRUE(et.ExpressionNode::execute()); + EXPECT_EQUAL(et.getResult().getInteger(), 3); + AddFunctionNode add2; + add2.appendArg(i1); + add2.appendArg(f2); + add2.prepare(false); + EXPECT_TRUE( + add2.getResult().getClass().inherits(FloatResultNode::classId)); + AddFunctionNode add3; + add3.appendArg(i1); + add3.appendArg(s2); + add3.prepare(false); + EXPECT_TRUE( + add3.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add4; + add4.appendArg(i1); + add4.appendArg(r2); + add4.prepare(false); + EXPECT_TRUE( + add4.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add5; + add5.appendArg(i1); + add5.appendArg(a1); + add5.prepare(false); + EXPECT_TRUE( + add5.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add6; + add6.appendArg(f1); + add6.appendArg(a1); + add6.prepare(false); + EXPECT_TRUE( + add6.getResult().getClass().inherits(FloatResultNode::classId)); +} + +void testArith(MultiArgFunctionNode &op, const ExpressionNode::CP &arg1, + const ExpressionNode::CP & arg2, int64_t intResult, + double floatResult) { + 
op.appendArg(arg1); + op.appendArg(arg2); + op.prepare(false); + op.execute(); + EXPECT_EQUAL(intResult, op.getResult().getInteger()); + ASSERT_TRUE(intResult == op.getResult().getInteger()); + EXPECT_EQUAL(floatResult, op.getResult().getFloat()); +} + +void testArith2(MultiArgFunctionNode &op, int64_t intResult, + double floatResult) { + op.prepare(false); + op.execute(); + EXPECT_EQUAL(intResult, op.getResult().getInteger()); + ASSERT_TRUE(intResult == op.getResult().getInteger()); + EXPECT_EQUAL(floatResult, op.getResult().getFloat()); +} + +void testAdd(const ExpressionNode::CP &arg1, + const ExpressionNode::CP &arg2, + int64_t intResult, double floatResult){ + AddFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testMultiply(const ExpressionNode::CP & arg1, + const ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + MultiplyFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testDivide(const ExpressionNode::CP & arg1, + const ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + DivideFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testModulo(const ExpressionNode::CP & arg1, + const ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + ModuloFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testArithmeticArguments(NumericFunctionNode &function, + std::vector & arg1, + std::vector & arg2, + const std::vector & result, + double flattenResult) { + ExpressionNode::CP scalarInt1(new ConstantNode(new Int64ResultNode( + static_cast(arg1[0])))); + ExpressionNode::CP scalarInt2(new ConstantNode(new Int64ResultNode( + static_cast(arg2[0])))); + ExpressionNode::CP scalarFloat1(new ConstantNode(new FloatResultNode( + arg1[0]))); + ExpressionNode::CP scalarFloat2(new ConstantNode(new FloatResultNode( + arg2[0]))); + + IntegerResultNodeVector iv1; + for (size_t i(0), m(arg1.size()); 
i(arg1[i]))); + } + IntegerResultNodeVector iv2; + for (size_t i(0), m(arg2.size()); i(arg2[i]))); + } + FloatResultNodeVector fv1; + for (size_t i(0), m(arg1.size()); i(result[0])); + + function.reset(); + + function.appendArg(scalarInt1).appendArg(scalarFloat2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(scalarFloat1).appendArg(scalarInt2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(scalarFloat1).appendArg(scalarFloat2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(vectorInt1); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(Int64ResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getInteger(), + static_cast(flattenResult)); + + function.reset(); + + function.appendArg(vectorFloat1); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), flattenResult); + + function.reset(); + + function.appendArg(vectorInt1).appendArg(vectorInt2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(IntegerResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(IntegerResultNodeVector::classId)); + EXPECT_EQUAL(static_cast( + function.getResult()).size(), 7u); + 
EXPECT_EQUAL(0, function.getResult().cmp(ir)); + + function.reset(); + + function.appendArg(vectorFloat1).appendArg(vectorFloat2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); + + function.reset(); + + function.appendArg(vectorInt1).appendArg(vectorFloat2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); + + function.reset(); + + function.appendArg(vectorFloat1).appendArg(vectorInt2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); +} + +TEST("testArithmeticOperations") { + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1793253241))); + ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(1676521321))); + ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1109876))); + ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.767681239))); + testAdd(i1, i2, 3469774562ull, 3469774562ull); + testAdd(i1, f2, 1793253251ull, 1793253250.767681239); + testAdd(f1, f2, 11, 10.878668839 ); + testMultiply(i1, i2, 3006427292488851361ull, 3006427292488851361ull); + testMultiply(i1, f2, 17515926039ull, 1793253241.0*9.767681239); + 
testMultiply(f1, f2, 11, 10.8517727372816364 ); + + std::vector a(5), b(7); + a[0] = b[0] = 1; + a[1] = b[1] = 2; + a[2] = b[2] = 3; + a[3] = b[3] = 4; + a[4] = b[4] = 5; + b[5] = 6; + b[6] = 7; + std::vector r(7); + { + r[0] = a[0] + b[0]; + r[1] = a[1] + b[1]; + r[2] = a[2] + b[2]; + r[3] = a[3] + b[3]; + r[4] = a[4] + b[4]; + r[5] = a[0] + b[5]; + r[6] = a[1] + b[6]; + AddFunctionNode f; + testArithmeticArguments(f, a, b, r, a[0]+a[1]+a[2]+a[3]+a[4]); + } + { + r[0] = a[0] * b[0]; + r[1] = a[1] * b[1]; + r[2] = a[2] * b[2]; + r[3] = a[3] * b[3]; + r[4] = a[4] * b[4]; + r[5] = a[0] * b[5]; + r[6] = a[1] * b[6]; + MultiplyFunctionNode f; + testArithmeticArguments(f, a, b, r, a[0]*a[1]*a[2]*a[3]*a[4]); + } +} + +TEST("testAggregatorsInExpressions") { + CountAggregationResult *c = new CountAggregationResult(); + c->setCount(3); + SumAggregationResult *s = new SumAggregationResult(); + ResultNode::CP r1(new Int64ResultNode(7)), + r2(new Int64ResultNode(22)); + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(7))), + i2(c), + i3(s), + i4(new ConstantNode(new Int64ResultNode(22))); + AggregationResult::Configure conf; + s->setExpression(i4).select(conf, conf); + s->aggregate(0, 0); + + testAdd(i1, i2, 10, 10); + testMultiply(i1, i2, 21, 21); + testMultiply(i2, i3, 66, 66); + testDivide(i3, i2, 7, 7); + testDivide(i3, i1, 3, 3); + testModulo(i3, i2, 1, 1); + testModulo(i3, i1, 1, 1); + + MinAggregationResult *min = new MinAggregationResult(); + min->setResult(r2); + ExpressionNode::CP imin(min); + testAdd(imin, i1, 29, 29); + + MaxAggregationResult *max = new MaxAggregationResult(); + max->setResult(r1); + ExpressionNode::CP imax(max); + testAdd(imin, imax, 29, 29); + + XorAggregationResult *x = new XorAggregationResult(); + x->setExpression(i4).select(conf, conf); + x->aggregate(0, 0); + ExpressionNode::CP ix(x); + testAdd(ix, i1, 29, 29); + + AverageAggregationResult *avg = new AverageAggregationResult(); + avg->setExpression(i4).select(conf, conf); + 
avg->aggregate(0, 0); + ExpressionNode::CP iavg(avg); + testAdd(iavg, i1, 29, 29); +} + +void testAggregationResult(AggregationResult & aggr, const AggrGetter & g, + const ResultNode & v, const ResultNode & i, + const ResultNode & m, const ResultNode & s) { + ExpressionNode::CP scalarInt1(new ConstantNode(v)); + AggregationResult::Configure conf; + aggr.setExpression(scalarInt1).select(conf, conf); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, i.cmp(g(aggr))); + aggr.aggregate(0,0); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, m.cmp(g(aggr))); + aggr.aggregate(1,0); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, s.cmp(g(aggr))); +} + +TEST("testAggregationResults") { + struct SumGetter : AggrGetter { + virtual const ResultNode &operator()(const AggregationResult & r) const + { return static_cast(r).getSum(); } + }; + SumAggregationResult sum; + testAggregationResult(sum, SumGetter(), Int64ResultNode(7), + Int64ResultNode(0), Int64ResultNode(7), + Int64ResultNode(14)); + testAggregationResult(sum, SumGetter(), FloatResultNode(7.77), + FloatResultNode(0), FloatResultNode(7.77), + FloatResultNode(15.54)); + IntegerResultNodeVector v; + v.push_back(Int64ResultNode(7)).push_back(Int64ResultNode(8)); + testAggregationResult(sum, SumGetter(), v, Int64ResultNode(0), + Int64ResultNode(15), Int64ResultNode(30)); + testAggregationResult(sum, SumGetter(), FloatResultNode(7.77), + FloatResultNode(0), FloatResultNode(7.77), + FloatResultNode(15.54)); +} + +TEST("testGrouping") { + AttributeGuard attr1 = createInt64Attribute(); + ExpressionNode::CP select1(new AttributeNode(*attr1)); + ExpressionNode::CP result1(new CountAggregationResult()); + (static_cast(*result1)).setExpression(select1); + ExpressionNode::CP result2( new SumAggregationResult()); + (static_cast(*result2)).setExpression(select1); + + Grouping grouping = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + 
.addLevel(GroupingLevel() + .setExpression(select1) + .addResult(result1) + .addResult(result2)); + + grouping.configureStaticStuff(ConfigureStaticParams(0, 0)); + grouping.aggregate(0u, 10u); + const Group::GroupList &groups = grouping.getRoot().groups(); + EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 9u); + ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id() == + CountAggregationResult::classId); + ASSERT_TRUE(groups[0]->getAggregationResult(1).getClass().id() == + SumAggregationResult::classId); + EXPECT_EQUAL(groups[0]->getId().getInteger(), 6u); + EXPECT_EQUAL(static_cast( + groups[0]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[0]->getAggregationResult(1)).getSum().getInteger(), + 6); + EXPECT_EQUAL(groups[1]->getId().getInteger(), 7u); + EXPECT_EQUAL(static_cast( + groups[1]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[1]->getAggregationResult(1)).getSum().getInteger(), + 7); + EXPECT_EQUAL(groups[2]->getId().getInteger(), 11u); + EXPECT_EQUAL(static_cast( + groups[2]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[2]->getAggregationResult(1)).getSum().getInteger(), + 11); + EXPECT_EQUAL(groups[3]->getId().getInteger(), 13u); + EXPECT_EQUAL(static_cast( + groups[3]->getAggregationResult(0)).getCount(), 2u); + EXPECT_EQUAL(static_cast( + groups[3]->getAggregationResult(1)).getSum().getInteger(), + 26); + EXPECT_EQUAL(groups[4]->getId().getInteger(), 17u); + EXPECT_EQUAL(static_cast( + groups[4]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[4]->getAggregationResult(1)).getSum().getInteger(), + 17); + EXPECT_EQUAL(groups[5]->getId().getInteger(), 27u); + EXPECT_EQUAL(static_cast( + groups[5]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[5]->getAggregationResult(1)).getSum().getInteger(), + 27); + EXPECT_EQUAL(groups[6]->getId().getInteger(), 34u); + EXPECT_EQUAL(static_cast( + 
groups[6]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[6]->getAggregationResult(1)).getSum().getInteger(), + 34); + EXPECT_EQUAL(groups[7]->getId().getInteger(), 67891u); + EXPECT_EQUAL(static_cast( + groups[7]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[7]->getAggregationResult(1)).getSum().getInteger(), + 67891); + EXPECT_EQUAL(groups[8]->getId().getInteger(), 67892u); + EXPECT_EQUAL(static_cast( + groups[8]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast( + groups[8]->getAggregationResult(1)).getSum().getInteger(), + 67892); + testStreaming(grouping); +} + +TEST("testGrouping2") { + AttributeGuard attr1 = createInt64Attribute(); + + RangeBucketPreDefFunctionNode *predef( + new RangeBucketPreDefFunctionNode(AttributeNode(*attr1))); + IntegerBucketResultNodeVector prevec; + prevec.getVector().push_back(IntegerBucketResultNode(6,7)); + prevec.getVector().push_back(IntegerBucketResultNode(7,14)); + prevec.getVector().push_back(IntegerBucketResultNode(18,50)); //30 + prevec.getVector() + .push_back(IntegerBucketResultNode(80,50000000000ull)); //30 + predef->setBucketList(prevec); + ExpressionNode::CP select1(predef); + ExpressionNode::CP result1( new CountAggregationResult()); + (static_cast(*result1)).setExpression(select1); + + Grouping grouping = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(GroupingLevel() + .setExpression(select1) + .addResult(result1)); + + grouping.configureStaticStuff(ConfigureStaticParams(0, 0)); + grouping.aggregate(0u, 10u); + const Group::GroupList &groups = grouping.getRoot().groups(); + EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 5u); + ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id() + == CountAggregationResult::classId); + EXPECT_EQUAL(groups[0]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast( + groups[0]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(groups[1]->getId().getInteger(), 0u); 
+ EXPECT_EQUAL(static_cast( + groups[1]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(groups[2]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast( + groups[2]->getAggregationResult(0)).getCount(), 4u); + EXPECT_EQUAL(groups[3]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast( + groups[3]->getAggregationResult(0)).getCount(), 2u); + EXPECT_EQUAL(groups[4]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast( + groups[4]->getAggregationResult(0)).getCount(), 2u); + testStreaming(grouping); +} + +AttributeGuard createInt64Attribute() { + SingleInt64ExtAttribute *selectAttr1( + new SingleInt64ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +AttributeGuard createInt32Attribute() { + SingleInt32ExtAttribute *selectAttr1( + new SingleInt32ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + 
selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +AttributeGuard createInt16Attribute() { + SingleInt16ExtAttribute *selectAttr1( + new SingleInt16ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +AttributeGuard createInt8Attribute() { + SingleInt8ExtAttribute *selectAttr1( + new SingleInt8ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +TEST("testIntegerTypes") { + EXPECT_EQUAL(AttributeNode(*createInt8Attribute()).prepare(false) + .getResult().getClass().id(), + uint32_t(Int64ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt8Attribute()) + 
.prepare(true).getResult().getClass().id(), + uint32_t(Int8ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt16Attribute()) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt16Attribute()) + .prepare(true).getResult().getClass().id(), + uint32_t(Int16ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt32Attribute()) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt32Attribute()) + .prepare(true).getResult().getClass().id(), + uint32_t(Int32ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt64Attribute()) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNode::classId)); + EXPECT_EQUAL(AttributeNode(*createInt64Attribute()) + .prepare(true).getResult().getClass().id(), + uint32_t(Int64ResultNode::classId)); + + EXPECT_EQUAL( + AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt8ExtAttribute("test")))) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNodeVector::classId)); + EXPECT_EQUAL( + AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt8ExtAttribute("test")))) + .prepare(true).getResult().getClass().id(), + uint32_t(Int8ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt16ExtAttribute("test")))) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt16ExtAttribute("test")))) + .prepare(true).getResult().getClass().id(), + uint32_t(Int16ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt32ExtAttribute("test")))) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new 
MultiInt32ExtAttribute("test")))) + .prepare(true).getResult().getClass().id(), + uint32_t(Int32ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt64ExtAttribute("test")))) + .prepare(false).getResult().getClass().id(), + uint32_t(Int64ResultNodeVector::classId)); + EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP( + new MultiInt64ExtAttribute("test")))) + .prepare(true).getResult().getClass().id(), + uint32_t(Int64ResultNodeVector::classId)); +} + +TEST("testStreamingAll") { + testStreaming(Int64ResultNode(89)); + testStreaming(FloatResultNode(89.765)); + testStreaming(StringResultNode("Tester StringResultNode streaming")); + testStreaming(RawResultNode("Tester RawResultNode streaming", 30)); + testStreaming(CountAggregationResult()); + testStreaming(ExpressionCountAggregationResult()); + testStreaming(SumAggregationResult()); + testStreaming(MinAggregationResult()); + testStreaming(MaxAggregationResult()); + testStreaming(AverageAggregationResult()); + testStreaming(Group()); + testStreaming(Grouping()); + testStreaming(HitsAggregationResult()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/alignment/.gitignore b/searchlib/src/tests/alignment/.gitignore new file mode 100644 index 00000000000..9668e4fc02c --- /dev/null +++ b/searchlib/src/tests/alignment/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +alignment_test +searchlib_alignment_test_app diff --git a/searchlib/src/tests/alignment/CMakeLists.txt b/searchlib/src/tests/alignment/CMakeLists.txt new file mode 100644 index 00000000000..3695c600f9b --- /dev/null +++ b/searchlib/src/tests/alignment/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_alignment_test_app + SOURCES + alignment.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_alignment_test_app COMMAND searchlib_alignment_test_app) diff --git a/searchlib/src/tests/alignment/DESC b/searchlib/src/tests/alignment/DESC new file mode 100644 index 00000000000..a37dbbc1c7a --- /dev/null +++ b/searchlib/src/tests/alignment/DESC @@ -0,0 +1 @@ +alignment test. Take a look at alignment.cpp for details. diff --git a/searchlib/src/tests/alignment/FILES b/searchlib/src/tests/alignment/FILES new file mode 100644 index 00000000000..067828da485 --- /dev/null +++ b/searchlib/src/tests/alignment/FILES @@ -0,0 +1 @@ +alignment.cpp diff --git a/searchlib/src/tests/alignment/alignment.cpp b/searchlib/src/tests/alignment/alignment.cpp new file mode 100644 index 00000000000..882e0942976 --- /dev/null +++ b/searchlib/src/tests/alignment/alignment.cpp @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("alignment_test"); + +#include +#include +#include + +struct Timer { + rusage usage; + void start() { + getrusage(RUSAGE_SELF, &usage); + } + double stop() { + rusage tmp; + getrusage(RUSAGE_SELF, &tmp); + double startMs = (((double)usage.ru_utime.tv_sec) * 1000.0) + + (((double)usage.ru_utime.tv_usec) / 1000.0); + double stopMs = (((double)tmp.ru_utime.tv_sec) * 1000.0) + + (((double)tmp.ru_utime.tv_usec) / 1000.0); + return (stopMs - startMs); + } +}; + +TEST_SETUP(Test); + +double +timeAccess(void *bufp, uint32_t len, double &sum) +{ + double *buf = (double *)bufp; + Timer timer; + timer.start(); + for(uint32_t i = 0; i < 512 * 1024; ++i) { + for (uint32_t j = 0; j < len; ++j) { + sum += buf[j]; + } + } + double ret = timer.stop(); + return ret; +} + +int +Test::Main() +{ + TEST_INIT("alignment_test"); + + uint32_t buf[129]; + for (uint32_t i = 0; i < 129; ++i) { + buf[i] = i; + } + + uintptr_t ptr = reinterpret_cast(&buf[0]); + bool aligned = (ptr % sizeof(double) == 0); + + double foo = 0, bar = 0; + printf(aligned ? "ALIGNED\n" : "UNALIGNED\n"); + printf("warmup time = %.2f\n", timeAccess(reinterpret_cast(&buf[0]), 64, foo)); + printf("real time = %.2f\n", timeAccess(reinterpret_cast(&buf[0]), 64, bar)); + EXPECT_EQUAL(foo, bar); + + printf(!aligned ? 
"ALIGNED\n" : "UNALIGNED\n"); + printf("warmup time = %.2f\n", timeAccess(reinterpret_cast(&buf[1]), 64, foo)); + printf("real time = %.2f\n", timeAccess(reinterpret_cast(&buf[1]), 64, bar)); + EXPECT_EQUAL(foo, bar); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore new file mode 100644 index 00000000000..732912ab981 --- /dev/null +++ b/searchlib/src/tests/attribute/.gitignore @@ -0,0 +1,11 @@ +*.dat +*.idx +*.weight +.depend +Makefile +attribute_test +attributebenchmark +searchlib_attribute_test_app +searchlib_attributeguard_test_app +searchlib_changevector_test_app +searchlib_attributebenchmark_app diff --git a/searchlib/src/tests/attribute/CMakeLists.txt b/searchlib/src/tests/attribute/CMakeLists.txt new file mode 100644 index 00000000000..0598b5776a8 --- /dev/null +++ b/searchlib/src/tests/attribute/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attributeguard_test_app + SOURCES + attributeguard.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributeguard_test_app COMMAND sh attributeguard_test.sh) +vespa_add_executable(searchlib_attribute_test_app + SOURCES + attribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_test_app COMMAND sh attribute_test.sh) +vespa_add_executable(searchlib_changevector_test_app + SOURCES + changevector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_changevector_test_app COMMAND sh changevector_test.sh) +vespa_add_executable(searchlib_attributebenchmark_app + SOURCES + attributebenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK) diff --git a/searchlib/src/tests/attribute/DESC b/searchlib/src/tests/attribute/DESC new file mode 100644 index 00000000000..6a9215b1a3b --- /dev/null +++ b/searchlib/src/tests/attribute/DESC @@ -0,0 +1 @@ +Unit tests for attribute use. diff --git a/searchlib/src/tests/attribute/FILES b/searchlib/src/tests/attribute/FILES new file mode 100644 index 00000000000..b742644b750 --- /dev/null +++ b/searchlib/src/tests/attribute/FILES @@ -0,0 +1,2 @@ +attribute.cpp +attributebenchmark.cpp diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp new file mode 100644 index 00000000000..b1d4e675e23 --- /dev/null +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -0,0 +1,2200 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +LOG_SETUP("attribute_test"); + + +using namespace document; +using std::shared_ptr; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using search::attribute::BasicType; +using search::attribute::IAttributeVector; + +namespace +{ + + +vespalib::string empty; +vespalib::string clstmp("clstmp"); +vespalib::string asuDir("asutmp"); + +bool +isUnsignedSmallIntAttribute(const BasicType::Type &type) +{ + switch (type) + { + case BasicType::UINT1: + case BasicType::UINT2: + case BasicType::UINT4: + return true; + default: + return false; + } +} + +bool +isUnsignedSmallIntAttribute(const AttributeVector &a) +{ + return isUnsignedSmallIntAttribute(a.getBasicType()); +} + +template +void +expectZero(const BufferType &b) +{ + EXPECT_EQUAL(0, b); +} + +template <> +void +expectZero(const vespalib::string &b) +{ + EXPECT_EQUAL(empty, b); +} + +uint64_t +statSize(const vespalib::string &fileName) +{ + FastOS_StatInfo statInfo; + if (EXPECT_TRUE(FastOS_File::Stat(fileName.c_str(), &statInfo))) { + return statInfo._size; + } else { + return 0u; + } +} + +uint64_t +statSize(const AttributeVector &a) +{ + vespalib::string baseFileName = a.getBaseFileName(); + uint64_t resultSize = statSize(baseFileName + ".dat"); + if (a.hasMultiValue()) { + resultSize += statSize(baseFileName + ".idx"); + } + if (a.hasWeightedSetType()) { + resultSize += statSize(baseFileName + ".weight"); + } + if (a.hasEnum() && a.getEnumeratedSave()) { + resultSize += statSize(baseFileName + ".udat"); + } + return resultSize; +} + + +bool +preciseEstimatedSize(const AttributeVector &a) +{ + if (a.getBasicType() == BasicType::STRING && + EXPECT_TRUE(a.hasEnum()) && !a.getEnumeratedSave()) { + return false; // Using average of string lens, can be somewhat off + } + return true; +} + 
+} + +namespace search { + +using attribute::CollectionType; +using attribute::Config; + +class AttributeTest : public vespalib::TestApp +{ +private: + typedef AttributeVector::SP AttributePtr; + + void addDocs(const AttributePtr & v, size_t sz); + template + void populate(VectorType & ptr, unsigned seed); + template + void compare(VectorType & a, VectorType & b); + + void testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs); + void testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs); + template + void testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c); + void testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs); + void testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs); + template + void testMemorySaver(const AttributePtr & a, const AttributePtr & b); + + void testReload(); + void testHasLoadData(); + void testMemorySaver(); + + void commit(const AttributePtr & ptr); + + template + void fillNumeric(std::vector & values, uint32_t numValues); + void fillString(std::vector & values, uint32_t numValues); + template + bool appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount, + const std::vector & values); + template + bool checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const BufferType & value); + template + bool checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t range, const std::vector & values); + + // CollectionType::SINGLE + template + void testSingle(const AttributePtr & ptr, const std::vector & values); + void testSingle(); + + // CollectionType::ARRAY + template + void printArray(const AttributePtr & ptr); + template + void testArray(const AttributePtr & ptr, const std::vector & values); + void testArray(); + + // CollectionType::WSET + template + void 
printWeightedSet(const AttributePtr & ptr); + template + void testWeightedSet(const AttributePtr & ptr, const std::vector & values); + void testWeightedSet(); + void testBaseName(); + + template + void testArithmeticValueUpdate(const AttributePtr & ptr); + void testArithmeticValueUpdate(); + + template + void testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after); + void testArithmeticWithUndefinedValue(); + + template + void testMapValueUpdate(const AttributePtr & ptr, BufferType initValue, + const FieldValue & initFieldValue, const FieldValue & nonExistant, + bool removeIfZero, bool createIfNonExistant); + void testMapValueUpdate(); + + void testStatus(); + void testNullProtection(); + void testGeneration(const AttributePtr & attr, bool exactStatus); + void testGeneration(); + + void + testCreateSerialNum(void); + + template + void + testCompactLidSpace(const Config &config, + bool fs, + bool es); + + template + void + testCompactLidSpace(const Config &config); + + void + testCompactLidSpace(const Config &config); + + void + testCompactLidSpace(void); + + template + void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch); + template + void requireThatAddressSpaceUsageIsReported(const Config &config); + void requireThatAddressSpaceUsageIsReported(); + +public: + AttributeTest() { } + int Main(); +}; + +void AttributeTest::testBaseName() +{ + AttributeVector::BaseName v("attr1"); + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_TRUE(v.getSnapshotName().empty()); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_TRUE(v.getDirName().empty()); + v = "attribute/attr1/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_TRUE(v.getSnapshotName().empty()); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "attribute/attr1"); + v = "attribute/attr1/snapshot-X/attr1"; + 
EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "attribute/attr1/snapshot-X"); + v = "/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "/attribute/attr1/snapshot-X"); + v = "index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), "index.1/1.ready/attribute/attr1/snapshot-X"); + v = "/index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), + "/index.1/1.ready/attribute/attr1/snapshot-X"); + v = "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), + "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X"); +} + +void AttributeTest::addDocs(const AttributePtr & v, size_t sz) +{ + if (sz) { + AttributeVector::DocId docId; + for(size_t i(0); i< sz; i++) { + EXPECT_TRUE( v->addDoc(docId) ); + } + EXPECT_TRUE( docId+1 == sz ); + EXPECT_TRUE( v->getNumDocs() == sz ); + commit(v); + } +} + + +template <> +void AttributeTest::populate(IntegerAttribute & v, unsigned seed) +{ + srand(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + 
EXPECT_TRUE( v.append(i, rand(), weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rand()) ); + } + } + v.commit(); +} + +template <> +void AttributeTest::populate(FloatingPointAttribute & v, unsigned seed) +{ + srand(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rand() * 1.25, weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rand() * 1.25) ); + } + } + v.commit(); +} + +template <> +void AttributeTest::populate(StringAttribute & v, unsigned seed) +{ + RandomGenerator rnd(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = rnd.rand(0, 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); + } + } + v.commit(); +} + +template +void AttributeTest::compare(VectorType & a, VectorType & b) +{ + EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); + ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); + uint32_t asz(a.getMaxValueCount()); + uint32_t bsz(b.getMaxValueCount()); + BufferType *av = new BufferType[asz]; + BufferType *bv = new BufferType[bsz]; + + for (size_t i(0), m(a.getNumDocs()); i < m; i++) { + ASSERT_TRUE(asz >= static_cast(a.getValueCount(i))); + ASSERT_TRUE(bsz >= static_cast(b.getValueCount(i))); + EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); + ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); + EXPECT_EQUAL(static_cast(a).get(i, av, asz), static_cast(a.getValueCount(i))); + EXPECT_EQUAL(static_cast(b).get(i, bv, bsz), static_cast(b.getValueCount(i))); + for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) { + EXPECT_TRUE(av[j] == bv[j]); + } + } + delete [] bv; + delete 
[] av; +} + +void AttributeTest::testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs) +{ + addDocs(a, numDocs); + addDocs(b, numDocs); + populate(static_cast(*a.get()), 17); + populate(static_cast(*b.get()), 17); + if (a->hasWeightedSetType()) { + testReload(a, b, c); + } else { + testReload(a, b, c); + } +} + + +void AttributeTest::testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs) +{ + addDocs(a, numDocs); + addDocs(b, numDocs); + populate(static_cast(*a.get()), 17); + populate(static_cast(*b.get()), 17); + if (a->hasWeightedSetType()) { + testReload(a, b, c); + } else { + testReload(a, b, c); + } +} + +template +void AttributeTest::testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c) +{ + LOG(info, "testReload: vector '%s'", a->getName().c_str()); + + compare + (*(static_cast(a.get())), *(static_cast(b.get()))); + a->setCreateSerialNum(43u); + EXPECT_TRUE( a->saveAs(b->getBaseFileName()) ); + if (preciseEstimatedSize(*a)) { + EXPECT_EQUAL(statSize(*b), a->getEstimatedSaveByteSize()); + } else { + double estSize = a->getEstimatedSaveByteSize(); + double actSize = statSize(*b); + EXPECT_LESS_EQUAL(actSize * 1.0, estSize * 1.3); + EXPECT_GREATER_EQUAL(actSize * 1.0, estSize * 0.7); + } + EXPECT_TRUE( a->saveAs(c->getBaseFileName()) ); + if (preciseEstimatedSize(*a)) { + EXPECT_EQUAL(statSize(*c), a->getEstimatedSaveByteSize()); + } + EXPECT_TRUE( b->load() ); + EXPECT_EQUAL(43u, b->getCreateSerialNum()); + compare + (*(static_cast(a.get())), *(static_cast(b.get()))); + EXPECT_TRUE( c->load() ); + compare + (*(static_cast(a.get())), *(static_cast(c.get()))); + + if (isUnsignedSmallIntAttribute(*a)) + return; + populate(static_cast(*b.get()), 700); + populate(static_cast(*c.get()), 700); + compare + (*(static_cast(b.get())), *(static_cast(c.get()))); + + { + ReadAttributeFile readC(c->getBaseFileName(), c->getConfig()); + 
WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(), + DummyFileHeaderContext(), + c->getNumDocs()); + std::unique_ptr record(readC.getRecord()); + ASSERT_TRUE(record.get()); + for (size_t i(0), m(c->getNumDocs()); i < m; i++) { + EXPECT_TRUE(readC.read(*record)); + EXPECT_TRUE(writeC.write(*record)); + } + EXPECT_TRUE( ! readC.read(*record)); + } + EXPECT_TRUE( b->load() ); + compare + (*(static_cast(a.get())), + *(static_cast(b.get()))); + { + ReadAttributeFile readC(c->getBaseFileName(), c->getConfig()); + WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(), + DummyFileHeaderContext(), + c->getNumDocs()); + readC.enableDirectIO(); + writeC.enableDirectIO(); + std::unique_ptr record(readC.getRecord()); + ASSERT_TRUE(record.get()); + for (size_t i(0), m(c->getNumDocs()); i < m; i++) { + EXPECT_TRUE(readC.read(*record)); + EXPECT_TRUE(writeC.write(*record)); + } + EXPECT_TRUE( ! readC.read(*record)); + } + EXPECT_TRUE( b->load() ); + compare + (*(static_cast(a.get())), *(static_cast(b.get()))); +} + + +void AttributeTest::testReload() +{ + // IntegerAttribute + // CollectionType::SINGLE + { + AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1", Config(BasicType::INT32, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2", Config(BasicType::INT32, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("sint32_3", Config(BasicType::INT32, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint4_3", Config(BasicType::UINT4, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + 
AttributePtr iv1 = AttributeFactory::createAttribute("suint2_1", Config(BasicType::UINT2, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint2_2", Config(BasicType::UINT2, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint2_3", Config(BasicType::UINT2, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint1_1", Config(BasicType::UINT1, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint1_2", Config(BasicType::UINT1, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint1_3", Config(BasicType::UINT1, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("sfsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("sfsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("sfsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + // CollectionType::ARRAY + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("flag_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("flag_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("flag_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv3 = AttributeFactory::createAttribute("aint32_3", Config(BasicType::INT32, 
CollectionType::ARRAY)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("afsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("afsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("afsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + // CollectionType::WSET + { + AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1", Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2", Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv3 = AttributeFactory::createAttribute("wint32_3", Config(BasicType::INT32, CollectionType::WSET)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("wfsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("wfsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("wfsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + + + // StringAttribute + { + AttributePtr iv1 = AttributeFactory::createAttribute("sstring_1", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sstring_2", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("sstring_3", Config(BasicType::STRING, CollectionType::SINGLE)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("astring_1", Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("astring_2", 
Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv3 = AttributeFactory::createAttribute("astring_3", Config(BasicType::STRING, CollectionType::ARRAY)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("wstring_1", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wstring_2", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv3 = AttributeFactory::createAttribute("wstring_3", Config(BasicType::STRING, CollectionType::WSET)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("sfsstring_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("sfsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("sfsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("afsstring_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("afsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("afsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("wsfsstring_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("wsfsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("wsfsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } +} + +void AttributeTest::testHasLoadData() +{ + { // single value + AttributePtr av = 
AttributeFactory::createAttribute("loaddata1", Config(BasicType::INT32)); + EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata2"); + av = AttributeFactory::createAttribute("loaddata2", Config(BasicType::INT32)); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata3"); + } + { // array + AttributePtr av = AttributeFactory::createAttribute("loaddata3", Config(BasicType::INT32, CollectionType::ARRAY)); + EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata4"); + av = AttributeFactory::createAttribute("loaddata4", Config(BasicType::INT32, CollectionType::ARRAY)); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata5"); + } + { // wset + AttributePtr av = AttributeFactory::createAttribute("loaddata5", Config(BasicType::INT32, CollectionType::WSET)); + EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata6"); + av = AttributeFactory::createAttribute("loaddata6", Config(BasicType::INT32, CollectionType::WSET)); + EXPECT_TRUE(av->hasLoadData()); + } +} + +void +AttributeTest::testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs) +{ + addDocs(a, numDocs); + populate(static_cast(*a.get()), 21); + if (a->hasWeightedSetType()) { + testMemorySaver(a, b); + } else { + testMemorySaver(a, b); + } +} + +void +AttributeTest::testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs) +{ + addDocs(a, numDocs); + populate(static_cast(*a.get()), 21); + if (a->hasWeightedSetType()) { + testMemorySaver(a, b); + } else { + testMemorySaver(a, b); + } +} + +template +void +AttributeTest::testMemorySaver(const AttributePtr & a, const AttributePtr & b) +{ + LOG(info, "testMemorySaver: vector '%s'", a->getName().c_str()); + + AttributeMemorySaveTarget saveTarget; + EXPECT_TRUE(a->saveAs(b->getBaseFileName(), saveTarget)); + FastOS_StatInfo statInfo; + vespalib::string datFile = 
vespalib::make_string("%s.dat", b->getBaseFileName().c_str()); + EXPECT_TRUE(!FastOS_File::Stat(datFile.c_str(), &statInfo)); + EXPECT_TRUE(saveTarget.writeToFile(TuneFileAttributes(), + DummyFileHeaderContext())); + EXPECT_TRUE(FastOS_File::Stat(datFile.c_str(), &statInfo)); + EXPECT_TRUE(b->load()); + compare + (*(static_cast(a.get())), *(static_cast(b.get()))); +} + +void +AttributeTest::testMemorySaver() +{ + // CollectionType::SINGLE + { + AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1ms", Config(BasicType::INT32, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2ms", Config(BasicType::INT32, CollectionType::SINGLE)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1ms", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2ms", Config(BasicType::UINT4, CollectionType::SINGLE)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("sstr_1ms", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sstr_2ms", Config(BasicType::STRING, CollectionType::SINGLE)); + testMemorySaverString(iv1, iv2, 100); + } + // CollectionType::ARRAY + { + AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1ms", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2ms", Config(BasicType::INT32, CollectionType::ARRAY)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("astr_1ms", Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("astr_2ms", Config(BasicType::STRING, CollectionType::ARRAY)); + testMemorySaverString(iv1, iv2, 100); + } + // CollectionType::WSET + { + AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1ms", 
Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2ms", Config(BasicType::INT32, CollectionType::WSET)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("wstr_1ms", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wstr_2ms", Config(BasicType::STRING, CollectionType::WSET)); + testMemorySaverString(iv1, iv2, 100); + } +} + + +template +void +AttributeTest::fillNumeric(std::vector & values, uint32_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(static_cast(i)); + } +} + +void +AttributeTest::fillString(std::vector & values, uint32_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? "0" : "") << i; + values.push_back(ss.str()); + } +} + +template +bool +AttributeTest::appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount, + const std::vector & values) +{ + bool retval = true; + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE((retval = retval && v.append(doc, values[i], 1))); + } + return retval; +} + +template +bool +AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const BufferType & value) +{ + std::vector buffer(valueCount); + if (!EXPECT_EQUAL(valueCount, ptr->getValueCount(doc))) return false; + if (!EXPECT_EQUAL(valueCount, ptr->get(doc, &buffer[0], buffer.size()))) return false; + if (!EXPECT_EQUAL(numValues, static_cast(std::count(buffer.begin(), buffer.end(), value)))) return false; + return true; +} + +template +bool +AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t range, const std::vector & values) +{ + std::vector buffer(valueCount); + bool retval = true; + 
EXPECT_TRUE((retval = retval && (static_cast(ptr->getValueCount(doc)) == valueCount))); + EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount))); + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range]))); + } + return retval; +} + + +//----------------------------------------------------------------------------- +// CollectionType::SINGLE +//----------------------------------------------------------------------------- +/* Exercises update(), append(), remove() and clearDoc() on a single-value attribute. update() must succeed for every existing doc, append() and remove() are expected to be rejected, and every operation on doc id getNumDocs() (one past the last doc) must fail. */ +template +void +AttributeTest::testSingle(const AttributePtr & ptr, const std::vector & values) +{ + LOG(info, "testSingle: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast(values.size())); + + VectorType & v = *(static_cast(ptr.get())); + uint32_t numUniques = values.size(); + std::vector buffer(1); + + // test update(): two consecutive updates per doc, each verified after its own commit + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(ptr->getValueCount(doc) == 1); + uint32_t i = doc % numUniques; + uint32_t j = (doc + 1) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + + EXPECT_TRUE(v.update(doc, values[j])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[j])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + + // test append(): expected to be rejected for every doc of a single-value attribute + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.append(doc, values[0], 1)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + // test remove(): likewise expected to be rejected + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.remove(doc, values[0], 1)); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + bool smallUInt = isUnsignedSmallIntAttribute(*ptr); + // test clearDoc(): every even doc is cleared right after its update, before the commit + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t i = (doc + 2) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + if (doc % 2 == 0) { // alternate clearing + 
ptr->clearDoc(doc); + } + ptr->commit(); + EXPECT_EQUAL(1u, ptr->get(doc, &buffer[0], buffer.size())); + if (doc % 2 == 0) { + if (smallUInt) { /* unsigned small int attributes are expected to read back zero instead of an undefined marker */ + expectZero(buffer[0]); + } else { + EXPECT_TRUE(attribute::isUndefined(buffer[0])); + } + } else { /* odd docs were not cleared and must still hold the updated value */ + EXPECT_TRUE(!attribute::isUndefined(buffer[0])); + EXPECT_EQUAL(values[i], buffer[0]); + } + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} +/* Runs the single-value test above on int32, uint4, fast-search int32, float, fast-search float, string and fast-search string attributes, each filled with 1000 docs. */ +void +AttributeTest::testSingle() +{ + uint32_t numDocs = 1000; + uint32_t numUniques = 50; + uint32_t numUniqueNibbles = 9; /* the uint4 attribute is tested with only 9 distinct values */ + { + std::vector values; + fillNumeric(values, numUniques); + std::vector nibbleValues; + fillNumeric(nibbleValues, numUniqueNibbles); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-int32", Config(BasicType::INT32, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-uint4", Config(BasicType::UINT4, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle(ptr, nibbleValues); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-int32", cfg); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + } + { + std::vector values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-float", Config(BasicType::FLOAT, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-float", cfg); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + + } + { + std::vector values; + fillString(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, 
CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-fs-string", cfg); + addDocs(ptr, numDocs); + testSingle(ptr, values); + } + } +} + + +//----------------------------------------------------------------------------- +// CollectionType::ARRAY +//----------------------------------------------------------------------------- +/* Exercises update(), append(), remove() and clearDoc() on an array attribute and checks the update counters reported by getStatus() after the update() pass. Needs at least 6 distinct values. Every operation on doc id getNumDocs() (one past the last doc) must fail. */ +template +void +AttributeTest::testArray(const AttributePtr & ptr, const std::vector & values) +{ + LOG(info, "testArray: vector '%s' with %i documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast(values.size())); + + VectorType & v = *(static_cast(ptr.get())); + uint32_t numUniques = values.size(); + ASSERT_TRUE(numUniques >= 6); /* values[0], [1], [3] and [5] are addressed directly further down */ + + + // test update(): after appending doc % numUniques values, update() must leave exactly one value + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + size_t sumAppends(0); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + sumAppends += valueCount; + + uint32_t i = doc % numUniques; + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), (1 + 2)*ptr->getNumDocs() + sumAppends); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), sumAppends); + + + // test append() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + // append unique values + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount, valueCount, values)); + + // append duplicates: the same values again, so the doc is expected to hold twice as many entries + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount * 2, 
valueCount, values)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + + // test remove(): each doc gets 1 x values[1], 3 x values[3] and 5 x values[5] (9 entries) + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + ptr->clearDoc(doc); + + EXPECT_TRUE(v.append(doc, values[1], 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(v.append(doc, values[3], 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(v.append(doc, values[5], 1)); + } + + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[0], 1)); /* values[0] was never appended: the call succeeds but the same counts are expected afterwards */ + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[1], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 8, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[5], 1)); /* NOTE(review): the expected numbers (8 down to 3, none of values[5] left) suggest remove() drops every occurrence of the value - confirm against checkCount() */ + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[5])); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + + // test clearDoc(): a clear issued before the commit must discard the appends made ahead of it + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + + ptr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[0], 1)); + } + ptr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[1], 1)); + } + ptr->commit(); + + EXPECT_TRUE(checkCount(ptr, doc, valueCount, valueCount, values[1])); + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} +/* Debug helper: prints every value of every document to std::cout, one line per value, using a buffer of getMaxValueCount() entries. */ +template +void +AttributeTest::printArray(const AttributePtr & ptr) +{ + uint32_t bufferSize = ptr->getMaxValueCount(); + std::vector buffer(bufferSize); + for 
(uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = " << buffer[i] + << std::endl; + } + } +} +/* Runs the array test on int32, fast-search int8 (named flags), fast-search int32, float, fast-search float, string and fast-search string array attributes, each with 100 docs and 50 distinct values. */ +void +AttributeTest::testArray() +{ + uint32_t numDocs = 100; + uint32_t numUniques = 50; + { // IntegerAttribute + std::vector values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-int32", Config(BasicType::INT32, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + } + { // FloatingPointAttribute + std::vector values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-float", Config(BasicType::FLOAT, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-float", cfg); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + } + { // StringAttribute + std::vector values; + fillString(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("afs-string", cfg); + addDocs(ptr, numDocs); + testArray(ptr, values); + } + } +} + 
+ +//----------------------------------------------------------------------------- +// CollectionType::WSET +//----------------------------------------------------------------------------- +/* Debug helper: prints every {value, weight} pair of every document to std::cout, using a buffer of getMaxValueCount() entries. */ +template +void +AttributeTest::printWeightedSet(const AttributePtr & ptr) +{ + std::vector buffer(ptr->getMaxValueCount()); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = {" << buffer[i].getValue() + << ", " << buffer[i].getWeight() << "}" << std::endl; + } + } +} +/* Fills doc N with the first N entries of values, then checks append() (new value, existing value, same new value twice within one commit) and remove() (absent and present value) while tracking the getStatus() update counters. Requires values.size() >= getNumDocs() + 10. */ +template +void +AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector & values) +{ + LOG(info, "testWeightedSet: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast(values.size())); + + VectorType & v = *(static_cast(ptr.get())); + uint32_t numDocs = v.getNumDocs(); + ASSERT_TRUE(values.size() >= numDocs + 10); + uint32_t bufferSize = numDocs + 10; + std::vector buffer(bufferSize); + + // fill and check + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + v.clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight())); + } + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(buffer[j].getValue() == values[j].getValue()); + EXPECT_TRUE(buffer[j].getWeight() == values[j].getWeight()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2); /* one clearDoc() per doc plus 0 + 1 + ... + (numDocs-1) append() calls */ + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test append() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // append 
non-existent value + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight())); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight()); + + // append existent value: the entry count stays the same and the weight is replaced by the new one + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight() + 10); + + // append non-existent value two times within one commit: one entry is added, carrying the second weight + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight())); + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(buffer[doc + 1].getWeight() == values[doc + 1].getWeight() + 10); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4); /* four append() calls per doc in the loop above */ + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test remove() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // remove non-existent value: the call succeeds and the value count is unchanged + EXPECT_TRUE(static_cast(v.getValueCount(doc)) == valueCount + 2); + EXPECT_TRUE(v.remove(doc, values[doc + 2].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(static_cast(v.getValueCount(doc)) == valueCount + 2); + + // remove existent value: one entry less, and the removed value must be gone + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + for (uint32_t i = 0; i < 
valueCount + 1; ++i) { + EXPECT_TRUE(buffer[i].getValue() != values[doc + 1].getValue()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2); /* two remove() calls per doc in the loop above */ + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); +} +/* Runs the weighted set test on int32, float and string attributes, each with and without fast-search. For the fast-search variants it also checks that findEnum() finds one of the stored values. */ +void +AttributeTest::testWeightedSet() +{ + uint32_t numDocs = 100; + uint32_t numValues = numDocs + 10; /* the templated test asserts values.size() >= numDocs + 10 */ + { // IntegerAttribute + std::vector values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedInt(i, i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // FloatingPointAttribute + std::vector values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedFloat(i, i + numValues)); + } + + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + AttributePtr ptr = AttributeFactory::createAttribute("ws-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // StringAttribute + std::vector values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? 
"0" : "") << i; /* single-digit indices get a leading zero, giving string00 .. string09 */ + values.push_back(AttributeVector::WeightedString(ss.str(), i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsstr", Config(BasicType::STRING, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", cfg); + addDocs(ptr, numDocs); + testWeightedSet(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("string00", e)); + } + } +} +/* Adds 13 docs, sets each to 100, then applies Add, Sub, Mul and Div arithmetic updates and checks the resulting values and the getStatus() update counters. Also covers several updates to the same doc within one commit and division by zero. Only INT32, FLOAT and DOUBLE attributes are accepted. */ +template +void +AttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast(*ptr.get()); + addDocs(ptr, 13); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < 13; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Add, -10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Sub, -10))); + EXPECT_TRUE(vec.apply(4, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(5, Arith(Arith::Mul, -10))); + EXPECT_TRUE(vec.apply(6, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(7, Arith(Arith::Div, -10))); + EXPECT_TRUE(vec.apply(8, Arith(Arith::Add, 10.5))); + EXPECT_TRUE(vec.apply(9, Arith(Arith::Sub, 10.5))); + EXPECT_TRUE(vec.apply(10, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(11, Arith(Arith::Mul, 0.8))); + EXPECT_TRUE(vec.apply(12, Arith(Arith::Div, 0.8))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 26u); /* 13 update() calls plus 13 apply() calls so far */ + 
EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); /* only the 13 apply() calls count as non-idempotent */ + ptr->commit(); + + std::vector buf(1); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(2, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(3, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(4, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1000); + ptr->get(5, &buf[0], 1); + EXPECT_EQUAL(buf[0], -1000); + ptr->get(6, &buf[0], 1); + EXPECT_EQUAL(buf[0], 10); + ptr->get(7, &buf[0], 1); + EXPECT_EQUAL(buf[0], -10); + if (ptr->getBasicType() == BasicType::INT32) { /* integer attributes: the expected results for operand 10.5 are the same as for operand 10 */ + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + } else if (ptr->getBasicType() == BasicType::FLOAT || + ptr->getBasicType() == BasicType::DOUBLE) + { + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110.5); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 89.5); + } else { /* any other basic type is not supported by this test */ + ASSERT_TRUE(false); + } + ptr->get(10, &buf[0], 1); + EXPECT_EQUAL(buf[0], 120); + ptr->get(11, &buf[0], 1); + EXPECT_EQUAL(buf[0], 80); + ptr->get(12, &buf[0], 1); + EXPECT_EQUAL(buf[0], 125); + + + // try several arithmetic operations on the same document in a single commit + ASSERT_TRUE(vec.update(0, 1100)); + ASSERT_TRUE(vec.update(1, 1100)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 28u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); + for (uint32_t i = 0; i < 10; ++i) { + ASSERT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ASSERT_TRUE(vec.apply(1, Arith(Arith::Add, 10))); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 48u); /* 2 docs x 10 Add operations */ + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + + ASSERT_TRUE(vec.update(0, 10)); + ASSERT_TRUE(vec.update(1, 10)); + ASSERT_TRUE(vec.update(2, 10)); + ASSERT_TRUE(vec.update(3, 10)); + 
EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 52u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + for (uint32_t i = 0; i < 8; ++i) { /* 8 rounds of 4 Mul operations, committed after every round; only the counters are checked here, not the values */ + EXPECT_TRUE(vec.apply(0, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Mul, 2.3))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 3.4))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Mul, 5.6))); + ptr->commit(); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 84u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + + + // try divide by zero + ASSERT_TRUE(vec.update(0, 100)); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 86u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 66u); + } else { // divide by zero is not counted for integer attributes: only the update() above adds to the counter + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } + ptr->get(0, &buf[0], 1); + if (ptr->getBasicType() == BasicType::INT32) { + EXPECT_EQUAL(buf[0], 100); + } + + // try divide by zero with empty change vector + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 87u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 67u); + } else { // divide by zero is not counted for integer attributes: the counters stay where they were + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } +} +/* Runs the arithmetic update test on plain single-value int32 and float attributes and on fast-search int32, float and double attributes. The test adds its own docs, so none are added here. */ +void +AttributeTest::testArithmeticValueUpdate() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticValueUpdate(ptr); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticValueUpdate(ptr); + } 
+ { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsint32", cfg); + testArithmeticValueUpdate(ptr); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsfloat", cfg); + testArithmeticValueUpdate(ptr); + } + { + Config cfg(BasicType::DOUBLE, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsdouble", cfg); + testArithmeticValueUpdate(ptr); + } +} + +/* Adds one doc, stores the value before in it, applies Add 10 and reads the value back. Floating point attributes must then hold NaN, all other attributes must hold the value after. */ +template +void +AttributeTest::testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after) +{ + LOG(info, "testArithmeticWithUndefinedValue: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast(*ptr.get()); + addDocs(ptr, 1); + ASSERT_TRUE(vec.update(0, before)); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ptr->commit(); + + std::vector buf(1); + ptr->get(0, &buf[0], 1); + + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { /* presumably checked with isnan() because NaN does not compare equal to itself - the after argument is not used on this path */ + EXPECT_TRUE(std::isnan(buf[0])); + } else { + EXPECT_EQUAL(buf[0], after); + } +} +/* Runs the check above for a single-value int32 attribute (numeric_limits min() is expected to stay unchanged) and for float and double attributes (quiet NaN is expected to stay NaN). */ +void +AttributeTest::testArithmeticWithUndefinedValue() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue + (ptr, std::numeric_limits::min(), std::numeric_limits::min()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue + (ptr, std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sdouble", Config(BasicType::DOUBLE, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue + (ptr, std::numeric_limits::quiet_NaN(), 
std::numeric_limits::quiet_NaN()); + } +} + +/* Adds 6 docs that each hold the value of initValue with weight 100, then applies map updates that do arithmetic on the weight. Checks the resulting weights, the removeIfZero and createIfNonExistant behaviours selected by the two flags, the getStatus() update counters, and that a division by zero leaves the weight untouched. */ +template +void +AttributeTest::testMapValueUpdate(const AttributePtr & ptr, BufferType initValue, + const FieldValue & initFieldValue, const FieldValue & nonExistant, + bool removeIfZero, bool createIfNonExistant) +{ + LOG(info, "testMapValueUpdate: vector '%s'", ptr->getName().c_str()); + typedef MapValueUpdate MapVU; + typedef ArithmeticValueUpdate ArithVU; + VectorType & vec = static_cast(*ptr.get()); + + addDocs(ptr, 6); + for (uint32_t doc = 0; doc < 6; ++doc) { + ASSERT_TRUE(vec.append(doc, initValue.getValue(), 100)); /* only the value of initValue is used; its weight is ignored in favour of 100 */ + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 6u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, + ArithVU(ArithVU::Add, 10)))); + EXPECT_TRUE(ptr->apply(1, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 10)))); + EXPECT_TRUE(ptr->apply(2, MapVU(initFieldValue, + ArithVU(ArithVU::Mul, 10)))); + EXPECT_TRUE(ptr->apply(3, MapVU(initFieldValue, + ArithVU(ArithVU::Div, 10)))); + ptr->commit(); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 10u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 4u); + + std::vector buf(2); + ptr->get(0, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 110); + ptr->get(1, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 90); + ptr->get(2, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 1000); + ptr->get(3, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 10); + + // removeIfZero: subtracting 100 brings the weight of doc 4 down to 0 + EXPECT_TRUE(ptr->apply(4, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 100)))); + ptr->commit(); + if (removeIfZero) { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(0)); + } else { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 0); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 11u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 5u); + + // createIfNonExistant: update a key that is not present in the set of doc 5 + EXPECT_TRUE(ptr->apply(5, MapVU(nonExistant, + ArithVU(ArithVU::Add, 
10)))); + ptr->commit(); + if (createIfNonExistant) { /* a second entry is expected with weight 10, next to the original entry at weight 100 */ + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(2)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + EXPECT_EQUAL(buf[1].getWeight(), 10); + } else { + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 12u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + + + // try divide by zero (should be ignored): neither counter moves and the weight stays 12345 + vec.clearDoc(0); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + ASSERT_TRUE(vec.append(0, initValue.getValue(), 12345)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, ArithVU(ArithVU::Div, 0)))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0].getWeight(), 12345); +} +/* Runs the map update test on int32 and string weighted sets in three configurations each (plain, remove-if-zero, create-if-non-existent), then repeats the three string configurations with fast-search enabled. */ +void +AttributeTest::testMapValueUpdate() +{ + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + testMapValueUpdate + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, true, false))); + testMapValueUpdate + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), true, false); + } + { // create if non-existent + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, false, true))); + testMapValueUpdate + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, true); + } + + Config setCfg(Config(BasicType::STRING, CollectionType::WSET)); + Config setRemoveCfg(Config(BasicType::STRING, 
CollectionType(CollectionType::WSET, true, false))); + Config setCreateCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, false, true))); + + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setRemoveCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non-existent + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCreateCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } + + // fast-search - posting lists (the three string configs are reused with fast-search switched on) + { // regular set + setCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + setRemoveCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setRemoveCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non-existent + setCreateCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCreateCfg); + testMapValueUpdate + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } +} + + +/* Commits the given attribute; forwards to ptr->commit() and does nothing else. */ +void +AttributeTest::commit(const AttributePtr & ptr) +{ + ptr->commit(); +} + +/* Checks the doc, value and unique value counts reported by getStatus() for a string array attribute, and that the reported used and allocated memory is at least a hand-computed lower bound. Two cases: one value per doc, and 16 values per doc. */ +void +AttributeTest::testStatus() +{ + std::vector values; + fillString(values, 16); + uint32_t numDocs = 
100; + // No posting list + static constexpr size_t LeafNodeSize = + 4 + sizeof(EnumStoreBase::Index) * EnumTreeTraits::LEAF_SLOTS; + static constexpr size_t InternalNodeSize = + 8 + (sizeof(EnumStoreBase::Index) + + sizeof(btree::EntryRef)) * EnumTreeTraits::INTERNAL_SLOTS; + static constexpr size_t NestedVectorSize = 24; // sizeof(vespalib::Array) + + { // case 1: one value per doc, and a single unique value is expected across all docs + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast(ptr.get())); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, 1, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), 100u); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), 100u); + EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), 1u); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex)) + expUsed += 100 * sizeof(search::multivalue::Index32) + 100 * 4; + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); /* only a lower bound is checked, not the exact memory usage */ + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } + + { // case 2: 16 values per doc, with 16 unique values expected in total + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast(ptr.get())); + const size_t numUniq(16); + const size_t numValuesPerDoc(16); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), numDocs); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc); + EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += 
numUniq * 32; // enum store (16 unique values, 32 bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) + + // numdocs * sizeof(Array) (due to vector vector)) + expUsed += numDocs * sizeof(search::multivalue::Index32) + numDocs * numValuesPerDoc * sizeof(EnumStoreBase::Index) + ((numValuesPerDoc > search::multivalue::Index32::maxValues()) ? numDocs * NestedVectorSize : 0); + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } +} +/* Builds a string with an embedded terminator (evil, then a zero byte, then string) and stores it in single-value, array and weighted set string attributes. In every case only the part in front of the zero byte (evil) is expected to be read back. */ +void +AttributeTest::testNullProtection() +{ + size_t len1 = strlen("evil"); + size_t len2 = strlen("string"); + size_t len = len1 + 1 + len2; + vespalib::string good("good"); + vespalib::string evil("evil string"); + vespalib::string pureEvil("evil"); + EXPECT_EQUAL(strlen(evil.data()), len); + EXPECT_EQUAL(strlen(evil.c_str()), len); + evil[len1] = 0; // replace space with '\0' + EXPECT_EQUAL(strlen(evil.data()), len1); + EXPECT_EQUAL(strlen(evil.c_str()), len1); + EXPECT_EQUAL(strlen(evil.data() + len1), 0u); + EXPECT_EQUAL(strlen(evil.c_str() + len1), 0u); + EXPECT_EQUAL(strlen(evil.data() + len1 + 1), len2); + EXPECT_EQUAL(strlen(evil.c_str() + len1 + 1), len2); + EXPECT_EQUAL(evil.size(), len); /* the string object itself still reports the full length, embedded zero byte included */ + { // string + AttributeVector::DocId docId; + std::vector buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::SINGLE)); + StringAttribute &v = static_cast(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.update(docId, evil)); + v.commit(); + size_t n = static_cast(v).get(docId, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0], pureEvil); + } + { // string array + AttributeVector::DocId docId; + std::vector buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::ARRAY)); + StringAttribute &v = static_cast(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + 
EXPECT_TRUE(v.append(0, good, 1)); + EXPECT_TRUE(v.append(0, evil, 1)); + EXPECT_TRUE(v.append(0, good, 1)); + v.commit(); + size_t n = static_cast(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 3u); + EXPECT_EQUAL(buf[0], good); + EXPECT_EQUAL(buf[1], pureEvil); + EXPECT_EQUAL(buf[2], good); + } + { // string set + AttributeVector::DocId docId; + std::vector buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::WSET)); + StringAttribute &v = static_cast(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.append(0, good, 10)); + EXPECT_TRUE(v.append(0, evil, 20)); + v.commit(); + size_t n = static_cast(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 2u); + if (buf[0].getValue() != good) { /* the order of the two set entries is not relied upon: put the good entry first before checking */ + std::swap(buf[0], buf[1]); + } + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + EXPECT_EQUAL(buf[1].getValue(), pureEvil); + EXPECT_EQUAL(buf[1].getWeight(), 20); + + // remove: the entry is addressed with the string that carries the embedded zero byte + EXPECT_TRUE(v.remove(0, evil, 20)); + v.commit(); + n = static_cast(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + } +} + +void +AttributeTest::testGeneration(const AttributePtr & attr, bool exactStatus) +{ + LOG(info, "testGeneration(%s)", attr->getName().c_str()); + IntegerAttribute & ia = static_cast(*attr.get()); + // add docs to trigger inc generation when data vector is full + AttributeVector::DocId docId; + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(1u, ia.getCurrentGeneration()); + uint64_t lastAllocated; + uint64_t lastOnHold; + if (exactStatus) { + EXPECT_EQUAL(2u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + } else { + EXPECT_LESS(0u, 
ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + { + AttributeGuard ag(attr); // guard on generation 1 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(2u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(2u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + { + AttributeGuard ag(attr); // guard on generation 3 + ia.commit(true); + EXPECT_EQUAL(4u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of addDoc() + } else { + EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + { + AttributeGuard ag(attr); // guard on generation 4 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(5u, ia.getCurrentGeneration()); + ia.commit(); + EXPECT_EQUAL(6u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(4u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + ia.commit(true); + EXPECT_EQUAL(7u, ia.getCurrentGeneration()); + if 
(exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of commit() + } else { + EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + } +} + +void +AttributeTest::testGeneration() +{ + { // single value attribute + Config cfg(BasicType::INT8); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("int8", cfg); + testGeneration(attr, true); + } + { // enum attribute (with fast search) + Config cfg(BasicType::INT8); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faint8", cfg); + testGeneration(attr, false); + } + { // multi value attribute + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("aint8", cfg); + testGeneration(attr, false); + } + { // multi value enum attribute (with fast search) + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faaint8", cfg); + testGeneration(attr, false); + } +} + + +void +AttributeTest::testCreateSerialNum() +{ + Config cfg(BasicType::INT32); + AttributePtr attr = AttributeFactory::createAttribute("int32", cfg); + attr->setCreateSerialNum(42u); + EXPECT_TRUE(attr->save()); + AttributePtr attr2 = AttributeFactory::createAttribute("int32", cfg); + EXPECT_TRUE(attr2->load()); + EXPECT_EQUAL(42u, attr2->getCreateSerialNum()); +} + + +template +void +AttributeTest::testCompactLidSpace(const Config &config, + bool fs, + bool es) +{ + uint32_t highDocs = 100; + uint32_t trimmedDocs = 30; + vespalib::string bts = config.basicType().asString(); + vespalib::string cts = config.collectionType().asString(); + vespalib::string fas = fs ? 
"-fs" : ""; + vespalib::string ess = es ? "-es" : ""; + Config cfg = config; + cfg.setFastSearch(fs); + + vespalib::string name = clstmp + "/" + bts + "-" + cts + fas + ess; + LOG(info, "testCompactLidSpace(%s)", name.c_str()); + AttributePtr attr = AttributeFactory::createAttribute(name, cfg); + VectorType &v = static_cast(*attr.get()); + attr->enableEnumeratedSave(es); + attr->addDocs(highDocs); + populate(v, 17); + AttributePtr attr2 = AttributeFactory::createAttribute(name, cfg); + VectorType &v2 = static_cast(*attr2.get()); + attr2->enableEnumeratedSave(es); + attr2->addDocs(trimmedDocs); + populate(v2, 17); + EXPECT_EQUAL(trimmedDocs, attr2->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr2->getCommittedDocIdLimit()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(highDocs, attr->getCommittedDocIdLimit()); + attr->compactLidSpace(trimmedDocs); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + EXPECT_TRUE(attr->save()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + AttributePtr attr3 = AttributeFactory::createAttribute(name, cfg); + EXPECT_TRUE(attr3->load()); + EXPECT_EQUAL(trimmedDocs, attr3->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr3->getCommittedDocIdLimit()); + VectorType &v3 = static_cast(*attr3.get()); + compare(v2, v3); + attr->shrinkLidSpace(); + EXPECT_EQUAL(trimmedDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + compare(v, v3); +} + + +template +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + testCompactLidSpace(config, false, false); + testCompactLidSpace(config, false, true); + bool smallUInt = isUnsignedSmallIntAttribute(config.basicType().type()); + if (smallUInt) + return; + testCompactLidSpace(config, true, false); + testCompactLidSpace(config, true, true); +} + + +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + switch 
(config.basicType().type()) { + case BasicType::UINT1: + case BasicType::UINT2: + case BasicType::UINT4: + case BasicType::INT8: + case BasicType::INT16: + case BasicType::INT32: + case BasicType::INT64: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace(config); + } else { + testCompactLidSpace(config); + } + break; + case BasicType::FLOAT: + case BasicType::DOUBLE: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace(config); + } else { + testCompactLidSpace(config); + } + break; + case BasicType::STRING: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace(config); + } else { + testCompactLidSpace(config); + } + break; + default: + abort(); + } +} + + +void +AttributeTest::testCompactLidSpace() +{ + vespalib::rmdir(clstmp, true); + vespalib::mkdir(clstmp); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT1, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT2, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT4, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + 
CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::WSET))); + vespalib::rmdir(clstmp, true); +} + +template +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch) +{ + uint32_t numDocs = 10; + vespalib::string attrName = asuDir + "/" + config.basicType().asString() + "-" + + config.collectionType().asString() + (fastSearch ? 
"-fs" : ""); + Config cfg = config; + cfg.setFastSearch(fastSearch); + + AttributePtr attrPtr = AttributeFactory::createAttribute(attrName, cfg); + addDocs(attrPtr, numDocs); + AddressSpaceUsage before = attrPtr->getAddressSpaceUsage(); + populate(static_cast(*attrPtr.get()), 5); + AddressSpaceUsage after = attrPtr->getAddressSpaceUsage(); + if (attrPtr->hasEnum()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used()); + EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit()); + EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_EQUAL(after.enumStoreUsage(), before.enumStoreUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultEnumStoreUsage(), after.enumStoreUsage()); + } + if (attrPtr->hasMultiValue()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_GREATER(after.multiValueUsage().used(), before.multiValueUsage().used()); + EXPECT_EQUAL(after.multiValueUsage().limit(), before.multiValueUsage().limit()); + EXPECT_EQUAL(134217728u, after.multiValueUsage().limit()); // multivalue::Index32::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_EQUAL(after.multiValueUsage(), before.multiValueUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultMultiValueUsage(), after.multiValueUsage()); + } +} + +template +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config) +{ + 
requireThatAddressSpaceUsageIsReported(config, false); + requireThatAddressSpaceUsageIsReported(config, true); +} + +void +AttributeTest::requireThatAddressSpaceUsageIsReported() +{ + vespalib::rmdir(asuDir, true); + vespalib::mkdir(asuDir); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::INT32, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::INT32, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::FLOAT, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::FLOAT, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::STRING, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported(Config(BasicType::STRING, CollectionType::ARRAY))); +} + +int AttributeTest::Main() +{ + TEST_INIT("attribute_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testBaseName(); + testReload(); + testHasLoadData(); + testMemorySaver(); + + testSingle(); + testArray(); + testWeightedSet(); + testArithmeticValueUpdate(); + testArithmeticWithUndefinedValue(); + testMapValueUpdate(); + testStatus(); + testNullProtection(); + testGeneration(); + testCreateSerialNum(); + TEST_DO(testCompactLidSpace()); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + + TEST_DONE(); +} + +} + + +TEST_APPHOOK(search::AttributeTest); diff --git a/searchlib/src/tests/attribute/attribute_test.sh b/searchlib/src/tests/attribute/attribute_test.sh new file mode 100644 index 00000000000..89c52129b74 --- /dev/null +++ b/searchlib/src/tests/attribute/attribute_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/attributebenchmark.cpp b/searchlib/src/tests/attribute/attributebenchmark.cpp new file mode 100644 index 00000000000..88446ef71f7 
--- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.cpp @@ -0,0 +1,678 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "attributesearcher.h" +#include "attributeupdater.h" +#include +#include "runnable.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP("attributebenchmark"); + +#include + +using vespalib::Monitor; +using vespalib::MonitorGuard; +using std::shared_ptr; + +typedef std::vector NumVector; +typedef std::vector StringVector; +typedef AttributeVector::SP AttributePtr; +typedef AttributeVector::DocId DocId; +typedef search::attribute::Config AttrConfig; +using search::attribute::BasicType; +using search::attribute::CollectionType; + +namespace search { + +class AttributeBenchmark : public FastOS_Application +{ +private: + class Config { + public: + vespalib::string _attribute; + uint32_t _numDocs; + uint32_t _numUpdates; + uint32_t _numValues; + uint32_t _numSearchers; + uint32_t _numQueries; + bool _searchersOnly; + bool _validate; + uint32_t _populateRuns; + uint32_t _updateRuns; + uint32_t _commitFreq; + uint32_t _minValueCount; + uint32_t _maxValueCount; + uint32_t _minStringLen; + uint32_t _maxStringLen; + uint32_t _seed; + bool _writeAttribute; + int64_t _rangeStart; + int64_t _rangeEnd; + int64_t _rangeDelta; + bool _rangeSearch; + uint32_t _prefixLength; + bool _prefixSearch; + + + Config() : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0), + _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false), _populateRuns(0), _updateRuns(0), + _commitFreq(0), _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0), + _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false), + _prefixLength(0), _prefixSearch(false) {} + void printXML() const; + }; + + class Resource { + private: + 
std::vector _usages; + struct rusage _reset; + + public: + Resource() : _usages(), _reset() { reset(); }; + void reset() { + getrusage(0, &_reset); + } + void saveUsage() { + struct rusage now; + getrusage(0, &now); + struct rusage usage = computeDifference(_reset, now); + _usages.push_back(usage); + } + void printLastXML(uint32_t opCount) { + (void) opCount; + struct rusage & usage = _usages.back(); + std::cout << "" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000 + << "" << std::endl; + std::cout << "" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000 + << "" << std::endl; + std::cout << "" << usage.ru_nvcsw << "" << std::endl; + std::cout << "" << usage.ru_nivcsw << "" << std::endl; + } + static struct rusage computeDifference(struct rusage & first, struct rusage & second); + }; + + FastOS_ThreadPool * _threadPool; + Config _config; + RandomGenerator _rndGen; + + void init(const Config & config); + void usage(); + + // benchmark helper methods + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + template + void benchmarkPopulate(const AttributePtr & ptr, const std::vector & values, uint32_t id); + template + void benchmarkUpdate(const AttributePtr & ptr, const std::vector & values, uint32_t id); + + template + std::vector prepareForPrefixSearch(const std::vector & values) const; + template + void benchmarkSearch(const AttributePtr & ptr, const std::vector & values); + template + void benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector & values); + + template + void benchmarkAttribute(const AttributePtr & ptr, const std::vector & values); + + // Numeric Attribute + void benchmarkNumeric(const AttributePtr & ptr); + + // String Attribute + void benchmarkString(const AttributePtr & ptr); + + +public: + AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {} + ~AttributeBenchmark() { + if (_threadPool != NULL) { + delete _threadPool; + } + } + int Main(); +}; + + +void 
+AttributeBenchmark::Config::printXML() const +{ + std::cout << "" << std::endl; + std::cout << "" << _attribute << "" << std::endl; + std::cout << "" << _numDocs << "" << std::endl; + std::cout << "" << _numUpdates << "" << std::endl; + std::cout << "" << _numValues << "" << std::endl; + std::cout << "" << _numSearchers << "" << std::endl; + std::cout << "" << _numQueries << "" << std::endl; + std::cout << "" << (_searchersOnly ? "true" : "false") << "" << std::endl; + std::cout << "" << (_validate ? "true" : "false") << "" << std::endl; + std::cout << "" << _populateRuns << "" << std::endl; + std::cout << "" << _updateRuns << "" << std::endl; + std::cout << "" << _commitFreq << "" << std::endl; + std::cout << "" << _minValueCount << "" << std::endl; + std::cout << "" << _maxValueCount << "" << std::endl; + std::cout << "" << _minStringLen << "" << std::endl; + std::cout << "" << _maxStringLen << "" << std::endl; + std::cout << "" << _seed << "" << std::endl; + std::cout << "" << _rangeStart << "" << std::endl; + std::cout << "" << _rangeEnd << "" << std::endl; + std::cout << "" << _rangeDelta << "" << std::endl; + std::cout << "" << (_rangeSearch ? "true" : "false") << "" << std::endl; + std::cout << "" << _prefixLength << "" << std::endl; + std::cout << "" << (_prefixSearch ? 
"true" : "false") << "" << std::endl; + std::cout << "" << std::endl; +} + +void +AttributeBenchmark::init(const Config & config) +{ + _config = config; + _rndGen.srand(_config._seed); +} + + +//----------------------------------------------------------------------------- +// Benchmark helper methods +//----------------------------------------------------------------------------- +void +AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs) +{ + DocId startDoc; + DocId lastDoc; + bool success = ptr->addDocs(startDoc, lastDoc, numDocs); + assert(success); + (void) success; + assert(startDoc == 0); + assert(lastDoc + 1 == numDocs); + assert(ptr->getNumDocs() == numDocs); +} + +template +void +AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector & values, uint32_t id) +{ + std::cout << "" << std::endl; + AttributeUpdater + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.populate(); + std::cout << "" << std::endl; + updater.getStatus().printXML(); + std::cout << "" << std::endl; + if (_config._validate) { + std::cout << "" << std::endl; + } +} + +template +void +AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector & values, uint32_t id) +{ + std::cout << "" << std::endl; + AttributeUpdater + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.update(_config._numUpdates); + std::cout << "" << std::endl; + updater.getStatus().printXML(); + std::cout << "" << std::endl; + if (_config._validate) { + std::cout << "" << std::endl; + } +} + +template +std::vector +AttributeBenchmark::prepareForPrefixSearch(const std::vector & values) const +{ + (void) values; + return std::vector(); +} + +template <> +std::vector +AttributeBenchmark::prepareForPrefixSearch(const std::vector & values) const +{ + std::vector retval; + 
retval.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + retval.push_back(values[i].getValue().substr(0, _config._prefixLength)); + } + return retval; +} + +template +void +AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector & values) +{ + std::vector searchers; + if (_config._numSearchers > 0) { + std::cout << "" << std::endl; + + std::vector prefixStrings = prepareForPrefixSearch(values); + + for (uint32_t i = 0; i < _config._numSearchers; ++i) { + if (_config._rangeSearch) { + RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta); + searchers.push_back(new AttributeRangeSearcher(i, ptr, spec, _config._numQueries)); + } else if (_config._prefixSearch) { + searchers.push_back(new AttributePrefixSearcher(i, ptr, prefixStrings, _config._numQueries)); + } else { + searchers.push_back(new AttributeFindSearcher(i, ptr, values, _config._numQueries)); + } + _threadPool->NewThread(searchers.back()); + } + + for (uint32_t i = 0; i < searchers.size(); ++i) { + searchers[i]->join(); + } + + AttributeSearcherStatus totalStatus; + for (uint32_t i = 0; i < searchers.size(); ++i) { + std::cout << "" << std::endl; + searchers[i]->getStatus().printXML(); + std::cout << "" << std::endl; + totalStatus.merge(searchers[i]->getStatus()); + delete searchers[i]; + } + std::cout << "" << std::endl; + totalStatus.printXML(); + std::cout << "" << std::endl; + } +} + +template +void +AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector & values) +{ + if (_config._numSearchers > 0) { + std::cout << "" << std::endl; + AttributeUpdaterThread + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + _threadPool->NewThread(&updater); + benchmarkSearch(ptr, values); + updater.stop(); + updater.join(); + std::cout << "" << std::endl; + updater.getStatus().printXML(); + std::cout << "" << std::endl; + if 
(_config._validate) { + std::cout << "" << std::endl; + } + } +} + +template +void +AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector & values) +{ + addDocs(ptr, _config._numDocs); + + // populate + for (uint32_t i = 0; i < _config._populateRuns; ++i) { + benchmarkPopulate(ptr, values, i); + } + + // update + if (_config._numUpdates > 0) { + for (uint32_t i = 0; i < _config._updateRuns; ++i) { + benchmarkUpdate(ptr, values, i); + } + } + + // search + if (_config._searchersOnly) { + benchmarkSearch(ptr, values); + } else { + benchmarkSearchWithUpdater(ptr, values); + } + + _threadPool->Close(); +} + + +//----------------------------------------------------------------------------- +// Numeric Attribute +//----------------------------------------------------------------------------- +void +AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr) +{ + NumVector values; + if (_config._rangeSearch) { + values.reserve(_config._numValues); + for (uint32_t i = 0; i < _config._numValues; ++i) { + values.push_back(i); + } + } else { + _rndGen.fillRandomIntegers(values, _config._numValues); + } + + std::vector weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector weightedVector; + weightedVector.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedInt(values[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedInt(values[i], weights[i])); + } + } + benchmarkAttribute + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// String Attribute +//----------------------------------------------------------------------------- +void +AttributeBenchmark::benchmarkString(const AttributePtr & ptr) +{ + StringVector strings; + _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen); + + 
std::vector weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector weightedVector; + weightedVector.reserve(strings.size()); + for (size_t i = 0; i < strings.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedString(strings[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedString(strings[i], weights[i])); + } + } + benchmarkAttribute + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// Resource utilization +//----------------------------------------------------------------------------- +struct rusage +AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second) +{ + struct rusage result; + // utime + uint64_t firstutime = first.ru_utime.tv_sec * 1000000 + first.ru_utime.tv_usec; + uint64_t secondutime = second.ru_utime.tv_sec * 1000000 + second.ru_utime.tv_usec; + uint64_t resultutime = secondutime - firstutime; + result.ru_utime.tv_sec = resultutime / 1000000; + result.ru_utime.tv_usec = resultutime % 1000000; + + // stime + uint64_t firststime = first.ru_stime.tv_sec * 1000000 + first.ru_stime.tv_usec; + uint64_t secondstime = second.ru_stime.tv_sec * 1000000 + second.ru_stime.tv_usec; + uint64_t resultstime = secondstime - firststime; + result.ru_stime.tv_sec = resultstime / 1000000; + result.ru_stime.tv_usec = resultstime % 1000000; + + result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss; + result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss; + result.ru_idrss = second.ru_idrss; // - first.ru_idrss; + result.ru_isrss = second.ru_isrss; // - first.ru_isrss; + result.ru_minflt = second.ru_minflt - first.ru_minflt; + result.ru_majflt = second.ru_majflt - first.ru_majflt; + result.ru_nswap = second.ru_nswap - first.ru_nswap; + result.ru_inblock = second.ru_inblock - first.ru_inblock; + result.ru_oublock = second.ru_oublock - first.ru_oublock; + result.ru_msgsnd 
= second.ru_msgsnd - first.ru_msgsnd; + result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv; + result.ru_nsignals = second.ru_nsignals - first.ru_nsignals; + result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw; + result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw; + + return result; +} + + +void +AttributeBenchmark::usage() +{ + std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl; + std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl; + std::cout << " [-c commitFrequency] [-l minValueCount] [-h maxValueCount]" << std::endl; + std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl; + std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl; + std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl; + std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl; + std::cout << " " << std::endl; + std::cout << " : s-uint32, a-uint32, ws-uint32" << std::endl; + std::cout << " s-fa-uint32, a-fa-uint32, ws-fa-uint32" << std::endl; + std::cout << " s-fs-uint32, a-fs-uint32, ws-fs-uint32 ws-frs-uint32" << std::endl; + std::cout << " s-string, a-string, ws-string" << std::endl; + std::cout << " s-fs-string, a-fs-string, ws-fs-string ws-frs-string" << std::endl; +} + +int +AttributeBenchmark::Main() +{ + Config dc; + dc._numDocs = 50000; + dc._numUpdates = 50000; + dc._numValues = 1000; + dc._numSearchers = 0; + dc._numQueries = 1000; + dc._searchersOnly = true; + dc._validate = false; + dc._populateRuns = 1; + dc._updateRuns = 1; + dc._commitFreq = 1000; + dc._minValueCount = 0; + dc._maxValueCount = 20; + dc._minStringLen = 1; + dc._maxStringLen = 50; + dc._seed = 555; + dc._writeAttribute = false; + dc._rangeStart = 0; + dc._rangeEnd = 1000; + dc._rangeDelta = 10; + dc._rangeSearch = false; + dc._prefixLength = 2; + dc._prefixSearch = false; + + int idx = 1; 
+ char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) { + switch (opt) { + case 'n': + dc._numDocs = atoi(arg); + break; + case 'u': + dc._numUpdates = atoi(arg); + break; + case 'v': + dc._numValues = atoi(arg); + break; + case 's': + dc._numSearchers = atoi(arg); + break; + case 'q': + dc._numQueries = atoi(arg); + break; + case 'p': + dc._populateRuns = atoi(arg); + break; + case 'r': + dc._updateRuns = atoi(arg); + break; + case 'c': + dc._commitFreq = atoi(arg); + break; + case 'l': + dc._minValueCount = atoi(arg); + break; + case 'h': + dc._maxValueCount = atoi(arg); + break; + case 'i': + dc._minStringLen = atoi(arg); + break; + case 'a': + dc._maxStringLen = atoi(arg); + break; + case 'e': + dc._seed = atoi(arg); + break; + case 'S': + dc._rangeStart = strtoll(arg, NULL, 10); + break; + case 'E': + dc._rangeEnd = strtoll(arg, NULL, 10); + break; + case 'D': + dc._rangeDelta = strtoll(arg, NULL, 10); + break; + case 'L': + dc._prefixLength = atoi(arg); + break; + case 'b': + dc._searchersOnly = false; + break; + case 'R': + dc._rangeSearch = true; + break; + case 'P': + dc._prefixSearch = true; + break; + case 't': + dc._validate = true; + break; + case 'w': + dc._writeAttribute = true; + break; + default: + optError = true; + break; + } + } + + if (_argc != (idx + 1) || optError) { + usage(); + return -1; + } + + dc._attribute = vespalib::string(_argv[idx]); + + _threadPool = new FastOS_ThreadPool(256000); + + std::cout << "" << std::endl; + init(dc); + _config.printXML(); + + AttributePtr ptr; + + if (_config._attribute == "s-int32") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-int32") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, 
CollectionType::ARRAY)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-int32") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-fs-int32") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-fs-int32") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-fs-int32") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-string") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE)); + benchmarkString(ptr); + + } else if (_config._attribute == "a-string") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY)); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-string") { + std::cout << "" << std::endl; + ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET)); + benchmarkString(ptr); + + } else if (_config._attribute == "s-fs-string") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-string", cfg); + benchmarkString(ptr); + + } else if (_config._attribute == 
"a-fs-string") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-string", cfg); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-fs-string") { + std::cout << "" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-string", cfg); + benchmarkString(ptr); + + } + + if (dc._writeAttribute) { + std::cout << "" << std::endl; + ptr->saveAs(ptr->getBaseFileName()); + } + + std::cout << "" << std::endl; + + return 0; +} +} + +int main(int argc, char ** argv) +{ + search::AttributeBenchmark myapp; + return myapp.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/attribute/attributebenchmark.rb b/searchlib/src/tests/attribute/attributebenchmark.rb new file mode 100644 index 00000000000..44b08ec4389 --- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.rb @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"] +num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] +unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50] + +vectors.each do |vector| + num_docs.each do |num| + unique_percent.each do |percent| + unique = num * percent + command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1" + puts command + `#{command}` + s = 1 + 5.times do + command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1" + puts command + `#{command}` + s = s*2; + end + end + end +end diff --git a/searchlib/src/tests/attribute/attributefilewriter/.gitignore b/searchlib/src/tests/attribute/attributefilewriter/.gitignore new file mode 100644 index 00000000000..ea6a0e03bf2 --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/.gitignore @@ -0,0 +1 @@ +searchlib_attributefilewriter_test_app diff --git a/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt new file mode 100644 index 00000000000..a1d859bbfb9 --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attributefilewriter_test_app + SOURCES + attributefilewriter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributefilewriter_test_app COMMAND searchlib_attributefilewriter_test_app) diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp new file mode 100644 index 00000000000..acf61cd58bb --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP("attributefilewriter_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::index::DummyFileHeaderContext; + +namespace search +{ + +namespace +{ + +vespalib::string testFileName("test.dat"); +vespalib::string hello("Hello world"); + +void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); } + +struct Fixture { + TuneFileAttributes _tuneFileAttributes; + DummyFileHeaderContext _fileHeaderContext; + IAttributeSaveTarget::Config _cfg; + const vespalib::string _desc; + AttributeFileWriter _writer; + + Fixture() + : _tuneFileAttributes(), + _fileHeaderContext(), + _cfg(), + _desc("Attribute file sample description"), + _writer(_tuneFileAttributes, + _fileHeaderContext, + _cfg, + _desc) + { + removeTestFile(); + } + + ~Fixture() { + removeTestFile(); + } + +}; + +} + + +TEST_F("Test that we can write empty attribute file", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(0u, loaded->size()); +} + + +TEST_F("Test that we destroy writer without calling close", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); +} + + +TEST_F("Test that buffer writer passes on written 
data", Fixture) +{ + std::vector a; + const size_t mysize = 3000000; + const size_t writerBufferSize = AttributeFileBufferWriter::BUFFER_SIZE; + EXPECT_GREATER(mysize * sizeof(int), writerBufferSize); + a.reserve(mysize); + search::Rand48 rnd; + for (uint32_t i = 0; i < mysize; ++i) { + a.emplace_back(rnd.lrand48()); + } + EXPECT_TRUE(f._writer.open(testFileName)); + std::unique_ptr writer(f._writer.allocBufferWriter()); + writer->write(&a[0], a.size() * sizeof(int)); + writer->flush(); + writer.reset(); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(a.size() * sizeof(int), loaded->size()); + EXPECT_TRUE(memcmp(&a[0], loaded->buffer(), loaded->size()) == 0); +} + + +TEST_F("Test that we can pass buffer directly", Fixture) +{ + using Buffer = IAttributeFileWriter::Buffer; + Buffer buf = f._writer.allocBuf(hello.size()); + buf->writeBytes(hello.c_str(), hello.size()); + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.writeBuf(std::move(buf)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(hello.size(), loaded->size()); + EXPECT_TRUE(memcmp(hello.c_str(), loaded->buffer(), loaded->size()) == 0); +} + + +} + + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/attributeguard.cpp b/searchlib/src/tests/attribute/attributeguard.cpp new file mode 100644 index 00000000000..5c90caa094b --- /dev/null +++ b/searchlib/src/tests/attribute/attributeguard.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("attributeguard_test"); +#include +#include +#include + +namespace search { + +class AttributeGuardTest : public vespalib::TestApp +{ +public: + int Main(); +}; + +int +AttributeGuardTest::Main() +{ + TEST_INIT("attributeguard_test"); + + + AttributeVector::SP ssattr(new SingleStringExtAttribute("ss1")); + AttributeEnumGuard guard(ssattr); + EXPECT_TRUE(guard.valid()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributeGuardTest); diff --git a/searchlib/src/tests/attribute/attributeguard_test.sh b/searchlib/src/tests/attribute/attributeguard_test.sh new file mode 100644 index 00000000000..6a9557e7da7 --- /dev/null +++ b/searchlib/src/tests/attribute/attributeguard_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_attributeguard_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/attributemanager/.gitignore b/searchlib/src/tests/attribute/attributemanager/.gitignore new file mode 100644 index 00000000000..6fa89f09572 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +attributemanager_test +searchlib_attributemanager_test_app diff --git a/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt new file mode 100644 index 00000000000..ed3eeee1065 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attributemanager_test_app + SOURCES + attributemanager_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributemanager_test_app COMMAND searchlib_attributemanager_test_app) diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp new file mode 100644 index 00000000000..bf247668843 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -0,0 +1,422 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("attribute_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace config; +using namespace vespa::config::search; +using namespace search; +using namespace search::attribute; +using vespalib::tensor::TensorType; +using std::shared_ptr; + +typedef BasicType BT; +typedef CollectionType CT; +typedef AttributeVector::SP AVSP; + +namespace search { + +class AttributeManagerTest : public vespalib::TestApp +{ +private: + void verifyLoad(AttributeVector & v); + void testLoad(); + void testGuards(); + void testConfigConvert(); + void testContext(); + + bool + assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in); + + bool + assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ = false, + bool createIfNe = false); + +public: + AttributeManagerTest() + { + } + int Main(); +}; + + +typedef MultiValueNumericAttribute< IntegerAttributeTemplate, + multivalue::MVMTemplateArg< + multivalue::Value, multivalue::Index32> > +TestAttributeBase; + +class TestAttribute : public TestAttributeBase +{ +public: + TestAttribute(const std::string &name) + : + TestAttributeBase(name) + { + } + + generation_t + getGen() const + { + return getCurrentGeneration(); + } + + 
uint32_t + getRefCount(generation_t gen) const + { + return getGenerationRefCount(gen); + } + + void + incGen() + { + incGeneration(); + } + + void + updateFirstUsedGen(void) + { + updateFirstUsedGeneration(); + } + + generation_t + getFirstUsedGen() const + { + return getFirstUsedGeneration(); + } +}; + + +void +AttributeManagerTest::testGuards() +{ + AttributeVector::SP vec(new TestAttribute("mvint") ); + TestAttribute * v = static_cast (vec.get()); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(2)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + + v->incGen(); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + v->incGen(); + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(2)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); 
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + v->updateFirstUsedGeneration(); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2)); + EXPECT_EQUAL(v->getGen(), unsigned(2)); +} + + +void +AttributeManagerTest::verifyLoad(AttributeVector & v) +{ + EXPECT_TRUE( !v.isLoaded() ); + EXPECT_TRUE( v.load() ); + EXPECT_TRUE( v.isLoaded() ); + EXPECT_EQUAL( v.getNumDocs(), size_t(100) ); +} + + +void +AttributeManagerTest::testLoad() +{ + { + TestAttributeBase v("mvint"); + EXPECT_TRUE(!v.isLoaded()); + for(size_t i(0); i < 100; i++) { + AttributeVector::DocId doc; + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i); + } + EXPECT_TRUE( v.getNumDocs() == 100); + for(size_t i(0); i < 100; i++) { + for(size_t j(0); j < i; j++) { + EXPECT_TRUE( v.append(i, j, 1) ); + } + v.commit(); + EXPECT_TRUE(size_t(v.getValueCount(i)) == i); + EXPECT_EQUAL(v.getMaxValueCount(), std::max(size_t(1), i)); + } + EXPECT_TRUE(v.isLoaded()); + EXPECT_TRUE(v.save()); + EXPECT_TRUE(v.isLoaded()); + } + { + TestAttributeBase v("mvint"); + verifyLoad(v); + } + { + AttributeVector::Config config(BT::INT32, + CollectionType::ARRAY); + TestAttributeBase v("mvint", config); + verifyLoad(v); + } + { + AttributeManager manager; + AttributeVector::Config config(BT::INT32, + CollectionType::ARRAY); + EXPECT_TRUE(manager.addVector("mvint", config)); + AttributeManager::AttributeList list; + manager.getAttributeList(list); + EXPECT_TRUE(list.size() == 1); + EXPECT_TRUE( list[0]->isLoaded()); + AttributeGuard::UP attrG(manager.getAttribute("mvint")); + EXPECT_TRUE( attrG->valid() ); + } +} + + +bool +AttributeManagerTest::assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in) +{ + AttributesConfig::Attribute a; + a.datatype = in; + return EXPECT_EQUAL(exp, 
ConfigConverter::convert(a).basicType().type()); +} + + +bool +AttributeManagerTest:: +assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ, + bool createIfNe) +{ + AttributesConfig::Attribute a; + a.collectiontype = in; + a.removeifzero = removeIfZ; + a.createifnonexistent = createIfNe; + AttributeVector::Config out = ConfigConverter::convert(a); + return EXPECT_EQUAL(exp.type(), out.collectionType().type()) && + EXPECT_EQUAL(exp.removeIfZero(), out.collectionType().removeIfZero()) && + EXPECT_EQUAL(exp.createIfNonExistant(), + out.collectionType().createIfNonExistant()); +} + + +void +AttributeManagerTest::testConfigConvert() +{ + // typedef AttributeVector::Config AVC; + typedef BT AVBT; + typedef CollectionType AVCT; + typedef AttributesConfig::Attribute CACA; + typedef ConfigConverter CC; + + EXPECT_TRUE(assertDataType(AVBT::STRING, CACA::STRING)); + EXPECT_TRUE(assertDataType(AVBT::INT8, CACA::INT8)); + EXPECT_TRUE(assertDataType(AVBT::INT16, CACA::INT16)); + EXPECT_TRUE(assertDataType(AVBT::INT32, CACA::INT32)); + EXPECT_TRUE(assertDataType(AVBT::INT64, CACA::INT64)); + EXPECT_TRUE(assertDataType(AVBT::FLOAT, CACA::FLOAT)); + EXPECT_TRUE(assertDataType(AVBT::DOUBLE, CACA::DOUBLE)); + EXPECT_TRUE(assertDataType(AVBT::PREDICATE, CACA::PREDICATE)); + EXPECT_TRUE(assertDataType(AVBT::TENSOR, CACA::TENSOR)); + EXPECT_TRUE(assertDataType(AVBT::NONE, CACA::NONE)); + + EXPECT_TRUE(assertCollectionType(AVCT::SINGLE, CACA::SINGLE)); + EXPECT_TRUE(assertCollectionType(AVCT::ARRAY, CACA::ARRAY)); + EXPECT_TRUE(assertCollectionType(AVCT::WSET, CACA::WEIGHTEDSET)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, true, false), + CACA::SINGLE, true, false)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, false, true), + CACA::SINGLE, false, true)); + + { // fastsearch + CACA a; + EXPECT_TRUE(!CC::convert(a).fastSearch()); + a.fastsearch = true; + EXPECT_TRUE(CC::convert(a).fastSearch()); + } + { // huge + 
CACA a; + EXPECT_TRUE(!CC::convert(a).huge()); + a.huge = true; + EXPECT_TRUE(CC::convert(a).huge()); + } + { // fastAccess + CACA a; + EXPECT_TRUE(!CC::convert(a).fastAccess()); + a.fastaccess = true; + EXPECT_TRUE(CC::convert(a).fastAccess()); + } + { // tensor + CACA a; + a.datatype = CACA::TENSOR; + a.tensortype = "tensor(x[5])"; + AttributeVector::Config out = ConfigConverter::convert(a); + EXPECT_EQUAL("tensor(x[5])", out.tensorType().toSpec()); + } +} + +bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) { + return a->getName() < b->getName(); +} + +void +AttributeManagerTest::testContext() +{ + std::vector attrs; + // create various attributes vectors + attrs.push_back(AttributeFactory::createAttribute("sint32", + Config(BT::INT32, CT::SINGLE))); + attrs.push_back(AttributeFactory::createAttribute("aint32", + Config(BT::INT32, CT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("wsint32", + Config(BT::INT32, CT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("dontcare", + Config(BT::INT32, CT::SINGLE))); + + // add docs + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->addDocs(64); + } + + // commit all attributes (current generation -> 1); + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->commit(); + } + + AttributeManager manager; + // add to manager + for (uint32_t i = 0; i < attrs.size(); ++i) { + manager.add(attrs[i]); + } + + { + IAttributeContext::UP first = manager.createContext(); + + // no generation guards taken yet + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + for (uint32_t i = 0; i < 2; ++i) { + EXPECT_TRUE(first->getAttribute("sint32") != NULL); + EXPECT_TRUE(first->getAttribute("aint32") != NULL); + EXPECT_TRUE(first->getAttribute("wsint32") != NULL); + EXPECT_TRUE(first->getAttributeStableEnum("wsint32") != NULL); + } + 
EXPECT_TRUE(first->getAttribute("foo") == NULL); + EXPECT_TRUE(first->getAttribute("bar") == NULL); + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 2u : 1u) : 0u); + } + + { + IAttributeContext::UP second = manager.createContext(); + + EXPECT_TRUE(second->getAttribute("sint32") != NULL); + EXPECT_TRUE(second->getAttribute("aint32") != NULL); + EXPECT_TRUE(second->getAttribute("wsint32") != NULL); + EXPECT_TRUE(second->getAttributeStableEnum("wsint32") != NULL); + + // two generation guards taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 4u : 2u) : 0u); + } + } + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 
2u : 1u) : 0u); + } + } + + // no generation guards taken + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + { + IAttributeContext::UP ctx = manager.createContext(); + std::vector all; + ctx->getAttributeList(all); + EXPECT_EQUAL(4u, all.size()); + std::sort(all.begin(), all.end(), gt_attribute); + EXPECT_EQUAL("aint32", all[0]->getName()); + EXPECT_EQUAL("dontcare", all[1]->getName()); + EXPECT_EQUAL("sint32", all[2]->getName()); + EXPECT_EQUAL("wsint32", all[3]->getName()); + } +} + +int AttributeManagerTest::Main() +{ + TEST_INIT("attributemanager_test"); + + testLoad(); + testGuards(); + testConfigConvert(); + testContext(); + + TEST_DONE(); +} + +} // namespace search + + +TEST_APPHOOK(search::AttributeManagerTest); diff --git a/searchlib/src/tests/attribute/attributesearcher.h b/searchlib/src/tests/attribute/attributesearcher.h new file mode 100644 index 00000000000..7456d22f306 --- /dev/null +++ b/searchlib/src/tests/attribute/attributesearcher.h @@ -0,0 +1,265 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "runnable.h" +#include +#include +#include +#include +#include +#include + +namespace search { + +std::unique_ptr +performSearch(queryeval::SearchIterator & sb, uint32_t numDocs) +{ + queryeval::HitCollector hc(numDocs, numDocs, 0); + // assume strict toplevel search object located at start + for (sb.seek(1); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +class AttributeSearcherStatus +{ +public: + double _totalSearchTime; + uint64_t _totalHitCount; + uint64_t _numQueries; + uint64_t _numClients; + + AttributeSearcherStatus() : _totalSearchTime(0), _totalHitCount(0), _numQueries(0), _numClients(0) {} + void merge(const AttributeSearcherStatus & status) { + _totalSearchTime += status._totalSearchTime; + _totalHitCount += status._totalHitCount; + _numQueries += status._numQueries; + _numClients += status._numClients; + } + void printXML() const { + std::cout << "" << _totalSearchTime << "" << std::endl; // ms + std::cout << "" << avgSearchTime() << "" << std::endl; // ms + std::cout << "" << searchThroughout() << "" << std::endl; // per/sec + std::cout << "" << _totalHitCount << "" << std::endl; + std::cout << "" << avgHitCount() << "" << std::endl; + } + double avgSearchTime() const { + return _totalSearchTime / _numQueries; + } + double searchThroughout() const { + return _numClients * 1000 * _numQueries / _totalSearchTime; + } + double avgHitCount() const { + return _totalHitCount / static_cast(_numQueries); + } +}; + + +class AttributeSearcher : public Runnable +{ +protected: + typedef AttributeVector::SP AttributePtr; + + const AttributePtr & _attrPtr; + FastOS_Time _timer; + AttributeSearcherStatus _status; + +public: + AttributeSearcher(uint32_t id, const AttributePtr & attrPtr) : + Runnable(id), _attrPtr(attrPtr), _timer(), _status() + { + _status._numClients = 1; + } + virtual void doRun() = 0; + AttributeSearcherStatus & getStatus() { return _status; } + void buildTermQuery(std::vector & buffer, const vespalib::string & index, const char * term, bool prefix = false); +}; + +void +AttributeSearcher::buildTermQuery(std::vector & buffer, const vespalib::string & index, const char * term, bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = strlen(term); + uint32_t termIdx = 
prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx) + + vespalib::compress::Integer::compressedPositiveLength(indexLen) + + vespalib::compress::Integer::compressedPositiveLength(termLen) + + indexLen + termLen; + buffer.resize(queryPacketSize); + char * p = &buffer[0]; + p += vespalib::compress::Integer::compressPositive(termIdx, p); + p += vespalib::compress::Integer::compressPositive(indexLen, p); + memcpy(p, index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, p); + memcpy(p, term, termLen); + p += termLen; + assert(p == (&buffer[0] + buffer.size())); +} + + +template +class AttributeFindSearcher : public AttributeSearcher +{ +private: + const std::vector & _values; + std::vector _query; + +public: + AttributeFindSearcher(uint32_t id, const AttributePtr & attrPtr, const std::vector & values, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +template +void +AttributeFindSearcher::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple term query + vespalib::asciistream ss; + ss << _values[i % _values.size()].getValue(); + this->buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr iterator = searchContext->createIterator(NULL, true); + std::unique_ptr results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class RangeSpec +{ +public: + int64_t _min; + int64_t _max; + int64_t _range; + 
RangeSpec(int64_t min, int64_t max, int64_t range) : + _min(min), _max(max), _range(range) + { + assert(_min < _max); + assert(_range <= (_max - _min)); + } +}; + +class RangeIterator +{ +private: + RangeSpec _spec; + int64_t _a; + int64_t _b; + +public: + RangeIterator(const RangeSpec & spec) : _spec(spec), _a(spec._min), _b(spec._min + _spec._range) {} + RangeIterator & operator++() { + _a += _spec._range; + _b += _spec._range; + if (_b > _spec._max) { + _a = _spec._min; + _b = _spec._min + _spec._range; + } + return *this; + } + int64_t a() const { return _a; } + int64_t b() const { return _b; } +}; + +class AttributeRangeSearcher : public AttributeSearcher +{ +private: + RangeSpec _spec; + std::vector _query; + +public: + AttributeRangeSearcher(uint32_t id, const AttributePtr & attrPtr, const RangeSpec & spec, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _spec(spec), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributeRangeSearcher::doRun() +{ + _timer.SetNow(); + RangeIterator iter(_spec); + for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) { + // build simple range term query + vespalib::asciistream ss; + ss << "[" << iter.a() << ";" << iter.b() << "]"; + buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr iterator = searchContext->createIterator(NULL, true); + std::unique_ptr results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class AttributePrefixSearcher : public AttributeSearcher +{ +private: + const std::vector & _values; + std::vector _query; + +public: + AttributePrefixSearcher(uint32_t id, const AttributePtr 
& attrPtr, + const std::vector & values, uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributePrefixSearcher::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple prefix term query + buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true); + + AttributeGuard guard(_attrPtr); + std::unique_ptr searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr iterator = searchContext->createIterator(NULL, true); + std::unique_ptr results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + + +} // search + diff --git a/searchlib/src/tests/attribute/attributeupdater.h b/searchlib/src/tests/attribute/attributeupdater.h new file mode 100644 index 00000000000..5193ca0f873 --- /dev/null +++ b/searchlib/src/tests/attribute/attributeupdater.h @@ -0,0 +1,299 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include "runnable.h" +#include + +#define VALIDATOR_STR(str) #str +#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc)) +#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b) + +namespace search { + +class AttributeValidator +{ +private: + uint32_t _totalCnt; + +public: + AttributeValidator() : _totalCnt(0) {} + uint32_t getTotalCnt() const { return _totalCnt; } + bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) { + _totalCnt++; + if (!rc) { + std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" (" + << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } + template + bool reportAssertEqual(const vespalib::string & file, uint32_t line, + const vespalib::string & aStr, const vespalib::string & bStr, + const A & a, const B & b) { + _totalCnt++; + if (!(a == b)) { + std::cout << "Assert equal failed: " << std::endl; + std::cout << aStr << ": " << a << std::endl; + std::cout << bStr << ": " << b << std::endl; + std::cout << "(" << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } +}; + +class AttributeUpdaterStatus +{ +public: + double _totalUpdateTime; + uint64_t _numDocumentUpdates; + uint64_t _numValueUpdates; + + AttributeUpdaterStatus() : + _totalUpdateTime(0), _numDocumentUpdates(0), _numValueUpdates(0) {} + void reset() { + _totalUpdateTime = 0; + _numDocumentUpdates = 0; + _numValueUpdates = 0; + } + void printXML() const { + std::cout << "" << _totalUpdateTime << "" << std::endl; + std::cout << "" << _numDocumentUpdates << "" << std::endl; + std::cout << "" << documentUpdateThroughput() << "" << std::endl; + std::cout << "" << avgDocumentUpdateTime() << "" << std::endl; + std::cout << "" << _numValueUpdates << "" << std::endl; + std::cout << "" << valueUpdateThroughput() << "" << std::endl; + std::cout << "" << 
avgValueUpdateTime() << "" << std::endl; + } + double documentUpdateThroughput() const { + return _numDocumentUpdates * 1000 / _totalUpdateTime; + } + double avgDocumentUpdateTime() const { + return _totalUpdateTime / _numDocumentUpdates; + } + double valueUpdateThroughput() const { + return _numValueUpdates * 1000 / _totalUpdateTime; + } + double avgValueUpdateTime() const { + return _totalUpdateTime / _numValueUpdates; + } +}; + +// AttributeVectorInstance, AttributeVectorType, AttributeVectorBufferType +template +class AttributeUpdater +{ +protected: + typedef AttributeVector::SP AttributePtr; + typedef std::map > AttributeCommit; + + const AttributePtr & _attrPtr; + Vector & _attrVec; + const std::vector & _values; + std::vector _buffer; + std::vector _getBuffer; + RandomGenerator & _rndGen; + AttributeCommit _expected; + FastOS_Time _timer; + AttributeUpdaterStatus _status; + AttributeValidator _validator; + + // config + bool _validate; + uint32_t _commitFreq; + uint32_t _minValueCount; + uint32_t _maxValueCount; + + uint32_t getRandomCount() { + return _rndGen.rand(_minValueCount, _maxValueCount); + } + uint32_t getRandomDoc() { + return _rndGen.rand(0, _attrPtr->getNumDocs() - 1); + } + const T & getRandomValue() { + return _values[_rndGen.rand(0, _values.size() - 1)]; + } + void updateValues(uint32_t doc); + void commit(); + +public: + AttributeUpdater(const AttributePtr & attrPtr, const std::vector & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) : + _attrPtr(attrPtr), _attrVec(*(static_cast(attrPtr.get()))), + _values(values), _buffer(), _getBuffer(), _rndGen(rndGen), _expected(), _timer(), _status(), _validator(), + _validate(validate), _commitFreq(commitFreq), _minValueCount(minValueCount), _maxValueCount(maxValueCount) + { + } + void resetStatus() { + _status.reset(); + } + const AttributeUpdaterStatus & getStatus() const { + return _status; + } + const AttributeValidator & 
getValidator() const { + return _validator; + } + void populate(); + void update(uint32_t numUpdates); +}; + +template +class AttributeUpdaterThread : public AttributeUpdater, public Runnable +{ +private: + typedef AttributeVector::SP AttributePtr; + +public: + AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) : + AttributeUpdater(attrPtr, values, rndGen, validate, commitFreq, minValueCount, maxValueCount), + Runnable(0) {} + + virtual void doRun(); +}; + + +template +void +AttributeUpdater::updateValues(uint32_t doc) +{ + uint32_t valueCount = getRandomCount(); + + if (_validate) { + _buffer.clear(); + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + if (_attrPtr->hasWeightedSetType()) { + bool exists = false; + for (typename std::vector::iterator iter = _buffer.begin(); iter != _buffer.end(); ++iter) { + if (iter->getValue() == value.getValue()) { + exists = true; + iter->setWeight(value.getWeight()); + break; + } + } + if (!exists) { + _buffer.push_back(value); + } + } else { + _buffer.push_back(value); + } + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _buffer.push_back(getRandomValue()); + _attrVec.update(doc, _buffer.back().getValue()); + } + _expected[doc] = _buffer; + + } else { + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _attrVec.update(doc, getRandomValue().getValue()); + } + } + + _status._numDocumentUpdates++; + _status._numValueUpdates += (_attrPtr->hasMultiValue() ? 
valueCount: 1); +} + +template +void +AttributeUpdater::commit() +{ + AttributeGuard guard(this->_attrPtr); + if (_validate) { + _attrPtr->commit(); + _getBuffer.resize(_maxValueCount); + for (typename AttributeCommit::iterator iter = _expected.begin(); + iter != _expected.end(); ++iter) + { + uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size()); + _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount); + if (valueCount != iter->second.size()) { + std::cout << "validate(" << iter->first << ")" << std::endl; + std::cout << "expected(" << iter->second.size() << ")" << std::endl; + for (size_t i = 0; i < iter->second.size(); ++i) { + std::cout << " [" << iter->second[i].getValue() << ", " << iter->second[i].getWeight() << "]" << std::endl; + } + std::cout << "actual(" << valueCount << ")" << std::endl; + for (size_t i = 0; i < valueCount; ++i) { + std::cout << " [" << _getBuffer[i].getValue() << ", " << _getBuffer[i].getWeight() << "]" << std::endl; + } + } + _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue()); + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight()); + } + } + _expected.clear(); + } else { + _attrPtr->commit(); + } +} + +template +void +AttributeUpdater::populate() +{ + _timer.SetNow(); + for (uint32_t doc = 0; doc < _attrPtr->getNumDocs(); ++doc) { + updateValues(doc); + if (doc % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template +void +AttributeUpdater::update(uint32_t numUpdates) +{ + _timer.SetNow(); + for (uint32_t i = 0; i < numUpdates; ++i) { + uint32_t doc = getRandomDoc(); + updateValues(doc); + if (i % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += 
_timer.MilliSecsToNow(); +} + + +template +void +AttributeUpdaterThread::doRun() +{ + this->_timer.SetNow(); + while(!_done) { + uint32_t doc = this->getRandomDoc(); + this->updateValues(doc); + if (this->_status._numDocumentUpdates % this->_commitFreq == (this->_commitFreq - 1)) { + this->commit(); + } + } + this->commit(); + this->_status._totalUpdateTime += this->_timer.MilliSecsToNow(); +} + + +} // search + diff --git a/searchlib/src/tests/attribute/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmarkplotter.rb new file mode 100644 index 00000000000..d77c92c8acd --- /dev/null +++ b/searchlib/src/tests/attribute/benchmarkplotter.rb @@ -0,0 +1,134 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +require 'rexml/document' + +def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles) + plot_cmd = ""; + plot_cmd += "set terminal png\n" + plot_cmd += "set output \"#{plot_png}\"\n" + plot_cmd += "set title \"#{title}\"\n" + plot_cmd += "set xlabel \"#{xlabel}\"\n" + plot_cmd += "set ylabel \"#{ylabel}\"\n" + c = 2 + plots = [] + plot_cmd += "plot " + graph_titles.each do |title| + plots.push("\"#{plot_data}\" using 1:#{c} title \"#{title}\" with linespoints") + c += 1 + end + plot_cmd += plots.join(", ") + + plot_cmd_file = File.open("plot_graph.cmd", "w") + plot_cmd_file.write(plot_cmd); + plot_cmd_file.close + cmd = "gnuplot plot_graph.cmd" + puts cmd + puts `#{cmd}` +end + +def extract_alpha(num_docs, percentages, input, output, xml_getter) + plot_data = File.open(output, "w"); + num_docs.each do |num| + data_line = "#{num} " + percentages.each do |prc| + unique = num * prc + filename = input.sub("#N", "#{num}").sub("#V", "#{unique}") + value = 0 + begin + xml_root = REXML::Document.new(File.open(filename)).root + value = send(xml_getter, xml_root) + rescue REXML::ParseException + puts "Could not parse file: #{filename}" + end + data_line += "#{value} " + end + 
plot_data.write(data_line + "\n") + end + plot_data.close +end + +def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter) + plot_data = File.open(output, "w"); + num_docs.each do |num| + data_line = "#{num} " + unique = num * percentage + num_threads.each do |thread| + filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}") + value = 0 + begin + xml_root = REXML::Document.new(File.open(filename)).root + value = send(xml_getter, xml_root) + rescue REXML::ParseException + puts "Could not parse file: #{filename}" + end + data_line += "#{value} " + end + plot_data.write(data_line + "\n") + end + plot_data.close +end + +def xml_getter_update_0_throughput(xml_root) + return xml_root.elements["update[@id='0']"].elements["throughput"].text +end + +def xml_getter_search_throughput(xml_root) + return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text +end + +def xml_getter_updater_thread_throughput(xml_root) + return throughput = xml_root.elements["updater-summary"].elements["throughput"].text +end + + +vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"] +num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] +unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50] +num_threads = [1, 2, 4, 8, 16] + +inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log", + "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"] +graph_titles = [[], []] +unique_percentages.each do |percentage| + graph_titles[0].push("#{percentage * 100} % uniques") +end +num_threads.each do |thread| + graph_titles[1].push("#{thread} searcher thread(s)") +end + +vectors.each do |vector| + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-update-speed.dat", + :xml_getter_update_0_throughput) + plot_graph("#{vector}-update-speed.dat", + "#{vector}-update-speed.png", + "Update speed when applying 1M updates", + "Number 
of documents", "Updates per/sec", graph_titles[0]) + + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-search-speed.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed.dat", + "#{vector}-search-speed.png", + "Search speed with 1 searcher thread", + "Number of documents", "Queries per/sec", graph_titles[0]) + + extract_beta(num_docs, 0.01, num_threads, + inputs[1].sub("#AV", vector), + "#{vector}-search-speed-multiple.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed-multiple.dat", + "#{vector}-search-speed-multiple.png", + "Search speed with 1 update thread and X searcher threads", + "Number of documents", "Queries per/sec", graph_titles[1]) + + extract_beta(num_docs, 0.01, num_threads, + inputs[1].sub("#AV", vector), + "#{vector}-update-speed-multiple.dat", + :xml_getter_updater_thread_throughput) + plot_graph("#{vector}-update-speed-multiple.dat", + "#{vector}-update-speed-multiple.png", + "Update speed with 1 update thread and X searcher threads", + "Number of documents", "Updates per/sec", graph_titles[1]) +end diff --git a/searchlib/src/tests/attribute/bitvector/.gitignore b/searchlib/src/tests/attribute/bitvector/.gitignore new file mode 100644 index 00000000000..05ec0a4df59 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/.gitignore @@ -0,0 +1 @@ +searchlib_bitvector_test_app diff --git a/searchlib/src/tests/attribute/bitvector/CMakeLists.txt b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..bc65fc04dc4 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_bitvector_test_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_bitvector_test_app COMMAND searchlib_bitvector_test_app) diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..85f83d217eb --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -0,0 +1,632 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("bitvector_test"); +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::AttributeVector; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::BitVector; +using search::BitVectorIterator; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr SearchContextPtr; +typedef std::unique_ptr SearchBasePtr; + +struct BitVectorTest +{ + typedef AttributeVector::SP AttributePtr; + + BitVectorTest() { } + + ~BitVectorTest() { } + + template + VectorType & as(AttributePtr &v); + IntegerAttribute & asInt(AttributePtr &v); + StringAttribute & asString(AttributePtr &v); + FloatingPointAttribute & asFloat(AttributePtr &v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + void + addDocs(const AttributePtr &v, size_t sz); + + template + void populate(VectorType &v, + uint32_t low, + uint32_t high, + bool set); + + template + void populateAll(VectorType &v, + uint32_t low, + 
uint32_t high, + bool set); + + void + buildTermQuery(std::vector & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template + vespalib::string + getSearchStr(); + + template + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); + + template + SearchContextPtr + getSearch(const V & vec, bool useBitVector); + + void + checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expFastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + void + checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + template + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + template + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); +}; + + +template +VectorType & +BitVectorTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +BitVectorTest::asInt(AttributePtr &v) +{ + return as(v); +} + + +StringAttribute & +BitVectorTest::asString(AttributePtr &v) +{ + return as(v); +} + + +FloatingPointAttribute & +BitVectorTest::asFloat(AttributePtr &v) +{ + return as(v); +} + + +void +BitVectorTest::buildTermQuery(std::vector &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? 
ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <> +vespalib::string +BitVectorTest::getSearchStr() +{ + return "[-42;-42]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr() +{ + return "[-42.0;-42.0]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr() +{ + return "foo"; +} + + +template +SearchContextPtr +BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, + bool useBitVector) +{ + std::vector query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params().useBitVector(useBitVector)); +} + + +template <> +SearchContextPtr +BitVectorTest::getSearch(const IntegerAttribute &v, + bool useBitVector) +{ + return getSearch(v, "[-42;-42]", false, useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest:: +getSearch(const FloatingPointAttribute &v, + bool useBitVector) +{ + return getSearch(v, "[-42.0;-42.0]", false, + useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest::getSearch(const StringAttribute &v, + bool useBitVector) +{ + return getSearch + (v, "foo", false, useBitVector); +} + + +BitVectorTest::AttributePtr +BitVectorTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + cfg.setFastSearch(fastSearch); + cfg.setEnableBitVectors(enableBitVectors); + cfg.setEnableOnlyBitVector(enableOnlyBitVector); + cfg.setIsFilter(filter); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void 
+BitVectorTest::addDocs(const AttributePtr &v, size_t sz) +{ + while (v->getNumDocs() < sz) { + AttributeVector::DocId docId = 0; + EXPECT_TRUE(v->addDoc(docId)); + v->clearDoc(docId); + } + EXPECT_TRUE(v->getNumDocs() == sz); + v->commit(true); +} + + +template <> +void +BitVectorTest::populate(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + +template <> +void +BitVectorTest::populateAll(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + 
v.clearDoc(i); + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + sb->initFullRange(); + sb->seek(1u); + uint32_t docId = sb->getDocId(); + uint32_t lastDocId = 0; + uint32_t docFreq = 0; + EXPECT_EQUAL(expFirstDocId, docId); + while (docId != search::endDocId) { + lastDocId = docId; + ++docFreq, + assert(!checkStride || (docId % 5) == 2u); + sb->unpack(docId); + EXPECT_EQUAL(md.getDocId(), docId); + if (v->getCollectionType() == CollectionType::SINGLE || + !weights) { + EXPECT_EQUAL(1, md.getWeight()); + } else if (v->getCollectionType() == CollectionType::ARRAY) { + EXPECT_EQUAL(2, md.getWeight()); + } else { + if (v->getBasicType() == BasicType::STRING) { + EXPECT_EQUAL(24, md.getWeight()); + } else { + EXPECT_EQUAL(-3, md.getWeight()); + } + } + sb->seek(docId + 1); + docId = sb->getDocId(); + } + EXPECT_EQUAL(expLastDocId, lastDocId); + EXPECT_EQUAL(expDocFreq, docFreq); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + TermFieldMatchData md; + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + checkSearch(v, std::move(sb), md, + expFirstDocId, expLastDocId, expDocFreq, weights, + checkStride); +} + 
+ +template +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + Config cfg(bt, ct); + AttributePtr v = make(cfg, pref, fastSearch, + enableBitVectors, enableOnlyBitVector, filter); + addDocs(v, 1024); + VectorType &tv = as(v); + populate(tv, 2, 1023, true); + + SearchContextPtr sc = getSearch(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + sc = getSearch(tv, false); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + const search::IDocumentWeightAttribute *dwa = + v->asDocumentWeightAttribute(); + if (dwa != NULL) { + search::IDocumentWeightAttribute::LookupResult lres = + dwa->lookup(getSearchStr()); + typedef search::queryeval::DocumentWeightSearchIterator DWSI; + typedef search::queryeval::SearchIterator SI; + TermFieldMatchData md; + SI::UP dwsi(new DWSI(md, *dwa, lres)); + if (!enableOnlyBitVector) { + checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true); + } else { + dwsi->initFullRange(); + EXPECT_TRUE(dwsi->isAtEnd()); + } + } + populate(tv, 2, 973, false); + sc = getSearch(tv, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector && + !filter, true); + populate(tv, 2, 973, true); + sc = getSearch(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + addDocs(v, 15000); + sc = getSearch(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + populateAll(tv, 10, 15000, true); + sc = getSearch(tv, true); + checkSearch(v, std::move(sc), 2, 14999, 14992, + !enableBitVectors && !filter, + false); +} + + +template +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref) +{ + LOG(info, + "test run, pref is %s", + pref.c_str()); + test(bt, ct, pref, + false, false, false, false); + test(bt, 
ct, pref, + false, false, false, true); + test(bt, ct, pref, + true, false, false, false); + test(bt, ct, pref, + true, false, false, true); + test(bt, ct, pref, + true, true, false, false); + test(bt, ct, pref, + true, true, false, true); + test(bt, ct, pref, + true, true, true, false); + test(bt, ct, pref, + true, true, true, true); +} + + +TEST_F("Test bitvectors with single value int32", BitVectorTest) +{ + f.template test(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test bitvectors with array value int32", BitVectorTest) +{ + f.template test(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test bitvectors with weighted set value int32", BitVectorTest) +{ + f.template test(BasicType::INT32, + CollectionType::WSET, + "int32_sv"); +} + +TEST_F("Test bitvectors with single value double", BitVectorTest) +{ + f.template test(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test bitvectors with array value double", BitVectorTest) +{ + f.template test(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test bitvectors with weighted set value double", BitVectorTest) +{ + f.template test(BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + +TEST_F("Test bitvectors with single value string", BitVectorTest) +{ + f.template test(BasicType::STRING, + CollectionType::SINGLE, + "string_sv"); +} + +TEST_F("Test bitvectors with array value string", BitVectorTest) +{ + f.template test(BasicType::STRING, + CollectionType::ARRAY, + "string_a"); +} + +TEST_F("Test bitvectors with weighted set value string", BitVectorTest) +{ + f.template test(BasicType::STRING, + CollectionType::WSET, + "string_ws"); +} + +TEST("Test bitvector iterators adheres to initRange") { + search::test::InitRangeVerifier initRangeTest; + BitVector::UP bv = BitVector::create(initRangeTest.getDocIdLimit()); + for (uint32_t docId: initRangeTest.getExpectedDocIds()) { + bv->setBit(docId); + } + 
TermFieldMatchData tfmd; + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, false)); + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, true)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.cpp b/searchlib/src/tests/attribute/changevector_test.cpp new file mode 100644 index 00000000000..9f0a796fd3e --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("changevector_test"); +#include + +#include + +using namespace search; + +template +void verifyStrictOrdering(const T & v) { + long count(0); + for (const auto & c : v) { + count++; + EXPECT_EQUAL(count, c._data.get()); + } + EXPECT_EQUAL(v.size(), size_t(count)); +} + +class Accessor { +public: + Accessor(const std::vector & v) : _size(v.size()), _current(v.begin()), _end(v.end()) { } + size_t size() const { return _size; } + void next() { _current++; } + long value() const { return *_current; } + int weight() const { return *_current; } +private: + size_t _size; + std::vector::const_iterator _current; + std::vector::const_iterator _end; +}; + +TEST("require insert ordering is preserved for same doc") +{ + typedef ChangeTemplate> Change; + typedef ChangeVectorT CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 7, 2)); + EXPECT_EQUAL(2u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved ") +{ + typedef ChangeTemplate> Change; + typedef ChangeVectorT CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 6, 3)); + EXPECT_EQUAL(3u, a.size()); + 
verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved with mix") +{ + typedef ChangeTemplate> Change; + typedef ChangeVectorT CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 5, 3)); + EXPECT_EQUAL(3u, a.size()); + a.push_back(Change(Change::NOOP, 6, 10)); + EXPECT_EQUAL(4u, a.size()); + std::vector v({4,5,6,7,8}); + Accessor ac(v); + a.push_back(5, ac); + EXPECT_EQUAL(9u, a.size()); + a.push_back(Change(Change::NOOP, 5, 9)); + EXPECT_EQUAL(10u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require that inserting empty vector does not affect the vector.") { + typedef ChangeTemplate> Change; + typedef ChangeVectorT CV; + CV a; + std::vector v; + Accessor ac(v); + a.push_back(1, ac); + EXPECT_EQUAL(0u, a.size()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.sh b/searchlib/src/tests/attribute/changevector_test.sh new file mode 100644 index 00000000000..cb70f5465a4 --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_changevector_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/comparator/.gitignore b/searchlib/src/tests/attribute/comparator/.gitignore new file mode 100644 index 00000000000..51c5b5944c9 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +comparator_test +searchlib_comparator_test_app diff --git a/searchlib/src/tests/attribute/comparator/CMakeLists.txt b/searchlib/src/tests/attribute/comparator/CMakeLists.txt new file mode 100644 index 00000000000..4a14181db3c --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +vespa_add_executable(searchlib_comparator_test_app + SOURCES + comparator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_comparator_test_app COMMAND searchlib_comparator_test_app) diff --git a/searchlib/src/tests/attribute/comparator/DESC b/searchlib/src/tests/attribute/comparator/DESC new file mode 100644 index 00000000000..6b3ba01c89b --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/DESC @@ -0,0 +1 @@ +comparator test. Take a look at comparator_test.cpp for details. diff --git a/searchlib/src/tests/attribute/comparator/FILES b/searchlib/src/tests/attribute/comparator/FILES new file mode 100644 index 00000000000..b4c23c09022 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/FILES @@ -0,0 +1 @@ +comparator_test.cpp diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp new file mode 100644 index 00000000000..2a4c3c6fb87 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("comparator_test"); +#include +#include +#include + +#include +#include +#include +#include + +namespace search { + +using namespace btree; + +typedef EnumStoreT > NumericEnumStore; +typedef EnumStoreComparatorT > NumericComparator; + +typedef EnumStoreT > FloatEnumStore; +typedef EnumStoreComparatorT > FloatComparator; + +typedef EnumStoreT StringEnumStore; +typedef EnumStoreComparatorT StringComparator; +typedef EnumStoreFoldedComparatorT FoldedStringComparator; + +typedef EnumStoreBase::Index EnumIndex; + +typedef BTreeRoot TreeType; +typedef TreeType::NodeAllocatorType NodeAllocator; + +class Test : public vespalib::TestApp { +private: + void requireThatNumericComparatorIsWorking(); + void requireThatFloatComparatorIsWorking(); + void requireThatStringComparatorIsWorking(); + void requireThatComparatorWithTreeIsWorking(); + void requireThatFoldedComparatorIsWorking(); + +public: + Test() {} + int Main(); +}; + +void +Test::requireThatNumericComparatorIsWorking() +{ + NumericEnumStore es(1024, false); + EnumIndex e1, e2; + es.addEnum(10, e1); + es.addEnum(30, e2); + NumericComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + NumericComparator cmp2(es, 20); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatFloatComparatorIsWorking() +{ + FloatEnumStore es(1024, false); + EnumIndex e1, e2, e3; + es.addEnum(10.5, e1); + es.addEnum(30.5, e2); + es.addEnum(std::numeric_limits::quiet_NaN(), e3); + FloatComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e3, e1)); // nan + EXPECT_TRUE(!cmp1(e1, e3)); // nan + EXPECT_TRUE(!cmp1(e3, e3)); // nan + FloatComparator cmp2(es, 20.5); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatStringComparatorIsWorking() +{ + StringEnumStore es(1024, false); + 
EnumIndex e1, e2, e3; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + StringComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); // similar folded, fallback to regular + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(strcmp("aa", "aB") > 0); // regular + StringComparator cmp2(es, "AB"); + EXPECT_TRUE(cmp2(EnumIndex(), e3)); + EXPECT_TRUE(!cmp2(e3, EnumIndex())); +} + +void +Test::requireThatComparatorWithTreeIsWorking() +{ + NumericEnumStore es(2048, false); + vespalib::GenerationHandler g; + TreeType t; + NodeAllocator m; + EnumIndex ei; + for (int32_t v = 100; v > 0; --v) { + NumericComparator cmp(es, v); + EXPECT_TRUE(!t.find(EnumIndex(), m, cmp).valid()); + es.addEnum(v, ei); + t.insert(ei, BTreeNoLeafData(), m, cmp); + } + EXPECT_EQUAL(100u, t.size(m)); + int32_t exp = 1; + for (TreeType::Iterator itr = t.begin(m); itr.valid(); ++itr) { + EXPECT_EQUAL(exp++, es.getValue(itr.getKey())); + } + EXPECT_EQUAL(101, exp); + t.clear(m); + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +void +Test::requireThatFoldedComparatorIsWorking() +{ + StringEnumStore es(1024, false); + EnumIndex e1, e2, e3, e4; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + es.addEnum("Folded", e4); + FoldedStringComparator cmp1(es); + EXPECT_TRUE(!cmp1(e1, e2)); // similar folded + EXPECT_TRUE(!cmp1(e2, e1)); // similar folded + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(!cmp1(e3, e2)); // folded compare + FoldedStringComparator cmp2(es, "fol", false); + FoldedStringComparator cmp3(es, "fol", true); + EXPECT_TRUE(cmp2(EnumIndex(), e4)); + EXPECT_TRUE(!cmp2(e4, EnumIndex())); + EXPECT_TRUE(!cmp3(EnumIndex(), e4)); // similar when prefix + EXPECT_TRUE(!cmp3(e4, EnumIndex())); // similar when prefix +} + +int +Test::Main() +{ + TEST_INIT("comparator_test"); + + 
requireThatNumericComparatorIsWorking(); + requireThatFloatComparatorIsWorking(); + requireThatStringComparatorIsWorking(); + requireThatComparatorWithTreeIsWorking(); + requireThatFoldedComparatorIsWorking(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::Test); + diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore new file mode 100644 index 00000000000..08cae9a48df --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore @@ -0,0 +1 @@ +searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt new file mode 100644 index 00000000000..2a1b36a626d --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_weight_iterator_test_app + SOURCES + document_weight_iterator_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/FILES b/searchlib/src/tests/attribute/document_weight_iterator/FILES new file mode 100644 index 00000000000..9bb94dc8770 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/FILES @@ -0,0 +1 @@ +document_weight_iterator_test.cpp diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp new file mode 100644 index 00000000000..fbe62f80843 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::attribute; + +AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { + Config cfg(type, collection); + cfg.setFastSearch(fast_search); + return AttributeFactory::createAttribute("my_attribute", cfg); +} + +void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + attr_ptr->addDoc(docid); + } + attr_ptr->commit(); + ASSERT_EQUAL((limit - 1), docid); +} + +template +void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { + attr->clearDoc(docid); + attr->append(docid, key, weight); + attr->commit(); +} + +void populate_long(AttributeVector::SP attr_ptr) { + IntegerAttribute *attr = static_cast(attr_ptr.get()); + set_doc(attr, 1, int64_t(111), 20); + set_doc(attr, 5, int64_t(111), 5); + set_doc(attr, 7, int64_t(111), 10); +} + +void populate_string(AttributeVector::SP attr_ptr) { + StringAttribute *attr = static_cast(attr_ptr.get()); + set_doc(attr, 1, "foo", 20); + set_doc(attr, 5, "foo", 5); + set_doc(attr, 7, "foo", 10); +} + +struct LongFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_long(attr); + } +}; + +struct StringFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_string(attr); + } +}; + 
+TEST("require that appropriate attributes support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); +} + +TEST("require that inappropriate attributes do not support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); +} + +void verify_valid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_TRUE(result.posting_idx.valid()); + 
EXPECT_EQUAL(3u, result.posting_size); + EXPECT_EQUAL(5, result.min_weight); + EXPECT_EQUAL(20, result.max_weight); +} + +void verify_invalid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_FALSE(result.posting_idx.valid()); + EXPECT_EQUAL(0u, result.posting_size); + EXPECT_EQUAL(0, result.min_weight); + EXPECT_EQUAL(0, result.max_weight); +} + +TEST_F("require that integer lookup works correctly", LongFixture) { + verify_valid_lookup(f1.api->lookup("111")); + verify_invalid_lookup(f1.api->lookup("222")); +} + +TEST_F("require string lookup works correctly", StringFixture) { + verify_valid_lookup(f1.api->lookup("foo")); + verify_invalid_lookup(f1.api->lookup("bar")); +} + +void verify_posting(const IDocumentWeightAttribute &api, const char *term) { + auto result = api.lookup(term); + ASSERT_TRUE(result.posting_idx.valid()); + std::vector itr_store; + api.create(result.posting_idx, itr_store); + ASSERT_EQUAL(1u, itr_store.size()); + { + DocumentWeightIterator &itr = itr_store[0]; + if (itr.valid() && itr.getKey() < 1) { + itr.linearSeek(1); + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1u, itr.getKey()); // docid + EXPECT_EQUAL(20, itr.getData()); // weight + itr.linearSeek(2); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(5u, itr.getKey()); // docid + EXPECT_EQUAL(5, itr.getData()); // weight + itr.linearSeek(6); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(7u, itr.getKey()); // docid + EXPECT_EQUAL(10, itr.getData()); // weight + itr.linearSeek(8); + EXPECT_FALSE(itr.valid()); + } +} + +TEST_F("require that integer iterators are created correctly", LongFixture) { + verify_posting(*f1.api, "111"); +} + +TEST_F("require that string iterators are created correctly", StringFixture) { + verify_posting(*f1.api, "foo"); +} + +TEST("verify init range for document weight search iterator") { + search::test::InitRangeVerifier ir; + AttributeVector::SP attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)); + add_docs(attr, ir.getDocIdLimit()); + auto 
docids = ir.getExpectedDocIds(); + IntegerAttribute *int_attr = static_cast(attr.get()); + for (auto docid: docids) { + set_doc(int_attr, docid, int64_t(123), 1); + } + const IDocumentWeightAttribute *api(attr->asDocumentWeightAttribute()); + ASSERT_TRUE(api != nullptr); + auto dict_entry = api->lookup("123"); + ASSERT_TRUE(dict_entry.posting_idx.valid()); + fef::TermFieldMatchData tfmd; + queryeval::DocumentWeightSearchIterator itr(tfmd, *api, dict_entry); + ir.verify(itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/enumeratedsave/.gitignore b/searchlib/src/tests/attribute/enumeratedsave/.gitignore new file mode 100644 index 00000000000..a4680f95f72 --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/.gitignore @@ -0,0 +1,127 @@ +/double_a0_e.udat +/double_a0_ee.udat +/double_a1_e.udat +/double_a1_ee.udat +/double_a2_e.udat +/double_a2_ee.udat +/double_sv0_e.udat +/double_sv0_ee.udat +/double_sv1_e.udat +/double_sv1_ee.udat +/double_sv2_e.udat +/double_sv2_ee.udat +/double_ws0_e.udat +/double_ws0_ee.udat +/double_ws1_e.udat +/double_ws1_ee.udat +/double_ws2_e.udat +/double_ws2_ee.udat +/float_a0_e.udat +/float_a0_ee.udat +/float_a1_e.udat +/float_a1_ee.udat +/float_a2_e.udat +/float_a2_ee.udat +/float_sv0_e.udat +/float_sv0_ee.udat +/float_sv1_e.udat +/float_sv1_ee.udat +/float_sv2_e.udat +/float_sv2_ee.udat +/float_ws0_e.udat +/float_ws0_ee.udat +/float_ws1_e.udat +/float_ws1_ee.udat +/float_ws2_e.udat +/float_ws2_ee.udat +/int16_a0_e.udat +/int16_a0_ee.udat +/int16_a1_e.udat +/int16_a1_ee.udat +/int16_a2_e.udat +/int16_a2_ee.udat +/int16_sv0_e.udat +/int16_sv0_ee.udat +/int16_sv1_e.udat +/int16_sv1_ee.udat +/int16_sv2_e.udat +/int16_sv2_ee.udat +/int16_ws0_e.udat +/int16_ws0_ee.udat +/int16_ws1_e.udat +/int16_ws1_ee.udat +/int16_ws2_e.udat +/int16_ws2_ee.udat +/int32_a0_e.udat +/int32_a0_ee.udat +/int32_a1_e.udat +/int32_a1_ee.udat +/int32_a2_e.udat +/int32_a2_ee.udat +/int32_sv0_e.udat +/int32_sv0_ee.udat 
+/int32_sv1_e.udat +/int32_sv1_ee.udat +/int32_sv2_e.udat +/int32_sv2_ee.udat +/int32_ws0_e.udat +/int32_ws0_ee.udat +/int32_ws1_e.udat +/int32_ws1_ee.udat +/int32_ws2_e.udat +/int32_ws2_ee.udat +/int64_a0_e.udat +/int64_a0_ee.udat +/int64_a1_e.udat +/int64_a1_ee.udat +/int64_a2_e.udat +/int64_a2_ee.udat +/int64_sv0_e.udat +/int64_sv0_ee.udat +/int64_sv1_e.udat +/int64_sv1_ee.udat +/int64_sv2_e.udat +/int64_sv2_ee.udat +/int64_ws0_e.udat +/int64_ws0_ee.udat +/int64_ws1_e.udat +/int64_ws1_ee.udat +/int64_ws2_e.udat +/int64_ws2_ee.udat +/int8_a0_e.udat +/int8_a0_ee.udat +/int8_a1_e.udat +/int8_a1_ee.udat +/int8_a2_e.udat +/int8_a2_ee.udat +/int8_sv0_e.udat +/int8_sv0_ee.udat +/int8_sv1_e.udat +/int8_sv1_ee.udat +/int8_sv2_e.udat +/int8_sv2_ee.udat +/int8_ws0_e.udat +/int8_ws0_ee.udat +/int8_ws1_e.udat +/int8_ws1_ee.udat +/int8_ws2_e.udat +/int8_ws2_ee.udat +/str_a0_e.udat +/str_a0_ee.udat +/str_a1_e.udat +/str_a1_ee.udat +/str_a2_e.udat +/str_a2_ee.udat +/str_sv0_e.udat +/str_sv0_ee.udat +/str_sv1_e.udat +/str_sv1_ee.udat +/str_sv2_e.udat +/str_sv2_ee.udat +/str_ws0_e.udat +/str_ws0_ee.udat +/str_ws1_e.udat +/str_ws1_ee.udat +/str_ws2_e.udat +/str_ws2_ee.udat +searchlib_enumeratedsave_test_app diff --git a/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt new file mode 100644 index 00000000000..0dbb59043c1 --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_enumeratedsave_test_app + SOURCES + enumeratedsave_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumeratedsave_test_app COMMAND searchlib_enumeratedsave_test_app) diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp new file mode 100644 index 00000000000..312814eb55a --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -0,0 +1,944 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +LOG_SETUP("enumeratedsave_test"); +#include +#include +#include + + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::RandomGenerator; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::IAttributeFileWriter; +using search::BufferWriter; +using search::AttributeMemoryFileBufferWriter; + +typedef std::unique_ptr SearchContextPtr; +typedef std::unique_ptr SearchBasePtr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + + +class MemAttrFileWriter : public IAttributeFileWriter +{ +private: + Buffer _buf; + +public: + MemAttrFileWriter() + : _buf() + { + } + + virtual Buffer allocBuf(size_t size) override { + return std::make_unique(size, 4096); + } + + virtual void writeBuf(Buffer buf_in) override { + if (!_buf) { + _buf = std::move(buf_in); + } else { + _buf->writeBytes(buf_in->getData(), buf_in->getDataLen()); + } + } + + const Buffer 
&buf() const { return _buf; } + + std::unique_ptr allocBufferWriter() override; +}; + +std::unique_ptr +MemAttrFileWriter::allocBufferWriter() +{ + if (!_buf) { + _buf = allocBuf(1); + } + return std::make_unique(*this); +} + +class MemAttr : public search::IAttributeSaveTarget +{ +private: + MemAttrFileWriter _datWriter; + MemAttrFileWriter _idxWriter; + MemAttrFileWriter _weightWriter; + MemAttrFileWriter _udatWriter; + +public: + typedef std::shared_ptr SP; + + MemAttr(void) + : _datWriter(), + _idxWriter(), + _weightWriter(), + _udatWriter() + { + } + + // Implements IAttributeSaveTarget + virtual bool setup() { return true; } + virtual void close() {} + virtual IAttributeFileWriter &datWriter() override { return _datWriter; } + virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; } + virtual IAttributeFileWriter &weightWriter() override { + return _weightWriter; + } + virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; } + + bool + bufEqual(const Buffer &lhs, const Buffer &rhs) const; + + bool + operator==(const MemAttr &rhs) const; +}; + +class EnumeratedSaveTest +{ +private: + typedef AttributeVector::SP AttributePtr; + + template + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + FloatingPointAttribute & + asFloat(AttributePtr &v); + + void + addDocs(const AttributePtr &v, size_t sz); + + template + void populate(VectorType &v, unsigned seed, BasicType bt); + + template + void compare(VectorType &a, VectorType &b); + + void + buildTermQuery(std::vector & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template + SearchContextPtr + getSearch(const V & vec); + + MemAttr::SP + saveMem(AttributeVector &v); + + void + checkMem(AttributeVector &v, const MemAttr &e, bool enumerated); + + MemAttr::SP + 
saveBoth(AttributePtr v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch = false); + + void + load(AttributePtr v, const vespalib::string &name); + + template + void + checkLoad(AttributePtr v, + const vespalib::string &name, + AttributePtr ev); + + template + void + testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch); + +public: + template + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); + + EnumeratedSaveTest() + { + } +}; + + +bool +MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const +{ + if (!EXPECT_TRUE((lhs.get() != NULL) == (rhs.get() != NULL))) + return false; + if (lhs.get() == NULL) + return true; + if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) + return false; + if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(), + lhs->getDataLen()) == 0)) + return false; + return true; +} + +bool +MemAttr::operator==(const MemAttr &rhs) const +{ + if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), rhs._datWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), rhs._idxWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), rhs._weightWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_udatWriter.buf(), rhs._udatWriter.buf()))) + return false; + return true; +} + + +void +EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz) +{ + if (sz) { + AttributeVector::DocId docId; + for(size_t i(0); i< sz; i++) { + EXPECT_TRUE( v->addDoc(docId) ); + } + EXPECT_TRUE( docId+1 == sz ); + EXPECT_TRUE( v->getNumDocs() == sz ); + v->commit(true); + } +} + + +template <> +void +EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, + BasicType bt) +{ + search::Rand48 rnd; + IntegerAttribute::largeint_t mask(std::numeric_limits + 
::max()); + switch (bt.type()) { + case BasicType::INT8: + mask = 0x7f; + break; + case BasicType::INT16: + mask = 0x7fff; + break; + default: + ; + } + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -42) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48() & mask) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + search::Rand48 rnd; + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE( v.update(i, -42.0) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48()) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + RandomGenerator rnd(seed); + int 
weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE( v.update(i, "foo") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = rnd.rand(0, 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); + } + } else { + EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); + } + } + v.commit(); +} + +namespace +{ + +template +inline bool +equalsHelper(const T &lhs, const T &rhs) +{ + return lhs == rhs; +} + +template <> +inline bool +equalsHelper(const float &lhs, const float &rhs) +{ + if (std::isnan(lhs)) + return std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +template <> +inline bool +equalsHelper(const double &lhs, const double &rhs) +{ + if (std::isnan(lhs)) + return std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +} + +template +void +EnumeratedSaveTest::compare(VectorType &a, VectorType &b) +{ + EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); + ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); + // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount()); + EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit()); + uint32_t asz(a.getMaxValueCount()); + uint32_t bsz(b.getMaxValueCount()); + BufferType *av = new BufferType[asz]; + BufferType *bv = new BufferType[bsz]; + + for (size_t i(0), m(a.getNumDocs()); i < m; i++) { + ASSERT_TRUE(asz >= static_cast(a.getValueCount(i))); + ASSERT_TRUE(bsz >= static_cast(b.getValueCount(i))); + EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); + ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); + EXPECT_EQUAL(static_cast(a).get(i, 
av, asz), + static_cast(a.getValueCount(i))); + EXPECT_EQUAL(static_cast(b).get(i, bv, bsz), + static_cast(b.getValueCount(i))); + for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); + j < k; j++) { + EXPECT_TRUE(equalsHelper(av[j], bv[j])); + } + } + delete [] bv; + delete [] av; +} + + +template +VectorType & +EnumeratedSaveTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +EnumeratedSaveTest::asInt(AttributePtr &v) +{ + return as(v); +} + + +StringAttribute & +EnumeratedSaveTest::asString(AttributePtr &v) +{ + return as(v); +} + + +FloatingPointAttribute & +EnumeratedSaveTest::asFloat(AttributePtr &v) +{ + return as(v); +} + + +void +EnumeratedSaveTest::buildTermQuery(std::vector &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template +SearchContextPtr +EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast(vec)). 
+ getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch(const IntegerAttribute &v) +{ + return getSearch(v, "[-42;-42]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch(const FloatingPointAttribute &v) +{ + return getSearch(v, "[-42.0;-42.0]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch(const StringAttribute &v) +{ + return getSearch + (v, "foo", false); +} + +MemAttr::SP +EnumeratedSaveTest::saveMem(AttributeVector &v) +{ + MemAttr::SP res(new MemAttr); + EXPECT_TRUE(v.save(*res)); + return res; +} + + +void +EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e, + bool enumerated) +{ + MemAttr m; + v.enableEnumeratedSave(enumerated); + EXPECT_TRUE(v.save(m)); + v.enableEnumeratedSave(false); + ASSERT_TRUE(m == e); +} + + +MemAttr::SP +EnumeratedSaveTest::saveBoth(AttributePtr v) +{ + EXPECT_TRUE(v->save()); + vespalib::string basename = v->getBaseFileName(); + AttributePtr v2 = make(v->getConfig(), basename, true); + EXPECT_TRUE(v2->load()); + v2->enableEnumeratedSave(true); + EXPECT_TRUE(v2->saveAs(basename + "_e")); + if ((v->getConfig().basicType() == BasicType::INT32 && + v->getConfig().collectionType() == CollectionType::WSET) || true) { + search::AttributeMemorySaveTarget ms; + search::TuneFileAttributes tune; + search::index::DummyFileHeaderContext fileHeaderContext; + EXPECT_TRUE(v2->saveAs(basename + "_ee", ms)); + EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext)); + } + return saveMem(*v2); +} + + +EnumeratedSaveTest::AttributePtr +EnumeratedSaveTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + cfg.setFastSearch(fastSearch); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void +EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); 
+} + +template +void +EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name, + AttributePtr ev) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); + compare(as(v), as(ev)); +} + + +template +void +EnumeratedSaveTest::testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + // typedef AttributePtr AVP; + + bool flagAttr = + cfg.collectionType() == CollectionType::ARRAY && + cfg.basicType() == BasicType::INT8 && + fastSearch; + bool supportsEnumerated = (fastSearch || + cfg.basicType() == BasicType::STRING) && + !flagAttr; + + + AttributePtr v = make(cfg, pref, fastSearch); + TEST_DO((checkLoad(v, pref + "0", v0))); + TEST_DO((checkLoad(v, pref + "1", v1))); + TEST_DO((checkLoad(v, pref + "2", v2))); + TEST_DO((checkLoad(v, pref + "1", v1))); + TEST_DO((checkLoad(v, pref + "0", v0))); + + TEST_DO((checkLoad(v, pref + "0", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad(v, pref + "1", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); + TEST_DO((checkLoad(v, pref + "2", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true)); + + TEST_DO((checkLoad(v, pref + "0_e", v0))); + TEST_DO((checkLoad(v, pref + "1_e", v1))); + TEST_DO((checkLoad(v, pref + "2_e", v2))); + TEST_DO((checkLoad(v, pref + "1_e", v1))); + TEST_DO((checkLoad(v, pref + "0_e", v0))); + + TEST_DO((checkLoad(v, pref + "0_e", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad(v, pref + "1_e", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? 
*emv1 : *mv1, true)); + TEST_DO((checkLoad(v, pref + "2_e", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true)); + + TermFieldMatchData md; + SearchContextPtr sc = getSearch(as(v)); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + sb->initFullRange(); + sb->seek(1u); + EXPECT_EQUAL(7u, sb->getDocId()); + sb->unpack(7u); + EXPECT_EQUAL(md.getDocId(), 7u); + if (v->getCollectionType() == CollectionType::SINGLE || + flagAttr) { + EXPECT_EQUAL(md.getWeight(), 1); + } else if (v->getCollectionType() == CollectionType::ARRAY) { + EXPECT_EQUAL(md.getWeight(), 2); + } else { + if (cfg.basicType() == BasicType::STRING) { + EXPECT_EQUAL(md.getWeight(), 24); + } else { + EXPECT_EQUAL(md.getWeight(), -3); + } + } +} + + +template +void +EnumeratedSaveTest::test(BasicType bt, CollectionType ct, + const vespalib::string &pref) +{ + Config cfg(bt, ct); + AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg); + AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg); + AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg); + + addDocs(v0, 0); + addDocs(v1, 10); + addDocs(v2, 30); + + populate(as(v0), 0, bt); + populate(as(v1), 10, bt); + populate(as(v2), 30, bt); + + MemAttr::SP mv0 = saveMem(*v0); + MemAttr::SP mv1 = saveMem(*v1); + MemAttr::SP mv2 = saveMem(*v2); + + MemAttr::SP emv0 = saveBoth(v0); + MemAttr::SP emv1 = saveBoth(v1); + MemAttr::SP emv2 = saveBoth(v2); + + AttributePtr v = make(cfg, pref, true); + checkLoad(v, pref + "0_ee", v0); + checkLoad(v, pref + "1_ee", v1); + checkLoad(v, pref + "2_ee", v2); + v.reset(); + + TEST_DO((testReload(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, + false))); + TEST_DO((testReload(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, + true))); +} + +TEST_F("Test enumerated save with single value int8", EnumeratedSaveTest) +{ + f.template test(BasicType::INT8, + 
CollectionType::SINGLE, + "int8_sv"); +} + +TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest) +{ + f.template test(BasicType::INT8, + CollectionType::ARRAY, + "int8_a"); +} + +TEST_F("Test enumerated save with weighted set value int8", + EnumeratedSaveTest) +{ + f.template test(BasicType::INT8, + CollectionType::WSET, + "int8_ws"); +} + +TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest) +{ + f.template test(BasicType::INT16, + CollectionType::SINGLE, + "int16_sv"); +} + +TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest) +{ + f.template test(BasicType::INT16, + CollectionType::ARRAY, + "int16_a"); +} + +TEST_F("Test enumerated save with weighted set value int16", + EnumeratedSaveTest) +{ + f.template test(BasicType::INT16, + CollectionType::WSET, + "int16_ws"); +} + +TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest) +{ + f.template test(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest) +{ + f.template test(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test enumerated save with weighted set value int32", + EnumeratedSaveTest) +{ + f.template test(BasicType::INT32, + CollectionType::WSET, + "int32_ws"); +} + +TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest) +{ + f.template test(BasicType::INT64, + CollectionType::SINGLE, + "int64_sv"); +} + +TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest) +{ + f.template test(BasicType::INT64, + CollectionType::ARRAY, + "int64_a"); +} + +TEST_F("Test enumerated save with weighted set value int64", + EnumeratedSaveTest) +{ + f.template test(BasicType::INT64, + CollectionType::WSET, + "int64_ws"); +} + +TEST_F("Test enumerated save with single value float", EnumeratedSaveTest) +{ + f.template test(BasicType::FLOAT, + CollectionType::SINGLE, + "float_sv"); +} + +TEST_F("Test 
enumerated save with array value float", EnumeratedSaveTest) +{ + f.template test(BasicType::FLOAT, + CollectionType::ARRAY, + "float_a"); +} + +TEST_F("Test enumerated save with weighted set value float", + EnumeratedSaveTest) +{ + f.template test( + BasicType::FLOAT, + CollectionType::WSET, + "float_ws"); +} + + +TEST_F("Test enumerated save with single value double", EnumeratedSaveTest) +{ + f.template test(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test enumerated save with array value double", EnumeratedSaveTest) +{ + f.template test(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test enumerated save with weighted set value double", + EnumeratedSaveTest) +{ + f.template test( + BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + + +TEST_F("Test enumerated save with single value string", EnumeratedSaveTest) +{ + f.template test(BasicType::STRING, + CollectionType::SINGLE, + "str_sv"); +} + +TEST_F("Test enumerated save with array value string", EnumeratedSaveTest) +{ + f.template test(BasicType::STRING, + CollectionType::ARRAY, + "str_a"); +} + +TEST_F("Test enumerated save with weighted set value string", + EnumeratedSaveTest) +{ + f.template test( + BasicType::STRING, + CollectionType::WSET, + "str_ws"); +} + +TEST_MAIN() +{ + AttributeVector::enableEnumeratedLoad(); + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/enumstore/.gitignore b/searchlib/src/tests/attribute/enumstore/.gitignore new file mode 100644 index 00000000000..c58a018bbd9 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +enumstore_test +searchlib_enumstore_test_app diff --git a/searchlib/src/tests/attribute/enumstore/CMakeLists.txt b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt new file mode 100644 index 00000000000..33190553747 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo 
Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_enumstore_test_app + SOURCES + enumstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumstore_test_app COMMAND searchlib_enumstore_test_app) diff --git a/searchlib/src/tests/attribute/enumstore/DESC b/searchlib/src/tests/attribute/enumstore/DESC new file mode 100644 index 00000000000..514c9a47caf --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/DESC @@ -0,0 +1 @@ +This is a test for the EnumStore class. diff --git a/searchlib/src/tests/attribute/enumstore/FILES b/searchlib/src/tests/attribute/enumstore/FILES new file mode 100644 index 00000000000..6fdb2381292 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/FILES @@ -0,0 +1 @@ +enumstore.cpp diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp new file mode 100644 index 00000000000..e63889bbeb8 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -0,0 +1,879 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("enumstore_test"); +#include +//#define LOG_ENUM_STORE +#include +#include +#include +#include + +namespace search { + +size_t enumStoreAlign(size_t size) +{ + return (size + 15) & -UINT64_C(16); +} + +// EnumStoreBase::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0 +const uint32_t RESERVED_BYTES = 16u; +typedef EnumStoreT > NumericEnumStore; + +class EnumStoreTest : public vespalib::TestApp +{ +private: + typedef EnumStoreT StringEnumStore; + typedef EnumStoreT > FloatEnumStore; + typedef EnumStoreT > DoubleEnumStore; + + typedef EnumStoreBase::Index EnumIndex; + typedef vespalib::GenerationHandler::generation_t generation_t; + + void testIndex(); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value); + void testStringEntry(); + void testNumericEntry(); + + template + void testFloatEnumStore(EnumStoreType & es); + void testFloatEnumStore(); + + void testAddEnum(); + template + void testAddEnum(bool hasPostings); + + template + void + testUniques(const EnumStoreType &ses, + const std::vector &unique); + + + void testCompaction(); + template + void testCompaction(bool hasPostings, bool disableReEnumerate); + + void testReset(); + template + void testReset(bool hasPostings); + + void testHoldListAndGeneration(); + void testMemoryUsage(); + void requireThatAddressSpaceUsageIsReported(); + void testBufferLimit(); + + // helper methods + typedef std::vector StringVector; + template + T random(T low, T high); + std::string getRandomString(uint32_t minLen, uint32_t maxLen); + StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen); + StringVector sortRandomStrings(StringVector & strings); + + struct StringEntry { + StringEntry(uint32_t e, uint32_t r, const std::string & s) : + _enum(e), _refCount(r), _string(s) {} + uint32_t _enum; + uint32_t _refCount; + 
std::string _string; + }; + + struct Reader { + typedef StringEnumStore::Index Index; + typedef std::vector IndexVector; + typedef std::vector ExpectedVector; + uint32_t _generation; + IndexVector _indices; + ExpectedVector _expected; + Reader(uint32_t generation, const IndexVector & indices, + const ExpectedVector & expected) : + _generation(generation), _indices(indices), _expected(expected) {} + }; + + void + checkReaders(const StringEnumStore &ses, + generation_t sesGen, + const std::vector &readers); + +public: + EnumStoreTest() {} + int Main(); +}; + +void +EnumStoreTest::testIndex() +{ + { + StringEnumStore::Index idx; + EXPECT_TRUE( ! idx.valid()); + EXPECT_EQUAL(idx.offset(), 0u); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx(enumStoreAlign(1000), 0); + EXPECT_TRUE(idx.offset() == enumStoreAlign(1000)); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + // Change offsets when alignment changes. 
+ StringEnumStore::Index idx1(48, 0); + StringEnumStore::Index idx2(80, 0); + StringEnumStore::Index idx3(48, 0); + EXPECT_TRUE(!(idx1 == idx2)); + EXPECT_TRUE(idx1 == idx3); + } + { + EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2); + } +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string) +{ + StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str()); +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value) +{ + NumericEnumStore::insertEntry(data, enumValue, refCount, value); +} + +void +EnumStoreTest::testStringEntry() +{ + { + char data[9]; + fillDataBuffer(data, 0, 0, ""); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("") == + StringEnumStore::alignEntrySize(8 + 1)); + + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 1); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + } + { + char data[18]; + fillDataBuffer(data, 10, 5, "enumstore"); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") == + StringEnumStore::alignEntrySize(8 + 1 + 9)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 6); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + } +} + +void +EnumStoreTest::testNumericEntry() +{ + { + char data[12]; + fillDataBuffer(data, 10, 20, 30); + 
NumericEnumStore::Entry e(data); + EXPECT_TRUE(NumericEnumStore::getEntrySize(30) == + NumericEnumStore::alignEntrySize(8 + 4)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 21); + EXPECT_TRUE(e.getValue() == 30); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + } +} + +template +void +EnumStoreTest::testFloatEnumStore(EnumStoreType & es) +{ + EnumIndex idx; + + T a[5] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f}; + T b[5] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f}; + + for (uint32_t i = 0; i < 5; ++i) { + es.addEnum(a[i], idx); + } + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } + + es.addEnum(std::numeric_limits::quiet_NaN(), idx); + EXPECT_TRUE(es.findIndex(std::numeric_limits::quiet_NaN(), idx)); + EXPECT_TRUE(es.findIndex(std::numeric_limits::quiet_NaN(), idx)); + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } +} + +void +EnumStoreTest::testFloatEnumStore() +{ + { + FloatEnumStore fes(1000, false); + testFloatEnumStore(fes); + } + { + DoubleEnumStore des(1000, false); + testFloatEnumStore(des); + } +} + +void +EnumStoreTest::testAddEnum() +{ + testAddEnum(false); + + testAddEnum(true); +} + +template +void +EnumStoreTest::testAddEnum(bool hasPostings) +{ + EnumStoreType ses(100, hasPostings); + EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES, + ses.getBuffer(0).capacity()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + EnumIndex idx; + uint64_t offset = ses.getBuffer(0).size(); + std::vector indices; + std::vector unique; + unique.push_back(""); + 
unique.push_back("add"); + unique.push_back("enumstore"); + unique.push_back("unique"); + + for (uint32_t i = 0; i < unique.size(); ++i) { + ses.addEnum(unique[i].c_str(), idx); + EXPECT_EQUAL(offset, idx.offset()); + EXPECT_EQUAL(0u, idx.bufferId()); + ses.incRefCount(idx); + EXPECT_EQUAL(1u, ses.getRefCount(idx)); + indices.push_back(idx); + offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(ses.getLastEnum() == i); + } + ses.freezeTree(); + + for (uint32_t i = 0; i < indices.size(); ++i) { + uint32_t e = ses.getEnum(indices[i]); + EXPECT_EQUAL(i, e); + EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e)); + EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(idx == indices[i]); + EXPECT_EQUAL(1u, ses.getRefCount(indices[i])); + StringEntryType::Type value = 0; + EXPECT_TRUE(ses.getValue(indices[i], value)); + EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0); + } + + if (hasPostings) { + testUniques(ses, unique); + } else { + testUniques(ses, unique); + } +} + +template +void +EnumStoreTest::testUniques +(const EnumStoreType &ses, const std::vector &unique) +{ + const EnumStoreDict *enumDict = + dynamic_cast *> + (&ses.getEnumStoreDict()); + assert(enumDict != NULL); + const Dictionary &dict = enumDict->getDictionary(); + uint32_t i = 0; + EnumIndex idx; + for (typename Dictionary::Iterator iter = dict.begin(); + iter.valid(); ++iter, ++i) { + idx = iter.getKey(); + EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0); + } + EXPECT_EQUAL(static_cast(unique.size()), i); +} + + +void +EnumStoreTest::testCompaction() +{ + testCompaction(false, false); + testCompaction(true, false); + testCompaction(false, true); + testCompaction(true, true); +} + +template +void +EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) +{ + // entrySize = 15 before alignment + uint32_t entrySize = 
EnumStoreType::alignEntrySize(15); + uint32_t bufferSize = entrySize * 5; + EnumStoreType ses(bufferSize, hasPostings); + EnumIndex idx; + std::vector indices; + typename EnumStoreType::Type t = "foo"; + std::vector uniques; + uniques.push_back("enum00"); + uniques.push_back("enum01"); + uniques.push_back("enum02"); + uniques.push_back("enum03"); + uniques.push_back("enum04"); + + // fill with unique values + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize); + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + indices.push_back(idx); + } + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(0u, ses.getBuffer(0).remaining()); + EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + uint32_t failEntrySize = ses.getEntrySize("enum05"); + EXPECT_TRUE(failEntrySize > ses.getRemaining()); + + // change from enum00 -> enum01 + ses.decRefCount(indices[0]); + ses.incRefCount(indices[1]); + indices[0] = indices[1]; + + // check correct refcount + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + uint32_t refCount = ses.getRefCount(idx); + if (i == 0) { + EXPECT_TRUE(refCount == 0); + } else if (i == 1) { + EXPECT_TRUE(refCount == 2); + } else { + EXPECT_TRUE(refCount == 1); + } + } + + // free unused enums + ses.freeUnusedEnums(true); + EXPECT_TRUE(!ses.findIndex("enum00", idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + if (disableReEnumerate) { + ses.disableReEnumerate(); + } + EXPECT_TRUE(ses.performCompaction(3 * entrySize)); + if (disableReEnumerate) { + ses.enableReEnumerate(); + } + EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); + EXPECT_TRUE(ses.getBuffer(1)._deadElems == 
0); + + EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); + + // add new unique strings + ses.addEnum("enum05", idx); + EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx)); + ses.addEnum("enum06", idx); + EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx)); + ses.addEnum("enum00", idx); + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx)); + + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum()); + + // compare old and new indices + for (uint32_t i = 0; i < indices.size(); ++i) { + EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx)); + EXPECT_TRUE(indices[i].bufferId() == 0); + EXPECT_TRUE(idx.bufferId() == 1); + EXPECT_TRUE(ses.getValue(indices[i], t)); + typename EnumStoreType::Type s = "bar"; + EXPECT_TRUE(ses.getValue(idx, s)); + EXPECT_TRUE(strcmp(t, s) == 0); + } + // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0 + EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx)); + EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset()); + EXPECT_EQUAL(entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx)); + EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset()); + EXPECT_EQUAL(2 * entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx)); + EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset()); + EXPECT_EQUAL(3 * entrySize, idx.offset()); +} + +void +EnumStoreTest::testReset() +{ + testReset(false); + + testReset(true); +} + +template +void +EnumStoreTest::testReset(bool hasPostings) +{ + uint32_t numUniques = 10000; + srand(123456789); + StringVector rndStrings = fillRandomStrings(numUniques, 10, 15); + 
EXPECT_EQUAL(rndStrings.size(), size_t(numUniques)); + StringVector uniques = sortRandomStrings(rndStrings); + EXPECT_EQUAL(uniques.size(), size_t(numUniques)); + // max entrySize = 25 before alignment + uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16); + EnumStoreType ses(numUniques * maxEntrySize, hasPostings); + EnumIndex idx; + + uint32_t cnt = 0; + // add new unique strings + for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) { + ses.addEnum(iter->c_str(), idx); + EXPECT_EQUAL(ses.getNumUniques(), ++cnt); + } + + // check for unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques(ses, uniques); + } else { + testUniques(ses, uniques); + } + + rndStrings = fillRandomStrings(numUniques, 15, 20); + StringVector newUniques = sortRandomStrings(rndStrings); + + typename EnumStoreType::Builder builder; + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + builder.insert(iter->c_str()); + } + + ses.reset(builder); + EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining()); + + // check for old unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx)); + } + + // check for new unique strings + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques(ses, newUniques); + } else { + testUniques(ses, newUniques); + } +} + +void +EnumStoreTest::testHoldListAndGeneration() +{ + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6); + StringEnumStore ses(100 * entrySize, false); + StringEnumStore::Index idx; + StringVector 
uniques; + generation_t sesGen = 0u; + uniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "enum0%u" : "enum%u", i); + uniques.push_back(tmp); + } + StringVector newUniques; + newUniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i); + newUniques.push_back(tmp); + } + uint32_t generation = 0; + std::vector readers; + + // insert first batch of unique strings + for (uint32_t i = 0; i < 100; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + + // associate readers + if (i % 10 == 9) { + Reader::IndexVector indices; + Reader::ExpectedVector expected; + for (uint32_t j = i - 9; j <= i; ++j) { + EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx)); + indices.push_back(idx); + StringEnumStore::Entry entry = ses.getEntry(idx); + EXPECT_TRUE(entry.getEnum() == j); + EXPECT_TRUE(entry.getRefCount() == 1); + EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0); + expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(), + std::string(entry.getValue()))); + } + EXPECT_TRUE(indices.size() == 10); + EXPECT_TRUE(expected.size() == 10); + sesGen = generation++; + readers.push_back(Reader(sesGen, indices, expected)); + checkReaders(ses, sesGen, readers); + } + } + + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // remove all uniques + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + ses.decRefCount(idx); + EXPECT_EQUAL(0u, ses.getRefCount(idx)); + } + ses.freeUnusedEnums(true); + EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + + // check readers again + checkReaders(ses, sesGen, readers); + + // fill 
up buffer + uint32_t i = 0; + while (ses.getRemaining() >= newEntrySize) { + //LOG(info, "fill: %s", newUniques[i].c_str()); + ses.addEnum(newUniques[i++].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + EXPECT_LESS(ses.getRemaining(), newEntrySize); + // buffer on hold list + EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); + + checkReaders(ses, sesGen, readers); + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // buffer no longer on hold list + EXPECT_LESS(ses.getRemaining(), newEntrySize); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize); +} + +void +EnumStoreTest::testMemoryUsage() +{ + StringEnumStore ses(200, false); + StringEnumStore::Index idx; + uint32_t num = 8; + std::vector indices; + std::vector uniques; + for (uint32_t i = 0; i < num; ++i) { + std::stringstream ss; + ss << "enum" << i; + uniques.push_back(ss.str()); + } + generation_t sesGen = 0u; + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx") + + // usage before inserting enums + MemoryUsage usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0)); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + for (uint32_t i = 0; i < num; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + indices.push_back(idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + + // usage after inserting enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, 
usage.allocatedBytesOnHold()); + + // assign new enum for num / 2 of indices + for (uint32_t i = 0; i < num / 2; ++i) { + ses.decRefCount(indices[i]); + EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx)); + ses.incRefCount(idx); + indices[i] = idx; + } + ses.freeUnusedEnums(true); + + // usage after removing enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + ses.performCompaction(400); + + // usage after compaction + MemoryUsage usage2 = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes()); + EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold()); + + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // usage after hold list trimming + MemoryUsage usage3 = ses.getMemoryUsage(); + EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes()); + EXPECT_EQUAL(0u, usage3.deadBytes()); + EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold()); +} + +namespace { + +NumericEnumStore::Index +addEnum(NumericEnumStore &store, uint32_t value) +{ + NumericEnumStore::Index result; + store.addEnum(value, result); + store.incRefCount(result); + return result; +} + +void +decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx) +{ + store.decRefCount(idx); + store.freeUnusedEnums(false); +} + +} + +void +EnumStoreTest::requireThatAddressSpaceUsageIsReported() +{ + const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize() + NumericEnumStore store(200, false); + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + 
NumericEnumStore::Index idx1 = addEnum(store, 10); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + NumericEnumStore::Index idx2 = addEnum(store, 20); + EXPECT_EQUAL(AddressSpace(32, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + decRefCount(store, idx1); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + decRefCount(store, idx2); + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); +} + +size_t +digits(size_t num) +{ + size_t digits = 1; + while (num / 10 > 0) { + num /= 10; + digits++; + } + return digits; +} + +void +EnumStoreTest::testBufferLimit() +{ + size_t enumSize = StringEnumStore::Index::offsetSize(); + StringEnumStore es(enumSize, false); + + size_t strLen = 65536; + char str[strLen + 1]; + for (size_t i = 0; i < strLen; ++i) { + str[i] = 'X'; + } + str[strLen] = 0; + + size_t entrySize = StringEnumStore::getEntrySize(str); + size_t numUniques = enumSize / entrySize; + size_t uniqDigits = digits(numUniques); + + EnumIndex idx; + EnumIndex lastIdx; + for (size_t i = 0; i < numUniques; ++i) { + sprintf(str, "%0*zu", (int)uniqDigits, i); + str[uniqDigits] = 'X'; + es.addEnum(str, idx); + if (i % (numUniques / 32) == 1) { + EXPECT_TRUE(idx.offset() > lastIdx.offset()); + EXPECT_EQUAL(i + 1, es.getNumUniques()); + std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl; + } + lastIdx = idx; + } + EXPECT_EQUAL(idx.offset(), lastIdx.offset()); + EXPECT_EQUAL(numUniques, es.getNumUniques()); + std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl; +} + +template +T +EnumStoreTest::random(T low, T high) +{ + return (rand() % (high - low)) + low; +} + +std::string +EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen) +{ + uint32_t len = random(minLen, maxLen); + std::string retval; + for (uint32_t i = 0; i < len; ++i) { + char c = random('a', 'z'); + 
retval.push_back(c); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen) +{ + StringVector retval; + retval.reserve(numStrings); + for (uint32_t i = 0; i < numStrings; ++i) { + retval.push_back(getRandomString(minLen, maxLen)); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::sortRandomStrings(StringVector & strings) +{ + std::sort(strings.begin(), strings.end()); + std::vector retval; + retval.reserve(strings.size()); + std::vector::iterator pos = std::unique(strings.begin(), strings.end()); + std::copy(strings.begin(), pos, std::back_inserter(retval)); + return retval; +} + +void +EnumStoreTest::checkReaders(const StringEnumStore & ses, + generation_t sesGen, + const std::vector & readers) +{ + (void) sesGen; + //uint32_t refCount = 1000; + StringEnumStore::Type t = ""; + for (uint32_t i = 0; i < readers.size(); ++i) { + const Reader & r = readers[i]; + for (uint32_t j = 0; j < r._indices.size(); ++j) { + EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j])); + EXPECT_TRUE(ses.getValue(r._indices[j], t)); + EXPECT_TRUE(r._expected[j]._string == std::string(t)); + } + } +} + + +int +EnumStoreTest::Main() +{ + TEST_INIT("enumstore_test"); + + testIndex(); + testStringEntry(); + testNumericEntry(); + testFloatEnumStore(); + testAddEnum(); + testCompaction(); + testReset(); + testHoldListAndGeneration(); + testMemoryUsage(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + if (_argc > 1) { + testBufferLimit(); // large test with 8 GB buffer + } + + TEST_DONE(); +} +} + + +TEST_APPHOOK(search::EnumStoreTest); diff --git a/searchlib/src/tests/attribute/extendattributes/.gitignore b/searchlib/src/tests/attribute/extendattributes/.gitignore new file mode 100644 index 00000000000..4018a7d4f5b --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +extendattribute_test 
+searchlib_extendattribute_test_app diff --git a/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt new file mode 100644 index 00000000000..b0803f0a232 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_extendattribute_test_app + SOURCES + extendattribute.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_extendattribute_test_app COMMAND sh extendattribute_test.sh) diff --git a/searchlib/src/tests/attribute/extendattributes/DESC b/searchlib/src/tests/attribute/extendattributes/DESC new file mode 100644 index 00000000000..4f88189a1d7 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/DESC @@ -0,0 +1 @@ +Unit tests for extendable attributes. diff --git a/searchlib/src/tests/attribute/extendattributes/FILES b/searchlib/src/tests/attribute/extendattributes/FILES new file mode 100644 index 00000000000..930039cae19 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/FILES @@ -0,0 +1 @@ +extendattribute.cpp diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp new file mode 100644 index 00000000000..0bb751d26ee --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("extendattribute_test"); +#include +#include + +namespace search { + +class ExtendAttributeTest : public vespalib::TestApp +{ +private: + template + void testExtendInteger(Attribute & attr); + template + void testExtendFloat(Attribute & attr); + template + void testExtendString(Attribute & attr); + +public: + int Main(); +}; + +template +void ExtendAttributeTest::testExtendInteger(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + attr.add(2, 20); + EXPECT_EQUAL(attr.getInt(0), attr.hasMultiValue() ? 1 : 2); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[2]; + EXPECT_EQUAL((static_cast(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1); + EXPECT_EQUAL(v[1].getValue(), 2); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3, 30); + EXPECT_EQUAL(attr.getInt(1), 3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[1]; + EXPECT_EQUAL((static_cast(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template +void ExtendAttributeTest::testExtendFloat(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1.7, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + EXPECT_EQUAL(attr.getFloat(0), 1.7); + attr.add(2.3, 20); + EXPECT_EQUAL(attr.getFloat(0), attr.hasMultiValue() ? 
1.7 : 2.3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[2]; + EXPECT_EQUAL((static_cast(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1.7); + EXPECT_EQUAL(v[1].getValue(), 2.3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3.6, 30); + EXPECT_EQUAL(attr.getFloat(1), 3.6); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[1]; + EXPECT_EQUAL((static_cast(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3.6); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template +void ExtendAttributeTest::testExtendString(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add("1.7", 10); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7"); + attr.add("2.3", 20); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), attr.hasMultiValue() ? 
"1.7" : "2.3"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[2]; + EXPECT_EQUAL((static_cast(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), "1.7"); + EXPECT_EQUAL(v[1].getValue(), "2.3"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add("3.6", 30); + EXPECT_EQUAL(std::string(attr.getString(1, NULL, 0)), "3.6"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[1]; + EXPECT_EQUAL((static_cast(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), "3.6"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +int +ExtendAttributeTest::Main() +{ + TEST_INIT("extendattribute_test"); + + SingleIntegerExtAttribute siattr("si1"); + MultiIntegerExtAttribute miattr("mi1"); + WeightedSetIntegerExtAttribute wsiattr("wsi1"); + EXPECT_TRUE( ! siattr.hasMultiValue() ); + EXPECT_TRUE( miattr.hasMultiValue() ); + EXPECT_TRUE( wsiattr.hasWeightedSetType() ); + testExtendInteger(siattr); + testExtendInteger(miattr); + testExtendInteger(wsiattr); + + SingleFloatExtAttribute sdattr("sd1"); + MultiFloatExtAttribute mdattr("md1"); + WeightedSetFloatExtAttribute wsdattr("wsd1"); + EXPECT_TRUE( ! sdattr.hasMultiValue() ); + EXPECT_TRUE( mdattr.hasMultiValue() ); + EXPECT_TRUE( wsdattr.hasWeightedSetType() ); + testExtendFloat(sdattr); + testExtendFloat(mdattr); + testExtendFloat(wsdattr); + + SingleStringExtAttribute ssattr("ss1"); + MultiStringExtAttribute msattr("ms1"); + WeightedSetStringExtAttribute wssattr("wss1"); + EXPECT_TRUE( ! 
ssattr.hasMultiValue() ); + EXPECT_TRUE( msattr.hasMultiValue() ); + EXPECT_TRUE( wssattr.hasWeightedSetType() ); + testExtendString(ssattr); + testExtendString(msattr); + testExtendString(wssattr); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::ExtendAttributeTest); diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh new file mode 100755 index 00000000000..6f335b18229 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_extendattribute_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/attribute/gidmapattribute/.gitignore b/searchlib/src/tests/attribute/gidmapattribute/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/searchlib/src/tests/attribute/multivaluemapping/.gitignore b/searchlib/src/tests/attribute/multivaluemapping/.gitignore new file mode 100644 index 00000000000..743c738a0a2 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +multivaluemapping_test +searchlib_multivaluemapping_test_app diff --git a/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt new file mode 100644 index 00000000000..36c66b09966 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multivaluemapping_test_app + SOURCES + multivaluemapping_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multivaluemapping_test_app COMMAND searchlib_multivaluemapping_test_app) diff --git a/searchlib/src/tests/attribute/multivaluemapping/DESC b/searchlib/src/tests/attribute/multivaluemapping/DESC new file mode 100644 index 00000000000..44c27ec9926 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/DESC @@ -0,0 +1 @@ +This is a test for the MultiValueMapping class. diff --git a/searchlib/src/tests/attribute/multivaluemapping/FILES b/searchlib/src/tests/attribute/multivaluemapping/FILES new file mode 100644 index 00000000000..bf22403a5fe --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/FILES @@ -0,0 +1 @@ +multivaluemapping_test.cpp diff --git a/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp new file mode 100644 index 00000000000..e78e180856b --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp @@ -0,0 +1,836 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("multivaluemapping_test"); +#include +//#define DEBUG_MULTIVALUE_MAPPING +//#define LOG_MULTIVALUE_MAPPING +#include +#include +#include + +namespace search { + +namespace +{ + +uint32_t dummyCommittedDocIdLimit = std::numeric_limits::max(); + +} + +typedef MultiValueMappingT MvMapping; +typedef MvMapping::Index Index; +typedef multivalue::Index64 Index64; +typedef multivalue::Index32 Index32; +typedef MvMapping::Histogram Histogram; + +class MultiValueMappingTest : public vespalib::TestApp +{ +private: + typedef std::vector IndexVector; + typedef std::vector > ExpectedVector; + typedef vespalib::GenerationHandler::generation_t generation_t; + + class Reader { + public: + uint32_t _startGen; + uint32_t _endGen; + IndexVector _indices; + ExpectedVector _expected; + uint32_t numKeys() { return _indices.size(); } + Reader(uint32_t startGen, uint32_t endGen, const IndexVector & indices, + const ExpectedVector & expected) : + _startGen(startGen), _endGen(endGen), _indices(indices), _expected(expected) {} + }; + + typedef std::vector ReaderVector; + + void testIndex32(); + void testIndex64(); + void testSimpleSetAndGet(); + void testChangingValueCount(); + + void + checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers); + + void testHoldListAndGeneration(); + void testManualCompaction(); + void testVariousGets(); + void testReplace(); + void testMemoryUsage(); + void testShrink(); + void testHoldElem(); + void requireThatAddressSpaceUsageIsReported(); + void requireThatDeadIsNotAccountedInAddressSpaceUsage(); + +public: + int Main(); +}; + +void +MultiValueMappingTest::testIndex32() +{ + { + Index32 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index32 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 
1000u); + EXPECT_EQUAL(idx.idx(), 0x300003e8u); + } + { + Index32 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xffffffffu); + } + { + EXPECT_EQUAL(Index32::maxValues(), 15u); + EXPECT_EQUAL(Index32::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testIndex64() +{ + { + Index64 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index64 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 1000u); + EXPECT_EQUAL(idx.idx(), 0x3000003e8ull); + } + { + Index64 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xf87ffffffull); + } + { + EXPECT_EQUAL(Index64::maxValues(), 1023u); + EXPECT_EQUAL(Index64::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testSimpleSetAndGet() +{ + uint32_t maxValueCount = Index::maxValues() * 2; + uint32_t numKeys = maxValueCount * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys); + EXPECT_EQUAL(mvm.getNumKeys(), numKeys); + Index idx; + + // insert values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + std::vector values(valueCount, key); + Histogram needed(Index::maxValues()); + needed[valueCount] = 1; + if (!mvm.enoughCapacity(needed)) { + mvm.trimHoldLists(1); + mvm.performCompaction(needed); + } + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + idx = mvm._indices[key]; + if (valueCount < Index::maxValues()) { + EXPECT_EQUAL(idx.values(), valueCount); + } else { + EXPECT_EQUAL(idx.values(), Index::maxValues()); + } 
+#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "------------------------------------------------------------"); +#endif + } + EXPECT_TRUE(!mvm.hasKey(numKeys)); + + // check for expected values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + std::vector buffer(valueCount); + EXPECT_EQUAL(mvm.get(key, buffer), valueCount); + EXPECT_TRUE(buffer.size() == valueCount); + EXPECT_EQUAL(static_cast(std::count(buffer.begin(), buffer.end(), key)), valueCount); + uint32_t value; + const uint32_t * handle = NULL; + EXPECT_EQUAL(mvm.get(key, handle), valueCount); + EXPECT_TRUE(valueCount == 0 ? handle == NULL : handle != NULL); + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE(mvm.get(key, i, value)); + EXPECT_EQUAL(value, key); + EXPECT_TRUE(handle[i] == key); + } + EXPECT_TRUE(!mvm.get(key, valueCount, value)); + } + + // reset + mvm.reset(10); + EXPECT_TRUE(mvm.getNumKeys() == 10); + EXPECT_TRUE(!mvm.hasKey(10)); + EXPECT_TRUE(mvm._genHolder.getHeldBytes() == 0); + for (uint32_t key = 0; key < 10; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == 0); + std::vector buffer; + EXPECT_TRUE(mvm.get(key, buffer) == 0); + EXPECT_TRUE(buffer.size() == 0); + } + + // add more keys + for (uint32_t i = 0; i < 5; ++i) { + uint32_t key; + mvm.addKey(key); + EXPECT_TRUE(key == 10 + i); + EXPECT_TRUE(mvm.getNumKeys() == 11 + i); + } +} + +void +MultiValueMappingTest::testChangingValueCount() +{ + uint32_t numKeys = 10; + uint32_t maxCount = Index::maxValues() + 1; + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = numKeys; + } + initCapacity[Index::maxValues()] = numKeys * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys, initCapacity); + + // Increasing the value count for some keys + for (uint32_t valueCount = 1; valueCount <= maxCount; ++valueCount) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, 
"########################### %u ##############################", valueCount); +#endif + uint32_t lastValueCount = valueCount - 1; + // set values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector buffer(valueCount, key); + mvm.set(key, buffer); + } + + Histogram remaining = mvm.getRemaining(); + if (valueCount < Index::maxValues()) { + EXPECT_TRUE(remaining[valueCount] == 0); + } else { + EXPECT_TRUE(remaining[Index::maxValues()] == numKeys * (maxCount - valueCount)); + } + + if (valueCount < Index::maxValues()) { + MvMapping::SingleVectorPtr current = mvm.getSingleVector(valueCount, MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount)); + EXPECT_TRUE(current.first->dead() == 0); + + if (lastValueCount != 0) { + MvMapping::SingleVectorPtr last = mvm.getSingleVector(lastValueCount, MvMapping::ACTIVE); + EXPECT_TRUE(last.first->used() == numKeys * (lastValueCount)); + EXPECT_TRUE(last.first->dead() == numKeys * (lastValueCount)); + } + } else { + MvMapping::VectorVectorPtr current = mvm.getVectorVector(MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount - Index::maxValues() + 1)); + EXPECT_TRUE(current.first->dead() == numKeys * (valueCount - Index::maxValues())); + } + + // check values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), key)) == valueCount); + } + } +} + +void +MultiValueMappingTest::checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers) +{ + for (ReaderVector::iterator iter = readers.begin(); + iter != readers.end(); ) { + if (iter->_endGen <= mvmGen) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "check and remove reader: start = %u, end = %u", + iter->_startGen, iter->_endGen); +#endif + for (uint32_t key = 0; key < iter->numKeys(); ++key) { + Index idx = iter->_indices[key]; + uint32_t valueCount = 
iter->_expected[key].size(); + if (valueCount < Index::maxValues()) { + EXPECT_TRUE(idx.values() == valueCount); + for (uint32_t i = idx.offset() * idx.values(), j = 0; + i < (idx.offset() + 1) * idx.values() && j < iter->_expected[key].size(); + ++i, ++j) + { + EXPECT_TRUE(mvm._singleVectors[idx.vectorIdx()][i] == iter->_expected[key][j]); + } + } else { + EXPECT_TRUE(mvm._vectorVectors[idx.alternative()][idx.offset()].size() == + valueCount); + EXPECT_TRUE(std::equal(mvm._vectorVectors[idx.alternative()][idx.offset()].begin(), + mvm._vectorVectors[idx.alternative()][idx.offset()].end(), + iter->_expected[key].begin())); + } + } + iter = readers.erase(iter); + } else { + ++iter; + } + } +} + +void +MultiValueMappingTest::testHoldListAndGeneration() +{ + uint32_t numKeys = 10; + uint32_t maxCount = Index::maxValues() + 1; + uint32_t maxKeys = numKeys * 2; + + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 1; i < maxCount; ++i) { + initCapacity[i] = numKeys; // make enough capacity for 1/2 of the keys + } + MvMapping mvm(dummyCommittedDocIdLimit, maxKeys, initCapacity); + EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + ReaderVector readers; + uint32_t safeGen = std::numeric_limits::max(); + uint32_t readDuration = 2; + generation_t mvmGen = 0u; + + for (uint32_t valueCount = 1; valueCount < maxCount; ++valueCount) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "#################### count(%u) - gen(%u) ####################", + valueCount, mvm.getGeneration()); +#endif + + // check and remove readers + checkReaders(mvm, mvmGen, readers); + + // update safe generation and removeOldGenerations + safeGen = std::numeric_limits::max(); + for (ReaderVector::iterator iter = readers.begin(); iter != readers.end(); ++iter) { + if ((*iter)._startGen < safeGen) { + safeGen= (*iter)._startGen; + } + } + mvm.trimHoldLists(safeGen); + + // set new values for 1/2 of the keys + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector values(valueCount, 
valueCount * numKeys + key); + mvm.set(key, values); + } + // check new values + for (uint32_t key = 0; key < numKeys; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), valueCount * numKeys + key)) == valueCount); + } + mvm.transferHoldLists(mvmGen); + ++mvmGen; + + // associate reader with current generation + IndexVector indices; + ExpectedVector expected; + for (uint32_t key = 0; key < numKeys; ++key) { + indices.push_back(mvm._indices[key]); + expected.push_back(std::vector(valueCount, valueCount * numKeys + key)); + } + readers.push_back(Reader(mvmGen, mvmGen + readDuration, + indices, expected)); + readDuration = (readDuration % 4) + 2; + + // perform compaction + Histogram needed(Index::maxValues()); + needed[valueCount] = maxKeys; + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + mvm.performCompaction(needed); + + // set new value for all keys (the associated reader should see the old values) + for (uint32_t key = 0; key < maxKeys; ++key) { + std::vector values(valueCount, valueCount * maxKeys + key); + mvm.set(key, values); + } + // check new values + for (uint32_t key = 0; key < maxKeys; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), valueCount * maxKeys + key)) == valueCount); + } + + mvm.transferHoldLists(mvmGen); + ++mvmGen; + } + while (!readers.empty()) { + checkReaders(mvm, mvmGen, readers); + mvm.transferHoldLists(mvmGen); + ++mvmGen; + } +} + +void +MultiValueMappingTest::testManualCompaction() +{ + Histogram initCapacity(Index::maxValues()); + uint32_t maxCount = Index::maxValues() + 1; + for (uint32_t i = 1; i < maxCount; ++i) { + initCapacity[i] = 1; + } + MvMapping mvm(dummyCommittedDocIdLimit, maxCount * 2, initCapacity); 
+ EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + // first update pass. use all capacity + for (uint32_t key = 1; key < maxCount; ++key) { + std::vector values(key, key); + Histogram needed(Index::maxValues()); + needed[key] = 1; + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + } + // second update pass. must perform compaction + for (uint32_t key = maxCount + 1; key < maxCount * 2; ++key) { + uint32_t valueCount = key % maxCount; + std::vector values(valueCount, key); + Histogram needed(Index::maxValues()); + needed[valueCount] = 1; + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + mvm.performCompaction(needed); + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + } + // check for correct buffer values + for (uint32_t key = 0; key < maxCount * 2; ++key) { + uint32_t valueCount = key % maxCount; + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), key)) == valueCount); + } + + // reset + mvm.reset(maxCount, initCapacity); + EXPECT_TRUE(mvm.getNumKeys() == maxCount); + EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + // new update pass. 
use all capacity + for (uint32_t key = 1; key < maxCount; ++key) { + std::vector values(key, key); + Histogram needed(Index::maxValues()); + needed[key] = 1; + EXPECT_EQUAL(mvm.getValueCount(key), 0u); + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + } +} + +void +MultiValueMappingTest::testVariousGets() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector(5, 50)); + mvm.set(2, std::vector(25, 250)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + std::vector buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 0) == 0); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 5) == 0); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 3) == 5); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 3); + } + { + std::vector buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 10) == 5); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + } + { + std::vector buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 23) == 25); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 23); + } + { + std::vector buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 30) == 25); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 25); + } +} + +void +MultiValueMappingTest::testReplace() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector(5, 50)); + mvm.set(2, 
std::vector(25, 100)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + EXPECT_TRUE(mvm.getValueCount(0) == 0); + std::vector replace(5, 50); + mvm.replace(0, replace); + EXPECT_TRUE(mvm.getValueCount(0) == 0); + } + { + EXPECT_TRUE(mvm.getValueCount(1) == 5); + std::vector buffer(5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + + std::vector replace(5, 55); + mvm.replace(1, replace); + EXPECT_TRUE(mvm.getValueCount(1) == 5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)55)) == 5); + } + { + EXPECT_TRUE(mvm.getValueCount(2) == 25); + std::vector buffer(25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)100)) == 25); + + std::vector replace(25, 200); + mvm.replace(2, replace); + EXPECT_TRUE(mvm.getValueCount(2) == 25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast(std::count(buffer.begin(), buffer.end(), (uint32_t)200)) == 25); + } +} + +void +MultiValueMappingTest::testMemoryUsage() +{ + uint32_t numKeys = Index::maxValues() + 4; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + uint32_t totalCnt = 0; + + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = 2; + exp.incAllocatedBytes(i * 2 * sizeof(uint32_t)); + } + initCapacity[Index::maxValues()] = 12; + exp.incAllocatedBytes(12 * sizeof(vespalib::Array)); // due to vector vector + + MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + 
EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + // insert values for all keys + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 1; + std::vector values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array)); // due to vector vector + } + } + + // usage after inserting values + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + totalCnt = 0; + // insert new values for all keys making dead bytes + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 2; + std::vector values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if ((cnt - 1) < Index::maxValues()) { + exp.incDeadBytes((cnt - 1) * sizeof(uint32_t)); // the previous values are marked dead + } else { + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array)); + } + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array)); // due to vector vector + } + } + + // usage after inserting new values making dead bytes + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + // make sure all internal 
vectors are put on hold list + mvm.performCompaction(initCapacity); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytes() - numKeys * sizeof(Index) + exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + + +void +MultiValueMappingTest::testShrink() +{ + uint32_t committedDocIdLimit = dummyCommittedDocIdLimit; + MvMapping mvm(committedDocIdLimit); + for (uint32_t i = 0; i < 10; ++i) { + uint32_t k; + mvm.addKey(k); + EXPECT_EQUAL(i, k); + } + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + uint32_t shrinkTarget = 4; + committedDocIdLimit = shrinkTarget; + mvm.shrinkKeys(shrinkTarget); + mvm.transferHoldLists(1); + mvm.trimHoldLists(2); + EXPECT_EQUAL(shrinkTarget, mvm.getNumKeys()); + EXPECT_EQUAL(shrinkTarget, mvm.getCapacityKeys()); +} + + +void +MultiValueMappingTest::testHoldElem() +{ + uint32_t numKeys = 1; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + + Histogram initCapacity(Index::maxValues()); + initCapacity[Index::maxValues()] = 3; + exp.incAllocatedBytes(3 * sizeof(vespalib::Array)); // due to vector vector + + MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), 0u); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + 
EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + uint32_t key = 0; + uint32_t cnt = Index::maxValues() + 3; + { + std::vector values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + ++cnt; + { + std::vector values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array)); + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + exp.incDeadBytes(sizeof(vespalib::Array)); + exp.decAllocatedBytes((cnt - 1) * sizeof(uint32_t)); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + +namespace { + +void +insertValues(MvMapping &mvm, uint32_t key, uint32_t count) +{ + std::vector values(count, 13); + mvm.set(key, values); +} + +Histogram +createHistogram(uint32_t numValuesPerValueClass) +{ + Histogram result(Index32::maxValues()); + for 
(uint32_t i = 0; i <= Index32::maxValues(); ++i) { + result[i] = numValuesPerValueClass; + } + return result; +} + +const size_t ADDRESS_LIMIT = 134217728; // Index32::offsetSize() + +struct AddressSpaceFixture +{ + MvMapping mvm; + AddressSpaceFixture() + : mvm(dummyCommittedDocIdLimit, 20, createHistogram(4), GrowStrategy(20)) + {} +}; + +} + +void +MultiValueMappingTest::requireThatAddressSpaceUsageIsReported() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 1); + EXPECT_EQUAL(AddressSpace(1, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 2); + insertValues(mvm, 3, 2); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 4, 13); + insertValues(mvm, 5, 13); + insertValues(mvm, 6, 13); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 7, 14); + insertValues(mvm, 8, 14); + insertValues(mvm, 9, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 16); + insertValues(mvm, 12, 17); + insertValues(mvm, 13, 18); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +void +MultiValueMappingTest::requireThatDeadIsNotAccountedInAddressSpaceUsage() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 3); + insertValues(mvm, 2, 3); + insertValues(mvm, 3, 3); + insertValues(mvm, 4, 3); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 4); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 5); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 15); + insertValues(mvm, 12, 15); + 
insertValues(mvm, 13, 15); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 11, 14); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +int +MultiValueMappingTest::Main() +{ + TEST_INIT("multivaluemapping_test"); + + testIndex32(); + testIndex64(); + testSimpleSetAndGet(); + testChangingValueCount(); + testHoldListAndGeneration(); + testManualCompaction(); + testVariousGets(); + testReplace(); + testMemoryUsage(); + testShrink(); + testHoldElem(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + TEST_DO(requireThatDeadIsNotAccountedInAddressSpaceUsage()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::MultiValueMappingTest); diff --git a/searchlib/src/tests/attribute/postinglist/.gitignore b/searchlib/src/tests/attribute/postinglist/.gitignore new file mode 100644 index 00000000000..8cf10f7f9dc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglist_test +searchlib_postinglist_test_app diff --git a/searchlib/src/tests/attribute/postinglist/CMakeLists.txt b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt new file mode 100644 index 00000000000..a22d1ae2fdc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_postinglist_test_app + SOURCES + postinglist.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglist_test_app COMMAND searchlib_postinglist_test_app) diff --git a/searchlib/src/tests/attribute/postinglist/DESC b/searchlib/src/tests/attribute/postinglist/DESC new file mode 100644 index 00000000000..1499e3070fb --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/DESC @@ -0,0 +1 @@ +This is a test for the AttributePostingList class. diff --git a/searchlib/src/tests/attribute/postinglist/FILES b/searchlib/src/tests/attribute/postinglist/FILES new file mode 100644 index 00000000000..268f6c09f1e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/FILES @@ -0,0 +1 @@ +postinglist.cpp diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp new file mode 100644 index 00000000000..ab95ce27a0e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -0,0 +1,707 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP("postinglist_test"); +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +using vespalib::GenerationHandler; + +/* + * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux. 
+ */ + +class AttributePostingListTest : public vespalib::TestApp /* test application; Main() runs the whole scenario */ +{ +private: + /* Limited STL version for validation of full version */ + typedef std::set STLPostingList; + typedef std::map STLValueTree; + + class RandomValue /* one generated (docId, value, order) triple; the relational operators compare _value first, then _docId, then _order */ + { + public: + uint32_t _docId; + int _value; + uint32_t _order; + + RandomValue(void) + : _docId(0), + _value(0u), + _order(0u) + { + } + + RandomValue(uint32_t docId, uint32_t value, uint32_t order) + : _docId(docId), + _value(value), + _order(order) + { + } + + bool + operator<(const RandomValue &rhs) const + { + return (_value < rhs._value || + (_value == rhs._value && + (_docId < rhs._docId || + (_docId == rhs._docId && + _order < rhs._order)))); + } + + bool + operator>(const RandomValue &rhs) const + { + return (_value > rhs._value || + (_value == rhs._value && + (_docId > rhs._docId || + (_docId == rhs._docId && + _order > rhs._order)))); + } + + bool + operator==(const RandomValue &rhs) const + { + return (_value == rhs._value && + _docId == rhs._docId && + _order == rhs._order); + } + }; + + class CompareOrder /* less-than functor comparing _order first, then _value, then _docId */ + { + public: + bool + operator()(const RandomValue &a, const RandomValue &b) + { + return (a._order < b._order || + (a._order == b._order && + (a._value < b._value || + (a._value == b._value && + a._docId < b._docId)))); + } + }; + std::vector _randomValues; /* filled by fillRandomValues(); Main() hands it to the insert/lookup/remove helpers */ + +public: + typedef btree::DataStore IntKeyStore; + typedef btree::BTreeKeyData + AttributePosting; + typedef btree::BTreeStore, + btree::BTreeDefaultTraits> + PostingList; + typedef PostingList::NodeAllocatorType PostingListNodeAllocator; + typedef btree::EntryRef PostingIdx; + typedef btree::EntryRef StoreIndex; + + class IntComp { /* less-than over StoreIndex keys: a valid index is read through _store.getEntry(), an invalid one stands for the constructor-supplied _value */ + private: + const IntKeyStore & _store; + int _value; + int getValue(const StoreIndex & idx) const { + if (idx.valid()) { + return _store.getEntry(idx); + } + return _value; + } + public: + IntComp(const IntKeyStore & store) : _store(store), _value(0) {} + IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {} + 
bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const { + return getValue(lhs) < getValue(rhs); + } + }; + + typedef btree::BTreeRoot IntEnumTree; + typedef IntEnumTree::NodeAllocatorType IntEnumNodeAllocator; + typedef IntEnumTree Tree; + typedef IntEnumNodeAllocator TreeManager; + typedef IntKeyStore ValueHandle; + typedef std::vector RandomValuesVector; +private: /* fixture state; the pointers are allocated in allocTree() and deleted in freeTree() */ + GenerationHandler _handler; + IntKeyStore *_intKeyStore; + IntEnumNodeAllocator *_intNodeAlloc; + IntEnumTree *_intTree; + PostingList *_intPostings; + STLValueTree *_stlTree; + + Rand48 _randomGenerator; + uint32_t _generation; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count, + unsigned int mvcount); + + void + insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + sortRandomValues(void); + + void + doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle); + + void + doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + static const char * + frozenName(bool frozen) /* maps the flag to the label "frozen" or "thawed" */ + { + return frozen ? 
"frozen" : "thawed"; + } +public: + AttributePostingListTest(void) + : vespalib::TestApp(), + _randomValues(), + _handler(), + _intKeyStore(NULL), + _intNodeAlloc(NULL), + _intTree(NULL), + _intPostings(NULL), + _stlTree(NULL), + _randomGenerator(), _generation(0u) /* every member gets a defined initial value */ + { + } + + int Main(void); +}; + + + +void +AttributePostingListTest::allocTree(void) /* allocates the key store, node allocator, tree, posting store and STL reference map with new */ +{ + _intKeyStore = new IntKeyStore; + _intNodeAlloc = new IntEnumNodeAllocator(); + _intTree = new IntEnumTree(); + _intPostings = new PostingList(); + _stlTree = new STLValueTree; +} + + +void +AttributePostingListTest::freeTree(bool verbose) /* clears the tree, freezes and trims hold lists around one generation bump while logging allocator usage, then deletes what allocTree() created; verbose is unused */ +{ + (void) verbose; + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %zu leaves", + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()), + _intTree->size(*_intNodeAlloc)); + _intTree->clear(*_intNodeAlloc); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + _intNodeAlloc->freeze(); + _intPostings->freeze(); + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intPostings->clearBuilder(); + _intPostings->transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); + _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration()); + _intPostings->trimHoldLists(_handler.getFirstUsedGeneration()); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + delete _stlTree; + _stlTree = NULL; + delete _intTree; + _intTree = NULL; + delete _intNodeAlloc; + _intNodeAlloc = NULL; + delete _intKeyStore; + _intKeyStore = NULL; + delete _intPostings; + _intPostings = NULL; +} + + +void +AttributePostingListTest:: +fillRandomValues(unsigned int count, + 
unsigned int mvcount) +{ + unsigned int i; + unsigned int j; + unsigned int mv; + unsigned int mvmax; + unsigned int mvcount2; + unsigned int mvcount3; + + mvmax = 100; + mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2; + LOG(info, + "Filling %u+%u random values", count, mvcount2); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i _value)); + if (!itr.valid()) { +#if 0 + if (valueHandle.needResize()) + doCompactEnumStore(tree, treeMgr, valueHandle); +#endif + StoreIndex idx = valueHandle.addEntry(i->_value); + if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) { + itr = tree.find(idx, treeMgr, IntComp(valueHandle)); + } + } else { + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey())); + + /* TODO: Insert docid to postinglist */ + PostingIdx oldIdx = itr.getData(); + PostingIdx newIdx = oldIdx; + AttributePosting newPosting(i->_docId, + btree::BTreeNoLeafData()); + std::vector additions; + std::vector removals; + additions.push_back(newPosting); + postings.apply(newIdx, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + if (it == stlTree->end()) { + std::pair ir = + stlTree->insert(std::make_pair(i->_value, + STLPostingList())); + ASSERT_TRUE(ir.second && ir.first != stlTree->end() && + ir.first->first == i->_value); + it = ir.first; + } + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + it->second.insert(i->_docId); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + 
STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "insertRandomValues done"); +} + + +void +AttributePostingListTest:: +removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "removeRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + PostingIdx newIdx; + /* + * TODO: Remove docid from postinglist, and only remove + * value from tree if postinglist is empty + */ + if (itr.valid()) { + PostingIdx oldIdx = itr.getData(); + newIdx = oldIdx; + std::vector additions; + std::vector removals; + removals.push_back(i->_docId); + postings.apply(newIdx, &additions[0], &additions[0]+additions.size(), + &removals[0], &removals[0] + removals.size()); + if (newIdx != oldIdx) { + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + } + if (!newIdx.valid()) { + if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) { + itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + } + } + } + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + STLPostingList::iterator it2; + it2 = it->second.find(i->_docId); + ASSERT_TRUE(it2 != it->second.end() && + *it2 == i->_docId); + it->second.erase(it2); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } 
else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { /* walk both posting lists in lock step; keys must match one by one */ + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "removeRandomValues done"); +} + + +void +AttributePostingListTest:: +lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) /* for every entry of values: the value must be found in tree and, when stlTree is given, its posting list must equal the STL reference set */ +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "lookupRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); /* invalid StoreIndex makes IntComp compare against i->_value */ + ASSERT_TRUE(itr.valid() && + valueHandle.getEntry(itr.getKey()) == i->_value); + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + + if (it->second.empty()) { /* NOTE(review): erases from stlTree inside a lookup and expects !itr.valid() although itr.valid() was asserted above - looks unreachable, confirm */ + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(itr.getData()); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(itr.getData()); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + 
ASSERT_TRUE(!it0.valid()); + } + } + } + LOG(info, "lookupRandomValues done"); +} + + +void +AttributePostingListTest::doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle) /* records the buffers that are ACTIVE, calls switchActiveBuffer(), re-adds every key's entry and rewrites the key to the returned ref, then holds the recorded buffers and trims hold lists after one generation bump */ +{ + LOG(info, + "doCompactEnumStore start"); + + Tree::Iterator i = tree.begin(treeMgr); + + uint32_t numBuffers = valueHandle.getNumBuffers(); + std::vector toHold; /* ids of the buffers that were ACTIVE before the switch */ + + for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) { + btree::BufferState &state = valueHandle.getBufferState(bufferId); + if (state._state == btree::BufferState::ACTIVE) { + toHold.push_back(bufferId); + // Freelists already disabled due to variable sized data + } + } + valueHandle.switchActiveBuffer(0, 0u); + + for (; i.valid(); ++i) + { + StoreIndex ov = i.getKey(); + StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(ov)); + + std::atomic_thread_fence(std::memory_order_release); /* release fence is issued before the new key ref is written */ + i.writeKey(nv); + } + typedef GenerationHandler::generation_t generation_t; + for (std::vector::const_iterator + it = toHold.begin(), ite = toHold.end(); it != ite; ++it) { + valueHandle.holdBuffer(*it); + } + generation_t generation = _handler.getCurrentGeneration(); + valueHandle.transferHoldLists(generation); + _handler.incGeneration(); + valueHandle.trimHoldLists(_handler.getFirstUsedGeneration()); + + LOG(info, + "doCompactEnumStore done"); +} + + +void +AttributePostingListTest:: +doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) /* currently only logs start and done: the compaction call is compiled out with #if 0 and all parameters are unused */ +{ + LOG(info, + "doCompactPostingList start"); + +#if 0 + Tree::Iterator i(tree.begin(treeMgr)); + + postings.performCompaction(i, capacityNeeded); +#else + (void) tree; + (void) treeMgr; + (void) postings; + (void) postingsAlloc; +#endif + + LOG(info, + "doCompactPostingList done"); +} + + +void +AttributePostingListTest:: +bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) /* freezes postingsAlloc, transfers hold lists at the current generation, then increments the generation; tree and valueHandle are unused */ +{ + (void) tree; + (void) valueHandle; + 
postingsAlloc.freeze(); + postingsAlloc.transferHoldLists(_handler.getCurrentGeneration()); + postings.transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); +} + +void +AttributePostingListTest:: +removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) /* trims the hold lists of postingsAlloc and postings up to the first used generation; tree and valueHandle are unused */ +{ + (void) tree; + (void) valueHandle; + postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration()); + postings.trimHoldLists(_handler.getFirstUsedGeneration()); +} + +int +AttributePostingListTest::Main() /* scenario: fill values, allocate, insert, lookup, compact the key store, remove, insert again, free */ +{ + TEST_INIT("postinglist_test"); + + fillRandomValues(1000, 10); + + allocTree(); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + _intNodeAlloc->freeze(); /* freeze and hand over hold lists before the key store is compacted */ + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); + removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + freeTree(true); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributePostingListTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/.gitignore b/searchlib/src/tests/attribute/postinglistattribute/.gitignore new file mode 100644 index 00000000000..9614cdd7626 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglistattribute_test +searchlib_postinglistattribute_test_app diff --git a/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt new file mode 100644 index 00000000000..77d137c7b6e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt @@ -0,0 +1,8 
@@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_postinglistattribute_test_app + SOURCES + postinglistattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglistattribute_test_app COMMAND sh postinglistattribute_test.sh) diff --git a/searchlib/src/tests/attribute/postinglistattribute/DESC b/searchlib/src/tests/attribute/postinglistattribute/DESC new file mode 100644 index 00000000000..04c97a729a0 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/DESC @@ -0,0 +1 @@ +Unit tests for subclasses of PostingListAttribute. diff --git a/searchlib/src/tests/attribute/postinglistattribute/FILES b/searchlib/src/tests/attribute/postinglistattribute/FILES new file mode 100644 index 00000000000..56029570a21 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/FILES @@ -0,0 +1 @@ +postinglistattribute.cpp diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp new file mode 100644 index 00000000000..5e248dc8758 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -0,0 +1,1021 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("postinglistattribute_test"); +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using std::shared_ptr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + +namespace search { + +using attribute::CollectionType; +using attribute::BasicType; +using attribute::Config; +using queryeval::PostingInfo; +using queryeval::MinMaxPostingInfo; +using search::fef::TermFieldMatchData; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr SearchContextPtr; +typedef std::unique_ptr SearchBasePtr; + +void +toStr(std::stringstream &ss, SearchIterator &it) +{ + it.initFullRange(); + it.seek(1u); + bool first = true; + while ( !it.isAtEnd()) { + if (first) + first = false; + else + ss << ","; + ss << it.getDocId(); + it.seek(it.getDocId() + 1); + } +} + + +bool +assertIterator(const std::string &exp, SearchIterator &it) +{ + std::stringstream ss; + toStr(ss, it); + if (!EXPECT_EQUAL(exp, ss.str())) + return false; + return true; +} + + +class PostingListAttributeTest : public vespalib::TestApp +{ +private: + typedef IntegerAttribute::largeint_t largeint_t; + typedef AttributeVector::SP AttributePtr; + typedef std::set DocSet; + + typedef SingleValueNumericPostingAttribute< + EnumAttribute > > + Int32PostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute >, + multivalue::MVMTemplateArg< + multivalue::Value, + multivalue::Index32> > Int32ArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue, + multivalue::Index32> > Int32WsetPostingListAttribute; + + typedef SingleValueNumericPostingAttribute< + EnumAttribute > > + FloatPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute >, + multivalue::MVMTemplateArg< + multivalue::Value, + multivalue::Index32> > 
FloatArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue, + multivalue::Index32> > FloatWsetPostingListAttribute; + + typedef SingleValueStringPostingAttribute StringPostingListAttribute; + typedef ArrayStringPostingAttribute StringArrayPostingListAttribute; + typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute; + + template + void + populate(VectorType &v); + + template + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + void + buildTermQuery(std::vector & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template + SearchContextPtr + getSearch(const V & vec); + + template + SearchContextPtr + getSearch2(const V & vec); + + bool + assertSearch(const std::string &exp, StringAttribute &sa); + + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + + template + void checkPostingList(const VectorType & vec, const std::vector & values, const Range & range); + + template + void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector & values); + void testPostingList(); + + template + void checkPostingList(AttributeType & vec, ValueType value, DocSet expected); + template + void checkNonExistantPostingList(AttributeType & vec, ValueType value); + template + void testArithmeticValueUpdate(const AttributePtr & ptr); + void testArithmeticValueUpdate(); + + template + void testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value); + void testReload(); + + template + void + testMinMax(AttributePtr &ptr1, uint32_t trimmed); + + template + void + testMinMax(AttributePtr &ptr1, AttributePtr &ptr2); + + void + testMinMax(void); + + void + 
testStringFold(void); +public: + int Main(); +}; + +template <> +void +PostingListAttributeTest::populate(IntegerAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 12); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, -42, 3); + } else { + v.update(i, -42); + } + v.commit(); + } + v.commit(); +} + +template <> +void +PostingListAttributeTest::populate(StringAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 12); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, "foo", 3); + } else { + v.update(i, "foo"); + } + v.commit(); + } +} + + +template +VectorType & +PostingListAttributeTest::as(AttributePtr &v) +{ + VectorType *res = 
dynamic_cast(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +PostingListAttributeTest::asInt(AttributePtr &v) +{ + return as(v); +} + + +StringAttribute & +PostingListAttributeTest::asString(AttributePtr &v) +{ + return as(v); +} + + +void +PostingListAttributeTest::buildTermQuery(std::vector &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template +SearchContextPtr +PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast(vec)). 
+ getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch(const IntegerAttribute &v) +{ + return getSearch(v, "[-42;-42]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch(const StringAttribute &v) +{ + return getSearch + (v, "foo", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2(const IntegerAttribute &v) +{ + return getSearch(v, "[-43;-43]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2(const StringAttribute &v) +{ + return getSearch + (v, "bar", false); +} + + +bool +PostingListAttributeTest::assertSearch(const std::string &exp, + StringAttribute &sa) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch(sa); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + if (!EXPECT_TRUE(assertIterator(exp, *sb))) + return false; + return true; +} + + +void +PostingListAttributeTest::addDocs(const AttributePtr & ptr, uint32_t numDocs) +{ + for (uint32_t i = 0; i < numDocs; ++i) { + uint32_t doc; + ASSERT_TRUE(ptr->addDoc(doc)); + ASSERT_TRUE(doc == i); + ASSERT_TRUE(ptr->getNumDocs() == i + 1); + } + ASSERT_TRUE(ptr->getNumDocs() == numDocs); +} + +class RangeAlpha { +private: + uint32_t _part; +public: + RangeAlpha(uint32_t part) : _part(part) { } + uint32_t getBegin(uint32_t i) const { return i * _part; } + uint32_t getEnd(uint32_t i) const { return (i + 1) * _part; } +}; + +class RangeBeta { +private: + uint32_t _part; + uint32_t _numValues; +public: + RangeBeta(uint32_t part, uint32_t numValues) : _part(part), _numValues(numValues) { } + uint32_t getBegin(uint32_t i) const { return (_numValues - 1 - i) * _part; } + uint32_t getEnd(uint32_t i) const { return (_numValues - i) * _part; } +}; + +template +void +PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::vector & values, + const 
RangeGenerator & range) +{ + const typename VectorType::EnumStore & enumStore = vec.getEnumStore(); + const typename VectorType::Dictionary & dict = + enumStore.getPostingDictionary(); + const typename VectorType::PostingList & postingList = vec.getPostingList(); + + for (size_t i = 0; i < values.size(); ++i) { + uint32_t docBegin = range.getBegin(i); + uint32_t docEnd = range.getEnd(i); + + typename VectorType::DictionaryIterator itr = + dict.find(typename VectorType::EnumIndex(), + typename VectorType::ComparatorType(enumStore, values[i])); + ASSERT_TRUE(itr.valid()); + + typename VectorType::PostingList::Iterator postings; + postings = postingList.begin(itr.getData()); + + uint32_t doc = docBegin; + for (; postings.valid(); ++postings) { + EXPECT_EQUAL(doc++, postings.getKey()); + } + EXPECT_EQUAL(doc, docEnd); + } +} + +template +void +PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector & values) +{ + LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast(*ptr1.get()); + VectorType & vec2 = static_cast(*ptr2.get()); + addDocs(ptr1, numDocs); + + uint32_t part = numDocs / values.size(); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = doc / part; + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + +#if 0 + std::cout << "***** printBuffer 0 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 0); + std::cout << "***** printBuffer 1 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 1); + std::cout << "***** printCurrentContent ***** " << std::endl; + vec1.getEnumStore().printCurrentContent(std::cout); + std::cout << "***** printPostingListContent *****" << std::endl; + vec1.printPostingListContent(std::cout); +#endif + + // check posting list for correct content + checkPostingList(vec1, values, RangeAlpha(part)); + + // load and save vector + 
ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); +#if 0 + std::cout << "***** vec2.printPostingListContent *****" << std::endl; + vec2.printPostingListContent(std::cout); +#endif + checkPostingList(vec2, values, RangeAlpha(part)); + + // insert values in another order + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = values.size() - 1 - (doc / part); + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + + // check posting list again for correct content + checkPostingList(vec1, values, RangeBeta(part, values.size())); + + // load and save vector + ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); + checkPostingList(vec2, values, RangeBeta(part, values.size())); +} + +void +PostingListAttributeTest::testPostingList() +{ + uint32_t numDocs = 1000; + uint32_t numValues = 50; + + { // IntegerAttribute + std::vector values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("aint32_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + } + + { // FloatingPointAttribute + std::vector values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::FLOAT, 
CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("afloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("afloat_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsfloat_2", cfg); + testPostingList(ptr1, ptr2, numDocs, values); + } + } + + { // StringAttribute + std::vector values; + std::vector charValues; + values.reserve(numValues); + charValues.reserve(numValues); + values.push_back(""); + charValues.push_back(values.back().c_str()); + for (uint32_t i = 1; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << i; + values.push_back(ss.str()); + charValues.push_back(values.back().c_str()); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testPostingList(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("astr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("astr_2", cfg); + testPostingList(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + 
AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testPostingList(ptr1, ptr2, numDocs, charValues); + } + } +} + +template +void +PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected) +{ + const typename AttributeType::EnumStore & enumStore = vec.getEnumStore(); + const typename AttributeType::Dictionary & dict = + enumStore.getPostingDictionary(); + const typename AttributeType::PostingList & postingList = vec.getPostingList(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + ASSERT_TRUE(itr.valid()); + + typename AttributeType::PostingList::Iterator postings; + postings = postingList.begin(itr.getData()); + + DocSet::iterator docBegin = expected.begin(); + DocSet::iterator docEnd = expected.end(); + for (; postings.valid(); ++postings) { + EXPECT_EQUAL(*docBegin++, postings.getKey()); + } + EXPECT_TRUE(docBegin == docEnd); +} + +template +void +PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value) +{ + const typename AttributeType::Dictionary & dict = + vec.getEnumStore().getPostingDictionary(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + EXPECT_TRUE(!itr.valid()); +} + +template +void +PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + AttributeType & vec = static_cast(*ptr.get()); + + addDocs(ptr, 4); + + uint32_t allDocs[] = {0, 1, 2, 3}; + checkNonExistantPostingList(vec, 0); + + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + ptr->commit(); + + checkNonExistantPostingList(vec, 0); + checkPostingList(vec, 100, 
DocSet(allDocs, allDocs + 4)); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + ptr->commit(); + + { + uint32_t docs[] = {0}; + checkPostingList(vec, 110, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList(vec, 90, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList(vec, 1000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList(vec, 10, DocSet(docs, docs + 1)); + } + + + // several inside a single commit + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 2000)); + } + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + ptr->commit(); + + vespalib::asciistream ss; + vec.printPostingListContent(ss); + std::cout << ss.str(); + { + uint32_t docs[] = {0}; + checkPostingList(vec, 2020, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList(vec, 1980, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList(vec, 200000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList(vec, 20, DocSet(docs, docs + 1)); + } + checkNonExistantPostingList(vec, 100); + checkNonExistantPostingList(vec, 110); + checkNonExistantPostingList(vec, 90); + checkNonExistantPostingList(vec, 1000); + checkNonExistantPostingList(vec, 10); + checkNonExistantPostingList(vec, 2000); + checkNonExistantPostingList(vec, 2010); + checkNonExistantPostingList(vec, 1990); + checkNonExistantPostingList(vec, 20000); + 
checkNonExistantPostingList(vec, 200); +} + +void +PostingListAttributeTest::testArithmeticValueUpdate() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sint32", cfg); + testArithmeticValueUpdate(ptr); + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", cfg); + testArithmeticValueUpdate(ptr); + } +} + + +template +void +PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value) +{ + LOG(info, "testReload: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast(*ptr1.get()); + + addDocs(ptr1, 5); + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(vec1.update(doc, value)); + } + ptr1->commit(); + + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + + EXPECT_TRUE(ptr2->getNumDocs() == 5); + ValueType buffer[1]; + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1); + EXPECT_EQUAL(buffer[0], value); + } +} + +void +PostingListAttributeTest::testReload() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload(ptr1, ptr2, 100); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload(ptr1, ptr2, 0); + } + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = 
AttributeFactory::createAttribute("sfloat_2", cfg); + testReload(ptr1, ptr2, 100); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testReload(ptr1, ptr2, 0); + } + } + + { // StringAttribute + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload(ptr1, ptr2, "unique"); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload(ptr1, ptr2, ""); + } + } +} + +template +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch(as(ptr1)); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + sb->initFullRange(); + + const PostingInfo *pi = sb->getPostingInfo(); + ASSERT_TRUE(pi != NULL); + const MinMaxPostingInfo *mmpi = + dynamic_cast(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 2u) { + EXPECT_EQUAL(3, mmpi->getMinWeight()); + } else { + EXPECT_EQUAL(-3, mmpi->getMinWeight()); + } + EXPECT_EQUAL(3, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + + sb->seek(1u); + EXPECT_EQUAL(1u, sb->getDocId()); + + sc = getSearch2(as(ptr1)); + sc->fetchPostings(true); + sb = sc->createIterator(&md, true); + sb->initFullRange(); + + pi = sb->getPostingInfo(); + if (trimmed == 2) { + ASSERT_TRUE(pi == NULL); + } else { + ASSERT_TRUE(pi != NULL); + mmpi = dynamic_cast(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 0) { + EXPECT_EQUAL(12, mmpi->getMinWeight()); + } else { + EXPECT_EQUAL(14, mmpi->getMinWeight()); + } + 
EXPECT_EQUAL(14, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + } + + sb->seek(1u); + if (trimmed == 2u) { + EXPECT_TRUE(sb->isAtEnd()); + } else { + EXPECT_EQUAL(7u, sb->getDocId()); + } +} + +template +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2) +{ + uint32_t numDocs = 100; + addDocs(ptr1, numDocs); + populate(as(ptr1)); + + TEST_DO(testMinMax(ptr1, 0u)); + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + testMinMax(ptr2, 0u); + + ptr2->clearDoc(20); + ptr2->clearDoc(25); + ptr2->commit(); + TEST_DO(testMinMax(ptr2, 1u)); + + ptr2->clearDoc(7); + ptr2->commit(); + TEST_DO(testMinMax(ptr2, 2u)); + +} + +void +PostingListAttributeTest::testMinMax(void) +{ + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testMinMax(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = + AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = + AttributeFactory::createAttribute("wsint32_2", cfg); + testMinMax(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testMinMax(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testMinMax(ptr1, ptr2); + } +} + + +void +PostingListAttributeTest::testStringFold(void) +{ + Config 
cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + + addDocs(ptr1, 6); + + StringAttribute &sa(asString(ptr1)); + + sa.update(1, "a"); + sa.commit(); + sa.update(3, "FOo"); + sa.commit(); + sa.update(4, "foo"); + sa.commit(); + sa.update(5, "z"); + sa.commit(); + + EXPECT_TRUE(assertSearch("3,4", sa)); + + sa.update(2, "FOO"); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3,4", sa)); + + sa.update(4, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3", sa)); + + sa.update(2, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("3", sa)); + + sa.update(3, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("", sa)); +} + + +int +PostingListAttributeTest::Main() +{ + TEST_INIT("postinglistattribute_test"); + + testPostingList(); + testArithmeticValueUpdate(); + testReload(); + testMinMax(); + testStringFold(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::PostingListAttributeTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh new file mode 100755 index 00000000000..e6f9c214cb9 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_postinglistattribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/runnable.h b/searchlib/src/tests/attribute/runnable.h new file mode 100644 index 00000000000..418230a2fc5 --- /dev/null +++ b/searchlib/src/tests/attribute/runnable.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include + +namespace search { + +class Runnable : public FastOS_Runnable +{ +protected: + uint32_t _id; + vespalib::Monitor _cond; + bool _done; + bool _stopped; + +public: + Runnable(uint32_t id) : + _id(id), _cond(), _done(false), _stopped(false) + { } + void Run(FastOS_ThreadInterface *, void *) { + doRun(); + + vespalib::MonitorGuard guard(_cond); + _stopped = true; + guard.broadcast(); + } + virtual void doRun() = 0; + void stop() { + vespalib::MonitorGuard guard(_cond); + _done = true; + } + void join() { + vespalib::MonitorGuard guard(_cond); + while (!_stopped) { + guard.wait(); + } + } +}; + +} // search + diff --git a/searchlib/src/tests/attribute/searchable/.gitignore b/searchlib/src/tests/attribute/searchable/.gitignore new file mode 100644 index 00000000000..663692907f6 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/.gitignore @@ -0,0 +1,4 @@ +/my_logctl_file +searchlib_attribute_blueprint_test_app +searchlib_attribute_searchable_adapter_test_app +searchlib_attribute_weighted_set_blueprint_test_app diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt new file mode 100644 index 00000000000..ed76520af29 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attribute_searchable_adapter_test_app + SOURCES + attribute_searchable_adapter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_searchable_adapter_test_app COMMAND sh attribute_searchable_adapter_test.sh) +vespa_add_executable(searchlib_attribute_weighted_set_blueprint_test_app + SOURCES + attribute_weighted_set_blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_weighted_set_blueprint_test_app COMMAND searchlib_attribute_weighted_set_blueprint_test_app) +vespa_add_executable(searchlib_attribute_blueprint_test_app + SOURCES + attributeblueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp new file mode 100644 index 00000000000..1d69f516b52 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -0,0 +1,689 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::AttributeEnumGuard; +using search::AttributeFactory; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::IntegerAttribute; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::PredicateQueryTerm; +using search::query::Rectangle; +using search::query::SimpleDotProduct; +using search::query::SimpleLocationTerm; +using search::query::SimplePredicateQuery; +using search::query::SimplePrefixTerm; +using search::query::SimpleRangeTerm; +using search::query::SimpleSuffixTerm; +using search::query::SimpleSubstringTerm; +using search::query::SimpleStringTerm; +using search::query::SimpleWandTerm; +using search::query::SimpleWeightedSetTerm; +using search::query::Weight; +using search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::FakeRequestContext; +using search::queryeval::MinMaxPostingInfo; +using search::queryeval::ParallelWeakAndSearch; +using search::queryeval::PostingInfo; +using search::queryeval::SearchIterator; +using std::vector; +using vespalib::string; +using namespace search::attribute; +using namespace search; + +namespace { + +const string field = "field"; +const string other = "other"; +const int32_t weight = 1; +const uint32_t num_docs = 1000; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + AttributeVector::SP _other; + +public: + explicit 
MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr), _other() {} + + explicit MyAttributeManager(AttributeVector::SP attr) + : _attribute_vector(attr), _other() {} + + void set_other(AttributeVector::SP attr) { + _other = attr; + } + + virtual AttributeGuard::UP getAttribute(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeGuard(_other)); + } else { + return AttributeGuard::UP(nullptr); + } + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeEnumGuard(_other)); + } else { + return AttributeGuard::UP(nullptr); + } + } + + virtual void getAttributeList(vector &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +struct Result { + struct Hit { + uint32_t docid; + double raw_score; + int32_t match_weight; + Hit(uint32_t id, double raw, int32_t match_weight_in) + : docid(id), raw_score(raw), match_weight(match_weight_in) {} + }; + size_t est_hits; + bool est_empty; + bool has_minmax; + int32_t min_weight; + int32_t max_weight; + size_t wand_hits; + int64_t wand_initial_threshold; + double wand_boost_factor; + std::vector hits; + vespalib::string iterator_dump; + + Result(size_t est_hits_in, bool est_empty_in) + : est_hits(est_hits_in), est_empty(est_empty_in), + has_minmax(false), min_weight(0), max_weight(0), + wand_hits(0), wand_initial_threshold(0), wand_boost_factor(0.0), + hits(), iterator_dump() {} + + void set_minmax(int32_t min, int32_t max) { + has_minmax = true; + min_weight = min; + max_weight = max; + } +}; + +void extract_posting_info(Result &result, const PostingInfo *postingInfo) { 
+ if (postingInfo != NULL) { + /* Weights are copied only when the cast below yields a non-null MinMaxPostingInfo; otherwise result is left untouched. */ const MinMaxPostingInfo *minMax = dynamic_cast(postingInfo); + if (minMax != NULL) { + result.set_minmax(minMax->getMinWeight(), minMax->getMaxWeight()); + } + } +} + +/* Copy the match parameters of a parallel weak-and iterator (scores to track, score threshold, threshold boost factor) into result. A null wand leaves the wand_* fields of result at their current values. */ void extract_wand_params(Result &result, ParallelWeakAndSearch *wand) { + if (wand != nullptr) { + result.wand_hits = wand->getMatchParams().scores.getScoresToTrack(); + result.wand_initial_threshold = wand->getMatchParams().scoreThreshold; + result.wand_boost_factor = wand->getMatchParams().thresholdBoostFactor; + } +} + +/* Run node against the attribute field named by the file-level constant field (field id 0) and collect everything the tests inspect: the blueprint hit estimate, min/max posting info, wand parameters, the iterator dump string and one Hit (docid, raw score, weight) per matching document. strict is forwarded to both fetchPostings and createSearch. Asserts that both the blueprint and the iterator were created. */ Result do_search(IAttributeManager &attribute_manager, const Node &node, bool strict) { + uint32_t fieldId = 0; + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + AttributeBlueprintFactory source; + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + Blueprint::UP bp = source.createBlueprint(requestContext, FieldSpec(field, fieldId, handle), node); + ASSERT_TRUE(bp.get() != nullptr); + /* The estimate is captured before fetchPostings and createSearch are called. */ Result result(bp->getState().estimate().estHits, bp->getState().estimate().empty); + bp->fetchPostings(strict); + SearchIterator::UP iterator = bp->createSearch(*match_data, strict); + ASSERT_TRUE(iterator.get() != nullptr); + iterator->initFullRange(); + extract_posting_info(result, iterator->getPostingInfo()); + extract_wand_params(result, dynamic_cast(iterator.get())); + result.iterator_dump = iterator->asString(); + /* Probing starts at docid 1, so docid 0 is never reported as a hit. */ for (uint32_t docid = 1; docid < num_docs; ++docid) { + if (iterator->seek(docid)) { + iterator->unpack(docid); + result.hits.emplace_back(docid, + match_data->resolveTermField(handle)->getRawScore(), + match_data->resolveTermField(handle)->getWeight()); + } + } + return result; +} + +/* Run do_search for node and check the hit estimate: below num_docs / 10 when fast_search is set, otherwise non-empty and equal to num_docs. Returns true only when the single hit is the last document (num_docs - 1). */ bool search(const Node &node, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_LESS(result.est_hits, num_docs / 10); + } else { +
EXPECT_TRUE(!result.est_empty); + EXPECT_EQUAL(num_docs, result.est_hits); + } + return (result.hits.size() == 1) && (result.hits[0].docid == (num_docs - 1)); +} + +/* Convenience overload: wraps term in a SimpleStringTerm on view "field" and delegates to the Node overload. The term text is pushed as test state for failure messages. */ bool search(const string &term, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + return search(node, attribute_manager, fast_search, strict); +} + +template struct AttributeVectorTypeFinder { + //typedef search::SingleValueStringAttribute Type; + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder { + typedef search::SingleValueNumericAttribute > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +/* Append n documents to attr. For attributes that inherit PredicateAttribute the min-feature entry of every new document is set to 0. Finally asserts that the last docid handed out by addDoc is n - 1; callers in this file always pass n greater than 0. */ void add_docs(AttributeVector *attr, size_t n) { + AttributeVector::DocId docid = 0; /* initialised so the check after the loop never reads an indeterminate value when n == 0 */ + for (size_t i = 0; i < n; ++i) { + attr->addDoc(docid); + if (attr->inherits(PredicateAttribute::classId)) { + const_cast(static_cast(attr)->getMinFeatureVector().first)[docid] = 0; + } + } + ASSERT_EQUAL(n - 1, docid); +} + +/* Build a manager around a freshly allocated attribute of the type chosen by AttributeVectorTypeFinder, holding num_docs documents with value stored through AT::add. The raw pointer is handed to MyAttributeManager, which keeps it in an AttributeVector::SP. */ template +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + add_docs(attr, num_docs); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +/* Build a manager around a single-value INT64 attribute with fast-search enabled. num_docs documents are added and only the last one (num_docs - 1) is updated to value. */ MyAttributeManager makeFastSearchLongAttributeManager(int64_t value) { + Config cfg(BasicType::INT64, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + IntegerAttribute *attr = static_cast(attr_ptr.get()); + add_docs(attr, num_docs); + attr->update(num_docs - 1, value); + attr->commit(); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("requireThatIteratorsCanBeCreated") { +
MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +TEST("requireThatRangeTermsWorkToo") { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +TEST("requireThatPrefixTermsWork") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo", "field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatLocationTermsWork") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatOptimizedLocationTermsWork") { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); +} + +TEST("require that optimized location search works with wrapped bounding box (no hits)") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + SimpleLocationTerm term1(Location(Rectangle(5, 5, 15, 15)), field, 0, Weight(0)); // unwrapped + SimpleLocationTerm term2(Location(Rectangle(15, 5, 5, 15)), field, 0, Weight(0)); // wrapped x + SimpleLocationTerm term3(Location(Rectangle(5, 15, 15, 5)), field, 0, Weight(0)); // wrapped y + Result result1 = do_search(attribute_manager, term1, true); + Result result2 = do_search(attribute_manager, term2, true); + Result result3 = do_search(attribute_manager, term3, true); + EXPECT_EQUAL(1u, result1.hits.size()); + EXPECT_EQUAL(0u, result2.hits.size()); + EXPECT_EQUAL(0u, result3.hits.size()); + EXPECT_TRUE(result1.iterator_dump.find("LocationPreFilterIterator") != vespalib::string::npos); + EXPECT_TRUE(result2.iterator_dump.find("EmptySearch") != vespalib::string::npos); + EXPECT_TRUE(result3.iterator_dump.find("EmptySearch") != vespalib::string::npos); +} + +void set_weights(StringAttribute *attr, uint32_t docid, + int32_t foo_weight, int32_t bar_weight, int32_t baz_weight) +{ + attr->clearDoc(docid); + if (foo_weight > 0) attr->append(docid, "foo", foo_weight); + if (bar_weight > 0) attr->append(docid, "bar", 
bar_weight); + if (baz_weight > 0) attr->append(docid, "baz", baz_weight); + attr->commit(); +} + +MyAttributeManager make_weighted_string_attribute_manager(bool fast_search) { + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(fast_search); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + StringAttribute *attr = static_cast(attr_ptr.get()); + add_docs(attr, num_docs); + set_weights(attr, 10, 0, 200, 0); + set_weights(attr, 20, 100, 200, 300); + set_weights(attr, 30, 0, 0, 300); + set_weights(attr, 40, 100, 0, 0); + set_weights(attr, 50, 1000, 0, 300); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("require that attribute dot product works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(200.0, result.hits[0].raw_score); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(600.0, result.hits[1].raw_score); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(300.0, result.hits[2].raw_score); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(100.0, result.hits[3].raw_score); + EXPECT_EQUAL(50u, 
result.hits[4].docid); + EXPECT_EQUAL(1300.0, result.hits[4].raw_score); + } +} + +TEST("require that attribute dot product can produce no hits") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + ASSERT_EQUAL(0u, result.hits.size()); + EXPECT_EQUAL(0u, result.est_hits); + EXPECT_TRUE(result.est_empty); + } +} + +TEST("require that direct attribute iterators work") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleStringTerm empty_node("notfoo", "", 0, Weight(1)); + Result empty_result = do_search(attribute_manager, empty_node, strict); + EXPECT_EQUAL(0u, empty_result.hits.size()); + SimpleStringTerm node("foo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_EQUAL(3u, result.est_hits); + EXPECT_TRUE(result.has_minmax); + EXPECT_EQUAL(100, result.min_weight); + EXPECT_EQUAL(1000, result.max_weight); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + != vespalib::string::npos); + } else { + EXPECT_EQUAL(num_docs, result.est_hits); + EXPECT_FALSE(result.has_minmax); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + == vespalib::string::npos); + } + ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(20u, 
result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); + } +} + +const char *as_str(bool flag) { return flag? "true" : "false"; } + +TEST("require that attribute parallel wand works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + if (EXPECT_EQUAL(2u, result.hits.size())) { + if (result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + EXPECT_EQUAL(10u, result.wand_hits); + EXPECT_EQUAL(500, result.wand_initial_threshold); + EXPECT_EQUAL(1.5, result.wand_boost_factor); + } + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(600.0, result.hits[0].raw_score); + EXPECT_EQUAL(50u, result.hits[1].docid); + EXPECT_EQUAL(1300.0, result.hits[1].raw_score); + } else { + fprintf(stderr, " (fast_search: %s, strict: %s)\n", + as_str(fast_search), as_str(strict)); + assert(false); + } + } +} + +TEST("require that attribute weighted set term works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWeightedSetTerm node(field, 0, Weight(1)); + 
node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(30)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str()); + EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos); + } + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20, result.hits[0].match_weight); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30, result.hits[1].match_weight); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(30, result.hits[2].match_weight); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(10, result.hits[3].match_weight); + EXPECT_EQUAL(50u, result.hits[4].docid); + EXPECT_EQUAL(30, result.hits[4].match_weight); + } +} + +TEST("require that predicate query in non-predicate field yields empty.") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + EXPECT_TRUE(result.est_empty); + EXPECT_EQUAL(0u, result.hits.size()); +} + +TEST("require that predicate query in predicate field yields results.") { + PredicateAttribute *attr = + new PredicateAttribute( + field, Config(BasicType::PREDICATE, + CollectionType::SINGLE)); + add_docs(attr, num_docs); + attr->getIndex().indexEmptyDocument(2); // matches anything + attr->getIndex().commit(); + const_cast(attr->getIntervalRangeVector())[2] = 1u; + MyAttributeManager attribute_manager(attr); + + 
PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(1u, result.hits.size()); +} + +TEST("require that substring terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSubstringTerm node("a", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + /* "a" occurs in "bar" and "baz" but not in "foo", so doc 40 (foo only) is the one populated doc that must not match. */ ASSERT_EQUAL(4u, result.hits.size()); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(50u, result.hits[3].docid); +} + +TEST("require that suffix terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSuffixTerm node("oo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + /* Only "foo" ends in "oo"; docs 20, 40 and 50 are the ones given a positive foo weight. */ ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); +} + +/* Store value in document docid of attr and commit, dispatching on the dynamic attribute kind: integer and floating point attributes receive the number itself, string attributes receive a two-character encoding of it. Any other attribute kind fails the test. */ void set_attr_value(AttributeVector &attr, uint32_t docid, size_t value) { + IntegerAttribute *int_attr = dynamic_cast(&attr); + FloatingPointAttribute *float_attr = dynamic_cast(&attr); + StringAttribute *string_attr = dynamic_cast(&attr); + if (int_attr != nullptr) { + int_attr->update(docid, value); + int_attr->commit(); + } else if (float_attr != nullptr) { + float_attr->update(docid, value); + float_attr->commit(); + } else if (string_attr != nullptr) { + /* NOTE(review): the digits are base 27, so either character can become 'a' + 26, which is '{' rather than a letter; the bound 27*26 + 26 suggests a-z was intended. The encoding is still distinct per value and keeps numeric order for the values used in this file - confirm intent before changing. */ ASSERT_LESS(value, size_t(27*26 + 26)); + vespalib::string str; + str.push_back('a' + value / 27); + str.push_back('a' + value % 27); + string_attr->update(docid, str); + string_attr->commit(); + } else { + ASSERT_TRUE(false); + } +} + +/* Build a manager with two single-value attributes of num_docs documents each: "field" of field_type and "other" of other_type, with fast-search set per argument. For every docid i from 1 up, field holds i / 5 and other holds i / 10. */ MyAttributeManager make_diversity_setup(BasicType::Type field_type, + bool field_fast_search, + BasicType::Type other_type, + bool other_fast_search) +{ + Config
field_cfg(field_type, CollectionType::SINGLE); + field_cfg.setFastSearch(field_fast_search); + AttributeVector::SP field_attr = AttributeFactory::createAttribute(field, field_cfg); + Config other_cfg(other_type, CollectionType::SINGLE); + other_cfg.setFastSearch(other_fast_search); + AttributeVector::SP other_attr = AttributeFactory::createAttribute(other, other_cfg); + add_docs(&*field_attr, num_docs); + add_docs(&*other_attr, num_docs); + /* docid 0 is skipped; 5 consecutive docs share a field value and 10 consecutive docs share an other value. */ for (size_t i = 1; i < num_docs; ++i) { + set_attr_value(*field_attr, i, i / 5); + set_attr_value(*other_attr, i, i / 10); + } + MyAttributeManager attribute_manager(field_attr); + attribute_manager.set_other(other_attr); + return attribute_manager; +} + +/* Run term as a range term (empty view, weight 1) through do_search and return the number of hits. */ size_t diversity_hits(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + return result.hits.size(); +} + +/* Run term as a range term and return the (first, last) docid among the hits, or (0, 0) when there are none. Also expects hits to arrive in strictly increasing docid order. */ std::pair diversity_docid_range(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + std::pair range(0, 0); + for (const Result::Hit &hit: result.hits) { + /* first == 0 doubles as the "nothing seen yet" marker; this works because do_search never reports docid 0. */ if (range.first == 0) { + range.first = hit.docid; + range.second = hit.docid; + } else { + EXPECT_GREATER(size_t(hit.docid), range.second); + range.second = hit.docid; + } + } + return range; +} + +TEST("require that diversity range searches work for various types") { + for (auto field_type: std::vector({BasicType::INT32, BasicType::DOUBLE})) { + for (auto other_type: std::vector({BasicType::INT16, BasicType::INT32, BasicType::INT64, + BasicType::FLOAT, BasicType::DOUBLE, BasicType::STRING})) + { + for (bool other_fast_search: std::vector({true, false})) { + MyAttributeManager manager = make_diversity_setup(field_type, true, other_type, other_fast_search); + for (bool strict: std::vector({true, false})) { + TEST_STATE(vespalib::make_string("field_type: %s, other_type: %s,
other_fast_search: %s, strict: %s", + BasicType(field_type).asString(), BasicType(other_type).asString(), + other_fast_search ? "true" : "false", strict ? "true" : "false").c_str()); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10]", strict)); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10]", strict)); + EXPECT_EQUAL(100u, diversity_hits(manager, "[;;1000;other;1]", strict)); + EXPECT_EQUAL(100u, diversity_hits(manager, "[;;-1000;other;1]", strict)); + EXPECT_EQUAL(300u, diversity_hits(manager, "[;;1000;other;3]", strict)); + EXPECT_EQUAL(300u, diversity_hits(manager, "[;;-1000;other;3]", strict)); + EXPECT_EQUAL(10u, diversity_hits(manager, "[;;10;other;3]", strict)); + EXPECT_EQUAL(10u, diversity_hits(manager, "[;;-10;other;3]", strict)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3]", strict).first); + EXPECT_EQUAL(30u, diversity_docid_range(manager, "[;;10;other;3]", strict).second); + EXPECT_EQUAL(965u, diversity_docid_range(manager, "[;;-10;other;3]", strict).first); + EXPECT_EQUAL(997u, diversity_docid_range(manager, "[;;-10;other;3]", strict).second); + } + } + } + } +} + +TEST("require that diversity also works for a single unique value") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", true)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", true)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", false)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", false)); +} + +TEST("require that diversity range searches gives empty results for non-existing diversity attributes") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;bogus;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;bogus;10]", true)); + EXPECT_EQUAL(0u, 
diversity_hits(manager, "[;;1000;;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;;10]", true)); +} + +TEST("require that loose diversity gives enough diversity and hits while doing less work") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10;4;loose]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).first); + EXPECT_EQUAL(16u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).second); +} + +TEST("require that strict diversity gives enough diversity and hits while doing less work, even though more than loose, but more correct than loose") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10;4;strict]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).first); + EXPECT_EQUAL(23u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).second); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh new file mode 100755 index 00000000000..9fcee4b1ebb --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_searchable_adapter_test_sh +rm -f ./my_logctl_file +VESPA_LOG_CONTROL_FILE=./my_logctl_file VESPA_LOG_LEVEL=all $VALGRIND ./searchlib_attribute_searchable_adapter_test_app diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp new file mode 100644 index 00000000000..bd781a37a5b --- /dev/null +++ 
b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -0,0 +1,231 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using namespace search::attribute; + +namespace { + +class FakeAttributeManager : public IAttributeManager +{ +private: + typedef std::map Map; + Map _map; + + AttributeVector::SP lookup(const std::string &name) const { + Map::const_iterator pos = _map.find(name); + if (pos == _map.end()) { + return AttributeVector::SP(); + } + return pos->second; + } + +public: + FakeAttributeManager() : _map() {} + + void addAttribute(AttributeVector::SP attr) { + _map[attr->getName()] = attr; + } + + virtual AttributeGuard::UP getAttribute(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeGuard(lookup(name))); + } + + virtual AttributeGuard::UP getAttributeStableEnum(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeEnumGuard(lookup(name))); + } + + virtual void getAttributeList(std::vector &list) const { + Map::const_iterator pos = _map.begin(); + for (; pos != _map.end(); ++pos) { + list.push_back(pos->second); + } + } + + virtual IAttributeContext::UP createContext() const { + return IAttributeContext::UP(new AttributeContext(*this)); + } +}; + +void +setupAttributeManager(FakeAttributeManager &manager) +{ + AttributeVector::DocId docId; + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "integer", Config(BasicType("int64"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == 
docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(docId, i); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "string", Config(BasicType("string"))); + StringAttribute *attr = (StringAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(i, std::string(1, '1' + i - 1).c_str()); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "multi", Config(BasicType("int64"), search::attribute::CollectionType("array"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->append(docId, i, 0); + attr->append(docId, i + 10, 1); + attr->commit(); + } + manager.addAttribute(attr_sp); + } +} + +struct WS { + static const uint32_t fieldId = 42; + IAttributeManager & attribute_manager; + MatchDataLayout layout; + TermFieldHandle handle; + std::vector > tokens; + + WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() { + MatchData::UP tmp = layout.createMatchData(); + ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + } + + WS &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const 
{ + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + return (dynamic_cast(sb.get()) != 0); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + FakeResult result; + sb->initFullRange(); + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + TermFieldMatchData &data = *md->resolveTermField(handle); + FieldPositionsIterator itr = data.getIterator(); + for (; itr.valid(); itr.next()) { + result.elem(itr.getElementId()); + result.weight(itr.getElementWeight()); + result.pos(itr.getPosition()); + } + } + } + return result; + } +}; + +} // namespace + +class Test : public vespalib::TestApp +{ +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attribute_weighted_set_test"); + { + FakeAttributeManager manager; + setupAttributeManager(manager); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); + + EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); + 
EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); + } + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp new file mode 100644 index 00000000000..ed851d872e1 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("attributeblueprint_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::AttributeEnumGuard; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::SimpleLocationTerm; +using search::query::SimplePrefixTerm; +using search::query::SimpleStringTerm; +using search::query::Weight; +using search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::SearchIterator; +using search::queryeval::FakeRequestContext; +using std::string; +using std::vector; +using namespace search::attribute; +using namespace search; + +namespace { + +class Test : public vespalib::TestApp { + void requireThatIteratorsCanBeCreated(); + void requireThatRangeTermsWorkToo(); + void requireThatPrefixTermsWork(); + void requireThatLocationTermsWork(); + void requireThatFastSearchLocationTermsWork(); + + bool search(const string &term, IAttributeManager &attribute_manager); + bool search(const Node &term, IAttributeManager &attribute_manager); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attributeblueprint_test"); + + TEST_DO(requireThatIteratorsCanBeCreated()); + TEST_DO(requireThatRangeTermsWorkToo()); + TEST_DO(requireThatPrefixTermsWork()); + TEST_DO(requireThatLocationTermsWork()); + TEST_DO(requireThatFastSearchLocationTermsWork()); + + TEST_DONE(); +} + +const string field = "field"; +const int32_t weight = 1; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + 
AttributeVector::DocId _docid; + +public: + MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr) {} + + virtual AttributeGuard::UP getAttribute(const string &) const { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &) const { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } + + virtual void getAttributeList(vector &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +bool Test::search(const string &term, IAttributeManager &attribute_manager) { + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + bool ret = search(node, attribute_manager); + return ret; +} + +bool Test::search(const Node &node, IAttributeManager &attribute_manager) { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + AttributeBlueprintFactory source; + Blueprint::UP result = source.createBlueprint(requestContext, FieldSpec(field, 0, 0), node); + ASSERT_TRUE(result.get()); + EXPECT_TRUE(!result->getState().estimate().empty); + EXPECT_EQUAL(3u, result->getState().estimate().estHits); + result->fetchPostings(true); + SearchIterator::UP iterator = result->createSearch(*md, true); + ASSERT_TRUE((bool)iterator); + iterator->initFullRange(); + EXPECT_TRUE(!iterator->seek(1)); + return iterator->seek(2); +} + +template struct AttributeVectorTypeFinder { + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder { + typedef search::SingleValueNumericAttribute > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +struct FastSearchLongAttribute { + typedef 
search::SingleValueNumericPostingAttribute< search::EnumAttribute > > Type; + static void add(Type & a, int64_t v) { a.update(a.getNumDocs()-1, v); a.commit(); } +}; + +template +MyAttributeManager fill(typename AT::Type * attr, T value) { + AttributeVector::DocId docid; + attr->addDoc(docid); + attr->addDoc(docid); + attr->addDoc(docid); + assert(2u == docid); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +template +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + return fill(attr, value); +} + +MyAttributeManager makeFastSearchLongAttribute(int64_t value) { + typedef FastSearchLongAttribute::Type AttributeVectorType; + Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVectorType *attr = new AttributeVectorType(field, cfg); + return fill(attr, value); +} + +void Test::requireThatIteratorsCanBeCreated() { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +void Test::requireThatRangeTermsWorkToo() { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +void Test::requireThatPrefixTermsWork() +{ + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo", "field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatLocationTermsWork() { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatFastSearchLocationTermsWork() { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); +#if 0 + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +#endif +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchcontext/.gitignore b/searchlib/src/tests/attribute/searchcontext/.gitignore new file mode 100644 index 00000000000..61dc5e8fc8e --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +searchcontext_test +searchlib_searchcontext_test_app diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt new file mode 100644 index 00000000000..24652373a00 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt @@ -0,0 +1,9 @@ +# 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_searchcontext_test_app + SOURCES + searchcontext.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_searchcontext_test_app COMMAND sh searchcontext_test.sh) diff --git a/searchlib/src/tests/attribute/searchcontext/DESC b/searchlib/src/tests/attribute/searchcontext/DESC new file mode 100644 index 00000000000..8ce9805dbb0 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/DESC @@ -0,0 +1 @@ +Unit test for AttributeVector::SearchContext using all attribute vector implementations. diff --git a/searchlib/src/tests/attribute/searchcontext/FILES b/searchlib/src/tests/attribute/searchcontext/FILES new file mode 100644 index 00000000000..cebd66e863f --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/FILES @@ -0,0 +1 @@ +searchcontext.cpp diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp new file mode 100644 index 00000000000..6c69e79a93b --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp @@ -0,0 +1,1900 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +LOG_SETUP("searchcontext_test"); + +namespace search { + +namespace +{ + +bool +isUnsignedSmallIntAttribute(const AttributeVector &a) +{ + switch (a.getBasicType()) + { + case attribute::BasicType::UINT1: + case attribute::BasicType::UINT2: + case attribute::BasicType::UINT4: + return true; + default: + return false; + } +} + +} + +typedef AttributeVector::SP AttributePtr; +typedef std::unique_ptr SearchContextPtr; +typedef AttributeVector::SearchContext SearchContext; +using attribute::Config; +using attribute::BasicType; +using attribute::CollectionType; +typedef AttributeVector::largeint_t largeint_t; +typedef queryeval::SearchIterator::UP SearchBasePtr; +typedef std::unique_ptr ResultSetPtr; + +using queryeval::HitCollector; +using queryeval::SearchIterator; +using fef::MatchData; +using fef::TermFieldMatchData; +using fef::TermFieldMatchDataArray; +using fef::TermFieldMatchDataPosition; + +class DocSet : public std::set +{ +public: + DocSet() : std::set() {} + DocSet(const uint32_t *b, const uint32_t *e) : std::set(b, e) {} + DocSet & put(const uint32_t &v) { + insert(v); + return *this; + } +}; + +template +class PostingList +{ +private: + V * _vec; + T _value; + DocSet _hits; + +public: + PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {} + const V & getAttribute() const { return *_vec; } + V & getAttribute() { return *_vec; } + const T & getValue() const { return _value; } + DocSet & getHits() { return _hits; } + const DocSet & getHits() const { return _hits; } + uint32_t getHitCount() const { return _hits.size(); } +}; + +class DocRange +{ +public: + uint32_t start; + uint32_t end; + DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {} +}; + +class SearchContextTest : 
public vespalib::TestApp +{ +private: + typedef std::map ConfigMap; + // Map of all config objects + ConfigMap _integerCfg; + ConfigMap _floatCfg; + ConfigMap _stringCfg; + + + // helper functions + void + addReservedDoc(AttributeVector &ptr); + + void addDocs(AttributeVector & ptr, uint32_t numDocs); + template + void fillVector(std::vector & values, size_t numValues); + template + void fillAttribute(V & vec, const std::vector & values); + template + void resetAttribute(V & vec, const T & value); + template + void fillPostingList(PostingList & pl, const DocRange & range); + template + void fillPostingList(PostingList & pl); + void buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, + QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template + SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); + template + ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template + void performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); + + template + void testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs); + void testInitRange(); + // test search functionality + template + void testFind(const PostingList & first); + + template + void testSearch(V & attribute, uint32_t numDocs, const std::vector & values); + template + void testSearch(const ConfigMap & cfgs); + template + void testMultiValueSearchHelper(V & vec, const std::vector & values); + template + void testMultiValueSearch(V & first, V & second, const std::vector & values); + void testSearch(); + + class IteratorTester { + public: + virtual bool matches(const SearchIterator & 
base) const = 0; + virtual ~IteratorTester() { } + }; + class AttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast(&base) != NULL; + } + }; + class FlagAttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return (dynamic_cast(&base) != NULL) || + (dynamic_cast(&base) != NULL) || + (dynamic_cast(&base) != NULL); + } + }; + class AttributePostingListIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast(&base) != NULL || + dynamic_cast(&base) != NULL; + + } + }; + + + // test search iterator functionality + void testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void fillForSearchIteratorTest(IntegerAttribute * ia); + void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia); + void testSearchIterator(); + + + // test search iterator unpacking + void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra); + void testSearchIteratorUnpacking(const AttributePtr & ptr, + SearchContext & sc, + bool extra, + bool strict); + void testSearchIteratorUnpacking(); + + + // test range search + template + void performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected); + template + void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values); + void testRangeSearch(); + void testRangeSearchLimited(); + + + // test case insensitive search + void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected); + void testCaseInsensitiveSearch(const AttributePtr & ptr); + void testCaseInsensitiveSearch(); + void 
testRegexSearch(const AttributePtr & ptr); + void testRegexSearch(); + + + // test prefix search + void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void testPrefixSearch(const AttributePtr & ptr); + void testPrefixSearch(); + + // test that search is working after clear doc + template + void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, const vespalib::string & term); + void requireThatSearchIsWorkingAfterClearDoc(); + + // test that search is working after load and clear doc + template + void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, ValueType defaultValue, + const vespalib::string & term); + void requireThatSearchIsWorkingAfterLoadAndClearDoc(); + + template + void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2); + void requireThatSearchIsWorkingAfterUpdates(); + + void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded(); + + template + void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value); + void requireThatInvalidSearchTermGivesZeroHits(); + + void requireThatFlagAttributeHandlesTheByteRange(); + + void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue); + void requireThatOutOfBoundsSearchTermGivesZeroHits(); + + // init maps with config objects + void initIntegerConfig(); + void initFloatConfig(); + void initStringConfig(); + +public: + SearchContextTest(); + int Main(); +}; + + +void +SearchContextTest::addReservedDoc(AttributeVector &ptr) +{ + ptr.addReservedDoc(); +} + + +void +SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs) +{ + uint32_t docId; + addReservedDoc(ptr); + for 
(uint32_t i = 1; i <= numDocs; ++i) { + ptr.addDoc(docId); + EXPECT_EQUAL(docId, i); + } + ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1); +} + +template +void +SearchContextTest::fillVector(std::vector & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 1; i <= numValues; ++i) { + values.push_back(static_cast(i)); + } +} + +template <> +void +SearchContextTest::fillVector(std::vector & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? "0" : "") << i; + values.push_back(ss.str()); + } +} + +template +void +SearchContextTest::fillAttribute(V & vec, const std::vector & values) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + vec.clearDoc(doc); + uint32_t valueCount = doc % (values.size() + 1); + for (uint32_t i = 0; i < valueCount; ++i) { + // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl; + EXPECT_TRUE(vec.append(doc, values[i], 1)); + } + } + vec.commit(true); +} + +template +void +SearchContextTest::resetAttribute(V & vec, const T & value) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, value)); + } + vec.commit(true); +} + +template +void +SearchContextTest::fillPostingList(PostingList & pl, const DocRange & range) +{ + pl.getHits().clear(); + for (uint32_t doc = range.start; doc < range.end; ++doc) { + ASSERT_TRUE(doc < pl.getAttribute().getNumDocs()); + EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue())); + pl.getHits().insert(doc); + } + pl.getAttribute().commit(true); +} + +template +void +SearchContextTest::fillPostingList(PostingList & pl) +{ + AttributeVector & vec = dynamic_cast(pl.getAttribute()); + pl.getHits().clear(); + uint32_t sz = vec.getMaxValueCount(); + T * buf = new T[sz]; + for (uint32_t doc = 1; doc < 
vec.getNumDocs(); ++doc) { + uint32_t valueCount = vec.get(doc, buf, sz); + EXPECT_TRUE(valueCount <= sz); + for (uint32_t i = 0; i < valueCount; ++i) { + if (buf[i] == pl.getValue()) { + //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl; + pl.getHits().insert(doc); + break; + } + } + } + delete [] buf; +} + +void +SearchContextTest::buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, QueryTermSimple::SearchTerm termType) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + switch (termType) { + case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; + case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; + default: + buffer[p++] = ParseItem::ITEM_TERM; + break; + } + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + +template +SearchContextPtr +SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + std::vector query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), termType); + + return (dynamic_cast(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + +ResultSetPtr +SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) +{ + HitCollector hc(numDocs, numDocs, 0); + sb.initFullRange(); + // assume strict toplevel search object located at start + for (sb.seek(1u); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +template +ResultSetPtr +SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + TermFieldMatchData dummy; + SearchContextPtr sc = getSearch(vec, term, termType); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); + return rs; +} + +template +void +SearchContextTest::performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ +#if 0 + std::cout << "performSearch[" << term << "]: {"; + std::copy(expected.begin(), expected.end(), std::ostream_iterator(std::cout, ", ")); + std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; +#endif + { // strict search iterator + ResultSetPtr rs = performSearch(vec, term, termType); + checkResultSet(*rs, expected, false); + } +} + +void +SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) +{ + EXPECT_EQUAL(rs.getNumHits(), expected.size()); + if (bitVector) { + const BitVector * vec = rs.getBitOverflow(); + if (expected.size() != 0) { + ASSERT_TRUE(vec != NULL); + for (const auto & expect : expected) { + EXPECT_TRUE(vec->testBit(expect)); + } + } + } else { + const RankedHit * array = rs.getArray(); + if (expected.size() != 0) { + ASSERT_TRUE(array != NULL); + uint32_t i = 0; + for (DocSet::const_iterator iter = expected.begin(); + iter != expected.end(); ++iter, ++i) + { + EXPECT_TRUE(array[i]._docId == *iter); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Test search functionality +//----------------------------------------------------------------------------- +template +void +SearchContextTest::testFind(const PostingList & pl) +{ + { // strict search iterator + SearchContextPtr sc = 
getSearch(pl.getAttribute(), pl.getValue()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs()); + checkResultSet(*rs, pl.getHits(), false); + } +} + +template +void +SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector & values) +{ + LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values", + attribute.getName().c_str(), numDocs, static_cast(values.size())); + + // fill attribute vectors + addDocs(attribute, numDocs); + + std::vector > lists; + + // fill posting lists + ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0); + uint32_t hitCount = attribute.getNumDocs() / values.size(); + for (uint32_t i = 0; i < values.size(); ++i) { + // for each value a range with hitCount documents will hit on that value + lists.push_back(PostingList(attribute, values[i])); + fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1)); + } + + // test find() + for (const auto & list : lists) { + testFind(list); + } +} + +template +void +SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector & values) +{ + std::vector > lists; + + // fill posting lists based on attribute content + for (const T & value : values) { + lists.push_back(PostingList(vec, value)); + fillPostingList(lists.back()); + } + + // test find() + for (const auto & list : lists) { + //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() + // << ", hit count = " << lists[i].getHitCount() << std::endl; + testFind(list); + } +} + +template +void +SearchContextTest::testMultiValueSearch(V & first, V & second, const std::vector & values) +{ + addDocs(first, second.getNumDocs()); + LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values", + first.getName().c_str(), first.getNumDocs(), static_cast(values.size())); + + fillAttribute(first, values); 
+ + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); + + size_t sz = values.size(); + ASSERT_TRUE(sz > 2); + std::vector subset; + // values[sz - 2] is not used -> 0 hits + // values[sz - 1] is used once -> 1 hit + for (size_t i = 0; i < sz - 2; ++i) { + subset.push_back(values[i]); + } + + fillAttribute(first, subset); + + ASSERT_TRUE(1u < first.getNumDocs()); + EXPECT_TRUE(first.append(1u, values[sz - 1], 1)); + first.commit(true); + + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); +} + +template +void SearchContextTest::testSearch(const ConfigMap & cfgs) { + uint32_t numDocs = 100; + uint32_t numUniques = 20; + std::vector values; + fillVector(values, numUniques); + for (const auto & cfg : cfgs) { + AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second); + testSearch(*(dynamic_cast(second.get())), numDocs, values); + if (second->hasMultiValue()) { + AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second); + testMultiValueSearch(*(dynamic_cast(first.get())), + *(dynamic_cast(second.get())), values); + } + } +} + +using search::test::InitRangeVerifier; + +template +void SearchContextTest::testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs) { + InitRangeVerifier ir; + for (const auto & cfg : cfgs) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-initrange", cfg.second); + addDocs(*attribute, ir.getDocIdLimit()); + for (uint32_t doc : ir.getExpectedDocIds()) { + EXPECT_TRUE(nullptr != dynamic_cast(attribute.get())); + EXPECT_TRUE(dynamic_cast(attribute.get())->update(doc, key)); + } + attribute->commit(true); + SearchContextPtr sc = getSearch(*attribute, keyAsString); + 
ASSERT_TRUE(sc->valid()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ir.verify(*sb); + } +} + +void SearchContextTest::testInitRange() { + testInitRange(42, "42", _integerCfg); + testInitRange(42.42, "42.42", _floatCfg); + testInitRange("any-key", "any-key", _stringCfg); +} + +void +SearchContextTest::testSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numUniques = 20; + + { // IntegerAttribute + for (const auto & cfg : _integerCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + + + { // CollectionType::ARRAY Flags. + std::vector values; + fillVector(values, numUniques); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg); + testSearch(*(dynamic_cast(second.get())), numDocs, values); + AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg); + testMultiValueSearch(*(dynamic_cast(first.get())), + *(dynamic_cast(second.get())), values); + } + } + + { // FloatingPointAttribute + for (const auto & cfg : _floatCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "7.3"); + ASSERT_TRUE( sc->valid() ); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + } + + testSearch(_integerCfg); + testSearch(_floatCfg); + testSearch(_stringCfg); +} + +//----------------------------------------------------------------------------- +// Test search iterator functionality +//----------------------------------------------------------------------------- +void +SearchContextTest::testStrictSearchIterator(SearchContext & 
threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with 3 hits + threeHits.fetchPostings(true); + SearchBasePtr sb = threeHits.createIterator(&dummy, true); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->getDocId() == 1u); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->isAtEnd()); + } + + { // search for value with no hits + noHits.fetchPostings(true); + SearchBasePtr sb = noHits.createIterator(&dummy, true); + sb->initFullRange(); + ASSERT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_TRUE(sb->isAtEnd()); + } +} + +void +SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with three hits + threeHits.fetchPostings(false); + SearchBasePtr sb = threeHits.createIterator(&dummy, false); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); + } + { // search for value with no hits + noHits.fetchPostings(false); + SearchBasePtr sb = noHits.createIterator(&dummy, false); + 
sb->initFullRange(); + + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_NOT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_NOT_EQUAL(sb->getDocId(), 6u); + } +} + +void +SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 10); + ia->update(2, 20); + ia->update(3, 10); + ia->update(4, 20); + ia->update(5, 10); + ia->commit(true); +} + +void +SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 1); + ia->update(2, 2); + ia->update(3, 1); + ia->update(4, 2); + ia->update(5, 1); + ia->commit(true); +} + +void +SearchContextTest::testSearchIterator() +{ + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::UINT2, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg); + fillForSemiNibbleSearchIteratorTest(dynamic_cast + (ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 1); + SearchContextPtr noHits = getSearch(*ptr.get(), 3); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 1); + noHits = getSearch(*ptr.get(), 3); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + 
AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfs-string", cfg); + StringAttribute * sa = dynamic_cast(ptr.get()); + addReservedDoc(*ptr); + ptr->addDocs(5); + sa->update(1, "three"); + sa->update(2, "two"); + sa->update(3, "three"); + sa->update(4, "two"); + sa->update(5, "three"); + ptr->commit(true); + + SearchContextPtr threeHits = getSearch(*ptr.get(), "three"); + SearchContextPtr noHits = getSearch(*ptr.get(), "none"); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + FlagAttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } +} + + + +//----------------------------------------------------------------------------- +// Test search iterator unpacking +//----------------------------------------------------------------------------- +void +SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, + bool extra) +{ + addReservedDoc(*ia); + ia->addDocs(3); + if (ia->getCollectionType() == CollectionType::SINGLE) { + ia->update(1, 10); + ia->update(2, 10); + 
ia->update(3, 10); + } else if (ia->getCollectionType() == CollectionType::ARRAY) { + ia->append(1, 10, 1); + ia->append(2, 10, 1); + ia->append(2, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + } else { // WEIGHTED SET + ia->append(1, 10, -50); + ia->append(2, 10, 0); + ia->append(3, 10, 50); + } + ia->commit(true); + if (!extra) + return; + ia->addDocs(20); + for (uint32_t d = 4; d < 24; ++d) { + if (ia->getCollectionType() == CollectionType::SINGLE) + ia->update(d, 10); + else + ia->append(d, 10, 1); + } + ia->commit(true); +} + +void +SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, + SearchContext & sc, + bool extra, + bool strict) +{ + LOG(info, + "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str()); + + TermFieldMatchData md; + md.reset(100); + + TermFieldMatchDataPosition pos; + pos.setElementWeight(100); + md.appendPosition(pos); + + sc.fetchPostings(strict); + SearchBasePtr sb = sc.createIterator(&md, strict); + sb->initFullRange(); + + std::vector weights(3); + if (attr->getCollectionType() == CollectionType::SINGLE || + (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8)) + { + weights[0] = 1; + weights[1] = 1; + weights[2] = 1; + } else if (attr->getCollectionType() == CollectionType::ARRAY) { + weights[0] = 1; + weights[1] = 2; + weights[2] = 3; + } else { + weights[0] = -50; + weights[1] = 0; + weights[2] = 50; + } + + // unpack and check weights + sb->unpack(1); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_EQUAL(md.getDocId(), 1u); + EXPECT_EQUAL(md.getWeight(), weights[0]); + + sb->unpack(2); + EXPECT_EQUAL(sb->getDocId(), 2u); + EXPECT_EQUAL(md.getDocId(), 2u); + EXPECT_EQUAL(md.getWeight(), weights[1]); + + sb->unpack(3); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_EQUAL(md.getDocId(), 3u); + EXPECT_EQUAL(md.getWeight(), weights[2]); + if (extra) { + sb->unpack(4); + EXPECT_EQUAL(sb->getDocId(), 4u); + 
EXPECT_EQUAL(md.getDocId(), 4u); + EXPECT_EQUAL(md.getWeight(), 1); + } +} + +void +SearchContextTest::testSearchIteratorUnpacking() +{ + std::vector > config; + + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + config.emplace_back("s-int32", cfg); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + config.emplace_back("s-uint4", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + config.emplace_back("a-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + config.emplace_back("w-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + config.emplace_back("sfs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("afs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + config.emplace_back("wfs-int32", cfg); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("flags", cfg); + } + + for (const auto & cfg : config) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast(ptr.get()), false); + SearchContextPtr sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, true); + sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, false); + if (cfg.second.fastSearch()) { + AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast(ptr2.get()), true); + SearchContextPtr sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, true); + sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, false); + } + } +} + + + +//----------------------------------------------------------------------------- +// Test range search 
+//----------------------------------------------------------------------------- + +template +void +SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +template +void +SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values) +{ + LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str()); + + VectorType & vec = dynamic_cast(*ptr.get()); + + addDocs(vec, numDocs); + + std::map postingList; + + uint32_t docCnt = 0; + for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { + //std::cout << "postingList[" << values[i] << "]: {"; + for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { + EXPECT_TRUE(vec.update(docCnt + 1u, values[i])); + postingList[values[i]].insert(docCnt + 1u); + //std::cout << docCnt << ", "; + } + //std::cout << "}" << std::endl; + } + ptr->commit(true); + uint32_t smallHits = 0; + ValueType zeroValue = 0; + bool smallUInt = isUnsignedSmallIntAttribute(vec); + if (smallUInt) { + for (uint32_t i = docCnt ; i < numDocs; ++i) { + postingList[zeroValue].insert(i + 1u); + ++smallHits; + } + } + + // test less than ("a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << ">" << values[i]; + DocSet expected; + for (uint32_t j = i + 1; j < values.size(); ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test range ("[a;b]") + for (uint32_t i = 0; i < values.size(); ++i) { + for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i + vespalib::asciistream ss; + ss << "[" << values[i] << ";" << values[j] << "]"; + DocSet expected; + for (uint32_t k = i; k < j + 1; ++k) { + expected.insert(postingList[values[k]].begin(), postingList[values[k]].end()); + } + performRangeSearch(vec, ss.str(), 
expected); + } + } + + { // test large range + vespalib::asciistream ss; + ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]"; + DocSet expected; + for (uint32_t doc = 0; doc < numDocs; ++doc) { + expected.insert(doc + 1); + } + performRangeSearch(vec, ss.str(), expected); + } +} + +void +SearchContextTest::testRangeSearchLimited() +{ + largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 }; + std::vector values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0])); + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg); + IntegerAttribute & vec = dynamic_cast(*ptr); + addDocs(vec, values.size()); + for (size_t i(1); i < values.size(); i++) { + EXPECT_TRUE(vec.update(i, values[i])); + } + ptr->commit(true); + + DocSet expected; + for (size_t i(1); i < 12; i++) { + expected.put(i); + } + performRangeSearch(vec, "[1;9]", expected); + performRangeSearch(vec, "[1;9;100]", expected); + performRangeSearch(vec, "[1;9;-100]", expected); + expected.clear(); + expected.put(3); + performRangeSearch(vec, "<1;3>", expected); + expected.put(4); + performRangeSearch(vec, "<1;3]", expected); + expected.clear(); + expected.put(1).put(2).put(3); + performRangeSearch(vec, "[1;3>", expected); + expected.put(4); + performRangeSearch(vec, "[1;3]", expected); + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[1;9;1]", expected); + performRangeSearch(vec, "[1;9;2]", expected); + expected.put(3); + performRangeSearch(vec, "[1;9;3]", expected); + expected.clear(); + expected.put(10).put(11); + performRangeSearch(vec, "[1;9;-1]", expected); + performRangeSearch(vec, "[1;9;-2]", expected); + expected.put(9); + performRangeSearch(vec, "[1;9;-3]", expected); + performRangeSearch(vec, "[1;9;-3]", expected); + + expected.clear(); + for (size_t i(1); i < 13; i++) { + expected.put(i); + } + performRangeSearch(vec, "[;;100]", expected); + 
performRangeSearch(vec, "[;;-100]", expected); + + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[;;1]", expected); + expected.clear(); + expected.put(12); + performRangeSearch(vec, "[;;-1]", expected); +} + +void +SearchContextTest::testRangeSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numValues = 20; + const uint32_t numNibbleValues = 9; + + { // IntegerAttribute + std::vector values; + std::vector nibbleValues; + largeint_t start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + for (uint32_t i = 0; i < numNibbleValues; ++i) { + nibbleValues.push_back(start + i); + } + + for (const auto & cfg : _integerCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch(ptr, numDocs, values); + } + { // CollectionType::ARRAY Flags. + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + testRangeSearch(ptr, numDocs, values); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg); + testRangeSearch(ptr, numDocs, nibbleValues); + } + } + + { // FloatingPointAttribute + std::vector values; + double start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + + for (const auto & cfg : _floatCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch(ptr, numDocs, values); + } + } +} + + +//----------------------------------------------------------------------------- +// Test case insensitive search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +void 
+SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr) +{ + LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 5 * 5; + addDocs(*ptr.get(), numDocs); + + const char * terms[][5] = { + {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower + {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper + {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper + {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase + {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase + }; + + uint32_t doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc++, terms[i][j])); + } + } + + ptr->commit(true); + + const char * buffer[1]; + doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1)); + EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j])); + } + } + + DocSet empty; + for (uint32_t j = 0; j < 5; ++j) { + DocSet expected; + for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { + expected.insert(doc); + } + // for non-posting attributes only lower case search terms should give hits + performCaseInsensitiveSearch(vec, terms[0][j], expected); + + if (ptr->getConfig().fastSearch()) { + for (uint32_t i = 1; i < 5; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], expected); + } + } else { + for (uint32_t i = 1; i < 4; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], empty); + } + } + } + performCaseInsensitiveSearch(vec, "none", empty); + performCaseInsensitiveSearch(vec, "NONE", empty); + performCaseInsensitiveSearch(vec, "None", empty); +} + +void +SearchContextTest::testRegexSearch(const AttributePtr & ptr) +{ + LOG(info, "testRegexSearch: vector 
'%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"}; + std::vector terms = { "abc", "bc2de" }; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "abc" + } + { + uint32_t docs[] = {2, 3}; + expected.push_back(DocSet(docs, docs + 2)); // "bc2de" + } + + for (uint32_t i = 0; i < terms.size(); ++i) { + performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP); + performSearch(vec, terms[i], empty, QueryTermSimple::WORD); + } +} + + +void +SearchContextTest::testCaseInsensitiveSearch() +{ + for (const auto & cfg : _stringCfg) { + testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +void +SearchContextTest::testRegexSearch() +{ + for (const auto & cfg : _stringCfg) { + testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + + +//----------------------------------------------------------------------------- +// Test prefix search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ + performSearch(vec, term, expected, termType); +} + +void +SearchContextTest::testPrefixSearch(const AttributePtr & ptr) +{ + LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"prefixsearch", "PREFIXSEARCH", 
"PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"}; + const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"}, + {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "pre" + } + { + uint32_t docs[] = {1, 2, 3}; + expected.push_back(DocSet(docs, docs + 3)); // "pref" + } + { + uint32_t docs[] = {4, 5, 6}; + expected.push_back(DocSet(docs, docs + 3)); // "prec" + } + expected.push_back(DocSet()); // "prex" + + for (uint32_t i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < 3; ++j) { + if (j == 0 || ptr->getConfig().fastSearch()) { + performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } else { + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } + } + } +} + + +void +SearchContextTest::testPrefixSearch() +{ + for (const auto & cfg : _stringCfg) { + testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(4); + VectorType & v = dynamic_cast(*a); + resetAttribute(v, startValue); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(4u, rs->getNumHits()); + ASSERT_TRUE(4u == rs->getNumHits()); + const RankedHit * 
array = rs->getArray(); + EXPECT_EQUAL(1u, array[0]._docId); + EXPECT_EQUAL(2u, array[1]._docId); + EXPECT_EQUAL(3u, array[2]._docId); + EXPECT_EQUAL(4u, array[3]._docId); + } + a->clearDoc(1); + a->clearDoc(3); + a->commit(true); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(2u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(2u, array[0]._docId); + EXPECT_EQUAL(4u, array[1]._docId); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "10"); + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "<11"); + } + + for (const auto & cfg : _floatCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "10.5"); + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "<10.6"); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, "start", "start"); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + ValueType defaultValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(15); + VectorType & va = dynamic_cast(*a); + resetAttribute(va, startValue); // triggers vector vector in posting list (count 15) + AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg); + EXPECT_TRUE(a->saveAs(b->getBaseFileName())); + EXPECT_TRUE(b->load()); + b->clearDoc(6); // goes from vector vector to single vector with count 14 + b->commit(true); + { + ResultSetPtr rs = performSearch(dynamic_cast(*b), term); + EXPECT_EQUAL(14u, rs->getNumHits()); + const RankedHit * 
array = rs->getArray(); + for (uint32_t i = 0; i < 14; ++i) { + if (i < 5) { + EXPECT_EQUAL(i + 1, array[i]._docId); + } else + EXPECT_EQUAL(i + 2, array[i]._docId); + } + } + ValueType buf; + if (cfg.collectionType().isMultiValue()) { + EXPECT_EQUAL(0u, b->get(6, &buf, 1)); + } else { + EXPECT_EQUAL(1u, b->get(6, &buf, 1)); + EXPECT_EQUAL(defaultValue, buf); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc() +{ + { + int64_t value = 10; + int64_t defValue = search::attribute::getUndefined(); + requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-int32", _integerCfg["s-fs-int32"], + value, defValue, "10"); + requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-int32", _integerCfg["a-fs-int32"], + value, defValue, "10"); + } + { + vespalib::string value = "foo"; + vespalib::string defValue = ""; + requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-str", _stringCfg["s-fs-str"], + value, defValue, value); + requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-str", _stringCfg["a-fs-str"], + value, defValue, value); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast(*a); + LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(2); + va.update(1, value1); + va.commit(true); + va.update(2, value1); + va.update(2, value2); + va.commit(true); + { + ResultSetPtr rs = performSearch(va, value1); + EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value + } + { + ResultSetPtr rs = performSearch(va, value2); + EXPECT_EQUAL(1u, rs->getNumHits()); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, 10, 20); + } 
+ + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, "foo", "bar"); + } +} + +void +SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded() +{ + LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + { + cfg.setGrowStrategy(GrowStrategy(1, 0, 1)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast(*a); + addReservedDoc(fa); + fa.addDocs(1); + fa.append(1, 10, 1); + fa.append(1, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(2, 20, 1); + fa.append(2, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(3, 30, 1); + fa.append(3, 26, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(4, 40, 1); + fa.append(4, 24, 1); + fa.commit(true); + { + ResultSetPtr rs = performSearch(fa, "<24"); + EXPECT_EQUAL(2u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + } + { + ResultSetPtr rs = performSearch(fa, "24"); + EXPECT_EQUAL(3u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + EXPECT_EQUAL(4u, rs->getArray()[2]._docId); + } + } + { + cfg.setGrowStrategy(GrowStrategy(4, 0, 4)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast(*a); + std::vector exp50; + std::vector exp60; + addReservedDoc(fa); + for (uint32_t i = 0; i < 200; ++i) { + uint32_t docId; + EXPECT_TRUE(fa.addDoc(docId)); + if (i % 2 == 0) { + fa.append(docId, 50, 1); + exp50.push_back(docId); + } else { + fa.append(docId, 60, 1); + exp60.push_back(docId); + } + fa.commit(true); + { + ResultSetPtr rs1 = performSearch(fa, "50"); + ResultSetPtr rs2 = performSearch(fa, "<51"); + EXPECT_EQUAL(exp50.size(), rs1->getNumHits()); + EXPECT_EQUAL(exp50.size(), rs2->getNumHits()); + for (size_t j = 0; j < exp50.size(); 
++j) { + EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId); + EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId); + } + } + { + ResultSetPtr rs = performSearch(fa, "60"); + EXPECT_EQUAL(exp60.size(), rs->getNumHits()); + for (size_t j = 0; j < exp60.size(); ++j) { + EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId); + } + } + } + } +} + +template +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast(*a); + LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(1); + va.update(1, value); + va.commit(true); + ResultSetPtr rs = performSearch(va, "foo"); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); + } + for (const auto & cfg : _floatCfg) { + requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); + } +} + +void +SearchContextTest::requireThatFlagAttributeHandlesTheByteRange() +{ + LOG(info, "requireThatFlagAttributeHandlesTheByteRange()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast(*a); + addReservedDoc(fa); + fa.addDocs(5); + fa.append(1, -128, 1); + fa.append(2, -64, 1); + fa.append(2, -8, 1); + fa.append(3, 0, 1); + fa.append(3, 8, 1); + fa.append(4, 64, 1); + fa.append(4, 24, 1); + fa.append(5, 127, 1); + fa.commit(true); + + performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD); + performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD); + performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "<127", 
DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD); + performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD); + performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + IntegerAttribute &ia = dynamic_cast(*a); + addReservedDoc(*a); + a->addDocs(1); + ia.update(1, maxValue); + ia.commit(true); + vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1); + LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str()); + ResultSetPtr rs = performSearch(ia, term); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + int32_t maxValue = std::numeric_limits::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + int8_t maxValue = std::numeric_limits::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue); + } +} + + +void +SearchContextTest::initIntegerConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::INT32, CollectionType::SINGLE); + _integerCfg["s-int32"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + _integerCfg["s-fs-int32"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::INT32, 
CollectionType::ARRAY); + _integerCfg["a-int32"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + _integerCfg["a-fs-int32"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::INT32, CollectionType::WSET); + _integerCfg["w-int32"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + _integerCfg["w-fs-int32"] = cfg; + } +} + +void +SearchContextTest::initFloatConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + _floatCfg["s-float"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + _floatCfg["s-fs-float"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + _floatCfg["a-float"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + cfg.setFastSearch(true); + _floatCfg["a-fs-float"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::FLOAT, CollectionType::WSET); + _floatCfg["w-float"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + _floatCfg["w-fs-float"] = cfg; + } +} + +void +SearchContextTest::initStringConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::STRING, CollectionType::SINGLE); + _stringCfg["s-str"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::STRING, CollectionType::ARRAY); + _stringCfg["a-str"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::STRING, CollectionType::WSET); + _stringCfg["w-str"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + _stringCfg["s-fs-str"] = cfg; + } + { // CollectionType::ARRAY && 
fastSearch + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + _stringCfg["a-fs-str"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + _stringCfg["w-fs-str"] = cfg; + } +} + +SearchContextTest::SearchContextTest() : + _integerCfg(), + _floatCfg(), + _stringCfg() +{ + initIntegerConfig(); + initFloatConfig(); + initStringConfig(); +} + +int +SearchContextTest::Main() +{ + TEST_INIT("searchcontext_test"); + EXPECT_TRUE(true); + + testSearch(); + testInitRange(); + testRangeSearch(); + testRangeSearchLimited(); + testCaseInsensitiveSearch(); + testRegexSearch(); + testPrefixSearch(); + testSearchIterator(); + testSearchIteratorUnpacking(); + TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterUpdates()); + TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()); + TEST_DO(requireThatInvalidSearchTermGivesZeroHits()); + TEST_DO(requireThatFlagAttributeHandlesTheByteRange()); + TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::SearchContextTest); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh new file mode 100755 index 00000000000..3aae4bfe4d5 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_searchcontext_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/sourceselector/.gitignore b/searchlib/src/tests/attribute/sourceselector/.gitignore new file mode 100644 index 00000000000..265c856fd01 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +sourceselector_test +searchlib_sourceselector_test_app diff 
--git a/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt new file mode 100644 index 00000000000..24b7a75dd07 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sourceselector_test_app + SOURCES + sourceselector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sourceselector_test_app COMMAND searchlib_sourceselector_test_app) diff --git a/searchlib/src/tests/attribute/sourceselector/DESC b/searchlib/src/tests/attribute/sourceselector/DESC new file mode 100644 index 00000000000..7568f5de080 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/DESC @@ -0,0 +1 @@ +This is a test of the sourceselector interface. diff --git a/searchlib/src/tests/attribute/sourceselector/FILES b/searchlib/src/tests/attribute/sourceselector/FILES new file mode 100644 index 00000000000..0d2803e762d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/FILES @@ -0,0 +1 @@ +sourceselector.cpp diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp new file mode 100644 index 00000000000..a3595f8724d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp @@ -0,0 +1,216 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sourceselector. 
+ +#include +#include +LOG_SETUP("sourceselector_test"); + +#include +#include +#include + +using std::unique_ptr; +using std::string; +using namespace search; +using namespace search::queryeval; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; + +namespace { +template size_t arraysize(const T (&)[N]) { return N; } + +const uint32_t maxDocId = 4096; +struct DocSource { uint32_t docId; uint8_t source; }; +const DocSource docs[] = { {0,1}, {1, 0}, {2, 2}, {4, 3}, {8, 9}, {16, 178}, + {32, 1}, {64, 2}, {128, 3}, {256,4}, {512, 2}, + {1024, 1}, {2048,5}, {maxDocId,1} }; +const string index_dir = "test_data"; +const string base_file_name = "test_data/sourcelist"; +const string base_file_name2 = "test_data/sourcelist2"; +const uint32_t default_source = 7; +const uint32_t base_id = 42; + +class Test : public vespalib::TestApp +{ +public: + int Main(); +private: + void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector); + void testFixed(const DocSource *docSource, size_t sz); + template + void requireThatSelectorCanCloneAndSubtract(); + void requireThatSelectorCanCloneAndSubtract(); + template + void requireThatSelectorCanSaveAndLoad(); + void requireThatSelectorCanSaveAndLoad(); + template + void requireThatCompleteSourceRangeIsHandled(); + void requireThatCompleteSourceRangeIsHandled(); + template + void requireThatSourcesAreCountedCorrectly(); + void requireThatSourcesAreCountedCorrectly(); +}; + +int +Test::Main() +{ + TEST_INIT("sourceselector_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testFixed(docs, arraysize(docs)); + TEST_DO(requireThatSelectorCanCloneAndSubtract()); + TEST_DO(requireThatSelectorCanSaveAndLoad()); + TEST_DO(requireThatCompleteSourceRangeIsHandled()); + TEST_DO(requireThatSourcesAreCountedCorrectly()); + + TEST_DONE(); +} + +void setSources(ISourceSelector &selector) { + for (size_t i = 0; i < arraysize(docs); 
++i) { + selector.setSource(docs[i].docId, docs[i].source); + } +} + +void Test::testFixed(const DocSource *docSource, size_t sz) +{ + FixedSourceSelector selector(default_source, base_file_name, 10); + EXPECT_EQUAL(default_source, selector.getDefaultSource()); + EXPECT_EQUAL(10u, selector.getDocIdLimit()); +// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1)); + setSources(selector); + testSourceSelector(docSource, sz, selector.getDefaultSource(), selector); + EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit()); +} + +void Test::testSourceSelector(const DocSource *docSource, size_t sz, + uint8_t defaultSource, ISourceSelector &selector) +{ + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0; i < sz; ++i) { + EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId)); + } + } + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) { + if (i != docSource[j].docId) { + EXPECT_EQUAL(defaultSource, it->getSource(i)); + } else { + EXPECT_EQUAL(docSource[j].source, it->getSource(i)); + ++j; + } + } + } +} + +template +void +Test::requireThatSelectorCanCloneAndSubtract() +{ + SelectorType selector(default_source, base_file_name); + setSources(selector); + selector.setBaseId(base_id); + + const uint32_t diff = 3; + typename SelectorType::UP + new_selector(selector.cloneAndSubtract(base_file_name2, diff)); + EXPECT_EQUAL(default_source - diff, new_selector->getDefaultSource()); + EXPECT_EQUAL(base_id + diff, new_selector->getBaseId()); + EXPECT_EQUAL(maxDocId+1, new_selector->getDocIdLimit()); + + ISourceSelector::Iterator::UP it(new_selector->createIterator()); + for(size_t i = 0; i < arraysize(docs); ++i) { + if (docs[i].source > diff) { + EXPECT_EQUAL(docs[i].source - diff, it->getSource(docs[i].docId)); + } else { + EXPECT_EQUAL(0, it->getSource(docs[i].docId)); + } + } +} + +void 
+Test::requireThatSelectorCanCloneAndSubtract() +{ + requireThatSelectorCanCloneAndSubtract(); +} + +template +void +Test::requireThatSelectorCanSaveAndLoad() +{ + SelectorType selector(default_source, base_file_name2); + setSources(selector); + selector.setBaseId(base_id); + selector.setSource(maxDocId + 1, default_source); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); + FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str()); + + SourceSelector::SaveInfo::UP save_info = + selector.extractSaveInfo(base_file_name); + save_info->save(TuneFileAttributes(), DummyFileHeaderContext()); + typename SelectorType::UP + selector2(SelectorType::load(base_file_name)); + testSourceSelector(docs, arraysize(docs), default_source, *selector2); + EXPECT_EQUAL(base_id, selector2->getBaseId()); + EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit()); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); +} + +void +Test::requireThatSelectorCanSaveAndLoad() +{ + requireThatSelectorCanSaveAndLoad(); +} + +template +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + selector.setSource(i, i); + } + ISourceSelector::Iterator::UP itr = selector.createIterator(); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + EXPECT_EQUAL((queryeval::Source)i, itr->getSource(i)); + } +} + +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + requireThatCompleteSourceRangeIsHandled(); +} + +template +void +Test::requireThatSourcesAreCountedCorrectly() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < 256; ++i) { + selector.setSource(i, i%16); + } + SourceSelector::Histogram hist = selector.getDistribution(); + for (uint32_t i = 0; i < 16; ++i) { + EXPECT_EQUAL(16u, hist[i]); + } + for (uint32_t i = 16; i < 256; ++i) { + EXPECT_EQUAL(0u, hist[i]); + } +} + +void 
+Test::requireThatSourcesAreCountedCorrectly() +{ + requireThatSourcesAreCountedCorrectly(); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/stringattribute/.gitignore b/searchlib/src/tests/attribute/stringattribute/.gitignore new file mode 100644 index 00000000000..0e8a04bc19d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +stringattribute_test +searchlib_stringattribute_test_app diff --git a/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt new file mode 100644 index 00000000000..032ce9cac4e --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_stringattribute_test_app + SOURCES + stringattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stringattribute_test_app COMMAND sh stringattribute_test.sh) diff --git a/searchlib/src/tests/attribute/stringattribute/DESC b/searchlib/src/tests/attribute/stringattribute/DESC new file mode 100644 index 00000000000..5d94ab94325 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/DESC @@ -0,0 +1 @@ +Unit tests for SingleValueStringAttribute and MultiValueStringAttribute. 
diff --git a/searchlib/src/tests/attribute/stringattribute/FILES b/searchlib/src/tests/attribute/stringattribute/FILES new file mode 100644 index 00000000000..e68ef57177d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/FILES @@ -0,0 +1 @@ +stringattribute.cpp diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp new file mode 100644 index 00000000000..154340ba408 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("stringattribute_test"); +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace search { + +using attribute::CollectionType; +using attribute::IAttributeVector; + +class StringAttributeTest : public vespalib::TestApp +{ +private: + typedef ArrayStringAttribute ArrayStr; + typedef WeightedSetStringAttribute WeightedSetStr; + typedef ArrayStringPostingAttribute ArrayStrPosting; + typedef WeightedSetStringPostingAttribute WeightedSetStrPosting; + typedef attribute::Config Config; + typedef attribute::BasicType BasicType; + + template + void addDocs(Attribute & vec, uint32_t numDocs); + template + void checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value); + void testMultiValue(); + template + void testMultiValue(Attribute & attr, uint32_t numDocs); + void testMultiValueMultipleClearDocBetweenCommit(); + void testMultiValueRemove(); + void testSingleValue(); + void testDefaultValueOnAddDoc(AttributeVector & v); + template + void testSingleValue(Attribute & svsa, Config &cfg); + +public: + int Main(); +}; + +template +void +StringAttributeTest::addDocs(Attribute & vec, uint32_t numDocs) +{ + for (uint32_t i = 0; 
i < numDocs; ++i) { + typename Attribute::DocId doc; + EXPECT_TRUE(vec.addDoc(doc)); + EXPECT_TRUE(doc == i); + EXPECT_TRUE(vec.getNumDocs() == i + 1); + EXPECT_TRUE(vec.getValueCount(doc) == 0); + } + EXPECT_TRUE(vec.getNumDocs() == numDocs); +} + +template +void +StringAttributeTest::checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value) +{ + std::vector buffer(valueCount); + EXPECT_TRUE(static_cast(vec.getValueCount(doc)) == valueCount); + EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount); + EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues); +} + + +void +StringAttributeTest::testMultiValue() +{ + uint32_t numDocs = ArrayStr::MultiValueMapping::maxValues() + 1; + + { // Array String Attribute + ASSERT_TRUE(ArrayStr::MultiValueMapping::maxValues() == numDocs - 1); + ArrayStr attr("a-string"); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Attribute + ASSERT_TRUE(WeightedSetStr::MultiValueMapping::maxValues() == numDocs - 1); + WeightedSetStr attr("ws-string", + Config(BasicType::STRING, CollectionType::WSET)); + testMultiValue(attr, numDocs); + } + { // Array String Posting Attribute + ASSERT_TRUE(ArrayStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ArrayStrPosting attr("a-fs-string", cfg); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Posting Attribute + ASSERT_TRUE(WeightedSetStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + WeightedSetStrPosting attr("ws-fs-string", cfg); + testMultiValue(attr, numDocs); + } + +} + + +template +void +StringAttributeTest::testMultiValue(Attribute & attr, uint32_t numDocs) +{ + EXPECT_TRUE(attr.getNumDocs() == 0); + + // generate two sets of unique strings + std::vector uniqueStrings; + 
uniqueStrings.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? "enum0%u" : "enum%u", i); + uniqueStrings.push_back(vespalib::string(unique)); + } + std::vector newUniques; + newUniques.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? "unique0%u" : "unique%u", i); + newUniques.push_back(vespalib::string(unique)); + } + + // add docs + addDocs(attr, numDocs); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, uniqueStrings[j], 1)); + } + attr.commit(); + } + + //attr.getEnumStore().printCurrentContent(); + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + EXPECT_TRUE(valueCount == doc); + + // test get first + if (valueCount == 0) { + EXPECT_TRUE(attr.get(doc) == NULL); + EXPECT_TRUE(attr.getEnum(doc) == std::numeric_limits::max()); + } else { + EXPECT_TRUE(strcmp(attr.get(doc), uniqueStrings[0].c_str()) == 0); + uint32_t e; + EXPECT_TRUE(attr.findEnum(uniqueStrings[0].c_str(), e)); + EXPECT_TRUE(attr.getEnum(doc) == e); + } + + // test get all + std::vector values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector enums(valueCount); + EXPECT_TRUE((static_cast(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == uniqueStrings[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check for correct refcounts + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(uniqueStrings[i].c_str(), idx)); + uint32_t 
expectedUsers = numDocs - 1 - i; + EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } + + typename Attribute::Histogram remaining = attr.getMultiValueMapping().getRemaining(); + for (typename Attribute::Histogram::const_iterator it(remaining.begin()), mt(remaining.end()); it != mt; ++it) { + EXPECT_TRUE(it->second == 0); + } + + // clear and insert new unique strings + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t oldValueCount = doc; + uint32_t valueCount = numDocs - 1 - doc; + //LOG(info, "clear and insert: doc = %u, valueCount = %u", doc, valueCount); + EXPECT_TRUE(attr.clearDoc(doc) == oldValueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, newUniques[j], 1)); + } + attr.commit(); + + //attr.getEnumStore().printCurrentContent(); + } + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + uint32_t expectedValueCount = numDocs - 1 - doc; + EXPECT_TRUE(valueCount == expectedValueCount); + + // test get all + std::vector values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector enums(valueCount); + EXPECT_TRUE((static_cast(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == newUniques[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check that enumXX strings are removed + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + uint32_t e; + EXPECT_TRUE(!attr.findEnum(uniqueStrings[i].c_str(), e)); + } + + // check for correct refcounts + for (uint32_t i = 0; i < newUniques.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(newUniques[i].c_str(), idx)); + uint32_t expectedUsers = numDocs - 1 - i; + 
EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } +} + +void +StringAttributeTest::testMultiValueMultipleClearDocBetweenCommit() +{ + // This is also tested for all array attributes in attribute unit test + ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector buffer(numDocs); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "first", 1)); + } + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "second", 1)); + } + mvsa.commit(); + + // check for correct values + checkCount(mvsa, doc, valueCount, valueCount, "second"); + } +} + + +void +StringAttributeTest::testMultiValueRemove() +{ + // This is also tested for all array attributes in attribute unit test + ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector buffer(9); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + EXPECT_TRUE(mvsa.append(doc, "one", 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(mvsa.append(doc, "three", 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(mvsa.append(doc, "five", 1)); + } + + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "zero", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "one", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 8, 0, "one"); + checkCount(mvsa, doc, 8, 3, "three"); + checkCount(mvsa, doc, 8, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "five", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 3, 0, "one"); + checkCount(mvsa, doc, 3, 3, "three"); + checkCount(mvsa, doc, 3, 0, "five"); + } +} + +void 
+StringAttributeTest::testSingleValue() +{ + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + SingleValueStringAttribute svsa("svsa", cfg); + const IAttributeVector * ia = &svsa; + EXPECT_TRUE(dynamic_cast(ia) != nullptr); + testSingleValue(svsa, cfg); + + SingleValueStringAttribute svsb("svsa", cfg); + testDefaultValueOnAddDoc(svsb); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + SingleValueStringPostingAttribute svsa("svspb", cfg); + testSingleValue(svsa, cfg); + + SingleValueStringPostingAttribute svsb("svspb", cfg); + testDefaultValueOnAddDoc(svsb); + } +} + +void StringAttributeTest::testDefaultValueOnAddDoc(AttributeVector & v) +{ + EXPECT_EQUAL(0u, v.getNumDocs()); + v.addReservedDoc(); + EXPECT_EQUAL(1u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(0)).valid() ); + uint32_t doc(7); + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_EQUAL(1u, doc); + EXPECT_EQUAL(2u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(doc)).valid() ); + EXPECT_EQUAL(0u, strlen(v.getString(doc, NULL, 0))); +} + +template +void +StringAttributeTest::testSingleValue(Attribute & svsa, Config &cfg) +{ + StringAttribute & v = svsa; + const char * t = "not defined"; + uint32_t doc = 2000; + uint32_t e1 = 2000; + uint32_t e2 = 2000; + uint32_t numDocs = 1000; + char tmp[32]; + + // add docs + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i ); + EXPECT_TRUE( v.getNumDocs() == i + 1 ); + EXPECT_TRUE( v.getValueCount(doc) == 1 ); + EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(doc)).valid() ); + } + + std::map enums; + // 10 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( v.update(i, tmp) ); + EXPECT_TRUE( v.getValueCount(i) == 1 ); + EXPECT_TRUE( ! 
EnumStoreBase::Index(v.getEnum(i)).valid() ); + if ((i % 10) == 9) { + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "enum%u", j % 10); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + if (enums.count(vespalib::string(t)) == 0) { + enums[vespalib::string(t)] = e1; + } else { + EXPECT_TRUE( e1 == enums[vespalib::string(t)]); + EXPECT_TRUE( e2 == enums[vespalib::string(t)]); + } + } + } + } + + //svsa.printBuffers(); + + // 1000 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "unique%u", i); + EXPECT_TRUE( v.update(i, tmp) ); + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( strcmp(v.get(i), tmp) == 0 ); + if ((i % 10) == 9) { + //LOG(info, "commit: i = %u", i); + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "unique%u", j); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + } + //svsa.printBuffers(); + } + } + //svsa.printBuffers(); + + // check that enumX strings are removed ( + for (uint32_t i = 0; i < 10; ++i) { + sprintf(tmp, "enum%u", i); + EXPECT_TRUE( !v.findEnum(tmp, e1) ); + } + + + Attribute load("load", cfg); + svsa.saveAs(load.getBaseFileName()); + load.load(); +} + + + +int +StringAttributeTest::Main() +{ + TEST_INIT("stringattribute_test"); + + testMultiValue(); + + testMultiValueMultipleClearDocBetweenCommit(); + + testMultiValueRemove(); + + testSingleValue(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::StringAttributeTest); diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh new file mode 100755 index 00000000000..d7ac263c1c9 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_stringattribute_test_app || exit $? # stop on test failure; otherwise the exit status of rm below is what the caller sees +rm -rf *.dat diff 
--git a/searchlib/src/tests/attribute/tensorattribute/.gitignore b/searchlib/src/tests/attribute/tensorattribute/.gitignore new file mode 100644 index 00000000000..08519fe7ae8 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/.gitignore @@ -0,0 +1 @@ +searchlib_tensorattribute_test_app diff --git a/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt new file mode 100644 index 00000000000..ec16b4363eb --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensorattribute_test_app + SOURCES + tensorattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensorattribute_test_app COMMAND sh tensorattribute_test.sh) diff --git a/searchlib/src/tests/attribute/tensorattribute/DESC b/searchlib/src/tests/attribute/tensorattribute/DESC new file mode 100644 index 00000000000..1cd9aa7cf14 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/DESC @@ -0,0 +1 @@ +Unit tests for TensorAttribute. diff --git a/searchlib/src/tests/attribute/tensorattribute/FILES b/searchlib/src/tests/attribute/tensorattribute/FILES new file mode 100644 index 00000000000..1c8480ffde7 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/FILES @@ -0,0 +1 @@ +tensorattribute_test.cpp diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp new file mode 100644 index 00000000000..137f93bcffe --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -0,0 +1,217 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("tensorattribute_test"); +#include +#include +#include +#include +#include +#include + +using search::attribute::TensorAttribute; +using search::AttributeGuard; +using search::AttributeVector; +using vespalib::tensor::Tensor; +using vespalib::tensor::TensorCells; +using vespalib::tensor::TensorDimensions; +using vespalib::tensor::TensorFactory; +using vespalib::tensor::TensorType; +using vespalib::tensor::SimpleTensorBuilder; + +namespace vespalib { +namespace tensor { + +static bool operator==(const Tensor &lhs, const Tensor &rhs) +{ + return lhs.equals(rhs); +} + +} +} + + +struct Fixture +{ + using BasicType = search::attribute::BasicType; + using CollectionType = search::attribute::CollectionType; + using Config = search::attribute::Config; + + Config _cfg; + vespalib::string _name; + std::shared_ptr _tensorAttr; + std::shared_ptr _attr; + vespalib::tensor::DefaultTensor::builder _builder; + + Fixture(const vespalib::string &typeSpec) + : _cfg(BasicType::TENSOR, CollectionType::SINGLE), + _name("test"), + _tensorAttr(), + _attr() + { + _cfg.setTensorType(TensorType::fromSpec(typeSpec)); + _tensorAttr = std::make_shared(_name, _cfg); + _attr = _tensorAttr; + _attr->addReservedDoc(); + } + + Tensor::UP createTensor(const TensorCells &cells) { + return TensorFactory::create(cells, _builder); + } + Tensor::UP createTensor(const TensorCells &cells, + const TensorDimensions &dimensions) { + return TensorFactory::create(cells, dimensions, _builder); + } + + void ensureSpace(uint32_t docId) { + while (_attr->getNumDocs() <= docId) { + uint32_t newDocId = 0u; + _attr->addDoc(newDocId); + _attr->commit(); + } + } + + void clearTensor(uint32_t docId) { + ensureSpace(docId); + _tensorAttr->clearDoc(docId); + _attr->commit(); + } + + void setTensor(uint32_t docId, const Tensor &tensor) { + ensureSpace(docId); + _tensorAttr->setTensor(docId, tensor); + _attr->commit(); + } + + search::attribute::Status getStatus() { + _attr->commit(true); + 
return _attr->getStatus(); + } + + void + assertGetNoTensor(uint32_t docId) { + AttributeGuard guard(_attr); + Tensor::UP actTensor = _tensorAttr->getTensor(docId); + EXPECT_FALSE(actTensor); + } + + void + assertGetTensor(const Tensor &expTensor, uint32_t docId) + { + AttributeGuard guard(_attr); + Tensor::UP actTensor = _tensorAttr->getTensor(docId); + EXPECT_TRUE(static_cast(actTensor)); + EXPECT_EQUAL(expTensor, *actTensor); + } + + void + assertGetTensor(const TensorCells &expCells, + const TensorDimensions &expDimensions, + uint32_t docId) + { + Tensor::UP expTensor = createTensor(expCells, expDimensions); + assertGetTensor(*expTensor, docId); + } + + void save() { + bool saveok = _attr->save(); + EXPECT_TRUE(saveok); + } + + void load() { + _tensorAttr = std::make_shared(_name, _cfg); + _attr = _tensorAttr; + bool loadok = _attr->load(); + EXPECT_TRUE(loadok); + } +}; + + +TEST_F("Test empty tensor attribute", Fixture("tensor()")) +{ + EXPECT_EQUAL(1u, f._attr->getNumDocs()); + EXPECT_EQUAL(1u, f._attr->getCommittedDocIdLimit()); +} + + +TEST_F("Test setting tensor value", Fixture("tensor(x{}, y{})")) +{ + f.ensureSpace(4); + EXPECT_EQUAL(5u, f._attr->getNumDocs()); + EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit()); + TEST_DO(f.assertGetNoTensor(4)); + f.setTensor(4, *f.createTensor({}, {})); + TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4)); + f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"})); + TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3)); + TEST_DO(f.assertGetNoTensor(2)); + TEST_DO(f.clearTensor(3)); + TEST_DO(f.assertGetNoTensor(3)); +} + + +TEST_F("Test saving / loading tensor attribute", Fixture("tensor(x{}, y{})")) +{ + f.ensureSpace(4); + f.setTensor(4, *f.createTensor({}, {})); + f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"})); + TEST_DO(f.save()); + TEST_DO(f.load()); + EXPECT_EQUAL(5u, f._attr->getNumDocs()); + EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit()); + TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 
3)); + TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4)); + TEST_DO(f.assertGetNoTensor(2)); +} + + +TEST_F("Test compaction of tensor attribute", Fixture("tensor(x{}, y{})")) +{ + f.ensureSpace(4); + Tensor::UP emptytensor = f.createTensor({}, {}); + Tensor::UP emptyxytensor = f.createTensor({}, {"x", "y"}); + Tensor::UP simpletensor = f.createTensor({ {{}, 3} }, { "x", "y"}); + Tensor::UP filltensor = f.createTensor({ {{}, 5} }, { "x", "y"}); + f.setTensor(4, *emptytensor); + f.setTensor(3, *simpletensor); + f.setTensor(2, *filltensor); + f.clearTensor(2); + f.setTensor(2, *filltensor); + search::attribute::Status oldStatus = f.getStatus(); + search::attribute::Status newStatus = oldStatus; + uint64_t iter = 0; + uint64_t iterLimit = 100000; + for (; iter < iterLimit; ++iter) { + f.clearTensor(2); + f.setTensor(2, *filltensor); + newStatus = f.getStatus(); + if (newStatus.getUsed() < oldStatus.getUsed()) { + break; + } + oldStatus = newStatus; + } + EXPECT_GREATER(iterLimit, iter); + LOG(info, + "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64, + iter, oldStatus.getUsed(), newStatus.getUsed()); + TEST_DO(f.assertGetNoTensor(1)); + TEST_DO(f.assertGetTensor(*filltensor, 2)); + TEST_DO(f.assertGetTensor(*simpletensor, 3)); + TEST_DO(f.assertGetTensor(*emptyxytensor, 4)); +} + +TEST_F("Test tensortype file header tag", Fixture("tensor(x[10])")) +{ + f.ensureSpace(4); + TEST_DO(f.save()); + + vespalib::FileHeader header; + FastOS_File file; + EXPECT_TRUE(file.OpenReadOnly("test.dat")); + (void) header.readFile(file); + file.Close(); + EXPECT_TRUE(header.hasTag("tensortype")); + EXPECT_EQUAL("tensor(x[10])", header.getTag("tensortype").asString()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh new file mode 100644 index 00000000000..2e940d5d99a --- /dev/null +++ 
b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_tensorattribute_test_app || exit $? # stop on test failure; otherwise the exit status of rm below is what the caller sees +rm -rf *.dat diff --git a/searchlib/src/tests/bitcompression/expgolomb/.gitignore b/searchlib/src/tests/bitcompression/expgolomb/.gitignore new file mode 100644 index 00000000000..5ba0f36a2f0 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/.gitignore @@ -0,0 +1 @@ +searchlib_expgolomb_test_app diff --git a/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt new file mode 100644 index 00000000000..f724773dfd6 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_expgolomb_test_app + SOURCES + expgolomb_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_expgolomb_test_app NO_VALGRIND COMMAND searchlib_expgolomb_test_app) diff --git a/searchlib/src/tests/bitcompression/expgolomb/DESC b/searchlib/src/tests/bitcompression/expgolomb/DESC new file mode 100644 index 00000000000..4abef0ecf24 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/DESC @@ -0,0 +1 @@ +Exp golomb encoding / decoding test. Take a look at expgolomb_test.cpp for details. 
diff --git a/searchlib/src/tests/bitcompression/expgolomb/FILES b/searchlib/src/tests/bitcompression/expgolomb/FILES new file mode 100644 index 00000000000..dbc3fa5e527 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/FILES @@ -0,0 +1 @@ +expgolomb_test.cpp diff --git a/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp new file mode 100644 index 00000000000..dcf0f69ee55 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/expgolomb_test.cpp @@ -0,0 +1,621 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP("expgolomb_test"); /* log component name, spelled like the test itself */ +#include +#include +#include + +using search::bitcompression::DecodeContext64; +using search::bitcompression::DecodeContext64Base; +using search::bitcompression::EncodeContext64; +using search::bitcompression::EncodeContext64Base; + +template +class DecodeContext : public DecodeContext64 +{ +public: + using Parent = DecodeContext64; + using Parent::defineReadOffset; + using EC = EncodeContext64; + + DecodeContext(const uint64_t *compr, int bitOffset) + : DecodeContext64(compr, bitOffset) + { + this->defineReadOffset(0); + } +}; + + +class IDecodeFunc +{ +public: + virtual uint64_t decode() = 0; + virtual void skip() = 0; + virtual uint64_t decodeSmall() = 0; + virtual uint64_t decodeSmallApply() = 0; + virtual void skipSmall() = 0; + + virtual ~IDecodeFunc() { } + +}; + + +/* + * Exp golomb decode functions getting kValue from a variable, i.e. + * compiler is not allowed to generate shift instructions with immediate values. + * Expressions involving kValue are not constant and can thus not be + * folded to constant values. 
+ */ +template +class DecodeExpGolombVarK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext; + using EC = typename DC::EC; + + DCB &_dc; + int _kValue; + + DecodeExpGolombVarK(DCB &dc, int kValue) + : _dc(dc), + _kValue(kValue) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + static std::unique_ptr + make(DCB &dc, int kValue) + { + return std::unique_ptr + (new DecodeExpGolombVarK(dc, kValue)); + } +}; + + +/* + * Exp golomb decode functions getting kValue from a template argument + * i.e. compiler is allowed to generate shift instructions with + * immediate values and fold constant expressions involving kValue. 
+ */ +template +class DecodeExpGolombConstK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext; + using EC = typename DC::EC; + + DCB &_dc; + + DecodeExpGolombConstK(DCB &dc) + : _dc(dc) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + static std::unique_ptr + make(DCB &dc, int) + { + return std::unique_ptr + (new DecodeExpGolombConstK(dc)); + } +}; + + +using IDecodeFuncFactory = + std::unique_ptr (*)(DecodeContext64Base &dc, int kValue); + + +template +class DecodeFuncFactories +{ +public: + using IDF = IDecodeFuncFactory; + std::vector _constK; + IDF _varK; + +public: + DecodeFuncFactories(); + + void + addConstKFactory(int kValue, IDecodeFuncFactory factory) + { + assert(static_cast(kValue) == _constK.size()); + _constK.push_back(factory); + } + + IDecodeFuncFactory + getConstKFactory(int kValue) const + { + assert(kValue >= 0 && + static_cast(kValue) < _constK.size()); + return _constK[kValue]; + } + + IDecodeFuncFactory + getVarKFactory() const + { + return _varK; + } +}; + + +template +struct RegisterFactoryPtr; + + +template +using RegisterFactory = void (*)(DecodeFuncFactories 
&factories, + RegisterFactoryPtr &ptr); + + +template +struct RegisterFactoryPtr +{ + RegisterFactory _ptr; + + RegisterFactoryPtr(RegisterFactory ptr) + : _ptr(ptr) + { + } +}; + + +template +class RegisterFactories +{ +public: + static void registerFactory(DecodeFuncFactories &factories, + RegisterFactoryPtr &ptr) + { + factories.addConstKFactory(kValue, + &DecodeExpGolombConstK:: + make); + ptr._ptr = &RegisterFactories::registerFactory; + } +}; + + +template +class RegisterFactories +{ +public: + static void registerFactory(DecodeFuncFactories &factories, + RegisterFactoryPtr &ptr) + { + (void) factories; + ptr._ptr = nullptr; + } +}; + + +template +DecodeFuncFactories::DecodeFuncFactories() + : _constK(), + _varK(&DecodeExpGolombVarK::make) +{ + RegisterFactoryPtr f( + &RegisterFactories::registerFactory); + while (f._ptr) { + (*f._ptr)(*this, f); + } +} + + +class TestFixtureBase +{ +public: + std::vector _randNums; + using EC = EncodeContext64Base; + + void fillRandNums(); + + void + calcBoundaries(int kValue, bool small, std::vector &v); + + void + testBoundaries(int kValue, bool small, + std::vector &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc &dfApply); + + void + testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip); +}; + + +void +TestFixtureBase::fillRandNums() +{ + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + _randNums.push_back(rval); + } + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + uint32_t bits = (rand() & 63); + rval &= ((UINT64_C(1) << bits) - 1); + _randNums.push_back(rval); + } +} + + +namespace +{ + +/* + * Add values around a calculated boundary, to catch off by one errors. + */ +void +addBoundary(uint64_t boundary, uint64_t maxVal, std::vector &v) +{ + uint64_t low = boundary > 2u ? 
boundary - 2 : 0; + uint64_t high = maxVal - 2u < boundary ? maxVal : boundary + 2; + assert(low <= high); + LOG(info, "low=0x%lx, high=0x%lx", low, high); + uint64_t i = low; + for (;;) { + v.push_back(i); + if (i == high) + break; + ++i; + } +} + +} + +void +TestFixtureBase::calcBoundaries(int kValue, bool small, + std::vector &v) +{ + const char *smallStr = small ? "small" : "not small"; + v.push_back(0); + uint64_t maxVal = EC::maxExpGolombVal(kValue); // encode method limit + if (small) { + maxVal = EC::maxExpGolombVal(kValue, 64); + } + LOG(debug, "kValue=%u, %s, maxVal is 0x%lx", kValue, smallStr, maxVal); + for (int bits = kValue + 1; + bits + kValue <= 128 && (bits <= 64 || !small); + ++bits) { + uint64_t boundary = EC::maxExpGolombVal(kValue, bits); + if (bits + kValue == 128) { + LOG(debug, + "boundary for kValue=%d, %s, bits=%d: 0x%lx", + kValue, smallStr, bits, boundary); + } + addBoundary(boundary, maxVal, v); + } + std::sort(v.begin(), v.end()); + auto ve = std::unique(v.begin(), v.end()); + uint32_t oldSize = v.size(); + v.resize(ve - v.begin()); + uint32_t newSize = v.size(); + LOG(debug, + "kValues=%u, %s, boundaries %u -> %u, maxVal=0x%lx, highest=0x%lx", + kValue, smallStr, oldSize, newSize, maxVal, v.back()); +} + + +void +TestFixtureBase::testBoundaries(int kValue, bool small, + std::vector &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc &dfApply) +{ + uint32_t bits = 0; + uint64_t maxSame = 0; + + for (auto num : v) { + uint64_t prevPos = dc.getReadOffset(); + uint64_t val64 = small ? 
df.decodeSmall() : df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + if (small) { + dfSkip.skipSmall(); + } else { + dfSkip.skip(); + } + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + if (small) { + uint64_t sval64 = dfApply.decodeSmallApply(); + EXPECT_EQUAL(num, sval64); + EXPECT_EQUAL(currPos, dcApply.getReadOffset()); + } + if (num == 0) { + bits = currPos - prevPos; + maxSame = EC::maxExpGolombVal(kValue, bits); + } else { + assert(bits <= currPos - prevPos); + if (bits < currPos - prevPos) { + ASSERT_EQUAL(bits + 2, currPos - prevPos); + bits += 2; + ASSERT_EQUAL(maxSame + 1, num); + maxSame = EC::maxExpGolombVal(kValue, bits); + } + } + } +} + + +void +TestFixtureBase::testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip) +{ + for (auto num : _randNums) { + uint64_t val64 = df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + dfSkip.skip(); + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + } +} + + + +template +class TestFixture : public TestFixtureBase +{ +public: + DecodeFuncFactories _factories; + using DC = DecodeContext; + using EC = typename DC::EC; + using Parent = TestFixtureBase; + using Parent::testBoundaries; + using Parent::testRandNums; + + TestFixture() + : TestFixtureBase(), + _factories() + { + fillRandNums(); + } + + void + testBoundaries(int kValue, bool small, + std::vector &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + void + testBoundaries(int kValue, bool small, std::vector &v); + + void + testBoundaries(); + + void + testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + + void + testRandNums(int kValue); + + void + testRandNums(); +}; + + +template +void +TestFixture::testBoundaries(int kValue, bool small, + std::vector &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast(wc._comprBuf), 0); + DC 
dcSkip(static_cast(wc._comprBuf), 0); + DC dcApply(static_cast(wc._comprBuf), 0); + std::unique_ptr df((*f)(dc, kValue)); + std::unique_ptr dfSkip((*f)(dcSkip, kValue)); + std::unique_ptr dfApply((*f)(dcApply, kValue)); + testBoundaries(kValue, small, v, dc, dcSkip, dcApply, + *df, *dfSkip, *dfApply); +} + + +template +void +TestFixture::testBoundaries(int kValue, bool small, + std::vector &v) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : v) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testBoundaries(kValue, small, v, f, wc); + f = _factories.getVarKFactory(); + testBoundaries(kValue, small, v, f, wc); +} + + +template +void +TestFixture::testBoundaries() +{ + for (int kValue = 0; kValue < 64; ++kValue) { + std::vector v; + calcBoundaries(kValue, false, v); + testBoundaries(kValue, false, v); + /* + * Note: We don't support kValue being 63 for when decoding + * "small" numbers (limited to 64 bits in encoded form) since + * performance penalty is not worth the extra flexibility. 
+ */ + if (kValue < 63) { + v.clear(); + calcBoundaries(kValue, true, v); + testBoundaries(kValue, true, v); + } + } +} + + +template +void +TestFixture::testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast(wc._comprBuf), 0); + DC dcSkip(static_cast(wc._comprBuf), 0); + std::unique_ptr df((*f)(dc, kValue)); + std::unique_ptr dfSkip((*f)(dcSkip, kValue)); + testRandNums(dc, dcSkip, *df, *dfSkip); +} + + +template +void +TestFixture::testRandNums(int kValue) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : _randNums) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testRandNums(kValue, f, wc); + f = _factories.getVarKFactory(); + testRandNums(kValue, f, wc); +} + + +template +void +TestFixture::testRandNums() +{ + for (int k = 0; k < 64; ++k) { + testRandNums(k); + } +} + + +TEST_F("Test bigendian expgolomb encoding/decoding", TestFixture) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_F("Test little expgolomb encoding/decoding", TestFixture) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/bitvector/.gitignore b/searchlib/src/tests/bitvector/.gitignore new file mode 100644 index 00000000000..21aed8ce6b2 --- /dev/null +++ b/searchlib/src/tests/bitvector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +bitvectorbenchmark_test +searchlib_bitvectorbenchmark_test_app diff --git a/searchlib/src/tests/bitvector/CMakeLists.txt b/searchlib/src/tests/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..7edae6f7cc4 --- /dev/null +++ b/searchlib/src/tests/bitvector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_bitvectorbenchmark_test_app + SOURCES + bitvectorbenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvectorbenchmark_test_app COMMAND searchlib_bitvectorbenchmark_test_app BENCHMARK) diff --git a/searchlib/src/tests/bitvector/DESC b/searchlib/src/tests/bitvector/DESC new file mode 100644 index 00000000000..1a6c0fc2959 --- /dev/null +++ b/searchlib/src/tests/bitvector/DESC @@ -0,0 +1 @@ +This is a test for the BitVector class. diff --git a/searchlib/src/tests/bitvector/FILES b/searchlib/src/tests/bitvector/FILES new file mode 100644 index 00000000000..0688c3933eb --- /dev/null +++ b/searchlib/src/tests/bitvector/FILES @@ -0,0 +1 @@ +bitvectorbenchmark.cpp diff --git a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp new file mode 100644 index 00000000000..c9b962495f4 --- /dev/null +++ b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp @@ -0,0 +1,225 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include + +LOG_SETUP("bitvectorbenchmark"); + +namespace search { + +class BitVectorBenchmark : public FastOS_Application +{ +private: + std::vector _bv; + std::vector _bvc; + void testCountSpeed1(); + void testCountSpeed2(); + void testCountSpeed3(); + void testOrSpeed1(); + void testOrSpeed2(); + static void usage(); + void init(size_t n); +public: + BitVectorBenchmark(); + ~BitVectorBenchmark(); + int Main(); +}; + +BitVectorBenchmark::BitVectorBenchmark() : + _bv() +{ +} + +BitVectorBenchmark::~BitVectorBenchmark() +{ + for(size_t i(0); i < _bv.size(); i++) { + delete _bv[i]; + } +} + +void BitVectorBenchmark::usage() +{ + std::cout << "usage: bitvectorbenchmark [-n numBits] [-t operation]" << std::endl; +} + +void BitVectorBenchmark::init(size_t n) +{ + BitVector *a(BitVector::create(n).release()); + BitVector *b(BitVector::create(n).release()); + srand(1); + for(size_t i(0), j(0); i < n; i += rand()%10, j++) { + a->flip(i); + } + for(size_t i(0), j(0); i < n; i += rand()%10, j++) { + b->flip(i); + } + a->invalidateCachedCount(); + b->invalidateCachedCount(); + _bv.push_back(a); + _bvc.push_back(a->countTrueBits()); + _bv.push_back(b); + _bvc.push_back(b->countTrueBits()); +} + +void BitVectorBenchmark::testOrSpeed1() +{ + _bv[0]->orWith(*_bv[1]); +} + +void BitVectorBenchmark::testCountSpeed1() +{ + _bv[0]->invalidateCachedCount(); + unsigned int cnt = _bv[0]->countTrueBits(); + assert(cnt == _bvc[0]); /* compare with the count recorded by init(); '=' overwrote cnt and only tested for non-zero */ + (void) cnt; +} + +static int bitTab[256] = { + 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 +}; + +void BitVectorBenchmark::testCountSpeed2() +{ + const unsigned char * p = reinterpret_cast(_bv[0]->getStart()); + size_t sz = _bv[0]->size()/8; + size_t sum0(0); + size_t sum1(0); + size_t sum2(0); + size_t sum3(0); + for (size_t i(0); i < sz; i+=4) { + sum0 += bitTab[p[i+0]]; + sum1 += bitTab[p[i+1]]; + sum2 += bitTab[p[i+2]]; + sum3 += bitTab[p[i+3]]; + } + assert(sum0 + sum1 + sum2 + sum3 == _bvc[0]); +} + + +static int +popCount(unsigned int bits) +{ + unsigned int odd = bits & 0x55555555; + unsigned int even = bits & 0xaaaaaaaa; + bits = odd + (even >> 1); + odd = bits & 0x33333333; + even = bits & 0xcccccccc; + bits = odd + (even >> 2); + odd = bits & 0x0f0f0f0f; + even = bits & 0xf0f0f0f0; + bits = odd + (even >> 4); + odd = bits & 0x00ff00ff; + even = bits & 0xff00ff00; + bits = odd + (even >> 8); + odd = bits & 0x0000ffff; + even = bits & 0xffff0000; + bits = odd + (even >> 16); + return bits; +} + + +void +BitVectorBenchmark::testCountSpeed3() +{ + const unsigned int * p = static_cast(_bv[0]->getStart()); + const unsigned int * pe = p + (_bv[0]->size()/(sizeof(uint32_t)*8)); + size_t sum(0); + for (; p < pe; ++p) { + sum += popCount(*p); + } + assert(sum == _bvc[0]); +} + +void BitVectorBenchmark::testOrSpeed2() +{ + typedef uint64_t T; + T * a = reinterpret_cast(_bv[0]->getStart()); + const T * b = reinterpret_cast(_bv[1]->getStart()); + size_t sz = _bv[0]->size()/(8*sizeof(*a)); + for (size_t i(0); i < sz; i+=2) { + a[i] |= b[i]; + a[i+1] |= b[i+1]; + // a[i+2] |= b[i+2]; + // a[i+3] |= b[i+3]; + } +} + +int BitVectorBenchmark::Main() +{ + int idx = 1; + std::string operation; + size_t numBits(8*1000000); + char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("n:t:", arg, idx)) != -1) { + switch (opt) { + case 'n': + numBits = strtoll(arg, NULL, 10); + break; + case 't': + operation = arg; + break; + default: + optError = 
true; + break; + } + } + + if ((_argc != idx ) || optError) { + usage(); + return -1; + } + + init(numBits); + for (size_t i(0); i < operation.size(); i++) { + char op(operation[i]); + size_t splitBits1 = rand() % numBits; + size_t splitBits2 = rand() % numBits; + if (splitBits1 > splitBits2) + std::swap(splitBits1, splitBits2); + for (size_t j(0); j < 1000; j++) { + if (op == 'c') { + testCountSpeed1(); + } else if (op == 'd') { + testCountSpeed2(); + } else if (op == 'e') { + testCountSpeed3(); + } else if (op == 'o') { + testOrSpeed1(); + } else if (op == 'p') { + testOrSpeed2(); + } else { + std::cerr << "Unknown operation " << op << std::endl; + } + } + } + + return 0; +} +} + +int main(int argc, char ** argv) +{ + search::BitVectorBenchmark myapp; + return myapp.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/btree/.gitignore b/searchlib/src/tests/btree/.gitignore new file mode 100644 index 00000000000..a6bdd572c7d --- /dev/null +++ b/searchlib/src/tests/btree/.gitignore @@ -0,0 +1,3 @@ +iteratespeed +searchlib_btreeaggregation_test_app +searchlib_iteratespeed_app diff --git a/searchlib/src/tests/btree/CMakeLists.txt b/searchlib/src/tests/btree/CMakeLists.txt new file mode 100644 index 00000000000..d88953d43fd --- /dev/null +++ b/searchlib/src/tests/btree/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_btreeaggregation_test_app + SOURCES + btreeaggregation_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btreeaggregation_test_app COMMAND searchlib_btreeaggregation_test_app) +vespa_add_executable(searchlib_iteratespeed_app + SOURCES + iteratespeed.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_iteratespeed_app COMMAND searchlib_iteratespeed_app BENCHMARK) diff --git a/searchlib/src/tests/btree/DESC b/searchlib/src/tests/btree/DESC new file mode 100644 index 00000000000..da074ca2c45 --- /dev/null +++ b/searchlib/src/tests/btree/DESC @@ -0,0 +1 @@ +btree aggregation test. Take a look at btreeaggregation_test.cpp for details. diff --git a/searchlib/src/tests/btree/FILES b/searchlib/src/tests/btree/FILES new file mode 100644 index 00000000000..45756255961 --- /dev/null +++ b/searchlib/src/tests/btree/FILES @@ -0,0 +1 @@ +btreeaggregation_test.cpp diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp new file mode 100644 index 00000000000..bb8e86ef49d --- /dev/null +++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp @@ -0,0 +1,1146 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("btreeaggregation_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using vespalib::GenerationHandler; + +namespace search { +namespace btree { + +namespace { + +int32_t +toVal(uint32_t key) +{ + return key + 1000; +} + +int32_t +toHighVal(uint32_t key) +{ + return toVal(key) + 1000; +} + +int32_t +toLowVal(uint32_t key) +{ + return toVal(key) - 1000000; +} + +int32_t +toNotVal(uint32_t key) +{ + return key + 2000; +} + +template +void +aggrToStr(std::stringstream &ss, const AggrT &aggr) +{ + (void) aggr; + ss << "[noaggr]"; +} + +template <> +void +aggrToStr(std::stringstream &ss, + const MinMaxAggregated &aggr) +{ + ss << "[min=" << aggr.getMin() << ",max=" << aggr.getMax() << "]"; +} + + +template +void +leafNodeToStr(std::stringstream &ss, const LeafNode &n) +{ + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":" << n.getData(i); + } + aggrToStr(ss, n.getAggregated()); + ss << "]"; +} + +template +void +nodeToStr(std::stringstream &ss, const BTreeNode::Ref &node, + const NodeAllocator &allocator) +{ + if (!node.valid()) { + ss << "[]"; + return; + } + if (allocator.isLeafRef(node)) { + leafNodeToStr(ss, *allocator.mapLeafRef(node)); + return; + } + const InternalNode &n(*allocator.mapInternalRef(node)); + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":"; + nodeToStr(ss, n.getChild(i), allocator); + } + aggrToStr(ss, n.getAggregated()); + ss << "]"; +} + + +template +void +treeToStr(std::stringstream &ss, const Tree &t) +{ + nodeToStr(ss, t.getRoot(), t.getAllocator()); +} + + +} + +typedef BTreeTraits<4, 4, 31, false> MyTraits; + +#define KEYWRAP + +#ifdef KEYWRAP + +// Force use of functor to compare keys. 
+class WrapInt +{ +public: + int _val; + WrapInt(int val) : _val(val) {} + WrapInt(void) : _val(0) {} + bool operator==(const WrapInt & rhs) const { return _val == rhs._val; } +}; + +std::ostream & +operator<<(std::ostream &s, const WrapInt &i) +{ + s << i._val; + return s; +} + +typedef WrapInt MyKey; +class MyComp +{ +public: + bool + operator()(const WrapInt &a, const WrapInt &b) const + { + return a._val < b._val; + } +}; + +#define UNWRAP(key) (key._val) +#else +typedef int MyKey; +typedef std::less MyComp; +#define UNWRAP(key) (key) +#endif + +typedef BTree MyTree; +typedef BTreeStore MyTreeStore; +typedef MyTree::Builder MyTreeBuilder; +typedef MyTree::LeafNodeType MyLeafNode; +typedef MyTree::InternalNodeType MyInternalNode; +typedef MyTree::NodeAllocatorType MyNodeAllocator; +typedef MyTree::Builder::Aggregator MyAggregator; +typedef MyTree::AggrCalcType MyAggrCalc; +typedef std::pair LeafPair; +typedef MyTreeStore::KeyDataType MyKeyData; +typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair; + +typedef BTree SetTreeB; + +typedef BTreeTraits<16, 16, 10, false> LSeekTraits; +typedef BTree, LSeekTraits> SetTreeL; + +struct LeafPairLess { + bool operator()(const LeafPair & lhs, const LeafPair & rhs) const { + return UNWRAP(lhs.first) < UNWRAP(rhs.first); + } +}; + + +class MockTree +{ +public: + typedef std::map MTree; + typedef std::map > MRTree; + MTree _tree; + MRTree _rtree; + + MockTree() + : _tree(), + _rtree() + { + } + + + void + erase(uint32_t key) + { + MTree::iterator it(_tree.find(key)); + if (it == _tree.end()) + return; + int32_t oval = it->second; + MRTree::iterator rit(_rtree.find(oval)); + assert(rit != _rtree.end()); + size_t ecount = rit->second.erase(key); + assert(ecount == 1); + (void) ecount; + if (rit->second.empty()) { + _rtree.erase(oval); + } + _tree.erase(key); + } + + void + insert(uint32_t key, int32_t val) + { + erase(key); + _tree[key] = val; + _rtree[val].insert(key); + } +}; + + +class MyTreeForceApplyStore : public 
MyTreeStore +{ +public: + typedef MyComp CompareT; + + bool + insert(EntryRef &ref, const KeyType &key, const DataType &data, + CompareT comp = CompareT()); + + bool + remove(EntryRef &ref, const KeyType &key, CompareT comp = CompareT()); +}; + + +bool +MyTreeForceApplyStore::insert(EntryRef &ref, + const KeyType &key, const DataType &data, + CompareT comp) +{ + bool retVal = true; + if (ref.valid()) { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + const NodeAllocatorType &allocator = getAllocator(); + Iterator itr = tree->find(key, allocator, comp); + if (itr.valid()) + retVal = false; + } else { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi < olde && !comp(key, oldi->_key)) + retVal = false; // key already present + } + } + KeyDataType addition(key, data); + if (retVal) { + apply(ref, &addition, &addition+1, NULL, NULL, comp); + } + return retVal; +} + + +bool +MyTreeForceApplyStore::remove(EntryRef &ref, const KeyType &key, + CompareT comp) +{ + bool retVal = true; + if (!ref.valid()) + retVal = false; // not found + else { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + const NodeAllocatorType &allocator = getAllocator(); + Iterator itr = tree->find(key, allocator, comp); + if (!itr.valid()) + retVal = false; + } else { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi == olde || comp(key, oldi->_key)) + retVal = false; // not found + } + } + std::vector additions; + std::vector removals; + removals.push_back(key); + apply(ref, /* data() is well-defined on an empty vector; &v[0] and &v[v.size()] are not */ + additions.data(), additions.data() + additions.size(), + removals.data(), 
removals.data() + removals.size(), + comp); + return retVal; +} + + +template +void +freezeTree(GenerationHandler &g, ManagerType &m) +{ + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +template +void +cleanup(GenerationHandler &g, ManagerType &m) +{ + freezeTree(g, m); +} + +template +void +cleanup(GenerationHandler & g, + ManagerType & m, + BTreeNode::Ref n1Ref, NodeType * n1, + BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL) +{ + assert(ManagerType::isValidRef(n1Ref)); + m.holdNode(n1Ref, n1); + if (n2 != NULL) { + assert(ManagerType::isValidRef(n2Ref)); + m.holdNode(n2Ref, n2); + } else { + assert(!ManagerType::isValidRef(n2Ref)); + } + cleanup(g, m); +} + +class Test : public vespalib::TestApp { +private: + template + bool + assertTree(const std::string & exp, const Tree &t); + + template + bool + assertAggregated(const MockTree &m, const Tree &t); + + template + bool + assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref); + + void + buildSubTree(const std::vector &sub, + size_t numEntries); + + void requireThatNodeInsertWorks(); + void requireThatNodeSplitInsertWorks(); + void requireThatNodeStealWorks(); + void requireThatNodeRemoveWorks(); + void requireThatWeCanInsertAndRemoveFromTree(); + void requireThatSortedTreeInsertWorks(); + void requireThatCornerCaseTreeFindWorks(); + void requireThatBasicTreeIteratorWorks(); + void requireThatTreeIteratorAssignWorks(); + void requireThatUpdateOfKeyWorks(); + void requireThatUpdateOfDataWorks(); + + template + void + requireThatSmallNodesWorks(); +public: + int Main(); +}; + + +template +bool +Test::assertTree(const std::string & exp, const Tree &t) +{ + std::stringstream ss; + treeToStr(ss, t); + if (!EXPECT_EQUAL(exp, ss.str())) return false; + return true; +} + + +template +bool +Test::assertAggregated(const MockTree &m, const Tree &t) +{ + const MinMaxAggregated &ta(t.getAggregated()); + if 
(t.getRoot().valid()) { + return + EXPECT_FALSE(m._rtree.empty()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + ta.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + ta.getMin()); + } else { + return EXPECT_TRUE(m._rtree.empty()) && + EXPECT_EQUAL(std::numeric_limits::min(), + ta.getMax()) && + EXPECT_EQUAL(std::numeric_limits::max(), + ta.getMin()); + } +} + +template +bool +Test::assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref) +{ + typename TreeStore::Iterator i(s.begin(ref)); + MinMaxAggregated sa(s.getAggregated(ref)); + const MinMaxAggregated &ia(i.getAggregated()); + if (ref.valid()) { + return + EXPECT_FALSE(m._rtree.empty()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + ia.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + ia.getMin()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + sa.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + sa.getMin()); + } else { + return EXPECT_TRUE(m._rtree.empty()) && + EXPECT_EQUAL(std::numeric_limits::min(), + ia.getMax()) && + EXPECT_EQUAL(std::numeric_limits::max(), + ia.getMin()) && + EXPECT_EQUAL(std::numeric_limits::min(), + sa.getMax()) && + EXPECT_EQUAL(std::numeric_limits::max(), + sa.getMin()); + } +} + + +void +Test::requireThatNodeInsertWorks() +{ + MyTree t; + t.insert(20, 102); + EXPECT_TRUE(assertTree("[20:102[min=102,max=102]]", t)); + t.insert(10, 101); + EXPECT_TRUE(assertTree("[10:101,20:102[min=101,max=102]]", t)); + t.insert(30, 103); + t.insert(40, 104); + EXPECT_TRUE(assertTree("[10:101,20:102,30:103,40:104" + "[min=101,max=104]]", t)); +} + +void +getLeafNode(MyTree &t) +{ + t.insert(1, 101); + t.insert(3, 103); + t.insert(5, 105); + t.insert(7, 107); +// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t)); +} + +void +Test::requireThatNodeSplitInsertWorks() +{ + { // new entry in current node + MyTree t; + getLeafNode(t); + t.insert(4, 104); + EXPECT_TRUE(assertTree("[4:" + "[1:101,3:103,4:104[min=101,max=104]]" + ",7:" + 
"[5:105,7:107[min=105,max=107]]" + "[min=101,max=107]]", t)); + } + { // new entry in split node + MyTree t; + getLeafNode(t); + t.insert(6, 106); + EXPECT_TRUE(assertTree("[5:" + "[1:101,3:103,5:105[min=101,max=105]]" + ",7:" + "[6:106,7:107[min=106,max=107]]" + "[min=101,max=107]]", t)); + } + { // new entry at end + MyTree t; + getLeafNode(t); + t.insert(8, 108); + EXPECT_TRUE(assertTree("[5:" + "[1:101,3:103,5:105[min=101,max=105]]" + ",8:" + "[7:107,8:108[min=107,max=108]]" + "[min=101,max=108]]", t)); + } +} + +struct BTreeStealTraits +{ + static const size_t LEAF_SLOTS = 6; + static const size_t INTERNAL_SLOTS = 6; + static const size_t PATH_SIZE = 20; + static const bool BINARY_SEEK = true; +}; + +void +Test::requireThatNodeStealWorks() +{ + typedef BTree MyStealTree; + { // steal all from left + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(35, 135); + t.remove(35); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130[min=110,max=130]]" + ",60:" + "[40:140,50:150,60:160[min=140,max=160]]" + "[min=110,max=160]]", t)); + t.remove(50); + EXPECT_TRUE(assertTree("[10:110,20:120,30:130,40:140,60:160" + "[min=110,max=160]]", t)); + } + { // steal all from right + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(35, 135); + t.remove(35); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130[min=110,max=130]]" + ",60:" + "[40:140,50:150,60:160[min=140,max=160]]" + "[min=110,max=160]]", t)); + t.remove(20); + EXPECT_TRUE(assertTree("[10:110,30:130,40:140,50:150,60:160" + "[min=110,max=160]]", t)); + } + { // steal some from left + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(60, 160); + t.insert(70, 170); + t.insert(80, 180); + t.insert(50, 150); + t.insert(40, 140); + EXPECT_TRUE(assertTree("[50:" + 
"[10:110,20:120,30:130,40:140,50:150" + "[min=110,max=150]]" + ",80:" + "[60:160,70:170,80:180[min=160,max=180]]" + "[min=110,max=180]]", t)); + t.remove(60); + EXPECT_TRUE(assertTree("[40:" + "[10:110,20:120,30:130,40:140" + "[min=110,max=140]]" + ",80:" + "[50:150,70:170,80:180[min=150,max=180]]" + "[min=110,max=180]]", t)); + } + { // steal some from right + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(70, 170); + t.insert(80, 180); + t.insert(90, 190); + t.remove(40); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130" + "[min=110,max=130]]" + ",90:" + "[50:150,60:160,70:170,80:180,90:190" + "[min=150,max=190]]" + "[min=110,max=190]]", t)); + t.remove(20); + EXPECT_TRUE(assertTree("[50:" + "[10:110,30:130,50:150" + "[min=110,max=150]]" + ",90:" + "[60:160,70:170,80:180,90:190" + "[min=160,max=190]]" + "[min=110,max=190]]", t)); + } +} + +void +Test::requireThatNodeRemoveWorks() +{ + MyTree t; + getLeafNode(t); + t.remove(3); + EXPECT_TRUE(assertTree("[1:101,5:105,7:107[min=101,max=107]]", t)); + t.remove(1); + EXPECT_TRUE(assertTree("[5:105,7:107[min=105,max=107]]", t)); + t.remove(7); + EXPECT_TRUE(assertTree("[5:105[min=105,max=105]]", t)); +} + +void +generateData(std::vector & data, size_t numEntries) +{ + data.reserve(numEntries); + Rand48 rnd; + rnd.srand48(10); + for (size_t i = 0; i < numEntries; ++i) { + int num = rnd.lrand48() % 10000000; + uint32_t val = toVal(num); + data.push_back(std::make_pair(num, val)); + } +} + +void +Test::buildSubTree(const std::vector &sub, + size_t numEntries) +{ + GenerationHandler g; + MyTree tree; + MyTreeBuilder builder(tree.getAllocator()); + MockTree mock; + + std::vector sorted(sub.begin(), sub.begin() + numEntries); + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(sorted[i].first); + const uint32_t & val = sorted[i].second; + 
builder.insert(num, val); + mock.insert(num, val); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + EXPECT_EQUAL(numEntries, tree.size()); + EXPECT_TRUE(tree.isValid()); + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr = itr; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + ++itr; + } + EXPECT_TRUE(!itr.valid()); + ritr = itr; + EXPECT_TRUE(!ritr.valid()); + --ritr; + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData()); + --ritr; + } + EXPECT_TRUE(!ritr.valid()); +} + +void +Test::requireThatWeCanInsertAndRemoveFromTree() +{ + GenerationHandler g; + MyTree tree; + MockTree mock; + std::vector exp; + std::vector sorted; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + size_t numEntries = 1000; + generateData(exp, numEntries); + sorted = exp; + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + // insert entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + const uint32_t & val = exp[i].second; + EXPECT_TRUE(!tree.find(num).valid()); + //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str()); + EXPECT_TRUE(tree.insert(num, val)); + EXPECT_TRUE(!tree.insert(num, val)); + mock.insert(num, val); + 
TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (size_t j = 0; j <= i; ++j) { + //LOG(info, "find[%zu](%d)", j, exp[j].first._val); + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(i + 1u, tree.size()); + EXPECT_TRUE(tree.isValid()); + buildSubTree(exp, i + 1); + } + //std::cout << "tree: " << tree.toString() << std::endl; + + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itre = itr; + MyTree::Iterator itre2; + MyTree::Iterator ritr = itr; + while (itre.valid()) + ++itre; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + MyTree::Iterator pitr = itr; + for (size_t i = 0; i < numEntries; ++i) { + ssize_t si = i; + ssize_t sileft = numEntries - i; + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(i, itr.position()); + EXPECT_EQUAL(sileft, itre - itr); + EXPECT_EQUAL(-sileft, itr - itre); + EXPECT_EQUAL(sileft, itre2 - itr); + EXPECT_EQUAL(-sileft, itr - itre2); + EXPECT_EQUAL(si, itr - tree.begin()); + EXPECT_EQUAL(-si, tree.begin() - itr); + EXPECT_EQUAL(i != 0, itr - pitr); + EXPECT_EQUAL(-(i != 0), pitr - itr); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + pitr = itr; + ++itr; + ritr = itr; + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_TRUE(ritr == pitr); + } + EXPECT_TRUE(!itr.valid()); + EXPECT_EQUAL(numEntries, itr.position()); + ssize_t sNumEntries = numEntries; + EXPECT_EQUAL(sNumEntries, itr - tree.begin()); + EXPECT_EQUAL(-sNumEntries, tree.begin() - itr); + EXPECT_EQUAL(1, itr - pitr); 
+ EXPECT_EQUAL(-1, pitr - itr); + } + // compact full tree by calling incremental compaction methods in a loop + { + MyTree::NodeAllocatorType &manager = tree.getAllocator(); + std::vector toHold = manager.startCompact(); + MyTree::Iterator itr = tree.begin(); + tree.setRoot(itr.moveFirstLeafNode(tree.getRoot())); + while (itr.valid()) { + // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey())); + itr.moveNextLeafNode(); + } + manager.finishCompact(toHold); + manager.freeze(); + manager.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + manager.trimHoldLists(g.getFirstUsedGeneration()); + } + // remove entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + //LOG(info, "remove[%zu](%d)", i, num); + //std::cout << "tree: " << tree.toString() << std::endl; + EXPECT_TRUE(tree.remove(num)); + EXPECT_TRUE(!tree.find(num).valid()); + EXPECT_TRUE(!tree.remove(num)); + EXPECT_TRUE(tree.isValid()); + mock.erase(num); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (size_t j = i + 1; j < numEntries; ++j) { + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(numEntries - 1 - i, tree.size()); + } +} + +void +Test::requireThatSortedTreeInsertWorks() +{ + { + MyTree tree; + MockTree mock; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (int i = 0; i < 1000; ++i) { + EXPECT_TRUE(tree.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toVal(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + } + } + { + MyTree tree; + MockTree mock; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (int i = 1000; i > 0; --i) { + EXPECT_TRUE(tree.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + MyTree::Iterator itr = tree.find(i); + 
EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toVal(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + } + } +} + +void +Test::requireThatCornerCaseTreeFindWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 1; i < 100; ++i) { + tree.insert(i, toVal(i)); + } + EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest + EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest +} + +void +Test::requireThatBasicTreeIteratorWorks() +{ + GenerationHandler g; + MyTree tree; + EXPECT_TRUE(!tree.begin().valid()); + std::vector exp; + size_t numEntries = 1000; + generateData(exp, numEntries); + for (size_t i = 0; i < numEntries; ++i) { + tree.insert(exp[i].first, exp[i].second); + } + std::sort(exp.begin(), exp.end(), LeafPairLess()); + size_t ei = 0; + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr; + EXPECT_EQUAL(1000u, itr.size()); + for (; itr.valid(); ++itr) { + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey())); + EXPECT_EQUAL(exp[ei].second, itr.getData()); + ei++; + ritr = itr; + } + EXPECT_EQUAL(numEntries, ei); + for (; ritr.valid(); --ritr) { + --ei; + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey())); + EXPECT_EQUAL(exp[ei].second, ritr.getData()); + } +} + + + +void +Test::requireThatTreeIteratorAssignWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + tree.insert(i, toVal(i)); + } + for (int i = 0; i < 1000; ++i) { + MyTree::Iterator itr = tree.find(i); + MyTree::Iterator itr2 = itr; + EXPECT_TRUE(itr == itr2); + int expNum = i; + for (; itr2.valid(); ++itr2) { + EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey())); + } + EXPECT_EQUAL(1000, expNum); + } +} + +struct UpdKeyComp { + int _remainder; + mutable size_t _numErrors; + UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {} + bool 
operator() (const int & lhs, const int & rhs) const { + if (lhs % 2 != _remainder) ++_numErrors; + if (rhs % 2 != _remainder) ++_numErrors; + return lhs < rhs; + } +}; + +void +Test::requireThatUpdateOfKeyWorks() +{ + typedef BTree UpdKeyTree; + typedef UpdKeyTree::Iterator UpdKeyTreeIterator; + GenerationHandler g; + UpdKeyTree t; + UpdKeyComp cmp1(0); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1)); + } + EXPECT_EQUAL(0u, cmp1._numErrors); + for (int i = 0; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp1); + itr.writeKey(i + 1); + } + UpdKeyComp cmp2(1); + for (int i = 1; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp2); + EXPECT_TRUE(itr.valid()); + } + EXPECT_EQUAL(0u, cmp2._numErrors); +} + + +void +Test::requireThatUpdateOfDataWorks() +{ + // typedef MyTree::Iterator Iterator; + GenerationHandler g; + MyTree t; + MockTree mock; + MyAggrCalc ac; + MyTree::NodeAllocatorType &manager = t.getAllocator(); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + } + freezeTree(g, manager); + for (int i = 0; i < 1000; i+=2) { + MyTree::Iterator itr = t.find(i); + MyTree::Iterator itr2 = itr; + t.thaw(itr); + itr.updateData(toHighVal(i), ac); + EXPECT_EQUAL(toHighVal(i), itr.getData()); + EXPECT_EQUAL(toVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toHighVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + itr = t.find(i); + itr2 = itr; + t.thaw(itr); + itr.updateData(toLowVal(i), ac); + EXPECT_EQUAL(toLowVal(i), itr.getData()); + EXPECT_EQUAL(toHighVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toLowVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + itr = t.find(i); + itr2 = itr; + t.thaw(itr); + itr.updateData(toVal(i), ac); + EXPECT_EQUAL(toVal(i), 
itr.getData()); + EXPECT_EQUAL(toLowVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + } +} + + +template +void +Test::requireThatSmallNodesWorks(void) +{ + GenerationHandler g; + TreeStore s; + MockTree mock; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 40, toVal(40))); + mock.insert(40, toVal(40)); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 20, toVal(20))); + mock.insert(20, toVal(20)); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 60, toVal(60))); + mock.insert(60, toVal(60)); + EXPECT_TRUE(!s.insert(root, 60, toNotVal(60))); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 50, toVal(50))); + mock.insert(50, toVal(50)); + EXPECT_TRUE(!s.insert(root, 50, toNotVal(50))); + EXPECT_TRUE(!s.insert(root, 60, toNotVal(60))); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.insert(root, 1000 + i, 42)); + mock.insert(1000 + i, 42); + if (i > 0) { + EXPECT_TRUE(!s.insert(root, 1000 + i - 1, 42)); + } + EXPECT_EQUAL(5u + i, s.size(root)); + 
EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + } + EXPECT_TRUE(s.remove(root, 40)); + mock.erase(40); + EXPECT_TRUE(!s.remove(root, 40)); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.remove(root, 20)); + mock.erase(20); + EXPECT_TRUE(!s.remove(root, 20)); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.remove(root, 50)); + mock.erase(50); + EXPECT_TRUE(!s.remove(root, 50)); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.remove(root, 1000 + i)); + mock.erase(1000 + i); + if (i > 0) { + EXPECT_TRUE(!s.remove(root, 1000 + i - 1)); + } + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + + +int +Test::Main() +{ + TEST_INIT("btreeaggregation_test"); + + requireThatNodeInsertWorks(); + requireThatNodeSplitInsertWorks(); + requireThatNodeStealWorks(); + requireThatNodeRemoveWorks(); + requireThatWeCanInsertAndRemoveFromTree(); + requireThatSortedTreeInsertWorks(); + requireThatCornerCaseTreeFindWorks(); + requireThatBasicTreeIteratorWorks(); + requireThatTreeIteratorAssignWorks(); + requireThatUpdateOfKeyWorks(); + requireThatUpdateOfDataWorks(); + TEST_DO(requireThatSmallNodesWorks()); + TEST_DO(requireThatSmallNodesWorks()); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); diff --git 
a/searchlib/src/tests/btree/iteratespeed.cpp b/searchlib/src/tests/btree/iteratespeed.cpp new file mode 100644 index 00000000000..719dc28c036 --- /dev/null +++ b/searchlib/src/tests/btree/iteratespeed.cpp @@ -0,0 +1,213 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("iteratespeed"); +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace btree { + +enum class IterateMethod +{ + FORWARD, + BACKWARDS, + LAMBDA +}; + +class IterateSpeed : public FastOS_Application +{ + template + void + workLoop(int loops, bool enableForward, bool enableBackwards, + bool enableLambda, int leafSlots); + + void usage(); + + int + Main(void); +}; + + +namespace { + +const char *iterateMethodName(IterateMethod iterateMethod) +{ + switch (iterateMethod) { + case IterateMethod::FORWARD: + return "forward"; + case IterateMethod::BACKWARDS: + return "backwards"; + default: + return "lambda"; + } +} + +} + +template +void +IterateSpeed::workLoop(int loops, bool enableForward, bool enableBackwards, + bool enableLambda, int leafSlots) +{ + if ((iterateMethod == IterateMethod::FORWARD && !enableForward) || + (iterateMethod == IterateMethod::BACKWARDS && !enableBackwards) || + (iterateMethod == IterateMethod::LAMBDA && !enableLambda) || + (leafSlots != 0 && + leafSlots != static_cast(Traits::LEAF_SLOTS))) + return; + vespalib::GenerationHandler g; + using Tree = BTree, Traits>; + using Builder = typename Tree::Builder; + using ConstIterator = typename Tree::ConstIterator; + Tree tree; + Builder builder(tree.getAllocator()); + size_t numEntries = 1000000; + size_t numInnerLoops = 1000; + for (size_t i = 0; i < numEntries; ++i) { + builder.insert(i, 0); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + for (int l = 0; l < loops; 
++l) { + fastos::TimeStamp before = fastos::ClockSystem::now(); + uint64_t sum = 0; + for (size_t innerl = 0; innerl < numInnerLoops; ++innerl) { + if (iterateMethod == IterateMethod::FORWARD) { + ConstIterator itr(BTreeNode::Ref(), tree.getAllocator()); + itr.begin(tree.getRoot()); + while (itr.valid()) { + sum += itr.getKey(); + ++itr; + } + } else if (iterateMethod == IterateMethod::BACKWARDS) { + ConstIterator itr(BTreeNode::Ref(), tree.getAllocator()); + itr.end(tree.getRoot()); + --itr; + while (itr.valid()) { + sum += itr.getKey(); + --itr; + } + } else { + tree.getAllocator().foreach_key(tree.getRoot(), + [&](int key) { sum += key; } ); + } + } + fastos::TimeStamp after = fastos::ClockSystem::now(); + double used = after.sec() - before.sec(); + printf("Elapsed time for iterating %zu steps is %8.5f, " + "direction=%s, fanout=%u,%u, sum=%" PRIu64 "\n", + numEntries * numInnerLoops, + used, + iterateMethodName(iterateMethod), + static_cast(Traits::LEAF_SLOTS), + static_cast(Traits::INTERNAL_SLOTS), + sum); + fflush(stdout); + } +} + + +void +IterateSpeed::usage() +{ + printf("iteratespeed " + "[-F ] " + "[-b] " + "[-c ] " + "[-f] " + "[-l]\n"); +} + +int +IterateSpeed::Main() +{ + int argi; + char c; + const char *optArg; + argi = 1; + int loops = 1; + bool backwards = false; + bool forwards = false; + bool lambda = false; + int leafSlots = 0; + while ((c = GetOpt("F:bc:fl", optArg, argi)) != -1) { + switch (c) { + case 'F': + leafSlots = atoi(optArg); + break; + case 'b': + backwards = true; + break; + case 'c': + loops = atoi(optArg); + break; + case 'f': + forwards = true; + break; + case 'l': + lambda = true; + break; + default: + usage(); + return 1; + } + } + if (!backwards && !forwards && !lambda) { + backwards = true; + forwards = true; + lambda = true; + } + + using SmallTraits = BTreeTraits<4, 4, 31, false>; + using DefTraits = BTreeDefaultTraits; + using LargeTraits = BTreeTraits<32, 16, 10, true>; + using HugeTraits = BTreeTraits<64, 16, 10, true>; 
+ workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + workLoop(loops, forwards, backwards, + lambda, leafSlots); + return 0; +} + +} +} + +FASTOS_MAIN(search::btree::IterateSpeed); + + diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore new file mode 100644 index 00000000000..afe9bff02f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/.gitignore @@ -0,0 +1,5 @@ +*.So +.depend* +Makefile +bytecomp_test +searchlib_bytecomp_test_app diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt new file mode 100644 index 00000000000..188c3fccbdf --- /dev/null +++ b/searchlib/src/tests/bytecomplens/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bytecomp_test_app + SOURCES + bytecomp.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app) diff --git a/searchlib/src/tests/bytecomplens/DESC b/searchlib/src/tests/bytecomplens/DESC new file mode 100644 index 00000000000..e40e528ddea --- /dev/null +++ b/searchlib/src/tests/bytecomplens/DESC @@ -0,0 +1 @@ +Test of search::ByteCompressedLengths class. Look at bytecomp.cpp for details. 
diff --git a/searchlib/src/tests/bytecomplens/FILES b/searchlib/src/tests/bytecomplens/FILES new file mode 100644 index 00000000000..c44e7f254f8 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/FILES @@ -0,0 +1 @@ +bytecomplens.cpp diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp new file mode 100644 index 00000000000..63aa2da15f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/bytecomp.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +LOG_SETUP("bytecomplens_test"); +#include +#include +#include + + +class Test : public vespalib::TestApp { +private: + void testRandomLengths(); + +public: + int Main() { + TEST_INIT("bytecomplens_test"); + testRandomLengths(); TEST_FLUSH(); + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + + +void +Test::testRandomLengths() +{ + vespalib::RandomGen rndgen(0x07031969); + +#define TBLSIZ 0xc00000 + + auto lentable = std::unique_ptr(new uint32_t[TBLSIZ]); + auto offtable = std::unique_ptr(new uint64_t[TBLSIZ]); + + uint64_t offset = 16; + + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + LOG(info, "made %d random offsets", TBLSIZ); + + search::ByteCompressedLengths foo; + + LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed()); + + foo.addOffsetTable(TBLSIZ/4, offtable.get()); + foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4)); + + LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed()); + + search::ByteCompressedLengths bar; 
+ foo.swap(bar); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4)); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4)); + foo.swap(bar); + + LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed()); + + LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1); + + for (int i = 0; i < TBLSIZ-1; i++) { + search::ByteCompressedLengths::OffLen offlen; + offlen = foo.getOffLen(i); + + if ((i % 1000000) == 0) { + LOG(info, "data blob [%d] length %ld offset %ld", i, offlen.length, offlen.offset); + } + EXPECT_EQUAL(lentable[i], offlen.length); + EXPECT_EQUAL(offtable[i], offlen.offset); + } +} + diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt new file mode 100644 index 00000000000..6dc3df0118a --- /dev/null +++ b/searchlib/src/tests/bytecomplens/example.txt @@ -0,0 +1,122 @@ +offset length BCN val L0 len/off skipL1 skipL2 skipL3 + +976 18707 [ 93 92 01 ] 3/0 976/0/0/0 +19683 11527 [ 87 5A ] 2/3 +31210 3926 [ D6 1E ] 2/5 +35136 2 [ 02 ] 1/7 +35138 6060 [ AC 2F ] 2/8 34162/8 +41198 649445 [ E5 D1 27 ] 3/10 +690643 2866 [ B2 16 ] 2/13 +693509 824767 [ BF AB 32 ] 3/15 +1518276 499173 [ E5 BB 1E ] 3/18 1483138/10 +2017449 20455 [ E7 9F 01 ] 3/21 +2037904 11 [ 0B ] 1/24 +2037915 19207 [ 87 96 01 ] 3/25 +2057122 6355 [ D3 31 ] 2/28 538846/10 +2063477 3422 [ DE 1A ] 2/30 +2066899 10683 [ BB 53 ] 2/32 +2077582 7360 [ C0 39 ] 2/34 +2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12 +2102911 6114 [ E2 2F ] 2/39 +2109025 31741 [ FD F7 01 ] 3/41 +2140766 581588 [ D4 BF 23 ] 3/44 +2722354 5341 [ DD 29 ] 2/47 637412/11 +2727695 13774 [ CE 6B ] 2/49 +2741469 717809 [ F1 E7 2B ] 3/51 +3459278 815406 [ AE E2 31 ] 3/54 +4274684 89 [ 59 ] 1/57 1552330/10 +4274773 4545 [ C1 23 ] 2/58 +4279318 803868 [ 9C 88 31 ] 3/60 +5083186 12865 [ C1 64 ] 2/63 +5096051 75 [ 4B ] 1/65 821367/8 +5096126 40734 [ 9E BE 02 ] 3/66 +5136860 101 [ 65 ] 1/69 +5136961 128 [ 80 01 ] 2/70 +5137089 253 [ FD 01 ] 2/72 3052147/36/12 
+5137342 13 [ 0D ] 1/74 +5137355 24986 [ 9A C3 01 ] 3/75 +5162341 231 [ E7 01 ] 2/78 +5162572 997853 [ DD F3 3C ] 3/80 25483/8 +6160425 4728 [ F8 24 ] 2/83 +6165153 2025 [ E9 0F ] 2/85 +6167178 7281 [ F1 38 ] 2/87 +6174459 1026302 [ FE D1 3E ] 3/89 1011887/9 +7200761 848783 [ 8F E7 33 ] 3/92 +8049544 145767 [ E7 F2 08 ] 3/95 +8195311 19103 [ 9F 95 01 ] 3/98 +8214414 22166 [ 96 AD 01 ] 3/101 2039955/12 +8236580 30020 [ C4 EA 01 ] 3/104 +8266600 13 [ 0D ] 1/107 +8266613 120 [ 78 ] 1/108 +8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12 +8289131 10832 [ D0 54 ] 2/112 +8299963 3765 [ B5 1D ] 2/114 +8303728 432771 [ 83 B5 1A ] 3/116 +8736499 30133 [ B5 EB 01 ] 3/119 469766/10 +8766632 6444 [ AC 32 ] 2/122 +8773076 16033 [ A1 7D ] 2/124 +8789109 78 [ 4E ] 1/126 +8789187 12510 [ DE 61 ] 2/127 52688/8 +8801697 12441 [ 99 61 ] 2/129 +8814138 117 [ 75 ] 1/131 +8814255 7147 [ EB 37 ] 2/132 +8821402 189 [ BD 01 ] 2/134 32215/7 +8821591 199704 [ 98 98 0C ] 3/136 +9021295 13240 [ B8 67 ] 2/139 +9034535 110 [ 6E ] 1/141 +9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17 +9066322 18547 [ F3 90 01 ] 3/145 +9084869 734679 [ D7 EB 2C ] 3/148 +9819548 112 [ 70 ] 1/151 +9819660 883565 [ ED F6 35 ] 3/152 785015/10 +10703225 10290 [ B2 50 ] 2/155 +10713515 21410 [ A2 A7 01 ] 3/157 +10734925 15 [ 0F ] 1/160 +10734940 747774 [ FE D1 2D ] 3/161 915280/9 +11482714 39 [ 27 ] 1/164 +11482753 77 [ 4D ] 1/165 +11482830 235 [ EB 01 ] 2/166 +11483065 1991 [ C7 0F ] 2/168 748125/7 +11485056 9187 [ E3 47 ] 2/170 +11494243 18800 [ F0 92 01 ] 3/172 +11513043 1042219 [ AB CE 3F ] 3/175 +12555262 9154 [ C2 47 ] 2/178 3520617/36/12 +12564416 43582 [ BE D4 02 ] 3/180 +12607998 847240 [ 88 DB 33 ] 3/183 +13455238 4726 [ F6 24 ] 2/186 +13459964 590348 [ 8C 84 24 ] 3/188 904702/10 +14050312 8659 [ D3 43 ] 2/191 +14058971 116 [ 74 ] 1/193 +14059087 13563 [ FB 69 ] 2/194 +14072650 713064 [ E8 C2 2B ] 3/196 612686/8 +14785714 40321 [ 81 BB 02 ] 3/199 +14826035 2296 [ F8 11 ] 2/202 +14828331 7273 [ E9 38 ] 2/204 
+14835604 68285 [ BD 95 04 ] 3/206 762954/10 +14903889 235 [ EB 01 ] 2/209 +14904124 4669 [ BD 24 ] 2/211 +14908793 28535 [ F7 DE 01 ] 3/213 +14937328 19 [ 13 ] 1/216 2382066/38/12 +14937347 5369 [ F9 29 ] 2/217 +14942716 602191 [ CF E0 24 ] 3/219 +15544907 2653 [ DD 14 ] 2/222 +15547560 25755 [ 9B C9 01 ] 3/224 610232/8 +15573315 11349 [ D5 58 ] 2/227 +15584664 15006 [ 9E 75 ] 2/229 +15599670 89 [ 59 ] 1/231 +15599759 52772 [ A4 9C 03 ] 3/232 52199/8 +15652531 776175 [ EF AF 2F ] 3/235 +16428706 126 [ 7E ] 1/238 +16428832 3884 [ AC 1E ] 2/239 +16432716 33958 [ A6 89 02 ] 3/241 832957/9 +16466674 122 [ 7A ] 1/244 +16466796 41895 [ A7 C7 02 ] 3/245 +16508691 105882 [ 9A BB 06 ] 3/248 +16614573 11067 [ BB 56 ] 2/251 1677245/35/12 +16625640 4588 [ EC 23 ] 2/253 +16630228 7349 [ B5 39 ] 2/255 +16637577 902638 [ EE 8B 37 ] 3/257 +17540215 8737 [ A1 44 ] 2/260 925642/9 +17548952 29186 [ 82 E4 01 ] 3/262 +17578138 41 [ 29 ] 1/265 +17578179 diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp new file mode 100644 index 00000000000..93657d82178 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/tblprint.cpp @@ -0,0 +1,357 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("tblprint"); +#include + +#include +#include + + +/** + * Class compressing a table of offsets in memory. + * After adding (n) offsets you can access + * (n-1) pairs of (length, offset). + * All offsets must be increasing, but they + * may be added in several chunks. + **/ +class ByteCompressedLengths +{ +public: + /** + * Construct an empty instance + **/ + ByteCompressedLengths(); + + /** + * add the given offset table. + * @param entries number of offsets to store. + * @param offsets table that contains (entries) offsets. 
+ **/ + void addOffsetTable(uint64_t entries, uint64_t *offsets); + + /** + * free resources + **/ + ~ByteCompressedLengths(); + + /** + * Fetch a length and offset from compressed data. + * Note invariant: id < size(); size() == (entries-1) + * + * @param id The index into the offset table + * @param offset Will be incremented by offset[id] + * @return The delta (offset[id+1] - offset[id]) + **/ + uint64_t getLength(uint64_t id, uint64_t &offset) const; + + /** + * The number of (length, offset) pairs stored + **/ + uint64_t size() const { return _entries; } + + struct L3Entry { + uint64_t offset; + uint64_t l0toff; + uint64_t l1toff; + uint64_t l2toff; + }; + vespalib::DataBuffer _l0space; + vespalib::DataBuffer _l1space; + vespalib::DataBuffer _l2space; + const uint8_t *_l0table; + const uint8_t *_l1table; + const uint8_t *_l2table; + + std::vector _l3table; + + uint64_t _lenSum1; + uint64_t _lenSum2; + uint64_t _l0oSum1; + uint64_t _l0oSum2; + uint64_t _l1oSum2; + uint64_t _last_offset; + uint64_t _entries; + + void addOffset(uint64_t offset); +}; + +/** + * get "Byte Compressed Number" from buffer, incrementing pointer + **/ +static inline uint64_t getBCN(const uint8_t *&buffer) +{ + uint8_t b = *buffer++; + uint64_t len = (b & 127); + unsigned shiftLen = 0; + while (b & 128) { + shiftLen += 7; + b = *buffer++; + len |= (static_cast<uint64_t>(b & 127) << shiftLen); // widen first: an int shift overflows once shiftLen reaches 28 + } + return len; +} + +static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) +{ + size_t bytes = 0; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + buf.ensureFree(1); + buf.writeInt8(b); + ++bytes; + } while (len > 0); + return bytes; +} + + +ByteCompressedLengths::ByteCompressedLengths() + : _l0space(), + _l1space(), + _l2space(), + _l3table(), + _lenSum1(0), + _lenSum2(0), + _l0oSum1(0), + _l0oSum2(0), + _l1oSum2(0), + _last_offset(0), + _entries(0) +{ +} + + +void +ByteCompressedLengths::addOffset(uint64_t offset) +{ + assert(offset >= _last_offset); + + uint64_t len = 
offset - _last_offset; + uint64_t i = _entries++; + + if ((i & 3) == 0) { + _lenSum2 += _lenSum1; + _l0oSum2 += _l0oSum1; + + uint64_t t1n = i >> 2; + if ((t1n & 3) == 0) { + uint64_t t2n = t1n >> 2; + + if ((t2n & 3) == 0) { + L3Entry e; + e.offset = _last_offset; + e.l0toff = _l0space.getDataLen(); + e.l1toff = _l1space.getDataLen(); + e.l2toff = _l2space.getDataLen(); + + _l3table.push_back(e); + } else { + writeLen(_l2space, _lenSum2); + writeLen(_l2space, _l0oSum2); + writeLen(_l2space, _l1oSum2); + } + _lenSum2 = 0; + _l0oSum2 = 0; + _l1oSum2 = 0; + } else { + _l1oSum2 += writeLen(_l1space, _lenSum1); + _l1oSum2 += writeLen(_l1space, _l0oSum1); + } + _lenSum1 = 0; + _l0oSum1 = 0; + } + _l0oSum1 += writeLen(_l0space, len); + _lenSum1 += len; + _last_offset = offset; +} + + +void +ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) +{ + if (entries == 0) return; + // Do we have some offsets already? + if (_entries > 0) { + // yes, add first offset normally + addOffset(offsets[0]); + } else { + // no, special treatment for very first offset + _last_offset = offsets[0]; + } + for (uint64_t cnt = 1; cnt < entries; ++cnt) { + addOffset(offsets[cnt]); + } + _l0table = (uint8_t *)_l0space.getData(); + _l1table = (uint8_t *)_l1space.getData(); + _l2table = (uint8_t *)_l2space.getData(); + + LOG(debug, "compressed %ld offsets", (_entries+1)); + LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t)); + LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", + _l0space.getDataLen(), + _l1space.getDataLen(), + _l2space.getDataLen(), + _l3table.size()); + LOG(debug, "(%ld bytes)", + (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + + _l3table.size()*sizeof(L3Entry))); +} + + +ByteCompressedLengths::~ByteCompressedLengths() +{ +} + +uint64_t +ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const +{ + assert(numSkip < _entries); + + unsigned skipL0 = numSkip & 3; + unsigned skipL1 = (numSkip >> 2) & 3; + 
unsigned skipL2 = (numSkip >> 4) & 3; + uint64_t skipL3 = (numSkip >> 6); + /* start from the level 3 entry covering this block of 64 entries */ + offset += _l3table[skipL3].offset; + uint64_t l0toff = _l3table[skipL3].l0toff; + uint64_t l1toff = _l3table[skipL3].l1toff; + uint64_t l2toff = _l3table[skipL3].l2toff; + + // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + /* each level 2 record: offset delta, level 0 position delta, level 1 position delta */ + const uint8_t *l2pos = _l2table + l2toff; + + while (skipL2 > 0) { + --skipL2; + offset += getBCN(l2pos); + l0toff += getBCN(l2pos); + l1toff += getBCN(l2pos); + } + /* each level 1 record: offset delta, level 0 position delta */ + const uint8_t *l1pos = _l1table + l1toff; + + while (skipL1 > 0) { + --skipL1; + offset += getBCN(l1pos); + l0toff += getBCN(l1pos); + + } + const uint8_t *l0pos = _l0table + l0toff; + /* level 0 holds the individual lengths; add the ones stepped over, return the next */ + while (skipL0 > 0) { + --skipL0; + offset += getBCN(l0pos); + } + // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + return getBCN(l0pos); +} + + +/* Holder for the table dump that main() runs. */ +class Test { +public: + static void printTable(); +}; + + +/* Entry point: runs Test::printTable() and returns 0; the arguments are unused. */ +int main(int /*argc*/, char ** /*argv*/) +{ + Test::printTable(); + return 0; +} +/** + * Build TBLSIZ pseudo-random lengths from a fixed seed, turn them into an + * offset table, feed that to ByteCompressedLengths and print one row per + * length on stdout. + **/ +void +Test::printTable() +{ + vespalib::RandomGen rndgen(0x07031969); +#define TBLSIZ 120 + uint32_t *lentable = new uint32_t[TBLSIZ]; + uint64_t *offtable = new uint64_t[TBLSIZ]; + + uint64_t offset = 16 + TBLSIZ*8; + /* mask each random value to between 7 and 20 bits; selector 2 has no case and takes the default branch */ + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + ByteCompressedLengths foo; + foo.addOffsetTable(TBLSIZ, offtable); + /* cursors for reading the level 1 and level 2 records back in order */ + const uint8_t *l1pos = foo._l1table; + const uint8_t *l2pos = foo._l2table; + + printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n", + "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3"); + + int slb = 0; 
+ /* one row per length: offset, length, its encoded bytes, byte count/running position, then the skip record that belongs to this row */ + for (int i = 0; i+1 < TBLSIZ; i++) { + printf("%ld\t%d\t[", offtable[i], lentable[i]); + int bytes=0; + uint64_t len = lentable[i]; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + printf(" %02X", b); + ++bytes; + } while (len > 0); + printf(" ]\t%d", bytes); + printf("/%d", slb); + slb += bytes; + + if ((i & 63) == 0) { + printf("\t\t\t%ld/%ld/%ld/%ld", + foo._l3table[i >> 6].offset, + foo._l3table[i >> 6].l0toff, + foo._l3table[i >> 6].l1toff, + foo._l3table[i >> 6].l2toff); + } else + if ((i & 15) == 0) { + printf("\t\t%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + } else + if ((i & 3) == 0) { + printf("\t%ld", getBCN(l1pos)); + printf("/%ld", getBCN(l1pos)); + } + printf("\n"); + } + printf("%ld\n", offtable[TBLSIZ-1]); + fflush(stdout); + /* release the tables allocated with new[] at the top of this function */ + delete [] lentable; + delete [] offtable; +} diff --git a/searchlib/src/tests/common/bitvector/.gitignore b/searchlib/src/tests/common/bitvector/.gitignore new file mode 100644 index 00000000000..bdc2879ea74 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +*_test +*_benchmark +/bitvector_test-common +searchlib_condensedbitvector_test_app +searchlib_bitvector_benchmark_app +searchlib_bitvector_test-common_app diff --git a/searchlib/src/tests/common/bitvector/CMakeLists.txt b/searchlib/src/tests/common/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..ce49872319a --- /dev/null +++ b/searchlib/src/tests/common/bitvector/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_bitvector_test-common_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_test-common_app COMMAND searchlib_bitvector_test-common_app) +vespa_add_executable(searchlib_bitvector_benchmark_app + SOURCES + bitvector_benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_benchmark_app COMMAND searchlib_bitvector_benchmark_app BENCHMARK) +vespa_add_executable(searchlib_condensedbitvector_test_app + SOURCES + condensedbitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_condensedbitvector_test_app COMMAND searchlib_condensedbitvector_test_app) diff --git a/searchlib/src/tests/common/bitvector/DESC b/searchlib/src/tests/common/bitvector/DESC new file mode 100644 index 00000000000..313f0f89f2a --- /dev/null +++ b/searchlib/src/tests/common/bitvector/DESC @@ -0,0 +1 @@ +bitvector test. Take a look at bitvector_test.cpp for details. diff --git a/searchlib/src/tests/common/bitvector/FILES b/searchlib/src/tests/common/bitvector/FILES new file mode 100644 index 00000000000..a2583d74519 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/FILES @@ -0,0 +1 @@ +bitvector_test.cpp diff --git a/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp new file mode 100644 index 00000000000..cc0ef78c193 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("bitvector_benchmark"); +#include +#include + +using namespace search; + +namespace { + +size_t scan(BitVector & bv) __attribute__((noinline)); +/** + * Count the true bits below bv.size() by stepping from getFirstTrueBit() + * with getNextTrueBit(). Declared noinline above. + **/ +size_t scan(BitVector & bv) +{ + size_t count(0); + for (BitVector::Index i(bv.getFirstTrueBit()), m(bv.size()); i < m; i = bv.getNextTrueBit(i+1)) { + count++; + } + return count; +} + +} + +// This test is 10% faster with table lookup than with runtime shifting. +TEST("speed of getNextTrueBit") +{ + BitVector::UP bv(BitVector::create(100000000)); + bv->setInterval(0, bv->size() - 1); + /* NOTE(review): bitvector_test.cpp expects setInterval(40, 46) to set bits 40..45, i.e. an exclusive end; if that holds here, size() - 1 bits are set while size() is expected below - confirm. */ + for (size_t i(0); i < 10; i++) { + EXPECT_EQUAL(bv->size(), scan(*bv)); + } + EXPECT_EQUAL(bv->size(), bv->countTrueBits()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..11c43166ef5 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -0,0 +1,541 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("bitvector_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search; + +namespace { +/* Render the true bits of bv, from getStartIndex() up to size(), as "[a,b,...]". */ +std::string +toString(const BitVector & bv) +{ + std::stringstream ss; + ss << "["; + bool first = true; + uint32_t nextBit = bv.getStartIndex(); + for (;;) { + nextBit = bv.getNextTrueBit(nextBit); + if (nextBit >= bv.size()) { + break; + } + if (!first) { + ss << ","; + } + ss << nextBit++; + first = false; + } + ss << "]"; + return ss.str(); +} + +/* Same rendering, produced by seeking a BitVectorIterator over its full range, starting at docId 1. */ +std::string +toString(BitVectorIterator &b) +{ + std::stringstream ss; + ss << "["; + bool first = true; + b.initFullRange(); + for (uint32_t docId = 1; ! 
b.isAtEnd(docId); ) { + if (!b.seek(docId)) { /* no hit: continue from the larger of docId + 1 and the iterator's own position */ + docId = std::max(docId + 1, b.getDocId()); + if (b.isAtEnd(docId)) + break; + continue; + } + if (!first) { + ss << ","; + } + b.unpack(docId); + ss << docId++; + first = false; + } + ss << "]"; + return ss.str(); +} + + +/* Reference count of the true bits in [low, high], testing one bit at a time; high is clamped to size() - 1 and an empty vector yields 0. */ +uint32_t +myCountInterval(const BitVector &bv, uint32_t low, uint32_t high) +{ + uint32_t res = 0u; + if (bv.size() == 0u) + return 0u; + if (high >= bv.size()) + high = bv.size() - 1; + for (; low <= high; ++low) { + if (bv.testBit(low)) + ++res; + } + return res; +} +/* Set count random bits (duplicates removed) in [offset + 1, offset + size - 1] and check that getNextTrueBit()/getPrevTrueBit() visit them in sorted order. */ +void +scan(uint32_t count, uint32_t offset, uint32_t size, Rand48 &rnd) +{ + std::vector lids; + lids.reserve(count); + uint32_t end = size + offset; + for (uint32_t i = 0; i < count; ++i) { + uint32_t lid = offset + (rnd.lrand48() % (size - 1)) + 1; + lids.push_back(lid); + } + std::sort(lids.begin(), lids.end()); + lids.resize(std::unique(lids.begin(), lids.end()) - lids.begin()); + BitVector::UP bv(BitVector::create(offset, end)); + for (auto lid : lids) { + bv->setBit(lid); + } + EXPECT_EQUAL(bv->getFirstTrueBit(), bv->getNextTrueBit(bv->getStartIndex())); + uint32_t prevLid = bv->getStartIndex(); + for (auto lid : lids) { + EXPECT_EQUAL(lid, bv->getNextTrueBit(prevLid + 1)); + EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(lid - 1)); + prevLid = lid; + } + EXPECT_TRUE(bv->getNextTrueBit(prevLid + 1) >= end); + EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(end - 1)); +} +/* Run scan() with a fixed seed for increasing bit counts over 1000000 positions starting at offset. */ +void +scanWithOffset(uint32_t offset) +{ + Rand48 rnd; + + rnd.srand48(32); + scan(10, offset, 1000000, rnd); + scan(100, offset, 1000000, rnd); + scan(1000, offset, 1000000, rnd); + scan(10000, offset, 1000000, rnd); + scan(100000, offset, 1000000, rnd); + scan(500000, offset, 1000000, rnd); + scan(1000000, offset, 1000000, rnd); +} + +} +/* Expect act to render as exp. */ +bool +assertBV(const std::string & exp, const BitVector & act) +{ + bool res1 = EXPECT_EQUAL(exp, toString(act)); + search::fef::TermFieldMatchData f; + search::fef::TermFieldMatchDataArray a; + a.add(&f); + 
queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, a, true)); + BitVectorIterator & b(dynamic_cast(*it)); + bool res2 = EXPECT_EQUAL(exp, toString(b)); /* second check: the same bits seen through a BitVectorIterator */ + return res1 && res2; +} +/* Set every bit listed in bits, shifted by offset, in bv. */ +void +fill(BitVector & bv, const std::vector & bits, uint32_t offset) +{ + for (uint32_t bit : bits) { + bv.setBit(bit + offset); + } +} +/* Build the "[a,b,...]" string for bits shifted by offset, in the format toString() produces. */ +vespalib::string +fill(const std::vector & bits, uint32_t offset) +{ + vespalib::asciistream os; + os << "["; + size_t count(0); + for (uint32_t bit : bits) { + count++; + os << bit + offset; + if (count != bits.size()) { os << ","; } + } + os << "]"; + return os.str(); +} +/* Bit sets used by the tests below; A and B share 39 and 71. */ +std::vector A = {7, 39, 71, 103}; +std::vector B = {15, 39, 71, 100}; +/* v3 starts as A; after andWith(B) only the shared bits are expected, and v1/v2 must be unchanged. */ +void +testAnd(uint32_t offset) +{ + uint32_t end = offset + 128; + BitVector::UP v1(BitVector::create(offset, end)); + BitVector::UP v2(BitVector::create(offset, end)); + BitVector::UP v3(BitVector::create(offset, end)); + + fill(*v1, A, offset); + fill(*v3, A, offset); + fill(*v2, B, offset); + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v3)); + v3->andWith(*v2); + EXPECT_TRUE(assertBV(fill({39,71}, offset), *v3)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); +} +/* v3 starts as A; after orWith(B) the union is expected, and v1/v2 must be unchanged. */ +void +testOr(uint32_t offset) +{ + uint32_t end = offset + 128; + BitVector::UP v1(BitVector::create(offset, end)); + BitVector::UP v2(BitVector::create(offset, end)); + BitVector::UP v3(BitVector::create(offset, end)); + + fill(*v1, A, offset); + fill(*v3, A, offset); + fill(*v2, B, offset); + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v3)); + v3->orWith(*v2); + EXPECT_TRUE(assertBV(fill({7,15,39,71,100,103}, offset), *v3)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); +} +/* v3 starts as A; removing B's bits is expected to leave {7,103}. */ +void +testAndNot(uint32_t offset) +{ + uint32_t end = offset + 128; + 
BitVector::UP v1(BitVector::create(offset, end)); + BitVector::UP v2(BitVector::create(offset, end)); + BitVector::UP v3(BitVector::create(offset, end)); + + fill(*v1, A, offset); + fill(*v3, A, offset); + fill(*v2, B, offset); + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v3)); + v3->andNotWith(*v2); + EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3)); + + EXPECT_TRUE(assertBV(fill(A, offset), *v1)); + EXPECT_TRUE(assertBV(fill(B, offset), *v2)); + + v3->clear(); + fill(*v3, A, offset); + EXPECT_TRUE(assertBV(fill(A, offset), *v3)); + + /* same subtraction again, this time with B's bits supplied as four hits through RankedHitIterator */ + std::vector rh; + rh.emplace_back(15u+offset, 0.0); + rh.emplace_back(39u+offset, 0.0); + rh.emplace_back(71u+offset, 0.0); + rh.emplace_back(100u+offset, 0.0); + + v3->andNotWithT(RankedHitIterator(&rh[0], 4)); + EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3)); +} +/* PartialBitVector(717,919): setBit() is expected to leave countTrueBits() stale until invalidateCachedCount(), slowSetBit() to keep it current; or-ing with a full vector is expected to give 202 (919 - 717) bits. */ +TEST("requireThatSequentialOperationsOnPartialWorks") +{ + PartialBitVector p1(717,919); + + EXPECT_FALSE(p1.hasTrueBits()); + EXPECT_EQUAL(0u, p1.countTrueBits()); + p1.setBit(719); + EXPECT_EQUAL(0u, p1.countTrueBits()); + p1.invalidateCachedCount(); + EXPECT_TRUE(p1.hasTrueBits()); + EXPECT_EQUAL(1u, p1.countTrueBits()); + p1.slowSetBit(718); + p1.slowSetBit(739); + p1.slowSetBit(871); + p1.slowSetBit(903); + EXPECT_EQUAL(5u, p1.countTrueBits()); + EXPECT_TRUE(assertBV("[718,719,739,871,903]", p1)); + /* p2 only compares equal to p1 once it holds all five bits */ + PartialBitVector p2(717,919); + EXPECT_FALSE(p1 == p2); + p2.slowSetBit(719); + p2.slowSetBit(718); + p2.slowSetBit(739); + p2.slowSetBit(871); + EXPECT_FALSE(p1 == p2); + p2.slowSetBit(903); + EXPECT_TRUE(p1 == p2); + + AllocatedBitVector full(1000); + full.setInterval(0, 1000); + EXPECT_EQUAL(5u, p2.countTrueBits()); + p2.orWith(full); + EXPECT_EQUAL(202u, p2.countTrueBits()); +} +/* An iterator over a 128-bit vector is given a range that lies entirely beyond it. */ +TEST("requireThatInitRangeStaysWithinBounds") { + AllocatedBitVector v1(128); + search::fef::TermFieldMatchData f; + search::fef::TermFieldMatchDataArray a; + a.add(&f); + 
queryeval::SearchIterator::UP it(BitVectorIterator::create(&v1, a, true)); + it->initRange(700, 800); + EXPECT_TRUE(it->isAtEnd()); +} +/* run testAnd() for offsets 0..99 */ +TEST("requireThatAndWorks") { + for (uint32_t offset(0); offset < 100; offset++) { + testAnd(offset); + } +} +/* run testOr() for offsets 0..99 */ +TEST("requireThatOrWorks") { + for (uint32_t offset(0); offset < 100; offset++) { + testOr(offset); + } +} + +/* run testAndNot() for offsets 0..99 */ +TEST("requireThatAndNotWorks") { + for (uint32_t offset(0); offset < 100; offset++) { + testAndNot(offset); + } +} +/* clear() is expected to leave no true bits */ +TEST("requireThatClearWorks") +{ + AllocatedBitVector v1(128); + + v1.setBit(7); + v1.setBit(39); + v1.setBit(71); + v1.setBit(103); + EXPECT_TRUE(assertBV("[7,39,71,103]", v1)); + + v1.clear(); + EXPECT_TRUE(assertBV("[]", v1)); +} +/* Sums the keys visited by foreach_truebit()/foreach_falsebit(); judging by the expected sums the optional arguments are an inclusive start and an exclusive end. */ +TEST("requireThatForEachWorks") { + AllocatedBitVector v1(128); + + v1.setBit(7); + v1.setBit(39); + v1.setBit(71); + v1.setBit(103); + EXPECT_EQUAL(128u, v1.size()); + /* 7 + 39 + 71 + 103 = 220 */ + size_t sum(0); + v1.foreach_truebit([&](uint32_t key) { sum += key; }); + EXPECT_EQUAL(220u, sum); + + sum = 0; + v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7); + EXPECT_EQUAL(220u, sum); + + sum = 0; + v1.foreach_truebit([&](uint32_t key) { sum += key; }, 6, 7); + EXPECT_EQUAL(0u, sum); + sum = 0; + v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7, 8); + EXPECT_EQUAL(7u, sum); + sum = 0; + v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8, 9); + EXPECT_EQUAL(0u, sum); + /* starting at 8 skips bit 7: 220 - 7 = 213 */ + sum = 0; + v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8); + EXPECT_EQUAL(213u, sum); + /* false bits: 5, then 5 + 6, still 11 with 7 in range because bit 7 is set, then 11 + 8 */ + sum = 0; + v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 6); + EXPECT_EQUAL(5u, sum); + + sum = 0; + v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 7); + EXPECT_EQUAL(11u, sum); + + sum = 0; + v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 8); + EXPECT_EQUAL(11u, sum); + + sum = 0; + v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 9); + EXPECT_EQUAL(19u, sum); + + sum = 0; + v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 6); + 
EXPECT_EQUAL(size_t((((6+127)*(127-6 + 1)) >> 1) - 220), sum); /* sum of 6..127 minus the four true bits */ +} + +/* setBit()/clearBit() are expected to leave countTrueBits() stale until invalidateCachedCount(); slowSetBit() is expected to update the count at once. */ +TEST("requireThatSetWorks") +{ + AllocatedBitVector v1(128); + + v1.setBit(7); + v1.setBit(39); + v1.setBit(71); + v1.setBit(103); + EXPECT_TRUE(assertBV("[7,39,71,103]", v1)); + v1.invalidateCachedCount(); + EXPECT_EQUAL(4u, v1.countTrueBits()); + + v1.setBit(80); + EXPECT_EQUAL(4u, v1.countTrueBits()); + v1.invalidateCachedCount(); + EXPECT_EQUAL(5u, v1.countTrueBits()); + EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1)); + /* clearing a bit that is not set changes nothing */ + v1.clearBit(35); + EXPECT_EQUAL(5u, v1.countTrueBits()); + v1.invalidateCachedCount(); + EXPECT_EQUAL(5u, v1.countTrueBits()); + EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1)); + v1.clearBit(71); + EXPECT_EQUAL(5u, v1.countTrueBits()); + v1.invalidateCachedCount(); + EXPECT_EQUAL(4u, v1.countTrueBits()); + EXPECT_TRUE(assertBV("[7,39,80,103]", v1)); + /* slowSetBit() on an already set bit keeps the count; on a new bit it increments it */ + v1.slowSetBit(39); + EXPECT_EQUAL(4u, v1.countTrueBits()); + EXPECT_TRUE(assertBV("[7,39,80,103]", v1)); + v1.slowSetBit(57); + EXPECT_EQUAL(5u, v1.countTrueBits()); + EXPECT_TRUE(assertBV("[7,39,57,80,103]", v1)); +} + +/* clearInterval(a, b) is expected to clear a up to, but not including, b. */ +TEST("requireThatClearIntervalWorks") +{ + AllocatedBitVector v1(1200); + + v1.setBit(7); + v1.setBit(39); + v1.setBit(71); + v1.setBit(103); + v1.setBit(200); + v1.setBit(500); + EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1)); + + v1.clearInterval(40, 70); + EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1)); + v1.clearInterval(39, 71); + EXPECT_TRUE(assertBV("[7,71,103,200,500]", v1)); + v1.clearInterval(39, 72); + EXPECT_TRUE(assertBV("[7,103,200,500]", v1)); + v1.clearInterval(20, 501); + EXPECT_TRUE(assertBV("[7]", v1)); +} + +/* setInterval(40, 46) is expected to set 40..45; the counts that follow cross-check countInterval() against myCountInterval(). */ +TEST("requireThatSetIntervalWorks") +{ + AllocatedBitVector v1(1200); + + EXPECT_FALSE(v1.hasTrueBits()); + v1.setBit(7); + v1.setBit(39); + v1.setBit(71); + v1.setBit(103); + v1.setBit(200); + v1.setBit(500); + EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1)); + + v1.setInterval(40, 46); + EXPECT_TRUE(assertBV("[7,39,40,41,42,43,44,45,71,103,200,500]", v1)); + 
EXPECT_TRUE(v1.hasTrueBits()); + v1.invalidateCachedCount(); + EXPECT_EQUAL(12u, v1.countTrueBits()); + EXPECT_EQUAL(12u, v1.countInterval(1, 1199)); + EXPECT_EQUAL(12u, myCountInterval(v1, 1, 1199)); + /* countInterval(a, b) is expected to exclude b: bit 200 counts for (1, 201) but not for (1, 200) */ + v1.setInterval(40, 200); + EXPECT_EQUAL(164u, v1.countInterval(1, 1199)); + EXPECT_EQUAL(164u, myCountInterval(v1, 1, 1199)); + EXPECT_EQUAL(163u, v1.countInterval(1, 201)); + EXPECT_EQUAL(162u, v1.countInterval(1, 200)); + EXPECT_EQUAL(163u, v1.countInterval(7, 201)); + EXPECT_EQUAL(162u, v1.countInterval(8, 201)); + EXPECT_EQUAL(161u, v1.countInterval(8, 200)); + v1.clearInterval(72, 174); /* removes 102 bits: 164 - 102 = 62 */ + EXPECT_EQUAL(62u, v1.countInterval(1, 1199)); + EXPECT_EQUAL(62u, myCountInterval(v1, 1, 1199)); + EXPECT_EQUAL(61u, v1.countInterval(1, 201)); + EXPECT_EQUAL(60u, v1.countInterval(1, 200)); + EXPECT_EQUAL(61u, v1.countInterval(7, 201)); + EXPECT_EQUAL(60u, v1.countInterval(8, 201)); + EXPECT_EQUAL(59u, v1.countInterval(8, 200)); + EXPECT_EQUAL(51u, v1.countInterval(8, 192)); + EXPECT_EQUAL(50u, v1.countInterval(8, 191)); + /* short ranges around the single bit 7 */ + EXPECT_EQUAL(1u, v1.countInterval(1, 20)); + EXPECT_EQUAL(1u, v1.countInterval(7, 20)); + EXPECT_EQUAL(0u, v1.countInterval(8, 20)); + EXPECT_EQUAL(1u, v1.countInterval(1, 8)); + EXPECT_EQUAL(0u, v1.countInterval(1, 7)); +} +/* run the randomized scan check with offset 0 and offset 19876 */ +TEST("requireThatScanWorks") +{ + scanWithOffset(0); + scanWithOffset(19876); +} +/* reserve() is expected to change capacity() only, extend() to change size(); the set bits must survive both. */ +TEST("requireThatGrowWorks") +{ + vespalib::GenerationHolder g; + GrowableBitVector v(200, 200, g); + + v.setBit(7); + v.setBit(39); + v.setBit(71); + v.setBit(103); + + EXPECT_EQUAL(200u, v.size()); + v.invalidateCachedCount(); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + v.reserve(204); + EXPECT_EQUAL(200u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + v.extend(202); + EXPECT_EQUAL(202u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + 
v.shrink(200); + EXPECT_EQUAL(200u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + v.reserve(204); + EXPECT_EQUAL(200u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + v.shrink(202); /* note: called with a larger size than the current 200; size() 202 is expected afterwards */ + EXPECT_EQUAL(202u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71,103]", v)); + EXPECT_EQUAL(4u, v.countTrueBits()); + /* shrinking to 100 is expected to drop bit 103 while capacity() stays 204 */ + v.shrink(100); + EXPECT_EQUAL(100u, v.size()); + EXPECT_EQUAL(204u, v.capacity()); + EXPECT_TRUE(assertBV("[7,39,71]", v)); + EXPECT_EQUAL(3u, v.countTrueBits()); + g.transferHoldLists(1); + g.trimHoldLists(2); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp new file mode 100644 index 00000000000..eddd3941c35 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include + +LOG_SETUP("condensedbitvector_test"); + +using search::CondensedBitVector; +using vespalib::GenerationHolder; +/* create(8, ...) is expected to report size and capacity 8 and a key capacity of 32 */ +TEST("Verify state after init") +{ + GenerationHolder genHolder; + CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder)); + EXPECT_EQUAL(32u, cbv->getKeyCapacity()); + EXPECT_EQUAL(8u, cbv->getCapacity()); + EXPECT_EQUAL(8u, cbv->getSize()); +} + +/* every get(i, j) for i < 32, j < 8 starts false; set(23, 5, false) changes nothing and set(23, 5, true) flips exactly one cell */ +TEST("Verify set/get") +{ + GenerationHolder genHolder; + CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder)); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + EXPECT_FALSE(cbv->get(i,j)); + } + } + cbv->set(23,5, false); + EXPECT_FALSE(cbv->get(23, 5)); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + EXPECT_FALSE(cbv->get(i,j)); + } + } + cbv->set(23,5, true); + EXPECT_TRUE(cbv->get(23, 5)); + size_t sum(0); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + sum += cbv->get(i,j) ? 1 : 0; + } + } + EXPECT_EQUAL(1u, sum); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore new file mode 100644 index 00000000000..0bd7759156b --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore @@ -0,0 +1 @@ +searchlib_foregroundtaskexecutor_test_app diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt new file mode 100644 index 00000000000..dd0e5c0b039 --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_foregroundtaskexecutor_test_app + SOURCES + foregroundtaskexecutor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_foregroundtaskexecutor_test_app COMMAND searchlib_foregroundtaskexecutor_test_app) diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/DESC b/searchlib/src/tests/common/foregroundtaskexecutor/DESC new file mode 100644 index 00000000000..bfa0dfa3e6a --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/DESC @@ -0,0 +1 @@ +foregroundtaskexecutor test. Take a look at foregroundtaskexecutor_test.cpp for details. diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/FILES b/searchlib/src/tests/common/foregroundtaskexecutor/FILES new file mode 100644 index 00000000000..5c0c9178abd --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/FILES @@ -0,0 +1 @@ +foregroundtaskexecutor_test.cpp diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp new file mode 100644 index 00000000000..49ebbf12bc0 --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("foregroundtaskexecutor_test"); +#include +#include + +#include +#include + +namespace search +{ + +namespace common +{ + +/* Test fixture that owns the ForegroundTaskExecutor under test. */ +class Fixture +{ +public: + ForegroundTaskExecutor _threads; + + Fixture() + : _threads() + { + } +}; + +/* State shared with the tasks: modify() is a compare-and-set on _val under _m that counts mismatches in _fail and calls in _done; wait() blocks until _done reaches wantDone. */ +class TestObj +{ +public: + std::mutex _m; + std::condition_variable _cv; + int _done; + int _fail; + int _val; + + TestObj() + : _m(), + _cv(), + _done(0), + _fail(0), + _val(0) + { + } + + void + modify(int oldValue, int newValue) + { + { + std::lock_guard guard(_m); + if (_val == oldValue) { + _val = newValue; + } else { + ++_fail; + } + ++_done; + } + _cv.notify_all(); /* signalled after the lock has been released */ + } + + void + wait(int wantDone) + { + std::unique_lock guard(_m); + _cv.wait(guard, [=] { return this->_done >= wantDone; }); + } +}; +/* one task: 0 -> 42 must have happened once wait(1) returns, and sync() must not change that */ +TEST_F("testExecute", Fixture) { + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(1, [=]() { tv->modify(0, 42); }); + tv->wait(1); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + +/* two tasks with id 0: the second (14 -> 42) only succeeds if it runs after the delayed first one (0 -> 14) */ +TEST_F("require that task with same id are serialized", Fixture) +{ + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(0, [=]() { tv->modify(14, 42); }); + tv->wait(2); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} +/* same pair of tasks under ids 0 and 1, tried up to 100 times; _fail == 1 would mean the second task ran before the first had set 14 */ +TEST_F("require that task with different ids are serialized", Fixture) +{ + int tryCnt = 0; + for (tryCnt = 0; tryCnt < 100; ++tryCnt) { + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(1, [=]() { tv->modify(14, 42); }); + tv->wait(2); + if (tv->_fail != 1) { + continue; + } + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + f._threads.sync(); + 
EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + break; + } + EXPECT_TRUE(tryCnt >= 100); /* passes only if the loop never reached the break, i.e. no attempt ended with _fail == 1 */ +} + + +} // namespace common +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/location/.gitignore b/searchlib/src/tests/common/location/.gitignore new file mode 100644 index 00000000000..ec9acbe771e --- /dev/null +++ b/searchlib/src/tests/common/location/.gitignore @@ -0,0 +1 @@ +searchlib_location_test_app diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt new file mode 100644 index 00000000000..3617657cdf9 --- /dev/null +++ b/searchlib/src/tests/common/location/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_location_test_app + SOURCES + location_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app) diff --git a/searchlib/src/tests/common/location/FILES b/searchlib/src/tests/common/location/FILES new file mode 100644 index 00000000000..7bd6fa8b581 --- /dev/null +++ b/searchlib/src/tests/common/location/FILES @@ -0,0 +1 @@ +location_test.cpp diff --git a/searchlib/src/tests/common/location/location_test.cpp b/searchlib/src/tests/common/location/location_test.cpp new file mode 100644 index 00000000000..1cbe24ec225 --- /dev/null +++ b/searchlib/src/tests/common/location/location_test.cpp @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +using search::common::Location; +/* Returns what Location::parse() returns for str on a fresh Location. */ +bool is_parseable(const char *str) { + Location loc; + return loc.parse(str); +} +/* Parse str into a Location; a parse failure fails the test and prints getParseError() on stderr. The Location is returned either way. */ +Location parse(const char *str) { + Location loc; + if (!EXPECT_TRUE(loc.parse(str))) { + fprintf(stderr, " parse error: %s\n", loc.getParseError()); + } + return loc; +} +/* only the first bounding box string is accepted; the variants are repeated, use another leading number, contain spaces, or are truncated */ +TEST("require that malformed bounding boxes are not parseable") { + EXPECT_TRUE(is_parseable("[2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[1,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[3,10,20,30,40]")); + EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]")); + EXPECT_FALSE(is_parseable("[2,10,20,30,40")); + EXPECT_FALSE(is_parseable("[2,10,20,30]")); + EXPECT_FALSE(is_parseable("[10,20,30,40]")); +} +/* the same kind of variations for the circle syntax */ +TEST("require that malformed circles are not parseable") { + EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)")); + EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)")); + EXPECT_FALSE(is_parseable("(2,10,20,5)")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0")); + EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000")); + EXPECT_FALSE(is_parseable("(10,20,5)")); +} +/* a bounding box alone: no ranking on distance, point (0,0), maximum radius, limits taken from the string */ +TEST("require that bounding boxes can be parsed") { + Location loc = parse("[2,10,20,30,40]"); + EXPECT_EQUAL(false, loc.getRankOnDistance()); + EXPECT_EQUAL(true, loc.getPruneOnDistance()); + EXPECT_EQUAL(0u, loc.getXAspect()); + EXPECT_EQUAL(0, loc.getX()); + EXPECT_EQUAL(0, loc.getY()); + EXPECT_EQUAL(std::numeric_limits::max(), loc.getRadius()); + EXPECT_EQUAL(10, loc.getMinX()); + EXPECT_EQUAL(20, loc.getMinY()); + EXPECT_EQUAL(30, loc.getMaxX()); + EXPECT_EQUAL(40, loc.getMaxY()); +} +/* a circle alone: point (10,20), radius 5, limits are the point plus/minus the radius */ +TEST("require that circles can be parsed") { + Location loc = parse("(2,10,20,5,0,0,0)"); + EXPECT_EQUAL(true, loc.getRankOnDistance()); + EXPECT_EQUAL(true, 
loc.getPruneOnDistance()); + EXPECT_EQUAL(0u, loc.getXAspect()); + EXPECT_EQUAL(10, loc.getX()); + EXPECT_EQUAL(20, loc.getY()); + EXPECT_EQUAL(5u, loc.getRadius()); + EXPECT_EQUAL(5, loc.getMinX()); + EXPECT_EQUAL(15, loc.getMinY()); + EXPECT_EQUAL(15, loc.getMaxX()); + EXPECT_EQUAL(25, loc.getMaxY()); +} +/* with an x aspect of 2147483648 the expected x limits widen to [-1, 21]; the y limits stay [15, 25] */ +TEST("require that circles can have aspect ratio") { + Location loc = parse("(2,10,20,5,0,0,0,2147483648)"); + EXPECT_EQUAL(true, loc.getRankOnDistance()); + EXPECT_EQUAL(true, loc.getPruneOnDistance()); + EXPECT_EQUAL(2147483648u, loc.getXAspect()); + EXPECT_EQUAL(10, loc.getX()); + EXPECT_EQUAL(20, loc.getY()); + EXPECT_EQUAL(5u, loc.getRadius()); + EXPECT_EQUAL(-1, loc.getMinX()); + EXPECT_EQUAL(15, loc.getMinY()); + EXPECT_EQUAL(21, loc.getMaxX()); + EXPECT_EQUAL(25, loc.getMaxY()); +} +/* circle followed by box: the expected limits are the overlap of the box [10,20,30,40] and the circle's own limits [5,15,15,25] */ +TEST("require that bounding box can be specified after circle") { + Location loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]"); + EXPECT_EQUAL(true, loc.getRankOnDistance()); + EXPECT_EQUAL(true, loc.getPruneOnDistance()); + EXPECT_EQUAL(0u, loc.getXAspect()); + EXPECT_EQUAL(10, loc.getX()); + EXPECT_EQUAL(20, loc.getY()); + EXPECT_EQUAL(5u, loc.getRadius()); + EXPECT_EQUAL(10, loc.getMinX()); + EXPECT_EQUAL(20, loc.getMinY()); + EXPECT_EQUAL(15, loc.getMaxX()); + EXPECT_EQUAL(25, loc.getMaxY()); +} +/* box followed by circle: the same values are expected as in the opposite order */ +TEST("require that circles can be specified after bounding box") { + Location loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)"); + EXPECT_EQUAL(true, loc.getRankOnDistance()); + EXPECT_EQUAL(true, loc.getPruneOnDistance()); + EXPECT_EQUAL(0u, loc.getXAspect()); + EXPECT_EQUAL(10, loc.getX()); + EXPECT_EQUAL(20, loc.getY()); + EXPECT_EQUAL(5u, loc.getRadius()); + EXPECT_EQUAL(10, loc.getMinX()); + EXPECT_EQUAL(20, loc.getMinY()); + EXPECT_EQUAL(15, loc.getMaxX()); + EXPECT_EQUAL(25, loc.getMaxY()); +} +/* for this circle the limits must stay ordered: max >= min on both axes */ +TEST("require that santa search gives non-wrapped bounding box") { + Location loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)"); + EXPECT_GREATER_EQUAL(loc.getMaxX(), 
loc.getMinX()); + EXPECT_GREATER_EQUAL(loc.getMaxY(), loc.getMinY()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/packets/.gitignore b/searchlib/src/tests/common/packets/.gitignore new file mode 100644 index 00000000000..e3dcf5376d5 --- /dev/null +++ b/searchlib/src/tests/common/packets/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +packets_test +searchlib_packets_test_app diff --git a/searchlib/src/tests/common/packets/CMakeLists.txt b/searchlib/src/tests/common/packets/CMakeLists.txt new file mode 100644 index 00000000000..e35883b1d8c --- /dev/null +++ b/searchlib/src/tests/common/packets/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_packets_test_app + SOURCES + packets_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_packets_test_app COMMAND searchlib_packets_test_app) diff --git a/searchlib/src/tests/common/packets/DESC b/searchlib/src/tests/common/packets/DESC new file mode 100644 index 00000000000..0808703b5fb --- /dev/null +++ b/searchlib/src/tests/common/packets/DESC @@ -0,0 +1 @@ +packets test. Take a look at packets_test.cpp for details. diff --git a/searchlib/src/tests/common/packets/FILES b/searchlib/src/tests/common/packets/FILES new file mode 100644 index 00000000000..35191f9a36d --- /dev/null +++ b/searchlib/src/tests/common/packets/FILES @@ -0,0 +1 @@ +packets.cpp diff --git a/searchlib/src/tests/common/packets/packets_test.cpp b/searchlib/src/tests/common/packets/packets_test.cpp new file mode 100644 index 00000000000..443436537e1 --- /dev/null +++ b/searchlib/src/tests/common/packets/packets_test.cpp @@ -0,0 +1,705 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("packets_test"); + +#include +#include +#include +#include +#include + +using namespace search::fs4transport; + +// ---------------------------------------------------------------------------- +// +// Utilities +// +// ---------------------------------------------------------------------------- + +#define QRF_RANKTYPE QRF_RANKTYPE_DOUBLE + +#define PCODE_BEGIN PCODE_EOL +#define PCODE_END PCODE_LastCode + +class MyPersistentPacketStreamer : public FS4PersistentPacketStreamer { +public: + MyPersistentPacketStreamer() : + FS4PersistentPacketStreamer(FS4PacketFactory::CreateFS4Packet) { + // empty + } + + uint32_t getChannelId(uint32_t pcode, uint32_t chid) { + return HasChannelID(pcode) ? chid : -1u; + } +}; + +FNET_Packet * +testEncodeDecode(FS4PersistentPacketStreamer &streamer, FNET_Packet &packet) +{ + FNET_Context ctx; + FNET_DataBuffer buf; + buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the front. + streamer.Encode(&packet, 1u, &buf); + buf.DataToDead(sizeof(uint32_t)); + + FNET_DataBuffer lhs; + lhs.WriteBytes(buf.GetData(), buf.GetDataLen()); + + buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the end. + + bool broken; + uint32_t plen, pcode, chid; + MyPersistentPacketStreamer myStreamer; + EXPECT_TRUE(streamer.GetPacketInfo(&buf, &plen, &pcode, &chid, &broken)); + if ((pcode & ~PCODE_MASK) == 0) { + EXPECT_EQUAL(packet.GetLength(), plen); + } + EXPECT_EQUAL(packet.GetPCODE() & PCODE_MASK, pcode & PCODE_MASK); + EXPECT_EQUAL(myStreamer.getChannelId(pcode, 1u), chid); + + FNET_Packet *ret = streamer.Decode(&buf, plen, pcode, ctx); + ASSERT_TRUE(ret); + if (ret->GetPCODE() == (pcode & PCODE_MASK)) { + FNET_DataBuffer rhs; + streamer.Encode(ret, 1u, &rhs); + if (!EXPECT_TRUE(lhs.Equals(&rhs))) { + lhs.HexDump(); + rhs.HexDump(); + } + } else { + // Packet was transcoded. 
+ } + return ret; +} + +FNET_Packet * +testEncodeDecode(FNET_Packet &packet) +{ + return testEncodeDecode(FS4PersistentPacketStreamer::Instance, packet); +} + +void fillProperties(FS4Properties &props, const std::string &name, + uint32_t len) { + props.setName(name); + props.allocEntries(len); + for (uint32_t i = 0; i < len; ++i) { + std::string key = vespalib::make_string("key%d", i); + props.setKey(i, key); + + std::string val = vespalib::make_string("val%d", i); + props.setValue(i, val); + } +} + +void testProperties(FS4Properties &props, const std::string &name, + uint32_t len) { + EXPECT_EQUAL(name, props.getName()); + EXPECT_EQUAL(name.size(), props.getNameLen()); + for (uint32_t i = 0; i < len; ++i) { + std::string key = vespalib::make_string("key%d", i); + EXPECT_EQUAL(key, std::string(props.getKey(i), props.getKeyLen(i))); + + std::string val = vespalib::make_string("val%d", i); + EXPECT_EQUAL(val, + std::string(props.getValue(i), props.getValueLen(i))); + } +} + + +// ---------------------------------------------------------------------------- +// +// Tests +// +// ---------------------------------------------------------------------------- + +document::GlobalId gid0("aaaaaaaaaaaa"); +document::GlobalId gid1("bbbbbbbbbbbb"); + +TEST("testPacketArray") { + PacketArray arr; + for (uint32_t i = 0; i < 32; ++i) { + EXPECT_EQUAL(i, arr.Length()); + arr.Add(new FNET_ControlPacket(i)); + EXPECT_EQUAL(i, static_cast(*arr.Array()[i]).GetCommand()); + } + for (uint32_t i = 0; i < arr.Length(); ++i) { + delete static_cast(arr.Array()[i]); + } +} + +TEST("testPacketFactory") { + ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_BEGIN - 1) == NULL); + + ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_END) == NULL); + + for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) { + if ((pcode != PCODE_MLD_QUERYRESULT2_NOTUSED) && + (pcode != PCODE_QUERY_NOTUSED) && + (pcode != PCODE_QUERY2_NOTUSED) && + (pcode != PCODE_MLD_GETDOCSUMS2_NOTUSED)) + { + 
std::unique_ptr aptr(FS4PacketFactory::CreateFS4Packet(pcode)); + ASSERT_TRUE(aptr.get() != NULL); + EXPECT_EQUAL(pcode, aptr->GetPCODE()); + } + } +} + +TEST("testPersistentPacketStreamer") { // round-trips a default-constructed packet of each creatable pcode through the shared streamer + for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) { + if ((pcode == PCODE_QUERYX) || (pcode == PCODE_QUERY_NOTUSED) || (pcode == PCODE_QUERY2_NOTUSED) || + (pcode == PCODE_MLD_QUERYRESULT2_NOTUSED) || + (pcode == PCODE_MLD_GETDOCSUMS2_NOTUSED)) + { + continue; // testPacketFactory does not require the factory to build *_NOTUSED pcodes; QUERYX was excluded by the original test + } + std::unique_ptr arg(FS4PacketFactory::CreateFS4Packet(pcode)); + std::unique_ptr ret(testEncodeDecode(FS4PersistentPacketStreamer::Instance, *arg)); + EXPECT_TRUE(ret.get() != NULL); + + FNET_Packet *raw = testEncodeDecode(FS4PersistentPacketStreamer::Instance, *arg); // reuse arg: a second factory packet here was never freed + EXPECT_TRUE(raw != NULL); + delete raw; // the decoded packet is owned by the caller + } +} + +TEST("testProperties") { + FS4Properties src; + fillProperties(src, "foo", 32u); + testProperties(src, "foo", 32u); + + FNET_DataBuffer buf; + src.encode(buf); + FNET_DataBuffer lhs; + lhs.WriteBytes(buf.GetData(), buf.GetDataLen()); + + uint32_t len = buf.GetDataLen(); + FS4Properties dst; + dst.decode(buf, len); + EXPECT_EQUAL(src.getLength(), dst.getLength()); + + testProperties(dst, "foo", 32u); + + FNET_DataBuffer rhs; + dst.encode(rhs); + EXPECT_TRUE(lhs.Equals(&rhs)); +} + +TEST("testEol") { + FS4Packet_EOL *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_EOL)); + ASSERT_TRUE(src != NULL); + + std::vector lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_EOL *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_EOL, ptr->GetPCODE()); + EXPECT_EQUAL(0u, ptr->GetLength()); + + delete ptr; + } +} + +TEST("testError") { + FS4Packet_ERROR *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + ASSERT_TRUE(src != NULL); + src->_errorCode = 1u; + src->setErrorMessage("foo"); + + std::vector lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_ERROR *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != 
NULL); + EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE()); + EXPECT_EQUAL(11u, ptr->GetLength()); + EXPECT_EQUAL(1u, ptr->_errorCode); + EXPECT_EQUAL("foo", ptr->_message); + + delete ptr; + } +} + +TEST("testDocsum") { + FS4Packet_DOCSUM *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_DOCSUM)); + ASSERT_TRUE(src != NULL); + src->setGid(gid0); + src->SetBuf("foo", 3u); + + std::vector lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_DOCSUM *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_DOCSUM, ptr->GetPCODE()); + EXPECT_EQUAL(3u + 12u, ptr->GetLength()); + EXPECT_EQUAL(gid0, ptr->getGid()); + EXPECT_EQUAL("foo", std::string(ptr->getBuf().c_str(), ptr->getBuf().size())); + + delete ptr; + } +} + +TEST("testMonitorQueryX") { + FS4Packet_MONITORQUERYX *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORQUERYX)); + ASSERT_TRUE(src != NULL); + src->_qflags = 1u; + + std::vector lst; + for (uint32_t i = MQF_QFLAGS, len = (uint32_t)(MQF_QFLAGS << 1); i < len; ++i) { + if (i & ~FNET_MQF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_MONITORQUERYX *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_MONITORQUERYX, ptr->GetPCODE()); + EXPECT_EQUAL(ptr->_features & MQF_QFLAGS ? 
1u : 0u, ptr->_qflags); + + delete ptr; + } +} + +TEST("testMonitorResultX") { + FS4Packet_MONITORRESULTX *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORRESULTX)); + ASSERT_TRUE(src != NULL); + src->_partid = 1u; + src->_timestamp = 2u; + src->_totalNodes = 3u; + src->_activeNodes = 4u; + src->_totalParts = 5u; + src->_activeParts = 6u; + src->_rflags = 7u; + + std::vector lst; + for (uint32_t i = MRF_MLD, len = (uint32_t)(MRF_RFLAGS << 1); i < len; ++i) { + if (i & ~FNET_MRF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_MONITORRESULTX *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_MONITORRESULTX, ptr->GetPCODE()); + EXPECT_EQUAL(1u, ptr->_partid); + EXPECT_EQUAL(2u, ptr->_timestamp); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 3u : 0u, ptr->_totalNodes); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 4u : 0u, ptr->_activeNodes); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 5u : 0u, ptr->_totalParts); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 6u : 0u, ptr->_activeParts); + EXPECT_EQUAL(ptr->_features & MRF_RFLAGS ? 
7u : 0u, ptr->_rflags); + + delete ptr; + } +} + +TEST("testClearCaches") { + FS4Packet_CLEARCACHES *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_CLEARCACHES)); + ASSERT_TRUE(src != NULL); + + std::vector lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_CLEARCACHES *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_CLEARCACHES, ptr->GetPCODE()); + EXPECT_EQUAL(0u, ptr->GetLength()); + + delete ptr; + } +} + +TEST("testQueueLen") { + FS4Packet_QUEUELEN *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_QUEUELEN)); + ASSERT_TRUE(src != NULL); + src->_queueLen = 1u; + src->_dispatchers = 2u; + + std::vector lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_QUEUELEN *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_QUEUELEN, ptr->GetPCODE()); + EXPECT_EQUAL(8u, ptr->GetLength()); + EXPECT_EQUAL(1u, ptr->_queueLen); + EXPECT_EQUAL(2u, ptr->_dispatchers); + + delete ptr; + } +} + +TEST("testQueryResultX") { + FS4Packet_QUERYRESULTX *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYRESULTX)); + ASSERT_TRUE(src != NULL); + src->_offset = 1u; + src->_totNumDocs = 2u; + src->_maxRank = (search::HitRank)3; + src->setDistributionKey(4u); + src->_coverageDocs = 6u; + src->_activeDocs = 7u; + uint32_t sortIndex[3] = { 0u, 1u, 3u /* size of data */}; // numDocs + 1 + src->SetSortDataRef(2, sortIndex, "foo"); + src->SetAggrDataRef("bar", 3u); + src->SetGroupDataRef("baz", 3u); + src->AllocateHits(2); + src->_hits[0]._gid = gid0; + src->_hits[0]._metric = (search::HitRank)2; + src->_hits[0]._partid = 3u; + src->_hits[0].setDistributionKey(4u); + src->_hits[1]._gid = gid1; + src->_hits[1]._metric = (search::HitRank)3; + src->_hits[1]._partid = 4u; + src->_hits[1].setDistributionKey(5u); + + std::vector lst; + for (uint32_t i = QRF_MLD, len = (uint32_t)(QRF_GROUPDATA << 1); i < len; ++i) { + if (i 
& ~FNET_QRF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_QUERYRESULTX *ptr = dynamic_cast(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_QUERYRESULTX, ptr->GetPCODE()); + + EXPECT_EQUAL(1u, ptr->_offset); + EXPECT_EQUAL(2u, ptr->_totNumDocs); + EXPECT_EQUAL((search::HitRank)3, ptr->_maxRank); + EXPECT_EQUAL(4u, ptr->getDistributionKey()); + EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 6u : 0u, ptr->_coverageDocs); + EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 7u : 0u, ptr->_activeDocs); + if (ptr->_features & QRF_SORTDATA) { + EXPECT_EQUAL(0u, ptr->_sortIndex[0]); + EXPECT_EQUAL(1u, ptr->_sortIndex[1]); + EXPECT_EQUAL(3u, ptr->_sortIndex[2]); + EXPECT_EQUAL("foo", std::string(ptr->_sortData, ptr->_sortIndex[2])); + } else { + EXPECT_EQUAL((void*)NULL, ptr->_sortIndex); + EXPECT_EQUAL((void*)NULL, ptr->_sortData); + } + if (ptr->_features & QRF_AGGRDATA) { + EXPECT_EQUAL("bar", std::string(ptr->_aggrData, ptr->_aggrDataLen)); + } else { + EXPECT_EQUAL(0u, ptr->_aggrDataLen); + EXPECT_EQUAL((void*)NULL, ptr->_aggrData); + } + if (ptr->_features & QRF_GROUPDATA) { + EXPECT_EQUAL("baz", std::string(ptr->_groupData, ptr->_groupDataLen)); + } else { + EXPECT_EQUAL(0u, ptr->_groupDataLen); + EXPECT_EQUAL((void*)NULL, ptr->_groupData); + } + EXPECT_EQUAL(2u, ptr->_numDocs); + for (uint32_t i = 0; i < ptr->_numDocs; ++i) { + EXPECT_EQUAL(i == 0 ? gid0 : gid1, ptr->_hits[i]._gid); + EXPECT_EQUAL((search::HitRank)2 + i, ptr->_hits[i]._metric); + EXPECT_EQUAL(ptr->_features & QRF_MLD ? 3u + i : 0u, ptr->_hits[i]._partid); + EXPECT_EQUAL(ptr->_features & QRF_MLD ? 
4u + i : ptr->getDistributionKey(), ptr->_hits[i].getDistributionKey()); + } + + delete ptr; + } +} + +FS4Packet_QUERYX * +createAndFill_QUERYX() +{ + FS4Packet_QUERYX *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYX)); + ASSERT_TRUE(src != NULL); + src->_offset = 2u; + src->_maxhits = 3u; + src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout()); + src->setTimeout(fastos::TimeStamp(-4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(0l, src->getTimeout()); + src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout()); + src->_qflags = 5u; + src->setRanking("seven"); + src->_numStackItems = 14u; + src->_propsVector.resize(2); + fillProperties(src->_propsVector[0], "foo", 8); + fillProperties(src->_propsVector[1], "bar", 16); + src->setSortSpec("sortspec"); + src->setAggrSpec("aggrspec"); + src->setGroupSpec("groupspec"); + src->setLocation("location"); + src->setStackDump("stackdump"); + return src; +} + +void +verifyQueryX(FS4Packet_QUERYX & queryX, uint32_t features) +{ + EXPECT_EQUAL((uint32_t)PCODE_QUERYX, queryX.GetPCODE()); + EXPECT_EQUAL(features, queryX._features); + EXPECT_EQUAL(2u, queryX._offset); + EXPECT_EQUAL(3u, queryX._maxhits); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), queryX.getTimeout()); + EXPECT_EQUAL(0x5u, queryX._qflags); + if (queryX._features & QF_RANKP) { + EXPECT_EQUAL("seven", queryX._ranking); + } else { + EXPECT_EQUAL("", queryX._ranking); + } + EXPECT_EQUAL(queryX._features & QF_PARSEDQUERY ? 
14u : 0u, queryX._numStackItems); + if (queryX._features & QF_PROPERTIES) { + EXPECT_EQUAL(2u, queryX._propsVector.size()); + testProperties(queryX._propsVector[0], "foo", 8); + testProperties(queryX._propsVector[1], "bar", 16); + } else { + EXPECT_EQUAL(0u, queryX._propsVector.size()); + } + if (queryX._features & QF_SORTSPEC) { + EXPECT_EQUAL("sortspec", queryX._sortSpec); + } else { + EXPECT_EQUAL(0u, queryX._sortSpec.size()); + } + if (queryX._features & QF_AGGRSPEC) { + EXPECT_EQUAL("aggrspec", queryX._aggrSpec); + } else { + EXPECT_EQUAL(0u, queryX._aggrSpec.size()); + } + if (queryX._features & QF_GROUPSPEC) { + EXPECT_EQUAL("groupspec", queryX._groupSpec); + } else { + EXPECT_EQUAL(0u, queryX._groupSpec.size()); + } + if (queryX._features & QF_LOCATION) { + EXPECT_EQUAL("location", queryX._location); + } else { + EXPECT_EQUAL(0u, queryX._location.size()); + } + if (queryX._features & QF_PARSEDQUERY) { + EXPECT_EQUAL("stackdump", queryX._stackDump); + } else { + EXPECT_EQUAL(0u, queryX._stackDump.size()); + } +} + +TEST("testQueryX") { + FS4Packet_QUERYX *src = createAndFill_QUERYX(); + std::vector> lst; + for (uint32_t i = QF_PARSEDQUERY, len = (uint32_t)(QF_GROUPSPEC << 1), skip = 0; i < len; ++i) { + if (!(i & QF_PARSEDQUERY)) { + continue; // skip most + } + if (i & ~FNET_QF_SUPPORTED_MASK) { + continue; // not supported + } + if (++skip % 10) { + continue; // skip most + } + src->_features = i; + lst.emplace_back(testEncodeDecode(*src), i); + } + src->_features = uint32_t(-1); + lst.emplace_back(src, -1); + + for (const auto & pfPair : lst) { + FS4Packet_QUERYX *ptr = dynamic_cast(pfPair.first); + ASSERT_TRUE(ptr != NULL); + verifyQueryX(*ptr, pfPair.second); + + delete ptr; + } +} + +TEST("testSharedPacket") { + FNET_Packet::SP src(createAndFill_QUERYX()); + static_cast(src.get())->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::SP decoded(testEncodeDecode(*src)); + verifyQueryX(*static_cast(decoded.get()), FNET_QF_SUPPORTED_MASK); + 
EXPECT_TRUE(decoded.get() != nullptr); + FS4Packet_Shared shared(decoded); + FNET_Packet::UP decoded2(testEncodeDecode(shared)); + EXPECT_TRUE(decoded2.get() != nullptr); + EXPECT_TRUE(nullptr == dynamic_cast(decoded2.get())); + EXPECT_TRUE(nullptr != dynamic_cast(decoded2.get())); + EXPECT_EQUAL(src->GetLength(), decoded2->GetLength()); + verifyQueryX(*static_cast(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + +TEST("test pre serializing packets no compression") { + FNET_Packet::UP src(createAndFill_QUERYX()); + FS4Packet_QUERYX * queryX = static_cast(src.get()); + queryX->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::UP decoded(testEncodeDecode(*src)); + verifyQueryX(*static_cast(decoded.get()), FNET_QF_SUPPORTED_MASK); + EXPECT_EQUAL(512u, src->GetLength()); + EXPECT_EQUAL(src->GetLength(), decoded->GetLength()); + FS4Packet_PreSerialized serialized(*src); + EXPECT_EQUAL(218u, serialized.GetPCODE()); + EXPECT_EQUAL(512u, serialized.GetLength()); + FNET_Packet::UP decoded2(testEncodeDecode(serialized)); + EXPECT_EQUAL(512u, decoded2->GetLength()); + verifyQueryX(*static_cast(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + +TEST("test pre serializing packets with compression") { + FNET_Packet::UP src(createAndFill_QUERYX()); + FS4Packet_QUERYX * queryX = static_cast(src.get()); + queryX->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::UP decoded(testEncodeDecode(*src)); + verifyQueryX(*static_cast(decoded.get()), FNET_QF_SUPPORTED_MASK); + EXPECT_EQUAL(512u, src->GetLength()); + EXPECT_EQUAL(src->GetLength(), decoded->GetLength()); + FS4PersistentPacketStreamer::Instance.SetCompressionLimit(100); + FS4Packet_PreSerialized serialized(*src); + EXPECT_EQUAL(218u | (document::CompressionConfig::LZ4 << 24), serialized.GetPCODE()); + EXPECT_GREATER_EQUAL(321u, serialized.GetLength()); + FNET_Packet::UP decoded2(testEncodeDecode(serialized)); + EXPECT_EQUAL(512u, decoded2->GetLength()); + verifyQueryX(*static_cast(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + + 
+TEST("testGetDocsumsX") { + FS4Packet_GETDOCSUMSX *src = dynamic_cast(FS4PacketFactory::CreateFS4Packet(PCODE_GETDOCSUMSX)); + ASSERT_TRUE(src != NULL); + src->setTimeout(fastos::TimeStamp(2*fastos::TimeStamp::MS)); + src->setRanking("four"); + src->_qflags = 5u; + src->_stackItems = 7u; + src->_propsVector.resize(2); + fillProperties(src->_propsVector[0], "foo", 8); + fillProperties(src->_propsVector[1], "bar", 16); + src->setResultClassName("resultclassname"); + src->setStackDump("stackdump"); + src->setLocation("location"); + src->_flags = GDFLAG_IGNORE_ROW; + src->AllocateDocIDs(2); + src->_docid[0]._gid = gid0; + src->_docid[0]._partid = 2u; + src->_docid[1]._gid = gid1; + src->_docid[1]._partid = 3u; + + std::vector> lst; + for (uint32_t i = GDF_MLD, len = (uint32_t)(GDF_FLAGS << 1); i < len; ++i) { + if (i & ~FNET_GDF_SUPPORTED_MASK) { + continue; // not supported + } + src->_features = i; + lst.emplace_back(testEncodeDecode(*src), i); + } + src->_features = uint32_t(-1); + lst.emplace_back(src, uint32_t(-1)); + + for (const auto & pfPair : lst) { + FS4Packet_GETDOCSUMSX *ptr = dynamic_cast(pfPair.first); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_GETDOCSUMSX, ptr->GetPCODE()); + EXPECT_EQUAL(pfPair.second, ptr->_features); + EXPECT_EQUAL(fastos::TimeStamp(2*fastos::TimeStamp::MS), ptr->getTimeout()); + if (ptr->_features & GDF_RANKP_QFLAGS) { + EXPECT_EQUAL("four", ptr->_ranking); + } else { + EXPECT_EQUAL("", ptr->_ranking); + } + EXPECT_EQUAL(ptr->_features & GDF_RANKP_QFLAGS ? 5u : 0u, ptr->_qflags); + EXPECT_EQUAL(ptr->_features & GDF_QUERYSTACK ? 
7u : 0u, ptr->_stackItems); + if (ptr->_features & GDF_PROPERTIES) { + EXPECT_EQUAL(2u, ptr->_propsVector.size()); + testProperties(ptr->_propsVector[0], "foo", 8); + testProperties(ptr->_propsVector[1], "bar", 16); + } else { + EXPECT_EQUAL(0u, ptr->_propsVector.size()); + } + if (ptr->_features & GDF_RESCLASSNAME) { + EXPECT_EQUAL("resultclassname", ptr->_resultClassName); + } else { + EXPECT_EQUAL(0u, ptr->_resultClassName.size()); + } + if (ptr->_features & GDF_QUERYSTACK) { + EXPECT_EQUAL("stackdump", ptr->_stackDump); + } else { + EXPECT_EQUAL(0u, ptr->_stackDump.size()); + } + if (ptr->_features & GDF_LOCATION) { + EXPECT_EQUAL("location", ptr->_location); + } else { + EXPECT_EQUAL(0u, ptr->_location.size()); + } + if (ptr->_features & GDF_FLAGS) { + EXPECT_EQUAL(static_cast(GDFLAG_IGNORE_ROW), + ptr->_flags); + } else { + EXPECT_EQUAL(0u, ptr->_flags); + } + EXPECT_EQUAL(2u, ptr->_docidCnt); + ASSERT_TRUE(ptr->_docid != NULL); + for (uint32_t i = 0; i < ptr->_docidCnt; ++i) { + EXPECT_EQUAL(i == 0u ? gid0 : gid1, ptr->_docid[i]._gid); + EXPECT_EQUAL(ptr->_features & GDF_MLD ? 
2u + i : 0u, ptr->_docid[i]._partid); + } + + delete ptr; + } +} + +TEST("require that FS4PersistentPacketStreamer can compress packets") { + FS4Packet_ERROR *packet = static_cast(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + packet->_errorCode = 1u; + packet->setErrorMessage(string(1000, 'a')); + + FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet); + + FNET_DataBuffer buf1; + streamer.Encode(packet, 1u, &buf1); + EXPECT_EQUAL(1020u, buf1.GetDataLen()); + + streamer.SetCompressionLimit(100); + FNET_DataBuffer buf2; + streamer.Encode(packet, 1u, &buf2); + EXPECT_EQUAL(38u, buf2.GetDataLen()); + + std::vector lst{ packet, testEncodeDecode(streamer, *packet) }; + + for (FNET_Packet * fnetPacket : lst) { + FS4Packet_ERROR *ptr = dynamic_cast(fnetPacket); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE()); + EXPECT_EQUAL(1008u, ptr->GetLength()); + delete ptr; + } +} + +TEST("require that FS4PersistentPacketStreamer can avoid compressing small packets") { + FS4Packet_ERROR *packet = static_cast(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + packet->_errorCode = 1u; + packet->setErrorMessage("a"); + + FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet); + + FNET_DataBuffer buf1; + streamer.Encode(packet, 1u, &buf1); + EXPECT_EQUAL(21u, buf1.GetDataLen()); + + streamer.SetCompressionLimit(10); + FNET_DataBuffer buf2; + streamer.Encode(packet, 1u, &buf2); + EXPECT_EQUAL(21u, buf2.GetDataLen()); + + delete packet; +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/rcuvector/.gitignore b/searchlib/src/tests/common/rcuvector/.gitignore new file mode 100644 index 00000000000..d88533ed6af --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +rcuvector_test +searchlib_rcuvector_test_app diff --git a/searchlib/src/tests/common/rcuvector/CMakeLists.txt b/searchlib/src/tests/common/rcuvector/CMakeLists.txt new file mode 
100644 index 00000000000..362dbf68dca --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_rcuvector_test_app + SOURCES + rcuvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_rcuvector_test_app COMMAND searchlib_rcuvector_test_app) diff --git a/searchlib/src/tests/common/rcuvector/DESC b/searchlib/src/tests/common/rcuvector/DESC new file mode 100644 index 00000000000..38af6317f80 --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/DESC @@ -0,0 +1 @@ +rcuvector test. Take a look at rcuvector.h for details. diff --git a/searchlib/src/tests/common/rcuvector/FILES b/searchlib/src/tests/common/rcuvector/FILES new file mode 100644 index 00000000000..a8bae8dbd5c --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/FILES @@ -0,0 +1 @@ +rcuvector.h diff --git a/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp new file mode 100644 index 00000000000..dd50de79f17 --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp @@ -0,0 +1,284 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("rcuvector_test"); +#include +#include + +namespace search { +namespace attribute { + +using vespalib::GenerationHandler; +using vespalib::GenerationHolder; +using vespalib::GenerationHeldBase; + +class Test : public vespalib::TestApp { +private: + bool assertUsage(const MemoryUsage & exp, const MemoryUsage & act); + void testGenerationHolder(); + void testBasic(); + void testResize(); + void testGenerationHandling(); + void testMemoryUsage(); + + void + testShrink(); + void testSmallExpand(); +public: + int Main(); +}; + +bool +Test::assertUsage(const MemoryUsage & exp, const MemoryUsage & act) +{ + bool retval = true; + if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) retval = false; + if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) retval = false; + if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) retval = false; + if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) retval = false; + return retval; +} + +void +Test::testGenerationHolder() +{ + typedef std::unique_ptr IntPtr; + GenerationHolder gh; + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld(sizeof(int32_t), + IntPtr(new int32_t(0))))); + gh.transferHoldLists(0); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld(sizeof(int32_t), + IntPtr(new int32_t(1))))); + gh.transferHoldLists(1); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld(sizeof(int32_t), + IntPtr(new int32_t(2))))); + gh.transferHoldLists(2); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld(sizeof(int32_t), + IntPtr(new int32_t(4))))); + gh.transferHoldLists(4); + EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(0); + EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(1); + EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(2); + EXPECT_EQUAL(2u * sizeof(int32_t), gh.getHeldBytes()); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld(sizeof(int32_t), + IntPtr(new int32_t(6))))); + 
gh.transferHoldLists(6); + EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(6); + EXPECT_EQUAL(1u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(7); + EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(7); + EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes()); +} + +void +Test::testBasic() +{ + { // insert + RcuVector v(4, 0, 4); + for (int32_t i = 0; i < 100; ++i) { + v.push_back(i); + EXPECT_EQUAL(i, v[i]); + EXPECT_EQUAL((size_t)i + 1, v.size()); + } + for (int32_t i = 0; i < 100; ++i) { + v[i] = i + 1; + EXPECT_EQUAL(i + 1, v[i]); + EXPECT_EQUAL(100u, v.size()); + } + } +} + +void +Test::testResize() +{ + { // resize percent + RcuVector v(2, 50, 0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(3u, v.capacity()); + EXPECT_TRUE(v.isFull()); + } + { // resize delta + RcuVector v(1, 0, 3); + EXPECT_EQUAL(1u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(4u, v.capacity()); + EXPECT_TRUE(!v.isFull()); + } + { // resize both + RcuVector v(2, 200, 3); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(9u, v.capacity()); + EXPECT_TRUE(!v.isFull()); + } + { // reserve + RcuVector v(2, 0, 0); + EXPECT_EQUAL(2u, v.capacity()); + v.unsafe_reserve(8); + EXPECT_EQUAL(8u, v.capacity()); + } + { // explicit resize + GenerationHolder g; + RcuVectorBase v(g); + v.push_back(1); + v.push_back(2); + g.transferHoldLists(0); + g.trimHoldLists(1); + const int8_t *old = &v[0]; + EXPECT_EQUAL(16u, v.capacity()); + EXPECT_EQUAL(2u, v.size()); + v.ensure_size(32, 3); + v[0] = 3; + v[1] = 3; + g.transferHoldLists(1); + EXPECT_EQUAL(1, old[0]); + 
EXPECT_EQUAL(2, old[1]); + EXPECT_EQUAL(3, v[0]); + EXPECT_EQUAL(3, v[1]); + EXPECT_EQUAL(3, v[2]); + EXPECT_EQUAL(3, v[31]); + EXPECT_EQUAL(64u, v.capacity()); + EXPECT_EQUAL(32u, v.size()); + g.trimHoldLists(2); + } +} + +void +Test::testGenerationHandling() +{ + RcuVector v(2, 0, 2); + v.push_back(0); + v.push_back(10); + EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(20); // new array + EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold()); + + v.setGeneration(1); + v.push_back(30); + EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(40); // new array + EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold()); + + v.setGeneration(2); + v.push_back(50); + v.removeOldGenerations(3); + EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(60); // new array + EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold()); +} + +void +Test::testMemoryUsage() +{ + RcuVector v(2, 0, 2); + EXPECT_TRUE(assertUsage(MemoryUsage(2,0,0,0), v.getMemoryUsage())); + v.push_back(0); + EXPECT_TRUE(assertUsage(MemoryUsage(2,1,0,0), v.getMemoryUsage())); + v.push_back(1); + EXPECT_TRUE(assertUsage(MemoryUsage(2,2,0,0), v.getMemoryUsage())); + v.push_back(2); + EXPECT_TRUE(assertUsage(MemoryUsage(4,3,0,2), v.getMemoryUsage())); + v.push_back(3); + EXPECT_TRUE(assertUsage(MemoryUsage(4,4,0,2), v.getMemoryUsage())); + v.push_back(4); + EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,6), v.getMemoryUsage())); + v.removeOldGenerations(1); + EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,0), v.getMemoryUsage())); +} + + +void +Test::testShrink() +{ + GenerationHolder g; + RcuVectorBase v(g); + v.push_back(1); + v.push_back(2); + v.push_back(3); + v.push_back(4); + g.transferHoldLists(0); + g.trimHoldLists(1); + MemoryUsage mu; + mu = v.getMemoryUsage(); + mu.incAllocatedBytesOnHold(g.getHeldBytes()); + EXPECT_TRUE(assertUsage(MemoryUsage(16, 4, 0, 0), mu)); + EXPECT_EQUAL(4u, v.size()); + EXPECT_TRUE(v.capacity() >= 
4u); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + EXPECT_EQUAL(3, v[2]); + EXPECT_EQUAL(4, v[3]); + const int8_t *old = &v[0]; + v.shrink(2); + g.transferHoldLists(1); + EXPECT_EQUAL(2u, v.size()); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + EXPECT_EQUAL(1, old[0]); + EXPECT_EQUAL(2, old[1]); + g.trimHoldLists(2); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + mu = v.getMemoryUsage(); + mu.incAllocatedBytesOnHold(g.getHeldBytes()); + EXPECT_TRUE(assertUsage(MemoryUsage(2, 2, 0, 0), mu)); +} + +void +Test::testSmallExpand() +{ + GenerationHolder g; + RcuVectorBase v(1, 50, 0, g); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_EQUAL(0u, v.size()); + v.push_back(1); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_EQUAL(1u, v.size()); + v.push_back(2); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_EQUAL(2u, v.size()); + g.transferHoldLists(1); + g.trimHoldLists(2); +} + + +int +Test::Main() +{ + TEST_INIT("rcuvector_test"); + + testGenerationHolder(); + testBasic(); + testResize(); + testGenerationHandling(); + testMemoryUsage(); + testShrink(); + testSmallExpand(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::attribute::Test); diff --git a/searchlib/src/tests/common/resultset/.gitignore b/searchlib/src/tests/common/resultset/.gitignore new file mode 100644 index 00000000000..41242fde289 --- /dev/null +++ b/searchlib/src/tests/common/resultset/.gitignore @@ -0,0 +1 @@ +searchlib_resultset_test_app diff --git a/searchlib/src/tests/common/resultset/CMakeLists.txt b/searchlib/src/tests/common/resultset/CMakeLists.txt new file mode 100644 index 00000000000..0aed46f6e89 --- /dev/null +++ b/searchlib/src/tests/common/resultset/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_resultset_test_app + SOURCES + resultset_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_resultset_test_app COMMAND searchlib_resultset_test_app) diff --git a/searchlib/src/tests/common/resultset/resultset_test.cpp b/searchlib/src/tests/common/resultset/resultset_test.cpp new file mode 100644 index 00000000000..983dc10b914 --- /dev/null +++ b/searchlib/src/tests/common/resultset/resultset_test.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for resultset. + +#include +#include +LOG_SETUP("resultset_test"); + +#include +#include +#include +#include + +using namespace search; +using vespalib::arraysize; + +namespace { + +void concatenate(const ResultSet *input_array[], size_t array_size, + ResultSet &output) +{ + size_t hit_count = 0; + for (size_t i = 0; i < array_size; ++i) { + hit_count += input_array[i]->getArrayUsed(); + } + output.allocArray(hit_count); + RankedHit *p = output.getArray(); + for (size_t i = 0; i < array_size; ++i) { + const ResultSet &set = *input_array[i]; + memcpy(p, set.getArray(), set.getArrayUsed() * sizeof(RankedHit)); + p += set.getArrayUsed(); + if (set.getBitOverflow()) { + if (output.getBitOverflow()) { + output.getBitOverflow()->orWith(*set.getBitOverflow()); + } else { + output.setBitOverflow(BitVector::create(*set.getBitOverflow())); + } + } + } + output.setArrayUsed(hit_count); +} + + +void addHit(ResultSet &set, unsigned int doc_id, double rank) { + if (set.getArrayAllocated() == 0) { + set.allocArray(10); + } + ASSERT_LESS(set.getArrayUsed(), set.getArrayAllocated()); + RankedHit *hit_array = set.getArray(); + hit_array[set.getArrayUsed()]._docId = doc_id; + hit_array[set.getArrayUsed()]._rankValue = rank; + set.setArrayUsed(set.getArrayUsed() + 1); +} + +TEST("require that mergeWithOverflow works") { + ResultSet set1; + addHit(set1, 2, 4.2); + addHit(set1, 4, 3.2); + 
BitVector::UP bit_vector = BitVector::create(20); + bit_vector->setBit(2); + bit_vector->setBit(4); + bit_vector->setBit(7); + bit_vector->invalidateCachedCount(); + set1.setBitOverflow(std::move(bit_vector)); + EXPECT_EQUAL(3u, set1.getNumHits()); + set1.mergeWithBitOverflow(); + EXPECT_EQUAL(3u, set1.getNumHits()); +} + +TEST("require that resultsets can be concatenated") { + ResultSet set1; + addHit(set1, 2, 4.2); + addHit(set1, 4, 3.2); + BitVector::UP bit_vector = BitVector::create(20); + bit_vector->setBit(7); + set1.setBitOverflow(std::move(bit_vector)); + + ResultSet set2; + addHit(set2, 12, 4.2); + addHit(set2, 14, 3.2); + bit_vector = BitVector::create(20); + bit_vector->setBit(17); + set2.setBitOverflow(std::move(bit_vector)); + + const ResultSet *sets[] = { &set1, &set2 }; + ResultSet target; + concatenate(sets, arraysize(sets), target); + + EXPECT_EQUAL(4u, target.getArrayAllocated()); + ASSERT_EQUAL(4u, target.getArrayUsed()); + EXPECT_EQUAL(2u, target.getArray()[0]._docId); + EXPECT_EQUAL(4.2, target.getArray()[0]._rankValue); + EXPECT_EQUAL(4u, target.getArray()[1]._docId); + EXPECT_EQUAL(3.2, target.getArray()[1]._rankValue); + EXPECT_EQUAL(12u, target.getArray()[2]._docId); + EXPECT_EQUAL(4.2, target.getArray()[2]._rankValue); + EXPECT_EQUAL(14u, target.getArray()[3]._docId); + EXPECT_EQUAL(3.2, target.getArray()[3]._rankValue); + + BitVector * bv = target.getBitOverflow(); + ASSERT_TRUE(bv); + EXPECT_EQUAL(20u, bv->size()); + EXPECT_EQUAL(7u, bv->getNextTrueBit(0)); + EXPECT_EQUAL(17u, bv->getNextTrueBit(8)); + EXPECT_EQUAL(20u, bv->getNextTrueBit(18)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore new file mode 100644 index 00000000000..35d038b0b7c --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore @@ -0,0 +1 @@ +searchlib_sequencedtaskexecutor_test_app diff --git 
a/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt new file mode 100644 index 00000000000..501fd3b07f1 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sequencedtaskexecutor_test_app + SOURCES + sequencedtaskexecutor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sequencedtaskexecutor_test_app COMMAND searchlib_sequencedtaskexecutor_test_app) diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/DESC b/searchlib/src/tests/common/sequencedtaskexecutor/DESC new file mode 100644 index 00000000000..29ac00d3453 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/DESC @@ -0,0 +1 @@ +sequencedtaskexecutor test. Take a look at sequencedtaskexecutor_test.cpp for details. diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/FILES b/searchlib/src/tests/common/sequencedtaskexecutor/FILES new file mode 100644 index 00000000000..a8ebec0ebca --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/FILES @@ -0,0 +1 @@ +sequencedtaskexecutor_test.cpp diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp new file mode 100644 index 00000000000..98436364ea0 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp @@ -0,0 +1,194 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("sequencedtaskexecutor_test"); +#include +#include +#include + +#include +#include + +namespace search +{ + +namespace common +{ + + +class Fixture +{ +public: + SequencedTaskExecutor _threads; + + Fixture() + : _threads(2) + { + } +}; + + +class TestObj +{ +public: + std::mutex _m; + std::condition_variable _cv; + int _done; + int _fail; + int _val; + + TestObj() + : _m(), + _cv(), + _done(0), + _fail(0), + _val(0) + { + } + + void + modify(int oldValue, int newValue) + { + { + std::lock_guard guard(_m); + if (_val == oldValue) { + _val = newValue; + } else { + ++_fail; + } + ++_done; + } + _cv.notify_all(); + } + + void + wait(int wantDone) + { + std::unique_lock guard(_m); + _cv.wait(guard, [=] { return this->_done >= wantDone; }); + } +}; + +TEST_F("testExecute", Fixture) { + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(1, [=]() { tv->modify(0, 42); }); + tv->wait(1); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + + +TEST_F("require that task with same id are serialized", Fixture) +{ + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(0, [=]() { tv->modify(14, 42); }); + tv->wait(2); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + +TEST_F("require that task with different ids are not serialized", Fixture) +{ + int tryCnt = 0; + for (tryCnt = 0; tryCnt < 100; ++tryCnt) { + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(2, [=]() { tv->modify(14, 42); }); + tv->wait(2); + if (tv->_fail != 1) { + continue; + } + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + 
f._threads.sync(); + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + break; + } + EXPECT_TRUE(tryCnt < 100); +} + + +TEST_F("require that task with same string id are serialized", Fixture) +{ + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + auto test2 = [=]() { tv->modify(14, 42); }; + f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute("0", test2); + tv->wait(2); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + +TEST_F("require that task with different string ids are not serialized", + Fixture) +{ + int tryCnt = 0; + for (tryCnt = 0; tryCnt < 100; ++tryCnt) { + std::shared_ptr tv(std::make_shared()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute("2", [=]() { tv->modify(14, 42); }); + tv->wait(2); + if (tv->_fail != 1) { + continue; + } + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + break; + } + EXPECT_TRUE(tryCnt < 100); +} + + +TEST_F("require that execute works with const lambda", Fixture) +{ + int i = 5; + std::vector res; + const auto lambda = [i, &res]() mutable + { res.push_back(i--); res.push_back(i--); }; + f._threads.execute(0, lambda); + f._threads.execute(0, lambda); + f._threads.sync(); + std::vector exp({5, 4, 5, 4}); + EXPECT_EQUAL(exp, res); + EXPECT_EQUAL(5, i); +} + +TEST_F("require that execute works with reference to lambda", Fixture) +{ + int i = 5; + std::vector res; + auto lambda = [i, &res]() mutable + { res.push_back(i--); res.push_back(i--); }; + auto &lambdaref = lambda; + f._threads.execute(0, lambdaref); + f._threads.execute(0, lambdaref); + f._threads.sync(); + std::vector exp({5, 4, 5, 4}); + EXPECT_EQUAL(exp, res); + EXPECT_EQUAL(5, i); +} + + +} // namespace common +} // namespace search + 
+TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/summaryfeatures/.gitignore b/searchlib/src/tests/common/summaryfeatures/.gitignore new file mode 100644 index 00000000000..543319fb8dd --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +summaryfeatures_test +searchlib_summaryfeatures_test_app diff --git a/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt new file mode 100644 index 00000000000..3b6cb392615 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_summaryfeatures_test_app + SOURCES + summaryfeatures.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_summaryfeatures_test_app COMMAND searchlib_summaryfeatures_test_app) diff --git a/searchlib/src/tests/common/summaryfeatures/DESC b/searchlib/src/tests/common/summaryfeatures/DESC new file mode 100644 index 00000000000..9cc24928a82 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/DESC @@ -0,0 +1 @@ +summaryfeatures test. Take a look at summaryfeatures.cpp for details. diff --git a/searchlib/src/tests/common/summaryfeatures/FILES b/searchlib/src/tests/common/summaryfeatures/FILES new file mode 100644 index 00000000000..19692b59229 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/FILES @@ -0,0 +1 @@ +summaryfeatures.cpp diff --git a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp new file mode 100644 index 00000000000..6d4e8bc49c8 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("summaryfeatures_test"); +#include +#include + +using namespace search; + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("summaryfeatures_test"); + { + FeatureSet sf; + EXPECT_EQUAL(sf.getNames().size(), 0u); + EXPECT_EQUAL(sf.numFeatures(), 0u); + EXPECT_EQUAL(sf.numDocs(), 0u); + EXPECT_TRUE(sf.getFeaturesByIndex(0) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(0) == 0); + std::vector docs; + EXPECT_TRUE(sf.contains(docs)); + docs.push_back(1); + EXPECT_TRUE(!sf.contains(docs)); + } + { + FeatureSet::StringVector n; + n.push_back("f1"); + n.push_back("f2"); + n.push_back("f3"); + + FeatureSet sf(n, 5); + EXPECT_EQUAL(sf.getNames().size(), 3u); + EXPECT_EQUAL(sf.getNames()[0], "f1"); + EXPECT_EQUAL(sf.getNames()[1], "f2"); + EXPECT_EQUAL(sf.getNames()[2], "f3"); + EXPECT_EQUAL(sf.numFeatures(), 3u); + EXPECT_EQUAL(sf.numDocs(), 0u); + EXPECT_EQUAL(sf.addDocId(10), 0u); + EXPECT_EQUAL(sf.addDocId(20), 1u); + EXPECT_EQUAL(sf.addDocId(30), 2u); + EXPECT_EQUAL(sf.addDocId(40), 3u); + EXPECT_EQUAL(sf.addDocId(50), 4u); + EXPECT_EQUAL(sf.numDocs(), 5u); + feature_t *f; + const feature_t *cf; + f = sf.getFeaturesByIndex(0); + ASSERT_TRUE(f != 0); + f[0] = 11.0; + f[1] = 12.0; + f[2] = 13.0; + f = sf.getFeaturesByIndex(1); + ASSERT_TRUE(f != 0); + f[0] = 21.0; + f[1] = 22.0; + f[2] = 23.0; + f = sf.getFeaturesByIndex(2); + ASSERT_TRUE(f != 0); + f[0] = 31.0; + f[1] = 32.0; + f[2] = 33.0; + f = sf.getFeaturesByIndex(3); + ASSERT_TRUE(f != 0); + f[0] = 41.0; + f[1] = 42.0; + f[2] = 43.0; + f = sf.getFeaturesByIndex(4); + ASSERT_TRUE(f != 0); + f[0] = 51.0; + f[1] = 52.0; + f[2] = 53.0; + EXPECT_TRUE(sf.getFeaturesByIndex(5) == 0); + { + std::vector docs; + EXPECT_TRUE(sf.contains(docs)); + } + { + std::vector docs; + docs.push_back(1); + EXPECT_TRUE(!sf.contains(docs)); + } + { + std::vector docs; + docs.push_back(31); + EXPECT_TRUE(!sf.contains(docs)); + } + { + std::vector docs; + docs.push_back(51); + EXPECT_TRUE(!sf.contains(docs)); + 
} + { + std::vector docs; + docs.push_back(20); + docs.push_back(40); + EXPECT_TRUE(sf.contains(docs)); + } + { + std::vector docs; + docs.push_back(10); + docs.push_back(20); + docs.push_back(30); + docs.push_back(40); + docs.push_back(50); + EXPECT_TRUE(sf.contains(docs)); + } + { + cf = sf.getFeaturesByDocId(10); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 11.0, 10e-6); + EXPECT_APPROX(cf[1], 12.0, 10e-6); + EXPECT_APPROX(cf[2], 13.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(20); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 21.0, 10e-6); + EXPECT_APPROX(cf[1], 22.0, 10e-6); + EXPECT_APPROX(cf[2], 23.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(30); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 31.0, 10e-6); + EXPECT_APPROX(cf[1], 32.0, 10e-6); + EXPECT_APPROX(cf[2], 33.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(40); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 41.0, 10e-6); + EXPECT_APPROX(cf[1], 42.0, 10e-6); + EXPECT_APPROX(cf[2], 43.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(50); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 51.0, 10e-6); + EXPECT_APPROX(cf[1], 52.0, 10e-6); + EXPECT_APPROX(cf[2], 53.0, 10e-6); + } + EXPECT_TRUE(sf.getFeaturesByDocId(5) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(15) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(25) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(35) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(45) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(55) == 0); + } + TEST_DONE(); +} diff --git a/searchlib/src/tests/create-test.sh b/searchlib/src/tests/create-test.sh new file mode 100755 index 00000000000..d2bc3ded67b --- /dev/null +++ b/searchlib/src/tests/create-test.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +gen_project_file() { + echo "generating '$1' ..." 
+ echo "APPLICATION ${test}_test" > $1 + echo "OBJS ${test}_test" >> $1 + echo "LIBS searchlib/searchlib" >> $1 + echo "EXTERNALLIBS searchcommon persistencetypes metrics" >> $1 + echo "" >> $1 + echo "CUSTOMMAKE" >> $1 + echo "test: all" >> $1 + echo -e "\t\$(HIDE) \$(LDL) \$(VALGRIND) ./${test}_test" >> $1 +} + +gen_source() { + echo "generating '$1' ..." + echo "#include " >> $1 + echo "" >> $1 + echo "// using namespace search;" >> $1 + echo "" >> $1 + echo "TEST(\"require something\") {" >> $1 + echo "}" >> $1 + echo "" >> $1 + echo "TEST_MAIN() { TEST_RUN_ALL(); }" >> $1 +} + +gen_file_list() { + echo "generating '$1' ..." + echo "${test}_test.cpp" > $1 +} + +if [ $# -ne 1 ]; then + echo "usage: $0 " + echo " name: name of the test to create" + exit 1 +fi + +test=$1 +if [ -e $test ]; then + echo "$test already present, don't want to mess it up..." + exit 1 +fi + +echo "creating directory '$test' ..." +mkdir -p $test || exit 1 +cd $test || exit 1 +test=`basename $test` + +gen_project_file fastos.project +gen_source ${test}_test.cpp +gen_file_list FILES diff --git a/searchlib/src/tests/datastore/.gitignore b/searchlib/src/tests/datastore/.gitignore new file mode 100644 index 00000000000..0f6b605a280 --- /dev/null +++ b/searchlib/src/tests/datastore/.gitignore @@ -0,0 +1,8 @@ +*.So +*_test +.depend* +Makefile +vlog1.txt +vlog2.txt +vlog3.txt +searchlib_logdatastore_test_app diff --git a/searchlib/src/tests/datastore/CMakeLists.txt b/searchlib/src/tests/datastore/CMakeLists.txt new file mode 100644 index 00000000000..b10bc4d4e09 --- /dev/null +++ b/searchlib/src/tests/datastore/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_logdatastore_test_app + SOURCES + logdatastore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_logdatastore_test_app COMMAND sh logdatastore_test.sh) diff --git a/searchlib/src/tests/datastore/DESC b/searchlib/src/tests/datastore/DESC new file mode 100644 index 00000000000..f035e6aecfb --- /dev/null +++ b/searchlib/src/tests/datastore/DESC @@ -0,0 +1 @@ +Tests behavior of class search::DataStore from diff --git a/searchlib/src/tests/datastore/FILES b/searchlib/src/tests/datastore/FILES new file mode 100644 index 00000000000..6bfee2917f4 --- /dev/null +++ b/searchlib/src/tests/datastore/FILES @@ -0,0 +1 @@ +datastore.cpp diff --git a/searchlib/src/tests/datastore/bad.dat b/searchlib/src/tests/datastore/bad.dat new file mode 100644 index 00000000000..1bf7a93a2f8 Binary files /dev/null and b/searchlib/src/tests/datastore/bad.dat differ diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat new file mode 100644 index 00000000000..dfeedf08029 Binary files /dev/null and b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat differ diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx new file mode 100644 index 00000000000..883a5265afe Binary files /dev/null and b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx differ diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.dat b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat new file mode 100644 index 00000000000..cb202f8d72a Binary files /dev/null and b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat differ diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.idx b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx new file mode 100644 index 00000000000..0fc41cdf9e0 Binary files 
/dev/null and b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx differ diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.dat b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat new file mode 100644 index 00000000000..cb202f8d72a Binary files /dev/null and b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat differ diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.idx b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx new file mode 100644 index 00000000000..0fc41cdf9e0 Binary files /dev/null and b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx differ diff --git a/searchlib/src/tests/datastore/dangling/3425506005745465000.dat b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat new file mode 100644 index 00000000000..cb202f8d72a Binary files /dev/null and b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat differ diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.dat b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat new file mode 100644 index 00000000000..cb202f8d72a Binary files /dev/null and b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat differ diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.idx b/searchlib/src/tests/datastore/dangling/4425506005745465000.idx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/searchlib/src/tests/datastore/datastore.dat b/searchlib/src/tests/datastore/datastore.dat new file mode 100644 index 00000000000..34d6ed1392f Binary files /dev/null and b/searchlib/src/tests/datastore/datastore.dat differ diff --git a/searchlib/src/tests/datastore/logdatastore_test.cpp b/searchlib/src/tests/datastore/logdatastore_test.cpp new file mode 100644 index 00000000000..776e6b25533 --- /dev/null +++ b/searchlib/src/tests/datastore/logdatastore_test.cpp @@ -0,0 +1,468 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("datastore_test"); + +#include +#include +#include +#include +#include + +#include + +class MyTlSyncer : public search::transactionlog::SyncProxy { + search::SerialNum _syncedTo; +public: + MyTlSyncer(void) : _syncedTo(0) { } + + void sync(search::SerialNum syncTo) { + _syncedTo = syncTo; + } +}; + +using namespace search; +using search::index::DummyFileHeaderContext; + +namespace { + +void +showStats(const DataStoreStorageStats &stats) +{ + fprintf(stdout, + "Storage stats usage=%9lu bloat=%9lu" + " lastSerial=%9lu lastFlushedSerial=%9lu" + " maxBucketSpread=%6.2f\n", + stats.diskUsage(), stats.diskBloat(), + stats.lastSerialNum(), stats.lastFlushedSerialNum(), + stats.maxBucketSpread()); + fflush(stdout); +} + +void +showChunks(const std::vector &chunkStats) +{ + fprintf(stdout, "Number of chunks is %zu\n", chunkStats.size()); + for (const auto &chunk : chunkStats) { + fprintf(stdout, + "Chunk %019lu usage=%9lu bloat=%9lu" + " lastSerial=%9lu lastFlushedSerial=%9lu" + " bucketSpread=%6.2f\n", + chunk.nameId(), chunk.diskUsage(), chunk.diskBloat(), + chunk.lastSerialNum(), chunk.lastFlushedSerialNum(), + chunk.maxBucketSpread()); + } + fflush(stdout); +} + +SerialNum +calcLastSerialNum(const std::vector &chunkStats) +{ + SerialNum lastSerialNum = 0u; + for (const auto &chunk : chunkStats) { + lastSerialNum = std::max(lastSerialNum, chunk.lastSerialNum()); + } + return lastSerialNum; +} + +SerialNum +calcLastFlushedSerialNum(const std::vector &chunkStats) +{ + SerialNum lastFlushedSerialNum = 0u; + for (const auto &chunk : chunkStats) { + lastFlushedSerialNum = std::max(lastFlushedSerialNum, + chunk.lastFlushedSerialNum()); + } + return lastFlushedSerialNum; +} + +uint64_t +calcDiskUsage(const std::vector &chunkStats) +{ + uint64_t diskUsage = 0u; + for (const auto &chunk : chunkStats) { + diskUsage += chunk.diskUsage(); + } + return diskUsage; +} + 
+uint64_t +calcDiskBloat(const std::vector &chunkStats) +{ + uint64_t diskBloat = 0u; + for (const auto &chunk : chunkStats) { + diskBloat += chunk.diskBloat(); + } + return diskBloat; +} + +void +checkStats(IDataStore &store, + SerialNum expLastSerial, SerialNum expLastFlushedSerial) +{ + DataStoreStorageStats storageStats(store.getStorageStats()); + std::vector chunkStats; + chunkStats = store.getFileChunkStats(); + showStats(storageStats); + showChunks(chunkStats); + EXPECT_EQUAL(expLastSerial, storageStats.lastSerialNum()); + EXPECT_EQUAL(expLastFlushedSerial, storageStats.lastFlushedSerialNum()); + EXPECT_EQUAL(storageStats.lastSerialNum(), calcLastSerialNum(chunkStats)); + EXPECT_EQUAL(storageStats.lastFlushedSerialNum(), + calcLastFlushedSerialNum(chunkStats)); + EXPECT_EQUAL(storageStats.diskUsage(), + calcDiskUsage(chunkStats)); + EXPECT_EQUAL(storageStats.diskBloat(), calcDiskBloat(chunkStats)); +} + + +} + +TEST("testThatLidInfoOrdersFileChunkSize") { + EXPECT_TRUE(LidInfo(1, 1, 1) == LidInfo(1, 1, 1)); + EXPECT_FALSE(LidInfo(1, 1, 1) < LidInfo(1, 1, 1)); + + EXPECT_FALSE(LidInfo(1, 1, 1) == LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 1, 1) < LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 2, 1) < LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 1, 2) < LidInfo(2, 1, 1)); +} + +TEST("testGrowing") { + FastOS_File::EmptyAndRemoveDirectory("growing"); + EXPECT_TRUE(FastOS_File::MakeDirectory("growing")); + LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true, + WriteableFileChunk::Config( + document::CompressionConfig( + document::CompressionConfig:: + LZ4, 9, 60), + 1000, + 20)); + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + DummyFileHeaderContext fileHeaderContext; + MyTlSyncer tlSyncer; + { + LogDataStore datastore(executor, + "growing", + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + srand(7); + char buffer[12000]; + SerialNum lastSyncToken(0); + for (size_t i(0); i < 
sizeof(buffer); i++) { + buffer[i] = rand() & 0xff; + } + for (size_t i(1); i < 10000; i++) { + long r = rand()%10000; + assert(i > lastSyncToken); + lastSyncToken = i; + datastore.write(i, i, &buffer[r], uint8_t(buffer[r])*4); + } + datastore.flush(datastore.initFlush(lastSyncToken)); + for (size_t i(1); i < 200; i++) { + assert(i + 20000 > lastSyncToken); + lastSyncToken = i + 20000; + datastore.remove(i + 20000, i); + } + for (size_t i(201); i < 2000; i+= 2) { + assert(i + 20000 > lastSyncToken); + lastSyncToken = i + 20000; + datastore.remove(i + 20000, i); + } + datastore.flush(datastore.initFlush(lastSyncToken)); + datastore.compact(30000); + datastore.remove(31000, 0); + checkStats(datastore, 31000, 30000); + } + { + LogDataStore datastore(executor, + "growing", + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + checkStats(datastore, 30000, 30000); + } + + FastOS_File::EmptyAndRemoveDirectory("growing"); +} + +void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz) +{ + vespalib::DataBuffer buf; + EXPECT_EQUAL(static_cast(sz), datastore.read(lid, buf)); + EXPECT_EQUAL(buf.getDataLen(), sz); + EXPECT_TRUE(memcmp(a, buf.getData(), sz) == 0); +} + +TEST("testTruncatedIdxFile"){ + LogDataStore::Config config; + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + { + // Files comes from the 'growing test'. 
+ LogDataStore datastore(executor, "bug-7257706", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(354ul, datastore.lastSyncToken()); + } + { + LogDataStore datastore(executor, "bug-7257706-truncated", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(331ul, datastore.lastSyncToken()); + } + { + LogDataStore datastore(executor, "bug-7257706-truncated", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(331ul, datastore.lastSyncToken()); + } +} + +TEST("testThatEmptyIdxFilesAndDanglingDatFilesAreRemoved") { + LogDataStore::Config config; + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "dangling-test", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(354ul, datastore.lastSyncToken()); + EXPECT_EQUAL(4096u + 480u, datastore.getDiskHeaderFootprint()); + EXPECT_EQUAL(datastore.getDiskHeaderFootprint() + 94016u, datastore.getDiskFootprint()); +} + +TEST("testWriteRead") { + FastOS_File::RemoveDirectory("empty"); + const char * bufA = "aaaaaaaaaaaaaaaaaaaaa"; + const char * bufB = "bbbbbbbbbbbbbbbb"; + const vespalib::ConstBufferRef a[2] = { vespalib::ConstBufferRef(bufA, strlen(bufA)), vespalib::ConstBufferRef(bufB, strlen(bufB))}; + LogDataStore::Config config; + { + EXPECT_TRUE(FastOS_File::MakeDirectory("empty")); + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "empty", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + ASSERT_TRUE(datastore.lastSyncToken() == 0); + size_t headerFootprint = datastore.getDiskHeaderFootprint(); + EXPECT_LESS(0u, headerFootprint); + 
EXPECT_EQUAL(datastore.getDiskFootprint(), headerFootprint); + EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); + EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + datastore.write(1, 0, a[0].c_str(), a[0].size()); + fetchAndTest(datastore, 0, a[0].c_str(), a[0].size()); + datastore.write(2, 0, a[1].c_str(), a[1].size()); + fetchAndTest(datastore, 0, a[1].c_str(), a[1].size()); + fetchAndTest(datastore, 1, NULL, 0); + datastore.remove(3, 0); + fetchAndTest(datastore, 0, "", 0); + + SerialNum lastSyncToken(0); + for(size_t i=0; i < 100; i++) { + datastore.write(i+4, i, a[i%2].c_str(), a[i%2].size()); + assert(i +4 > lastSyncToken); + lastSyncToken = i + 4; + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + EXPECT_EQUAL(datastore.getDiskFootprint(), + 2711ul + headerFootprint); + EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); + EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + datastore.flush(datastore.initFlush(lastSyncToken)); + } + { + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "empty", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + size_t headerFootprint = datastore.getDiskHeaderFootprint(); + EXPECT_LESS(0u, headerFootprint); + EXPECT_EQUAL(4944ul + headerFootprint, datastore.getDiskFootprint()); + EXPECT_EQUAL(0ul, datastore.getDiskBloat()); + EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + for(size_t i=0; i < 100; i++) { + datastore.write(i+3+100, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + } + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + } + + 
EXPECT_EQUAL(7594ul + headerFootprint, datastore.getDiskFootprint()); + EXPECT_EQUAL(0ul, datastore.getDiskBloat()); + EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + } + FastOS_File::EmptyAndRemoveDirectory("empty"); +} + +TEST("requireThatSyncTokenIsUpdatedAfterFlush") { +#if 0 + std::string file = "sync.dat"; + FastOS_File::Delete(file.c_str()); + { + vespalib::DataBuffer buf; + SimpleDataStore store(file); + EXPECT_EQUAL(0u, store.lastSyncToken()); + makeData(buf, 10); + store.write(0, buf, 10); + store.flush(4); + EXPECT_EQUAL(4u, store.lastSyncToken()); + } + FastOS_File::Delete(file.c_str()); +#endif +} + +class GuardDirectory { +public: + GuardDirectory(const vespalib::string & dir) : _dir(dir) + { + FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); + EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str())); + } + ~GuardDirectory() { + FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); + } + const vespalib::string & getDir() const { return _dir; } +private: + vespalib::string _dir; +}; + +TEST("requireThatFlushTimeIsAvailableAfterFlush") { + GuardDirectory testDir("flushtime"); + fastos::TimeStamp before(fastos::ClockSystem::now()); + DummyFileHeaderContext fileHeaderContext; + LogDataStore::Config config; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore store(executor, + testDir.getDir(), + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + EXPECT_EQUAL(0, store.getLastFlushTime().time()); + uint64_t flushToken = store.initFlush(5); + EXPECT_EQUAL(5u, flushToken); + store.flush(flushToken); + fastos::TimeStamp after(fastos::ClockSystem::now()); + // the file name of the dat file is 'magic', using the clock instead of stating the file + EXPECT_LESS_EQUAL(before.time(), store.getLastFlushTime().time()); + EXPECT_GREATER_EQUAL(after.time(), store.getLastFlushTime().time()); +} + +TEST("requireThatChunksObeyLimits") { + Chunk c(0, Chunk::Config(256, 
2)); + EXPECT_TRUE(c.hasRoom(1000)); // At least 1 is allowed no matter what the size is. + c.append(1, "abc", 3); + EXPECT_TRUE(c.hasRoom(229)); + EXPECT_FALSE(c.hasRoom(230)); + c.append(2, "abc", 3); + EXPECT_FALSE(c.hasRoom(20)); +} + +TEST("requireThatChunkCanProduceUniqueList") { + const char *d = "ABCDEF"; + Chunk c(0, Chunk::Config(100, 20)); + c.append(1, d, 1); + c.append(2, d, 2); + c.append(3, d, 3); + c.append(2, d, 4); + c.append(1, d, 5); + EXPECT_EQUAL(5u, c.count()); + const Chunk::LidList & all = c.getLids(); + EXPECT_EQUAL(5u, all.size()); + Chunk::LidList unique = c.getUniqueLids(); + EXPECT_EQUAL(3u, unique.size()); + EXPECT_EQUAL(1u, unique[0].getLid()); + EXPECT_EQUAL(5u, unique[0].netSize()); + EXPECT_EQUAL(2u, unique[1].getLid()); + EXPECT_EQUAL(4u, unique[1].netSize()); + EXPECT_EQUAL(3u, unique[2].getLid()); + EXPECT_EQUAL(3u, unique[2].netSize()); +} + +void testChunkFormat(ChunkFormat & cf, size_t expectedLen, const vespalib::string & expectedContent) +{ + document::CompressionConfig cfg; + uint64_t MAGIC_CONTENT(0xabcdef9876543210); + cf.getBuffer() << MAGIC_CONTENT; + vespalib::DataBuffer buffer; + cf.pack(7, buffer, cfg); + EXPECT_EQUAL(expectedLen, buffer.getDataLen()); + std::ostringstream os; + os << vespalib::HexDump(buffer.getData(), buffer.getDataLen()); + EXPECT_EQUAL(expectedContent, os.str()); +} + +TEST("requireThatChunkFormatsDoesNotChangeBetweenReleases") { + ChunkFormatV1 v1(10); + testChunkFormat(v1, 26, "26 000000000010ABCDEF987654321000000000000000079CF5E79B"); + ChunkFormatV2 v2(10); + testChunkFormat(v2, 34, "34 015BA32DE7000000220000000010ABCDEF987654321000000000000000074D000694"); +} + +class DummyBucketizer : public IBucketizer +{ +public: + DummyBucketizer(uint32_t mod) : _mod(mod) { } + uint64_t getBucketOf(const vespalib::GenerationHandler::Guard &, uint32_t lid) const override { + return lid%_mod; + } + vespalib::GenerationHandler::Guard getGuard() const override { + return 
vespalib::GenerationHandler::Guard(); + } +private: + uint32_t _mod; +}; + +TEST("testBucketDensityComputer") { + DummyBucketizer bucketizer(100); + BucketDensityComputer bdc(&bucketizer); + vespalib::GenerationHandler::Guard guard = bdc.getGuard(); + EXPECT_EQUAL(0u, bdc.getNumBuckets()); + bdc.recordLid(guard, 1, 1); + EXPECT_EQUAL(1u, bdc.getNumBuckets()); + bdc.recordLid(guard, 2, 1); + EXPECT_EQUAL(2u, bdc.getNumBuckets()); + bdc.recordLid(guard, 3, 1); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 2, 1); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 4, 0); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 4, 1); + EXPECT_EQUAL(4u, bdc.getNumBuckets()); + + BucketDensityComputer nonRecording(nullptr); + guard = nonRecording.getGuard(); + EXPECT_EQUAL(0u, nonRecording.getNumBuckets()); + nonRecording.recordLid(guard, 1, 1); + EXPECT_EQUAL(0u, nonRecording.getNumBuckets()); +} + +TEST_MAIN() { + DummyFileHeaderContext::setCreator("logdatastore_test"); + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/datastore/logdatastore_test.sh b/searchlib/src/tests/datastore/logdatastore_test.sh new file mode 100755 index 00000000000..46455e1fae9 --- /dev/null +++ b/searchlib/src/tests/datastore/logdatastore_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +cp -r bug-7257706 bug-7257706-truncated +mkdir dangling-test +cp bug-7257706/*.dat dangling-test/ +cp bug-7257706/*.idx dangling-test/ +cp dangling/*.dat dangling-test/ +cp dangling/*.idx dangling-test/ +truncate --size 3830 bug-7257706-truncated/1422358701368384000.idx +VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_logdatastore_test_app +rm -rf bug-7257706-truncated dangling-test diff --git a/searchlib/src/tests/diskindex/bitvector/.gitignore b/searchlib/src/tests/diskindex/bitvector/.gitignore new file mode 100644 index 00000000000..32b1b86e1e5 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +bitvector_test +dump 
+/bitvector_test-diskindex +searchlib_bitvector_test-diskindex_app diff --git a/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..27c03b483ab --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bitvector_test-diskindex_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_test-diskindex_app COMMAND searchlib_bitvector_test-diskindex_app) diff --git a/searchlib/src/tests/diskindex/bitvector/DESC b/searchlib/src/tests/diskindex/bitvector/DESC new file mode 100644 index 00000000000..313f0f89f2a --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/DESC @@ -0,0 +1 @@ +bitvector test. Take a look at bitvector_test.cpp for details. diff --git a/searchlib/src/tests/diskindex/bitvector/FILES b/searchlib/src/tests/diskindex/bitvector/FILES new file mode 100644 index 00000000000..a2583d74519 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/FILES @@ -0,0 +1 @@ +bitvector_test.cpp diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..bf95e3d56a6 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -0,0 +1,221 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("bitvector_test"); +#include +#include +#include +#include +#include + +using namespace search::index; + +namespace search { +namespace diskindex { +/* Test helper wrapping a FieldWriter; newWord() and add() return *this so that calls can be chained. */ +struct FieldWriterWrapper +{ + FieldWriter _writer; + + FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds); + + FieldWriterWrapper & + newWord(const vespalib::stringref &word); + + FieldWriterWrapper & + add(uint32_t docId); + + bool + open(const std::string &path, + const Schema &schema, + const uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext); +}; + + +FieldWriterWrapper::FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds) + : _writer(docIdLimit, numWordIds) +{ +} +/* Creates the directory at path, then runs earlyOpen() followed by lateOpen() on the writer and returns the result of lateOpen(). */ +bool +FieldWriterWrapper::open(const std::string &path, + const Schema &schema, + const uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext) +{ + vespalib::mkdir(path, false); + _writer.earlyOpen(path, 64, 10000, false, schema, indexId, tuneFileWrite); + return _writer.lateOpen(tuneFileWrite, fileHeaderContext); +} + +FieldWriterWrapper & +FieldWriterWrapper::newWord(const vespalib::stringref &word) +{ + _writer.newWord(word); + return *this; +} + +/* Feeds one document for the current word: a single WordDocElementFeatures(0) with one occurrence plus a single WordDocElementWordPosFeatures(0). */ +FieldWriterWrapper & +FieldWriterWrapper::add(uint32_t docId) +{ + DocIdAndFeatures daf; + daf._docId = docId; + daf._elements.push_back(WordDocElementFeatures(0)); + daf._elements.back().setNumOccs(1); + daf._wordPositions.push_back(WordDocElementWordPosFeatures(0)); + //LOG(info, "add(%" PRIu64 ", %u)", wordNum, docId); + _writer.add(daf); + return *this; +} +/* Test application; holds the schema (one string index field f1, added in the constructor) and the index id handed to the writer. */ +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + uint32_t _indexId; +public: + void + requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap); + + void + requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap); + + Test(); + int Main(); +}; +/* Writes word 1 with one document and word 2 with two documents into dump/1/, then expects the dictionary to report docIdLimit 5, zero entries and null lookups for word numbers 1 and 2. */ +void +Test::requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap) +{
+ TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + DummyFileHeaderContext fileHeaderContext; + + if (directio) { + tuneFileWrite.setWantDirectIO(); + tuneFileRead.setWantDirectIO(); + } + if (readmmap) + tuneFileRead.setWantMemoryMap(); + FieldWriterWrapper fww(5, 2); + vespalib::mkdir("dump", false); + EXPECT_TRUE(fww.open("dump/1/", _schema, _indexId, tuneFileWrite, + fileHeaderContext)); + fww.newWord("1").add(1); + fww.newWord("2").add(2).add(3); + EXPECT_TRUE(fww._writer.close()); + + BitVectorDictionary dict; + BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS); + EXPECT_TRUE(dict.open("dump/1/", tuneFileRead, bvScope)); + EXPECT_EQUAL(5u, dict.getDocIdLimit()); + EXPECT_EQUAL(0u, dict.getEntries().size()); + EXPECT_TRUE(dict.lookup(1).get() == NULL); + EXPECT_TRUE(dict.lookup(2).get() == NULL); +} +/* Writes six words into dump/2/ and expects bit vectors only for word 1 (17 docs) and word 5 (23 docs); words with 16 or fewer docs must have none. NOTE(review): this test never creates the dump directory itself and presumably relies on the no-entries test having run first - confirm. */ +void +Test::requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap) +{ + TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + DummyFileHeaderContext fileHeaderContext; + + if (directio) { + tuneFileWrite.setWantDirectIO(); + tuneFileRead.setWantDirectIO(); + } + if (readmmap) + tuneFileRead.setWantMemoryMap(); + FieldWriterWrapper fww(64, 6); + EXPECT_TRUE(fww.open("dump/2/", _schema, _indexId, tuneFileWrite, + fileHeaderContext)); + // must have >16 docs in order to create bitvector for a word + // 17 docs for word 1 + BitVector::UP bv1exp(BitVector::create(64)); + fww.newWord("1"); + for (uint32_t docId = 1; docId < 18; ++docId) { + fww.add(docId); + bv1exp->setBit(docId); + } + fww.newWord("2").add(1); + // 16 docs for word 3 + fww.newWord("3"); + for (uint32_t docId = 1; docId < 17; ++docId) { + fww.add(docId); + } + fww.newWord("4").add(1); + // 23 docs for word 5 + BitVector::UP bv5exp(BitVector::create(64)); + fww.newWord("5"); + for (uint32_t docId = 1; docId < 24; ++docId) { + fww.add(docId * 2); + bv5exp->setBit(docId * 2); + } + fww.newWord("6").add(1); + EXPECT_TRUE(fww._writer.close()); + +
BitVectorDictionary dict; + BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS); + EXPECT_TRUE(dict.open("dump/2/", tuneFileRead, bvScope)); + EXPECT_EQUAL(64u, dict.getDocIdLimit()); + EXPECT_EQUAL(2u, dict.getEntries().size()); + + BitVectorWordSingleKey e; + e = dict.getEntries()[0]; + EXPECT_EQUAL(1u, e._wordNum); + EXPECT_EQUAL(17u, e._numDocs); + e = dict.getEntries()[1]; + EXPECT_EQUAL(5u, e._wordNum); + EXPECT_EQUAL(23u, e._numDocs); + + EXPECT_TRUE(dict.lookup(2).get() == NULL); + EXPECT_TRUE(dict.lookup(3).get() == NULL); + EXPECT_TRUE(dict.lookup(4).get() == NULL); + EXPECT_TRUE(dict.lookup(6).get() == NULL); + + BitVector::UP bv1act = dict.lookup(1); + EXPECT_TRUE(bv1act.get() != NULL); + EXPECT_TRUE(*bv1exp == *bv1act); + + BitVector::UP bv5act = dict.lookup(5); + EXPECT_TRUE(bv5act.get() != NULL); + EXPECT_TRUE(*bv5exp == *bv5act); +} + +Test::Test() + : _schema(), + _indexId(0) +{ + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); +} +/* Runs both tests for (directio, readmmap) = (false, false), (true, false) and (false, true). The local tuneFileWrite and tuneFileRead declared here are not used. */ +int +Test::Main() +{ + TEST_INIT("bitvector_test"); + + TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + TEST_DO(requireThatDictionaryHandlesNoEntries(false, false)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, false)); + TEST_DO(requireThatDictionaryHandlesNoEntries(true, false)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(true, false)); + TEST_DO(requireThatDictionaryHandlesNoEntries(false, true)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, true)); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::diskindex::Test); diff --git a/searchlib/src/tests/diskindex/diskindex/.gitignore b/searchlib/src/tests/diskindex/diskindex/.gitignore new file mode 100644 index 00000000000..58819f1c4bb --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +diskindex_test +index +searchlib_diskindex_test_app diff --git
a/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt new file mode 100644 index 00000000000..7cee100f534 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_diskindex_test_app + SOURCES + diskindex_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_diskindex_test_app COMMAND searchlib_diskindex_test_app) diff --git a/searchlib/src/tests/diskindex/diskindex/DESC b/searchlib/src/tests/diskindex/diskindex/DESC new file mode 100644 index 00000000000..fc14faaca7a --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/DESC @@ -0,0 +1 @@ +diskindex test. Take a look at diskindex_test.cpp for details. diff --git a/searchlib/src/tests/diskindex/diskindex/FILES b/searchlib/src/tests/diskindex/diskindex/FILES new file mode 100644 index 00000000000..54eef52f856 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/FILES @@ -0,0 +1 @@ +diskindex_test.cpp diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp new file mode 100644 index 00000000000..a8972d2a289 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -0,0 +1,330 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP("diskindex_test"); + +using search::BitVectorIterator; +using namespace search::fef; +using namespace search::index; +using namespace search::query; +using namespace search::queryeval; +using namespace search::queryeval::blueprint; +using search::test::InitRangeVerifier; +using namespace search::fakedata; + +namespace search { +namespace diskindex { + +typedef DiskIndex::LookupResult LookupResult; +/* Collects the doc ids obtained by seeking sb forward from 1 until it is at end, joined with commas. */ +std::string +toString(SearchIterator & sb) +{ + std::ostringstream oss; + bool first = true; + for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + if (!first) oss << ","; + oss << sb.getDocId(); + first = false; + } + return oss.str(); +} +/* Builds the SimpleStringTerm used by the blueprint tests; only the term text varies, the remaining constructor arguments are fixed. */ +SimpleStringTerm +makeTerm(const std::string & term) +{ + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} +/* Test application. _schema, _index, openIndex() and buildSchema() are not declared in this file and presumably come from TestDiskIndex - confirm. */ +class Test : public vespalib::TestApp, public TestDiskIndex { +private: + FakeRequestContext _requestContext; + + void requireThatLookupIsWorking(bool fieldEmpty, bool docEmpty, bool wordEmpty); + void requireThatWeCanReadPostingList(); + void requireThatWeCanReadBitVector(); + void requireThatBlueprintIsCreated(); + void requireThatBlueprintCanCreateSearchIterators(); + void requireThatInitRangeConforms(); +public: + Test(); + int Main(); +}; +/* For every posting type from getPostingTypes() that is not in the ignored set, builds a fake posting for one FakeWord and lets InitRangeVerifier check the iterator created from it. */ +void +Test::requireThatInitRangeConforms() +{ + InitRangeVerifier ir; + Schema schema; + schema.addIndexField(Schema::IndexField("a", Schema::DataType::STRING)); + bitcompression::PosOccFieldsParams params; + params.setSchemaParams(schema, 0); + search::fakedata::FakeWord fw(ir.getDocIdLimit(), ir.getExpectedDocIds(), "a", params, 0); + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); + std::vector v; + v.push_back(&fw); + std::set ignored = { "MemTreeOcc", "MemTreeOcc2", + "FilterOcc", "ZcFilterOcc",
+ "ZcNoSkipFilterOcc", "ZcSkipFilterOcc", + "ZcbFilterOcc", + "EGCompr64FilterOcc", "EGCompr64LEFilterOcc", + "EGCompr64NoSkipFilterOcc", "EGCompr64SkipFilterOcc" }; + for (auto postingType : search::fakedata::getPostingTypes()) { + if (ignored.find(postingType) == ignored.end()) { + std::cerr << "Verifying " << postingType << std::endl; + std::unique_ptr ff(getFPFactory(postingType, schema)); + ff->setup(v); + FakePosting::SP f(ff->make(fw)); + TEST_DO(ir.verify(f->createIterator(tfmda))); + } + } +} +/* Checks lookup() for fields f1, f2 and f3. If any of the three flags is set, w1 (in f1 and f2) and w2 (in f2) must be absent or have zero documents; otherwise their word numbers and document counts are checked. Unknown words, w2 in f1 and everything in f3 must always be absent or empty. */ +void +Test::requireThatLookupIsWorking(bool fieldEmpty, + bool docEmpty, + bool wordEmpty) +{ + uint32_t f1(_schema.getIndexFieldId("f1")); + uint32_t f2(_schema.getIndexFieldId("f2")); + uint32_t f3(_schema.getIndexFieldId("f3")); + LookupResult::UP r; + r = _index->lookup(f1, "not"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f1, "w1not"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f1, "wnot"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + { // field 'f1' + r = _index->lookup(f1, "w1"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(1u, r->wordNum); + EXPECT_EQUAL(2u, r->counts._numDocs); + } + r = _index->lookup(f1, "w2"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } + { // field 'f2' + r = _index->lookup(f2, "w1"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(1u, r->wordNum); + EXPECT_EQUAL(3u, r->counts._numDocs); + } + r = _index->lookup(f2, "w2"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(2u, r->wordNum); + EXPECT_EQUAL(17u, r->counts._numDocs); + } + } + { // field 'f3' doesn't exist + r = _index->lookup(f3, "w1"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f3, "w2"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } +} +/* Reads the posting list for w1 in field id 0 and expects doc ids 1,3. The iterator is a raw pointer that is deleted by hand. NOTE(review): the lookup result r is dereferenced without a null check. */ +void
+Test::requireThatWeCanReadPostingList() +{ + TermFieldMatchDataArray mda; + { // field 'f1' + LookupResult::UP r = _index->lookup(0, "w1"); + PostingListHandle::UP h = _index->readPostingList(*r); + SearchIterator * sb = h->createIterator(r->counts, mda); + sb->initFullRange(); + EXPECT_EQUAL("1,3", toString(*sb)); + delete sb; + } +} +/* Looks up w1 and w2 with field id 1: w1 must have no bit vector, w2 must have one equal to a vector with bits 1 through 17 set. */ +void +Test::requireThatWeCanReadBitVector() +{ + { // word 'w1' + LookupResult::UP r = _index->lookup(1, "w1"); + // no bit vector for 'w1' + EXPECT_TRUE(_index->readBitVector(*r).get() == NULL); + } + { // word 'w2' + BitVector::UP exp(BitVector::create(32)); + for (uint32_t docId = 1; docId < 18; ++docId) exp->setBit(docId); + { // field 'f2' + LookupResult::UP r = + _index->lookup(1, "w2"); + BitVector::UP bv = _index->readBitVector(*r); + EXPECT_TRUE(bv.get() != NULL); + EXPECT_TRUE(*bv == *exp); + } + } +} +/* Checks the type of the blueprint created for an unknown field, an unknown word, a word with hits and a word without hits, plus the hit estimate for the last two cases. */ +void +Test::requireThatBlueprintIsCreated() +{ + { // unknown field + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("none", 0, 0), makeTerm("w1")); + EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + } + { // unknown word + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("none")); + EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + } + { // known field & word with hits + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); + EXPECT_TRUE(dynamic_cast(b.get()) != NULL); + EXPECT_EQUAL(2u, b->getState().estimate().estHits); + EXPECT_TRUE(!b->getState().estimate().empty); + } + { // known field & word without hits + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w2")); +// std::cerr << "BP = " << typeid(*b).name() << std::endl; + EXPECT_TRUE((dynamic_cast(b.get()) != NULL) || + (dynamic_cast(b.get()) != NULL)); + EXPECT_EQUAL(0u, b->getState().estimate().estHits); + EXPECT_TRUE(b->getState().estimate().empty); + } +} +/* Creates leaf search iterators from blueprints in four scenarios and checks the iterator type with dynamic_cast. NOTE(review): the dynamic_cast of b is dereferenced without a null check, so an unexpected blueprint type would crash instead of failing the test. */ +void
+Test::requireThatBlueprintCanCreateSearchIterators() +{ + TermFieldMatchData md; + TermFieldMatchDataArray mda; + mda.add(&md); + Blueprint::UP b; + SearchIterator::UP s; + { // bit vector due to isFilter + b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, true), makeTerm("w2")); + b->fetchPostings(true); + s = (dynamic_cast(b.get()))->createLeafSearch(mda, true); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + } + { // bit vector due to no ranking needed + b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, false), makeTerm("w2")); + b->fetchPostings(true); + s = (dynamic_cast(b.get()))->createLeafSearch(mda, true); + EXPECT_FALSE(dynamic_cast(s.get()) != NULL); + TermFieldMatchData md2; + md2.tagAsNotNeeded(); + TermFieldMatchDataArray mda2; + mda2.add(&md2); + EXPECT_TRUE(mda2[0]->isNotNeeded()); + s = (dynamic_cast(b.get()))->createLeafSearch(mda2, false); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + } + { // fake bit vector + b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0, true), makeTerm("w2")); +// std::cerr << "BP = " << typeid(*b).name() << std::endl; + b->fetchPostings(true); + s = (dynamic_cast(b.get()))->createLeafSearch(mda, true); +// std::cerr << "SI = " << typeid(*s).name() << std::endl; + EXPECT_TRUE((dynamic_cast(s.get()) != NULL) || + dynamic_cast(s.get())); + } + { // posting list iterator + b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); + b->fetchPostings(true); + s = (dynamic_cast(b.get()))->createLeafSearch(mda, true); + ASSERT_TRUE(dynamic_cast *>(s.get()) != NULL); + } +} + +Test::Test() : + TestDiskIndex() +{ +} +/* Opens a series of indexes below the index directory with different openIndex() flag combinations and runs the lookup test on each; the posting list, bit vector and blueprint tests run for index/1 through index/4, and the init range test runs once at the end. */ +int +Test::Main() +{ + TEST_INIT("diskindex_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + + vespalib::mkdir("index", false); + TEST_DO(openIndex("index/1fedewe", false, false, true, true, true)); + TEST_DO(requireThatLookupIsWorking(true, true, true)); + TEST_DO(openIndex("index/1fede", false, false,
true, true, false)); + TEST_DO(requireThatLookupIsWorking(true, true, false)); + TEST_DO(openIndex("index/1fewe", false, false, true, false, true)); + TEST_DO(requireThatLookupIsWorking(true, false, true)); + TEST_DO(openIndex("index/1fe", false, false, true, false, false)); + TEST_DO(requireThatLookupIsWorking(true, false, false)); + buildSchema(); + TEST_DO(openIndex("index/1dewe", false, false, false, true, true)); + TEST_DO(requireThatLookupIsWorking(false, true, true)); + TEST_DO(openIndex("index/1de", false, false, false, true, false)); + TEST_DO(requireThatLookupIsWorking(false, true, false)); + TEST_DO(openIndex("index/1we", false, false, false, false, true)); + TEST_DO(requireThatLookupIsWorking(false, false, true)); + TEST_DO(openIndex("index/1", false, false, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/2", true, false, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/3", false, true, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/4", true, true, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); +
TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + TEST_DO(requireThatInitRangeConforms()); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::diskindex::Test); diff --git a/searchlib/src/tests/diskindex/fieldwriter/.gitignore b/searchlib/src/tests/diskindex/fieldwriter/.gitignore new file mode 100644 index 00000000000..bdb91bca5eb --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/.gitignore @@ -0,0 +1,3 @@ +/field1.f +/index +searchlib_fieldwriter_test_app diff --git a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt new file mode 100644 index 00000000000..a03313fac35 --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fieldwriter_test_app + SOURCES + fieldwriter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND sh runtests.sh) diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp new file mode 100644 index 00000000000..ab6be2e0801 --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp @@ -0,0 +1,972 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP("fieldwriter_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using search::ResultSet; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using search::fakedata::FakeWord; +using search::fakedata::FakeWordSet; +using search::index::PostingListParams; +using search::index::PostingListCounts; +using search::index::PostingListOffsetAndCounts; +using search::index::Schema; +using search::index::SchemaUtil; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using search::diskindex::CheckPointFile; +using search::TuneFileSeqRead; +using search::TuneFileSeqWrite; +using search::TuneFileRandRead; +using vespalib::nbostream; +using search::diskindex::FieldWriter; +using search::diskindex::FieldReader; +using search::diskindex::DocIdMapping; +using search::diskindex::WordNumMapping; +using search::diskindex::PageDict4RandRead; + +// needed to resolve external symbol from httpd.h on AIX +void FastS_block_usr2() {} + +namespace fieldwriter +{ + +uint32_t minSkipDocs = 64; +uint32_t minChunkDocs = 262144; + +vespalib::string dirprefix = "index/"; + +void +disableSkip(void) +{ + minSkipDocs = 10000000; + minChunkDocs = 1 << 30; +} + +void +enableSkip(void) +{ + minSkipDocs = 64; + minChunkDocs = 1 << 30; +} + +void +enableSkipChunks(void) +{ + minSkipDocs = 64; + minChunkDocs = 9000; // Unrealistic low for testing +} + + +vespalib::string +makeWordString(uint64_t wordNum) +{ + using AS = vespalib::asciistream; + AS ws; + ws << AS::Width(4) << AS::Fill('0') << wordNum; + return ws.str(); +} + + +typedef std::shared_ptr FieldReaderSP; +typedef std::shared_ptr FieldWriterSP; + +class FieldWriterTest : public FastOS_Application +{ +private: + bool _verbose; + uint32_t _numDocs; + 
uint32_t _commonDocFreq; + uint32_t _numWordsPerClass; + FakeWordSet _wordSet; + FakeWordSet _wordSet2; +public: + search::Rand48 _rnd; + +private: + void Usage(void); + void testFake(const std::string &postingType, FakeWord &fw); +public: + FieldWriterTest(void); + ~FieldWriterTest(void); + int Main(void); +}; + + +void +FieldWriterTest::Usage(void) +{ + printf("fieldwriter_test " + "[-c ] " + "[-d ] " + "[-v] " + "[-w ]\n"); +} + + +FieldWriterTest::FieldWriterTest(void) + : _verbose(false), + _numDocs(3000000), + _commonDocFreq(50000), + _numWordsPerClass(6), + _wordSet(), + _wordSet2(), + _rnd() +{ +} + + +FieldWriterTest::~FieldWriterTest(void) +{ +} + + +class WrappedFieldWriter : public search::fakedata::CheckPointCallback +{ +public: + FieldWriterSP _fieldWriter; +private: + bool _dynamicK; + uint32_t _numWordIds; + uint32_t _docIdLimit; + vespalib::string _namepref; + Schema _schema; + uint32_t _indexId; + +public: + + WrappedFieldWriter(const vespalib::string &namepref, + bool dynamicK, + uint32_t numWordIds, + uint32_t docIdLimit); + + virtual void + checkPoint(void) override; + + void + earlyOpen(void); + + void + lateOpen(void); + + void + open(void); + + void + close(void); + + void + writeCheckPoint(void); + + void + readCheckPoint(bool first); +}; + + +WrappedFieldWriter::WrappedFieldWriter(const vespalib::string &namepref, + bool dynamicK, + uint32_t numWordIds, + uint32_t docIdLimit) + : _fieldWriter(), + _dynamicK(dynamicK), + _numWordIds(numWordIds), + _docIdLimit(docIdLimit), + _namepref(dirprefix + namepref), + _schema(), + _indexId() +{ + Schema::CollectionType ct(Schema::SINGLE); + _schema.addIndexField(Schema::IndexField("field1", Schema::STRING, ct)); + _indexId = _schema.getIndexFieldId("field1"); +} + + +void +WrappedFieldWriter::earlyOpen(void) +{ + TuneFileSeqWrite tuneFileWrite; + _fieldWriter.reset(new FieldWriter(_docIdLimit, _numWordIds)); + _fieldWriter->earlyOpen(_namepref, + minSkipDocs, minChunkDocs, _dynamicK, _schema, + 
_indexId, + tuneFileWrite); +} + + +void +WrappedFieldWriter::lateOpen(void) +{ + TuneFileSeqWrite tuneFileWrite; + DummyFileHeaderContext fileHeaderContext; + fileHeaderContext.disableFileName(); + _fieldWriter->lateOpen(tuneFileWrite, fileHeaderContext); +} + + +void +WrappedFieldWriter::open(void) +{ + earlyOpen(); + lateOpen(); +} + + +void +WrappedFieldWriter::close(void) +{ + _fieldWriter->close(); + _fieldWriter.reset(); +} + + +void +WrappedFieldWriter::writeCheckPoint(void) +{ + CheckPointFile chkptfile("chkpt"); + nbostream out; + _fieldWriter->checkPointWrite(out); + chkptfile.write(out, DummyFileHeaderContext()); +} + + +void +WrappedFieldWriter::readCheckPoint(bool first) +{ + CheckPointFile chkptfile("chkpt"); + nbostream in; + bool openRes = chkptfile.read(in); + assert(first || openRes); + (void) first; + if (!openRes) + return; + _fieldWriter->checkPointRead(in); + assert(in.empty()); +} + + +void +WrappedFieldWriter::checkPoint(void) +{ + writeCheckPoint(); + _fieldWriter.reset(); + earlyOpen(); + readCheckPoint(false); + lateOpen(); +} + + +class WrappedFieldReader : public search::fakedata::CheckPointCallback +{ +public: + FieldReaderSP _fieldReader; +private: + std::string _namepref; + uint32_t _numWordIds; + uint32_t _docIdLimit; + WordNumMapping _wmap; + DocIdMapping _dmap; + Schema _oldSchema; + Schema _schema; + +public: + WrappedFieldReader(const vespalib::string &namepref, + uint32_t numWordIds, + uint32_t docIdLimit); + + ~WrappedFieldReader(void); + + void + earlyOpen(void); + + void + lateOpen(void); + + void + open(void); + + void + close(void); + + void + writeCheckPoint(void); + + void + readCheckPoint(bool first); + + virtual void + checkPoint(void) override; +}; + + +WrappedFieldReader::WrappedFieldReader(const vespalib::string &namepref, + uint32_t numWordIds, + uint32_t docIdLimit) + : search::fakedata::CheckPointCallback(), + _fieldReader(), + _namepref(dirprefix + namepref), + _numWordIds(numWordIds), + 
_docIdLimit(docIdLimit), + _wmap(), + _dmap(), + _oldSchema(), + _schema() +{ + Schema::CollectionType ct(Schema::SINGLE); + _oldSchema.addIndexField(Schema::IndexField("field1", + Schema::STRING, + ct)); + _schema.addIndexField(Schema::IndexField("field1", + Schema::STRING, + ct)); +} + + +WrappedFieldReader::~WrappedFieldReader(void) +{ +} + + +void +WrappedFieldReader::earlyOpen(void) +{ + TuneFileSeqRead tuneFileRead; + _fieldReader.reset(new FieldReader()); + _fieldReader->earlyOpen(_namepref, tuneFileRead); +} + + +void +WrappedFieldReader::lateOpen(void) +{ + TuneFileSeqRead tuneFileRead; + _wmap.setup(_numWordIds); + _dmap.setup(_docIdLimit); + _fieldReader->setup(_wmap, _dmap); + _fieldReader->lateOpen(_namepref, tuneFileRead); +} + + +void +WrappedFieldReader::open(void) +{ + earlyOpen(); + lateOpen(); +} + + +void +WrappedFieldReader::close(void) +{ + _fieldReader->close(); + _fieldReader.reset(); +} + + +void +WrappedFieldReader::writeCheckPoint(void) +{ + CheckPointFile chkptfile("chkpt"); + nbostream out; + _fieldReader->checkPointWrite(out); + chkptfile.write(out, DummyFileHeaderContext()); +} + + +void +WrappedFieldReader::readCheckPoint(bool first) +{ + CheckPointFile chkptfile("chkpt"); + nbostream in; + bool openRes = chkptfile.read(in); + assert(first || openRes); + (void) first; + if (!openRes) + return; + _fieldReader->checkPointRead(in); + assert(in.empty()); +} + + +void +WrappedFieldReader::checkPoint(void) +{ + writeCheckPoint(); + _fieldReader.reset(); + earlyOpen(); + readCheckPoint(false); + lateOpen(); +} + + +void +writeField(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + + LOG(info, + "enter writeField, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + FieldWriter::remove(namepref); + ostate.open(); + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + ostate._fieldWriter->newWord(makeWordString(wordNum)); + fw.dump(ostate._fieldWriter, false, + checkPointCheck, + checkPointInterval, + NULL); + ++wordNum; + } + } + ostate.close(); + + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave writeField, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +writeFieldCheckPointed(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + bool first = true; + + LOG(info, + "enter writeFieldCheckPointed, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + ostate.earlyOpen(); + ostate.readCheckPoint(first); + first = false; + ostate.lateOpen(); + ostate._fieldWriter->newWord(makeWordString(wordNum)); + fw.dump(ostate._fieldWriter, false, + checkPointCheck, + checkPointInterval, + &ostate); + ostate.writeCheckPoint(); + ++wordNum; + } + } + do { + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + ostate.earlyOpen(); + ostate.readCheckPoint(first); + ostate.lateOpen(); + ostate.close(); + } while (0); + CheckPointFile dropper("chkpt"); + dropper.remove(); + + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave writeFieldCheckPointed, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +readField(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK, + bool verbose) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + WrappedFieldReader istate(namepref, wordSet.getNumWords(), + docIdLimit); + LOG(info, + "enter readField, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + istate.open(); + if (istate._fieldReader->isValid()) + istate._fieldReader->read(); + + TermFieldMatchData mdfield1; + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + TermFieldMatchDataArray tfmda; + tfmda.add(&mdfield1); + + fw.validate(istate._fieldReader, wordNum, + tfmda, verbose, + checkPointCheck, checkPointInterval, &istate); + ++wordNum; + } + } + + istate.close(); + tv.SetNow(); + after = tv.Secs(); + CheckPointFile dropper("chkpt"); + dropper.remove(); + LOG(info, + "leave readField, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +randReadField(FakeWordSet &wordSet, + const std::string &namepref, + bool dynamicK, + bool verbose) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + PostingListCounts counts; + + LOG(info, + "enter randReadField," + " namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + + std::string cname = dirprefix + namepref; + cname += "dictionary"; + + std::unique_ptr dictFile; + dictFile.reset(new PageDict4RandRead); + + search::index::PostingListFileRandRead *postingFile = NULL; + if (dynamicK) + postingFile = + new search::diskindex::ZcPosOccRandRead; + else + postingFile = + new search::diskindex::Zc4PosOccRandRead; + + TuneFileSeqRead tuneFileRead; + TuneFileRandRead tuneFileRandRead; + bool openCntRes = dictFile->open(cname, tuneFileRandRead); + assert(openCntRes); + (void) openCntRes; + vespalib::string cWord; + + std::string pname = dirprefix + namepref + "posocc.dat"; + pname += ".compressed"; + bool openPostingRes = postingFile->open(pname, tuneFileRandRead); + assert(openPostingRes); + (void) openPostingRes; + + for (int loop = 0; loop < 1; ++loop) { + unsigned int wordNum = 1; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + PostingListOffsetAndCounts offsetAndCounts; + uint64_t checkWordNum; + dictFile->lookup(makeWordString(wordNum), + checkWordNum, + offsetAndCounts); + assert(wordNum == checkWordNum); + + counts = offsetAndCounts._counts; + search::index::PostingListHandle handle; + + handle._bitLength = counts._bitLength; + handle._file = postingFile; + handle._bitOffset = offsetAndCounts._offset; + + postingFile->readPostingList(counts, + 0, + counts._segments.empty() ? 
1 : counts._segments.size(), + handle); + + TermFieldMatchData mdfield1; + TermFieldMatchDataArray tfmda; + tfmda.add(&mdfield1); + + std::unique_ptr + sb(handle.createIterator(counts, tfmda)); + + // LOG(info, "loop=%d, wordNum=%u", loop, wordNum); + fw.validate(sb.get(), tfmda, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 19, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 99, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 799, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 6399, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 11999, verbose); + ++wordNum; + } + } + } + + postingFile->close(); + dictFile->close(); + delete postingFile; + dictFile.reset(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave randReadField, namepref=%s," + " dynamicK=%s, " + "elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +fusionField(uint32_t numWordIds, + uint32_t docIdLimit, + const vespalib::string &ipref, + const vespalib::string &opref, + bool doRaw, + bool dynamicK) +{ + const char *rawStr = doRaw ? "true" : "false"; + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + + LOG(info, + "enter fusionField, ipref=%s, opref=%s," + " raw=%s," + " dynamicK=%s", + ipref.c_str(), + opref.c_str(), + rawStr, + dynamicKStr); + + FastOS_Time tv; + double before; + double after; + WrappedFieldWriter ostate(opref, + dynamicK, + numWordIds, docIdLimit); + WrappedFieldReader istate(ipref, numWordIds, docIdLimit); + + tv.SetNow(); + before = tv.Secs(); + + ostate.open(); + istate.open(); + + if (doRaw) { + PostingListParams featureParams; + featureParams.clear(); + featureParams.set("cooked", false); + istate._fieldReader->setFeatureParams(featureParams); + } + if (istate._fieldReader->isValid()) + istate._fieldReader->read(); + + while (istate._fieldReader->isValid()) { + istate._fieldReader->write(*ostate._fieldWriter); + istate._fieldReader->read(); + } + istate.close(); + ostate.close(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave fusionField, ipref=%s, opref=%s," + " raw=%s dynamicK=%s, " + " elapsed=%10.6f", + ipref.c_str(), + opref.c_str(), + rawStr, + dynamicKStr, + after - before); +} + + +void +testFieldWriterVariants(FakeWordSet &wordSet, + uint32_t docIdLimit, bool verbose) +{ + CheckPointFile dropper("chkpt"); + dropper.remove(); + disableSkip(); + writeField(wordSet, docIdLimit, "new4", true); + readField(wordSet, docIdLimit, "new4", true, verbose); + readField(wordSet, docIdLimit, "new4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "new6", true); + writeField(wordSet, docIdLimit, "new5", false); + readField(wordSet, docIdLimit, "new5", false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "new7", false); + enableSkip(); + writeField(wordSet, docIdLimit, "newskip4", true); + readField(wordSet, docIdLimit, "newskip4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newskip6", + true); + writeField(wordSet, docIdLimit, "newskip5", false); + readField(wordSet, docIdLimit, "newskip5", false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newskip7", + 
false); + enableSkipChunks(); + writeField(wordSet, docIdLimit, "newchunk4", true); + readField(wordSet, docIdLimit, "newchunk4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newchunk6", + true); + writeField(wordSet, docIdLimit, "newchunk5", false); + readField(wordSet, docIdLimit, + "newchunk5",false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newchunk7", + false); + disableSkip(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new4", "new4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new4", "new4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new5", "new5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new5", "new5xx", + true, false); + randReadField(wordSet, "new4", true, verbose); + randReadField(wordSet, "new5", false, verbose); + enableSkip(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip4", "newskip4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip4", "newskip4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip5", "newskip5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip5", "newskip5xx", + true, false); + randReadField(wordSet, "newskip4", true, verbose); + randReadField(wordSet, "newskip5", false, verbose); + enableSkipChunks(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk4", "newchunk4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk4", "newchunk4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk5", "newchunk5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk5", "newchunk5xx", + true, false); + randReadField(wordSet, "newchunk4", true, verbose); + randReadField(wordSet, "newchunk5", false, verbose); +} + + +void +testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit, + bool verbose) +{ + 
CheckPointFile dropper("chkpt"); + dropper.remove(); + disableSkip(); + writeField(wordSet, docIdLimit, "hlid4", true); + readField(wordSet, docIdLimit, "hlid4", true, verbose); + writeField(wordSet, docIdLimit, "hlid5", false); + readField(wordSet, docIdLimit, "hlid5", false, verbose); + randReadField(wordSet, "hlid4", true, verbose); + randReadField(wordSet, "hlid5", false, verbose); + enableSkip(); + writeField(wordSet, docIdLimit, "hlidskip4", true); + readField(wordSet, docIdLimit, "hlidskip4", true, verbose); + writeField(wordSet, docIdLimit, "hlidskip5", false); + readField(wordSet, docIdLimit, "hlidskip5", false, verbose); + randReadField(wordSet, "hlidskip4", true, verbose); + randReadField(wordSet, "hlidskip5", false, verbose); + enableSkipChunks(); + writeField(wordSet, docIdLimit, "hlidchunk4", true); + readField(wordSet, docIdLimit, "hlidchunk4", true, verbose); + writeField(wordSet, docIdLimit, "hlidchunk5", false); + readField(wordSet, docIdLimit, "hlidchunk5", false, verbose); + randReadField(wordSet, "hlidchunk4", true, verbose); + randReadField(wordSet, "hlidchunk5", false, verbose); +} + +int +FieldWriterTest::Main(void) +{ + int argi; + char c; + const char *optArg; + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + argi = 1; + + while ((c = GetOpt("c:d:vw:", optArg, argi)) != -1) { + switch(c) { + case 'c': + _commonDocFreq = atoi(optArg); + if (_commonDocFreq == 0) + _commonDocFreq = 1; + break; + case 'd': + _numDocs = atoi(optArg); + break; + case 'v': + _verbose = true; + break; + case 'w': + _numWordsPerClass = atoi(optArg); + break; + default: + Usage(); + return 1; + } + } + + if (_commonDocFreq > _numDocs) { + Usage(); + return 1; + } + + _wordSet.setupParams(false, false); + _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass); + + vespalib::mkdir("index", false); + testFieldWriterVariants(_wordSet, _numDocs, _verbose); + + _wordSet2.setupParams(false, false); + _wordSet2.setupWords(_rnd, 
_numDocs, _commonDocFreq, 3); + uint32_t docIdBias = 700000000; + _wordSet2.addDocIdBias(docIdBias); // Large skip numbers + testFieldWriterVariantsWithHighLids(_wordSet2, _numDocs + docIdBias, + _verbose); + return 0; +} + +} // namespace fieldwriter + +int +main(int argc, char **argv) +{ + fieldwriter::FieldWriterTest app; + + setvbuf(stdout, NULL, _IOLBF, 32768); + app._rnd.srand48(32); + return app.Entry(argc, argv); +} diff --git a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh new file mode 100755 index 00000000000..1f2b6d6076f --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +rm -f new* chkpt* +sync +sleep 2 + +if ${VALGRIND} ./searchlib_fieldwriter_test_app "$@" +then + : +else + echo FAILURE: ./searchlib_fieldwriter_test_app program failed. + exit 1 +fi + +checksame() +{ + file1=$1 + rval=0 + shift + for file in "$@" + do + if cmp -s "$file1" "$file" + then + : + else + echo "FAILURE: $file1 != $file" + rval=1 + fi + done + return $rval +} + +# All files matched by one pattern must be byte-identical to the first match; each group is checked for its .pdat, .spdat and .ssdat dictionary files and its posting file. +newpcntfiles1=index/new[46]*dictionary.pdat +newpcntfiles1b=index/new[46]*dictionary.spdat +newpcntfiles1c=index/new[46]*dictionary.ssdat +newpcntfiles2=index/newskip[46]*dictionary.pdat +newpcntfiles2b=index/newskip[46]*dictionary.spdat +newpcntfiles2c=index/newskip[46]*dictionary.ssdat +newpcntfiles3=index/newchunk[46]*dictionary.pdat +newpcntfiles3b=index/newchunk[46]*dictionary.spdat +newpcntfiles3c=index/newchunk[46]*dictionary.ssdat +newpcntfiles4=index/new[57]*dictionary.pdat +newpcntfiles4b=index/new[57]*dictionary.spdat +newpcntfiles4c=index/new[57]*dictionary.ssdat +newpcntfiles5=index/newskip[57]*dictionary.pdat +newpcntfiles5b=index/newskip[57]*dictionary.spdat +newpcntfiles5c=index/newskip[57]*dictionary.ssdat +newpcntfiles6=index/newchunk[57]*dictionary.pdat 
+newpcntfiles6b=index/newchunk[57]*dictionary.spdat +newpcntfiles6c=index/newchunk[57]*dictionary.ssdat +newpfiles1=index/new[46]*posocc.dat.compressed +newpfiles2=index/newskip[46]*posocc.dat.compressed +newpfiles3=index/newchunk[46]*posocc.dat.compressed +newpfiles4=index/new[57]*posocc.dat.compressed +newpfiles5=index/newskip[57]*posocc.dat.compressed +newpfiles6=index/newchunk[57]*posocc.dat.compressed + +if checksame $newpcntfiles1 && checksame $newpcntfiles1b && checksame $newpcntfiles1c && checksame $newpfiles1 && checksame $newpcntfiles2 && checksame $newpcntfiles2b && checksame $newpcntfiles2c && checksame $newpfiles2 && checksame $newpcntfiles3 && checksame $newpcntfiles3b && checksame $newpcntfiles3c && checksame $newpfiles3 && checksame $newpcntfiles4 && checksame $newpcntfiles4b && checksame $newpcntfiles4c && checksame $newpfiles4 && checksame $newpcntfiles5 && checksame $newpcntfiles5b && checksame $newpcntfiles5c && checksame $newpfiles5 && checksame $newpcntfiles6 && checksame $newpcntfiles6b && checksame $newpcntfiles6c && checksame $newpfiles6 +then + echo SUCCESS: Files match up + exit 0 +else + echo FAILURE: Files do not match up + exit 1 +fi diff --git a/searchlib/src/tests/diskindex/fusion/.gitignore b/searchlib/src/tests/diskindex/fusion/.gitignore new file mode 100644 index 00000000000..8526d6faa38 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/.gitignore @@ -0,0 +1,37 @@ +.depend +Makefile +[dms]dump[1-5] +chkpt +ddump2 +ddump3 +ddump4 +ddump5 +dmdump2 +dmdump3 +dmdump4 +dmdump5 +dmdump[1-5] +dump2 +dump3 +dump4 +dump5 +dump[1-5] +fusion_test +mdump2 +mdump3 +mdump4 +mdump5 +sdump2 +sdump3 +sdump4 +sdump5 +/ddump6 +/dmdump6 +/dump6 +/dumpwords.out +/mdump6 +/transpose.out +/usage.out +/zwordc0coll.out +/zwordf0field.out +searchlib_fusion_test_app diff --git a/searchlib/src/tests/diskindex/fusion/CMakeLists.txt b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt new file mode 100644 index 00000000000..9c079b09c90 --- /dev/null +++ 
b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fusion_test_app + SOURCES + fusion_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fusion_test_app COMMAND sh fusion_test.sh) diff --git a/searchlib/src/tests/diskindex/fusion/DESC b/searchlib/src/tests/diskindex/fusion/DESC new file mode 100644 index 00000000000..b0db86422b9 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/DESC @@ -0,0 +1 @@ +fusion test. Performs basic fusion operations and validates results. diff --git a/searchlib/src/tests/diskindex/fusion/FILES b/searchlib/src/tests/diskindex/fusion/FILES new file mode 100644 index 00000000000..fb22ce21a9d --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/FILES @@ -0,0 +1 @@ +fusion_test.cpp diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp new file mode 100644 index 00000000000..4191a8f8d2b --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -0,0 +1,506 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("fusion_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + + +using document::Document; +using fef::FieldPositionsIterator; +using fef::TermFieldMatchData; +using fef::TermFieldMatchDataArray; +using index::DocBuilder; +using index::DocIdAndFeatures; +using index::Schema; +using index::SchemaUtil; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using memoryindex::Dictionary; +using memoryindex::DocumentInverter; +using queryeval::SearchIterator; + +namespace diskindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + const Schema & getSchema() const { return _schema; } + + void + requireThatFusionIsWorking(const vespalib::string &prefix, + bool directio, + bool readmmap); + +public: + Test(); + int Main(); +}; + + +namespace +{ + +void +myPushDocument(DocumentInverter &inv, Dictionary &d) +{ + inv.pushDocuments(d, std::shared_ptr()); +} + + +} + +vespalib::string +toString(FieldPositionsIterator posItr, + bool hasElements = false, bool hasWeights = false) +{ + vespalib::asciistream ss; + ss << "{"; + ss << posItr.getFieldLength() << ":"; + bool first = true; + for (; posItr.valid(); posItr.next()) { + if (!first) ss << ","; + ss << posItr.getPosition(); + first = false; + if (hasElements) { + ss << "[e=" << posItr.getElementId(); + if (hasWeights) + ss << ",w=" << posItr.getElementWeight(); + ss << ",l=" << posItr.getElementLen() << "]"; + } + } + ss << "}"; + return ss.str(); +} + + +#if 0 +vespalib::string +toString(DocIdAndFeatures &features) +{ + vespalib::asciistream ss; + ss << "{"; + std::vector::const_iterator + element = features._elements.begin(); + std::vector:: + const_iterator position = features._wordPositions.begin(); + for (; field 
!= fielde; ++field) { + ss << "f=" << field->getFieldId() << "{"; + uint32_t numElements = field->getNumElements(); + while (numElements--) { + ss << "e=" << element->getElementId() << "," + << "ew=" << element->getWeight() << "," + << "el=" << element->getElementLen() << "{"; + uint32_t numOccs = element->getNumOccs(); + while (numOccs--) { + ss << position->getWordPos(); + if (numOccs != 0) + ss << ","; + } + ss << "}"; + if (numElements != 0) + ss << ","; + } + ss << "}"; + } + ss << "}"; + return ss.str(); +} +#endif + + +void +validateDiskIndex(DiskIndex &dw, + bool f2HasElements, + bool f3HasWeights) +{ + typedef DiskIndex::LookupResult LR; + typedef index::PostingListHandle PH; + typedef search::queryeval::SearchIterator SB; + + const Schema &schema(dw.getSchema()); + + { + uint32_t id1(schema.getIndexFieldId("f0")); + LR::UP lr1(dw.lookup(id1, "c")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f0; + TermFieldMatchDataArray a; + a.add(&f0); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f0.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + EXPECT_EQUAL("{7:2}", toString(f0.getIterator())); + } + { + uint32_t id1(schema.getIndexFieldId("f2")); + LR::UP lr1(dw.lookup(id1, "ax")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f2; + TermFieldMatchDataArray a; + a.add(&f2); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f2.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + if (f2HasElements) { + EXPECT_EQUAL("{3:0[e=0,l=3],0[e=1,l=1]}", + toString(f2.getIterator(), true)); + } else { + EXPECT_EQUAL("{3:0[e=0,l=3]}", + toString(f2.getIterator(), true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP 
lr1(dw.lookup(id1, "wx")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + if (f3HasWeights) { + EXPECT_EQUAL("{2:0[e=0,w=4,l=2]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{2:0[e=0,w=1,l=2]}", + toString(f3.getIterator(), true, true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP lr1(dw.lookup(id1, "zz")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(11)); + sbap->unpack(11); + if (f3HasWeights) { + EXPECT_EQUAL("{1:0[e=0,w=-27,l=1]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}", + toString(f3.getIterator(), true, true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP lr1(dw.lookup(id1, "zz0")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(12)); + sbap->unpack(12); + if (f3HasWeights) { + EXPECT_EQUAL("{1:0[e=0,w=0,l=1]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}", + toString(f3.getIterator(), true, true)); + } + } +} + + +void +Test::requireThatFusionIsWorking(const vespalib::string &prefix, + bool directio, + bool 
readmmap) +{ + Schema schema; + Schema schema2; + Schema schema3; + for (SchemaUtil::IndexIterator it(getSchema()); it.isValid(); ++it) { + const Schema::IndexField &iField = + _schema.getIndexField(it.getIndex()); + schema.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + iField.getCollectionType())); + if (iField.getCollectionType() == Schema::WEIGHTEDSET) + schema2.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + Schema::ARRAY)); + else + schema2.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + iField.getCollectionType())); + schema3.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + Schema::SINGLE)); + } + schema3.addIndexField(Schema::IndexField("f4", + Schema::STRING)); + schema.addFieldSet(Schema::FieldSet("nc0"). + addField("f0").addField("f1")); + schema2.addFieldSet(Schema::FieldSet("nc0"). + addField("f1").addField("f0")); + schema3.addFieldSet(Schema::FieldSet("nc2"). + addField("f0").addField("f1"). + addField("f2").addField("f3"). + addField("f4")); + Dictionary d(schema); + DocBuilder b(schema); + SequencedTaskExecutor invertThreads(2); + SequencedTaskExecutor pushThreads(2); + DocumentInverter inv(schema, invertThreads, pushThreads); + Document::UP doc; + + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + addStr("e").addStr("f").addStr("z"). + endField(); + b.startIndexField("f1"). + addStr("w").addStr("x"). + addStr("y").addStr("z"). + endField(); + b.startIndexField("f2"). + startElement(4).addStr("ax").addStr("ay").addStr("z").endElement(). + startElement(5).addStr("ax").endElement(). + endField(); + b.startIndexField("f3"). + startElement(4).addStr("wx").addStr("z").endElement(). + endField(); + + doc = b.endDocument(); + inv.invertDocument(10, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + b.startDocument("doc::11"). + startIndexField("f3"). 
+ startElement(-27).addStr("zz").endElement(). + endField(); + doc = b.endDocument(); + inv.invertDocument(11, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + b.startDocument("doc::12"). + startIndexField("f3"). + startElement(0).addStr("zz0").endElement(). + endField(); + doc = b.endDocument(); + inv.invertDocument(12, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + IndexBuilder ib(schema); + vespalib::string dump2dir = prefix + "dump2"; + ib.setPrefix(dump2dir); + uint32_t numDocs = 12 + 1; + uint32_t numWords = d.getNumUniqueWords(); + bool dynamicKPosOcc = false; + TuneFileIndexing tuneFileIndexing; + TuneFileSearch tuneFileSearch; + DummyFileHeaderContext fileHeaderContext; + if (directio) { + tuneFileIndexing._read.setWantDirectIO(); + tuneFileIndexing._write.setWantDirectIO(); + tuneFileSearch._read.setWantDirectIO(); + } + if (readmmap) + tuneFileSearch._read.setWantMemoryMap(); + ib.open(numDocs, numWords, tuneFileIndexing, fileHeaderContext); + d.dump(ib); + ib.close(); + + vespalib::string tsName = dump2dir + "/.teststamp"; + typedef search::FileKit FileKit; + EXPECT_TRUE(FileKit::createStamp(tsName)); + EXPECT_TRUE(FileKit::hasStamp(tsName)); + EXPECT_TRUE(FileKit::removeStamp(tsName)); + EXPECT_FALSE(FileKit::hasStamp(tsName)); + + do { + DiskIndex dw2(prefix + "dump2"); + if (!EXPECT_TRUE(dw2.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw2, true, true)); + } while (0); + + do { + std::vector sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump2"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump3", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw3(prefix + "dump3"); + if (!EXPECT_TRUE(dw3.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw3, true, true)); + } while (0); + do { + std::vector sources; + SelectorArray selector(numDocs, 0); + 
sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema2, + prefix + "dump4", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw4(prefix + "dump4"); + if (!EXPECT_TRUE(dw4.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw4, true, false)); + } while (0); + do { + std::vector sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema3, + prefix + "dump5", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw5(prefix + "dump5"); + if (!EXPECT_TRUE(dw5.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw5, false, false)); + } while (0); + do { + std::vector sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump6", + sources, selector, + !dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw6(prefix + "dump6"); + if (!EXPECT_TRUE(dw6.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw6, true, true)); + } while (0); + do { + std::vector sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump2"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump3", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw3(prefix + "dump3"); + if (!EXPECT_TRUE(dw3.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw3, true, true)); + } while (0); +} + + +Test::Test() + : _schema() +{ + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + _schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + 
Schema::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("fusion_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + + TEST_DO(requireThatFusionIsWorking("", false, false)); + TEST_DO(requireThatFusionIsWorking("d", true, false)); + TEST_DO(requireThatFusionIsWorking("m", false, true)); + TEST_DO(requireThatFusionIsWorking("dm", true, true)); + + TEST_DONE(); +} + +} + + +} + + +TEST_APPHOOK(search::diskindex::Test); diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.sh b/searchlib/src/tests/diskindex/fusion/fusion_test.sh new file mode 100755 index 00000000000..127453fae07 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Stop at the first failing command so a failing test app or inspect step fails the whole test; the usage call below is explicitly allowed to fail. +set -e +IINSPECT=../../../apps/vespa-index-inspect/searchlib_vespa-index-inspect_app +ECHO_CMD=echo + +$VALGRIND ./searchlib_fusion_test_app +$ECHO_CMD showing usage +$IINSPECT --help > usage.out 2>&1 || true +$ECHO_CMD dumping dictionary words for field f0 +$IINSPECT dumpwords --indexdir dump3 --field f0 > dumpwords.out +$ECHO_CMD transposing index back for inspection +$IINSPECT showpostings --transpose --indexdir dump3 > transpose.out +$ECHO_CMD dumping posting list for word z in field f0 +$IINSPECT showpostings --indexdir dump3 --field f0 z > zwordf0field.out +$ECHO_CMD inspection done. + diff --git a/searchlib/src/tests/diskindex/pagedict4/.gitignore b/searchlib/src/tests/diskindex/pagedict4/.gitignore new file mode 100644 index 00000000000..2381ed57229 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +pagedict4_test +fakedict.* +searchlib_pagedict4_test_app diff --git a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt new file mode 100644 index 00000000000..f8aef573c9a --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_pagedict4_test_app + SOURCES + pagedict4test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_pagedict4_test_app COMMAND searchlib_pagedict4_test_app) diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp new file mode 100644 index 00000000000..03d73e84b42 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp @@ -0,0 +1,876 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP("pagedict4test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::bitcompression::PostingListCountFileEncodeContext; +using search::bitcompression::PostingListCountFileDecodeContext; +using search::index::PostingListCounts; +using search::index::PostingListOffsetAndCounts; +using search::index::PostingListParams; +using search::bitcompression::PageDict4SSWriter; +using search::bitcompression::PageDict4SPWriter; +using search::bitcompression::PageDict4PWriter; +using search::bitcompression::PageDict4Reader; +using search::bitcompression::PageDict4SSReader; +using search::bitcompression::PageDict4SSLookupRes; +using search::bitcompression::PageDict4SPLookupRes; +using search::bitcompression::PageDict4PLookupRes; +using search::index::Schema; +using search::index::DictionaryFileSeqRead; +using search::index::DictionaryFileSeqWrite; +using search::index::DictionaryFileRandRead; +using search::diskindex::PageDict4FileSeqRead; +using search::diskindex::PageDict4FileSeqWrite; +using search::diskindex::PageDict4RandRead; +using search::index::DummyFileHeaderContext; + +typedef search::bitcompression::PageDict4StartOffset StartOffset; + +namespace +{ + + +class Writer : public 
search::diskindex::ThreeLevelCountWriteBuffers +{ +public: + PageDict4SSWriter *_ssw; + PageDict4SPWriter *_spw; + PageDict4PWriter *_pw; + + Writer(EC &sse, + EC &spe, + EC &pe) + : ThreeLevelCountWriteBuffers(sse, spe, pe), + _ssw(NULL), + _spw(NULL), + _pw(NULL) + { + } + + ~Writer(void) + { + delete _ssw; + delete _spw; + delete _pw; + } + + void + allocWriters() + { + _ssw = new PageDict4SSWriter(_sse); + _spw = new PageDict4SPWriter(*_ssw, _spe); + _pw = new PageDict4PWriter(*_spw, _pe); + _spw->setup(); + _pw->setup(); + } + + void + flush(void) + { + _pw->flush(); + ThreeLevelCountWriteBuffers::flush(); + } + + void + addCounts(const std::string &word, + const PostingListCounts &counts) + { + _pw->addCounts(word, counts); + } +}; + + +class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + PageDict4Reader _pr; + + SeqReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _pr(_ssr, spd, pd) + { + _ssr.setup(ssd); + _pr.setup(); + } + + void + readCounts(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) + { + _pr.readCounts(word, wordNum, counts); + } +}; + +class RandReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + const char *_spData; + const char *_pData; + size_t _pageSize; + + RandReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _spData(static_cast(_rcspd._comprBuf)), + _pData(static_cast(_rcpd._comprBuf)), + _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize()) + { + 
_ssr.setup(ssd); + } + + bool + lookup(const std::string &key, + uint64_t &wordNum, + PostingListCounts &counts, + StartOffset &offsets) + { + PageDict4SSLookupRes sslr; + + sslr = _ssr.lookup(key); + if (!sslr._res) { + counts.clear(); + offsets = sslr._l6StartOffset; + wordNum = sslr._l6WordNum; + return false; + } + + if (sslr._overflow) { + wordNum = sslr._l6WordNum; + counts = sslr._counts; + offsets = sslr._startOffset; + return true; + } + PageDict4SPLookupRes splr; + splr.lookup(_ssr, + _spData + + _pageSize * sslr._sparsePageNum, + key, + sslr._l6Word, + sslr._lastWord, + sslr._l6StartOffset, + sslr._l6WordNum, + sslr._pageNum); + + PageDict4PLookupRes plr; + plr.lookup(_ssr, + _pData + _pageSize * splr._pageNum, + key, + splr._l3Word, + splr._lastWord, + splr._l3StartOffset, + splr._l3WordNum); + wordNum = plr._wordNum; + offsets = plr._startOffset; + if (plr._res) { + counts = plr._counts; + return true; + } + counts.clear(); + return false; + } +}; + +} + +class PageDict4TestApp : public FastOS_Application +{ +public: + search::Rand48 _rnd; + bool _stress; + bool _emptyWord; + bool _firstWordForcedCommon; + bool _lastWordForcedCommon; + + void + usage(void); + + int + Main(void); + + void + testWords(void); + + PageDict4TestApp(void) + : _rnd(), + _stress(false), + _emptyWord(false), + _firstWordForcedCommon(false), + _lastWordForcedCommon(false) + { + } +}; + + +void +PageDict4TestApp::usage(void) +{ + printf("Usage: wordnumbers\n"); + fflush(stdout); +} + + +int +PageDict4TestApp::Main(void) +{ + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + _rnd.srand48(32); + for (int32_t i = 1; i < _argc; ++i) { + if (strcmp(_argv[i], "stress") == 0) + _stress = true; + if (strcmp(_argv[i], "emptyword") == 0) + _emptyWord = true; + if (strcmp(_argv[i], "firstwordforcedcommon") == 0) + _firstWordForcedCommon = true; + if (strcmp(_argv[i], "lastwordforcedcommon") == 0) + _lastWordForcedCommon = true; + } + testWords(); + + LOG(info, + 
"_stress is %s", + _stress ? "true" : "false"); + LOG(info, + "_emptyWord is %s", + _emptyWord ? "true" : "false"); + LOG(info, + "_firstWordForcedCommon is %s", + _firstWordForcedCommon ? "true" : "false"); + LOG(info, + "_lastWordForcedCommon is %s", + _lastWordForcedCommon ? "true" : "false"); + + LOG(info, "SUCCESS"); + return 0; +} + + +class WordIndexCounts +{ +public: + uint32_t _numDocs; + uint64_t _fileOffset; + uint64_t _bitLength; + uint64_t _accNumDocs; + + WordIndexCounts(uint64_t bitLength, + uint32_t numDocs) + : _numDocs(numDocs), + _fileOffset(0), + _bitLength(bitLength), + _accNumDocs(0) + { + } + + WordIndexCounts() + : _numDocs(0), + _fileOffset(0), + _bitLength(0), + _accNumDocs(0) + { + } +}; + +class WordCounts +{ +public: + std::string _word; + WordIndexCounts _counts; + + bool + operator!=(const WordCounts &rhs) const + { + return _word != rhs._word; + } + + WordCounts(const std::string &word) + : _word(word), + _counts() + { + } + + bool + operator<(const WordCounts &rhs) const + { + return _word < rhs._word; + } +}; + + +void +deDup(std::vector &v) +{ + std::vector v2; + std::sort(v.begin(), v.end()); + for (std::vector::const_iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +void +deDup(std::vector &v) +{ + std::vector v2; + std::sort(v.begin(), v.end()); + for (std::vector::const_iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +static WordIndexCounts +makeIndex(search::Rand48 &rnd, bool forceCommon) +{ + uint64_t bitLength = 10; + uint32_t numDocs = 1; + if ((rnd.lrand48() % 150) == 0 || forceCommon) { + bitLength = 1000000000; + numDocs = 500000; + } + return WordIndexCounts(bitLength, numDocs); +} + + +void +makeIndexes(search::Rand48 &rnd, + WordIndexCounts &counts, + bool forceCommon) +{ + counts = makeIndex(rnd, forceCommon); +} + + 
+static void +makeWords(std::vector &v, + search::Rand48 &rnd, + uint32_t numWordIds, + uint32_t tupleCount, + bool emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + v.clear(); + for (unsigned int i = 0; i < tupleCount; ++i) { + uint64_t word = rnd.lrand48() % numWordIds; + uint64_t wordCount = (rnd.lrand48() % 10) + 1; + for (unsigned int j = 0; j < wordCount; ++j) { + uint64_t nextWord = rnd.lrand48() % numWordIds; + uint64_t nextWordCount = 0; + bool incomplete = true; + nextWordCount = rnd.lrand48() % 10; + incomplete = (rnd.lrand48() % 3) == 0 || nextWordCount == 0; + for (unsigned int k = 0; k < nextWordCount; ++k) { + uint64_t nextNextWord = rnd.lrand48() % numWordIds; + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << nextNextWord; + v.push_back(WordCounts(w.str())); + } + if (incomplete) { + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << "9999999999999999"; + v.push_back(WordCounts(w.str())); + } + } + } + deDup(v); + if (!v.empty() && emptyWord) + v.front()._word = ""; + for (std::vector::iterator + i = v.begin(), ib = v.begin(), ie = v.end(); + i != ie; ++i) { + std::vector indexes; + makeIndexes(rnd, i->_counts, + (i == ib && firstWordForcedCommon) || + (i + 1 == ie && lastWordForcedCommon)); + } + uint64_t fileOffset = 0; + uint64_t accNumDocs = 0; + for (std::vector::iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + WordIndexCounts *f = &i->_counts; + assert(f->_numDocs > 0); + assert(f->_bitLength > 0); + f->_fileOffset = fileOffset; + f->_accNumDocs = accNumDocs; + fileOffset += f->_bitLength; + accNumDocs += f->_numDocs; + } +} + + +void +makeCounts(PostingListCounts &counts, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts c; + const WordIndexCounts *j = &i._counts; + c._bitLength = j->_bitLength; + c._numDocs = j->_numDocs; + c._segments.clear(); + assert(j->_numDocs > 0); + uint32_t numChunks = (j->_numDocs + chunkSize 
- 1) / chunkSize; + if (numChunks > 1) { + uint32_t chunkBits = j->_bitLength / numChunks; + for (uint32_t chunkNo = 0; chunkNo < numChunks; ++chunkNo) { + PostingListCounts::Segment seg; + seg._bitLength = chunkBits; + seg._numDocs = chunkSize; + seg._lastDoc = (chunkNo + 1) * chunkSize - 1; + if (chunkNo + 1 == numChunks) { + seg._bitLength = c._bitLength - + (numChunks - 1) * chunkBits; + seg._lastDoc = c._numDocs - 1; + seg._numDocs = c._numDocs - (numChunks - 1) * chunkSize; + } + c._segments.push_back(seg); + } + } + counts = c; +} + + +void +checkCounts(const std::string &word, + const PostingListCounts &counts, + const StartOffset &fileOffset, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts answer; + + makeCounts(answer, i, chunkSize); + assert(word == i._word); + (void) word; + (void) fileOffset; + const WordIndexCounts *j = &i._counts; + assert(counts._bitLength == j->_bitLength); + assert(counts._numDocs == j->_numDocs); + assert(fileOffset._fileOffset == j->_fileOffset); + assert(fileOffset._accNumDocs == j->_accNumDocs); + assert(counts._segments == answer._segments); + assert(counts == answer); + (void) counts; +} + + +void +testWords(const std::string &logname, + search::Rand48 &rnd, + uint64_t numWordIds, + uint32_t tupleCount, + uint32_t chunkSize, + uint32_t ssPad, + uint32_t spPad, + uint32_t pPad, + bool emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + typedef search::bitcompression::PostingListCountFileEncodeContext EC; + typedef search::bitcompression::PostingListCountFileDecodeContext DC; + + LOG(info, "%s: word test start", logname.c_str()); + std::vector myrand; + makeWords(myrand, rnd, numWordIds, tupleCount, + emptyWord, firstWordForcedCommon, lastWordForcedCommon); + + PostingListCounts xcounts; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(xcounts, *i, chunkSize); + } + LOG(info, "%s: word counts generated", logname.c_str()); + 
+ EC pe; + EC spe; + EC sse; + + sse._minChunkDocs = chunkSize; + sse._numWordIds = numWordIds; + spe.copyParams(sse); + pe.copyParams(sse); + Writer w(sse, spe, pe); + w.startPad(ssPad, spPad, pPad); + w.allocWriters(); + + PostingListCounts counts; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + w.addCounts(i->_word, counts); + } + w.flush(); + + LOG(info, + "%s: Used %" PRIu64 "+%" PRIu64 "+%" PRIu64 + " bits for %d words", + logname.c_str(), + w._pFileBitSize, + w._spFileBitSize, + w._ssFileBitSize, + (int) myrand.size()); + + StartOffset checkOffset; + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + SeqReader r(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + vespalib::string word; + counts.clear(); + r.readCounts(word, checkWordNum, counts); + checkCounts(word, counts, checkOffset, *i, chunkSize); + assert(checkWordNum == wordNum); + checkOffset._fileOffset += counts._bitLength; + checkOffset._accNumDocs += counts._numDocs; + } + assert(pd.getReadOffset() == w._pFileBitSize); + LOG(info, "%s: words seqRead test OK", logname.c_str()); + } + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + RandReader rr(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + checkWordNum = 0; + bool res = rr.lookup(i->_word, + checkWordNum, + counts, + checkOffset); + assert(res); + (void) res; + checkCounts(i->_word, counts, checkOffset, + *i, chunkSize); + assert(checkWordNum == wordNum); + } + LOG(info, "%s: word randRead test OK", 
logname.c_str()); + } + + Schema schema; + std::vector indexes; + { + std::ostringstream fn; + fn << "f0"; + schema.addIndexField(Schema:: + IndexField(fn.str(), + Schema::STRING, + Schema::SINGLE)); + indexes.push_back(0); + } + { + std::unique_ptr + dw(new PageDict4FileSeqWrite); + std::vector wIndexes; + std::vector wCounts; + search::TuneFileSeqWrite tuneFileWrite; + DummyFileHeaderContext fileHeaderContext; + PostingListParams params; + params.set("numWordIds", numWordIds); + params.set("minChunkDocs", chunkSize); + dw->setParams(params); + bool openres = dw->open("fakedict", + tuneFileWrite, + fileHeaderContext); + assert(openres); + + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + dw->writeWord(i->_word, counts); + } + bool closeres = dw->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 written", logname.c_str()); + } + { + std::unique_ptr dr(new PageDict4FileSeqRead); + search::TuneFileSeqRead tuneFileRead; + + bool openres = dr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + checkWord.clear(); + checkWordNum = 0; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(checkWord == i->_word); + } + + checkWord = "bad"; + checkWordNum = 5; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(checkWord.empty()); + assert(checkWordNum == DictionaryFileSeqRead::noWordNumHigh()); + bool closeres = dr->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 seqverify OK", logname.c_str()); + } + { + 
std::unique_ptr drr(new PageDict4RandRead); + search::TuneFileRandRead tuneFileRead; + bool openres = drr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wOffset; + uint64_t rOffset; + PostingListOffsetAndCounts rOffsetAndCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + std::string missWord; + wOffset = 0; + for (std::vector::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + + checkWordNum = 0; + rCounts.clear(); + rOffset = 0; + bool lres = drr->lookup(i->_word, checkWordNum, + rOffsetAndCounts); + assert(lres); + (void) lres; + assert((rOffsetAndCounts._counts._bitLength == 0) == + (rOffsetAndCounts._counts._numDocs == 0)); + rOffset = rOffsetAndCounts._offset; + rCounts = rOffsetAndCounts._counts; + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(rOffset == wOffset); + + wOffset += wCounts._bitLength; + lastWord = i->_word; + + missWord = i->_word; + missWord.append(1, '\1'); + checkWordNum = 0; + lres = drr->lookup(missWord, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == wordNum + 1); + } + + checkWordNum = 0; + std::string notfoundword = "Thiswordhasbetternotbeindictionary"; + bool lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + checkWordNum = 0; + notfoundword = lastWord + "somethingmore"; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + (void) lres; + LOG(info, "Lookup beyond dict EOF gave wordnum %d", (int) checkWordNum); + + if (firstWordForcedCommon) { + if (!emptyWord) { + checkWordNum = 0; + notfoundword = ""; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 1); + } + if (!myrand.empty()) { + checkWordNum = 
0; + notfoundword = myrand.front()._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 2); + } + } + if (lastWordForcedCommon && !myrand.empty()) { + if (myrand.size() > 1) { + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 2]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size()); + } + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 1]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size() + 1); + } + bool closeres = drr->close(); + assert(closeres); + (void) closeres; + LOG(info, "%s: pagedict4 randverify OK", logname.c_str()); + } +} + + +void +PageDict4TestApp::testWords(void) +{ + ::testWords("smallchunkwordsempty", _rnd, + 1000000, 0, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordsempty2", _rnd, + 0, 0, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwords", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordswithemptyword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, false, false); + ::testWords("smallchunkwordswithcommonfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, true, false); + ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, true, false); + ::testWords("smallchunkwordswithcommonlastword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, true); +#if 1 + ::testWords("smallchunkwords2", _rnd, + 1000000, _stress ? 10000 : 100, + 64, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +#if 1 + ::testWords("stdwords", _rnd, + 1000000, _stress ? 
10000 : 100, + 262144, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +} + +FASTOS_MAIN(PageDict4TestApp); diff --git a/searchlib/src/tests/document_store/.gitignore b/searchlib/src/tests/document_store/.gitignore new file mode 100644 index 00000000000..bc9b97decab --- /dev/null +++ b/searchlib/src/tests/document_store/.gitignore @@ -0,0 +1 @@ +searchlib_document_store_test_app diff --git a/searchlib/src/tests/document_store/CMakeLists.txt b/searchlib/src/tests/document_store/CMakeLists.txt new file mode 100644 index 00000000000..18b9e408fae --- /dev/null +++ b/searchlib/src/tests/document_store/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_document_store_test_app + SOURCES + document_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_store_test_app COMMAND searchlib_document_store_test_app) diff --git a/searchlib/src/tests/document_store/FILES b/searchlib/src/tests/document_store/FILES new file mode 100644 index 00000000000..b1dd2b610d0 --- /dev/null +++ b/searchlib/src/tests/document_store/FILES @@ -0,0 +1 @@ +document_store_test.cpp diff --git a/searchlib/src/tests/document_store/document_store_test.cpp b/searchlib/src/tests/document_store/document_store_test.cpp new file mode 100644 index 00000000000..e6a3d9b5c3d --- /dev/null +++ b/searchlib/src/tests/document_store/document_store_test.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include + +using namespace search; + +document::DocumentTypeRepo repo; + +struct NullDataStore : IDataStore { + NullDataStore() : IDataStore("") {} + ssize_t read(uint32_t, vespalib::DataBuffer &) const override { return 0; } + void read(const LidVector &, IBufferVisitor &) const override { } + void write(uint64_t, uint32_t, const void *, size_t) override {} + void remove(uint64_t, uint32_t) override {} + void flush(uint64_t) override {} + + uint64_t initFlush(uint64_t syncToken) override { return syncToken; } + + size_t memoryUsed() const override { return 0; } + size_t memoryMeta() const override { return 0; } + size_t getDiskFootprint() const override { return 0; } + size_t getDiskBloat() const override { return 0; } + uint64_t lastSyncToken() const override { return 0; } + uint64_t tentativeLastSyncToken() const override { return 0; } + fastos::TimeStamp getLastFlushTime() const override { return fastos::TimeStamp(); } + void accept(IDataStoreVisitor &, IDataStoreVisitorProgress &, bool) override { } + double getVisitCost() const override { return 1.0; } + virtual DataStoreStorageStats getStorageStats() const override { + return DataStoreStorageStats(0, 0, 0.0, 0, 0); + } + virtual std::vector + getFileChunkStats() const override { + std::vector result; + return result; + } +}; + +TEST_FFF("require that uncache docstore lookups are counted", + DocumentStore::Config(document::CompressionConfig::NONE, 0, 0), + NullDataStore(), DocumentStore(f1, f2)) +{ + EXPECT_EQUAL(0u, f3.getCacheStats().misses); + f3.read(1, repo); + EXPECT_EQUAL(1u, f3.getCacheStats().misses); +} + +TEST_FFF("require that cached docstore lookups are counted", + DocumentStore::Config(document::CompressionConfig::NONE, 100000, 100), + NullDataStore(), DocumentStore(f1, f2)) +{ + EXPECT_EQUAL(0u, f3.getCacheStats().misses); + f3.read(1, repo); + EXPECT_EQUAL(1u, f3.getCacheStats().misses); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/document_store/visitor/.gitignore b/searchlib/src/tests/document_store/visitor/.gitignore new file mode 100644 index 00000000000..c97186f86d7 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/.gitignore @@ -0,0 +1 @@ +searchlib_document_store_visitor_test_app diff --git a/searchlib/src/tests/document_store/visitor/CMakeLists.txt b/searchlib/src/tests/document_store/visitor/CMakeLists.txt new file mode 100644 index 00000000000..976463bdfe8 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_document_store_visitor_test_app + SOURCES + document_store_visitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_store_visitor_test_app COMMAND searchlib_document_store_visitor_test_app) diff --git a/searchlib/src/tests/document_store/visitor/DESC b/searchlib/src/tests/document_store/visitor/DESC new file mode 100644 index 00000000000..03e9c6681ad --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/DESC @@ -0,0 +1 @@ +Document store visiting test. diff --git a/searchlib/src/tests/document_store/visitor/FILES b/searchlib/src/tests/document_store/visitor/FILES new file mode 100644 index 00000000000..412f9879bb5 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/FILES @@ -0,0 +1 @@ +document_store_visitor_test.cpp diff --git a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp new file mode 100644 index 00000000000..1898fa35a29 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp @@ -0,0 +1,466 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include
+#include
+LOG_SETUP("document_store_visitor_test");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace search;
+
+using vespalib::string;
+using document::DataType;
+using document::Document;
+using document::DocumentId;
+using document::DocumentType;
+using document::DocumentTypeRepo;
+using vespalib::asciistream;
+using index::DummyFileHeaderContext;
+
+namespace
+{
+
+const string doc_type_name = "test";
+const string header_name = doc_type_name + ".header";
+const string body_name = doc_type_name + ".body";
+// Builds the config for document type "test": string fields "main" and "extra" in the body struct.
+document::DocumenttypesConfig
+makeDocTypeRepoConfig()
+{
+    const int32_t doc_type_id = 787121340;
+    document::config_builder::DocumenttypesConfigBuilderHelper builder;
+    builder.document(doc_type_id,
+                     doc_type_name,
+                     document::config_builder::Struct(header_name),
+                     document::config_builder::Struct(body_name).
+                     addField("main", DataType::T_STRING).
+                     addField("extra", DataType::T_STRING));
+    return builder.config();
+}
+
+// Creates the expected document for lid i; "extra" is only set when before == false (post-rewrite shape).
+Document::UP
+makeDoc(const DocumentTypeRepo &repo, uint32_t i, bool before)
+{
+    asciistream idstr;
+    idstr << "id:test:test:: " << i;
+    DocumentId id(idstr.str());
+    const DocumentType *docType = repo.getDocumentType(doc_type_name);
+    Document::UP doc(new Document(*docType, id));
+    ASSERT_TRUE(doc.get());
+    asciistream mainstr;
+    mainstr << "static text" << i << " body something";
+    for (uint32_t j = 0; j < 10; ++j) {
+        mainstr << (j + i * 1000) << " ";
+    }
+    mainstr << " and end field";
+    doc->set("main", mainstr.c_str());
+    if (!before) {
+        doc->set("extra", "foo");
+    }
+
+    return doc;
+}
+
+}
+// Sync proxy stub that only records the last serial number it was asked to sync to.
+class MyTlSyncer : public transactionlog::SyncProxy
+{
+    SerialNum _syncedTo;
+
+public:
+    MyTlSyncer()
+        : _syncedTo(0)
+    {
+    }
+
+    void
+    sync(SerialNum syncTo)
+    {
+        _syncedTo = syncTo;
+    }
+};
+
+// State shared by the visitors: visit counters plus a bit vector of the lids seen as valid.
+class MyVisitorBase
+{
+public:
+    DocumentTypeRepo &_repo;
+    uint32_t _visitCount;
+    uint32_t _visitRmCount;
+    uint32_t _docIdLimit;
+    BitVector::UP _valid;
+    bool _before;
+
+    MyVisitorBase(DocumentTypeRepo &repo, uint32_t docIdLimit, bool before);
+};
+
+MyVisitorBase::MyVisitorBase(DocumentTypeRepo &repo,
+                             uint32_t docIdLimit,
+                             bool before)
+    : _repo(repo),
+      _visitCount(0u),
+      _visitRmCount(0u),
+      _docIdLimit(docIdLimit),
+      _valid(BitVector::create(docIdLimit)),
+      _before(before)
+{
+}
+
+// Read visitor: checks each visited document against makeDoc() and tracks valid/removed lids.
+class MyVisitor : public MyVisitorBase,
+                  public IDocumentStoreReadVisitor
+{
+public:
+    using MyVisitorBase::MyVisitorBase;
+
+    virtual void
+    visit(uint32_t lid, const Document &doc);
+
+    virtual void
+    visit(uint32_t lid);
+};
+
+// Called for a live document: compare with the expected content and mark the lid valid.
+void
+MyVisitor::visit(uint32_t lid, const Document &doc)
+{
+    ++_visitCount;
+    assert(lid < _docIdLimit);
+    Document::UP expDoc(makeDoc(_repo, lid, _before));
+    EXPECT_TRUE(*expDoc == doc);
+    _valid->slowSetBit(lid);
+}
+
+// Called for a removed lid: count it and clear its valid bit.
+void
+MyVisitor::visit(uint32_t lid)
+{
+    ++_visitRmCount;
+    assert(lid < _docIdLimit);
+    _valid->slowClearBit(lid);
+}
+
+// Rewrite visitor: verifies the document like MyVisitor, then sets field "extra" on it.
+class MyRewriteVisitor : public MyVisitorBase,
+                         public IDocumentStoreRewriteVisitor
+{
+public:
+    using MyVisitorBase::MyVisitorBase;
+
+    virtual void
+    visit(uint32_t lid, Document &doc);
+};
+
+
+void
+MyRewriteVisitor::visit(uint32_t lid, Document &doc)
+{
+    ++_visitCount;
+    assert(lid < _docIdLimit);
+    Document::UP expDoc(makeDoc(_repo, lid, _before));
+    EXPECT_TRUE(*expDoc == doc);
+    _valid->slowSetBit(lid);
+    doc.set("extra", "foo"); // after this the document matches makeDoc(..., before = false)
+}
+
+// Progress sink: stores the latest progress value and counts the updates received.
+class MyVisitorProgress : public IDocumentStoreVisitorProgress
+{
+public:
+    double _progress;
+    uint32_t _updates;
+
+    MyVisitorProgress();
+
+    virtual void
+    updateProgress(double progress);
+
+    virtual double
+    getProgress() const;
+};
+
+
+MyVisitorProgress::MyVisitorProgress()
+    : _progress(0.0),
+      _updates(0)
+{
+}
+
+// Expects progress to be non-decreasing between calls.
+void
+MyVisitorProgress::updateProgress(double progress)
+{
+    EXPECT_TRUE(progress >= _progress);
+    _progress = progress;
+    ++_updates;
+    LOG(info,
+        "updateProgress(%6.2f), %u updates",
+        progress, _updates);
+}
+
+
+double
+MyVisitorProgress::getProgress() const
+{
+    return _progress;
+}
+
+// Test fixture: owns a LogDocumentStore in directory "visitor" and mirrors its valid lids in _valid.
+struct Fixture
+{
+    string _baseDir;
+    DocumentTypeRepo _repo;
+    LogDocumentStore::Config _storeConfig;
+    vespalib::ThreadStackExecutor _executor;
+    DummyFileHeaderContext _fileHeaderContext;
+    MyTlSyncer _tlSyncer;
+    std::unique_ptr<LogDocumentStore> _store;
+    uint64_t _syncToken;
+    uint32_t _docIdLimit;
+    BitVector::UP _valid;
+
+    Fixture();
+
+    ~Fixture();
+
+    Document::UP
+    makeDoc(uint32_t i);
+
+    void
+    resetDocStore();
+
+    void
+    mkdir();
+
+    void
+    rmdir();
+
+    void
+    setDocIdLimit(uint32_t docIdLimit);
+
+    void
+    put(const Document &doc, uint32_t lid);
+
+    void
+    remove(uint32_t lid);
+
+    void
+    flush();
+
+    void
+    populate(uint32_t low, uint32_t high, uint32_t docIdLimit);
+
+    void
+    applyRemoves(uint32_t rmDocs);
+
+    void
+    checkRemovePostCond(uint32_t numDocs,
+                        uint32_t docIdLimit,
+                        uint32_t rmDocs,
+                        bool before);
+};
+
+Fixture::Fixture()
+    : _baseDir("visitor"),
+      _repo(makeDocTypeRepoConfig()),
+      _storeConfig(DocumentStore::
+                   Config(document::CompressionConfig::NONE, 0, 0),
+                   LogDataStore::
+                   Config(50000, 0.2, 3.0, 0.2, 1, true,
+                          WriteableFileChunk::Config(
+                                  document::CompressionConfig(),
+                                  16384,
+                                  64))),
+      _executor(_storeConfig.getLogConfig().getNumThreads(), 128 * 1024),
+      _fileHeaderContext(),
+      _tlSyncer(),
+      _store(),
+      _syncToken(0u),
+      _docIdLimit(0u),
+      _valid(BitVector::create(0u))
+{
+    rmdir(); // start from an empty base directory
+    mkdir();
+    resetDocStore();
+}
+
+
+Fixture::~Fixture()
+{
+    _store.reset(); // release the store before deleting its directory
+    rmdir();
+}
+
+Document::UP
+Fixture::makeDoc(uint32_t i)
+{
+    return ::makeDoc(_repo, i, true);
+}
+
+void
+Fixture::resetDocStore()
+{
+    _store.reset(new LogDocumentStore(_executor,
+                                      _baseDir,
+                                      _storeConfig,
+                                      GrowStrategy(),
+                                      TuneFileSummary(),
+                                      _fileHeaderContext,
+                                      _tlSyncer,
+                                      nullptr));
+}
+
+
+void
+Fixture::rmdir()
+{
+    vespalib::rmdir(_baseDir, true);
+}
+
+void
+Fixture::mkdir()
+{
+    vespalib::mkdir(_baseDir, false);
+}
+
+
+void
+Fixture::setDocIdLimit(uint32_t docIdLimit)
+{
+    _docIdLimit = docIdLimit;
+    _valid->resize(_docIdLimit);
+}
+
+void
+Fixture::put(const Document &doc, uint32_t lid)
+{
+    ++_syncToken;
+    assert(lid < _docIdLimit);
+    _store->write(_syncToken, doc, lid);
+    _valid->slowSetBit(lid);
+}
+
+
+void
+Fixture::remove(uint32_t lid)
+{
+    ++_syncToken;
+    assert(lid < _docIdLimit);
+    _store->remove(_syncToken, lid);
+    _valid->slowClearBit(lid);
+}
+
+
+void
+Fixture::flush()
+{
+    _store->initFlush(_syncToken);
+    _store->flush(_syncToken);
+}
+
+// Sets the doc id limit and writes documents for lids in [low, high).
+void
+Fixture::populate(uint32_t low, uint32_t high, uint32_t docIdLimit)
+{
+    setDocIdLimit(docIdLimit);
+    for (uint32_t lid = low; lid < high; ++lid) {
+        Document::UP doc = makeDoc(lid);
+        put(*doc, lid);
+    }
+}
+
+// Removes lids [20, 20 + rmDocs), then re-adds lid 25; the counts checked in checkRemovePostCond() assume rmDocs > 5.
+void
+Fixture::applyRemoves(uint32_t rmDocs)
+{
+    for (uint32_t lid = 20; lid < 20 + rmDocs; ++lid) {
+        remove(lid);
+    }
+    put(*makeDoc(25), 25);
+    remove(25);
+    put(*makeDoc(25), 25);
+}
+
+// Visits the store and checks the counts left by applyRemoves(): one removed lid (25) is live again.
+void
+Fixture::checkRemovePostCond(uint32_t numDocs,
+                             uint32_t docIdLimit,
+                             uint32_t rmDocs,
+                             bool before)
+{
+    MyVisitor visitor(_repo, docIdLimit, before);
+    MyVisitorProgress visitorProgress;
+    EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+    EXPECT_EQUAL(0u, visitorProgress._updates);
+    _store->accept(visitor, visitorProgress, _repo);
+    EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount);
+    EXPECT_EQUAL(rmDocs - 1, visitor._visitRmCount);
+    EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+    EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+    EXPECT_TRUE(*_valid == *visitor._valid);
+}
+
+
+TEST_F("require that basic visit works", Fixture())
+{
+    uint32_t numDocs = 3000;
+    uint32_t docIdLimit = numDocs + 1;
+    f.populate(1, docIdLimit, docIdLimit);
+    f.flush();
+    MyVisitor visitor(f._repo, docIdLimit, true);
+    MyVisitorProgress visitorProgress;
+    EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+    EXPECT_EQUAL(0u, visitorProgress._updates);
+    f._store->accept(visitor, visitorProgress, f._repo);
+    EXPECT_EQUAL(numDocs, visitor._visitCount);
+    EXPECT_EQUAL(0u, visitor._visitRmCount);
+    EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+    EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+    EXPECT_TRUE(*f._valid == *visitor._valid);
+}
+
+
+TEST_F("require that visit with remove works", Fixture())
+{
+    uint32_t numDocs = 1000;
+    uint32_t docIdLimit = numDocs + 1;
+    f.populate(1, docIdLimit, docIdLimit);
+    uint32_t rmDocs = 20;
+    f.applyRemoves(rmDocs);
+    f.flush();
+    f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true);
+}
+
+TEST_F("require that visit with rewrite and remove works", Fixture())
+{
+    uint32_t numDocs = 1000;
+    uint32_t docIdLimit = numDocs + 1;
+    f.populate(1, docIdLimit, docIdLimit);
+    uint32_t rmDocs = 20;
+    f.applyRemoves(rmDocs);
+    f.flush();
+    f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true);
+    {
+        MyRewriteVisitor visitor(f._repo, docIdLimit, true);
+        MyVisitorProgress visitorProgress;
+        EXPECT_EQUAL(0.0, visitorProgress.getProgress());
+        EXPECT_EQUAL(0u, visitorProgress._updates);
+        f._store->accept(visitor, visitorProgress, f._repo);
+        EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount);
+        EXPECT_EQUAL(1.0, visitorProgress.getProgress());
+        EXPECT_NOT_EQUAL(0u, visitorProgress._updates);
+        EXPECT_TRUE(*f._valid == *visitor._valid);
+        f.flush();
+    }
+    f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, false); // documents now carry "extra"
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/engine/docsumapi/.gitignore b/searchlib/src/tests/engine/docsumapi/.gitignore
new file mode 100644
index 00000000000..1b38a4ff745
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+docsumapi_test
+searchlib_docsumapi_test_app
diff --git a/searchlib/src/tests/engine/docsumapi/CMakeLists.txt b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt
new file mode 100644
index 00000000000..a8fbe70de4b
--- /dev/null
+++ b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_docsumapi_test_app + SOURCES + docsumapi_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_docsumapi_test_app COMMAND searchlib_docsumapi_test_app) diff --git a/searchlib/src/tests/engine/docsumapi/DESC b/searchlib/src/tests/engine/docsumapi/DESC new file mode 100644 index 00000000000..fa9d72e98be --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/DESC @@ -0,0 +1 @@ +docsumapi test. Take a look at docsumapi.cpp for details. diff --git a/searchlib/src/tests/engine/docsumapi/FILES b/searchlib/src/tests/engine/docsumapi/FILES new file mode 100644 index 00000000000..3e2e2e636be --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/FILES @@ -0,0 +1 @@ +docsumapi.cpp diff --git a/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp new file mode 100644 index 00000000000..d96295bb7ad --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include
+#include
+LOG_SETUP("docsumapi_test");
+#include
+#include
+#include
+#include
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+
+// light-weight network hop simulation: encode src into a buffer and decode it into dst
+template <typename T> void copyPacket(T &src, T &dst) {
+    FNET_DataBuffer buf;
+    src.Encode(&buf);
+    dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace
+// Checks PacketConverter for docsum traffic: GETDOCSUMSX -> DocsumRequest and DocsumReply -> DOCSUM.
+class Test : public vespalib::TestApp
+{
+public:
+    void convertToRequest();
+    void convertFromReply();
+    int Main();
+};
+
+document::GlobalId gid0("aaaaaaaaaaaa");
+document::GlobalId gid1("bbbbbbbbbbbb");
+// Fills a GETDOCSUMSX packet, sends it through copyPacket() and checks the converted DocsumRequest.
+void
+Test::convertToRequest()
+{
+    const string sessionId("qrserver.0.XXXXXXXXXXXXX.0");
+
+    FS4Packet_GETDOCSUMSX src;
+    src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+    src._features |= GDF_RANKP_QFLAGS;
+    src.setRanking("seven");
+    src._qflags = 5u;
+    src._features |= GDF_RESCLASSNAME;
+    src.setResultClassName("resclass");
+    src._features |= GDF_PROPERTIES;
+    src._propsVector.resize(3);
+    src._propsVector[0].allocEntries(2);
+    src._propsVector[0].setName("feature", strlen("feature"));
+    src._propsVector[0].setKey(0, "p1k1", strlen("p1k1"));
+    src._propsVector[0].setValue(0, "p1v1", strlen("p1v1"));
+    src._propsVector[0].setKey(1, "p1k2", strlen("p1k2"));
+    src._propsVector[0].setValue(1, "p1v2", strlen("p1v2"));
+    src._propsVector[1].allocEntries(2);
+    src._propsVector[1].setName("caches", strlen("caches"));
+    src._propsVector[1].setKey(0, "p2k1", strlen("p2k1"));
+    src._propsVector[1].setValue(0, "p2v1", strlen("p2v1"));
+    src._propsVector[1].setKey(1, "p2k2", strlen("p2k2"));
+    src._propsVector[1].setValue(1, "p2v2", strlen("p2v2"));
+    src._propsVector[2].allocEntries(1);
+    src._propsVector[2].setName("rank", strlen("rank"));
+    src._propsVector[2].setKey(0, "sessionId", strlen("sessionId"));
+    src._propsVector[2].setValue(0, sessionId.c_str(), sessionId.size());
+    src._features |= GDF_QUERYSTACK;
+    src._stackItems = 14u;
+    src.setStackDump("stackdump");
+    src._features |= GDF_LOCATION;
+    src.setLocation("location");
+    src._features |= GDF_MLD;
+    src.AllocateDocIDs(2);
+    src._docid[0]._gid = gid0;
+    src._docid[0]._partid = 5;
+    src._docid[1]._gid = gid1;
+    src._docid[1]._partid = 6;
+
+    { // full copy
+        FS4Packet_GETDOCSUMSX cpy;
+        copyPacket(src, cpy);
+
+        DocsumRequest dst;
+        PacketConverter::toDocsumRequest(cpy, dst);
+        EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u);
+        EXPECT_EQUAL(dst.ranking, "seven");
+        EXPECT_EQUAL(dst.queryFlags, 5u);
+        EXPECT_EQUAL(dst.resultClassName, "resclass");
+        EXPECT_EQUAL(dst.propertiesMap.size(), 3u);
+        EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1"));
+        EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2"));
+        EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1"));
+        EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2"));
+        EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string(""));
+        EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump");
+        EXPECT_EQUAL(dst.location, "location");
+        EXPECT_EQUAL(dst._flags, 0u);
+        EXPECT_EQUAL(dst.hits.size(), 2u);
+        EXPECT_EQUAL(dst.hits[0].docid, 0u);
+        EXPECT_TRUE(dst.hits[0].gid == gid0);
+        EXPECT_EQUAL(dst.hits[0].path, 5u);
+        EXPECT_EQUAL(dst.hits[1].docid, 0u);
+        EXPECT_TRUE(dst.hits[1].gid == gid1);
+        EXPECT_EQUAL(dst.hits[1].path, 6u);
+        EXPECT_EQUAL(sessionId,
+                     string(&dst.sessionId[0], dst.sessionId.size()));
+    }
+    { // without datetime -- NOTE(review): same input as the full copy and nothing is asserted here
+        FS4Packet_GETDOCSUMSX cpy;
+        copyPacket(src, cpy);
+
+        DocsumRequest dst;
+        PacketConverter::toDocsumRequest(cpy, dst);
+    }
+    { // without mld
+        FS4Packet_GETDOCSUMSX cpy;
+        copyPacket(src, cpy);
+        cpy._features &= ~GDF_MLD;
+
+        DocsumRequest dst;
+        PacketConverter::toDocsumRequest(cpy, dst);
+        EXPECT_EQUAL(dst.useWideHits, false);
+        EXPECT_EQUAL(dst.hits.size(), 2u);
+        EXPECT_EQUAL(dst.hits[0].docid, 0u);
+        EXPECT_TRUE(dst.hits[0].gid == gid0);
+        EXPECT_EQUAL(dst.hits[1].docid, 0u);
+        EXPECT_TRUE(dst.hits[1].gid == gid1);
+    }
+    { // with ignore row flag
+        FS4Packet_GETDOCSUMSX tcpy;
+        copyPacket(src, tcpy);
+        tcpy._features |= GDF_FLAGS;
+        tcpy._flags = GDFLAG_IGNORE_ROW;
+        FS4Packet_GETDOCSUMSX cpy;
+        copyPacket(tcpy, cpy); // second hop so the flag itself goes through encode/decode
+        DocsumRequest dst;
+        PacketConverter::toDocsumRequest(cpy, dst);
+        EXPECT_EQUAL(dst._flags, static_cast<uint32_t>(GDFLAG_IGNORE_ROW));
+    }
+}
+// Converts each element of a two-entry DocsumReply into a DOCSUM packet and checks gid and payload bytes.
+void
+Test::convertFromReply()
+{
+    DocsumReply src;
+    src.docsums.resize(2);
+    src.docsums[0].docid = 1;
+    src.docsums[0].gid = gid0;
+    src.docsums[0].data.resize(2);
+    src.docsums[0].data.str()[0] = 5;
+    src.docsums[0].data.str()[1] = 6;
+    src.docsums[1].docid = 2;
+    src.docsums[1].gid = gid1;
+    src.docsums[1].data.resize(3);
+    src.docsums[1].data.str()[0] = 7;
+    src.docsums[1].data.str()[1] = 8;
+    src.docsums[1].data.str()[2] = 9;
+
+    { // test first
+        FS4Packet_DOCSUM dst;
+        PacketConverter::fromDocsumReplyElement(src.docsums[0], dst);
+        EXPECT_EQUAL(dst.getGid(), gid0);
+        EXPECT_EQUAL(dst.getBuf().size(), 2u);
+        EXPECT_EQUAL(dst.getBuf().c_str()[0], 5);
+        EXPECT_EQUAL(dst.getBuf().c_str()[1], 6);
+    }
+    { // test second
+        FS4Packet_DOCSUM dst;
+        PacketConverter::fromDocsumReplyElement(src.docsums[1], dst);
+        EXPECT_EQUAL(dst.getGid(), gid1);
+        EXPECT_EQUAL(dst.getBuf().size(), 3u);
+        EXPECT_EQUAL(dst.getBuf().c_str()[0], 7);
+        EXPECT_EQUAL(dst.getBuf().c_str()[1], 8);
+        EXPECT_EQUAL(dst.getBuf().c_str()[2], 9);
+    }
+}
+
+int
+Test::Main()
+{
+    TEST_INIT("docsumapi_test");
+    convertToRequest();
+    convertFromReply();
+    TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/monitorapi/.gitignore b/searchlib/src/tests/engine/monitorapi/.gitignore
new file mode 100644
index 00000000000..66fc005087f
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+monitorapi_test
+searchlib_monitorapi_test_app
diff --git a/searchlib/src/tests/engine/monitorapi/CMakeLists.txt b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt
new file mode 100644
index 00000000000..f78a8e04fd1
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_monitorapi_test_app
+    SOURCES
+    monitorapi_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_monitorapi_test_app COMMAND searchlib_monitorapi_test_app)
diff --git a/searchlib/src/tests/engine/monitorapi/DESC b/searchlib/src/tests/engine/monitorapi/DESC
new file mode 100644
index 00000000000..882636f1952
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/DESC
@@ -0,0 +1 @@
+monitorapi test. Take a look at monitorapi.cpp for details.
diff --git a/searchlib/src/tests/engine/monitorapi/FILES b/searchlib/src/tests/engine/monitorapi/FILES
new file mode 100644
index 00000000000..16ad6789632
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/FILES
@@ -0,0 +1 @@
+monitorapi.cpp
diff --git a/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp
new file mode 100644
index 00000000000..0df52cbe0d8
--- /dev/null
+++ b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp
@@ -0,0 +1,126 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP("monitorapi_test");
+#include
+#include
+#include
+#include
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+// True when at least one bit of mask is set in features.
+bool checkFeature(uint32_t features, uint32_t mask) {
+    return ((features & mask) != 0);
+}
+// True when no bit of mask is set in features.
+bool checkNotFeature(uint32_t features, uint32_t mask) {
+    return !checkFeature(features, mask);
+}
+
+// light-weight network hop simulation: encode src into a buffer and decode it into dst
+template <typename T> void copyPacket(T &src, T &dst) {
+    FNET_DataBuffer buf;
+    src.Encode(&buf);
+    dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace
+// Checks PacketConverter for monitor traffic: MONITORQUERYX -> MonitorRequest and MonitorReply -> MONITORRESULTX.
+class Test : public vespalib::TestApp
+{
+public:
+    void convertToRequest();
+    void convertFromReply();
+    int Main();
+};
+// The query flags must survive the encode/decode hop and end up in MonitorRequest::flags.
+void
+Test::convertToRequest()
+{
+    FS4Packet_MONITORQUERYX src;
+    src._features |= MQF_QFLAGS;
+    src._qflags = 1u;
+
+    { // copy all
+        FS4Packet_MONITORQUERYX cpy;
+        copyPacket(src, cpy);
+
+        MonitorRequest dst;
+        PacketConverter::toMonitorRequest(cpy, dst);
+        EXPECT_EQUAL(dst.flags, 1u);
+    }
+}
+// Converts a fully populated MonitorReply, then variants with mld, flags and activedocs switched off.
+void
+Test::convertFromReply()
+{
+    MonitorReply src;
+    src.mld = true;
+    src.partid = 1u;
+    src.timestamp = 2u;
+    src.totalNodes = 3u;
+    src.activeNodes = 4u;
+    src.totalParts = 5u;
+    src.activeParts = 6u;
+    src.flags = 7u;
+    src.activeDocs = 8u;
+    src.activeDocsRequested = true;
+
+    { // full copy
+        MonitorReply cpy = src;
+
+        FS4Packet_MONITORRESULTX dst;
+        PacketConverter::fromMonitorReply(cpy, dst);
+        EXPECT_EQUAL(dst._partid, 1u);
+        EXPECT_EQUAL(dst._timestamp, 2u);
+        EXPECT_TRUE(checkFeature(dst._features, MRF_MLD));
+        EXPECT_EQUAL(dst._totalNodes, 3u);
+        EXPECT_EQUAL(dst._activeNodes, 4u);
+        EXPECT_EQUAL(dst._totalParts, 5u);
+        EXPECT_EQUAL(dst._activeParts, 6u);
+        EXPECT_TRUE(checkFeature(dst._features, MRF_RFLAGS));
+        EXPECT_EQUAL(dst._rflags, 7u);
+        EXPECT_EQUAL(dst._activeDocs, 8u);
+        EXPECT_TRUE(checkFeature(dst._features, MRF_ACTIVEDOCS));
+    }
+    { // non-mld
+        MonitorReply cpy = src;
+        cpy.mld = false;
+
+        FS4Packet_MONITORRESULTX dst;
+        PacketConverter::fromMonitorReply(cpy, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, MRF_MLD));
+    }
+    { // without flags
+        MonitorReply cpy = src;
+        cpy.flags = 0;
+
+        FS4Packet_MONITORRESULTX dst;
+        PacketConverter::fromMonitorReply(cpy, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, MRF_RFLAGS));
+        EXPECT_EQUAL(dst._rflags, 0u);
+    }
+    { // without activedocs
+        MonitorReply cpy = src;
+        cpy.activeDocsRequested = false;
+
+        FS4Packet_MONITORRESULTX dst;
+        PacketConverter::fromMonitorReply(cpy, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, MRF_ACTIVEDOCS));
+        EXPECT_EQUAL(dst._activeDocs, 0u);
+    }
+}
+
+int
+Test::Main()
+{
+    TEST_INIT("monitorapi_test");
+    convertToRequest();
+    convertFromReply();
+    TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/searchapi/.gitignore b/searchlib/src/tests/engine/searchapi/.gitignore
new file mode 100644
index 00000000000..92089e63cdd
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+searchapi_test
+searchlib_searchapi_test_app
diff --git a/searchlib/src/tests/engine/searchapi/CMakeLists.txt b/searchlib/src/tests/engine/searchapi/CMakeLists.txt
new file mode 100644
index 00000000000..89d1b8197a5
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchapi_test_app
+    SOURCES
+    searchapi_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_searchapi_test_app COMMAND searchlib_searchapi_test_app)
diff --git a/searchlib/src/tests/engine/searchapi/DESC b/searchlib/src/tests/engine/searchapi/DESC
new file mode 100644
index 00000000000..b006841d75d
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/DESC
@@ -0,0 +1 @@
+searchapi test. Take a look at searchapi.cpp for details.
diff --git a/searchlib/src/tests/engine/searchapi/FILES b/searchlib/src/tests/engine/searchapi/FILES
new file mode 100644
index 00000000000..806f04bbe4c
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/FILES
@@ -0,0 +1 @@
+searchapi.cpp
diff --git a/searchlib/src/tests/engine/searchapi/searchapi_test.cpp b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp
new file mode 100644
index 00000000000..cd040bfaeac
--- /dev/null
+++ b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp
@@ -0,0 +1,267 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP("searchapi_test");
+#include
+#include
+#include
+#include
+
+using namespace search::engine;
+using namespace search::fs4transport;
+
+namespace {
+// True when at least one bit of mask is set in features.
+bool checkFeature(uint32_t features, uint32_t mask) {
+    return ((features & mask) != 0);
+}
+// True when no bit of mask is set in features.
+bool checkNotFeature(uint32_t features, uint32_t mask) {
+    return !checkFeature(features, mask);
+}
+
+// light-weight network hop simulation: encode src into a buffer and decode it into dst
+template <typename T> void copyPacket(T &src, T &dst) {
+    FNET_DataBuffer buf;
+    src.Encode(&buf);
+    dst.Decode(&buf, buf.GetDataLen());
+}
+
+} // namespace
+// Checks PacketConverter for search traffic: QUERYX -> SearchRequest and SearchReply -> QUERYRESULTX.
+class Test : public vespalib::TestApp
+{
+public:
+    void propertyNames();
+    void convertToRequest();
+    void convertFromReply();
+    int Main();
+};
+// Pins the literal names of the property maps.
+void
+Test::propertyNames()
+{
+    EXPECT_EQUAL(search::MapNames::RANK, "rank");
+    EXPECT_EQUAL(search::MapNames::FEATURE, "feature");
+    EXPECT_EQUAL(search::MapNames::HIGHLIGHTTERMS, "highlightterms");
+    EXPECT_EQUAL(search::MapNames::MATCH, "match");
+    EXPECT_EQUAL(search::MapNames::CACHES, "caches");
+}
+// Fills a QUERYX packet, sends it through copyPacket() and checks the converted SearchRequest.
+void
+Test::convertToRequest()
+{
+    FS4Packet_QUERYX src;
+    src._offset = 2u;
+    src._maxhits = 3u;
+    src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS));
+    src._qflags = 5u;
+    src._features |= QF_RANKP;
+    src.setRanking("seven");
+    src._features |= QF_PROPERTIES;
+    src._propsVector.resize(2);
+    src._propsVector[0].allocEntries(2);
+    src._propsVector[0].setName("feature", strlen("feature"));
+    src._propsVector[0].setKey(0, "p1k1", strlen("p1k1"));
+    src._propsVector[0].setValue(0, "p1v1", strlen("p1v1"));
+    src._propsVector[0].setKey(1, "p1k2", strlen("p1k2"));
+    src._propsVector[0].setValue(1, "p1v2", strlen("p1v2"));
+    src._propsVector[1].allocEntries(2);
+    src._propsVector[1].setName("caches", strlen("caches"));
+    src._propsVector[1].setKey(0, "p2k1", strlen("p2k1"));
+    src._propsVector[1].setValue(0, "p2v1", strlen("p2v1"));
+    src._propsVector[1].setKey(1, "p2k2", strlen("p2k2"));
+    src._propsVector[1].setValue(1, "p2v2", strlen("p2v2"));
+    src._features |= QF_SORTSPEC;
+    src.setSortSpec("sortspec");
+    src._features |= QF_AGGRSPEC;
+    src.setAggrSpec("aggrspec"); // NOTE(review): set here but never asserted on below
+    src._features |= QF_GROUPSPEC;
+    src.setGroupSpec("groupspec");
+    src._features |= QF_SESSIONID;
+    src.setSessionId("sessionid");
+    src._features |= QF_LOCATION;
+    src.setLocation("location");
+    src._features |= QF_PARSEDQUERY;
+    src._numStackItems = 14u;
+    src.setStackDump("stackdump");
+
+    { // full copy
+        FS4Packet_QUERYX cpy;
+        copyPacket(src, cpy);
+
+        SearchRequest dst;
+        PacketConverter::toSearchRequest(cpy, dst);
+        EXPECT_EQUAL(dst.offset, 2u);
+        EXPECT_EQUAL(dst.maxhits, 3u);
+        EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u);
+        EXPECT_EQUAL(dst.queryFlags, 5u);
+        EXPECT_EQUAL(vespalib::string("seven"), dst.ranking);
+        EXPECT_EQUAL(dst.propertiesMap.size(), 2u);
+        EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1"));
+        EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2"));
+        EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1"));
+        EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2"));
+        EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string(""));
+        EXPECT_EQUAL(dst.sortSpec, "sortspec");
+        EXPECT_EQUAL(std::string(&dst.groupSpec[0], dst.groupSpec.size()), "groupspec");
+        EXPECT_EQUAL(std::string(&dst.sessionId[0], dst.sessionId.size()), "sessionid");
+        EXPECT_EQUAL(dst.location, "location");
+        EXPECT_EQUAL(dst.stackItems, 14u);
+        EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump");
+    }
+    { // without datetime -- NOTE(review): same input as the full copy and nothing is asserted here
+        FS4Packet_QUERYX cpy;
+        copyPacket(src, cpy);
+
+        SearchRequest dst;
+        PacketConverter::toSearchRequest(cpy, dst);
+    }
+}
+// Converts a populated SearchReply, then variants with sort data, group data, coverage and mld switched off.
+void
+Test::convertFromReply()
+{
+    SearchReply src;
+    src.offset = 1u;
+    src.totalHitCount = 2u;
+    src.maxRank = 3;
+    src.setDistributionKey(4u);
+    src.sortIndex.push_back(0);
+    src.sortIndex.push_back(1);
+    src.sortIndex.push_back(2);
+    src.sortData.push_back(11);
+    src.sortData.push_back(22);
+    src.groupResult.push_back(2);
+    src.useCoverage = true;
+    src.coverage = SearchReply::Coverage(5, 3);
+    src.useWideHits = true;
+    src.hits.resize(2);
+    document::GlobalId gid0("aaaaaaaaaaaa");
+    document::GlobalId gid1("bbbbbbbbbbbb");
+    src.hits[0].gid = gid0;
+    src.hits[0].metric = 5;
+    src.hits[0].path = 11;
+    src.hits[0].setDistributionKey(100);
+    src.hits[1].gid = gid1;
+    src.hits[1].metric = 4;
+    src.hits[1].path = 10;
+    src.hits[1].setDistributionKey(105);
+
+    { // full copy
+        SearchReply cpy = src;
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_EQUAL(dst._offset, 1u);
+        EXPECT_EQUAL(dst._numDocs, 2u);
+        EXPECT_EQUAL(dst._totNumDocs, 2u);
+        EXPECT_EQUAL(dst._maxRank, 3);
+        EXPECT_EQUAL(4u, dst.getDistributionKey());
+        EXPECT_TRUE(checkFeature(dst._features, QRF_SORTDATA));
+        EXPECT_EQUAL(dst._sortIndex[0], 0u);
+        EXPECT_EQUAL(dst._sortIndex[1], 1u);
+        EXPECT_EQUAL(dst._sortIndex[2], 2u);
+        EXPECT_EQUAL(dst._sortData[0], 11);
+        EXPECT_EQUAL(dst._sortData[1], 22);
+        EXPECT_TRUE(checkFeature(dst._features, QRF_GROUPDATA));
+        EXPECT_EQUAL(dst._groupDataLen, 1u);
+        EXPECT_EQUAL(dst._groupData[0], 2);
+        EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE));
+        EXPECT_EQUAL(dst._coverageDocs, 3u);
+        EXPECT_EQUAL(dst._activeDocs, 5u);
+        EXPECT_TRUE(checkFeature(dst._features, QRF_MLD));
+        EXPECT_TRUE(dst._hits[0]._gid == gid0);
+        EXPECT_EQUAL(dst._hits[0]._metric, 5);
+        EXPECT_EQUAL(dst._hits[0]._partid, 11u);
+        EXPECT_EQUAL(dst._hits[0].getDistributionKey(), 100u);
+        EXPECT_TRUE(dst._hits[1]._gid == gid1);
+        EXPECT_EQUAL(dst._hits[1]._metric, 4);
+        EXPECT_EQUAL(dst._hits[1]._partid, 10u);
+        EXPECT_EQUAL(dst._hits[1].getDistributionKey(), 105u);
+    }
+    { // not sortdata
+        SearchReply cpy = src;
+        cpy.sortIndex.clear();
+        cpy.sortData.clear();
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, QRF_SORTDATA));
+    }
+    { // not groupdata
+        SearchReply cpy = src;
+        cpy.groupResult.clear();
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, QRF_GROUPDATA));
+    }
+    { // non-full coverage -- NOTE(review): cpy is unmodified, so this repeats the coverage checks of the full copy
+        SearchReply cpy = src;
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE));
+        EXPECT_EQUAL(dst._coverageDocs, 3u);
+        EXPECT_EQUAL(dst._activeDocs, 5u);
+    }
+    { // not coverage
+        SearchReply cpy = src;
+        cpy.useCoverage = false;
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, QRF_COVERAGE));
+    }
+    { // non-mld
+        SearchReply cpy = src;
+        cpy.useWideHits = false;
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, QRF_MLD));
+        EXPECT_TRUE(dst._hits[0]._gid == gid0);
+        EXPECT_EQUAL(dst._hits[0]._metric, 5);
+        EXPECT_TRUE(dst._hits[1]._gid == gid1);
+        EXPECT_EQUAL(dst._hits[1]._metric, 4);
+    }
+    { // non-mld not siteid -- NOTE(review): currently identical to the non-mld case above
+        SearchReply cpy = src;
+        cpy.useWideHits = false;
+
+        FS4Packet_QUERYRESULTX dst0;
+        PacketConverter::fromSearchReply(cpy, dst0);
+        FS4Packet_QUERYRESULTX dst;
+        copyPacket(dst0, dst);
+        EXPECT_TRUE(checkNotFeature(dst._features, QRF_MLD));
+        EXPECT_TRUE(dst._hits[0]._gid == gid0);
+        EXPECT_EQUAL(dst._hits[0]._metric, 5);
+        EXPECT_TRUE(dst._hits[1]._gid == gid1);
+        EXPECT_EQUAL(dst._hits[1]._metric, 4);
+    }
+}
+
+int
+Test::Main()
+{
+    TEST_INIT("searchapi_test");
+    propertyNames();
+    convertToRequest();
+    convertFromReply();
+    TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/engine/transportserver/.gitignore b/searchlib/src/tests/engine/transportserver/.gitignore
new file mode 100644
index 00000000000..09d836e0004
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+transportserver_test
+vlog.txt
+searchlib_transportserver_test_app
diff --git a/searchlib/src/tests/engine/transportserver/CMakeLists.txt b/searchlib/src/tests/engine/transportserver/CMakeLists.txt
new file mode 100644
index 00000000000..502279bc728
--- /dev/null
+++ b/searchlib/src/tests/engine/transportserver/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_transportserver_test_app + SOURCES + transportserver_test.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_transportserver_test_app + COMMAND searchlib_transportserver_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog.txt;VESPA_LOG_LEVEL=\"all -spam\"" +) diff --git a/searchlib/src/tests/engine/transportserver/DESC b/searchlib/src/tests/engine/transportserver/DESC new file mode 100644 index 00000000000..2fb736a9319 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/DESC @@ -0,0 +1 @@ +transportserver test. Take a look at transportserver.cpp for details. diff --git a/searchlib/src/tests/engine/transportserver/FILES b/searchlib/src/tests/engine/transportserver/FILES new file mode 100644 index 00000000000..ec1b60cf739 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/FILES @@ -0,0 +1 @@ +transportserver.cpp diff --git a/searchlib/src/tests/engine/transportserver/transportserver_test.cpp b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp new file mode 100644 index 00000000000..af4dc4761bc --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp @@ -0,0 +1,187 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include
+#include
+LOG_SETUP("transportserver_test");
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace document;
+using namespace vespalib;
+using namespace search::engine;
+using namespace search::fs4transport;
+// Server stub that answers search, docsum and monitor requests directly by returning a reply.
+class SyncServer : public search::engine::SearchServer,
+                   public search::engine::DocsumServer,
+                   public search::engine::MonitorServer
+{
+private:
+    virtual SearchReply::UP search(SearchRequest::Source request, SearchClient &client);
+    virtual DocsumReply::UP getDocsums(DocsumRequest::Source request, DocsumClient &client);
+    virtual MonitorReply::UP ping(MonitorRequest::UP request, MonitorClient &client);
+
+    SyncServer(const SyncServer &);            // declared but not defined: not copyable
+    SyncServer &operator=(const SyncServer &); // declared but not defined: not assignable
+public:
+    SyncServer() {}
+    virtual ~SyncServer() {}
+};
+// Echoes the request offset back in the reply so the client side can recognize its answer.
+SearchReply::UP
+SyncServer::search(SearchRequest::Source request, SearchClient &)
+{
+    const SearchRequest &req = *request.get();
+    SearchReply::UP reply(new SearchReply());
+    SearchReply &ret = *reply;
+    ret.request = request.release();
+    LOG(info, "responding to search request...");
+    ret.offset = req.offset;
+    return reply;
+}
+// Replies with a single docsum holding payload "data" and the gid of document "doc::100".
+DocsumReply::UP
+SyncServer::getDocsums(DocsumRequest::Source request, DocsumClient &)
+{
+    DocsumReply::UP reply(new DocsumReply());
+    DocsumReply &ret = *reply;
+    ret.request = request.release();
+    LOG(info, "responding to docsum request...");
+    ret.docsums.resize(1);
+    ret.docsums[0].setData("data", strlen("data"));
+    ret.docsums[0].gid = DocumentId(vespalib::make_string("doc::100")).getGlobalId();
+    return reply;
+}
+// Echoes the request flags back in the reply timestamp.
+MonitorReply::UP
+SyncServer::ping(MonitorRequest::UP request, MonitorClient &)
+{
+    MonitorRequest &req = *request;
+    MonitorReply::UP reply(new MonitorReply());
+    MonitorReply &ret = *reply;
+    LOG(info, "responding to monitor request...");
+    ret.timestamp = req.flags;
+    return reply;
+}
+// Starts a TransportServer on SyncServer and interleaves monitor, query and docsum round trips on one connection.
+TEST("transportserver") {
+    {
+        SyncServer server;
+        TransportServer transport(server, server, server, 0,
+                                  TransportServer::DEBUG_ALL);
+        ASSERT_TRUE(transport.start());
+        int port = transport.getListenPort();
+        ASSERT_TRUE(port > 0);
+        {
+            FNET_Context ctx;
+            FastOS_ThreadPool pool(128 * 1024);
+            FNET_Transport client;
+            ASSERT_TRUE(client.Start(&pool));
+
+            FNET_PacketQueue adminQ;
+            FNET_Connection *conn = client.Connect(make_string("tcp/localhost:%d", port).c_str(),
+                                                   &FS4PersistentPacketStreamer::Instance, &adminQ);
+            ASSERT_TRUE(conn != nullptr);
+            {
+                FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+                mq->_qflags = 30;
+                mq->_features |= MQF_QFLAGS;
+                conn->PostPacket(mq, FNET_NOID);
+                FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+                FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+                EXPECT_EQUAL(r->_timestamp, 30u);
+                p->Free();
+            }
+            {
+                FNET_PacketQueue q;
+                FNET_Channel *ch = conn->OpenChannel(&q, FNET_Context());
+                FS4Packet_QUERYX *qx = new FS4Packet_QUERYX();
+                qx->_features |= QF_PARSEDQUERY;
+                qx->_offset = 100;
+                ch->Send(qx);
+                FNET_Packet *p = q.DequeuePacket(60000, &ctx);
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_QUERYRESULTX);
+                FS4Packet_QUERYRESULTX *r = (FS4Packet_QUERYRESULTX*)p;
+                EXPECT_EQUAL(r->_offset, 100u);
+                p->Free();
+                ch->CloseAndFree();
+            }
+            {
+                FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+                mq->_qflags = 40;
+                mq->_features |= MQF_QFLAGS;
+                conn->PostPacket(mq, FNET_NOID);
+                FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+                FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+                EXPECT_EQUAL(r->_timestamp, 40u);
+                p->Free();
+            }
+            {
+                FNET_PacketQueue q;
+                FNET_Channel *ch = conn->OpenChannel(&q, FNET_Context());
+                FS4Packet_GETDOCSUMSX *qdx = new FS4Packet_GETDOCSUMSX();
+                ch->Send(qdx);
+                FNET_Packet *p = q.DequeuePacket(60000, &ctx);
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_DOCSUM);
+                FS4Packet_DOCSUM *r = (FS4Packet_DOCSUM*)p;
+                EXPECT_EQUAL(r->getGid(), DocumentId("doc::100").getGlobalId());
+                p->Free();
+                p = q.DequeuePacket(60000, &ctx); // the docsum packet is followed by an EOL packet
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_EOL);
+                p->Free();
+                ch->CloseAndFree();
+            }
+            {
+                FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX();
+                mq->_qflags = 50;
+                mq->_features |= MQF_QFLAGS;
+                conn->PostPacket(mq, FNET_NOID);
+                FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx);
+                ASSERT_TRUE(p != nullptr);
+                ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX);
+                FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p;
+                EXPECT_EQUAL(r->_timestamp, 50u);
+                p->Free();
+            }
+            // shut down client
+            conn->CloseAdminChannel();
+            client.Close(conn);
+            conn->SubRef();
+            client.sync();
+            client.ShutDown(true);
+            pool.Close();
+        }
+
+    }
+}
+// Prints an error code and its string form to stderr; the cast makes the argument match %u.
+void printError(ErrorCode ecode) {
+    fprintf(stderr, "error code %u: '%s'\n", static_cast<unsigned int>(ecode), getStringFromErrorCode(ecode));
+}
+
+TEST("print errors") {
+    printError(ECODE_NO_ERROR);
+    printError(ECODE_GENERAL_ERROR);
+    printError(ECODE_QUERY_PARSE_ERROR);
+    printError(ECODE_ALL_PARTITIONS_DOWN);
+    printError(ECODE_ILLEGAL_DATASET);
+    printError(ECODE_OVERLOADED);
+    printError(ECODE_NOT_IMPLEMENTED);
+    printError(ECODE_QUERY_NOT_ALLOWED);
+    printError(ECODE_TIMEOUT);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/.gitignore b/searchlib/src/tests/features/.gitignore
new file mode 100644
index 00000000000..1c71377a25e
--- /dev/null
+++ b/searchlib/src/tests/features/.gitignore
@@ -0,0 +1,11 @@
+.depend
+Makefile
+beta_features_test
+featurebenchmark
+nativerank_test
+prod_features_test
+vlog1.txt
+vlog2.txt
+vlog3.txt
+searchlib_prod_features_test_app
+searchlib_featurebenchmark_app
diff --git a/searchlib/src/tests/features/CMakeLists.txt b/searchlib/src/tests/features/CMakeLists.txt
new file mode 100644
index 00000000000..f1703b02c8b
--- /dev/null
+++ b/searchlib/src/tests/features/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_prod_features_test_app + SOURCES + prod_features.cpp + prod_features_framework.cpp + prod_features_attributematch.cpp + prod_features_fieldmatch.cpp + prod_features_fieldtermmatch.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_prod_features_test_app COMMAND sh prod_features_test.sh) +vespa_add_executable(searchlib_featurebenchmark_app + SOURCES + featurebenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurebenchmark_app COMMAND searchlib_featurebenchmark_app BENCHMARK) diff --git a/searchlib/src/tests/features/DESC b/searchlib/src/tests/features/DESC new file mode 100644 index 00000000000..333541aa0a0 --- /dev/null +++ b/searchlib/src/tests/features/DESC @@ -0,0 +1 @@ +features test. Take a look at features.cpp for details. diff --git a/searchlib/src/tests/features/FILES b/searchlib/src/tests/features/FILES new file mode 100644 index 00000000000..6e53d562fc0 --- /dev/null +++ b/searchlib/src/tests/features/FILES @@ -0,0 +1,3 @@ +beta_features.cpp +prod_features.cpp +nativerank.cpp diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt new file mode 100644 index 00000000000..a4319bdae53 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=double +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 
2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt 
b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt new file mode 100644 index 00000000000..0371c72f13a --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=float +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 
3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt new file mode 100644 index 00000000000..0e27edf2e09 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=int +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 
3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt new file mode 100644 index 00000000000..ca1aa57e738 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=long +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 
1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt 
b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt new file mode 100644 index 00000000000..38c323c667d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=1000000 +numdocs=1000 +numvalues=1000 +collectiontype=wset +datatype=int +dotProduct.vector={0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2,21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2,41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2,61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2,81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2,101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2,118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2,127:2,128:2,129:2,130:2,131:2,132:2,133:2,134:2,135:2,136:2,137:2,138:2,139:2,140:2,141:2,142:2,143:2,144:2,145:2,146:2,147:2,148:2,149:2,150:2,151:2,152:2,153:2,154:2,155:2,156:2,157:2,158:2,159:2,160:2,161:2,162:2,163:2,164:2,165:2,166:2,167:2,168:2,169:2,170:2,171:2,172:2,173:2,174:2,175:2,176:2,177:2,178:2,179:2,180:2,181:2,182:2,183:2,184:2,185:2,186:2,187:2,188:2,189:2,190:2,191:2,192:2,193:2,194:2,195:2,196:2,197:2,198:2,199:2,200:2,201:2,202:2,203:2,204:2,205:2,206:2,207:2,208:2,209:2,210:2,211:2,212:2,213:2,214:2,215:2,216:2,217:2,218:2,219:2,220:2,221:2,222:2,223:2,224:2,225:2,226:2,227:2,228:2,229:2,230:2,231:2,232:2,233:2,234:2,235:2,236:2,237:2,238:2,239:2,240:2,241:2,242:2,243:2,244:2,245:2,246:2,247:2,248:2,249:2,250:2,251:2,252:2,253:2,254:2,255:2,256:2,257:2,258:2,259:2,260:2,261:2,262:2,263:2,264:2,265:2,266:2,267:2,268:2,269:2,270:2,271:2,272:2,273:2,274:2,275:2,276:2,277:2,278:2,279:2,280:2,281:2,282:2,283:2,284:2,285:2,286:2,287:2,288:2,289:2,290:2,291:2,292
:2,293:2,294:2,295:2,296:2,297:2,298:2,299:2,300:2,301:2,302:2,303:2,304:2,305:2,306:2,307:2,308:2,309:2,310:2,311:2,312:2,313:2,314:2,315:2,316:2,317:2,318:2,319:2,320:2,321:2,322:2,323:2,324:2,325:2,326:2,327:2,328:2,329:2,330:2,331:2,332:2,333:2,334:2,335:2,336:2,337:2,338:2,339:2,340:2,341:2,342:2,343:2,344:2,345:2,346:2,347:2,348:2,349:2,350:2,351:2,352:2,353:2,354:2,355:2,356:2,357:2,358:2,359:2,360:2,361:2,362:2,363:2,364:2,365:2,366:2,367:2,368:2,369:2,370:2,371:2,372:2,373:2,374:2,375:2,376:2,377:2,378:2,379:2,380:2,381:2,382:2,383:2,384:2,385:2,386:2,387:2,388:2,389:2,390:2,391:2,392:2,393:2,394:2,395:2,396:2,397:2,398:2,399:2,400:2,401:2,402:2,403:2,404:2,405:2,406:2,407:2,408:2,409:2,410:2,411:2,412:2,413:2,414:2,415:2,416:2,417:2,418:2,419:2,420:2,421:2,422:2,423:2,424:2,425:2,426:2,427:2,428:2,429:2,430:2,431:2,432:2,433:2,434:2,435:2,436:2,437:2,438:2,439:2,440:2,441:2,442:2,443:2,444:2,445:2,446:2,447:2,448:2,449:2,450:2,451:2,452:2,453:2,454:2,455:2,456:2,457:2,458:2,459:2,460:2,461:2,462:2,463:2,464:2,465:2,466:2,467:2,468:2,469:2,470:2,471:2,472:2,473:2,474:2,475:2,476:2,477:2,478:2,479:2,480:2,481:2,482:2,483:2,484:2,485:2,486:2,487:2,488:2,489:2,490:2,491:2,492:2,493:2,494:2,495:2,496:2,497:2,498:2,499:2,500:2,501:2,502:2,503:2,504:2,505:2,506:2,507:2,508:2,509:2,510:2,511:2,512:2,513:2,514:2,515:2,516:2,517:2,518:2,519:2,520:2,521:2,522:2,523:2,524:2,525:2,526:2,527:2,528:2,529:2,530:2,531:2,532:2,533:2,534:2,535:2,536:2,537:2,538:2,539:2,540:2,541:2,542:2,543:2,544:2,545:2,546:2,547:2,548:2,549:2,550:2,551:2,552:2,553:2,554:2,555:2,556:2,557:2,558:2,559:2,560:2,561:2,562:2,563:2,564:2,565:2,566:2,567:2,568:2,569:2,570:2,571:2,572:2,573:2,574:2,575:2,576:2,577:2,578:2,579:2,580:2,581:2,582:2,583:2,584:2,585:2,586:2,587:2,588:2,589:2,590:2,591:2,592:2,593:2,594:2,595:2,596:2,597:2,598:2,599:2,600:2,601:2,602:2,603:2,604:2,605:2,606:2,607:2,608:2,609:2,610:2,611:2,612:2,613:2,614:2,615:2,616:2,617:2,618:2,619:2,620:2,621:2,622:2,623:2,624:2,625:2
,626:2,627:2,628:2,629:2,630:2,631:2,632:2,633:2,634:2,635:2,636:2,637:2,638:2,639:2,640:2,641:2,642:2,643:2,644:2,645:2,646:2,647:2,648:2,649:2,650:2,651:2,652:2,653:2,654:2,655:2,656:2,657:2,658:2,659:2,660:2,661:2,662:2,663:2,664:2,665:2,666:2,667:2,668:2,669:2,670:2,671:2,672:2,673:2,674:2,675:2,676:2,677:2,678:2,679:2,680:2,681:2,682:2,683:2,684:2,685:2,686:2,687:2,688:2,689:2,690:2,691:2,692:2,693:2,694:2,695:2,696:2,697:2,698:2,699:2,700:2,701:2,702:2,703:2,704:2,705:2,706:2,707:2,708:2,709:2,710:2,711:2,712:2,713:2,714:2,715:2,716:2,717:2,718:2,719:2,720:2,721:2,722:2,723:2,724:2,725:2,726:2,727:2,728:2,729:2,730:2,731:2,732:2,733:2,734:2,735:2,736:2,737:2,738:2,739:2,740:2,741:2,742:2,743:2,744:2,745:2,746:2,747:2,748:2,749:2,750:2,751:2,752:2,753:2,754:2,755:2,756:2,757:2,758:2,759:2,760:2,761:2,762:2,763:2,764:2,765:2,766:2,767:2,768:2,769:2,770:2,771:2,772:2,773:2,774:2,775:2,776:2,777:2,778:2,779:2,780:2,781:2,782:2,783:2,784:2,785:2,786:2,787:2,788:2,789:2,790:2,791:2,792:2,793:2,794:2,795:2,796:2,797:2,798:2,799:2,800:2,801:2,802:2,803:2,804:2,805:2,806:2,807:2,808:2,809:2,810:2,811:2,812:2,813:2,814:2,815:2,816:2,817:2,818:2,819:2,820:2,821:2,822:2,823:2,824:2,825:2,826:2,827:2,828:2,829:2,830:2,831:2,832:2,833:2,834:2,835:2,836:2,837:2,838:2,839:2,840:2,841:2,842:2,843:2,844:2,845:2,846:2,847:2,848:2,849:2,850:2,851:2,852:2,853:2,854:2,855:2,856:2,857:2,858:2,859:2,860:2,861:2,862:2,863:2,864:2,865:2,866:2,867:2,868:2,869:2,870:2,871:2,872:2,873:2,874:2,875:2,876:2,877:2,878:2,879:2,880:2,881:2,882:2,883:2,884:2,885:2,886:2,887:2,888:2,889:2,890:2,891:2,892:2,893:2,894:2,895:2,896:2,897:2,898:2,899:2,900:2,901:2,902:2,903:2,904:2,905:2,906:2,907:2,908:2,909:2,910:2,911:2,912:2,913:2,914:2,915:2,916:2,917:2,918:2,919:2,920:2,921:2,922:2,923:2,924:2,925:2,926:2,927:2,928:2,929:2,930:2,931:2,932:2,933:2,934:2,935:2,936:2,937:2,938:2,939:2,940:2,941:2,942:2,943:2,944:2,945:2,946:2,947:2,948:2,949:2,950:2,951:2,952:2,953:2,954:2,955:2,956:2,957:2,958:2,9
59:2,960:2,961:2,962:2,963:2,964:2,965:2,966:2,967:2,968:2,969:2,970:2,971:2,972:2,973:2,974:2,975:2,976:2,977:2,978:2,979:2,980:2,981:2,982:2,983:2,984:2,985:2,986:2,987:2,988:2,989:2,990:2,991:2,992:2,993:2,994:2,995:2,996:2,997:2,998:2,999:2} diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt new file mode 100644 index 00000000000..3b3e0915e9e --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt new file mode 100644 index 00000000000..322784fc409 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt new file mode 100644 index 00000000000..9a31201941c --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt new file mode 100644 index 00000000000..0a7b99c79fb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt new file mode 100644 index 00000000000..1f859dc4ac6 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt new file mode 100644 index 00000000000..1d9b6de23a4 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 
+fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt new file mode 100644 index 00000000000..c50f602a111 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt new file mode 100644 index 00000000000..163a9bfd96d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt new file mode 100644 index 00000000000..b6a1094140b --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt new file mode 100644 index 00000000000..d3fc48be0be --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt new file mode 100644 index 00000000000..b6d4d2b4bb3 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt new file mode 100644 index 00000000000..67d1db34e17 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b 
x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt new file mode 100644 index 00000000000..838ee6871f0 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt new file mode 100644 index 00000000000..3e02b0ee27f --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt @@ -0,0 +1,7 @@ 
+case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x 
b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt new file mode 100644 index 00000000000..407579b6bee --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt new file mode 100644 index 00000000000..57aa1759b23 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x 
b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt new file mode 100644 index 00000000000..d91604f0bb5 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt new file mode 100644 index 00000000000..7d388e25cfa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt new file mode 100644 index 00000000000..7cfc899b1f3 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt new file mode 100644 index 00000000000..f06091fbcaa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt new file mode 100644 index 00000000000..b62b8b21e7c --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x 
b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt new file mode 100644 index 00000000000..19f133833aa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt new file mode 100644 index 00000000000..7dbfc2731a1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x 
a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 
+fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt new file mode 100644 index 00000000000..e436ffb270c --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x 
a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt new file mode 100644 index 00000000000..ec2727a7035 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x 
x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt new file mode 100644 index 00000000000..cadd682a817 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt new file mode 100644 index 00000000000..66c3203ad25 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt new file mode 100644 index 00000000000..c82fba41604 
--- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt new file mode 100644 index 00000000000..bd2404eba81 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt new file mode 100644 index 00000000000..6266271fe4f --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt new file mode 100644 index 00000000000..9f7593f8c76 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt new file mode 100644 index 00000000000..20a26196c44 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 
+fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt new file mode 100644 index 00000000000..126a7f4355d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt new file mode 100644 index 00000000000..456762710e1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt new file mode 100644 index 00000000000..2839245ccdd --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt new file mode 100644 index 00000000000..a94fb7cecd8 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git 
a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt new file mode 100644 index 00000000000..a53dd4fd6a7 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt new file mode 100644 index 00000000000..82d455795d4 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt new file mode 100644 index 00000000000..b55e2d60429 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x 
x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt new file mode 100644 index 00000000000..8f934a3e2a1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt new file mode 100644 index 00000000000..e1b687802f9 --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb new file mode 100644 index 00000000000..ffbbc25e354 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb @@ -0,0 +1,30 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+require '../plotlib' + +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +extra = "" +extra = ARGV[1] if ARGV.size == 2 +field = [20, 100, 1000, 10000] +segmentation = [1, 5, 10, 50, 100, 500, 1000, 10000] + +dat = folder + "/plot.dat" +png = folder + "/plot.png" + +file = File.open(dat, "w") +segmentation.each do |s| + file.write("#{s} ") + field.each do |f| + file.write(extract_data(folder + "/c-#{f}-#{s}.out") + " ") + end + file.write("\n") +end +file.close + +titles = ["fl-20", "fl-100", "fl-1000", "fl-10000"] + +plot_graph(dat, titles, png, "fieldMatch feature (#{extra})", "maxAlternativeSegmentations", "execution time per document (ms)", folder) diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt new file mode 100644 index 00000000000..a96922e58fb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt @@ -0,0 +1,22 @@ +** Running the benchmark ** +ruby run.rb folder +folder is the place to store the output files. + + +** Generating gnu plots ** +ruby plot.rb folder "description" +folder contains the output files and description is used when setting the title of the graph. + + +** Config file format ** +c-x-y.txt +x is the length of the field and y is the value for maxAlternativeSegmentations. + + +** Running callgrind ** +valgrind --tool=callgrind ../../featurebenchmark -c c-1000-1-callgrind.txt +valgrind --tool=callgrind ../../featurebenchmark -c c-1000-100-callgrind.txt +The numruns config value is reduced in these two config files. + +The output after running callgrind is two files: callgrind.out.x and callgrind.out.y. +Use kcachegrind to look at these two files. 
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/run.rb b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb new file mode 100644 index 00000000000..d0350c454e8 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb @@ -0,0 +1,17 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +cases = [20, 100, 1000, 10000] +segmentations = [1, 5, 10, 50, 100, 500, 1000, 10000] +cases.each do |c| + segmentations.each do |s| + file = "c-#{c}-#{s}" + cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out" + puts cmd + `#{cmd}` + end +end diff --git a/searchlib/src/tests/features/benchmark/plotlib.rb b/searchlib/src/tests/features/benchmark/plotlib.rb new file mode 100644 index 00000000000..53a1ee984a9 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/plotlib.rb @@ -0,0 +1,36 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+def plot_graph(dat, titles, png, title, xlabel, ylabel, folder) + plot_cmd = ""; + plot_cmd += "set terminal png\n" + plot_cmd += "set output \"#{png}\"\n" + plot_cmd += "set title \"#{title}\"\n" + plot_cmd += "set xlabel \"#{xlabel}\"\n" + plot_cmd += "set ylabel \"#{ylabel}\"\n" + plot_cmd += "set logscale\n" + + plots = [] + c = 2 + titles.each do |title| + plots.push("\"#{dat}\" using 1:#{c} title \"#{title}\" with linespoints") + c += 1 + end + plot_cmd += "plot " + plot_cmd += plots.join(", ") + + plot_cmd_file = File.open(folder + "/plot.cmd", "w") + plot_cmd_file.write(plot_cmd); + plot_cmd_file.close + cmd = "gnuplot " + folder + "/plot.cmd" + puts cmd + puts `#{cmd}` +end + +def extract_data(file_name) + content = IO.readlines(file_name).join + r = /ETPD:\s*(\d+\.\d+)/ + if content =~ r + return $1 + end + return "0" +end + diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt new file mode 100644 index 00000000000..f46508379af --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt new file mode 100644 index 00000000000..cd9a34865cb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt new file mode 100644 index 00000000000..1d3007a14c5 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt @@ -0,0 +1,4 @@ +case=rankingExpression 
+feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt new file mode 100644 index 00000000000..0a9db3c3539 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt new file mode 100644 index 00000000000..41600fb943d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt new file mode 100644 index 00000000000..b4704f8a822 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt new file mode 100644 index 00000000000..74790ff0a21 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt new file mode 100644 index 00000000000..57c250137fe --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb new file mode 100644 index 00000000000..ca586e1176e --- /dev/null 
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +require '../plotlib' + +folder = ARGV[0] +extra = "" +extra = ARGV[1] if ARGV.size == 2 +trees = [1, 5, 10, 50, 100, 200, 400, 800] + +dat = folder + "/plot.dat" +png = folder + "/plot.png" + +file = File.open(dat, "w") +trees.each do |t| + file.write("#{t} ") + file.write(extract_data(folder + "/c-#{t}.out") + " ") + file.write("\n") +end +file.close + +titles = ["expression"] + +plot_graph(dat, titles, png, "rankingExpression feature (#{extra})", "number of trees", "execution time per document (ms)", folder) diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/run.rb b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb new file mode 100644 index 00000000000..2f707e35b51 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +trees = [1, 5, 10, 50, 100, 200, 400, 800] +trees.each do |t| + file = "c-#{t}" + cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out" + puts cmd + `#{cmd}` +end diff --git a/searchlib/src/tests/features/beta/.gitignore b/searchlib/src/tests/features/beta/.gitignore new file mode 100644 index 00000000000..3a7ba416343 --- /dev/null +++ b/searchlib/src/tests/features/beta/.gitignore @@ -0,0 +1 @@ +searchlib_beta_features_test_app diff --git a/searchlib/src/tests/features/beta/CMakeLists.txt b/searchlib/src/tests/features/beta/CMakeLists.txt new file mode 100644 index 00000000000..ee7020f01fc --- /dev/null +++ b/searchlib/src/tests/features/beta/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_beta_features_test_app + SOURCES + beta_features.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_beta_features_test_app + COMMAND searchlib_beta_features_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog1.txt" +) diff --git a/searchlib/src/tests/features/beta/beta_features.cpp b/searchlib/src/tests/features/beta/beta_features.cpp new file mode 100644 index 00000000000..e5642f475de --- /dev/null +++ b/searchlib/src/tests/features/beta/beta_features.cpp @@ -0,0 +1,726 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("beta_features_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +//--------------------------------------------------------------------------------------------------------------------- +// TermPositionList +//--------------------------------------------------------------------------------------------------------------------- +typedef std::pair TermPosition; +class TermPositionList : public std::vector { +public: + TermPositionList &add(uint32_t termId, uint32_t pos) { + push_back(TermPosition(termId, pos)); + return *this; + } + TermPositionList &clear() { + std::vector::clear(); + return *this; + } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// Test 
+//--------------------------------------------------------------------------------------------------------------------- +class Test : public FtTestApp { +public: + int Main(); + void testJaroWinklerDistance(); + void testProximity(); + void testFlowCompleteness(); + void testQueryCompleteness(); + void testReverseProximity(); + void testTermEditDistance(); + +private: + void assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected); + void assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss); + void assertTermEditDistance(const vespalib::string &query, const vespalib::string &field, + uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub); + +private: + search::fef::BlueprintFactory _factory; +}; + +TEST_APPHOOK(Test); + +int +Test::Main() +{ + TEST_INIT("beta_features_test"); + + // Configure factory with all known blueprints. + setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + // Test all features. + testJaroWinklerDistance(); TEST_FLUSH(); + testProximity(); TEST_FLUSH(); + testFlowCompleteness(); TEST_FLUSH(); + testQueryCompleteness(); TEST_FLUSH(); + testReverseProximity(); TEST_FLUSH(); + testTermEditDistance(); TEST_FLUSH(); + + TEST_DONE(); + return 0; +} + +void +Test::testJaroWinklerDistance() +{ + { + // Test blueprint. 
+ JaroWinklerDistanceBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "jaroWinklerDistance")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("0")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + FT_SETUP_FAIL(pt, ie, params); + FT_SETUP_OK (pt, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out")); + FT_SETUP_FAIL(pt, ie, params.add("afoo")); + FT_SETUP_FAIL(pt, ie, params.add("wfoo")); + FT_SETUP_FAIL(pt, ie, params.add("1")); + } + { + FT_DUMP_EMPTY(_factory, "jaroWinklerDistance"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "jaroWinklerDistance", ie); // must be a single value index field + + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + StringList dump; + FT_DUMP(_factory, "jaroWinklerDistance", ie, dump/*.add("jaroWinklerDistance(bar).out")*/); + } + } + { + // These measures are taken from table 6 in the paper "Overview of Record Linkage and Current Research Directions" + // by William E. Winkler. It is available at: http://www.census.gov/srd/papers/pdf/rrs2006-02.pdf + // + // Note that the strings used as query and field here are transformed into query and field terms, and therefore + // they all need to be unique. The second occurrence of a character in the names below is therefore + // capitalized. A comment is given whenever our result is different from what is presented in the paper (only 2
+ assertJaroWinklerDistance("shackleford", "shackelford", 1 - 0.982f); + assertJaroWinklerDistance("dunNigham", "cunnigham", 1 - 0.852f); // 3x'n' in query, removed one + assertJaroWinklerDistance("nichlesoN", "nichulsoN", 1 - 0.956f); + assertJaroWinklerDistance("jones", "johnsoN", 1 - 0.832f); + assertJaroWinklerDistance("masSey", "masSie", 1 - 0.933f); + assertJaroWinklerDistance("abroms", "abrAms", 1 - 0.922f); + assertJaroWinklerDistance("hardin", "martinez", 1 - 0.722f); // no measure was given + assertJaroWinklerDistance("itman", "smith", 1 - 0.622f); // no measure was given + assertJaroWinklerDistance("jeraldinE", "geraldinE", 1 - 0.926f); + assertJaroWinklerDistance("marhtA", "marthA", 1 - 0.961f); + assertJaroWinklerDistance("micheLlE", "michael", 1 - 0.921f); + assertJaroWinklerDistance("julies", "juliUs", 1 - 0.933f); + assertJaroWinklerDistance("tanyA", "tonyA", 1 - 0.880f); + assertJaroWinklerDistance("dwayne", "duane", 1 - 0.765f); // was 0.840 in paper + assertJaroWinklerDistance("sean", "suSan", 1 - 0.672f); // was 0.805 in paper + assertJaroWinklerDistance("jon", "john", 1 - 0.933f); + assertJaroWinklerDistance("jon", "jan", 1 - 0.800f); // no measure was given + } +} + +void +Test::assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected) +{ + FtFeatureTest ft(_factory, "jaroWinklerDistance(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP(ft, query, StringMap().add("foo", field), 1); + + RankResult res; + ASSERT_TRUE(ft.execute(res.setEpsilon(0.001).addScore("jaroWinklerDistance(foo).out", expected))); +} + +void +Test::testProximity() +{ + + { // Test blueprint. 
+ ProximityBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "proximity")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + FT_SETUP_FAIL(prototype, params.add("1")); + FT_SETUP_FAIL(prototype, params.add("2")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(prototype, ie, params.add("foo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB")); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + } + + { + FT_DUMP_EMPTY(_factory, "proximity"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "proximity", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); +#ifdef VISIT_BETA_FEATURES + for (uint32_t a = 0; a < 5; ++a) { + for (uint32_t b = a + 1; b < 6; ++b) { + vespalib::string bn = vespalib::make_string("proximity(bar,%u,%u)", a, b); + dump.add(bn + ".out"); + dump.add(bn + ".posA"); + dump.add(bn + ".posB"); + } + } +#endif + FT_DUMP(_factory, "proximity", ie, dump); + } + } + { + // Test executor. + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + + search::fef::test::RankResult exp; + exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posA", util::FEATURE_MAX). 
+ addScore("proximity(foo,0,1).posB", util::FEATURE_MIN); + ASSERT_TRUE(ft.execute(exp, 1)); + } + { + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ASSERT_TRUE(!ft.setup()); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 50)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 30)); + search::fef::test::RankResult exp; + exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posA", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posB", util::FEATURE_MIN); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(exp, 1)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 20)); + ASSERT_TRUE(mdb->apply(2)); + ASSERT_TRUE(ft.execute(exp, 2)); + + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + ASSERT_TRUE(mdb->apply(3)); + exp .clear() + .addScore("proximity(foo,0,1).out", 10.0f) + .addScore("proximity(foo,0,1).posA", 10.0f) + .addScore("proximity(foo,0,1).posB", 20.0f); + ASSERT_TRUE(ft.execute(exp, 3)); + } + { + for (int a = 0; a < 10; ++a) { + for (int b = 0; b < 10; ++b) { + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, a)); + ASSERT_TRUE(mdb->addOccurence("foo", 1, b)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("proximity(foo,0,1).out", a < b ? b - a : util::FEATURE_MAX) + .addScore("proximity(foo,0,1).posA", a < b ? 
a : util::FEATURE_MAX) + .addScore("proximity(foo,0,1).posB", a < b ? b : util::FEATURE_MIN); + TEST_STATE(vespalib::make_string("a=%u, b=%u", a, b).c_str()); + EXPECT_TRUE(ft.execute(exp)); + } + } + } +} + +void +Test::testQueryCompleteness() +{ + { // Test blueprint. + QueryCompletenessBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "queryCompleteness")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_OK (prototype, ie, params.add("foo"), in, out.add("hit").add("miss")); + FT_SETUP_OK (prototype, ie, params.add("0"), in, out); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + + FT_DUMP_EMPTY(_factory, "queryCompleteness"); + FT_DUMP_EMPTY(_factory, "queryCompleteness", ie); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 5 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + // from 0 to 5 hits (5 to 0 misses) + for (uint32_t i = 0; i < 6; ++i) { + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 10); + for (uint32_t j = 0; j < i; ++j) { + mdb->addOccurence("foo", j, j); + } + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.addScore("queryCompleteness(foo).hit", (feature_t)(i)); + exp.addScore("queryCompleteness(foo).miss", (feature_t)(5 - i)); + EXPECT_TRUE(ft.execute(exp)); + } + } + { // Test executor. 
+ FtFeatureTest ft(_factory, "queryCompleteness(foo,5,10)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + // before window: position 4 is expected to count as a miss + assertQueryCompleteness(ft, 4, 0, 1); + // inside window: position 5 is the first one expected to count as a hit + assertQueryCompleteness(ft, 5, 1, 0); + // inside window: position 9 is the last one expected to count as a hit + assertQueryCompleteness(ft, 9, 1, 0); + // after window: position 10 is expected to count as a miss again + assertQueryCompleteness(ft, 10, 0, 1); + } +} + +void +Test::assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss) +{ + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 20); + mdb->addOccurence("foo", 0, firstOcc); + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.addScore("queryCompleteness(foo,5,10).hit", hits); + exp.addScore("queryCompleteness(foo,5,10).miss", miss); + EXPECT_TRUE(ft.execute(exp)); +} + +// BFI implementation: brute force and ignorance. Bit p (0-3) of each mask m1..m4 marks position p as allowed for that mask; every combination of one allowed position per mask is tried and the largest number of distinct positions picked is returned (0 if any mask has none of bits 0-3 set). +int cntFlow(int m1, int m2, int m3, int m4) +{ + int flow = 0; + + for (int p1p = 0; p1p < 4; p1p++) { + if (((1 << p1p) & m1) == 0) continue; + for (int p2p = 0; p2p < 4; p2p++) { + if (((1 << p2p) & m2) == 0) continue; + int f2 = 1; + if (p2p != p1p) ++f2; + for (int p3p = 0; p3p < 4; p3p++) { + if (((1 << p3p) & m3) == 0) continue; + int f3 = f2; + if (p3p != p1p && p3p != p2p) ++f3; + for (int p4p = 0; p4p < 4; p4p++) { + if (((1 << p4p) & m4) == 0) continue; + int f4 = f3; + if (p4p != p1p && p4p != p2p && p4p != p3p) ++f4; + if (flow < f4) flow = f4; + } + } + } + } + return flow; +} + +void +Test::testFlowCompleteness() +{ + { // Test blueprint.
+ TEST_STATE("test flow completeness blueprint"); + FlowCompletenessBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "flowCompleteness")); + + StringList params, in, out; + TEST_DO(FT_SETUP_FAIL(prototype, params)); + TEST_DO(FT_SETUP_FAIL(prototype, params.add("foo"))); + TEST_DO(FT_SETUP_FAIL(prototype, params.add("0"))); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + + params.clear(); + params.add("foo"); + + out.add("completeness").add("fieldCompleteness") + .add("queryCompleteness").add("elementWeight") + .add("weight").add("flow"); + + StringList expDump; + for (size_t i = 0; i < out.size(); ++i) { + vespalib::string fn = "flowCompleteness(foo)."; + fn.append(out[i]); + expDump.push_back(fn); + } + + TEST_DO(FT_SETUP_OK(prototype, ie, params, in, out)); + TEST_DO(FT_SETUP_FAIL(prototype, ie, params.add("2"))); + TEST_DO(FT_DUMP_EMPTY(_factory, "flowCompleteness")); +#ifdef notyet + TEST_DO(FT_DUMP(_factory, "flowCompleteness", ie, expDump)); +#endif + } + + { // Test executor. 
+ TEST_STATE("test flow completeness executor"); + + FtFeatureTest ft(_factory, "flowCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 5 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + // from 0 to 5 hits (5 to 0 misses) + for (uint32_t i = 0; i < 6; ++i) { + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 10); + for (uint32_t j = 0; j < i; ++j) { + mdb->addOccurence("foo", j, j); + } + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.setEpsilon(0.000001); + exp.addScore("flowCompleteness(foo)", i * 0.15); + exp.addScore("flowCompleteness(foo).completeness", i * 0.15); // == 0.1*0.5 + 0.2*(1-0.5) + exp.addScore("flowCompleteness(foo).fieldCompleteness", i * 0.1); + exp.addScore("flowCompleteness(foo).queryCompleteness", i * 0.2); + exp.addScore("flowCompleteness(foo).elementWeight", i > 0 ? 
1 : 0); + exp.addScore("flowCompleteness(foo).weight", 100.0); + exp.addScore("flowCompleteness(foo).flow", i); + TEST_STATE("run execute"); + EXPECT_TRUE(ft.execute(exp)); + } + } + + + { // Test executor, pass 2 + TEST_STATE("test flow completeness executor (pass 2)"); + + FtFeatureTest ft(_factory, "flowCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 4 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + // each term will have 1 to 3 positions it matches, + // with various points of overlap + + for (uint32_t t0m = 1; t0m < 15 ; ++t0m) { + + for (uint32_t t1m = 1; t1m < 15 ; ++t1m) { + + for (uint32_t t2m = 1; t2m < 15 ; ++t2m) { + + for (uint32_t t3m = 1; t3m < 15 ; ++t3m) { + + int flow = cntFlow(t0m, t1m, t2m, t3m); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 4); + for (int pos = 0; pos < 4; ++pos) { + if (((1 << pos) & t0m) != 0) mdb->addOccurence("foo", 0, pos); + if (((1 << pos) & t1m) != 0) mdb->addOccurence("foo", 1, pos); + if (((1 << pos) & t2m) != 0) mdb->addOccurence("foo", 2, pos); + if (((1 << pos) & t3m) != 0) mdb->addOccurence("foo", 3, pos); + } + + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.setEpsilon(0.0001); + exp.addScore("flowCompleteness(foo)", flow * 0.25); + exp.addScore("flowCompleteness(foo).completeness", flow * 0.25); + exp.addScore("flowCompleteness(foo).fieldCompleteness", flow * 0.25); + exp.addScore("flowCompleteness(foo).queryCompleteness", flow * 0.25); + exp.addScore("flowCompleteness(foo).elementWeight", 1); + exp.addScore("flowCompleteness(foo).weight", 100.0); + exp.addScore("flowCompleteness(foo).flow", flow); + TEST_STATE(vespalib::make_string("execute t0m=%u t1m=%u t2m=%u t3m=%u flow=%u", + t0m, t1m, t2m, t3m, 
flow).c_str()); + ASSERT_TRUE(ft.execute(exp)); + } + } + } + } + } +} + + +void +Test::testReverseProximity() +{ + { // Test blueprint. + ReverseProximityBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "reverseProximity")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + FT_SETUP_FAIL(prototype, params.add("1")); + FT_SETUP_FAIL(prototype, params.add("2")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(prototype, ie, params.add("foo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB")); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + } + + { + FT_DUMP_EMPTY(_factory, "reverseProximity"); + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "reverseProximity", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); +#ifdef VISIT_BETA_FEATURES + for (uint32_t a = 0; a < 5; ++a) { + for (uint32_t b = a + 1; b < 6; ++b) { + vespalib::string bn = vespalib::make_string("reverseProximity(bar,%u,%u)", a, b); + dump.add(bn + ".out"); + dump.add(bn + ".posA"); + dump.add(bn + ".posB"); + } + } +#endif + FT_DUMP(_factory, "reverseProximity", ie, dump); + } + } + + + { // Test executor. + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + search::fef::test::RankResult exp; + exp.addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX). + addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN). 
+ addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX); + ASSERT_TRUE(ft.execute(exp, 1)); + } + { + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); ASSERT_TRUE(!ft.setup()); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 50)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 20)); + search::fef::test::RankResult exp; + exp .addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX) + .addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN) + .addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(exp, 1)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 30)); + ASSERT_TRUE(mdb->apply(2)); + ASSERT_TRUE(ft.execute(exp, 2)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 10)); + ASSERT_TRUE(mdb->apply(3)); + exp .clear() + .addScore("reverseProximity(foo,0,1).out", 10.0f) + .addScore("reverseProximity(foo,0,1).posA", 20.0f) + .addScore("reverseProximity(foo,0,1).posB", 10.0f); + ASSERT_TRUE(ft.execute(exp, 3)); + } + { + for (int a = 0; a < 10; ++a) { + for (int b = 0; b < 10; ++b) { + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, a)); + ASSERT_TRUE(mdb->addOccurence("foo", 1, b)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("reverseProximity(foo,0,1).out", a >= b ? 
a - b : util::FEATURE_MAX) + .addScore("reverseProximity(foo,0,1).posA", a >= b ? a : util::FEATURE_MIN) + .addScore("reverseProximity(foo,0,1).posB", a >= b ? b : util::FEATURE_MAX); + ASSERT_TRUE(ft.execute(exp)); + } + } + } +} + +void +Test::testTermEditDistance() +{ + { // Test blueprint. + TermEditDistanceBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "termEditDistance")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + FT_SETUP_FAIL(prototype, ie, params.clear()); + FT_SETUP_OK (prototype, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out").add("del").add("ins").add("sub")); + FT_SETUP_FAIL(prototype, ie, params.add("afoo")); + FT_SETUP_FAIL(prototype, ie, params.add("wfoo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + } + + { + FT_DUMP_EMPTY(_factory, "termEditDistance"); + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "termEditDistance", ie); // must be a single-value index field + + StringList dump; +#ifdef VISIT_BETA_FEATURES + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + vespalib::string bn = "termEditDistance(bar)"; + dump.add(bn + ".out"); + dump.add(bn + ".del"); + dump.add(bn + ".ins"); + dump.add(bn + ".sub"); +#endif + FT_DUMP(_factory, "termEditDistance", ie, dump); + } + } + + { // Test executor. 
+ assertTermEditDistance("abcde", "abcde", 0, 0, 0); + assertTermEditDistance("abcde", "abcd.", 0, 0, 1); + assertTermEditDistance("abcde", ".bcd.", 0, 0, 2); + assertTermEditDistance("abcde", ".bc..", 0, 0, 3); + assertTermEditDistance("abcde", "..c..", 0, 0, 4); + assertTermEditDistance("abcd" , "..c..", 0, 1, 3); + assertTermEditDistance("abc", "..c..", 0, 2, 2); + assertTermEditDistance("ab", "..b..", 0, 3, 1); + assertTermEditDistance("a", "..a..", 0, 4, 0); + } +} + +// #pragma GCC diagnostic ignored "-Wstrict-aliasing" + +void +Test::assertTermEditDistance(const vespalib::string &query, const vespalib::string &field, + uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub) +{ + // Setup feature test. + vespalib::string feature = "termEditDistance(foo)"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + StringMap foo; + foo.add("foo", field); + FT_SETUP(ft, query, foo, 1); + + // Execute and compare results. 
+ search::fef::test::RankResult exp; + exp .addScore(feature + ".out", (feature_t)(expectedDel*1 + expectedIns*1 + expectedSub*1)) + .addScore(feature + ".del", (feature_t)expectedDel) + .addScore(feature + ".ins", (feature_t)expectedIns) + .addScore(feature + ".sub", (feature_t)expectedSub); + ASSERT_TRUE(ft.execute(exp)); +} diff --git a/searchlib/src/tests/features/element_completeness/.gitignore b/searchlib/src/tests/features/element_completeness/.gitignore new file mode 100644 index 00000000000..9d45fbda0ad --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/.gitignore @@ -0,0 +1 @@ +searchlib_element_completeness_test_app diff --git a/searchlib/src/tests/features/element_completeness/CMakeLists.txt b/searchlib/src/tests/features/element_completeness/CMakeLists.txt new file mode 100644 index 00000000000..aee13befe2d --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_element_completeness_test_app + SOURCES + element_completeness_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_element_completeness_test_app COMMAND searchlib_element_completeness_test_app) diff --git a/searchlib/src/tests/features/element_completeness/FILES b/searchlib/src/tests/features/element_completeness/FILES new file mode 100644 index 00000000000..5b995b34729 --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/FILES @@ -0,0 +1 @@ +element_completeness_test.cpp diff --git a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp new file mode 100644 index 00000000000..24d1625520d --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +std::vector featureNamesFoo() { + std::vector f; + f.push_back("elementCompleteness(foo).completeness"); + f.push_back("elementCompleteness(foo).fieldCompleteness"); + f.push_back("elementCompleteness(foo).queryCompleteness"); + f.push_back("elementCompleteness(foo).elementWeight"); + return f; +} + +const size_t TOTAL = 0; +const size_t FIELD = 1; +const size_t QUERY = 2; +const size_t WEIGHT = 3; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector expect; + size_t dumped; + virtual void visitDumpFeature(const vespalib::string &name) { + EXPECT_LESS(dumped, expect.size()); + EXPECT_EQUAL(expect[dumped++], name); + } + FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), dumped(0) {} +}; + +struct RankFixture : BlueprintFactoryFixture { + Properties idxProps; + RankFixture() : BlueprintFactoryFixture(), idxProps() {} + void test(const vespalib::string &queryStr, const FtIndex &index, + feature_t field, feature_t query, int32_t weight = 1, feature_t factor = 0.5, + bool useStaleMatchData = false) + { + std::vector names = featureNamesFoo(); + ASSERT_TRUE(names.size() == 4u); + RankResult expect; + expect.addScore(names[TOTAL], field*factor 
+ query*(1-factor)) + .addScore(names[FIELD], field).addScore(names[QUERY], query) + .addScore(names[WEIGHT], (double)weight); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getProperties().import(idxProps); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + FtTestApp::FT_SETUP(ft, FtUtil::toQuery(queryStr), index, 1); + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 2 : 1)); + for (size_t i = 0; i < names.size(); ++i) { + TEST_STATE(names[i].c_str()); + EXPECT_EQUAL(expect.getScore(names[i]), actual.getScore(names[i])); + } + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("elementCompleteness"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are dumped", ElementCompletenessBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_EQUAL(f3.expect.size(), f3.dumped); +} + +TEST_FF("require that setup can be done on index field", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "foo"))); +} + +TEST_FF("require that setup can not be done on attribute field", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "bar"))); +} + +TEST_FF("require that default config parameters are correct", ElementCompletenessBlueprint, IndexFixture) { + 
DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "foo"))); + EXPECT_EQUAL(0u, f1.getParams().fieldId); + EXPECT_EQUAL(0.5, f1.getParams().fieldCompletenessImportance); +} + +TEST_FF("require that blueprint can be configured", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + f2.indexEnv.getProperties().add("elementCompleteness(foo).fieldCompletenessImportance", "0.75"); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "foo"))); + EXPECT_EQUAL(0.75, f1.getParams().fieldCompletenessImportance); +} + +TEST_F("require that no match gives zero outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("y"), 0.0, 0.0, 0)); +} + +TEST_F("require that perfect match gives max outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x"), 1.0, 1.0)); +} + +TEST_F("require that matching half the field gives appropriate outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x y"), 0.5, 1.0)); + TEST_DO(f.test("x y", indexFoo().element("x y a b"), 0.5, 1.0)); +} + +TEST_F("require that matching half the query gives appropriate outputs", RankFixture) { + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5)); + TEST_DO(f.test("x y a b", indexFoo().element("x y"), 1.0, 0.5)); +} + +TEST_F("require that query completeness is affected by query term weight", RankFixture) { + TEST_DO(f.test("x!300 y!100", indexFoo().element("y"), 1.0, 0.25)); + TEST_DO(f.test("x!300 y!100", indexFoo().element("x"), 1.0, 0.75)); +} + +TEST_F("require that field completeness is not affected by duplicate field tokens", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x y y y"), 0.25, 1.00)); + TEST_DO(f.test("x", indexFoo().element("x x y y"), 0.25, 1.00)); + TEST_DO(f.test("x", indexFoo().element("x x x y"), 0.25, 1.00)); 
+ TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00)); +} + +TEST_F("require that field completeness is affected by duplicate query terms", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00)); + TEST_DO(f.test("x x", indexFoo().element("x x x x"), 0.50, 1.00)); + TEST_DO(f.test("x x x", indexFoo().element("x x x x"), 0.75, 1.00)); + TEST_DO(f.test("x x x x", indexFoo().element("x x x x"), 1.00, 1.00)); +} + +TEST_F("require that a single field token can match multiple query terms", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x x x", indexFoo().element("x"), 1.00, 1.00)); +} + +TEST_F("require that field completeness importance can be adjusted", RankFixture) { + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.1"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.1)); + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.4"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.4)); + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.7"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.7)); +} + +TEST_F("require that order is not relevant", RankFixture) { + TEST_DO(f.test("x y a b", indexFoo().element("n x n y"), 0.5, 0.5)); + TEST_DO(f.test("a b x y", indexFoo().element("y x n n"), 0.5, 0.5)); + TEST_DO(f.test("a y x b", indexFoo().element("x n y n"), 0.5, 0.5)); +} + +TEST_F("require that element is selected based on completeness times element weight", RankFixture) { + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.0"); + TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 39).element("a b", 19).element("x y a b", 10), 1.0, 1.0, 10, 0.0)); + TEST_DO(f.test("x y a 
b", indexFoo().element("x", 39).element("y", 39).element("a b", 21).element("x y a b", 10), 1.0, 0.5, 21, 0.0)); + TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 45).element("a b", 21).element("x y a b", 10), 1.0, 0.25, 45, 0.0)); +} + +TEST_F("require that stale match data is ignored", RankFixture) { + TEST_DO(f.test("x y a b", indexFoo().element("x y"), 0.0, 0.0, 0, 0.5, true)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/element_similarity_feature/.gitignore b/searchlib/src/tests/features/element_similarity_feature/.gitignore new file mode 100644 index 00000000000..36e60cd547e --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/.gitignore @@ -0,0 +1 @@ +searchlib_element_similarity_feature_test_app diff --git a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt new file mode 100644 index 00000000000..08e3b04cd73 --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_element_similarity_feature_test_app + SOURCES + element_similarity_feature_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_element_similarity_feature_test_app COMMAND searchlib_element_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp new file mode 100644 index 00000000000..181f2fb71f3 --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp @@ -0,0 +1,371 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const vespalib::string DEFAULT = "elementSimilarity(foo)"; +const vespalib::string PROXIMITY = "elementSimilarity(foo).proximity"; +const vespalib::string ORDER = "elementSimilarity(foo).order"; +const vespalib::string QUERY = "elementSimilarity(foo).query_coverage"; +const vespalib::string FIELD = "elementSimilarity(foo).field_coverage"; +const vespalib::string WEIGHT = "elementSimilarity(foo).weight"; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +//----------------------------------------------------------------------------- + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "fox"); + set("elementSimilarity(foo).output.proximity", "max(p)"); + set("elementSimilarity(foo).output.order", "max(o)"); + set("elementSimilarity(foo).output.query_coverage", "max(q)"); + set("elementSimilarity(foo).output.field_coverage", "max(f)"); + set("elementSimilarity(foo).output.weight", "max(w)"); + set("elementSimilarity(bar).output.default", "avg(1)"); + } + IndexFixture &set(const vespalib::string &key, const vespalib::string &value) { + Properties tmp; + tmp.add(key, value); + indexEnv.getProperties().import(tmp); + return *this; + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector actual; + FeatureDumpFixture() : 
IDumpFeatureVisitor(), actual() {} + virtual void visitDumpFeature(const vespalib::string &name) { + actual.push_back(name); + } +}; + +struct RankFixture : BlueprintFactoryFixture { + RankFixture() : BlueprintFactoryFixture() {} + double get_feature(const vespalib::string &query, const FtIndex &index, const vespalib::string &select, + const IndexFixture &idx_env = IndexFixture()) + { + std::vector names({"elementSimilarity(foo).default", // use 'default' explicitly to verify default output name + "elementSimilarity(foo).proximity", + "elementSimilarity(foo).order", + "elementSimilarity(foo).query_coverage", + "elementSimilarity(foo).field_coverage", + "elementSimilarity(foo).weight"}); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().getIndexEnv().getProperties().import(idx_env.indexEnv.getProperties()); + FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + { + RankResult stale; + EXPECT_TRUE(ft.executeOnly(stale, 2)); + EXPECT_EQUAL(0.0, stale.getScore(select)); + } + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, 1)); + return actual.getScore(select); + } +}; + +//----------------------------------------------------------------------------- + +double prox(uint32_t dist) { + return (dist > 8) ? 
0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); +} + +double sum(std::initializer_list values) { + double my_sum = 0.0; + for (double value: values) { + my_sum += value; + } + return my_sum; +} + +double comb(std::initializer_list values) { + return (sum(values)/values.size()); +} + +double mix(double proximity, double order, double query, double field) { + return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field); +} + +//----------------------------------------------------------------------------- + +template +bool cmp_lists_impl(const A &a, const B &b) { + std::vector tmp_a(a.begin(), a.end()); + std::vector tmp_b(b.begin(), b.end()); + std::sort(tmp_a.begin(), tmp_a.end()); + std::sort(tmp_b.begin(), tmp_b.end()); + if (!EXPECT_EQUAL(tmp_a.size(), tmp_b.size())) { + return false; + } + for (size_t i = 0; i < tmp_a.size(); ++i) { + if(!EXPECT_EQUAL(tmp_a[i], tmp_b[i])) { + return false; + } + } + return true; +} + +template +void dump_list(const vespalib::string &name, const T &list) { + fprintf(stderr, "list(name: '%s', size: %zu)\n", name.c_str(), list.size()); + std::vector tmp(list.begin(), list.end()); + std::sort(tmp.begin(), tmp.end()); + for (vespalib::string item: tmp) { + fprintf(stderr, " '%s'\n", item.c_str()); + } +} + +template +bool cmp_lists(const A &a, const B &b) { + if(!cmp_lists_impl(a, b)) { + dump_list("expected", a); + dump_list("actual", b); + return false; + } + return true; +}; + +//----------------------------------------------------------------------------- + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("elementSimilarity"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are dumped", ElementSimilarityBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_TRUE(cmp_lists(std::vector({"elementSimilarity(foo)", + 
"elementSimilarity(foo).proximity", + "elementSimilarity(foo).order", + "elementSimilarity(foo).query_coverage", + "elementSimilarity(foo).field_coverage", + "elementSimilarity(foo).weight", + "elementSimilarity(bar)"}), + f3.actual)); +} + +bool try_setup(ElementSimilarityBlueprint &blueprint, const IndexFixture &index, const vespalib::string &field) { + DummyDependencyHandler deps(blueprint); + blueprint.setName(vespalib::make_string("%s(%s)", blueprint.getBaseName().c_str(), field.c_str())); + return ((Blueprint&)blueprint).setup(index.indexEnv, std::vector(1, field)); +} + +TEST_FF("require that setup can be done on weighted set index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup can be done on array index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "bar")); +} + +TEST_FF("require that setup can be done on single value index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "baz")); +} + +TEST_FF("require that setup can not be done on single value attribute field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_FALSE(try_setup(f1, f2, "fox")); +} + +TEST_FF("require that setup will fail if output expression does not contain an aggregator", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "p"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will fail if output expression contains an unknown aggregator", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "bogus(p)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will fail if output expression contains an unknown symbol", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(bogus)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will 
fail if output expression is malformed", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(w+)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_F("require that no match gives zero outputs", RankFixture) { + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), DEFAULT)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD)); +} + +// Smallest perfect match: a one-term query against a one-term element. +TEST_F("require that minimal perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), DEFAULT)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD)); +} + +TEST_F("require that larger perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), DEFAULT)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), FIELD)); +} + +TEST_F("require that extra query terms reduces order but not proximity", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y y", 
indexFoo().element("x"), PROXIMITY)); + + EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y y", indexFoo().element("x"), ORDER)); +} + +TEST_F("require that extra field terms reduces proximity but not order", RankFixture) { + EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY)); + EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY)); + EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY)); + + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER)); +} + +TEST_F("require that proximity acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c d x x e"), PROXIMITY)); +} + +TEST_F("require that field order does not affect proximity score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", 
indexFoo().element("d x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY)); +} + +TEST_F("require that order score acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER)); +} + +TEST_F("require that proximity does not affect order score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x e x d x c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER)); +} + +TEST_F("require that query coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c 
d e", indexFoo().element("a b c d e"), QUERY)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY)); +} + +TEST_F("require that field coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD)); +} + +TEST_F("require that first unique match is used per query term", RankFixture) { + EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD)); + + EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b a", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", indexFoo().element("a a a b"), FIELD)); +} + +TEST_F("require that default score combines individual signals appropriately", RankFixture) { + EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), 
f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY)); + EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD)); + EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), DEFAULT)); + EXPECT_EQUAL(7.0 * mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d", 7), DEFAULT)); +} + +TEST_FF("require that max aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that avg aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "avg(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that sum aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "sum(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(8.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(8.0, f1.get_feature("x", 
indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that element demultiplexing works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "sum(q)"); + EXPECT_EQUAL(sum({0.25, 0.5, 0.5, 0.25, 0.5}), + f1.get_feature("x y z t", indexFoo() + .element("x") + .element("x y") + .element("x z") + .element("y") + .element("x z"), DEFAULT, f2)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/euclidean_distance/.gitignore b/searchlib/src/tests/features/euclidean_distance/.gitignore new file mode 100644 index 00000000000..2d08dd27122 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/.gitignore @@ -0,0 +1 @@ +searchlib_euclidean_distance_test_app diff --git a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt new file mode 100644 index 00000000000..d79aa9572bc --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_euclidean_distance_test_app + SOURCES + euclidean_distance_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_euclidean_distance_test_app COMMAND searchlib_euclidean_distance_test_app) diff --git a/searchlib/src/tests/features/euclidean_distance/FILES b/searchlib/src/tests/features/euclidean_distance/FILES new file mode 100644 index 00000000000..4ed7d9969b3 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/FILES @@ -0,0 +1 @@ +euclidean_distance_test.cpp diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp new file mode 100644 index 00000000000..b0d97902728 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::FloatingPointAttribute; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + EuclideanDistanceBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture() + : blueprint(), + indexEnv() + { + FieldInfo myField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "myAttribute", 1); + indexEnv.getFields().push_back(myField); + } +}; + +TEST_F("require that blueprint can be created from factory", SetupFixture) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "euclideanDistance")); +} + 
+TEST_F("require that setup succeeds with attribute source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("myAttribute").add("myVector"), + StringList(), StringList().add("distance")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void setupAttributeVectors() { + std::vector attrs; + attrs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY))); + + test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint", 0)); + test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat", 1)); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeManager().add(attr); + } + + IntegerAttribute *aint = static_cast(attrs[0].get()); + aint->append(1, 1, 0); + aint->append(1, -2, 0); + aint->append(1, 3, 0); + + FloatingPointAttribute *afloat = static_cast(attrs[1].get()); + afloat->append(1, 1.3, 0); + afloat->append(1, 1.5, 0); + afloat->append(1, -1.7, 0); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("euclideanDistance.intquery", "[4 5 -6]"); + test.getQueryEnv().getProperties().add("euclideanDistance.floatquery", "[4.1 15 0.001]"); + } + +}; + +TEST_F("require that distance is calculated for integer vectors", + ExecFixture("euclideanDistance(aint,intquery)")) +{ + EXPECT_TRUE(f.test.execute(11.789826, 0.000001)); +} + +TEST_F("require that distance is calculated for floating point vectors", + ExecFixture("euclideanDistance(afloat,floatquery)")) +{ 
+ EXPECT_TRUE(f.test.execute(13.891846, 0.000001)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp new file mode 100644 index 00000000000..14e43fa7d47 --- /dev/null +++ b/searchlib/src/tests/features/featurebenchmark.cpp @@ -0,0 +1,657 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("featurebenchmark"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + +typedef AttributeVector::SP AttributePtr; + +class Benchmark : public FtTestApp { +public: + typedef std::vector > KeyValueVector; + + class Config { + private: + typedef std::map StringMap; + StringMap _config; + + bool isKnown(const vespalib::string & key) const; + + public: + Config() : _config() {} + Config(const vespalib::string & fileName) : _config() { + init(fileName); + } + void init(const vespalib::string & fileName); + + void add(const vespalib::string & key, const vespalib::string & value) { + _config[key] = value; + } + + void addIfNotFound(const vespalib::string & key, const vespalib::string & value) { + if (_config.count(key) == 0) { + add(key, value); + } + } + + // known config values + vespalib::string getCase(const vespalib::string & fallback = "") const { + return getAsStr("case", fallback); + } + vespalib::string getFeature(const vespalib::string & fallback = "") const { + return getAsStr("feature", fallback); + } 
+ vespalib::string getIndex(const vespalib::string & fallback = "") const { + return getAsStr("index", fallback); + } + vespalib::string getQuery(const vespalib::string & fallback = "") const { + return getAsStr("query", fallback); + } + vespalib::string getField(const vespalib::string & fallback = "") const { + return getAsStr("field", fallback); + } + uint32_t getNumRuns(uint32_t fallback = 1000) const { + return getAsUint32("numruns", fallback); + } + + // access "unknown" config values + vespalib::string getAsStr(const vespalib::string & key, const vespalib::string & fallback = "") const { + StringMap::const_iterator itr = _config.find(key); + if (itr != _config.end()) { + return vespalib::string(itr->second); + } + return vespalib::string(fallback); + } + uint32_t getAsUint32(const vespalib::string & key, uint32_t fallback = 0) const { + return util::strToNum(getAsStr(key, vespalib::make_string("%u", fallback))); + } + double getAsDouble(const vespalib::string & key, double fallback = 0) const { + return util::strToNum(getAsStr(key, vespalib::make_string("%f", fallback))); + } + + KeyValueVector getUnknown() const; + + friend std::ostream & operator << (std::ostream & os, const Config & cfg); + }; + +private: + search::fef::BlueprintFactory _factory; + FastOS_Time _timer; + double _sample; + + void start() { _timer.SetNow(); } + void sample() { _sample = _timer.MilliSecsToNow(); } + void setupPropertyMap(Properties & props, const KeyValueVector & values); + void runFieldMatch(Config & cfg); + void runRankingExpression(Config & cfg); + + AttributePtr createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount); + AttributePtr createAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount); + AttributePtr createStringAttributeVector(const vespalib::string & 
name, const vespalib::string & ctype, uint32_t numDocs, + const std::vector & values); + void runAttributeMatch(Config & cfg); + void runAttribute(Config & cfg); + void runDotProduct(Config & cfg); + void runNativeAttributeMatch(Config & cfg); + void runNativeFieldMatch(Config & cfg); + void runNativeProximity(Config & cfg); + +public: + Benchmark() : _factory(), _timer(), _sample() {} + int Main(); + +}; + +TEST_APPHOOK(Benchmark); + + +bool +Benchmark::Config::isKnown(const vespalib::string & key) const +{ + if (key == vespalib::string("case") || + key == vespalib::string("feature") || + key == vespalib::string("index") || + key == vespalib::string("query") || + key == vespalib::string("field") || + key == vespalib::string("numruns")) + { + return true; + } + return false; +} + +void +Benchmark::Config::init(const vespalib::string & fileName) +{ + std::ifstream is(fileName.c_str()); + if (is.fail()) { + throw std::runtime_error(fileName); + } + + while (is.good()) { + std::string line; + std::getline(is, line); + if (!line.empty()) { + std::vector values = FtUtil::tokenize(line, "="); + LOG_ASSERT(values.size() == 2); + add(values[0], values[1]); + } + } +} + +Benchmark::KeyValueVector +Benchmark::Config::getUnknown() const +{ + KeyValueVector retval; + for (StringMap::const_iterator itr = _config.begin(); itr != _config.end(); ++itr) { + if (!isKnown(itr->first)) { + retval.push_back(std::make_pair(itr->first, itr->second)); + } + } + return retval; +} + +// Writes the known config values, followed by every stored key/value pair, to 'os'. +// All output goes to the stream that was passed in, never directly to std::cout. +std::ostream & operator << (std::ostream & os, const Benchmark::Config & cfg) +{ + os << "getCase: '" << cfg.getCase() << "'" << std::endl; + os << "getFeature: '" << cfg.getFeature() << "'" << std::endl; + os << "getIndex: '" << cfg.getIndex() << "'" << std::endl; + os << "getQuery: '" << cfg.getQuery() << "'" << std::endl; + os << "getField: '" << cfg.getField() << "'" << std::endl; + os << "getNumRuns: '" << cfg.getNumRuns() << "'" << std::endl; + + for 
(StringMap::const_iterator itr = cfg._config.begin(); itr != cfg._config.end(); ++itr) { + os << "'" << itr->first << "'='" << itr->second << "'" << std::endl; + } + return os; +} + + +void +Benchmark::setupPropertyMap(Properties & props, const KeyValueVector & values) +{ + std::cout << "**** setup property map ****" << std::endl; + for (uint32_t i = 0; i < values.size(); ++i) { + std::cout << "'" << values[i].first << "'='" << values[i].second << "'" << std::endl; + props.add(values[i].first, values[i].second); + } + std::cout << "**** setup property map ****" << std::endl; +} + +void +Benchmark::runFieldMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "fieldMatch(foo)"); + cfg.addIfNotFound("index", "foo"); + cfg.addIfNotFound("query", "a b c d"); + cfg.addIfNotFound("field", "a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + vespalib::string index = cfg.getIndex(); + vespalib::string query = cfg.getQuery(); + vespalib::string field = cfg.getField(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + setupFieldMatch(ft, index, query, field, NULL, 0, 0.0f, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +void +Benchmark::runRankingExpression(Config & cfg) +{ + cfg.addIfNotFound("feature", "rankingExpression"); + cfg.addIfNotFound("rankingExpression.rankingScript", "1 + 1 + 1 + 1"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, 
feature); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +AttributePtr +Benchmark::createAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount) +{ + return createAttributeVector(AVBT::INT32, name, ctype, numDocs, value, valueCount); +} + +AttributePtr +Benchmark::createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount) +{ + AttributePtr a; + if (ctype == "single") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::SINGLE)); + std::cout << "create single int32" << std::endl; + } else if (ctype == "array") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::ARRAY)); + std::cout << "create array int32" << std::endl; + } else if (ctype == "wset") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::WSET)); + std::cout << "create wset int32" << std::endl; + } + + a->addDocs(numDocs); + IntegerAttribute * ia = static_cast(a.get()); + for (uint32_t i = 0; i < numDocs; ++i) { + if (ctype == "single") { + ia->update(i, value); + } else { + for (uint32_t j = 0; j < valueCount; ++j) { + if (ctype == "array") { + ia->append(i, value, 0); + } else { + ia->append(i, value + j, j); + } + } + } + } + + a->commit(); + return a; +} + +AttributePtr +Benchmark::createStringAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + const std::vector & values) +{ + AttributePtr a; + if (ctype == "single") { + a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::SINGLE)); + std::cout << "create single string" << std::endl; + } else if (ctype == "array") { + a = 
AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::ARRAY)); + std::cout << "create array string" << std::endl; + } else if (ctype == "wset") { + a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::WSET)); + std::cout << "create wset string" << std::endl; + } + + a->addDocs(numDocs); + StringAttribute * sa = static_cast(a.get()); + for (uint32_t i = 0; i < numDocs; ++i) { + if (ctype == "single") { + sa->update(i, values[0]); + } else { + for (uint32_t j = 0; j < values.size(); ++j) { + sa->append(i, values[j], j); + } + } + } + + a->commit(); + return a; +} + +void +Benchmark::runAttributeMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "attributeMatch(foo)"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = 1000000; + uint32_t numDocs = 1000000; + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getAttributeManager().add(createAttributeVector("foo", "single", numDocs, 10, 10)); + ft.getQueryEnv().getBuilder().addAttributeNode("foo"); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->apply(0); + TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + { + amd->reset(0); // preserve old behavior + TermFieldMatchDataPosition pos; + pos.setElementWeight(i % numDocs); + amd->appendPosition(pos); + } + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runAttribute(Config & cfg) +{ + cfg.addIfNotFound("feature", "attribute(foo,str4)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << 
"**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = cfg.getAsUint32("numdocs", 1000); + StringList values; + values.add("str0").add("str1").add("str2").add("str3").add("str4") + .add("str5").add("str6").add("str7").add("str8").add("str9"); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("foo", "wset", numDocs, values)); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runDotProduct(Config & cfg) +{ + cfg.addIfNotFound("feature", "dotProduct(wsstr,vector)"); + cfg.addIfNotFound("numruns", "1000000"); + cfg.addIfNotFound("numdocs", "1000"); + cfg.addIfNotFound("numvalues", "10"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + vespalib::string collectionType = cfg.getAsStr("collectiontype", "wset"); + vespalib::string dataType = cfg.getAsStr("datatype", "string"); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = cfg.getAsUint32("numdocs", 1000); + uint32_t numValues = cfg.getAsUint32("numvalues", 10); + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, + collectionType == "wset" ? 
CollectionType::WEIGHTEDSET : CollectionType::ARRAY, + "wsstr"); + if (dataType == "string") { + StringList values; + for (uint32_t i = 0; i < numValues; ++i) { + values.add(vespalib::make_string("str%u", i)); + } + + ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("wsstr", collectionType, numDocs, values)); + } else if (dataType == "int") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT32, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "long") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT64, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "float") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::FLOAT, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "double") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::DOUBLE, "wsstr", collectionType, numDocs, 0, numValues)); + } else { + std::cerr << "Illegal data type '" << dataType << std::endl; + } + ft.getQueryEnv().getProperties().add("dotProduct.vector", cfg.getAsStr("dotProduct.vector", "(str0:1)")); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runNativeAttributeMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeAttributeMatch(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + cfg.addIfNotFound("numdocs", "1000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = cfg.getAsUint32("numdocs"); + + FtFeatureTest ft(_factory, feature); + 
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + ft.getQueryEnv().getBuilder().addAttributeNode("foo")->setWeight(search::query::Weight(100)); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->apply(0); + + TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + uint32_t docId = i % numDocs; + { + amd->reset(docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(docId); + amd->appendPosition(pos); + } + ft.executeOnly(docId); + } + sample(); +} + +void +Benchmark::runNativeFieldMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeFieldMatch(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + std::vector searchedFields; + searchedFields.push_back("foo"); + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // setup occurrence data + mdb->setFieldLength("foo", 100); + mdb->addOccurence("foo", 0, 2); + mdb->addOccurence("foo", 0, 8); + mdb->addOccurence("foo", 0, 32); + 
mdb->addOccurence("foo", 0, 64); + ASSERT_TRUE(mdb->apply(0)); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +void +Benchmark::runNativeProximity(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeProximity(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + std::vector searchedFields; + searchedFields.push_back("foo"); + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 0 + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 1 + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // setup occurrence data + mdb->setFieldLength("foo", 100); + mdb->addOccurence("foo", 0, 2); + mdb->addOccurence("foo", 0, 16); + mdb->addOccurence("foo", 0, 32); + mdb->addOccurence("foo", 1, 6); + mdb->addOccurence("foo", 1, 12); + mdb->addOccurence("foo", 1, 30); + ASSERT_TRUE(mdb->apply(0)); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +int +Benchmark::Main() +{ + TEST_INIT("featurebenchmark"); + + // Configure factory with all known blueprints. 
+ setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + vespalib::string file; + vespalib::string feature; + while ((opt = GetOpt("c:f:", arg, idx)) != -1) { + switch (opt) { + case 'c': + file.assign(arg); + break; + case 'f': + feature.assign(arg); + break; + default: + optError = true; + break; + } + } + + if (_argc != idx || optError) { + //usage(); + return -1; + } + + Config cfg; + if (file.empty()) { + cfg.add("case", feature); + } else { + cfg.init(file); + } + + if (cfg.getCase() == vespalib::string("fieldMatch")) { + runFieldMatch(cfg); + } else if (cfg.getCase() == vespalib::string("rankingExpression")) { + runRankingExpression(cfg); + } else if (cfg.getCase() == vespalib::string("attributeMatch")) { + runAttributeMatch(cfg); + } else if (cfg.getCase() == vespalib::string("attribute")) { + runAttribute(cfg); + } else if (cfg.getCase() == vespalib::string("dotProduct")) { + runDotProduct(cfg); + } else if (cfg.getCase() == vespalib::string("nativeAttributeMatch")) { + runNativeAttributeMatch(cfg); + } else if (cfg.getCase() == vespalib::string("nativeFieldMatch")) { + runNativeFieldMatch(cfg); + } else if (cfg.getCase() == vespalib::string("nativeProximity")) { + runNativeProximity(cfg); + } else { + std::cout << "feature case '" << cfg.getCase() << "' is not known" << std::endl; + } + + std::cout << "TET: " << _sample << " (ms)" << std::endl; + std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / cfg.getNumRuns() << " (ms)" << std::endl; + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + + TEST_DONE(); + return 0; +} + diff --git a/searchlib/src/tests/features/item_raw_score/.gitignore b/searchlib/src/tests/features/item_raw_score/.gitignore new file mode 100644 index 00000000000..29711c1533d --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/.gitignore @@ -0,0 +1 @@ +searchlib_item_raw_score_test_app diff --git 
a/searchlib/src/tests/features/item_raw_score/CMakeLists.txt b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt new file mode 100644 index 00000000000..24ef339133c --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_item_raw_score_test_app + SOURCES + item_raw_score_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_item_raw_score_test_app COMMAND searchlib_item_raw_score_test_app) diff --git a/searchlib/src/tests/features/item_raw_score/FILES b/searchlib/src/tests/features/item_raw_score/FILES new file mode 100644 index 00000000000..bce307ff6c1 --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/FILES @@ -0,0 +1 @@ +item_raw_score_test.cpp diff --git a/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp new file mode 100644 index 00000000000..20f9449062d --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const vespalib::string featureName("itemRawScore(label)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +struct Labels { + virtual void inject(Properties &p) const = 0; + virtual ~Labels() {} +}; +struct NoLabel : public Labels { + virtual void inject(Properties &) const {} +}; +struct SingleLabel : public Labels { + vespalib::string label; + uint32_t uid; + SingleLabel(const vespalib::string &l, uint32_t x) : label(l), uid(x) {} + virtual void inject(Properties &p) const { + vespalib::asciistream key; + key << "vespa.label." 
<< label << ".id"; + vespalib::asciistream value; + value << uid; + p.add(key.str(), value.str()); + } +}; + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector fooHandles; + std::vector barHandles; + RankFixture(size_t fooCnt, size_t barCnt, const Labels &labels) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(i + 1); + term.addField(fieldId).setHandle(fooHandles.back()); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(fooCnt + i + 1); + term.addField(fieldId).setHandle(barHandles.back()); + queryEnv.getTerms().push_back(term); + } + labels.inject(queryEnv.getProperties()); + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + feature_t getScore(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { + rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score); + } + void setFooScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, fooHandles.size()); + setScore(fooHandles[i], docId, score); + } + void setBarScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, barHandles.size()); + setScore(barHandles[i], docId, score); + } +}; + +TEST_F("require that blueprint can 
be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("itemRawScore"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", ItemRawScoreBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on random label", ItemRawScoreBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(random_label)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "random_label"))); +} + +TEST_FF("require that no label gives 0.0 item raw score", NoLabel(), RankFixture(2, 2, f1)) { + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_FF("require that unrelated label gives 0.0 item raw score", SingleLabel("unrelated", 1), RankFixture(2, 2, f1)) { + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_FF("require that item raw score can be obtained", SingleLabel("label", 1), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 5.0); + EXPECT_EQUAL(5.0, f2.getScore(10)); +} + +TEST_FF("require that other raw scores are ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 10, 2.0); + f2.setBarScore(0, 10, 5.0); + f2.setBarScore(1, 10, 6.0); + EXPECT_EQUAL(2.0, f2.getScore(10)); +} + +TEST_FF("require that stale raw score is ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 5, 2.0); + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/native_dot_product/.gitignore b/searchlib/src/tests/features/native_dot_product/.gitignore new file mode 100644 index 00000000000..d95f15f4492 --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/.gitignore @@ -0,0 +1 @@ +searchlib_native_dot_product_test_app diff --git 
a/searchlib/src/tests/features/native_dot_product/CMakeLists.txt b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..2dad758c82d --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_native_dot_product_test_app + SOURCES + native_dot_product_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_native_dot_product_test_app COMMAND searchlib_native_dot_product_test_app) diff --git a/searchlib/src/tests/features/native_dot_product/FILES b/searchlib/src/tests/features/native_dot_product/FILES new file mode 100644 index 00000000000..ab007656448 --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/FILES @@ -0,0 +1 @@ +native_dot_product_test.cpp diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp new file mode 100644 index 00000000000..3e3702cceec --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const std::string featureName("nativeDotProduct(foo)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +std::vector vec() { + std::vector ret; + return ret; +} + +std::vector vec(uint32_t w1) { + std::vector ret; + ret.push_back(w1); + return ret; +} + +std::vector vec(uint32_t w1, uint32_t w2) { + std::vector ret; + ret.push_back(w1); + ret.push_back(w2); + return ret; +} + +std::vector vec(uint32_t w1, uint32_t w2, uint32_t w3) { + std::vector ret; + ret.push_back(w1); + ret.push_back(w2); + ret.push_back(w3); + return ret; +} + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector fooHandles; + std::vector barHandles; + RankFixture(const std::vector &fooWeights, + const std::vector &barWeights) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooWeights.size(); ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + 
term.addField(fieldId).setHandle(fooHandles.back()); + term.setWeight(search::query::Weight(fooWeights[i])); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barWeights.size(); ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(barHandles.back()); + term.setWeight(search::query::Weight(barWeights[i])); + queryEnv.getTerms().push_back(term); + } + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + feature_t getScore(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setFooWeight(uint32_t i, uint32_t docId, int32_t index_weight) { + ASSERT_LESS(i, fooHandles.size()); + TermFieldMatchDataPosition pos; + pos.setElementWeight(index_weight); + rankProgram->match_data().resolveTermField(fooHandles[i])->reset(docId); + rankProgram->match_data().resolveTermField(fooHandles[i])->appendPosition(pos); + } + void setBarWeight(uint32_t i, uint32_t docId, int32_t index_weight) { + ASSERT_LESS(i, barHandles.size()); + TermFieldMatchDataPosition pos; + pos.setElementWeight(index_weight); + rankProgram->match_data().resolveTermField(barHandles[i])->reset(docId); + rankProgram->match_data().resolveTermField(barHandles[i])->appendPosition(pos); + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("nativeDotProduct"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", NativeDotProductBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on index field", 
NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "foo"))); +} + +TEST_FF("require that setup can be done on attribute field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "bar"))); +} + +TEST_FF("require that setup fails for unknown field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "unknown"))); +} + +TEST_F("require that not searching a field will give it 0.0 dot product", RankFixture(vec(), vec(1, 2, 3))) { + EXPECT_EQUAL(0.0, f1.getScore(10)); +} + +TEST_F("require that dot product works for single match", RankFixture(vec(5), vec())) { + f1.setFooWeight(0, 10, 7); + EXPECT_EQUAL(35, f1.getScore(10)); +} + +TEST_F("require that dot product works for multiple matches", RankFixture(vec(1, 3, 5), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, 4); + f1.setFooWeight(2, 10, 6); + EXPECT_EQUAL(44, f1.getScore(10)); +} + +TEST_F("require that stale data is ignored", RankFixture(vec(1, 3, 5), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 9, 4); + f1.setFooWeight(2, 10, 6); + EXPECT_EQUAL(32, f1.getScore(10)); +} + +TEST_F("require that data from other fields is ignored", RankFixture(vec(1, 3), vec(5, 7))) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, 4); + f1.setBarWeight(0, 10, 6); + f1.setBarWeight(1, 10, 8); + EXPECT_EQUAL(14, f1.getScore(10)); +} + +TEST_F("require that negative weights in the index works", RankFixture(vec(1, 3), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, -4); + EXPECT_EQUAL(-10, 
f1.getScore(10)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp new file mode 100644 index 00000000000..b0bac4b576d --- /dev/null +++ b/searchlib/src/tests/features/prod_features.cpp @@ -0,0 +1,1937 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("prod_features_test"); + +#include "prod_features.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::FloatingPointAttribute; +using search::StringAttribute; +using search::WeightedSetStringExtAttribute; +using search::attribute::WeightedEnumContent; + +typedef AttributeVector::SP AttributePtr; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + +const double EPS = 10e-6; + + +TEST_APPHOOK(Test); + +int +Test::Main() +{ + TEST_INIT("prod_features_test"); + + // Configure factory with all known blueprints. + setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + // Test all features. 
+ TEST_DO(testFramework()); TEST_FLUSH(); + TEST_DO(testFtLib()); TEST_FLUSH(); + TEST_DO(testAge()); TEST_FLUSH(); + TEST_DO(testAttribute()); TEST_FLUSH(); + TEST_DO(testAttributeMatch()); TEST_FLUSH(); + TEST_DO(testCloseness()); TEST_FLUSH(); + TEST_DO(testDistance()); TEST_FLUSH(); + TEST_DO(testDistanceToPath()); TEST_FLUSH(); + TEST_DO(testDotProduct()); TEST_FLUSH(); + TEST_DO(testFieldLength()); TEST_FLUSH(); + TEST_DO(testFieldMatch()); TEST_FLUSH(); + TEST_DO(testFieldTermMatch()); TEST_FLUSH(); + TEST_DO(testFirstPhase()); TEST_FLUSH(); + TEST_DO(testForeach()); TEST_FLUSH(); + TEST_DO(testFreshness()); TEST_FLUSH(); + TEST_DO(testMatch()); TEST_FLUSH(); + TEST_DO(testMatches()); TEST_FLUSH(); + TEST_DO(testNow()); TEST_FLUSH(); + TEST_DO(testQuery()); TEST_FLUSH(); + TEST_DO(testQueryTermCount()); TEST_FLUSH(); + TEST_DO(testRandom()); TEST_FLUSH(); + TEST_DO(testRankingExpression()); TEST_FLUSH(); + TEST_DO(testTerm()); TEST_FLUSH(); + TEST_DO(testTermDistance()); TEST_FLUSH(); + TEST_DO(testUtils()); TEST_FLUSH(); + + TEST_DONE(); + return 0; +} + + +void +Test::testFtLib() +{ + { // toQuery + FtQuery q = FtUtil::toQuery("a b!50 0.5:c!200%0.5 d%0.3 e!300 0.3:f "); + ASSERT_TRUE(q.size() == 6); + EXPECT_EQUAL(q[0].term, vespalib::string("a")); + EXPECT_EQUAL(q[0].termWeight.percent(), 100); + EXPECT_APPROX(q[0].connexity, 0.1f, EPS); + EXPECT_APPROX(q[0].significance, 0.1f, EPS); + EXPECT_EQUAL(q[1].term, vespalib::string("b")); + EXPECT_EQUAL(q[1].termWeight.percent(), 50); + EXPECT_APPROX(q[1].connexity, 0.1f, EPS); + EXPECT_APPROX(q[1].significance, 0.1f, EPS); + EXPECT_EQUAL(q[2].term, vespalib::string("c")); + EXPECT_EQUAL(q[2].termWeight.percent(), 200); + EXPECT_APPROX(q[2].connexity, 0.5f, EPS); + EXPECT_APPROX(q[2].significance, 0.5f, EPS); + EXPECT_EQUAL(q[3].term, vespalib::string("d")); + EXPECT_EQUAL(q[3].termWeight.percent(), 100); + EXPECT_APPROX(q[3].connexity, 0.1f, EPS); + EXPECT_APPROX(q[3].significance, 0.3f, EPS); + 
EXPECT_EQUAL(q[4].term, vespalib::string("e")); + EXPECT_EQUAL(q[4].termWeight.percent(), 300); + EXPECT_APPROX(q[4].connexity, 0.1f, EPS); + EXPECT_APPROX(q[4].significance, 0.1f, EPS); + EXPECT_EQUAL(q[5].term, vespalib::string("f")); + EXPECT_EQUAL(q[5].termWeight.percent(), 100); + EXPECT_APPROX(q[5].connexity, 0.3f, EPS); + EXPECT_APPROX(q[5].significance, 0.1f, EPS); + } + { // toRankResult + RankResult rr = toRankResult("foo", "a:0.5 b:-0.5 c:2 d:3 "); + std::vector keys = rr.getKeys(); + ASSERT_TRUE(keys.size() == 4); + EXPECT_EQUAL(keys[0], vespalib::string("foo.a")); + EXPECT_EQUAL(keys[1], vespalib::string("foo.b")); + EXPECT_EQUAL(keys[2], vespalib::string("foo.c")); + EXPECT_EQUAL(keys[3], vespalib::string("foo.d")); + EXPECT_APPROX(rr.getScore("foo.a"), 0.5f, EPS); + EXPECT_APPROX(rr.getScore("foo.b"), -0.5f, EPS); + EXPECT_APPROX(rr.getScore("foo.c"), 2.0f, EPS); + EXPECT_APPROX(rr.getScore("foo.d"), 3.0f, EPS); + } +} + + +void +Test::testAge() +{ + { // Test blueprint + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime2"); + + AgeBlueprint pt; + EXPECT_TRUE(assertCreateInstance(pt, "age")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("datetime"), in.add("now"), out.add("out")); + FT_SETUP_FAIL(pt, idx_env, params.add("datetime2")); + + FT_DUMP_EMPTY(_factory, "age"); + } + + { // Test executor + assertAge(0, "doctime", 60, 120); + assertAge(60, "doctime", 180, 120); + assertAge(15000000000, "doctime", 20000000000, 5000000000); + } +} + +void +Test::assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime) +{ + vespalib::string feature = "age(" + attr + ")"; + FtFeatureTest ft(_factory, feature); + setupForAgeTest(ft, docTime); + ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME, + 
vespalib::make_string("%" PRIu64, now)); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expAge))); +} + +void +Test::setupForAgeTest(FtFeatureTest & ft, uint64_t docTime) +{ + AttributePtr doctime = AttributeFactory::createAttribute("doctime", AVC(AVBT::INT64, AVCT::SINGLE)); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "doctime"); + doctime->addReservedDoc(); + doctime->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(doctime); + (static_cast(doctime.get()))->update(1, docTime); + doctime->commit(); +} + +void +Test::testAttribute() +{ + AttributeBlueprint prototype; + { + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + + EXPECT_TRUE(assertCreateInstance(prototype, "attribute")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, idx_env, params); // expects 1 - 2 params + + FT_SETUP_OK(prototype, idx_env, params.add("bar"), in, + out.add("value").add("weight").add("contains").add("count")); + FT_SETUP_OK(prototype, idx_env, params.add("0"), in, out); + + FT_DUMP_EMPTY(_factory, "attribute"); + } + { // single attributes + RankResult exp; + exp.addScore("attribute(sint)", 10). + addScore("attribute(sint,0)", 10). + addScore("attribute(sfloat)", 60.5f). + addScore("attribute(sstr)", (feature_t)vespalib::hash_code("foo")). + addScore("attribute(sint).count", 1). + addScore("attribute(sfloat).count", 1). + addScore("attribute(sstr).count", 1). + addScore("attribute(udefint)", search::attribute::getUndefined()). + addScore("attribute(udeffloat)", search::attribute::getUndefined()). + addScore("attribute(udefstr)", (feature_t)vespalib::hash_code("")); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat"). 
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // array attributes + RankResult exp; + exp.addScore("attribute(aint)", 0). + addScore("attribute(aint,0)", 20). + addScore("attribute(aint,1)", 30). + addScore("attribute(aint,2)", 0). + addScore("attribute(afloat,0)", 70.5f). + addScore("attribute(afloat,1)", 80.5f). + addScore("attribute(astr,0)", (feature_t)vespalib::hash_code("bar")). + addScore("attribute(astr,1)", (feature_t)vespalib::hash_code("baz")). + addScore("attribute(aint).count", 2). + addScore("attribute(aint,0).count", 0). + addScore("attribute(afloat).count", 2). + addScore("attribute(afloat,0).count", 0). + addScore("attribute(astr).count", 2). + addScore("attribute(astr,0).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"). + addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat"). + addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // weighted set attributes + RankResult exp; + exp.addScore("attribute(wsint).value", 0). + addScore("attribute(wsint).weight", 0). + addScore("attribute(wsint).contains", 0). + addScore("attribute(wsint,100).value", 0). + addScore("attribute(wsint,100).weight", 0). + addScore("attribute(wsint,100).contains", 0). + addScore("attribute(wsint,40).value", 40). + addScore("attribute(wsint,40).weight", 10). + addScore("attribute(wsint,40).contains", 1). + addScore("attribute(wsint,50).value", 50). + addScore("attribute(wsint,50).weight", 20). + addScore("attribute(wsint,50).contains", 1). 
+ addScore("attribute(wsfloat).value", 0). + addScore("attribute(wsfloat).weight", 0). + addScore("attribute(wsfloat).contains", 0). + addScore("attribute(wsfloat,1000.5).value", 0). + addScore("attribute(wsfloat,1000.5).weight", 0). + addScore("attribute(wsfloat,1000.5).contains", 0). + addScore("attribute(wsfloat,90.5).value", 90.5f). + addScore("attribute(wsfloat,90.5).weight", -30). + addScore("attribute(wsfloat,90.5).contains", 1). + addScore("attribute(wsfloat,100.5).value", 100.5f). + addScore("attribute(wsfloat,100.5).weight", -40). + addScore("attribute(wsfloat,100.5).contains", 1). + addScore("attribute(wsstr).value", 0). + addScore("attribute(wsstr).weight", 0). + addScore("attribute(wsstr).contains", 0). + addScore("attribute(wsstr,foo).value", 0). + addScore("attribute(wsstr,foo).weight", 0). + addScore("attribute(wsstr,foo).contains", 0). + addScore("attribute(wsstr,qux).value", (feature_t)vespalib::hash_code("qux")). + addScore("attribute(wsstr,qux).weight", 11). + addScore("attribute(wsstr,qux).contains", 1). + addScore("attribute(wsstr,quux).value", (feature_t)vespalib::hash_code("quux")). + addScore("attribute(wsstr,quux).weight", 12). + addScore("attribute(wsstr,quux).contains", 1). + addScore("attribute(wsint).count", 2). + addScore("attribute(wsint,40).count", 0). + addScore("attribute(wsfloat).count", 2). + addScore("attribute(wsfloat,90.5).count", 0). + addScore("attribute(wsstr).count", 2). + addScore("attribute(wsstr,qux).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"). + addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat"). + addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // unique only attribute + RankResult exp; + exp.addScore("attribute(unique).value", 0). 
+ addScore("attribute(unique).weight", 0). + addScore("attribute(unique).contains", 0). + addScore("attribute(unique).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + //ASSERT_TRUE(ft.execute(exp)); + } +} + + +void +Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env) +{ + // setup an original attribute manager with attributes + std::vector avs; + avs.push_back(AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE))); // 0 + avs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); // 1 + avs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); // 2 + avs.push_back(AttributeFactory::createAttribute("sfloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 3 + avs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY))); // 4 + avs.push_back(AttributeFactory::createAttribute("wsfloat",AVC(AVBT::FLOAT, AVCT::WSET))); // 5 + avs.push_back(AttributeFactory::createAttribute("sstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 6 + avs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); // 7 + avs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); // 8 + avs.push_back(AttributeFactory::createAttribute("udefint", AVC(AVBT::INT32, AVCT::SINGLE))); // 9 + avs.push_back(AttributeFactory::createAttribute("udeffloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 10 + avs.push_back(AttributeFactory::createAttribute("udefstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 11 + + // simulate a unique only attribute as specified in sd + AVC cfg(AVBT::INT32, AVCT::SINGLE); + cfg.setFastSearch(true); + avs.push_back(AttributeFactory::createAttribute("unique", cfg)); // 9 + + if (setup_env) { + // register attributes in index environment + ft.getIndexEnv().getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint") + 
.addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique"); + } + + for (uint32_t i = 0; i < avs.size(); ++i) { + avs[i]->addReservedDoc(); + avs[i]->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(avs[i]); + } + + // integer attributes + (static_cast(avs[0].get()))->update(1, 10); + (static_cast(avs[1].get()))->append(1, 20, 0); + (static_cast(avs[1].get()))->append(1, 30, 0); + (static_cast(avs[2].get()))->append(1, 40, 10); + (static_cast(avs[2].get()))->append(1, 50, 20); + (static_cast(avs[9].get()))->update(1, search::attribute::getUndefined()); + // feature_t attributes + (static_cast(avs[3].get()))->update(1, 60.5f); + (static_cast(avs[4].get()))->append(1, 70.5f, 0); + (static_cast(avs[4].get()))->append(1, 80.5f, 0); + (static_cast(avs[5].get()))->append(1, 90.5f, -30); + (static_cast(avs[5].get()))->append(1, 100.5f, -40); + (static_cast(avs[10].get()))->update(1, search::attribute::getUndefined()); + // string attributes + (static_cast(avs[6].get()))->update(1, "foo"); + (static_cast(avs[7].get()))->append(1, "bar", 0); + (static_cast(avs[7].get()))->append(1, "baz", 0); + (static_cast(avs[8].get()))->append(1, "qux", 11); + (static_cast(avs[8].get()))->append(1, "quux", 12); + 
(static_cast(avs[11].get()))->update(1, ""); + + for (uint32_t i = 0; i < avs.size() - 1; ++i) { // do not commit the noupdate attribute + avs[i]->commit(); + } + + // save 'sint' and load it into 'unique' (only way to set a noupdate attribute) + ASSERT_TRUE(avs[0]->saveAs(avs[9]->getBaseFileName())); + ASSERT_TRUE(avs[9]->load()); +} + +void +Test::testCloseness() +{ + { // Test blueprint. + ClosenessBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "closeness")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("name"), in.add("distance(name)"), out.add("out").add("logscale")); + + FT_DUMP_EMPTY(_factory, "closeness"); + } + + { // Test executor. + assertCloseness(1, "pos", 0); + assertCloseness(0.8, "pos", 1802661); + assertCloseness(0, "pos", 9013306); + // use non-existing attribute -> default distance + assertCloseness(0, "no", 0); + + // use non-default maxDistance + assertCloseness(1, "pos", 0, 100); + assertCloseness(0.5, "pos", 50, 100); + assertCloseness(0, "pos", 100, 100); + assertCloseness(0, "pos", 101, 100); + + // test logscale using halfResponse (define that x = 10 should give 0.5 -> s = -10^2/(2*10 - 100) = 1.25 (scale distance)) + assertCloseness(1, "pos", 0, 100, 10); + assertCloseness(0.5, "pos", 10, 100, 10); + assertCloseness(0, "pos", 100, 100, 10); + assertCloseness(0, "pos", 101, 100, 10); + } +} + +void +Test::assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance, double halfResponse) +{ + vespalib::string feature = "closeness(" + attr + ")"; + FtFeatureTest ft(_factory, feature); + std::vector > positions; + int32_t x = 0; + positions.push_back(std::make_pair(x, x)); + setupForDistanceTest(ft, "pos", positions, false); + ft.getQueryEnv().getLocation().setXPosition((int)distance); + ft.getQueryEnv().getLocation().setValid(true); + if (maxDistance > 0) { + ft.getIndexEnv().getProperties().add(feature + ".maxDistance", + vespalib::make_string("%u", 
(unsigned int)maxDistance)); + } + if (halfResponse > 0) { + ft.getIndexEnv().getProperties().add(feature + ".halfResponse", + vespalib::make_string("%f", halfResponse)); + feature.append(".logscale"); + } + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, exp))); +} + +// Tests the fieldLength feature: first the blueprint setup rules, then the executor, +// which is expected to report the field length stored in the match data. +void +Test::testFieldLength() +{ + FieldLengthBlueprint pt; + + { // Test blueprint. + EXPECT_TRUE(assertCreateInstance(pt, "fieldLength")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FtIndexEnvironment ie; + ie.getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + // NOTE(review): the four negative cases below do not pass 'ie', unlike the FT_SETUP_OK + // that follows; presumably they then run against an environment without these fields, + // so they may not exercise the reasons given in their comments - confirm. + FT_SETUP_FAIL(pt, params.add("qux")); // does not exist + FT_SETUP_FAIL(pt, params.clear().add("bar")); // not an index + FT_SETUP_FAIL(pt, params.clear().add("afoo")); // wrong collection type + FT_SETUP_FAIL(pt, params.clear().add("wfoo")); // wrong collection type + FT_SETUP_OK(pt, ie, params.clear().add("foo"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "fieldLength"); + FT_DUMP_EMPTY(_factory, "fieldLength", ie); + } + + { // Test executor. + for (uint32_t i = 0; i < 10; ++i) { + StringList features; + features.add("fieldLength(foo)").add("fieldLength(baz)"); + FtFeatureTest ft(_factory, features); + // setup is expected to fail until the fields are added to the index environment + ASSERT_TRUE(!ft.setup()); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"). 
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar").addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + // field lengths i + 10 and i + 20 are written to the match data and expected back as scores + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->addOccurence("foo", 0, i)); + ASSERT_TRUE(mdb->setFieldLength("foo", i + 10)); + ASSERT_TRUE(mdb->addOccurence("baz", 0, i)); + ASSERT_TRUE(mdb->setFieldLength("baz", i + 20)); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(RankResult() + .addScore("fieldLength(foo)", (feature_t)i + 10) + .addScore("fieldLength(baz)", (feature_t)i + 20))); + } + } +} + + +// Runs the fieldMatch(foo) feature for the given query and field content and asserts that +// the outputs match 'spec' (converted with toRankResult) within an epsilon of 1e-4. +// 'params', 'totalTermWeight' and 'totalSignificance' are forwarded to setupFieldMatch. +void +Test::assertFieldMatch(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + const fieldmatch::Params * params, + uint32_t totalTermWeight, + feature_t totalSignificance) +{ + LOG(info, "assertFieldMatch('%s', '%s', '%s', (%u))", spec.c_str(), query.c_str(), field.c_str(), totalTermWeight); + + // Setup feature test. + vespalib::string feature = "fieldMatch(foo)"; + FtFeatureTest ft(_factory, feature); + + setupFieldMatch(ft, "foo", query, field, params, totalTermWeight, totalSignificance, 1); + + // Execute and compare results. + RankResult rr = toRankResult(feature, spec); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr)); +} + +// Convenience overload: no fieldmatch::Params (NULL) and an explicit total term weight. +// totalSignificance is not passed here - presumably defaulted in the declaration; confirm. +void +Test::assertFieldMatch(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + uint32_t totalTermWeight) +{ + assertFieldMatch(spec, query, field, NULL, totalTermWeight); +} + +// Convenience variant: no fieldmatch::Params (NULL), total term weight 0 and an explicit +// total significance. +void +Test::assertFieldMatchTS(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + feature_t totalSignificance) +{ + assertFieldMatch(spec, query, field, NULL, 0, totalSignificance); +} + + +void +Test::testFirstPhase() +{ + { // Test blueprint. 
+ FirstPhaseBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "firstPhase")); + + FtIndexEnvironment ie; + ie.getProperties().add(indexproperties::rank::FirstPhase::NAME, "random"); // override nativeRank dependency + + StringList params, in, out; + FT_SETUP_OK(pt, ie, params, in.add("random"), out.add("score")); + FT_SETUP_FAIL(pt, params.add("foo")); + params.clear(); + + FT_DUMP(_factory, "firstPhase", ie, StringList().add("firstPhase")); + } + + { // Test executor. + FtFeatureTest ft(_factory, "firstPhase"); + ft.getIndexEnv().getProperties().add(indexproperties::rank::FirstPhase::NAME, "value(10)"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(10.0f)); + } +} + +void +Test::testForeach() +{ + { // Test blueprint. + ForeachBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "foreach")); + + StringList params, in, out; + out.add("value"); + FT_SETUP_FAIL(pt, params); + // illegal dimension + FT_SETUP_FAIL(pt, params.add("squares").add("N").add("foo").add("true").add("sum")); + // illegal condition + FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("false").add("sum")); + // illegal operation + FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("true").add("dotproduct")); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + + // various dimensions + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(0)").add("foo(1)").add("foo(2)").add("foo(3)").add("foo(4)"). + add("foo(5)").add("foo(6)").add("foo(7)").add("foo(8)").add("foo(9)"). 
+ add("foo(10)").add("foo(11)").add("foo(12)").add("foo(13)").add("foo(14)").add("foo(15)"), out); + ie.getProperties().add("foreach.maxTerms", "1"); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), + in.clear().add("foo"), out); + FT_SETUP_OK(pt, ie, params.clear().add("fields").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(foo)").add("foo(bar)"), out); + FT_SETUP_OK(pt, ie, params.clear().add("attributes").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(baz)"), out); + + // various conditions + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in.clear().add("foo"), out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("<4").add("sum"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add(">4").add("sum"), in, out); + // various operations + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("product"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("average"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("max"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("min"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("count"), in, out); + + FT_DUMP_EMPTY(_factory, "foreach"); + } + { // Test executor + // single loop + assertForeachOperation( 16.5, "true", "sum"); + assertForeachOperation(-2106, "true", "product"); + assertForeachOperation( 3.3, "true", "average"); + assertForeachOperation( 8, "true", "max"); + assertForeachOperation( -4.5, "true", "min"); + assertForeachOperation( 5, "true", "count"); + + assertForeachOperation(3, "\">4\"", "count"); + 
assertForeachOperation(2, "\">4.5\"", "count"); + assertForeachOperation(2, "\"<4\"", "count"); + assertForeachOperation(2, "\"<4.5\"", "count"); + assertForeachOperation(4, "\">0\"", "count"); + assertForeachOperation(1, "\"<0\"", "count"); + assertForeachOperation(4, "\">-4.5\"", "count"); + assertForeachOperation(1, "\"<-4.4\"", "count"); + + { // average without any values + FtFeatureTest ft(_factory, "foreach(fields,N,value(N),true,average)"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(0)); + } + + { // double loop + vespalib::string feature = + "foreach(fields,N,foreach(attributes,M,rankingExpression(\"value(N)+value(M)\"),true,product),true,sum)"; + LOG(info, "double loop feature: '%s'", feature.c_str()); + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getProperties().add("foreach.maxTerms", "1"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "1"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "3"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "4"); + // ((1 + 3) * (1 + 4)) + ((2 + 3) * (2 + 4)) = 4 * 5 + 5 * 6 = 20 + 30 = 50 + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(50)); + ASSERT_TRUE(ft.execute(50)); // check that reset works + } + } +} + +void +Test::assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op) +{ + vespalib::string feature = "foreach(fields,N,value(N)," + cond + "," + op + ")"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "4.5"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "8"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, 
"6.5"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "-4.5"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); // check that reset works +} + + +void +Test::testFreshness() +{ + { // Test blueprint. + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "name"); + + FreshnessBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "freshness")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("name"), in.add("age(name)"), out.add("out").add("logscale")); + + FT_DUMP_EMPTY(_factory, "freshness"); + } + + { // Test executor. + assertFreshness(1, "doctime", 0); + assertFreshness(0.5, "doctime", 3*15*24*60*60); + assertFreshness(0, "doctime", 3*30*24*60*60); + // use non-default maxAge + assertFreshness(1, "doctime", 0, 120); + assertFreshness(0.75, "doctime", 30, 120); + assertFreshness(0.5, "doctime", 60, 120); + assertFreshness(0, "doctime", 120, 120); + assertFreshness(0, "doctime", 121, 120); + + // test logscale + assertFreshness(1, "doctime", 0, 0, 0, true); + assertFreshness(0.5, "doctime", 7*24*60*60, 0, 0, true); + assertFreshness(0, "doctime", 3*30*24*60*60, 0, 0, true); + // use non-default maxAge & halfResponse + assertFreshness(1, "doctime", 0, 120, 30, true); + assertFreshness(0.5, "doctime", 30, 120, 30, true); // half response after 30 secs + assertFreshness(0, "doctime", 120, 120, 30, true); + assertFreshness(0, "doctime", 121, 120, 30, true); + // test invalid half response + assertFreshness(0.5, "doctime", 1, 120, 0.5, true); // half response is set to 1 + assertFreshness(0.5, "doctime", 59, 120, 70, true); // half response is set to 120/2 - 1 + } +} + +void +Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge, double halfResponse, bool logScale) +{ + vespalib::string feature = "freshness(" + attr + ")"; 
+ FtFeatureTest ft(_factory, feature); + setupForAgeTest(ft, 60); // time = 60 + if (maxAge > 0) { + ft.getIndexEnv().getProperties().add("freshness(" + attr + ").maxAge", + vespalib::make_string("%u", maxAge)); + } + if (halfResponse > 0) { + ft.getIndexEnv().getProperties().add("freshness(" + attr + ").halfResponse", + vespalib::make_string("%f", halfResponse)); + } + if (logScale) { + feature.append(".logscale"); + } + ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME, + vespalib::make_string("%u", age + 60)); // now = age + 60 + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS))); +} + +void +Test::testDistance() +{ + { // Test blueprint. + DistanceBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "distance")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("pos"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "distance"); + } + + { // Test executor. 
+ + { // test 2D single location (zcurve) + assert2DZDistance(static_cast(sqrt(650.0f)), "5:-5", 10, 20); + assert2DZDistance(static_cast(sqrt(250.0f)), "5:-5", 10, -20); + assert2DZDistance(static_cast(sqrt(450.0f)), "5:-5", -10, -20); + assert2DZDistance(static_cast(sqrt(850.0f)), "5:-5", -10, 20); + assert2DZDistance(static_cast(sqrt(250.0f)), "5:-5", 15, -20, 0x80000000); // 2^31 + } + + { // test 2D multi location (zcurve) + vespalib::string positions = "5:-5,35:0,5:40,35:-40"; + assert2DZDistance(static_cast(sqrt(425.0f)), positions, 10, 20); + assert2DZDistance(static_cast(sqrt(250.0f)), positions, 10, -20); + assert2DZDistance(static_cast(sqrt(450.0f)), positions, -10, -20); + assert2DZDistance(static_cast(sqrt(625.0f)), positions, -10, 20); + assert2DZDistance(static_cast(sqrt(250.0f)), positions, 15, -20, 0x80000000); // 2^31 + assert2DZDistance(static_cast(sqrt(425.0f)), positions, 45, -20, 0x80000000); // 2^31 + } + + { // test default distance + { // non-existing attribute + FtFeatureTest ft(_factory, "distance(pos)"); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute type (float) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute type (string) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + 
ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute collection type (weighted set) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + } + } +} + +void +Test::setupForDistanceTest(FtFeatureTest &ft, const vespalib::string & attrName, + const std::vector > & positions, bool zcurve) +{ + AttributePtr pos = AttributeFactory::createAttribute(attrName, AVC(AVBT::INT64, AVCT::ARRAY)); + + pos->addReservedDoc(); + pos->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(pos); + + IntegerAttribute * ia = static_cast(pos.get()); + for (uint32_t i = 0; i < positions.size(); ++i) { + if (zcurve) { + ia->append(1, vespalib::geo::ZCurve::encode(positions[i].first, positions[i].second), 0); + } else { + ia->append(1, positions[i].first, 0); + } + } + + pos->commit(); +} + +void +Test::assert2DZDistance(feature_t exp, const vespalib::string & positions, + int32_t xquery, int32_t yquery, uint32_t xAspect) +{ + LOG(info, "assert2DZDistance(%g, %s, %d, %d, %u)", exp, positions.c_str(), xquery, yquery, xAspect); + FtFeatureTest ft(_factory, "distance(pos)"); + std::vector ta = FtUtil::tokenize(positions, ","); + std::vector > pos; + for (uint32_t i = 0; i < ta.size(); ++i) { + std::vector tb = FtUtil::tokenize(ta[i], ":"); + int32_t x = util::strToNum(tb[0]); + int32_t y = util::strToNum(tb[1]); + pos.push_back(std::make_pair(x, y)); + } + setupForDistanceTest(ft, "pos", pos, true); + ft.getQueryEnv().getLocation().setXPosition(xquery); + ft.getQueryEnv().getLocation().setYPosition(yquery); + ft.getQueryEnv().getLocation().setXAspect(xAspect); + ft.getQueryEnv().getLocation().setValid(true); + 
ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4). + addScore("distance(pos)", exp))); +} + +void +Test::testDistanceToPath() +{ + { + // Test blueprint. + DistanceToPathBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "distanceToPath")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("pos"), in, out.add("distance").add("traveled").add("product")); + FT_SETUP_FAIL(pt, params.add("foo")); + + FT_DUMP_EMPTY(_factory, "distanceToPath"); + } + + { + // Test executor. + std::vector > pos; + pos.push_back(std::make_pair(0, 0)); + + // invalid path + assertDistanceToPath(pos, "a"); + assertDistanceToPath(pos, "("); + assertDistanceToPath(pos, "(a"); + assertDistanceToPath(pos, "(a)"); + assertDistanceToPath(pos, "(-1)"); + assertDistanceToPath(pos, "(-1,1)"); + assertDistanceToPath(pos, "(-1,1,1)"); + assertDistanceToPath(pos, "(-1 1 1 1)"); + + // path on either side of document + assertDistanceToPath(pos, "(-1,1,1,1)", 1, 0.5, 2); + assertDistanceToPath(pos, "(-1,-1,1,-1)", 1, 0.5, -2); + + // zero length path + assertDistanceToPath(pos, "(0,0,0,0)", 0, 0); + assertDistanceToPath(pos, "(0,0,0,0,0,0)", 0, 0); + assertDistanceToPath(pos, "(0,1,0,1)", 1, 0); + assertDistanceToPath(pos, "(0,1,0,1,0,1)", 1, 0); + + // path crosses document + assertDistanceToPath(pos, "(-1,1,1,-1)", 0, 0.5); + assertDistanceToPath(pos, "(-2,2,2,-2)", 0, 0.5); + assertDistanceToPath(pos, "(-1,1,3,-3)", 0, 0.25); + + // intersection outside segments + assertDistanceToPath(pos, "(1,0,2,0)", 1, 0); // before + assertDistanceToPath(pos, "(0,1,0,2)", 1, 0); + assertDistanceToPath(pos, "(-2,0,-1,0)", 1, 1); // after + assertDistanceToPath(pos, "(0,-2,0,-1)", 1, 1); + + // various paths + assertDistanceToPath(pos, "(-3,1,2,1,2,-2,-2,-2)", 1, 0.25, 5); + assertDistanceToPath(pos, "(-3,2,2,2,2,-1,0,-1)", 1, 1, 2); + + // multiple document locations + pos.push_back(std::make_pair(0, 1)); + assertDistanceToPath(pos, 
"(-1,1,1,1)", 0, 0.5); + assertDistanceToPath(pos, "(-2,-1,-1,1)", 1, 1, 2); + assertDistanceToPath(pos, "(-1,0.25,1,0.25)", 0.25, 0.5, 0.5); + + { + // Test defaults. + RankResult res; + res.addScore("distanceToPath(pos).distance", DistanceExecutor::DEFAULT_DISTANCE); + res.addScore("distanceToPath(pos).traveled", 1); + { + // Non-existing attribute. + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute type (float). + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute type (string). + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute collection type (weighted set). 
+ FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + } + } +} + +void +Test::assertDistanceToPath(const std::vector > pos, + const vespalib::string &path, feature_t distance, feature_t traveled, feature_t product) +{ + LOG(info, "Testing distance to path '%s' with %zd document locations.", path.c_str(), pos.size()); + + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + setupForDistanceTest(ft, "pos", pos, true); + + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", path); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult() + .addScore("distanceToPath(pos).distance", distance) + .addScore("distanceToPath(pos).traveled", traveled) + .addScore("distanceToPath(pos).product", product))); +} + +void +Test::setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType) +{ + AttributePtr type = AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE)); + + type->addReservedDoc(); + type->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(type); + + (static_cast(type.get()))->update(1, docType); + type->commit(); +} + +void +Test::testDotProduct() +{ + { // Test blueprint. 
+ FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "attribute"); + + DotProductBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "dotProduct")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("attribute").add("vector"), in, out.add("scalar")); + + FT_DUMP_EMPTY(_factory, "dotProduct"); + } + + { // Test vector parser + { // string enum vector + FtFeatureTest ft(_factory, "value(0)"); + setupForDotProductTest(ft); + search::AttributeGuard::UP ag(ft.getIndexEnv().getAttributeManager().getAttribute("wsstr")); + const search::attribute::IAttributeVector * sv = ag->operator->(); + EXPECT_TRUE(sv->hasEnum()); + search::attribute::EnumHandle e; + { + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("()", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("(a;1)", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("(a:1)", out); + EXPECT_EQUAL(out.getVector().size(), 1u); + EXPECT_TRUE(sv->findEnum("a", e)); + EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 1.0); + } + std::vector v = {"(b:2.5,c:-3.5)", "{b:2.5,c:-3.5}"}; + for(const vespalib::string & s : v) { + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse(s, out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_TRUE(sv->findEnum("b", e)); + EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_TRUE(sv->findEnum("c", e)); + EXPECT_EQUAL(out.getVector()[1].first, e); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + { // test funky syntax + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("( a: 1, b:2 ,c: , :3)", out); + EXPECT_EQUAL(out.getVector().size(), 3u); + EXPECT_TRUE(sv->findEnum("a", e)); + 
EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 1); + EXPECT_TRUE(sv->findEnum("b", e)); + EXPECT_EQUAL(out.getVector()[1].first, e); + EXPECT_EQUAL(out.getVector()[1].second, 2); + EXPECT_TRUE(sv->findEnum("c", e)); + EXPECT_EQUAL(out.getVector()[2].first, e); + EXPECT_EQUAL(out.getVector()[2].second, 0); + } + { // strings not in attribute vector + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("(not:1)", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + } + } + { // string vector + dotproduct::wset::StringVector out; + WeightedSetParser::parse("(b:2.5,c:-3.5)", out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_EQUAL(out.getVector()[0].first, "b"); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_EQUAL(out.getVector()[1].first, "c"); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + { // integer vector + dotproduct::wset::IntegerVector out; + WeightedSetParser::parse("(20:2.5,30:-3.5)", out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_EQUAL(out.getVector()[0].first, 20); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_EQUAL(out.getVector()[1].first, 30); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + } + { // Array parser + std::vector v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"}; + for(const vespalib::string & s : v) { + std::vector out; + ArrayParser::parse(s, out); + EXPECT_EQUAL(8u, out.size()); + EXPECT_EQUAL(2, out[0]); + EXPECT_EQUAL(-3, out[1]); + EXPECT_EQUAL(0, out[2]); + EXPECT_EQUAL(0, out[3]); + EXPECT_EQUAL(0, out[4]); + EXPECT_EQUAL(0, out[5]); + EXPECT_EQUAL(0, out[6]); + EXPECT_EQUAL(-3, out[7]); + } + } + { + vespalib::string s = "[[1:3]]"; + std::vector out; + ArrayParser::parse(s, out); + EXPECT_EQUAL(0u, out.size()); + } + + { // Test executor. 
+ { // string enum attribute + // docId = 1 + assertDotProduct(0, "()"); + assertDotProduct(0, "(f:5)"); + assertDotProduct(0, "(f:5,g:5)"); + assertDotProduct(-5, "(a:-5)"); + assertDotProduct(25, "(e:5)"); + assertDotProduct(-5.5, "(a:-5.5)"); + assertDotProduct(27.5, "(e:5.5)"); + assertDotProduct(55, "(a:1,b:2,c:3,d:4,e:5)"); + assertDotProduct(20, "(b:10,b:15)"); + // docId = 2 + assertDotProduct(0, "()", 2); + assertDotProduct(0, "(a:1,b:2,c:3,d:4,e:5)", 2); + } + { // string attribute + assertDotProduct(0, "(f:5,g:5)", 1, "wsextstr"); + assertDotProduct(550, "(a:1,b:2,c:3,d:4,e:5)", 1, "wsextstr"); + } + { // integer attribute + assertDotProduct(0, "()", 1, "wsint"); + assertDotProduct(0, "(6:5,7:5)", 1, "wsint"); + assertDotProduct(55, "(1:1,2:2,3:3,4:4,5:5)", 1, "wsint"); + } + std::vector attributes = {"arrint", "arrfloat", "arrint_fast", "arrfloat_fast"}; + for (const char * name : attributes) { + assertDotProduct(0, "()", 1, name); + assertDotProduct(0, "(6:5,7:5)", 1, name); + assertDotProduct(55, "(0:1,1:2,2:3,3:4,4:5)", 1, name); + assertDotProduct(55, "[1 2 3 4 5]", 1, name); + assertDotProduct(41, "{3:4,4:5}", 1, name); + } + { // float array attribute + assertDotProduct(55, "[1.0 2.0 3.0 4.0 5.0]", 1, "arrfloat"); + assertDotProduct(41, "{3:4,4:5.0}", 1, "arrfloat"); + } + { // Sparse float array attribute. 
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "arrfloat"); + } + + assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint"); // attribute of the wrong type + assertDotProduct(17, "(0:1,3:4,50:97)", 1, "sint", "arrfloat"); // attribute override + assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint", "arrfloat_non_existing"); // incorrect attribute override + } + { // Test that correct executor is created + FtFeatureTest ft(_factory, "value(0)"); + setupForDotProductTest(ft); + ft.getQueryEnv().getProperties().add("dotProduct.vector", "(a:1)"); + ParameterList params; + params.push_back(Parameter(ParameterType::ATTRIBUTE, "wsstr")); + params.push_back(Parameter(ParameterType::STRING, "vector")); + DotProductBlueprint bp; + DummyDependencyHandler deps(bp); + EXPECT_TRUE(bp.setup(ft.getIndexEnv(), params)); + FeatureExecutor::LP exc = bp.createExecutor(ft.getQueryEnv()); + // check that we have the optimized enum version + dotproduct::wset::DotProductExecutor * myExc = + dynamic_cast *>(exc.get()); + EXPECT_TRUE(myExc != nullptr); + EXPECT_EQUAL(1u, deps.output.size()); + } +} + +void +Test::assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId, + const vespalib::string & attribute, const vespalib::string & attributeOverride) +{ + RankResult rr; + rr.addScore("dotProduct(" + attribute + ",vector)", exp); + FtFeatureTest ft(_factory, rr.getKeys()); + setupForDotProductTest(ft); + ft.getQueryEnv().getProperties().add("dotProduct.vector", vector); + if ( ! attributeOverride.empty() ) { + ft.getQueryEnv().getProperties().add("dotProduct." 
+ attribute + ".override.name", attributeOverride); + } + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(rr, docId)); +} + +void +Test::setupForDotProductTest(FtFeatureTest & ft) +{ + struct Config { + const char * name; + AVBT dataType; + AVCT collectionType; + bool fastSearch; + }; + std::vector cfgList = { {"wsint", AVBT::INT32, AVCT::WSET, false}, + {"arrint", AVBT::INT32, AVCT::ARRAY, false}, + {"arrfloat", AVBT::FLOAT, AVCT::ARRAY, false}, + {"arrint_fast", AVBT::INT32, AVCT::ARRAY, true}, + {"arrfloat_fast", AVBT::FLOAT, AVCT::ARRAY, true} + }; + AttributePtr a = AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET)); + AttributePtr c = AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE)); + AttributePtr d(new search::WeightedSetStringExtAttribute("wsextstr")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsextstr"); + for (const Config & cfg : cfgList) { + AttributePtr baf = AttributeFactory::createAttribute(cfg.name, AVC(cfg.dataType, + cfg.collectionType, + cfg.fastSearch)); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, + cfg.collectionType==AVCT::ARRAY + ? 
CollectionType::ARRAY + : CollectionType::WEIGHTEDSET, + cfg.name); + baf->addReservedDoc(); + baf->addDocs(2); + ft.getIndexEnv().getAttributeManager().add(baf); + for (size_t i(1); i < 6; i++) { + IntegerAttribute * ia = dynamic_cast(baf.get()); + if (ia) { + ia->append(1, i, i); + } else { + FloatingPointAttribute * fa = dynamic_cast(baf.get()); + fa->append(1, i, i); + } + } + baf->commit(); + } + + a->addReservedDoc(); + c->addReservedDoc(); + a->addDocs(2); + c->addDocs(2); + ft.getIndexEnv().getAttributeManager().add(a); + ft.getIndexEnv().getAttributeManager().add(c); + ft.getIndexEnv().getAttributeManager().add(d); + + StringAttribute * sa = static_cast(a.get()); + sa->append(1, "a", 1); + sa->append(1, "b", 2); + sa->append(1, "c", 3); + sa->append(1, "d", 4); + sa->append(1, "e", 5); + + WeightedSetStringExtAttribute * ea = static_cast(d.get()); + EXPECT_TRUE(!ea->hasEnum()); + uint32_t docId; + ea->addDoc(docId); // reserved doc + ea->addDoc(docId); + ea->add("a", 10); + ea->add("b", 20); + ea->add("c", 30); + ea->add("d", 40); + ea->add("e", 50); + ea->addDoc(docId); + + a->commit(); + c->commit(); +} + +void +Test::testNow() +{ + { + // Test blueprint. + NowBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "now")); + + StringList params, in, out; + FT_SETUP_OK (pt, params, in, out.add("out")); + FT_SETUP_FAIL(pt, params.add("foo")); + + FT_DUMP(_factory, "now", StringList().add("now")); + } + + { + // Test executor. 
+ FtFeatureTest ft(_factory, "now"); + ASSERT_TRUE(ft.setup()); + + RankResult res; + res.addScore("now", 0.0f); + for (uint32_t i = 1; i <= 10; ++i) { + feature_t last = res.getScore("now"); + res.clear(); + ASSERT_TRUE(ft.executeOnly(res, i)); + ASSERT_TRUE(last <= res.getScore("now")); + } + } + + { + // Test executor with ms resolution + FtFeatureTest ft(_factory, "now"); + ft.getQueryEnv().getProperties().add("vespa.now", "15000000000"); + ASSERT_TRUE(ft.setup()); + + RankResult res; + ASSERT_TRUE(ft.executeOnly(res, 0)); + feature_t now = 15000000000; + ASSERT_EQUAL(now, res.getScore("now")); + } +} + + +void +Test::testMatch() +{ + { // Test blueprint. + MatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "match")); + + FtFeatureTest ft(_factory, ""); + setupForAttributeTest(ft); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::INDEX, CollectionType::ARRAY, "bar") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight")); + FT_SETUP_OK(pt, idx_env, params, in + .add("fieldMatch(foo)") + 
.add("elementCompleteness(bar)") + .add("elementCompleteness(baz)") + .add("attributeMatch(sint)") + .add("attributeMatch(aint)") + .add("attributeMatch(wsint)"), out + .add("weight.foo") + .add("weight.bar") + .add("weight.baz") + .add("weight.sint") + .add("weight.aint") + .add("weight.wsint")); + FT_SETUP_FAIL(pt, idx_env, params.add("1")); // expects 0 parameters + + FT_DUMP_EMPTY(_factory, "match"); + } + + { // Test executor + FtFeatureTest ft(_factory, "match"); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", "100"); // assign weight to all fields, simulate sd behaviour + ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", "200"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.sint", "300"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.aint", "400"); + + // search in field 'foo' + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0 + + // search in field 'sint' + ft.getQueryEnv().getBuilder().addAttributeNode("sint"); // term id 1 + setupForAttributeTest(ft, false); + + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add hit for field 'foo' for search term 0 + ASSERT_TRUE(mdb->setFieldLength("foo", 1)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); + ASSERT_TRUE(mdb->setWeight("sint", 1, 0)); + ASSERT_TRUE(mdb->apply(1)); + + RankResult rr = toRankResult("match", "score:1 
totalWeight:400 weight.foo:100 weight.bar:200 weight.baz:100 weight.sint:300 weight.aint:400 weight.wsint:100"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr)); + } + + { // Test executor + FtFeatureTest ft(_factory, "match"); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // search in field 'foo' + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0 + ASSERT_TRUE(ft.setup()); + + // must create this so that term match data is configured with the term data object + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // no hits on docId 1 + RankResult rr = toRankResult("match", "score:0 totalWeight:0 weight.foo:100"); + ASSERT_TRUE(ft.execute(rr, 1)); + } +} + +void +Test::testMatches() +{ + { // Test blueprint. + MatchesBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "matches")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + + StringList params, in, out; + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // expects 1-2 parameters + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params.add("baz")); // cannot find the field + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo"), in, out.add("out")); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), in, out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out); + + FT_DUMP_EMPTY(_factory, "matches"); + } + { // Test executor for index fields + EXPECT_TRUE(assertMatches(0, "x", "a")); + EXPECT_TRUE(assertMatches(1, "a", "a")); + EXPECT_TRUE(assertMatches(1, "a b", "a b")); + // change docId to indicate no matches in the field + EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo)", 2)); + // specify termIdx as second parameter + 
EXPECT_TRUE(assertMatches(0, "x", "a", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(1, "a", "a", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo,1)")); + EXPECT_TRUE(assertMatches(0, "x b", "a b", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(1, "x b", "a b", "matches(foo,1)")); + } + { // Test executor for attribute fields + FtFeatureTest ft(_factory, StringList().add("matches(foo)"). + add("matches(baz)"). + add("matches(foo,0)"). + add("matches(foo,1)"). + add("matches(foo,2)"). + add("matches(foo,3)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 0, hit in foo + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL); // query term 1, hit in bar + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 2, hit in foo + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->setWeight("bar", 1, 0); + mdb->apply(1); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo)", 1))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(baz)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,0)", 1))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,1)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,2)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,3)", 0))); + } +} + +// Runs 'feature' against a single index field "foo" holding the tokens of 'field', +// using the terms of 'query', and checks that the output for 'docId' equals 'output'. +// Returns false (after a soft test failure) when the values differ. +bool +Test::assertMatches(uint32_t output, + const vespalib::string & query, + const vespalib::string & field, + const vespalib::string & feature, + uint32_t docId) +{ + LOG(info, "assertMatches(%u, '%s', '%s', '%s')", output, query.c_str(),
field.c_str(), feature.c_str()); + + // Setup feature test. + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + // Execute once and compare results. The check is soft so that a mismatch reaches + // the 'return false' below and is reported by the caller, as in assertTermDistance. + if (!EXPECT_TRUE(ft.execute(output, EPS, docId))) return false; + return true; +} + + +void +Test::testQuery() +{ + { // Test blueprint. + QueryBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "query")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("foo"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "query"); + } + + { // Test executor. + RankResult exp; + exp.addScore("query(def1)", 1.0). + addScore("query(def2)", 2.0). + addScore("query(def3)", 0.0). + addScore("query(val1)", 1.1). + addScore("query(val2)", 2.2). + addScore("query(hash1)", vespalib::hash_code("foo")). + addScore("query(hash2)", vespalib::hash_code("2")). + addScore("query(hash3)", vespalib::hash_code("foo")). + addScore("query(hash4)", vespalib::hash_code("'foo")); + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getProperties() + .add("query(def1)", "1.0") + .add("$def2", "2.0"); + ft.getQueryEnv().getProperties() + .add("val1", "1.1") + .add("$val2", "2.2") + .add("hash1", "foo") + .add("hash2", "'2") + .add("hash3", "'foo") + .add("hash4", "''foo"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } +} + +void +Test::testQueryTermCount() +{ + { // Test blueprint. + QueryTermCountBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "queryTermCount")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_FAIL(pt, params.add("foo")); + + StringList dump; + FT_DUMP(_factory, "queryTermCount", dump.add("queryTermCount")); + } + + { // Test executor.
+ FtFeatureTest ft(_factory, "queryTermCount"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 0))); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryTermCount"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 1))); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryTermCount"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 2))); + } +} + +void +Test::testRandom() +{ + { // Test blueprint. + RandomBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "random")); + + StringList params, in, out; + FT_SETUP_OK (pt, params, in, out.add("out").add("match")); + FT_SETUP_OK (pt, params.add("1"), in, out); + FT_SETUP_FAIL(pt, params.add("2")); + + FT_DUMP_EMPTY(_factory, "random"); + } + + { // Test executor (seed specified through config) + FtFeatureTest ft(_factory, "random"); + ft.getIndexEnv().getProperties().add("random.seed", "100"); + ASSERT_TRUE(ft.setup()); + search::Rand48 rnd; + rnd.srand48(100); + for (uint32_t i = 0; i < 5; ++i) { + feature_t exp = rnd.lrand48() / (feature_t)0x80000000u; + ASSERT_TRUE(ft.execute(exp, EPS, i + 1)); + } + } + { // Test executor (current time used as seed) + FtFeatureTest ft(_factory, "random"); + ASSERT_TRUE(ft.setup()); + RankResult rr; + rr.addScore("random", 1.0f); + for (uint32_t i = 0; i < 5; ++i) { + feature_t last = rr.getScore("random"); + rr.clear(); + ASSERT_TRUE(ft.executeOnly(rr, i + 1)); + ASSERT_TRUE(last != rr.getScore("random")); + } + } + { // Test executor (random.match) + FtFeatureTest ft(_factory, "random.match"); + ft.getQueryEnv().getProperties().add("random.match.seed", "100"); + ASSERT_TRUE(ft.setup()); + search::Rand48 rnd; + for (uint32_t i = 1; i <= 5; ++i) { + rnd.srand48(100 + i); // seed + lid + 
feature_t exp = rnd.lrand48() / (feature_t)0x80000000u; + ASSERT_TRUE(ft.execute(exp, EPS, i)); + } + } +} + + +void +Test::testRankingExpression() +{ + { // Test blueprint. + RankingExpressionBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "rankingExpression")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); // requires config to run without params + FT_SETUP_OK (prototype, params.add("foo.out"), in.add("foo.out"), out.add("out")); + FT_SETUP_FAIL(prototype, params.add("bar.out")); + FT_SETUP_OK (prototype, params.clear().add("log((1 + 2)- 3 * 4 / 5 )"), in.clear(), out); + FT_SETUP_OK (prototype, + params.clear().add("if(if(f1.out<1,0,1)3+4,1,0)")); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(1.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression"); + ft.getIndexEnv().getProperties().add("rankingExpression.rankingScript", "if(1<2,3,4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression(foo)"); + ft.getIndexEnv().getProperties().add("rankingExpression(foo).rankingScript", "if(1<2,3,4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression"); + ft.getIndexEnv().getProperties() + .add("rankingExpression.rankingScript", "if(") + .add("rankingExpression.rankingScript", "1<") + .add("rankingExpression.rankingScript", "2,") + .add("rankingExpression.rankingScript", "3,") + .add("rankingExpression.rankingScript", "4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + // test interpreted expression + vespalib::string my_expr("3.0 + value(4.0) + sum(tensorFromWeightedSet(query(my_tensor)))"); + FtFeatureTest ft(_factory, getExpression(my_expr)); + ft.getQueryEnv().getProperties().add("my_tensor", "{a:1,b:2,c:3}"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(13.0)); + } + } +} + +// Returns the feature name built from base name "rankingExpression" with 'parameter' +// as its parameter, as produced by search::fef::FeatureNameBuilder. +vespalib::string +Test::getExpression(const vespalib::string &parameter) const +{ +
typedef search::fef::FeatureNameBuilder FNB; + return FNB().baseName("rankingExpression").parameter(parameter).buildName(); +} + +void +Test::testTerm() +{ + { + // Test blueprint. + TermBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "term")); + + StringList params, in, out; + FT_SETUP_OK (pt, params.add("0"), in, out.add("connectedness").add("significance").add("weight")); + FT_SETUP_FAIL(pt, params.add("1")); + } + { + StringList dump; + for (uint32_t term = 0; term < 3; ++term) { + vespalib::string bn = vespalib::make_string("term(%u)", term); + dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight"); + } + FtIndexEnvironment ie; + ie.getProperties().add("term.numTerms", "3"); + FT_DUMP(_factory, "term", ie, dump); // check override + + for (uint32_t term = 3; term < 5; ++term) { + vespalib::string bn = vespalib::make_string("term(%u)", term); + dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight"); + } + FT_DUMP(_factory, "term", dump); // check default + } + } + + { + // Test executor. + FtFeatureTest ft(_factory, "term(0)"); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp .addScore("term(0).connectedness", 0) + .addScore("term(0).significance", 0) + .addScore("term(0).weight", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor.
+ FtFeatureTest ft(_factory, StringList().add("term(1)").add("term(2)")); + ft.getIndexEnv().getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "idx1") // field 0 + .addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1 + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2 + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0); + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1).setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(0.5); + ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2).setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(0.25); + // setup connectedness between term 1 and term 0 + ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0"); + ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7"); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("term(1).significance", util::getSignificance(0.50)). + addScore("term(1).weight", 200.0f). + addScore("term(1).connectedness", 0.7f). + addScore("term(2).significance", util::getSignificance(0.25)). + addScore("term(2).weight", 400.0f). + addScore("term(2).connectedness", 0.1f). // default connectedness + setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor. 
+ FtFeatureTest ft(_factory, "term(0)"); + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0); + // setup significance for term 0 + ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.3"); + ASSERT_TRUE(ft.setup()); + + ASSERT_TRUE(ft.execute(RankResult().addScore("term(0).significance", 0.3f).setEpsilon(10e-6))); + } +} + +void +Test::testTermDistance() +{ + { // test blueprint + TermDistanceBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "termDistance")); + + StringList params, in, out; + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, ie, params.add("baz").add("0").add("0")); + FT_SETUP_FAIL(pt, ie, params.clear().add("bar").add("0").add("0")); + + FT_SETUP_OK(pt, ie, params.clear().add("foo").add("0").add("0"), + in, out.add("forward").add("forwardTermPosition") + .add("reverse").add("reverseTermPosition")); + } + { + FT_DUMP_EMPTY(_factory, "termDistance"); + } + } + + { // test executor + typedef TermDistanceCalculator::Result Result; + const uint32_t UV = TermDistanceCalculator::UNDEFINED_VALUE; + + EXPECT_TRUE(assertTermDistance(Result(), "a b", "x x")); + EXPECT_TRUE(assertTermDistance(Result(), "a b", "a x")); + EXPECT_TRUE(assertTermDistance(Result(), "a b", "x b")); + EXPECT_TRUE(assertTermDistance(Result(), "a", "a b")); + EXPECT_TRUE(assertTermDistance(Result(), "a", "a a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,UV,UV), "a b", "a b")); + EXPECT_TRUE(assertTermDistance(Result(2,0,UV,UV), "a b", "a x b")); + EXPECT_TRUE(assertTermDistance(Result(UV,UV,1,0), "a b", "b a")); + EXPECT_TRUE(assertTermDistance(Result(UV,UV,2,0), "a b", "b x a")); + EXPECT_TRUE(assertTermDistance(Result(2,18,1,20), "a b", "a x x x x x b x x x x a x x x b x x a x b a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,2,1), "a b", "a b x a x x b x x x a x x x x 
b x x x x x a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,1,1), "a b", "a b a b a")); // first best is kept + EXPECT_TRUE(assertTermDistance(Result(1,0,1,0), "a a", "a a")); + EXPECT_TRUE(assertTermDistance(Result(2,0,2,0), "a a", "a x a")); + } +} + +bool +Test::assertTermDistance(const TermDistanceCalculator::Result & exp, + const vespalib::string & query, + const vespalib::string & field, + uint32_t docId) +{ + LOG(info, "assertTermDistance('%s', '%s')", query.c_str(), field.c_str()); + + vespalib::string feature = "termDistance(foo,0,1)"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + RankResult rr; + rr.addScore(feature + ".forward", exp.forwardDist); + rr.addScore(feature + ".forwardTermPosition", exp.forwardTermPos); + rr.addScore(feature + ".reverse", exp.reverseDist); + rr.addScore(feature + ".reverseTermPosition", exp.reverseTermPos); + if (!EXPECT_TRUE(ft.execute(rr, docId))) { + return false; + } + return true; +} + +void +Test::testUtils() +{ + { // getSignificance + EXPECT_APPROX(util::getSignificance(0.0), 1, EPS); + EXPECT_APPROX(util::getSignificance(0.0 + 1.0e-7), 1, EPS); + EXPECT_APPROX(util::getSignificance(1.0), 0.5, EPS); + EXPECT_APPROX(util::getSignificance(1.0 + 1.0e-7), 0.5, EPS); + feature_t last = 1; + for (uint32_t i = 2; i <= 100; i = i + 1) { + feature_t s = util::getSignificance(i * 1.0e-6); + EXPECT_GREATER(s, 0); + EXPECT_LESS(s, 1); + EXPECT_LESS(s, last); + last = s; + } + for (uint32_t i = 999900; i <= 1000000; i = i + 1) { + feature_t s = util::getSignificance(i * 1.0e-6); + EXPECT_GREATER(s, 0); + EXPECT_LESS(s, 1); + EXPECT_LESS(s, last); + last = s; + } + } +} + diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h new file mode 100644 index 00000000000..dd15981af1f 
--- /dev/null +++ b/searchlib/src/tests/features/prod_features.h @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +// Test application: one public testXxx() entry point per feature under test, +// followed by private per-case tests and shared assert/setup helpers. +class Test : public FtTestApp +{ +public: + int Main(); + void testFramework(); + void testFtLib(); + void testAge(); + void testAttribute(); + void testAttributeMatch(); + void testCloseness(); + void testDistance(); + void testDistanceToPath(); + void testDotProduct(); + void testFieldLength(); + void testFieldMatch(); + void testFieldTermMatch(); + void testFirstPhase(); + void testForeach(); + void testFreshness(); + void testMatch(); + void testMatches(); + void testNow(); + void testQuery(); + void testQueryTermCount(); + void testRandom(); + void testRankingExpression(); + void testTerm(); + void testTermDistance(); + void testUtils(); + +private: + void + testFieldMatchBluePrint(); + + void + testFieldMatchExecutor(); + + void + testFieldMatchExecutorOutOfOrder(); + + void + testFieldMatchExecutorSegments(); + + void + testFieldMatchExecutorGaps(); + + void + testFieldMatchExecutorHead(); + + void + testFieldMatchExecutorTail(); + + void + testFieldMatchExecutorLongestSequence(); + + void + testFieldMatchExecutorMatches(); + + void + testFieldMatchExecutorCompleteness(); + + void + testFieldMatchExecutorOrderness(); + + void + testFieldMatchExecutorRelatedness(); + + void + testFieldMatchExecutorLongestSequenceRatio(); + + void + testFieldMatchExecutorEarliness(); + + void + testFieldMatchExecutorWeight(); + + void + testFieldMatchExecutorSignificance(); + + void + testFieldMatchExecutorImportance(); + + void + testFieldMatchExecutorOccurrence(); + + void + testFieldMatchExecutorAbsoluteOccurrence(); + + void + testFieldMatchExecutorWeightedOccurrence(); + + void + testFieldMatchExecutorWeightedAbsoluteOccurrence(); + + void + testFieldMatchExecutorSignificantOccurrence(); + + void +
testFieldMatchExecutorUnweightedProximity(); + + void + testFieldMatchExecutorReverseProximity(); + + void + testFieldMatchExecutorAbsoluteProximity(); + + void + testFieldMatchExecutorMultiSegmentProximity(); + + void + testFieldMatchExecutorSegmentDistance(); + + void + testFieldMatchExecutorSegmentProximity(); + + void + testFieldMatchExecutorSegmentStarts(); + + void + testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery(); + + void + testFieldMatchExecutorQueryRepeats(); + + void + testFieldMatchExecutorZeroCases(); + + void + testFieldMatchExecutorExceedingIterationLimit(); + + void + testFieldMatchExecutorRemaining(); + + + void assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime); + void setupForAgeTest(FtFeatureTest & ft, uint64_t docTime); + void setupForAttributeTest(FtFeatureTest &ft, bool setup_env = true); + void assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance = 0, double halfResponse = 0); + void setupForDistanceTest(FtFeatureTest & ft, const vespalib::string & attrName, + const std::vector > & positions, bool zcurve); + void assert2DZDistance(feature_t exp, const vespalib::string & positions, + int32_t xquery, int32_t yquery, uint32_t xAspect = 0); + void assertDistanceToPath(const std::vector > pos, const vespalib::string &path, + feature_t distance = search::features::DistanceToPathExecutor::DEFAULT_DISTANCE, + feature_t traveled = 1, feature_t product = 0); + void setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType); + void assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId = 1, + const vespalib::string & attribute = "wsstr", const vespalib::string & attributeOverride=""); + void setupForDotProductTest(FtFeatureTest & ft); + void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + const
search::features::fieldmatch::Params * params = NULL, uint32_t totalTermWeight = 0, feature_t totalSignificance = 0.0f); + void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + uint32_t totalTermWeight); + void assertFieldMatchTS(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + feature_t totalSignificance); + vespalib::string getExpression(const vespalib::string &parameter) const; + void assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op); + void assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge = 0, double halfResponse = 0, bool logScale = false); + bool assertTermDistance(const search::features::TermDistanceCalculator::Result & exp, const vespalib::string & query, + const vespalib::string & field, uint32_t docId = 1); + bool assertMatches(uint32_t output, const vespalib::string & query, const vespalib::string & field, + const vespalib::string & feature = "matches(foo)", uint32_t docId = 1); + +private: + search::fef::BlueprintFactory _factory; +}; + diff --git a/searchlib/src/tests/features/prod_features_attributematch.cpp b/searchlib/src/tests/features/prod_features_attributematch.cpp new file mode 100644 index 00000000000..06b2b859709 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_attributematch.cpp @@ -0,0 +1,300 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +LOG_SETUP(".prod_features_attributematch"); + +#include "prod_features.h" +#include +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; + +typedef AttributeVector::SP AttributePtr; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + + +void +Test::testAttributeMatch() +{ + AttributeMatchBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "attributeMatch")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); // expects 1 param + FT_SETUP_FAIL(pt, params.add("foo")); // field must exists + + FtIndexEnvironment idx_env; + idx_env.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(pt, idx_env, params); // field must be an attribute + idx_env.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + + FT_SETUP_OK(pt, idx_env, params.clear().add("sint"), in, out + .add("completeness") + .add("queryCompleteness") + .add("fieldCompleteness") + .add("normalizedWeight") + .add("normalizedWeightedWeight") + .add("weight") + .add("significance") + .add("importance") + .add("matches") + .add("totalWeight") + .add("averageWeight")); + + FT_DUMP_EMPTY(_factory, "attributeMatch"); + + FT_DUMP(_factory, "attributeMatch", idx_env, out.clear() + .add("attributeMatch(sint)") + .add("attributeMatch(sint).completeness") + .add("attributeMatch(sint).queryCompleteness") + .add("attributeMatch(sint).fieldCompleteness") + .add("attributeMatch(sint).normalizedWeight") + .add("attributeMatch(sint).normalizedWeightedWeight") + .add("attributeMatch(sint).weight") + .add("attributeMatch(sint).significance") + .add("attributeMatch(sint).importance") + .add("attributeMatch(sint).matches") + .add("attributeMatch(sint).totalWeight") + .add("attributeMatch(sint).averageWeight")); + } + + { // single 
attributes + FtFeatureTest ft(_factory, StringList(). + add("attributeMatch(sint)").add("attributeMatch(sfloat)").add("attributeMatch(sstr)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); // 2 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat"); // 1 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr"); // 0 matches + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 0, hit in sint + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 1, .. + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 2, .. + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 3, .. + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sfloat") != NULL); // query term 4, hit in sfloat + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL); + ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[0].setUniqueId(0); + ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[1].setUniqueId(1); + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(10)); + ft.getQueryEnv().getTerms()[2].setUniqueId(1); + ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(10)); + ft.getQueryEnv().getTerms()[3].setUniqueId(1); + ft.getQueryEnv().getTerms()[4].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[4].setUniqueId(1); + ft.getQueryEnv().getTerms()[5].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[5].setUniqueId(1); + ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.5"); 
// change significance for term 0 + ft.getQueryEnv().getProperties().add("vespa.term.1.significance", "0.1"); // change significance for all other terms + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("sint", 0, 0); + mdb->setWeight("sint", 1, 0); + mdb->setWeight("sfloat", 4, 0); + mdb->apply(1); + RankResult exp; + exp.addScore("attributeMatch(sint)", 0.5f). // same as completeness + addScore("attributeMatch(sint).matches", 2). + addScore("attributeMatch(sint).totalWeight", 0). + addScore("attributeMatch(sint).averageWeight", 0). + addScore("attributeMatch(sint).completeness", 0.5f). + addScore("attributeMatch(sint).queryCompleteness", 0.5f). + addScore("attributeMatch(sint).fieldCompleteness", 1). + addScore("attributeMatch(sint).normalizedWeight", 0). + addScore("attributeMatch(sint).normalizedWeightedWeight", 0). + addScore("attributeMatch(sint).weight", 0.4). + addScore("attributeMatch(sint).significance", 0.6). + addScore("attributeMatch(sint).importance", 0.5). + addScore("attributeMatch(sfloat)", 1). // same as completeness + addScore("attributeMatch(sfloat).matches", 1). + addScore("attributeMatch(sfloat).totalWeight", 0). + addScore("attributeMatch(sfloat).averageWeight", 0). + addScore("attributeMatch(sfloat).completeness", 1). + addScore("attributeMatch(sfloat).queryCompleteness", 1). + addScore("attributeMatch(sfloat).fieldCompleteness", 1). + addScore("attributeMatch(sfloat).normalizedWeight", 0). + addScore("attributeMatch(sfloat).normalizedWeightedWeight", 0). + addScore("attributeMatch(sfloat).weight", 0.2). + addScore("attributeMatch(sfloat).significance", 0.1). + addScore("attributeMatch(sfloat).importance", 0.15). + addScore("attributeMatch(sstr)", 0). // same as completeness + addScore("attributeMatch(sstr).matches", 0). + addScore("attributeMatch(sstr).totalWeight", 0). + addScore("attributeMatch(sstr).averageWeight", 0). + addScore("attributeMatch(sstr).completeness", 0). 
+ addScore("attributeMatch(sstr).queryCompleteness", 0). + addScore("attributeMatch(sstr).fieldCompleteness", 0). + addScore("attributeMatch(sstr).normalizedWeight", 0). + addScore("attributeMatch(sstr).normalizedWeightedWeight", 0). + addScore("attributeMatch(sstr).weight", 0). + addScore("attributeMatch(sstr).significance", 0). + addScore("attributeMatch(sstr).importance", 0). + setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + } + + { // array attributes + + FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); // 1 matches + ft.getIndexEnv().getProperties().add("attributeMatch(aint).fieldCompletenessImportance", "0.5"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL); // 0 + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("aint", 0, 0); + mdb->apply(1); + RankResult exp; + exp.addScore("attributeMatch(aint)", 0.75f) // same as completeness + .addScore("attributeMatch(aint).matches", 1) + .addScore("attributeMatch(aint).totalWeight", 0) + .addScore("attributeMatch(aint).averageWeight", 0) + .addScore("attributeMatch(aint).completeness", 0.75f) + .addScore("attributeMatch(aint).queryCompleteness", 1) + .addScore("attributeMatch(aint).fieldCompleteness", 0.5f) + .addScore("attributeMatch(aint).normalizedWeight", 0) + .addScore("attributeMatch(aint).normalizedWeightedWeight", 0); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + } + + { // weighted set attributes + FtFeatureTest ft(_factory, StringList(). 
+ add("attributeMatch(wsint)").add("attributeMatch(wsfloat)").add("attributeMatch(wsstr)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); // 2 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat"); // 1 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); // 0 matches + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getProperties().add("attributeMatch(wsint).maxWeight", "100"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 0 + ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(2)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 1 + ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(3)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsfloat") != NULL); // 2 + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(0)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL); + ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(0)); + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("wsint", 0, 10); + mdb->setWeight("wsint", 1, 20); + mdb->setWeight("wsfloat", 2, -30); + mdb->apply(1); + RankResult exp; + + // test all three attributes + exp.addScore("attributeMatch(wsint)", 1). // same as completeness + addScore("attributeMatch(wsint).matches", 2). + addScore("attributeMatch(wsint).totalWeight", 30). + addScore("attributeMatch(wsint).averageWeight", 15). + addScore("attributeMatch(wsint).completeness", 1). + addScore("attributeMatch(wsint).queryCompleteness", 1). + addScore("attributeMatch(wsint).fieldCompleteness", 1). + addScore("attributeMatch(wsint).normalizedWeight", 0.1f). 
+ addScore("attributeMatch(wsint).normalizedWeightedWeight", 0.16f). + addScore("attributeMatch(wsfloat)", 0.95). // same as completeness + addScore("attributeMatch(wsfloat).matches", 1). + addScore("attributeMatch(wsfloat).totalWeight", -30). + addScore("attributeMatch(wsfloat).averageWeight", -30). + addScore("attributeMatch(wsfloat).completeness", 0.95). + addScore("attributeMatch(wsfloat).queryCompleteness", 1). + addScore("attributeMatch(wsfloat).fieldCompleteness", 0). + addScore("attributeMatch(wsfloat).normalizedWeight", 0). + addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 0). + addScore("attributeMatch(wsstr)", 0). // same as completeness + addScore("attributeMatch(wsstr).matches", 0). + addScore("attributeMatch(wsstr).totalWeight", 0). + addScore("attributeMatch(wsstr).averageWeight", 0). + addScore("attributeMatch(wsstr).completeness", 0). + addScore("attributeMatch(wsstr).queryCompleteness", 0). + addScore("attributeMatch(wsstr).fieldCompleteness", 0). + addScore("attributeMatch(wsstr).normalizedWeight", 0). + addScore("attributeMatch(wsstr).normalizedWeightedWeight", 0). + setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + + // test fieldCompleteness + mdb->setWeight("wsint", 0, 0); + mdb->setWeight("wsint", 1, 15); + mdb->apply(1); + exp.clear(). + addScore("attributeMatch(wsint).fieldCompleteness", 0.5f); + ASSERT_TRUE(ft.execute(exp)); + + // test that normalized values lies in the interval [0,1]. + mdb->setWeight("wsfloat", 2, 1000); + mdb->apply(1); + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(100)); + exp.clear(). + addScore("attributeMatch(wsfloat).normalizedWeight", 1). 
+ addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 1); + ASSERT_TRUE(ft.execute(exp)); + } + + { // unique only attribute + FtFeatureTest ft(_factory, "attributeMatch(unique)"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("unique") != NULL); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("attributeMatch(unique)", 0). // same as completeness + addScore("attributeMatch(unique).matches", 0). + addScore("attributeMatch(unique).totalWeight", 0). + addScore("attributeMatch(unique).averageWeight", 0). + addScore("attributeMatch(unique).completeness", 0). + addScore("attributeMatch(unique).queryCompleteness", 0). + addScore("attributeMatch(unique).fieldCompleteness", 0). + addScore("attributeMatch(unique).normalizedWeight", 0). + addScore("attributeMatch(unique).normalizedWeightedWeight", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)").add("attributeMatch(wint)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wint"); + + // setup an array and wset attributes with 0 elements + AttributePtr aint = AttributeFactory::createAttribute("aint", AVC (AVBT::INT32, AVCT::ARRAY)); + AttributePtr wint = AttributeFactory::createAttribute("wint", AVC(AVBT::INT32, AVCT::WSET)); + aint->addReservedDoc(); + wint->addReservedDoc(); + ft.getIndexEnv().getAttributeManager().add(aint); + ft.getIndexEnv().getAttributeManager().add(wint); + aint->addDocs(1); + aint->commit(); + ASSERT_TRUE(aint->getValueCount(0) == 0); + wint->addDocs(1); + wint->commit(); + ASSERT_TRUE(wint->getValueCount(0) == 0); + + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL); + 
ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wint") != NULL); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("attributeMatch(aint)", 0). // same as completeness + addScore("attributeMatch(aint).completeness", 0). + addScore("attributeMatch(aint).fieldCompleteness", 0). + addScore("attributeMatch(wint)", 0). // same as completeness + addScore("attributeMatch(wint).completeness", 0). + addScore("attributeMatch(wint).fieldCompleteness", 0); + ASSERT_TRUE(ft.execute(exp)); + } +} diff --git a/searchlib/src/tests/features/prod_features_fieldmatch.cpp b/searchlib/src/tests/features/prod_features_fieldmatch.cpp new file mode 100644 index 00000000000..e26d6a92fa6 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_fieldmatch.cpp @@ -0,0 +1,1079 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".prod_features_fieldmatch"); + +#include + +#include "prod_features.h" + +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; + +void +Test::testFieldMatch() +{ + testFieldMatchBluePrint(); + testFieldMatchExecutor(); +} + + +void +Test::testFieldMatchBluePrint() +{ + FieldMatchBlueprint pt; + StringList out; + out.add("score"). + add("proximity"). + add("completeness"). + add("queryCompleteness"). + add("fieldCompleteness"). + add("orderness"). + add("relatedness"). + add("earliness"). + add("longestSequenceRatio"). + add("segmentProximity"). + add("unweightedProximity"). + add("absoluteProximity"). + add("occurrence"). + add("absoluteOccurrence"). + add("weightedOccurrence"). + add("weightedAbsoluteOccurrence"). + add("significantOccurrence"). + + add("weight"). + add("significance"). + add("importance"). + + add("segments"). + add("matches"). + add("outOfOrder"). + add("gaps"). + add("gapLength"). + add("longestSequence"). + add("head"). 
+ add("tail"). + add("segmentDistance"). + add("degradedMatches"); + { + EXPECT_TRUE(assertCreateInstance(pt, "fieldMatch")); + + StringList params, in; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("bar")); + params.clear(); + + { + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_SETUP_FAIL(pt, ie, params.add("foo")); + FT_SETUP_FAIL(pt, ie, params.add("abar")); + FT_SETUP_FAIL(pt, ie, params.add("wbar")); + + FT_SETUP_OK(pt, ie, params.clear().add("bar"), in, out); + } + + { // test illegal proximity table + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + Properties & p = ie.getProperties(); + p.add("fieldMatch(foo).proximityLimit", "1"); + + // too few elements, should be 3 (1*2 + 1) + p.add("fieldMatch(foo).proximityTable", "0.5"); + p.add("fieldMatch(foo).proximityTable", "1.0"); + FT_SETUP_FAIL(pt, ie, params); + + // too many elements, should be 3 (1*2 + 1) + p.add("fieldMatch(foo).proximityTable", "1.0"); + p.add("fieldMatch(foo).proximityTable", "0.5"); + FT_SETUP_FAIL(pt, ie, params); + } + } + { // test dumping with a regular index field + FT_DUMP_EMPTY(_factory, "fieldMatch"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be an index field + + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value + + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value + + 
StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + vespalib::string bn = "fieldMatch(bar)"; + dump.add(bn); + for (uint32_t i = 1; i < out.size(); ++i) { + dump.add(bn + "." + out[i]); + } + FT_DUMP(_factory, "fieldMatch", ie, dump); + } + + { // test dumping with a filter index field + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getFields()[0].setFilter(true); + + StringList dump; + vespalib::string bn = "fieldMatch(foo)"; + dump.add(bn); + dump.add(bn + ".completeness"); + dump.add(bn + ".queryCompleteness"); + dump.add(bn + ".weight"); + dump.add(bn + ".matches"); + dump.add(bn + ".degradedMatches"); + FT_DUMP(_factory, "fieldMatch", ie, dump); + } +} + + +void +Test::testFieldMatchExecutor() +{ + testFieldMatchExecutorOutOfOrder(); + testFieldMatchExecutorSegments(); + testFieldMatchExecutorGaps(); + testFieldMatchExecutorHead(); + testFieldMatchExecutorTail(); + testFieldMatchExecutorLongestSequence(); + testFieldMatchExecutorMatches(); + testFieldMatchExecutorCompleteness(); + testFieldMatchExecutorOrderness(); + testFieldMatchExecutorRelatedness(); + testFieldMatchExecutorLongestSequenceRatio(); + testFieldMatchExecutorEarliness(); + testFieldMatchExecutorWeight(); + testFieldMatchExecutorSignificance(); + testFieldMatchExecutorImportance(); + testFieldMatchExecutorOccurrence(); + testFieldMatchExecutorAbsoluteOccurrence(); + testFieldMatchExecutorWeightedOccurrence(); + testFieldMatchExecutorWeightedAbsoluteOccurrence(); + testFieldMatchExecutorSignificantOccurrence(); + testFieldMatchExecutorUnweightedProximity(); + testFieldMatchExecutorReverseProximity(); + testFieldMatchExecutorAbsoluteProximity(); + testFieldMatchExecutorMultiSegmentProximity(); + testFieldMatchExecutorSegmentDistance(); + testFieldMatchExecutorSegmentProximity(); + testFieldMatchExecutorSegmentStarts(); + testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery(); + 
testFieldMatchExecutorQueryRepeats(); + testFieldMatchExecutorZeroCases(); + testFieldMatchExecutorExceedingIterationLimit(); + testFieldMatchExecutorRemaining(); +} + + +void +Test::testFieldMatchExecutorOutOfOrder() +{ + assertFieldMatch("outOfOrder:0","a","a"); + assertFieldMatch("outOfOrder:0","a b c","a b c"); + assertFieldMatch("outOfOrder:1","a b c","a c b"); + assertFieldMatch("outOfOrder:2","a b c","c b a"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); +} + + +void +Test::testFieldMatchExecutorSegments() +{ + assertFieldMatch("segments:1","a","a"); + assertFieldMatch("segments:1","a b c","a b c"); + assertFieldMatch("segments:1","a b c","a x x b c"); + assertFieldMatch("segments:2","a b c","a x x x x x x x x x x x x x x x x x x x b c"); + assertFieldMatch("segments:2","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertFieldMatch("segments:2 gaps:1","a b c","x x x a x x x x x x x x x x x x x x x x x x x b x x c x x"); + assertFieldMatch("segments:2 gaps:0 outOfOrder:0","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertFieldMatch("segments:2 gaps:1","a b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); + assertFieldMatch("segments:2 gaps:1","a y y b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); +} + + +void +Test::testFieldMatchExecutorGaps() +{ + assertFieldMatch("gaps:0","a","a"); + assertFieldMatch("gaps:0","x�a","a"); // TODO: which char ? 
+ assertFieldMatch("gaps:0 gapLength:0","a b c","a b c"); + assertFieldMatch("gaps:1 gapLength:1","a b","b a"); + assertFieldMatch("gaps:1 gapLength:1","a b c","a x b c"); + assertFieldMatch("gaps:1 gapLength:3","a b c","a x X Xb c"); + assertFieldMatch("gaps:2 gapLength:2 outOfOrder:1","a b c","a c b"); + assertFieldMatch("gaps:2 gapLength:2 outOfOrder:0","a b c","a x b x c"); + assertFieldMatch("gaps:2 gapLength:5 outOfOrder:1","a b c","a x c x b"); + assertFieldMatch("gaps:3 outOfOrder:2 segments:1","a b c d e","x d x x b c x x a e"); + assertFieldMatch("gaps:0","y a b c","a b c x"); +} + + +void +Test::testFieldMatchExecutorHead() +{ + assertFieldMatch("head:0","a","a"); + //assertFieldMatch("head:0","y","a"); // no hit, executor will not run + assertFieldMatch("head:1","a","x a"); + assertFieldMatch("head:2","a b c","x x a b c"); + assertFieldMatch("head:2","a b c","x x c x x a b"); + assertFieldMatch("head:2","a b c","x x c x x x x x x x x x x x x x x x a b"); +} + + +void +Test::testFieldMatchExecutorTail() +{ + assertFieldMatch("tail:0","a","a"); + //assertFieldMatch("tail:0","y","a"); // no hit, executor will not run + assertFieldMatch("tail:1","a","a x"); + assertFieldMatch("tail:2","a b c","a b c x x"); + assertFieldMatch("tail:2","a b c","x x x c x x x x a b x x"); + assertFieldMatch("tail:0","a b c","x x c x x x x x x x x x x x x x x x a b"); +} + +void +Test::testFieldMatchExecutorLongestSequence() +{ + assertFieldMatch("longestSequence:1","a","a"); + assertFieldMatch("longestSequence:1","a","a b c"); + assertFieldMatch("longestSequence:1","b","a b c"); + assertFieldMatch("longestSequence:3","a b c","x x a b c x x a b x"); + assertFieldMatch("longestSequence:3 segments:1","a b c","x x a b x x a b c x"); + assertFieldMatch("longestSequence:2","a b c d","x x c d x x a b x"); + assertFieldMatch("longestSequence:2","a b c d","x x a b x c d x x"); + assertFieldMatch("longestSequence:2","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x"); + 
assertFieldMatch("longestSequence:4 segments:1","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x a b c d"); +} + + +void +Test::testFieldMatchExecutorMatches() +{ + assertFieldMatch("matches:1 queryCompleteness:1 fieldCompleteness:1","a","a"); + assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:1","a b c","a b c"); + assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:0.5","a b c","a b c a b d"); + assertFieldMatch("matches:3 queryCompleteness:0.5 fieldCompleteness:0.25","a y y b c y","a x x b c x a x a b x x"); +} + + +void +Test::testFieldMatchExecutorCompleteness() +{ + assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a","a"); + assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","a","x"); + assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","y","a"); + assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a","a a"); + assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a a","a"); + assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a b c","a b c"); + assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a b c d","a b"); + assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a b","a b c d"); + assertFieldMatch("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b c d e"); +} + + +void +Test::testFieldMatchExecutorOrderness() +{ + assertFieldMatch("orderness:1", "a","a"); + // Note: we have no hits -> orderness: 0(1) + assertFieldMatch("orderness:0", "a","x"); + assertFieldMatch("orderness:0", "a a a","a"); // Oh well... 
+ assertFieldMatch("orderness:1", "a","a a a"); + assertFieldMatch("orderness:0", "a b","b a"); + assertFieldMatch("orderness:0.5","a b c","b a c"); + assertFieldMatch("orderness:0.5","a b c d","c b d x x x x x x x x x x x x x x x x x x x x x a"); +} + + +void +Test::testFieldMatchExecutorRelatedness() +{ + assertFieldMatch("relatedness:1", "a","a"); + assertFieldMatch("relatedness:0", "a","x"); + assertFieldMatch("relatedness:1", "a b","a b"); + assertFieldMatch("relatedness:1", "a b c","a b c"); + assertFieldMatch("relatedness:0.5","a b c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("relatedness:0.5","a y b y y y c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); +} + + +void +Test::testFieldMatchExecutorLongestSequenceRatio() +{ + assertFieldMatch("longestSequenceRatio:1", "a","a"); + assertFieldMatch("longestSequenceRatio:0", "a","x"); + assertFieldMatch("longestSequenceRatio:1", "a a","a"); + assertFieldMatch("longestSequenceRatio:1", "a","a a"); + assertFieldMatch("longestSequenceRatio:1", "a b","a b"); + assertFieldMatch("longestSequenceRatio:1", "a y"," a x"); + assertFieldMatch("longestSequenceRatio:0.5","a b","a x b"); + assertFieldMatch("longestSequenceRatio:0.75","a b c d","x x a b x a x c d a b c x d x"); +} + + +void +Test::testFieldMatchExecutorEarliness() +{ + assertFieldMatch("earliness:1", "a","a"); + assertFieldMatch("earliness:0", "a","x"); + assertFieldMatch("earliness:1", "a","a a a"); + assertFieldMatch("earliness:1", "a a a","a"); + assertFieldMatch("earliness:0.8", "b","a b c"); + assertFieldMatch("earliness:0.8", "b","a b"); + assertFieldMatch("earliness:0.9091","a b c","x b c x x x x x a x x x"); + assertFieldMatch("earliness:0.2", "a b c","x b c a x x x x a x x x x x x x a b c x x"); +} + + +void +Test::testFieldMatchExecutorWeight() +{ + assertFieldMatch("weight:1", "a","a"); + assertFieldMatch("weight:0", "y","a"); + assertFieldMatch("weight:0.3333","a a a","a"); + assertFieldMatch("weight:1", 
"a","a a a"); + assertFieldMatch("weight:1", "a b c","a b c"); + assertFieldMatch("weight:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertFieldMatch("weight:0.3333","a b c","a"); + assertFieldMatch("weight:0.6667","a b c","a b"); + + assertFieldMatch("weight:1", "a b c!200","a b c"); // Best + assertFieldMatch("weight:0.75","a b c!200","b c"); // Middle + assertFieldMatch("weight:0.5", "a b c!200","a b"); // Worst + + assertFieldMatch("weight:1","a!300 b c!200","a b c"); // Best too + + assertFieldMatch("weight:1", "a b c!50","a b c"); // Best + assertFieldMatch("weight:0.6","a b c!50","b c"); // Worse + assertFieldMatch("weight:0.4","a b c!50","b"); // Worse + assertFieldMatch("weight:0.2","a b c!50","c"); // Worst + assertFieldMatch("weight:0.8","a b c!50","a b"); // Middle + + assertFieldMatch("weight:1", "a b c!0","a b c"); // Best + assertFieldMatch("weight:0.5","a b c!0","b c"); // Worst + assertFieldMatch("weight:1", "a b c!0","a b"); // As good as best + assertFieldMatch("weight:0", "a b c!0","c"); // No contribution + + assertFieldMatch("weight:0","a!0 b!0","a b"); + assertFieldMatch("weight:0","a!0 b!0",""); + + // The query also has other terms having a total weight of 300 + // so we add a weight parameter which is the sum of the weights of this query terms + 300 + assertFieldMatch("weight:0.25", "a","a",400); + assertFieldMatch("weight:0", "y","a",400); + assertFieldMatch("weight:0.1667","a a a","a",600); + assertFieldMatch("weight:0.25", "a","a a a",400); + assertFieldMatch("weight:0.5", "a b c","a b c",600); + assertFieldMatch("weight:0.5", "a b c","x x a b x a x c x x a b x c c x",600); + + assertFieldMatch("weight:0.1667","a b c","a",600); + assertFieldMatch("weight:0.3333","a b c","a b",600); + + assertFieldMatch("weight:0.5714","a b c!200","a b c",700); // Best + assertFieldMatch("weight:0.4286","a b c!200","b c",700); // Middle + assertFieldMatch("weight:0.2857","a b c!200","a b",700); // Worst + + 
assertFieldMatch("weight:0.6667","a!300 b c!200","a b c",900); // Better than best + + assertFieldMatch("weight:0.4545","a b c!50","a b c",550); // Best + assertFieldMatch("weight:0.2727","a b c!50","b c",550); // Worse + assertFieldMatch("weight:0.1818","a b c!50","b",550); // Worse + assertFieldMatch("weight:0.0909","a b c!50","c",550); // Worst + assertFieldMatch("weight:0.3636","a b c!50","a b",550); // Middle + + assertFieldMatch("weight:0.4","a b c!0","a b c",500); // Best + assertFieldMatch("weight:0.2","a b c!0","b c",500); // Worst + assertFieldMatch("weight:0.4","a b c!0","a b",500); // As good as best + assertFieldMatch("weight:0", "a b c!0","c",500); // No contribution + + assertFieldMatch("weight:0","a!0 b!0","a b",300); + assertFieldMatch("weight:0","a!0 b!0","",300); +} + + +void +Test::testFieldMatchExecutorSignificance() +{ + assertFieldMatch("significance:1", "a","a"); + assertFieldMatch("significance:0", "a","x"); + assertFieldMatch("significance:0.3333","a a a","a"); + assertFieldMatch("significance:1", "a","a a a"); + assertFieldMatch("significance:1", "a b c","a b c"); + assertFieldMatch("significance:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertFieldMatch("significance:0.3333","a b c","a"); + assertFieldMatch("significance:0.6667","a b c","a b"); + + assertFieldMatch("significance:1", "a b c%0.2","a b c"); // Best + assertFieldMatch("significance:0.75","a b c%0.2","b c"); // Middle + assertFieldMatch("significance:0.5", "a b c%0.2","a b"); // Worst + + assertFieldMatch("significance:1","a%0.3 b c%0.2","a b c"); // Best too + + assertFieldMatch("significance:1", "a b c%0.05","a b c"); // Best + assertFieldMatch("significance:0.6","a b c%0.05","b c"); // Worse + assertFieldMatch("significance:0.4","a b c%0.05","b"); // Worse + assertFieldMatch("significance:0.2","a b c%0.05","c"); // Worst + assertFieldMatch("significance:0.8","a b c%0.05","a b"); // Middle + + assertFieldMatch("significance:1", "a b c%0","a b c"); // Best + 
assertFieldMatch("significance:0.5","a b c%0","b c"); // Worst + assertFieldMatch("significance:1", "a b c%0","a b"); // As good as best + assertFieldMatch("significance:0", "a b c%0","c"); // No contribution + + assertFieldMatch("significance:0","a%0 b%0","a b"); + assertFieldMatch("significance:0","a%0 b%0",""); + + // The query also has other terms having a total significance of 0.3 + // so we add a significance parameter which is the sum of the significances of this query terms + 0.3 + assertFieldMatchTS("significance:0.25", "a","a",0.4f); + assertFieldMatchTS("significance:0", "y","a",0.4f); + assertFieldMatchTS("significance:0.1667","a a a","a",0.6f); + assertFieldMatchTS("significance:0.25", "a","a a a",0.4f); + assertFieldMatchTS("significance:0.5", "a b c","a b c",0.6f); + assertFieldMatchTS("significance:0.5", "a b c","x x a b x a x c x x a b x c c x",0.6f); + + assertFieldMatchTS("significance:0.1667","a b c","a",0.6f); + assertFieldMatchTS("significance:0.3333","a b c","a b",0.6f); + + assertFieldMatchTS("significance:0.5714","a b c%0.2","a b c",0.7f); // Best + assertFieldMatchTS("significance:0.4286","a b c%0.2","b c",0.7f); // Middle + assertFieldMatchTS("significance:0.2857","a b c%0.2","a b",0.7f); // Worst + + assertFieldMatchTS("significance:0.6667","a%0.3 b c%0.2","a b c",0.9f); // Better than best + + assertFieldMatchTS("significance:0.4545","a b c%0.05","a b c",0.55f); // Best + assertFieldMatchTS("significance:0.2727","a b c%0.05","b c",0.55f); // Worse + assertFieldMatchTS("significance:0.1818","a b c%0.05","b",0.55f); // Worse + assertFieldMatchTS("significance:0.0909","a b c%0.05","c",0.55f); // Worst + assertFieldMatchTS("significance:0.3636","a b c%0.05","a b",0.55f); // Middle + + assertFieldMatchTS("significance:0.4","a b c%0","a b c",0.5f); // Best + assertFieldMatchTS("significance:0.2","a b c%0","b c",0.5f); // Worst + assertFieldMatchTS("significance:0.4","a b c%0","a b",0.5f); // As good as best + 
assertFieldMatchTS("significance:0", "a b c%0","c",0.5f); // No contribution
+
+    assertFieldMatchTS("significance:0","a%0 b%0","a b",0.3f);
+    assertFieldMatchTS("significance:0","a%0 b%0","",0.3f);
+}
+
+
+void
+Test::testFieldMatchExecutorImportance()
+{
+    assertFieldMatch("importance:0.75","a b c", "a x x b x c c c",600);
+    assertFieldMatch("importance:0.85","a b!500 c","a x x b x c c c",1000);
+
+    // Twice as common - twice as weighty, but total weight has the extra 300 - less than the previous
+    assertFieldMatch("importance:0.7857","a b!200%0.05 c","a x x b x c c c",700);
+    // Here higher importance exactly offsets the lowered uniqueness
+    assertFieldMatch("importance:0.85","a b!500%0.5 c","a x x b x c c c",1000);
+}
+
+
+void
+Test::testFieldMatchExecutorOccurrence()
+{
+    assertFieldMatch("occurrence:0","a","x");
+    assertFieldMatch("occurrence:1","a","a");
+    assertFieldMatch("occurrence:0","a a a","x");
+    assertFieldMatch("occurrence:1","a a a","a");
+    assertFieldMatch("occurrence:1","a a a","a a a");
+    assertFieldMatch("occurrence:1","a a a","a a a a");
+    assertFieldMatch("occurrence:0.3571","a","x x x a x x a x a x x x a a");
+    assertFieldMatch("occurrence:1","a","a a a a a a a a a a a a a a");
+    assertFieldMatch("occurrence:1","a b","a b b a a a a a b a a b a a");
+
+    // Tests going beyond the occurrence limit (set to 10 on params, which is passed by address below)
+    fieldmatch::Params params;
+    params.setMaxOccurrences(10);
+    assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b", &params);
+    assertFieldMatch("occurrence:0.9231","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+    assertFieldMatch("occurrence:0.6", "a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+    assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+    assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorAbsoluteOccurrence()
+{
+    assertFieldMatch("absoluteOccurrence:0", "a","x");
+    assertFieldMatch("absoluteOccurrence:0.01","a","a");
+    assertFieldMatch("absoluteOccurrence:0","a a a","x");
+    assertFieldMatch("absoluteOccurrence:0.01", "a a a","a");
+    assertFieldMatch("absoluteOccurrence:0.03", "a a a","a a a");
+    assertFieldMatch("absoluteOccurrence:0.04", "a a a","a a a a");
+    assertFieldMatch("absoluteOccurrence:0.05","a","x x x a x x a x a x x x a a");
+    assertFieldMatch("absoluteOccurrence:0.14","a","a a a a a a a a a a a a a a");
+    assertFieldMatch("absoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a");
+
+    // Tests going beyond the occurrence limit (set to 10 on params, which is passed by address below)
+    fieldmatch::Params params;
+    params.setMaxOccurrences(10);
+    assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a b b", &params);
+    assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off
+    assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a
+    assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff
+    assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length
+}
+
+
+void
+Test::testFieldMatchExecutorWeightedOccurrence()
+{
+    assertFieldMatch("weightedOccurrence:0","a!200","x");
+    assertFieldMatch("weightedOccurrence:1","a!200","a");
+    assertFieldMatch("weightedOccurrence:0","a!200 a a","x");
+    assertFieldMatch("weightedOccurrence:1","a!200 a a","a");
+    assertFieldMatch("weightedOccurrence:1","a a a","a a a");
+    assertFieldMatch("weightedOccurrence:1","a!200 a a","a a a a");
+    assertFieldMatch("weightedOccurrence:0.3571","a!200","x x x a x x a x a x x x a a");
+    assertFieldMatch("weightedOccurrence:1","a!200","a a a a a a a a a a a a a a");
+    assertFieldMatch("weightedOccurrence:0.5","a b","a b
b a a a a a b a a b a a"); + + assertFieldMatch("weightedOccurrence:0.5714","a!200 b","a b b a a a a a b a a b a a"); + assertFieldMatch("weightedOccurrence:0.6753","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("weightedOccurrence:0.4286","a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("weightedOccurrence:0.3061","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("weightedOccurrence:0.30","a b", "a a b b b b x x x x"); + assertFieldMatch("weightedOccurrence:0.3333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("weightedOccurrence:0.2667","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertFieldMatch("weightedOccurrence:0.2667","a b!50", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("weightedOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", ¶ms); // Field is too large to consider field length + + assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedOccurrence:0.4667","a b!200","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + 
assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", ¶ms); // Field is too large to consider field length +} + + +void +Test::testFieldMatchExecutorWeightedAbsoluteOccurrence() +{ + assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200","x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200","a"); + assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200 a a","x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200 a a","a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a a a","a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.04", "a!200 a a","a a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.05", "a!200","x x x a x x a x a x x x a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.14", "a!200","a a a a a a a a a a a a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a"); + + assertFieldMatch("weightedAbsoluteOccurrence:0.08", "a!200 b","a b b a a a a a b a a b a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.0945","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("weightedAbsoluteOccurrence:0.06", "a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("weightedAbsoluteOccurrence:0.0429","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a b", "a a b b b b x x x x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.0333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + 
assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a b!50", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("weightedAbsoluteOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", ¶ms); // Field is too large to consider field length + + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedAbsoluteOccurrence:0.4667","a b!200","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", ¶ms); // Field is too large to consider field length +} + + +void +Test::testFieldMatchExecutorSignificantOccurrence() +{ + assertFieldMatch("significantOccurrence:0","a%0.2","x"); + assertFieldMatch("significantOccurrence:1","a%0.2","a"); + assertFieldMatch("significantOccurrence:0","a%0.2 a a","x"); + assertFieldMatch("significantOccurrence:1","a%0.2 a 
a","a"); + assertFieldMatch("significantOccurrence:1","a a a","a a a"); + assertFieldMatch("significantOccurrence:1","a%0.2 a a","a a a a"); + assertFieldMatch("significantOccurrence:0.3571","a%0.2","x x x a x x a x a x x x a a"); + assertFieldMatch("significantOccurrence:1","a%0.2","a a a a a a a a a a a a a a"); + assertFieldMatch("significantOccurrence:0.5","a b","a b b a a a a a b a a b a a"); + + assertFieldMatch("significantOccurrence:0.5714","a%0.2 b","a b b a a a a a b a a b a a"); + assertFieldMatch("significantOccurrence:0.6753","a%1 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("significantOccurrence:0.4286","a b%0.2","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("significantOccurrence:0.3247","a b%1","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("significantOccurrence:0.30","a b", "a a b b b b x x x x"); + assertFieldMatch("significantOccurrence:0.3333","a b%0.2","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("significantOccurrence:0.2667","a%0.2 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertFieldMatch("significantOccurrence:0.2667","a b%0.05", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("significantOccurrence:0","a%0 b%0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a a b b b b b b 
b b b b b", ¶ms); // Field is too large to consider field length + + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("significantOccurrence:0.4667","a b%0.2","a a a a a a a a a a b b", ¶ms); + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a b b", ¶ms); // Starting to cut off + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a a a a a a a a a a a b b", ¶ms); // Way beyond cutoff for a + assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a b b b b b b b b b b", ¶ms); // Exactly no cutoff + assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a a b b b b b b b b b b b", ¶ms); // Field is too large to consider field length +} + +void +Test::testFieldMatchExecutorUnweightedProximity() +{ + assertFieldMatch("unweightedProximity:1", "a","a"); + assertFieldMatch("unweightedProximity:1", "a b c","a b c"); + assertFieldMatch("unweightedProximity:1", "a b c","a b c x"); + assertFieldMatch("unweightedProximity:1", "y a b c","a b c x"); + assertFieldMatch("unweightedProximity:1", "y a b c","a b c x"); + assertFieldMatch("unweightedProximity:0.855","y a b c","a b x c x"); + assertFieldMatch("unweightedProximity:0.750","y a b c","a b x x c x"); + assertFieldMatch("unweightedProximity:0.71", "y a b c","a x b x c x"); // Should be slightly worse than the previous one + assertFieldMatch("unweightedProximity:0.605","y a b c","a x b x x c x"); + assertFieldMatch("unweightedProximity:0.53", "y a b c","a x b x x x c x"); + assertFieldMatch("unweightedProximity:0.5", "y a b c","a x x b x x c x"); +} + + +void +Test::testFieldMatchExecutorReverseProximity() +{ + assertFieldMatch("unweightedProximity:0.33", "a b","b a"); + assertFieldMatch("unweightedProximity:0.62", "a b c","c a b"); + assertFieldMatch("unweightedProximity:0.585", "y a b c","c x a b"); + assertFieldMatch("unweightedProximity:0.33", "a b c","c b a"); + 
assertFieldMatch("unweightedProximity:0.6875","a b c d e","a b d c e"); + assertFieldMatch("unweightedProximity:0.9275","a b c d e","a b x c d e"); +} + + +void +Test::testFieldMatchExecutorAbsoluteProximity() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b","a b"); + assertFieldMatch("absoluteProximity:0.3 proximity:1", "a 0.3:b","a b"); + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","a b"); + assertFieldMatch("absoluteProximity:1 proximity:1", "a 1.0:b","a b"); + assertFieldMatch("absoluteProximity:0.033 proximity:0.33", "a b","b a"); + assertFieldMatch("absoluteProximity:0.0108 proximity:0.0359","a 0.3:b","b a"); // Should be worse than the previous one + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","b a"); + assertFieldMatch("absoluteProximity:0 proximity:0", "a 1.0:b","b a"); + + assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x b x x c"); + assertFieldMatch("absoluteProximity:0.0701 proximity:0.2003","a 0.5:b 0.2:c","a x b x x c"); // Most important is close, less important is far: Better + assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x x b x c"); + assertFieldMatch("absoluteProximity:0.0582 proximity:0.1663","a 0.5:b 0.2:c","a x x b x c"); // Most important is far, less important is close: Worse + + assertFieldMatch("absoluteProximity:0.0727 proximity:0.7267","a b c d","a b x x x x x c d"); + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b 0:c d","a b x x x x x c d"); // Should be better because the gap is unimportant + + // test with another proximity table + std::vector pt; + pt.push_back(0.2); + pt.push_back(0.4); + pt.push_back(0.6); + pt.push_back(0.8); + pt.push_back(1.0); + pt.push_back(0.8); + pt.push_back(0.6); + pt.push_back(0.4); + pt.push_back(0.2); + fieldmatch::Params params; + params.setProximityLimit(4); + params.setProximityTable(pt); + assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x b x x c", ¶ms); + 
assertFieldMatch("absoluteProximity:0.1179 proximity:0.3369","a 0.5:b 0.2:c","a x b x x c", ¶ms); // Most important is close, less important is far: Better + assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x x b x c", ¶ms); + assertFieldMatch("absoluteProximity:0.0834 proximity:0.2384","a 0.5:b 0.2:c","a x x b x c", ¶ms); // Most important is far, less important is close: Worse +} + + +void +Test::testFieldMatchExecutorMultiSegmentProximity() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b c", "a b x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("absoluteProximity:0.05 proximity:0.5","a b c", "a x x b x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("absoluteProximity:0.075 proximity:0.75","a b c d","a x x b x x x x x x x x x x x x x x x x x x x x x x c d"); +} + + +void +Test::testFieldMatchExecutorSegmentDistance() +{ + assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","a b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.5", "a 0.5:b c","a b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","b c x x x x x x x x x x a"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","b x x x x x x x x x x x a x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.006","a b c","a x x x x x x x x x x x b x x x x x x x x c"); + assertFieldMatch("segmentDistance:24 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x c"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","c x x x x x x x x x x x b x x x x x x x x x x a"); +} + + +void +Test::testFieldMatchExecutorSegmentProximity() +{ + assertFieldMatch("segmentProximity:1", "a","a"); + assertFieldMatch("segmentProximity:0", "a","x"); + 
assertFieldMatch("segmentProximity:1", "a","a x"); + assertFieldMatch("segmentProximity:0", "a b","a x x x x x x x x x x x x x x x x x x x x x x x b"); + assertFieldMatch("segmentProximity:0.4","a b","a x x x x x x x x x x x x x x x x x x x x x x b x x x x x x x x x x x x x x x x"); + assertFieldMatch("segmentProximity:0", "a b c","a b x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("segmentProximity:0.4","a b c","a b x x x x x x x x x x x x x x x x x x x x x c x x x x x x x x x x x x x x x x"); + assertFieldMatch("segmentProximity:0.4","a b c","b c x x x x x x x x x x x x x x x x x x x x x a x x x x x x x x x x x x x x x x"); +} + + +void +Test::testFieldMatchExecutorSegmentStarts() +{ +#ifdef FIELDMATCH_OUTPUTS_SEGMENTSTARTS + // Test cases where we choose between multiple different segmentations + // (compiled only when FIELDMATCH_OUTPUTS_SEGMENTSTARTS is defined; otherwise this test body is empty) + { // test segmentSelection + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1 segmentStarts:19,41", + "a b c d e","x a b x c x x x x x x x x x x x x x x a b c x x x x x x x x x e x d x c d x x x c d e"); + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 + // 0 1 2 3 4 + // Should choose - - - - - + // NOTE: the first line above is a position ruler for the field string (last digit of each token index, tokens 0..42); the next two presumably mark the tokens that should be chosen. Their column alignment has been lost. + + assertFieldMatch("segments:1 absoluteProximity:0.0778 proximity:0.778","a b c d e f","x x a b b b c f e d a b c d x e x x x x x f d e f a b c a a b b c c d d e e f f"); + + // Prefer one segment with ok proximity or two segments with great proximity + assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x x x x x x x x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + } +#endif +} + + +// The queries below contain a long run of terms (y) that do not occur in the field; the terms that do match are still expected to give absoluteProximity:0.1 and proximity:1. +void +Test::testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y","a b"); + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b c d y y y y y y y y y y y y y y 
y","a b x x x x x x x x x x x x x x x x x x c d"); + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y c d","a b x x x x x x x x x x x x x x x x x x c d"); +} + + +void +Test::testFieldMatchExecutorQueryRepeats() +{ + // Not really handled perfectly, but good enough + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0", "a a a","a"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a a b c c"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a b c"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a b a b","a b a b"); + assertFieldMatch("absoluteProximity:0.0903 proximity:0.9033 head:0 tail:0 gapLength:1","a b a b","a b x a b"); + // Both terms take the same segment: + assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:18","a a","x x x a x x x x x x x x x x x x x x a x x x"); + // But not when the second is preferable + assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:3","a b b a","x x x a b x x x x x x x x x x x x x x b a x x x"); + assertFieldMatch("matches:2 fieldCompleteness:1","a b b b","a b"); +} + + +void +Test::testFieldMatchExecutorZeroCases() +{ + // Note: we have no hits -> absoluteProximity:0(0.1) proximity:0(1) + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y","a"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","a","x"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","","x"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y",""); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","",""); +} + + +void +Test::testFieldMatchExecutorExceedingIterationLimit() +{ + // Segments found: a x x b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(0); + assertFieldMatch("matches:4 tail:0 
proximity:0.75 absoluteProximity:0.075","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", ¶ms); + } + + // Segments found: a x b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(1); + assertFieldMatch("matches:4 tail:0 proximity:0.855 absoluteProximity:0.0855","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", ¶ms); + } + + // Segments found: a b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(2); + assertFieldMatch("matches:4 tail:0 proximity:1 absoluteProximity:0.1","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", ¶ms); + } +} + + +void +Test::testFieldMatchExecutorRemaining() +{ + + { // test match (aka score) + // Ordered by decreasing match score per query + assertFieldMatch("score:1", "a","a"); + assertFieldMatch("score:0.9339","a","a x"); + assertFieldMatch("score:0", "a","x"); + assertFieldMatch("score:0.9243","a","x a"); + assertFieldMatch("score:0.9025","a","x a x"); + + assertFieldMatch("score:1", "a b","a b"); + assertFieldMatch("score:0.9558","a b","a b x"); + assertFieldMatch("score:0.9463","a b","x a b"); + assertFieldMatch("score:0.1296","a b","a x x x x x x x x x x x x x x x x x x x x x x b"); + assertFieldMatch("score:0.1288","a b","a x x x x x x x x x x x x x x x x x x x x x x x x x x x b"); + + assertFieldMatch("score:0.8647","a b c","x x a x b x x x x x x x x a b c x x x x x x x x c x x"); + assertFieldMatch("score:0.861", "a b c","x x a x b x x x x x x x x x x a b c x x x x x x c x x"); + assertFieldMatch("score:0.4869","a b c","a b x x x x x x x x x x x x x x x x x x x x x x c x x"); + assertFieldMatch("score:0.4853","a b c","x x a x b x x x x x x x x x x b a c x x x x x x c x x"); + assertFieldMatch("score:0.3621","a b c","a x b x x x x x x x x x x x x x x x x x x x x x c x x"); + assertFieldMatch("score:0.3619","a b c","x x a x b x x x x x x x x x x x x x x x x x x x c x 
x"); + assertFieldMatch("score:0.3584","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("score:0.3421","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x x"); + + assertFieldMatch("score:0.3474","a b c","x x a x b x x x x x x x x x x x x x x b x x x b x b x"); + } + + { // test repeated match + // gap==1 caused by finding two possible segments due to repeated matching + assertFieldMatch("fieldCompleteness:1 queryCompleteness:0.6667 segments:1 earliness:1 gaps:1", + "pizza hut pizza","pizza hut"); + } + + //------------------- extra tests -------------------// + + { // test with a query on an attribute field + LOG(info, "Query on an attribute field"); + vespalib::string feature = "fieldMatch(foo)"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addAttributeNode("bar"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(toRankResult(feature, "score:0"))); + } + + + { // test with query on another index field as well + LOG(info, "Query on an another index field"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar")); // search on 'bar' (1) + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add occurrence for 'foo' with query=a + ASSERT_TRUE(mdb->setFieldLength("foo", 1)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // a + + // add occurrence for 'bar' with query=a + ASSERT_TRUE(mdb->setFieldLength("bar", 2)); + 
ASSERT_TRUE(mdb->addOccurence("bar", 1, 1)); // x a + + ASSERT_TRUE(mdb->apply(1)); + + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:1 queryCompleteness:1 fieldCompleteness:1"))); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0"), 2)); // another docid -> no hit -> default values + } + + { // search on more than one document + LOG(info, "Query on more than one document"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'a' (0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'b' (1) + ASSERT_TRUE(ft.setup()); + + // check that we get the same results as these assertFieldMatch() cases, + // but with the match data built by hand through MatchDataBuilder: + // assertFieldMatch("score:1", "a b","a b"); + // assertFieldMatch("score:0.9558","a b","a b x"); + // assertFieldMatch("score:0.9463","a b","x a b"); + + { // docid 1: "a b" + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 2)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b' + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:2"), 1)); + } + { // field "a b x" - NOTE(review): originally labelled "docid 2", but apply() and execute() below use docid 1 + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b' + ASSERT_TRUE(mdb->apply(1)); + RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9558 matches:2"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr, 1)); + } + { // field "x a b" - NOTE(review): originally labelled "docid 3", but apply() and execute() use docid 2 + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 2)); // 'b' + 
ASSERT_TRUE(mdb->apply(2)); + RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9463 matches:2"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr, 2)); + } + } + + { // test where not all hits have position information + LOG(info, "Not all hits have position information"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(200)); // search for 'a' (termId 0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(400)); // search for 'b' (termId 1) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(600)); // search for 'c' (termId 2) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(800)); // search for 'd' (termId 3) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar"))->setWeight(search::query::Weight(1000)); // search for 'e' (termId 4) + ASSERT_TRUE(ft.setup()); + + assertFieldMatch("score:0.3389 completeness:0.5083 degradedMatches:0", "a b c d", "x a b"); + + // field: x a b + { // no pos occ for term b -> score is somewhat degraded (lower .occurrence) + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + // add occurrence with query term 'a' + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); + // add hit with query term 'b' + mdb->getTermFieldMatchData(1, 0)->reset(1); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", + "score:0.3231 completeness:0.5083 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:1"). 
+ setEpsilon(1e-4))); + } + { // no pos occ for term a & b + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + // add hit with query term 'a' + mdb->getTermFieldMatchData(0, 0)->reset(1); + // add hit with query term 'b' + mdb->getTermFieldMatchData(1, 0)->reset(1); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", + "score:0 completeness:0.475 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:2"). + setEpsilon(1e-4))); + } + } + + { // invalid field length + LOG(info, "We have an invalid field length"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(100)); // search for 'a' (termId 0) + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add occurrence with query term 'a' + ASSERT_TRUE(mdb->setFieldLength("foo", search::fef::FieldPositionsIterator::UNKNOWN_LENGTH)); // invalid field length + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + + ASSERT_TRUE(mdb->apply(1)); + + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0 matches:1 degradedMatches:0"))); + } + + { // test default values when we do not have hits in the field + LOG(info, "Default values when we have no hits"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0) + ASSERT_TRUE(ft.setup()); + + // must create this so that term match data is configured with the term data object + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + RankResult rr = toRankResult("fieldMatch(foo)", + "score:0 " + "proximity:0 " + "completeness:0 " + "queryCompleteness:0 " + "fieldCompleteness:0 " + 
"orderness:0 " + "relatedness:0 " + "earliness:0 " + "longestSequenceRatio:0 " + "segmentProximity:0 " + "unweightedProximity:0 " + "absoluteProximity:0 " + "occurrence:0 " + "absoluteOccurrence:0 " + "weightedOccurrence:0 " + "weightedAbsoluteOccurrence:0 " + "significantOccurrence:0 " + "weight:0 " + "significance:0 " + "importance:0 " + "segments:0 " + "matches:0 " + "outOfOrder:0 " + "gaps:0 " + "gapLength:0 " + "longestSequence:0 " + "head:0 " + "tail:0 " + "segmentDistance:0 ") + .setEpsilon(10e-6); + + ASSERT_TRUE(ft.execute(rr, 1)); // another docid -> no hit -> default values + } +} diff --git a/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp new file mode 100644 index 00000000000..04caadd2029 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".prod_features_fieldtermmatch"); + +#include "prod_features.h" +#include + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +void +Test::testFieldTermMatch() +{ + { + // Test blueprint. 
+ FieldTermMatchBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "fieldTermMatch")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("0")); + FT_SETUP_FAIL(pt, params.add("1")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(pt, ie, params.add("foo")); + FT_SETUP_OK (pt, ie, params.add("0"), in, + out.add("firstPosition") + .add("lastPosition") + .add("occurrences").add("weight").add("exactness")); + FT_SETUP_FAIL(pt, ie, params.add("1")); + } + { + FT_DUMP_EMPTY(_factory, "fieldTermMatch"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + for (uint32_t term = 0; term < 5; ++term) { + vespalib::string bn = vespalib::make_string("fieldTermMatch(bar,%u)", term); + dump.add(bn + ".firstPosition").add(bn + ".occurrences").add(bn + ".weight"); + } + FT_DUMP(_factory, "fieldTermMatch", ie, dump); + + ie.getProperties().add("fieldTermMatch.numTerms", "0"); + FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie); + + ie.getProperties().add("fieldTermMatch.numTerms.bar", "5"); + FT_DUMP(_factory, "fieldTermMatch", ie, dump); + } + } + + { // Test executor. + FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000) + .addScore("fieldTermMatch(foo,0).lastPosition", 1000000) + .addScore("fieldTermMatch(foo,0).occurrences", 0) + .addScore("fieldTermMatch(foo,0).weight", 0) + .addScore("fieldTermMatch(foo,0).exactness", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor. 
+ FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 100)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 20)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 10) + .addScore("fieldTermMatch(foo,0).lastPosition", 20) + .addScore("fieldTermMatch(foo,0).occurrences", 2) + .addScore("fieldTermMatch(foo,0).weight", 2) + .addScore("fieldTermMatch(foo,0).exactness", 1); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor (match without position information) + FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); + ASSERT_TRUE(ft.setup()); + + // make sure the term match data is initialized with the term data + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->getTermFieldMatchData(0, 0)->reset(1); + + search::fef::test::RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000) + .addScore("fieldTermMatch(foo,0).lastPosition", 1000000) + .addScore("fieldTermMatch(foo,0).occurrences", 1) + .addScore("fieldTermMatch(foo,0).weight", 0) + .addScore("fieldTermMatch(foo,0).exactness", 0); + ASSERT_TRUE(ft.execute(exp)); + } +} diff --git a/searchlib/src/tests/features/prod_features_framework.cpp b/searchlib/src/tests/features/prod_features_framework.cpp new file mode 100644 index 00000000000..5ce5e2c3177 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_framework.cpp @@ -0,0 +1,174 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root.
+#include // NOTE(review): header name missing on this and the other bare #include lines (angle-bracket text looks stripped); restore from upstream
+#include
+LOG_SETUP(".prod_features_framework");
+
+#include "prod_features.h"
+#include
+
+using namespace search::features;
+using namespace search::fef;
+using namespace search::fef::test;
+
+void
+Test::testFramework() // exercises the test helpers themselves: environment builders, MatchDataBuilder and FeatureTest
+{
+    LOG(info, "testFramework()");
+    IndexEnvironment indexEnv;
+    { // test index environment builder
+        IndexEnvironmentBuilder ieb(indexEnv);
+        ieb.addField(FieldType::INDEX, CollectionType::SINGLE, "foo")
+            .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar")
+            .addField(FieldType::INDEX, CollectionType::ARRAY, "baz");
+        {
+            const FieldInfo * info = indexEnv.getFieldByName("foo");
+            ASSERT_TRUE(info != NULL);
+            EXPECT_EQUAL(info->id(), 0u);
+            EXPECT_TRUE(info->type() == FieldType::INDEX);
+            EXPECT_TRUE(info->collection() == CollectionType::SINGLE);
+        }
+        {
+            const FieldInfo * info = indexEnv.getFieldByName("bar");
+            ASSERT_TRUE(info != NULL);
+            EXPECT_EQUAL(info->id(), 1u);
+            EXPECT_TRUE(info->type() == FieldType::ATTRIBUTE);
+            EXPECT_TRUE(info->collection() == CollectionType::WEIGHTEDSET);
+        }
+        {
+            const FieldInfo * info = indexEnv.getFieldByName("baz");
+            ASSERT_TRUE(info != NULL);
+            EXPECT_EQUAL(info->id(), 2u);
+            EXPECT_TRUE(info->type() == FieldType::INDEX);
+            EXPECT_TRUE(info->collection() == CollectionType::ARRAY);
+        }
+        ASSERT_TRUE(indexEnv.getFieldByName("qux") == NULL);
+    }
+
+    QueryEnvironment queryEnv(&indexEnv);
+    MatchDataLayout layout;
+    { // test query environment builder
+        QueryEnvironmentBuilder qeb(queryEnv, layout);
+        {
+            SimpleTermData &tr = qeb.addAllFields();
+            ASSERT_TRUE(tr.lookupField(0) != 0);
+            ASSERT_TRUE(tr.lookupField(1) != 0);
+            ASSERT_TRUE(tr.lookupField(2) != 0);
+            EXPECT_TRUE(tr.lookupField(3) == 0);
+            EXPECT_TRUE(tr.lookupField(0)->getHandle() == 0u);
+            EXPECT_TRUE(tr.lookupField(1)->getHandle() == 1u);
+            EXPECT_TRUE(tr.lookupField(2)->getHandle() == 2u);
+            const ITermData *tp = queryEnv.getTerm(0);
+            ASSERT_TRUE(tp != NULL);
+            EXPECT_EQUAL(tp, &tr);
+        }
+        {
+            SimpleTermData *tr = qeb.addAttributeNode("bar");
+            ASSERT_TRUE(tr != 0);
+            ASSERT_TRUE(tr->lookupField(1) != 0);
+            EXPECT_TRUE(tr->lookupField(0) == 0);
+            EXPECT_TRUE(tr->lookupField(2) == 0);
+            EXPECT_TRUE(tr->lookupField(3) == 0);
+            EXPECT_TRUE(tr->lookupField(1)->getHandle() == 3u); // handles 0-2 were taken by the all-fields term above
+            const ITermData *tp = queryEnv.getTerm(1);
+            ASSERT_TRUE(tp != NULL);
+            EXPECT_EQUAL(tp, tr);
+        }
+    }
+
+    MatchData::UP data = layout.createMatchData();
+    EXPECT_EQUAL(data->getNumTermFields(), 4u);
+    EXPECT_EQUAL(data->getNumFeatures(), 0u);
+
+    { // check match data access
+        MatchDataBuilder mdb(queryEnv, *data);
+
+        // setup some occurrence lists (added out of order; the iterators below are expected to yield them sorted)
+        ASSERT_TRUE(mdb.addOccurence("foo", 0, 20));
+        ASSERT_TRUE(mdb.addOccurence("foo", 0, 10));
+        ASSERT_TRUE(mdb.setFieldLength("foo", 50));
+        ASSERT_TRUE(mdb.addOccurence("baz", 0, 15));
+        ASSERT_TRUE(mdb.addOccurence("baz", 0, 5));
+        ASSERT_TRUE(mdb.setFieldLength("baz", 100));
+        ASSERT_TRUE(mdb.apply(100));
+
+        {
+            {
+                TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 0);
+                ASSERT_TRUE(tfmd != NULL);
+
+                FieldPositionsIterator itr = tfmd->getIterator(); // foo (index)
+                ASSERT_TRUE(itr.valid());
+                EXPECT_EQUAL(itr.getFieldLength(), 50u);
+                EXPECT_EQUAL(itr.getPosition(), 10u);
+                itr.next();
+                ASSERT_TRUE(itr.valid());
+                EXPECT_EQUAL(itr.getPosition(), 20u);
+                itr.next();
+                ASSERT_TRUE(!itr.valid());
+            }
+            {
+                TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 1);
+                ASSERT_TRUE(tfmd != NULL);
+
+                FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute)
+                ASSERT_TRUE(!itr.valid());
+            }
+            {
+                TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 2);
+                ASSERT_TRUE(tfmd != NULL);
+
+                FieldPositionsIterator itr = tfmd->getIterator(); // baz (index)
+                ASSERT_TRUE(itr.valid());
+                EXPECT_EQUAL(itr.getFieldLength(), 100u);
+                EXPECT_EQUAL(itr.getPosition(), 5u);
+                itr.next();
+                ASSERT_TRUE(itr.valid());
+                EXPECT_EQUAL(itr.getPosition(), 15u);
+                itr.next();
+                ASSERT_TRUE(!itr.valid());
+            }
+        }
+        {
+            TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(1, 1);
+            ASSERT_TRUE(tfmd != NULL);
+
+            FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute)
+            ASSERT_TRUE(!itr.valid());
+        }
+    }
+    { // check that data is cleared
+        MatchDataBuilder mdb(queryEnv, *data);
+        EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 0)->getDocId(), TermFieldMatchData::invalidId());
+        EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 1)->getDocId(), TermFieldMatchData::invalidId());
+        EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 2)->getDocId(), TermFieldMatchData::invalidId());
+        EXPECT_EQUAL(mdb.getTermFieldMatchData(1, 1)->getDocId(), TermFieldMatchData::invalidId());
+
+        // test illegal things
+        ASSERT_TRUE(!mdb.addOccurence("foo", 1, 10)); // invalid term/field combination
+    }
+
+    BlueprintFactory factory;
+    factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+    Properties overrides;
+
+    { // test feature test runner
+        FeatureTest ft(factory, indexEnv, queryEnv, layout,
+                       StringList().add("value(10)").add("value(20)").add("value(30)"), overrides);
+        MatchDataBuilder::UP mdb1 = ft.createMatchDataBuilder();
+        EXPECT_TRUE(mdb1.get() == NULL); // no builder and no execution before setup()
+        EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 10.0f)));
+        ASSERT_TRUE(ft.setup());
+        MatchDataBuilder::UP mdb2 = ft.createMatchDataBuilder();
+        EXPECT_TRUE(mdb2.get() != NULL);
+
+        EXPECT_TRUE(ft.execute(RankResult().addScore("value(10)", 10.0f).addScore("value(20)", 20.0f)));
+        EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 20.0f)));
+        EXPECT_TRUE(!ft.execute(RankResult().addScore("value(5)", 5.0f)));
+    }
+    { // test simple constructor
+        MatchDataLayout mdl; // match data layout cannot be reused
+        FeatureTest ft(factory, indexEnv, queryEnv, mdl, "value(10)", overrides);
+        ASSERT_TRUE(ft.setup());
+        EXPECT_TRUE(ft.execute(10.0f));
+    }
+}
diff --git a/searchlib/src/tests/features/prod_features_test.sh b/searchlib/src/tests/features/prod_features_test.sh
new file mode 100755
index 00000000000..bec2b49807f
--- /dev/null
+++
b/searchlib/src/tests/features/prod_features_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_prod_features_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/features/ranking_expression/.gitignore b/searchlib/src/tests/features/ranking_expression/.gitignore new file mode 100644 index 00000000000..63ab51e663a --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/.gitignore @@ -0,0 +1 @@ +searchlib_ranking_expression_test_app diff --git a/searchlib/src/tests/features/ranking_expression/CMakeLists.txt b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt new file mode 100644 index 00000000000..4caddaa7bd8 --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ranking_expression_test_app + SOURCES + ranking_expression_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_ranking_expression_test_app COMMAND searchlib_ranking_expression_test_app) diff --git a/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp new file mode 100644 index 00000000000..64fb3477951 --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include // NOTE(review): header name missing on this and the other bare #include lines (angle-bracket text looks stripped); restore from upstream
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace search::features;
+using namespace search::fef::test;
+using namespace search::fef;
+using namespace vespalib::eval;
+
+using TypeMap = std::map<vespalib::string, vespalib::string>; // object input name -> value type spec
+
+struct SetupResult { // runs blueprint setup for 'expression' with the given object inputs and records the outcome
+    IndexEnvironment index_env;
+    RankingExpressionBlueprint rank;
+    DummyDependencyHandler deps;
+    bool setup_ok;
+    SetupResult(const TypeMap &object_inputs,
+                const vespalib::string &expression)
+        : index_env(), rank(), deps(rank), setup_ok(false)
+    {
+        rank.setName("self");
+        index_env.getProperties().add("self.rankingScript", expression);
+        for (const auto &input: object_inputs) {
+            deps.define_object_input(input.first, ValueType::from_spec(input.second));
+        }
+        setup_ok = rank.setup(index_env, {});
+        EXPECT_TRUE(!deps.accept_type_mismatch);
+    }
+};
+
+void verify_output_type(const TypeMap &object_inputs,
+                        const vespalib::string &expression, const FeatureType &expect)
+{
+    SetupResult result(object_inputs, expression);
+    EXPECT_TRUE(result.setup_ok);
+    EXPECT_EQUAL(1u, result.deps.output.size());
+    ASSERT_EQUAL(1u, result.deps.output_type.size());
+    if (expect.is_object()) { // object outputs must match the exact value type; number outputs only need to be non-object
+        EXPECT_EQUAL(expect.type(), result.deps.output_type[0].type());
+    } else {
+        EXPECT_TRUE(!result.deps.output_type[0].is_object());
+    }
+}
+
+void verify_setup_fail(const TypeMap &object_inputs,
+                       const vespalib::string &expression)
+{
+    SetupResult result(object_inputs, expression);
+    EXPECT_TRUE(!result.setup_ok);
+    EXPECT_EQUAL(0u, result.deps.output.size());
+}
+
+TEST("require that expression with only number inputs produce number output (compiled)") {
+    TEST_DO(verify_output_type({}, "a*b", FeatureType::number()));
+}
+
+TEST("require that expression with object input produces object output (interpreted)") {
+    TEST_DO(verify_output_type({{"b", "double"}}, "a*b", FeatureType::object(ValueType::double_type())));
+}
+
+TEST("require that expression with internal tensor operations produce object output (interpreted)") {
+    TEST_DO(verify_output_type({}, "a*b*sum({{x:1}:5,{x:2}:7})", FeatureType::object(ValueType::double_type())));
+}
+
+TEST("require that ranking expression can resolve to concrete complex type") {
+    TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor(y{},z{})"}}, "a*b",
+                               FeatureType::object(ValueType::from_spec("tensor(x{},y{},z{})"))));
+}
+
+TEST("require that ranking expression can resolve to abstract complex type") {
+    TEST_DO(verify_output_type({{"a", "tensor"}}, "a*b", FeatureType::object(ValueType::from_spec("tensor"))));
+}
+
+TEST("require that ranking expression can resolve to 'any' type") {
+    TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor"}}, "a*b",
+                               FeatureType::object(ValueType::from_spec("any"))));
+}
+
+TEST("require that setup fails for incompatible types") {
+    TEST_DO(verify_setup_fail({{"a", "tensor(x{},y{})"}, {"b", "tensor(y[10],z{})"}}, "a*b"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/raw_score/.gitignore b/searchlib/src/tests/features/raw_score/.gitignore
new file mode 100644
index 00000000000..a1b2d4e3f16
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/.gitignore
@@ -0,0 +1 @@
+searchlib_raw_score_test_app
diff --git a/searchlib/src/tests/features/raw_score/CMakeLists.txt b/searchlib/src/tests/features/raw_score/CMakeLists.txt
new file mode 100644
index 00000000000..a672b7b071d
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_raw_score_test_app
+    SOURCES
+    raw_score_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_raw_score_test_app COMMAND searchlib_raw_score_test_app)
diff --git a/searchlib/src/tests/features/raw_score/FILES b/searchlib/src/tests/features/raw_score/FILES
new file mode 100644
index 00000000000..479927259ee
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/FILES
@@ -0,0 +1 @@
+raw_score_test.cpp
diff --git a/searchlib/src/tests/features/raw_score/raw_score_test.cpp b/searchlib/src/tests/features/raw_score/raw_score_test.cpp
new file mode 100644
index 00000000000..0a15ff69318
--- /dev/null
+++ b/searchlib/src/tests/features/raw_score/raw_score_test.cpp
@@ -0,0 +1,151 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include // NOTE(review): header name missing on this and the other bare #include lines (angle-bracket text looks stripped); restore from upstream
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+const std::string featureName("rawScore(foo)");
+
+struct BlueprintFactoryFixture {
+    BlueprintFactory factory;
+    BlueprintFactoryFixture() : factory()
+    {
+        setup_search_features(factory);
+    }
+};
+
+struct IndexFixture { // index environment with index field "foo" and attribute field "bar"
+    IndexEnvironment indexEnv;
+    IndexFixture() : indexEnv()
+    {
+        IndexEnvironmentBuilder builder(indexEnv);
+        builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+    }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor { // visitor that fails the test if any feature is dumped
+    virtual void visitDumpFeature(const vespalib::string &) {
+        TEST_ERROR("no features should be dumped");
+    }
+    FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture { // rank program for rawScore(foo) with fooCnt + barCnt query terms
+    QueryEnvironment queryEnv;
+    RankSetup rankSetup;
+    RankProgram::UP rankProgram;
+    MatchDataLayout mdl;
+    std::vector<TermFieldHandle> fooHandles;
+    std::vector<TermFieldHandle> barHandles;
+    RankFixture(size_t fooCnt, size_t barCnt)
+        : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+          rankProgram(), mdl(), fooHandles(), barHandles()
+    {
+        for (size_t i = 0; i < fooCnt; ++i) { // one single-field term per requested "foo" handle
+            uint32_t fieldId = indexEnv.getFieldByName("foo")->id();
+            fooHandles.push_back(mdl.allocTermField(fieldId));
+            SimpleTermData term;
+            term.addField(fieldId).setHandle(fooHandles.back());
+            queryEnv.getTerms().push_back(term);
+        }
+        for (size_t i = 0; i < barCnt; ++i) { // same for "bar"
+            uint32_t fieldId = indexEnv.getFieldByName("bar")->id();
+            barHandles.push_back(mdl.allocTermField(fieldId));
+            SimpleTermData term;
+            term.addField(fieldId).setHandle(barHandles.back());
+            queryEnv.getTerms().push_back(term);
+        }
+        rankSetup.setFirstPhaseRank(featureName);
+        rankSetup.setIgnoreDefaultRankFeatures(true);
+        ASSERT_TRUE(rankSetup.compile());
+        rankProgram = rankSetup.create_first_phase_program();
+        rankProgram->setup(mdl, queryEnv);
+    }
+    feature_t getScore(uint32_t docId) {
+        rankProgram->run(docId);
+        return *Utils::getScoreFeature(*rankProgram);
+    }
+    void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) {
+        rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score);
+    }
+    void setFooScore(uint32_t i, uint32_t docId, feature_t score) {
+        ASSERT_LESS(i, fooHandles.size());
+        setScore(fooHandles[i], docId, score);
+    }
+    void setBarScore(uint32_t i, uint32_t docId, feature_t score) {
+        ASSERT_LESS(i, barHandles.size());
+        setScore(barHandles[i], docId, score);
+    }
+};
+
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) {
+    Blueprint::SP bp = f.factory.createBlueprint("rawScore");
+    EXPECT_TRUE(bp.get() != 0);
+    EXPECT_TRUE(dynamic_cast<RawScoreBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped", RawScoreBlueprint, IndexFixture, FeatureDumpFixture) {
+    f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on index field", RawScoreBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+    EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo")));
+}
+
+TEST_FF("require that setup can be done on attribute field", RawScoreBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+    EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar")));
+}
+
+TEST_FF("require that setup fails for unknown field", RawScoreBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str()));
+    EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown")));
+}
+
+TEST_F("require that not searching a filed will give it 0.0 raw score", RankFixture(0, 3)) {
+    EXPECT_EQUAL(0.0, f1.getScore(10));
+}
+
+TEST_F("require that raw score can be obtained", RankFixture(1, 0)) {
+    f1.setFooScore(0, 10, 5.0);
+    EXPECT_EQUAL(5.0, f1.getScore(10));
+}
+
+TEST_F("require that multiple raw scores are accumulated", RankFixture(3, 0)) {
+    f1.setFooScore(0, 10, 1.0);
+    f1.setFooScore(1, 10, 2.0);
+    f1.setFooScore(2, 10, 3.0);
+    EXPECT_EQUAL(6.0, f1.getScore(10));
+}
+
+TEST_F("require that stale raw scores are ignored", RankFixture(3, 0)) {
+    f1.setFooScore(0, 10, 1.0);
+    f1.setFooScore(1, 9, 2.0); // set for doc 9, so it must not count for doc 10
+    f1.setFooScore(2, 10, 3.0);
+    EXPECT_EQUAL(4.0, f1.getScore(10));
+}
+
+TEST_F("require that raw scores from other fields are ignored", RankFixture(2, 2)) {
+    f1.setFooScore(0, 10, 1.0);
+    f1.setFooScore(1, 10, 2.0);
+    f1.setBarScore(0, 10, 5.0);
+    f1.setBarScore(1, 10, 6.0);
+    EXPECT_EQUAL(3.0, f1.getScore(10));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/subqueries/.gitignore b/searchlib/src/tests/features/subqueries/.gitignore
new file mode 100644
index 00000000000..63dc19177d1
--- /dev/null
+++ b/searchlib/src/tests/features/subqueries/.gitignore
@@ -0,0 +1 @@
+searchlib_subqueries_test_app diff --git a/searchlib/src/tests/features/subqueries/CMakeLists.txt b/searchlib/src/tests/features/subqueries/CMakeLists.txt new file mode 100644 index 00000000000..45845e8ec1b --- /dev/null +++ b/searchlib/src/tests/features/subqueries/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_subqueries_test_app + SOURCES + subqueries_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_subqueries_test_app COMMAND searchlib_subqueries_test_app) diff --git a/searchlib/src/tests/features/subqueries/subqueries_test.cpp b/searchlib/src/tests/features/subqueries/subqueries_test.cpp new file mode 100644 index 00000000000..160ec404b20 --- /dev/null +++ b/searchlib/src/tests/features/subqueries/subqueries_test.cpp @@ -0,0 +1,162 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include // NOTE(review): header name missing on this and the other bare #include lines (angle-bracket text looks stripped); restore from upstream
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::test;
+using namespace search::features;
+
+struct BlueprintFactoryFixture {
+    BlueprintFactory factory;
+    BlueprintFactoryFixture() : factory()
+    {
+        setup_search_features(factory);
+    }
+};
+
+struct IndexFixture { // index environment with index field "foo" and attribute field "bar"
+    IndexEnvironment indexEnv;
+    IndexFixture() : indexEnv()
+    {
+        IndexEnvironmentBuilder builder(indexEnv);
+        builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo");
+        builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar");
+    }
+};
+
+struct FeatureDumpFixture : public IDumpFeatureVisitor { // visitor that fails the test if any feature is dumped
+    virtual void visitDumpFeature(const vespalib::string &) {
+        TEST_ERROR("no features should be dumped");
+    }
+    FeatureDumpFixture() : IDumpFeatureVisitor() {}
+};
+
+struct RankFixture : BlueprintFactoryFixture, IndexFixture { // rank program for featureName with fooCnt + barCnt query terms
+    QueryEnvironment queryEnv;
+    RankSetup rankSetup;
+    RankProgram::UP rankProgram;
+    MatchDataLayout mdl;
+    std::vector<TermFieldHandle> fooHandles;
+    std::vector<TermFieldHandle> barHandles;
+    RankFixture(size_t fooCnt, size_t barCnt,
+                std::string featureName = "subqueries(foo)")
+        : queryEnv(&indexEnv), rankSetup(factory, indexEnv),
+          rankProgram(), mdl(), fooHandles(), barHandles()
+    {
+        fooHandles = addFields(fooCnt, indexEnv.getFieldByName("foo")->id());
+        barHandles = addFields(barCnt, indexEnv.getFieldByName("bar")->id());
+        rankSetup.setFirstPhaseRank(featureName);
+        rankSetup.setIgnoreDefaultRankFeatures(true);
+        ASSERT_TRUE(rankSetup.compile());
+        rankProgram = rankSetup.create_first_phase_program();
+        rankProgram->setup(mdl, queryEnv);
+    }
+    std::vector<TermFieldHandle> addFields(size_t count, uint32_t fieldId) { // adds 'count' single-field terms and returns their handles
+        std::vector<TermFieldHandle> handles;
+        for (size_t i = 0; i < count; ++i) {
+            handles.push_back(mdl.allocTermField(fieldId));
+            SimpleTermData term;
+            term.addField(fieldId).setHandle(handles.back());
+            queryEnv.getTerms().push_back(term);
+        }
+        return handles;
+    }
+    feature_t getSubqueries(uint32_t docId) {
+        rankProgram->run(docId);
+        return *Utils::getScoreFeature(*rankProgram);
+    }
+    void setSubqueries(TermFieldHandle handle, uint32_t docId,
+                       uint64_t subqueries) {
+        rankProgram->match_data().resolveTermField(handle)->setSubqueries(docId, subqueries);
+    }
+    void setFooSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) {
+        ASSERT_LESS(i, fooHandles.size());
+        setSubqueries(fooHandles[i], docId, subqueries);
+    }
+    void setBarSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) {
+        ASSERT_LESS(i, barHandles.size());
+        setSubqueries(barHandles[i], docId, subqueries);
+    }
+};
+
+TEST_F("require that blueprint can be created from factory",
+       BlueprintFactoryFixture) {
+    Blueprint::SP bp = f.factory.createBlueprint("subqueries");
+    EXPECT_TRUE(bp.get() != 0);
+    EXPECT_TRUE(dynamic_cast<SubqueriesBlueprint*>(bp.get()) != 0);
+}
+
+TEST_FFF("require that no features are dumped",
+         SubqueriesBlueprint, IndexFixture, FeatureDumpFixture) {
+    f1.visitDumpFeatures(f2.indexEnv, f3);
+}
+
+TEST_FF("require that setup can be done on index field",
+        SubqueriesBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str()));
+    EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"foo"}));
+}
+
+TEST_FF("require that setup can be done on attribute field",
+        SubqueriesBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str()));
+    EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"bar"}));
+}
+
+TEST_FF("require that setup fails for unknown field",
+        SubqueriesBlueprint, IndexFixture) {
+    DummyDependencyHandler deps(f1);
+    f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str()));
+    EXPECT_FALSE(((Blueprint&)f1).setup(f2.indexEnv, {"unknown"}));
+}
+
+TEST_F("require that not searching a field will give it 0 subqueries",
+       RankFixture(0, 3)) {
+    EXPECT_EQUAL(0, f1.getSubqueries(10));
+}
+
+TEST_F("require that subqueries can be obtained", RankFixture(1, 0)) {
+    f1.setFooSubqueries(0, 10, 0x1234);
+    EXPECT_EQUAL(0x1234, f1.getSubqueries(10));
+}
+
+TEST_F("require that msb subqueries can be obtained",
+       RankFixture(1, 0, "subqueries(foo).msb")) {
+    f1.setFooSubqueries(0, 10, 0x123412345678ULL); // expected .msb value is the part above the low 32 bits
+    EXPECT_EQUAL(0x1234, f1.getSubqueries(10));
+}
+
+TEST_F("require that multiple subqueries are accumulated", RankFixture(3, 0)) {
+    f1.setFooSubqueries(0, 10, 1);
+    f1.setFooSubqueries(1, 10, 2);
+    f1.setFooSubqueries(2, 10, 4);
+    EXPECT_EQUAL(7, f1.getSubqueries(10));
+}
+
+TEST_F("require that stale subqueries are ignored", RankFixture(3, 0)) {
+    f1.setFooSubqueries(0, 10, 1);
+    f1.setFooSubqueries(1, 9, 2); // set for doc 9, so it must not count for doc 10
+    f1.setFooSubqueries(2, 10, 4);
+    EXPECT_EQUAL(5, f1.getSubqueries(10));
+}
+
+TEST_F("require that subqueries from other fields are ignored",
+       RankFixture(2, 2)) {
+    f1.setFooSubqueries(0, 10, 1);
+    f1.setFooSubqueries(1, 10, 2);
+    f1.setBarSubqueries(0, 10, 4);
+    f1.setBarSubqueries(1, 10, 8);
+    EXPECT_EQUAL(3, f1.getSubqueries(10));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/tensor/.gitignore b/searchlib/src/tests/features/tensor/.gitignore
new file mode 100644
index 00000000000..ae6d6dfb414
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/.gitignore
@@ -0,0 +1 @@
+searchlib_tensor_test_app
diff --git a/searchlib/src/tests/features/tensor/CMakeLists.txt b/searchlib/src/tests/features/tensor/CMakeLists.txt
new file mode 100644
index 00000000000..33f7d44d8fe
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensor_test_app
+    SOURCES
+    tensor_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_tensor_test_app COMMAND searchlib_tensor_test_app)
diff --git a/searchlib/src/tests/features/tensor/FILES b/searchlib/src/tests/features/tensor/FILES
new file mode 100644
index 00000000000..6ece9b360b5
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/FILES
@@ -0,0 +1 @@
+tensor_test.cpp
diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp
new file mode 100644
index 00000000000..caceea0f47b
--- /dev/null
+++ b/searchlib/src/tests/features/tensor/tensor_test.cpp
@@ -0,0 +1,237 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include // NOTE(review): header name missing on this and the other bare #include lines (angle-bracket text looks stripped); restore from upstream
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using search::feature_t;
+using namespace search::fef;
+using namespace search::fef::indexproperties;
+using namespace search::fef::test;
+using namespace search::features;
+using search::AttributeFactory;
+using search::attribute::TensorAttribute;
+using search::AttributeVector;
+using vespalib::eval::Value;
+using vespalib::eval::Function;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TensorCells;
+using vespalib::tensor::DenseTensorCells;
+using vespalib::tensor::TensorDimensions;
+using vespalib::tensor::TensorFactory;
+using vespalib::tensor::TensorType;
+using vespalib::eval::InterpretedFunction;
+using vespalib::tensor::DefaultTensorEngine;
+
+typedef search::attribute::Config AVC;
+typedef search::attribute::BasicType AVBT;
+typedef search::attribute::CollectionType AVCT;
+typedef search::AttributeVector::SP AttributePtr;
+typedef FtTestApp FTA;
+
+namespace
+{
+
+Tensor::UP createTensor(const TensorCells &cells,
+                        const TensorDimensions &dimensions) {
+    vespalib::tensor::DefaultTensor::builder builder;
+    return TensorFactory::create(cells, dimensions, builder);
+}
+
+}
+
+struct ExecFixture // sets up attributes and query tensors, then executes one feature and extracts its tensor
+{
+    BlueprintFactory factory;
+    FtFeatureTest test;
+    ExecFixture(const vespalib::string &feature)
+        : factory(),
+          test(factory, feature)
+    {
+        setup_search_features(factory);
+        setupAttributeVectors();
+        setupQueryEnvironment();
+        ASSERT_TRUE(test.setup());
+    }
+    void addAttributeField(const vespalib::string &attrName) {
+        test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, attrName);
+    }
+    AttributeVector::SP createStringAttribute(const vespalib::string &attrName) {
+        addAttributeField(attrName);
+        return AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE));
+    }
+    AttributeVector::SP createTensorAttribute(const vespalib::string &attrName, const vespalib::string &type) {
+        addAttributeField(attrName);
+        AVC config(AVBT::TENSOR, AVCT::SINGLE);
+        config.setTensorType(TensorType::fromSpec(type));
+        return AttributeFactory::createAttribute(attrName, config);
+    }
+    void setAttributeTensorType(const vespalib::string &attrName, const vespalib::string &type) {
+        type::Attribute::set(test.getIndexEnv().getProperties(), attrName, type);
+    }
+    void setQueryTensorType(const vespalib::string &queryFeatureName, const vespalib::string &type) {
+        type::QueryFeature::set(test.getIndexEnv().getProperties(), queryFeatureName, type);
+    }
+    void setupAttributeVectors() {
+        std::vector<AttributePtr> attrs;
+        attrs.push_back(createTensorAttribute("tensorattr", "tensor(x{})"));
+        attrs.push_back(createStringAttribute("singlestr"));
+        attrs.push_back(createTensorAttribute("wrongtype", "tensor(y{})")); // attribute type differs from the tensor(x{}) declared below
+        addAttributeField("null"); // field exists, but no attribute vector is registered for it
+        setAttributeTensorType("tensorattr", "tensor(x{})");
+        setAttributeTensorType("wrongtype", "tensor(x{})");
+        setAttributeTensorType("null", "tensor(x{})");
+
+        for (const auto &attr : attrs) {
+            attr->addReservedDoc();
+            attr->addDocs(2);
+            attr->clearDoc(1);
+            attr->clearDoc(2);
+            attr->commit();
+            test.getIndexEnv().getAttributeManager().add(attr);
+        }
+
+        TensorAttribute *tensorAttr =
+            dynamic_cast<TensorAttribute *>(attrs[0].get());
+
+        tensorAttr->setTensor(1, *createTensor({ {{{"x", "a"}}, 3},
+                                                 {{{"x", "b"}}, 5},
+                                                 {{{"x", "c"}}, 7} },
+                                               { "x" })); // only doc 1 gets a tensor; doc 2 stays cleared
+
+        for (const auto &attr : attrs) {
+            attr->commit();
+        }
+    }
+    void setQueryTensor(const vespalib::string &tensorName,
+                        const vespalib::string &tensorTypeSpec,
+                        const TensorCells &cells,
+                        const TensorDimensions &dimensions)
+    {
+        auto tensor = createTensor(cells, dimensions);
+        vespalib::nbostream stream;
+        vespalib::tensor::TypedBinaryFormat::serialize(stream, *tensor);
+        test.getQueryEnv().getProperties().add(tensorName,
+                vespalib::stringref(stream.peek(), stream.size()));
+        setQueryTensorType(tensorName, tensorTypeSpec);
+    }
+
+    void setupQueryEnvironment() {
+        setQueryTensor("tensorquery",
+                       "tensor(q{})",
+                       { {{{"q", "d"}}, 11 },
+                         {{{"q", "e"}}, 13 },
+                         {{{"q", "f"}}, 17 } },
+                       { "q" });
+        setQueryTensor("mappedtensorquery",
+                       "tensor(x[2])",
+                       { {{{"x", "0"},{"y", "0"}}, 11 },
+                         {{{"x", "0"},{"y", "1"}}, 13 },
+                         {{{"x", "1"},{"y", "0"}}, 17 } },
+                       { "x", "y" }); // declared type has no 'y'; the "is mapped" test expects the cells summed over y
+        setQueryTensorType("null", "tensor(q{})");
+    }
+    const Tensor &extractTensor() {
+        const Value::CREF *value = test.resolveObjectFeature();
+        ASSERT_TRUE(value != nullptr);
+        ASSERT_TRUE(value->get().is_tensor());
+        return static_cast<const Tensor &>(*value->get().as_tensor());
+    }
+    const Tensor &execute(uint32_t docId = 1) {
+        test.executeOnly(docId);
+        return extractTensor();
+    }
+};
+
+struct AsTensor { // expected tensor obtained by evaluating a tensor expression string
+    InterpretedFunction ifun;
+    InterpretedFunction::Context ctx;
+    const Value *result;
+    explicit AsTensor(const vespalib::string &expr)
+        : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx))
+    {
+        ASSERT_TRUE(result->is_tensor());
+    }
+    bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); }
+};
+
+std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) {
+    os << static_cast<const Tensor &>(*my_tensor.result->as_tensor()); // print the tensor itself, not the pointer value
+    return os;
+}
+
+TEST_F("require that tensor attribute can be extracted as tensor in attribute feature",
+       ExecFixture("attribute(tensorattr)"))
+{
+    EXPECT_EQUAL(AsTensor("{ {x:b}:5, {x:c}:7, {x:a}:3 }"), f.execute());
+}
+
+TEST_F("require that tensor from query can be extracted as tensor in query feature",
+       ExecFixture("query(tensorquery)"))
+{
+    EXPECT_EQUAL(AsTensor("{ {q:f}:17, {q:d}:11, {q:e}:13 }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if attribute does not exists",
+       ExecFixture("attribute(null)"))
+{
+    EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if tensor type is wrong",
+       ExecFixture("attribute(wrongtype)"))
+{
+    EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if query parameter is not found",
+       ExecFixture("query(null)"))
+{
+    EXPECT_EQUAL(AsTensor("{ }"), f.execute());
+}
+
+TEST_F("require that empty tensor is created if document has no tensor",
+       ExecFixture("attribute(tensorattr)")) {
+    EXPECT_EQUAL(AsTensor("{ }"), f.execute(2));
+}
+
+struct AsDenseTensor { // expected tensor built directly from dense cells
+    Tensor::UP tensor;
+    explicit AsDenseTensor(const DenseTensorCells &cells)
+        : tensor(TensorFactory::createDense(cells))
+    {
+        ASSERT_TRUE(!!tensor);
+    }
+    bool operator==(const Tensor &rhs) const { return tensor->equals(rhs); }
+};
+
+
+std::ostream &operator<<(std::ostream &os, const AsDenseTensor &my_tensor) {
+    os << *my_tensor.tensor;
+    return os;
+}
+
+TEST_F("require that tensor from query is mapped",
+       ExecFixture("query(mappedtensorquery)")) {
+    EXPECT_EQUAL(AsDenseTensor({ {{{"x", 0}}, 24},
+                                 {{{"x", 1}}, 17} }),
+                 f.execute());
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/tensor_from_labels/.gitignore b/searchlib/src/tests/features/tensor_from_labels/.gitignore
new file mode 100644
index 00000000000..0e241941ca3
--- /dev/null
+++
b/searchlib/src/tests/features/tensor_from_labels/.gitignore @@ -0,0 +1 @@ +searchlib_tensor_from_labels_test_app diff --git a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt new file mode 100644 index 00000000000..db1814a0f66 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensor_from_labels_test_app + SOURCES + tensor_from_labels_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensor_from_labels_test_app COMMAND searchlib_tensor_from_labels_test_app) diff --git a/searchlib/src/tests/features/tensor_from_labels/FILES b/searchlib/src/tests/features/tensor_from_labels/FILES new file mode 100644 index 00000000000..daecb2bbf5b --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/FILES @@ -0,0 +1 @@ +tensor_from_labels_test.cpp diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp new file mode 100644 index 00000000000..b15ffb956ce --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp @@ -0,0 +1,211 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; +using vespalib::eval::Value; +using vespalib::eval::Function; +using vespalib::eval::InterpretedFunction; +using vespalib::tensor::Tensor; +using vespalib::tensor::DefaultTensorEngine; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + TensorFromLabelsBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture() + : blueprint(), + indexEnv() + { + } +}; + +TEST_F("require that blueprint can be created from factory", SetupFixture) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromLabels")); +} + +TEST_F("require that setup fails if source spec is invalid", SetupFixture) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)")); +} + +TEST_F("require that setup succeeds with attribute source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"), + StringList(), StringList().add("tensor")); +} + +TEST_F("require that setup succeeds with query source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"), + StringList(), StringList().add("tensor")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void setupAttributeVectors() { 
+ std::vector attrs; + attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeManager().add(attr); + } + + StringAttribute *astr = static_cast(attrs[0].get()); + // Note that the weight parameter is not used + astr->append(1, "a", 0); + astr->append(1, "b", 0); + astr->append(1, "c", 0); + + IntegerAttribute *aint = static_cast(attrs[1].get()); + aint->append(1, 3, 0); + aint->append(1, 5, 0); + aint->append(1, 7, 0); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("astr_query", "[d e f]"); + test.getQueryEnv().getProperties().add("aint_query", "[11 13 17]"); + } + const Tensor &extractTensor() { + const Value::CREF *value = test.resolveObjectFeature(); + ASSERT_TRUE(value != nullptr); + ASSERT_TRUE(value->get().is_tensor()); + return static_cast(*value->get().as_tensor()); + } + const Tensor &execute() { + test.executeOnly(); + return extractTensor(); + } +}; + +struct AsTensor { + InterpretedFunction ifun; + InterpretedFunction::Context ctx; + const Value *result; + explicit AsTensor(const vespalib::string &expr) + : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx)) + { + ASSERT_TRUE(result->is_tensor()); + } + bool operator==(const Tensor &rhs) const { return static_cast(*result->as_tensor()).equals(rhs); } +}; + +std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) { + os << my_tensor.result->as_tensor(); + return os; +} + +// Tests for attribute source: + +TEST_F("require that array string attribute can be converted to tensor (default dimension)", + 
ExecFixture("tensorFromLabels(attribute(astr))")) +{ + EXPECT_EQUAL(AsTensor("{ {astr:a}:1, {astr:b}:1, {astr:c}:1 }"), f.execute()); +} + +TEST_F("require that array string attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(attribute(astr),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:a}:1, {dim:b}:1, {dim:c}:1 }"), f.execute()); +} + +TEST_F("require that array integer attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(attribute(aint))")) +{ + EXPECT_EQUAL(AsTensor("{ {aint:7}:1, {aint:3}:1, {aint:5}:1 }"), f.execute()); +} + +TEST_F("require that array attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(attribute(aint),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:7}:1, {dim:3}:1, {dim:5}:1 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute does not exists", + ExecFixture("tensorFromLabels(attribute(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute type is not supported", + ExecFixture("tensorFromLabels(attribute(wsstr))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + + +// Tests for query source: + +TEST_F("require that string array from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(query(astr_query))")) +{ + EXPECT_EQUAL(AsTensor("{ {astr_query:d}:1, {astr_query:e}:1, {astr_query:f}:1 }"), f.execute()); +} + +TEST_F("require that integer array from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(query(aint_query))")) +{ + EXPECT_EQUAL(AsTensor("{ {aint_query:13}:1, {aint_query:17}:1, {aint_query:11}:1 }"), f.execute()); +} + +TEST_F("require that string array from query can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(query(astr_query),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:d}:1, {dim:e}:1, {dim:f}:1 }"), f.execute()); 
+} + +TEST_F("require that empty tensor is created if query parameter is not found", + ExecFixture("tensorFromLabels(query(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore new file mode 100644 index 00000000000..a56eade053e --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore @@ -0,0 +1 @@ +searchlib_tensor_from_weighted_set_test_app diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt new file mode 100644 index 00000000000..7c38b301679 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensor_from_weighted_set_test_app + SOURCES + tensor_from_weighted_set_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensor_from_weighted_set_test_app COMMAND searchlib_tensor_from_weighted_set_test_app) diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/FILES b/searchlib/src/tests/features/tensor_from_weighted_set/FILES new file mode 100644 index 00000000000..639a54230b1 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/FILES @@ -0,0 +1 @@ +tensor_from_weighted_set_test.cpp diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp new file mode 100644 index 00000000000..163fd5b5389 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp @@ -0,0 +1,198 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; +using vespalib::eval::Value; +using vespalib::eval::Function; +using vespalib::eval::InterpretedFunction; +using vespalib::tensor::Tensor; +using vespalib::tensor::DefaultTensorEngine; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + TensorFromWeightedSetBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture() + : blueprint(), + indexEnv() + { + } +}; + +TEST_F("require that blueprint can be created from factory", SetupFixture) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromWeightedSet")); +} + +TEST_F("require that setup fails if source spec is invalid", SetupFixture) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)")); +} + +TEST_F("require that setup succeeds with attribute source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"), + StringList(), StringList().add("tensor")); +} + +TEST_F("require that setup succeeds with query source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"), + StringList(), StringList().add("tensor")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + 
setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void setupAttributeVectors() { + std::vector attrs; + attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeManager().add(attr); + } + + StringAttribute *wsstr = static_cast(attrs[0].get()); + wsstr->append(1, "a", 3); + wsstr->append(1, "b", 5); + wsstr->append(1, "c", 7); + + IntegerAttribute *wsint = static_cast(attrs[1].get()); + wsint->append(1, 11, 3); + wsint->append(1, 13, 5); + wsint->append(1, 17, 7); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("wsquery", "{d:11,e:13,f:17}"); + } + const Tensor &extractTensor() { + const Value::CREF *value = test.resolveObjectFeature(); + ASSERT_TRUE(value != nullptr); + ASSERT_TRUE(value->get().is_tensor()); + return static_cast(*value->get().as_tensor()); + } + const Tensor &execute() { + test.executeOnly(); + return extractTensor(); + } +}; + +struct AsTensor { + InterpretedFunction ifun; + InterpretedFunction::Context ctx; + const Value *result; + explicit AsTensor(const vespalib::string &expr) + : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx)) + { + ASSERT_TRUE(result->is_tensor()); + } + bool operator==(const Tensor &rhs) const { return static_cast(*result->as_tensor()).equals(rhs); } +}; + +std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) { + os << my_tensor.result->as_tensor(); + return os; +} + +TEST_F("require that weighted set string attribute can be converted to tensor (default dimension)", + 
ExecFixture("tensorFromWeightedSet(attribute(wsstr))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsstr:b}:5, {wsstr:c}:7, {wsstr:a}:3 }"), f.execute()); +} + +TEST_F("require that weighted set string attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsstr),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:a}:3, {dim:b}:5, {dim:c}:7 }"), f.execute()); +} + +TEST_F("require that weighted set integer attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsint))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsint:13}:5, {wsint:17}:7, {wsint:11}:3 }"), f.execute()); +} + +TEST_F("require that weighted set integer attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsint),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:17}:7, {dim:11}:3, {dim:13}:5 }"), f.execute()); +} + +TEST_F("require that weighted set from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromWeightedSet(query(wsquery))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsquery:f}:17, {wsquery:d}:11, {wsquery:e}:13 }"), f.execute()); +} + +TEST_F("require that weighted set from query can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(query(wsquery),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:d}:11, {dim:e}:13, {dim:f}:17 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute does not exists", + ExecFixture("tensorFromWeightedSet(attribute(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute type is not supported", + ExecFixture("tensorFromWeightedSet(attribute(astr))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if query parameter is not found", + ExecFixture("tensorFromWeightedSet(query(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_MAIN() { 
TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/text_similarity_feature/.gitignore b/searchlib/src/tests/features/text_similarity_feature/.gitignore new file mode 100644 index 00000000000..9ffa5b46a43 --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/.gitignore @@ -0,0 +1 @@ +searchlib_text_similarity_feature_test_app diff --git a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt new file mode 100644 index 00000000000..e0cb043c8f1 --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_text_similarity_feature_test_app + SOURCES + text_similarity_feature_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_text_similarity_feature_test_app COMMAND searchlib_text_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/text_similarity_feature/FILES b/searchlib/src/tests/features/text_similarity_feature/FILES new file mode 100644 index 00000000000..dfa5173742d --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/FILES @@ -0,0 +1 @@ +text_similarity_feature_test.cpp diff --git a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp new file mode 100644 index 00000000000..6a6b9d0a48e --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +std::vector featureNamesFoo() { + std::vector f; + f.push_back("textSimilarity(foo).score"); + f.push_back("textSimilarity(foo).proximity"); + f.push_back("textSimilarity(foo).order"); + f.push_back("textSimilarity(foo).queryCoverage"); + f.push_back("textSimilarity(foo).fieldCoverage"); + return f; +} + +const size_t SCORE = 0; +const size_t PROXIMITY = 1; +const size_t ORDER = 2; +const size_t QUERY = 3; +const size_t FIELD = 4; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "bar"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector expect; + size_t dumped; + virtual void visitDumpFeature(const vespalib::string &name) { + EXPECT_LESS(dumped, expect.size()); + EXPECT_EQUAL(expect[dumped++], name); + } + FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), dumped(0) {} +}; + +struct RankFixture : BlueprintFactoryFixture { + RankFixture() : BlueprintFactoryFixture() {} + double get_feature(const vespalib::string &query, const FtIndex &index, size_t select, + bool useStaleMatchData = false) + { + std::vector names = featureNamesFoo(); + ASSERT_TRUE(names.size() == 5u); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + 
FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 2 : 1)); + return actual.getScore(names[select]); + } +}; + +double prox(uint32_t dist) { + return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); +} + +double comb(std::initializer_list values) { + double sum = 0.0; + for (double value: values) { + sum += value; + } + return (sum/values.size()); +} + +double mix(double proximity, double order, double query, double field) { + return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field); +} + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("textSimilarity"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are dumped", TextSimilarityBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_EQUAL(f3.expect.size(), f3.dumped); +} + +TEST_FF("require that setup can be done on single value index field", TextSimilarityBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "foo"))); +} + +TEST_FF("require that setup can not be done on weighted set index field", TextSimilarityBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "bar"))); +} + +TEST_FF("require that setup can not be done on single value attribute field", TextSimilarityBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(baz)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector(1, "baz"))); +} + +TEST_F("require that no 
match gives zero outputs", RankFixture) { + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), SCORE)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD)); +} + +TEST_F("require that minal perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), SCORE)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD)); +} + +TEST_F("require that larger perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), SCORE)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), FIELD)); +} + +TEST_F("require that extra query terms reduces order but not proximity", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y y", indexFoo().element("x"), PROXIMITY)); + + EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y y", 
indexFoo().element("x"), ORDER)); +} + +TEST_F("require that extra field terms reduces proximity but not order", RankFixture) { + EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY)); + EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY)); + EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY)); + + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER)); +} + +TEST_F("require that proximity acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c d x x e"), PROXIMITY)); +} + +TEST_F("require that field order does not affect proximity score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), 
f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY)); +} + +TEST_F("require that order score acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER)); +} + +TEST_F("require that proximity does not affect order score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x e x d x c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER)); +} + +TEST_F("require that query coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), QUERY)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY)); + 
EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY)); +} + +TEST_F("require that field coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD)); +} + +TEST_F("require that first unique match is used per query term", RankFixture) { + EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD)); + + EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b a", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", indexFoo().element("a a a b"), FIELD)); +} + +TEST_F("require that overall score combines individual signals appropriately", RankFixture) { + EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY)); + EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", 
indexFoo().element("a c x x b x d"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD)); + EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), SCORE)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/util/.gitignore b/searchlib/src/tests/features/util/.gitignore new file mode 100644 index 00000000000..14e50fdaf47 --- /dev/null +++ b/searchlib/src/tests/features/util/.gitignore @@ -0,0 +1 @@ +searchlib_util_test_app diff --git a/searchlib/src/tests/features/util/CMakeLists.txt b/searchlib/src/tests/features/util/CMakeLists.txt new file mode 100644 index 00000000000..95a0bf3b45d --- /dev/null +++ b/searchlib/src/tests/features/util/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_util_test_app + SOURCES + util_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_util_test_app COMMAND searchlib_util_test_app) diff --git a/searchlib/src/tests/features/util/FILES b/searchlib/src/tests/features/util/FILES new file mode 100644 index 00000000000..f0bd0a06305 --- /dev/null +++ b/searchlib/src/tests/features/util/FILES @@ -0,0 +1 @@ +util_test.cpp diff --git a/searchlib/src/tests/features/util/util_test.cpp b/searchlib/src/tests/features/util/util_test.cpp new file mode 100644 index 00000000000..d2f97631d0f --- /dev/null +++ b/searchlib/src/tests/features/util/util_test.cpp @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include + +using namespace search; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +SimpleTermData make_term(uint32_t uid) { + SimpleTermData term; + term.setUniqueId(uid); + return term; +} + +struct TermLabelFixture { + IndexEnvironment indexEnv; + QueryEnvironment queryEnv; + TermLabelFixture() : indexEnv(), queryEnv(&indexEnv) { + queryEnv.getTerms().push_back(make_term(5)); + queryEnv.getTerms().push_back(make_term(0)); + queryEnv.getTerms().push_back(make_term(10)); + queryEnv.getProperties().add("vespa.label.foo.id", "5"); + queryEnv.getProperties().add("vespa.label.bar.id", "0"); // undefined uid + queryEnv.getProperties().add("vespa.label.baz.id", "10"); + queryEnv.getProperties().add("vespa.label.fox.id", "7"); // non-existing + } +}; + +TEST_F("require that label can be mapped to term", TermLabelFixture) { + EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[0], util::getTermByLabel(f1.queryEnv, "foo")); + EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "bar")); + EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[2], util::getTermByLabel(f1.queryEnv, "baz")); + EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "fox")); + EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "unknown")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/.gitignore b/searchlib/src/tests/fef/.gitignore new file mode 100644 index 00000000000..ff604ccaf00 --- /dev/null +++ b/searchlib/src/tests/fef/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +fef_test +searchlib_fef_test_app diff --git a/searchlib/src/tests/fef/CMakeLists.txt b/searchlib/src/tests/fef/CMakeLists.txt new file mode 100644 index 00000000000..a239ba972c3 --- /dev/null +++ b/searchlib/src/tests/fef/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_fef_test_app + SOURCES + fef_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fef_test_app COMMAND searchlib_fef_test_app) diff --git a/searchlib/src/tests/fef/DESC b/searchlib/src/tests/fef/DESC new file mode 100644 index 00000000000..431ee7a1a1f --- /dev/null +++ b/searchlib/src/tests/fef/DESC @@ -0,0 +1 @@ +fef test. Take a look at fef.cpp for details. diff --git a/searchlib/src/tests/fef/FILES b/searchlib/src/tests/fef/FILES new file mode 100644 index 00000000000..7e6752e501e --- /dev/null +++ b/searchlib/src/tests/fef/FILES @@ -0,0 +1 @@ +fef.cpp diff --git a/searchlib/src/tests/fef/attributecontent/.gitignore b/searchlib/src/tests/fef/attributecontent/.gitignore new file mode 100644 index 00000000000..dd57ee57362 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +attributecontent_test +searchlib_attributecontent_test_app diff --git a/searchlib/src/tests/fef/attributecontent/CMakeLists.txt b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt new file mode 100644 index 00000000000..84cdb3d4fce --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attributecontent_test_app + SOURCES + attributecontent_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributecontent_test_app COMMAND searchlib_attributecontent_test_app) diff --git a/searchlib/src/tests/fef/attributecontent/DESC b/searchlib/src/tests/fef/attributecontent/DESC new file mode 100644 index 00000000000..fa1c457c573 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/DESC @@ -0,0 +1 @@ +attributecontent test. Take a look at attributecontent.cpp for details. 
diff --git a/searchlib/src/tests/fef/attributecontent/FILES b/searchlib/src/tests/fef/attributecontent/FILES new file mode 100644 index 00000000000..4325e907b45 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/FILES @@ -0,0 +1 @@ +attributecontent.cpp diff --git a/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp new file mode 100644 index 00000000000..66430994016 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("attributecontent_test"); +#include +#include +#include +#include + +#include + +using namespace search::attribute; + +namespace search { +namespace fef { + +class Test : public vespalib::TestApp { +private: + void testWriteAndRead(); + void testFill(); + +public: + int Main(); +}; + +void +Test::testWriteAndRead() +{ + typedef search::attribute::AttributeContent UintContent; + UintContent buf; + EXPECT_EQUAL(buf.capacity(), 16u); + EXPECT_EQUAL(buf.size(), 0u); + + uint32_t i; + uint32_t * data; + const uint32_t * itr; + for (i = 0, data = buf.data(); i < 16; ++i, ++data) { + *data = i; + } + buf.setSize(16); + EXPECT_EQUAL(buf.size(), 16u); + for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) { + EXPECT_EQUAL(*itr, i); + EXPECT_EQUAL(buf[i], i); + } + EXPECT_EQUAL(i, 16u); + + buf.allocate(10); + EXPECT_EQUAL(buf.capacity(), 16u); + EXPECT_EQUAL(buf.size(), 16u); + buf.allocate(32); + EXPECT_EQUAL(buf.capacity(), 32u); + EXPECT_EQUAL(buf.size(), 0u); + + for (i = 0, data = buf.data(); i < 32; ++i, ++data) { + *data = i; + } + buf.setSize(32); + EXPECT_EQUAL(buf.size(), 32u); + for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) { + EXPECT_EQUAL(*itr, i); + EXPECT_EQUAL(buf[i], i); + } + EXPECT_EQUAL(i, 32u); +} + +void +Test::testFill() +{ + 
Config cfg(BasicType::INT32, CollectionType::ARRAY); + AttributeVector::SP av = AttributeFactory::createAttribute("aint32", cfg); + av->addDocs(2); + IntegerAttribute * ia = static_cast(av.get()); + ia->append(0, 10, 0); + ia->append(1, 20, 0); + ia->append(1, 30, 0); + av->commit(); + const IAttributeVector & iav = *av.get(); + IntegerContent buf; + buf.fill(iav, 0); + EXPECT_EQUAL(1u, buf.size()); + EXPECT_EQUAL(10, buf[0]); + buf.fill(iav, 1); + EXPECT_EQUAL(2u, buf.size()); + EXPECT_EQUAL(20, buf[0]); + EXPECT_EQUAL(30, buf[1]); + buf.fill(iav, 0); + EXPECT_EQUAL(1u, buf.size()); + EXPECT_EQUAL(10, buf[0]); +} + +int +Test::Main() +{ + TEST_INIT("attributecontent_test"); + + testWriteAndRead(); + testFill(); + + TEST_DONE(); +} + +} // namespace fef +} // namespace search + +TEST_APPHOOK(search::fef::Test); diff --git a/searchlib/src/tests/fef/featurenamebuilder/.gitignore b/searchlib/src/tests/fef/featurenamebuilder/.gitignore new file mode 100644 index 00000000000..781f49956a9 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featurenamebuilder_test +searchlib_featurenamebuilder_test_app diff --git a/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt new file mode 100644 index 00000000000..167642c1337 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_featurenamebuilder_test_app + SOURCES + featurenamebuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurenamebuilder_test_app COMMAND searchlib_featurenamebuilder_test_app) diff --git a/searchlib/src/tests/fef/featurenamebuilder/DESC b/searchlib/src/tests/fef/featurenamebuilder/DESC new file mode 100644 index 00000000000..38abf1af794 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/DESC @@ -0,0 +1 @@ +featurenamebuilder test. Take a look at featurenamebuilder.cpp for details. diff --git a/searchlib/src/tests/fef/featurenamebuilder/FILES b/searchlib/src/tests/fef/featurenamebuilder/FILES new file mode 100644 index 00000000000..71df1d1033f --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/FILES @@ -0,0 +1 @@ +featurenamebuilder.cpp diff --git a/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp new file mode 100644 index 00000000000..0e574c776b5 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("featurenamebuilder_test"); +#include +#include + +using namespace search::fef; + +typedef FeatureNameBuilder B; + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("featurenamebuilder_test"); + + // normal cases + EXPECT_EQUAL(B().baseName("foo").buildName(), "foo"); + EXPECT_EQUAL(B().baseName("foo").output("out").buildName(), "foo.out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").buildName(), "foo(a,b)"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out"); + + // empty base = empty name + EXPECT_EQUAL(B().baseName("").buildName(), ""); + EXPECT_EQUAL(B().baseName("").output("out").buildName(), ""); + EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").buildName(), ""); + EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").output("out").buildName(), ""); + + // quoting + EXPECT_EQUAL(B().baseName("foo").parameter("a,b").output("out").buildName(), "foo(\"a,b\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a\\").output("out").buildName(), "foo(\"a\\\\\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a)").output("out").buildName(), "foo(\"a)\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter(" ").output("out").buildName(), "foo(\" \").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\"").output("out").buildName(), "foo(\"\\\"\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x15").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f\\x15\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x20").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f \").out"); + + // empty parameters + EXPECT_EQUAL(B().baseName("foo").parameter("").output("out").buildName(), "foo().out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").output("out").buildName(), "foo(,).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").parameter("").output("out").buildName(), 
"foo(,,).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("x").parameter("").output("out").buildName(), "foo(,x,).out"); + + // test change components + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").baseName("bar").buildName(), "bar(a,b).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().buildName(), "foo.out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().parameter("x").buildName(), "foo(x).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("").buildName(), "foo(a,b)"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("len").buildName(), "foo(a,b).len"); + + // test exact quote vs non-quote + EXPECT_EQUAL(B().baseName("foo").parameter("a").buildName(), "foo(a)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a").buildName(), "foo(\" a\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("a.out").buildName(), "foo(a.out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a.out").buildName(), "foo(\" a.out\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b)").buildName(), "foo(bar(a,b))"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b)").buildName(), "foo(\"bar(a, b)\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b).out").buildName(), "foo(bar(a,b).out)"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b).out").buildName(), "foo(\"bar(a, b).out\")"); + + // test non-exact quote vs non-quote + EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\f", false).buildName(), "foo()"); + EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\fbar ", false).buildName(), "foo(bar)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ", false).buildName(), "foo(bar)"); + 
EXPECT_EQUAL(B().baseName("foo").parameter(" a b ", false).buildName(), "foo(\" a b \")"); + EXPECT_EQUAL(B().baseName("foo").parameter("a%", false).buildName(), "foo(\"a%\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("foo\"\\", false).buildName(), "foo(\"foo\\\"\\\\\")"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a . out ", false).buildName(), "foo(a.out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) ", false).buildName(), "foo(bar(a,b))"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out ", false).buildName(), "foo(bar(a,b).out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out.2 ", false).buildName(), "foo(bar(a,b).out.2)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out . 2 ", false).buildName(), "foo(\" bar ( a , b ) . out . 2 \")"); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/fef/featurenameparser/.gitignore b/searchlib/src/tests/fef/featurenameparser/.gitignore new file mode 100644 index 00000000000..f16080e9791 --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featurenameparser_test +searchlib_featurenameparser_test_app diff --git a/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt new file mode 100644 index 00000000000..e313ee24deb --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_featurenameparser_test_app + SOURCES + featurenameparser_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurenameparser_test_app COMMAND searchlib_featurenameparser_test_app) diff --git a/searchlib/src/tests/fef/featurenameparser/DESC b/searchlib/src/tests/fef/featurenameparser/DESC new file mode 100644 index 00000000000..4c3da4e47a2 --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/DESC @@ -0,0 +1 @@ +featurenameparser test. Take a look at featurenameparser.cpp for details. diff --git a/searchlib/src/tests/fef/featurenameparser/FILES b/searchlib/src/tests/fef/featurenameparser/FILES new file mode 100644 index 00000000000..4567d5b7ccc --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/FILES @@ -0,0 +1 @@ +featurenameparser.cpp diff --git a/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp new file mode 100644 index 00000000000..2824f5ef8fc --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("featurenameparser_test"); +#include +#include +#include +#include + +using namespace search::fef; + +struct ParamList { + std::vector list; + ParamList() : list() {} + ParamList(const std::vector &l) : list(l) {} + ParamList &add(const vespalib::string &str) { + list.push_back(str); + return *this; + } + bool operator==(const ParamList &rhs) const { + return rhs.list == list; + } +}; + +std::ostream &operator<<(std::ostream &os, const ParamList &pl) { + os << std::endl; + for (uint32_t i = 0; i < pl.list.size(); ++i) { + os << " " << pl.list[i] << std::endl; + } + return os; +} + +class Test : public vespalib::TestApp +{ +public: + bool testParse(const vespalib::string &input, bool valid, + const vespalib::string &base, ParamList pl, + const vespalib::string &output); + void testFile(const vespalib::string &name); + int Main(); +}; + +bool +Test::testParse(const vespalib::string &input, bool valid, + const vespalib::string &base, ParamList pl, + const vespalib::string &output) +{ + bool ok = true; + FeatureNameParser parser(input); + if (!parser.valid()) { + LOG(warning, "parse error: input:'%s', rest:'%s'", + input.c_str(), input.substr(parser.parsedBytes()).c_str()); + } + ok &= EXPECT_EQUAL(parser.valid(), valid); + ok &= EXPECT_EQUAL(parser.baseName(), base); + ok &= EXPECT_EQUAL(ParamList(parser.parameters()), pl); + ok &= EXPECT_EQUAL(parser.output(), output); + return ok; +} + +void +Test::testFile(const vespalib::string &name) +{ + char buf[4096]; + uint32_t lineN = 0; + FILE *f = fopen(name.c_str(), "r"); + ASSERT_TRUE(f != 0); + while (fgets(buf, sizeof(buf), f) != NULL) { + ++lineN; + vespalib::string line(buf); + if (*line.rbegin() == '\n') { + line.resize(line.size() - 1); + } + if (line.empty() || line[0] == '#') { + continue; + } + uint32_t idx = line.find("<=>"); + if (!EXPECT_TRUE(idx < line.size())) { + LOG(error, "(%s:%u): malformed line: '%s'", + name.c_str(), lineN, line.c_str()); + } else { + vespalib::string 
input = line.substr(0, idx); + vespalib::string expect = line.substr(idx + strlen("<=>")); + if (!EXPECT_EQUAL(FeatureNameParser(input).featureName(), expect)) { + LOG(error, "(%s:%u): test failed: '%s'", + name.c_str(), lineN, line.c_str()); + } + } + } + ASSERT_TRUE(!ferror(f)); + fclose(f); +} + +int +Test::Main() +{ + TEST_INIT("featurenameparser_test"); + + // normal cases + EXPECT_TRUE(testParse("foo", true, "foo", ParamList(), "")); + EXPECT_TRUE(testParse("foo.out", true, "foo", ParamList(), "out")); + EXPECT_TRUE(testParse("foo(a)", true, "foo", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo(a,b)", true, "foo", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo(a,b).out", true, "foo", ParamList().add("a").add("b"), "out")); + + // @ in feature name (for macros) + EXPECT_TRUE(testParse("foo@", true, "foo@", ParamList(), "")); + EXPECT_TRUE(testParse("foo@.out", true, "foo@", ParamList(), "out")); + EXPECT_TRUE(testParse("foo@(a)", true, "foo@", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo@(a,b)", true, "foo@", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo@(a,b).out", true, "foo@", ParamList().add("a").add("b"), "out")); + + // $ in feature name (for macros) + EXPECT_TRUE(testParse("foo$", true, "foo$", ParamList(), "")); + EXPECT_TRUE(testParse("foo$.out", true, "foo$", ParamList(), "out")); + EXPECT_TRUE(testParse("foo$(a)", true, "foo$", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo$(a,b)", true, "foo$", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo$(a,b).out", true, "foo$", ParamList().add("a").add("b"), "out")); + + // de-quoting of parameters + EXPECT_TRUE(testParse("foo(a,\"b\")", true, "foo", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo(a,\" b \")", true, "foo", ParamList().add("a").add(" b "), "")); + EXPECT_TRUE(testParse("foo( \"a\" , \" b \" )", true, "foo", ParamList().add("a").add(" b "), "")); + 
EXPECT_TRUE(testParse("foo(\"\\\"\\\\\\t\\n\\r\\f\\x20\")", true, "foo", ParamList().add("\"\\\t\n\r\f "), "")); + + // only default output if '.' not specified + EXPECT_TRUE(testParse("foo.", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,b).", false, "", ParamList(), "")); + + // string cannot end in parameter list + EXPECT_TRUE(testParse("foo(", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a\\", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a\\)", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,b", false, "", ParamList(), "")); + + // empty parameters + EXPECT_TRUE(testParse("foo()", true, "foo", ParamList().add(""), "")); + EXPECT_TRUE(testParse("foo(,)", true, "foo", ParamList().add("").add(""), "")); + EXPECT_TRUE(testParse("foo(,,)", true, "foo", ParamList().add("").add("").add(""), "")); + EXPECT_TRUE(testParse("foo(,x,)", true, "foo", ParamList().add("").add("x").add(""), "")); + EXPECT_TRUE(testParse("foo( )", true, "foo", ParamList().add(""), "")); + EXPECT_TRUE(testParse("foo( , , )", true, "foo", ParamList().add("").add("").add(""), "")); + EXPECT_TRUE(testParse("foo( \t , \n , \r , \f )", true, "foo", ParamList().add("").add("").add("").add(""), "")); + + testFile("parsetest.txt"); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fef/featurenameparser/parsetest.txt b/searchlib/src/tests/fef/featurenameparser/parsetest.txt new file mode 100644 index 00000000000..ce9db595eca --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/parsetest.txt @@ -0,0 +1,55 @@ +# This file is used to test feature name parsing. The file format is +# as follows: Empty lines and lines starting with '#' will be +# ignored. Other lines must be on the form +# "'<=>'". 
The parser will be run on the +# input, and the normalized feature name will be compared to the +# expected output. If they match the test passes, if they don't match +# the test fails. The normalized feature name in the case of a parse +# error is the empty string. When parsing this file, no whitespace +# skipping is allowed inside the input or the expected output. To +# simplify things, the byte sequence '<=>' may not be used anywhere +# else than as a separator between the input and the expected +# output. Malformed lines will result in a failed test. + +# basic normalization + foo . out <=>foo.out + foo ( a , b ) . out <=>foo(a,b).out + foo ( a , b , "") . out <=>foo(a,b,).out + foo ( bar ( a ) , b , "") . out <=>foo(bar(a),b,).out + +# basic parse errors +<=> + <=> +foo(<=> +foo(,<=> +foo().<=> +foo(a b)<=> +foo(bar(a b))<=> +foo . a . b<=> + +#quoting +foo("a b")<=>foo("a b") +foo(bar("a b"))<=>foo(bar("a b")) +foo("\"bar\"")<=>foo("\"bar\"") +foo( "bar(x)" )<=>foo(bar(x)) +foo( "bar( x )" )<=>foo("bar( x )") +foo("xyz")<=>foo(xyz) +foo("\\\t\n\r\f\x10")<=>foo("\\\t\n\r\f\x10") +foo("\y")<=> +foo("\x05")<=>foo("\x05") +foo("\x00")<=> +foo("\")<=> +foo("abc<=> +foo("\x5")<=> +foo("\x31\x32\x33")<=>foo(123) + +# my current favorite pair :) +foo("bar(\"x\")")<=>foo("bar(\"x\")") +foo("bar(\"x \")")<=>foo(bar("x ")) + +# might want to disallow non-printables inside quotes... +foo(" ")<=>foo("\t") + +#some more fancy normalization tests + foo ( a , b ) . out <=>foo(a,b).out + foo ( "", bar ( baz ( a, "" ) , "" ) , b , " ") . 
out <=>foo(,bar(baz(a,),),b," ").out diff --git a/searchlib/src/tests/fef/featureoverride/.gitignore b/searchlib/src/tests/fef/featureoverride/.gitignore new file mode 100644 index 00000000000..35285582ceb --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featureoverride_test +searchlib_featureoverride_test_app diff --git a/searchlib/src/tests/fef/featureoverride/CMakeLists.txt b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt new file mode 100644 index 00000000000..23370d51d22 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_featureoverride_test_app + SOURCES + featureoverride.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featureoverride_test_app COMMAND searchlib_featureoverride_test_app) diff --git a/searchlib/src/tests/fef/featureoverride/DESC b/searchlib/src/tests/fef/featureoverride/DESC new file mode 100644 index 00000000000..1605959dae6 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/DESC @@ -0,0 +1 @@ +featureoverride test. Take a look at featureoverride.cpp for details. diff --git a/searchlib/src/tests/fef/featureoverride/FILES b/searchlib/src/tests/fef/featureoverride/FILES new file mode 100644 index 00000000000..864ca65657a --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/FILES @@ -0,0 +1 @@ +featureoverride.cpp diff --git a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp new file mode 100644 index 00000000000..b0929f50fa9 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("featureoverride_test"); +#include +#include + +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::feature_t; + +typedef FeatureExecutor::LP FESP; +typedef Blueprint::SP BPSP; + +struct Fixture +{ + MatchDataLayout mdl; + std::vector executors; + MatchData::UP md; + Fixture() : mdl(), executors(), md() {} + Fixture &add(FeatureExecutor::LP &executor, size_t outCnt) { + executor->inputs_done(); + for (uint32_t outIdx = 0; outIdx < outCnt; ++outIdx) { + executor->bindOutput(mdl.allocFeature()); + } + executor->outputs_done(); + executors.push_back(executor); + return *this; + } + Fixture &run() { + md = mdl.createMatchData(); + for (const auto &executor : executors) { + executor->execute(*md); + } + return *this; + } + feature_t resolveFeature(FeatureHandle handle) { + return *md->resolveFeature(handle); + } + FESP createValueExecutor() { + std::vector values; + values.push_back(1.0); + values.push_back(2.0); + values.push_back(3.0); + return FESP(new ValueExecutor(values)); + } +}; + +TEST_F("test decorator - single override", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1, 50.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); +} + +TEST_F("test decorator - multiple overrides", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 0, 50.0)); + fe = FESP(new FeatureOverrider(fe, 2, 100.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 100.0); +} + +TEST_F("test decorator - 
non-existing override", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1000, 50.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); +} + +TEST_F("test decorator - transitive override", Fixture) +{ + FeatureExecutor::SharedInputs inputs; + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1, 50.0)); + f.add(fe, 3); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + FESP fe2 = FESP(new DoubleExecutor(3)); + fe2->bind_shared_inputs(inputs); + fe2->addInput(fe->outputs()[0]); + fe2->addInput(fe->outputs()[1]); + fe2->addInput(fe->outputs()[2]); + fe2 = FESP(new FeatureOverrider(fe2, 2, 10.0)); + f.add(fe2, 3).run(); + EXPECT_EQUAL(fe2->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[0]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[1]), 100.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[2]), 10.0); +} + +TEST("test overrides") +{ + BlueprintFactory bf; + bf.addPrototype(BPSP(new ValueBlueprint())); + bf.addPrototype(BPSP(new DoubleBlueprint())); + bf.addPrototype(BPSP(new SumBlueprint())); + + IndexEnvironment idxEnv; + RankSetup rs(bf, idxEnv); + + rs.addDumpFeature("value(1,2,3)"); + rs.addDumpFeature("double(value(1))"); + rs.addDumpFeature("double(value(2))"); + rs.addDumpFeature("double(value(3))"); + rs.addDumpFeature("mysum(value(2),value(2))"); + rs.addDumpFeature("mysum(value(1),value(2),value(3))"); + EXPECT_TRUE(rs.compile()); + + RankProgram::UP rankProgram = rs.create_dump_program(); + + MatchDataLayout mdl; + QueryEnvironment queryEnv; + Properties overrides; + + overrides.add("value(2)", "20.0"); + 
overrides.add("value(1,2,3).1", "4.0"); + overrides.add("value(1,2,3).2", "6.0"); + overrides.add("bogus(feature)", "10.0"); + + rankProgram->setup(mdl, queryEnv, overrides); + rankProgram->run(2); + + std::map res = Utils::getAllFeatures(*rankProgram); + + EXPECT_EQUAL(res.size(), 20u); + EXPECT_APPROX(res["value(1)"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1).0"], 1.0, 1e-6); + EXPECT_APPROX(res["value(2)"], 20.0, 1e-6); + EXPECT_APPROX(res["value(2).0"], 20.0, 1e-6); + EXPECT_APPROX(res["value(3)"], 3.0, 1e-6); + EXPECT_APPROX(res["value(3).0"], 3.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3)"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).0"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).1"], 4.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).2"], 6.0, 1e-6); + EXPECT_APPROX(res["mysum(value(2),value(2))"], 40.0, 1e-6); + EXPECT_APPROX(res["mysum(value(2),value(2)).out"], 40.0, 1e-6); + EXPECT_APPROX(res["mysum(value(1),value(2),value(3))"], 24.0, 1e-6); + EXPECT_APPROX(res["mysum(value(1),value(2),value(3)).out"], 24.0, 1e-6); + EXPECT_APPROX(res["double(value(1))"], 2.0, 1e-6); + EXPECT_APPROX(res["double(value(1)).0"], 2.0, 1e-6); + EXPECT_APPROX(res["double(value(2))"], 40.0, 1e-6); + EXPECT_APPROX(res["double(value(2)).0"], 40.0, 1e-6); + EXPECT_APPROX(res["double(value(3))"], 6.0, 1e-6); + EXPECT_APPROX(res["double(value(3)).0"], 6.0, 1e-6); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/fef_test.cpp b/searchlib/src/tests/fef/fef_test.cpp new file mode 100644 index 00000000000..b3107e57fae --- /dev/null +++ b/searchlib/src/tests/fef/fef_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("fef_test"); +#include +#include +#include + +using namespace search::fef; +using std::shared_ptr; +using search::feature_t; + +class Test : public vespalib::TestApp +{ +public: + void testLayout(); + void testObjectStore(); + void testTermFieldMatchDataAppend(); + int Main(); +}; + +void +Test::testLayout() +{ + { + TermFieldMatchData tmd; + EXPECT_EQUAL(IllegalFieldId, tmd.getFieldId()); + EXPECT_EQUAL(TermFieldMatchData::invalidId(), tmd.getDocId()); + } + MatchDataLayout mdl; + EXPECT_EQUAL(mdl.allocTermField(0), 0u); + EXPECT_EQUAL(mdl.allocTermField(42), 1u); + EXPECT_EQUAL(mdl.allocTermField(IllegalFieldId), 2u); + EXPECT_EQUAL(mdl.allocFeature(), 0u); + EXPECT_EQUAL(mdl.allocFeature(), 1u); + EXPECT_EQUAL(mdl.allocFeature(), 2u); + + MatchData::UP md = mdl.createMatchData(); + EXPECT_EQUAL(TermFieldMatchData::invalidId(), md->getDocId()); + EXPECT_EQUAL(md->getNumTermFields(), 3u); + EXPECT_EQUAL(md->getNumFeatures(), 3u); + TermFieldMatchData *t0 = md->resolveTermField(0); + TermFieldMatchData *t1 = md->resolveTermField(1); + TermFieldMatchData *t2 = md->resolveTermField(2); + EXPECT_EQUAL(t1, t0 + 1); + EXPECT_EQUAL(t2, t1 + 1); + EXPECT_EQUAL(0u, t0->getFieldId()); + EXPECT_EQUAL(42u, t1->getFieldId()); + EXPECT_EQUAL(IllegalFieldId, t2->getFieldId()); + feature_t *f0 = md->resolveFeature(0); + feature_t *f1 = md->resolveFeature(1); + feature_t *f2 = md->resolveFeature(2); + EXPECT_EQUAL(f1, f0 + 1); + EXPECT_EQUAL(f2, f1 + 1); + EXPECT_TRUE((void*)t2 < (void*)f0 || (void*)f2 < (void*)t0); +} + +void +Test::testObjectStore() +{ + ObjectStore s; + class Object : public Anything { + }; + Anything::UP u1(new Object()); + Anything::UP u11(new Object()); + Anything::UP u2(new Object()); + const Anything * o1(u1.get()); + const Anything * o11(u11.get()); + const Anything * o2(u2.get()); + EXPECT_TRUE(nullptr == s.get("a")); + s.add("a", std::move(u1)); + EXPECT_EQUAL(o1, s.get("a")); + EXPECT_TRUE(nullptr == s.get("b")); + 
s.add("b", std::move(u2)); + EXPECT_EQUAL(o1, s.get("a")); + EXPECT_EQUAL(o2, s.get("b")); + s.add("a", std::move(u11)); + EXPECT_EQUAL(o11, s.get("a")); +} + +void +Test::testTermFieldMatchDataAppend() +{ + TermFieldMatchData tmd; + EXPECT_EQUAL(0u, tmd.size()); + EXPECT_EQUAL(1u, tmd.capacity()); + TermFieldMatchDataPosition pos; + tmd.appendPosition(pos); + EXPECT_EQUAL(1u, tmd.size()); + EXPECT_EQUAL(1u, tmd.capacity()); + tmd.appendPosition(pos); + EXPECT_EQUAL(2u, tmd.size()); + EXPECT_EQUAL(2u, tmd.capacity()); + for (size_t i(2); i < std::numeric_limits::max(); i++) { + EXPECT_EQUAL(i, tmd.size()); + EXPECT_EQUAL(std::min(size_t(std::numeric_limits::max()), vespalib::roundUp2inN(i)), tmd.capacity()); + tmd.appendPosition(pos); + } + EXPECT_EQUAL(std::numeric_limits::max(), tmd.size()); + EXPECT_EQUAL(std::numeric_limits::max(), tmd.capacity()); + tmd.appendPosition(pos); + EXPECT_EQUAL(std::numeric_limits::max(), tmd.size()); + EXPECT_EQUAL(std::numeric_limits::max(), tmd.capacity()); +} + +int +Test::Main() +{ + TEST_INIT("fef_test"); + testLayout(); + testObjectStore(); + testTermFieldMatchDataAppend(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fef/object_passing/.gitignore b/searchlib/src/tests/fef/object_passing/.gitignore new file mode 100644 index 00000000000..64b250201a8 --- /dev/null +++ b/searchlib/src/tests/fef/object_passing/.gitignore @@ -0,0 +1 @@ +searchlib_object_passing_test_app diff --git a/searchlib/src/tests/fef/object_passing/CMakeLists.txt b/searchlib/src/tests/fef/object_passing/CMakeLists.txt new file mode 100644 index 00000000000..2334711f015 --- /dev/null +++ b/searchlib/src/tests/fef/object_passing/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_object_passing_test_app + SOURCES + object_passing_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_object_passing_test_app COMMAND searchlib_object_passing_test_app) diff --git a/searchlib/src/tests/fef/object_passing/object_passing_test.cpp b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp new file mode 100644 index 00000000000..69c681d8f60 --- /dev/null +++ b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using vespalib::eval::ValueType; + +struct ProxyExecutor : FeatureExecutor { + double number_value; + vespalib::eval::Value::UP object_value; + ProxyExecutor() : number_value(0.0), object_value() {} + bool isPure() override { return true; } + void execute(search::fef::MatchData &md) override { + double was_object = 0.0; + if (md.feature_is_object(inputs()[0])) { + was_object = 1.0; + number_value = md.resolve_object_feature(inputs()[0])->get().as_double(); + object_value.reset(new vespalib::eval::DoubleValue(number_value)); + } else { + number_value = *md.resolveFeature(inputs()[0]); + object_value.reset(new vespalib::eval::DoubleValue(number_value)); + } + if (md.feature_is_object(outputs()[0])) { + *md.resolve_object_feature(outputs()[0]) = *object_value; + } else { + *md.resolveFeature(outputs()[0]) = number_value; + } + *md.resolveFeature(outputs()[1]) = was_object; + } +}; + +struct ProxyBlueprint : Blueprint { + vespalib::string name; + AcceptInput accept_input; + bool object_output; + ProxyBlueprint(const vespalib::string &name_in, AcceptInput accept_input_in, bool object_output_in) + : Blueprint(name_in), name(name_in), 
accept_input(accept_input_in), object_output(object_output_in) {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { + return Blueprint::UP(new ProxyBlueprint(name, accept_input, object_output)); + } + bool setup(const IIndexEnvironment &, const std::vector ¶ms) override { + ASSERT_EQUAL(1u, params.size()); + defineInput(params[0], accept_input); + describeOutput("value", "the value", object_output ? FeatureType::object(ValueType::double_type()) : FeatureType::number()); + describeOutput("was_object", "whether input was object", FeatureType::number()); + return true; + } + FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override { + return FeatureExecutor::LP(new ProxyExecutor()); + } +}; + +struct Fixture { + BlueprintFactory factory; + IndexEnvironment indexEnv; + + explicit Fixture() { + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("box", Blueprint::AcceptInput::NUMBER, true))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_box", Blueprint::AcceptInput::ANY, true))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("unbox", Blueprint::AcceptInput::OBJECT, false))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_unbox", Blueprint::AcceptInput::ANY, false))); + } + + double eval(const vespalib::string &feature) { + BlueprintResolver::SP resolver(new BlueprintResolver(factory, indexEnv)); + resolver->addSeed(feature); + if (!resolver->compile()) { + return vespalib::eval::error_value; + } + MatchDataLayout mdl; + QueryEnvironment queryEnv(&indexEnv); + Properties overrides; + RankProgram program(resolver); + program.setup(mdl, queryEnv, overrides); + program.run(1); + std::vector names; + std::vector handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(1u, names.size()); + EXPECT_EQUAL(names.size(), handles.size()); + const auto &md 
= program.match_data(); + EXPECT_TRUE(!md.feature_is_object(handles[0])); // verifies auto-unboxing + return *md.resolveFeature(handles[0]); + } + + bool verify(const vespalib::string &feature) { + return verifyFeature(factory, indexEnv, feature, "unit test"); + } +}; + +TEST_F("require that values can be boxed and unboxed", Fixture()) { + EXPECT_EQUAL(3.0, f1.eval("box(value(3))")); + EXPECT_EQUAL(0.0, f1.eval("box(value(3)).was_object")); + EXPECT_EQUAL(3.0, f1.eval("unbox(box(value(3)))")); + EXPECT_EQUAL(1.0, f1.eval("unbox(box(value(3))).was_object")); + EXPECT_EQUAL(3.0, f1.eval("box(unbox(box(value(3))))")); + EXPECT_EQUAL(0.0, f1.eval("box(unbox(box(value(3)))).was_object")); +} + +TEST_F("require that output features may be either objects or numbers", Fixture()) { + EXPECT_TRUE(f1.verify("value(3)")); + EXPECT_TRUE(f1.verify("box(value(3))")); +} + +TEST_F("require that feature input/output types must be compatible", Fixture()) { + EXPECT_TRUE(!f1.verify("unbox(value(3))")); + EXPECT_TRUE(f1.verify("maybe_unbox(value(3))")); + EXPECT_TRUE(f1.verify("unbox(box(value(3)))")); + EXPECT_TRUE(!f1.verify("unbox(box(box(value(3))))")); + EXPECT_TRUE(f1.verify("unbox(maybe_box(box(value(3))))")); + EXPECT_TRUE(f1.verify("unbox(box(unbox(box(value(3)))))")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/parameter/.gitignore b/searchlib/src/tests/fef/parameter/.gitignore new file mode 100644 index 00000000000..17cf6c69953 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +parameter_test +searchlib_parameter_test_app diff --git a/searchlib/src/tests/fef/parameter/CMakeLists.txt b/searchlib/src/tests/fef/parameter/CMakeLists.txt new file mode 100644 index 00000000000..dcd45390ce3 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_parameter_test_app + SOURCES + parameter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_parameter_test_app NO_VALGRIND COMMAND searchlib_parameter_test_app) diff --git a/searchlib/src/tests/fef/parameter/DESC b/searchlib/src/tests/fef/parameter/DESC new file mode 100644 index 00000000000..738e0dbd512 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/DESC @@ -0,0 +1 @@ +parameter test. Take a look at parameter.cpp for details. diff --git a/searchlib/src/tests/fef/parameter/FILES b/searchlib/src/tests/fef/parameter/FILES new file mode 100644 index 00000000000..20c9e0c9ba0 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/FILES @@ -0,0 +1 @@ +parameter.cpp diff --git a/searchlib/src/tests/fef/parameter/parameter_test.cpp b/searchlib/src/tests/fef/parameter/parameter_test.cpp new file mode 100644 index 00000000000..4d6741937d5 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/parameter_test.cpp @@ -0,0 +1,267 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("parameter_test"); +#include + +#include +#include +#include + +using namespace search::fef::test; + +namespace search { +namespace fef { + +class StringList : public std::vector { +public: + StringList & add(const vespalib::string & str) { push_back(str); return *this; } +}; + +class ParameterTest : public vespalib::TestApp { +private: + typedef ParameterDescriptions PDS; + typedef ParameterType PT; + typedef Parameter P; + typedef StringList SL; + typedef ParameterValidator::Result PVR; + + bool assertParameter(const Parameter & exp, const Parameter & act); + bool validate(const IIndexEnvironment & env, + const std::vector & params, + const ParameterDescriptions & descs); + bool validate(const IIndexEnvironment & env, + const std::vector & params, + const ParameterDescriptions & descs, + const ParameterValidator::Result & result); + + void testDescriptions(); + void testValidator(); + void testParameters(); + +public: + int Main(); +}; + +bool +ParameterTest::assertParameter(const Parameter & exp, const Parameter & act) +{ + bool retval = true; + if (!EXPECT_EQUAL(exp.getType(), act.getType())) retval = false; + if (!EXPECT_EQUAL(exp.getValue(), act.getValue())) retval = false; + if (!EXPECT_EQUAL(exp.asDouble(), act.asDouble())) retval = false; + if (!EXPECT_EQUAL(exp.asInteger(), act.asInteger())) retval = false; + if (!EXPECT_EQUAL(exp.asField(), act.asField())) retval = false; + return retval; +} + +bool +ParameterTest::validate(const IIndexEnvironment & env, + const std::vector & params, + const ParameterDescriptions & descs) +{ + ParameterValidator pv(env, params, descs); + ParameterValidator::Result result = pv.validate(); + LOG(info, "validate(%s)", result.getError().c_str()); + return result.valid(); +} + +bool +ParameterTest::validate(const IIndexEnvironment & env, + const std::vector & params, + const ParameterDescriptions & descs, + const ParameterValidator::Result & result) +{ + if (!validate(env, params, descs)) return 
false; + ParameterValidator pv(env, params, descs); + ParameterValidator::Result actual = pv.validate(); + if (!EXPECT_EQUAL(result.getTag(), actual.getTag())) return false; + if (!EXPECT_EQUAL(result.getParameters().size(), actual.getParameters().size())) return false; + bool retval = true; + for (size_t i = 0; i < result.getParameters().size(); ++i) { + if (!assertParameter(result.getParameters()[i], actual.getParameters()[i])) retval = false; + } + return retval; +} + +void +ParameterTest::testDescriptions() +{ + PDS descs = PDS(). + desc().indexField(ParameterCollection::SINGLE).indexField(ParameterCollection::ARRAY).indexField(ParameterCollection::WEIGHTEDSET).attribute(ParameterCollection::ANY).attributeField(ParameterCollection::ANY).field(). + desc(5).feature().number().string().attribute(ParameterCollection::ANY). + desc().string().number().repeat(2); + const PDS::DescriptionVector & v = descs.getDescriptions(); + EXPECT_EQUAL(v.size(), 3u); + EXPECT_EQUAL(v[0].getTag(), 0u); + EXPECT_TRUE(!v[0].hasRepeat()); + EXPECT_EQUAL(v[0].getParams().size(), 6u); + EXPECT_EQUAL(v[0].getParam(0).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(1).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(2).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(3).type, ParameterType::ATTRIBUTE); + EXPECT_EQUAL(v[0].getParam(4).type, ParameterType::ATTRIBUTE_FIELD); + EXPECT_EQUAL(v[0].getParam(5).type, ParameterType::FIELD); + EXPECT_EQUAL(v[0].getParam(0).collection, ParameterCollection::SINGLE); + EXPECT_EQUAL(v[0].getParam(1).collection, ParameterCollection::ARRAY); + EXPECT_EQUAL(v[0].getParam(2).collection, ParameterCollection::WEIGHTEDSET); + EXPECT_EQUAL(v[0].getParam(3).collection, ParameterCollection::ANY); + EXPECT_EQUAL(v[0].getParam(4).collection, ParameterCollection::ANY); + EXPECT_EQUAL(v[0].getParam(5).collection, ParameterCollection::ANY); + + EXPECT_EQUAL(v[1].getTag(), 5u); + EXPECT_TRUE(!v[1].hasRepeat()); + 
EXPECT_EQUAL(v[1].getParams().size(), 4u); + EXPECT_EQUAL(v[1].getParam(0).type, ParameterType::FEATURE); + EXPECT_EQUAL(v[1].getParam(1).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[1].getParam(2).type, ParameterType::STRING); + EXPECT_EQUAL(v[1].getParam(3).type, ParameterType::ATTRIBUTE); + + EXPECT_EQUAL(v[2].getTag(), 6u); + EXPECT_TRUE(v[2].hasRepeat()); + EXPECT_EQUAL(v[2].getParams().size(), 2u); + EXPECT_EQUAL(v[2].getParam(0).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(1).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[2].getParam(2).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(3).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[2].getParam(4).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(5).type, ParameterType::NUMBER); +} + +void +ParameterTest::testValidator() +{ + IndexEnvironment env; + IndexEnvironmentBuilder builder(env); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo") + .addField(FieldType::INDEX, CollectionType::SINGLE, "hybrid"); + env.getFields().back().addAttribute(); // 'hybrid' field can also be accessed as an attribute + + // valid + EXPECT_TRUE(validate(env, SL(), PDS().desc())); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field())); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().field())); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("afoo"), 
PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("hybrid"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().feature())); + EXPECT_TRUE(validate(env, SL().add("123"), PDS().desc().number())); + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().string())); + // first fail but second pass + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().field().desc().string())); + + // not valid + EXPECT_FALSE(validate(env, SL().add("baz"), PDS().desc().string().string())); + EXPECT_FALSE(validate(env, SL().add("baz").add("baz"), PDS().desc().string())); + EXPECT_FALSE(validate(env, SL().add("baz"), PDS().desc().field())); + EXPECT_FALSE(validate(env, SL().add("bar"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, 
SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("hybrid"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("12a"), PDS().desc().number())); + EXPECT_FALSE(validate(env, SL().add("a12"), PDS().desc().number())); + + // test repeat + PDS d1 = PDS().desc().field().repeat(); + EXPECT_TRUE(validate(env, SL(), d1)); + EXPECT_TRUE(validate(env, SL().add("foo"), d1)); + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), d1)); + EXPECT_TRUE(!validate(env, SL().add("foo").add("bar").add("baz"), d1)); + PDS d2 = PDS().desc().string().attribute(ParameterCollection::ANY).indexField(ParameterCollection::SINGLE).repeat(2); + EXPECT_TRUE(validate(env, SL().add("str"), d2)); + EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo"), d2)); + EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo").add("bar").add("foo"), d2)); + EXPECT_TRUE(!validate(env, SL().add("str").add("bar"), d2)); + EXPECT_TRUE(!validate(env, SL().add("str").add("bar").add("foo").add("bar"), d2)); +} + +void +ParameterTest::testParameters() +{ + IndexEnvironment env; + IndexEnvironmentBuilder builder(env); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + + const FieldInfo * foo = env.getFieldByName("foo"); + const FieldInfo * bar = env.getFieldByName("bar"); + const 
FieldInfo * afoo = env.getFieldByName("afoo"); + const FieldInfo * wfoo = env.getFieldByName("wfoo"); + + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field(), + PVR().addParameter(P(PT::FIELD, "foo").setField(foo)))); // field + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE), + PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY), + PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET), + PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY), + PVR().addParameter(P(PT::ATTRIBUTE, "bar").setField(bar)))); // attribute field + EXPECT_TRUE(validate(env, SL().add("feature"), PDS().desc().feature(), + PVR().addParameter(P(PT::FEATURE, "feature")))); // feature + EXPECT_TRUE(validate(env, SL().add("string"), PDS().desc().string(), + PVR().addParameter(P(PT::STRING, "string")))); // string + + // numbers + EXPECT_TRUE(validate(env, SL().add("-100"), PDS().desc().number(), + PVR().addParameter(P(PT::NUMBER, "-100").setDouble(-100).setInteger(-100)))); + EXPECT_TRUE(validate(env, SL().add("100"), PDS().desc().number(), + 
PVR().addParameter(P(PT::NUMBER, "100").setDouble(100).setInteger(100)))); + EXPECT_TRUE(validate(env, SL().add("100.16"), PDS().desc().number(), + PVR().addParameter(P(PT::NUMBER, "100.16").setDouble(100.16).setInteger(100)))); + + EXPECT_TRUE(validate(env, SL(), PDS().desc(), PVR())); // no param + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().string(), + PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // multiple params + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().repeat(), + PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // repeat + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc(10).field().desc(20).string(), + PVR(20).addParameter(P(PT::STRING, "baz")))); // second desc matching +} + +int +ParameterTest::Main() +{ + TEST_INIT("parameter_test"); + + testDescriptions(); + testValidator(); + testParameters(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::ParameterTest); + diff --git a/searchlib/src/tests/fef/phrasesplitter/.gitignore b/searchlib/src/tests/fef/phrasesplitter/.gitignore new file mode 100644 index 00000000000..418f9961840 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +benchmark +phrasesplitter_test +searchlib_phrasesplitter_test_app +searchlib_benchmark_app diff --git a/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt new file mode 100644 index 00000000000..aa16f3e0a0d --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_phrasesplitter_test_app + SOURCES + phrasesplitter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_phrasesplitter_test_app COMMAND searchlib_phrasesplitter_test_app) +vespa_add_executable(searchlib_benchmark_app + SOURCES + benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_benchmark_app COMMAND searchlib_benchmark_app BENCHMARK) diff --git a/searchlib/src/tests/fef/phrasesplitter/DESC b/searchlib/src/tests/fef/phrasesplitter/DESC new file mode 100644 index 00000000000..fba49bdb8c0 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/DESC @@ -0,0 +1 @@ +phrasesplitter test. Take a look at phrasesplitter.cpp for details. diff --git a/searchlib/src/tests/fef/phrasesplitter/FILES b/searchlib/src/tests/fef/phrasesplitter/FILES new file mode 100644 index 00000000000..be37941d0c8 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/FILES @@ -0,0 +1 @@ +phrasesplitter.cpp diff --git a/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp new file mode 100644 index 00000000000..ca90b1de261 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("phrasesplitter_test"); +#include + +#include +#include +#include +#include + +namespace search { +namespace fef { + +class Benchmark : public vespalib::TestApp +{ +private: + FastOS_Time _timer; + double _sample; + + void start() { _timer.SetNow(); } + void sample() { _sample = _timer.MilliSecsToNow(); } + void run(size_t numRuns, size_t numPositions); + +public: + Benchmark() : _timer(), _sample(0) {} + int Main(); +}; + +void +Benchmark::run(size_t numRuns, size_t numPositions) +{ + test::QueryEnvironment qe; + std::vector &terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(1); + terms.back().setPhraseLength(3); // phrase with 3 terms + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + MatchData::UP md = mdl.createMatchData(); + TermFieldMatchData *tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle()); + for (size_t i = 0; i < numPositions; ++i) { + tmd->appendPosition(TermFieldMatchDataPosition(0, i, 0, numPositions)); + } + + PhraseSplitter ps(qe, 0); + + std::cout << "Start benchmark with numRuns(" << numRuns << ") and numPositions(" << numPositions << ")" << std::endl; + + start(); + + for (size_t i = 0; i < numRuns; ++i) { + ps.update(*md); + } + + sample(); +} + +int +Benchmark::Main() +{ + + TEST_INIT("benchmark"); + + if (_argc != 3) { + std::cout << "Must specify and " << std::endl; + return 0; + } + + size_t numRuns = strtoull(_argv[1], NULL, 10); + size_t numPositions = strtoull(_argv[2], NULL, 10); + + run(numRuns, numPositions); + + std::cout << "TET: " << _sample << " (ms)" << std::endl; + std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / numRuns << " (ms)" << std::endl; + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::Benchmark); diff --git a/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp new file mode 100644 index 
00000000000..0fa6f27022e --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp @@ -0,0 +1,242 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("phrasesplitter_test"); +#include + +#include +#include +#include + +namespace search { +namespace fef { + +class PhraseSplitterTest : public vespalib::TestApp +{ +private: + void assertTermData(const ITermData * td, uint32_t uniqueId, uint32_t numTerms, + uint32_t fieldId, uint32_t termHandle); + void testCopyTermFieldMatchData(); + void testSplitter(); + void testSplitterUpdate(); + +public: + int Main(); +}; + +void +PhraseSplitterTest::assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms, + uint32_t fieldId, uint32_t tfHandle) +{ + // fprintf(stderr, "checking uid=%d numterms=%d field=%d handle=%d\n", uniqueId, numTerms, fieldId, tfHandle); + EXPECT_EQUAL(uniqueId, td->getUniqueId()); + EXPECT_EQUAL(numTerms, td->getPhraseLength()); + EXPECT_EQUAL(tfHandle, td->lookupField(fieldId)->getHandle()); +} + +void +PhraseSplitterTest::testCopyTermFieldMatchData() +{ + TermFieldMatchData src; + src.reset(1); + src.appendPosition(TermFieldMatchDataPosition(0, 5, 0, 1000)); + src.appendPosition(TermFieldMatchDataPosition(0, 15, 0, 1000)); + + SimpleTermData td; + TermFieldMatchData dst; + dst.reset(0); + // dst.setTermData(&td); + dst.appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000)); + { + FieldPositionsIterator itr = dst.getIterator(); + EXPECT_EQUAL(itr.getPosition(), 10u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } + + PhraseSplitter::copyTermFieldMatchData(dst, src, 2); + + EXPECT_EQUAL(dst.getDocId(), 1u); + { + TermFieldMatchData::PositionsIterator itr = dst.begin(); + EXPECT_EQUAL(itr->getPosition(), 7u); + ++itr; + EXPECT_EQUAL(itr->getPosition(), 17u); + ++itr; + ASSERT_TRUE(itr == dst.end()); + } + { + FieldPositionsIterator itr = dst.getIterator(); + 
EXPECT_EQUAL(itr.getPosition(), 7u); + itr.next(); + EXPECT_EQUAL(itr.getPosition(), 17u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } +} + +void +PhraseSplitterTest::testSplitter() +{ + { // single term + test::QueryEnvironment qe; + std::vector &terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 0); + ASSERT_TRUE(ps.getNumTerms() == 1); + ps.update(*md); + // check that nothing is served from the splitter + EXPECT_EQUAL(ps.getTerm(0), &terms[0]); + TermFieldHandle handle = terms[0].lookupField(0)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + { // single phrase + test::QueryEnvironment qe; + std::vector & terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(1); + terms.back().setPhraseLength(3); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + terms.back().addField(7).setHandle(mdl.allocTermField(7)); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 7); + ASSERT_TRUE(ps.getNumTerms() == 3); + ps.update(*md); + // check that all is served from the splitter + for (size_t i = 0; i < 3; ++i) { + // fprintf(stderr, "checking term %d\n", (int)i); + const ITermData *td = ps.getTerm(i); + EXPECT_NOT_EQUAL(td, &terms[0]); + EXPECT_NOT_EQUAL(td->lookupField(7), (ITermFieldData *)0); + EXPECT_EQUAL(td->lookupField(0), (ITermFieldData *)0); + TEST_DO(assertTermData(td, 1, 1, 7, i + 4)); // skipHandles = 4 + EXPECT_NOT_EQUAL(td->lookupField(7)->getHandle(), + terms[0].lookupField(7)->getHandle()); + EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(7)->getHandle()), + md->resolveTermField(terms[0].lookupField(7)->getHandle())); + } + } + { // combination + test::QueryEnvironment qe; + std::vector &terms = qe.getTerms(); + MatchDataLayout mdl; + for (size_t i = 0; i < 3; ++i) { 
+ terms.push_back(SimpleTermData()); + terms.back().setUniqueId(i); + terms.back().setPhraseLength(1); + terms.back().addField(4).setHandle(mdl.allocTermField(4)); + terms.back().addField(7).setHandle(mdl.allocTermField(7)); + // fprintf(stderr, "setup B term %p #f %zd\n", &terms.back(), terms.back().numFields()); + } + terms[1].setPhraseLength(3); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 4); + ASSERT_TRUE(ps.getNumTerms() == 5); + ps.update(*md); + { // first term + // fprintf(stderr, "first term\n"); + EXPECT_EQUAL(ps.getTerm(0), &terms[0]); + TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 4, 0)); + TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 7, 1)); + + TermFieldHandle handle = terms[0].lookupField(4)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + handle = terms[0].lookupField(7)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + for (size_t i = 0; i < 3; ++i) { // phrase + // fprintf(stderr, "phrase term %zd\n", i); + const ITermData *td = ps.getTerm(i + 1); + EXPECT_NOT_EQUAL(td, &terms[1]); + TEST_DO(assertTermData(td, 1, 1, 4, i + 11)); // skipHandles == 11 + EXPECT_EQUAL(td->lookupField(7), (ITermFieldData *)0); + EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(4)->getHandle()), + md->resolveTermField(terms[1].lookupField(4)->getHandle())); + } + { // last term + // fprintf(stderr, "last term\n"); + EXPECT_EQUAL(ps.getTerm(4), &terms[2]); + TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 4, 4)); + TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 7, 5)); + + // fprintf(stderr, "inspect term %p #f %zd\n", &terms[2], terms[2].numFields()); + fflush(stderr); + TermFieldHandle handle = terms[2].lookupField(4)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + } +} + +void +PhraseSplitterTest::testSplitterUpdate() +{ + { + test::QueryEnvironment qe; + std::vector &terms = qe.getTerms(); + MatchDataLayout mdl; + 
for (size_t i = 0; i < 3; ++i) { + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(i); + terms.back().setPhraseLength(1); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + } + terms[0].setPhraseLength(2); + terms[2].setPhraseLength(2); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 0); + ASSERT_TRUE(ps.getNumTerms() == 5); + { // first phrase + TermFieldMatchData * tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000)); + } + { // first term + TermFieldMatchData * tmd = md->resolveTermField(terms[1].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 20, 0, 1000)); + } + { // second phrase + TermFieldMatchData * tmd = md->resolveTermField(terms[2].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 30, 0, 1000)); + } + ps.update(*md); + for (size_t i = 0; i < 2; ++i) { // first phrase + const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 10 + i); + ASSERT_TRUE(itr == tmd->end()); + } + { // first term + TermFieldMatchData * tmd = md->resolveTermField(ps.getTerm(2)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 20u); + ASSERT_TRUE(itr == tmd->end()); + } + for (size_t i = 0; i < 2; ++i) { // second phrase + const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i + 3)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 30 + i); + ASSERT_TRUE(itr == tmd->end()); + } + } +} + +int +PhraseSplitterTest::Main() +{ + + TEST_INIT("phrasesplitter_test"); + + testCopyTermFieldMatchData(); + testSplitter(); + testSplitterUpdate(); + + TEST_DONE(); +} + +} +} + 
+TEST_APPHOOK(search::fef::PhraseSplitterTest); diff --git a/searchlib/src/tests/fef/properties/.gitignore b/searchlib/src/tests/fef/properties/.gitignore new file mode 100644 index 00000000000..00f94794fa3 --- /dev/null +++ b/searchlib/src/tests/fef/properties/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +properties_test +searchlib_properties_test_app diff --git a/searchlib/src/tests/fef/properties/CMakeLists.txt b/searchlib/src/tests/fef/properties/CMakeLists.txt new file mode 100644 index 00000000000..0b74b10cb31 --- /dev/null +++ b/searchlib/src/tests/fef/properties/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_properties_test_app + SOURCES + properties_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_properties_test_app COMMAND searchlib_properties_test_app) diff --git a/searchlib/src/tests/fef/properties/DESC b/searchlib/src/tests/fef/properties/DESC new file mode 100644 index 00000000000..02faa4cb727 --- /dev/null +++ b/searchlib/src/tests/fef/properties/DESC @@ -0,0 +1 @@ +properties test. Take a look at properties.cpp for details. diff --git a/searchlib/src/tests/fef/properties/FILES b/searchlib/src/tests/fef/properties/FILES new file mode 100644 index 00000000000..61054fa62c2 --- /dev/null +++ b/searchlib/src/tests/fef/properties/FILES @@ -0,0 +1 @@ +properties.cpp diff --git a/searchlib/src/tests/fef/properties/properties_test.cpp b/searchlib/src/tests/fef/properties/properties_test.cpp new file mode 100644 index 00000000000..a08d511b418 --- /dev/null +++ b/searchlib/src/tests/fef/properties/properties_test.cpp @@ -0,0 +1,425 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::indexproperties; + +struct CopyVisitor : public IPropertiesVisitor +{ + Properties &dst; + CopyVisitor(Properties &p) : dst(p) {} + virtual void visitProperty(const Property::Value &key, + const Property &values) + { + for (uint32_t i = 0; i < values.size(); ++i) { + dst.add(key, values.getAt(i)); + } + } +}; + +Properties make_props(std::initializer_list > > entries) { + Properties props; + for (const auto &entry: entries) { + vespalib::string key = entry.first; + for (vespalib::string value: entry.second) { + props.add(key, value); + } + } + return props; +} + +TEST("require that namespace visitation works") { + Properties props = make_props({ {"foo", {"outside"}}, + {"foo.a", {"a_value"}}, + {"foo.b", {"b_value"}}, + {"foo.", {"outside"}} + }); + Properties result; + CopyVisitor copy_visitor(result); + props.visitNamespace("foo", copy_visitor); + EXPECT_EQUAL(2u, result.numKeys()); + EXPECT_EQUAL(result.lookup("a").get(), Property::Value("a_value")); + EXPECT_EQUAL(result.lookup("b").get(), Property::Value("b_value")); +} + +TEST("test stuff") { + { // empty lookup result + Property p; + + EXPECT_EQUAL(p.found(), false); + EXPECT_EQUAL(p.get(), Property::Value("")); + EXPECT_EQUAL(p.get("fb"), Property::Value("fb")); + EXPECT_EQUAL(p.size(), 0u); + EXPECT_EQUAL(p.getAt(0), Property::Value("")); + } + { // add / count / remove + Properties p = make_props({ {"a", {"a1", "a2", "a3"}}, + {"b", {"b1", "b2"}}, + {"c", {"c1"}} + }); + const Properties &pc = p; + + EXPECT_EQUAL(pc.numKeys(), 3u); + EXPECT_EQUAL(pc.numValues(), 6u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 1u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("d"); + + EXPECT_EQUAL(pc.numKeys(), 3u); + EXPECT_EQUAL(pc.numValues(), 6u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 
1u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("c"); + + EXPECT_EQUAL(pc.numKeys(), 2u); + EXPECT_EQUAL(pc.numValues(), 5u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("b"); + + EXPECT_EQUAL(pc.numKeys(), 1u); + EXPECT_EQUAL(pc.numValues(), 3u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 0u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("a"); + + EXPECT_EQUAL(pc.numKeys(), 0u); + EXPECT_EQUAL(pc.numValues(), 0u); + EXPECT_EQUAL(pc.count("a"), 0u); + EXPECT_EQUAL(pc.count("b"), 0u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + } + { // lookup / import / visit / compare / hash + Properties p; + + p.add("x", "x1"); + p.add("a.x", "x2"); + p.add("a.b.x", "x3"); + p.add("a.b.c.x", "x4"); + + p.add("list", "e1").add("list", "e2").add("list", "e3"); + + EXPECT_EQUAL(p.numKeys(), 5u); + EXPECT_EQUAL(p.numValues(), 7u); + + EXPECT_EQUAL(p.lookup("x").found(), true); + EXPECT_EQUAL(p.lookup("a.x").found(), true); + EXPECT_EQUAL(p.lookup("a.b.x").found(), true); + EXPECT_EQUAL(p.lookup("a.b.c.x").found(), true); + EXPECT_EQUAL(p.lookup("list").found(), true); + EXPECT_EQUAL(p.lookup("y").found(), false); + + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1")); + EXPECT_EQUAL(p.lookup("a.x").get(), Property::Value("x2")); + EXPECT_EQUAL(p.lookup("a.b.x").get(), Property::Value("x3")); + EXPECT_EQUAL(p.lookup("a.b.c.x").get(), Property::Value("x4")); + EXPECT_EQUAL(p.lookup("list").get(), Property::Value("e1")); + EXPECT_EQUAL(p.lookup("y").get(), Property::Value("")); + + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1")); + EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2")); + EXPECT_EQUAL(p.lookup("a", "b", "x").get(), Property::Value("x3")); + EXPECT_EQUAL(p.lookup("a", "b", "c", "x").get(), Property::Value("x4")); + + 
EXPECT_EQUAL(p.lookup("x").get("fallback"), Property::Value("x1")); + EXPECT_EQUAL(p.lookup("y").get("fallback"), Property::Value("fallback")); + + EXPECT_EQUAL(p.lookup("y").size(), 0u); + EXPECT_EQUAL(p.lookup("x").size(), 1u); + EXPECT_EQUAL(p.lookup("list").size(), 3u); + EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("e1")); + EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("e2")); + EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value("e3")); + EXPECT_EQUAL(p.lookup("list").getAt(3), Property::Value("")); + + Properties p2; + + p2.add("x", "new_x"); + p2.add("y", "y1"); + p2.add("list", "foo").add("list", "bar"); + + EXPECT_EQUAL(p2.numKeys(), 3u); + EXPECT_EQUAL(p2.numValues(), 4u); + + p.import(p2); + + EXPECT_EQUAL(p.numKeys(), 6u); + EXPECT_EQUAL(p.numValues(), 7u); + + EXPECT_EQUAL(p.lookup("y").size(), 1u); + EXPECT_EQUAL(p.lookup("y").get(), Property::Value("y1")); + + EXPECT_EQUAL(p.lookup("x").size(), 1u); + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("new_x")); + + EXPECT_EQUAL(p.lookup("z").size(), 0u); + + EXPECT_EQUAL(p.lookup("a", "x").size(), 1u); + EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2")); + + EXPECT_EQUAL(p.lookup("list").size(), 2u); + EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("foo")); + EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("bar")); + EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value("")); + + Properties p3; + + EXPECT_TRUE(!(p == p2)); + EXPECT_TRUE(!(p == p3)); + EXPECT_TRUE(!(p2 == p)); + EXPECT_TRUE(!(p3 == p)); + EXPECT_TRUE(!(p2 == p3)); + EXPECT_TRUE(!(p3 == p2)); + + CopyVisitor cv(p3); + p.visitProperties(cv); + + EXPECT_EQUAL(p3.numKeys(), 6u); + EXPECT_EQUAL(p3.numValues(), 7u); + + EXPECT_TRUE(p == p3); + EXPECT_TRUE(p3 == p); + EXPECT_EQUAL(p.hashCode(), p3.hashCode()); + + p.clear(); + EXPECT_EQUAL(p.numKeys(), 0u); + EXPECT_EQUAL(p.numValues(), 0u); + EXPECT_TRUE(!(p == p3)); + EXPECT_TRUE(!(p3 == p)); + + Properties p4; + CopyVisitor 
cv2(p4); + p.visitProperties(cv2); // visit the cleared p into p4 (cv targets p3) + EXPECT_EQUAL(p4.numKeys(), 0u); + EXPECT_EQUAL(p4.numValues(), 0u); + EXPECT_TRUE(p == p4); + EXPECT_TRUE(p4 == p); + EXPECT_EQUAL(p.hashCode(), p4.hashCode()); + } + + { // test index properties known by the framework + { // vespa.rank.firstphase + EXPECT_EQUAL(rank::FirstPhase::NAME, vespalib::string("vespa.rank.firstphase")); + EXPECT_EQUAL(rank::FirstPhase::DEFAULT_VALUE, vespalib::string("nativeRank")); + Properties p; + EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("nativeRank")); + p.add("vespa.rank.firstphase", "specialrank"); + EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("specialrank")); + } + { // vespa.rank.secondphase + EXPECT_EQUAL(rank::SecondPhase::NAME, vespalib::string("vespa.rank.secondphase")); + EXPECT_EQUAL(rank::SecondPhase::DEFAULT_VALUE, vespalib::string("")); + Properties p; + EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string("")); + p.add("vespa.rank.secondphase", "specialrank"); + EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string("specialrank")); + } + { // vespa.dump.feature + EXPECT_EQUAL(dump::Feature::NAME, vespalib::string("vespa.dump.feature")); + EXPECT_EQUAL(dump::Feature::DEFAULT_VALUE.size(), 0u); + Properties p; + EXPECT_EQUAL(dump::Feature::lookup(p).size(), 0u); + p.add("vespa.dump.feature", "foo"); + p.add("vespa.dump.feature", "bar"); + std::vector a = dump::Feature::lookup(p); + ASSERT_TRUE(a.size() == 2); + EXPECT_EQUAL(a[0], vespalib::string("foo")); + EXPECT_EQUAL(a[1], vespalib::string("bar")); + } + { // vespa.dump.ignoredefaultfeatures + EXPECT_EQUAL(dump::IgnoreDefaultFeatures::NAME, vespalib::string("vespa.dump.ignoredefaultfeatures")); + EXPECT_EQUAL(dump::IgnoreDefaultFeatures::DEFAULT_VALUE, "false"); + Properties p; + EXPECT_TRUE(!dump::IgnoreDefaultFeatures::check(p)); + p.add("vespa.dump.ignoredefaultfeatures", "true"); + EXPECT_TRUE(dump::IgnoreDefaultFeatures::check(p)); + } + { // vespa.matching.termwise_limit + 
EXPECT_EQUAL(matching::TermwiseLimit::NAME, vespalib::string("vespa.matching.termwise_limit")); + EXPECT_EQUAL(matching::TermwiseLimit::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 1.0); + p.add("vespa.matching.termwise_limit", "0.05"); + EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 0.05); + } + { // vespa.matching.numthreadspersearch + EXPECT_EQUAL(matching::NumThreadsPerSearch::NAME, vespalib::string("vespa.matching.numthreadspersearch")); + EXPECT_EQUAL(matching::NumThreadsPerSearch::DEFAULT_VALUE, std::numeric_limits::max()); + Properties p; + EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), std::numeric_limits::max()); + p.add("vespa.matching.numthreadspersearch", "50"); + EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), 50u); + } + { // vespa.matching.numsearchpartitions + EXPECT_EQUAL(matching::NumSearchPartitions::NAME, vespalib::string("vespa.matching.numsearchpartitions")); + EXPECT_EQUAL(matching::NumSearchPartitions::DEFAULT_VALUE, 1u); + Properties p; + EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 1u); + p.add("vespa.matching.numsearchpartitions", "50"); + EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 50u); + } + { // vespa.matchphase.degradation.attribute + EXPECT_EQUAL(matchphase::DegradationAttribute::NAME, vespalib::string("vespa.matchphase.degradation.attribute")); + EXPECT_EQUAL(matchphase::DegradationAttribute::DEFAULT_VALUE, ""); + Properties p; + EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), ""); + p.add("vespa.matchphase.degradation.attribute", "foobar"); + EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), "foobar"); + } + { // vespa.matchphase.degradation.ascendingorder + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::NAME, vespalib::string("vespa.matchphase.degradation.ascendingorder")); + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::DEFAULT_VALUE, false); + Properties p; + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), false); + 
p.add("vespa.matchphase.degradation.ascendingorder", "true"); + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), true); + } + { // vespa.matchphase.degradation.maxhits + EXPECT_EQUAL(matchphase::DegradationMaxHits::NAME, vespalib::string("vespa.matchphase.degradation.maxhits")); + EXPECT_EQUAL(matchphase::DegradationMaxHits::DEFAULT_VALUE, 0u); + Properties p; + EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 0u); + p.add("vespa.matchphase.degradation.maxhits", "123789"); + EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 123789u); + } + { // vespa.matchphase.degradation.samplepercentage + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::NAME, vespalib::string("vespa.matchphase.degradation.samplepercentage")); + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::DEFAULT_VALUE, 0.2); + Properties p; + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.2); + p.add("vespa.matchphase.degradation.samplepercentage", "0.9"); + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.9); + } + { // vespa.matchphase.degradation.maxfiltercoverage + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::NAME, vespalib::string("vespa.matchphase.degradation.maxfiltercoverage")); + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 1.0); + p.add("vespa.matchphase.degradation.maxfiltercoverage", "0.076"); + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 0.076); + } + { // vespa.matchphase.degradation.postfiltermultiplier + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::NAME, vespalib::string("vespa.matchphase.degradation.postfiltermultiplier")); + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 1.0); + p.add("vespa.matchphase.degradation.postfiltermultiplier", 
"0.9"); + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 0.9); + } + { // vespa.matchphase.diversity.attribute + EXPECT_EQUAL(matchphase::DiversityAttribute::NAME, vespalib::string("vespa.matchphase.diversity.attribute")); + EXPECT_EQUAL(matchphase::DiversityAttribute::DEFAULT_VALUE, ""); + Properties p; + EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), ""); + p.add("vespa.matchphase.diversity.attribute", "foobar"); + EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), "foobar"); + } + { // vespa.matchphase.diversity.mingroups + EXPECT_EQUAL(matchphase::DiversityMinGroups::NAME, vespalib::string("vespa.matchphase.diversity.mingroups")); + EXPECT_EQUAL(matchphase::DiversityMinGroups::DEFAULT_VALUE, 1u); + Properties p; + EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 1u); + p.add("vespa.matchphase.diversity.mingroups", "5"); + EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 5u); + } + { // vespa.hitcollector.heapsize + EXPECT_EQUAL(hitcollector::HeapSize::NAME, vespalib::string("vespa.hitcollector.heapsize")); + EXPECT_EQUAL(hitcollector::HeapSize::DEFAULT_VALUE, 100u); + Properties p; + EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 100u); + p.add("vespa.hitcollector.heapsize", "50"); + EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 50u); + } + { // vespa.hitcollector.arraysize + EXPECT_EQUAL(hitcollector::ArraySize::NAME, vespalib::string("vespa.hitcollector.arraysize")); + EXPECT_EQUAL(hitcollector::ArraySize::DEFAULT_VALUE, 10000u); + Properties p; + EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 10000u); + p.add("vespa.hitcollector.arraysize", "50"); + EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 50u); + } + { // vespa.hitcollector.estimatepoint + EXPECT_EQUAL(hitcollector::EstimatePoint::NAME, vespalib::string("vespa.hitcollector.estimatepoint")); + EXPECT_EQUAL(hitcollector::EstimatePoint::DEFAULT_VALUE, 0xffffffffu); + Properties p; + EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 0xffffffffu); 
+ p.add("vespa.hitcollector.estimatepoint", "50"); + EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 50u); + } + { // vespa.hitcollector.estimatelimit + EXPECT_EQUAL(hitcollector::EstimateLimit::NAME, vespalib::string("vespa.hitcollector.estimatelimit")); + EXPECT_EQUAL(hitcollector::EstimateLimit::DEFAULT_VALUE, 0xffffffffu); + Properties p; + EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 0xffffffffu); + p.add("vespa.hitcollector.estimatelimit", "50"); + EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 50u); + } + { // vespa.hitcollector.rankscoredroplimit + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::NAME, vespalib::string("vespa.hitcollector.rankscoredroplimit")); + search::feature_t got1 = hitcollector::RankScoreDropLimit::DEFAULT_VALUE; + EXPECT_TRUE(got1 != got1); + Properties p; + search::feature_t got2= hitcollector::RankScoreDropLimit::lookup(p); + EXPECT_TRUE(got2 != got2); + p.add("vespa.hitcollector.rankscoredroplimit", "-123456789.12345"); + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), -123456789.12345); + p.clear().add("vespa.hitcollector.rankscoredroplimit", "123456789.12345"); + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), 123456789.12345); + } + { // vespa.fieldweight. + EXPECT_EQUAL(FieldWeight::BASE_NAME, vespalib::string("vespa.fieldweight.")); + EXPECT_EQUAL(FieldWeight::DEFAULT_VALUE, 100u); + Properties p; + EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 100u); + p.add("vespa.fieldweight.foo", "200"); + EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 200u); + } + { // vespa.isfilterfield. 
+ EXPECT_EQUAL(IsFilterField::BASE_NAME, "vespa.isfilterfield."); + EXPECT_EQUAL(IsFilterField::DEFAULT_VALUE, "false"); + Properties p; + EXPECT_TRUE(!IsFilterField::check(p, "foo")); + p.add("vespa.isfilterfield.foo", "true"); + EXPECT_TRUE(IsFilterField::check(p, "foo")); + EXPECT_TRUE(!IsFilterField::check(p, "bar")); + IsFilterField::set(p, "bar"); + EXPECT_TRUE(IsFilterField::check(p, "bar")); + } + } +} + +TEST("test attribute type properties") +{ + Properties p; + p.add("vespa.type.attribute.foo", "tensor(x[10])"); + EXPECT_EQUAL("tensor(x[10])", type::Attribute::lookup(p, "foo")); + EXPECT_EQUAL("", type::Attribute::lookup(p, "bar")); +} + +TEST("test query feature type properties") +{ + Properties p; + p.add("vespa.type.query.foo", "tensor(x[10])"); + EXPECT_EQUAL("tensor(x[10])", type::QueryFeature::lookup(p, "foo")); + EXPECT_EQUAL("", type::QueryFeature::lookup(p, "bar")); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/rank_program/.gitignore b/searchlib/src/tests/fef/rank_program/.gitignore new file mode 100644 index 00000000000..b86a29e139f --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/.gitignore @@ -0,0 +1 @@ +searchlib_rank_program_test_app diff --git a/searchlib/src/tests/fef/rank_program/CMakeLists.txt b/searchlib/src/tests/fef/rank_program/CMakeLists.txt new file mode 100644 index 00000000000..12d971a9421 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_rank_program_test_app + SOURCES + rank_program_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_rank_program_test_app COMMAND searchlib_rank_program_test_app) diff --git a/searchlib/src/tests/fef/rank_program/FILES b/searchlib/src/tests/fef/rank_program/FILES new file mode 100644 index 00000000000..bf6e4665a68 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/FILES @@ -0,0 +1 @@ +rank_program_test.cpp diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp new file mode 100644 index 00000000000..baf665c58e8 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp @@ -0,0 +1,172 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +struct ImpureValueExecutor : FeatureExecutor { + double value; + ImpureValueExecutor(double value_in) : value(value_in) {} + bool isPure() override { return false; } + void execute(search::fef::MatchData &md) override { *md.resolveFeature(outputs()[0]) = value; } +}; + +struct ImpureValueBlueprint : Blueprint { + double value; + ImpureValueBlueprint() : Blueprint("ivalue"), value(31212.0) {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { return Blueprint::UP(new ImpureValueBlueprint()); } + bool setup(const IIndexEnvironment &, const std::vector &params) override { + ASSERT_EQUAL(1u, params.size()); + value = strtod(params[0].c_str(), nullptr); + describeOutput("out", "the impure value"); + return true; + } + FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override { + return FeatureExecutor::LP(new 
ImpureValueExecutor(value)); + } +}; + +struct MySetup { + BlueprintFactory factory; + IndexEnvironment indexEnv; + BlueprintResolver::SP resolver; + Properties overrides; + RankProgram program; + MySetup() : factory(), indexEnv(), resolver(new BlueprintResolver(factory, indexEnv)), + overrides(), program(resolver) + { + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + factory.addPrototype(Blueprint::SP(new ImpureValueBlueprint())); + factory.addPrototype(Blueprint::SP(new SumBlueprint())); + } + MySetup &add(const vespalib::string &feature) { + resolver->addSeed(feature); + return *this; + } + MySetup &override(const vespalib::string &feature, double value) { + overrides.add(feature, vespalib::make_string("%g", value)); + return *this; + } + MySetup &compile() { + ASSERT_TRUE(resolver->compile()); + MatchDataLayout mdl; + QueryEnvironment queryEnv(&indexEnv); + program.setup(mdl, queryEnv, overrides); + return *this; + } + MySetup &run() { + program.run(1); + return *this; + } + double get() { + std::vector names; + std::vector handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(1u, names.size()); + EXPECT_EQUAL(names.size(), handles.size()); + return *program.match_data().resolveFeature(handles[0]); + } + double get(const vespalib::string &feature) { + std::vector names; + std::vector handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(names.size(), handles.size()); + for (size_t i = 0; i < names.size(); ++i) { + if (names[i] == feature) { + return *program.match_data().resolveFeature(handles[i]); + } + } + return 31212.0; + } + std::map all() { + std::map result; + std::vector names; + std::vector handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(names.size(), handles.size()); + for (size_t i = 0; i < names.size(); ++i) { + result[names[i]] = *program.match_data().resolveFeature(handles[i]); + } + return result; + } +}; + +TEST_F("require that match data docid is set by run", MySetup()) { + 
f1.compile(); + EXPECT_NOT_EQUAL(1u, f1.program.match_data().getDocId()); + f1.run(); + EXPECT_EQUAL(1u, f1.program.match_data().getDocId()); +} + +TEST_F("require that simple program works", MySetup()) { + EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().run().get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(2u, f1.program.program_size()); +} + +TEST_F("require that const features are calculated during setup", MySetup()) { + f1.add("mysum(value(10),value(5))").compile(); + EXPECT_EQUAL(15.0, f1.get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(0u, f1.program.program_size()); +} + +TEST_F("require that non-const features are calculated during run", MySetup()) { + f1.add("mysum(ivalue(10),ivalue(5))").compile(); + EXPECT_EQUAL(0.0, f1.get()); + f1.run(); + EXPECT_EQUAL(15.0, f1.get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); +} + +TEST_F("require that a single program can calculate multiple output features", MySetup()) { + f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); + f1.add("mysum(value(1),value(2),ivalue(3))"); + f1.compile().run(); + EXPECT_EQUAL(5u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); + EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures()); + auto result = f1.all(); + EXPECT_EQUAL(4u, result.size()); + EXPECT_EQUAL(1.0, result["value(1)"]); + EXPECT_EQUAL(2.0, result["ivalue(2)"]); + EXPECT_EQUAL(3.0, result["ivalue(3)"]); + EXPECT_EQUAL(6.0, result["mysum(value(1),value(2),ivalue(3))"]); +} + +TEST_F("require that a single executor can produce multiple features", MySetup()) { + f1.add("mysum(value(1,2,3).0,value(1,2,3).1,value(1,2,3).2)"); + EXPECT_EQUAL(6.0, f1.compile().run().get()); + EXPECT_EQUAL(2u, f1.program.num_executors()); + EXPECT_EQUAL(0u, f1.program.program_size()); + EXPECT_EQUAL(4u, f1.program.match_data().getNumFeatures()); +} + +TEST_F("require that feature values can be 
overridden", MySetup()) { + f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); + f1.add("mysum(value(1),value(2),ivalue(3))"); + f1.override("value(2)", 20.0).override("ivalue(3)", 30.0); + f1.compile().run(); + EXPECT_EQUAL(5u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); + EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures()); + auto result = f1.all(); + EXPECT_EQUAL(4u, result.size()); + EXPECT_EQUAL(1.0, result["value(1)"]); + EXPECT_EQUAL(2.0, result["ivalue(2)"]); + EXPECT_EQUAL(30.0, result["ivalue(3)"]); + EXPECT_EQUAL(51.0, result["mysum(value(1),value(2),ivalue(3))"]); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/resolver/.gitignore b/searchlib/src/tests/fef/resolver/.gitignore new file mode 100644 index 00000000000..57114e69298 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_resolver_test_app diff --git a/searchlib/src/tests/fef/resolver/CMakeLists.txt b/searchlib/src/tests/fef/resolver/CMakeLists.txt new file mode 100644 index 00000000000..835a50fd6fb --- /dev/null +++ b/searchlib/src/tests/fef/resolver/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_resolver_test_app + SOURCES + resolver_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_resolver_test_app COMMAND searchlib_resolver_test_app) diff --git a/searchlib/src/tests/fef/resolver/DESC b/searchlib/src/tests/fef/resolver/DESC new file mode 100644 index 00000000000..7d3262ab110 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/DESC @@ -0,0 +1 @@ +resolver test. Take a look at resolver_test.cpp for details. 
diff --git a/searchlib/src/tests/fef/resolver/FILES b/searchlib/src/tests/fef/resolver/FILES new file mode 100644 index 00000000000..c40c0663848 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/FILES @@ -0,0 +1 @@ +resolver_test.cpp diff --git a/searchlib/src/tests/fef/resolver/resolver_test.cpp b/searchlib/src/tests/fef/resolver/resolver_test.cpp new file mode 100644 index 00000000000..3d791f886e1 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/resolver_test.cpp @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("resolver_test"); +#include +#include +#include + +namespace search { +namespace fef { + +class BaseBlueprint : public Blueprint { +public: + BaseBlueprint() : Blueprint("base") { } + virtual void visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const {} + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new BaseBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, + const ParameterList & params) { + (void) indexEnv; (void) params; + describeOutput("foo", "foo"); + describeOutput("bar", "bar"); + describeOutput("baz", "baz"); + return true; + } + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const { + return FeatureExecutor::LP(NULL); + } +}; + +class CombineBlueprint : public Blueprint { +public: + CombineBlueprint() : Blueprint("combine") { } + virtual void visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const {} + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new CombineBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, + const ParameterList & params) { + (void) indexEnv; (void) params; + defineInput("base.foo"); + defineInput("base.bar"); + defineInput("base.baz"); + describeOutput("out", "out"); + return true; + } + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) 
const { + return FeatureExecutor::LP(NULL); + } +}; + +class Test : public vespalib::TestApp { +private: + BlueprintFactory _factory; + void requireThatWeGetUniqueBlueprints(); +public: + Test(); + int Main(); +}; + +Test::Test() : + _factory() +{ + _factory.addPrototype(Blueprint::SP(new BaseBlueprint())); + _factory.addPrototype(Blueprint::SP(new CombineBlueprint())); +} + +void +Test::requireThatWeGetUniqueBlueprints() +{ + test::IndexEnvironment ienv; + BlueprintResolver::SP res(new BlueprintResolver(_factory, ienv)); + res->addSeed("combine"); + EXPECT_TRUE(res->compile()); + const BlueprintResolver::ExecutorSpecList & spec = res->getExecutorSpecs(); + EXPECT_EQUAL(2u, spec.size()); + EXPECT_TRUE(dynamic_cast(spec[0].blueprint.get()) != NULL); + EXPECT_TRUE(dynamic_cast(spec[1].blueprint.get()) != NULL); +} + +int +Test::Main() +{ + TEST_INIT("resolver_test"); + + requireThatWeGetUniqueBlueprints(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::Test); diff --git a/searchlib/src/tests/fef/table/.gitignore b/searchlib/src/tests/fef/table/.gitignore new file mode 100644 index 00000000000..b89a30490e0 --- /dev/null +++ b/searchlib/src/tests/fef/table/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +table_test +searchlib_table_test_app diff --git a/searchlib/src/tests/fef/table/CMakeLists.txt b/searchlib/src/tests/fef/table/CMakeLists.txt new file mode 100644 index 00000000000..ca61eb7c365 --- /dev/null +++ b/searchlib/src/tests/fef/table/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_table_test_app + SOURCES + table_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_table_test_app COMMAND searchlib_table_test_app) diff --git a/searchlib/src/tests/fef/table/DESC b/searchlib/src/tests/fef/table/DESC new file mode 100644 index 00000000000..65834ed1305 --- /dev/null +++ b/searchlib/src/tests/fef/table/DESC @@ -0,0 +1 @@ +table test. Take a look at table.cpp for details. diff --git a/searchlib/src/tests/fef/table/FILES b/searchlib/src/tests/fef/table/FILES new file mode 100644 index 00000000000..40be726aeb8 --- /dev/null +++ b/searchlib/src/tests/fef/table/FILES @@ -0,0 +1 @@ +table.cpp diff --git a/searchlib/src/tests/fef/table/table_test.cpp b/searchlib/src/tests/fef/table/table_test.cpp new file mode 100644 index 00000000000..2d05e0c7310 --- /dev/null +++ b/searchlib/src/tests/fef/table/table_test.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("tablemanager_test"); +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { + +class TableTest : public vespalib::TestApp +{ +private: + bool assertTable(const Table & act, const Table & exp); + bool assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp); + void testTable(); + void testFileTableFactory(); + void testFunctionTableFactory(); + void testTableManager(); + +public: + int Main(); +}; + +bool +TableTest::assertTable(const Table & act, const Table & exp) +{ + if (!EXPECT_EQUAL(act.size(), exp.size())) return false; + for (size_t i = 0; i < act.size(); ++i) { + if (!EXPECT_APPROX(act[i], exp[i], 0.01)) return false; + } + return true; +} + +bool +TableTest::assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp) +{ + Table::SP t = tf.createTable(name); + if (!EXPECT_TRUE(t.get() != NULL)) return false; + return assertTable(*t, exp); +} + +void +TableTest::testTable() +{ + Table t; + EXPECT_EQUAL(t.size(), 0u); + EXPECT_EQUAL(t.max(), -std::numeric_limits::max()); + t.add(1).add(2); + EXPECT_EQUAL(t.size(), 2u); + EXPECT_EQUAL(t.max(), 2); + EXPECT_EQUAL(t[0], 1); + EXPECT_EQUAL(t[1], 2); + t.add(10); + EXPECT_EQUAL(t.size(), 3u); + EXPECT_EQUAL(t.max(), 10); + EXPECT_EQUAL(t[2], 10); + t.add(5); + EXPECT_EQUAL(t.size(), 4u); + EXPECT_EQUAL(t.max(), 10); + EXPECT_EQUAL(t[3], 5); +} + +void +TableTest::testFileTableFactory() +{ + { + FileTableFactory ftf("tables1"); + EXPECT_TRUE(assertCreateTable(ftf, "a", Table().add(1.5).add(2.25).add(3))); + EXPECT_TRUE(ftf.createTable("b").get() == NULL); + } + { + FileTableFactory ftf("tables1/"); + EXPECT_TRUE(ftf.createTable("a").get() != NULL); + } +} + +void +TableTest::testFunctionTableFactory() +{ + FunctionTableFactory ftf(2); + EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12)", + Table().add(400).add(368.02))); + 
EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1)", + Table().add(5000).add(5693.15))); + EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100)", + Table().add(100).add(110))); + // specify table size + EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12,3)", + Table().add(400).add(368.02).add(338.60))); + EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1,3)", + Table().add(5000).add(5693.15).add(6098.61))); + EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100,3)", + Table().add(100).add(110).add(120))); + EXPECT_TRUE(ftf.createTable("expdecay()").get() == NULL); + EXPECT_TRUE(ftf.createTable("expdecay(10)").get() == NULL); + EXPECT_TRUE(ftf.createTable("loggrowth()").get() == NULL); + EXPECT_TRUE(ftf.createTable("linear()").get() == NULL); + EXPECT_TRUE(ftf.createTable("none").get() == NULL); + EXPECT_TRUE(ftf.createTable("none(").get() == NULL); + EXPECT_TRUE(ftf.createTable("none)").get() == NULL); + EXPECT_TRUE(ftf.createTable("none)(").get() == NULL); +} + +void +TableTest::testTableManager() +{ + { + TableManager tm; + tm.addFactory(ITableFactory::SP(new FileTableFactory("tables1"))); + tm.addFactory(ITableFactory::SP(new FileTableFactory("tables2"))); + + { + const Table * t = tm.getTable("a"); // from tables1 + ASSERT_TRUE(t != NULL); + EXPECT_TRUE(assertTable(*t, Table().add(1.5).add(2.25).add(3))); + EXPECT_TRUE(t == tm.getTable("a")); + } + { + const Table * t = tm.getTable("b"); // from tables2 + ASSERT_TRUE(t != NULL); + EXPECT_TRUE(assertTable(*t, Table().add(40).add(50).add(60))); + EXPECT_TRUE(t == tm.getTable("b")); + } + { + EXPECT_TRUE(tm.getTable("c") == NULL); + EXPECT_TRUE(tm.getTable("c") == NULL); + } + } + { + TableManager tm; + ASSERT_TRUE(tm.getTable("a") == NULL); + } +} + +int +TableTest::Main() +{ + TEST_INIT("table_test"); + + testTable(); + testFileTableFactory(); + testFunctionTableFactory(); + testTableManager(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::TableTest); diff --git 
a/searchlib/src/tests/fef/table/tables1/a b/searchlib/src/tests/fef/table/tables1/a new file mode 100644 index 00000000000..c46f4d59a71 --- /dev/null +++ b/searchlib/src/tests/fef/table/tables1/a @@ -0,0 +1,3 @@ +1.5 +2.25 +3 diff --git a/searchlib/src/tests/fef/table/tables2/a b/searchlib/src/tests/fef/table/tables2/a new file mode 100644 index 00000000000..300ed6fcd17 --- /dev/null +++ b/searchlib/src/tests/fef/table/tables2/a @@ -0,0 +1,3 @@ +10 +20 +30 diff --git a/searchlib/src/tests/fef/table/tables2/b b/searchlib/src/tests/fef/table/tables2/b new file mode 100644 index 00000000000..6f98b52f55f --- /dev/null +++ b/searchlib/src/tests/fef/table/tables2/b @@ -0,0 +1,3 @@ +40 +50 +60 diff --git a/searchlib/src/tests/fef/termfieldmodel/.gitignore b/searchlib/src/tests/fef/termfieldmodel/.gitignore new file mode 100644 index 00000000000..0f860efa14a --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_termfieldmodel_test_app diff --git a/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt new file mode 100644 index 00000000000..c8a678c11bb --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_termfieldmodel_test_app + SOURCES + termfieldmodel_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_termfieldmodel_test_app COMMAND searchlib_termfieldmodel_test_app) diff --git a/searchlib/src/tests/fef/termfieldmodel/DESC b/searchlib/src/tests/fef/termfieldmodel/DESC new file mode 100644 index 00000000000..2c8df5a8aab --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/DESC @@ -0,0 +1 @@ +termfieldmodel test. Take a look at termfieldmodel_test.cpp for details. 
diff --git a/searchlib/src/tests/fef/termfieldmodel/FILES b/searchlib/src/tests/fef/termfieldmodel/FILES new file mode 100644 index 00000000000..b5440335bc6 --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/FILES @@ -0,0 +1 @@ +termfieldmodel_test.cpp diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp new file mode 100644 index 00000000000..26a02d38adf --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp @@ -0,0 +1,209 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("termfieldmodel_test"); +#include +#include +#include + +#include + +using namespace search::fef; + +struct State { + SimpleTermData term; + MatchData::UP md; + TermFieldMatchData *f3; + TermFieldMatchData *f5; + TermFieldMatchData *f7; + TermFieldMatchDataArray array; + + State() : term(), md(), f3(0), f5(0), f7(0), array() {} + + void setArray(TermFieldMatchDataArray value) { + array = value; + } +}; + +void testInvalidId() { + const TermFieldMatchData empty; + using search::queryeval::SearchIterator; + + EXPECT_EQUAL(TermFieldMatchData::invalidId(), empty.getDocId()); + EXPECT_TRUE(TermFieldMatchData::invalidId() < (SearchIterator::beginId() + 1 ) || + TermFieldMatchData::invalidId() > (search::endDocId - 1)); +} + +void testSetup(State &state) { + MatchDataLayout layout; + + state.term.addField(3); // docfreq = 1 + state.term.addField(7); // docfreq = 2 + state.term.addField(5); // docfreq = 3 + + typedef search::fef::ITermFieldRangeAdapter FRA; + typedef search::fef::SimpleTermFieldRangeAdapter SFR; + + // lookup terms + { + int i = 1; + for (SFR iter(state.term); iter.valid(); iter.next()) { + iter.get().setDocFreq(0.25 * i++); + } + } + + // reserve handles + { + for (SFR iter(state.term); iter.valid(); iter.next()) { + 
iter.get().setHandle(layout.allocTermField(iter.get().getFieldId())); + } + } + + state.md = layout.createMatchData(); + + // init match data + { + for (FRA iter(state.term); iter.valid(); iter.next()) { + const ITermFieldData& tfd = iter.get(); + + TermFieldHandle handle = tfd.getHandle(); + TermFieldMatchData *data = state.md->resolveTermField(handle); + switch (tfd.getFieldId()) { + case 3: + state.f3 = data; + break; + case 5: + state.f5 = data; + break; + case 7: + state.f7 = data; + break; + default: + EXPECT_TRUE(false); + } + } + EXPECT_EQUAL(3u, state.f3->getFieldId()); + EXPECT_EQUAL(5u, state.f5->getFieldId()); + EXPECT_EQUAL(7u, state.f7->getFieldId()); + } + + // test that we can setup array + EXPECT_EQUAL(false, state.array.valid()); + state.setArray(TermFieldMatchDataArray().add(state.f3).add(state.f5).add(state.f7)); + EXPECT_EQUAL(true, state.array.valid()); +} + +void testGenerate(State &state) { + // verify array + EXPECT_EQUAL(3u, state.array.size()); + EXPECT_EQUAL(state.f3, state.array[0]); + EXPECT_EQUAL(state.f5, state.array[1]); + EXPECT_EQUAL(state.f7, state.array[2]); + + // stale unpacked data + state.f5->reset(5); + EXPECT_EQUAL(5u, state.f5->getDocId()); + { + TermFieldMatchDataPosition pos; + pos.setPosition(3); + pos.setElementId(0); + pos.setElementLen(10); + state.f5->appendPosition(pos); + EXPECT_EQUAL(1u, state.f5->getIterator().size()); + EXPECT_EQUAL(10u, state.f5->getIterator().getFieldLength()); + } + state.f5->reset(6); + EXPECT_EQUAL(6u, state.f5->getDocId()); + EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH, + state.f5->getIterator().getFieldLength()); + EXPECT_EQUAL(0u, state.f5->getIterator().size()); + + + // fresh unpacked data + state.md->setDocId(10); + state.f3->reset(10); + { + TermFieldMatchDataPosition pos; + pos.setPosition(3); + pos.setElementId(0); + pos.setElementLen(10); + EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH, + state.f3->getIterator().getFieldLength()); + state.f3->appendPosition(pos); + 
EXPECT_EQUAL(10u, state.f3->getIterator().getFieldLength()); + } + { + TermFieldMatchDataPosition pos; + pos.setPosition(15); + pos.setElementId(1); + pos.setElementLen(20); + state.f3->appendPosition(pos); + EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength()); + } + { + TermFieldMatchDataPosition pos; + pos.setPosition(1); + pos.setElementId(2); + pos.setElementLen(5); + state.f3->appendPosition(pos); + EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength()); + } + + // raw score + state.f7->setRawScore(10, 5.0); +} + +void testAnalyze(State &state) { + EXPECT_EQUAL(state.md->getDocId(), state.f3->getDocId()); + EXPECT_NOT_EQUAL(state.md->getDocId(), state.f5->getDocId()); + EXPECT_EQUAL(state.md->getDocId(), state.f7->getDocId()); + + FieldPositionsIterator it = state.f3->getIterator(); + EXPECT_EQUAL(20u, it.getFieldLength()); + EXPECT_EQUAL(3u, it.size()); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(3u, it.getPosition()); + EXPECT_EQUAL(0u, it.getElementId()); + EXPECT_EQUAL(10u, it.getElementLen()); + it.next(); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(15u, it.getPosition()); + EXPECT_EQUAL(1u, it.getElementId()); + EXPECT_EQUAL(20u, it.getElementLen()); + it.next(); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(1u, it.getPosition()); + EXPECT_EQUAL(2u, it.getElementId()); + EXPECT_EQUAL(5u, it.getElementLen()); + it.next(); + EXPECT_TRUE(!it.valid()); + + EXPECT_EQUAL(0.0, state.f3->getRawScore()); + EXPECT_EQUAL(0.0, state.f5->getRawScore()); + EXPECT_EQUAL(5.0, state.f7->getRawScore()); +} + +TEST("term field model") { + State state; + testSetup(state); + testGenerate(state); + testAnalyze(state); + testInvalidId(); +} + +TEST("Access subqueries") { + State state; + testSetup(state); + state.f3->reset(10); + state.f3->setSubqueries(10, 42); + EXPECT_EQUAL(42ULL, state.f3->getSubqueries()); + state.f3->enableRawScore(); + EXPECT_EQUAL(0ULL, state.f3->getSubqueries()); + + state.f3->reset(11); + state.f3->appendPosition(TermFieldMatchDataPosition()); + 
state.f3->setSubqueries(11, 42); + EXPECT_EQUAL(0ULL, state.f3->getSubqueries()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/termmatchdatamerger/.gitignore b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore new file mode 100644 index 00000000000..64f3f4a4600 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_termmatchdatamerger_test_app diff --git a/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt new file mode 100644 index 00000000000..cfb6ae2611f --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_termmatchdatamerger_test_app + SOURCES + termmatchdatamerger_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_termmatchdatamerger_test_app COMMAND searchlib_termmatchdatamerger_test_app) diff --git a/searchlib/src/tests/fef/termmatchdatamerger/DESC b/searchlib/src/tests/fef/termmatchdatamerger/DESC new file mode 100644 index 00000000000..abacd50b719 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/DESC @@ -0,0 +1 @@ +termmatchdatamerger test. Take a look at termmatchdatamerger.cpp for details. 
diff --git a/searchlib/src/tests/fef/termmatchdatamerger/FILES b/searchlib/src/tests/fef/termmatchdatamerger/FILES new file mode 100644 index 00000000000..709c15d91b8 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/FILES @@ -0,0 +1 @@ +termmatchdatamerger_test.cpp diff --git a/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp new file mode 100644 index 00000000000..14b74498f2d --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp @@ -0,0 +1,281 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("termmatchdatamerger_test"); +#include + +#include +#include +#include + +using namespace search::fef; + +typedef TermMatchDataMerger::Input MDMI; +typedef TermMatchDataMerger::Inputs MDMIs; + +namespace { + +TermFieldMatchDataPosition make_pos(uint32_t pos) +{ + return TermFieldMatchDataPosition(0, pos, 1, 1000); +} + +} // namespace + +class Test : public vespalib::TestApp +{ +public: + void testMergeEmptyInput(); + void testMergeSimple(); + void testMergeMultifield(); + void testMergeDuplicates(); + void testMergeFieldLength(); + int Main(); +}; + +void +Test::testMergeEmptyInput() +{ + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + + TermFieldMatchData in; + MDMIs input; + input.push_back(MDMI(&in, 1.0)); + + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + in.reset(docid); + merger.merge(docid); + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_TRUE(out.begin() == out.end()); +} + +void +Test::testMergeSimple() +{ + TermFieldMatchData a; + TermFieldMatchData b; + TermFieldMatchData c; + MDMIs input; + input.push_back(MDMI(&a, 0.5)); + input.push_back(MDMI(&b, 1.0)); + input.push_back(MDMI(&c, 1.5)); + + TermFieldMatchData out; + TermFieldMatchDataArray output; + 
output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5).setMatchExactness(0.5)); + a.appendPosition(make_pos(10).setMatchExactness(3.0)); + a.appendPosition(make_pos(15).setMatchExactness(2.0)); + + b.reset(docid); + b.appendPosition(make_pos(7).setMatchExactness(0.5)); + b.appendPosition(make_pos(20).setMatchExactness(4.0)); + + c.reset(docid); + c.appendPosition(make_pos(22).setMatchExactness(0.5)); + c.appendPosition(make_pos(27).setMatchExactness(2.0)); + c.appendPosition(make_pos(28).setMatchExactness(5.0)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(8u, out.end() - out.begin()); + + EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); + EXPECT_EQUAL( 7u, out.begin()[1].getPosition()); + EXPECT_EQUAL(10u, out.begin()[2].getPosition()); + EXPECT_EQUAL(15u, out.begin()[3].getPosition()); + EXPECT_EQUAL(20u, out.begin()[4].getPosition()); + EXPECT_EQUAL(22u, out.begin()[5].getPosition()); + EXPECT_EQUAL(27u, out.begin()[6].getPosition()); + EXPECT_EQUAL(28u, out.begin()[7].getPosition()); + + EXPECT_EQUAL(0.25, out.begin()[0].getMatchExactness()); + EXPECT_EQUAL( 0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQUAL( 1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQUAL( 1.0, out.begin()[3].getMatchExactness()); + EXPECT_EQUAL( 4.0, out.begin()[4].getMatchExactness()); + EXPECT_EQUAL(0.75, out.begin()[5].getMatchExactness()); + EXPECT_EQUAL( 3.0, out.begin()[6].getMatchExactness()); + EXPECT_EQUAL( 7.5, out.begin()[7].getMatchExactness()); + + // one stale input + + docid = 10; + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(10)); + a.appendPosition(make_pos(15)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(3u, out.end() - out.begin()); + + EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); + EXPECT_EQUAL(10u, out.begin()[1].getPosition()); + EXPECT_EQUAL(15u, 
out.begin()[2].getPosition()); + + // both inputs are stale + + docid = 15; + + merger.merge(docid); + EXPECT_NOT_EQUAL(docid, out.getDocId()); +} + + +void +Test::testMergeMultifield() +{ + TermFieldMatchData a; + TermFieldMatchData b; + TermFieldMatchData c; + MDMIs input; + a.setFieldId(1); + b.setFieldId(2); + c.setFieldId(2); + input.push_back(MDMI(&a, 1.0)); + input.push_back(MDMI(&b, 0.5)); + input.push_back(MDMI(&c, 1.5)); + + TermFieldMatchData out1; + TermFieldMatchData out2; + TermFieldMatchData out3; + TermFieldMatchDataArray output; + out1.setFieldId(1); + out2.setFieldId(2); + out3.setFieldId(3); + output.add(&out1).add(&out2).add(&out3); + + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(15)); + + b.reset(docid); + b.appendPosition(make_pos(7)); + b.appendPosition(make_pos(20)); + + c.reset(docid); + c.appendPosition(make_pos(5)); + c.appendPosition(make_pos(20)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out1.getDocId()); + EXPECT_EQUAL(docid, out2.getDocId()); + EXPECT_NOT_EQUAL(docid, out3.getDocId()); + + EXPECT_EQUAL(2u, out1.end() - out1.begin()); + EXPECT_EQUAL(3u, out2.end() - out2.begin()); + + EXPECT_EQUAL( 5u, out1.begin()[0].getPosition()); + EXPECT_EQUAL(15u, out1.begin()[1].getPosition()); + + EXPECT_EQUAL( 5u, out2.begin()[0].getPosition()); + EXPECT_EQUAL( 7u, out2.begin()[1].getPosition()); + EXPECT_EQUAL(20u, out2.begin()[2].getPosition()); + + EXPECT_EQUAL(1.0, out1.begin()[0].getMatchExactness()); + EXPECT_EQUAL(1.0, out1.begin()[1].getMatchExactness()); + + EXPECT_EQUAL(1.5, out2.begin()[0].getMatchExactness()); + EXPECT_EQUAL(0.5, out2.begin()[1].getMatchExactness()); + EXPECT_EQUAL(1.5, out2.begin()[2].getMatchExactness()); +} + +void +Test::testMergeDuplicates() +{ + TermFieldMatchData a; + TermFieldMatchData b; + MDMIs input; + input.push_back(MDMI(&a, 0.5)); + input.push_back(MDMI(&b, 1.5)); + + TermFieldMatchData out; 
+ TermFieldMatchDataArray output; + output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(10)); + a.appendPosition(make_pos(15)); + + b.reset(docid); + b.appendPosition(make_pos(3)); + b.appendPosition(make_pos(10)); + b.appendPosition(make_pos(15)); + b.appendPosition(make_pos(17)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(5u, out.end() - out.begin()); + + EXPECT_EQUAL( 3u, out.begin()[0].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[0].getMatchExactness()); + EXPECT_EQUAL( 5u, out.begin()[1].getPosition()); + EXPECT_EQUAL(0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQUAL(10u, out.begin()[2].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQUAL(15u, out.begin()[3].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[3].getMatchExactness()); + EXPECT_EQUAL(17u, out.begin()[4].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[4].getMatchExactness()); +} + +void +Test::testMergeFieldLength() +{ + TermFieldMatchData a; + TermFieldMatchData b; + MDMIs input; + input.push_back(MDMI(&a, 1.0)); + input.push_back(MDMI(&b, 1.0)); + + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + a.reset(docid); + a.appendPosition(make_pos(1)); + b.reset(docid); + b.appendPosition(make_pos(2)); + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(1000u, out.getIterator().getFieldLength()); +} + +int +Test::Main() +{ + TEST_INIT("termmatchdatamerger_test"); + testMergeEmptyInput(); + testMergeSimple(); + testMergeMultifield(); + testMergeDuplicates(); + testMergeFieldLength(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fileheaderinspect/.gitignore b/searchlib/src/tests/fileheaderinspect/.gitignore new file mode 100644 index 00000000000..812991d07b5 
--- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +fileheader.dat +fileheaderinspect_test +out +searchlib_fileheaderinspect_test_app diff --git a/searchlib/src/tests/fileheaderinspect/CMakeLists.txt b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt new file mode 100644 index 00000000000..024e83bde02 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fileheaderinspect_test_app + SOURCES + fileheaderinspect.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fileheaderinspect_test_app COMMAND searchlib_fileheaderinspect_test_app) diff --git a/searchlib/src/tests/fileheaderinspect/DESC b/searchlib/src/tests/fileheaderinspect/DESC new file mode 100644 index 00000000000..ee57a2fdde3 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/DESC @@ -0,0 +1 @@ +fileheaderinspect test. Take a look at fileheaderinspect.cpp for details. diff --git a/searchlib/src/tests/fileheaderinspect/FILES b/searchlib/src/tests/fileheaderinspect/FILES new file mode 100644 index 00000000000..7c32fb811d5 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/FILES @@ -0,0 +1 @@ +fileheaderinspect.cpp diff --git a/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp new file mode 100644 index 00000000000..75ad526e2f7 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("fileheaderinspect_test"); + +#include +#include + +using namespace search; +using namespace vespalib; + +class Test : public vespalib::TestApp { +private: + bool writeHeader(const FileHeader &header, const vespalib::string &fileName); + vespalib::string readFile(const vespalib::string &fileName); + + void testError(); + void testEscape(); + void testDelimiter(); + void testQuiet(); + void testVerbose(); + +public: + int Main() { + TEST_INIT("fileheaderinspect_test"); + + testError(); TEST_FLUSH(); + testEscape(); TEST_FLUSH(); + testDelimiter(); TEST_FLUSH(); + testQuiet(); TEST_FLUSH(); + testVerbose(); TEST_FLUSH(); + + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + +bool +Test::writeHeader(const FileHeader &header, const vespalib::string &fileName) +{ + FastOS_File file; + if (!EXPECT_TRUE(file.OpenWriteOnlyTruncate(fileName.c_str()))) { + return false; + } + if (!EXPECT_EQUAL(header.getSize(), header.writeFile(file))) { + return false; + } + file.Close(); + return true; +} + +vespalib::string +Test::readFile(const vespalib::string &fileName) +{ + FastOS_File file; + ASSERT_TRUE(file.OpenReadOnly(fileName.c_str())); + + char buf[1024]; + uint32_t len = file.Read(buf, 1024); + EXPECT_TRUE(len != 1024); // make sure we got everything + + vespalib::string str(buf, len); + file.Close(); + return str; +} + +void +Test::testError() +{ + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect notfound.dat") != 0); +} + +void +Test::testEscape() +{ + FileHeader header; + header.putTag(FileHeader::Tag("fanart", "\fa\na\r\t")); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0); + EXPECT_EQUAL("fanart;string;\\fa\\na\\r\\t\n", readFile("out")); +} + +void +Test::testDelimiter() +{ + FileHeader header; + header.putTag(FileHeader::Tag("string", "string")); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + 
EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -d i -q fileheader.dat > out") == 0); + EXPECT_EQUAL("str\\ingistr\\ingistr\\ing\n", readFile("out")); +} + +void +Test::testVerbose() +{ + FileHeader header; + FileHeaderTk::addVersionTags(header); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect fileheader.dat > out") == 0); + vespalib::string str = readFile("out"); + EXPECT_TRUE(!str.empty()); + for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + EXPECT_TRUE(str.find(tag.getName()) != vespalib::string::npos); + + vespalib::asciistream out; + out << tag; + EXPECT_TRUE(str.find(out.str()) != vespalib::string::npos); + } +} + +void +Test::testQuiet() +{ + FileHeader header; + FileHeaderTk::addVersionTags(header); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0); + vespalib::string str = readFile("out"); + EXPECT_TRUE(!str.empty()); + for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + size_t pos = str.find(tag.getName()); + EXPECT_TRUE(pos != vespalib::string::npos); + + vespalib::asciistream out; + out << ";" << tag; + EXPECT_TRUE(str.find(out.str(), pos) != vespalib::string::npos); + } +} diff --git a/searchlib/src/tests/fileheadertk/.gitignore b/searchlib/src/tests/fileheadertk/.gitignore new file mode 100644 index 00000000000..6aa8c365240 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +fileheadertk.dat +fileheadertk_test +versiontags.dat +searchlib_fileheadertk_test_app diff --git a/searchlib/src/tests/fileheadertk/CMakeLists.txt b/searchlib/src/tests/fileheadertk/CMakeLists.txt new file mode 100644 index 00000000000..bc6969fbac2 --- /dev/null +++ 
b/searchlib/src/tests/fileheadertk/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fileheadertk_test_app + SOURCES + fileheadertk_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fileheadertk_test_app COMMAND searchlib_fileheadertk_test_app) diff --git a/searchlib/src/tests/fileheadertk/DESC b/searchlib/src/tests/fileheadertk/DESC new file mode 100644 index 00000000000..08ad9a0769d --- /dev/null +++ b/searchlib/src/tests/fileheadertk/DESC @@ -0,0 +1 @@ +Ensures that FileHeaderTk works as expected. diff --git a/searchlib/src/tests/fileheadertk/FILES b/searchlib/src/tests/fileheadertk/FILES new file mode 100644 index 00000000000..fe82bf13af7 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/FILES @@ -0,0 +1 @@ +fileheadertk.cpp diff --git a/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp new file mode 100644 index 00000000000..14c5d0ed6f6 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("fileheadertk_test"); + +#include +#include + +using namespace search; + +class Test : public vespalib::TestApp { +private: + void testVersionTags(); + +public: + int Main() { + TEST_INIT("fileheadertk_test"); + + testVersionTags(); TEST_FLUSH(); + + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + +void +Test::testVersionTags() +{ + vespalib::FileHeader header; + FileHeaderTk::addVersionTags(header); + + FastOS_File file; + ASSERT_TRUE(file.OpenWriteOnlyTruncate("versiontags.dat")); + EXPECT_EQUAL(header.getSize(), header.writeFile(file)); + file.Close(); + + EXPECT_EQUAL(8u, header.getNumTags()); + EXPECT_TRUE(header.hasTag("version-arch")); + EXPECT_TRUE(header.hasTag("version-builder")); + EXPECT_TRUE(header.hasTag("version-component")); + EXPECT_TRUE(header.hasTag("version-date")); + EXPECT_TRUE(header.hasTag("version-system")); + EXPECT_TRUE(header.hasTag("version-system-rev")); + EXPECT_TRUE(header.hasTag("version-tag")); + EXPECT_TRUE(header.hasTag("version-pkg")); +} diff --git a/searchlib/src/tests/forcelink/.gitignore b/searchlib/src/tests/forcelink/.gitignore new file mode 100644 index 00000000000..c74c5915388 --- /dev/null +++ b/searchlib/src/tests/forcelink/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +forcelink_test +searchlib_forcelink_test_app diff --git a/searchlib/src/tests/forcelink/CMakeLists.txt b/searchlib/src/tests/forcelink/CMakeLists.txt new file mode 100644 index 00000000000..50e39d2d844 --- /dev/null +++ b/searchlib/src/tests/forcelink/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_forcelink_test_app + SOURCES + forcelink.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_forcelink_test_app COMMAND searchlib_forcelink_test_app) diff --git a/searchlib/src/tests/forcelink/DESC b/searchlib/src/tests/forcelink/DESC new file mode 100644 index 00000000000..c73185a8736 --- /dev/null +++ b/searchlib/src/tests/forcelink/DESC @@ -0,0 +1 @@ +forcelink test. Take a look at forcelink.cpp for details. diff --git a/searchlib/src/tests/forcelink/FILES b/searchlib/src/tests/forcelink/FILES new file mode 100644 index 00000000000..d917375ebf2 --- /dev/null +++ b/searchlib/src/tests/forcelink/FILES @@ -0,0 +1 @@ +forcelink.cpp diff --git a/searchlib/src/tests/forcelink/forcelink.cpp b/searchlib/src/tests/forcelink/forcelink.cpp new file mode 100644 index 00000000000..9f555e09480 --- /dev/null +++ b/searchlib/src/tests/forcelink/forcelink.cpp @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("forcelink_test"); +#include +#include +#include + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("forcelink_test"); + forcelink_searchlib_expression(); + forcelink_searchlib_aggregation(); + TEST_DONE(); +} diff --git a/searchlib/src/tests/grouping/.gitignore b/searchlib/src/tests/grouping/.gitignore new file mode 100644 index 00000000000..c7654573dc5 --- /dev/null +++ b/searchlib/src/tests/grouping/.gitignore @@ -0,0 +1,11 @@ +.depend +Makefile +diff.txt +grouping_test +lhs.out +rhs.out +/grouping_benchmark +searchlib_grouping_serialization_test_app +searchlib_grouping_test_app +searchlib_hyperloglog_test_app +searchlib_sketch_test_app diff --git a/searchlib/src/tests/grouping/CMakeLists.txt b/searchlib/src/tests/grouping/CMakeLists.txt new file mode 100644 index 00000000000..ef44472edfc --- /dev/null +++ b/searchlib/src/tests/grouping/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_grouping_test_app + SOURCES + grouping_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_grouping_test_app COMMAND searchlib_grouping_test_app) +vespa_add_executable(searchlib_hyperloglog_test_app + SOURCES + hyperloglog_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_hyperloglog_test_app COMMAND searchlib_hyperloglog_test_app) +vespa_add_executable(searchlib_sketch_test_app + SOURCES + sketch_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sketch_test_app COMMAND searchlib_sketch_test_app) +vespa_add_executable(searchlib_grouping_serialization_test_app + SOURCES + grouping_serialization_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_grouping_serialization_test_app COMMAND searchlib_grouping_serialization_test_app) diff --git a/searchlib/src/tests/grouping/DESC b/searchlib/src/tests/grouping/DESC new file mode 100644 index 00000000000..1aa6cb37e89 --- /dev/null +++ b/searchlib/src/tests/grouping/DESC @@ -0,0 +1 @@ +grouping test. Take a look at grouping.cpp for details. diff --git a/searchlib/src/tests/grouping/FILES b/searchlib/src/tests/grouping/FILES new file mode 100644 index 00000000000..af7f7e71257 --- /dev/null +++ b/searchlib/src/tests/grouping/FILES @@ -0,0 +1,4 @@ +grouping.cpp +lhs.out +rhs.out +diff.txt diff --git a/searchlib/src/tests/grouping/grouping_serialization_test.cpp b/searchlib/src/tests/grouping/grouping_serialization_test.cpp new file mode 100644 index 00000000000..99757af8439 --- /dev/null +++ b/searchlib/src/tests/grouping/grouping_serialization_test.cpp @@ -0,0 +1,339 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for grouping_serialization. 
+ +#include +LOG_SETUP("grouping_serialization_test"); +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::HitRank; +using vespalib::Identifiable; +using vespalib::NBOSerializer; +using vespalib::make_string; +using vespalib::nbostream; +using namespace search::aggregation; +using namespace search::expression; + +namespace { + +document::GlobalId getGlobalId(uint32_t docId) { + return document::DocumentId(vespalib::make_string("doc:test:%u", docId)) + .getGlobalId(); +} + +struct Fixture { + // Set WRITE_FILES to true to generate new expected serialization files. + const bool WRITE_FILES = false; + const std::string file_path = "../../test/files/"; + std::string file_name; + std::ifstream file_stream; + + Fixture(const std::string &file_name_in) + : file_name(file_path + file_name_in), + file_stream(file_name.c_str(), + std::ifstream::in | std::ifstream::binary) { + if (WRITE_FILES) { + std::ofstream out(file_name.c_str(), + std::ofstream::out | std::ofstream::trunc | + std::ofstream::binary); + } + } + + void checkObject(const Identifiable &obj) { + if (WRITE_FILES) { + nbostream stream; + NBOSerializer serializer(stream); + serializer << obj; + std::ofstream out(file_name.c_str(), + std::ofstream::out | std::ofstream::app | + std::ofstream::binary); + uint32_t size = stream.size(); + out.write(reinterpret_cast(&size), sizeof(size)); + out.write(stream.peek(), stream.size()); + } + + uint32_t size = 0; + file_stream.read(reinterpret_cast(&size), sizeof(size)); + nbostream stream; + for (size_t i = 0; i < size; ++i) { + char c; + file_stream.read(&c, sizeof(c)); + stream << c; + } + Identifiable::UP newObj = Identifiable::create(stream); + + if (!EXPECT_TRUE(newObj.get() != 0)) { + LOG(error, "object of class '%s' resulted in empty echo", + obj.getClass().name()); + return; + } + if 
(EXPECT_EQUAL(obj.asString(), newObj->asString()) + && EXPECT_TRUE(newObj->cmp(obj) == 0) + && EXPECT_TRUE(obj.cmp(*newObj) == 0)) + { + LOG(info, "object of class '%s' passed echo test : %s", + obj.getClass().name(), newObj->asString().c_str()); + } else { + LOG(error, "object of class '%s' FAILED echo test", + obj.getClass().name()); + } + } +}; + +//----------------------------------------------------------------------------- + +ExpressionNode::CP createDummyExpression() { + return AddFunctionNode().addArg(ConstantNode(Int64ResultNode(2))) + .addArg(ConstantNode(Int64ResultNode(2))); +} + +//----------------------------------------------------------------------------- + +TEST_F("testResultTypes", Fixture("testResultTypes")) { + f.checkObject(Int64ResultNode(7)); + f.checkObject(FloatResultNode(7.3)); + f.checkObject(StringResultNode("7.3")); + { + char tmp[7] = { (char)0xe5, (char)0xa6, (char)0x82, (char)0xe6, + (char)0x9e, (char)0x9c,0 }; + f.checkObject(StringResultNode(tmp)); + } + { + char tmp[] = { '7', '.', '4' }; + f.checkObject(RawResultNode(tmp, 3)); + } + f.checkObject(IntegerBucketResultNode()); + f.checkObject(FloatBucketResultNode()); + f.checkObject(IntegerBucketResultNode(10, 20)); + f.checkObject(FloatBucketResultNode(10.0, 20.0)); + f.checkObject(StringBucketResultNode("10.0", "20.0")); + char tmp[] = { 1, 0, 0}; + char tmp2[] = { 1, 1, 0}; + f.checkObject( + RawBucketResultNode(ResultNode::UP(new RawResultNode(tmp, 3)), + ResultNode::UP(new RawResultNode(tmp2, 3)))); + + IntegerBucketResultNodeVector iv; + iv.getVector().push_back(IntegerBucketResultNode(878, 3246823)); + f.checkObject(iv); + + FloatBucketResultNodeVector fv; + fv.getVector().push_back(FloatBucketResultNode(878, 3246823)); + f.checkObject(fv); + + StringBucketResultNodeVector sv; + sv.getVector().push_back(StringBucketResultNode("878", "3246823")); + f.checkObject(sv); + + RawBucketResultNodeVector rv; + rv.getVector().push_back( + RawBucketResultNode(ResultNode::UP(new 
RawResultNode(tmp, 3)), + ResultNode::UP(new RawResultNode(tmp2, 3)))); + f.checkObject(rv); +} + +TEST_F("testSpecialNodes", Fixture("testSpecialNodes")) { + f.checkObject(AttributeNode("testattribute")); + f.checkObject(DocumentFieldNode("testdocumentfield")); + { + f.checkObject(GetDocIdNamespaceSpecificFunctionNode( + ResultNode::UP(new Int64ResultNode(7)))); + } + f.checkObject(GetYMUMChecksumFunctionNode()); +} + +TEST_F("testFunctionNodes", Fixture("testFunctionNodes")) { + f.checkObject(AddFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(XorFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MultiplyFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(DivideFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(ModuloFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MinFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MaxFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(TimeStampFunctionNode(ConstantNode(Int64ResultNode(7)), + TimeStampFunctionNode::Hour, true)); + f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)), + ZCurveFunctionNode::X)); + f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)), + ZCurveFunctionNode::Y)); + 
f.checkObject(NegateFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(SortFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(NormalizeSubjectFunctionNode(ConstantNode( + StringResultNode("foo")))); + f.checkObject(ReverseFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(MD5BitFunctionNode(ConstantNode(Int64ResultNode(7)), 64)); + f.checkObject(XorBitFunctionNode(ConstantNode(Int64ResultNode(7)), 64)); + f.checkObject(CatFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(FixedWidthBucketFunctionNode()); + f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo"))); + f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo")) + .setWidth(Int64ResultNode(10))); + f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo")) + .setWidth(FloatResultNode(10.0))); + f.checkObject(RangeBucketPreDefFunctionNode()); + f.checkObject(RangeBucketPreDefFunctionNode(AttributeNode("foo"))); + f.checkObject(DebugWaitFunctionNode(ConstantNode(Int64ResultNode(5)), + 3.3, false)); +} + +TEST_F("testAggregatorResults", Fixture("testAggregatorResults")) { + f.checkObject(SumAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(XorAggregationResult() + .setXor(Int64ResultNode(7)) + .setExpression(AttributeNode("attributeA"))); + f.checkObject(CountAggregationResult() + .setCount(7) + .setExpression(AttributeNode("attributeA"))); + f.checkObject(MinAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(MaxAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(AverageAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + ExpressionCountAggregationResult expression_count; + 
expression_count.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + f.checkObject(expression_count); +} + +TEST_F("testHitCollection", Fixture("testHitCollection")) { + f.checkObject(FS4Hit()); + f.checkObject(FS4Hit(0, 50.0).setGlobalId(getGlobalId(100))); + f.checkObject(VdsHit()); + f.checkObject(VdsHit("100", 50.0)); + f.checkObject(VdsHit("100", 50.0).setSummary("rawsummary", 10)); + f.checkObject(HitsAggregationResult()); + f.checkObject(HitsAggregationResult() + .setMaxHits(5) + .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10))) + .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20))) + .addHit(FS4Hit(0, 3.0).setGlobalId(getGlobalId(30))) + .addHit(FS4Hit(0, 4.0).setGlobalId(getGlobalId(40))) + .addHit(FS4Hit(0, 5.0).setGlobalId(getGlobalId(50))) + .setExpression(ConstantNode(Int64ResultNode(5)))); + f.checkObject(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10)) + .setDistributionKey(100)) + .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20)) + .setDistributionKey(200)) + .addHit(FS4Hit(0, 3.0).setGlobalId(getGlobalId(30)) + .setDistributionKey(300)) + .setExpression(ConstantNode(Int64ResultNode(5)))); + f.checkObject(HitsAggregationResult() + .setMaxHits(3) + .addHit(VdsHit("10", 1.0).setSummary("100", 3)) + .addHit(VdsHit("20", 2.0).setSummary("200", 3)) + .addHit(VdsHit("30", 3.0).setSummary("300", 3)) + .setExpression(ConstantNode(Int64ResultNode(5)))); +} + +TEST_F("testGroupingLevel", Fixture("testGroupingLevel")) { + f.checkObject(GroupingLevel() + .setMaxGroups(100) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))); +} + +TEST_F("testGroup", Fixture("testGroup")) { + f.checkObject(Group()); + f.checkObject(Group().setId(Int64ResultNode(50)) + .setRank(RawRank(10))); + f.checkObject(Group().setId(Int64ResultNode(100)) + .addChild(Group().setId(Int64ResultNode(110))) + 
.addChild(Group().setId(Int64ResultNode(120)) + .setRank(20.5) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))) + .addChild(Group().setId(Int64ResultNode(130)) + .addChild(Group().setId(Int64ResultNode(131))))); +} + +TEST_F("testGrouping", Fixture("testGrouping")) { + f.checkObject(Grouping()); + f.checkObject(Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(100) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))) + .addLevel(GroupingLevel() + .setMaxGroups(10) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())))); + f.checkObject(Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("folder")) + .addAggregationResult(XorAggregationResult() + .setExpression(MD5BitFunctionNode( + AttributeNode("docid"), 64))) + .addAggregationResult(SumAggregationResult() + .setExpression(MinFunctionNode() + .addArg(AttributeNode("attribute1")) + .addArg(AttributeNode("attribute2"))) + ) + .addAggregationResult(XorAggregationResult() + .setExpression( + XorBitFunctionNode(CatFunctionNode() + .addArg(GetDocIdNamespaceSpecificFunctionNode()) + .addArg(DocumentFieldNode("folder")) + .addArg(DocumentFieldNode("flags")), 64))))); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/grouping/grouping_test.cpp b/searchlib/src/tests/grouping/grouping_test.cpp new file mode 100644 index 00000000000..f9939f0d370 --- /dev/null +++ b/searchlib/src/tests/grouping/grouping_test.cpp @@ -0,0 +1,1912 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("grouping_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace vespalib; +using namespace search; +using namespace search::aggregation; +using namespace search::attribute; +using namespace search::expression; + +//----------------------------------------------------------------------------- + +template +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), + _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() const { + return _attrSP; + } +}; + +typedef AttrBuilder IntAttrBuilder; +typedef AttrBuilder FloatAttrBuilder; +typedef AttrBuilder StringAttrBuilder; + +//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + 
+//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: + bool testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect); + bool testPrune(const Grouping &a, const Grouping &b, + const Group &expect); + bool testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect); + void testAggregationSimple(); + void testAggregationLevels(); + void testAggregationMaxGroups(); + void testAggregationGroupOrder(); + void testAggregationGroupRank(); + void testAggregationGroupCapping(); + void testMergeSimpleSum(); + void testMergeLevels(); + void testMergeGroups(); + void testMergeTrees(); + void testPruneSimple(); + void testPruneComplex(); + void testPartialMerging(); + void testCount(); + void testTopN(); + void testFS4HitCollection(); + bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket); + bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt); + void testFixedWidthBuckets(); + void testThatNanIsConverted(); + void testNanSorting(); + int Main(); +private: + void 
testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : _numrefs(0) { } + int _numrefs; + private: + virtual void execute(vespalib::Identifiable &obj) { + if (static_cast(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request and verify that the resulting group + * tree matches the expected value. + **/ +bool +Test::testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + tmp.aggregate(ctx.result().hits(), ctx.result().size()); + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + EXPECT_EQUAL(attrCheck._numrefs, 0); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; + tmp.merge(tmpB); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +/** + * Prune the given grouping request and verify that the resulting + * group tree matches the expected value. 
+ **/ +bool +Test::testPrune(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.prune(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge a given grouping request to get a partial request back. Verify that the + * partial request is correct. + **/ +bool +Test::testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.mergePartial(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; // create local copy + Grouping tmpC = c; // create local copy + tmp.merge(tmpB); + tmp.merge(tmpC); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +//----------------------------------------------------------------------------- + +/** + * Test collecting the sum of the values from a single attribute + * vector directly into the root node. Consider this a smoke test. 
+ **/ +void +Test::testAggregationSimple() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp()); + ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp()); + ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp()); + + char strsum[3] = {-101, '5', 0}; + testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum)); + testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15")); + testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7")); +} + +void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr) +{ + ExpressionNode::CP clone(aggr); + Grouping request = Grouping() + .setRoot(Group() + .addResult(static_cast(*clone).setExpression(AttributeNode("int"))) + .addResult(static_cast(*clone).setExpression(AttributeNode("float"))) + .addResult(static_cast(*clone).setExpression(AttributeNode("string"))) + ); + + Group expect = Group() + .addResult(static_cast(*clone).setExpression(AttributeNode("int")).setResult(ir)) + .addResult(static_cast(*clone).setExpression(AttributeNode("float")).setResult(fr)) + .addResult(static_cast(*clone).setExpression(AttributeNode("string")).setResult(sr)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that the backend aggregation will classify and collect on + * the appropriate levels, as indicated by the firstLevel and + * lastLevel parameters. 
+ **/ +void +Test::testAggregationLevels() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp()); + ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp()); + ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp()); + ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp()); + ctx.result().add(0).add(1); + + Grouping baseRequest = Grouping() + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")))); + + Group notDone = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0"))); +// Hmm, do not need to prepare more than the levels needed. 
.setResult(Int64ResultNode(0))); + + Group done0 = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(0)))); + + Group done1 = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(0))))); + + Group done2 = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(0)))))); + + Group done3 = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + 
.setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(22)))))); + + { // level 0 only + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + EXPECT_TRUE(testAggregation(ctx, request, done0)); + } + { // level 0 and 1 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 0,1 and 2 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 0,1,2 and 3 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 1 with level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 2 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 3 with level 0,1 and 2 as input + Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 2 and 3 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 1 without level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, notDone)); + } +} + +/** + * Verify that the aggregation step does not create more groups than + * indicated by the maxgroups parameter. 
+ **/
+void
+Test::testAggregationMaxGroups()
+{
+    AggregationContext ctx;
+    ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp()); // three distinct attribute values
+    ctx.result().add(0).add(1).add(2); // three result entries (0..2)
+
+    Grouping baseRequest = Grouping()
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("attr")));
+
+    Group empty = Group(); // expected result when no groups are allowed
+    Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5))); // expected: {5}
+    Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10))); // expected: {5, 10}
+    Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15))); // expected: {5, 10, 15}
+
+    { // max 0 groups: no child groups expected
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(0);
+        EXPECT_TRUE(testAggregation(ctx, request, empty));
+    }
+    { // max 1 group
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(1);
+        EXPECT_TRUE(testAggregation(ctx, request, grp1));
+    }
+    { // max 2 groups
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(2);
+        EXPECT_TRUE(testAggregation(ctx, request, grp2));
+    }
+    { // max 3 groups
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(3);
+        EXPECT_TRUE(testAggregation(ctx, request, grp3));
+    }
+    { // max 4 groups: more than the three available, so all three are expected
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(4);
+        EXPECT_TRUE(testAggregation(ctx, request, grp3));
+    }
+    { // max -1 groups: expected to give all three groups, same as an unreached limit
+        Grouping request = baseRequest;
+        request.levels()[0].setMaxGroups(-1);
+        EXPECT_TRUE(testAggregation(ctx, request, grp3));
+    }
+}
+
+/**
+ * Verify that groups are sorted by group id, even when the attribute
+ **/
+void
+Test::testAggregationGroupOrder()
+{
+    AggregationContext ctx;
+    ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp()); // values deliberately unsorted
+    ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6);
+
+    Grouping request = Grouping()
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("attr")));
+
+    Group expect = Group() // children listed in ascending id order
+        .addChild(Group().setId(Int64ResultNode(5)))
+        .addChild(Group().setId(Int64ResultNode(10)))
+        .addChild(Group().setId(Int64ResultNode(15)))
+        .addChild(Group().setId(Int64ResultNode(20)))
+        .addChild(Group().setId(Int64ResultNode(25)))
+        .addChild(Group().setId(Int64ResultNode(30)))
+        .addChild(Group().setId(Int64ResultNode(35)));
+
+    EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+/**
+ * Verify that groups are tagged with the appropriate rank value.
+ **/
+void
+Test::testAggregationGroupRank()
+{
+    AggregationContext ctx;
+    ctx.add(IntAttrBuilder("attr")
+            .add(1).add(1).add(1)
+            .add(2).add(2).add(2)
+            .add(3).add(3).add(3).sp()); // three entries per attribute value
+    ctx.result() // second argument is presumably the hit rank - TODO confirm
+        .add(0, 5).add(1, 10).add(2, 15)
+        .add(3, 10).add(4, 15).add(5, 5)
+        .add(6, 15).add(7, 5).add(8, 10);
+
+    Grouping request = Grouping().addLevel(
+            GroupingLevel().setExpression(AttributeNode("attr")));
+
+    Group expect = Group() // every group expects 15, the largest second argument among its three entries
+        .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15)))
+        .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15)))
+        .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15)));
+
+    EXPECT_TRUE(testAggregation(ctx, request, expect));
+}
+
+void
+Test::testAggregationGroupCapping()
+{ // Checks which groups are expected back when maxGroups caps a level, with and without explicit order-by.
+    AggregationContext ctx;
+    ctx.add(IntAttrBuilder("attr")
+            .add(1).add(2).add(3)
+            .add(4).add(5).add(6)
+            .add(7).add(8).add(9).sp());
+    ctx.result() // each entry i gets second argument i + 1, matching its attribute value
+        .add(0, 1).add(1, 2).add(2, 3)
+        .add(3, 4).add(4, 5).add(5, 6)
+        .add(6, 7).add(7, 8).add(8, 9);
+
+    { // no maxGroups set: all nine groups are expected
+        Grouping request = Grouping().addLevel(
+                GroupingLevel().setExpression(AttributeNode("attr")));
+
+        Group expect = Group()
+            .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)))
+            .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)))
+            .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)))
+            .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4)))
+            .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5)))
+            .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6)))
+            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+        EXPECT_TRUE(testAggregation(ctx, request, expect));
+    }
+    { // maxGroups 3 without order-by: the three groups with the largest rank are expected
+        Grouping request = Grouping().addLevel(
+                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")));
+
+        Group expect = Group()
+            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
+            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
+            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));
+
+        EXPECT_TRUE(testAggregation(ctx, request, expect));
+    }
+    { // maxGroups 3, order-by the sum with flag false: the three largest sums are expected
+        Grouping request = Grouping().
+            setFirstLevel(0).
+            setLastLevel(1).
+            addLevel(
+                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+                addOrderBy(AggregationRefNode(0), false)); // AggregationRefNode(0) presumably refers to the sum added above - TODO confirm
+
+        Group expect = Group()
+            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
+            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false));
+
+        EXPECT_TRUE(testAggregation(ctx, request, expect));
+    }
+    { // same request with flag true: the three smallest sums are expected instead
+        Grouping request = Grouping().
+            setFirstLevel(0).
+            setLastLevel(1).
+            addLevel(
+                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+        Group expect = Group()
+            .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+            .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
+            .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));
+
+        EXPECT_TRUE(testAggregation(ctx, request, expect));
+    }
+    { // order-by an expression (aggregation ref 0 plus constant 3) instead of the bare sum
+        AddFunctionNode *add = new AddFunctionNode();
+        add->addArg(AggregationRefNode(0));
+        add->appendArg(ConstantNode(Int64ResultNode(3)));
+        ExpressionNode::CP i1(add); // NOTE(review): i1 presumably takes ownership of add - confirm
+        Grouping request = Grouping().
+            setFirstLevel(0).
+            setLastLevel(1).
+            addLevel(
+                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
+                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
+                addOrderBy(i1, false));
+
+        Group expect = Group() // expected order-by results are sum + 3, i.e. 10, 11 and 12
+            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false))
+            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false))
+            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false));
+
+        EXPECT_TRUE(testAggregation(ctx, request, expect));
+    }
+
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Test merging the sum of the values from a single attribute vector
+ * that was collected directly into the root node. Consider this a
+ * smoke test.
+ **/
+void
+Test::testMergeSimpleSum()
+{
+    Grouping a = Grouping() // first merge input: root sum 20
+        .setRoot(Group()
+            .setId(NullResultNode())
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("foo"))
+                .setResult(Int64ResultNode(20))));
+
+    Grouping b = Grouping() // second merge input: root sum 30
+        .setRoot(Group()
+            .setId(NullResultNode())
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("foo"))
+                .setResult(Int64ResultNode(30))));
+
+    Group expect = Group() // expected merged root: 20 + 30 = 50
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("foo"))
+            .setResult(Int64ResultNode(50)));
+
+    EXPECT_TRUE(testMerge(a, b, expect));
+}
+
+/**
+ * Verify that frozen levels are not touched during merge.
+ **/
+void
+Test::testMergeLevels()
+{
+    Grouping request = Grouping() // three levels, each with one sum aggregator
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("c1"))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))))
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("c2"))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s2"))))
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("c3"))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s3"))));
+
+    Group a = Group() // single path root -> 10 -> 20 -> 30 with sums 5, 10, 15, 20
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(10)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(15)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(20))))));
+
+    Group b = Group() // identical to a, so a merged sum is exactly twice the input
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(10)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(15)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(20))))));
+
+    Group expect_all = Group() // used with firstLevel 0: every sum doubled (10, 20, 30, 40)
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(10)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(20)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(30)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(40))))));
+
+    Group expect_0 = Group() // used with firstLevel 1: s0 stays 5, deeper sums doubled
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(20)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(30)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(40))))));
+
+
+    Group expect_1 = Group() // used with firstLevel 2: s0 and s1 unchanged, s2 and s3 doubled
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(10)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(30)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(40))))));
+
+
+    Group expect_2 = Group() // used with firstLevel 3: only s3 doubled
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(10)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(15)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(40))))));
+
+
+    Group expect_3 = Group() // used with firstLevel 4: nothing merged, same values as a
+        .setId(NullResultNode())
+        .addResult(SumAggregationResult()
+            .setExpression(AttributeNode("s0"))
+            .setResult(Int64ResultNode(5)))
+        .addChild(Group()
+            .setId(Int64ResultNode(10))
+            .addResult(SumAggregationResult()
+                .setExpression(AttributeNode("s1"))
+                .setResult(Int64ResultNode(10)))
+            .addChild(Group()
+                .setId(Int64ResultNode(20))
+                .addResult(SumAggregationResult()
+                    .setExpression(AttributeNode("s2"))
+                    .setResult(Int64ResultNode(15)))
+                .addChild(Group()
+                    .setId(Int64ResultNode(30))
+                    .addResult(SumAggregationResult()
+                        .setExpression(AttributeNode("s3"))
+                        .setResult(Int64ResultNode(20))))));
+
+    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a), // all levels merged
+                          request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b),
+                          expect_all));
+    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a), // level 0 left untouched
+                          request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b),
+                          expect_0));
+    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a), // levels 0-1 left untouched
+                          request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b),
+                          expect_1));
+    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a), // levels 0-2 left untouched
+                          request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b),
+                          expect_2));
+    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a), // firstLevel beyond the tree depth
+                          request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b),
+                          expect_3));
+}
+
+/**
+ * Verify that the number of groups for a level is pruned down to
+ * maxGroups, that the remaining groups are the
highest ranked ones,
+ * and that they are sorted by group id.
+ **/
+void
+Test::testMergeGroups()
+{
+    Grouping request = Grouping()
+        .addLevel(GroupingLevel()
+            .setExpression(AttributeNode("attr")));
+
+    // Trailing numbers on the lines below give the rank position of each id
+    // after merging; a number in parentheses marks the lower-ranked duplicate.
+    Group a = Group()
+        .setId(NullResultNode())
+        .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5)))
+        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2)
+        .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1
+        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3
+
+    Group b = Group()
+        .setId(NullResultNode())
+        .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2
+        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4
+        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1)
+        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5
+
+    Group expect_3 = Group() // top three by rank (100, 50, 30), listed by id
+        .setId(NullResultNode())
+        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+    Group expect_5 = Group() // top five by rank, listed by id
+        .setId(NullResultNode())
+        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+    Group expect_all = Group() // all eight distinct ids; duplicated ids carry the larger of their two ranks
+        .setId(NullResultNode())
+        .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
+        .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5)))
+        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
+        .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
+        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
+        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
+        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
+        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));
+
+    request.levels()[0].setMaxGroups(3); // each limit is checked in both merge directions
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3));
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3));
+    request.levels()[0].setMaxGroups(5);
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5));
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5));
+    request.levels()[0].setMaxGroups(-1); // -1: all groups are expected to be kept
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all));
+    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all));
+}
+
+/**
+ * Merge two relatively complex tree structures and verify that the
+ * end result is as expected.
+ **/ +void +Test::testMergeTrees() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(3) + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setMaxGroups(2) + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setMaxGroups(1) + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(5)) // merged with 200 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picked up here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picket up 
here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(5)) // merged with 300 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(5)) // merged with 100 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect)); +} + +void +Test::testPruneComplex() +{ + { // First level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("foo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + 
EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Second level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")))) + .addChild(Group() + .setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Grouping request = Grouping().setFirstLevel(2).setLastLevel(2); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Third level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + Group prune = 
Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group() + .setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + Grouping request = Grouping().setFirstLevel(3).setLastLevel(3); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Try pruning a grouping we don't have + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("boz0"))) + .addChild(Group().setId(StringResultNode("foo0"))) + .addChild(Group().setId(StringResultNode("goo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + 
.addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } +} + +/** + * Test partial merge of a grouping tree, where all levels up to "lastLevel" is + * merged. The last level should not contain any children groups, and only empty + * results. + **/ +void +Test::testPartialMerging() +{ + Grouping baseRequest = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + // Cached result + Group cached = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(14)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + 
.setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(22)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + + { // Merge lastlevel 0 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(0))); + + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected)); + } + { + // Merge existing tree. Assume we got modified data down again. 
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0)))) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + 
.setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected)); + } +} + +/** + * Test that pruning a simple grouping tree works. + **/ +void +Test::testPruneSimple() +{ + { + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))) + .setFirstLevel(1) + .setLastLevel(1); + + Group a = Group() + .addChild(Group().setId(StringResultNode("foo"))) + .addChild(Group().setId(StringResultNode("bar"))) + .addChild(Group().setId(StringResultNode("baz"))); + + Group b = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect)); + } +} + +/** + * Test that setTopN() limits the number of hits that are aggregated, + * and that needResort() turns false once a positive topN is set. 
+ **/ +void +Test::testTopN() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + { + Group expect = Group() + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Group expect = Group() + .addResult(CountAggregationResult().setCount(1) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect)); + } + { + Grouping request2 = Grouping() + .addLevel(GroupingLevel() + .addAggregationResult(SumAggregationResult()) + .addOrderBy(AggregationRefNode(0), false)); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(0); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(1); + EXPECT_TRUE(!request2.needResort()); + request2.setTopN(100); + EXPECT_TRUE(!request2.needResort()); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +void +Test::testCount() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + + Group expect = Group() + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +//----------------------------------------------------------------------------- + +bool +Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt) +{ + CountFS4Hits pop; + Grouping tmp = g; + tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop); + return EXPECT_EQUAL(pop.getHitCount(), cnt); +} + +void +Test::testFS4HitCollection() +{ + { // aggregation + AggregationContext ctx; + ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(25, 25.0)) + .addHit(FS4Hit(20, 20.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { // merging + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(10, 10.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + 
.addHit(FS4Hit(10, 10.0)) + .addHit(FS4Hit(1, 5.0)) + .addHit(FS4Hit(2, 4.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(3, 7.0)) + .addHit(FS4Hit(4, 6.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group c = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(5, 9.0)) + .addHit(FS4Hit(6, 8.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + } + { // count hits (for external object selection) + HitsAggregationResult dummyHits = HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(1, 3.0)) + .addHit(FS4Hit(2, 2.0)) + .addHit(FS4Hit(3, 1.0)) + .sort(); + Grouping g = Grouping().setRoot(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + ) + ); + EXPECT_TRUE(checkHits(g, 0, 0, 3)); + EXPECT_TRUE(checkHits(g, 1, 1, 6)); + EXPECT_TRUE(checkHits(g, 2, 2, 6)); + EXPECT_TRUE(checkHits(g, 3, 3, 3)); + EXPECT_TRUE(checkHits(g, 4, 4, 0)); + + EXPECT_TRUE(checkHits(g, 0, 1, 9)); + EXPECT_TRUE(checkHits(g, 0, 2, 15)); + EXPECT_TRUE(checkHits(g, 0, 3, 18)); + EXPECT_TRUE(checkHits(g, 0, 4, 18)); + EXPECT_TRUE(checkHits(g, 1, 4, 15)); + EXPECT_TRUE(checkHits(g, 2, 4, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + + 
EXPECT_TRUE(checkHits(g, 1, 2, 12)); + EXPECT_TRUE(checkHits(g, 2, 3, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + EXPECT_TRUE(checkHits(g, 4, 5, 0)); + } +} + +bool +Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket) +{ + AggregationContext ctx; + ctx.result().add(0); + if (value.getClass().inherits(IntegerResultNode::classId)) { + ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp()); + } else if (value.getClass().inherits(FloatResultNode::classId)) { + ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp()); + } else { + return EXPECT_TRUE(false); + } + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width))); + Group expect = Group().addChild(Group().setId(bucket)); + return testAggregation(ctx, request, expect); +} + +void +Test::testFixedWidthBuckets() +{ + typedef Int64ResultNode Int; + typedef FloatResultNode Float; + typedef IntegerBucketResultNode IntBucket; + typedef FloatBucketResultNode FloatBucket; + + // positive int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20))); + EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300))); + + // negative int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10))); + EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290))); + + // positive float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(0.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0))); + 
EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0))); + + // negative float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0))); + + // non-integer bucket width + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5))); + + // zero-width buckets + EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7))); + EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5))); + + // bucket wrap protection + { + int64_t x = std::numeric_limits::min(); + int64_t y = std::numeric_limits::max(); + EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000))); + EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y))); + } +} + + +void +Test::testNanSorting() +{ + // Attempt at reproducing issue with segfault when setting NaN value. Not + // successful yet, so no point in running test. 
+#if 0 + double nan = sqrt(-1); + EXPECT_TRUE(isnan(nan)); + EXPECT_TRUE(nan != nan); + EXPECT_FALSE(nan < nan); + EXPECT_FALSE(nan > nan); + EXPECT_FALSE(nan < 0.2); + EXPECT_FALSE(nan > 0.2); + EXPECT_FALSE(0.2 < nan); + EXPECT_FALSE(0.2 > nan); + + FastOS_Time timer; + timer.SetNow(); + std::vector groups; + while (timer.MilliSecsToNow() < 60000.0) { + std::vector vec; + srand((unsigned int)timer.MilliSecs()); + size_t limit = 2345678; + size_t mod = rand() % limit; + for (size_t i = 0; i < limit; i++) { + if ((i % mod) == 0) + vec.push_back(nan); + else + vec.push_back(1.0 * rand()); + } + } + std::sort(groups.begin(), groups.end()); +#endif +} + +void +Test::testThatNanIsConverted() +{ + Group g; + double nan = sqrt(-1); + g.setRank(nan); + // Must have been changed for this to work. + ASSERT_EQUAL(g.getRank(), g.getRank()); +} + +//----------------------------------------------------------------------------- + +struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }}; + +//----------------------------------------------------------------------------- + +int +Test::Main() +{ + RunDiff runDiff; + (void) runDiff; + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("grouping_test"); + testAggregationSimple(); + testAggregationLevels(); + testAggregationMaxGroups(); + testAggregationGroupOrder(); + testAggregationGroupRank(); + testAggregationGroupCapping(); + testMergeSimpleSum(); + testMergeLevels(); + testMergeGroups(); + testMergeTrees(); + testPruneSimple(); + testPruneComplex(); + testPartialMerging(); + testFS4HitCollection(); + testFixedWidthBuckets(); + testCount(); + testTopN(); + testThatNanIsConverted(); + testNanSorting(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/grouping/hyperloglog_test.cpp b/searchlib/src/tests/grouping/hyperloglog_test.cpp new file mode 100644 index 00000000000..15b4ae9ae39 --- /dev/null +++ b/searchlib/src/tests/grouping/hyperloglog_test.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for hyperloglog. + +#include +LOG_SETUP("hyperloglog_test"); +#include + +#include +#include +#include +#include + +using vespalib::NBOSerializer; +using vespalib::nbostream; +using namespace search; + +namespace { + +TEST("require that hyperloglog changes from sparse to normal sketch") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); + EXPECT_EQUAL(1, hll.aggregate(i)); + } + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); + EXPECT_EQUAL(23, hll.aggregate(256)); + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); +} + +TEST("require that hyperloglog can be (de)serialized") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + nbostream stream; + NBOSerializer serializer(stream); + + // Serializes with sparse sketch + hll.serialize(serializer); + HyperLogLog<> hll2; + hll2.deserialize(serializer); + EXPECT_TRUE(dynamic_cast *>(&hll2.getSketch())); + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + + // Serializes with normal sketch. 
+ EXPECT_EQUAL(23, hll2.aggregate(256)); + hll2.serialize(serializer); + hll.deserialize(serializer); + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); + EXPECT_EQUAL(hll2.getSketch(), hll.getSketch()); +} + +TEST("require that sparse hyperloglogs can be merged") { + HyperLogLog<> hll; + for (size_t i = 0; i < 100; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + HyperLogLog<> hll2; + for (size_t i = 100; i < 255; ++i) { + EXPECT_EQUAL(1, hll2.aggregate(i)); + } + hll.merge(hll2); + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); + + EXPECT_EQUAL(1, hll2.aggregate(255)); + hll.merge(hll2); + EXPECT_TRUE(dynamic_cast *>(&hll.getSketch())); +} + +TEST("require that mixed hyperloglogs can be merged") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + EXPECT_EQUAL(23, hll.aggregate(256)); // normal + HyperLogLog<> hll2; + for (size_t i = 100; i < 255; ++i) { + EXPECT_EQUAL(1, hll2.aggregate(i)); // sparse + } + hll.merge(hll2); // normal + sparse + hll2.merge(hll); // sparse + normal + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + + EXPECT_EQUAL(23, hll2.aggregate(500)); + hll.merge(hll2); // normal + normal + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + EXPECT_EQUAL(0, hll.aggregate(500)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/grouping/sketch_test.cpp b/searchlib/src/tests/grouping/sketch_test.cpp new file mode 100644 index 00000000000..c6c0b144983 --- /dev/null +++ b/searchlib/src/tests/grouping/sketch_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sketch. 
+ +#include +LOG_SETUP("sketch_test"); +#include + +#include +#include +#include +#include +#include + +using vespalib::NBOSerializer; +using vespalib::nbostream; +using namespace search; +using vespalib::make_string; + +namespace { + +TEST("require that normal sketch is initialized") { + NormalSketch<> sketch; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + EXPECT_EQUAL(0, sketch.bucket[i]); + } +} + +template +void checkBucketValue(NormalSketch &sketch, size_t bucket, uint32_t value) { + EXPECT_EQUAL(value, static_cast(sketch.bucket[bucket])); +} + +template +void checkCountPrefixZeros() { + TEST_STATE(make_string("BucketBits: %d, HashBits: %d", + BucketBits, int(sizeof(HashT) * 8)).c_str()); + NormalSketch sketch; + const uint32_t prefix_bits = sizeof(HashT) * 8 - BucketBits; + const uint32_t hash_width = sizeof(HashT) * 8; + for (size_t i = 0; i < prefix_bits ; ++i) { + int increase = sketch.aggregate(HashT(1) << (hash_width - 1 - i)); + EXPECT_EQUAL(1, increase); // bucket increases by 1 for each call + checkBucketValue(sketch, 0, i + 1); + } + sketch.aggregate(0); + checkBucketValue(sketch, prefix_bits + 1, 0); + + checkBucketValue(sketch, HashT(1) << (BucketBits - 1), 0); + sketch.aggregate(HashT(1) << (hash_width - 1 - prefix_bits)); + checkBucketValue(sketch, 0, prefix_bits + 1); + checkBucketValue(sketch, HashT(1) << (BucketBits - 1), prefix_bits + 1); +} + +TEST("require that prefix zeros are counted.") { + checkCountPrefixZeros<10, uint32_t>(); + checkCountPrefixZeros<12, uint32_t>(); + checkCountPrefixZeros<10, uint64_t>(); + checkCountPrefixZeros<12, uint64_t>(); +} + +TEST("require that aggregate returns bucket increase") { + NormalSketch<> sketch; + int increase = sketch.aggregate(-1); + EXPECT_EQUAL(1, increase); + increase = sketch.aggregate(1023); + EXPECT_EQUAL(22, increase); + increase = sketch.aggregate(0); + EXPECT_EQUAL(23, increase); +} + +TEST("require that instances can be merged.") { + NormalSketch<> sketch; + sketch.aggregate(0); 
+ NormalSketch<> sketch2; + sketch2.aggregate(-1); + sketch.merge(sketch2); + checkBucketValue(sketch, 0, 23); + checkBucketValue(sketch, 1023, 1); +} + +TEST("require that different sketch type instances can be merged.") { + NormalSketch<> sketch; + sketch.aggregate(0); + SparseSketch<> sketch2; + sketch2.aggregate(-1); + sketch.merge(sketch2); + checkBucketValue(sketch, 0, 23); + checkBucketValue(sketch, 1023, 1); +} + +TEST("require that normal sketch can be (de)serialized") { + NormalSketch<> sketch; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + sketch.aggregate(i | (1 << ((i % sketch.bucketBits) + + sketch.bucketBits))); + } + nbostream stream; + NBOSerializer serializer(stream); + sketch.serialize(serializer); + EXPECT_EQUAL(31u, stream.size()); + uint32_t val; + stream >> val; + EXPECT_TRUE(sketch.BUCKET_COUNT == val); + stream >> val; + EXPECT_EQUAL(23u, val); + stream.adjustReadPos(-2 * sizeof(uint32_t)); + NormalSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +TEST("require that uncompressed data in normal sketch can be deserialized") { + NormalSketch<> sketch; + nbostream stream; + NBOSerializer serializer(stream); + stream << sketch.BUCKET_COUNT; + stream << sketch.BUCKET_COUNT; + const int hash_bits = sizeof(NormalSketch<>::hash_type) * 8; + const int value_bits = hash_bits - sketch.bucketBits; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + char bucket_val = (i % value_bits) + 1; + stream << bucket_val; + sketch.aggregate(i | (1 << (hash_bits - bucket_val))); + } + NormalSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +TEST("require that sparse sketch can be (de)serialized") { + SparseSketch<> sketch; + const uint32_t hash_count = 10; + for (size_t hash = 0; hash < hash_count; ++hash) { + sketch.aggregate(hash); + } + nbostream stream; + NBOSerializer serializer(stream); + sketch.serialize(serializer); + EXPECT_EQUAL(4 * hash_count + 4u, 
stream.size()); + uint32_t val; + stream >> val; + EXPECT_EQUAL(hash_count, val); + stream.adjustReadPos(-1 * sizeof(uint32_t)); + SparseSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/groupingengine/.gitignore b/searchlib/src/tests/groupingengine/.gitignore new file mode 100644 index 00000000000..1eb2fc1fb29 --- /dev/null +++ b/searchlib/src/tests/groupingengine/.gitignore @@ -0,0 +1,7 @@ +/lhs.out +/rhs.out +/diff.txt +/groupingengine_benchmark +/vgcore.* +searchlib_groupingengine_test_app +searchlib_groupingengine_benchmark_app diff --git a/searchlib/src/tests/groupingengine/CMakeLists.txt b/searchlib/src/tests/groupingengine/CMakeLists.txt new file mode 100644 index 00000000000..74f4574a9a4 --- /dev/null +++ b/searchlib/src/tests/groupingengine/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_groupingengine_test_app + SOURCES + groupingengine_test.cpp + DEPENDS + searchlib +) +#vespa_add_test(NAME searchlib_groupingengine_test_app COMMAND searchlib_groupingengine_test_app) +vespa_add_executable(searchlib_groupingengine_benchmark_app + SOURCES + groupingengine_benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_groupingengine_benchmark_app COMMAND searchlib_groupingengine_benchmark_app BENCHMARK) diff --git a/searchlib/src/tests/groupingengine/DESC b/searchlib/src/tests/groupingengine/DESC new file mode 100644 index 00000000000..1aa6cb37e89 --- /dev/null +++ b/searchlib/src/tests/groupingengine/DESC @@ -0,0 +1 @@ +grouping test. Take a look at grouping.cpp for details. 
diff --git a/searchlib/src/tests/groupingengine/FILES b/searchlib/src/tests/groupingengine/FILES new file mode 100644 index 00000000000..af7f7e71257 --- /dev/null +++ b/searchlib/src/tests/groupingengine/FILES @@ -0,0 +1,4 @@ +grouping.cpp +lhs.out +rhs.out +diff.txt diff --git a/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp new file mode 100644 index 00000000000..b7136741a4c --- /dev/null +++ b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp @@ -0,0 +1,292 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +LOG_SETUP("grouping_benchmark"); + +using namespace vespalib; +using namespace search; +using namespace search::attribute; +using namespace search::expression; +using namespace search::aggregation; +using namespace search::grouping; + +//----------------------------------------------------------------------------- + +template +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), + _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() const { + return _attrSP; + } +}; + +typedef AttrBuilder 
IntAttrBuilder; +typedef AttrBuilder FloatAttrBuilder; +typedef AttrBuilder StringAttrBuilder; + +//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + +//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: +private: + bool testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine); + void benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups); + void benchmarkIntegerCount(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : _numrefs(0) { } + int _numrefs; + private: + virtual void 
execute(vespalib::Identifiable &obj) { + if (static_cast(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; + int Main(); +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request, either through a GroupingEngine or + * directly on the Grouping, and verify that no attribute references + * remain after cleanup. The aggregated result itself is not checked. + **/ +bool +Test::testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + if (useEngine) { + GroupingEngine engine(tmp); + engine.aggregate(ctx.result().hits(), ctx.result().size()); + Group::UP result = engine.createResult(); + } else { + tmp.aggregate(ctx.result().hits(), ctx.result().size()); + } + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + EXPECT_EQUAL(attrCheck._numrefs, 0); + return true; +} + +void +Test::benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups) +{ + IntAttrBuilder attrB("attr0"); + for (size_t i=0; i < numDocs; i++) { + attrB.add(i); + } + AggregationContext ctx; + for(size_t i(0); i < numDocs; i++) { + ctx.result().add(i, numDocs-i); + } + ctx.add(attrB.sp()); + GroupingLevel level; + level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups); + level.addResult(SumAggregationResult().setExpression(AttributeNode("attr0"))); + if (maxGroups >= 0) { + level.addOrderBy(AggregationRefNode(0), false); + } + Grouping baseRequest = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(level); + + for (size_t i(0); i < numQueries; i++) { + testAggregation(ctx, baseRequest, useEngine); + } +} + +void +Test::benchmarkIntegerCount(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups) +{ + 
IntAttrBuilder attrB("attr0"); + for (size_t i=0; i < numDocs; i++) { + attrB.add(i); + } + AggregationContext ctx; + for(size_t i(0); i < numDocs; i++) { + ctx.result().add(i); + } + ctx.add(attrB.sp()); + GroupingLevel level; + level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups); + level.addResult(CountAggregationResult().setExpression(AttributeNode("attr0"))); + if (maxGroups >= 0) { + level.addOrderBy(AggregationRefNode(0), false); + } + Grouping baseRequest = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(level); + + for (size_t i(0); i < numQueries; i++) { + testAggregation(ctx, baseRequest, useEngine); + } +} + +/** + * Benchmark entry point. Optional positional arguments override the + * defaults: [1] "tree" aggregates directly on the Grouping instead of via + * GroupingEngine, [2] id type (only "int" is handled), [3] aggregation type + * ("sum" or "count"), [4] number of documents, [5] number of queries, + * [6] max groups. + **/ +int +Test::Main() +{ + size_t numDocs = 1000000; + size_t numQueries = 1000; + int64_t maxGroups = -1; + bool useEngine = true; + vespalib::string idType = "int"; + vespalib::string aggrType = "sum"; + if (_argc > 1) { + useEngine = (strcmp(_argv[1], "tree") != 0); + } + if (_argc > 2) { + idType = _argv[2]; + } + if (_argc > 3) { + aggrType = _argv[3]; + } + if (_argc > 4) { + numDocs = strtol(_argv[4], NULL, 0); + } + if (_argc > 5) { + numQueries = strtol(_argv[5], NULL, 0); + } + if (_argc > 6) { + maxGroups = strtol(_argv[6], NULL, 0); + } + TEST_INIT("grouping_benchmark"); + // sizeof yields size_t, so the matching conversion is %zu (not %ld). + LOG(info, "sizeof(Group) = %zu", sizeof(Group)); + LOG(info, "sizeof(ResultNode::CP) = %zu", sizeof(ResultNode::CP)); + LOG(info, "sizeof(RawRank) = %zu", sizeof(RawRank)); + LOG(info, "sizeof(SumAggregationResult) = %zu", sizeof(SumAggregationResult)); + LOG(info, "sizeof(CountAggregationResult) = %zu", sizeof(CountAggregationResult)); + LOG(info, "sizeof(Int64ResultNode) = %zu", sizeof(Int64ResultNode)); + + LOG(info, "sizeof(Group::ExpressionVector) = %zu", sizeof(Group::ExpressionVector)); + fastos::TimeStamp start(fastos::ClockSystem::now()); + if (idType == "int") { + if (aggrType == "sum") { + benchmarkIntegerSum(useEngine, numDocs, 
numQueries, maxGroups); + } else if (aggrType == "count") { + benchmarkIntegerCount(useEngine, numDocs, numQueries, maxGroups); + } else { + ASSERT_TRUE(false); + } + } else { + ASSERT_TRUE(false); + } + LOG(info, "rusage = {\n%s\n}", vespalib::RUsage::createSelf(start).toString().c_str()); + ASSERT_EQUAL(0, kill(0, SIGPROF)); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/groupingengine/groupingengine_test.cpp b/searchlib/src/tests/groupingengine/groupingengine_test.cpp new file mode 100644 index 00000000000..ab371cc3dcc --- /dev/null +++ b/searchlib/src/tests/groupingengine/groupingengine_test.cpp @@ -0,0 +1,1985 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("groupingengine_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace vespalib; +using namespace search; +using namespace search::attribute; +using namespace search::expression; +using namespace search::aggregation; +using namespace search::grouping; + +//----------------------------------------------------------------------------- + +template +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), + _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() 
const { + return _attrSP; + } +}; + +typedef AttrBuilder IntAttrBuilder; +typedef AttrBuilder FloatAttrBuilder; +typedef AttrBuilder StringAttrBuilder; + +//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + +//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: + bool testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect); + bool testPrune(const Grouping &a, const Grouping &b, + const Group &expect); + bool testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect); + void 
testAggregationSimple(); + void testAggregationLevels(); + void testAggregationMaxGroups(); + void testAggregationGroupOrder(); + void testAggregationGroupRank(); + void testAggregationGroupCapping(); + void testMergeSimpleSum(); + void testMergeLevels(); + void testMergeGroups(); + void testMergeTrees(); + void testPruneSimple(); + void testPruneComplex(); + void testPartialMerging(); + void testCount(); + void testTopN(); + void testFS4HitCollection(); + bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket); + bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt); + void testFixedWidthBuckets(); + void testThatNanIsConverted(); + void testNanSorting(); + void testGroupingEngineFromRequest(); + int Main(); +private: + bool verifyEqual(const Group & a, const Group & b); + void testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : _numrefs(0) { } + int _numrefs; + private: + virtual void execute(vespalib::Identifiable &obj) { + if (static_cast(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request and verify that the resulting group + * tree matches the expected value. 
+ **/ +bool +Test::testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + GroupingEngine engine(tmp); + verifyEqual(*engine.createResult(), tmp.getRoot()); + engine.aggregate(ctx.result().hits(), ctx.result().size()); + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + Group::UP result = engine.createResult(); + EXPECT_EQUAL(attrCheck._numrefs, 0); + return verifyEqual(*result, expect); +} + +bool Test::verifyEqual(const Group & a, const Group & b) +{ + bool ok = EXPECT_EQUAL(a.asString(), b.asString()); + if (!ok) { + std::cerr << a.asString() << std::endl << b.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; +#if 0 + tmp.merge(tmpB); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +#else + GroupingEngine eA(tmp); + GroupingEngine eB(tmpB); + verifyEqual(*eA.createResult(), a.getRoot()); + verifyEqual(*eB.createResult(), b.getRoot()); + eA.merge(eB); + return verifyEqual(*eA.createResult(), expect); +#endif +} + +/** + * Prune the given grouping request and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testPrune(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.prune(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge a given grouping request to get a partial request back. Verify that the + * partial request is correct. 
+ **/ +bool +Test::testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.mergePartial(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; // create local copy + Grouping tmpC = c; // create local copy + tmp.merge(tmpB); + tmp.merge(tmpC); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +//----------------------------------------------------------------------------- + +/** + * Test collecting the sum of the values from a single attribute + * vector directly into the root node. Consider this a smoke test. 
+ **/ +void +Test::testAggregationSimple() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp()); + ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp()); + ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp()); + + char strsum[3] = {-101, '5', 0}; + testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum)); + testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15")); + testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7")); +} + +void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr) +{ + ExpressionNode::CP clone(aggr); + Grouping request = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(static_cast(*clone).setExpression(AttributeNode("int"))) + .addResult(static_cast(*clone).setExpression(AttributeNode("float"))) + .addResult(static_cast(*clone).setExpression(AttributeNode("string"))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(static_cast(*clone).setExpression(AttributeNode("int")).setResult(ir)) + .addResult(static_cast(*clone).setExpression(AttributeNode("float")).setResult(fr)) + .addResult(static_cast(*clone).setExpression(AttributeNode("string")).setResult(sr)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that the backend aggregation will classify and collect on + * the appropriate levels, as indicated by the firstLevel and + * lastLevel parameters. 
+ **/ +void +Test::testAggregationLevels() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp()); + ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp()); + ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp()); + ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp()); + ctx.result().add(0).add(1); + + Grouping baseRequest = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")))); + + Group notDone = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0"))); +// Hmm, do not need to prepare more than the levels needed. 
.setResult(Int64ResultNode(0))); + + Group done0 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(0)))); + + Group done1 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(0))))); + + Group done2 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(0)))))); + + Group done3 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + 
.addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(22)))))); + + { // level 0 only + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + EXPECT_TRUE(testAggregation(ctx, request, done0)); + } + { // level 0 and 1 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 0,1 and 2 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 0,1,2 and 3 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 1 with level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 2 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 3 with level 0,1 and 2 as input + Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 2 and 3 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } +#if 0 + { // level 1 without level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, notDone)); + } +#else + //#warning "Test has been temporarily disabled" +#endif +} + +/** + * Verify that the aggregation step does not create more groups than + * indicated by the maxgroups parameter. 
+ **/ +void +Test::testAggregationMaxGroups() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp()); + ctx.result().add(0).add(1).add(2); + + Grouping baseRequest = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group empty = Group().setId(NullResultNode()); + Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5))); + Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10))); + Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15))); + + { // max 0 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(0); + EXPECT_TRUE(testAggregation(ctx, request, empty)); + } + { // max 1 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(1); + EXPECT_TRUE(testAggregation(ctx, request, grp1)); + } + { // max 2 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(2); + EXPECT_TRUE(testAggregation(ctx, request, grp2)); + } + { // max 3 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(3); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } + { // max 4 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(4); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } + { // max -1 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(-1); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } +} + +/** + * Verify that groups are sorted by group id + **/ +void +Test::testAggregationGroupOrder() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp()); + ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group expect = Group() + .setId(NullResultNode()) 
+ .addChild(Group().setId(Int64ResultNode(5))) + .addChild(Group().setId(Int64ResultNode(10))) + .addChild(Group().setId(Int64ResultNode(15))) + .addChild(Group().setId(Int64ResultNode(20))) + .addChild(Group().setId(Int64ResultNode(25))) + .addChild(Group().setId(Int64ResultNode(30))) + .addChild(Group().setId(Int64ResultNode(35))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that groups are tagged with the appropriate rank value. + **/ +void +Test::testAggregationGroupRank() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr") + .add(1).add(1).add(1) + .add(2).add(2).add(2) + .add(3).add(3).add(3).sp()); + ctx.result() + .add(0, 5).add(1, 10).add(2, 15) + .add(3, 10).add(4, 15).add(5, 5) + .add(6, 15).add(7, 5).add(8, 10); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel().setExpression(AttributeNode("attr"))); + + Group expect = Group() + .setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15))) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15))) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +void +Test::testAggregationGroupCapping() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr") + .add(1).add(2).add(3) + .add(4).add(5).add(6) + .add(7).add(8).add(9).sp()); + ctx.result() + .add(0, 1).add(1, 2).add(2, 3) + .add(3, 4).add(4, 5).add(5, 6) + .add(6, 7).add(7, 8).add(8, 9); + + { + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel( + GroupingLevel().setExpression(AttributeNode("attr"))); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1))) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2))) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3))) + 
.addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4))) + .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5))) + .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6))) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7))) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr"))); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7))) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping(). + setRoot(Group().setId(NullResultNode())). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))). 
+ addOrderBy(AggregationRefNode(0), false)); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping(). + setRoot(Group().setId(NullResultNode())). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + AddFunctionNode *add = new AddFunctionNode(); + add->addArg(AggregationRefNode(0)); + add->appendArg(ConstantNode(Int64ResultNode(3))); + 
ExpressionNode::CP i1(add); + Grouping request = Grouping(). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))). + addOrderBy(i1, false)); + + Group expect = Group() + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false)) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false)) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + +} + +//----------------------------------------------------------------------------- + +/** + * Test merging the sum of the values from a single attribute vector + * that was collected directly into the root node. Consider this a + * smoke test. 
+ **/ +void +Test::testMergeSimpleSum() +{ + Grouping a = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(20)))); + + Grouping b = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(30)))); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(50))); + + EXPECT_TRUE(testMerge(a, b, expect)); +} + +/** + * Verify that frozen levels are not touched during merge. + **/ +void +Test::testMergeLevels() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + 
.addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + Group expect_all = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + Group expect_0 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_1 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_2 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_3 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a), + request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b), + expect_all)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a), + 
request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b), + expect_0)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a), + request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b), + expect_1)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a), + request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b), + expect_2)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a), + request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b), + expect_3)); +} + +/** + * Verify that the number of groups for a level is pruned down to + * maxGroups, that the remaining groups are the highest ranked ones, + * and that they are sorted by group id. + **/ +void +Test::testMergeGroups() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group a = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2) + .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1 + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3 + + Group b = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2 + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4 + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5 + + Group expect_3 = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + 
.addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + Group expect_5 = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))) + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + Group expect_all = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10))) + .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15))) + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))) + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + request.levels()[0].setMaxGroups(3); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3)); + request.levels()[0].setMaxGroups(5); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5)); + request.levels()[0].setMaxGroups(-1); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all)); +} + +/** + * Merge two relatively complex tree structures and verify that the + * end result is as expected. 
+ **/ +void +Test::testMergeTrees() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(3) + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setMaxGroups(2) + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setMaxGroups(1) + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(5)) // merged with 200 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picked up here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picket up 
here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(5)) // merged with 300 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(5)) // merged with 100 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect)); +} + +void +Test::testPruneComplex() +{ + { // First level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("foo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + 
EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Second level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")))) + .addChild(Group() + .setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Grouping request = Grouping().setFirstLevel(2).setLastLevel(2); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Third level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + Group prune = 
Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group() + .setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + Grouping request = Grouping().setFirstLevel(3).setLastLevel(3); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Try pruning a grouping we don't have + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("boz0"))) + .addChild(Group().setId(StringResultNode("foo0"))) + .addChild(Group().setId(StringResultNode("goo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + 
.addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } +} + +/** + * Test partial merge of a grouping tree, where all levels up to "lastLevel" is + * merged. The last level should not contain any children groups, and only empty + * results. + **/ +void +Test::testPartialMerging() +{ + Grouping baseRequest = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + // Cached result + Group cached = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(14)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + 
.setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(22)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + + { // Merge lastlevel 0 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(0))); + + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected)); + } + { + // Merge existing tree. Assume we got modified data down again. 
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0)))) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + 
.setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected)); + } +} + +/** + * Test that pruning a simple grouping tree works. + **/ +void +Test::testPruneSimple() +{ + { + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))) + .setFirstLevel(1) + .setLastLevel(1); + + Group a = Group() + .addChild(Group().setId(StringResultNode("foo"))) + .addChild(Group().setId(StringResultNode("bar"))) + .addChild(Group().setId(StringResultNode("baz"))); + + Group b = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect)); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +/* Note: besides plain counting, this test also covers setTopN() and needResort(). */ +void +Test::testTopN() +{ + /* Input: three result entries (0, 1, 2) and an IntAttrBuilder("foo") attribute holding 3, 7, 15. */ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + { + /* No topN set: the expected count is 3, one per result entry. */ + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + /* With setTopN(1) applied to the same request the expected count drops to 1. */ + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(1) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect)); + } + { + /* A request ordered by an aggregation result: needResort() is expected to hold with no topN and with topN 0, and to be false once topN is 1 or 100. */ + Grouping request2 = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .addAggregationResult(SumAggregationResult()) + .addOrderBy(AggregationRefNode(0), false)); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(0); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(1); + EXPECT_TRUE(!request2.needResort()); + request2.setTopN(100); + EXPECT_TRUE(!request2.needResort()); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +void +Test::testCount() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +//----------------------------------------------------------------------------- + +bool +Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt) +{ + CountFS4Hits pop; + Grouping tmp = g; + tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop); + return EXPECT_EQUAL(pop.getHitCount(), cnt); +} + +void +Test::testFS4HitCollection() +{ + { // aggregation + AggregationContext ctx; + ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group().setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(25, 25.0)) + .addHit(FS4Hit(20, 20.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { // merging + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(10, 10.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group a = 
Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(10, 10.0)) + .addHit(FS4Hit(1, 5.0)) + .addHit(FS4Hit(2, 4.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(3, 7.0)) + .addHit(FS4Hit(4, 6.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group c = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(5, 9.0)) + .addHit(FS4Hit(6, 8.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + } + { // count hits (for external object selection) + HitsAggregationResult dummyHits = HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(1, 3.0)) + .addHit(FS4Hit(2, 2.0)) + .addHit(FS4Hit(3, 1.0)) + .sort(); + Grouping g = Grouping().setRoot(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + ) + ); + EXPECT_TRUE(checkHits(g, 0, 0, 3)); + EXPECT_TRUE(checkHits(g, 1, 1, 6)); + EXPECT_TRUE(checkHits(g, 2, 2, 6)); + EXPECT_TRUE(checkHits(g, 3, 3, 3)); + EXPECT_TRUE(checkHits(g, 4, 4, 0)); + + EXPECT_TRUE(checkHits(g, 0, 1, 9)); + EXPECT_TRUE(checkHits(g, 0, 2, 15)); + EXPECT_TRUE(checkHits(g, 0, 3, 18)); + EXPECT_TRUE(checkHits(g, 0, 4, 18)); + EXPECT_TRUE(checkHits(g, 1, 4, 15)); + 
EXPECT_TRUE(checkHits(g, 2, 4, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + + EXPECT_TRUE(checkHits(g, 1, 2, 12)); + EXPECT_TRUE(checkHits(g, 2, 3, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + EXPECT_TRUE(checkHits(g, 4, 5, 0)); + } +} + +bool +Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket) +{ + AggregationContext ctx; + ctx.result().add(0); + if (value.getClass().inherits(IntegerResultNode::classId)) { + ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp()); + } else if (value.getClass().inherits(FloatResultNode::classId)) { + ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp()); + } else { + return EXPECT_TRUE(false); + } + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width))); + Group expect = Group().setId(NullResultNode()).addChild(Group().setId(bucket)); + return testAggregation(ctx, request, expect); +} + +void +Test::testFixedWidthBuckets() +{ + typedef Int64ResultNode Int; + typedef FloatResultNode Float; + typedef IntegerBucketResultNode IntBucket; + typedef FloatBucketResultNode FloatBucket; + + // positive int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20))); + EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300))); + + // negative int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10))); + EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290))); + + // positive float buckets + EXPECT_TRUE(checkBucket(Int(10), 
Float(0.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0))); + + // negative float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0))); + + // non-integer bucket width + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5))); + + // zero-width buckets + EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7))); + EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5))); + + // bucket wrap protection + { + int64_t x = std::numeric_limits::min(); + int64_t y = std::numeric_limits::max(); + EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000))); + EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y))); + } +} + + +void +Test::testNanSorting() +{ + // Attempt at reproducing issue with segfault when setting NaN value. Not + // successful yet, so no point in running test. 
+#if 0 + double nan = sqrt(-1); + EXPECT_TRUE(isnan(nan)); + EXPECT_TRUE(nan != nan); + EXPECT_FALSE(nan < nan); + EXPECT_FALSE(nan > nan); + EXPECT_FALSE(nan < 0.2); + EXPECT_FALSE(nan > 0.2); + EXPECT_FALSE(0.2 < nan); + EXPECT_FALSE(0.2 > nan); + + FastOS_Time timer; + timer.SetNow(); + std::vector groups; + while (timer.MilliSecsToNow() < 60000.0) { + std::vector vec; + srand((unsigned int)timer.MilliSecs()); + size_t limit = 2345678; + size_t mod = rand() % limit; + for (size_t i = 0; i < limit; i++) { + if ((i % mod) == 0) + vec.push_back(nan); + else + vec.push_back(1.0 * rand()); + } + } + std::sort(groups.begin(), groups.end()); +#endif +} + +void +Test::testThatNanIsConverted() +{ + Group g; + double nan = sqrt(-1); + g.setRank(nan); + // Must have been changed for this to work. + ASSERT_EQUAL(g.getRank(), g.getRank()); +} + +void +Test::testGroupingEngineFromRequest() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp()); + ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp()); + ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp()); + ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp()); + ctx.result().add(0).add(1); + Grouping baseRequest = Grouping() + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")))); + ctx.setup(baseRequest); + GroupingEngine engine(baseRequest.setFirstLevel(0).setLastLevel(2)); + EXPECT_EQUAL(4u, engine.getEngines().size()); +} + +//----------------------------------------------------------------------------- + +struct 
RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }}; + +//----------------------------------------------------------------------------- + +/** + * Test driver: invokes TEST_DEBUG and TEST_INIT, calls each enabled test + * case in order, and finishes with TEST_DONE(). + **/ +int +Test::Main() +{ + /* RunDiff's destructor shells out to `diff -u lhs.out rhs.out`, writing diff.txt, when this local goes out of scope. */ + RunDiff runDiff; + /* Presumably present only to mark runDiff as used -- TODO confirm. */ + (void) runDiff; + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("groupingengine_test"); + testGroupingEngineFromRequest(); + testAggregationSimple(); + testAggregationLevels(); + testAggregationMaxGroups(); + testAggregationGroupOrder(); + testAggregationGroupRank(); + testAggregationGroupCapping(); + /* The merge/prune tests below are compiled out; the reason is not recorded here. */ +#if 0 + testMergeSimpleSum(); + testMergeLevels(); + testMergeGroups(); + testMergeTrees(); + testPruneSimple(); + testPruneComplex(); + testPartialMerging(); +#endif + testFS4HitCollection(); + testFixedWidthBuckets(); + testCount(); + testTopN(); + testThatNanIsConverted(); + /* testNanSorting() has its whole body under #if 0, so this call currently does nothing. */ + testNanSorting(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/hitcollector/.gitignore b/searchlib/src/tests/hitcollector/.gitignore new file mode 100644 index 00000000000..a4313eb2184 --- /dev/null +++ b/searchlib/src/tests/hitcollector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +hitcollector_test +searchlib_hitcollector_test_app diff --git a/searchlib/src/tests/hitcollector/CMakeLists.txt b/searchlib/src/tests/hitcollector/CMakeLists.txt new file mode 100644 index 00000000000..c2b130b2890 --- /dev/null +++ b/searchlib/src/tests/hitcollector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_hitcollector_test_app + SOURCES + hitcollector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_hitcollector_test_app COMMAND searchlib_hitcollector_test_app) diff --git a/searchlib/src/tests/hitcollector/DESC b/searchlib/src/tests/hitcollector/DESC new file mode 100644 index 00000000000..a8751d4a1fe --- /dev/null +++ b/searchlib/src/tests/hitcollector/DESC @@ -0,0 +1 @@ +hitcollector test. Take a look at hitcollector.cpp for details. 
diff --git a/searchlib/src/tests/hitcollector/FILES b/searchlib/src/tests/hitcollector/FILES new file mode 100644 index 00000000000..88a0d4ba4b3 --- /dev/null +++ b/searchlib/src/tests/hitcollector/FILES @@ -0,0 +1 @@ +hitcollector.cpp diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp new file mode 100644 index 00000000000..ec7c74913af --- /dev/null +++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp @@ -0,0 +1,493 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("hitcollector_test"); +#include + +#include + +#include +#include +#include +#include + +using namespace search; +using namespace search::fef; +using namespace search::queryeval; + +typedef std::map ScoreMap; + +struct BasicScorer : public HitCollector::DocumentScorer +{ + feature_t _scoreDelta; + BasicScorer(feature_t scoreDelta) : _scoreDelta(scoreDelta) {} + virtual feature_t score(uint32_t docId) { + return docId + _scoreDelta; + } +}; + +struct PredefinedScorer : public HitCollector::DocumentScorer +{ + ScoreMap _scores; + PredefinedScorer(const ScoreMap &scores) : _scores(scores) {} + virtual feature_t score(uint32_t docId) { + feature_t retval = 0.0; + auto itr = _scores.find(docId); + if (itr != _scores.end()) { + retval = itr->second; + } + return retval; + } +}; + +void checkResult(const ResultSet & rs, const std::vector & exp) +{ + if (exp.size() > 0) { + const RankedHit * rh = rs.getArray(); + ASSERT_TRUE(rh != NULL); + ASSERT_EQUAL(rs.getArrayUsed(), exp.size()); + + for (uint32_t i = 0; i < exp.size(); ++i) { +#if 0 + std::cout << " rh[" << i << "]._docId = " << rh[i]._docId << std::endl; + std::cout << "exp[" << i << "]._docId = " << exp[i]._docId << std::endl; + std::cout << " rh[" << i << "]._rankValue = " << rh[i]._rankValue << std::endl; + std::cout << "exp[" << i << "]._rankValue = " << exp[i]._rankValue 
<< std::endl; +#endif + EXPECT_EQUAL(rh[i]._docId, exp[i]._docId); + EXPECT_EQUAL(rh[i]._rankValue, exp[i]._rankValue); + } + } else { + ASSERT_TRUE(rs.getArray() == NULL); + } +} + +void checkResult(ResultSet & rs, BitVector * exp) +{ + if (exp != NULL) { + BitVector * bv = rs.getBitOverflow(); + ASSERT_TRUE(bv != NULL); + bv->invalidateCachedCount(); + exp->invalidateCachedCount(); + LOG(info, "bv.hits: %u, exp.hits: %u", bv->countTrueBits(), exp->countTrueBits()); + ASSERT_TRUE(bv->countTrueBits() == exp->countTrueBits()); + EXPECT_TRUE(*bv == *exp); + } else { + ASSERT_TRUE(rs.getBitOverflow() == NULL); + } +} + +void testAddHit(uint32_t numDocs, uint32_t maxHitsSize, uint32_t maxHeapSize) +{ + + LOG(info, "testAddHit: no hits"); + { // no hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector expRh; + + std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); + } + + LOG(info, "testAddHit: only ranked hits"); + { // only ranked hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector expRh; + + for (uint32_t i = 0; i < maxHitsSize; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + + std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); + } + + LOG(info, "testAddHit: both ranked hits and bit vector hits"); + { // both ranked hits and bit vector hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector expRh; + BitVector::UP expBv(BitVector::create(numDocs)); + + for (uint32_t i = 0; i < numDocs; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expBv->setBit(i); + if (i >= (numDocs - maxHitsSize)) { + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + } + + 
std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), expBv.get())); + } +} + +TEST("testAddHit") { + TEST_DO(testAddHit(30, 10, 5)); + TEST_DO(testAddHit(30, 10, 0)); + TEST_DO(testAddHit(400, 10, 5)); // 400/32 = 12 which is bigger than 10. + TEST_DO(testAddHit(400, 10, 0)); +} + +struct Fixture { + HitCollector hc; + BitVector::UP expBv; + BasicScorer scorer; + + Fixture() + : hc(20, 10, 5), expBv(BitVector::create(20)), scorer(200) + { + } + virtual ~Fixture() {} + virtual HitRank calculateScore(uint32_t) { return 0; } + void addHits() { + for (uint32_t i = 0; i < 20; ++i) { + hc.addHit(i, calculateScore(i)); + expBv->setBit(i); + } + } + size_t reRank() { + return hc.reRank(scorer); + } + size_t reRank(size_t count) { + return hc.reRank(scorer, count); + } +}; + +struct AscendingScoreFixture : Fixture { + AscendingScoreFixture() : Fixture() {} + virtual HitRank calculateScore(uint32_t i) { + return i + 100; + } +}; + +struct DescendingScoreFixture : Fixture { + DescendingScoreFixture() : Fixture() {} + virtual HitRank calculateScore(uint32_t i) { + return 100 - i; + } +}; + +TEST_F("testReRank - empty", Fixture) { + EXPECT_EQUAL(0u, f.reRank()); +} + +TEST_F("testReRank - ascending", AscendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(5u, f.reRank()); + + std::vector expRh; + for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best + expRh.push_back(RankedHit(i, f.calculateScore(i))); + if (i >= 15) { // hits from heap (5 last) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("testReRank - descending", DescendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(5u, f.reRank()); + + std::vector expRh; + for (uint32_t i = 0; i < 10; ++i) { // 10 first are the best + expRh.push_back(RankedHit(i, 
f.calculateScore(i))); + if (i < 5) { // hits from heap (5 first) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("testReRank - partial", AscendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(3u, f.reRank(3)); + + std::vector expRh; + for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best + expRh.push_back(RankedHit(i, f.calculateScore(i))); + if (i >= 17) { // hits from heap (3 last) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("require that scores for 2nd phase candidates can be retrieved", DescendingScoreFixture) +{ + f.addHits(); + std::vector scores = f.hc.getSortedHeapScores(); + ASSERT_EQUAL(5u, scores.size()); + EXPECT_EQUAL(100, scores[0]); + EXPECT_EQUAL(99, scores[1]); + EXPECT_EQUAL(98, scores[2]); + EXPECT_EQUAL(97, scores[3]); + EXPECT_EQUAL(96, scores[4]); +} + +TEST("require that score ranges can be read and set.") { + std::pair ranges = + std::make_pair(Scores(1.0, 2.0), Scores(3.0, 4.0)); + HitCollector hc(20, 10, 5); + hc.setRanges(ranges); + EXPECT_EQUAL(ranges.first.low, hc.getRanges().first.low); + EXPECT_EQUAL(ranges.first.high, hc.getRanges().first.high); + EXPECT_EQUAL(ranges.second.low, hc.getRanges().second.low); + EXPECT_EQUAL(ranges.second.high, hc.getRanges().second.high); +} + +TEST("testNoHitsToReRank") { + uint32_t numDocs = 20; + uint32_t maxHitsSize = 10; + + LOG(info, "testNoMDHeap: test it"); + { + HitCollector hc(numDocs, maxHitsSize, 0); + std::vector expRh; + + for (uint32_t i = 0; i < maxHitsSize; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expRh.push_back(RankedHit()); + 
expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + + std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); + } +} + +void testScaling(const std::vector &initScores, + const ScoreMap &finalScores, + const std::vector &expected) +{ + HitCollector hc(5, 5, 2); + + // first phase ranking + for (uint32_t i = 0; i < 5; ++i) { + hc.addHit(i, initScores[i]); + } + + PredefinedScorer scorer(finalScores); + // perform second phase ranking + EXPECT_EQUAL(2u, hc.reRank(scorer)); + + // check results + std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expected)); +} + +TEST("testScaling") { + std::vector initScores(5); + initScores[0] = 1000; + initScores[1] = 2000; + initScores[2] = 3000; + initScores[3] = 4000; + initScores[4] = 5000; + + // expected final rank scores + std::vector exp(5); + for (uint32_t i = 0; i < 5; ++i) { + exp[i]._docId = i; + } + + { // scale down and adjust down + exp[0]._rankValue = 0; // scaled + exp[1]._rankValue = 100; // scaled + exp[2]._rankValue = 200; // scaled + exp[3]._rankValue = 300; // from heap + exp[4]._rankValue = 400; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 300; + finalScores[4] = 400; + + testScaling(initScores, finalScores, exp); + } + { // scale down and adjust up + exp[0]._rankValue = 200; // scaled + exp[1]._rankValue = 300; // scaled + exp[2]._rankValue = 400; // scaled + exp[3]._rankValue = 500; // from heap + exp[4]._rankValue = 600; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 500; + finalScores[4] = 600; + + testScaling(initScores, finalScores, exp); + } + { // scale up and adjust down + + exp[0]._rankValue = -500; // scaled (-500) + exp[1]._rankValue = 750; // scaled + exp[2]._rankValue = 2000; // scaled + exp[3]._rankValue = 3250; // from heap + exp[4]._rankValue = 4500; // from heap + + // second phase ranking scores + 
ScoreMap finalScores; + finalScores[3] = 3250; + finalScores[4] = 4500; + + testScaling(initScores, finalScores, exp); + } + { // minimal scale (second phase range = 0 (4 - 4) -> 1) + exp[0]._rankValue = 1; // scaled + exp[1]._rankValue = 2; // scaled + exp[2]._rankValue = 3; // scaled + exp[3]._rankValue = 4; // from heap + exp[4]._rankValue = 4; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 4; + finalScores[4] = 4; + + testScaling(initScores, finalScores, exp); + } + { // minimal scale (first phase range = 0 (4000 - 4000) -> 1) + std::vector is(initScores); + is[4] = 4000; + exp[0]._rankValue = -299600; // scaled + exp[1]._rankValue = -199600; // scaled + exp[2]._rankValue = -99600; // scaled + exp[3]._rankValue = 400; // from heap + exp[4]._rankValue = 500; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 400; + finalScores[4] = 500; + + testScaling(is, finalScores, exp); + } +} + +TEST("testOnlyBitVector") { + uint32_t numDocs = 20; + LOG(info, "testOnlyBitVector: test it"); + { + HitCollector hc(numDocs, 0, 0); + BitVector::UP expBv(BitVector::create(numDocs)); + + for (uint32_t i = 0; i < numDocs; i += 2) { + hc.addHit(i, i + 100); + // build expected result set as we go along + expBv->setBit(i); + } + + std::unique_ptr rs = hc.getResultSet(); + std::vector expRh; + TEST_DO(checkResult(*rs.get(), expRh)); // no ranked hits + TEST_DO(checkResult(*rs.get(), expBv.get())); // only bit vector + } +} + +struct MergeResultSetFixture { + const uint32_t numDocs; + const uint32_t maxHitsSize; + const uint32_t maxHeapSize; + HitCollector hc; + MergeResultSetFixture() + : numDocs(100), maxHitsSize(80), maxHeapSize(30), hc(numDocs * 32, maxHitsSize, maxHeapSize) + {} +}; + +TEST_F("require that result set is merged correctly with first phase ranking", + MergeResultSetFixture) +{ + std::vector expRh; + for (uint32_t i = 0; i < f.numDocs; ++i) { + f.hc.addHit(i, i + 1000); + + // build 
expected result set + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + // only the maxHitsSize best hits gets a score + expRh.back()._rankValue = (i < f.numDocs - f.maxHitsSize) ? 0 : i + 1000; + } + std::unique_ptr rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); +} + +void +addExpectedHitForMergeTest(const MergeResultSetFixture &f, std::vector &expRh, uint32_t docId) +{ + expRh.push_back(RankedHit()); + expRh.back()._docId = docId; + if (docId < f.numDocs - f.maxHitsSize) { // only the maxHitsSize best hits gets a score + expRh.back()._rankValue = 0; + } else if (docId < f.numDocs - f.maxHeapSize) { // only first phase ranking + expRh.back()._rankValue = docId + 500; // adjusted with - 500 + } else { // second phase ranking on the maxHeapSize best hits + expRh.back()._rankValue = docId + 500; + } +} + +TEST_F("require that result set is merged correctly with second phase ranking (document scorer)", + MergeResultSetFixture) +{ + // with second phase ranking that triggers rescoring / scaling + BasicScorer scorer(500); // second phase ranking setting score to docId + 500 + std::vector expRh; + for (uint32_t i = 0; i < f.numDocs; ++i) { + f.hc.addHit(i, i + 1000); + addExpectedHitForMergeTest(f, expRh, i); + } + EXPECT_EQUAL(f.maxHeapSize, f.hc.reRank(scorer)); + std::unique_ptr rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); +} + +TEST("require that hits can be added out of order") { + HitCollector hc(1000, 100, 10); + std::vector expRh; + // produce expected result in normal order + for (uint32_t i = 0; i < 5; ++i) { + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + // add results in reverse order + for (uint32_t i = 5; i-- > 0; ) { + hc.addHit(i, i + 100); + } + std::unique_ptr rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), nullptr)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/index/docbuilder/.gitignore b/searchlib/src/tests/index/docbuilder/.gitignore new file mode 100644 index 00000000000..999644fce87 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/.gitignore @@ -0,0 +1,5 @@ +*_test +.depend +Makefile +docbuilder_test +searchlib_docbuilder_test_app diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt new file mode 100644 index 00000000000..de382bcc2fe --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_docbuilder_test_app + SOURCES + docbuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app) diff --git a/searchlib/src/tests/index/docbuilder/DESC b/searchlib/src/tests/index/docbuilder/DESC new file mode 100644 index 00000000000..514903f9988 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/DESC @@ -0,0 +1 @@ +ildocbuilder test. Take a look at ildocbuilder.cpp for details. diff --git a/searchlib/src/tests/index/docbuilder/FILES b/searchlib/src/tests/index/docbuilder/FILES new file mode 100644 index 00000000000..4d90f226fb4 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/FILES @@ -0,0 +1 @@ +ildocbuilder.cpp diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp new file mode 100644 index 00000000000..06599834ab5 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp @@ -0,0 +1,531 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + +/* $Id$ + * + * Copyright (C) 2011 Yahoo! 
Technologies Norway AS + * + * All Rights Reserved + * + */ + +#include +#include +LOG_SETUP("docbuilder_test"); +#include +#include +#include +#include +#include +#include + +using namespace document; + +namespace search { +namespace index { + +namespace +{ +std::string empty; +} + +namespace linguistics +{ +const vespalib::string SPANTREE_NAME("linguistics"); +} + +class Test : public vespalib::TestApp { +private: + void testBuilder(); +public: + int Main(); +}; + +void +Test::testBuilder() +{ + Schema s; + s.addIndexField(Schema::IndexField("ia", Schema::STRING)); + s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY)); + s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET)); + s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING)); + s.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + s.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("aa", Schema::INT32)); + s.addAttributeField(Schema::AttributeField("ab", Schema::FLOAT)); + s.addAttributeField(Schema::AttributeField("ac", Schema::STRING)); + s.addAttributeField(Schema::AttributeField("ad", Schema::INT32, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("ae", Schema::FLOAT, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("af", Schema::STRING, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("ag", Schema::INT32, Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("ah", Schema::FLOAT, Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("ai", Schema::STRING, Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("asp1", + Schema::INT32)); + s.addAttributeField(Schema::AttributeField("asp2", + Schema::INT64)); + s.addAttributeField(Schema::AttributeField("aap1", + Schema::INT32, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("aap2", + Schema::INT64, 
+ Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("awp1", + Schema::INT32, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("awp2", + Schema::INT64, + Schema::WEIGHTEDSET)); + + s.addSummaryField(Schema::SummaryField("sa", Schema::INT8)); + s.addSummaryField(Schema::SummaryField("sb", Schema::INT16)); + s.addSummaryField(Schema::SummaryField("sc", Schema::INT32)); + s.addSummaryField(Schema::SummaryField("sd", Schema::INT64)); + s.addSummaryField(Schema::SummaryField("se", Schema::FLOAT)); + s.addSummaryField(Schema::SummaryField("sf", Schema::DOUBLE)); + s.addSummaryField(Schema::SummaryField("sg", Schema::STRING)); + s.addSummaryField(Schema::SummaryField("sh", Schema::RAW)); + s.addSummaryField(Schema::SummaryField("si", Schema::RAW, + Schema::ARRAY)); + s.addSummaryField(Schema::SummaryField("sj", Schema::RAW, + Schema::WEIGHTEDSET)); + + DocBuilder b(s); + Document::UP doc; + std::vector lines; + std::vector::const_iterator itr; + std::string xml; + + { // empty + doc = b.startDocument("doc::0").endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_TRUE(itr == lines.end()); + } + { // all fields set + std::vector binaryBlob; + binaryBlob.push_back('\0'); + binaryBlob.push_back('\2'); + binaryBlob.push_back('\1'); + std::string raw1s("Single Raw Element"); + std::string raw1a0("Array Raw Element 0"); + std::string raw1a1("Array Raw Element 1"); + std::string raw1w0("Weighted Set Raw Element 0"); + std::string raw1w1("Weighted Set Raw Element 1"); + raw1s += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a1 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w1 += std::string(&binaryBlob[0], + 
&binaryBlob[0] + binaryBlob.size()); + b.startDocument("doc::1"); + b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField(); + b.startIndexField("ib").startElement().addStr("foo").endElement(). + startElement(1).addStr("bar").addStr("baz").endElement().endField(); + b. startIndexField("ic"). + startElement(20).addStr("bar").addStr("baz").endElement(). + startElement().addStr("foo").endElement(). + endField(); + b.startIndexField("iu"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + b.startIndexField("iau"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). 
+ startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + b.startIndexField("iwu"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). 
+ endField(); + b.startAttributeField("aa").addInt(2147483647).endField(); + b.startAttributeField("ab").addFloat(1234.56).endField(); + b.startAttributeField("ac").addStr("foo baz").endField(); + b.startAttributeField("ad").startElement().addInt(10).endElement().endField(); + b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField(); + b.startAttributeField("af").startElement().addStr("foo").endElement().endField(); + b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField(); + b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField(); + b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField(); + b.startAttributeField("asp1").addInt(1001).endField(); + b.startAttributeField("asp2").addPosition(1002, 1003).endField(); + b.startAttributeField("aap1"). + startElement().addInt(1004).endElement(). + startElement().addInt(1005).endElement(). + endField(); + b.startAttributeField("aap2"). + startElement().addPosition(1006, 1007).endElement(). + startElement().addPosition(1008, 1009).endElement(). + endField(); + b.startAttributeField("awp1"). + startElement(41).addInt(1010).endElement(). + startElement(42).addInt(1011).endElement(). + endField(); + b.startAttributeField("awp2"). + startElement(43).addPosition(1012, 1013).endElement(). + startElement(44).addPosition(1014, 1015).endElement(). + endField(); + b.startSummaryField("sa").addInt(127).endField(); + b.startSummaryField("sb").addInt(32767).endField(); + b.startSummaryField("sc").addInt(2147483647).endField(); + b.startSummaryField("sd").addInt(2147483648).endField(); + b.startSummaryField("se").addFloat(1234.56).endField(); + b.startSummaryField("sf").addFloat(9876.54).endField(); + b.startSummaryField("sg").addStr("foo bar").endField(); + b.startSummaryField("sh"). + addRaw(raw1s.c_str(), raw1s.size()). + endField(); + b.startSummaryField("si"). + startElement(). + addRaw(raw1a0.c_str(), raw1a0.size()). + endElement(). 
+ startElement(). + addRaw(raw1a1.c_str(), raw1a1.size()). + endElement(). + endField(); + b.startSummaryField("sj"). + startElement(46). + addRaw(raw1w1.c_str(), raw1w1.size()). + endElement(). + startElement(45). + addRaw(raw1w0.c_str(), raw1w0.size()). + endElement(). + endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("foo bar baz", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("foo", *itr++); + EXPECT_EQUAL("bar baz", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("bar baz", *itr++); + EXPECT_EQUAL("foo", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("http://www.yahoo.com:81/fluke?ab=2#4", *itr++); + EXPECT_EQUAL("www.yahoo.com", *itr++); + EXPECT_EQUAL("http", *itr++); + EXPECT_EQUAL("/fluke", *itr++); + EXPECT_EQUAL("81", *itr++); + EXPECT_EQUAL("ab=2", *itr++); + EXPECT_EQUAL("4", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("http://www.yahoo.com:82/fluke?ab=2#8", *itr++); + EXPECT_EQUAL("www.yahoo.com", *itr++); + EXPECT_EQUAL("http", *itr++); + EXPECT_EQUAL("/fluke", *itr++); + EXPECT_EQUAL("82", *itr++); + EXPECT_EQUAL("ab=2", *itr++); + EXPECT_EQUAL("8", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("http://www.flickr.com:82/fluke?ab=2#9", *itr++); + EXPECT_EQUAL("www.flickr.com", *itr++); + EXPECT_EQUAL("http", *itr++); + EXPECT_EQUAL("/fluke", *itr++); + EXPECT_EQUAL("82", *itr++); + EXPECT_EQUAL("ab=2", *itr++); + EXPECT_EQUAL("9", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("http://www.yahoo.com:83/fluke?ab=2#12", *itr++); + EXPECT_EQUAL("www.yahoo.com", *itr++); + EXPECT_EQUAL("http", *itr++); + EXPECT_EQUAL("/fluke", *itr++); + EXPECT_EQUAL("83", *itr++); + 
EXPECT_EQUAL("ab=2", *itr++); + EXPECT_EQUAL("12", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("http://www.flickr.com:85/fluke?ab=2#13", *itr++); + EXPECT_EQUAL("www.flickr.com", *itr++); + EXPECT_EQUAL("http", *itr++); + EXPECT_EQUAL("/fluke", *itr++); + EXPECT_EQUAL("85", *itr++); + EXPECT_EQUAL("ab=2", *itr++); + EXPECT_EQUAL("13", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("2147483647", *itr++); + EXPECT_EQUAL("1234.56", *itr++); + EXPECT_EQUAL("foo baz", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("10", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("10.5", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("foo", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("20", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("20.5", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("bar", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("1001", *itr++); + EXPECT_EQUAL("1047758", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("1004", *itr++); + EXPECT_EQUAL("1005", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("1047806", *itr++); + EXPECT_EQUAL("1048322", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("1010", *itr++); + EXPECT_EQUAL("1011", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("1048370", *itr++); + EXPECT_EQUAL("1048382", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("127", *itr++); + EXPECT_EQUAL("32767", *itr++); + EXPECT_EQUAL("2147483647", *itr++); + EXPECT_EQUAL("2147483648", *itr++); + EXPECT_EQUAL("1234.56", *itr++); + EXPECT_EQUAL("9876.54", *itr++); + EXPECT_EQUAL("foo bar", *itr++); + EXPECT_EQUAL(empty + "" + + vespalib::Base64::encode(raw1s) + + "", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL(empty 
+ "" + + vespalib::Base64::encode(raw1a0) + + "", *itr++); + EXPECT_EQUAL(empty + "" + + vespalib::Base64::encode(raw1a1) + + "", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL(empty +"" + + vespalib::Base64::encode(raw1w1) + + "", *itr++); + EXPECT_EQUAL(empty + "" + + vespalib::Base64::encode(raw1w0) + + "", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_TRUE(itr == lines.end()); +#if 1 + std::cout << "onedoc xml start -----" << std::endl << + xml << std::endl << + "-------" << std::endl; + std::cout << "onedoc toString start ----" << std::endl << + doc->toString(true) << std::endl << + "-------" << std::endl; +#endif + } + { // create one more to see that everything is cleared + b.startDocument("doc::2"); + b.startIndexField("ia").addStr("yes").endField(); + b.startAttributeField("aa").addInt(20).endField(); + b.startSummaryField("sa").addInt(10).endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("yes", *itr++); + EXPECT_EQUAL("20", *itr++); + EXPECT_EQUAL("10", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_TRUE(itr == lines.end()); + } + { // create field with cjk chars + b.startDocument("doc::3"); + b.startIndexField("ia"). + addStr("我就是那个"). + setAutoSpace(false). + addStr("大灰狼"). + setAutoSpace(true). 
+ endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("", *itr++); + EXPECT_EQUAL("我就是那个大灰狼", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_TRUE(itr == lines.end()); + const FieldValue::UP iaval = doc->getValue("ia"); + ASSERT_TRUE(iaval.get() != NULL); + const StringFieldValue *iasval = dynamic_cast + (iaval.get()); + ASSERT_TRUE(iasval != NULL); + StringFieldValue::SpanTrees trees = iasval->getSpanTrees(); + const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); + ASSERT_TRUE(tree != NULL); + std::vector spans; + std::vector expSpans; + for (SpanTree::const_iterator i = tree->begin(), ie = tree->end(); + i != ie; ++i) { + Annotation &ann = const_cast(*i); + const Span *span = dynamic_cast(ann.getSpanNode()); + if (span == NULL) + continue; + spans.push_back(*span); + } + expSpans.push_back(Span(0, 15)); + expSpans.push_back(Span(0, 15)); + expSpans.push_back(Span(15, 9)); + expSpans.push_back(Span(15, 9)); + ASSERT_TRUE(expSpans == spans); +#if 1 + std::cout << "onedoc xml start -----" << std::endl << + xml << std::endl << + "-------" << std::endl; + std::cout << "onedoc toString start ----" << std::endl << + doc->toString(true) << std::endl << + "-------" << std::endl; +#endif + } +} + +int +Test::Main() +{ + TEST_INIT("docbuilder_test"); + + testBuilder(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::index::Test); + diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore new file mode 100644 index 00000000000..f15be1efcfe --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/.gitignore @@ -0,0 +1,5 @@ +*_test +.depend +Makefile +doctypebuilder_test +searchlib_doctypebuilder_test_app diff --git a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt new file mode 100644 index 00000000000..51fb59421f9 
--- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_doctypebuilder_test_app + SOURCES + doctypebuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app) diff --git a/searchlib/src/tests/index/doctypebuilder/DESC b/searchlib/src/tests/index/doctypebuilder/DESC new file mode 100644 index 00000000000..a199241a331 --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/DESC @@ -0,0 +1 @@ +doctypebuilder test. Take a look at doctypebuilder_test.cpp for details. diff --git a/searchlib/src/tests/index/doctypebuilder/FILES b/searchlib/src/tests/index/doctypebuilder/FILES new file mode 100644 index 00000000000..9f261ca9a9a --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/FILES @@ -0,0 +1 @@ +doctypebuilder_test.cpp diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp new file mode 100644 index 00000000000..3980700fa6b --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +LOG_SETUP("doctypebuilder_test"); +#include +#include +#include +#include +#include + +using namespace document; + +namespace search { +namespace index { + +TEST("testSearchDocType") { + Schema s; + s.addIndexField(Schema::IndexField("ia", Schema::STRING)); + s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY)); + s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET)); + s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING)); + s.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + s.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("aa", Schema::INT32)); + s.addAttributeField(Schema::AttributeField("spos", + Schema::INT64)); + s.addAttributeField(Schema::AttributeField("apos", + Schema::INT64, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("wpos", + Schema::INT64, + Schema::WEIGHTEDSET)); + s.addSummaryField(Schema::SummaryField("sa", Schema::STRING)); + + DocTypeBuilder docTypeBuilder(s); + document::DocumenttypesConfig config = docTypeBuilder.makeConfig(); + DocumentTypeRepo repo(config); + const DocumentType *docType = repo.getDocumentType("searchdocument"); + ASSERT_TRUE(docType); + EXPECT_EQUAL(11u, docType->getFieldCount()); + + EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName()); + EXPECT_EQUAL("Array", + docType->getField("ib").getDataType().getName()); + EXPECT_EQUAL("WeightedSet", + docType->getField("ic").getDataType().getName()); + EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName()); + EXPECT_EQUAL("Array", + docType->getField("iau").getDataType().getName()); + EXPECT_EQUAL("WeightedSet", + docType->getField("iwu").getDataType().getName()); + + EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName()); + EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName()); + EXPECT_EQUAL("Array", + 
docType->getField("apos").getDataType().getName()); + EXPECT_EQUAL("WeightedSet", + docType->getField("wpos").getDataType().getName()); + EXPECT_EQUAL("String", docType->getField("sa").getDataType().getName()); +} + +TEST("require that multiple fields can have the same type") { + Schema s; + s.addIndexField(Schema::IndexField("array1", Schema::STRING, + Schema::ARRAY)); + s.addIndexField(Schema::IndexField("array2", Schema::STRING, + Schema::ARRAY)); + DocTypeBuilder docTypeBuilder(s); + document::DocumenttypesConfig config = docTypeBuilder.makeConfig(); + DocumentTypeRepo repo(config); + const DocumentType *docType = repo.getDocumentType("searchdocument"); + ASSERT_TRUE(docType); + EXPECT_EQUAL(2u, docType->getFieldCount()); + + EXPECT_EQUAL("Array", + docType->getField("array1").getDataType().getName()); + EXPECT_EQUAL("Array", + docType->getField("array2").getDataType().getName()); +} + +} // namespace index +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/indexmetainfo/.gitignore b/searchlib/src/tests/indexmetainfo/.gitignore new file mode 100644 index 00000000000..ddc0b5f4582 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +indexmetainfo_test +test-save.txt +searchlib_indexmetainfo_test_app diff --git a/searchlib/src/tests/indexmetainfo/CMakeLists.txt b/searchlib/src/tests/indexmetainfo/CMakeLists.txt new file mode 100644 index 00000000000..607ab7b7e5b --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_indexmetainfo_test_app + SOURCES + indexmetainfo_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_indexmetainfo_test_app COMMAND searchlib_indexmetainfo_test_app) diff --git a/searchlib/src/tests/indexmetainfo/DESC b/searchlib/src/tests/indexmetainfo/DESC new file mode 100644 index 00000000000..ee312b5fcdc --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/DESC @@ -0,0 +1,2 @@ +Test the API class used to access the 'meta-info.txt' file used to +hold meta information for an index. diff --git a/searchlib/src/tests/indexmetainfo/FILES b/searchlib/src/tests/indexmetainfo/FILES new file mode 100644 index 00000000000..8a96f5f3311 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/FILES @@ -0,0 +1 @@ +indexmetainfo_test.cpp diff --git a/searchlib/src/tests/indexmetainfo/bogus1.txt b/searchlib/src/tests/indexmetainfo/bogus1.txt new file mode 100644 index 00000000000..6d412ad302e --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus1.txt @@ -0,0 +1 @@ +noAssign diff --git a/searchlib/src/tests/indexmetainfo/bogus10.txt b/searchlib/src/tests/indexmetainfo/bogus10.txt new file mode 100644 index 00000000000..e4f500cf897 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus10.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.0.valid=false +snapshot.0.syncToken=bogus +snapshot.0.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus2.txt b/searchlib/src/tests/indexmetainfo/bogus2.txt new file mode 100644 index 00000000000..9895913aece --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus2.txt @@ -0,0 +1 @@ +=noKey diff --git a/searchlib/src/tests/indexmetainfo/bogus3.txt b/searchlib/src/tests/indexmetainfo/bogus3.txt new file mode 100644 index 00000000000..73c7da9da74 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus3.txt @@ -0,0 +1 @@ +unknownKey=magicValue diff --git a/searchlib/src/tests/indexmetainfo/bogus4.txt b/searchlib/src/tests/indexmetainfo/bogus4.txt new file mode 100644 index
00000000000..d841e7509ca --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus4.txt @@ -0,0 +1 @@ +nextSnapshotId=illegalNumber diff --git a/searchlib/src/tests/indexmetainfo/bogus5.txt b/searchlib/src/tests/indexmetainfo/bogus5.txt new file mode 100644 index 00000000000..08c64d393ba --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus5.txt @@ -0,0 +1,7 @@ +nextSnapshotId=128 +snapshot.1.valid=true +snapshot.1.syncToken=50 +snapshot.1.dirName=foo +snapshot.0.valid=false +snapshot.0.syncToken=100 +snapshot.0.dirName=bar diff --git a/searchlib/src/tests/indexmetainfo/bogus6.txt b/searchlib/src/tests/indexmetainfo/bogus6.txt new file mode 100644 index 00000000000..5506704db80 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus6.txt @@ -0,0 +1,7 @@ +nextSnapshotId=128 +snapshot.0.valid=true +snapshot.0.syncToken=50 +snapshot.0.dirName=foo +snapshot.2.valid=false +snapshot.2.syncToken=100 +snapshot.2.dirName=bar diff --git a/searchlib/src/tests/indexmetainfo/bogus7.txt b/searchlib/src/tests/indexmetainfo/bogus7.txt new file mode 100644 index 00000000000..efbc17b40b6 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus7.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot..valid=true +snapshot..syncToken=50 +snapshot..dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus8.txt b/searchlib/src/tests/indexmetainfo/bogus8.txt new file mode 100644 index 00000000000..e359ca68f12 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus8.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.x.valid=true +snapshot.x.syncToken=50 +snapshot.x.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus9.txt b/searchlib/src/tests/indexmetainfo/bogus9.txt new file mode 100644 index 00000000000..5dd606d8942 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus9.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.0.valid=xyz +snapshot.0.syncToken=50 +snapshot.0.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp 
b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp new file mode 100644 index 00000000000..e7dc828c9e5 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("indexmetainfo_test"); +#include +#include + +using search::IndexMetaInfo; + +typedef IndexMetaInfo::Snapshot Snap; + +TEST_SETUP(Test) + +int +Test::Main() +{ + TEST_INIT("indexmetainfo_test"); + { // load pregenerated file + IndexMetaInfo info(""); + EXPECT_TRUE(info.load()); + ASSERT_TRUE(info.snapshots().size() == 4); + EXPECT_TRUE(info.snapshots()[0].valid); + EXPECT_TRUE(info.snapshots()[0].syncToken == 50); + EXPECT_TRUE(info.snapshots()[0].dirName == "foo"); + EXPECT_TRUE(!info.snapshots()[1].valid); + EXPECT_TRUE(info.snapshots()[1].syncToken == 100); + EXPECT_TRUE(info.snapshots()[1].dirName == "bar"); + EXPECT_TRUE(info.snapshots()[2].valid); + EXPECT_TRUE(info.snapshots()[2].syncToken == 200); + EXPECT_TRUE(info.snapshots()[2].dirName == "baz"); + EXPECT_TRUE(!info.snapshots()[3].valid); + EXPECT_TRUE(info.snapshots()[3].syncToken == 500); + EXPECT_TRUE(info.snapshots()[3].dirName == "last"); + { + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 200); + EXPECT_TRUE(s.dirName == "baz"); + } + { + Snap s = info.getSnapshot(100); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 100); + EXPECT_TRUE(s.dirName == "bar"); + } + { + Snap s = info.getSnapshot(666); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + EXPECT_TRUE(s.dirName == ""); + } + { + EXPECT_TRUE(info.invalidateSnapshot(200)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 50); + EXPECT_TRUE(s.dirName == "foo"); + } + { + EXPECT_TRUE(info.invalidateSnapshot(50)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + 
EXPECT_TRUE(s.dirName == ""); + } + { + EXPECT_TRUE(info.validateSnapshot(500)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 500); + EXPECT_TRUE(s.dirName == "last"); + } + { + EXPECT_TRUE(!info.invalidateSnapshot(666)); + EXPECT_TRUE(!info.validateSnapshot(666)); + } + { + info.clear(); + EXPECT_TRUE(info.snapshots().size() == 0); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + EXPECT_TRUE(s.dirName == ""); + } + } + { // load file that does not exist + IndexMetaInfo info("."); + EXPECT_TRUE(!info.load("file-not-present.txt")); + } + { // load files with errors should fail + IndexMetaInfo info("."); + EXPECT_TRUE(!info.load("bogus1.txt")); + EXPECT_TRUE(!info.load("bogus2.txt")); + EXPECT_TRUE(!info.load("bogus3.txt")); + EXPECT_TRUE(!info.load("bogus4.txt")); + EXPECT_TRUE(!info.load("bogus5.txt")); + EXPECT_TRUE(!info.load("bogus6.txt")); + EXPECT_TRUE(!info.load("bogus7.txt")); + EXPECT_TRUE(!info.load("bogus8.txt")); + EXPECT_TRUE(!info.load("bogus9.txt")); + EXPECT_TRUE(!info.load("bogus10.txt")); + } + { // save/load/save/load/save/load test + std::string file("test-save.txt"); + IndexMetaInfo a("."); + IndexMetaInfo b("."); + EXPECT_TRUE(a.addSnapshot(Snap(true, 50, "foo"))); + EXPECT_TRUE(a.addSnapshot(Snap(false, 100, "bar"))); + EXPECT_TRUE(!a.addSnapshot(Snap(false, 100, "bar"))); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 2); + EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo")); + EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar")); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 2); + EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo")); + EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar")); + a.removeSnapshot(100); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 1); + EXPECT_TRUE(b.snapshots()[0] == 
Snap(true, 50, "foo")); + } + TEST_DONE(); +} diff --git a/searchlib/src/tests/indexmetainfo/meta-info.txt b/searchlib/src/tests/indexmetainfo/meta-info.txt new file mode 100644 index 00000000000..20182f5786c --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/meta-info.txt @@ -0,0 +1,12 @@ +snapshot.0.valid=true +snapshot.0.syncToken=50 +snapshot.0.dirName=foo +snapshot.1.valid=true +snapshot.1.syncToken=200 +snapshot.1.dirName=baz +snapshot.2.valid=false +snapshot.2.syncToken=100 +snapshot.2.dirName=bar +snapshot.3.valid=false +snapshot.3.syncToken=500 +snapshot.3.dirName=last diff --git a/searchlib/src/tests/ld-library-path/.gitignore b/searchlib/src/tests/ld-library-path/.gitignore new file mode 100644 index 00000000000..5f02ecfc8f8 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +ld-library-path_test +searchlib_ld-library-path_test_app diff --git a/searchlib/src/tests/ld-library-path/CMakeLists.txt b/searchlib/src/tests/ld-library-path/CMakeLists.txt new file mode 100644 index 00000000000..47e1372ffc6 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ld-library-path_test_app + SOURCES + ld-library-path.cpp + DEPENDS +) +vespa_add_test(NAME searchlib_ld-library-path_test_app COMMAND searchlib_ld-library-path_test_app) diff --git a/searchlib/src/tests/ld-library-path/ld-library-path.cpp b/searchlib/src/tests/ld-library-path/ld-library-path.cpp new file mode 100644 index 00000000000..c9a429b3b35 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/ld-library-path.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(""); + +int +main(int, char **) +{ + LOG(info, "LD_LIBRARY_PATH='%s'", getenv("LD_LIBRARY_PATH")); + return 0; +} diff --git a/searchlib/src/tests/memoryindex/btree/.gitignore b/searchlib/src/tests/memoryindex/btree/.gitignore new file mode 100644 index 00000000000..94440affa90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +btree_test +frozenbtree_test +searchlib_btree_test_app +searchlib_frozenbtree_test_app diff --git a/searchlib/src/tests/memoryindex/btree/CMakeLists.txt b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt new file mode 100644 index 00000000000..8b523030cab --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_btree_test_app + SOURCES + btree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btree_test_app COMMAND searchlib_btree_test_app) +vespa_add_executable(searchlib_frozenbtree_test_app + SOURCES + frozenbtree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_frozenbtree_test_app COMMAND searchlib_frozenbtree_test_app) diff --git a/searchlib/src/tests/memoryindex/btree/DESC b/searchlib/src/tests/memoryindex/btree/DESC new file mode 100644 index 00000000000..02739da7527 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/DESC @@ -0,0 +1 @@ +btree test. Take a look at btree_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/btree/FILES b/searchlib/src/tests/memoryindex/btree/FILES new file mode 100644 index 00000000000..e63a2f68eb4 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/FILES @@ -0,0 +1 @@ +btree_test.cpp diff --git a/searchlib/src/tests/memoryindex/btree/btree_test.cpp b/searchlib/src/tests/memoryindex/btree/btree_test.cpp new file mode 100644 index 00000000000..5fb6761ba57 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/btree_test.cpp @@ -0,0 +1,1282 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("btree_test"); +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using vespalib::GenerationHandler; + +namespace search { +namespace btree { + +namespace { + +template +std::string +toStr(const T & v) +{ + std::stringstream ss; + ss << v; + return ss.str(); +} + +} + +typedef BTreeTraits<4, 4, 31, false> MyTraits; + +#define KEYWRAP + +#ifdef KEYWRAP + +// Force use of functor to compare keys. 
+class WrapInt +{ +public: + int _val; + WrapInt(int val) : _val(val) {} + WrapInt(void) : _val(0) {} + bool operator==(const WrapInt & rhs) const { return _val == rhs._val; } +}; + +std::ostream & +operator<<(std::ostream &s, const WrapInt &i) +{ + s << i._val; + return s; +} + +typedef WrapInt MyKey; +class MyComp +{ +public: + bool + operator()(const WrapInt &a, const WrapInt &b) const + { + return a._val < b._val; + } +}; + +#define UNWRAP(key) (key._val) +#else +typedef int MyKey; +typedef std::less MyComp; +#define UNWRAP(key) (key) +#endif + +typedef BTree MyTree; +typedef BTreeStore MyTreeStore; +typedef MyTree::Builder MyTreeBuilder; +typedef MyTree::LeafNodeType MyLeafNode; +typedef MyTree::InternalNodeType MyInternalNode; +typedef MyTree::NodeAllocatorType MyNodeAllocator; +typedef std::pair LeafPair; +typedef MyTreeStore::KeyDataType MyKeyData; +typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair; + +typedef BTree SetTreeB; + +typedef BTreeTraits<16, 16, 10, false> LSeekTraits; +typedef BTree, LSeekTraits> SetTreeL; + +struct LeafPairLess { + bool operator()(const LeafPair & lhs, const LeafPair & rhs) const { + return UNWRAP(lhs.first) < UNWRAP(rhs.first); + } +}; + +template +void +cleanup(GenerationHandler & g, ManagerType & m) +{ + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +template +void +cleanup(GenerationHandler & g, + ManagerType & m, + BTreeNode::Ref n1Ref, NodeType * n1, + BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL) +{ + assert(ManagerType::isValidRef(n1Ref)); + m.holdNode(n1Ref, n1); + if (n2 != NULL) { + assert(ManagerType::isValidRef(n2Ref)); + m.holdNode(n2Ref, n2); + } else { + assert(!ManagerType::isValidRef(n2Ref)); + } + cleanup(g, m); +} + +class Test : public vespalib::TestApp { +private: + template + bool assertLeafNode(const std::string & exp, const LeafNodeType & n); + bool assertSeek(int skey, int ekey, const MyTree 
& tree); + bool assertSeek(int skey, int ekey, MyTree::Iterator & itr); + bool assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act); + + void + buildSubTree(const std::vector &sub, + size_t numEntries); + + void requireThatNodeInsertWorks(); + void requireThatNodeSplitInsertWorks(); + void requireThatNodeStealWorks(); + void requireThatNodeRemoveWorks(); + void requireThatNodeLowerBoundWorks(); + void requireThatWeCanInsertAndRemoveFromTree(); + void requireThatSortedTreeInsertWorks(); + void requireThatCornerCaseTreeFindWorks(); + void requireThatBasicTreeIteratorWorks(); + void requireThatTreeIteratorSeekWorks(); + void requireThatTreeIteratorAssignWorks(); + void requireThatMemoryUsageIsCalculated(); + template + void requireThatLowerBoundWorksT(); + void requireThatLowerBoundWorks(); + template + void requireThatUpperBoundWorksT(); + void requireThatUpperBoundWorks(); + void requireThatUpdateOfKeyWorks(); + + void + requireThatSmallNodesWorks(); + + void + requireThatApplyWorks(); + + void + requireThatIteratorDistanceWorks(int numEntries); + + void + requireThatIteratorDistanceWorks(); +public: + int Main(); +}; + +template +bool +Test::assertLeafNode(const std::string & exp, const LeafNodeType & n) +{ + std::stringstream ss; + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":" << n.getData(i); + } + ss << "]"; + if (!EXPECT_EQUAL(exp, ss.str())) return false; + return true; +} + +bool +Test::assertSeek(int skey, int ekey, const MyTree & tree) +{ + MyTree::Iterator itr = tree.begin(); + return assertSeek(skey, ekey, itr); +} + +bool +Test::assertSeek(int skey, int ekey, MyTree::Iterator & itr) +{ + MyTree::Iterator bseekItr = itr; + MyTree::Iterator lseekItr = itr; + bseekItr.binarySeek(skey); + lseekItr.linearSeek(skey); + if (!EXPECT_EQUAL(ekey, UNWRAP(bseekItr.getKey()))) return false; + if (!EXPECT_EQUAL(ekey, UNWRAP(lseekItr.getKey()))) return false; + itr = bseekItr; + return 
true; +} + +bool +Test::assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act) +{ + if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) return false; + if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) return false; + if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) return false; + if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) return false; + return true; +} + +void +Test::requireThatNodeInsertWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = m.allocLeafNode(); + MyLeafNode *n = nPair.second; + EXPECT_TRUE(n->isLeaf()); + EXPECT_EQUAL(0u, n->validSlots()); + n->insert(0, 20, "b"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[20:b]", *n)); + n->insert(0, 10, "a"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b]", *n)); + EXPECT_EQUAL(20, UNWRAP(n->getLastKey())); + EXPECT_EQUAL("b", n->getLastData()); + n->insert(2, 30, "c"); + EXPECT_TRUE(!n->isFull()); + n->insert(3, 40, "d"); + EXPECT_TRUE(n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b,30:c,40:d]", *n)); + cleanup(g, m, nPair.first, n); +} + +MyLeafNode::RefPair +getLeafNode(MyNodeAllocator &allocator) +{ + MyLeafNode::RefPair nPair = allocator.allocLeafNode(); + MyLeafNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 3, "c"); + n->insert(2, 5, "e"); + n->insert(3, 7, "g"); + return nPair; +} + +void +Test::requireThatNodeSplitInsertWorks() +{ + { // new entry in current node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 2, 4, "d"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,4:d]", *n)); + EXPECT_TRUE(assertLeafNode("[5:e,7:g]", *s)); + cleanup(g, m, nPair.first, n, 
sPair.first, s); + } + { // new entry in split node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 3, 6, "f"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[6:f,7:g]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } + { // new entry at end + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 4, 8, "h"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[7:g,8:h]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } +} + +struct BTreeStealTraits +{ + static const size_t LEAF_SLOTS = 6; + static const size_t INTERNAL_SLOTS = 6; +}; + +void +Test::requireThatNodeStealWorks() +{ + typedef BTreeLeafNode MyStealNode; + typedef BTreeNodeAllocator + MyStealManager; + { // steal all from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 4, "d"); + n->insert(1, 5, "e"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + n->stealAllFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal all from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = 
vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + n->stealAllFromRightNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 5, "e"); + n->insert(1, 6, "f"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + v->insert(3, 4, "d"); + n->stealSomeFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *n)); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + v->insert(3, 6, "f"); + n->stealSomeFromRightNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *n)); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } +} + +void +Test::requireThatNodeRemoveWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + n->remove(1); + EXPECT_TRUE(assertLeafNode("[1:a,5:e,7:g]", *n)); + cleanup(g, m, nPair.first, n); +} + +void +Test::requireThatNodeLowerBoundWorks() +{ + GenerationHandler g; + 
MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + EXPECT_EQUAL(1u, n->lower_bound(3, MyComp())); + EXPECT_FALSE(MyComp()(3, n->getKey(1u))); + EXPECT_EQUAL(0u, n->lower_bound(0, MyComp())); + EXPECT_TRUE(MyComp()(0, n->getKey(0u))); + EXPECT_EQUAL(1u, n->lower_bound(2, MyComp())); + EXPECT_TRUE(MyComp()(2, n->getKey(1u))); + EXPECT_EQUAL(3u, n->lower_bound(6, MyComp())); + EXPECT_TRUE(MyComp()(6, n->getKey(3u))); + EXPECT_EQUAL(4u, n->lower_bound(8, MyComp())); + cleanup(g, m, nPair.first, n); +} + +void +generateData(std::vector & data, size_t numEntries) +{ + data.reserve(numEntries); + Rand48 rnd; + rnd.srand48(10); + for (size_t i = 0; i < numEntries; ++i) { + int num = rnd.lrand48() % 10000000; + std::string str = toStr(num); + data.push_back(std::make_pair(num, str)); + } +} + + +void +Test::buildSubTree(const std::vector &sub, + size_t numEntries) +{ + GenerationHandler g; + MyTree tree; + MyTreeBuilder builder(tree.getAllocator()); + + std::vector sorted(sub.begin(), sub.begin() + numEntries); + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(sorted[i].first); + const std::string & str = sorted[i].second; + builder.insert(num, str); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + EXPECT_EQUAL(numEntries, tree.size()); + EXPECT_TRUE(tree.isValid()); + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr = itr; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + for (size_t i = 0; i < numEntries; ++i) { + 
EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + ++itr; + } + EXPECT_TRUE(!itr.valid()); + ritr = itr; + EXPECT_TRUE(!ritr.valid()); + --ritr; + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData()); + --ritr; + } + EXPECT_TRUE(!ritr.valid()); +} + +void +Test::requireThatWeCanInsertAndRemoveFromTree() +{ + GenerationHandler g; + MyTree tree; + std::vector exp; + std::vector sorted; + size_t numEntries = 1000; + generateData(exp, numEntries); + sorted = exp; + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + // insert entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + const std::string & str = exp[i].second; + EXPECT_TRUE(!tree.find(num).valid()); + //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str()); + EXPECT_TRUE(tree.insert(num, str)); + EXPECT_TRUE(!tree.insert(num, str)); + for (size_t j = 0; j <= i; ++j) { + //LOG(info, "find[%zu](%d)", j, exp[j].first._val); + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(i + 1u, tree.size()); + EXPECT_TRUE(tree.isValid()); + buildSubTree(exp, i + 1); + } + //std::cout << "tree: " << tree.toString() << std::endl; + + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itre = itr; + MyTree::Iterator itre2; + MyTree::Iterator ritr = itr; + while (itre.valid()) + ++itre; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; 
+ EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + MyTree::Iterator pitr = itr; + for (size_t i = 0; i < numEntries; ++i) { + ssize_t si = i; + ssize_t sileft = numEntries - i; + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(i, itr.position()); + EXPECT_EQUAL(sileft, itre - itr); + EXPECT_EQUAL(-sileft, itr - itre); + EXPECT_EQUAL(sileft, itre2 - itr); + EXPECT_EQUAL(-sileft, itr - itre2); + EXPECT_EQUAL(si, itr - tree.begin()); + EXPECT_EQUAL(-si, tree.begin() - itr); + EXPECT_EQUAL(i != 0, itr - pitr); + EXPECT_EQUAL(-(i != 0), pitr - itr); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + pitr = itr; + ++itr; + ritr = itr; + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_TRUE(ritr == pitr); + } + EXPECT_TRUE(!itr.valid()); + EXPECT_EQUAL(numEntries, itr.position()); + ssize_t sNumEntries = numEntries; + EXPECT_EQUAL(sNumEntries, itr - tree.begin()); + EXPECT_EQUAL(-sNumEntries, tree.begin() - itr); + EXPECT_EQUAL(1, itr - pitr); + EXPECT_EQUAL(-1, pitr - itr); + } + // compact full tree by calling incremental compaction methods in a loop + { + MyTree::NodeAllocatorType &manager = tree.getAllocator(); + std::vector toHold = manager.startCompact(); + MyTree::Iterator itr = tree.begin(); + tree.setRoot(itr.moveFirstLeafNode(tree.getRoot())); + while (itr.valid()) { + // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey())); + itr.moveNextLeafNode(); + } + manager.finishCompact(toHold); + manager.freeze(); + manager.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + manager.trimHoldLists(g.getFirstUsedGeneration()); + } + // remove entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + //LOG(info, "remove[%zu](%d)", i, num); + //std::cout << "tree: " << tree.toString() << std::endl; + EXPECT_TRUE(tree.remove(num)); + EXPECT_TRUE(!tree.find(num).valid()); + EXPECT_TRUE(!tree.remove(num)); + EXPECT_TRUE(tree.isValid()); + for (size_t j = i + 1; j < 
numEntries; ++j) { + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(numEntries - 1 - i, tree.size()); + } +} + +void +Test::requireThatSortedTreeInsertWorks() +{ + { + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } + { + GenerationHandler g; + MyTree tree; + for (int i = 1000; i > 0; --i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } +} + +void +Test::requireThatCornerCaseTreeFindWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 1; i < 100; ++i) { + tree.insert(i, toStr(i)); + } + EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest + EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest +} + +void +Test::requireThatBasicTreeIteratorWorks() +{ + GenerationHandler g; + MyTree tree; + EXPECT_TRUE(!tree.begin().valid()); + std::vector exp; + size_t numEntries = 1000; + generateData(exp, numEntries); + for (size_t i = 0; i < numEntries; ++i) { + tree.insert(exp[i].first, exp[i].second); + } + std::sort(exp.begin(), exp.end(), LeafPairLess()); + size_t ei = 0; + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr; + EXPECT_EQUAL(1000u, itr.size()); + for (; itr.valid(); ++itr) { + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey())); + EXPECT_EQUAL(exp[ei].second, itr.getData()); + ei++; + ritr = itr; + } + EXPECT_EQUAL(numEntries, ei); + for (; ritr.valid(); --ritr) { + --ei; + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), 
UNWRAP(ritr.getKey())); + EXPECT_EQUAL(exp[ei].second, ritr.getData()); + } +} + +void +Test::requireThatTreeIteratorSeekWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 40; i += 2) { + tree.insert(i, toStr(i)); + } + //std::cout << tree.toString() << std::endl; + EXPECT_TRUE(assertSeek(2, 2, tree)); // next key + EXPECT_TRUE(assertSeek(10, 10, tree)); // skip to existing + EXPECT_TRUE(assertSeek(26, 26, tree)); // skip to existing + EXPECT_TRUE(assertSeek(11, 12, tree)); // skip to non-existing + EXPECT_TRUE(assertSeek(23, 24, tree)); // skip to non-existing + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(4, 4, itr)); + EXPECT_TRUE(assertSeek(14, 14, itr)); + EXPECT_TRUE(assertSeek(18, 18, itr)); + EXPECT_TRUE(assertSeek(36, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(3, 4, itr)); + EXPECT_TRUE(assertSeek(13, 14, itr)); + EXPECT_TRUE(assertSeek(17, 18, itr)); + EXPECT_TRUE(assertSeek(35, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itr2 = tree.begin(); + itr.binarySeek(40); // outside + itr2.linearSeek(40); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(8, 8, itr)); + for (int i = 10; i < 40; i += 2) { + ++itr; + EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(26, 26, itr)); + for (int i = 28; i < 40; i += 2) { + ++itr; + EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + GenerationHandler g2; + MyTree tree2; // only leaf node + tree2.insert(0, "0"); + tree2.insert(2, "2"); + tree2.insert(4, "4"); + EXPECT_TRUE(assertSeek(1, 2, tree2)); + EXPECT_TRUE(assertSeek(2, 2, tree2)); + { + MyTree::Iterator itr = tree2.begin(); + MyTree::Iterator itr2 = tree2.begin(); + itr.binarySeek(5); // outside + itr2.linearSeek(5); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } +} + +void 
+Test::requireThatTreeIteratorAssignWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + tree.insert(i, toStr(i)); + } + for (int i = 0; i < 1000; ++i) { + MyTree::Iterator itr = tree.find(i); + MyTree::Iterator itr2 = itr; + EXPECT_TRUE(itr == itr2); + int expNum = i; + for (; itr2.valid(); ++itr2) { + EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey())); + } + EXPECT_EQUAL(1000, expNum); + } +} + +void +Test::requireThatMemoryUsageIsCalculated() +{ + typedef BTreeNodeAllocator NodeAllocator; + typedef NodeAllocator::InternalNodeType INode; + typedef NodeAllocator::LeafNodeType LNode; + typedef NodeAllocator::InternalNodeTypeRefPair IRef; + typedef NodeAllocator::LeafNodeTypeRefPair LRef; + LOG(info, "sizeof(BTreeNode)=%zu, sizeof(INode)=%zu, sizeof(LNode)=%zu", + sizeof(BTreeNode), sizeof(INode), sizeof(LNode)); + EXPECT_GREATER(sizeof(INode), sizeof(LNode)); + GenerationHandler gh; + gh.incGeneration(); + NodeAllocator tm; + MemoryUsage mu; + const uint32_t initialInternalNodes = 128u; + const uint32_t initialLeafNodes = 128u; + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode)); + mu.incDeadBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add internal node + IRef ir = tm.allocInternalNode(1); + mu.incUsedBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add leaf node + LRef lr = tm.allocLeafNode(); + mu.incUsedBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // move nodes to hold list + tm.freeze(); // mark allocated nodes as frozen so we can hold them later on + tm.holdNode(ir.first, ir.second); + mu.incAllocatedBytesOnHold(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + tm.holdNode(lr.first, lr.second); + mu.incAllocatedBytesOnHold(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, 
tm.getMemoryUsage())); + + // trim hold lists + tm.transferHoldLists(gh.getCurrentGeneration()); + gh.incGeneration(); + tm.trimHoldLists(gh.getFirstUsedGeneration()); + mu = MemoryUsage(); + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode) * 2); + mu.incDeadBytes(sizeof(INode) * 2); + mu.incUsedBytes(sizeof(LNode)); + mu.incDeadBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); +} + +template +void +Test::requireThatLowerBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.lowerBound(9).getKey()); + EXPECT_EQUAL(20, t.lowerBound(20).getKey()); + EXPECT_EQUAL(30, t.lowerBound(21).getKey()); + EXPECT_EQUAL(30, t.lowerBound(30).getKey()); + EXPECT_TRUE(!t.lowerBound(31).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 990; i+=10) { + EXPECT_EQUAL(i + 1, t.lowerBound(i).getKey()); + EXPECT_EQUAL(i + 1, t.lowerBound(i + 1).getKey()); + } + EXPECT_TRUE(!t.lowerBound(991).valid()); +} + +void +Test::requireThatLowerBoundWorks() +{ + requireThatLowerBoundWorksT(); + requireThatLowerBoundWorksT(); +} + +template +void +Test::requireThatUpperBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.upperBound(9).getKey()); + EXPECT_EQUAL(30, t.upperBound(20).getKey()); + EXPECT_EQUAL(30, t.upperBound(21).getKey()); + EXPECT_TRUE(!t.upperBound(30).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 980; i+=10) { + EXPECT_EQUAL(i + 1, t.upperBound(i).getKey()); + EXPECT_EQUAL(i + 11, 
t.upperBound(i + 1).getKey()); + } + EXPECT_TRUE(!t.upperBound(990).valid()); +} + +void +Test::requireThatUpperBoundWorks() +{ + requireThatUpperBoundWorksT(); + requireThatUpperBoundWorksT(); +} + +struct UpdKeyComp { + int _remainder; + mutable size_t _numErrors; + UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {} + bool operator() (const int & lhs, const int & rhs) const { + if (lhs % 2 != _remainder) ++_numErrors; + if (rhs % 2 != _remainder) ++_numErrors; + return lhs < rhs; + } +}; + +void +Test::requireThatUpdateOfKeyWorks() +{ + typedef BTree UpdKeyTree; + typedef UpdKeyTree::Iterator UpdKeyTreeIterator; + GenerationHandler g; + UpdKeyTree t; + UpdKeyComp cmp1(0); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1)); + } + EXPECT_EQUAL(0u, cmp1._numErrors); + for (int i = 0; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp1); + itr.writeKey(i + 1); + } + UpdKeyComp cmp2(1); + for (int i = 1; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp2); + EXPECT_TRUE(itr.valid()); + } + EXPECT_EQUAL(0u, cmp2._numErrors); +} + + +void +Test::requireThatSmallNodesWorks(void) +{ + typedef BTreeStore TreeStore; + GenerationHandler g; + TreeStore s; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 40, "fourty")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 20, "twenty")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 60, "sixty")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 
50, "fifty")); + EXPECT_TRUE(!s.insert(root, 50, "fifty.not")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.insert(root, 1000 + i, "big")); + if (i > 0) { + EXPECT_TRUE(!s.insert(root, 1000 + i - 1, "big")); + } + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + EXPECT_TRUE(s.remove(root, 40)); + EXPECT_TRUE(!s.remove(root, 40)); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 20)); + EXPECT_TRUE(!s.remove(root, 20)); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 50)); + EXPECT_TRUE(!s.remove(root, 50)); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.remove(root, 1000 + i)); + if (i > 0) { + EXPECT_TRUE(!s.remove(root, 1000 + i - 1)); + } + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + + +void +Test::requireThatApplyWorks(void) // applies batches of additions/removals via s.apply() and checks size/small-array state after each +{ + typedef BTreeStore TreeStore; + typedef TreeStore::KeyType KeyType; + typedef TreeStore::KeyDataType KeyDataType; + GenerationHandler g; + TreeStore s; + std::vector additions; + std::vector removals; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(40, "fourty")); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + 
removals.size()); // data() rather than &v[0]: operator[] on an empty vector is undefined behaviour + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(20, "twenty")); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(60, "sixty")); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(50, "fifty")); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for (uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(1000 + i, "big")); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + + additions.clear(); + removals.clear(); + removals.push_back(40); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(20); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(50); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(101u, s.size(root)); + 
EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + removals.push_back(1000 +i); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1000 + i, "big")); + removals.push_back(60); + removals.push_back(1002); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(20u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(19u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1100 + i, "big")); + for (uint32_t i = 0; i < 10; ++i) + removals.push_back(1000 + i); + s.apply(root, additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size()); + EXPECT_EQUAL(30u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + +class MyTreeTestIterator : public MyTree::Iterator +{ +public: + MyTreeTestIterator(const MyTree::Iterator &rhs) + : MyTree::Iterator(rhs) + { + } + + int + getPathSize(void) const + { + return _pathSize; + } +}; + + +void +Test::requireThatIteratorDistanceWorks(int numEntries) +{ + GenerationHandler g; + MyTree tree; + typedef MyTree::Iterator Iterator; + for (int i = 0; i < numEntries; ++i) { + tree.insert(i, toStr(i)); 
+ } + MyTreeTestIterator tit = tree.begin(); + LOG(info, + "numEntries=%d, iterator pathSize=%d", + numEntries, tit.getPathSize()); + Iterator it = tree.begin(); + for (int i = 0; i <= numEntries; ++i) { + Iterator iit = tree.lowerBound(i); + Iterator iitn = tree.lowerBound(i + 1); + Iterator iitu = tree.upperBound(i); + Iterator iitls = tree.begin(); + Iterator iitbs = tree.begin(); + Iterator iitlsp = tree.begin(); + Iterator iitbsp = tree.begin(); + Iterator iitlb(tree.getRoot(), tree.getAllocator()); + iitlb.lower_bound(i); + Iterator iitlb2(BTreeNode::Ref(), tree.getAllocator()); + iitlb2.lower_bound(tree.getRoot(), i); + if (i > 0) { + iitls.linearSeek(i); + iitbs.binarySeek(i); + ++it; + } + iitlsp.linearSeekPast(i); + iitbsp.binarySeekPast(i); + Iterator iitlsp2 = iitls; + Iterator iitbsp2 = iitbs; + Iterator iitnr = i < numEntries ? iitn : tree.begin(); + --iitnr; + if (i < numEntries) { + iitlsp2.linearSeekPast(i); + iitbsp2.binarySeekPast(i); + } + EXPECT_EQUAL(i, static_cast(iit.position())); + EXPECT_EQUAL(i < numEntries, iit.valid()); + EXPECT_TRUE(iit.identical(it)); + EXPECT_TRUE(iit.identical(iitls)); + EXPECT_TRUE(iit.identical(iitbs)); + EXPECT_TRUE(iit.identical(iitnr)); + EXPECT_TRUE(iit.identical(iitlb)); + EXPECT_TRUE(iit.identical(iitlb2)); + EXPECT_TRUE(iitn.identical(iitu)); + EXPECT_TRUE(iitn.identical(iitlsp)); + EXPECT_TRUE(iitn.identical(iitbsp)); + EXPECT_TRUE(iitn.identical(iitlsp2)); + EXPECT_TRUE(iitn.identical(iitbsp2)); + if (i < numEntries) { + EXPECT_EQUAL(i + 1, static_cast(iitn.position())); + EXPECT_EQUAL(i + 1 < numEntries, iitn.valid()); + } + for (int j = 0; j <= numEntries; ++j) { + Iterator jit = tree.lowerBound(j); + EXPECT_EQUAL(j, static_cast(jit.position())); + EXPECT_EQUAL(j < numEntries, jit.valid()); + EXPECT_EQUAL(i - j, iit - jit); + EXPECT_EQUAL(j - i, jit - iit); + + Iterator jit2 = jit; + jit2.setupEnd(); + EXPECT_EQUAL(numEntries - j, jit2 - jit); + EXPECT_EQUAL(numEntries - i, jit2 - iit); + EXPECT_EQUAL(j 
- numEntries, jit - jit2); + EXPECT_EQUAL(i - numEntries, iit - jit2); + } + } +} + + +void +Test::requireThatIteratorDistanceWorks() +{ + requireThatIteratorDistanceWorks(1); + requireThatIteratorDistanceWorks(3); + requireThatIteratorDistanceWorks(8); + requireThatIteratorDistanceWorks(20); + requireThatIteratorDistanceWorks(100); + requireThatIteratorDistanceWorks(400); +} + + +int +Test::Main() +{ + TEST_INIT("btree_test"); + + requireThatNodeInsertWorks(); + requireThatNodeSplitInsertWorks(); + requireThatNodeStealWorks(); + requireThatNodeRemoveWorks(); + requireThatNodeLowerBoundWorks(); + requireThatWeCanInsertAndRemoveFromTree(); + requireThatSortedTreeInsertWorks(); + requireThatCornerCaseTreeFindWorks(); + requireThatBasicTreeIteratorWorks(); + requireThatTreeIteratorSeekWorks(); + requireThatTreeIteratorAssignWorks(); + requireThatMemoryUsageIsCalculated(); + requireThatLowerBoundWorks(); + requireThatUpperBoundWorks(); + requireThatUpdateOfKeyWorks(); + requireThatSmallNodesWorks(); + requireThatApplyWorks(); + requireThatIteratorDistanceWorks(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); diff --git a/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp new file mode 100644 index 00000000000..817d024c60f --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp @@ -0,0 +1,513 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP("frozenbtree_test"); +#define DEBUG_FROZENBTREE +#define LOG_FROZENBTREEXX +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::btree::BTreeRoot; +using search::btree::BTreeNode; +using search::btree::BTreeInternalNode; +using search::btree::BTreeLeafNode; +using search::btree::BTreeDefaultTraits; +using vespalib::GenerationHandler; + +namespace search { + + +class FrozenBTreeTest : public vespalib::TestApp +{ +public: + typedef int KeyType; +private: + std::vector _randomValues; + std::vector _sortedRandomValues; + +public: + typedef int DataType; + typedef BTreeRoot, + BTreeDefaultTraits> Tree; + typedef Tree::NodeAllocatorType NodeAllocator; + typedef Tree::InternalNodeType InternalNodeType; + typedef Tree::LeafNodeType LeafNodeType; + typedef Tree::Iterator Iterator; + typedef Tree::ConstIterator ConstIterator; +private: + GenerationHandler *_generationHandler; + NodeAllocator *_allocator; + Tree *_tree; + + Rand48 _randomGenerator; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count); + + void + insertRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector &values); + + void + removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector &values); + + void + lookupRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values); + + void + lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values); + + void + lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values); + + void + sortRandomValues(void); + + void + traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector &sorted, + bool frozen); + + void + printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const; + + void + printEnumTree(const Tree 
*tree, + NodeAllocator &allocator); + + static const char * + frozenName(bool frozen) + { + return frozen ? "frozen" : "thawed"; + } +public: + FrozenBTreeTest(void) + : vespalib::TestApp(), + _randomValues(), + _sortedRandomValues(), + _generationHandler(NULL), + _allocator(NULL), + _tree(NULL), + _randomGenerator() + { + } + + int Main(void); +}; + + + +void +FrozenBTreeTest::allocTree(void) +{ + assert(_generationHandler == NULL); + assert(_allocator == NULL); + assert(_tree == NULL); + _generationHandler = new GenerationHandler; + _allocator = new NodeAllocator(); + _tree = new Tree; +} + + +void +FrozenBTreeTest::freeTree(bool verbose) +{ +#if 0 + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %" PRIu32 " leaves", + static_cast(_intTree->getUsedMemory()), + static_cast(_intTree->getHeldMemory()), + _intTree->validLeaves()); + _intTree->clear(); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast(_intTree->getUsedMemory()), + static_cast(_intTree->getHeldMemory())); + _intTree->dropFrozen(); + _intTree->removeOldGenerations(_intTree->getGeneration() + 1); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast(_intTree->getUsedMemory()), + static_cast(_intTree->getHeldMemory())); + if (verbose) + LOG(info, + "%d+%d leftover tree nodes", + _intTree->getNumInternalNodes(), + _intTree->getNumLeafNodes()); + EXPECT_TRUE(_intTree->getNumInternalNodes() == 0 && + _intTree->getNumLeafNodes() == 0); + delete _intTree; + _intTree = NULL; + delete _intKeyStore; + _intKeyStore = NULL; +#endif + (void) verbose; + _tree->clear(*_allocator); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + _generationHandler->incGeneration(); + _allocator->trimHoldLists(_generationHandler->getFirstUsedGeneration()); + delete _tree; + _tree = NULL; + delete _allocator; + _allocator = NULL; + delete _generationHandler; + _generationHandler = NULL; +} + 
+ +void +FrozenBTreeTest::fillRandomValues(unsigned int count) +{ + unsigned int i; + + LOG(info, + "Filling %u random values", count); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i &values) +{ + std::vector::const_iterator i(values.begin()); + std::vector::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "insertRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before insert", *i); +#endif + p = tree.find(*i, allocator); + if (!p.valid()) { + DataType val = *i + 42; + if (tree.insert(*i, val, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); +#ifdef DEBUG_FROZENBTREEX + printEnumTree(&tree); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "insertRandomValues done"); +} + + +void +FrozenBTreeTest:: +removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector & values) +{ + std::vector::const_iterator i(values.begin()); + std::vector::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "removeRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before remove", *i); +#endif + p = tree.find(*i, allocator); + if (p.valid()) { + if (tree.remove(*i, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(!p.valid()); +#ifdef DEBUG_FROZENBTREEX + tree.printTree(); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "removeRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values) +{ + std::vector::const_iterator i(values.begin()); + std::vector::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + 
ASSERT_TRUE(p.valid() && p.getKey() == *i); + } + LOG(info, "lookupRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values) +{ + std::vector::const_iterator i(values.begin()); + std::vector::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupGoneRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + ASSERT_TRUE(!p.valid()); + } + LOG(info, "lookupGoneRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector &values) +{ + std::vector::const_iterator i(values.begin()); + std::vector::const_iterator ie(values.end()); + ConstIterator p; + + LOG(info, "lookupFrozenRandomValues start"); + for (; i != ie; ++i) { + p = tree.getFrozenView(allocator).find(*i, std::less()); + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); + } + LOG(info, "lookupFrozenRandomValues done"); +} + + +void +FrozenBTreeTest::sortRandomValues(void) // fills _sortedRandomValues with the values of _randomValues in ascending order, duplicates dropped +{ + std::vector::iterator i; + std::vector::iterator ie; + uint32_t okcnt; + int prevVal; + std::vector sorted; + + LOG(info, "sortRandomValues start"); + sorted = _randomValues; + std::sort(sorted.begin(), sorted.end()); + _sortedRandomValues.clear(); + _sortedRandomValues.reserve(sorted.size()); + + okcnt = 0; + prevVal = 0; + ie = sorted.end(); + for (i = sorted.begin(); i != ie; ++i) { + if (i == sorted.begin() || *i > prevVal) { // first element is always kept (compare against the container being iterated); later ones only when strictly greater + okcnt++; + _sortedRandomValues.push_back(*i); + } else if (*i == prevVal) + okcnt++; + else + abort(); + prevVal = *i; + } + EXPECT_TRUE(okcnt == sorted.size()); + LOG(info, "sortRandomValues done"); +} + + +void +FrozenBTreeTest:: +traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector &sorted, + bool frozen) +{ + LOG(info, + "traverseTreeIterator %s start", + frozenName(frozen)); + + std::vector::const_iterator i; + + i = 
sorted.begin(); + if (frozen) { + ConstIterator ai; + ai = tree.getFrozenView(allocator).begin(); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } else { + Iterator ai; + ai = tree.begin(allocator); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } + + + ASSERT_TRUE(i == sorted.end()); + + LOG(info, + "traverseTreeIterator %s done", + frozenName(frozen)); +} + + +void +FrozenBTreeTest:: +printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const +{ + // typedef BTreeNode Node; + typedef LeafNodeType LeafNode; + typedef InternalNodeType InternalNode; + BTreeNode::Ref subNode; + unsigned int i; + + if (allocator.isLeafRef(node)) { + const LeafNode *lnode = allocator.mapLeafRef(node); + printf("%*s LeafNode %s valid=%d\n", + indent, "", + lnode->getFrozen() ? "frozen" : "thawed", + lnode->validSlots()); + for (i = 0; i < lnode->validSlots(); i++) { + + KeyType k = lnode->getKey(i); + DataType d = lnode->getData(i); + printf("leaf value %3d %d %d\n", + (int) i, + (int) k, + (int) d); + } + return; + } + const InternalNode *inode = allocator.mapInternalRef(node); + printf("%*s IntermediteNode %s valid=%d\n", + indent, "", + inode->getFrozen() ? 
"frozen" : "thawed", + inode->validSlots()); + for (i = 0; i < inode->validSlots(); i++) { + subNode = inode->getChild(i); + assert(subNode != BTreeNode::Ref()); + printSubEnumTree(subNode, allocator, indent + 4); + } +} + + +void +FrozenBTreeTest::printEnumTree(const Tree *tree, + NodeAllocator &allocator) +{ + printf("Tree Dump start\n"); + if (!NodeAllocator::isValidRef(tree->getRoot())) { + printf("EMPTY\n"); + } else { + printSubEnumTree(tree->getRoot(), allocator, 0); + } + printf("Tree Dump done\n"); +} + + + +int +FrozenBTreeTest::Main() +{ + TEST_INIT("frozenbtree_test"); + + fillRandomValues(1000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + lookupRandomValues(*_tree, *_allocator, _randomValues); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + lookupFrozenRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + removeRandomValues(*_tree, *_allocator, _randomValues); + lookupGoneRandomValues(*_tree, *_allocator, _randomValues); + lookupFrozenRandomValues(*_tree, *_allocator,_randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + insertRandomValues(*_tree, *_allocator, _randomValues); + freeTree(true); + + fillRandomValues(1000000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + freeTree(false); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::FrozenBTreeTest); diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore 
b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore new file mode 100644 index 00000000000..3ad290f1731 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore @@ -0,0 +1 @@ +searchlib_compact_document_words_store_test_app diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt new file mode 100644 index 00000000000..666639f20ba --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_compact_document_words_store_test_app + SOURCES + compact_document_words_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_compact_document_words_store_test_app COMMAND searchlib_compact_document_words_store_test_app) diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/DESC b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC new file mode 100644 index 00000000000..ee9c4b346a2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC @@ -0,0 +1 @@ +compact_document_words_store test. Take a look at compact_document_words_store_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/FILES b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES new file mode 100644 index 00000000000..fb2fb1d637b --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES @@ -0,0 +1 @@ +compact_document_words_store_test.cpp diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp new file mode 100644 index 00000000000..2a3bffb2fe6 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".memoryindex.compact_document_words_store_test"); +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::btree; +using namespace search::memoryindex; + +typedef CompactDocumentWordsStore::Builder Builder; +typedef CompactDocumentWordsStore::Iterator Iterator; +typedef Builder::WordRefVector WordRefVector; + +const EntryRef w1(1); +const EntryRef w2(2); +const EntryRef w3(3); +const EntryRef w4(4); +const uint32_t d1(111); +const uint32_t d2(222); +const uint32_t d3(333); +const uint32_t d4(444); + +WordRefVector +build(Iterator itr) +{ + WordRefVector words; + for (; itr.valid(); ++itr) { + words.push_back(itr.wordRef()); + } + return words; +} + +vespalib::string +toStr(Iterator itr) +{ + WordRefVector words = build(itr); + std::ostringstream oss; + oss << "["; + bool firstWord = true; + for (auto word : words) { + if (!firstWord) oss << ","; + oss << word.ref(); + firstWord = false; + } + oss << "]"; + return oss.str(); +} + +struct SingleFixture +{ + CompactDocumentWordsStore _store; + SingleFixture() : _store() { + 
_store.insert(Builder(d1).insert(w1).insert(w2).insert(w3)); + } +}; + +struct MultiFixture +{ + CompactDocumentWordsStore _store; + MultiFixture() : _store() { + _store.insert(Builder(d1).insert(w1)); + _store.insert(Builder(d2).insert(w2)); + _store.insert(Builder(d3).insert(w3)); + } +}; + + +TEST_F("require that fields and words can be added for a document", SingleFixture) +{ + EXPECT_EQUAL("[1,2,3]", toStr(f._store.get(d1))); +} + +TEST_F("require that multiple documents can be added", MultiFixture) +{ + EXPECT_EQUAL("[1]", toStr(f._store.get(d1))); + EXPECT_EQUAL("[2]", toStr(f._store.get(d2))); + EXPECT_EQUAL("[3]", toStr(f._store.get(d3))); + EXPECT_FALSE(f._store.get(d4).valid()); +} + +TEST_F("require that documents can be removed", MultiFixture) +{ + f._store.remove(d2); + EXPECT_TRUE(f._store.get(d1).valid()); + EXPECT_FALSE(f._store.get(d2).valid()); + EXPECT_TRUE(f._store.get(d3).valid()); +} + +TEST_F("require that documents can be removed and re-inserted", MultiFixture) +{ + f._store.remove(d2); + f._store.insert(Builder(d2).insert(w4)); + EXPECT_EQUAL("[4]", toStr(f._store.get(d2))); +} + +TEST("require that a lot of words can be inserted, retrieved and removed") +{ + CompactDocumentWordsStore store; + for (uint32_t docId = 0; docId < 50; ++docId) { + Builder b(docId); + for (uint32_t wordRef = 0; wordRef < 20000; ++wordRef) { + b.insert(wordRef); + } + store.insert(b); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (insert): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } + for (uint32_t docId = 0; docId < 50; ++docId) { + WordRefVector words = build(store.get(docId)); + EXPECT_EQUAL(20000u, words.size()); + uint32_t wordRef = 0; + for (auto word : words) { + EXPECT_EQUAL(wordRef++, word.ref()); + } + store.remove(docId); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (remove): docId=" << docId << ", alloc=" << 
usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } +} + +TEST("require that initial memory usage is reported") +{ + CompactDocumentWordsStore store; + CompactDocumentWordsStore::DocumentWordsMap docs; + CompactDocumentWordsStore::Store internalStore; + MemoryUsage initExp; + initExp.incAllocatedBytes(docs.getMemoryConsumption()); + initExp.incUsedBytes(docs.getMemoryUsed()); + initExp.merge(internalStore.getMemoryUsage()); + MemoryUsage init = store.getMemoryUsage(); + EXPECT_EQUAL(initExp.allocatedBytes(), init.allocatedBytes()); + EXPECT_EQUAL(initExp.usedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), 0u); + EXPECT_GREATER(init.usedBytes(), 0u); +} + +TEST("require that memory usage is updated after insert") +{ + CompactDocumentWordsStore store; + MemoryUsage init = store.getMemoryUsage(); + + store.insert(Builder(d1).insert(w1)); + MemoryUsage after = store.getMemoryUsage(); + EXPECT_GREATER_EQUAL(after.allocatedBytes(), init.allocatedBytes()); + EXPECT_GREATER(after.usedBytes(), init.usedBytes()); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } + diff --git a/searchlib/src/tests/memoryindex/datastore/.gitignore b/searchlib/src/tests/memoryindex/datastore/.gitignore new file mode 100644 index 00000000000..98f4acc70a8 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +datastore_test +featurestore_test +wordstore_test +searchlib_datastore_test_app +searchlib_featurestore_test_app +searchlib_wordstore_test_app diff --git a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt new file mode 100644 index 00000000000..da45288fe5e --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_datastore_test_app + SOURCES + datastore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_datastore_test_app COMMAND searchlib_datastore_test_app) +vespa_add_executable(searchlib_featurestore_test_app + SOURCES + featurestore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurestore_test_app COMMAND searchlib_featurestore_test_app) +vespa_add_executable(searchlib_wordstore_test_app + SOURCES + wordstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_wordstore_test_app COMMAND searchlib_wordstore_test_app) diff --git a/searchlib/src/tests/memoryindex/datastore/DESC b/searchlib/src/tests/memoryindex/datastore/DESC new file mode 100644 index 00000000000..56725396b65 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/DESC @@ -0,0 +1 @@ +datastore test. Take a look at datastore_test.cpp and wordstore_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/datastore/FILES b/searchlib/src/tests/memoryindex/datastore/FILES new file mode 100644 index 00000000000..6cbbaf6a328 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/FILES @@ -0,0 +1,2 @@ +datastore_test.cpp +wordstore_test.cpp diff --git a/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp new file mode 100644 index 00000000000..be55dd7ee1e --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("datastore_test"); +#include +#include +#include + +namespace search { +namespace btree { + +class MyStore : public DataStore > { +private: + typedef DataStore > ParentType; + using ParentType::_buffers; + using ParentType::_states; + using ParentType::_activeBufferIds; +public: + MyStore() {} + + void + holdBuffer(uint32_t bufferId) + { + ParentType::holdBuffer(bufferId); + } + + void + holdElem(EntryRef ref, uint64_t len) + { + ParentType::holdElem(ref, len); + } + + void + transferHoldLists(generation_t generation) + { + ParentType::transferHoldLists(generation); + } + + void trimElemHoldList(generation_t usedGen) { + ParentType::trimElemHoldList(usedGen); + } + void incDead(EntryRef ref, uint64_t dead) { + ParentType::incDead(ref, dead); + } + void ensureBufferCapacity(size_t sizeNeeded) { + ParentType::ensureBufferCapacity(0, sizeNeeded); + } + void enableFreeLists() { + ParentType::enableFreeLists(); + } + + void + switchActiveBuffer(void) + { + ParentType::switchActiveBuffer(0, 0u); + } + std::vector & buffers() { return _buffers; } + std::vector &statesVec() { return _states; } + size_t activeBufferId() const { return _activeBufferIds[0]; } +}; + +typedef MyStore::RefType MyRef; + +class Test : public vespalib::TestApp { +private: + bool assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act); + void requireThatEntryRefIsWorking(); + void requireThatAlignedEntryRefIsWorking(); + void requireThatEntriesCanBeAddedAndRetrieved(); + void requireThatAddEntryTriggersChangeOfBuffer(); + void requireThatWeCanHoldAndTrimBuffers(); + void requireThatWeCanHoldAndTrimElements(); + void requireThatWeCanUseFreeLists(); + void requireThatMemoryStatsAreCalculated(); + void requireThatMemoryUsageIsCalculated(); + + void + requireThatWecanDisableElemHoldList(void); +public: + int Main(); +}; + +bool +Test::assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act) +{ + if 
(!EXPECT_EQUAL(exp._allocElems, act._allocElems)) return false; + if (!EXPECT_EQUAL(exp._usedElems, act._usedElems)) return false; + if (!EXPECT_EQUAL(exp._deadElems, act._deadElems)) return false; + if (!EXPECT_EQUAL(exp._holdElems, act._holdElems)) return false; + if (!EXPECT_EQUAL(exp._freeBuffers, act._freeBuffers)) return false; + if (!EXPECT_EQUAL(exp._activeBuffers, act._activeBuffers)) return false; + if (!EXPECT_EQUAL(exp._holdBuffers, act._holdBuffers)) return false; + return true; +} + +void +Test::requireThatEntryRefIsWorking() +{ + typedef EntryRefT<22> MyRefType; + EXPECT_EQUAL(4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + EXPECT_EQUAL(237u, r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(4194303, 1023); + EXPECT_EQUAL(4194303u, r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } + { + MyRefType r1(6498, 76); + MyRefType r2(r1); + EXPECT_EQUAL(r1.offset(), r2.offset()); + EXPECT_EQUAL(r1.bufferId(), r2.bufferId()); + } +} + +void +Test::requireThatAlignedEntryRefIsWorking() +{ + typedef AlignedEntryRefT<22, 2> MyRefType; // 4 byte alignment + EXPECT_EQUAL(4 * 4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + EXPECT_EQUAL(0u, MyRefType::align(0)); + EXPECT_EQUAL(4u, MyRefType::align(1)); + EXPECT_EQUAL(4u, MyRefType::align(2)); + EXPECT_EQUAL(4u, MyRefType::align(3)); + EXPECT_EQUAL(4u, MyRefType::align(4)); + EXPECT_EQUAL(8u, MyRefType::align(5)); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + EXPECT_EQUAL(MyRefType::align(237), r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(MyRefType::offsetSize() - 4, 1023); + EXPECT_EQUAL(MyRefType::align(MyRefType::offsetSize() - 4), r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } +} +
+void +Test::requireThatEntriesCanBeAddedAndRetrieved() +{ + typedef DataStore IntStore; + IntStore ds; + EntryRef r1 = ds.addEntry(10); + EntryRef r2 = ds.addEntry(20); + EntryRef r3 = ds.addEntry(30); + EXPECT_EQUAL(1u, IntStore::RefType(r1).offset()); + EXPECT_EQUAL(2u, IntStore::RefType(r2).offset()); + EXPECT_EQUAL(3u, IntStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, IntStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r3).bufferId()); + EXPECT_EQUAL(10, ds.getEntry(r1)); + EXPECT_EQUAL(20, ds.getEntry(r2)); + EXPECT_EQUAL(30, ds.getEntry(r3)); +} + +void +Test::requireThatAddEntryTriggersChangeOfBuffer() +{ + typedef DataStore > Store; + Store s; + uint64_t num = 0; + uint32_t lastId = 0; + uint64_t lastNum = 0; + for (;;++num) { + EntryRef r = s.addEntry(num); + EXPECT_EQUAL(num, s.getEntry(r)); + uint32_t bufferId = Store::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, "Changed to bufferId %u after %" PRIu64 " nums", bufferId, num); + EXPECT_EQUAL(Store::RefType::offsetSize() - (lastId == 0), + num - lastNum); + lastId = bufferId; + lastNum = num; + } + if (bufferId == 2) { + break; + } + } + EXPECT_EQUAL(Store::RefType::offsetSize() * 2 - 1, num); + LOG(info, "Added %" PRIu64 " nums in 2 buffers", num); +} + +void +Test::requireThatWeCanHoldAndTrimBuffers() +{ + MyStore s; + EXPECT_EQUAL(0u, MyRef(s.addEntry(1)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(1u, s.activeBufferId()); + s.holdBuffer(0); // hold last buffer + s.transferHoldLists(10); + + EXPECT_EQUAL(1u, MyRef(s.addEntry(2)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(2u, s.activeBufferId()); + s.holdBuffer(1); // hold last buffer + s.transferHoldLists(20); + + EXPECT_EQUAL(2u, MyRef(s.addEntry(3)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(3u, s.activeBufferId()); + s.holdBuffer(2); // hold last buffer + s.transferHoldLists(30); + + EXPECT_EQUAL(3u, 
MyRef(s.addEntry(4)).bufferId()); + s.holdBuffer(3); // hold current buffer + s.transferHoldLists(40); + + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + s.trimHoldLists(11); + EXPECT_TRUE(s.statesVec()[0].size() == 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + + s.switchActiveBuffer(); + EXPECT_EQUAL(0u, s.activeBufferId()); + EXPECT_EQUAL(0u, MyRef(s.addEntry(5)).bufferId()); + s.trimHoldLists(41); + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() == 0); + EXPECT_TRUE(s.statesVec()[2].size() == 0); + EXPECT_TRUE(s.statesVec()[3].size() == 0); +} + +void +Test::requireThatWeCanHoldAndTrimElements() +{ + MyStore s; + MyRef r1 = s.addEntry(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + MyRef r3 = s.addEntry(3); + s.holdElem(r3, 1); + s.transferHoldLists(30); + EXPECT_EQUAL(1, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(11); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(31); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(0, s.getEntry(r2)); + EXPECT_EQUAL(0, s.getEntry(r3)); +} + +void +Test::requireThatWeCanUseFreeLists() +{ + MyStore s; + s.enableFreeLists(); + MyRef r1 = s.addEntry2(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry2(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + s.trimElemHoldList(11); + MyRef r3 = s.addEntry2(3); // reuse r1 + EXPECT_EQUAL(r1.offset(), r3.offset()); + EXPECT_EQUAL(r1.bufferId(), r3.bufferId()); + MyRef r4 = s.addEntry2(4); + EXPECT_EQUAL(r2.offset() + 1, r4.offset()); + s.trimElemHoldList(21); + MyRef r5 = 
s.addEntry2(5); // reuse r2 + EXPECT_EQUAL(r2.offset(), r5.offset()); + EXPECT_EQUAL(r2.bufferId(), r5.bufferId()); + MyRef r6 = s.addEntry2(6); + EXPECT_EQUAL(r4.offset() + 1, r6.offset()); + EXPECT_EQUAL(3, s.getEntry(r1)); + EXPECT_EQUAL(5, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + EXPECT_EQUAL(4, s.getEntry(r4)); + EXPECT_EQUAL(5, s.getEntry(r5)); + EXPECT_EQUAL(6, s.getEntry(r6)); +} + +void +Test::requireThatMemoryStatsAreCalculated() +{ + MyStore s; + DataStoreBase::MemStats m; + m._allocElems = MyRef::offsetSize(); + m._usedElems = 1; // ref = 0 is reserved + m._deadElems = 1; // ref = 0 is reserved + m._holdElems = 0; + m._activeBuffers = 1; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // add entry + MyRef r = s.addEntry(10); + m._usedElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // inc dead + s.incDead(r, 1); + m._deadElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // hold buffer + s.addEntry(20); + s.addEntry(30); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + m._usedElems += 2; + m._holdElems += 2; // used - dead + m._activeBuffers--; + m._holdBuffers++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // new active buffer + s.switchActiveBuffer(); + s.addEntry(40); + m._allocElems += MyRef::offsetSize(); + m._usedElems++; + m._activeBuffers++; + m._freeBuffers--; + + // trim hold buffer + s.trimHoldLists(101); + m._allocElems -= MyRef::offsetSize(); + m._usedElems = 1; + m._deadElems = 0; + m._holdElems = 0; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); +} + +void +Test::requireThatMemoryUsageIsCalculated() +{ + MyStore s; + MyRef r = s.addEntry(10); + s.addEntry(20); + s.addEntry(30); + s.addEntry(40); + s.incDead(r, 1); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + MemoryUsage m = s.getMemoryUsage(); + 
EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(5 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(3 * sizeof(int), m.allocatedBytesOnHold()); + s.trimHoldLists(101); +} + + +void +Test::requireThatWecanDisableElemHoldList(void) +{ + MyStore s; + MyRef r1 = s.addEntry(10); + MyRef r2 = s.addEntry(20); + MyRef r3 = s.addEntry(30); + (void) r3; + MemoryUsage m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(0 * sizeof(int), m.allocatedBytesOnHold()); + s.holdElem(r1, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.disableElemHoldList(); + s.holdElem(r2, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.transferHoldLists(100); + s.trimHoldLists(101); +} + +int +Test::Main() +{ + TEST_INIT("datastore_test"); + + requireThatEntryRefIsWorking(); + requireThatAlignedEntryRefIsWorking(); + requireThatEntriesCanBeAddedAndRetrieved(); + requireThatAddEntryTriggersChangeOfBuffer(); + requireThatWeCanHoldAndTrimBuffers(); + requireThatWeCanHoldAndTrimElements(); + requireThatWeCanUseFreeLists(); + requireThatMemoryStatsAreCalculated(); + requireThatMemoryUsageIsCalculated(); + requireThatWecanDisableElemHoldList(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); + diff --git a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp new file 
mode 100644 index 00000000000..87d32c90b78 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("featurestore_test"); +#include +#include + +using namespace search::btree; +using namespace search::index; + +namespace search +{ + + +namespace memoryindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + + const Schema & + getSchema(void) const + { + return _schema; + } + + bool + assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act); + + void + requireThatFeaturesCanBeAddedAndRetrieved(void); + + void + requireThatNextWordsAreWorking(void); + void + requireThatAddFeaturesTriggersChangeOfBuffer(void); + +public: + Test(void); + + int + Main(void); +}; + + +bool +Test::assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act) +{ + // docid is not encoded as part of features + if (!EXPECT_EQUAL(exp._elements.size(), + act._elements.size())) + return false; + for (size_t i = 0; i < exp._elements.size(); ++i) { + if (!EXPECT_EQUAL(exp._elements[i]._elementId, + act._elements[i]._elementId)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._numOccs, + act._elements[i]._numOccs)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._elementLen, + act._elements[i]._elementLen)) + return false; + } + if (!EXPECT_EQUAL(exp._wordPositions.size(), act._wordPositions.size())) + return false; + for (size_t i = 0; i < exp._wordPositions.size(); ++i) { + if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, + act._wordPositions[i]._wordPos)) return false; + } + return true; +} + + +DocIdAndFeatures +getFeatures(uint32_t numOccs, + int32_t weight, + uint32_t elemLen) +{ + DocIdAndFeatures f; + f._docId = 0; + 
f._elements.push_back(WordDocElementFeatures(0)); + f._elements.back().setNumOccs(numOccs); + f._elements.back().setWeight(weight); + f._elements.back().setElementLen(elemLen); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } + return f; +} + + +void +Test::requireThatFeaturesCanBeAddedAndRetrieved(void) +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r2).bufferId()); // check r2 (r1 was verified above): the second entry must share buffer 0 + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatNextWordsAreWorking(void) +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, +
"bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r2).bufferId()); // check r2 (r1 was verified above): the second entry must share buffer 0 + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatAddFeaturesTriggersChangeOfBuffer(void) +{ + FeatureStore fs(getSchema()); + size_t cnt = 1; + DocIdAndFeatures act; + uint32_t lastId = 0; + for (;;++cnt) { + uint32_t numOccs = (cnt % 100) + 1; + DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1); + std::pair r = fs.addFeatures(0, f); + fs.getFeatures(0, r.first, act); + EXPECT_TRUE(assertFeatures(f, act)); + uint32_t bufferId = FeatureStore::RefType(r.first).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu feature sets", + bufferId, cnt); + lastId = bufferId; + } + if (bufferId == 1) { + break; + } + } + EXPECT_EQUAL(1u, lastId); + LOG(info, "Added %zu feature sets in 1 buffer", cnt); +} + + +Test::Test() + : _schema() +{ + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", + Schema::STRING, + Schema::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("featurestore_test"); + + requireThatFeaturesCanBeAddedAndRetrieved(); + requireThatNextWordsAreWorking(); + requireThatAddFeaturesTriggersChangeOfBuffer(); + + TEST_DONE(); +} + + +} + + +} + + +TEST_APPHOOK(search::memoryindex::Test);
diff --git a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp new file mode 100644 index 00000000000..825992b3b4f --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("wordstore_test"); +#include +#include + +using namespace search::btree; + +namespace search { +namespace memoryindex { + +class Test : public vespalib::TestApp { +private: + void requireThatWordsCanBeAddedAndRetrieved(); + void requireThatAddWordTriggersChangeOfBuffer(); +public: + int Main(); +}; + +void +Test::requireThatWordsCanBeAddedAndRetrieved() +{ + std::string w1 = "require"; + std::string w2 = "that"; + std::string w3 = "words"; + WordStore ws; + EntryRef r1 = ws.addWord(w1); + EntryRef r2 = ws.addWord(w2); + EntryRef r3 = ws.addWord(w3); + uint32_t invp = WordStore::RefType::align(1); // Reserved as invalid + uint32_t w1s = w1.size() + 1; + uint32_t w1p = WordStore::RefType::pad(w1s); + uint32_t w2s = w2.size() + 1; + uint32_t w2p = WordStore::RefType::pad(w2s); + EXPECT_EQUAL(invp, WordStore::RefType(r1).offset()); + EXPECT_EQUAL(invp + w1s + w1p, WordStore::RefType(r2).offset()); + EXPECT_EQUAL(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, WordStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r3).bufferId()); + EXPECT_EQUAL(std::string("require"), ws.getWord(r1)); + EXPECT_EQUAL(std::string("that"), ws.getWord(r2)); + EXPECT_EQUAL(std::string("words"), ws.getWord(r3)); +} + +void +Test::requireThatAddWordTriggersChangeOfBuffer() +{ + WordStore ws; + size_t word = 0; + uint32_t lastId = 0; + size_t lastWord = 0; + char wordStr[10]; + size_t entrySize = WordStore::RefType::align(6 + 1); + size_t initBufferSpace = 
1024u * WordStore::RefType::align(1); + size_t bufferSpace = initBufferSpace; + size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) / + entrySize; + size_t usedSpace = 0; + size_t sumBufferWords = 0; + for (;;++word) { + snprintf(wordStr, sizeof(wordStr), "%6zu", word); // bounded write into the fixed-size wordStr buffer + // all words uses 12 bytes (include padding) + EntryRef r = ws.addWord(std::string(wordStr)); + EXPECT_EQUAL(std::string(wordStr), ws.getWord(r)); + uint32_t bufferId = WordStore::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu words", + bufferId, word); + EXPECT_EQUAL(bufferWords, word - lastWord); + lastId = bufferId; + lastWord = word; + usedSpace += bufferWords * entrySize; + sumBufferWords += bufferWords; + bufferSpace = usedSpace + initBufferSpace; + bufferWords = bufferSpace / entrySize; + } + if (bufferId == 4) { + break; + } + } + // each buffer can have offsetSize / 12 words + EXPECT_EQUAL(sumBufferWords, word); + LOG(info, "Added %zu words in 4 buffers", word); +} + +int +Test::Main() +{ + TEST_INIT("wordstore_test"); + + requireThatWordsCanBeAddedAndRetrieved(); + requireThatAddWordTriggersChangeOfBuffer(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::memoryindex::Test); + diff --git a/searchlib/src/tests/memoryindex/dictionary/.gitignore b/searchlib/src/tests/memoryindex/dictionary/.gitignore new file mode 100644 index 00000000000..d404d7d7063 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +dictionary_test +dump +/urldump +searchlib_dictionary_test_app diff --git a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt new file mode 100644 index 00000000000..9520b37d267 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_dictionary_test_app + SOURCES + dictionary_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_dictionary_test_app COMMAND searchlib_dictionary_test_app) diff --git a/searchlib/src/tests/memoryindex/dictionary/DESC b/searchlib/src/tests/memoryindex/dictionary/DESC new file mode 100644 index 00000000000..ff559f42641 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/DESC @@ -0,0 +1 @@ +dictionary test. Take a look at dictionary_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/dictionary/FILES b/searchlib/src/tests/memoryindex/dictionary/FILES new file mode 100644 index 00000000000..1f3a8ebef87 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/FILES @@ -0,0 +1 @@ +dictionary_test.cpp diff --git a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp new file mode 100644 index 00000000000..ef8383b23c7 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp @@ -0,0 +1,1528 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + +/* $Id$ + * + * Copyright (C) 2011 Yahoo! 
Technologies Norway AS + * + * All Rights Reserved + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP("dictionary_test"); + +namespace search +{ + +using namespace btree; +using namespace fef; +using namespace index; +using queryeval::SearchIterator; +using document::Document; +using diskindex::CheckPointFile; +using vespalib::GenerationHandler; +using test::InitRangeVerifier; + +namespace memoryindex +{ + +typedef Dictionary::PostingList PostingList; +typedef PostingList::Iterator PostingItr; +typedef PostingList::ConstIterator PostingConstItr; + +class MyBuilder : public IndexBuilder { +private: + std::stringstream _ss; + bool _insideWord; + bool _insideField; + bool _insideDoc; + bool _insideElem; + bool _firstWord; + bool _firstField; + bool _firstDoc; + bool _firstElem; + bool _firstPos; +public: + + MyBuilder(const Schema &schema) + : IndexBuilder(schema), + _ss(), + _insideWord(false), + _insideField(false), + _insideDoc(false), + _insideElem(false), + _firstWord(true), + _firstField(true), + _firstDoc(true), + _firstElem(true), + _firstPos(true) + { + } + + virtual void + startWord(const vespalib::stringref &word) + { + assert(_insideField); + assert(!_insideWord); + if (!_firstWord) + _ss << ","; + _ss << "w=" << word << "["; + _firstDoc = true; + _insideWord = true; + } + + virtual void + endWord(void) + { + assert(_insideWord); + assert(!_insideDoc); + _ss << "]"; + _firstWord = false; + _insideWord = false; + } + + virtual void + startField(uint32_t fieldId) + { + assert(!_insideField); + if (!_firstField) _ss << ","; + _ss << "f=" << fieldId << "["; + _firstWord = true; + _insideField = true; + } + + virtual void + endField() + { + assert(_insideField); + assert(!_insideWord); + _ss << "]"; + 
_firstField = false; + _insideField = false; + } + + virtual void + startDocument(uint32_t docId) + { + assert(_insideWord); + assert(!_insideDoc); + if (!_firstDoc) _ss << ","; + _ss << "d=" << docId << "["; + _firstElem = true; + _insideDoc = true; + } + + virtual void + endDocument(void) + { + assert(_insideDoc); + assert(!_insideElem); + _ss << "]"; + _firstDoc = false; + _insideDoc = false; + } + + virtual void + startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) + { + assert(_insideDoc); + assert(!_insideElem); + if (!_firstElem) + _ss << ","; + _ss << "e=" << elementId << + ",w=" << weight << ",l=" << elementLen << "["; + _firstPos = true; + _insideElem = true; + } + + virtual void + endElement(void) + { + assert(_insideElem); + _ss << "]"; + _firstElem = false; + _insideElem = false; + } + + virtual void + addOcc(const WordDocElementWordPosFeatures &features) + { + assert(_insideElem); + if (!_firstPos) _ss << ","; + _ss << features.getWordPos(); + _firstPos = false; + } + + std::string + toStr(void) const + { + return _ss.str(); + } +}; + +std::string +toString(FieldPositionsIterator posItr, + bool hasElements = false, + bool hasWeights = false) +{ + std::stringstream ss; + ss << "{"; + ss << posItr.getFieldLength() << ":"; + bool first = true; + for (; posItr.valid(); posItr.next()) { + if (!first) ss << ","; + ss << posItr.getPosition(); + first = false; + if (hasElements) { + ss << "[e=" << posItr.getElementId(); + if (hasWeights) + ss << ",w=" << posItr.getElementWeight(); + ss << ",l=" << posItr.getElementLen() << "]"; + } + } + ss << "}"; + return ss.str(); +} + +bool +assertPostingList(const std::string &exp, + PostingConstItr itr, + const FeatureStore *store = NULL) +{ + std::stringstream ss; + FeatureStore::DecodeContextCooked decoder(NULL); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + ss << "["; + for (size_t i = 0; itr.valid(); ++itr, ++i) { + if (i > 0) ss << ","; + uint32_t 
docId = itr.getKey(); + ss << docId; + if (store != NULL) { // consider features as well + EntryRef ref(itr.getData()); + store->setupForField(0, decoder); + store->setupForUnpackFeatures(ref, decoder); + decoder.unpackFeatures(matchData, docId); + ss << toString(tfmd.getIterator()); + } + } + ss << "]"; + return EXPECT_EQUAL(exp, ss.str()); +} + +bool +assertPostingList(std::vector &exp, PostingConstItr itr) +{ + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < exp.size(); ++i) { + if (i > 0) ss << ","; + ss << exp[i]; + } + ss << "]"; + return assertPostingList(ss.str(), itr); +} + + +namespace +{ + +/** + * MockDictionary is a simple mockup of memory index, used to verify + * that we get correct posting lists from real memory index. + */ +class MockDictionary +{ + std::map, std::set> _dict; + vespalib::string _word; + uint32_t _fieldId; + +public: + void + setNextWord(const vespalib::string &word) + { + _word = word; + } + + void + setNextField(uint32_t fieldId) + { + _fieldId = fieldId; + } + + void + add(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].insert(docId); + } + + void + remove(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].erase(docId); + } + + std::vector + find(const vespalib::string &word, uint32_t fieldId) + { + std::vector res; + for (auto docId : _dict[std::make_pair(word, fieldId)] ) { + res.push_back(docId); + } + return res; + } + + auto begin() + { + return _dict.begin(); + } + + auto end() + { + return _dict.end(); + } +}; + + +/** + * MockWordStoreScan is a helper class to ensure that previous word is + * still stored safely in memory, to satisfy OrderedDocumentInserter + * needs. 
+ */
+class MockWordStoreScan
+{
+    // Two buffers used alternately: one holds the current word, the other
+    // the word set by the previous setWord() call.
+    vespalib::string _word0;
+    vespalib::string _word1;
+    vespalib::string *_prevWord;
+    vespalib::string *_word;
+
+public:
+    MockWordStoreScan()
+        : _word0(),
+          _word1(),
+          _prevWord(&_word0),
+          _word(&_word1)
+    {
+    }
+
+    /** Returns the word most recently passed to setWord(). */
+    const vespalib::string &
+    getWord() const
+    {
+        return *_word;
+    }
+
+    /**
+     * Stores a copy of word and returns a reference to the stored copy.
+     *
+     * The buffers are swapped before the copy is written, so the string
+     * returned by the previous call keeps its contents until the call
+     * after this one.
+     */
+    const vespalib::string &
+    setWord(const vespalib::string &word)
+    {
+        std::swap(_prevWord, _word);
+        *_word = word;
+        return *_word;
+    }
+};
+
+/**
+ * MyInserter performs insertions on both a mockup version of memory index
+ * and a real memory index. Mockup version is used to calculate expected
+ * answers.
+ *
+ * setNextField() must be called before setNextWord(), add() or remove(),
+ * since it is the call that selects the document inserter to use.
+ */
+class MyInserter
+{
+    MockWordStoreScan         _wordStoreScan;
+    MockDictionary            _mock;
+    Dictionary                _d;
+    DocIdAndPosOccFeatures    _features;
+    // Inserter of the currently selected field; nullptr until
+    // setNextField() has been called.
+    IOrderedDocumentInserter *_documentInserter;
+
+public:
+    MyInserter(const Schema &schema)
+        : _wordStoreScan(),
+          _mock(),
+          _d(schema),
+          _features(),
+          _documentInserter(nullptr)
+    {
+        // Every add() reuses this single occurrence.
+        _features.addNextOcc(0, 0, 1, 1);
+    }
+
+    /**
+     * Selects the word for following add()/remove() calls, in both the
+     * real inserter and the mock. The word is first copied into the word
+     * store scan helper and that stored copy is what gets passed on.
+     */
+    void
+    setNextWord(const vespalib::string &word)
+    {
+        assert(_documentInserter != nullptr); // setNextField() comes first
+        const vespalib::string &w = _wordStoreScan.setWord(word);
+        _documentInserter->setNextWord(w);
+        _mock.setNextWord(w);
+    }
+
+    /**
+     * Flushes the inserter of the previously selected field (if any), then
+     * switches to, and rewinds, the inserter of the given field.
+     */
+    void
+    setNextField(uint32_t fieldId)
+    {
+        if (_documentInserter != nullptr) {
+            _documentInserter->flush();
+        }
+        _documentInserter = &_d.getFieldIndex(fieldId)->getInserter();
+        _documentInserter->rewind();
+        _mock.setNextField(fieldId);
+    }
+
+    /** Adds docId for the current word and field to both indexes. */
+    void
+    add(uint32_t docId)
+    {
+        assert(_documentInserter != nullptr); // setNextField() comes first
+        _documentInserter->add(docId, _features);
+        _mock.add(docId);
+    }
+
+    /** Removes docId for the current word and field from both indexes. */
+    void
+    remove(uint32_t docId)
+    {
+        assert(_documentInserter != nullptr); // setNextField() comes first
+        _documentInserter->remove(docId);
+        _mock.remove(docId);
+    }
+
+    /**
+     * Checks that the real index has the same posting list as the mock
+     * for the given word and field.
+     */
+    bool
+    assertPosting(const vespalib::string &word,
+                  uint32_t fieldId)
+    {
+        std::vector<uint32_t> exp = _mock.find(word, fieldId);
+        PostingConstItr itr = _d.find(word, fieldId);
+        return EXPECT_TRUE(assertPostingList(exp, itr));
+    }
+
+    /**
+     * Flushes pending inserts and checks every (word, field) pair known to
+     * the mock against the real index. Stops at the first mismatch.
+     */
+    bool
+    assertPostings()
+    {
+        if (_documentInserter != nullptr) {
+            _documentInserter->flush();
+        }
+        // Iterate by const reference: a by-value loop variable would copy
+        // the key and the whole doc id set for every entry.
+        for (const auto &wfp : _mock)
{ + auto &wf = wfp.first; + auto &word = wf.first; + auto fieldId = wf.second; + if (!EXPECT_TRUE(assertPosting(word, fieldId))) { + return false; + } + } + return true; + } + + void + rewind() + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + _documentInserter = nullptr; + } + } + + uint32_t + getNumUniqueWords() + { + return _d.getNumUniqueWords(); + } + + Dictionary &getDict() { return _d; } +}; + +void +myremove(uint32_t docId, DocumentInverter &inv, Dictionary &d, + ISequencedTaskExecutor &invertThreads) +{ + inv.removeDocument(docId); + invertThreads.sync(); + inv.pushDocuments(d, std::shared_ptr()); +} + + +class WrapInserter +{ + OrderedDocumentInserter &_inserter; +public: + WrapInserter(Dictionary &d, uint32_t fieldId) + : _inserter(d.getFieldIndex(fieldId)->getInserter()) + { + } + + WrapInserter &word(const vespalib::stringref &word_) + { + _inserter.setNextWord(word_); + return *this; + } + + WrapInserter &add(uint32_t docId, const index::DocIdAndFeatures &features) + { + _inserter.add(docId, features); + return *this; + } + + WrapInserter &add(uint32_t docId) + { + DocIdAndPosOccFeatures features; + features.addNextOcc(0, 0, 1, 1); + return add(docId, features); + } + + WrapInserter &remove(uint32_t docId) + { + _inserter.remove(docId); + return *this; + } + + WrapInserter &flush() + { + _inserter.flush(); + return *this; + } + + WrapInserter &rewind() + { + _inserter.rewind(); + return *this; + } + + btree::EntryRef + getWordRef() + { + return _inserter.getWordRef(); + } +}; + + +class MyDrainRemoves : IDocumentRemoveListener +{ + DocumentRemover &_remover; +public: + virtual void remove(const vespalib::stringref, uint32_t) override { } + + MyDrainRemoves(Dictionary &d, uint32_t fieldId) + : _remover(d.getFieldIndex(fieldId)->getDocumentRemover()) + { + } + + void drain(uint32_t docId) + { + _remover.remove(docId, *this); + } +}; + +void +myPushDocument(DocumentInverter &inv, Dictionary &d) +{ + inv.pushDocuments(d, 
std::shared_ptr()); +} + + +const FeatureStore * +featureStorePtr(const Dictionary &d, uint32_t fieldId) +{ + return &d.getFieldIndex(fieldId)->getFeatureStore(); +} + +const FeatureStore & +featureStoreRef(const Dictionary &d, uint32_t fieldId) +{ + return d.getFieldIndex(fieldId)->getFeatureStore(); +} + + +DataStoreBase::MemStats +getFeatureStoreMemStats(const Dictionary &d) +{ + DataStoreBase::MemStats res; + uint32_t numFields = d.getNumFields(); + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + DataStoreBase::MemStats stats = + d.getFieldIndex(fieldId)->getFeatureStore().getMemStats(); + res += stats; + } + return res; +} + + +void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->commit(); }); + ++fieldId; + } + pushThreads.sync(); +} + + +void +myCompactFeatures(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->compactFeatures(); }); + ++fieldId; + } +} + +} + + +struct Fixture +{ + Schema _schema; + Fixture() : _schema() { + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + _schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + EXPECT_EQUAL(0u, d.getNumUniqueWords()); + WrapInserter(d, 0).word("a").add(10).flush(); + 
EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[10]", d.findFrozen("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); +} + +TEST_F("requireThatAppendInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + WrapInserter(d, 0).word("a").add(10).flush().rewind(). + word("a").add(5).flush(); + EXPECT_TRUE(assertPostingList("[5,10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + WrapInserter(d, 0).rewind().word("a").add(20).flush(); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.findFrozen("a", 0))); +} + +TEST_F("requireThatMultiplePostingListsCanExist", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10).word("b").add(11).add(15).flush(); + WrapInserter(d, 1).word("a").add(5).word("b").add(12).flush(); + EXPECT_EQUAL(4u, d.getNumUniqueWords()); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[5]", d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[11,15]", d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[12]", d.find("b", 1))); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 2))); + EXPECT_TRUE(assertPostingList("[]", d.find("c", 0))); +} + +TEST_F("requireThatRemoveWorks", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + WrapInserter(d, 0).add(10).add(20).add(30).flush(); + EXPECT_TRUE(assertPostingList("[10,20,30]", d.find("a", 0))); + WrapInserter(d, 0).rewind().word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[20,30]", d.find("a", 0))); + WrapInserter(d, 0).remove(20).flush(); + 
EXPECT_TRUE(assertPostingList("[30]", d.find("a", 0))); + WrapInserter(d, 0).remove(30).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); + MyDrainRemoves(d, 0).drain(10); + WrapInserter(d, 0).rewind().word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); +} + +TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture) +{ + MyInserter inserter(f.getSchema()); + uint32_t numFields = 4; + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { // insert + inserter.add(di * 3); + } + EXPECT_EQUAL((w - 'a' + 1u) + ('z' - 'a' +1u) * fi, + inserter.getNumUniqueWords()); + } + } + EXPECT_TRUE(inserter.assertPostings()); + inserter.rewind(); + for (uint32_t fi = 0; fi < numFields; ++fi) { + MyDrainRemoves drainRemoves(inserter.getDict(), fi); + for (uint32_t di = 0; di < 'z' * 2 + 1; ++di) { + drainRemoves.drain(di); + } + } + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { + // remove half of the docs + if ((di % 2) == 0) { + inserter.remove(di * 2); + } else { + inserter.add(di * 2 + 1); + } + } + } + } + EXPECT_TRUE(inserter.assertPostings()); +} + +void +addElement(DocIdAndFeatures &f, + uint32_t elemLen, + uint32_t numOccs, + int32_t weight = 1) +{ + f._elements.push_back(WordDocElementFeatures(f._elements.size())); + f._elements.back().setElementLen(elemLen); + f._elements.back().setWeight(weight); + f._elements.back().setNumOccs(numOccs); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } +} + +DocIdAndFeatures +getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) +{ + 
DocIdAndFeatures f; + addElement(f, elemLen, numOccs, weight); + return f; +} + +TEST_F("requireThatFeaturesAreInPostingLists", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(1, getFeatures(4, 2)).flush(); + EXPECT_TRUE(assertPostingList("[1{4:0,1}]", + d.find("a", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 0).word("b").add(2, getFeatures(5, 1)). + add(3, getFeatures(6, 2)).flush(); + EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]", + d.find("b", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 1).word("c").add(4, getFeatures(7, 2)).flush(); + EXPECT_TRUE(assertPostingList("[4{7:0,1}]", + d.find("c", 1), + featureStorePtr(d, 1))); +} + +TEST_F("require that initRange conforms", Fixture) { + Dictionary d(f.getSchema()); + InitRangeVerifier ir; + WrapInserter inserter(d, 0); + inserter.word("a"); + for (uint32_t docId : ir.getExpectedDocIds()) { + inserter.add(docId); + } + inserter.flush(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + PostingIterator itr(d.find("a", 0), featureStoreRef(d, 0), 0, matchData); + ir.verify(itr); +} + +TEST_F("requireThatPostingIteratorIsWorking", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10, getFeatures(4, 1)). + add(20, getFeatures(5, 2)). + add(30, getFeatures(6, 1)). 
+ add(40, getFeatures(7, 2)).flush(); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(d.find("not", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(d.find("a", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) +{ + { + MyBuilder b(f.getSchema()); + WordDocElementWordPosFeatures wpf; + b.startField(4); + b.startWord("a"); + b.startDocument(2); + b.startElement(0, 10, 20); + wpf.setWordPos(1); + b.addOcc(wpf); + wpf.setWordPos(3); + b.addOcc(wpf); + b.endElement(); + b.endDocument(); + b.endWord(); + b.endField(); + EXPECT_EQUAL("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr()); + } + { + Dictionary d(f.getSchema()); + MyBuilder b(f.getSchema()); + DocIdAndFeatures df; + WrapInserter(d, 1).word("a").add(5, getFeatures(2, 1)). + add(7, getFeatures(3, 2)). 
+ word("b").add(5, getFeatures(12, 2)).flush(); + + df = getFeatures(4, 1); + addElement(df, 5, 2); + WrapInserter(d, 2).word("a").add(5, df); + df = getFeatures(6, 1); + addElement(df, 7, 2); + WrapInserter(d, 2).add(7, df).flush(); + + df = getFeatures(8, 1, 12); + addElement(df, 9, 2, 13); + WrapInserter(d, 3).word("a").add(5, df); + df = getFeatures(10, 1, 14); + addElement(df, 11, 2, 15); + WrapInserter(d, 3).add(7, df).flush(); + + d.dump(b); + + EXPECT_EQUAL("f=0[]," + "f=1[w=a[d=5[e=0,w=1,l=2[0]],d=7[e=0,w=1,l=3[0,1]]]," + "w=b[d=5[e=0,w=1,l=12[0,1]]]]," + "f=2[w=a[d=5[e=0,w=1,l=4[0],e=1,w=1,l=5[0,1]]," + "d=7[e=0,w=1,l=6[0],e=1,w=1,l=7[0,1]]]]," + "f=3[w=a[d=5[e=0,w=12,l=8[0],e=1,w=13,l=9[0,1]]," + "d=7[e=0,w=14,l=10[0],e=1,w=15,l=11[0,1]]]]", + b.toStr()); + } + { // test word with no docs + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(2, getFeatures(2, 1)). + word("b").add(4, getFeatures(4, 1)).flush().rewind(). + word("a").remove(2).flush(); + { + MyBuilder b(f.getSchema()); + d.dump(b); + EXPECT_EQUAL("f=0[w=b[d=4[e=0,w=1,l=4[0]]]],f=1[],f=2[],f=3[]", + b.toStr()); + } + { + search::diskindex::IndexBuilder b(f.getSchema()); + b.setPrefix("dump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + b.open(5, 2, tuneFileIndexing, fileHeaderContext); + d.dump(b); + b.close(); + } + } +} + + +template +class DictionaryFixture : public FixtureBase +{ +public: + using FixtureBase::getSchema; + Dictionary _d; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + + DictionaryFixture() + : FixtureBase(), + _d(getSchema()), + _b(getSchema()), + _invertThreads(2), + _pushThreads(2), + _inv(getSchema(), _invertThreads, _pushThreads) + { + } +}; + + +TEST_F("requireThatInversionIsWorking", DictionaryFixture) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). 
+ endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::20"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(20, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::30"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + addStr("e").addStr("f"). + endField(); + f._b.startIndexField("f1"). + addStr("\nw2").addStr("w").addStr("x"). + addStr("\nw3").addStr("y").addStr("z"). + endField(); + f._b.startIndexField("f2"). + startElement(4). + addStr("w").addStr("x"). + endElement(). + startElement(5). + addStr("y").addStr("z"). + endElement(). + endField(); + f._b.startIndexField("f3"). + startElement(6). + addStr("w").addStr("x"). + endElement(). + startElement(7). + addStr("y").addStr("z"). + endElement(). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(30, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::40"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). + addStr("e").addStr("f"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(40, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::999"); + f._b.startIndexField("f0"). + addStr("this").addStr("is").addStr("_a_").addStr("test"). + addStr("for").addStr("insertion").addStr("speed").addStr("with"). + addStr("more").addStr("than").addStr("just").addStr("__a__"). + addStr("few").addStr("words").addStr("present").addStr("in"). + addStr("some").addStr("of").addStr("the").addStr("fields"). + endField(); + f._b.startIndexField("f1"). 
+ addStr("the").addStr("other").addStr("field").addStr("also"). + addStr("has").addStr("some").addStr("content"). + endField(); + f._b.startIndexField("f2"). + startElement(1). + addStr("strange").addStr("things").addStr("here"). + addStr("has").addStr("some").addStr("content"). + endElement(). + endField(); + f._b.startIndexField("f3"). + startElement(3). + addStr("not").addStr("a").addStr("weighty").addStr("argument"). + endElement(). + endField(); + doc = f._b.endDocument(); + for (uint32_t docId = 10000; docId < 20000; ++docId) { + f._inv.invertDocument(docId, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + } + + f._pushThreads.sync(); + DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._d); + LOG(info, + "Before feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + beforeStats._allocElems, + beforeStats._usedElems, + beforeStats._deadElems, + beforeStats._holdElems, + beforeStats._freeBuffers, + beforeStats._activeBuffers, + beforeStats._holdBuffers); + myCompactFeatures(f._d, f._pushThreads); + std::vector> guards; + for (auto &fieldIndex : f._d.getFieldIndexes()) { + guards.push_back(std::make_unique + (fieldIndex->takeGenerationGuard())); + } + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._d); + LOG(info, + "During feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + duringStats._allocElems, + duringStats._usedElems, + duringStats._deadElems, + duringStats._holdElems, + duringStats._freeBuffers, + duringStats._activeBuffers, + duringStats._holdBuffers); + guards.clear(); + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats afterStats = 
getFeatureStoreMemStats(f._d); + LOG(info, + "After feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + afterStats._allocElems, + afterStats._usedElems, + afterStats._deadElems, + afterStats._holdElems, + afterStats._freeBuffers, + afterStats._activeBuffers, + afterStats._holdBuffers); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(f._d.findFrozen("not", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("a", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1,4}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 1), featureStoreRef(f._d, 1), + 1, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:2[e=0,w=1,l=6]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator itr(f._d.findFrozen("x", 2), featureStoreRef(f._d, 2), + 2, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + // weight is hardcoded to 1 for new style il doc array field + EXPECT_EQUAL("{2:1[e=0,w=1,l=2]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator 
itr(f._d.findFrozen("x", 3), featureStoreRef(f._d, 3), + 3, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{2:1[e=0,w=6,l=2]}", + toString(tfmd.getIterator(), true, true)); + } +} + +TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", + DictionaryFixture) +{ + Document::UP doc; + + f._b.startDocument("doc::1"); + f._b.startIndexField("f0").addStr("a").addStr("b").endField(); + f._b.startIndexField("f1").addStr("a").addStr("c").endField(); + Document::UP doc1 = f._b.endDocument(); + f._inv.invertDocument(1, *doc1.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::2"); + f._b.startIndexField("f0").addStr("b").addStr("c").endField(); + Document::UP doc2 = f._b.endDocument(); + f._inv.invertDocument(2, *doc2.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[1,2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("c", 1))); + + myremove(1, f._inv, f._d, f._invertThreads); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("c", 1))); +} + +class UriFixture +{ +public: + Schema _schema; + UriFixture() + : _schema() + { + _schema.addUriIndexFields(Schema::IndexField("iu", + Schema::STRING)); + _schema.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + _schema.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & 
getSchema() const { return _schema; } +}; + + +TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("iu"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + f._b.startIndexField("iau"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). 
+ endSubField(). + endElement(). + endField(); + f._b.startIndexField("iwu"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). 
+ endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iau"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,l=9]}", + toString(tfmd.getIterator(), true, false)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iwu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,w=4,l=9]}", + toString(tfmd.getIterator(), true, true)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + search::diskindex::IndexBuilder dib(f.getSchema()); + dib.setPrefix("urldump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + dib.open(11, f._d.getNumUniqueWords(), tuneFileIndexing, + fileHeaderContext); + f._d.dump(dib); + dib.close(); + } +} + + +class SingleFieldFixture +{ +public: + Schema 
_schema; + SingleFieldFixture() + : _schema() + { + _schema.addIndexField(Schema::IndexField("i", Schema::STRING)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("i"). + addStr("我就是那个"). + setAutoSpace(false). + addStr("大灰狼"). + setAutoSpace(true). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("我就" + "是那个", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("大灰" + "狼", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +void +insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t docId, + Dictionary &dict) +{ + EntryRef wordRef = WrapInserter(dict, fieldId).rewind().word(word). 
+ add(docId).flush().getWordRef(); + EXPECT_EQUAL(word, + dict.getFieldIndex(fieldId)->getWordStore().getWord(wordRef)); + MyDrainRemoves(dict, fieldId).drain(docId); +} + +TEST_F("require that insert tells which word ref that was inserted", Fixture) +{ + Dictionary d(f.getSchema()); + insertAndAssertTuple("a", 1, 11, d); + insertAndAssertTuple("b", 1, 11, d); + insertAndAssertTuple("a", 2, 11, d); + + insertAndAssertTuple("a", 1, 22, d); + insertAndAssertTuple("b", 2, 22, d); + insertAndAssertTuple("c", 2, 22, d); +} + +struct RemoverFixture : public Fixture +{ + Dictionary _d; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + + RemoverFixture() + : + Fixture(), + _d(getSchema()), + _invertThreads(2), + _pushThreads(2) + { + } + void assertPostingLists(const vespalib::string &e1, + const vespalib::string &e2, + const vespalib::string &e3) { + EXPECT_TRUE(assertPostingList(e1, _d.find("a", 1))); + EXPECT_TRUE(assertPostingList(e2, _d.find("a", 2))); + EXPECT_TRUE(assertPostingList(e3, _d.find("b", 1))); + } + void remove(uint32_t docId) { + DocumentInverter inv(getSchema(), _invertThreads, _pushThreads); + myremove(docId, inv, _d, _invertThreads); + _pushThreads.sync(); + EXPECT_FALSE(_d.getFieldIndex(0u)->getDocumentRemover(). + getStore().get(docId).valid()); + } +}; + +TEST_F("require that document remover can remove several documents", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).add(13).add(15). 
+ word("b").add(11).add(15).flush(); + WrapInserter(f._d, 2).word("a").add(11).add(13).flush(); + f.assertPostingLists("[11,13,15]", "[11,13]", "[11,15]"); + + f.remove(13); + f.assertPostingLists("[11,15]", "[11]", "[11,15]"); + + f.remove(11); + f.assertPostingLists("[15]", "[]", "[15]"); + + f.remove(15); + f.assertPostingLists("[]", "[]", "[]"); +} + +TEST_F("require that removal of non-existing document does not do anything", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).word("b").add(11).flush(); + WrapInserter(f._d, 2).word("a").add(11).flush(); + f.assertPostingLists("[11]", "[11]", "[11]"); + f.remove(13); + f.assertPostingLists("[11]", "[11]", "[11]"); +} + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/document_remover/.gitignore b/searchlib/src/tests/memoryindex/document_remover/.gitignore new file mode 100644 index 00000000000..2126f9147bd --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/.gitignore @@ -0,0 +1 @@ +searchlib_document_remover_test_app diff --git a/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt new file mode 100644 index 00000000000..e918d0400b2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_remover_test_app + SOURCES + document_remover_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_remover_test_app COMMAND searchlib_document_remover_test_app) diff --git a/searchlib/src/tests/memoryindex/document_remover/DESC b/searchlib/src/tests/memoryindex/document_remover/DESC new file mode 100644 index 00000000000..7fe35ab896f --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/DESC @@ -0,0 +1 @@ +document remover test. Take a look at document_remover_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/document_remover/FILES b/searchlib/src/tests/memoryindex/document_remover/FILES new file mode 100644 index 00000000000..9b7cb9a8cfa --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/FILES @@ -0,0 +1 @@ +document_remover_test.cpp diff --git a/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp new file mode 100644 index 00000000000..8c6751adbeb --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP("document_remover_test"); +#include + +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::memoryindex; + +struct WordFieldPair +{ + vespalib::string _word; + uint32_t _fieldId; + WordFieldPair(const vespalib::stringref &word, uint32_t fieldId) + : _word(word), _fieldId(fieldId) + {} + bool operator<(const WordFieldPair &rhs) { + if (_word != rhs._word) { + return _word < rhs._word; + } + return _fieldId < rhs._fieldId; + } +}; + +typedef std::vector WordFieldVector; + +std::ostream & +operator<<(std::ostream &os, const WordFieldPair &val) +{ + os << "{" << val._word << "," << val._fieldId << "}"; + return os; +} + +struct MockRemoveListener : public IDocumentRemoveListener +{ + WordFieldVector _words; + uint32_t _expDocId; + uint32_t _fieldId; + virtual void remove(const vespalib::stringref word, uint32_t docId) override { + EXPECT_EQUAL(_expDocId, docId); + _words.emplace_back(word, _fieldId); + } + void reset(uint32_t expDocId) { + _words.clear(); + _expDocId = expDocId; + } + vespalib::string getWords() { + std::sort(_words.begin(), _words.end()); + std::ostringstream oss; + oss << _words; + return oss.str(); + } + void setFieldId(uint32_t fieldId) { _fieldId = fieldId; } +}; + +struct Fixture +{ + MockRemoveListener _listener; + std::vector> _wordStores; + std::vector> _wordToRefMaps; + std::vector> _removers; + Fixture() + : _listener(), + _wordStores(), + _wordToRefMaps(), + _removers() + { + uint32_t numFields = 4; + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + _wordStores.push_back(std::make_unique()); + _removers.push_back(std::make_unique + (*_wordStores.back())); + } + _wordToRefMaps.resize(numFields); + } + btree::EntryRef getWordRef(const vespalib::string &word, uint32_t fieldId) { + auto &wordToRefMap = _wordToRefMaps[fieldId]; + WordStore &wordStore = *_wordStores[fieldId]; + auto itr = wordToRefMap.find(word); + if (itr == wordToRefMap.end()) { + 
btree::EntryRef ref = wordStore.addWord(word); + wordToRefMap[word] = ref; + return ref; + } + return itr->second; + } + Fixture &insert(const vespalib::string &word, uint32_t fieldId, uint32_t docId) { + assert(fieldId < _wordStores.size()); + _removers[fieldId]->insert(getWordRef(word, fieldId), docId); + return *this; + } + void flush() { + for (auto &remover : _removers) { + remover->flush(); + } + } + vespalib::string remove(uint32_t docId) { + _listener.reset(docId); + uint32_t fieldId = 0; + for (auto &remover : _removers) { + _listener.setFieldId(fieldId); + remover->remove(docId, _listener); + ++fieldId; + } + return _listener.getWords(); + } +}; + +TEST_F("require that {word,fieldId} pairs for multiple doc ids can be inserted", Fixture) +{ + f.insert("a", 1, 10).insert("a", 1, 20).insert("a", 1, 30); + f.insert("a", 2, 10).insert("a", 2, 20); + f.insert("b", 1, 20).insert("b", 1, 30); + f.insert("b", 2, 10).insert("b", 2, 30); + f.insert("c", 1, 10); + f.insert("c", 2, 20); + f.insert("c", 3, 30); + f.flush(); + + EXPECT_EQUAL("[{a,1},{a,2},{b,2},{c,1}]", f.remove(10)); + EXPECT_EQUAL("[{a,1},{a,2},{b,1},{c,2}]", f.remove(20)); + EXPECT_EQUAL("[{a,1},{b,1},{b,2},{c,3}]", f.remove(30)); +} + +TEST_F("require that we can insert after flush", Fixture) +{ + f.insert("a", 1, 10).insert("b", 1, 10); + f.flush(); + f.insert("b", 1, 20).insert("b", 2, 20); + f.flush(); + + EXPECT_EQUAL("[{a,1},{b,1}]", f.remove(10)); + EXPECT_EQUAL("[{b,1},{b,2}]", f.remove(20)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/documentinverter/.gitignore b/searchlib/src/tests/memoryindex/documentinverter/.gitignore new file mode 100644 index 00000000000..1e9666b2d63 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/.gitignore @@ -0,0 +1 @@ +searchlib_documentinverter_test_app diff --git a/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt new file 
mode 100644 index 00000000000..85a77fad361 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_documentinverter_test_app + SOURCES + documentinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_documentinverter_test_app COMMAND searchlib_documentinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/documentinverter/DESC b/searchlib/src/tests/memoryindex/documentinverter/DESC new file mode 100644 index 00000000000..5dc610c2a24 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/DESC @@ -0,0 +1 @@ +Document inverter test. Take a look at documentinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/documentinverter/FILES b/searchlib/src/tests/memoryindex/documentinverter/FILES new file mode 100644 index 00000000000..c54817b9df1 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/FILES @@ -0,0 +1 @@ +documentinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp new file mode 100644 index 00000000000..d3ad1f54e95 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp @@ -0,0 +1,294 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/* -*- mode: C++; coding: utf-8; -*- */ + + +#include +#include +LOG_SETUP("documentinverter_test"); +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _invertThreads(2), + _pushThreads(2), + _inv(_schema, _invertThreads, _pushThreads), + _inserter() + { + } + + void + pushDocuments() + { + _invertThreads.sync(); + uint32_t fieldId = 0; + for (auto &inverter : _inv.getInverters()) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + _pushThreads.sync(); + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("b", 10); + f._inv.getInverter(0)->remove("a", 10); + f._inv.getInverter(0)->remove("b", 11); + f._inv.getInverter(2)->remove("c", 12); + f._inv.getInverter(1)->remove("a", 10); + f.pushDocuments(); + 
EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(12); + f._inv.removeDocument(13); + f._inv.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("a", 11); + f._inv.getInverter(0)->remove("c", 9); + 
f._inv.getInverter(0)->remove("d", 10); + f._inv.getInverter(0)->remove("z", 12); + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f._inv.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/fieldinverter/.gitignore b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore new file mode 100644 index 00000000000..482663dd92e --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_fieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..9d81ebbb57c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fieldinverter_test_app + SOURCES + fieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_fieldinverter_test_app COMMAND searchlib_fieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/fieldinverter/DESC b/searchlib/src/tests/memoryindex/fieldinverter/DESC new file mode 100644 index 00000000000..a40890fdc3d --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/DESC @@ -0,0 +1 @@ +Field inverter test. Take a look at fieldinverter_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/FILES b/searchlib/src/tests/memoryindex/fieldinverter/FILES new file mode 100644 index 00000000000..892febd1c50 --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/FILES @@ -0,0 +1 @@ +fieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp new file mode 100644 index 00000000000..6216ba9eb3c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp @@ -0,0 +1,338 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include +#include +LOG_SETUP("fieldinverter_test"); +#include +#include +#include +#include +#include +#include + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + + +Document::UP +makeDoc16(DocBuilder &b) +{ + b.startDocument("doc::16"); + b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). + addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). + addStr("z").endField(); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector > _inverters; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _inverters(), + _inserter() + { + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique(_schema, + fieldId)); + } + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + vespalib::stringref fieldName = + _schema.getIndexField(fieldId).getName(); + inverter->invertField(docId, doc.getValue(fieldName)); + ++fieldId; + } + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + removeDocument(uint32_t docId) { + for (auto &inverter : _inverters) { + inverter->removeDocument(docId); + } + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + 
+TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inverters[0]->remove("b", 10); + f._inverters[0]->remove("a", 10); + f._inverters[0]->remove("b", 11); + f._inverters[2]->remove("c", 12); + f._inverters[1]->remove("a", 10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + f.invertDocument(13, *doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + 
f.invertDocument(13, *doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(12); + f.removeDocument(13); + f.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inverters[0]->remove("a", 11); + f._inverters[0]->remove("c", 9); + f._inverters[0]->remove("d", 10); + f._inverters[0]->remove("z", 12); + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("require that multiple words at same position works", Fixture) +{ + f.invertDocument(16, *makeDoc16(f._b)); + f._inserter.setVerbose(); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=altbaz,a=16(e=0,w=1,l=5[2])," + "w=alty,a=16(e=0,w=1,l=5[3])," + "w=bar,a=16(e=0,w=1,l=5[1])," + "w=baz,a=16(e=0,w=1,l=5[2])," + "w=foo,a=16(e=0,w=1,l=5[0])," + "w=y,a=16(e=0,w=1,l=5[3])," + "w=z,a=16(e=0,w=1,l=5[4])", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/memoryindex/.gitignore b/searchlib/src/tests/memoryindex/memoryindex/.gitignore new file mode 100644 index 00000000000..174d0a494e2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +memoryindex_test +sourceselectorwriter_test +searchlib_memoryindex_test_app diff --git a/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt new file mode 100644 index 00000000000..f25089e85bb --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_memoryindex_test_app + SOURCES + memoryindex_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_memoryindex_test_app COMMAND searchlib_memoryindex_test_app) diff --git a/searchlib/src/tests/memoryindex/memoryindex/DESC b/searchlib/src/tests/memoryindex/memoryindex/DESC new file mode 100644 index 00000000000..87b69181803 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/DESC @@ -0,0 +1 @@ +memoryindex test. Take a look at memoryindex_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/memoryindex/FILES b/searchlib/src/tests/memoryindex/memoryindex/FILES new file mode 100644 index 00000000000..4faa7668dfc --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/FILES @@ -0,0 +1 @@ +memoryindex_test.cpp diff --git a/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp new file mode 100644 index 00000000000..7d2afc151d5 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp @@ -0,0 +1,438 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("memoryindex_test"); +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using document::Document; +using document::FieldValue; +using search::query::Node; +using search::query::SimplePhrase; +using search::query::SimpleStringTerm; +using search::makeLambdaTask; +using search::ScheduleTaskCallback; +using namespace search::fef; +using namespace search::index; +using namespace search::memoryindex; +using namespace search::queryeval; + +//----------------------------------------------------------------------------- + +struct Setup { + Schema schema; + Setup &field(const std::string &name) { + schema.addIndexField(Schema::IndexField(name, + Schema::STRING)); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +struct Index { + Schema schema; + vespalib::ThreadStackExecutor _executor; + search::SequencedTaskExecutor _invertThreads; + search::SequencedTaskExecutor _pushThreads; + MemoryIndex index; + DocBuilder builder; + uint32_t docid; + std::string currentField; + + Index(const Setup &setup) + : schema(setup.schema), + _executor(1, 128 * 1024), + _invertThreads(2), + _pushThreads(2), + index(schema, _invertThreads, _pushThreads), + builder(schema), + docid(1), + currentField() + { + } + void closeField() { + if (!currentField.empty()) { + builder.endField(); + currentField.clear(); + } + } + Index &doc(uint32_t id) { + docid = id; + builder.startDocument(vespalib::make_string("doc::%u", id)); + return *this; + } + Index &field(const std::string &name) { + closeField(); + builder.startIndexField(name); + currentField = name; + return *this; + } + Index &add(const std::string &token) { + builder.addStr(token); + return *this; + } + void internalSyncCommit() { + vespalib::Gate gate; + index.commit(std::make_shared + (_executor, + makeLambdaTask([&]() { gate.countDown(); 
}))); + gate.await(); + } + Document::UP commit() { + closeField(); + Document::UP d = builder.endDocument(); + index.insertDocument(docid, *d); + internalSyncCommit(); + return d; + } + Index &remove(uint32_t id) { + index.removeDocument(id); + internalSyncCommit(); + return *this; + } + +private: + Index(const Index &index); + Index &operator=(const Index &index); +}; + +//----------------------------------------------------------------------------- + +std::string toString(SearchIterator & search) +{ + std::ostringstream oss; + bool first = true; + for (search.seek(1); ! search.isAtEnd(); search.seek(search.getDocId() + 1)) { + if (!first) oss << ","; + oss << search.getDocId(); + first = false; + } + return oss.str(); +} + +//----------------------------------------------------------------------------- + +const std::string title("title"); +const std::string body("body"); +const std::string foo("foo"); +const std::string bar("bar"); + +//----------------------------------------------------------------------------- + +bool +verifyResult(const FakeResult &expect, + Searchable &index, + std::string fieldName, + const Node &term) +{ + uint32_t fieldId = 0; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FieldSpec field(fieldName, fieldId, handle); + FieldSpecList fields; + fields.add(field); + + Blueprint::UP result = index.createBlueprint(requestContext, fields, term); + if (!EXPECT_TRUE(result.get() != 0)) { + return false; + } + EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits); + EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty); + + result->fetchPostings(true); + SearchIterator::UP search = result->createSearch(*match_data, true); + if (!EXPECT_TRUE(search.get() != 0)) { + return false; + } + TermFieldMatchData &tmd = *match_data->resolveTermField(handle); + + FakeResult actual; + 
search->initFullRange(); + for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) { + actual.doc(search->getDocId()); + search->unpack(search->getDocId()); + EXPECT_EQUAL(search->getDocId(), tmd.getDocId()); + FieldPositionsIterator p = tmd.getIterator(); + actual.len(p.getFieldLength()); + for (; p.valid(); p.next()) { + actual.pos(p.getPosition()); + } + } + return EXPECT_EQUAL(expect, actual); +} + +namespace { +SimpleStringTerm makeTerm(const std::string &term) { + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +Node::UP makePhrase(const std::string &term1, const std::string &term2) { + SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0)); + Node::UP node(phrase); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1)))); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2)))); + return node; +} +} // namespace + +// tests basic usage; index some documents in docid order and perform +// some searches. 
+TEST("testIndexAndSearch")
+{
+    Index memIndex(Setup().field(title).field(body));
+
+    // document 1: title = foo bar foo, body = foo foo foo
+    memIndex.doc(1);
+    memIndex.field(title).add(foo).add(bar).add(foo);
+    memIndex.field(body).add(foo).add(foo).add(foo);
+    memIndex.commit();
+
+    // document 2: title = bar foo, body = bar bar bar bar
+    memIndex.doc(2);
+    memIndex.field(title).add(bar).add(foo);
+    memIndex.field(body).add(bar).add(bar).add(bar).add(bar);
+    memIndex.commit();
+
+    // Runs one query against the memory index built above and checks the
+    // hits against the expected result.
+    auto matches = [&memIndex](const FakeResult &expect,
+                               const std::string &fieldName,
+                               const Node &term) {
+        return verifyResult(expect, memIndex.index, fieldName, term);
+    };
+
+    // search for "foo" in "title"
+    EXPECT_TRUE(matches(FakeResult()
+                        .doc(1).len(3).pos(0).pos(2)
+                        .doc(2).len(2).pos(1),
+                        title, makeTerm(foo)));
+
+    // search for "bar" in "title"
+    EXPECT_TRUE(matches(FakeResult()
+                        .doc(1).len(3).pos(1)
+                        .doc(2).len(2).pos(0),
+                        title, makeTerm(bar)));
+
+    // search for "foo" in "body"
+    EXPECT_TRUE(matches(FakeResult()
+                        .doc(1).len(3).pos(0).pos(1).pos(2),
+                        body, makeTerm(foo)));
+
+    // search for "bar" in "body"
+    EXPECT_TRUE(matches(FakeResult()
+                        .doc(2).len(4).pos(0).pos(1).pos(2).pos(3),
+                        body, makeTerm(bar)));
+
+    // search for "bogus" in "title"
+    EXPECT_TRUE(matches(FakeResult(), title, makeTerm("bogus")));
+
+    // search for "foo" in "bogus"
+    EXPECT_TRUE(matches(FakeResult(), "bogus", makeTerm(foo)));
+
+    // search for "bar foo" in "title"
+    EXPECT_TRUE(matches(FakeResult()
+                        .doc(1).len(3).pos(1)
+                        .doc(2).len(2).pos(0),
+                        title, *makePhrase(bar, foo)));
+
+}
+
+// tests index update behavior; remove/update and unordered docid
+// indexing.
+TEST("require that documents can be removed and updated")
+{
+    Index index(Setup().field(title));
+
+    // feed out of docid order: 3 first, then 1, then 2
+    index.doc(3).field(title).add(foo).add(foo).add(foo).commit();
+    Document::UP doc1 = index.doc(1).field(title).add(foo).commit();
+    Document::UP doc2 = index.doc(2).field(title).add(foo).add(foo).commit();
+
+    // results come back in docid order regardless of feed order
+    FakeResult afterFeed = FakeResult().doc(1).len(1).pos(0)
+                                       .doc(2).len(2).pos(0).pos(1)
+                                       .doc(3).len(3).pos(0).pos(1).pos(2);
+    EXPECT_TRUE(verifyResult(afterFeed, index.index, title, makeTerm(foo)));
+
+    // removing doc 2 takes away its postings and nothing else
+    index.remove(2);
+    FakeResult afterRemove = FakeResult().doc(1).len(1).pos(0)
+                                         .doc(3).len(3).pos(0).pos(1).pos(2);
+    EXPECT_TRUE(verifyResult(afterRemove, index.index, title, makeTerm(foo)));
+
+    // feeding doc 1 again replaces its old content ("foo" is now at 1 and 2)
+    index.doc(1).field(title).add(bar).add(foo).add(foo).commit();
+    FakeResult afterUpdate = FakeResult().doc(1).len(3).pos(1).pos(2)
+                                         .doc(3).len(3).pos(0).pos(1).pos(2);
+    EXPECT_TRUE(verifyResult(afterUpdate, index.index, title, makeTerm(foo)));
+}
+
+// test the fake field source here, to make sure it acts similar to
+// the memory index field source.
+TEST("testFakeSearchable") +{ + Index index(Setup().field(title).field(body)); + + // setup fake field source with predefined results + FakeSearchable fakeSource; + fakeSource.addResult(title, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1)); + fakeSource.addResult(title, bar, + FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0)); + fakeSource.addResult(body, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2)); + fakeSource.addResult(body, bar, + FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3)); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + fakeSource, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + fakeSource, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + fakeSource, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + fakeSource, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, title, makeTerm("bogus"))); + + // search for foo in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, "bogus", makeTerm(foo))); +} + +TEST("requireThatFrozenIndexIgnoresUpdates") +{ + Index index(Setup().field(title)); + Document::UP doc1 = index.doc(1).field(title).add(foo).add(bar).commit(); + FakeResult ffr = FakeResult().doc(1).len(2).pos(0); + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + EXPECT_TRUE(!index.index.isFrozen()); + index.index.freeze(); + EXPECT_TRUE(index.index.isFrozen()); + index.doc(2).field(title).add(bar).add(foo).commit(); // not added + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + index.remove(1); // not removed + 
EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); +} + +TEST("requireThatNumDocsAndDocIdLimitIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumDocs()); + EXPECT_EQUAL(1u, index.index.getDocIdLimit()); + Document::UP doc1 = index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumDocs()); + EXPECT_EQUAL(2u, index.index.getDocIdLimit()); + Document::UP doc4 = index.doc(4).field(title).add(foo).commit(); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + Document::UP doc2 = index.doc(2).field(title).add(foo).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // re-add doc4 + index.doc(4).field(title).add(bar).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // remove doc2 + index.remove(2); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); +} + +TEST("requireThatWeUnderstandTheMemoryFootprint") +{ + { + Setup setup; + Index index(setup); + EXPECT_EQUAL(0u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1")); + EXPECT_EQUAL(118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1").field("f2")); + EXPECT_EQUAL(2*118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } +} + +TEST("requireThatNumWordsIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumWords()); + index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumWords()); + 
index.doc(2).field(title).add(foo).add(bar).add(body).commit(); + EXPECT_EQUAL(3u, index.index.getNumWords()); +} + +TEST("requireThatWeCanFakeBitVector") +{ + Index index(Setup().field(title)); + index.doc(1).field(title).add(foo).commit(); + index.doc(3).field(title).add(foo).commit(); + { + uint32_t fieldId = 0; + + MatchDataLayout mdl; + FakeRequestContext requestContext; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + // filter field + FieldSpec field(title, fieldId, handle, true); + FieldSpecList fields; + fields.add(field); + + Searchable &searchable = index.index; + Blueprint::UP res = searchable.createBlueprint(requestContext, fields, makeTerm(foo)); + EXPECT_TRUE(res.get() != NULL); + + res->fetchPostings(true); + SearchIterator::UP search = res->createSearch(*match_data, true); + EXPECT_TRUE(search.get() != NULL); + EXPECT_TRUE(dynamic_cast(search.get()) != NULL); + search->initFullRange(); + EXPECT_EQUAL("1,3", toString(*search)); + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore new file mode 100644 index 00000000000..b2636fe5e81 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_urlfieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..c5a0374fad9 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_urlfieldinverter_test_app + SOURCES + urlfieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_urlfieldinverter_test_app COMMAND searchlib_urlfieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC new file mode 100644 index 00000000000..00115ada607 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC @@ -0,0 +1 @@ +UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES new file mode 100644 index 00000000000..ac08b0a3e90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES @@ -0,0 +1 @@ +urlfieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp new file mode 100644 index 00000000000..30b5883f153 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp @@ -0,0 +1,579 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include +#include +LOG_SETUP("urlfieldinverter_test"); +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::DocTypeBuilder; +using index::Schema; + +namespace memoryindex +{ + +namespace { +const vespalib::string url = "url"; +} + + +namespace +{ + +Document::UP +makeDoc10Single(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). 
+ endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Array(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + return b.endDocument(); +} + +Document::UP +makeDoc10WeightedSet(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). 
+ startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Empty(DocBuilder &b) +{ + b.startDocument("doc::10"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector > _inverters; + std::unique_ptr _urlInverter; + test::OrderedDocumentInserter _inserter; + DocTypeBuilder::SchemaIndexFields _schemaIndexFields; + + static Schema + makeSchema(Schema::CollectionType collectionType) + { + Schema schema; + schema.addUriIndexFields(Schema::IndexField("url", Schema::STRING, + collectionType)); + return schema; + } + + Fixture(Schema::CollectionType collectionType) + : _schema(makeSchema(collectionType)), + _b(_schema), + _inverters(), + _urlInverter(), + _inserter(), + _schemaIndexFields() + { + _schemaIndexFields.setup(_schema); + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique(_schema, + fieldId)); + } + DocTypeBuilder::UriField &urlField = + _schemaIndexFields._uriFields.front(); + _urlInverter = std::make_unique + (collectionType, + _inverters[urlField._all].get(), + _inverters[urlField._scheme].get(), + _inverters[urlField._host].get(), + _inverters[urlField._port].get(), + _inverters[urlField._path].get(), + _inverters[urlField._query].get(), + _inverters[urlField._fragment].get(), + _inverters[urlField._hostname].get()); + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + _urlInverter->invertField(docId, doc.getValue(url)); + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + enableAnnotations() + { + _urlInverter->setUseAnnotations(true); + } +}; + + +TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + 
"w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10," + "w=13,a=10," + "w=2,a=10," + "w=83,a=10," + "w=85,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=83,a=10," + "w=85,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=12,a=10," + "w=13,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + 
f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedWeightedSetFieldWorks", + Fixture(Schema::WEIGHTEDSET)) +{ + f.enableAnnotations(); + f._inserter.setVerbose(); + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10(e=0,w=4,l=9[8])," + "w=13,a=10(e=1,w=7,l=9[8])," + "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7])," + "w=83,a=10(e=0,w=4,l=9[4])," + "w=85,a=10(e=1,w=7,l=9[4])," + "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6])," + "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3])," + "w=flickr,a=10(e=1,w=7,l=9[2])," + "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5])," + "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0])," + 
"w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1])," + "w=yahoo,a=10(e=0,w=4,l=9[2])," + "f=1," + "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=2," + "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2])," + "w=flickr,a=10(e=1,w=7,l=3[1])," + "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0])," + "w=yahoo,a=10(e=0,w=4,l=3[1])," + "f=3," + "w=83,a=10(e=0,w=4,l=1[0])," + "w=85,a=10(e=1,w=7,l=1[0])," + "f=4," + "w=altfluke,a=10(e=0,w=4,l=1[0])," + "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=5," + "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1])," + "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0])," + "f=6," + "w=12,a=10(e=0,w=4,l=1[0])," + "w=13,a=10(e=1,w=7,l=1[0])," + "f=7," + "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4])," + "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0])," + "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3])," + "w=flickr,a=10(e=1,w=7,l=5[2])," + "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1])," + "w=yahoo,a=10(e=0,w=4,l=5[2])", + f._inserter.toStr()); +} + + +TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + 
+TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks",
+       Fixture(Schema::WEIGHTEDSET))
+{
+    f.enableAnnotations();                       // switch the url inverter to use annotations
+    f.invertDocument(10, *makeDoc10Empty(f._b)); // "doc::10" is built without any url field
+    f.pushDocuments();                           // push every field inverter into f._inserter
+    EXPECT_EQUAL("",                             // nothing may have been inserted
+                 f._inserter.toStr());
+}
+
+} // namespace memoryindex
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/memorytub/.gitignore b/searchlib/src/tests/memorytub/.gitignore
new file mode 100644
index 00000000000..d3185d605a1
--- /dev/null
+++ b/searchlib/src/tests/memorytub/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+test_memorytub
+searchlib_test_memorytub_app
diff --git a/searchlib/src/tests/memorytub/CMakeLists.txt b/searchlib/src/tests/memorytub/CMakeLists.txt
new file mode 100644
index 00000000000..a06fb4de8e2
--- /dev/null
+++ b/searchlib/src/tests/memorytub/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_test_memorytub_app
+    SOURCES
+    memorytub_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_test_memorytub_app COMMAND searchlib_test_memorytub_app)
diff --git a/searchlib/src/tests/memorytub/memorytub_test.cpp b/searchlib/src/tests/memorytub/memorytub_test.cpp
new file mode 100644
index 00000000000..348aee2fe7f
--- /dev/null
+++ b/searchlib/src/tests/memorytub/memorytub_test.cpp
@@ -0,0 +1,205 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +LOG_SETUP("test_memorytub"); + +#include +#include + +#define MEMTUB_ARRAY_ALLOC(tub, type, size) ((type *) tub->Alloc(sizeof(type) * size)) + + +enum { + SMALL_STRING = 100, + BIG_STRING = 100000, + SMALL_SMALL_ARRAY = 10, + BIG_SMALL_ARRAY = 1000 +}; + + +class Small +{ +public: + char filler[SMALL_STRING]; +}; + + +class Big +{ +public: + char filler[BIG_STRING]; +}; + + +class Test : public vespalib::TestApp +{ +private: + search::util::SmallMemoryTub _tub; + +public: + bool Overlap(char *start1, char *end1, + char *start2, char *end2); + bool InTub(char *pt, char *end); + bool NotInTub(char *pt, char *end); + int Main(); + + Test(void) + : _tub() + { + } +}; + + +bool +Test::Overlap(char *start1, char *end1, + char *start2, char *end2) +{ + if (start1 == end1) + return false; + + if (start2 == end2) + return false; + + if (start2 >= start1 && start2 < end1) + return true; + + if (end2 > start1 && end2 <= end1) + return true; + + if (start1 >= start2 && start1 < end2) + return true; + + if (end1 > start2 && end1 <= end2) + return true; + + return false; +} + + +bool +Test::InTub(char *pt, char *end) +{ + for (char *p = pt; p < end; p++) + if (!_tub.InTub(p)) + return false; + return true; +} + + +bool +Test::NotInTub(char *pt, char *end) +{ + for (char *p = pt; p < end; p++) + if (_tub.InTub(p)) + return false; + return true; +} + + +int +Test::Main() +{ + TEST_INIT("memorytub-test"); + + Small *small = NULL; + Big *big = NULL; + char *small_string = NULL; + char *big_string = NULL; + Small *small_small_array = NULL; + Small *big_small_array = NULL; + + EXPECT_TRUE(!_tub.InTub(&_tub)); + + EXPECT_TRUE(sizeof(Small) < _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Big) > _tub.GetAllocLimit()); + EXPECT_TRUE(SMALL_STRING < _tub.GetAllocLimit()); + EXPECT_TRUE(BIG_STRING > _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Small) * SMALL_SMALL_ARRAY < _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Small) * BIG_SMALL_ARRAY > _tub.GetAllocLimit()); + + small 
= new (&_tub) Small(); + EXPECT_TRUE(((void *)small) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)small, (char *)(small + 1))); + + big = new (&_tub) Big(); + EXPECT_TRUE(((void *)big) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)big, (char *)(big + 1))); + + small_string = MEMTUB_ARRAY_ALLOC((&_tub), char, SMALL_STRING); + EXPECT_TRUE(((void *)small_string) != ((void *)&_tub)); + EXPECT_TRUE(InTub(small_string, small_string + SMALL_STRING)); + + big_string = MEMTUB_ARRAY_ALLOC((&_tub), char, BIG_STRING); + EXPECT_TRUE(((void *)big_string) != ((void *)&_tub)); + EXPECT_TRUE(InTub(big_string, big_string + BIG_STRING)); + + small_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, SMALL_SMALL_ARRAY); + EXPECT_TRUE(((void *)small_small_array) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + big_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, BIG_SMALL_ARRAY); + EXPECT_TRUE(((void *)big_small_array) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)big, (char *)(big + 1))); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + small_string, small_string + SMALL_STRING)); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + big_string, big_string + BIG_STRING)); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + small_string, small_string + SMALL_STRING)); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + big_string, big_string + BIG_STRING)); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + (char *)small_small_array, (char 
*)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + big_string, big_string + BIG_STRING)); + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING, + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING, + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + _tub.Reset(); + EXPECT_TRUE(NotInTub((char *)small, (char *)(small + 1))); + EXPECT_TRUE(NotInTub((char *)big, (char *)(big + 1))); + EXPECT_TRUE(NotInTub(small_string, small_string + SMALL_STRING)); + EXPECT_TRUE(NotInTub(big_string, big_string + BIG_STRING)); + EXPECT_TRUE(NotInTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + EXPECT_TRUE(NotInTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + TEST_DONE(); +} + +TEST_APPHOOK(Test) diff --git a/searchlib/src/tests/nativerank/.gitignore b/searchlib/src/tests/nativerank/.gitignore new file mode 100644 index 00000000000..6a3051df4e7 --- /dev/null +++ b/searchlib/src/tests/nativerank/.gitignore @@ -0,0 +1,2 @@ +/vlog3.txt +searchlib_nativerank_test_app diff --git a/searchlib/src/tests/nativerank/CMakeLists.txt b/searchlib/src/tests/nativerank/CMakeLists.txt new file mode 100644 index 00000000000..dc9542a4988 --- /dev/null +++ 
b/searchlib/src/tests/nativerank/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_nativerank_test_app + SOURCES + nativerank.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_nativerank_test_app + COMMAND searchlib_nativerank_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog3.txt" +) diff --git a/searchlib/src/tests/nativerank/nativerank.cpp b/searchlib/src/tests/nativerank/nativerank.cpp new file mode 100644 index 00000000000..398ca52a190 --- /dev/null +++ b/searchlib/src/tests/nativerank/nativerank.cpp @@ -0,0 +1,828 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("nativerank_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::fef::test; + +const double EPS = 10e-4; + +namespace search { +namespace features { + +class Test : public FtTestApp { +private: + BlueprintFactory _factory; + + struct ANAM { + int32_t attributeWeight; + search::query::Weight termWeight; + uint32_t fieldWeight; + uint32_t docId; + ANAM(int32_t aw, uint32_t tw = 100, uint32_t fw = 100, uint32_t id = 1) : + attributeWeight(aw), termWeight(tw), fieldWeight(fw), docId(id) {} + vespalib::string toString() const { + return vespalib::make_string("aw(%d), tw(%u), fw(%u), id(%u)", + attributeWeight, termWeight.percent(), fieldWeight, docId); + } + }; + + bool assertNativeFieldMatch(feature_t score, const vespalib::string & query, const vespalib::string & field, + const Properties & props = Properties(), uint32_t docId = 1); + bool assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2, + const Properties & props = Properties()); + bool assertNativeProximity(feature_t score, const vespalib::string & query, 
const vespalib::string & field, + const Properties & props = Properties(), uint32_t docId = 1); + bool assertNativeRank(feature_t score, feature_t fieldMatchWeight, feature_t attributeMatchWeight, feature_t proximityWeight); + + void testNativeFieldMatch(); + void testNativeAttributeMatch(); + void testNativeProximity(); + void testNativeRank(); + +public: + int Main(); +}; + +void +Test::testNativeFieldMatch() +{ + { // test blueprint + NativeFieldMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeFieldMatch")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeFieldMatch.firstOccurrenceTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + p.clear().add("nativeFieldMatch.occurrenceCountTable", "b"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found + + const TableManager & tm = ft.getIndexEnv().getTableManager(); + { + p.clear(); + p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeFieldMatchParams & pas = (dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); + EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("expdecay(8000,12.50)")); + EXPECT_TRUE(pas.vector[1].firstOccTable == tm.getTable("expdecay(8000,12.50)")); + 
EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("loggrowth(1500,4000,19)")); + EXPECT_TRUE(pas.vector[1].numOccTable == tm.getTable("loggrowth(1500,4000,19)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + EXPECT_EQUAL(pas.vector[0].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH); + EXPECT_EQUAL(pas.vector[1].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH); + EXPECT_EQUAL(pas.minFieldLength, 6u); + EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.5); + EXPECT_EQUAL(pas.vector[1].firstOccImportance, 0.5); + } + { + p.clear(); + p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,1)"); + p.add("nativeFieldMatch.firstOccurrenceTable.foo", "linear(0,2)"); + p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,3)"); + p.add("nativeFieldMatch.occurrenceCountTable.baz", "linear(0,4)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + p.add("nativeFieldMatch.averageFieldLength.foo", "400"); + p.add("nativeFieldMatch.averageFieldLength.baz", "500"); + p.add("nativeFieldMatch.minFieldLength", "12"); + p.add("nativeFieldMatch.firstOccurrenceImportance", "0.8"); + p.add("nativeFieldMatch.firstOccurrenceImportance.foo", "0.6"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux"); + ft.getIndexEnv().getFields()[4].setFilter(true); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz").add("quux"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeFieldMatchParams & pas = 
(dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 5); + EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("linear(0,2)")); + EXPECT_TRUE(pas.vector[3].firstOccTable == tm.getTable("linear(0,1)")); + EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("linear(0,3)")); + EXPECT_TRUE(pas.vector[3].numOccTable == tm.getTable("linear(0,4)")); + EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6); + EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field + EXPECT_EQUAL(pas.vector[4].field, false); // filter field + EXPECT_EQUAL(pas.vector[0].averageFieldLength, 400u); + EXPECT_EQUAL(pas.vector[3].averageFieldLength, 500u); + EXPECT_EQUAL(pas.minFieldLength, 12u); + EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.6); + EXPECT_EQUAL(pas.vector[3].firstOccImportance, 0.8); + } + { + FtIndexEnvironment ie; + FT_DUMP(_factory, "nativeFieldMatch", ie, StringList().add("nativeFieldMatch")); + } + } + + { // test helper functions + FtFeatureTest ft(_factory, ""); + NativeFieldMatchParams p; + NativeFieldMatchParam f; + Table t; + t.add(0).add(1).add(2).add(3).add(4).add(5).add(6).add(7); + f.firstOccTable = &t; + f.numOccTable = &t; + p.vector.push_back(f); + NativeFieldMatchExecutor nfme(ft.getQueryEnv(), p); + EXPECT_EQUAL(p.minFieldLength, 6u); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 4), 0); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 1, 4), 1); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 2, 4), 2); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 4), 4); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 6), 4); + 
EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 6), 5); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 5, 6), 7); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 12), 0); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 12), 2); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 11, 12), 7); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 4), 0); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 2, 4), 2); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 4), 4); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 6), 4); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 5, 6), 5); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 6), 7); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 12), 0); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 12), 3); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 12, 12), 7); + } + { // test params object + NativeFieldMatchParams p; + p.resize(1); + p.setMaxTableSums(0, 0); // test reset to 1 + EXPECT_EQUAL(p.vector[0].maxTableSum, 1); + } + + { // test executor + // 1 term + EXPECT_TRUE(assertNativeFieldMatch(55, "a", "a")); + EXPECT_TRUE(assertNativeFieldMatch(40, "a", "x x x a")); + EXPECT_TRUE(assertNativeFieldMatch(70, "a", "a a a a")); + + // 2 terms + EXPECT_TRUE(assertNativeFieldMatch(27.5, "a b", "a")); + EXPECT_TRUE(assertNativeFieldMatch(52.5, "a b", "a b")); + EXPECT_TRUE(assertNativeFieldMatch(67.5, "a b", "a b a b a b a b")); + + // 3 terms + EXPECT_TRUE(assertNativeFieldMatch(50, "a b c", "a b c")); + + // 4 terms + EXPECT_TRUE(assertNativeFieldMatch(47.5, "a b c d", "a b c d")); + + // change term weight + EXPECT_TRUE(assertNativeFieldMatch(45, "a b", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(50, "a!600 b!200", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(40, "a!200 b!600", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(55, "a!200 b!0", "a x x x b")); + + // change significance + EXPECT_TRUE(assertNativeFieldMatch(46, "a%0.4 b%0.1", "x a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(34, "a%0.1 b%0.4", "x a x x x b")); + + // change firstOccImportance + Properties p = 
Properties().add("nativeFieldMatch.firstOccurrenceImportance", "1"); + EXPECT_TRUE(assertNativeFieldMatch(100, "a", "a", p)); + p.clear().add("nativeFieldMatch.firstOccurrenceImportance", "0"); + EXPECT_TRUE(assertNativeFieldMatch(10, "a", "a", p)); + + // use table normalization + p.clear().add("nativeRank.useTableNormalization", "true"); + // norm factor = (100*0.5 + 60*0.5) = 80 + EXPECT_TRUE(assertNativeFieldMatch(0.6875, "a", "a", p)); // (55/80) + EXPECT_TRUE(assertNativeFieldMatch(1, "a", "a a a a a a", p)); // (80/80) + p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,0)"); + p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,0)"); + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p)); + + // use average field length + p.clear().add("nativeFieldMatch.averageFieldLength.foo", "12"); + EXPECT_TRUE(assertNativeFieldMatch(50, "a", "a", p)); // firstOccBoost: 100, numOccBoost: 0 + EXPECT_TRUE(assertNativeFieldMatch(45, "a", "x x x a", p)); // firstOccBoost: 90, numOccBoost: 0 + EXPECT_TRUE(assertNativeFieldMatch(50, "a", "x x x a a", p)); // firstOccBoost: 90, numOccBoost: 10 + + // change field weight + p.clear().add("vespa.fieldweight.foo", "0"); + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p)); + + // change docId to give 0 hits + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p.clear(), 2)); + } +} + +bool +Test::assertNativeFieldMatch(feature_t score, + const vespalib::string & query, + const vespalib::string & field, + const Properties & props, + uint32_t docId) +{ + LOG(info, "assertNativeFieldMatch(%f, '%s', '%s')", score, query.c_str(), field.c_str()); + + // Setup feature test. 
+ vespalib::string feature = "nativeFieldMatch"; + FtFeatureTest ft(_factory, feature); + + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); + ft.getIndexEnv().getProperties().add("nativeFieldMatch.firstOccurrenceTable", + vespalib::make_string("linear(-10,100,%zu)", std::max((size_t)6, index["foo"].size()))); + ft.getIndexEnv().getProperties().add("nativeFieldMatch.occurrenceCountTable", + vespalib::make_string("linear(10,0,%zu)", std::max((size_t)6, index["foo"].size()) + 1)); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + ft.getIndexEnv().getProperties().import(props); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + // Execute and compare results. + if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) { + return false; + } + return true; +} + +void +Test::testNativeAttributeMatch() +{ + { // test blueprint + NativeAttributeMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeAttributeMatch")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeAttributeMatch.weightTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + +// const TableManager & tm = ft.getIndexEnv().getTableManager(); + { + p.clear(); + 
p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeAttributeMatchParams & pas = (dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); +// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(1,0)")); +// EXPECT_TRUE(pas.vector[1].weightBoostTable == tm.getTable("linear(1,0)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + } + { + p.clear(); + p.add("nativeAttributeMatch.weightTable", "linear(0,3)"); + p.add("nativeAttributeMatch.weightTable.foo", "linear(0,2)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "baz"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeAttributeMatchParams & pas = (dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 4); +// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(0,2)")); +// EXPECT_TRUE(pas.vector[3].weightBoostTable == tm.getTable("linear(0,3)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 2); + EXPECT_EQUAL(pas.vector[3].maxTableSum, 3); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + 
EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an index + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field + } + + { + FtIndexEnvironment ie; + FT_DUMP(_factory, "nativeAttributeMatch", ie, StringList().add("nativeAttributeMatch")); + } + } + { // test executor + + EXPECT_TRUE(assertNativeAttributeMatch(15, ANAM(10), ANAM(10))); // basic + EXPECT_TRUE(assertNativeAttributeMatch(5, ANAM(-10), ANAM(10))); // negative weight + EXPECT_TRUE(assertNativeAttributeMatch(12.5, ANAM(10, 600), ANAM(10, 200))); // change term weights + EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 600), ANAM(10, 0))); // change term weights + EXPECT_TRUE(assertNativeAttributeMatch(18, ANAM(10, 100, 200), ANAM(10, 100, 800))); // change field weights + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 0), ANAM(10, 100, 0))); // change field weights + EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 100, 100, 2), ANAM(10, 100, 100))); // change docId to give 1 hit + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 100, 2), ANAM(10, 100, 100, 2))); // change docId to give 0 hits + { // use table normalization + // foo: max table value: 255 + // bar: max table value: 510 + Properties p = Properties().add("nativeRank.useTableNormalization", "true"); + EXPECT_TRUE(assertNativeAttributeMatch(0.2941, ANAM(100), ANAM(50), p)); // (100/255 + 100/510)*0.5 + EXPECT_TRUE(assertNativeAttributeMatch(1, ANAM(255), ANAM(255), p)); // (255/255 + 510/510)*0.5 + p.add("nativeAttributeMatch.weightTable.foo", "linear(0,0)"); + p.add("nativeAttributeMatch.weightTable.bar", "linear(0,0)"); + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(100), ANAM(50), p)); + } + } +} + +bool +Test::assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2, const Properties & props) +{ + LOG(info, "assertNativeAttributeMatch(%f, '%s', '%s')", score, t1.toString().c_str(), t2.toString().c_str()); + vespalib::string feature = 
"nativeAttributeMatch"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); + ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.foo", "linear(1,0)"); + ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.bar", "linear(2,0)"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", vespalib::make_string("%u", t1.fieldWeight)); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", vespalib::make_string("%u", t2.fieldWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + ft.getIndexEnv().getProperties().import(props); + if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL)) { // t1 + return false; + } + if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL)) { // t2 + return false; + } + ft.getQueryEnv().getTerms()[0].setWeight(t1.termWeight); + ft.getQueryEnv().getTerms()[1].setWeight(t2.termWeight); + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + { + TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(0, 0); + tfmd->reset(t1.docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(t1.attributeWeight); + tfmd->appendPosition(pos); + } + { + TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(1, 1); + tfmd->reset(t2.docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(t2.attributeWeight); + tfmd->appendPosition(pos); + } + if (!EXPECT_TRUE(ft.execute(score, EPS))) { + return false; + } + return true; +} + +void +Test::testNativeProximity() +{ + { // test blueprint + NativeProximityBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, 
"nativeProximity")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeProximity.proximityTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + p.clear().add("nativeProximity.reverseProximityTable", "b"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found + + const TableManager & tm = ft.getIndexEnv().getTableManager(); + { + p.clear(); + p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeProximityParams & pas = (dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); + EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("expdecay(500,3)")); + EXPECT_TRUE(pas.vector[1].proximityTable == tm.getTable("expdecay(500,3)")); + EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("expdecay(400,3)")); + EXPECT_TRUE(pas.vector[1].revProximityTable == tm.getTable("expdecay(400,3)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + EXPECT_EQUAL(pas.slidingWindow, 4u); + 
EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.5); + EXPECT_EQUAL(pas.vector[1].proximityImportance, 0.5); + } + { + p.clear(); + p.add("nativeProximity.proximityTable", "linear(0,1)"); + p.add("nativeProximity.proximityTable.foo", "linear(0,2)"); + p.add("nativeProximity.reverseProximityTable", "linear(0,3)"); + p.add("nativeProximity.reverseProximityTable.baz", "linear(0,4)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + p.add("nativeProximity.slidingWindowSize", "2"); + p.add("nativeProximity.proximityImportance", "0.8"); + p.add("nativeProximity.proximityImportance.foo", "0.6"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux"); + ft.getIndexEnv().getFields()[4].setFilter(true); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeProximityParams & pas = (dynamic_cast(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 5); + EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("linear(0,2)")); + EXPECT_TRUE(pas.vector[3].proximityTable == tm.getTable("linear(0,1)")); + EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("linear(0,3)")); + EXPECT_TRUE(pas.vector[3].revProximityTable == tm.getTable("linear(0,4)")); + EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6); + EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight 
== 0 -> do not consider this field + EXPECT_EQUAL(pas.vector[4].field, false); // filter field + EXPECT_EQUAL(pas.slidingWindow, 2u); + EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.6); + EXPECT_EQUAL(pas.vector[3].proximityImportance, 0.8); + } + + { + FtIndexEnvironment ie; + FT_DUMP(_factory, "nativeProximity", ie, StringList().add("nativeProximity")); + } + } + + { // test NativeProximityExecutor::generateTermPairs() + QueryTermVector terms; + SimpleTermData a, b, c; + a.setWeight(search::query::Weight(100)); + a.setUniqueId(0); + b.setWeight(search::query::Weight(200)); + b.setUniqueId(1); + c.setWeight(search::query::Weight(300)); + c.setUniqueId(2); + terms.push_back(QueryTerm(&a, 0.1)); + terms.push_back(QueryTerm(&b, 0.2)); + terms.push_back(QueryTerm(&c, 0.3)); + FtFeatureTest ft(_factory, "nativeProximity"); + FtQueryEnvironment & env = ft.getQueryEnv(); + env.getProperties().add("vespa.term.1.connexity", "0"); + env.getProperties().add("vespa.term.1.connexity", "0.8"); + env.getProperties().add("vespa.term.2.connexity", "1"); + env.getProperties().add("vespa.term.2.connexity", "0.6"); + { + NativeProximityExecutor::FieldSetup setup(0); + NativeProximityExecutor::TermPairVector & pairs = setup.pairs; + NativeProximityExecutor::generateTermPairs(env, terms, 0, setup); + EXPECT_EQUAL(pairs.size(), 0u); + NativeProximityExecutor::generateTermPairs(env, terms, 1, setup); + EXPECT_EQUAL(pairs.size(), 0u); + NativeProximityExecutor::generateTermPairs(env, terms, 2, setup); + EXPECT_EQUAL(pairs.size(), 2u); + EXPECT_TRUE(pairs[0].first.termData() == &a); + EXPECT_TRUE(pairs[0].second.termData() == &b); + EXPECT_EQUAL(pairs[0].connectedness, 0.8); + EXPECT_TRUE(pairs[1].first.termData() == &b); + EXPECT_TRUE(pairs[1].second.termData() == &c); + EXPECT_EQUAL(pairs[1].connectedness, 0.6); + EXPECT_EQUAL(setup.divisor, 118); // (10 + 40)*0.8 + (40 + 90)*0.6 + + pairs.clear(); + setup.divisor = 0; + + NativeProximityExecutor::generateTermPairs(env, terms, 3, 
setup); + EXPECT_EQUAL(pairs.size(), 3u); + EXPECT_TRUE(pairs[0].first.termData() == &a); + EXPECT_TRUE(pairs[0].second.termData() == &b); + EXPECT_EQUAL(pairs[0].connectedness, 0.8); + EXPECT_TRUE(pairs[1].first.termData() == &a); + EXPECT_TRUE(pairs[1].second.termData() == &c); + EXPECT_EQUAL(pairs[1].connectedness, 0.3); + EXPECT_TRUE(pairs[2].first.termData() == &b); + EXPECT_TRUE(pairs[2].second.termData() == &c); + EXPECT_EQUAL(pairs[2].connectedness, 0.6); + EXPECT_EQUAL(setup.divisor, 148); // (10 + 40)*0.8 + (10 + 90)*0.3 + (40 + 90)*0.6 + + pairs.clear(); + setup.divisor = 0; + a.setWeight(search::query::Weight(0)); + b.setWeight(search::query::Weight(0)); + + // test that (ab) is filtered away + NativeProximityExecutor::generateTermPairs(env, terms, 2, setup); + EXPECT_EQUAL(pairs.size(), 1u); + EXPECT_TRUE(pairs[0].first.termData() == &b); + EXPECT_TRUE(pairs[0].second.termData() == &c); + EXPECT_EQUAL(pairs[0].connectedness, 0.6); + } + } + + { // test executor + // 1 pair (only forward) + EXPECT_TRUE(assertNativeProximity(0, "a", "a")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a")); + EXPECT_TRUE(assertNativeProximity(5, "a b", "a b")); + EXPECT_TRUE(assertNativeProximity(1, "a b", "a x x x x b")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x b")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x x b")); + EXPECT_TRUE(assertNativeProximity(5, "a b", "a x x a x a a b")); + EXPECT_TRUE(assertNativeProximity(5, "b a", "a x x a x a a b")); + + // 1 pair (both forward and backward) + EXPECT_TRUE(assertNativeProximity(10, "a b", "a b a")); + EXPECT_TRUE(assertNativeProximity(10, "b a", "a b a")); + EXPECT_TRUE(assertNativeProximity(10, "a a", "a a")); // term distance 1 + EXPECT_TRUE(assertNativeProximity(6, "a a", "a x x a")); // term distance 3 + EXPECT_TRUE(assertNativeProximity(9, "a b", "a x x x x x b x x x x a x x x b x x a x b a")); + EXPECT_TRUE(assertNativeProximity(9, "b a", "a x x x x x b x x x x a x x x b x x a x b 
a")); + + // 2 pairs ((ab),(bc)) + EXPECT_TRUE(assertNativeProximity(5, "a b c", "a b c")); + EXPECT_TRUE(assertNativeProximity(10, "a b c", "a b c b a")); + + // change weight + EXPECT_TRUE(assertNativeProximity(4, "a b c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(4.2, "a!200 b c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.8, "a b c!200", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(4.333, "a b c!0", "a b x x c")); // ((100+100)*5 + (100+0)*3) / 300 + EXPECT_TRUE(assertNativeProximity(5, "a b!0 c!0", "a b x x c")); // ((100+0)*5 + (0+0)*3) / 100 + EXPECT_TRUE(assertNativeProximity(0, "a!0 b!0", "a b")); + + // change significance + EXPECT_TRUE(assertNativeProximity(4.692, "a%1 b%0.1 c%0.1", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.308, "a%0.1 b%0.1 c%1", "a b x x c")); + + // change connectedness + EXPECT_TRUE(assertNativeProximity(4, "a 1:b 1:c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.667, "a 0.5:b 1:c", "a b x x c")); // (5*0.5 + 3*1) / (0.5 + 1) + + // change proximityImportance + Properties p = Properties().add("nativeProximity.proximityImportance", "1"); + EXPECT_TRUE(assertNativeProximity(10, "a b", "a b x x x a", p)); + p.clear().add("nativeProximity.proximityImportance", "0"); + EXPECT_TRUE(assertNativeProximity(4, "a b", "a b x x x a", p)); + + // use table normalization + p.clear().add("nativeRank.useTableNormalization", "true"); + // norm factor = (10*0.5 + 10*0.5) = 10 + EXPECT_TRUE(assertNativeProximity(0.5, "a b", "a b", p)); + EXPECT_TRUE(assertNativeProximity(0.5, "a b c", "a b c", p)); + EXPECT_TRUE(assertNativeProximity(1, "a b", "a b a", p)); + EXPECT_TRUE(assertNativeProximity(1, "a b c", "a b c b a", p)); + p.add("nativeProximity.proximityTable", "linear(0,0)"); + p.add("nativeProximity.reverseProximityTable", "linear(0,0)"); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p)); + + // change field weight + p.clear().add("vespa.fieldweight.foo", "0"); + 
EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p)); + + // change docId to give 0 hits + EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p.clear(), 2)); + } +} + +bool +Test::assertNativeProximity(feature_t score, + const vespalib::string & query, + const vespalib::string & field, + const Properties & props, + uint32_t docId) +{ + LOG(info, "assertNativeProximity(%f, '%s', '%s')", score, query.c_str(), field.c_str()); + + // Setup feature test. + vespalib::string feature = "nativeProximity"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(6))); + ft.getIndexEnv().getProperties().add("nativeProximity.proximityTable", "linear(-2,10)"); + ft.getIndexEnv().getProperties().add("nativeProximity.reverseProximityTable", "linear(-2,10)"); + ft.getIndexEnv().getProperties().add("nativeProximity.slidingWindowSize", "2"); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + ft.getIndexEnv().getProperties().import(props); + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + // Execute and compare results. 
+ if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) { + return false; + } + return true; +} + +void +Test::testNativeRank() +{ + { // test blueprint + NativeRankBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeRank")); + + FtFeatureTest ft(_factory, ""); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("foo")); // field 'foo' not found + params.clear(); + + { + FT_SETUP_OK(pt, params, in.add("nativeFieldMatch").add("nativeProximity").add("nativeAttributeMatch"), + out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = (dynamic_cast(bp.get()))->getParams(); + EXPECT_EQUAL(pas.fieldMatchWeight, 100u); + EXPECT_EQUAL(pas.attributeMatchWeight, 100u); + EXPECT_EQUAL(pas.proximityWeight, 25u); + } + { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeRank.useTableNormalization", "false"); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = (dynamic_cast(bp.get()))->getParams(); + EXPECT_EQUAL(pas.proximityWeight, 100u); + p.clear(); + } + { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeRank.fieldMatchWeight", "200"); + p.add("nativeRank.attributeMatchWeight", "300"); + p.add("nativeRank.proximityWeight", "400"); + FT_SETUP_OK(pt, params, in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = (dynamic_cast(bp.get()))->getParams(); + EXPECT_EQUAL(pas.fieldMatchWeight, 200u); + EXPECT_EQUAL(pas.attributeMatchWeight, 300u); + EXPECT_EQUAL(pas.proximityWeight, 400u); + } + + FT_DUMP(_factory, "nativeRank", ft.getIndexEnv(), StringList().add("nativeRank")); + + { // test optimizations when weight == 0 + Properties & p = ft.getIndexEnv().getProperties(); + p.clear(); + 
p.add("nativeRank.fieldMatchWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + in.clear().add("value(0)").add("nativeProximity").add("nativeAttributeMatch"), out); + p.add("nativeRank.proximityWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch"), out); + p.add("nativeRank.attributeMatchWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, in.clear().add("value(0)").add("value(0)").add("value(0)"), out); + } + { // nativeRank for a subset of fields + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("bar"), in, out); + ft.getIndexEnv().getProperties().clear(); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + in.clear().add("nativeFieldMatch(foo)").add("nativeProximity(foo)").add("nativeAttributeMatch(bar)"), out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo").add("baz"), + in.clear().add("nativeFieldMatch(foo,baz)").add("nativeProximity(foo,baz)").add("value(0)"), out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), + in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch(bar)"), out); + } + } + + { // test executor + assertNativeRank(60, 1, 1, 1); + assertNativeRank(72, 3, 1, 1); + assertNativeRank(37.5, 0, 1, 3); + } +} + +bool +Test::assertNativeRank(feature_t score, + feature_t fieldMatchWeight, + feature_t attributeMatchWeight, + feature_t proximityWeight) +{ + LOG(info, "assertNativeRank(%f, %f, %f, %f)", score, fieldMatchWeight, attributeMatchWeight, proximityWeight); + + // Setup feature test. 
+ vespalib::string feature = "nativeRank"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getProperties().add("nativeRank.fieldMatchWeight", + vespalib::make_string("%f", fieldMatchWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.attributeMatchWeight", + vespalib::make_string("%f", attributeMatchWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.proximityWeight", + vespalib::make_string("%f", proximityWeight)); + + ft.getOverrides().add("nativeFieldMatch", "90"); + ft.getOverrides().add("nativeAttributeMatch", "60"); + ft.getOverrides().add("nativeProximity", "30"); + + if (!EXPECT_TRUE(ft.setup())) { + return false; + } + + // Execute and compare results. + if (!EXPECT_TRUE(ft.execute(score, EPS))) { + return false; + } + return true; +} + + + +int +Test::Main() +{ + TEST_INIT("nativerank_test"); + + // Configure factory with all known blueprints. + setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + testNativeFieldMatch(); + testNativeAttributeMatch(); + testNativeProximity(); + testNativeRank(); + + TEST_DONE(); + return 0; +} + +} +} + +TEST_APPHOOK(search::features::Test); + diff --git a/searchlib/src/tests/nearsearch/.gitignore b/searchlib/src/tests/nearsearch/.gitignore new file mode 100644 index 00000000000..c6c72b1cd87 --- /dev/null +++ b/searchlib/src/tests/nearsearch/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +nearsearch_test +searchlib_nearsearch_test_app diff --git a/searchlib/src/tests/nearsearch/CMakeLists.txt b/searchlib/src/tests/nearsearch/CMakeLists.txt new file mode 100644 index 00000000000..a526a059a3d --- /dev/null +++ b/searchlib/src/tests/nearsearch/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_nearsearch_test_app + SOURCES + nearsearch_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_nearsearch_test_app COMMAND searchlib_nearsearch_test_app) diff --git a/searchlib/src/tests/nearsearch/DESC b/searchlib/src/tests/nearsearch/DESC new file mode 100644 index 00000000000..1af96b6ab4c --- /dev/null +++ b/searchlib/src/tests/nearsearch/DESC @@ -0,0 +1 @@ +nearsearch test. Take a look at nearsearch.cpp for details. diff --git a/searchlib/src/tests/nearsearch/FILES b/searchlib/src/tests/nearsearch/FILES new file mode 100644 index 00000000000..e8ff3e62114 --- /dev/null +++ b/searchlib/src/tests/nearsearch/FILES @@ -0,0 +1 @@ +nearsearch.cpp diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp new file mode 100644 index 00000000000..9942bcecd4a --- /dev/null +++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp @@ -0,0 +1,247 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("nearsearch_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// Utilities +// +//////////////////////////////////////////////////////////////////////////////// + +class UIntList : public std::set { +public: + UIntList &add(uint32_t i) { + std::set::insert(i); + return *this; + } +}; + +class MyTerm { +private: + std::set _docs; + std::set _data; + +public: + MyTerm(const std::set &doc, + const std::set &pos) + : _docs(doc), + _data(pos) + { + } + + search::queryeval::Blueprint::UP + make_blueprint(uint32_t fieldId, search::fef::TermFieldHandle handle) const + { + search::queryeval::FakeResult result; + for (std::set::const_iterator doc = _docs.begin(); + doc != _docs.end(); ++doc) + { + result.doc(*doc); + for (std::set::const_iterator pos = _data.begin(); + pos != _data.end(); ++pos) + { + result.pos(*pos); + } + } + return search::queryeval::Blueprint::UP( + new search::queryeval::FakeBlueprint( + search::queryeval::FieldSpec("", fieldId, handle), + result)); + } +}; + +class MyQuery { +private: + std::vector _terms; + bool _ordered; + uint32_t _window; + +public: + MyQuery(bool ordered, uint32_t window) : + _terms(), + _ordered(ordered), + _window(window) { + // empty + } + + MyQuery &addTerm(MyTerm &term) { + _terms.push_back(&term); + return *this; + } + + uint32_t getNumTerms() const { + return _terms.size(); + } + + MyTerm &getTerm(uint32_t i) { + return *_terms[i]; + } + + bool isOrdered() const { + return _ordered; + } + + uint32_t getWindow() const { + return _window; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// Setup +// +//////////////////////////////////////////////////////////////////////////////// + +class Test : public vespalib::TestApp { +private: + bool testNearSearch(MyQuery &query, uint32_t matchId); + +public: + 
int Main(); + void testBasicNear(); + void testRepeatedTerms(); +}; + +int +Test::Main() +{ + TEST_INIT("nearsearch_test"); + + testBasicNear(); TEST_FLUSH(); + testRepeatedTerms(); TEST_FLUSH(); + + TEST_DONE(); +} + +TEST_APPHOOK(Test); + +//////////////////////////////////////////////////////////////////////////////// +// +// Tests +// +//////////////////////////////////////////////////////////////////////////////// + +void +Test::testBasicNear() +{ + MyTerm foo(UIntList().add(69), + UIntList().add(6).add(11)); + for (uint32_t i = 0; i <= 1; ++i) { + TEST_STATE(vespalib::make_string("i = %u", i).c_str()); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo), 69)); + } + + MyTerm bar(UIntList().add(68).add(69).add(70), + UIntList().add(7).add(10)); + TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(bar), 0)); + for (uint32_t i = 1; i <= 2; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar), 69)); + } + + MyTerm baz(UIntList().add(69).add(70).add(71), + UIntList().add(8).add(9)); + for (uint32_t i = 0; i <= 1; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + 
TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 0)); + } + for (uint32_t i = 2; i <= 3; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 69)); + } +} + +void +Test::testRepeatedTerms() +{ + MyTerm foo(UIntList().add(69), + UIntList().add(1).add(2).add(3)); + TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(foo), 0)); + for (uint32_t i = 1; i <= 2; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo), 69)); + } + + for (uint32_t i = 0; i <= 1; ++i) { + TEST_DO(testNearSearch(MyQuery(false, 
i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 0)); + } + for (uint32_t i = 2; i <= 3; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + } +} + +bool +Test::testNearSearch(MyQuery &query, uint32_t matchId) +{ + LOG(info, "testNearSearch(%d)", matchId); + search::queryeval::IntermediateBlueprint *near_b = 0; + if (query.isOrdered()) { + near_b = new search::queryeval::ONearBlueprint(query.getWindow()); + } else { + near_b = new search::queryeval::NearBlueprint(query.getWindow()); + } + search::queryeval::Blueprint::UP bp(near_b); + search::fef::MatchDataLayout layout; + for (uint32_t i = 0; i < query.getNumTerms(); ++i) { + uint32_t fieldId = 0; + layout.allocTermField(fieldId); + near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i)); + } + search::fef::MatchData::UP md(layout.createMatchData()); + + bp->fetchPostings(true); + search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true); + near->initFullRange(); + bool foundMatch = false; + for (near->seek(1u); ! 
near->isAtEnd(); near->seek(near->getDocId() + 1)) { + uint32_t docId = near->getDocId(); + if (docId == matchId) { + foundMatch = true; + } else { + LOG(info, "Document %d matched unexpectedly.", docId); + return false; + } + } + if (matchId == 0) { + return EXPECT_TRUE(!foundMatch); + } else { + return EXPECT_TRUE(foundMatch); + } +} diff --git a/searchlib/src/tests/postinglistbm/.gitignore b/searchlib/src/tests/postinglistbm/.gitignore new file mode 100644 index 00000000000..ac71dde13e2 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/.gitignore @@ -0,0 +1,10 @@ +*.core +*.ilk +*.pdb +.depend +Makefile +core +core.* +postinglistbm +postinglistbm.exe +searchlib_postinglistbm_app diff --git a/searchlib/src/tests/postinglistbm/CMakeLists.txt b/searchlib/src/tests/postinglistbm/CMakeLists.txt new file mode 100644 index 00000000000..403c12da1b1 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_postinglistbm_app + SOURCES + postinglistbm.cpp + andstress.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_postinglistbm_app NO_VALGRIND COMMAND searchlib_postinglistbm_app -q -a) diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/andstress.cpp new file mode 100644 index 00000000000..f3fabde0d61 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/andstress.cpp @@ -0,0 +1,536 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".andstress"); +#include "andstress.h" +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using namespace search::fakedata; + +namespace postinglistbm +{ + +class AndStressWorker; + +class AndStressMaster +{ +private: + AndStressMaster(const AndStressMaster &); + + AndStressMaster & + operator=(const AndStressMaster &); + + search::Rand48 &_rnd; + unsigned int _numDocs; + unsigned int _commonDocFreq; + std::vector _postingTypes; + unsigned int _loops; + unsigned int _skipCommonPairsRate; + uint32_t _stride; + bool _unpack; + + FastOS_ThreadPool *_threadPool; + std::vector _workers; + unsigned int _workersDone; + + FakeWordSet &_wordSet; + + std::vector > _postings; + + FastOS_Cond _taskCond; + unsigned int _taskIdx; + uint32_t _numTasks; + +public: + typedef std::pair Task; +private: + std::vector _tasks; +public: + AndStressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector &postingType, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); + + ~AndStressMaster(void); + + void + run(void); + + void + makePostingsHelper(FPFactory *postingFactory, + const std::string &postingFormat, + bool validate, bool verbose); + + void + dropPostings(void); + + void + dropTasks(void); + + void + resetTasks(void); // Prepare for rerun + + void + setupTasks(unsigned int numTasks); + + Task * + getTask(void); + + unsigned int + getNumDocs(void) const + { + return _numDocs; + } + + bool + getUnpack(void) const + { + return _unpack; + } + + double + runWorkers(const std::string &postingFormat); +}; + + +class AndStressWorker : public FastOS_Runnable +{ +private: + AndStressWorker(const AndStressWorker &); + + AndStressWorker & + 
operator=(const AndStressWorker &); + + AndStressMaster &_master; + unsigned int _id; +public: + AndStressWorker(AndStressMaster &master, unsigned int id); + + ~AndStressWorker(void); + + virtual void + Run(FastOS_ThreadInterface *thisThread, void *arg); +}; + + +template +FakePosting * +makePosting(FakeWord &fw) +{ + return new P(fw); +} + + +AndStressMaster::AndStressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) + : _rnd(rnd), + _numDocs(numDocs), + _commonDocFreq(commonDocFreq), + _postingTypes(postingTypes), + _loops(loops), + _skipCommonPairsRate(skipCommonPairsRate), + _stride(stride), + _unpack(unpack), + _threadPool(NULL), + _workers(), + _workersDone(0), + _wordSet(wordSet), + _postings(FakeWordSet::NUM_WORDCLASSES), + _taskCond(), + _taskIdx(0), + _numTasks(numTasks), + _tasks() +{ + LOG(info, "AndStressMaster::AndStressMaster"); + + _threadPool = new FastOS_ThreadPool(128 * 1024, 400); +} + +template +static void +clearPtrVector(std::vector &v) +{ + for (unsigned int i = 0; i < v.size(); ++i) + delete v[i]; + v.clear(); +} + + +AndStressMaster::~AndStressMaster(void) +{ + LOG(info, "AndStressMaster::~AndStressMaster"); + + _threadPool->Close(); + delete _threadPool; + _threadPool = NULL; + clearPtrVector(_workers); + dropPostings(); +} + + +void +AndStressMaster::dropPostings(void) +{ + for (unsigned int i = 0; i < _postings.size(); ++i) + _postings[i].clear(); + dropTasks(); +} + + +void +AndStressMaster::dropTasks(void) +{ + _tasks.clear(); + _taskIdx = 0; +} + + +void +AndStressMaster::resetTasks(void) +{ + _taskIdx = 0; +} + + +static void +makeSomePostings(FPFactory *postingFactory, + std::vector &w, + std::vector &p, + uint32_t stride, + bool validate, + bool verbose) +{ + for (unsigned int i = 0; i < w.size(); ++i) { + FakePosting::SP 
np(postingFactory->make(*w[i])); + if (validate) { + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); + + std::unique_ptr sb(np->createIterator(tfmda)); + if (np->hasWordPositions()) { + if (stride != 0) + w[i]->validate(sb.get(), tfmda, stride, verbose); + else + w[i]->validate(sb.get(), tfmda, verbose); + } else + w[i]->validate(sb.get(), verbose); + } + p.push_back(np); + } +} + +void +AndStressMaster::makePostingsHelper(FPFactory *postingFactory, + const std::string &postingFormat, + bool validate, bool verbose) +{ + FastOS_Time tv; + double before; + double after; + + tv.SetNow(); + before = tv.Secs(); + postingFactory->setup(_wordSet); + for (unsigned int i = 0; i < _wordSet._words.size(); ++i) + makeSomePostings(postingFactory, + _wordSet._words[i], _postings[i], + _stride, + validate, + verbose); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "AndStressMaster::makePostingsHelper elapsed %10.6f s for %s format", + after - before, + postingFormat.c_str()); +} + + +void +AndStressMaster::setupTasks(unsigned int numTasks) +{ + unsigned int wordclass1; + unsigned int wordclass2; + unsigned int word1idx; + unsigned int word2idx; + + for (unsigned int i = 0; i < numTasks; ++i) { + wordclass1 = _rnd.lrand48() % _postings.size(); + wordclass2 = _rnd.lrand48() % _postings.size(); + while (wordclass1 == FakeWordSet::COMMON_WORD && + wordclass2 == FakeWordSet::COMMON_WORD && + (_rnd.lrand48() % _skipCommonPairsRate) != 0) { + wordclass1 = _rnd.lrand48() % _postings.size(); + wordclass2 = _rnd.lrand48() % _postings.size(); + } + word1idx = _rnd.lrand48() % _postings[wordclass1].size(); + word2idx = _rnd.lrand48() % _postings[wordclass2].size(); + FakePosting::SP p1 = _postings[wordclass1][word1idx]; + FakePosting::SP p2 = _postings[wordclass2][word2idx]; + _tasks.push_back(std::make_pair(p1.get(), p2.get())); + } +} + + +AndStressMaster::Task * +AndStressMaster::getTask(void) +{ + Task *result = NULL; + _taskCond.Lock(); + if (_taskIdx < 
_tasks.size()) { + result = &_tasks[_taskIdx]; + ++_taskIdx; + } else { + _workersDone++; + if (_workersDone == _workers.size()) + _taskCond.Broadcast(); + } + _taskCond.Unlock(); + return result; +} + +void +AndStressMaster::run(void) +{ + LOG(info, "AndStressMaster::run"); + + std::vector::const_iterator pti; + std::vector::const_iterator ptie = _postingTypes.end() ; + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + std::unique_ptr ff(getFPFactory(*pti, _wordSet.getSchema())); + makePostingsHelper(ff.get(), *pti, true, false); + setupTasks(_numTasks); + double totalTime = 0; + for (unsigned int loop = 0; loop < _loops; ++loop) { + totalTime += runWorkers(*pti); + resetTasks(); + } + LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format", + totalTime / _loops, pti->c_str()); + dropPostings(); + } + FastOS_Thread::Sleep(250); +} + + +double +AndStressMaster::runWorkers(const std::string &postingFormat) +{ + FastOS_Time tv; + double before; + double after; + + tv.SetNow(); + before = tv.Secs(); + unsigned int numWorkers = 8; + for (unsigned int i = 0; i < numWorkers; ++i) + _workers.push_back(new AndStressWorker(*this, i)); + + for (unsigned int i = 0; i < _workers.size(); ++i) + _threadPool->NewThread(_workers[i]); + _taskCond.Lock(); + while (_workersDone < _workers.size()) + _taskCond.Wait(); + _taskCond.Unlock(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "AndStressMaster::run elapsed %10.6f s for workers %s format", + after - before, + postingFormat.c_str()); + clearPtrVector(_workers); + _workersDone = 0; + return after - before; +} + + +AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id) + : _master(master), + _id(id) +{ + LOG(debug, "AndStressWorker::AndStressWorker, id=%u", id); +} + +AndStressWorker::~AndStressWorker(void) +{ + LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id); +} + + +static int +highLevelAndPairPostingScan(SearchIterator &sb1, + SearchIterator &sb2, + uint32_t 
numDocs, uint64_t *cycles)
+{
+    uint32_t hits = 0;
+    uint64_t before = fastos::ClockSystem::now();
+    sb1.initFullRange();
+    sb2.initFullRange();
+    uint32_t docId = sb1.getDocId();
+    while (docId < numDocs) {
+        if (sb1.seek(docId)) {
+            if (sb2.seek(docId)) {
+                ++hits;
+                ++docId;
+            } else if (docId < sb2.getDocId())
+                docId = sb2.getDocId();
+            else
+                ++docId;
+        } else if (docId < sb1.getDocId())
+            docId= sb1.getDocId();
+        else
+            ++docId;
+    }
+    uint64_t after = fastos::ClockSystem::now();
+    *cycles = after - before;
+    return hits;
+}
+
+
+// AND-scan of two iterators that also unpacks both iterators on every hit.
+//
+// Walks docId upwards while it is below numDocs.  A document counts as a
+// hit when both sb1.seek(docId) and sb2.seek(docId) succeed.  When a seek
+// fails, docId jumps to the failing iterator's current docid if that is
+// ahead of docId, otherwise it advances by one.
+//
+// Returns the hit count and stores in *cycles the difference between the
+// fastos::ClockSystem::now() readings taken before and after the scan.
+static int
+highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
+                                  SearchIterator &sb2,
+                                  uint32_t numDocs,
+                                  uint64_t *cycles)
+{
+    uint32_t hits = 0;
+    uint64_t before = fastos::ClockSystem::now();
+    sb1.initFullRange();
+    sb2.initFullRange();
+    uint32_t docId = sb1.getDocId();
+    while (docId < numDocs) {
+        if (sb1.seek(docId)) {
+            if (sb2.seek(docId)) {
+                ++hits;
+                sb1.unpack(docId);
+                sb2.unpack(docId);
+                ++docId;
+            } else if (docId < sb2.getDocId())
+                docId = sb2.getDocId();
+            else
+                ++docId;
+        } else if (docId < sb1.getDocId())
+            docId= sb1.getDocId();
+        else
+            ++docId;
+    }
+    uint64_t after = fastos::ClockSystem::now();
+    *cycles = after - before;
+    return hits;
+}
+
+// Runs one AND-scan over the iterators created from postings f1 and f2.
+//
+// Each posting gets its own TermFieldMatchData registered in its own
+// TermFieldMatchDataArray before the iterator is created.  The unpack flag
+// selects the unpacking or the non-unpacking scan.  The hit count is only
+// printed when the "#if 0" block is enabled; otherwise it is discarded.
+void
+testFakePair(FakePosting &f1, FakePosting &f2, unsigned int numDocs,
+             bool unpack)
+{
+    TermFieldMatchData md1;
+    TermFieldMatchDataArray tfmda1;
+    tfmda1.add(&md1);
+    std::unique_ptr sb1(f1.createIterator(tfmda1));
+
+    TermFieldMatchData md2;
+    TermFieldMatchDataArray tfmda2;
+    // Fix: md2 belongs to the second iterator's array.  It used to be added
+    // to tfmda1, which left tfmda2 empty when sb2 was created.
+    tfmda2.add(&md2);
+    std::unique_ptr sb2(f2.createIterator(tfmda2));
+
+    int hits = 0;
+    uint64_t scanUnpackTime = 0;
+    if (unpack)
+        hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
+                numDocs, &scanUnpackTime);
+    else
+        hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
+                numDocs, &scanUnpackTime);
+#if 0
+    printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
+           f1.getName().c_str(),
+           f2.getName().c_str(),
+           hits,
+           scanUnpackTime);
+#else
+
(void)hits; +#endif +} + +void +AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) +{ + (void) thisThread; + (void) arg; + LOG(debug, "AndStressWorker::Run, id=%u", _id); + + bool unpack = _master.getUnpack(); + for (;;) { + AndStressMaster::Task *task = _master.getTask(); + if (task == NULL) + break; + testFakePair(*task->first, *task->second, _master.getNumDocs(), + unpack); + } +} + + +AndStress::AndStress(void) +{ + LOG(debug, "Andstress::AndStress"); +} + + +AndStress::~AndStress(void) +{ + LOG(debug, "Andstress::~AndStress"); +} + +void +AndStress::run(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) +{ + LOG(debug, "Andstress::run"); + AndStressMaster master(rnd, wordSet, + numDocs, commonDocFreq, postingTypes, loops, + skipCommonPairsRate, + numTasks, + stride, + unpack); + master.run(); +} + +} diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h new file mode 100644 index 00000000000..458866b09d5 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/andstress.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + + +#include +namespace search +{ +class Rand48; + +namespace fakedata +{ + +class FakeWordSet; + +} + +} + +namespace postinglistbm +{ + +class AndStress +{ +public: + AndStress(void); + + ~AndStress(void); + + void + run(search::Rand48 &rnd, + search::fakedata::FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); +}; + +} // namespace postinglistbm + diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp new file mode 100644 index 00000000000..fc93eb42dcd --- /dev/null +++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp @@ -0,0 +1,491 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP("postinglistbm"); +#include +#include +#include +#include "andstress.h" +#include +#include +#include +#include +#include + +using search::ResultSet; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using search::index::Schema; +using namespace search::fakedata; + +// needed to resolve external symbol from httpd.h on AIX +void FastS_block_usr2() {} + + +namespace postinglistbm +{ + +class PostingListBM : public FastOS_Application +{ +private: + bool _verbose; + uint32_t _numDocs; + uint32_t _commonDocFreq; + uint32_t _numWordsPerClass; + std::vector _postingTypes; + uint32_t _loops; + unsigned int _skipCommonPairsRate; + FakeWordSet _wordSet; + uint32_t _stride; + bool _unpack; +public: + search::Rand48 _rnd; + +private: + void Usage(void); + + void + badPostingType(const std::string &postingType); + + void + testFake(const std::string &postingType, + 
const Schema &schema, + const FakeWord &fw); +public: + PostingListBM(void); + ~PostingListBM(void); + int Main(void); +}; + + +void +PostingListBM::Usage(void) +{ + printf("postinglistbm " + "[-C ] " + "[-a] " + "[-c ] " + "[-d ] " + "[-l ] " + "[-s ] " + "[-t ] " + "[-u] " + "[-q] " + "[-v]\n"); +} + + +void +PostingListBM::badPostingType(const std::string &postingType) +{ + printf("Bad posting list type: %s\n", postingType.c_str()); + printf("Supported types: "); + + std::vector postingTypes = getPostingTypes(); + std::vector::const_iterator pti; + std::vector::const_iterator ptie = postingTypes.end(); + bool first = true; + + for (pti = postingTypes.begin(); pti != ptie; ++pti) { + if (first) + first = false; + else + printf(", "); + printf("%s", pti->c_str()); + } + printf("\n"); +} + + +PostingListBM::PostingListBM(void) + : _verbose(false), + _numDocs(10000000), + _commonDocFreq(50000), + _numWordsPerClass(100), + _postingTypes(), + _loops(1), + _skipCommonPairsRate(1), + _wordSet(), + _stride(0), + _unpack(false), + _rnd() +{ +} + + +PostingListBM::~PostingListBM(void) +{ +} + + +static int +highLevelSinglePostingScan(SearchIterator &sb, uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb.initFullRange(); + uint32_t docId = sb.getDocId(); + while (docId < numDocs) { + if (sb.seek(docId)) { + ++hits; + ++docId; + } else if (docId < sb.getDocId()) + docId= sb.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +static int +highLevelSinglePostingScanUnpack(SearchIterator &sb, + uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb.initFullRange(); + uint32_t docId = sb.getDocId(); + while (docId < numDocs) { + if (sb.seek(docId)) { + ++hits; + sb.unpack(docId); + ++docId; + } else if (docId < sb.getDocId()) + docId= sb.getDocId(); + else + ++docId; + } + uint64_t 
after = fastos::ClockSystem::now();
+    *cycles = after - before;
+    return hits;
+}
+
+
+// AND-scan of two iterators without unpacking.
+//
+// Walks docId upwards while it is below numDocs.  A document counts as a
+// hit when both sb1.seek(docId) and sb2.seek(docId) succeed.  When a seek
+// fails, docId jumps to the failing iterator's current docid if that is
+// ahead of docId, otherwise it advances by one.
+//
+// Returns the hit count and stores in *cycles the difference between the
+// fastos::ClockSystem::now() readings taken before and after the scan.
+static int
+highLevelAndPairPostingScan(SearchIterator &sb1,
+                            SearchIterator &sb2,
+                            uint32_t numDocs, uint64_t *cycles)
+{
+    uint32_t hits = 0;
+    uint64_t before = fastos::ClockSystem::now();
+    sb1.initFullRange();
+    sb2.initFullRange();
+    uint32_t docId = sb1.getDocId();
+    while (docId < numDocs) {
+        if (sb1.seek(docId)) {
+            if (sb2.seek(docId)) {
+                ++hits;
+                ++docId;
+            } else if (docId < sb2.getDocId())
+                docId = sb2.getDocId();
+            else
+                ++docId;
+        } else if (docId < sb1.getDocId())
+            docId= sb1.getDocId();
+        else
+            ++docId;
+    }
+    uint64_t after = fastos::ClockSystem::now();
+    *cycles = after - before;
+    return hits;
+}
+
+
+// Same scan as highLevelAndPairPostingScan, but unpacks both iterators on
+// every hit before moving on to the next document.
+static int
+highLevelAndPairPostingScanUnpack(SearchIterator &sb1,
+                                  SearchIterator &sb2,
+                                  uint32_t numDocs,
+                                  uint64_t *cycles)
+{
+    uint32_t hits = 0;
+    uint64_t before = fastos::ClockSystem::now();
+    sb1.initFullRange();
+    // Fix: the second iterator must be initialised as well.  This line used
+    // to call sb1.initFullRange() a second time, leaving sb2 uninitialised.
+    sb2.initFullRange();
+    uint32_t docId = sb1.getDocId();
+    while (docId < numDocs) {
+        if (sb1.seek(docId)) {
+            if (sb2.seek(docId)) {
+                ++hits;
+                sb1.unpack(docId);
+                sb2.unpack(docId);
+                ++docId;
+            } else if (docId < sb2.getDocId())
+                docId = sb2.getDocId();
+            else
+                ++docId;
+        } else if (docId < sb1.getDocId())
+            docId= sb1.getDocId();
+        else
+            ++docId;
+    }
+    uint64_t after = fastos::ClockSystem::now();
+    *cycles = after - before;
+    return hits;
+}
+
+
+void
+PostingListBM::testFake(const std::string &postingType,
+                        const Schema &schema,
+                        const FakeWord &fw)
+{
+    std::unique_ptr ff(getFPFactory(postingType, schema));
+    std::vector v;
+    v.push_back(&fw);
+    ff->setup(v);
+    FakePosting::SP f(ff->make(fw));
+
+    printf("%s.bitsize=%d+%d+%d+%d+%d\n",
+           f->getName().c_str(),
+           static_cast(f->bitSize()),
+           static_cast(f->l1SkipBitSize()),
+           static_cast(f->l2SkipBitSize()),
+           static_cast(f->l3SkipBitSize()),
+           static_cast(f->l4SkipBitSize()));
+    TermFieldMatchData md;
+    TermFieldMatchDataArray tfmda;
+    tfmda.add(&md);
+
+    std::unique_ptr
sb(f->createIterator(tfmda));
+    if (f->hasWordPositions())
+        fw.validate(sb.get(), tfmda, _verbose);
+    else
+        fw.validate(sb.get(), _verbose);
+    uint64_t scanTime = 0;
+    uint64_t scanUnpackTime = 0;
+    TermFieldMatchData md2;
+    TermFieldMatchDataArray tfmda2;
+    tfmda2.add(&md2);
+
+    std::unique_ptr sb2(f->createIterator(tfmda2));
+    int hits1 = highLevelSinglePostingScan(*sb2.get(), fw.getDocIdLimit(),
+                                           &scanTime);
+    TermFieldMatchData md3;
+    TermFieldMatchDataArray tfmda3;
+    tfmda3.add(&md3);
+
+    std::unique_ptr sb3(f->createIterator(tfmda3));
+    int hits2 = highLevelSinglePostingScanUnpack(*sb3.get(), fw.getDocIdLimit(),
+                                                 &scanUnpackTime);
+    printf("testFake '%s' hits1=%d, hits2=%d, scanTime=%" PRIu64
+           ", scanUnpackTime=%" PRIu64 "\n",
+           f->getName().c_str(),
+           hits1, hits2, scanTime, scanUnpackTime);
+}
+
+
+// Builds postings for fw1 and fw2 with the factory selected by postingType,
+// runs one AND-scan over their iterators and prints the hit count together
+// with the value the scan stored through its cycles argument.
+//
+// Each posting gets its own TermFieldMatchData registered in its own
+// TermFieldMatchDataArray before the iterator is created.  The unpack flag
+// selects the unpacking or the non-unpacking scan; fw1.getDocIdLimit() is
+// the upper docid bound in both cases.
+void
+testFakePair(const std::string &postingType,
+             const Schema &schema,
+             bool unpack,
+             const FakeWord &fw1, const FakeWord &fw2)
+{
+    std::unique_ptr ff(getFPFactory(postingType, schema));
+    std::vector v;
+    v.push_back(&fw1);
+    v.push_back(&fw2);
+    ff->setup(v);
+    FakePosting::SP f1(ff->make(fw1));
+    FakePosting::SP f2(ff->make(fw2));
+
+    TermFieldMatchData md1;
+    TermFieldMatchDataArray tfmda1;
+    tfmda1.add(&md1);
+    std::unique_ptr sb1(f1->createIterator(tfmda1));
+
+    TermFieldMatchData md2;
+    TermFieldMatchDataArray tfmda2;
+    // Fix: md2 belongs to the second iterator's array.  It used to be added
+    // to tfmda1, which left tfmda2 empty when sb2 was created.
+    tfmda2.add(&md2);
+    std::unique_ptr sb2(f2->createIterator(tfmda2));
+
+    int hits = 0;
+    uint64_t scanUnpackTime = 0;
+    if (unpack)
+        hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(),
+                fw1.getDocIdLimit(), &scanUnpackTime);
+    else
+        hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(),
+                fw1.getDocIdLimit(), &scanUnpackTime);
+    printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n",
+           f1->getName().c_str(),
+           f2->getName().c_str(),
+           hits,
+           scanUnpackTime);
+}
+
+
+int
+PostingListBM::Main(void)
+{
+    int argi;
+    char c;
+    const char *optArg;
+    bool doandstress;
+
+    doandstress = false;
+    argi = 1;
+
bool hasElements = false; + bool hasElementWeights = false; + bool quick = false; + + while ((c = GetOpt("C:ac:d:l:s:t:uvw:T:q", optArg, argi)) != -1) { + switch(c) { + case 'C': /* NOTE(review): atoi() reports no errors, so a non-numeric argument silently becomes 0; the same applies to -c, -d, -l, -s and -w */ + _skipCommonPairsRate = atoi(optArg); + break; + case 'T': /* collection type: single, array or weightedSet */ + if (strcmp(optArg, "single") == 0) { + hasElements = false; + hasElementWeights = false; + } else if (strcmp(optArg, "array") == 0) { + hasElements = true; + hasElementWeights = false; + } else if (strcmp(optArg, "weightedSet") == 0) { + hasElements = true; + hasElementWeights = true; + } else { + printf("Bad collection type: %s\n", optArg); + return 1; + } + break; + case 'a': /* enable the AndStress run at the end */ + doandstress = true; + break; + case 'c': + _commonDocFreq = atoi(optArg); + break; + case 'd': + _numDocs = atoi(optArg); + break; + case 'l': + _loops = atoi(optArg); + break; + case 's': + _stride = atoi(optArg); + break; + case 't': /* posting type; rejected unless getFPFactory() returns a factory for it */ + do { + Schema schema; + Schema::IndexField indexField("field0", + Schema::STRING, + Schema::SINGLE); + schema.addIndexField(indexField); + std::unique_ptr ff(getFPFactory(optArg, schema)); + if (ff.get() == NULL) { + badPostingType(optArg); + return 1; + } + } while (0); + _postingTypes.push_back(optArg); + break; + case 'u': + _unpack = true; + break; + case 'v': + _verbose = true; + break; + case 'w': + _numWordsPerClass = atoi(optArg); + break; + case 'q': /* quick mode: overrides sizes with small presets */ + quick = true; + _numDocs = 36000; + _commonDocFreq = 10000; + _numWordsPerClass = 5; + break; + default: + Usage(); + return 1; + } + } + + if (_commonDocFreq > _numDocs) { + Usage(); + return 1; + } + + _wordSet.setupParams(hasElements, hasElementWeights); + + uint32_t w1dfreq = 10; + uint32_t w4dfreq = 790000; + uint32_t w5dfreq = 290000; + uint32_t w4w5od = 100000; + uint32_t numTasks = 40000; + if (quick) { + w1dfreq = 2; + w4dfreq = 19000; + w5dfreq = 5000; + w4w5od = 1000; + numTasks = 40; + } + + + FakeWord word1(_numDocs, w1dfreq, w1dfreq / 2, "word1", _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word2(_numDocs, 1000, 500, 
"word2", word1, 4, _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word3(_numDocs, _commonDocFreq, _commonDocFreq / 2, + "word3", word1, 10, _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word4(_numDocs, w4dfreq, w4dfreq / 2, + "word4", _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word5(_numDocs, w5dfreq, w5dfreq / 2, + "word5", word4, w4w5od, _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + + if (_postingTypes.empty()) + _postingTypes = getPostingTypes(); + std::vector::const_iterator pti; + std::vector::const_iterator ptie = _postingTypes.end() ; + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFake(*pti, _wordSet.getSchema(), word1); + testFake(*pti, _wordSet.getSchema(), word2); + testFake(*pti, _wordSet.getSchema(), word3); + } + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFakePair(*pti, _wordSet.getSchema(), false, word1, word3); + testFakePair(*pti, _wordSet.getSchema(), false, word2, word3); + } + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFakePair(*pti, _wordSet.getSchema(), false, word4, word5); + } + + if (doandstress) { + _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass); + } + if (doandstress) { + AndStress andstress; + andstress.run(_rnd, _wordSet, + _numDocs, _commonDocFreq, _postingTypes, _loops, + _skipCommonPairsRate, + numTasks, + _stride, + _unpack); + } + return 0; +} + +} // namespace postinglistbm + +int +main(int argc, char **argv) +{ + postinglistbm::PostingListBM app; + + setvbuf(stdout, NULL, _IOLBF, 32768); + app._rnd.srand48(32); + int rc = app.Entry(argc, argv); /* exit status comes from the application */ + + return rc; +} diff --git a/searchlib/src/tests/postinglistbm/skip.txt b/searchlib/src/tests/postinglistbm/skip.txt new file mode 100644 index 00000000000..9804bce3c33 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/skip.txt @@ -0,0 +1,75 @@ +B tree view: + + Leaf Nodes: segments of docid delta 
list + Interior Nodes: Segments of skip info lists + + Interior Nodes 1 level above leaf nodes: L1 skip info + Interior Nodes 2 levels above leaf nodes: L2 skip info + +Example posting list, with stride 4 for L1 skip and L2 skip: + +DocIdPos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 17 18 +DocId: 1 11 21 31|41 51 61 71|81 91 101 111|121 131 141 151|161 171 181 + +(Assume continued with every 10th docid present) + +Old L1 skip info, pointing to start of leaf nodes, with first docid in +leaf node pre-decoded (i.e. containing copy of first docid entry in leaf node): + +L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 +DocId: 41 81 121 161|201 241 281 321|361 401 441 481|521 561 601 641|681 +DocIdPos: 5 9 13 17| 21 25 29 33| 37 41 45 49| 53 57 61 65| 69 + +Old L2 skip info, pointing to start of interior nodes 1 level above leaf nodes +and containing copies of previous L1 skip entry: + +L2Pos: 0 1 2 3 +DocId: 161 321 481 641 +DocIdPos: 17 33 49 65 +L1Pos: 4 8 12 16 + +Reason for change of skip info view: Avoiding null skips, simplifying code. + +Skip from docId 1 to docId 115 first skips to DocId 81 before ending +up at DocId 121. If next seek is to below 161, a null skip to docid +121 is performed since docId delta unpacking caught up with supposedly +next L1 skip docid. With L1 skip stride being N, 1/N of longer seeks +will unpack N extra docids, eating up the advantage of first docid in +leaf node being pre-decoded. + +If a seek to docId 115 is followed by a seek to docId 121, an unpack +of docId 121 and a seek to a higher docid, this causes, with the old L1 +skip info, features for docId 81, 91, 101, 111 to be decoded with the +result ignored before the features for docId 121 are decoded. For the +next seek, the null skip of DocId is also associated with a backwards +skip for features, so if the next feature to be decoded was for docId +141 then features for docId 121 will be decoded again and ignored. 
+ +New L1 skip info, pointing to start of leaf nodes, without first docid +in leaf node pre-decoded (i.e. containing copy of last docid entry in +previous leaf node): + +L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 +DocId: 31 71 111 151|191 231 271 311|351 391 431 471|511 551 591 631|671 +DocIdPos: 4 8 12 16| 20 24 28 32| 36 40 44 48| 52 56 60 64| 68 + +New L2 skip info, pointing to start of interior nodes 1 level above leaf nodes +and containing copies of previous L1 skip entry: + +L2Pos: 0 1 2 3 +DocId: 151 311 471 631 +DocIdPos: 16 32 48 64 +L1Pos: 4 8 12 16 + +1 DocId delta is unpacked when using L1 or L2 skip, to get first docid +in leaf node. With old skip info, this wasn't needed. + +With new skip info, docid delta unpacking should never catch up with +next L1 skip docid (can become equal, but that's no longer sufficient +for triggering a skip). + +For each level upwards in skip info, one extra number is needed per element in +the skip info. + +For feature position (split docid/features), one extra number is needed per +element in the skip info. 
diff --git a/searchlib/src/tests/predicate/.gitignore b/searchlib/src/tests/predicate/.gitignore new file mode 100644 index 00000000000..eea4d347d05 --- /dev/null +++ b/searchlib/src/tests/predicate/.gitignore @@ -0,0 +1,13 @@ +searchlib_document_features_store_test_app +searchlib_predicate_bounds_posting_list_test_app +searchlib_predicate_index_test_app +searchlib_predicate_interval_posting_list_test_app +searchlib_predicate_interval_store_test_app +searchlib_predicate_range_term_expander_test_app +searchlib_predicate_ref_cache_test_app +searchlib_predicate_tree_analyzer_test_app +searchlib_predicate_tree_annotator_test_app +searchlib_predicate_zero_constraint_posting_list_test_app +searchlib_predicate_zstar_compressed_posting_list_test_app +searchlib_simple_index_test_app +searchlib_tree_crumbs_test_app diff --git a/searchlib/src/tests/predicate/CMakeLists.txt b/searchlib/src/tests/predicate/CMakeLists.txt new file mode 100644 index 00000000000..cd15356eeee --- /dev/null +++ b/searchlib/src/tests/predicate/CMakeLists.txt @@ -0,0 +1,92 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_predicate_index_test_app + SOURCES + predicate_index_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_index_test_app COMMAND searchlib_predicate_index_test_app) +vespa_add_executable(searchlib_simple_index_test_app + SOURCES + simple_index_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_simple_index_test_app COMMAND searchlib_simple_index_test_app) +vespa_add_executable(searchlib_tree_crumbs_test_app + SOURCES + tree_crumbs_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tree_crumbs_test_app COMMAND searchlib_tree_crumbs_test_app) +vespa_add_executable(searchlib_predicate_tree_analyzer_test_app + SOURCES + predicate_tree_analyzer_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_tree_analyzer_test_app COMMAND searchlib_predicate_tree_analyzer_test_app) +vespa_add_executable(searchlib_predicate_tree_annotator_test_app + SOURCES + predicate_tree_annotator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_tree_annotator_test_app COMMAND searchlib_predicate_tree_annotator_test_app) +vespa_add_executable(searchlib_predicate_interval_store_test_app + SOURCES + predicate_interval_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_interval_store_test_app COMMAND searchlib_predicate_interval_store_test_app) +vespa_add_executable(searchlib_document_features_store_test_app + SOURCES + document_features_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_features_store_test_app COMMAND searchlib_document_features_store_test_app) +vespa_add_executable(searchlib_predicate_ref_cache_test_app + SOURCES + predicate_ref_cache_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_ref_cache_test_app COMMAND searchlib_predicate_ref_cache_test_app) +vespa_add_executable(searchlib_predicate_interval_posting_list_test_app + SOURCES + 
predicate_interval_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_interval_posting_list_test_app COMMAND searchlib_predicate_interval_posting_list_test_app) +vespa_add_executable(searchlib_predicate_bounds_posting_list_test_app + SOURCES + predicate_bounds_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_bounds_posting_list_test_app COMMAND searchlib_predicate_bounds_posting_list_test_app) +vespa_add_executable(searchlib_predicate_zero_constraint_posting_list_test_app + SOURCES + predicate_zero_constraint_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_zero_constraint_posting_list_test_app COMMAND searchlib_predicate_zero_constraint_posting_list_test_app) +vespa_add_executable(searchlib_predicate_zstar_compressed_posting_list_test_app + SOURCES + predicate_zstar_compressed_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_zstar_compressed_posting_list_test_app COMMAND searchlib_predicate_zstar_compressed_posting_list_test_app) +vespa_add_executable(searchlib_predicate_range_term_expander_test_app + SOURCES + predicate_range_term_expander_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_range_term_expander_test_app COMMAND searchlib_predicate_range_term_expander_test_app) diff --git a/searchlib/src/tests/predicate/OWNERS b/searchlib/src/tests/predicate/OWNERS new file mode 100644 index 00000000000..569bf1cc3a1 --- /dev/null +++ b/searchlib/src/tests/predicate/OWNERS @@ -0,0 +1 @@ +bjorncs diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp new file mode 100644 index 00000000000..4baf2d03fbe --- /dev/null +++ b/searchlib/src/tests/predicate/document_features_store_test.cpp @@ -0,0 +1,225 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +// Unit tests for document_features_store. + +#include +LOG_SETUP("document_features_store_test"); +#include + +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using std::string; + +namespace { + +const uint64_t hash1 = 0x12345678; +const uint64_t hash2 = 0x123456789a; +const uint32_t doc_id = 42; + +TEST("require that DocumentFeaturesStore can store features.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.features.push_back(hash1); + annotations.features.push_back(hash2); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + EXPECT_EQUAL(1u, features.count(hash1)); + EXPECT_EQUAL(1u, features.count(hash2)); + + features_store.remove(doc_id); + features = features_store.get(doc_id); + EXPECT_TRUE(features.empty()); +} + +template +void expectHash(const string &label, const Set &set) { + TEST_STATE(label.c_str()); + uint64_t hash = PredicateHash::hash64(label); + EXPECT_EQUAL(1u, set.count(hash)); +} + +TEST("require that DocumentFeaturesStore can store ranges.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 2, 4}); + annotations.range_features.push_back({"bar", 7, 13}); + annotations.range_features.push_back({"baz", 9, 19}); + annotations.range_features.push_back({"qux", -10, 10}); + annotations.range_features.push_back({"quux", -39, -10}); + annotations.range_features.push_back({"corge", -9, -1}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(13u, features.size()); + expectHash("foo=0", features); + + expectHash("bar=0", features); + expectHash("bar=10", features); + + expectHash("baz=0", features); + expectHash("baz=10-19", features); + + expectHash("qux=-10", features); + 
expectHash("qux=-9-0", features); + expectHash("qux=10", features); + expectHash("qux=0-9", features); + + expectHash("quux=-19-10", features); + expectHash("quux=-29-20", features); + expectHash("quux=-39-30", features); + + expectHash("corge=-9-0", features); +} + +TEST("require that DocumentFeaturesStore can store large ranges.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 10, 199}); + annotations.range_features.push_back({"bar", 100, 239}); + annotations.range_features.push_back({"baz", -999, 999}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(17u, features.size()); + expectHash("foo=10-19", features); + expectHash("foo=20-29", features); + expectHash("foo=30-39", features); + expectHash("foo=40-49", features); + expectHash("foo=50-59", features); + expectHash("foo=60-69", features); + expectHash("foo=70-79", features); + expectHash("foo=80-89", features); + expectHash("foo=90-99", features); + expectHash("foo=100-199", features); + + expectHash("bar=200-209", features); + expectHash("bar=210-219", features); + expectHash("bar=220-229", features); + expectHash("bar=230-239", features); + expectHash("bar=100-199", features); + + expectHash("baz=-999-0", features); + expectHash("baz=0-999", features); +} + +TEST("require that DocumentFeaturesStore can use very large ranges.") { + DocumentFeaturesStore features_store(2); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", LLONG_MIN, 39}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(4u, features.size()); + expectHash("foo=-9223372036854775808", features); + expectHash("foo=-9223372036854775807-0", features); + expectHash("foo=0-31", features); + expectHash("foo=32-39", features); +} + +TEST("require that duplicate range features are removed.") { + 
DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 80, 199}); + annotations.range_features.push_back({"foo", 85, 199}); + annotations.range_features.push_back({"foo", 90, 199}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(4u, features.size()); + expectHash("foo=80-89", features); + expectHash("foo=90-99", features); + expectHash("foo=100-199", features); + expectHash("foo=80", features); +} + +TEST("require that only unique features are returned") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(1u, features.size()); + expectHash("foo=100-199", features); +} + +TEST("require that both features and ranges are removed by 'remove'") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + features_store.remove(doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(0u, features.size()); +} + +TEST("require that both features and ranges counts towards memory usage") { + DocumentFeaturesStore features_store(10); + EXPECT_EQUAL(332u, features_store.getMemoryUsage().usedBytes()); + + PredicateTreeAnnotations annotations; + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + EXPECT_EQUAL(340u, features_store.getMemoryUsage().usedBytes()); + + annotations.features.clear(); + annotations.range_features.push_back({"foo", 100, 199}); + features_store.insert(annotations, doc_id + 
1); + EXPECT_EQUAL(436u, features_store.getMemoryUsage().usedBytes()); +} + +TEST("require that DocumentFeaturesStore can be serialized") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=bar")); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + expectHash("foo=bar", features); + expectHash("foo=100-199", features); + + vespalib::MMapDataBuffer buffer; + features_store.serialize(buffer); + + DocumentFeaturesStore features_store2(buffer); + features = features_store2.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + expectHash("foo=bar", features); + expectHash("foo=100-199", features); +} + +TEST("require that serialization cleans up wordstore") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + features_store.insert(annotations, doc_id); + EXPECT_EQUAL(428u, features_store.getMemoryUsage().usedBytes()); + annotations.range_features.push_back({"bar", 100, 199}); + features_store.insert(annotations, doc_id + 1); + EXPECT_EQUAL(720u, features_store.getMemoryUsage().usedBytes()); + features_store.remove(doc_id + 1); + EXPECT_EQUAL(672u, features_store.getMemoryUsage().usedBytes()); + + vespalib::MMapDataBuffer buffer; + features_store.serialize(buffer); + DocumentFeaturesStore features_store2(buffer); + EXPECT_EQUAL(428u, features_store2.getMemoryUsage().usedBytes()); +} + + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp new file mode 100644 index 00000000000..c54e6f49cc7 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp @@ -0,0 +1,107 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_bounds_posting_list. + +#include +LOG_SETUP("predicate_bounds_posting_list_test"); +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty bounds posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + btree::EntryRef ref; + PredicateBoundsPostingList + posting_list(index.getIntervalStore(), + index.getBoundsIndex().getBTreePostingList(ref), 42); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +void checkNext(PredicateBoundsPostingList &posting_list, uint32_t move_past, + uint32_t doc_id, uint32_t interval_count) { + std::ostringstream ost; + ost << "checkNext(posting_list, " << move_past << ", " << doc_id + << ", " << interval_count << ")"; + TEST_STATE(ost.str().c_str()); + ASSERT_TRUE(posting_list.next(move_past)); + EXPECT_EQUAL(doc_id, posting_list.getDocId()); + for (uint32_t i = 0; i < interval_count - 1; ++i) { + ASSERT_TRUE(posting_list.nextInterval()); + } + ASSERT_FALSE(posting_list.nextInterval()); +} + +TEST("require that bounds posting list checks bounds.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + const auto &bounds_index = index.getBoundsIndex(); + for (uint32_t id = 1; id < 100; ++id) { + PredicateTreeAnnotations annotations(id); + 
auto &vec = annotations.bounds_map[hash]; + for (uint32_t i = 0; i <= id; ++i) { + uint32_t bounds; + if (id < 30) { + bounds = 0x80000000 | i; // diff >= i + } else if (id < 60) { + bounds = 0x40000000 | i; // diff < i + } else { + bounds = (i << 16) | (i + 10); // i < diff < i+10 + } + vec.push_back(IntervalWithBounds{(i + 1) << 16 | 0xffff, bounds}); + } + index.indexDocument(id, annotations); + } + index.commit(); + auto it = bounds_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto ref = it.getData(); + + PredicateBoundsPostingList + posting_list(index.getIntervalStore(), + bounds_index.getBTreePostingList(ref), 5); + checkNext(posting_list, 0, 1, 2); // [0..] -> [1..] + checkNext(posting_list, 1, 2, 3); // [0..] -> [2..] + checkNext(posting_list, 10, 11, 6); // [0..] -> [5..] + checkNext(posting_list, 20, 21, 6); + + checkNext(posting_list, 30, 31, 26); // [..5] -> [..30] + checkNext(posting_list, 50, 51, 46); + + checkNext(posting_list, 60, 61, 6); // [0..10] -> [5..15] + + + PredicateBoundsPostingList + posting_list2(index.getIntervalStore(), + bounds_index.getBTreePostingList(ref), 40); + checkNext(posting_list2, 0, 1, 2); + checkNext(posting_list2, 1, 2, 3); + checkNext(posting_list2, 20, 21, 22); // [0..] -> [21..] + + checkNext(posting_list2, 30, 41, 1); // skip ahead to match + checkNext(posting_list2, 35, 41, 1); + checkNext(posting_list2, 50, 51, 11); // [..40] -> [..50] + + checkNext(posting_list2, 60, 61, 10); // [31..40] -> [40..49] +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp new file mode 100644 index 00000000000..b22c80294d0 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -0,0 +1,363 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_index. 
+ +#include +LOG_SETUP("predicate_index_test"); +#include + +#include +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using std::make_pair; +using std::pair; +using std::vector; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider dummy_provider; +SimpleIndexConfig simple_index_config; + +TEST("require that PredicateIndex can index empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); +} + +TEST("require that indexDocument don't index empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + PredicateTreeAnnotations annotations; + index.indexDocument(3, annotations); + index.commit(); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); +} + +TEST("require that PredicateIndex can remove empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); + index.removeDocument(2); + index.commit(); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); +} + +TEST("require that indexing the same empty document multiple times is ok") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, 
index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); +} + +void indexFeature(PredicateIndex &attr, uint32_t doc_id, int min_feature, + const vector> &intervals, + const vector> &bounds) { + PredicateTreeAnnotations annotations(min_feature); + for (auto &p : intervals) { + annotations.interval_map[p.first] = std::vector{{p.second}}; + annotations.features.push_back(p.first); + } + for (auto &p : bounds) { + annotations.bounds_map[p.first] = + std::vector{{p.second}}; + annotations.features.push_back(p.first); + } + attr.indexDocument(doc_id, annotations); +} + +PredicateIndex::BTreeIterator +lookupPosting(const PredicateIndex &index, uint64_t hash) { + const auto &interval_index = index.getIntervalIndex(); + auto it = interval_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto entry = it.getData(); + EXPECT_TRUE(entry.valid()); + + auto posting_it = interval_index.getBTreePostingList(entry); + ASSERT_TRUE(posting_it.valid()); + return posting_it; +} + +const int min_feature = 3; +const int k = min_feature - 1; +const uint32_t doc_id = 2; +const uint64_t hash = 0x12345; +const uint64_t hash2 = 0x3456; +const Interval interval = {0x0001ffff}; +const IntervalWithBounds bounds = {0x0001ffff, 0x03}; +Interval single_buf; + +TEST("require that PredicateIndex can index document") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, {{hash, interval}}, {}); + index.commit(); + + auto posting_it = lookupPosting(index, hash); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + uint32_t size; + const auto &interval_list = + index.getIntervalStore().get(posting_it.getData(), size, &single_buf); + ASSERT_EQUAL(1u, size); + 
EXPECT_EQUAL(interval, interval_list[0]); +} + +TEST("require that PredicateIndex can index document with bounds") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, {}, {{hash, bounds}}); + index.commit(); + + const auto &bounds_index = index.getBoundsIndex(); + auto it = bounds_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto entry = it.getData(); + EXPECT_TRUE(entry.valid()); + + auto posting_it = bounds_index.getBTreePostingList(entry); + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + + uint32_t size; + IntervalWithBounds single; + const auto &interval_list = + index.getIntervalStore().get(posting_it.getData(), size, &single); + ASSERT_EQUAL(1u, size); + EXPECT_EQUAL(bounds, interval_list[0]); +} + +TEST("require that PredicateIndex can index multiple documents " + "with the same feature") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, min_feature, {{hash, interval}}, {}); + } + index.commit(); + + auto posting_it = lookupPosting(index, hash); + for (uint32_t id = 1; id < 100; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + uint32_t size; + const auto &interval_list = index.getIntervalStore().get( + posting_it.getData(), size, &single_buf); + ASSERT_EQUAL(1u, size); + EXPECT_EQUAL(interval, interval_list[0]); + ++posting_it; + } + ASSERT_FALSE(posting_it.valid()); +} + +TEST("require that PredicateIndex can remove indexed documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, 
+ {{hash, interval}}, {{hash2, bounds}}); + index.removeDocument(doc_id); + index.commit(); + auto it = index.getIntervalIndex().lookup(hash); + ASSERT_FALSE(it.valid()); + auto it2 = index.getBoundsIndex().lookup(hash2); + ASSERT_FALSE(it2.valid()); + + // Remove again. Nothing should happen. + index.removeDocument(doc_id); +} + +TEST("require that PredicateIndex can remove multiple documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + const auto &interval_index = index.getIntervalIndex(); + EXPECT_FALSE(interval_index.lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, min_feature, {{hash, interval}}, {}); + } + index.commit(); + for (uint32_t id = 1; id < 110; ++id) { + index.removeDocument(id); + index.commit(); + auto it = interval_index.lookup(hash); + if (id < 99) { + ASSERT_TRUE(it.valid()); + } else { + ASSERT_FALSE(it.valid()); + } + } +} + +TEST("require that PredicateIndex can remove multiple documents with " + "multiple features") { + vector> intervals; + vector> bounds_intervals; + for (int i = 0; i < 100; ++i) { + intervals.push_back(make_pair(hash + i, interval)); + bounds_intervals.push_back(make_pair(hash2 + i, bounds)); + } + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + const auto &interval_index = index.getIntervalIndex(); + EXPECT_FALSE(interval_index.lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, id, intervals, bounds_intervals); + } + index.commit(); + for (uint32_t id = 1; id < 100; ++id) { + index.removeDocument((id + 50) % 99 + 1); + index.commit(); + auto it = interval_index.lookup(hash); + if (id < 99) { + ASSERT_TRUE(it.valid()); + } else { + ASSERT_FALSE(it.valid()); + } + } +} + +// Helper function for next test. 
+template +void checkAllIntervals(Iterator posting_it, IntervalT expected_interval, + const PredicateIntervalStore &interval_store) { + for (uint32_t id = 1; id < 100u; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + btree::EntryRef ref = posting_it.getData(); + ASSERT_TRUE(ref.valid()); + uint32_t size; + IntervalT single; + const IntervalT *read_interval = + interval_store.get(ref, size, &single); + EXPECT_EQUAL(1u, size); + EXPECT_EQUAL(expected_interval, read_interval[0]); + ++posting_it; + } +} + +namespace { +struct DocIdLimitFinder : SimpleIndexDeserializeObserver<> { + uint32_t &_doc_id_limit; + DocIdLimitFinder(uint32_t &doc_id_limit) : _doc_id_limit(doc_id_limit) + { + doc_id_limit = 0u; + } + void notifyInsert(uint64_t, uint32_t doc_id, uint32_t) { + _doc_id_limit = std::max(_doc_id_limit, doc_id); + } +}; +} + +TEST("require that PredicateIndex can be (de)serialized") { + vector> intervals; + vector> bounds_intervals; + for (int i = 0; i < 100; ++i) { + intervals.push_back(make_pair(hash + i, interval)); + bounds_intervals.push_back(make_pair(hash2 + i, bounds)); + } + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 8); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, id, intervals, bounds_intervals); + index.indexEmptyDocument(id + 100); + } + index.commit(); + + vespalib::MMapDataBuffer buffer; + index.serialize(buffer); + uint32_t doc_id_limit; + DocIdLimitFinder finder(doc_id_limit); + PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); + const PredicateIntervalStore &interval_store = index2.getIntervalStore(); + EXPECT_EQUAL(199u, doc_id_limit); + + EXPECT_EQUAL(index.getArity(), index2.getArity()); + EXPECT_EQUAL(index.getZeroConstraintDocs().size(), + 
index2.getZeroConstraintDocs().size()); + { + auto it = index2.getZeroConstraintDocs().begin(); + for (uint32_t i = 1; i < 100u; ++i) { + TEST_STATE(vespalib::make_string("%d", i).c_str()); + ASSERT_TRUE(it.valid()); + EXPECT_EQUAL(i + 100, it.getKey()); + ++it; + } + EXPECT_FALSE(it.valid()); + } + + const auto &interval_index = index2.getIntervalIndex(); + const auto &bounds_index = index2.getBoundsIndex(); + for (int i = 0; i < 100; ++i) { + { + auto it = interval_index.lookup(hash + i); + ASSERT_TRUE(it.valid()); + auto posting_it = interval_index.getBTreePostingList(it.getData()); + checkAllIntervals(posting_it, interval, interval_store); + } + { + auto it = bounds_index.lookup(hash2 + i); + ASSERT_TRUE(it.valid()); + auto posting_it = bounds_index.getBTreePostingList(it.getData()); + checkAllIntervals(posting_it, bounds, interval_store); + } + } +} + +TEST("require that DocumentFeaturesStore is restored on deserialization") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, + {{hash, interval}}, {{hash2, bounds}}); + vespalib::MMapDataBuffer buffer; + index.serialize(buffer); + uint32_t doc_id_limit; + DocIdLimitFinder finder(doc_id_limit); + PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); + const auto &interval_index = index2.getIntervalIndex(); + const auto &bounds_index = index2.getBoundsIndex(); + EXPECT_EQUAL(doc_id, doc_id_limit); + + auto it = interval_index.lookup(hash); + EXPECT_TRUE(it.valid()); + auto it2 = bounds_index.lookup(hash2); + EXPECT_TRUE(it2.valid()); + + index2.removeDocument(doc_id); + index2.commit(); + + it = interval_index.lookup(hash); + EXPECT_FALSE(it.valid()); + it2 = bounds_index.lookup(hash2); + EXPECT_FALSE(it2.valid()); +} + +TEST("require that hold 
lists are attempted emptied on destruction") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + indexFeature(index, doc_id, min_feature, + {{hash, interval}}, {{hash2, bounds}}); + { + auto guard = generation_handler.takeGuard(); + index.removeDocument(doc_id); + index.commit(); + } + // No assert on index destruction. +} +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp new file mode 100644 index 00000000000..1c44c096717 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_interval_posting_list. + +#include +LOG_SETUP("predicate_interval_posting_list_test"); +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + btree::EntryRef ref; + PredicateIntervalPostingList + posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +TEST("require that posting list can iterate.") { + PredicateIndex 
index(generation_handler, generation_holder, limit_provider, config, 8); + const auto &interval_index = index.getIntervalIndex(); + for (uint32_t id = 1; id < 100; ++id) { + PredicateTreeAnnotations annotations(id); + auto &vec = annotations.interval_map[hash]; + for (uint32_t i = 0; i < id; ++i) { + vec.push_back(Interval{(i + 1) << 16 | 0xffff}); + } + index.indexDocument(id, annotations); + } + index.commit(); + auto it = interval_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto ref = it.getData(); + + PredicateIntervalPostingList + posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_TRUE(posting_list.next(0)); + EXPECT_EQUAL(1u, posting_list.getDocId()); + EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + ASSERT_TRUE(posting_list.next(1)); + EXPECT_EQUAL(2u, posting_list.getDocId()); + EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x0002ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + + ASSERT_TRUE(posting_list.next(50)); + EXPECT_EQUAL(51u, posting_list.getDocId()); + for (uint32_t i = 0; i < 50; ++i) { + EXPECT_EQUAL((i + 1) << 16 | 0xffff, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + } + EXPECT_EQUAL(0x0033ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_interval_store_test.cpp b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp new file mode 100644 index 00000000000..bfe6340e222 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Unit tests for predicate_interval_store. + +#include +LOG_SETUP("predicate_interval_store_test"); +#include + +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using std::vector; + +namespace { + +TEST("require that empty interval list gives invalid ref") { + PredicateIntervalStore store; + vector interval_list; + auto ref = store.insert(interval_list); + ASSERT_FALSE(ref.valid()); +} + +Interval single_buf; + +template +void testInsertAndRetrieve(const std::vector &interval_list) { + std::ostringstream ost; + ost << "Type name: " << typeid(IntervalT).name() << ", intervals:"; + for (auto &i : interval_list) { + ost << " 0x" << std::hex << i.interval; + } + TEST_STATE(ost.str().c_str()); + PredicateIntervalStore store; + auto ref = store.insert(interval_list); + ASSERT_TRUE(ref.valid()); + + uint32_t size; + IntervalT single; + const IntervalT *intervals = store.get(ref, size, &single); + EXPECT_EQUAL(interval_list.size(), size); + ASSERT_TRUE(intervals); + for (size_t i = 0; i < interval_list.size(); ++i) { + EXPECT_EQUAL(interval_list[i], intervals[i]); + } +} + +TEST("require that single interval entry can be inserted") { + testInsertAndRetrieve({{0x0001ffff}}); + testInsertAndRetrieve({{0x0001ffff, 0x3}}); +} + +TEST("require that multi-interval entry can be inserted") { + testInsertAndRetrieve({{0x00010001}, {0x0002ffff}}); + testInsertAndRetrieve( + {{0x00010001}, {0x00020002}, {0x0003ffff}}); + testInsertAndRetrieve( + {{0x00010001}, {0x00020002}, {0x00030003}, {0x00040004}, + {0x00050005}, {0x00060006}, {0x00070007}, {0x00080008}, + {0x0009ffff}}); + testInsertAndRetrieve( + {{0x00010001, 0x4}, {0x0002ffff, 0x10}}); + testInsertAndRetrieve( + {{0x00010001, 0x4}, {0x00020002, 0x10}, {0x00030003, 0x20}, + {0x00040004, 0x6}, {0x0005ffff, 0x7}}); +} + +TEST("require that multiple multi-interval entries can be retrieved") { + PredicateIntervalStore store; + auto ref = store.insert({{1}, {2}}); + 
ASSERT_TRUE(ref.valid()); + ref = store.insert({{3}, {4}}); + ASSERT_TRUE(ref.valid()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, &single_buf); + EXPECT_EQUAL(2u, size); + ASSERT_TRUE(intervals); + EXPECT_EQUAL(3u, intervals[0].interval); + EXPECT_EQUAL(4u, intervals[1].interval); +} + +/* +TEST("require that entries can be removed and reused") { + GenerationHandler gen_handler; + PredicateIntervalStore store(gen_handler); + auto ref = store.insert({{0x0001ffff, 5}}); + ASSERT_TRUE(ref.valid()); + store.remove(ref); + + auto ref2 = store.insert({{1}, {2}, {3}, {4}, {5}, + {6}, {7}, {8}, {9}}); + ASSERT_TRUE(ref2.valid()); + store.remove(ref2); + store.commit(); + + auto ref3 = store.insert({{0x0002ffff, 10}}); + ASSERT_EQUAL(ref.ref(), ref3.ref()); + + uint32_t size; + IntervalWithBounds single; + const IntervalWithBounds *bounds = store.get(ref3, size, &single); + EXPECT_EQUAL(1u, size); + EXPECT_EQUAL(0x0002ffffu, bounds->interval); + EXPECT_EQUAL(10u, bounds->bounds); + + auto ref4 = store.insert({{2}, {3}, {4}, {5}, + {6}, {7}, {8}, {9}, {10}}); + ASSERT_EQUAL(ref2.ref(), ref4.ref()); + + const Interval *intervals = store.get(ref4, size, &single_buf); + EXPECT_EQUAL(9u, size); + EXPECT_EQUAL(2u, intervals[0].interval); + EXPECT_EQUAL(10u, intervals[8].interval); +} +*/ + +TEST("require that single interval entries are optimized") { + PredicateIntervalStore store; + auto ref = store.insert({{0x0001ffff}}); + ASSERT_TRUE(ref.valid()); + ASSERT_EQUAL(0x0001ffffu, ref.ref()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, &single_buf); + ASSERT_EQUAL(intervals, &single_buf); + EXPECT_EQUAL(0x0001ffffu, single_buf.interval); + + store.remove(ref); // Should do nothing +} + +TEST("require that interval refs are reused for identical data.") { + PredicateIntervalStore store; + auto ref = store.insert({{0x00010001}, {0x0002ffff}}); + ASSERT_TRUE(ref.valid()); + ASSERT_EQUAL(0x02000040u, ref.ref()); + + auto ref2 = 
store.insert({{0x00010001}, {0x0002ffff}}); + EXPECT_EQUAL(ref.ref(), ref2.ref()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, &single_buf); + EXPECT_EQUAL(0x00010001u, intervals[0].interval); + EXPECT_EQUAL(0x0002ffffu, intervals[1].interval); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp new file mode 100644 index 00000000000..47c29184dcf --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp @@ -0,0 +1,332 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_range_term_expander. + +#include +LOG_SETUP("predicate_range_term_expander_test"); +#include + +#include +#include + +using search::predicate::PredicateRangeTermExpander; +using std::vector; +using vespalib::string; + +namespace { + +struct MyRangeHandler { + vector expected_labels; + string expected_edge_label; + uint64_t expected_edge_value; + size_t i; + ~MyRangeHandler() { + EXPECT_EQUAL(expected_labels.size(), i); + } + void handleRange(const string &label) { + TEST_STATE(("handleRange: " + label).c_str()); + ASSERT_TRUE(i < expected_labels.size()); + EXPECT_EQUAL(expected_labels[i++], label); + } + void handleEdge(const string &label, uint64_t value) { + TEST_STATE(("handleEdge: " + label).c_str()); + EXPECT_EQUAL(expected_edge_label, label); + EXPECT_EQUAL(expected_edge_value, value); + } +}; + +TEST("require that small range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999", + "key=0-99999", + "key=0-999999", + "key=0-9999999", + "key=0-99999999", + "key=0-999999999", + "key=0-9999999999", + "key=0-99999999999", + "key=0-999999999999", + "key=0-9999999999999", + "key=0-99999999999999", + 
"key=0-999999999999999", + "key=0-9999999999999999", + "key=0-99999999999999999", + "key=0-999999999999999999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that large range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=123456789012345670-123456789012345679", + "key=123456789012345600-123456789012345699", + "key=123456789012345000-123456789012345999", + "key=123456789012340000-123456789012349999", + "key=123456789012300000-123456789012399999", + "key=123456789012000000-123456789012999999", + "key=123456789010000000-123456789019999999", + "key=123456789000000000-123456789099999999", + "key=123456789000000000-123456789999999999", + "key=123456780000000000-123456789999999999", + "key=123456700000000000-123456799999999999", + "key=123456000000000000-123456999999999999", + "key=123450000000000000-123459999999999999", + "key=123400000000000000-123499999999999999", + "key=123000000000000000-123999999999999999", + "key=120000000000000000-129999999999999999", + "key=100000000000000000-199999999999999999", + "key=0-999999999999999999"}, + "key=123456789012345670", 8, 0}; + expander.expand("key", 123456789012345678, range_handler); +} + +TEST("require that max range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{}, "key=9223372036854775800", 7, 0}; + expander.expand("key", 9223372036854775807, range_handler); +} + +TEST("require that small negative range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0", + "key=-99999-0", + "key=-999999-0", + "key=-9999999-0", + "key=-99999999-0", + "key=-999999999-0", + "key=-9999999999-0", + "key=-99999999999-0", + "key=-999999999999-0", + "key=-9999999999999-0", + "key=-99999999999999-0", + "key=-999999999999999-0", + "key=-9999999999999999-0", + "key=-99999999999999999-0", + 
"key=-999999999999999999-0"}, "key=-40", 2, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that min range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{}, "key=-9223372036854775800", 8, 0}; + expander.expand("key", -9223372036854775808ull, range_handler); +} +TEST("require that min range - 9 is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=-9223372036854775799-9223372036854775790", + "key=-9223372036854775799-9223372036854775700"}, + "key=-9223372036854775790", 9, 0}; + expander.expand("key", -9223372036854775799ll, range_handler); +} + +TEST("require that min range is expanded with arity 8") { + PredicateRangeTermExpander expander(8); + MyRangeHandler range_handler{{}, "key=-9223372036854775808", 0, 0}; + expander.expand("key", -9223372036854775808ull, range_handler); +} + +TEST("require that small range is expanded in arity 2") { + PredicateRangeTermExpander expander(2); + MyRangeHandler range_handler{{ + "key=42-43", + "key=40-43", + "key=40-47", + "key=32-47", + "key=32-63", + "key=0-63", + "key=0-127", + "key=0-255", + "key=0-511", + "key=0-1023", + "key=0-2047", + "key=0-4095", + "key=0-8191", + "key=0-16383", + "key=0-32767", + "key=0-65535", + "key=0-131071", + "key=0-262143", + "key=0-524287", + "key=0-1048575", + "key=0-2097151", + "key=0-4194303", + "key=0-8388607", + "key=0-16777215", + "key=0-33554431", + "key=0-67108863", + "key=0-134217727", + "key=0-268435455", + "key=0-536870911", + "key=0-1073741823", + "key=0-2147483647", + "key=0-4294967295", + "key=0-8589934591", + "key=0-17179869183", + "key=0-34359738367", + "key=0-68719476735", + "key=0-137438953471", + "key=0-274877906943", + "key=0-549755813887", + "key=0-1099511627775", + "key=0-2199023255551", + "key=0-4398046511103", + "key=0-8796093022207", + "key=0-17592186044415", + "key=0-35184372088831", + "key=0-70368744177663", + "key=0-140737488355327", + "key=0-281474976710655", 
+ "key=0-562949953421311", + "key=0-1125899906842623", + "key=0-2251799813685247", + "key=0-4503599627370495", + "key=0-9007199254740991", + "key=0-18014398509481983", + "key=0-36028797018963967", + "key=0-72057594037927935", + "key=0-144115188075855871", + "key=0-288230376151711743", + "key=0-576460752303423487", + "key=0-1152921504606846975", + "key=0-2305843009213693951", + "key=0-4611686018427387903", + "key=0-9223372036854775807"}, "key=42", 0, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that small negative range is expanded in arity 2") { + PredicateRangeTermExpander expander(2); + MyRangeHandler range_handler{{ + "key=-43-42", + "key=-43-40", + "key=-47-40", + "key=-47-32", + "key=-63-32", + "key=-63-0", + "key=-127-0", + "key=-255-0", + "key=-511-0", + "key=-1023-0", + "key=-2047-0", + "key=-4095-0", + "key=-8191-0", + "key=-16383-0", + "key=-32767-0", + "key=-65535-0", + "key=-131071-0", + "key=-262143-0", + "key=-524287-0", + "key=-1048575-0", + "key=-2097151-0", + "key=-4194303-0", + "key=-8388607-0", + "key=-16777215-0", + "key=-33554431-0", + "key=-67108863-0", + "key=-134217727-0", + "key=-268435455-0", + "key=-536870911-0", + "key=-1073741823-0", + "key=-2147483647-0", + "key=-4294967295-0", + "key=-8589934591-0", + "key=-17179869183-0", + "key=-34359738367-0", + "key=-68719476735-0", + "key=-137438953471-0", + "key=-274877906943-0", + "key=-549755813887-0", + "key=-1099511627775-0", + "key=-2199023255551-0", + "key=-4398046511103-0", + "key=-8796093022207-0", + "key=-17592186044415-0", + "key=-35184372088831-0", + "key=-70368744177663-0", + "key=-140737488355327-0", + "key=-281474976710655-0", + "key=-562949953421311-0", + "key=-1125899906842623-0", + "key=-2251799813685247-0", + "key=-4503599627370495-0", + "key=-9007199254740991-0", + "key=-18014398509481983-0", + "key=-36028797018963967-0", + "key=-72057594037927935-0", + "key=-144115188075855871-0", + "key=-288230376151711743-0", + "key=-576460752303423487-0", + 
"key=-1152921504606846975-0", + "key=-2305843009213693951-0", + "key=-4611686018427387903-0", + "key=-9223372036854775807-0"}, "key=-42", 0, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that upper bound is used") { + PredicateRangeTermExpander expander(10, -99, 9999); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that lower bound is used") { + PredicateRangeTermExpander expander(10, -9999, 99); + MyRangeHandler range_handler{{ + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0"}, "key=-40", 2, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that value outside bounds is not used") { + PredicateRangeTermExpander expander(10, -99, 99); + MyRangeHandler range_handler{{}, "handleEdge is never called", 2, 0}; + expander.expand("key", 100, range_handler); +} + +TEST("require that upper and lower bound > 0 works") { + PredicateRangeTermExpander expander(10, 100, 9999); + MyRangeHandler range_handler{{ + "key=140-149", + "key=100-199", + "key=0-999", + "key=0-9999"}, "key=140", 2, 0}; + expander.expand("key", 142, range_handler); +} + +TEST("require that search close to uneven upper bound is sensible") { + PredicateRangeTermExpander expander(10, -99, 1234); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that search close to max uneven upper bound is sensible") { + PredicateRangeTermExpander expander(10, 0, 9223372036854771234); + MyRangeHandler range_handler{{ + "key=9223372036854770000-9223372036854770009", + "key=9223372036854770000-9223372036854770099", + "key=9223372036854770000-9223372036854770999"}, + "key=9223372036854770000", 0, 0}; + expander.expand("key", 9223372036854770000, range_handler); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff 
--git a/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp new file mode 100644 index 00000000000..a51f3d678d5 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_ref_cache. + +#include +LOG_SETUP("predicate_ref_cache_test"); +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; + +namespace { + +struct MyBufferStore { + std::vector store; + const uint32_t *getBuffer(uint32_t ref) const { + ASSERT_LESS(ref, store.size()); + return &store[ref]; + } + uint32_t insert(uint32_t value) { + size_t size = store.size(); + store.push_back(value); + return size | 0x01000000; // size = 1 + } + uint32_t insert(std::vector data) { + size_t size = store.size(); + uint8_t data_size = data.size(); + if (data.size() >= 0xff) { + store.push_back(data.size()); + data_size = 0xff; + } + store.insert(store.end(), data.begin(), data.end()); + return size | (data_size << 24); + } +}; + +TEST("require that single entries are cached") { + MyBufferStore store; + PredicateRefCache cache(store); + + uint32_t ref = store.insert(42); + uint32_t new_ref = cache.insert(ref); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref2 = store.insert(42); + new_ref = cache.insert(ref2); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref3 = store.insert(44); + new_ref = cache.insert(ref3); + EXPECT_EQUAL(ref3, new_ref); +} + +TEST("require that multivalue entries are cached") { + MyBufferStore store; + PredicateRefCache cache(store); + + std::vector data1 = {1, 2, 3, 4, 5}; + std::vector data2 = {1, 2, 3, 4, 6}; + uint32_t ref = store.insert(data1); + uint32_t new_ref = cache.insert(ref); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref2 = store.insert(data1); + new_ref = cache.insert(ref2); + EXPECT_EQUAL(ref, 
new_ref); + + uint32_t ref3 = store.insert(data2); + new_ref = cache.insert(ref3); + EXPECT_EQUAL(ref3, new_ref); +} + +TEST("require that entries can be looked up") { + MyBufferStore store; + PredicateRefCache cache(store); + + uint32_t data = 42; + EXPECT_EQUAL(0u, cache.find(&data, 1)); + uint32_t ref = store.insert(42); + cache.insert(ref); + EXPECT_EQUAL(ref, cache.find(&data, 1)); +} + +TEST("require that cache handles large entries") { + MyBufferStore store; + PredicateRefCache cache(store); + + std::vector data1(300); + std::vector data2(300); + data2.back() = 42; + uint32_t ref1 = store.insert(data1); + cache.insert(ref1); + EXPECT_EQUAL(ref1, cache.find(&data1[0], data1.size())); + EXPECT_EQUAL(0u, cache.find(&data2[0], data2.size())); + uint32_t ref2 = store.insert(data2); + uint32_t ref = cache.insert(ref2); + EXPECT_EQUAL(ref, ref2); + EXPECT_EQUAL(ref2, cache.find(&data2[0], data2.size())); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp new file mode 100644 index 00000000000..f455abced3f --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for PredicateTreeAnalyzer. 
+ +#include +LOG_SETUP("PredicateTreeAnalyzer_test"); +#include + +#include +#include +#include +#include + +using document::PredicateSlimeBuilder; +using namespace search; +using namespace search::predicate; +using document::Predicate; +using vespalib::Slime; +using vespalib::slime::Cursor; +using std::map; +using std::string; + +namespace { +typedef PredicateSlimeBuilder Builder; + +TEST("require that minfeature is 1 for simple term") { + auto slime(Builder().feature("foo").value("bar").build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(1, analyzer.getSize()); + EXPECT_TRUE(analyzer.getSizeMap().empty()); +} + +TEST("require that minfeature is 1 for simple negative term") { + auto slime(Builder().neg().feature("foo").value("bar").build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); +} + +void checkSizeMap(const map &map, const string &key, int val) { + auto it = map.find(key); + ASSERT_TRUE(it != map.end()); + EXPECT_EQUAL(val, it->second); +} + +TEST("require that minfeature is sum for and") { + auto slime(Builder() + .and_node({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(3, analyzer.getSize()); + EXPECT_EQUAL(3u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a2", 1)); +} + +TEST("require that minfeature is min for or") { + auto slime(Builder().or_node + ({Builder().and_node + ({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}), + Builder().and_node + ({Builder().feature("grault").value("garply"), 
+ Builder().feature("waldo").value("fred")})}) + .build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(2, analyzer.getMinFeature()); + EXPECT_EQUAL(5, analyzer.getSize()); + EXPECT_EQUAL(5u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a2", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a1", 1)); +} + +TEST("require that minfeature rounds up") { + auto slime(Builder() + .or_node({Builder().feature("foo").value("bar"), + Builder().feature("foo").value("bar"), + Builder().feature("foo").value("bar")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(3, analyzer.getSize()); +} + +TEST("require that multivalue feature set considers all values") { + { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A").value("B"), + Builder().feature("foo").value("B")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); + } + { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A").value("B"), + Builder().feature("foo").value("C")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(2, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); + } +} + +TEST("require that not-features don't count towards minfeature calculation") { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A"), + Builder().neg().feature("foo").value("A"), + Builder().neg().feature("foo").value("B"), + Builder().feature("foo").value("B")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(6, analyzer.getSize()); +} + +TEST("require that not-ranges don't count towards 
minfeature calculation") { + auto slime(Builder() + .and_node({Builder().feature("foo").range(0, 10), + Builder().neg().feature("foo").range(0, 10), + Builder().neg().feature("bar").range(0, 10), + Builder().feature("bar").range(0, 10)}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(6, analyzer.getSize()); +} + +TEST("require that multilevel AND stores sizes") { + auto slime(Builder().and_node + ({Builder().and_node + ({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}), + Builder().and_node + ({Builder().feature("grault").value("garply"), + Builder().feature("waldo").value("fred")})}) + .build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(5, analyzer.getMinFeature()); + EXPECT_EQUAL(5, analyzer.getSize()); + EXPECT_EQUAL(7u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 3)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 2)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a2", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a1", 1)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp new file mode 100644 index 00000000000..92271cd1c20 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp @@ -0,0 +1,381 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for PredicateTreeAnnotator. 
+ +#include +LOG_SETUP("PredicateTreeAnnotator_test"); +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using document::Predicate; +using std::ostringstream; +using std::pair; +using std::string; +using std::vector; +using vespalib::Slime; +using vespalib::slime::Cursor; +using namespace search; +using namespace search::predicate; +using namespace document::predicate_slime_builder; + +namespace { +Cursor &makeAndNode(Cursor &obj) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_CONJUNCTION); + return obj.setArray(Predicate::CHILDREN); +} + +Cursor &makeOrNode(Cursor &obj) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_DISJUNCTION); + return obj.setArray(Predicate::CHILDREN); +} + +void makeFeatureSet(Cursor &obj, const string &key, const string &value) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET); + obj.setString(Predicate::KEY, key); + Cursor &set = obj.setArray(Predicate::SET); + set.addString(value); +} + +void makeHashedFeatureRange(Cursor &obj, const string &key, + const vector &partitions, + const vector >& edge_partitions) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_RANGE); + obj.setString(Predicate::KEY, key); + Cursor &p = obj.setArray(Predicate::HASHED_PARTITIONS); + for (auto partition : partitions) { + p.addLong(PredicateHash::hash64(partition)); + } + Cursor &e = obj.setArray(Predicate::HASHED_EDGE_PARTITIONS); + for (auto edge_partition : edge_partitions) { + ostringstream label; + label << key << "=" << edge_partition[0]; + uint64_t hash = PredicateHash::hash64(label.str()); + int64_t value = edge_partition[1]; + int64_t payload = edge_partition[2]; + + Cursor &o = e.addObject(); + o.setLong(Predicate::HASH, hash); + o.setLong(Predicate::VALUE, value); + o.setLong(Predicate::PAYLOAD, payload); + } +} + +void checkInterval(const PredicateTreeAnnotations &result, + const string &feature, vector expected) { + TEST_STATE(("Check interval: " + 
feature).c_str()); + uint64_t hash = PredicateHash::hash64(feature); + auto it = result.interval_map.find(hash); + ASSERT_TRUE(it != result.interval_map.end()); + const auto &intervals = it->second; + ASSERT_EQUAL(expected.size(), intervals.size()); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i], intervals[i].interval); + } +} + +void checkBounds(const PredicateTreeAnnotations &result, + const string &feature, + vector expected) { + TEST_STATE(("Check bounds: " + feature).c_str()); + uint64_t hash = PredicateHash::hash64(feature); + auto it = result.bounds_map.find(hash); + ASSERT_TRUE(it != result.bounds_map.end()); + const auto &intervals = it->second; + ASSERT_EQUAL(expected.size(), intervals.size()); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i].interval, intervals[i].interval); + EXPECT_EQUAL(expected[i].bounds, intervals[i].bounds); + } +} + +TEST("require that OR intervals are the same") { + Slime slime; + Cursor &children = makeOrNode(slime.setObject()); + makeFeatureSet(children.addObject(), "key1", "value1"); + makeFeatureSet(children.addObject(), "key2", "value2"); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime.get(), result); + + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(2u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key1=value1", {0x00010002}); + checkInterval(result, "key2=value2", {0x00010002}); +} + +TEST("require that ANDs below ORs get different intervals") { + auto slime = orNode({andNode({featureSet("key1", {"value1"}), + featureSet("key1", {"value1"}), + featureSet("key1", {"value1"})}), + andNode({featureSet("key2", {"value2"}), + featureSet("key2", {"value2"}), + featureSet("key2", {"value2"})})}); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(6u, result.interval_range); + 
EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key1=value1", {0x00010001, 0x00020002, 0x00030006}); + checkInterval(result, "key2=value2", {0x00010004, 0x00050005, 0x00060006}); +} + +TEST("require that NOTs get correct intervals") { + auto slime = andNode({featureSet("key", {"value"}), + neg(featureSet("key", {"value"})), + featureSet("key", {"value"}), + neg(featureSet("key", {"value"}))}); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(2u, result.min_feature); // needs key=value and z-star + EXPECT_EQUAL(6u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key=value", + {0x00010001, 0x00020002, 0x00040004, 0x00050005}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00020001, 0x00050004}); +} + +TEST("require that NOT inverts ANDs and ORs") { + auto slime = neg(andNode({featureSet("key", {"value"}), + neg(featureSet("key", {"value"}))})); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(1u, result.min_feature); // needs key=value or z-star + EXPECT_EQUAL(3u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key=value", + {0x00010002, 0x00010003}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00020000}); +} + +TEST("require that final first NOT-interval is extended") { + auto slime = neg(featureSet("key", {"A"})); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(2u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key=A", {0x00010001}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00010000}); +} + +TEST("show different types of NOT-intervals") { + auto slime = 
andNode({orNode({andNode({featureSet("key", {"A"}), + neg(featureSet("key", {"B"}))}), + andNode({neg(featureSet("key", {"C"})), + featureSet("key", {"D"})})}), + featureSet("foo", {"bar"})}); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + EXPECT_EQUAL(3u, result.min_feature); + EXPECT_EQUAL(7u, result.interval_range); + EXPECT_EQUAL(6u, result.interval_map.size()); + checkInterval(result, "foo=bar", {0x00070007}); + checkInterval(result, "key=A", {0x00010001}); + checkInterval(result, "key=B", {0x00020002}); + checkInterval(result, "key=C", {0x00010004}); + checkInterval(result, "key=D", {0x00060006}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00020001, 0x00000006, 0x00040000}); + + slime = orNode({neg(featureSet("key", {"A"})), + neg(featureSet("key", {"B"}))}); + result = PredicateTreeAnnotations(); + PredicateTreeAnnotator::annotate(slime->get(), result); + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(4u, result.interval_range); + EXPECT_EQUAL(3u, result.interval_map.size()); + checkInterval(result, "key=A", {0x00010003}); + checkInterval(result, "key=B", {0x00010003}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00030000, 0x00030000}); + + slime = orNode({andNode({neg(featureSet("key", {"A"})), + neg(featureSet("key", {"B"}))}), + andNode({neg(featureSet("key", {"C"})), + neg(featureSet("key", {"D"}))})}); + result = PredicateTreeAnnotations(); + PredicateTreeAnnotator::annotate(slime->get(), result); + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(8u, result.interval_range); + EXPECT_EQUAL(5u, result.interval_map.size()); + checkInterval(result, "key=A", {0x00010001}); + checkInterval(result, "key=B", {0x00030007}); + checkInterval(result, "key=C", {0x00010005}); + checkInterval(result, "key=D", {0x00070007}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00010000, 0x00070002, 0x00050000, + 
0x00070006}); + +} + +TEST("require that hashed ranges get correct intervals") { + Slime slime; + Cursor &children = makeAndNode(slime.setObject()); + makeHashedFeatureRange( + children.addObject(), "key", + {"key=10-19", "key=20-29"}, {{0, 5, -1}, {30, 0, 3}}); + makeHashedFeatureRange( + children.addObject(), "foo", + {"foo=10-19", "foo=20-29"}, {{0, 5, -1}, {30, 0, 3}}); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime.get(), result); + + EXPECT_EQUAL(2u, result.min_feature); + EXPECT_EQUAL(2u, result.interval_range); + EXPECT_EQUAL(4u, result.interval_map.size()); + EXPECT_EQUAL(4u, result.bounds_map.size()); + checkInterval(result, "key=10-19", {0x00010001}); + checkInterval(result, "key=20-29", {0x00010001}); + checkBounds(result, "key=0", {{0x00010001, 0xffffffff}}); + checkBounds(result, "key=30", {{0x00010001, 3}}); + + checkInterval(result, "foo=10-19", {0x00020002}); + checkInterval(result, "foo=20-29", {0x00020002}); + checkBounds(result, "foo=0", {{0x00020002, 0xffffffff}}); + checkBounds(result, "foo=30", {{0x00020002, 3}}); +} + +TEST("require that extreme ranges works") { + Slime slime; + Cursor &children = makeAndNode(slime.setObject()); + makeHashedFeatureRange( + children.addObject(), "max range", + {"max range=9223372036854775806-9223372036854775807"}, {}); + makeHashedFeatureRange( + children.addObject(), "max edge", + {}, {{9223372036854775807, 0, 0x40000001}}); + makeHashedFeatureRange( + children.addObject(), "min range", + {"min range=-9223372036854775807-9223372036854775806"}, {}); + makeHashedFeatureRange( + children.addObject(), "min edge", + {}, {{LLONG_MIN, 0, 0x40000001}}); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime.get(), result); + + EXPECT_EQUAL(4u, result.min_feature); + EXPECT_EQUAL(4u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + EXPECT_EQUAL(2u, result.bounds_map.size()); + checkInterval(result, "max 
range=9223372036854775806-9223372036854775807", + {0x00010001}); + checkBounds(result, "max edge=9223372036854775807", + {{0x00020002, 0x40000001}}); + checkInterval(result, "min range=-9223372036854775807-9223372036854775806", + {0x00030003}); + checkBounds(result, "min edge=-9223372036854775808", + {{0x00040004, 0x40000001}}); +} + +TEST("require that unique features and all ranges are collected") { + auto slime = andNode({featureSet("key1", {"value1"}), + featureSet("key1", {"value1"}), + featureRange("key2", 9, 40), + featureRange("key2", 9, 40)}); + Cursor &c1 = slime->get()[Predicate::CHILDREN][2] + .setArray(Predicate::HASHED_PARTITIONS); + c1.addLong(PredicateHash::hash64("key2=10-19")); + c1.addLong(PredicateHash::hash64("key2=20-29")); + c1.addLong(PredicateHash::hash64("key2=30-39")); + c1.addLong(PredicateHash::hash64("key2=0")); + c1.addLong(PredicateHash::hash64("key2=40")); + Cursor &c2 = slime->get()[Predicate::CHILDREN][3] + .setArray(Predicate::HASHED_PARTITIONS); + c2.addLong(PredicateHash::hash64("key2=10-19")); + c2.addLong(PredicateHash::hash64("key2=20-29")); + c2.addLong(PredicateHash::hash64("key2=30-39")); + c2.addLong(PredicateHash::hash64("key2=0")); + c2.addLong(PredicateHash::hash64("key2=40")); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(4u, result.interval_range); + ASSERT_EQUAL(1u, result.features.size()); + EXPECT_EQUAL(static_cast(PredicateHash::hash64("key1=value1")), + result.features[0]); + ASSERT_EQUAL(2u, result.range_features.size()); + EXPECT_EQUAL("key2", result.range_features[0].label.make_string()); + EXPECT_EQUAL(9, result.range_features[0].from); + EXPECT_EQUAL(40, result.range_features[0].to); + EXPECT_EQUAL("key2", result.range_features[1].label.make_string()); + EXPECT_EQUAL(9, result.range_features[1].from); + EXPECT_EQUAL(40, result.range_features[1].to); +} + +TEST("require that z-star feature is only registered once") { + auto slime = 
andNode({neg(featureSet("key1", {"value1"})), + neg(featureRange("key2", 10, 19))}); + Cursor &c = slime->get()[Predicate::CHILDREN][1][Predicate::CHILDREN][0] + .setArray(Predicate::HASHED_PARTITIONS); + c.addLong(PredicateHash::hash64("key2=10-19")); + + // simple range will be stored as a feature. + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(4u, result.interval_range); + ASSERT_EQUAL(3u, result.features.size()); + EXPECT_EQUAL(PredicateHash::hash64("key1=value1"), result.features[0]); + EXPECT_EQUAL(PredicateIndex::z_star_compressed_hash, result.features[1]); + EXPECT_EQUAL(PredicateHash::hash64("key2=10-19"), result.features[2]); + ASSERT_EQUAL(0u, result.range_features.size()); +} + +TEST("require that default open range works") { + auto slime = lessEqual("foo", 39); + Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS); + c.addLong(PredicateHash::hash64("foo=-9223372036854775808")); + c.addLong(PredicateHash::hash64("foo=-9223372036854775807-0")); + c.addLong(PredicateHash::hash64("foo=0-31")); + c.addLong(PredicateHash::hash64("foo=32-39")); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(1u, result.interval_range); + EXPECT_EQUAL(0u, result.features.size()); + ASSERT_EQUAL(1u, result.range_features.size()); + EXPECT_EQUAL("foo", result.range_features[0].label.make_string()); + EXPECT_EQUAL(LLONG_MIN, result.range_features[0].from); + EXPECT_EQUAL(39, result.range_features[0].to); +} + +TEST("require that open range works") { + auto slime = lessEqual("foo", 39); + Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS); + c.addLong(PredicateHash::hash64("foo=8-15")); + c.addLong(PredicateHash::hash64("foo=16-31")); + c.addLong(PredicateHash::hash64("foo=32-39")); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result, 8, 200); + + EXPECT_EQUAL(1u, result.interval_range); + 
EXPECT_EQUAL(0u, result.features.size()); + ASSERT_EQUAL(1u, result.range_features.size()); + EXPECT_EQUAL("foo", result.range_features[0].label.make_string()); + EXPECT_EQUAL(8, result.range_features[0].from); + EXPECT_EQUAL(39, result.range_features[0].to); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp new file mode 100644 index 00000000000..1751c725044 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_zero_constraint_posting_list. + +#include +LOG_SETUP("predicate_zero_constraint_posting_list_test"); +#include + +#include +#include + +using namespace search; +using namespace search::predicate; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin()); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0x00010001u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +TEST("require that posting list can iterate.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + for (uint32_t id = 1; id < 100; ++id) { + index.indexEmptyDocument(id); + 
} + index.commit(); + ASSERT_EQUAL(99u, index.getZeroConstraintDocs().size()); + + PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin()); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0x00010001u, posting_list.getInterval()); + + for (size_t i = 0; i < 99; ++i) { + EXPECT_TRUE(posting_list.next(i)); + EXPECT_EQUAL(i + 1, posting_list.getDocId()); + EXPECT_EQUAL(0x00010001u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.nextInterval()); + } + EXPECT_FALSE(posting_list.next(99)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp new file mode 100644 index 00000000000..2dff14b4417 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_zstar_compressed_posting_list. 
+ +#include +LOG_SETUP("predicate_zstar_compressed_posting_list_test"); +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using std::vector; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + btree::EntryRef ref; + PredicateZstarCompressedPostingList + posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +TEST("require that posting list can iterate.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + const auto &interval_index = index.getIntervalIndex(); + vector> intervals = + {{{0x00010000}}, + {{0x00010000}, {0x0000ffff}}, + {{0x00010000}, {0x00000003}, {0x00040003}, {0x00060005}}}; + for (size_t i = 0; i < intervals.size(); ++i) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[hash] = intervals[i]; + index.indexDocument(i + 1, annotations); + } + index.commit(); + auto it = interval_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto ref = it.getData(); + + PredicateZstarCompressedPostingList + posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + + EXPECT_TRUE(posting_list.next(0)); + EXPECT_EQUAL(1u, posting_list.getDocId()); + 
EXPECT_EQUAL(0x00010000u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00020001u, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + + EXPECT_TRUE(posting_list.next(1)); + EXPECT_EQUAL(2u, posting_list.getDocId()); + EXPECT_EQUAL(0x00010000u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0xffff0001u, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + + ASSERT_TRUE(posting_list.next(2)); + EXPECT_EQUAL(3u, posting_list.getDocId()); + EXPECT_EQUAL(0x00010000u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00030001u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00040003u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00050004u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00060005u, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x00070006u, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + + ASSERT_FALSE(posting_list.next(4)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/simple_index_test.cpp b/searchlib/src/tests/predicate/simple_index_test.cpp new file mode 100644 index 00000000000..8ba9e6182fb --- /dev/null +++ b/searchlib/src/tests/predicate/simple_index_test.cpp @@ -0,0 +1,333 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for simple_index. 
+ +#include +LOG_SETUP("simple_index_test"); +#include + +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using vespalib::GenerationHolder; + +namespace { + +struct MyData { + uint32_t data; + MyData() : data(0) {} + MyData(uint32_t d) : data(d) {} + bool valid() const { + return data != 0; + } +}; + +struct MyDataSerializer : PostingSerializer { + void serialize(const MyData &data, + vespalib::MMapDataBuffer& buffer) const { + buffer.writeInt32(data.data); + } +}; + +struct MyDataDeserializer : PostingDeserializer { + MyData deserialize(vespalib::MMapDataBuffer& buffer) { + return {buffer.readInt32()}; + } +}; + +struct SimpleDocIdLimitProvider : public DocIdLimitProvider { + uint32_t _doc_id_limit = 1; + uint32_t _committed_doc_id_limit = 1; + virtual uint32_t getDocIdLimit() const { return _doc_id_limit; } + virtual uint32_t getCommittedDocIdLimit() const { return _committed_doc_id_limit; } +}; + +constexpr uint64_t key = 0x123456; +constexpr uint32_t doc_id = 42; +const MyData data{100}; + +constexpr double UPPER_DOCID_FREQ_THRESHOLD = 0.5; +constexpr double LOWER_DOCID_FREQ_THRESHOLD = 0.25; +constexpr size_t UPPER_VECTOR_SIZE_THRESHOLD = 10; +constexpr size_t LOWER_VECTOR_SIZE_THRESHOLD = 8; +constexpr size_t VECTOR_PRUNE_FREQUENCY = 1; +constexpr double FOREACH_VECTOR_THRESHOLD = 0.0; +const auto config = SimpleIndexConfig(UPPER_DOCID_FREQ_THRESHOLD, + LOWER_DOCID_FREQ_THRESHOLD, + UPPER_VECTOR_SIZE_THRESHOLD, + LOWER_VECTOR_SIZE_THRESHOLD, + VECTOR_PRUNE_FREQUENCY, + FOREACH_VECTOR_THRESHOLD, + GrowStrategy()); +struct Fixture { + GenerationHolder _generation_holder; + SimpleDocIdLimitProvider _limit_provider; + SimpleIndex _index; + Fixture() : _generation_holder(), _limit_provider(), + _index(_generation_holder, _limit_provider, config) {} + ~Fixture() { + _generation_holder.clearHoldLists(); + } + SimpleIndex &index() { + return _index; + } + void addPosting(uint64_t k, uint32_t id, const MyData &d) { + if (id 
>= _limit_provider._doc_id_limit) { + _limit_provider._doc_id_limit = id + 1; + } + _index.addPosting(k, id, d); + } + SimpleIndex::DictionaryIterator lookup(uint64_t k) { + return _index.lookup(k); + } + bool hasKey(uint64_t k) { + return lookup(k).valid(); + } + std::pair removeFromPostingList(uint64_t k, uint32_t id) { + return _index.removeFromPostingList(k, id); + } + bool hasVectorPostingList(uint64_t k) { + return _index.getVectorPostingList(k).operator bool(); + } + SimpleIndex::VectorIterator getVectorPostingList(uint64_t k) { + return *_index.getVectorPostingList(k); + } + SimpleIndex::BTreeIterator getBTreePostingList(btree::EntryRef ref) { + return _index.getBTreePostingList(ref); + } + void commit() { + _index.commit(); + _limit_provider._committed_doc_id_limit = _limit_provider._doc_id_limit; + } +}; + +TEST_F("require that SimpleIndex can insert and remove a value.", Fixture) { + f.addPosting(key, doc_id, data); + f.commit(); + auto it = f.lookup(key); + ASSERT_TRUE(it.valid()); + btree::EntryRef ref = it.getData(); + auto posting_it = f.getBTreePostingList(ref); + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + EXPECT_EQUAL(data.data, posting_it.getData().data); + + auto result = f.removeFromPostingList(key, doc_id); + EXPECT_TRUE(result.second); + EXPECT_EQUAL(data.data, result.first.data); + f.commit(); + + result = f.removeFromPostingList(key, doc_id); + EXPECT_FALSE(result.second); + EXPECT_FALSE(result.first.valid()); + + ASSERT_FALSE(f.hasKey(key)); +} + +TEST_F("require that SimpleIndex can insert and remove many values.", Fixture) { + for (uint32_t id = 1; id < 100; ++id) { + f.addPosting(key, id, {id}); + } + f.commit(); + auto it = f.lookup(key); + ASSERT_TRUE(it.valid()); + btree::EntryRef ref = it.getData(); + auto posting_it = f.getBTreePostingList(ref); + for (size_t id = 1; id < 100; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + EXPECT_EQUAL(id, 
posting_it.getData().data); + ++posting_it; + } + ASSERT_FALSE(posting_it.valid()); + for (uint32_t id = 1; id < 100; ++id) { + it = f.lookup(key); + ASSERT_TRUE(it.valid()); + ref = it.getData(); + auto result = f.removeFromPostingList(key, id); + EXPECT_TRUE(result.second); + EXPECT_EQUAL(id, result.first.data); + } + f.commit(); + ASSERT_FALSE(f.hasKey(key)); +} + +struct MyObserver : SimpleIndexDeserializeObserver<> { + std::map features; + void notifyInsert(uint64_t my_key, uint32_t my_doc_id, uint32_t) { + features[my_doc_id] = my_key; + } + bool hasSeenDoc(uint32_t doc) { + return features.find(doc) != features.end(); + } +}; + +TEST_FF("require that SimpleIndex can be serialized and deserialized.", Fixture, Fixture) { + for (uint32_t id = 1; id < 100; ++id) { + f1.addPosting(key, id, {id}); + } + f1.commit(); + vespalib::MMapDataBuffer buffer; + f1.index().serialize(buffer, MyDataSerializer()); + MyObserver observer; + MyDataDeserializer deserializer; + f2.index().deserialize(buffer, deserializer, observer, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); + + auto it = f2.lookup(key); + ASSERT_TRUE(it.valid()); + btree::EntryRef ref = it.getData(); + auto posting_it = f1.getBTreePostingList(ref); + for (uint32_t id = 1; id < 100; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + EXPECT_EQUAL(id, posting_it.getData().data); + EXPECT_TRUE(observer.hasSeenDoc(id)); + ++posting_it; + } + EXPECT_FALSE(posting_it.valid()); +} + +TEST_F("require that SimpleIndex can update by inserting the same key twice.", Fixture) { + f.addPosting(key, doc_id, data); + + MyData new_data{42}; + f.addPosting(key, doc_id, new_data); + f.commit(); + + auto it = f.lookup(key); + ASSERT_TRUE(it.valid()); + btree::EntryRef ref = it.getData(); + auto posting_it = f.getBTreePostingList(ref); + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + EXPECT_EQUAL(new_data.data, posting_it.getData().data); +} + +TEST_F("require 
that only that btrees exceeding size threshold is promoted to vector", Fixture) { + for (uint32_t i = 1; i < 10; ++i) { + f.addPosting(key, i, {i}); + } + f.commit(); + ASSERT_TRUE(f.hasKey(key)); + EXPECT_FALSE(f.hasVectorPostingList(key)); + f.addPosting(key, 10, {10}); + f.commit(); + ASSERT_TRUE(f.hasVectorPostingList(key)); +} + +TEST_F("require that vectors below size threshold is pruned", Fixture) { + for (uint32_t i = 1; i <= 10; ++i) { + f.addPosting(key, i, {i}); + } + f.commit(); + auto it = f.lookup(key); + ASSERT_TRUE(it.valid()); + for (uint32_t i = 10; i > 8; --i) { + f.removeFromPostingList(key, i); + } + f.commit(); + EXPECT_TRUE(f.hasVectorPostingList(key)); + f.removeFromPostingList(key, 8); + f.commit(); + EXPECT_FALSE(f.hasVectorPostingList(key)); +} + +TEST_F("require that only btrees with high enough doc frequency is promoted to vector", Fixture) { + for (uint32_t i = 100; i > 51; --i) { + f.addPosting(key, i, {i}); + } + f.commit(); + auto it = f.lookup(key); + ASSERT_TRUE(it.valid()); + EXPECT_FALSE(f.hasVectorPostingList(key)); + f.addPosting(key, 51, {51}); + f.commit(); + ASSERT_TRUE(f.hasVectorPostingList(key)); +} + +TEST_F("require that vectors below doc frequency is pruned by removeFromPostingList", Fixture) { + for (uint32_t i = 1; i <= 100; ++i) { + f.addPosting(key, i, {i}); + } + f.commit(); + ASSERT_TRUE(f.hasKey(key)); + EXPECT_TRUE(f.hasVectorPostingList(key)); + for (uint32_t i = 100; i > 25; --i) { + f.removeFromPostingList(key, i); + } + f.commit(); + EXPECT_TRUE(f.hasVectorPostingList(key)); + f.removeFromPostingList(key, 25); + f.commit(); + EXPECT_FALSE(f.hasVectorPostingList(key)); +} + +TEST_F("require that vectors below doc frequency is pruned by addPosting", Fixture) { + for (uint32_t i = 1; i <= 10; ++i) { + f.addPosting(key, i, {i}); + } + f.commit(); + ASSERT_TRUE(f.hasKey(key)); + EXPECT_TRUE(f.hasVectorPostingList(key)); + for (uint32_t i = 1; i <= 100; ++i) { + f.addPosting(key + 1, i, {i}); + } + f.commit(); + 
EXPECT_FALSE(f.hasVectorPostingList(key)); +} + +TEST_F("require that promoteOverThresholdVectors promotes posting lists over threshold to vectors", Fixture) { + f._limit_provider._doc_id_limit = 100; + for (uint32_t i = 1; i <= 20; ++i) { + f.addPosting(key + 0, i, {i}); + f.addPosting(key + 1, i, {i}); + f.addPosting(key + 2, i, {i}); + } + for (uint32_t i = 21; i <= 40; ++i) { + f.addPosting(key + 0, i, {i}); + f.addPosting(key + 2, i, {i}); + } + f.commit(); + EXPECT_FALSE(f.hasVectorPostingList(key + 0)); + EXPECT_FALSE(f.hasVectorPostingList(key + 1)); + EXPECT_FALSE(f.hasVectorPostingList(key + 2)); + f._limit_provider._doc_id_limit = 50; + f.index().promoteOverThresholdVectors(); + f.commit(); + EXPECT_TRUE(f.hasVectorPostingList(key + 0)); + EXPECT_FALSE(f.hasVectorPostingList(key + 1)); + EXPECT_TRUE(f.hasVectorPostingList(key + 2)); +} + +TEST_F("require that vector contains correct postings", Fixture) { + for (uint32_t i = 1; i <= 100; ++i) { + f.addPosting(key, i, i % 5 > 0 ? MyData{i * 2} : MyData{0}); + } + f.commit(); + ASSERT_TRUE(f.hasKey(key)); + ASSERT_TRUE(f.hasVectorPostingList(key)); + auto v = f.getVectorPostingList(key); + + EXPECT_EQUAL(1u, v.getKey()); + EXPECT_EQUAL(2u, v.getData().data); + + for (uint32_t i = 1; i < 100; ++i) { + v.linearSeek(i); + ASSERT_TRUE(v.valid()); + if (i % 5 == 0) { + EXPECT_EQUAL(i + 1, v.getKey()); + EXPECT_EQUAL((i + 1) * 2, v.getData().data); + } else { + EXPECT_EQUAL(i, v.getKey()); + EXPECT_EQUAL(i * 2, v.getData().data); + } + } + v.linearSeek(100); + EXPECT_FALSE(v.valid()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/tree_crumbs_test.cpp b/searchlib/src/tests/predicate/tree_crumbs_test.cpp new file mode 100644 index 00000000000..2f38bb74507 --- /dev/null +++ b/searchlib/src/tests/predicate/tree_crumbs_test.cpp @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Unit tests for TreeCrumbs. + +#include +LOG_SETUP("TreeCrumbs_test"); +#include + +#include +#include + +using namespace search::predicate; + +namespace { + +TEST("require that crumbs can set child and resize") { + TreeCrumbs crumbs; + EXPECT_EQUAL(0u, crumbs.size()); + EXPECT_EQUAL("", crumbs.getCrumb()); + crumbs.setChild(2); + EXPECT_EQUAL(2u, crumbs.size()); + EXPECT_EQUAL(":2", crumbs.getCrumb()); + crumbs.setChild(12345); + EXPECT_EQUAL(8u, crumbs.size()); + EXPECT_EQUAL(":2:12345", crumbs.getCrumb()); + crumbs.resize(2); + EXPECT_EQUAL(2u, crumbs.size()); + EXPECT_EQUAL(":2", crumbs.getCrumb()); + crumbs.setChild(42); + EXPECT_EQUAL(5u, crumbs.size()); + EXPECT_EQUAL(":2:42", crumbs.getCrumb()); + crumbs.resize(2); + EXPECT_EQUAL(2u, crumbs.size()); + EXPECT_EQUAL(":2", crumbs.getCrumb()); + crumbs.resize(0); + EXPECT_EQUAL(0u, crumbs.size()); + EXPECT_EQUAL("", crumbs.getCrumb()); +} + +TEST("require that child counts of 2^32 - 1 is ok") { + TreeCrumbs crumbs; + EXPECT_EQUAL(0u, crumbs.size()); + EXPECT_EQUAL("", crumbs.getCrumb()); + crumbs.setChild(0xffffffff); // 0xffffffff == 2^32 - 1 == 4294967295: ten digits plus the leading ':' give size 11 + EXPECT_EQUAL(11u, crumbs.size()); + EXPECT_EQUAL(":4294967295", crumbs.getCrumb()); +} + +TEST("require that child 0 gets number") { + TreeCrumbs crumbs; + crumbs.setChild(0); + EXPECT_EQUAL(2u, crumbs.size()); + EXPECT_EQUAL(":0", crumbs.getCrumb()); +} + +TEST("require that crumbs can set custom initial char") { + TreeCrumbs crumbs; + crumbs.setChild(0, 'a'); + crumbs.setChild(1, 'b'); + crumbs.setChild(2, 'c'); + EXPECT_EQUAL("a0b1c2", crumbs.getCrumb()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/prettyfloat/.gitignore b/searchlib/src/tests/prettyfloat/.gitignore new file mode 100644 index 00000000000..bf0327f3372 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +prettyfloat_test +searchlib_prettyfloat_test_app diff --git a/searchlib/src/tests/prettyfloat/CMakeLists.txt 
b/searchlib/src/tests/prettyfloat/CMakeLists.txt new file mode 100644 index 00000000000..74e91518030 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_prettyfloat_test_app + SOURCES + prettyfloat.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_prettyfloat_test_app COMMAND searchlib_prettyfloat_test_app) diff --git a/searchlib/src/tests/prettyfloat/DESC b/searchlib/src/tests/prettyfloat/DESC new file mode 100644 index 00000000000..fc4e85bcc09 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/DESC @@ -0,0 +1 @@ +prettyfloat test. Take a look at prettyfloat.cpp for details. diff --git a/searchlib/src/tests/prettyfloat/FILES b/searchlib/src/tests/prettyfloat/FILES new file mode 100644 index 00000000000..fe3e151cf90 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/FILES @@ -0,0 +1 @@ +prettyfloat.cpp diff --git a/searchlib/src/tests/prettyfloat/prettyfloat.cpp b/searchlib/src/tests/prettyfloat/prettyfloat.cpp new file mode 100644 index 00000000000..1ed9b7e1767 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/prettyfloat.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("prettyfloat_test"); +#include +#include +#include + +using namespace search; + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("prettyfloat_test"); + { + RawBuf buf(5000); + SignedHitRank rank = 10; + buf.addSignedHitRank(rank); + *buf.GetWritableFillPos() = '\0'; + EXPECT_EQUAL(std::string("10"), buf.GetDrainPos()); + } + { + RawBuf buf(5000); + HitRank rank = 10; + buf.addHitRank(rank); + *buf.GetWritableFillPos() = '\0'; + EXPECT_EQUAL(std::string("10"), buf.GetDrainPos()); + } + TEST_DONE(); +} diff --git a/searchlib/src/tests/query/.gitignore b/searchlib/src/tests/query/.gitignore new file mode 100644 index 00000000000..8b9d7f9993f --- /dev/null +++ b/searchlib/src/tests/query/.gitignore @@ -0,0 +1,10 @@ +*_test +.depend +Makefile +searchlib_customtypevisitor_test_app +searchlib_query-old-large_test_app +searchlib_query-old_test_app +searchlib_query_visitor_test_app +searchlib_querybuilder_test_app +searchlib_stackdumpquerycreator_test_app +searchlib_templatetermvisitor_test_app diff --git a/searchlib/src/tests/query/CMakeLists.txt b/searchlib/src/tests/query/CMakeLists.txt new file mode 100644 index 00000000000..16a75b7142a --- /dev/null +++ b/searchlib/src/tests/query/CMakeLists.txt @@ -0,0 +1,50 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_query_visitor_test_app + SOURCES + query_visitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query_visitor_test_app COMMAND searchlib_query_visitor_test_app) +vespa_add_executable(searchlib_customtypevisitor_test_app + SOURCES + customtypevisitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_customtypevisitor_test_app COMMAND searchlib_customtypevisitor_test_app) +vespa_add_executable(searchlib_templatetermvisitor_test_app + SOURCES + templatetermvisitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_templatetermvisitor_test_app COMMAND searchlib_templatetermvisitor_test_app) +vespa_add_executable(searchlib_querybuilder_test_app + SOURCES + querybuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_querybuilder_test_app COMMAND searchlib_querybuilder_test_app) +vespa_add_executable(searchlib_stackdumpquerycreator_test_app + SOURCES + stackdumpquerycreator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stackdumpquerycreator_test_app COMMAND searchlib_stackdumpquerycreator_test_app) +vespa_add_executable(searchlib_query-old_test_app + SOURCES + query-old.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query-old_test_app COMMAND searchlib_query-old_test_app) +vespa_add_executable(searchlib_query-old-large_test_app + SOURCES + query-old-large.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query-old-large_test_app COMMAND searchlib_query-old-large_test_app) diff --git a/searchlib/src/tests/query/DESC b/searchlib/src/tests/query/DESC new file mode 100644 index 00000000000..6461797f6bd --- /dev/null +++ b/searchlib/src/tests/query/DESC @@ -0,0 +1 @@ +This is a test of the query interface. 
diff --git a/searchlib/src/tests/query/FILES b/searchlib/src/tests/query/FILES new file mode 100644 index 00000000000..6f11f149162 --- /dev/null +++ b/searchlib/src/tests/query/FILES @@ -0,0 +1,2 @@ +query.cpp +query-old.cpp diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp new file mode 100644 index 00000000000..c9da2757d81 --- /dev/null +++ b/searchlib/src/tests/query/customtypevisitor_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for customtypevisitor. + +#include +#include +LOG_SETUP("customtypevisitor_test"); + +#include +#include +#include +#include + +using std::string; + +using namespace search::query; + +namespace { + +class Test : public vespalib::TestApp { + const char *current_state; + virtual void DumpState(bool) { + fprintf(stderr, "%s: ERROR: in %s\n", GetName(), current_state); + } + + template void requireThatNodeIsVisited(); + +public: + int Main(); +}; + +template +struct InitTerm : Base { + InitTerm() : Base(typename Base::Type(), "view", 0, Weight(0)) {} +}; + +struct MyAnd : And {}; +struct MyAndNot : AndNot {}; +struct MyEquiv : Equiv {}; +struct MyNear : Near { MyNear() : Near(1) {} }; +struct MyONear : ONear { MyONear() : ONear(1) {} }; +struct MyOr : Or {}; +struct MyPhrase : Phrase { MyPhrase() : Phrase("view", 0, Weight(42)) {} }; +struct MyRank : Rank {}; +struct MyNumberTerm : InitTerm {}; +struct MyLocationTerm : InitTerm {}; +struct MyPrefixTerm : InitTerm {}; +struct MyRangeTerm : InitTerm {}; +struct MyStringTerm : InitTerm {}; +struct MySubstrTerm : InitTerm {}; +struct MySuffixTerm : InitTerm {}; +struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} }; +struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm("view", 0, Weight(42)) {} }; +struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct("view", 0, 
Weight(42)) {} }; +struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm("view", 0, Weight(42), 57, 67, 77.7) {} }; +struct MyPredicateQuery : InitTerm {}; +struct MyRegExpTerm : InitTerm {}; + +struct MyQueryNodeTypes { + typedef MyAnd And; + typedef MyAndNot AndNot; + typedef MyEquiv Equiv; + typedef MyNumberTerm NumberTerm; + typedef MyLocationTerm LocationTerm; + typedef MyNear Near; + typedef MyONear ONear; + typedef MyOr Or; + typedef MyPhrase Phrase; + typedef MyPrefixTerm PrefixTerm; + typedef MyRangeTerm RangeTerm; + typedef MyRank Rank; + typedef MyStringTerm StringTerm; + typedef MySubstrTerm SubstringTerm; + typedef MySuffixTerm SuffixTerm; + typedef MyWeakAnd WeakAnd; + typedef MyWeightedSetTerm WeightedSetTerm; + typedef MyDotProduct DotProduct; + typedef MyWandTerm WandTerm; + typedef MyPredicateQuery PredicateQuery; + typedef MyRegExpTerm RegExpTerm; +}; + +class MyCustomVisitor : public CustomTypeVisitor +{ +public: + template + bool &isVisited() { + static bool b; + return b; + } + + template void setVisited() { isVisited() = true; } + + virtual void visit(MyAnd &) { setVisited(); } + virtual void visit(MyAndNot &) { setVisited(); } + virtual void visit(MyEquiv &) { setVisited(); } + virtual void visit(MyNumberTerm &) { setVisited(); } + virtual void visit(MyLocationTerm &) { setVisited(); } + virtual void visit(MyNear &) { setVisited(); } + virtual void visit(MyONear &) { setVisited(); } + virtual void visit(MyOr &) { setVisited(); } + virtual void visit(MyPhrase &) { setVisited(); } + virtual void visit(MyPrefixTerm &) { setVisited(); } + virtual void visit(MyRangeTerm &) { setVisited(); } + virtual void visit(MyRank &) { setVisited(); } + virtual void visit(MyStringTerm &) { setVisited(); } + virtual void visit(MySubstrTerm &) { setVisited(); } + virtual void visit(MySuffixTerm &) { setVisited(); } + virtual void visit(MyWeakAnd &) { setVisited(); } + virtual void visit(MyWeightedSetTerm &) { setVisited(); } + virtual void visit(MyDotProduct 
&) { setVisited(); } + virtual void visit(MyWandTerm &) { setVisited(); } + virtual void visit(MyPredicateQuery &) { setVisited(); } + virtual void visit(MyRegExpTerm &) { setVisited(); } +}; + +template +void Test::requireThatNodeIsVisited() { + MyCustomVisitor visitor; + Node::UP query(new T); + visitor.isVisited() = false; + query->accept(visitor); + ASSERT_TRUE(visitor.isVisited()); +} + +#define TEST_CALL(func) \ + current_state = #func; \ + func(); + +int +Test::Main() +{ + TEST_INIT("customtypevisitor_test"); + + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + TEST_CALL(requireThatNodeIsVisited); + + TEST_DONE(); +} +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/query/query-old-large.cpp b/searchlib/src/tests/query/query-old-large.cpp new file mode 100644 index 00000000000..4e0d0fb85de --- /dev/null +++ b/searchlib/src/tests/query/query-old-large.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::query; + +namespace { + +void setMaxStackSize(rlim_t maxStackSize) +{ + struct rlimit limit; + ASSERT_EQUAL(0, getrlimit(RLIMIT_STACK, &limit)); // on failure 'limit' would be indeterminate, so stop here + limit.rlim_cur = maxStackSize; + ASSERT_EQUAL(0, setrlimit(RLIMIT_STACK, &limit)); // a rejected limit must not go unnoticed: the test below depends on it +} + +} + + +// NOTE: This test explicitly sets thread stack size and will fail due to +// a stack overflow if the stack usage increases. +TEST("testveryLongQueryResultingInBug6850778") { + const uint32_t NUMITEMS=20000; + setMaxStackSize(4 * 1024 * 1024); + QueryBuilder builder; + for (uint32_t i=0; i <= NUMITEMS; i++) { + builder.addAnd(2); + builder.addStringTerm("a", "", 0, Weight(0)); + // Only the last AND gets "b" as its second term; earlier ANDs add no second + // term here (presumably the AND added by the next iteration fills that slot). + if (i == NUMITEMS) { + builder.addStringTerm("b", "", 0, Weight(0)); + } + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + QueryTermList terms; + q.getLeafs(terms); + ASSERT_EQUAL(NUMITEMS + 2, terms.size()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/query-old.cpp b/searchlib/src/tests/query/query-old.cpp new file mode 100644 index 00000000000..94eeacc2b4d --- /dev/null +++ b/searchlib/src/tests/query/query-old.cpp @@ -0,0 +1,650 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::query; + +void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weight) { + EXPECT_EQUAL(h.wordpos(), expWordpos); + EXPECT_EQUAL(h.context(), expContext); + EXPECT_EQUAL(h.weight(), weight); +} + +TEST("testQueryLanguage") { + EmptyQueryNodeResult eqnr; + int64_t ia(0), ib(0); + double da(0), db(0); + + QueryTerm q(eqnr, "7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, "-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, "7.5", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7.5); + EXPECT_EQUAL(db, 7.5); + + q = QueryTerm(eqnr, "-7.5", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.5); + EXPECT_EQUAL(db, -7.5); + + q = QueryTerm(eqnr, "<7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits::max()); + EXPECT_LESS(db, 7); + EXPECT_GREATER(db, 6.99); + + q = QueryTerm(eqnr, "[;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits::max()); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, ">7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, 
ib)); + EXPECT_EQUAL(ia, 8); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, 7); + EXPECT_LESS(da, 7.01); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + q = QueryTerm(eqnr, "[7;]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + q = QueryTerm(eqnr, "[-7;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, "[-7.1;7.1]", "index", QueryTerm::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. + EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.1); + EXPECT_EQUAL(db, 7.1); + + q = QueryTerm(eqnr, "[500.0;1.7976931348623157E308]", "index", QueryTerm::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. 
+ EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 500.0); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + const double minusSeven(-7), seven(7); + q = QueryTerm(eqnr, "<-7;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, seven); + + q = QueryTerm(eqnr, "<-7;7>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, nextafterf(seven, minusSeven)); + + q = QueryTerm(eqnr, "<1;2>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 2); + EXPECT_EQUAL(ib, 1); + + q = QueryTerm(eqnr, "[-7;7>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, minusSeven); + EXPECT_EQUAL(db, nextafterf(seven, minusSeven)); + + q = QueryTerm(eqnr, "<-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, -8); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits::max()); + EXPECT_LESS(db, -7); + EXPECT_GREATER(db, -7.01); + + q = QueryTerm(eqnr, "[;-7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits::max()); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, "<;-7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 
std::numeric_limits::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits::max()); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, ">-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, -7); + EXPECT_LESS(da, -6.99); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + q = QueryTerm(eqnr, "[-7;]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + q = QueryTerm(eqnr, "[-7;>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits::max()); + + q = QueryTerm(eqnr, "a", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(!q.getAsDoubleTerm(da, db)); + + q = QueryTerm(eqnr, "word", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "prefix", "index", QueryTerm::PREFIXTERM); + EXPECT_TRUE(q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "substring", "index", QueryTerm::SUBSTRINGTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "suffix", "index", QueryTerm::SUFFIXTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(q.isSuffix()); + + q = QueryTerm(eqnr, "regexp", "index", QueryTerm::REGEXP); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + 
EXPECT_TRUE(q.isRegex()); +} + +class AllowRewrite : public EmptyQueryNodeResult +{ +public: + DUPLICATE(AllowRewrite); // create duplicate function + + virtual bool getRewriteFloatTerms() const { return true; } +}; + +IMPLEMENT_DUPLICATE(AllowRewrite); + +const char TERM_UNIQ = ParseItem::ITEM_TERM | ParseItem::IF_UNIQUEID; + +TEST("e is not rewritten even if allowed") { + const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(6u, stackDump.size()); + AllowRewrite empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(QueryTerm::classId, root->getClass().id()); + const QueryTerm & qt = static_cast(*root); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); +} + +TEST("1.0e is not rewritten by default") { + const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(9u, stackDump.size()); + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(QueryTerm::classId, root->getClass().id()); + const QueryTerm & qt = static_cast(*root); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); +} + +TEST("1.0e is rewritten if allowed too.") { + const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(9u, stackDump.size()); + AllowRewrite empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(EquivQueryNode::classId, root->getClass().id()); + const EquivQueryNode & equiv = static_cast(*root); + EXPECT_EQUAL(2u, equiv.size()); + EXPECT_EQUAL(QueryTerm::classId, equiv[0]->getClass().id()); + { + const 
QueryTerm & qt = static_cast(*equiv[0]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); + } + EXPECT_EQUAL(PhraseQueryNode::classId, equiv[1]->getClass().id()); + { + const PhraseQueryNode & phrase = static_cast(*equiv[1]); + EXPECT_EQUAL(2u, phrase.size()); + EXPECT_EQUAL(QueryTerm::classId, phrase[0]->getClass().id()); + { + const QueryTerm & qt = static_cast(*phrase[0]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1"), qt.getTerm()); + EXPECT_EQUAL(0u, qt.uniqueId()); + } + EXPECT_EQUAL(QueryTerm::classId, phrase[1]->getClass().id()); + { + const QueryTerm & qt = static_cast(*phrase[1]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("0e"), qt.getTerm()); + EXPECT_EQUAL(0u, qt.uniqueId()); + } + } +} + +TEST("testGetQueryParts") { + QueryBuilder builder; + builder.addAnd(4); + { + builder.addStringTerm("a", "", 0, Weight(0)); + builder.addPhrase(3, "", 0, Weight(0)); + { + builder.addStringTerm("b", "", 0, Weight(0)); + builder.addStringTerm("c", "", 0, Weight(0)); + builder.addStringTerm("d", "", 0, Weight(0)); + } + builder.addStringTerm("e", "", 0, Weight(0)); + builder.addPhrase(2, "", 0, Weight(0)); + { + builder.addStringTerm("f", "", 0, Weight(0)); + builder.addStringTerm("g", "", 0, Weight(0)); + } + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + QueryTermList terms; + QueryNodeRefList phrases; + q.getLeafs(terms); + q.getPhrases(phrases); + ASSERT_TRUE(terms.size() == 7); + ASSERT_TRUE(phrases.size() == 2); + { + QueryTermList pts; + phrases[0]->getLeafs(pts); + ASSERT_TRUE(pts.size() == 3); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQUAL(pts[i], terms[i + 1]); + } + } + { + QueryTermList pts; + phrases[1]->getLeafs(pts); + ASSERT_TRUE(pts.size() == 2); + for (size_t i = 0; i < 2; ++i) { + EXPECT_EQUAL(pts[i], 
terms[i + 5]); + } + } +} + +TEST("testPhraseEvaluate") { + QueryBuilder builder; + builder.addPhrase(3, "", 0, Weight(0)); + { + builder.addStringTerm("a", "", 0, Weight(0)); + builder.addStringTerm("b", "", 0, Weight(0)); + builder.addStringTerm("c", "", 0, Weight(0)); + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + QueryNodeRefList phrases; + q.getPhrases(phrases); + QueryTermList terms; + q.getLeafs(terms); + // field 0 + terms[0]->add(0, 0, 1); + terms[1]->add(1, 0, 1); + terms[2]->add(2, 0, 1); + terms[0]->add(7, 0, 1); + terms[1]->add(8, 0, 1); + terms[2]->add(9, 0, 1); + // field 1 + terms[0]->add(4, 1, 1); + terms[1]->add(5, 1, 1); + terms[2]->add(6, 1, 1); + // field 2 (not complete match) + terms[0]->add(1, 2, 1); + terms[1]->add(2, 2, 1); + terms[2]->add(4, 2, 1); + // field 3 + terms[0]->add(0, 3, 1); + terms[1]->add(1, 3, 1); + terms[2]->add(2, 3, 1); + // field 4 (not complete match) + terms[0]->add(1, 4, 1); + terms[1]->add(2, 4, 1); + // field 5 (not complete match) + terms[0]->add(2, 5, 1); + terms[1]->add(1, 5, 1); + terms[2]->add(0, 5, 1); + HitList hits; + PhraseQueryNode * p = static_cast(phrases[0]); + p->evaluateHits(hits); + ASSERT_TRUE(hits.size() == 4); + EXPECT_EQUAL(hits[0].wordpos(), 2u); + EXPECT_EQUAL(hits[0].context(), 0u); + EXPECT_EQUAL(hits[1].wordpos(), 9u); + EXPECT_EQUAL(hits[1].context(), 0u); + EXPECT_EQUAL(hits[2].wordpos(), 6u); + EXPECT_EQUAL(hits[2].context(), 1u); + EXPECT_EQUAL(hits[3].wordpos(), 2u); + EXPECT_EQUAL(hits[3].context(), 3u); + ASSERT_TRUE(p->getFieldInfoSize() == 4); + EXPECT_EQUAL(p->getFieldInfo(0).getHitOffset(), 0u); + EXPECT_EQUAL(p->getFieldInfo(0).getHitCount(), 2u); + EXPECT_EQUAL(p->getFieldInfo(1).getHitOffset(), 2u); + EXPECT_EQUAL(p->getFieldInfo(1).getHitCount(), 1u); + EXPECT_EQUAL(p->getFieldInfo(2).getHitOffset(), 0u); // invalid, but will never be used + 
EXPECT_EQUAL(p->getFieldInfo(2).getHitCount(), 0u); + EXPECT_EQUAL(p->getFieldInfo(3).getHitOffset(), 3u); + EXPECT_EQUAL(p->getFieldInfo(3).getHitCount(), 1u); +} + +TEST("testHit") { + // positions (0 - (2^24-1)) + assertHit(Hit(0, 0, 0), 0, 0, 0); + assertHit(Hit(256, 0, 1), 256, 0, 1); + assertHit(Hit(16777215, 0, -1), 16777215, 0, -1); + assertHit(Hit(16777216, 0, 1), 0, 1, 1); // overflow + + // contexts (0 - 255) + assertHit(Hit(0, 1, 1), 0, 1, 1); + assertHit(Hit(0, 255, 1), 0, 255, 1); + assertHit(Hit(0, 256, 1), 0, 0, 1); // overflow +} + +void assertInt8Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult res = q.getRange(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, (int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +void assertInt32Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult res = q.getRange(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, (int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +void assertInt64Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult res = q.getRange(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, (int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +TEST("requireThatInt8LimitsAreEnforced") { + //std::numeric_limits::min() -> -128 + //std::numeric_limits::max() -> 127 + + assertInt8Range("-129", true, -128, -128); + assertInt8Range("-128", false, -128, -128); + assertInt8Range("127", false, 127, 127); + assertInt8Range("128", true, 127, 127); + assertInt8Range("[-129;0]", true, 
-128, 0); + assertInt8Range("[-128;0]", false, -128, 0); + assertInt8Range("[0;127]", false, 0, 127); + assertInt8Range("[0;128]", true, 0, 127); + assertInt8Range("[-130;-129]", true, -128, -128); + assertInt8Range("[128;129]", true, 127, 127); + assertInt8Range("[-129;128]", true, -128, 127); +} + +TEST("requireThatInt32LimitsAreEnforced") { + //std::numeric_limits::min() -> -2147483648 + //std::numeric_limits::max() -> 2147483647 + + int64_t min = std::numeric_limits::min(); + int64_t max = std::numeric_limits::max(); + + assertInt32Range("-2147483649", true, min, min); + assertInt32Range("-2147483648", false, min, min); + assertInt32Range("2147483647", false, max, max); + assertInt32Range("2147483648", true, max, max); + assertInt32Range("[-2147483649;0]", true, min, 0); + assertInt32Range("[-2147483648;0]", false, min, 0); + assertInt32Range("[0;2147483647]", false, 0, max); + assertInt32Range("[0;2147483648]", true, 0, max); + assertInt32Range("[-2147483650;-2147483649]", true, min, min); + assertInt32Range("[2147483648;2147483649]", true, max, max); + assertInt32Range("[-2147483649;2147483648]", true, min, max); +} + +TEST("requireThatInt64LimitsAreEnforced") { + //std::numeric_limits::min() -> -9223372036854775808 + //std::numeric_limits::max() -> 9223372036854775807 + + int64_t min = std::numeric_limits::min(); + int64_t max = std::numeric_limits::max(); + + assertInt64Range("-9223372036854775809", false, min, min); + assertInt64Range("-9223372036854775808", false, min, min); + assertInt64Range("9223372036854775807", false, max, max); + assertInt64Range("9223372036854775808", false, max, max); + assertInt64Range("[-9223372036854775809;0]", false, min, 0); + assertInt64Range("[-9223372036854775808;0]", false, min, 0); + assertInt64Range("[0;9223372036854775807]", false, 0, max); + assertInt64Range("[0;9223372036854775808]", false, 0, max); + assertInt64Range("[-9223372036854775810;-9223372036854775809]", false, min, min); + 
assertInt64Range("[9223372036854775808;9223372036854775809]", false, max, max); + assertInt64Range("[-9223372036854775809;9223372036854775808]", false, min, max); +} + +TEST("require sensible rounding when using integer attributes.") { + assertInt64Range("1.2", false, 1, 1); + assertInt64Range("1.51", false, 2, 2); + assertInt64Range("2.49", false, 2, 2); +} + +TEST("require that we can take floating point values in range search too.") { + assertInt64Range("[1;2]", false, 1, 2); + assertInt64Range("[1.1;2.1]", false, 2, 2); + assertInt64Range("[1.9;3.9]", false, 2, 3); + assertInt64Range("[1.9;3.9]", false, 2, 3); + assertInt64Range("[1.0;3.0]", false, 1, 3); + assertInt64Range("<1.0;3.0>", false, 2, 2); + assertInt64Range("[500.0;1.7976931348623157E308]", false, 500, std::numeric_limits::max()); + assertInt64Range("[500.0;1.6976931348623157E308]", false, 500, std::numeric_limits::max()); + assertInt64Range("[-1.7976931348623157E308;500.0]", false, std::numeric_limits::min(), 500); + assertInt64Range("[-1.6976931348623157E308;500.0]", false, std::numeric_limits::min(), 500); + assertInt64Range("[10;-10]", false, 10, -10); + assertInt64Range("[10.0;-10.0]", false, 10, -10); + assertInt64Range("[1.6976931348623157E308;-1.6976931348623157E308]", false, std::numeric_limits::max(), std::numeric_limits::min()); + assertInt64Range("[1.7976931348623157E308;-1.7976931348623157E308]", false, std::numeric_limits::max(), std::numeric_limits::min()); +} + +TEST("require that we handle empty range as expected") { + assertInt64Range("[1;1]", false, 1, 1); + assertInt64Range("<1;1]", false, 2, 1); + assertInt64Range("[0;1>", false, 0, 0); + assertInt64Range("[1;1>", false, 1, 0); + assertInt64Range("<1;1>", false, 2, 0); +} + +TEST("require that ascending range can be specified with limit only") { + int64_t low_integer = 0; + int64_t high_integer = 0; + double low_double = 0.0; + double high_double = 0.0; + + EmptyQueryNodeResult eqnr; + QueryTerm ascending_query(eqnr, "[;;500]", 
"index", QueryTerm::WORD); + + EXPECT_TRUE(ascending_query.getAsIntegerTerm(low_integer, high_integer)); + EXPECT_TRUE(ascending_query.getAsDoubleTerm(low_double, high_double)); + EXPECT_EQUAL(std::numeric_limits::min(), low_integer); + EXPECT_EQUAL(std::numeric_limits::max(), high_integer); + EXPECT_EQUAL(-std::numeric_limits::max(), low_double); + EXPECT_EQUAL(std::numeric_limits::max(), high_double); + EXPECT_EQUAL(500, ascending_query.getRangeLimit()); +} + +TEST("require that descending range can be specified with limit only") { + int64_t low_integer = 0; + int64_t high_integer = 0; + double low_double = 0.0; + double high_double = 0.0; + + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500]", "index", QueryTerm::WORD); + + EXPECT_TRUE(descending_query.getAsIntegerTerm(low_integer, high_integer)); + EXPECT_TRUE(descending_query.getAsDoubleTerm(low_double, high_double)); + EXPECT_EQUAL(std::numeric_limits::min(), low_integer); + EXPECT_EQUAL(std::numeric_limits::max(), high_integer); + EXPECT_EQUAL(-std::numeric_limits::max(), low_double); + EXPECT_EQUAL(std::numeric_limits::max(), high_double); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); +} + +TEST("require that correctly specified diversity can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(std::numeric_limits::max(), descending_query.getDiversityCutoffGroups()); + EXPECT_FALSE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that correctly specified diversity with cutoff groups can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + 
EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups()); + EXPECT_FALSE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that correctly specified diversity with cutoff groups can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78;13]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(13u, descending_query.getDiversityCutoffGroups()); + EXPECT_FALSE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that correctly specified diversity with incorrect cutoff groups can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78;a13.9]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(std::numeric_limits::max(), descending_query.getDiversityCutoffGroups()); + EXPECT_FALSE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that correctly specified diversity with cutoff strategy can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;anything but strict]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups()); + 
EXPECT_FALSE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that correctly specified diversity with strict cutoff strategy can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;strict]", "index", QueryTerm::WORD); + EXPECT_TRUE(descending_query.isValid()); + EXPECT_EQUAL(-500, descending_query.getRangeLimit()); + EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute()); + EXPECT_EQUAL(78u, descending_query.getMaxPerGroup()); + EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups()); + EXPECT_TRUE(descending_query.getDiversityCutoffStrict()); +} + +TEST("require that incorrectly specified diversity can be parsed") { + EmptyQueryNodeResult eqnr; + QueryTerm descending_query(eqnr, "[;;-500;ab56]", "index", QueryTerm::WORD); + EXPECT_FALSE(descending_query.isValid()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp new file mode 100644 index 00000000000..b98e14604e3 --- /dev/null +++ b/searchlib/src/tests/query/query_visitor_test.cpp @@ -0,0 +1,114 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for query_visitor. 
+ +#include +#include +LOG_SETUP("query_visitor_test"); + +#include +#include +#include +#include +#include +#include + +using namespace search::query; + +namespace { + +class Test : public vespalib::TestApp { + void requireThatAllNodesCanBeVisited(); + + template void checkVisit(T *node); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("query_visitor_test"); + + TEST_DO(requireThatAllNodesCanBeVisited()); + + TEST_DONE(); +} + +class MyVisitor : public QueryVisitor +{ +public: + template + bool &isVisited() { + static bool b; + return b; + } + + virtual void visit(And &) { isVisited() = true; } + virtual void visit(AndNot &) { isVisited() = true; } + virtual void visit(Equiv &) { isVisited() = true; } + virtual void visit(NumberTerm &) { isVisited() = true; } + virtual void visit(LocationTerm &) { isVisited() = true; } + virtual void visit(Near &) { isVisited() = true; } + virtual void visit(ONear &) { isVisited() = true; } + virtual void visit(Or &) { isVisited() = true; } + virtual void visit(Phrase &) { isVisited() = true; } + virtual void visit(PrefixTerm &) { isVisited() = true; } + virtual void visit(RangeTerm &) { isVisited() = true; } + virtual void visit(Rank &) { isVisited() = true; } + virtual void visit(StringTerm &) { isVisited() = true; } + virtual void visit(SubstringTerm &) { isVisited() = true; } + virtual void visit(SuffixTerm &) { isVisited() = true; } + virtual void visit(WeakAnd &) { isVisited() = true; } + virtual void visit(WeightedSetTerm &) + { isVisited() = true; } + virtual void visit(DotProduct &) { isVisited() = true; } + virtual void visit(WandTerm &) { isVisited() = true; } + virtual void visit(PredicateQuery &) + { isVisited() = true; } + virtual void visit(RegExpTerm &) { isVisited() = true; } +}; + +template +void Test::checkVisit(T *node) { + Node::UP query(node); + MyVisitor visitor; + visitor.isVisited() = false; + query->accept(visitor); + ASSERT_TRUE(visitor.isVisited()); +} + +void 
Test::requireThatAllNodesCanBeVisited() { + checkVisit(new SimpleAnd); + checkVisit(new SimpleAndNot); + checkVisit(new SimpleNear(0)); + checkVisit(new SimpleONear(0)); + checkVisit(new SimpleOr); + checkVisit(new SimplePhrase("field", 0, Weight(42))); + checkVisit( + new SimpleWeightedSetTerm("field", 0, Weight(42))); + checkVisit(new SimpleDotProduct("field", 0, Weight(42))); + checkVisit( + new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7)); + checkVisit(new SimpleRank); + checkVisit( + new SimpleNumberTerm("0.42", "field", 0, Weight(0))); + const Location location(Point(10, 10), 20, 0); + checkVisit( + new SimpleLocationTerm(location, "field", 0, Weight(0))); + checkVisit(new SimplePrefixTerm("t", "field", 0, Weight(0))); + checkVisit( + new SimpleRangeTerm(Range(0, 1), "field", 0, Weight(0))); + checkVisit(new SimpleStringTerm("t", "field", 0, Weight(0))); + checkVisit( + new SimpleSubstringTerm("t", "field", 0, Weight(0))); + checkVisit(new SimpleSuffixTerm("t", "field", 0, Weight(0))); + checkVisit( + new SimplePredicateQuery(PredicateQueryTerm::UP(), + "field", 0, Weight(0))); + checkVisit(new SimpleRegExpTerm("t", "field", 0, Weight(0))); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp new file mode 100644 index 00000000000..b64a46e9b18 --- /dev/null +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -0,0 +1,615 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for querybuilder. 
+ +#include +#include +LOG_SETUP("querybuilder_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::string; +using search::SimpleQueryStackDumpIterator; +using namespace search::query; + +namespace { + +template void checkQueryTreeTypes(Node *node); + +const string str[] = { "foo", "bar", "baz", "qux", "quux", "corge", + "grault", "garply", "waldo", "fred", "plugh" }; +const string (&view)[11] = str; +const int32_t id[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; +const Weight weight[] = { Weight(1), Weight(2), Weight(3), Weight(4), + Weight(5), Weight(6), Weight(7), Weight(8), + Weight(9), Weight(10), Weight(11) }; +const size_t distance = 4; +const string int1 = "42"; +const string float1 = "3.14"; +const Range range(32, 64); +const Point position(100, 100); +const int max_distance = 20; +const uint32_t x_aspect = 0; +const Location location(position, max_distance, x_aspect); + +PredicateQueryTerm::UP getPredicateQueryTerm() { + PredicateQueryTerm::UP pqt(new PredicateQueryTerm); + pqt->addFeature("key", "value"); + pqt->addRangeFeature("key2", 42, 0xfff); + return pqt; +} + +template +Node::UP createQueryTree() { + QueryBuilder builder; + builder.addAnd(9); + { + builder.addRank(2); + { + builder.addNear(2, distance); + { + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]); + } + builder.addONear(2, distance); + { + builder.addSuffixTerm(str[2], view[2], id[2], weight[2]); + builder.addPrefixTerm(str[3], view[3], id[3], weight[3]); + } + } + builder.addOr(3); + { + builder.addPhrase(3, view[4], id[4], weight[4]); + { + builder.addStringTerm(str[4], view[4], id[4], weight[5]); + builder.addStringTerm(str[5], view[5], id[5], weight[6]); + builder.addStringTerm(str[6], view[6], id[6], weight[7]); + } + builder.addPhrase(2, view[4], id[4], weight[4]) + .setRanked(false); + { + 
builder.addStringTerm(str[4], view[4], id[4], weight[5]); + builder.addStringTerm(str[5], view[5], id[5], weight[6]); + } + builder.addAndNot(2); + { + builder.addNumberTerm(int1, view[7], id[7], weight[7]); + builder.addNumberTerm(float1, view[8], id[8], weight[8]) + .setRanked(false); + } + } + builder.addRangeTerm(range, view[9], id[9], weight[9]); + builder.addLocationTerm(location, view[10], id[10], weight[10]); + builder.addWeakAnd(2, 123, view[0]); + { + builder.addStringTerm(str[4], view[4], id[4], weight[4]); + builder.addStringTerm(str[5], view[5], id[5], weight[5]); + } + builder.addPredicateQuery(getPredicateQueryTerm(), + view[3], id[3], weight[3]); + builder.addDotProduct(3, view[2], id[2], weight[2]); + { + builder.addStringTerm(str[3], view[3], id[3], weight[3]); + builder.addStringTerm(str[4], view[4], id[4], weight[4]); + builder.addStringTerm(str[5], view[5], id[5], weight[5]); + } + builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); + { + builder.addStringTerm(str[1], view[1], id[1], weight[1]); + builder.addStringTerm(str[2], view[2], id[2], weight[2]); + } + builder.addRegExpTerm(str[5], view[5], id[5], weight[5]); + } + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + return node; +} + +template +bool compareTerms(const TermType &expected, const TermType &actual) { + return EXPECT_TRUE(expected == actual); +} +template +bool compareTerms(const std::unique_ptr &expected, + const std::unique_ptr &actual) { + return EXPECT_TRUE(*expected == *actual); +} + +template +bool checkTerm(const Term *term, const typename Term::Type &t, const string &f, + int32_t i, Weight w, bool ranked = true, + bool use_position_data = true) { + return EXPECT_TRUE(term != 0) && + (EXPECT_TRUE(compareTerms(t, term->getTerm())) & + EXPECT_EQUAL(f, term->getView()) & + EXPECT_EQUAL(i, term->getId()) & + EXPECT_EQUAL(w.percent(), term->getWeight().percent()) & + EXPECT_EQUAL(ranked, term->isRanked()) & + EXPECT_EQUAL(use_position_data, 
term->usePositionData())); +} + +template +void checkQueryTreeTypes(Node *node) { + typedef typename NodeTypes::And And; + typedef typename NodeTypes::AndNot AndNot; + typedef typename NodeTypes::NumberTerm NumberTerm; + //typedef typename NodeTypes::NumberTerm FloatTrm; + typedef typename NodeTypes::Near Near; + typedef typename NodeTypes::ONear ONear; + typedef typename NodeTypes::Or Or; + typedef typename NodeTypes::Phrase Phrase; + typedef typename NodeTypes::PrefixTerm PrefixTerm; + typedef typename NodeTypes::RangeTerm RangeTerm; + typedef typename NodeTypes::Rank Rank; + typedef typename NodeTypes::StringTerm StringTerm; + //typedef typename NodeTypes::SubstringTerm SubstrTr; + typedef typename NodeTypes::SuffixTerm SuffixTerm; + typedef typename NodeTypes::LocationTerm LocationTerm; + //typedef typename NodeTypes::WeightedSetTerm WeightedSetTerm; + typedef typename NodeTypes::DotProduct DotProduct; + typedef typename NodeTypes::WandTerm WandTerm; + typedef typename NodeTypes::WeakAnd WeakAnd; + typedef typename NodeTypes::PredicateQuery PredicateQuery; + typedef typename NodeTypes::RegExpTerm RegExpTerm; + + ASSERT_TRUE(node); + And *and_node = dynamic_cast(node); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(9u, and_node->getChildren().size()); + + + Rank *rank = dynamic_cast(and_node->getChildren()[0]); + ASSERT_TRUE(rank); + EXPECT_EQUAL(2u, rank->getChildren().size()); + + Near *near = dynamic_cast(rank->getChildren()[0]); + ASSERT_TRUE(near); + EXPECT_EQUAL(2u, near->getChildren().size()); + EXPECT_EQUAL(distance, near->getDistance()); + StringTerm *string_term = + dynamic_cast(near->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[0], view[0], id[0], weight[0])); + SubstringTerm *substring_term = + dynamic_cast(near->getChildren()[1]); + EXPECT_TRUE(checkTerm(substring_term, str[1], view[1], id[1], weight[1])); + + ONear *onear = dynamic_cast(rank->getChildren()[1]); + ASSERT_TRUE(onear); + EXPECT_EQUAL(2u, onear->getChildren().size()); + 
EXPECT_EQUAL(distance, onear->getDistance()); + SuffixTerm *suffix_term = + dynamic_cast(onear->getChildren()[0]); + EXPECT_TRUE(checkTerm(suffix_term, str[2], view[2], id[2], weight[2])); + PrefixTerm *prefix_term = + dynamic_cast(onear->getChildren()[1]); + EXPECT_TRUE(checkTerm(prefix_term, str[3], view[3], id[3], weight[3])); + + + Or *or_node = dynamic_cast(and_node->getChildren()[1]); + ASSERT_TRUE(or_node); + EXPECT_EQUAL(3u, or_node->getChildren().size()); + + Phrase *phrase = dynamic_cast(or_node->getChildren()[0]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(phrase->isRanked()); + EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent()); + EXPECT_EQUAL(3u, phrase->getChildren().size()); + string_term = dynamic_cast(phrase->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast(phrase->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4])); + string_term = dynamic_cast(phrase->getChildren()[2]); + EXPECT_TRUE(checkTerm(string_term, str[6], view[6], id[6], weight[4])); + + phrase = dynamic_cast(or_node->getChildren()[1]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(!phrase->isRanked()); + EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent()); + EXPECT_EQUAL(2u, phrase->getChildren().size()); + string_term = dynamic_cast(phrase->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast(phrase->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4])); + + AndNot *and_not = dynamic_cast(or_node->getChildren()[2]); + ASSERT_TRUE(and_not); + EXPECT_EQUAL(2u, and_not->getChildren().size()); + NumberTerm *integer_term = + dynamic_cast(and_not->getChildren()[0]); + EXPECT_TRUE(checkTerm(integer_term, int1, view[7], id[7], weight[7])); + NumberTerm *float_term = + dynamic_cast(and_not->getChildren()[1]); + EXPECT_TRUE(checkTerm(float_term, float1, view[8], 
id[8], weight[8], + false)); + + + RangeTerm *range_term = + dynamic_cast(and_node->getChildren()[2]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(checkTerm(range_term, range, view[9], id[9], weight[9])); + + LocationTerm *loc_term = + dynamic_cast(and_node->getChildren()[3]); + ASSERT_TRUE(loc_term); + EXPECT_TRUE(checkTerm(loc_term, location, view[10], id[10], weight[10])); + + + WeakAnd *wand = dynamic_cast(and_node->getChildren()[4]); + ASSERT_TRUE(wand != 0); + EXPECT_EQUAL(123u, wand->getMinHits()); + EXPECT_EQUAL(2u, wand->getChildren().size()); + string_term = dynamic_cast(wand->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast(wand->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + + PredicateQuery *predicateQuery = + dynamic_cast(and_node->getChildren()[5]); + ASSERT_TRUE(predicateQuery); + PredicateQueryTerm::UP pqt(new PredicateQueryTerm); + EXPECT_TRUE(checkTerm(predicateQuery, getPredicateQueryTerm(), + view[3], id[3], weight[3])); + + DotProduct *dotProduct = + dynamic_cast(and_node->getChildren()[6]); + ASSERT_TRUE(dotProduct); + EXPECT_EQUAL(3u, dotProduct->getChildren().size()); + string_term = dynamic_cast(dotProduct->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[3], view[3], id[3], weight[3])); + string_term = dynamic_cast(dotProduct->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast(dotProduct->getChildren()[2]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + + WandTerm *wandTerm = dynamic_cast(and_node->getChildren()[7]); + ASSERT_TRUE(wandTerm); + EXPECT_EQUAL(57u, wandTerm->getTargetNumHits()); + EXPECT_EQUAL(67, wandTerm->getScoreThreshold()); + EXPECT_EQUAL(77.7, wandTerm->getThresholdBoostFactor()); + EXPECT_EQUAL(2u, wandTerm->getChildren().size()); + string_term = dynamic_cast(wandTerm->getChildren()[0]); + 
EXPECT_TRUE(checkTerm(string_term, str[1], view[1], id[1], weight[1])); + string_term = dynamic_cast(wandTerm->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[2], view[2], id[2], weight[2])); + + RegExpTerm *regexp_term = + dynamic_cast(and_node->getChildren()[8]); + EXPECT_TRUE(checkTerm(regexp_term, str[5], view[5], id[5], weight[5])); +} + +struct AbstractTypes { + typedef search::query::And And; + typedef search::query::AndNot AndNot; + typedef search::query::NumberTerm NumberTerm; + typedef search::query::LocationTerm LocationTerm; + typedef search::query::Near Near; + typedef search::query::ONear ONear; + typedef search::query::Or Or; + typedef search::query::Phrase Phrase; + typedef search::query::PrefixTerm PrefixTerm; + typedef search::query::RangeTerm RangeTerm; + typedef search::query::Rank Rank; + typedef search::query::StringTerm StringTerm; + typedef search::query::SubstringTerm SubstringTerm; + typedef search::query::SuffixTerm SuffixTerm; + typedef search::query::WeightedSetTerm WeightedSetTerm; + typedef search::query::DotProduct DotProduct; + typedef search::query::WandTerm WandTerm; + typedef search::query::WeakAnd WeakAnd; + typedef search::query::PredicateQuery PredicateQuery; + typedef search::query::RegExpTerm RegExpTerm; +}; + +// Builds a tree with simplequery and checks that the results have the +// correct abstract types. +TEST("require that Query Trees Can Be Built") { + Node::UP node = createQueryTree(); + checkQueryTreeTypes(node.get()); +} + +// Builds a tree with simplequery and checks that the results have the +// correct concrete types. 
+TEST("require that Simple Query Trees Can Be Built") { + Node::UP node = createQueryTree(); + checkQueryTreeTypes(node.get()); +} + +struct MyAnd : And {}; +struct MyAndNot : AndNot {}; +struct MyEquiv : Equiv { + MyEquiv(int32_t i, Weight w) : Equiv(i, w) {} +}; +struct MyNear : Near { MyNear(size_t dist) : Near(dist) {} }; +struct MyONear : ONear { MyONear(size_t dist) : ONear(dist) {} }; +struct MyWeakAnd : WeakAnd { MyWeakAnd(uint32_t minHits, const vespalib::string & v) : WeakAnd(minHits, v) {} }; +struct MyOr : Or {}; +struct MyPhrase : Phrase { + MyPhrase(const string &f, int32_t i, Weight w) : Phrase(f, i, w) {} +}; +struct MyWeightedSetTerm : WeightedSetTerm { + MyWeightedSetTerm(const string &f, int32_t i, Weight w) : WeightedSetTerm(f, i, w) {} +}; +struct MyDotProduct : DotProduct { + MyDotProduct(const string &f, int32_t i, Weight w) : DotProduct(f, i, w) {} +}; +struct MyWandTerm : WandTerm { + MyWandTerm(const string &f, int32_t i, Weight w, uint32_t targetNumHits, + int64_t scoreThreshold, double thresholdBoostFactor) + : WandTerm(f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} +}; +struct MyRank : Rank {}; +struct MyNumberTerm : NumberTerm { + MyNumberTerm(Type t, const string &f, int32_t i, Weight w) + : NumberTerm(t, f, i, w) { + } +}; +struct MyLocationTerm : LocationTerm { + MyLocationTerm(const Type &t, const string &f, int32_t i, Weight w) + : LocationTerm(t, f, i, w) { + } +}; +struct MyPrefixTerm : PrefixTerm { + MyPrefixTerm(const Type &t, const string &f, int32_t i, Weight w) + : PrefixTerm(t, f, i, w) { + } +}; +struct MyRangeTerm : RangeTerm { + MyRangeTerm(const Type &t, const string &f, int32_t i, Weight w) + : RangeTerm(t, f, i, w) { + } +}; +struct MyStringTerm : StringTerm { + MyStringTerm(const Type &t, const string &f, int32_t i, Weight w) + : StringTerm(t, f, i, w) { + } +}; +struct MySubstringTerm : SubstringTerm { + MySubstringTerm(const Type &t, const string &f, int32_t i, Weight w) + : SubstringTerm(t, f, 
i, w) { + } +}; +struct MySuffixTerm : SuffixTerm { + MySuffixTerm(const Type &t, const string &f, int32_t i, Weight w) + : SuffixTerm(t, f, i, w) { + } +}; +struct MyPredicateQuery : PredicateQuery { + MyPredicateQuery(Type &&t, const string &f, int32_t i, Weight w) + : PredicateQuery(std::move(t), f, i, w) { + } +}; +struct MyRegExpTerm : RegExpTerm { + MyRegExpTerm(const Type &t, const string &f, int32_t i, Weight w) + : RegExpTerm(t, f, i, w) { + } +}; + +struct MyQueryNodeTypes { + typedef MyAnd And; + typedef MyAndNot AndNot; + typedef MyEquiv Equiv; + typedef MyNumberTerm NumberTerm; + typedef MyLocationTerm LocationTerm; + typedef MyNear Near; + typedef MyONear ONear; + typedef MyOr Or; + typedef MyPhrase Phrase; + typedef MyPrefixTerm PrefixTerm; + typedef MyRangeTerm RangeTerm; + typedef MyRank Rank; + typedef MyStringTerm StringTerm; + typedef MySubstringTerm SubstringTerm; + typedef MySuffixTerm SuffixTerm; + typedef MyWeakAnd WeakAnd; + typedef MyWeightedSetTerm WeightedSetTerm; + typedef MyDotProduct DotProduct; + typedef MyWandTerm WandTerm; + typedef MyPredicateQuery PredicateQuery; + typedef MyRegExpTerm RegExpTerm; +}; + +TEST("require that Custom Query Trees Can Be Built") { + Node::UP node = createQueryTree(); + checkQueryTreeTypes(node.get()); +} + +TEST("require that Invalid Trees Cannot Be Built") { + // Incomplete tree. + QueryBuilder builder; + builder.addAnd(1); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error()); + + // Adding a node after build() and before reset() is a no-op. + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error()); + + builder.reset(); + EXPECT_TRUE(builder.error().empty()); + + // Too many nodes. 
+ builder.addAnd(1); + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addStringTerm(str[1], view[1], id[1], weight[1]); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error()); + + // Adding an intermediate node after build() is also a no-op. + builder.addAnd(1); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error()); +} + +TEST("require that Term Index Can Be Added") { + const int term_index0 = 14; + const int term_index1 = 65; + + QueryBuilder builder; + builder.addAnd(2); + builder.addStringTerm(str[0], view[0], id[0], weight[0]) + .setTermIndex(term_index0); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]) + .setTermIndex(term_index1); + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate *intermediate = dynamic_cast(node.get()); + ASSERT_TRUE(intermediate); + ASSERT_TRUE(intermediate->getChildren().size() == 2); + Term *term = dynamic_cast(intermediate->getChildren()[0]); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_index0, term->getTermIndex()); + term = dynamic_cast(intermediate->getChildren()[1]); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_index1, term->getTermIndex()); +} + +TEST("require that Rank Can Be Turned Off") { + QueryBuilder builder; + builder.addAnd(3); + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]) + .setRanked(false); + builder.addPhrase(2, view[2], id[2], weight[2]) + .setRanked(false); + { + builder.addStringTerm(str[2], view[2], id[3], weight[3]); + builder.addStringTerm(str[3], view[2], id[4], weight[4]); + } + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate *intermediate = dynamic_cast(node.get()); + ASSERT_TRUE(intermediate); + ASSERT_TRUE(intermediate->getChildren().size() == 3); + Term *term = dynamic_cast(intermediate->getChildren()[0]); + 
ASSERT_TRUE(term); + EXPECT_TRUE(term->isRanked()); + term = dynamic_cast(intermediate->getChildren()[1]); + ASSERT_TRUE(term); + EXPECT_TRUE(!term->isRanked()); + Phrase *phrase = dynamic_cast(intermediate->getChildren()[2]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(!phrase->isRanked()); +} + +TEST("require that Using Position Data Can Be Turned Off") { + QueryBuilder builder; + builder.addAnd(2); + builder.addStringTerm(str[0], view[0], id[0], weight[0]).setPositionData(false); + builder.addPhrase(2, view[1], id[1], weight[1]).setPositionData(false); + builder.addStringTerm(str[2], view[1], id[2], weight[2]); + builder.addStringTerm(str[3], view[1], id[3], weight[3]); + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate * andNode = dynamic_cast(node.get()); + ASSERT_TRUE(andNode != NULL); + ASSERT_TRUE(andNode->getChildren().size() == 2); + Term * term = dynamic_cast(andNode->getChildren()[0]); + ASSERT_TRUE(term != NULL); + EXPECT_TRUE(!term->usePositionData()); + Phrase * phrase = dynamic_cast(andNode->getChildren()[1]); + ASSERT_TRUE(phrase != NULL); + EXPECT_TRUE(!phrase->usePositionData()); +} + +TEST("require that Weight Override Works Across Multiple Levels") { + QueryBuilder builder; + builder.addPhrase(2, view[0], id[0], weight[0]); + + SimpleStringTerm &string_term_1 = + builder.addStringTerm(str[1], view[1], id[1], weight[1]); + EXPECT_EQUAL(weight[0].percent(), string_term_1.getWeight().percent()); + + builder.addAnd(2); + SimpleStringTerm &string_term_2 = + builder.addStringTerm(str[2], view[2], id[2], weight[2]); + EXPECT_EQUAL(weight[0].percent(), string_term_2.getWeight().percent()); +} + +TEST("require that Query Tree Creator Can Replicate Queries") { + Node::UP node = createQueryTree(); + Node::UP new_node = QueryTreeCreator::replicate(*node); + + checkQueryTreeTypes(node.get()); + checkQueryTreeTypes(new_node.get()); +} + +TEST("require that Query Tree Creator Can Create Queries From Stack") { + Node::UP node = 
createQueryTree(); + string stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator::create(iterator); + checkQueryTreeTypes(new_node.get()); +} + +TEST("require that All Range Syntaxes Work") { + + Range range0("[2,42.1]"); + Range range1(">10"); + Range range2("<45.23"); + + QueryBuilder builder; + builder.addAnd(3); + builder.addRangeTerm(range0, "view", 0, Weight(0)); + builder.addRangeTerm(range1, "view", 0, Weight(0)); + builder.addRangeTerm(range2, "view", 0, Weight(0)); + Node::UP node = builder.build(); + + string stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator::create(iterator); + And *and_node = dynamic_cast(new_node.get()); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(3u, and_node->getChildren().size()); + + RangeTerm *range_term = + dynamic_cast(and_node->getChildren()[0]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range0 == range_term->getTerm()); + + range_term = dynamic_cast(and_node->getChildren()[1]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range1 == range_term->getTerm()); + + range_term = dynamic_cast(and_node->getChildren()[2]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range2 == range_term->getTerm()); +} + +TEST("require that empty intermediate node can be added") { + QueryBuilder builder; + builder.addAnd(0); + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + + string stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator::create(iterator); + And *and_node = dynamic_cast(new_node.get()); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(0u, and_node->getChildren().size()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/stackdumpquerycreator_test.cpp b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp new file mode 100644 index 
00000000000..269947b7059 --- /dev/null +++ b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for stackdumpquerycreator. + +#include +#include +LOG_SETUP("stackdumpquerycreator_test"); + +#include +#include +#include +#include +#include +#include + +using search::ParseItem; +using search::RawBuf; +using search::SimpleQueryStackDumpIterator; +using std::string; +using namespace search::query; + +namespace { + +template +void append(RawBuf &buf, T i) { + buf.preAlloc(sizeof(T)); + buf.PutToInet(i); +} + +void appendString(RawBuf &buf, const string &s) { + buf.preAlloc(sizeof(uint32_t) + s.size()); + buf.appendCompressedPositiveNumber(s.size()); + buf.append(s.data(), s.size()); +} + +void appendNumTerm(RawBuf &buf, const string &term_string) { + uint8_t typefield = ParseItem::ITEM_NUMTERM | + ParseItem::IF_WEIGHT | + ParseItem::IF_UNIQUEID; + buf.append(typefield); + buf.appendCompressedNumber(2); // weight + buf.appendCompressedPositiveNumber(42); // id + appendString(buf, "view_name"); + appendString(buf, term_string); +} + +TEST("requireThatTooLargeNumTermIsTreatedAsFloat") { + const string term_string("99999999999999999999999999999999999"); + RawBuf buf(1024); + appendNumTerm(buf, term_string); + + SimpleQueryStackDumpIterator query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + StackDumpQueryCreator::create(query_stack); + ASSERT_TRUE(node.get()); + NumberTerm *term = dynamic_cast(node.get()); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_string, term->getTerm()); +} + +TEST("requireThatTooLargeFloatNumTermIsTreatedAsFloat") { + const string term_string = "1" + string(310, '0') + ".20"; + RawBuf buf(1024); + appendNumTerm(buf, term_string); + + SimpleQueryStackDumpIterator + query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + 
StackDumpQueryCreator::create(query_stack); + ASSERT_TRUE(node.get()); + NumberTerm *term = dynamic_cast(node.get()); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_string, term->getTerm()); +} + +TEST("require that PredicateQueryItem stack dump item can be read") { + RawBuf buf(1024); + uint8_t typefield = ParseItem::ITEM_PREDICATE_QUERY; + buf.append(typefield); + appendString(buf, "view_name"); + + buf.appendCompressedNumber(2); + appendString(buf, "key1"); + appendString(buf, "value1"); + buf.Put64ToInet(-1ULL); + appendString(buf, "key2"); + appendString(buf, "value2"); + buf.Put64ToInet(0xffffULL); + + buf.appendCompressedNumber(2); + appendString(buf, "key3"); + buf.Put64ToInet(42ULL); + buf.Put64ToInet(-1ULL); + appendString(buf, "key4"); + buf.Put64ToInet(84ULL); + buf.Put64ToInet(0xffffULL); + + SimpleQueryStackDumpIterator + query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + StackDumpQueryCreator::create(query_stack); + ASSERT_TRUE(node.get()); + PredicateQuery *p = dynamic_cast(node.get()); + ASSERT_TRUE(p); + const PredicateQueryTerm &term = *p->getTerm(); + ASSERT_EQUAL(2u, term.getFeatures().size()); + ASSERT_EQUAL(2u, term.getRangeFeatures().size()); + ASSERT_EQUAL("value1", term.getFeatures()[0].getValue()); + ASSERT_EQUAL(0xffffffffffffffffULL, + term.getFeatures()[0].getSubQueryBitmap()); + ASSERT_EQUAL("key2", term.getFeatures()[1].getKey()); + ASSERT_EQUAL(42u, term.getRangeFeatures()[0].getValue()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/templatetermvisitor_test.cpp b/searchlib/src/tests/query/templatetermvisitor_test.cpp new file mode 100644 index 00000000000..369266f5b2d --- /dev/null +++ b/searchlib/src/tests/query/templatetermvisitor_test.cpp @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for templatetermvisitor. 
+ +#include +#include +LOG_SETUP("templatetermvisitor_test"); + +#include +#include +#include +#include +#include + +using namespace search::query; + +namespace { + +class MyVisitor; + +class Test : public vespalib::TestApp { + void requireThatAllTermsCanBeVisited(); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("templatetermvisitor_test"); + + TEST_DO(requireThatAllTermsCanBeVisited()); + + TEST_DONE(); +} + +class MyVisitor : public TemplateTermVisitor +{ +public: + template + bool &isVisited() { + static bool b; + return b; + } + + template + void visitTerm(TermType &) { isVisited() = true; } +}; + +template +bool checkVisit(T *q) { + Node::UP query(q); + MyVisitor visitor; + visitor.isVisited() = false; + query->accept(visitor); + return visitor.isVisited(); +} + +template +bool checkVisit() { + return checkVisit(new T(typename T::Type(), "field", 0, Weight(0))); +} + +void Test::requireThatAllTermsCanBeVisited() { + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit()); + EXPECT_TRUE(checkVisit(new SimplePhrase("field", 0, Weight(0)))); + EXPECT_TRUE(!checkVisit(new SimpleAnd)); + EXPECT_TRUE(!checkVisit(new SimpleAndNot)); + EXPECT_TRUE(!checkVisit(new SimpleEquiv(17, Weight(100)))); + EXPECT_TRUE(!checkVisit(new SimpleNear(2))); + EXPECT_TRUE(!checkVisit(new SimpleONear(2))); + EXPECT_TRUE(!checkVisit(new SimpleOr)); + EXPECT_TRUE(!checkVisit(new SimpleRank)); +} + +} // namespace + +TEST_APPHOOK(Test); +#include diff --git a/searchlib/src/tests/queryeval/.gitignore b/searchlib/src/tests/queryeval/.gitignore new file mode 100644 index 00000000000..7039566e7c2 --- /dev/null +++ b/searchlib/src/tests/queryeval/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +*_test +*_bench +searchlib_queryeval_test_app diff --git 
a/searchlib/src/tests/queryeval/CMakeLists.txt b/searchlib/src/tests/queryeval/CMakeLists.txt new file mode 100644 index 00000000000..35496b7f99a --- /dev/null +++ b/searchlib/src/tests/queryeval/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_queryeval_test_app + SOURCES + queryeval.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_queryeval_test_app COMMAND searchlib_queryeval_test_app) diff --git a/searchlib/src/tests/queryeval/DESC b/searchlib/src/tests/queryeval/DESC new file mode 100644 index 00000000000..15e6efd489d --- /dev/null +++ b/searchlib/src/tests/queryeval/DESC @@ -0,0 +1 @@ +queryeval test. Take a look at queryeval.cpp for details. diff --git a/searchlib/src/tests/queryeval/FILES b/searchlib/src/tests/queryeval/FILES new file mode 100644 index 00000000000..d082d6f8725 --- /dev/null +++ b/searchlib/src/tests/queryeval/FILES @@ -0,0 +1 @@ +queryeval.cpp diff --git a/searchlib/src/tests/queryeval/blueprint/.cvsignore b/searchlib/src/tests/queryeval/blueprint/.cvsignore new file mode 100644 index 00000000000..a8da5289575 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +blueprint_test diff --git a/searchlib/src/tests/queryeval/blueprint/.gitignore b/searchlib/src/tests/queryeval/blueprint/.gitignore new file mode 100644 index 00000000000..da4bf633103 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/.gitignore @@ -0,0 +1,8 @@ +*_test +.depend +Makefile +lhs.out +rhs.out +searchlib_blueprint_test_app +searchlib_intermediate_blueprints_test_app +searchlib_leaf_blueprints_test_app diff --git a/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt new file mode 100644 index 00000000000..88ba3deeb29 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt @@ -0,0 
+1,23 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_blueprint_test_app + SOURCES + blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_blueprint_test_app COMMAND searchlib_blueprint_test_app || diff -u lhs.out rhs.out) +vespa_add_executable(searchlib_leaf_blueprints_test_app + SOURCES + leaf_blueprints_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_leaf_blueprints_test_app COMMAND searchlib_leaf_blueprints_test_app || diff -u lhs.out rhs.out) +vespa_add_executable(searchlib_intermediate_blueprints_test_app + SOURCES + intermediate_blueprints_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_intermediate_blueprints_test_app COMMAND searchlib_intermediate_blueprints_test_app || diff -u lhs.out rhs.out) diff --git a/searchlib/src/tests/queryeval/blueprint/DESC b/searchlib/src/tests/queryeval/blueprint/DESC new file mode 100644 index 00000000000..a2634c017bd --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/DESC @@ -0,0 +1 @@ +blueprint test. Take a look at blueprint_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/blueprint/FILES b/searchlib/src/tests/queryeval/blueprint/FILES new file mode 100644 index 00000000000..89c566c5aea --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/FILES @@ -0,0 +1 @@ +blueprint_test.cpp diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp new file mode 100644 index 00000000000..79fec3770b3 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -0,0 +1,766 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("blueprint_test"); +#include +#include +#include +#include +#include + +#include "mysearch.h" + +using namespace search::queryeval; +using namespace search::fef; + +namespace { + +//----------------------------------------------------------------------------- + +class MyOr : public IntermediateBlueprint +{ +private: +public: + virtual HitEstimate combine(const std::vector &data) const { + return max(data); + } + + virtual FieldSpecBaseList exposeFields() const { + return mixChildrenFields(); + } + + virtual void sort(std::vector &children) const { + std::sort(children.begin(), children.end(), GreaterEstimate()); + } + + virtual bool inheritStrict(size_t i) const { + (void)i; + return true; + } + + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("or", subSearches, &md, strict)); + } + + static MyOr& create() { return *(new MyOr()); } + MyOr& add(Blueprint *n) { addChild(UP(n)); return *this; } + MyOr& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + + +class OtherOr : public OrBlueprint +{ +private: +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("or", subSearches, &md, strict)); + } + + static OtherOr& create() { return *(new OtherOr()); } + OtherOr& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherOr& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + +//----------------------------------------------------------------------------- + +class MyAnd : public AndBlueprint +{ +private: +public: + virtual HitEstimate combine(const std::vector &data) const { + return min(data); + } + + virtual FieldSpecBaseList exposeFields() const { + return FieldSpecBaseList(); + } + + virtual bool inheritStrict(size_t i) const { + return (i == 0); + } + + 
virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("and", subSearches, &md, strict)); + } + + static MyAnd& create() { return *(new MyAnd()); } + MyAnd& add(Blueprint *n) { addChild(UP(n)); return *this; } + MyAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + + +class OtherAnd : public AndBlueprint +{ +private: +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("and", subSearches, &md, strict)); + } + + static OtherAnd& create() { return *(new OtherAnd()); } + OtherAnd& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + +class OtherAndNot : public AndNotBlueprint +{ +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("andnot", subSearches, &md, strict)); + } + + static OtherAndNot& create() { return *(new OtherAndNot()); } + OtherAndNot& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherAndNot& add(Blueprint &n) { addChild(UP(&n)); return *this; } + +}; + +//----------------------------------------------------------------------------- + +struct MyTerm : SimpleLeafBlueprint { + MyTerm(const FieldSpecBaseList &fields, uint32_t hitEstimate) : SimpleLeafBlueprint(fields) { + setEstimate(HitEstimate(hitEstimate, false)); + } + virtual SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const { + return SearchIterator::UP(); + } +}; + +//----------------------------------------------------------------------------- + +} // namespace + +class Test : public vespalib::TestApp +{ +private: + MatchData::UP _md; + + static Blueprint::UP ap(Blueprint *b) 
{ return Blueprint::UP(b); } + static Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); } + + SearchIterator::UP create(const Blueprint &blueprint); + bool check_equal(const SearchIterator &a, const SearchIterator &b); + bool check_equal(const Blueprint &a, const Blueprint &b); + bool check_not_equal(const SearchIterator &a, const SearchIterator &b); + bool check_not_equal(const Blueprint &a, const Blueprint &b); + +public: + Test() + : vespalib::TestApp(), + _md(MatchData::makeTestInstance(0, 100, 10)) + { + } + Blueprint::UP buildBlueprint1(); + Blueprint::UP buildBlueprint2(); + void testBlueprintBuilding(); + void testHitEstimateCalculation(); + void testHitEstimatePropagation(); + void testMatchDataPropagation(); + void testChildSorting(); + void testChildAndNotCollapsing(); + void testChildAndCollapsing(); + void testChildOrCollapsing(); + void testSearchCreation(); + void testBlueprintMakeNew(); + void requireThatAsStringWorks(); + void requireThatVisitMembersWorks(); + void requireThatDocIdLimitInjectionWorks(); + int Main(); +}; + +SearchIterator::UP +Test::create(const Blueprint &blueprint) +{ + const_cast(blueprint).fetchPostings(true); + SearchIterator::UP search = blueprint.createSearch(*_md, true); + MySearch::verifyAndInfer(search.get(), *_md); + return search; +} + +bool +Test::check_equal(const SearchIterator &a, const SearchIterator &b) +{ + return EXPECT_EQUAL(a.asString(), b.asString()); +} + +bool +Test::check_equal(const Blueprint &a, const Blueprint &b) +{ + SearchIterator::UP searchA = create(a); + SearchIterator::UP searchB = create(b); + TEST_STATE("check_equal"); + bool ok = check_equal(*searchA, *searchB); + return ok; +} + +bool +Test::check_not_equal(const SearchIterator &a, const SearchIterator &b) +{ + return EXPECT_NOT_EQUAL(a.asString(), b.asString()); +} + +bool +Test::check_not_equal(const Blueprint &a, const Blueprint &b) +{ + SearchIterator::UP searchA = create(a); + SearchIterator::UP searchB = create(b); + 
TEST_STATE("check_not_equal"); + bool ok = check_not_equal(*searchA, *searchB); + return ok; +} + +Blueprint::UP +Test::buildBlueprint1() +{ + return ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(10).addField(1, 11).create()) + .add(MyLeafSpec(20).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(1, 31).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(100).addField(2, 22).create()) + .add(MyLeafSpec(200).addField(2, 42).create()) + ) + ); +} + +Blueprint::UP +Test::buildBlueprint2() +{ + return ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(10).addField(1, 11).create()) + .add(MyLeafSpec(20).addField(1, 21).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(100).addField(2, 22).create()) + .add(MyLeafSpec(200).addField(2, 32).create()) + .add(MyLeafSpec(300).addField(2, 42).create()) + ) + ); +} + +void +Test::testBlueprintBuilding() +{ + Blueprint::UP root1 = buildBlueprint1(); + Blueprint::UP root2 = buildBlueprint2(); + SearchIterator::UP search1 = create(*root1); + SearchIterator::UP search2 = create(*root2); + // fprintf(stderr, "%s\n", search1->asString().c_str()); + // fprintf(stderr, "%s\n", search2->asString().c_str()); +} + +void +Test::testHitEstimateCalculation() +{ + { + Blueprint::UP leaf = ap(MyLeafSpec(37).create()); + EXPECT_EQUAL(37u, leaf->getState().estimate().estHits); + EXPECT_EQUAL(0u, leaf->getState().numFields()); + } + { + Blueprint::UP a1 = ap(MyAnd::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + .add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(4u, a1->getState().estimate().estHits); + } + { + Blueprint::UP a2 = ap(MyAnd::create() + .add(MyLeafSpec(4).addField(1, 1).create()) + .add(MyLeafSpec(7).addField(2, 2).create()) + .add(MyLeafSpec(6).addField(3, 3).create())); + EXPECT_EQUAL(4u, a2->getState().estimate().estHits); + } + { + Blueprint::UP o1 = ap(MyOr::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + 
.add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(7u, o1->getState().estimate().estHits); + } + { + Blueprint::UP o2 = ap(MyOr::create() + .add(MyLeafSpec(4).addField(1, 1).create()) + .add(MyLeafSpec(7).addField(2, 2).create()) + .add(MyLeafSpec(6).addField(3, 3).create())); + EXPECT_EQUAL(7u, o2->getState().estimate().estHits); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(0).create()) + .add(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().estimate().estHits); + EXPECT_EQUAL(true, a->getState().estimate().empty); + } + { + Blueprint::UP o = ap(MyOr::create() + .add(MyLeafSpec(0).create()) + .add(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, o->getState().estimate().estHits); + EXPECT_EQUAL(false, o->getState().estimate().empty); + } + { + Blueprint::UP tree1 = buildBlueprint1(); + EXPECT_EQUAL(30u, tree1->getState().estimate().estHits); + + Blueprint::UP tree2 = buildBlueprint2(); + EXPECT_EQUAL(20u, tree2->getState().estimate().estHits); + } +} + +void +Test::testHitEstimatePropagation() +{ + MyLeaf *leaf1 = new MyLeaf(FieldSpecBaseList()); + leaf1->estimate(10); + + MyLeaf *leaf2 = new MyLeaf(FieldSpecBaseList()); + leaf2->estimate(20); + + MyLeaf *leaf3 = new MyLeaf(FieldSpecBaseList()); + leaf3->estimate(30); + + MyOr *parent = new MyOr(); + MyOr *grandparent = new MyOr(); + + Blueprint::UP root(grandparent); + + parent->addChild(ap(leaf1)); + parent->addChild(ap(leaf3)); + grandparent->addChild(ap(leaf2)); + grandparent->addChild(ap(parent)); + EXPECT_EQUAL(30u, root->getState().estimate().estHits); + + // edit + leaf3->estimate(50); + EXPECT_EQUAL(50u, root->getState().estimate().estHits); + + // remove + ASSERT_TRUE(parent->childCnt() == 2); + Blueprint::UP tmp = parent->removeChild(1); + ASSERT_TRUE(tmp.get() == leaf3); + EXPECT_EQUAL(1u, parent->childCnt()); + EXPECT_EQUAL(20u, root->getState().estimate().estHits); + + // add + leaf3->estimate(25); + 
EXPECT_EQUAL(20u, root->getState().estimate().estHits); + parent->addChild(std::move(tmp)); + EXPECT_TRUE(tmp.get() == 0); + EXPECT_EQUAL(25u, root->getState().estimate().estHits); +} + +void +Test::testMatchDataPropagation() +{ + { + Blueprint::UP leaf = ap(MyLeafSpec(0, true).create()); + EXPECT_EQUAL(0u, leaf->getState().numFields()); + } + { + Blueprint::UP leaf = ap(MyLeafSpec(42) + .addField(1, 41) + .addField(2, 72).create()); + EXPECT_EQUAL(42u, leaf->getState().estimate().estHits); + ASSERT_TRUE(leaf->getState().numFields() == 2); + EXPECT_EQUAL(1u, leaf->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, leaf->getState().field(1).getFieldId()); + EXPECT_EQUAL(41u, leaf->getState().field(0).getHandle()); + EXPECT_EQUAL(72u, leaf->getState().field(1).getHandle()); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + .add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { + MyOr &o = MyOr::create() + .add(MyLeafSpec(1).addField(1, 1).create()) + .add(MyLeafSpec(2).addField(2, 2).create()); + + Blueprint::UP a = ap(o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.add(MyLeafSpec(5).addField(2, 2).create()); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.add(MyLeafSpec(5).addField(2, 32).create()); + EXPECT_EQUAL(0u, a->getState().numFields()); + 
o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.add(MyLeafSpec(0, true).create()); + EXPECT_EQUAL(0u, a->getState().numFields()); + } +} + +void +Test::testChildAndNotCollapsing() +{ + Blueprint::UP unsorted = ap(OtherAndNot::create() + .add(OtherAndNot::create() + .add(OtherAndNot::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ); + + Blueprint::UP sorted = ap(OtherAndNot::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ); + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildAndCollapsing() +{ + Blueprint::UP unsorted = ap(OtherAnd::create() + .add(OtherAnd::create() + .add(OtherAnd::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(30).addField(3, 
73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ); + + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildOrCollapsing() +{ + Blueprint::UP unsorted = ap(OtherOr::create() + .add(OtherOr::create() + .add(OtherOr::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherOr::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(OtherOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(OtherOr::create() + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(1).addField(2, 42).create()) + ); + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + 
TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildSorting() +{ + Blueprint::UP unsorted = ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(1).addField(2, 42).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + ) + ); + + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + + +void +Test::testSearchCreation() +{ + { + Blueprint::UP l = ap(MyLeafSpec(3) + .addField(1, 1) + .addField(2, 2) + .addField(3, 3).create()); + SearchIterator::UP leafsearch = create(*l); + + MySearch *lw = new MySearch("leaf", true, true); + lw->addHandle(1).addHandle(2).addHandle(3); + SearchIterator::UP wantleaf(lw); + + TEST_DO(check_equal(*wantleaf, *leafsearch)); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(1).addField(1, 1).create()) + .add(MyLeafSpec(2).addField(2, 2).create())); + SearchIterator::UP andsearch = create(*a); + + MySearch *l1 = new MySearch("leaf", true, true); + MySearch *l2 = new 
MySearch("leaf", true, false); + l1->addHandle(1); + l2->addHandle(2); + MySearch *aw = new MySearch("and", false, true); + aw->add(l1); + aw->add(l2); + SearchIterator::UP wanted(aw); + TEST_DO(check_equal(*wanted, *andsearch)); + } + { + Blueprint::UP o = ap(MyOr::create() + .add(MyLeafSpec(1).addField(1, 11).create()) + .add(MyLeafSpec(2).addField(2, 22).create())); + SearchIterator::UP orsearch = create(*o); + + MySearch *l1 = new MySearch("leaf", true, true); + MySearch *l2 = new MySearch("leaf", true, true); + l1->addHandle(11); + l2->addHandle(22); + MySearch *ow = new MySearch("or", false, true); + ow->add(l1); + ow->add(l2); + SearchIterator::UP wanted(ow); + TEST_DO(check_equal(*wanted, *orsearch)); + } +} + +template +Blueprint::UP makeNew(T *orig) +{ + return Blueprint::UP(new T(*orig)); +} + +void +Test::testBlueprintMakeNew() +{ + Blueprint::UP orig = ap(MyOr::create() + .add(MyLeafSpec(1).addField(1, 11).create()) + .add(MyLeafSpec(2).addField(2, 22).create())); + orig->setSourceId(42); + MyOr *myOr = dynamic_cast(orig.get()); + ASSERT_TRUE(myOr != 0); + Blueprint::UP copy1 = makeNew(myOr); + Blueprint::UP copy2 = makeNew(myOr); + TEST_DO(check_equal(*copy1, *copy2)); + TEST_DO(check_not_equal(*orig, *copy1)); + TEST_DO(check_not_equal(*orig, *copy2)); + EXPECT_TRUE(dynamic_cast(copy1.get()) != 0); + EXPECT_TRUE(dynamic_cast(copy2.get()) != 0); + EXPECT_EQUAL(42u, orig->getSourceId()); + EXPECT_EQUAL(42u, copy1->getSourceId()); + EXPECT_EQUAL(2u, orig->getState().numFields()); + EXPECT_EQUAL(0u, copy1->getState().numFields()); +} + +vespalib::string +getExpectedBlueprint() +{ + return "(anonymous namespace)::MyOr {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 5\n" + " handle: 7\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 9\n" + " tree_size: 2\n" + " allow_termwise_eval: 0\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " children: 
std::vector {\n" + " [0]: (anonymous namespace)::MyTerm {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 5\n" + " handle: 7\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 9\n" + " tree_size: 1\n" + " allow_termwise_eval: 1\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " }\n" + " }\n" + "}\n"; +} + +struct BlueprintFixture +{ + MyOr _blueprint; + BlueprintFixture() : _blueprint() { + _blueprint.add(new MyTerm(FieldSpecBaseList().add(FieldSpecBase(5, 7)), 9)); + } +}; + +void +Test::requireThatAsStringWorks() +{ + BlueprintFixture f; + EXPECT_EQUAL(getExpectedBlueprint(), f._blueprint.asString()); +} + +void +Test::requireThatVisitMembersWorks() +{ + BlueprintFixture f; + vespalib::ObjectDumper dumper; + visit(dumper, "", &f._blueprint); + EXPECT_EQUAL(getExpectedBlueprint(), dumper.toString()); +} + +void +Test::requireThatDocIdLimitInjectionWorks() +{ + BlueprintFixture f; + ASSERT_GREATER(f._blueprint.childCnt(), 0u); + const MyTerm &term = dynamic_cast(f._blueprint.getChild(0)); + EXPECT_EQUAL(0u, term.get_docid_limit()); + f._blueprint.setDocIdLimit(1000); + EXPECT_EQUAL(1000u, term.get_docid_limit()); +} + +int +Test::Main() +{ + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("blueprint_test"); + testBlueprintBuilding(); + testHitEstimateCalculation(); + testHitEstimatePropagation(); + testMatchDataPropagation(); + testChildSorting(); + testChildAndNotCollapsing(); + testChildAndCollapsing(); + testChildOrCollapsing(); + testSearchCreation(); + testBlueprintMakeNew(); + requireThatAsStringWorks(); + requireThatVisitMembersWorks(); + requireThatDocIdLimitInjectionWorks(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp new file mode 100644 index 00000000000..161537104e0 --- /dev/null +++ 
b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -0,0 +1,1332 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("blueprint_test"); + +#include +#include +#include +#include +#include +#include + +#include "mysearch.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace search::queryeval; +using namespace search::fef; +using namespace search::query; + +struct WeightOrder { + bool operator()(const wand::Term &t1, const wand::Term &t2) const { + return (t1.weight < t2.weight); + } +}; + +Blueprint::UP ap(Blueprint *b) { return Blueprint::UP(b); } +Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); } + +TEST("test AndNot Blueprint") { + AndNotBlueprint b; + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + } + { + AndNotBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), 
children[0]); + EXPECT_EQUAL(c3.get(), children[1]); + EXPECT_EQUAL(c4.get(), children[2]); + EXPECT_EQUAL(c2.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test And Blueprint") { + AndBlueprint b; + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { + AndBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(40).create()); + Blueprint::UP c3 = ap(MyLeafSpec(10).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c3.get(), children[0]); + EXPECT_EQUAL(c1.get(), children[1]); + EXPECT_EQUAL(c4.get(), children[2]); + EXPECT_EQUAL(c2.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + 
+TEST("test Or Blueprint") { + OrBlueprint b; + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { + OrBlueprint &o = *(new OrBlueprint()); + o.addChild(ap(MyLeafSpec(1).addField(1, 1).create())); + o.addChild(ap(MyLeafSpec(2).addField(2, 2).create())); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 2).create())); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 3).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.addChild(ap(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP 
c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c3.get(), children[0]); + EXPECT_EQUAL(c4.get(), children[1]); + EXPECT_EQUAL(c2.get(), children[2]); + EXPECT_EQUAL(c1.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test Near Blueprint") { + NearBlueprint b(7); + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { + NearBlueprint a(7); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(40).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(30).create()); + Blueprint::UP c4 = ap(MyLeafSpec(20).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c2.get(), children[0]); + EXPECT_EQUAL(c4.get(), children[1]); 
+ EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c1.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test ONear Blueprint") { + ONearBlueprint b(8); + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { + ONearBlueprint a(8); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test Rank Blueprint") { + 
RankBlueprint b; + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + } + { + RankBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test SourceBlender Blueprint") { + ISourceSelector *selector = 0; // not needed here + SourceBlenderBlueprint b(*selector); + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, 
b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { + SourceBlenderBlueprint &o = *(new SourceBlenderBlueprint(*selector)); + o.addChild(ap(MyLeafSpec(1).addField(1, 1).create())); + o.addChild(ap(MyLeafSpec(2).addField(2, 2).create())); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 2).create())); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 3).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.addChild(ap(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + 
children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test SourceBlender below AND optimization") { + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast(100); // not the one + //------------------------------------------------------------------------- + AndBlueprint *top = new AndBlueprint(); + Blueprint::UP top_bp(top); + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + 
top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + AndBlueprint *expect = new AndBlueprint(); + Blueprint::UP expect_bp(expect); + expect->addChild(ap(MyLeafSpec(1).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_bp->asString(), top_bp->asString()); + top_bp = Blueprint::optimize(std::move(top_bp)); + EXPECT_EQUAL(expect_bp->asString(), top_bp->asString()); + expect_bp = Blueprint::optimize(std::move(expect_bp)); + EXPECT_EQUAL(expect_bp->asString(), top_bp->asString()); +} + +TEST("test SourceBlender below OR 
optimization") { + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast(100); // not the one + //------------------------------------------------------------------------- + OrBlueprint *top = new OrBlueprint(); + Blueprint::UP top_up(top); + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + OrBlueprint *expect = new OrBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + 
blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(3).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test SourceBlender below AND_NOT optimization") { + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast(100); // not the one + //------------------------------------------------------------------------- + AndNotBlueprint *top = new AndNotBlueprint(); + Blueprint::UP top_up(top); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + 
top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + AndNotBlueprint *expect = new AndNotBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + 
sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(3).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test SourceBlender below RANK optimization") { + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast(100); // not the one + //------------------------------------------------------------------------- + RankBlueprint *top = new RankBlueprint(); + Blueprint::UP top_up(top); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + 
top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + RankBlueprint *expect = new RankBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + expect->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + 
OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty root node optimization and safeness") { + //------------------------------------------------------------------------- + // tests leaf node elimination + Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create())); + //------------------------------------------------------------------------- + // tests intermediate node elimination + Blueprint::UP top2_up(ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). 
+ addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty AND_NOT child removal + Blueprint::UP top3_up(ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty RANK child removal + Blueprint::UP top4_up(ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty OR child removal + Blueprint::UP top5_up(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up(new EmptyBlueprint()); + //------------------------------------------------------------------------- + top1_up = Blueprint::optimize(std::move(top1_up)); + top2_up = Blueprint::optimize(std::move(top2_up)); + top3_up = Blueprint::optimize(std::move(top3_up)); + top4_up = Blueprint::optimize(std::move(top4_up)); + top5_up = Blueprint::optimize(std::move(top5_up)); + EXPECT_EQUAL(expect_up->asString(), top1_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top2_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top3_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top4_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top5_up->asString()); +} + +TEST("and with one empty child is optimized away") { + ISourceSelector *selector = 0; + Blueprint::UP top(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(10).create())). 
+ addChild(ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))))); + top = Blueprint::optimize(std::move(top)); + Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(new EmptyBlueprint())))); + EXPECT_EQUAL(expect_up->asString(), top->asString()); +} + +TEST("test single child optimization") { + ISourceSelector *selector = 0; + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new AndNotBlueprint())-> + addChild(ap((new AndBlueprint())-> + addChild(ap((new OrBlueprint())-> + addChild(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())))))))))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(42).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty OR child optimization") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). 
+ addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty AND_NOT child optimization") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())). 
+ addChild(ap(MyLeafSpec(0).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty RANK child optimization") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0).create())). 
+ addChild(ap(MyLeafSpec(30).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("require that replaced blueprints retain source id") { + //------------------------------------------------------------------------- + // replace empty root with empty search + Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create()->setSourceId(13))); + Blueprint::UP expect1_up(new EmptyBlueprint()); + expect1_up->setSourceId(13); + //------------------------------------------------------------------------- + // replace self with single child + Blueprint::UP top2_up(ap(static_cast((new AndBlueprint())->setSourceId(42)). + addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); + Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); + //------------------------------------------------------------------------- + top1_up = Blueprint::optimize(std::move(top1_up)); + top2_up = Blueprint::optimize(std::move(top2_up)); + EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); + EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); + EXPECT_EQUAL(13u, top1_up->getSourceId()); + EXPECT_EQUAL(42u, top2_up->getSourceId()); +} + +TEST("test Equiv Blueprint") { + FieldSpecBaseList fields; + search::fef::MatchDataLayout subLayout; + fields.add(FieldSpecBase(1, 1)); + fields.add(FieldSpecBase(2, 2)); + fields.add(FieldSpecBase(3, 3)); + EquivBlueprint b(fields, subLayout); + { + EquivBlueprint &o = *(new EquivBlueprint(fields, subLayout)); + o.addTerm(ap(MyLeafSpec(5).addField(1, 4).create()), 1.0); + o.addTerm(ap(MyLeafSpec(10).addField(1, 5).create()), 1.0); + o.addTerm(ap(MyLeafSpec(20).addField(1, 6).create()), 1.0); + 
o.addTerm(ap(MyLeafSpec(50).addField(2, 7).create()), 1.0); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 3); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(3u, a->getState().field(2).getFieldId()); + + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(3u, a->getState().field(2).getHandle()); + + EXPECT_EQUAL(50u, a->getState().estimate().estHits); + EXPECT_EQUAL(false, a->getState().estimate().empty); + } + // createSearch tested by iterator unit test +} + + +TEST("test WeakAnd Blueprint") { + WeakAndBlueprint b(1000); + { // combine + std::vector est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { + WeakAndBlueprint a(1000); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + 
EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + { + FieldSpec field("foo", 1, 1); + FakeResult x = FakeResult().doc(1).doc(2).doc(5); + FakeResult y = FakeResult().doc(2); + FakeResult z = FakeResult().doc(1).doc(4); + { + WeakAndBlueprint wa(456); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, x)), 120); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, z)), 140); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, y)), 130); + { + wa.fetchPostings(true); + SearchIterator::UP search = wa.createSearch(*md, true); + EXPECT_TRUE(dynamic_cast(search.get()) != 0); + WeakAndSearch &s = dynamic_cast(*search); + EXPECT_EQUAL(456u, s.getN()); + ASSERT_EQUAL(3u, s.getTerms().size()); + EXPECT_GREATER(s.get_max_score(0), 0.0); + EXPECT_GREATER(s.get_max_score(1), 0.0); + EXPECT_GREATER(s.get_max_score(2), 0.0); + wand::Terms terms = s.getTerms(); + std::sort(terms.begin(), terms.end(), WeightOrder()); + EXPECT_EQUAL(120, terms[0].weight); + EXPECT_EQUAL(3u, terms[0].estHits); + EXPECT_EQUAL(0u, terms[0].maxScore); // NB: not set + EXPECT_EQUAL(130, terms[1].weight); + EXPECT_EQUAL(1u, terms[1].estHits); + EXPECT_EQUAL(0u, terms[1].maxScore); // NB: not set + EXPECT_EQUAL(140, terms[2].weight); + EXPECT_EQUAL(2u, terms[2].estHits); + EXPECT_EQUAL(0u, terms[2].maxScore); // NB: not set + } + { + wa.fetchPostings(false); + SearchIterator::UP search = wa.createSearch(*md, false); + EXPECT_TRUE(dynamic_cast(search.get()) != 0); + EXPECT_TRUE(search->seek(1)); + EXPECT_TRUE(search->seek(2)); + EXPECT_FALSE(search->seek(3)); + EXPECT_TRUE(search->seek(4)); + EXPECT_TRUE(search->seek(5)); + EXPECT_FALSE(search->seek(6)); + } + } + } +} + +TEST("require_that_unpack_of_or_over_multisearch_is_optimized") { + 
Blueprint::UP child1( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). + addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + Blueprint::UP child2( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(4,4).create())). + addChild(ap(MyLeafSpec(20).addField(5,5).create())). + addChild(ap(MyLeafSpec(10).addField(6,6).create())))); + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(std::move(child1)). + addChild(std::move(child2)))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(4)->tagAsNotNeeded(); + md->resolveTermField(6)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(5)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_of_or_is_optimized") { + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_of_and_is_optimized") { + Blueprint::UP top_up( + ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). + addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_optimization_is_honoured_by_parents") { + Blueprint::UP top_up( + ap((new AndBlueprint())-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,3).create())))))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", + top_up->createSearch(*md, false)->getClassName()); +} + +namespace { + +SimpleStringTerm +makeTerm(const std::string & term) +{ + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +} + +TEST("require that children does not optimize when parents refuse them to") { + FakeRequestContext requestContext; + search::diskindex::TestDiskIndex index; + vespalib::mkdir("index", false); + index.buildSchema(); + index.openIndex("index/1", false, true, false, false, false); + FieldSpecBaseList fields; + fields.add(FieldSpecBase(1, 11)); + fields.add(FieldSpecBase(2, 22)); + search::fef::MatchDataLayout subLayout; + search::fef::TermFieldHandle idxth21 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth22 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1); + Blueprint::UP top_up( + ap((new EquivBlueprint(fields, subLayout))-> + addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f2", 2, idxth22, true), + makeTerm("w2")), + 1.0). + addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f1", 1, idxth1), + makeTerm("w1")), + 1.0). 
+ addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f2", 2, idxth21), makeTerm("w2")), + 1.0))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + SearchIterator::UP search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl", search->getClassName()); + { + const MultiSearch & e = dynamic_cast(*search); + EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator", e.getChildren()[2]->getClassName()); + } + + md->resolveTermField(12)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl", search->getClassName()); + { + const MultiSearch & e = dynamic_cast(*search); + EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator", e.getChildren()[2]->getClassName()); + } +} + +TEST("require_that_unpack_optimization_is_overruled_by_equiv") { + FieldSpecBaseList fields; + fields.add(FieldSpecBase(1, 1)); + fields.add(FieldSpecBase(2, 2)); + fields.add(FieldSpecBase(3, 3)); + search::fef::MatchDataLayout subLayout; + search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1); + search::fef::TermFieldHandle idxth2 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth3 = subLayout.allocTermField(3); + Blueprint::UP top_up( + ap((new EquivBlueprint(fields, subLayout))-> + addTerm(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,idxth1).create())). + addChild(ap(MyLeafSpec(20).addField(2,idxth2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,idxth3).create()))), + 1.0))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + SearchIterator::UP search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl", search->getClassName()); + { + const MultiSearch & e = dynamic_cast(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + e.getChildren()[0]->getClassName()); + } + + md->resolveTermField(2)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl", search->getClassName()); + { + const MultiSearch & e = dynamic_cast(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + e.getChildren()[0]->getClassName()); + } + + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl", search->getClassName()); + { + const MultiSearch & e = dynamic_cast(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch", + e.getChildren()[0]->getClassName()); + } +} + +TEST("require that children of near are not optimized") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new NearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new NearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). 
+ addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("require that children of onear are not optimized") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new ONearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create()->estimate(20))). + addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))). + addChild(ap(MyLeafSpec(30).create()->estimate(30))))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new ONearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp new file mode 100644 index 00000000000..a2353184c9f --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("blueprint_test"); +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::fef; + +class Test : public vespalib::TestApp +{ +public: + void testEmptyBlueprint(); + void testSimpleBlueprint(); + void testFakeBlueprint(); + int Main(); +}; + +void +Test::testEmptyBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + EmptyBlueprint empty(FieldSpecBase(1, 11)); + EmptyBlueprint copy(empty); + ASSERT_TRUE(copy.getState().numFields() == 1u); + EXPECT_EQUAL(1u, copy.getState().field(0).getFieldId()); + EXPECT_EQUAL(11u, copy.getState().field(0).getHandle()); + + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + + SimpleResult res; + res.search(*search); + SimpleResult expect; // empty + EXPECT_EQUAL(res, expect); +} + +void +Test::testSimpleBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + SimpleResult a; + a.addHit(3).addHit(5).addHit(7); + SimpleBlueprint simple(a); + simple.tag("tag"); + SimpleBlueprint copy(simple); + EXPECT_EQUAL("tag", copy.tag()); + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + + SimpleResult res; + res.search(*search); + SimpleResult expect; + expect.addHit(3).addHit(5).addHit(7); + EXPECT_EQUAL(res, expect); +} + +void +Test::testFakeBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + FakeResult fake; + fake.doc(10).len(50).pos(2).pos(3) + .doc(25).len(10).pos(5); + + uint32_t fieldId = 0; + TermFieldHandle handle = 0; + FakeBlueprint orig(FieldSpec("", fieldId, handle), fake); + FakeBlueprint copy(orig); + + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + search->initFullRange(); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(10u, search->getDocId()); + { + search->unpack(10u); + TermFieldMatchData &data = *md->resolveTermField(handle); + EXPECT_EQUAL(fieldId, 
data.getFieldId()); + EXPECT_EQUAL(10u, data.getDocId()); + EXPECT_EQUAL(10u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(50u, itr.getFieldLength()); + EXPECT_EQUAL(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2u, itr.getPosition()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + EXPECT_TRUE(search->seek(25)); + EXPECT_EQUAL(25u, search->getDocId()); + { + search->unpack(25u); + TermFieldMatchData &data = *md->resolveTermField(handle); + EXPECT_EQUAL(fieldId, data.getFieldId()); + EXPECT_EQUAL(25u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(10u, itr.getFieldLength()); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(5u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + EXPECT_TRUE(!search->seek(50)); + EXPECT_TRUE(search->isAtEnd()); +} + +int +Test::Main() +{ + TEST_INIT("leaf_blueprints_test"); + testEmptyBlueprint(); + testSimpleBlueprint(); + testFakeBlueprint(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h new file mode 100644 index 00000000000..7ab852b384f --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +namespace search { +namespace queryeval { + +//----------------------------------------------------------------------------- + +class MySearch : public SearchIterator +{ +public: + typedef MultiSearch::Children Children; + typedef std::vector MyChildren; + typedef search::fef::TermFieldMatchDataArray TFMDA; + typedef search::fef::MatchData MatchData; + +private: + std::string _tag; + bool _isLeaf; + bool _isStrict; + MyChildren _children; + TFMDA _match; + MatchData *_md; + + std::vector _handles; + +protected: + virtual void doSeek(uint32_t) {} + virtual void doUnpack(uint32_t) {} + +public: + MySearch(const std::string &tag, bool leaf, bool strict) + : _tag(tag), _isLeaf(leaf), _isStrict(strict), _children(), + _match(), _md(0) {} + + MySearch(const std::string &tag, const TFMDA &tfmda, bool strict) + : _tag(tag), _isLeaf(true), _isStrict(strict), _children(), + _match(tfmda), _md(0) {} + + MySearch(const std::string &tag, const Children &children, + MatchData *md, bool strict) + : _tag(tag), _isLeaf(false), _isStrict(strict), _children(), + _match(), _md(md) { + for (size_t i(0); i < children.size(); i++) { + _children.emplace_back(children[i]); + } + } + + MySearch &add(SearchIterator *search) { + _children.emplace_back(search); + return *this; + } + + MySearch &addHandle(uint32_t handle) { + _handles.push_back(handle); + return *this; + } + + bool verifyAndInferImpl(MatchData &md) { + bool ok = true; + if (!_isLeaf) { + ok &= (_md == &md); + } + for (size_t i = 0; i < _children.size(); ++i) { + MySearch *child = dynamic_cast(_children[i].get()); + ok &= (child != 0); + if (child != 0) { + ok &= child->verifyAndInferImpl(md); + } + } + for (size_t i = 0; i < _match.size(); ++i) { + search::fef::TermFieldMatchData *tfmd = _match[i]; + _handles.push_back(search::fef::IllegalHandle); + for (search::fef::TermFieldHandle j = 0; j < md.getNumTermFields(); ++j) { + if (md.resolveTermField(j) == tfmd) { + _handles.back() = j; + break; + } + } + 
ok &= (_handles.back() != search::fef::IllegalHandle); + } + return ok; + } + + static bool verifyAndInfer(SearchIterator *search, MatchData &md) { + MySearch *self = dynamic_cast(search); + if (self == 0) { + return false; + } else { + return self->verifyAndInferImpl(md); + } + } + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "_tag", _tag); + visit(visitor, "_isLeaf", _isLeaf); + visit(visitor, "_isStrict", _isStrict); + visit(visitor, "_children", _children); + visit(visitor, "_handles", _handles); + } + + virtual ~MySearch() {} +}; + +//----------------------------------------------------------------------------- + +class MyLeaf : public SimpleLeafBlueprint +{ + typedef search::fef::TermFieldMatchDataArray TFMDA; + +public: + virtual SearchIterator::UP + createLeafSearch(const TFMDA &tfmda, bool strict) const + { + return SearchIterator::UP(new MySearch("leaf", tfmda, strict)); + } + + MyLeaf(const FieldSpecBaseList &fields) + : SimpleLeafBlueprint(fields) + {} + + MyLeaf &estimate(uint32_t hits, bool empty = false) { + setEstimate(HitEstimate(hits, empty)); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +class MyLeafSpec +{ +private: + FieldSpecBaseList _fields; + Blueprint::HitEstimate _estimate; + +public: + explicit MyLeafSpec(uint32_t estHits, bool empty = false) + : _fields(), _estimate(estHits, empty) {} + + MyLeafSpec &addField(uint32_t fieldId, uint32_t handle) { + _fields.add(FieldSpecBase(fieldId, handle)); + return *this; + } + MyLeaf *create() const { + MyLeaf *leaf = new MyLeaf(_fields); + leaf->estimate(_estimate.estHits, _estimate.empty); + return leaf; + } +}; + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore 
new file mode 100644 index 00000000000..9e6565f9d16 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +booleanmatchiteratorwrapper_test diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore new file mode 100644 index 00000000000..b568b87514a --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +booleanmatchiteratorwrapper_test +searchlib_booleanmatchiteratorwrapper_test_app diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt new file mode 100644 index 00000000000..cf701c430aa --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_booleanmatchiteratorwrapper_test_app + SOURCES + booleanmatchiteratorwrapper_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_booleanmatchiteratorwrapper_test_app COMMAND searchlib_booleanmatchiteratorwrapper_test_app) diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC new file mode 100644 index 00000000000..097198d38ef --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC @@ -0,0 +1 @@ +booleanmatchiteratorwrapper test. Take a look at booleanmatchiteratorwrapper.cpp for details. 
diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES new file mode 100644 index 00000000000..a47b5b35a40 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES @@ -0,0 +1 @@ +booleanmatchiteratorwrapper.cpp diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp new file mode 100644 index 00000000000..940f825b691 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("booleanmatchiteratorwrapper_test"); +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::queryeval; +using search::BitVector; +using search::BitVectorIterator; + +struct DummyItr : public SearchIterator { + static uint32_t seekCnt; + static uint32_t unpackCnt; + static uint32_t dtorCnt; + static uint32_t _unpackedDocId; + TermFieldMatchData *match; + + DummyItr(TermFieldMatchData *m) { + match = m; + } + + ~DummyItr() { + ++dtorCnt; + } + + void doSeek(uint32_t docid) { + ++seekCnt; + if (docid <= 10) { + setDocId(10); + } else if (docid <= 20) { + setDocId(20); + } else { + setAtEnd(); + } + } + + void doUnpack(uint32_t docid) { + ++unpackCnt; + if (match != 0) { + _unpackedDocId = docid; + } + } +}; +uint32_t DummyItr::seekCnt = 0; +uint32_t DummyItr::unpackCnt = 0; +uint32_t DummyItr::dtorCnt = 0; +uint32_t DummyItr::_unpackedDocId = 0; + + +TEST("mostly everything") { + EXPECT_EQUAL(DummyItr::seekCnt, 0u); + EXPECT_EQUAL(DummyItr::unpackCnt, 0u); + EXPECT_EQUAL(DummyItr::dtorCnt, 0u); + { // without wrapper + TermFieldMatchData match; + DummyItr::_unpackedDocId 
= 0; + SearchIterator::UP search(new DummyItr(&match)); + search->initFullRange(); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 10u); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 20u); + EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + EXPECT_EQUAL(DummyItr::seekCnt, 3u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 1u); + { // with wrapper + TermFieldMatchData match; + TermFieldMatchDataArray tfmda; + tfmda.add(&match); + DummyItr::_unpackedDocId = 0; + SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(&match)), tfmda)); + search->initFullRange(); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + EXPECT_EQUAL(DummyItr::seekCnt, 6u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 2u); + { // with wrapper, without match data + SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(0)), TermFieldMatchDataArray())); + search->initFullRange(); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + 
EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + EXPECT_EQUAL(DummyItr::seekCnt, 9u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 3u); +} + +TEST("Test boolean wrapper iterators adheres to initRange") { + search::test::InitRangeVerifier ir; + TermFieldMatchDataArray tfmda; + BooleanMatchIteratorWrapper relaxed(ir.createIterator(ir.getExpectedDocIds(), false), tfmda); + ir.verify(relaxed); + BooleanMatchIteratorWrapper strict(ir.createIterator(ir.getExpectedDocIds(), true), tfmda); + ir.verify(strict); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/dot_product/.gitignore b/searchlib/src/tests/queryeval/dot_product/.gitignore new file mode 100644 index 00000000000..a22cb6c5ea0 --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/.gitignore @@ -0,0 +1 @@ +searchlib_dot_product_test_app diff --git a/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt new file mode 100644 index 00000000000..91b78f2e54d --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_dot_product_test_app + SOURCES + dot_product_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_dot_product_test_app COMMAND searchlib_dot_product_test_app) diff --git a/searchlib/src/tests/queryeval/dot_product/FILES b/searchlib/src/tests/queryeval/dot_product/FILES new file mode 100644 index 00000000000..cf1bcd96ec4 --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/FILES @@ -0,0 +1 @@ +dot_product_test.cpp diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp new file mode 100644 index 00000000000..e9dcc34219b --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -0,0 +1,219 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("dot_product_test"); +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; + +namespace { + +void setupFakeSearchable(FakeSearchable &fake) { + for (size_t docid = 1; docid < 10; ++docid) { + std::string token1 = vespalib::make_string("%zu", docid); + std::string token2 = vespalib::make_string("1%zu", docid); + std::string token3 = vespalib::make_string("2%zu", docid); + + fake.addResult("field", token1, FakeResult().doc(docid).weight(docid).pos(0)); + fake.addResult("multi-field", token1, FakeResult().doc(docid).weight(docid).pos(0)); + fake.addResult("multi-field", token2, FakeResult().doc(docid).weight(2 * docid).pos(0)); + fake.addResult("multi-field", token3, FakeResult().doc(docid).weight(3 * docid).pos(0)); + } +} + +struct DP { + 
static const uint32_t fieldId = 0; + static const TermFieldHandle handle = 0; + std::vector > tokens; + + DP &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleDotProduct *node = new SimpleDotProduct("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + FakeRequestContext requestContext; + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + EXPECT_TRUE(dynamic_cast(sb.get()) != 0); + sb->initFullRange(); + FakeResult result; + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + double score = md->resolveTermField(handle)->getRawScore(); + EXPECT_EQUAL((int)score, score); + result.score(score); + } + } + return result; + } +}; + +struct MockSearch : public SearchIterator { + int seekCnt; + uint32_t _initial; + MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { } + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + setDocId(_initial); + } + virtual void doSeek(uint32_t) { + ++seekCnt; + setAtEnd(); + } + virtual void doUnpack(uint32_t) {} +}; + +struct MockFixture { + MockSearch *mock; + TermFieldMatchData tfmd; + std::unique_ptr search; + MockFixture(uint32_t initial) : mock(0), tfmd(), search() { + std::vector children; + std::vector childMatch; + std::vector weights; + MatchData::UP 
md(MatchData::makeTestInstance(0, 1, 1)); + mock = new MockSearch(initial); + children.push_back(mock); + childMatch.push_back(md->resolveTermField(0)); + weights.push_back(1); + search = DotProductSearch::create(children, tfmd, childMatch, weights, std::move(md)); + } +}; + +} // namespace + +TEST("test Simple") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).score(30 * 3) + .doc(5).score(50 * 5) + .doc(7).score(70 * 7); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + + EXPECT_EQUAL(expect, ws.search(index, "field", true)); + EXPECT_EQUAL(expect, ws.search(index, "field", false)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST("test Multi") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).score(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3) + .doc(5).score(50 * 5 + 150 * 2 * 5) + .doc(7).score(70 * 7); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30) + .add("15", 150).add("13", 130) + .add("23", 230).add("100", 1000); + + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(1)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(0, mock->seekCnt); +} + +TEST_F("test Eager Matching Child", MockFixture(5)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(3)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(search.seek(5)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, 
mock->seekCnt); + EXPECT_TRUE(!search.seek(7)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(1, mock->seekCnt); +} + +TEST("verify initRange with search iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + using DocIds = InitRangeVerifier::DocIds; + std::vector split_lists(num_children); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + split_lists[i % num_children].push_back(full_list[i]); + } + bool strict = true; + std::vector children; + for (size_t i = 0; i < num_children; ++i) { + children.push_back(ir.createIterator(split_lists[i], strict).release()); + } + TermFieldMatchData tfmd; + std::vector weights(num_children, 1); + std::vector no_child_match; // unpack not called + MatchData::UP no_match_data; // unpack not called + SearchIterator::UP itr = DotProductSearch::create(children, tfmd, no_child_match, weights, std::move(no_match_data)); + ir.verify(*itr); +} + +TEST("verify initRange with document weight iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + DocumentWeightAttributeHelper helper; + helper.add_docs(ir.getDocIdLimit()); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + helper.set_doc(full_list[i], i % num_children, 1); + } + TermFieldMatchData tfmd; + std::vector weights(num_children, 1); + std::vector children; + for (size_t i = 0; i < num_children; ++i) { + auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str()); + helper.dwa().create(dict_entry.posting_idx, children); + } + SearchIterator::UP itr(DotProductSearch::create(tfmd, weights, std::move(children))); + ir.verify(*itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/equiv/.cvsignore b/searchlib/src/tests/queryeval/equiv/.cvsignore new file mode 100644 index 00000000000..1f159f55125 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile 
+equiv_test diff --git a/searchlib/src/tests/queryeval/equiv/.gitignore b/searchlib/src/tests/queryeval/equiv/.gitignore new file mode 100644 index 00000000000..d28d4650b98 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_equiv_test_app diff --git a/searchlib/src/tests/queryeval/equiv/CMakeLists.txt b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt new file mode 100644 index 00000000000..695e9b87121 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_equiv_test_app + SOURCES + equiv_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_equiv_test_app COMMAND searchlib_equiv_test_app) diff --git a/searchlib/src/tests/queryeval/equiv/DESC b/searchlib/src/tests/queryeval/equiv/DESC new file mode 100644 index 00000000000..e294d10cb23 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/DESC @@ -0,0 +1 @@ +equiv test. Take a look at equiv_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/equiv/FILES b/searchlib/src/tests/queryeval/equiv/FILES new file mode 100644 index 00000000000..79adf32c1d5 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/FILES @@ -0,0 +1 @@ +equiv_test.cpp diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp new file mode 100644 index 00000000000..3d97d05995f --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("equiv_test"); +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchData; +using search::fef::FieldPositionsIterator; + +class Test : public vespalib::TestApp { +public: + void testEquiv(); + int Main(); +}; + +void +Test::testEquiv() +{ + FakeResult a; + FakeResult b; + FakeResult c; + + a.doc(5).pos(1); + b.doc(5).pos(2); + c.doc(5).pos(3).doc(10).pos(4); + + MatchDataLayout subLayout; + TermFieldHandle fbh11 = subLayout.allocTermField(1); + TermFieldHandle fbh21 = subLayout.allocTermField(2); + TermFieldHandle fbh22 = subLayout.allocTermField(2); + + FieldSpecBaseList fields; + fields.add(FieldSpecBase(1, 1)); + fields.add(FieldSpecBase(2, 2)); + EquivBlueprint *eq_b = new EquivBlueprint(fields, subLayout); + + eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("foo", 1, fbh11), a)), 1.0); + eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh21), b)), 1.0); + eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh22), c)), 1.0); + + Blueprint::UP bp(eq_b); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? 
"strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (!strict) { + EXPECT_EQUAL(SearchIterator::beginId(), search->getDocId()); + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2u, itr.getPosition()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(7)); + if (!strict) { + EXPECT_EQUAL(5u, search->getDocId()); + EXPECT_TRUE(search->seek(10u)); + } + EXPECT_EQUAL(10u, search->getDocId()); + { // test doc 10 results + search->unpack(10u); + EXPECT_EQUAL(5u, md->resolveTermField(1)->getDocId()); // no match + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(10u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(4u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } else { + EXPECT_EQUAL(10u, search->getDocId()); + } + } +} + +int +Test::Main() +{ + 
TEST_INIT("equiv_test"); + testEquiv(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/fake_searchable/.cvsignore b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore new file mode 100644 index 00000000000..56a6e2188be --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +fake_searchable_test diff --git a/searchlib/src/tests/queryeval/fake_searchable/.gitignore b/searchlib/src/tests/queryeval/fake_searchable/.gitignore new file mode 100644 index 00000000000..42b48509660 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_fake_searchable_test_app diff --git a/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt new file mode 100644 index 00000000000..75eaae7d9ed --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fake_searchable_test_app + SOURCES + fake_searchable_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fake_searchable_test_app COMMAND searchlib_fake_searchable_test_app) diff --git a/searchlib/src/tests/queryeval/fake_searchable/DESC b/searchlib/src/tests/queryeval/fake_searchable/DESC new file mode 100644 index 00000000000..75ce65796f0 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/DESC @@ -0,0 +1 @@ +fake_searchable test. Take a look at fake_searchable_test.cpp for details. 
diff --git a/searchlib/src/tests/queryeval/fake_searchable/FILES b/searchlib/src/tests/queryeval/fake_searchable/FILES new file mode 100644 index 00000000000..b02a791e332 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/FILES @@ -0,0 +1 @@ +fake_searchable_test.cpp diff --git a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp new file mode 100644 index 00000000000..c3ff31625d3 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("fake_searchable_test"); +#include + +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::query; +using namespace search::fef; + +class Test : public vespalib::TestApp { +public: + int Main(); + void testTestFakeResult(); + void testTerm(); + void testPhrase(); + void testWeightedSet(); + void testMultiField(); + void testPhraseWithEmptyChild(); +private: + FakeRequestContext _requestContext; +}; + +void +Test::testTestFakeResult() +{ + EXPECT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(1).elem(5).len(15).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(1).len(15).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(19).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(1).pos(5)); + + 
EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(1)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).doc(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).elem(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6)); +} + +void +Test::testTerm() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(5).pos(3)); + + SimpleStringTerm termNode("word1", "viewfoo", 1, w); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, termNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testPhrase() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + 
FakeResult().doc(3).pos(7).doc(5).pos(3)); + source.addResult("fieldfoo", "word2", + FakeResult().doc(2).pos(1).doc(3).pos(10).doc(5).pos(4)); + + SimplePhrase phraseNode("viewfoo", 1, w); + phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w))); + phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testWeightedSet() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "friend1", + FakeResult().doc(3).doc(5).doc(7).doc(9)); + source.addResult("fieldfoo", "friend2", + FakeResult().doc(3).doc(4).doc(5).doc(6)); + source.addResult("fieldfoo", "friend3", + FakeResult().doc(5)); + + SimpleWeightedSetTerm weightedSet("fieldfoo", 1, w); + weightedSet.append(Node::UP(new SimpleStringTerm("friend1", "fieldfoo", 2, Weight(1)))); + weightedSet.append(Node::UP(new SimpleStringTerm("friend2", "fieldfoo", 3, 
Weight(2)))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, weightedSet); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(2)); + if (strict) { + EXPECT_EQUAL(3u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(3u)); + } + EXPECT_EQUAL(3u, search->getDocId()); + { // test doc 3 results + search->unpack(3u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(3u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2, itr.getElementWeight()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1, itr.getElementWeight()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(search->seek(4)); + EXPECT_TRUE(search->seek(5)); + EXPECT_TRUE(search->seek(6)); + EXPECT_TRUE(search->seek(7)); + EXPECT_TRUE(!search->seek(8)); + EXPECT_TRUE(search->seek(9)); + { // test doc 9 results + search->unpack(9u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(9u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1, itr.getElementWeight()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testMultiField() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(5).pos(3)); + source.addResult("fieldbar", "word1", + 
FakeResult().doc(5).pos(7).doc(10).pos(2)); + + SimpleStringTerm termNode("word1", "viewfoobar", 1, w); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + fields.add(FieldSpec("fieldbar", 2, 2)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, termNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(7u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(7)); + if (strict) { + EXPECT_EQUAL(10u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(10u)); + } + EXPECT_EQUAL(10u, search->getDocId()); + { // test doc 10 results + search->unpack(10u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_NOT_EQUAL(10u, data.getDocId()); + } + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(10u, data.getDocId()); + 
FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testPhraseWithEmptyChild() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(3).pos(7).doc(5).pos(3)); + + SimplePhrase phraseNode("viewfoo", 1, w); + phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w))); + phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +int +Test::Main() +{ + TEST_INIT("fake_searchable_test"); + testTestFakeResult(); + testTerm(); + testPhrase(); + testWeightedSet(); + testMultiField(); + testPhraseWithEmptyChild(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/getnodeweight/.gitignore b/searchlib/src/tests/queryeval/getnodeweight/.gitignore new file mode 100644 index 00000000000..a050aeb9215 --- /dev/null +++ b/searchlib/src/tests/queryeval/getnodeweight/.gitignore @@ -0,0 +1 @@ +searchlib_getnodeweight_test_app diff --git a/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt new file mode 100644 index 00000000000..5502e2c033b --- /dev/null +++ b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt @@ 
-0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_getnodeweight_test_app + SOURCES + getnodeweight_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_getnodeweight_test_app COMMAND searchlib_getnodeweight_test_app) diff --git a/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp new file mode 100644 index 00000000000..949a1364061 --- /dev/null +++ b/searchlib/src/tests/queryeval/getnodeweight/getnodeweight_test.cpp @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("getweight_test"); +#include + +#include +#include + +using namespace search::query; +using namespace search::queryeval; + +class Test : public vespalib::TestApp { +public: + int32_t getWeight(const Node &node); + int Main(); +}; + +int32_t +Test::getWeight(const Node &node) { + return getWeightFromNode(node).percent(); +} + +int +Test::Main() +{ + TEST_INIT("getweight_test"); + EXPECT_EQUAL(0, getWeight(SimpleAnd())); + EXPECT_EQUAL(0, getWeight(SimpleAndNot())); + EXPECT_EQUAL(42, getWeight(SimpleEquiv(0, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleNumberTerm("foo", "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleLocationTerm(Location(), "bar", 1, Weight(42)))); + EXPECT_EQUAL(0, getWeight(SimpleNear(5))); + EXPECT_EQUAL(0, getWeight(SimpleONear(5))); + EXPECT_EQUAL(0, getWeight(SimpleOr())); + EXPECT_EQUAL(42, getWeight(SimplePhrase("bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimplePrefixTerm("foo", "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleRangeTerm(Range(), "bar", 1, Weight(42)))); + EXPECT_EQUAL(0, getWeight(SimpleRank())); + EXPECT_EQUAL(42, getWeight(SimpleStringTerm("foo", "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, 
getWeight(SimpleSubstringTerm("foo", "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleSuffixTerm("foo", "bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm("bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleDotProduct("bar", 1, Weight(42)))); + EXPECT_EQUAL(42, getWeight(SimpleWandTerm("bar", 1, Weight(42), 57, 67, 77.7))); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore b/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore new file mode 100644 index 00000000000..a3012152158 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/.gitignore @@ -0,0 +1 @@ +searchlib_monitoring_search_iterator_test_app diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt b/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt new file mode 100644 index 00000000000..eebc9c8cf17 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_monitoring_search_iterator_test_app + SOURCES + monitoring_search_iterator_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_monitoring_search_iterator_test_app COMMAND searchlib_monitoring_search_iterator_test_app) diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC b/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC new file mode 100644 index 00000000000..1c126deb4ed --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/DESC @@ -0,0 +1 @@ +monitoring_search_iterator test. Take a look at monitoring_search_iterator_test.cpp for details. 
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES new file mode 100644 index 00000000000..b514a3cf512 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES @@ -0,0 +1 @@ +monitoring_search_iterator_test.cpp diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp new file mode 100644 index 00000000000..a559be21ea3 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp @@ -0,0 +1,325 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::queryeval::test; +using namespace search::fef; +using search::BitVector; +using search::BitVectorIterator; +using std::make_unique; + +struct HistorySearchIterator : public SearchIterator +{ + SearchHistory _history; + mutable bool _getPostingInfoCalled; + HistorySearchIterator() : _history(), _getPostingInfoCalled(false) {} + virtual void doSeek(uint32_t docId) { + _history.seek("x", docId); + setDocId(docId); + } + virtual void doUnpack(uint32_t docId) { _history.unpack("x", docId); } + virtual const PostingInfo *getPostingInfo() const { + _getPostingInfoCalled = true; + return NULL; + } +}; + +struct SimpleFixture +{ + MonitoringSearchIterator _itr; + SimpleResult _res; + SimpleFixture() + : _itr("SimpleIterator", + SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8))), + false), + _res() + { + _res.search(_itr); + } +}; + +struct AdvancedFixture +{ + MonitoringSearchIterator _itr; + AdvancedFixture() + : _itr("AdvancedIterator", + 
SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8). + addHit(16).addHit(32).addHit(64).addHit(128))), + true) + { + } +}; + +struct HistoryFixture +{ + MonitoringSearchIterator _itr; + HistoryFixture() + : _itr("HistoryIterator", SearchIterator::UP(new HistorySearchIterator()), false) + { + } +}; + +struct TreeFixture +{ + MonitoringSearchIterator::UP _itr; + SimpleResult _res; + TreeFixture() + : _itr() + { + MultiSearch::Children children; + children.push_back(new MonitoringSearchIterator("child1", + SearchIterator::UP + (new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(6))), + false)); + children.push_back(new MonitoringSearchIterator("child2", + SearchIterator::UP + (new SimpleSearch(SimpleResult().addHit(3).addHit(4).addHit(5))), + false)); + _itr.reset(new MonitoringSearchIterator("and", + SearchIterator::UP(AndSearch::create(children, true)), + false)); + _res.search(*_itr); + } +}; + +TEST_F("require that number of seeks is collected", SimpleFixture) +{ + EXPECT_EQUAL(4u, f._itr.getStats().getNumSeeks()); + EXPECT_EQUAL(4.0 / 3.0, f._itr.getStats().getNumSeeksPerUnpack()); +} + +TEST_F("require that number of unpacks is collected", SimpleFixture) +{ + EXPECT_EQUAL(3u, f._itr.getStats().getNumUnpacks()); +} + +TEST_F("require that docId stepping is collected (root iterator)", SimpleFixture) +{ + EXPECT_EQUAL(4u, f._itr.getStats().getNumDocIdSteps()); + EXPECT_EQUAL(1, f._itr.getStats().getAvgDocIdSteps()); +} + +TEST_F("require that docId stepping is collected (child iterator)", AdvancedFixture) +{ + f._itr.seek(1); // 2 - 1 + EXPECT_EQUAL(1u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(19); // 19 - 2 + EXPECT_EQUAL(18u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(64); // 64 - 32 + EXPECT_EQUAL(50u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(74); // 74 - 64 + EXPECT_EQUAL(60u, f._itr.getStats().getNumDocIdSteps()); + EXPECT_EQUAL(60 / 4, f._itr.getStats().getAvgDocIdSteps()); +} + 
+TEST_F("require that hit skipping is collected ", AdvancedFixture) +{ + f._itr.seek(1); + EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(4); + EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(16); + EXPECT_EQUAL(1u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(120); + EXPECT_EQUAL(3u, f._itr.getStats().getNumHitSkips()); + EXPECT_EQUAL(3.0 / 4.0, f._itr.getStats().getAvgHitSkips()); +} + +TEST_F("require that results from underlying iterator is exposed through monitoring iterator", SimpleFixture) +{ + EXPECT_EQUAL(SimpleResult().addHit(2).addHit(4).addHit(8), f._res); +} + +TEST_F("require that calls are forwarded to underlying iterator", HistoryFixture) +{ + f._itr.seek(2); + EXPECT_EQUAL(2u, f._itr.getDocId()); + f._itr.unpack(2); + f._itr.seek(4); + EXPECT_EQUAL(4u, f._itr.getDocId()); + f._itr.unpack(4); + f._itr.seek(8); + EXPECT_EQUAL(8u, f._itr.getDocId()); + f._itr.unpack(8); + f._itr.getPostingInfo(); + const HistorySearchIterator &hsi = dynamic_cast(f._itr.getIterator()); + EXPECT_EQUAL(SearchHistory().seek("x", 2).unpack("x", 2).seek("x", 4).unpack("x", 4).seek("x", 8).unpack("x", 8), + hsi._history); + EXPECT_TRUE(hsi._getPostingInfoCalled); +} + +void +addIterator(MonitoringSearchIterator::Dumper &d, + const vespalib::string &name, + int64_t numSeeks, + double avgDocIdSteps, + double avgHitSkips, + int64_t numUnpacks, + double numSeeksPerUnpack) +{ + d.openStruct("void", "search::queryeval::MonitoringSearchIterator"); + d.visitString("iteratorName", name); + { + d.openStruct("void", "MonitoringSearchIterator::Stats"); + d.visitInt("numSeeks", numSeeks); + d.visitFloat("avgDocIdSteps", avgDocIdSteps); + d.visitFloat("avgHitSkips", avgHitSkips); + d.visitInt("numUnpacks", numUnpacks); + d.visitFloat("numSeeksPerUnpack", numSeeksPerUnpack); + d.closeStruct(); + } + d.closeStruct(); +} + +TEST("require that dumper can handle formatting on several levels") +{ + MonitoringSearchIterator::Dumper d(2, 6, 6, 10, 
3); + addIterator(d, "root", 1, 1.1, 11.22, 11, 111.3); + { + d.openStruct("children", "void"); + addIterator(d, "c.1", 222222, 2.1111, 22.2222, 222000, 222.4444); + { + d.openStruct("children", "void"); + addIterator(d, "c.1.1", 333333, 3.1111, 33.2222, 333000, 333333.4444); + addIterator(d, "c.1.2", 444, 4.22, 4.33, 440, 4.44); + d.closeStruct(); + } + addIterator(d, "c.2", 555, 5.22, 5.33, 550, 5.44); + { + d.openStruct("children", "void"); + addIterator(d, "c.2.1", 666666, 6.1111, 66.2222, 333000, 666666.4444); + addIterator(d, "c.2.2", 777, 7.22, 7.33, 770, 7.44); + d.closeStruct(); + } + d.closeStruct(); + } + EXPECT_EQUAL( + "root: 1 seeks, 1.100 steps/seek, 11.220 skips/seek, 11 unpacks, 111.300 seeks/unpack\n" + " c.1: 222222 seeks, 2.111 steps/seek, 22.222 skips/seek, 222000 unpacks, 222.444 seeks/unpack\n" + " c.1.1: 333333 seeks, 3.111 steps/seek, 33.222 skips/seek, 333000 unpacks, 333333.444 seeks/unpack\n" + " c.1.2: 444 seeks, 4.220 steps/seek, 4.330 skips/seek, 440 unpacks, 4.440 seeks/unpack\n" + " c.2: 555 seeks, 5.220 steps/seek, 5.330 skips/seek, 550 unpacks, 5.440 seeks/unpack\n" + " c.2.1: 666666 seeks, 6.111 steps/seek, 66.222 skips/seek, 333000 unpacks, 666666.444 seeks/unpack\n" + " c.2.2: 777 seeks, 7.220 steps/seek, 7.330 skips/seek, 770 unpacks, 7.440 seeks/unpack\n", + d.toString()); +} + +TEST_F("require that single iterator can be dumped compact", AdvancedFixture) +{ + f._itr.seek(6); + f._itr.seek(16); + f._itr.unpack(16); + MonitoringSearchIterator::Dumper dumper; + visit(dumper, "", f._itr); + EXPECT_EQUAL("AdvancedIterator: 2 seeks, 7.00 steps/seek, 1.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n", + dumper.toString()); +} + +TEST_F("require that iterator tree can be dumped compact", TreeFixture) +{ + MonitoringSearchIterator::Dumper dumper; + visit(dumper, "", f._itr.get()); + EXPECT_EQUAL("and: 2 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n" + " child1: 3 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 
unpacks, 3.00 seeks/unpack\n" + " child2: 3 seeks, 1.67 steps/seek, 0.00 skips/seek, 1 unpacks, 3.00 seeks/unpack\n", + dumper.toString()); +} + +TEST_F("require that single iterator can be dumped verbosely", AdvancedFixture) +{ + f._itr.seek(6); + f._itr.seek(16); + f._itr.unpack(16); + vespalib::ObjectDumper dumper; + visit(dumper, "", &f._itr); + EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'AdvancedIterator'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 2\n" + " numDocIdSteps: 14\n" + " avgDocIdSteps: 7\n" + " numHitSkips: 2\n" + " avgHitSkips: 1\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 2\n" + " }\n" + " tag: ''\n" + "}\n", + dumper.toString()); +} + +TEST_F("require that iterator tree can be dumped verbosely", TreeFixture) +{ + vespalib::ObjectDumper dumper; + visit(dumper, "", f._itr.get()); + EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'and'\n" + " iteratorType: 'search::queryeval::AndSearchStrict'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 2\n" + " numDocIdSteps: 2\n" + " avgDocIdSteps: 1\n" + " numHitSkips: 0\n" + " avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 2\n" + " }\n" + " children: std::vector {\n" + " [0]: search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'child1'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 3\n" + " numDocIdSteps: 3\n" + " avgDocIdSteps: 1\n" + " numHitSkips: 0\n" + " avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 3\n" + " }\n" + " tag: ''\n" + " }\n" + " [1]: search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'child2'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 3\n" + " numDocIdSteps: 5\n" + " avgDocIdSteps: 1.66667\n" + " numHitSkips: 0\n" + " 
avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 3\n" + " }\n" + " tag: ''\n" + " }\n" + " }\n" + "}\n", + dumper.toString()); +} + +MonitoringSearchIterator::UP +create(SearchIterator::UP child) { + return make_unique("test", std::move(child), false); +} + +TEST("test monitoring search iterator handles initRange according to spec") { + search::test::InitRangeVerifier ir; + ir.verify(*create(ir.createIterator(ir.getExpectedDocIds(), false))); + ir.verify(*make_unique(create(ir.createIterator(ir.getExpectedDocIds(), false)))); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore new file mode 100644 index 00000000000..415cfe14f11 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore @@ -0,0 +1,2 @@ +searchlib_multibitvectoriterator_test_app +searchlib_multibitvectoriterator_bench_app diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt new file mode 100644 index 00000000000..1bac095225f --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multibitvectoriterator_test_app + SOURCES + multibitvectoriterator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multibitvectoriterator_test_app COMMAND searchlib_multibitvectoriterator_test_app) +vespa_add_executable(searchlib_multibitvectoriterator_bench_app + SOURCES + multibitvectoriterator_bench.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multibitvectoriterator_bench_app COMMAND searchlib_multibitvectoriterator_bench_app and no no 10 100000000 50 50 50 BENCHMARK) diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/DESC b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC new file mode 100644 index 00000000000..96fc26f5950 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC @@ -0,0 +1 @@ +multibitvectoriterator test. Take a look at multibitvectoriterator_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/FILES b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES new file mode 100644 index 00000000000..7ae4331d090 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES @@ -0,0 +1,2 @@ +multibitvectoriterator_test.cpp +multibitvectoriterator_bench.cpp diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp new file mode 100644 index 00000000000..8912be56351 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("multibitvectoriterator_test"); +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; + +//----------------------------------------------------------------------------- + +class Test : public vespalib::TestApp +{ +public: + void benchmark(); + int Main(); + template + void testSearch(bool strict); +private: + void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit); + void setup(); + std::vector< BitVector::UP > _bvs; + uint32_t _numSearch; + uint32_t _numDocs; + bool _strict; + bool _optimize; + vespalib::string _type; + std::vector _fillLimits; +}; + +void Test::setup() +{ + for(size_t i(0); i < _fillLimits.size(); i++) { + _bvs.push_back(BitVector::create(_numDocs)); + BitVector & bv(*_bvs.back()); + for (size_t j(0); j < bv.size(); j++) { + int r = rand(); + if (r < _fillLimits[i]) { // limit was pre-scaled to rand()'s range in Main() + bv.setBit(j); + } + } + bv.invalidateCachedCount(); + LOG(info, "Filled bitvector %zu with %d bits", i, bv.countTrueBits()); // %zu: i is a size_t + } +} + +typedef std::vector H; + +H +seek(SearchIterator & s, uint32_t docIdLimit) +{ + H h; + for (uint32_t docId(0); docId < docIdLimit; ) { + if (s.seek(docId)) { + h.push_back(docId); + docId++; + } else { + if (s.getDocId() > docId) { + docId = s.getDocId(); + } else { + docId++; + } + } + //printf("docId = %u\n", docId); + } + return h; +} + +void +Test::benchmark() +{ + if (_type == "and") { + LOG(info, "Testing 'and'"); + for (size_t i(0); i < _numSearch; i++) { + testSearch(_strict); + } + } else { + LOG(info, "Testing 'or'"); + for (size_t i(0); i < _numSearch; i++) { + testSearch(_strict); + } + } +} + +template +void +Test::testSearch(bool strict) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + MultiSearch::Children andd; + for (size_t i(0); i < _bvs.size(); i++) { + andd.push_back(BitVectorIterator::create(_bvs[i].get(), tfmda, strict).release()); + } 
+ SearchIterator::UP s(T::create(andd, strict)); + if (_optimize) { + LOG(info, "Optimizing iterator"); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + } + H h = seek(*s, _numDocs); + LOG(info, "Found %zu hits", h.size()); // %zu: size() yields a size_t +} + +int +Test::Main() +{ + TEST_INIT("multibitvectoriterator_benchmark"); + if (_argc < 6) { // five positional arguments are mandatory; fill percentages are optional + LOG(info, "%s <'and/or'> <'strict/no-strict'> <'optimize/no-optimize'> numSearch numDocs [fillPercent ...]", _argv[0]); + return -1; + } + _type = _argv[1]; + _strict = _argv[2] == vespalib::string("strict"); + _optimize = _argv[3] == vespalib::string("optimize"); + _numSearch = strtoul(_argv[4], NULL, 0); + _numDocs = strtoul(_argv[5], NULL, 0); + for (int i(6); i < _argc; i++) { + _fillLimits.push_back((RAND_MAX/100) * strtoul(_argv[i], NULL, 0)); // percentage scaled to rand()'s range + } + LOG(info, "Start setup of '%s' isearch with %zu vectors with %u documents", _type.c_str(), _fillLimits.size(), _numDocs); // %zu for size_t, %u for the uint32_t _numDocs + setup(); + LOG(info, "Start benchmark"); + benchmark(); + LOG(info, "Done benchmark"); + TEST_FLUSH(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp new file mode 100644 index 00000000000..f3a25d675b2 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp @@ -0,0 +1,531 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP("multibitvectoriterator_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; + +//----------------------------------------------------------------------------- + +class Test : public vespalib::TestApp +{ +public: + void testAndNot(); + void testAnd(); + void testBug7163266(); + void testOr(); + void testAndWith(); + void testEndGuard(); + template + void testThatOptimizePreservesUnpack(); + template + void testOptimizeCommon(bool isAnd); + template + void testOptimizeAndOr(); + template + void testSearch(bool strict); + int Main(); +private: + void verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd); + void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit); + void setup(); + std::vector< BitVector::UP > _bvs; +}; + +void Test::setup() +{ + srand(7); + for(size_t i(0); i < 3; i++) { + _bvs.push_back(BitVector::create(10000)); + BitVector & bv(*_bvs.back()); + for (size_t j(0); j < bv.size(); j++) { + int r = rand(); + if (r & 0x1) { + bv.setBit(j); + } + } + } +} + +typedef std::vector H; + +H +seekNoReset(SearchIterator & s, uint32_t start, uint32_t docIdLimit) +{ + H h; + for (uint32_t docId(start); docId < docIdLimit; ) { + if (s.seek(docId)) { + h.push_back(docId); + docId++; + } else { + if (s.getDocId() > docId) { + docId = s.getDocId(); + } else { + docId++; + } + } + //printf("docId = %u\n", docId); + } + return h; +} + +H +seek(SearchIterator & s, uint32_t docIdLimit) +{ + s.resetRange(); + s.initFullRange(); + return seekNoReset(s, 1, docIdLimit); +} + +void +Test::testAndWith() +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), 
tfmda, false).release()); + + SearchIterator::UP s(AndSearch::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + + s->initFullRange(); + H firstHits2 = seekNoReset(*s, 1, 130); + SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9)); + H lastHits2F = seekNoReset(*s, 130, _bvs[0]->size()); + + children.clear(); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, false).release()); + s.reset(AndSearch::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + s->initFullRange(); + H firstHits3 = seekNoReset(*s, 1, 130); + H lastHits3 = seekNoReset(*s, 130, _bvs[0]->size()); + //These constants will change if srand(7) is changed. + EXPECT_EQUAL(30u, firstHits2.size()); + EXPECT_EQUAL(19u, firstHits3.size()); + EXPECT_EQUAL(1234u, lastHits2F.size()); + ASSERT_EQUAL(lastHits3.size(), lastHits2F.size()); + for (size_t i(0); i < lastHits3.size(); i++) { + EXPECT_EQUAL(lastHits3[i], lastHits2F[i]); + } + } +} + +void +Test::testAndNot() +{ + testOptimizeCommon(false); + testSearch(false); + testSearch(true); +} + +void +Test::testAnd() +{ + testOptimizeCommon(true); + testOptimizeAndOr(); + testSearch(false); + testSearch(true); +} + +void +Test::testOr() +{ + testOptimizeCommon< OrSearch >(false); + testOptimizeAndOr< OrSearch >(); + testSearch(false); + testSearch(true); +} + +void +Test::testBug7163266() +{ + TermFieldMatchData tfmd[30]; + TermFieldMatchDataArray tfmda[30]; + for (size_t i(0); i < 30; i++) { + tfmda[i].add(&tfmd[i]); + } + _bvs[0]->setBit(1); + _bvs[1]->setBit(1); + MultiSearch::Children children; + UnpackInfo unpackInfo; + for (size_t i(0); i < 28; i++) { + children.push_back(new TrueSearch(tfmd[2])); + unpackInfo.add(i); + } + 
children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release()); + SearchIterator::UP s(AndSearch::create(children, false, unpackInfo)); + const MultiSearch * ms = dynamic_cast(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(30u, ms->getChildren().size()); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", s->getClassName()); + for (size_t i(0); i < 28; i++) { + EXPECT_TRUE(ms->needUnpack(i)); + } + EXPECT_FALSE(ms->needUnpack(28)); + EXPECT_FALSE(ms->needUnpack(29)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + ms = dynamic_cast(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(29u, ms->getChildren().size()); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict", s->getClassName()); + for (size_t i(0); i < 28; i++) { + EXPECT_TRUE(ms->needUnpack(i)); + } + EXPECT_TRUE(ms->needUnpack(28)); // NB: force unpack all +} + +template +void +Test::testThatOptimizePreservesUnpack() +{ + TermFieldMatchData tfmd[4]; + TermFieldMatchDataArray tfmda[4]; + for (size_t i(0); i < 4; i++) { + tfmda[i].add(&tfmd[i]); + } + _bvs[0]->setBit(1); + _bvs[1]->setBit(1); + _bvs[2]->setBit(1); + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release()); + children.push_back(new TrueSearch(tfmd[2])); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda[3], false).release()); + UnpackInfo unpackInfo; + unpackInfo.add(1); + unpackInfo.add(2); + SearchIterator::UP s(T::create(children, false, unpackInfo)); + s->initFullRange(); + const MultiSearch * ms = dynamic_cast(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(4u, ms->getChildren().size()); + verifySelectiveUnpack(*s, tfmd); + tfmd[1].resetOnlyDocId(0); + tfmd[2].resetOnlyDocId(0); + s = 
MultiBitVectorIteratorBase::optimize(std::move(s)); + s->resetRange(); + s->initFullRange(); + ms = dynamic_cast(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(2u, ms->getChildren().size()); + verifySelectiveUnpack(*s, tfmd); +} + +void +Test::verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd) +{ + s.seek(1); + EXPECT_EQUAL(0u, tfmd[0].getDocId()); + EXPECT_EQUAL(0u, tfmd[1].getDocId()); + EXPECT_EQUAL(0u, tfmd[2].getDocId()); + EXPECT_EQUAL(0u, tfmd[3].getDocId()); + s.unpack(1); + EXPECT_EQUAL(0u, tfmd[0].getDocId()); + EXPECT_EQUAL(1u, tfmd[1].getDocId()); + EXPECT_EQUAL(1u, tfmd[2].getDocId()); + EXPECT_EQUAL(0u, tfmd[3].getDocId()); +} + +void +Test::searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit) +{ + H a = seek(*s, docIdLimit); + SearchIterator * p = s.get(); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + if (s.get() != p) { + H b = seek(*s, docIdLimit); + EXPECT_FALSE(a.empty()); + EXPECT_EQUAL(a.size(), b.size()); + for (size_t i(0); i < a.size(); i++) { + EXPECT_EQUAL(a[i], b[i]); + } + } +} + +template +void +Test::testSearch(bool strict) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + uint32_t docIdLimit(_bvs[0]->size()); + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, 
strict).release()); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } +} + +template +void +Test::testOptimizeCommon(bool isAnd) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(1u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, 
false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + EXPECT_FALSE(dynamic_cast(m.getChildren()[1])->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1])->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9)); + + if (isAnd) { + EXPECT_TRUE(nullptr == filter.get()); + } else { + EXPECT_FALSE(nullptr == filter.get()); + } + + children.clear(); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, 
false).release()); + s.reset(T::create(children, true)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + filter = s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9); + + if (isAnd) { + EXPECT_TRUE(nullptr == filter.get()); + } else { + EXPECT_FALSE(nullptr == filter.get()); + } + } +} + +template +void +Test::testOptimizeAndOr() +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + EXPECT_FALSE(dynamic_cast(s.get())->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_FALSE(dynamic_cast(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + 
EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_FALSE(dynamic_cast(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + const MultiSearch & m(dynamic_cast(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast(m.getChildren()[1]) != NULL); + } +} + +void +Test::testEndGuard() +{ + typedef AndSearch T; + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), 
tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, true).release()); + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + s->initFullRange(); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast(s.get()) != NULL); + MultiSearch & m(dynamic_cast(*s)); + EXPECT_TRUE(m.seek(0) || !m.seek(0)); + EXPECT_TRUE(m.seek(3) || !m.seek(3)); + EXPECT_FALSE(m.seek(_bvs[0]->size()+987)); +} + +int +Test::Main() +{ + TEST_INIT("multibitvectoriterator_test"); + setup(); + testBug7163266(); + testThatOptimizePreservesUnpack(); + testThatOptimizePreservesUnpack(); + TEST_FLUSH(); + testEndGuard(); + TEST_FLUSH(); + testAndNot(); + TEST_FLUSH(); + testAnd(); + TEST_FLUSH(); + testOr(); + TEST_FLUSH(); + testAndWith(); + TEST_FLUSH(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore new file mode 100644 index 00000000000..0a4881f0952 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore @@ -0,0 +1 @@ +searchlib_parallel_weak_and_test_app diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt new file mode 100644 index 00000000000..b76286bea65 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_parallel_weak_and_test_app + SOURCES + parallel_weak_and_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_parallel_weak_and_test_app COMMAND searchlib_parallel_weak_and_test_app) diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/DESC b/searchlib/src/tests/queryeval/parallel_weak_and/DESC new file mode 100644 index 00000000000..f58343f384b --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/DESC @@ -0,0 +1 @@ +parallel_weak_and test. Take a look at parallel_weak_and_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/FILES b/searchlib/src/tests/queryeval/parallel_weak_and/FILES new file mode 100644 index 00000000000..972727bfa00 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/FILES @@ -0,0 +1,2 @@ +weak_and_test.cpp +weak_and_bench.cpp diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp new file mode 100644 index 00000000000..74aa052e486 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -0,0 +1,681 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::query; +using namespace search::queryeval; +using namespace search::queryeval::test; + +typedef search::feature_t feature_t; +typedef wand::score_t score_t; +typedef ParallelWeakAndSearch::MatchParams MatchParams; +typedef ParallelWeakAndSearch::RankParams RankParams; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; +using search::IDocumentWeightAttribute; +using search::fef::TermFieldMatchData; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; + + +struct Scores : public std::vector +{ + Scores &add(score_t val) { + push_back(val); + return *this; + } +}; + +struct ScoresHistory : public std::vector +{ + ScoresHistory &add(const Scores &s) { + push_back(s); + return *this; + } +}; + +std::ostream &operator << (std::ostream &out, const ScoresHistory &hist) +{ + out << "ScoresHistory:\n"; + for (size_t i = 0; i < hist.size(); ++i) { + const Scores &scores = hist[i]; + out << "[" << i << "]: "; + for (size_t j = 0; j < scores.size(); ++j) { + if (j != 0) { + out << ","; + } + out << scores[j]; + } + out << std::endl; + } + return out; +} + +struct TestHeap : public WeakAndHeap +{ + ScoresHistory history; + + TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {} + virtual void adjust(score_t *begin, score_t *end) { + Scores scores; + for (score_t *itr = begin; itr != end; ++itr) { + scores.add(*itr); + } + history.push_back(scores); + setMinScore(1); + } + virtual size_t size() const { return history.size(); } +}; + +template +struct WandTestSpec : public WandSpec +{ + HeapType heap; + TermFieldMatchData rootMatchData; + MatchParams matchParams; + + WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1, + score_t scoreThreshold = 0, double 
thresholdBoostFactor = 1) + : WandSpec(), + heap(scoresToTrack), + rootMatchData(), + matchParams(heap, scoreThreshold, thresholdBoostFactor, scoresAdjustFrequency) + {} /* Returns a new TrackedSearch named "PWAND" that wraps ParallelWeakAndSearch::create(); the children match data is moved into the RankParams. */ + SearchIterator *create() { + MatchData::UP childrenMatchData = createMatchData(); + MatchData *tmp = childrenMatchData.get(); + return new TrackedSearch("PWAND", getHistory(), ParallelWeakAndSearch::create(getTerms(tmp), + matchParams, + RankParams(rootMatchData, + std::move(childrenMatchData)), + true)); + } +}; + +typedef WandTestSpec WandSpecWithTestHeap; +typedef WandTestSpec WandSpecWithRealHeap; + /* Initialises sb for its full range, then seeks from doc 1 until the end; every hit is unpacked and recorded with the raw score read from tfmd. */ +FakeResult +doSearch(SearchIterator &sb, const TermFieldMatchData &tfmd) +{ + FakeResult retval; + sb.initFullRange(); + for (sb.seek(1); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + sb.unpack(sb.getDocId()); + retval.doc(sb.getDocId()); + feature_t score = tfmd.getRawScore(); + retval.score(score); + } + return retval; +} + /* Copies only the docIds of result.inspect() into a SimpleResult. */ +SimpleResult +asSimpleResult(const FakeResult &result) +{ + SimpleResult retval; + for (size_t i = 0; i < result.inspect().size(); ++i) { + retval.addHit(result.inspect()[i].docId); + } + return retval; +} + /* Collects (token, weight) pairs and turns them into a SimpleWandTerm query node, a blueprint, an iterator or a search result. */ +struct WandBlueprintSpec +{ + static const uint32_t fieldId = 0; + static const TermFieldHandle handle = 0; + std::vector > tokens; + uint32_t docIdLimit = 0; + FakeRequestContext requestContext; + + WandBlueprintSpec &add(const std::string &token, int32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + /* Builds a SimpleWandTerm on "view" and appends one SimpleStringTerm per stored token, weighted with the token's weight. */ + Node::UP createNode(uint32_t scoresToTrack = 100, + score_t scoreThreshold = 0, + double thresholdBoostFactor = 1) const { + SimpleWandTerm *node = new SimpleWandTerm("view", 0, Weight(0), + scoresToTrack, scoreThreshold, thresholdBoostFactor); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, + Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + Blueprint::UP blueprint(Searchable &searchable, const std::string &field, const search::query::Node 
&term) const { + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, term); + EXPECT_TRUE(dynamic_cast(bp.get()) != 0); + return bp; + } + /* Builds the default node and its blueprint, fetches postings, applies docIdLimit and returns the created search. */ + SearchIterator::UP iterator(Searchable &searchable, const std::string &field) const { + Node::UP term = createNode(); + Blueprint::UP bp = blueprint(searchable, field, *term); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + bp->fetchPostings(true); + bp->setDocIdLimit(docIdLimit); + SearchIterator::UP sb = bp->createSearch(*md, true); + EXPECT_TRUE(dynamic_cast(sb.get()) != 0); + return sb; + } + /* Convenience overload: searches with the node produced by createNode() using its default arguments. */ + FakeResult search(Searchable &searchable, const std::string &field) const { + Node::UP term = createNode(); + return search(searchable, field, *term); + } + /* Same setup as iterator(), but runs doSearch() and reads scores from the term field resolved for handle. */ + FakeResult search(Searchable &searchable, const std::string &field, const search::query::Node &term) const { + Blueprint::UP bp = blueprint(searchable, field, term); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + bp->fetchPostings(true); + bp->setDocIdLimit(docIdLimit); + SearchIterator::UP sb = bp->createSearch(*md, true); + EXPECT_TRUE(dynamic_cast(sb.get()) != 0); + return doSearch(*sb, *md->resolveTermField(handle)); + } +}; + /* Base fixture for the algorithm tests: owns a WandSpecWithRealHeap; prepare() creates the iterator from spec and stores the doSearch() output in result. Derived fixtures add leaves and then call prepare(). */ +struct FixtureBase +{ + WandSpecWithRealHeap spec; + FakeResult result; + FixtureBase(uint32_t scoresToTrack, + uint32_t scoresAdjustFrequency, + score_t scoreThreshold = 0, + double boostFactor = 1.0) + : spec(scoresToTrack, scoresAdjustFrequency, scoreThreshold, boostFactor), + result() {} + void prepare() { + SearchIterator::UP si(spec.create()); + result = doSearch(*si, spec.rootMatchData); + } +}; + /* Two leaves, "A" (weight 1, docs 1..6) and "B" (weight 4, docs 1, 3, 5), with 2 scores tracked. */ +struct AlgoSimpleFixture : public FixtureBase +{ + AlgoSimpleFixture() : FixtureBase(2, 1) { + spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6)); + spec.leaf(LeafSpec("B", 4).doc(1, 1).doc(3, 3).doc(5, 5)); + prepare(); + } +}; + +struct AlgoAdvancedFixture : public FixtureBase +{ + 
AlgoAdvancedFixture() : FixtureBase(100, 1) { /* five leaves named "1".."5", each with three docs spaced so that docids interleave across leaves */ + spec.leaf(LeafSpec("1").doc(1, 1).doc(11, 1).doc(111, 1)); + spec.leaf(LeafSpec("2").doc(2, 1).doc(12, 1).doc(112, 1)); + spec.leaf(LeafSpec("3").doc(3, 1).doc(13, 1).doc(113, 1)); + spec.leaf(LeafSpec("4").doc(4, 1).doc(14, 1).doc(114, 1)); + spec.leaf(LeafSpec("5").doc(5, 1).doc(15, 1).doc(115, 1)); + prepare(); + } +}; + /* Leaves are backed by EagerChild iterators instead of doc lists: "A" is constructed with search::endDocId and "B" with 10. */ +struct AlgoSubsearchFixture : public FixtureBase +{ + AlgoSubsearchFixture() : FixtureBase(2, 1) { + spec.leaf(LeafSpec("A", 10).itr(new EagerChild(search::endDocId))); + spec.leaf(LeafSpec("B", 20).itr(new EagerChild(10))); + prepare(); + } +}; + /* Single leaf whose two docs carry the same weight; only one score is tracked. */ +struct AlgoSameScoreFixture : public FixtureBase +{ + AlgoSameScoreFixture() : FixtureBase(1, 1) { + spec.leaf(LeafSpec("A").doc(1, 1).doc(2, 1)); + prepare(); + } +}; + /* The next three fixtures forward the test-supplied scoreThreshold to FixtureBase (3 scores tracked, adjust frequency 1). */ +struct AlgoScoreThresholdFixture : public FixtureBase +{ + AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30)); + spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40)); + prepare(); + } +}; + +struct AlgoLargeScoresFixture : public FixtureBase +{ + AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 60000).doc(1, 60000).doc(2, 70000)); + spec.leaf(LeafSpec("B", 70000).doc(1, 80000).doc(3, 90000)); + prepare(); + } +}; + +struct AlgoExhaustPastFixture : public FixtureBase +{ + AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 1).doc(1, 20).doc(3, 40).doc(5, 10)); + spec.leaf(LeafSpec("B", 1).doc(5, 10)); + spec.leaf(LeafSpec("C", 1).doc(5, 10)); + prepare(); + } +}; + + /* Expected scores are written as term weight * document weight, summed over the leaves that contain the doc. */ +TEST_F("require that algorithm prunes bad hits after enough good ones are obtained", AlgoSimpleFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 1 + 4 * 1) + .doc(2).score(1 * 2) + .doc(3).score(1 * 3 + 4 * 3) + .doc(5).score(1 * 5 + 4 * 5); + EXPECT_EQUAL(expect, f.result); +} + +TEST_F("require that 
algorithm uses subsearches as expected", AlgoSimpleFixture) { /* compares the exact seek/step/unpack trace recorded in spec.getHistory() for the AlgoSimpleFixture run */ + EXPECT_EQUAL(SearchHistory() + .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1) + .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1) + .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2) + .unpack("PWAND", 2) + .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3) + .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3) + .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5) + .unpack("PWAND", 5).seek("A", 5).step("A", 5).unpack("A", 5) + .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); +} + /* Only docids are compared here; the result is reduced with asSimpleResult(). */ +TEST_F("require that algorithm considers documents in the right order", AlgoAdvancedFixture) +{ + EXPECT_EQUAL(SimpleResult() + .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) + .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) + .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result)); +} + /* Checks both the single expected hit (doc 10, score 20) and the recorded call trace. */ +TEST_F("require that algorithm take initial docid for subsearches into account", AlgoSubsearchFixture) +{ + EXPECT_EQUAL(FakeResult().doc(10).score(20), f.result); + EXPECT_EQUAL(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10) + .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); +} + +TEST_F("require that algorithm uses first match when two matches have same score", AlgoSameScoreFixture) +{ + EXPECT_EQUAL(FakeResult().doc(1).score(100), f.result); +} + /* The threshold tests below rerun AlgoScoreThresholdFixture with initial thresholds 29, 30, 50 and 80 and expect 3, 2, 1 and 0 hits respectively. */ +TEST_F("require that algorithm uses initial score threshold (all hits greater)", AlgoScoreThresholdFixture(29)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (2 hits greater)", 
AlgoScoreThresholdFixture(30)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (1 hit greater)", AlgoScoreThresholdFixture(50)) +{ + EXPECT_EQUAL(FakeResult() + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (0 hits greater)", AlgoScoreThresholdFixture(80)) +{ + EXPECT_EQUAL(FakeResult(), f.result); +} + /* Threshold and expected scores are written with long literals (60000L * 70000L etc.). */ +TEST_F("require that algorithm handle large scores", AlgoLargeScoresFixture(60000L * 70000L)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.result); +} + +TEST_F("require that algorithm steps all present terms when past is empty", AlgoExhaustPastFixture(25)) +{ + EXPECT_EQUAL(FakeResult() + .doc(3).score(40) + .doc(5).score(30), f.result); +} + /* Uses the TestHeap-backed spec with scoresToTrack = 2 and scoresAdjustFrequency = 2 over one leaf with six docs; hits are collected into a SimpleResult. */ +struct HeapFixture +{ + WandSpecWithTestHeap spec; + SimpleResult result; + HeapFixture() : spec(2, 2), result() { + spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6)); + SearchIterator::UP sb(spec.create()); + result.search(*sb); + } +}; + /* Expects all six docs as hits and three recorded batches of two scores each in the TestHeap history. */ +TEST_F("require that scores are collected in batches before adjusting heap", HeapFixture) +{ + EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6), + f.result); + EXPECT_EQUAL(ScoresHistory().add(Scores().add(1).add(2)) + .add(Scores().add(3).add(4)) + .add(Scores().add(5).add(6)), + f.spec.heap.history); +} + + /* Same leaves as AlgoScoreThresholdFixture but with 10 scores tracked and the default threshold. */ +struct SearchFixture : public FixtureBase +{ + SearchFixture() : FixtureBase(10, 1) { + spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30)); + spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40)); + prepare(); + } +}; + +TEST_F("require that dot product score is calculated", SearchFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40); + EXPECT_EQUAL(expect, f.result); +} + + +struct 
BlueprintFixtureBase +{ /* Pairs a WandBlueprintSpec with a FakeSearchable and forwards every call using the fixed field name "field". */ + WandBlueprintSpec spec; + FakeSearchable searchable; + BlueprintFixtureBase() : spec(), searchable() {} + Blueprint::UP blueprint(const search::query::Node &term) { + return spec.blueprint(searchable, "field", term); + } + SearchIterator::UP iterator() { + return spec.iterator(searchable, "field"); + } + FakeResult search(const search::query::Node &term) { + return spec.search(searchable, "field", term); + } + FakeResult search() { + return spec.search(searchable, "field"); + } +}; + +/* Registers terms "A" (weight 20) and "B" (weight 10) with hits_a / hits_b consecutive docs starting at 1; docIdLimit is docs + 1. */ +struct BlueprintHitsFixture : public BlueprintFixtureBase +{ + FakeResult createResult(size_t hits) { + FakeResult result; + for (size_t i = 0; i < hits; ++i) { + result.doc(i + 1); + } + result.minMax(1, 10); + return result; + } + BlueprintHitsFixture(size_t hits_a, size_t hits_b, size_t docs) : BlueprintFixtureBase() { + spec.docIdLimit = docs + 1; + spec.add("A", 20).add("B", 10); + searchable.addResult("field", "A", createResult(hits_a)); + searchable.addResult("field", "B", createResult(hits_b)); + } + /* Returns true when the term with weight 20 ("A") is first in the created search. */ + bool maxScoreFirst() { + SearchIterator::UP itr = iterator(); + const ParallelWeakAndSearch *wand = dynamic_cast<const ParallelWeakAndSearch *>(itr.get()); + /* Fail the test instead of dereferencing a null pointer if the iterator has an unexpected type. */ + ASSERT_TRUE(wand != nullptr); + ASSERT_EQUAL(2u, wand->get_num_terms()); + return (wand->get_term_weight(0) == 20); + } +}; + +/* Four single-doc leaves searched with score threshold 800 and the given boost factor. The outcome is stored in the inherited FixtureBase::result via prepare(); no shadowing member is declared. */ +struct ThresholdBoostFixture : public FixtureBase +{ + ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) { + spec.leaf(LeafSpec("A").doc(1, 10)); + spec.leaf(LeafSpec("B").doc(2, 20)); + spec.leaf(LeafSpec("C").doc(3, 30)); + spec.leaf(LeafSpec("D").doc(4, 42)); + prepare(); + } +}; + +struct BlueprintFixture : public BlueprintFixtureBase +{ + BlueprintFixture() : BlueprintFixtureBase() { + searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30)); + searchable.addResult("field", "B", FakeResult().doc(1).weight(20).pos(0).doc(3).weight(40).pos(0).minMax(0, 40)); + 
spec.add("A", 1).add("B", 2); + } +}; + +/* Same shape as BlueprintFixture but with document weights and term weights in the 60000..90000 range. */ +struct BlueprintLargeScoresFixture : public BlueprintFixtureBase +{ + BlueprintLargeScoresFixture() : BlueprintFixtureBase() { + searchable.addResult("field", "A", FakeResult().doc(1).weight(60000).pos(0).doc(2).weight(70000).pos(0).minMax(0, 70000)); + searchable.addResult("field", "B", FakeResult().doc(1).weight(80000).pos(0).doc(3).weight(90000).pos(0).minMax(0, 90000)); + spec.add("A", 60000).add("B", 70000); + } +}; + +/* Single term "A" (weight 5) used by the asString() test. */ +struct BlueprintAsStringFixture : public BlueprintFixtureBase +{ + BlueprintAsStringFixture() : BlueprintFixtureBase() { + searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30)); + spec.add("A", 5); + } +}; + + +TEST_F("require that hit estimate is calculated", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(); + Blueprint::UP bp = f.blueprint(*term); + EXPECT_EQUAL(4u, bp->getState().estimate().estHits); +} + +TEST_F("require that blueprint picks up docid limit", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(57, 67, 77.7); + Blueprint::UP bp = f.blueprint(*term); + const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + /* Abort this test on an unexpected blueprint type rather than dereferencing null. */ + ASSERT_TRUE(pbp != nullptr); + EXPECT_EQUAL(0u, pbp->get_docid_limit()); + bp->setDocIdLimit(1000); + EXPECT_EQUAL(1000u, pbp->get_docid_limit()); +} + +TEST_F("require that scores to track, score threshold and threshold boost factor is passed down from query node to blueprint", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(57, 67, 77.7); + Blueprint::UP bp = f.blueprint(*term); + const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + /* Abort this test on an unexpected blueprint type rather than dereferencing null. */ + ASSERT_TRUE(pbp != nullptr); + EXPECT_EQUAL(57u, pbp->getScores().getScoresToTrack()); + EXPECT_EQUAL(67u, pbp->getScoreThreshold()); + EXPECT_EQUAL(77.7, pbp->getThresholdBoostFactor()); +} + +TEST_F("require that search iterator is correctly setup and executed", BlueprintFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40); + 
EXPECT_EQUAL(expect, f.search()); +} + +/* createNode(3, 50): 3 scores tracked, initial score threshold 50. */ +TEST_F("require that initial score threshold can be specified (1 hit greater)", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(3, 50); + EXPECT_EQUAL(FakeResult() + .doc(3).score(2 * 40), f.search(*term)); +} + +TEST_F("require that large scores are handled", BlueprintLargeScoresFixture) +{ + Node::UP term = f.spec.createNode(3, 60000L * 70000L); + EXPECT_EQUAL(FakeResult() + .doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.search(*term)); +} + +TEST_F("require that docid limit is propagated to search iterator", BlueprintFixture()) +{ + f1.spec.docIdLimit = 4050; + SearchIterator::UP itr = f1.iterator(); + const ParallelWeakAndSearch *wand = dynamic_cast<const ParallelWeakAndSearch *>(itr.get()); + /* Abort this test on an unexpected iterator type rather than dereferencing null. */ + ASSERT_TRUE(wand != nullptr); + EXPECT_EQUAL(4050u, wand->getMatchParams().docIdLimit); +} + +/* Term "A" (weight 20) is expected first for 50 and 60 hits, but not for 80 hits, with "B" fixed at 50 hits. */ +TEST_FFF("require that terms are sorted for maximum skipping", + BlueprintHitsFixture(50, 50, 100), + BlueprintHitsFixture(60, 50, 100), + BlueprintHitsFixture(80, 50, 100)) +{ + EXPECT_TRUE(f1.maxScoreFirst()); + EXPECT_TRUE(f2.maxScoreFirst()); + EXPECT_FALSE(f3.maxScoreFirst()); +} + +TEST_FF("require that threshold boosting works as expected", ThresholdBoostFixture(1.0), ThresholdBoostFixture(2.0)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1000) + .doc(2).score(2000) + .doc(3).score(3000) + .doc(4).score(4200), f1.result); + EXPECT_EQUAL(FakeResult() + .doc(2).score(2000) + .doc(4).score(4200), f2.result); +} + +TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) +{ + Node::UP term = f.spec.createNode(57, 67); + Blueprint::UP bp = f.blueprint(*term); + vespalib::string expStr = "search::queryeval::ParallelWeakAndBlueprint {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 0\n" + " handle: 0\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 2\n" + " tree_size: 2\n" + " allow_termwise_eval: 0\n" + " }\n" + " sourceId: 
4294967295\n" + " docid_limit: 0\n" + " _weights: std::vector {\n" + " [0]: 5\n" + " }\n" + " _terms: std::vector {\n" + " [0]: search::queryeval::FakeBlueprint {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 0\n" + " handle: 0\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 2\n" + " tree_size: 1\n" + " allow_termwise_eval: 1\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " }\n" + " }\n" + "}\n"; + EXPECT_EQUAL(expStr, bp->asString()); +} + +using MatchParams = ParallelWeakAndSearch::MatchParams; +using RankParams = ParallelWeakAndSearch::RankParams; + /* WeakAndHeap constructed with 9001 whose adjust() does nothing. */ +struct DummyHeap : public WeakAndHeap { + DummyHeap() : WeakAndHeap(9001) {} + void adjust(score_t *, score_t *) override {} +}; + /* Builds a ParallelWeakAndSearch in one of two ways: with use_dwa it is created directly from the weights, dictionary entries and attribute; otherwise one DocumentWeightSearchIterator child is created per dictionary entry, each bound to its own term field allocated from a fresh MatchDataLayout. */ +SearchIterator::UP create_wand(bool use_dwa, + TermFieldMatchData &tfmd, + const MatchParams &matchParams, + const std::vector &weights, + const std::vector &dict_entries, + const IDocumentWeightAttribute &attr, + bool strict) +{ + if (use_dwa) { + return ParallelWeakAndSearch::create(tfmd, matchParams, weights, dict_entries, attr, strict); + } + // use search iterators as children + MatchDataLayout layout; + std::vector handles; + for (size_t i = 0; i < weights.size(); ++i) { + handles.push_back(layout.allocTermField(tfmd.getFieldId())); + } + MatchData::UP childrenMatchData = layout.createMatchData(); + assert(childrenMatchData->getNumTermFields() == dict_entries.size()); + wand::Terms terms; + for (size_t i = 0; i < dict_entries.size(); ++i) { + terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + weights[i], + dict_entries[i].posting_size, + childrenMatchData->resolveTermField(handles[i]))); + } + assert(terms.size() == dict_entries.size()); + return SearchIterator::UP(ParallelWeakAndSearch::create(terms, matchParams, RankParams(tfmd, std::move(childrenMatchData)), strict)); +} + 
+TEST("verify initRange") { /* Spreads the verifier's expected docids round-robin over num_children entries (index i goes to i % num_children, value 1), then runs InitRangeVerifier::verify() on a wand built by create_wand() for every combination of use_dwa and strict. NOTE(review): set_doc() arguments are presumably (docid, key, weight) - confirm against DocumentWeightAttributeHelper. */ + const size_t num_children = 7; + InitRangeVerifier ir; + DocumentWeightAttributeHelper helper; + helper.add_docs(ir.getDocIdLimit()); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + helper.set_doc(full_list[i], i % num_children, 1); + } + std::vector weights(num_children, 1); + for (bool use_dwa: {false, true}) { + for (bool strict: {false, true}) { + DummyHeap dummy_heap; + TermFieldMatchData tfmd; + MatchParams match_params(dummy_heap, dummy_heap.getMinScore(), 1.0, 1); + match_params.setDocIdLimit(ir.getDocIdLimit()); + std::vector dict_entries; /* one dictionary lookup per child, keyed by the decimal string of the child index */ + for (size_t i = 0; i < num_children; ++i) { + dict_entries.push_back(helper.dwa().lookup(vespalib::make_string("%zu", i).c_str())); + } + auto search = create_wand(use_dwa, tfmd, match_params, weights, dict_entries, helper.dwa(), strict); + ir.verify(*search); + } + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/predicate/.gitignore b/searchlib/src/tests/queryeval/predicate/.gitignore new file mode 100644 index 00000000000..7f94446d571 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/.gitignore @@ -0,0 +1,2 @@ +searchlib_predicate_blueprint_test_app +searchlib_predicate_search_test_app diff --git a/searchlib/src/tests/queryeval/predicate/CMakeLists.txt b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt new file mode 100644 index 00000000000..e1c4ebf9aa8 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_predicate_blueprint_test_app + SOURCES + predicate_blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_blueprint_test_app COMMAND searchlib_predicate_blueprint_test_app) +vespa_add_executable(searchlib_predicate_search_test_app + SOURCES + predicate_search_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_search_test_app COMMAND searchlib_predicate_search_test_app) diff --git a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp new file mode 100644 index 00000000000..3b609849141 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp @@ -0,0 +1,241 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_blueprint. + +#include +LOG_SETUP("predicate_blueprint_test"); +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search; +using namespace search::predicate; +using search::fef::TermFieldMatchDataArray; +using search::query::PredicateQueryTerm; +using search::query::SimplePredicateQuery; +using search::query::Weight; +using search::queryeval::FieldSpecBase; +using search::queryeval::PredicateBlueprint; +using search::queryeval::SearchIterator; + +namespace { + /* Test fixture: a PredicateAttribute named "f" held as AttributeVector::SP, a field spec (42, 0) and a default query with feature key=value and range feature range_key=42. */ +struct Fixture { + FieldSpecBase field; + AttributeVector::SP attribute; + vespalib::GenerationHandler generation_handler; + SimplePredicateQuery query; + + using IntervalRange = PredicateAttribute::IntervalRange; + + Fixture() + : field(42, 0), + attribute(new PredicateAttribute("f", attribute::Config(attribute::BasicType::PREDICATE))), + query(PredicateQueryTerm::UP(new PredicateQueryTerm), + "view", 0, Weight(1)) { + query.getTerm()->addFeature("key", "value"); + query.getTerm()->addRangeFeature("range_key", 42); + } + 
PredicateAttribute & guard() { /* despite the name, returns the attribute itself, downcast with dynamic_cast */ + return dynamic_cast(*attribute); + } + PredicateIndex & index() { + return predicate().getIndex(); + } + PredicateAttribute & predicate() { return static_cast(*attribute); } /* Adds docs until doc_id is a valid index (getNumDocs() > doc_id), zeroing the min-feature slot of every doc that is added. */ + void resize(uint32_t doc_id) { + while (predicate().getNumDocs() <= doc_id) { + uint32_t tmp; + predicate().addDoc(tmp); + PredicateAttribute::MinFeatureHandle mfh = predicate().getMinFeatureVector(); + const_cast(mfh.first)[tmp] = 0; + } + } /* Writes interval_range straight into the attribute's interval range vector, casting away its constness. */ + void setIntervalRange(uint32_t doc_id, IntervalRange interval_range) { + const_cast(predicate().getIntervalRangeVector())[doc_id] = interval_range; + } /* Both index helpers grow the attribute, index the document, record its interval range, update the max interval range and commit. */ + void indexEmptyDocument(uint32_t doc_id, IntervalRange ir = 0x1) { + resize(doc_id); + index().indexEmptyDocument(doc_id); + setIntervalRange(doc_id, ir); + predicate().updateMaxIntervalRange(ir); + predicate().commit(false); + } + void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations, IntervalRange ir = 0xffff) { + resize(doc_id); + index().indexDocument(doc_id, annotations); + setIntervalRange(doc_id, ir); + predicate().updateMaxIntervalRange(ir); + predicate().commit(false); + } +}; + +TEST_F("require that blueprint with empty index estimates empty.", Fixture) { + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_TRUE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + +TEST_F("require that blueprint with zero-constraint doc estimates non-empty.", + Fixture) { + f.indexEmptyDocument(42); + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(1u, blueprint.getState().estimate().estHits); +} + /* Shared constants for the tests that follow. */ +const int min_feature = 1; +const uint32_t doc_id = 2; +const uint32_t interval = 0x0001ffff; + +TEST_F("require that blueprint with posting list entry estimates non-empty.", + Fixture) { + PredicateTreeAnnotations annotations(min_feature); + annotations.interval_map[PredicateHash::hash64("key=value")] = + 
std::vector{{interval}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + /* Same check as above, but the document is indexed through bounds_map under the hash of "range_key=40". */ +TEST_F("require that blueprint with 'bounds' posting list entry estimates " + "non-empty.", Fixture) { + PredicateTreeAnnotations annotations(min_feature); + annotations.bounds_map[PredicateHash::hash64("range_key=40")] = + std::vector{{interval, 0x80000003}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + /* Here the document is indexed under PredicateIndex::z_star_compressed_hash. */ +TEST_F("require that blueprint with zstar-compressed estimates non-empty.", + Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_compressed_hash] = + std::vector{{0xfffe0000}}; + f.indexDocument(doc_id, annotations); + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + /* Creates a leaf search over one indexed doc: seeking below doc_id misses but positions the iterator on doc_id, seeking doc_id hits, and seeking past it reaches the end. */ +TEST_F("require that blueprint can create search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector{{interval}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId()); + EXPECT_FALSE(it->seek(doc_id - 1)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); + EXPECT_TRUE(it->isAtEnd()); +} + +TEST_F("require that blueprint can create 
more advanced search", Fixture) { /* indexes doc_id with both an interval and a bounds entry (interval range 0x10) plus an empty document at doc_id + 2; both are expected as hits */ + PredicateTreeAnnotations annotations(2); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector{{0x00010001}}; + annotations.bounds_map[PredicateHash::hash64("range_key=40")] = + std::vector{{0x00020010, 0x40000005}}; // [40..44] + f.indexDocument(doc_id, annotations, 0x10); + f.indexEmptyDocument(doc_id + 2); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId()); + EXPECT_FALSE(it->seek(doc_id - 1)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); + EXPECT_EQUAL(doc_id + 2, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id + 2)); + EXPECT_FALSE(it->seek(doc_id + 3)); + EXPECT_TRUE(it->isAtEnd()); +} + /* Indexes doc_id under PredicateIndex::z_star_hash with two intervals and expects the search to match it. */ +TEST_F("require that blueprint can create NOT search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_hash] = + std::vector{{0x00010000}, {0xffff0001}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); +} + /* Same expectation with a single interval stored under PredicateIndex::z_star_compressed_hash. */ +TEST_F("require that blueprint can create compressed NOT search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_compressed_hash] = + std::vector{{0xfffe0000}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + 
TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); +} + /* Builds its own query instead of using the fixture's: the two features are added with an extra third argument (1 and 2), and the seek to doc_id is expected to fail. NOTE(review): the third addFeature() argument is presumably a subquery bitmap - confirm against PredicateQueryTerm. */ +TEST_F("require that blueprint can set up search with subqueries", Fixture) { + PredicateTreeAnnotations annotations(2); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector{{0x00010001}}; + annotations.interval_map[PredicateHash::hash64("key2=value")] = + std::vector{{0x0002ffff}}; + f.indexDocument(doc_id, annotations); + + SimplePredicateQuery query(PredicateQueryTerm::UP(new PredicateQueryTerm), + "view", 0, Weight(1)); + query.getTerm()->addFeature("key", "value", 1); + query.getTerm()->addFeature("key2", "value", 2); + + PredicateBlueprint blueprint(f.field, f.guard(), query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_FALSE(it->seek(doc_id)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp new file mode 100644 index 00000000000..5954d51ec9b --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp @@ -0,0 +1,370 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_search. 
+ +#include +LOG_SETUP("predicate_search_test"); +#include + +#include +#include +#include +#include +#include + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using namespace search::queryeval; +using namespace search::predicate; +using std::pair; +using std::vector; +using vespalib::arraysize; + +namespace { + +class MyPostingList : public PredicatePostingList { + vector> _entries; + size_t _index; + uint32_t _interval; + + void setInterval(uint32_t interval) { _interval = interval; } + +public: + MyPostingList(const vector> &entries) + : _entries(entries), + _index(0) { + } + MyPostingList(std::initializer_list> ilist) + : _entries(ilist.begin(), ilist.end()), + _index(0) { + } + + bool next(uint32_t doc_id) override { + if (_index < _entries.size()) { + while (_entries[_index].first <= doc_id) { + ++_index; + if (_index == _entries.size()) { + setDocId(search::endDocId); + return false; + } + } + setDocId(_entries[_index].first); + setInterval(_entries[_index].second); + return true; + } + setDocId(search::endDocId); + return false; + } + + bool nextInterval() override { + if (_index + 1 < _entries.size() && + _entries[_index].first == _entries[_index + 1].first) { + ++_index; + setInterval(_entries[_index].second); + return true; + } + return false; + } + uint32_t getInterval() const override { return _interval; } +}; + +template +vector +make_posting_lists_vector(MyPostingList (&plists)[N]) { + vector posting_lists; + for (int i = 0; i < N; ++i) { + posting_lists.emplace_back(std::make_unique(plists[i])); + } + return posting_lists; +} + +TermFieldMatchDataArray tfmda; +typedef std::vector CV; +typedef std::vector MF; +typedef std::vector IR; + +TEST("Require that the skipping is efficient") { + const uint8_t min_feature[] = { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}; + const uint8_t kv[] = { 6,7,6,7,6,7,6,8,6,5,6,7,6,0,6,7, + 7,6,7,6,6,6,6,7,7,7,8,7,8,7,7,7,6,7}; + SkipMinFeature::UP skip 
= SkipMinFeature::create(min_feature, kv, 34); /* the expected values are exactly the indices i where kv[i] >= min_feature[i] (7 throughout) */ + EXPECT_EQUAL(1u, skip->next()); + EXPECT_EQUAL(3u, skip->next()); + EXPECT_EQUAL(5u, skip->next()); + EXPECT_EQUAL(7u, skip->next()); + EXPECT_EQUAL(11u, skip->next()); + EXPECT_EQUAL(15u, skip->next()); + EXPECT_EQUAL(16u, skip->next()); + EXPECT_EQUAL(18u, skip->next()); + EXPECT_EQUAL(23u, skip->next()); + EXPECT_EQUAL(24u, skip->next()); + EXPECT_EQUAL(25u, skip->next()); + EXPECT_EQUAL(26u, skip->next()); + EXPECT_EQUAL(27u, skip->next()); + EXPECT_EQUAL(28u, skip->next()); + EXPECT_EQUAL(29u, skip->next()); + EXPECT_EQUAL(30u, skip->next()); + EXPECT_EQUAL(31u, skip->next()); + EXPECT_EQUAL(33u, skip->next()); +} + /* No posting lists at all: the first seek fails and leaves the iterator at end. */ +TEST("require that empty search yields no results") { + vector posting_lists; + MF mf(3); CV cv(3); IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, std::move(posting_lists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(2)); + EXPECT_TRUE(search.isAtEnd()); +} + /* One posting list with a single entry for doc 2 covering interval 0x0001ffff. */ +TEST("require that simple search yields result") { + MyPostingList plists[] = {{{2, 0x0001ffff}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 1}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(1)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_TRUE(search.seek(2)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.isAtEnd()); +} + /* Doc 2 has a min-feature value of 3 in mf, so seeking to it fails and the search moves on to doc 5. */ +TEST("require that minFeature (K) is used to prune results") { + MyPostingList plists[] = {{{2, 0x0001ffff}}, + {{5, 0x0001ffff}}}; + MF mf{0, 0, 3, 0, 0, 0}; + CV cv{1, 0, 0, 0, 0, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); + EXPECT_EQUAL(5u, search.getDocId()); +} + 
+TEST("require that a high K (min_feature - 1) can yield results") {
+    MyPostingList plists[] = {{{2, 0x00010001}},
+                              {{2, 0x0002ffff}}};  // two lists, both with one entry for doc 2
+    MF mf{0, 0, 2};  // NOTE(review): mf/cv/ir look like one slot per doc id - confirm against PredicateSearch
+    CV cv{0, 0, 2};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that we can skip past entries") {
+    MyPostingList plists[] = {{{2, 0x0001ffff},
+                               {5, 0x0001ffff}}};  // one list with entries for docs 2 and 5
+    MF mf{0, 0, 0, 0, 0, 0};
+    CV cv{0, 0, 1, 0, 0, 1};
+    IR ir(6, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(5));  // seeks straight to doc 5 without visiting doc 2 first
+}
+
+TEST("require that posting lists are sorted after advancing") {
+    MyPostingList plists[] = {{{1, 0x0001ffff},
+                               {5, 0x0001ffff}},
+                              {{2, 0x0001ffff},
+                               {4, 0x0001ffff}}};
+    MF mf{0, 2, 0, 0, 0, 0};  // slot 1 holds 2 while only one list contains doc 1; seek(1) below is expected to fail
+    CV cv{0, 1, 1, 0, 1, 1};
+    IR ir(6, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_FALSE(search.seek(1));
+    EXPECT_FALSE(search.seek(3));  // no list has an entry for doc 3
+    EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that short interval ranges works") {
+    MyPostingList plists[] = {{{1, 0x00010001},
+                               {5, 0x00010001}},
+                              {{2, 0x00010001},
+                               {4, 0x00010001}}};  // same doc ids as the previous test, but every interval is 0x00010001
+    MF mf{0, 2, 0, 0, 0, 0};
+    CV cv{0, 1, 1, 0, 1, 1};
+    IR ir(6, 0x0001);  // every slot is 0x0001 here instead of 0xffff
+    PredicateSearch search(&mf[0], &ir[0], 0x1, cv, make_posting_lists_vector(plists), tfmda);  // third argument is 0x1, matching the ir values
+    search.initFullRange();
+    EXPECT_FALSE(search.seek(1));
+    EXPECT_FALSE(search.seek(3));
+    EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that empty posting lists work") {
+    MyPostingList plists[] = {{}};  // one posting list without any entries
+    MF mf(3); CV cv(3); IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId());
+    EXPECT_FALSE(search.seek(2));
+    EXPECT_TRUE(search.isAtEnd());
+}
+
+TEST("require that shorter posting list ending is ok") {
+    MyPostingList plists[] = {{{1, 0x0001ffff},
+                               {2, 0x0001ffff}},
+                              {{4, 0x0001ffff}}};  // the first list ends at doc 2, the second one only has doc 4
+    MF mf{0, 0, 0, 0, 0};  // NOTE(review): mf/cv/ir look like one slot per doc id - confirm against PredicateSearch
+    CV cv{0, 1, 1, 0, 1};
+    IR ir(5, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(1));
+    EXPECT_TRUE(search.seek(4));
+}
+
+TEST("require that sorting works for many posting lists") {
+    MyPostingList plists[] = {{{1, 0x0001ffff},
+                               {2, 0x0001ffff}},
+                              {{2, 0x0001ffff},
+                               {4, 0x0001ffff}},
+                              {{2, 0x0001ffff},
+                               {5, 0x0001ffff}},
+                              {{2, 0x0001ffff},
+                               {4, 0x0001ffff}},
+                              {{2, 0x0001ffff},
+                               {5, 0x0001ffff}}};  // five lists: doc 1 in one of them, doc 2 in all five, docs 4 and 5 in two each
+    MF mf{0, 1, 5, 0, 2, 2};  // per slot this equals the number of lists containing that doc id
+    CV cv{0, 1, 5, 0, 2, 2};
+    IR ir(6, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(1));
+    EXPECT_TRUE(search.seek(2));
+    EXPECT_TRUE(search.seek(4));
+    EXPECT_TRUE(search.seek(5));
+}
+
+TEST("require that insufficient interval coverage prevents match") {
+    MyPostingList plists[] = {{{2, 0x00010001},
+                               {3, 0x0002ffff}}};
+    MF mf{0, 0, 0, 0};
+    CV cv{0, 0, 1, 1};
+    IR ir(4, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_FALSE(search.seek(2));  // both docs appear in the posting list, yet neither is expected to match
+    EXPECT_FALSE(search.seek(3));
+}
+
+TEST("require that intervals are sorted") {
+    MyPostingList plists[] = {{{2, 0x00010001}},
+                              {{2, 0x0003ffff}},
+                              {{2, 0x00020002}}};  // the lists are deliberately given out of interval order
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 3};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that NOT is supported - no match") {
+    MyPostingList plists[] = {{{2, 0x00010001}},   // [l, r]
+                              {{2, 0x00010000},    // [l, r]*
+                               {2, 0xffff0001}}};  // [r+1, r+1]*
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 3};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_FALSE(search.seek(2));
+}
+
+TEST("require that NOT is supported - match") {
+    MyPostingList plists[] = {{{2, 0x00010000},    // [l, r]*
+                               {2, 0xffff0001}}};  // [r+1, r+1]*
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 2};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that NOT is supported - no match because of previous term") {
+    MyPostingList plists[] = {{{2, 0x00020001},    // [l, r]*
+                               {2, 0xffff0002}}};  // [r+1, r+1]*
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 2};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_FALSE(search.seek(2));
+}
+
+TEST("require that NOT is supported - subqueries") {
+    MyPostingList plists[] = {{{2, 0x00010001}},   // [l, r]
+                              {{2, 0x00010000},    // [l, r]*
+                               {2, 0xffff0001}}};  // [r+1, r+1]*
+    plists[0].setSubquery(0xffff);  // same posting lists as the "no match" NOT test; only this call differs and a match is expected
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 3};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that there can be many intervals") {
+    MyPostingList plists[] = {{{2, 0x00010001},
+                               {2, 0x00020002},
+                               {2, 0x00030003},
+                               {2, 0x0001ffff},
+                               {2, 0x00040004},
+                               {2, 0x00050005},
+                               {2, 0x00060006}}};  // seven entries, all for doc 2, in a single list
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 7};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that match can require multiple postinglists.") {
+    MyPostingList plists[] = {{{2, 0x00010001}},
+                              {{2, 0x0002000b},
+                               {2, 0x00030003}},
+                              {{2, 0x00040003}},
+                              {{2, 0x00050004}},
+                              {{2, 0x00010008},
+                               {2, 0x00060006}},
+                              {{2, 0x00020002},
+                               {2, 0x0007ffff}}};  // nine entries for doc 2 spread over six lists
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 9};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+}
+
+TEST("require that subquery bitmap is unpacked to subqueries.") {
+    MyPostingList plists[] = {{{2, 0x0001ffff}}};
+    TermFieldMatchDataArray array;  // this test supplies its own match data instead of the file-level tfmda
+    TermFieldMatchData data;
+    array.add(&data);
+    MF mf{0, 0, 0};
+    CV cv{0, 0, 1};
+    IR ir(3, 0xffff);
+    PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), array);
+    search.initFullRange();
+    EXPECT_TRUE(search.seek(2));
+    search.unpack(2);
+    EXPECT_EQUAL(0xffffffffffffffffULL,
+                 static_cast(data.getSubqueries()));  // after unpack(2) all 64 subquery bits are expected to be set in data
+}
+
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/queryeval.cpp b/searchlib/src/tests/queryeval/queryeval.cpp
new file mode 100644
index 00000000000..3a2070a1fd8
--- /dev/null
+++ b/searchlib/src/tests/queryeval/queryeval.cpp
@@ -0,0 +1,691 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+using namespace search::queryeval;
+using search::fef::MatchData;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::BitVector;
+using search::BitVectorIterator;
+using search::test::InitRangeVerifier;
+
+//-----------------------------------------------------------------------------
+
+template >  // NOTE(review): the template parameter list (T and V) is missing in this copy, as are the other <...> argument lists in this file
+class Collect  // Fluent builder: add() appends to the wrapped container and the object converts implicitly to const V &.
+{
+private:
+    V _data;
+
+public:
+    Collect &add(const T &t) {
+        _data.push_back(t);
+        return *this;  // returning *this allows chained add() calls
+    }
+    operator const V &() const { return _data; }
+};
+
+SearchIterator *simple(const std::string &tag) {  // New SimpleSearch over an empty SimpleResult, tagged with `tag`; handed out as a raw pointer.
+    return &((new SimpleSearch(SimpleResult()))->tag(tag));
+}
+
+Collect search2(const std::string &t1, const std::string &t2) {  // Two simple() iterators tagged t1 and t2, collected in that order.
+    return Collect().add(simple(t1)).add(simple(t2));
+}
+
+
+class ISourceSelectorDummy : public ISourceSelector  // Gives the tests access to an Iterator over a static dummy SourceStore.
+{
+    static SourceStore _sourceStoreDummy;
+
+public:
+    static Iterator::UP
+    makeDummyIterator()
+    {
+        return Iterator::UP(new Iterator(_sourceStoreDummy));
+    }
+};
+
+ISourceSelector::SourceStore ISourceSelectorDummy::_sourceStoreDummy("foo");
+
+ISourceSelector::Iterator::UP selector() {  // Shorthand used by testDump for the source blender's selector argument.
+    return ISourceSelectorDummy::makeDummyIterator();
+}
+
+//-----------------------------------------------------------------------------
+
+void testMultiSearch(SearchIterator & search) {  // Checks that initRange(3, 309) leaves the multi search and every child at doc id 2 with end id 309.
+    MultiSearch & ms = dynamic_cast(search);
+    ms.initRange(3, 309);
+    EXPECT_EQUAL(2u, ms.getDocId());
+    EXPECT_EQUAL(309u, ms.getEndId());
+    for (const auto & child : ms.getChildren()) {
+        EXPECT_EQUAL(2u, child->getDocId());
+        EXPECT_EQUAL(309u, child->getEndId());
+    }
+}
+
+TEST("test that OR.andWith is a NOOP") {
+    TermFieldMatchData tfmd;
+    MultiSearch::Children ch;
+    ch.push_back(new TrueSearch(tfmd));
+    ch.push_back(new TrueSearch(tfmd));
+    SearchIterator::UP search(OrSearch::create(ch, true));
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+
+    EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 1).get());  // a non-null return means the filter was handed back instead of being absorbed
+}
+
+TEST("test that non-strict AND.andWith is a NOOP") {
+    TermFieldMatchData tfmd;
+    MultiSearch::Children ch;
+    ch.push_back(new TrueSearch(tfmd));
+    ch.push_back(new TrueSearch(tfmd));
+    SearchIterator::UP search(AndSearch::create(ch, false));
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    filter = search->andWith(std::move(filter), 8);
+    EXPECT_TRUE(nullptr != filter.get());
+}
+
+TEST("test that strict AND.andWith steals filter and places it correctly based on estimate") {
+    TermFieldMatchData tfmd;
+    MultiSearch::Children ch;
+    ch.push_back(new TrueSearch(tfmd));
+    ch.push_back(new TrueSearch(tfmd));
+    SearchIterator::UP search(AndSearch::create(ch, true));
+    static_cast(*search).estimate(7);
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    SearchIterator * filterP = filter.get();  // remember the raw pointer; `filter` itself is moved from below
+
+    EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());  // estimate 8 against the 7 set above: expected to land after the first child
+    const MultiSearch::Children & andChildren = static_cast(*search).getChildren();
+    EXPECT_EQUAL(3u, andChildren.size());
+    EXPECT_EQUAL(ch[0], andChildren[0]);
+    EXPECT_EQUAL(filterP, andChildren[1]);
+    EXPECT_EQUAL(ch[1], andChildren[2]);
+
+    SearchIterator::UP filter2(new TrueSearch(tfmd));
+    SearchIterator * filter2P = filter2.get();
+    EXPECT_TRUE(nullptr == search->andWith(std::move(filter2), 6).get());  // estimate 6 against 7: expected to be placed first
+    EXPECT_EQUAL(4u, andChildren.size());
+    EXPECT_EQUAL(filter2P, andChildren[0]);
+    EXPECT_EQUAL(ch[0], andChildren[1]);
+    EXPECT_EQUAL(filterP, andChildren[2]);
+    EXPECT_EQUAL(ch[1], andChildren[3]);
+}
+
+class NonStrictTrueSearch : public TrueSearch  // TrueSearch variant whose is_strict() reports Trinary::False.
+{
+public:
+    NonStrictTrueSearch(TermFieldMatchData & tfmd) : TrueSearch(tfmd) { }
+    Trinary is_strict() const override { return Trinary::False; }
+};
+
+TEST("test that strict AND.andWith does not place non-strict iterator first") {
+    TermFieldMatchData tfmd;
+    MultiSearch::Children ch;
+    ch.push_back(new TrueSearch(tfmd));
+    ch.push_back(new TrueSearch(tfmd));
+    SearchIterator::UP search(AndSearch::create(ch, true));
+    static_cast(*search).estimate(7);
+    SearchIterator::UP filter(new NonStrictTrueSearch(tfmd));
+    SearchIterator * filterP = filter.get();
+    EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 6).get());  // estimate 6 is below 7, yet the non-strict filter is expected at index 1, not 0
+    const MultiSearch::Children & andChildren = static_cast(*search).getChildren();
+    EXPECT_EQUAL(3u, andChildren.size());
+    EXPECT_EQUAL(ch[0], andChildren[0]);
+    EXPECT_EQUAL(filterP, andChildren[1]);
+    EXPECT_EQUAL(ch[1], andChildren[2]);
+}
+
+TEST("test that strict rank search forwards to its greedy first child") {
+    TermFieldMatchData tfmd;
+    SearchIterator::UP search(
+            RankSearch::create(
+                    Collect()
+                    .add(AndSearch::create(search2("a", "b"), true))
+                    .add(new TrueSearch(tfmd)),
+                    true)
+    );
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());  // null return: the filter was taken
+}
+
+TEST("test that non-strict rank search does NOT forward to its greedy first child") {
+    TermFieldMatchData tfmd;
+    SearchIterator::UP search(
+            RankSearch::create(
+                    Collect()
+                    .add(AndSearch::create(search2("a", "b"), true))
+                    .add(new TrueSearch(tfmd)),
+                    false)
+    );
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());  // non-null return: the filter was handed back
+}
+
+TEST("test that strict andnot search forwards to its greedy first child") {
+    TermFieldMatchData tfmd;
+    SearchIterator::UP search(
+            AndNotSearch::create(
+                    Collect()
+                    .add(AndSearch::create(search2("a", "b"), true))
+                    .add(new TrueSearch(tfmd)),
+                    true)
+    );
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
+}
+
+TEST("test that non-strict andnot search does NOT forward to its greedy first child") {
+    TermFieldMatchData tfmd;
+    SearchIterator::UP search(
+            AndNotSearch::create(
+                    Collect()
+                    .add(AndSearch::create(search2("a", "b"), true))
+                    .add(new TrueSearch(tfmd)),
+                    false)
+    );
+    SearchIterator::UP filter(new TrueSearch(tfmd));
+    EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());
+}
+
+TEST("testAnd") {
+    SimpleResult a;
+    SimpleResult b;
+    a.addHit(5).addHit(10).addHit(16).addHit(30);
+    b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52);
+
+    MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+    AndBlueprint *and_b = new AndBlueprint();
+    and_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+    and_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+    Blueprint::UP bp(and_b);
+    bp->fetchPostings(true);
+    SearchIterator::UP and_ab = bp->createSearch(*md, true);
+
+    EXPECT_TRUE(dynamic_cast(and_ab.get()) != nullptr);
+    EXPECT_EQUAL(4u, dynamic_cast(*and_ab).estimate());  // 4 equals the hit count of `a`, the smaller of the two children
+    SimpleResult res;
+    res.search(*and_ab);
+    SimpleResult expect;
+    expect.addHit(5).addHit(30);  // the doc ids present in both a and b
+
+    EXPECT_EQUAL(res, expect);
+}
+
+TEST("mutisearch and initRange") {  // NOTE(review): this test has an empty body
+}
+
+TEST("testOr") {
+    {
+        SimpleResult a;
+        SimpleResult b;
+        a.addHit(5).addHit(10);
+        b.addHit(5).addHit(17).addHit(30);
+
+        MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+        OrBlueprint *or_b = new OrBlueprint();
+        or_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+        or_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+        Blueprint::UP bp(or_b);
+        bp->fetchPostings(true);
+        SearchIterator::UP or_ab = bp->createSearch(*md, true);
+
+        SimpleResult res;
+        res.search(*or_ab);
+        SimpleResult expect;
+        expect.addHit(5).addHit(10).addHit(17).addHit(30);  // the doc ids present in a or b, each listed once
+
+        EXPECT_EQUAL(res, expect);
+    }
+    {
+        TermFieldMatchData tfmd;
+        MultiSearch::Children ch;
+        ch.push_back(new TrueSearch(tfmd));
+        ch.push_back(new TrueSearch(tfmd));
+        ch.push_back(new TrueSearch(tfmd));
+        SearchIterator::UP orSearch(OrSearch::create(ch, true));
+        testMultiSearch(*orSearch);
+    }
+}
+
+class TestInsertRemoveSearch : public MultiSearch  // MultiSearch that sums the indexes passed to onRemove()/onInsert() so a test can observe the callbacks.
+{
+public:
+    TestInsertRemoveSearch(const MultiSearch::Children & children) :
+        MultiSearch(children),
+        _accumRemove(0),
+        _accumInsert(0)
+    { }
+    virtual void onRemove(size_t index) { _accumRemove += index; }
+    virtual void onInsert(size_t index) { _accumInsert += index; }
+    size_t _accumRemove;  // sum of all indexes seen by onRemove()
+    size_t _accumInsert;  // sum of all indexes seen by onInsert()
+private:
+    virtual void doSeek(uint32_t docid) { (void) docid; }  // seeking is not exercised; the cast only marks the parameter as used
+};
+
+TEST("testMultiSearch") {
+    MultiSearch::Children children;
+    children.push_back(new EmptySearch());
+    children.push_back(new EmptySearch());
+    children.push_back(new EmptySearch());
+    TestInsertRemoveSearch ms(children);
+    EXPECT_EQUAL(3u, ms.getChildren().size());
+    EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+    EXPECT_EQUAL(children[1], ms.getChildren()[1]);
+    EXPECT_EQUAL(children[2], ms.getChildren()[2]);
+    EXPECT_EQUAL(0u, ms._accumInsert);
+    EXPECT_EQUAL(0u, ms._accumRemove);
+
+    EXPECT_EQUAL(children[1], ms.remove(1).get());  // `children` keeps the raw pointer values, used below for identity comparison only
+    EXPECT_EQUAL(2u, ms.getChildren().size());
+    EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+    EXPECT_EQUAL(children[2], ms.getChildren()[1]);
+    EXPECT_EQUAL(0u, ms._accumInsert);
+    EXPECT_EQUAL(1u, ms._accumRemove);
+
+    children.push_back(new EmptySearch());
+    ms.insert(1, SearchIterator::UP(children.back()));
+    EXPECT_EQUAL(3u, ms.getChildren().size());
+    EXPECT_EQUAL(children[0], ms.getChildren()[0]);
+    EXPECT_EQUAL(children[3], ms.getChildren()[1]);
+    EXPECT_EQUAL(children[2], ms.getChildren()[2]);
+    EXPECT_EQUAL(1u, ms._accumInsert);
+    EXPECT_EQUAL(1u, ms._accumRemove);
+}
+
+class DummySingleValueBitNumericAttributeBlueprint : public SimpleLeafBlueprint  // Leaf blueprint backed by a SingleValueBitNumericAttribute in which the documents listed in `result` are set to 1.
+{
+public:
+    DummySingleValueBitNumericAttributeBlueprint(const SimpleResult & result) :
+        SimpleLeafBlueprint(FieldSpecBaseList()),
+        _a("a"),
+        _sc(),
+        _tfmd()
+    {
+        for (size_t i(0); i < result.getHitCount(); i++) {
+            size_t docId(result.getHit(i));
+            uint32_t curDoc(0);
+            for (_a.addDoc(curDoc); curDoc < docId; _a.addDoc(curDoc));  // keeps adding documents until curDoc reaches docId; presumably addDoc() writes the new doc id into curDoc - confirm
+            _a.update(docId, 1);
+        }
+        _a.commit();
+        _sc = _a.getSearch(search::QueryTermSimple::UP(new search::QueryTermSimple("1", search::QueryTermSimple::WORD)),
+                           search::AttributeVector::SearchContext::Params().useBitVector(true));  // search context for the term "1", created with useBitVector(true)
+    }
+    virtual SearchIterator::UP
+    createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+    {
+        (void) tfmda;  // the passed-in match data is ignored; the member _tfmd is used instead
+        return _sc->createIterator(&_tfmd, strict);
+    }
+private:
+    search::SingleValueBitNumericAttribute _a;
+    search::AttributeVector::SearchContext::UP _sc;
+    mutable TermFieldMatchData _tfmd;  // mutable because the const createLeafSearch() hands out a non-const pointer to it
+};
+
+
+TEST("testAndNot") {
+    {
+        SimpleResult a;
+        SimpleResult b;
+        a.addHit(5).addHit(10);
+        b.addHit(5).addHit(17).addHit(30);
+
+        MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+        AndNotBlueprint *andnot_b = new AndNotBlueprint();
+        andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+        andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+        Blueprint::UP bp(andnot_b);
+        bp->fetchPostings(true);
+        SearchIterator::UP andnot_ab = bp->createSearch(*md, true);
+
+        SimpleResult res;
+        res.search(*andnot_ab);
+        SimpleResult expect;
+        expect.addHit(10);  // the only hit of a that is not also in b
+
+        EXPECT_EQUAL(res, expect);
+    }
+    {
+        SimpleResult a;
+        SimpleResult b;
+        a.addHit(1).addHit(5).addHit(10);
+        b.addHit(5).addHit(17).addHit(30);
+
+        MatchData::UP md(MatchData::makeTestInstance(2, 100, 10));
+        AndNotBlueprint *andnot_b = new AndNotBlueprint();
+        andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+        andnot_b->addChild(Blueprint::UP(new DummySingleValueBitNumericAttributeBlueprint(b)));  // second child comes from the attribute-backed blueprint this time
+        Blueprint::UP bp(andnot_b);
+        bp->fetchPostings(true);
+        SearchIterator::UP andnot_ab = bp->createSearch(*md, true);
+        EXPECT_TRUE(dynamic_cast(andnot_ab.get()) != NULL);
+
+        SimpleResult res;
+        res.search(*andnot_ab);
+        SimpleResult expect;
+        expect.addHit(1).addHit(10);
+
+        EXPECT_EQUAL(res, expect);
+    }
+    {
+        SimpleResult a;
+        SimpleResult b;
+        SimpleResult c;
+        a.addHit(1).addHit(5).addHit(10);
+        b.addHit(5).addHit(17).addHit(30);
+        c.addHit(1).addHit(5).addHit(10).addHit(17).addHit(30);
+
+        MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+        AndNotBlueprint *andnot_b = new AndNotBlueprint();
+        andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+        andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+
+        AndBlueprint *and_b = new AndBlueprint();
+        and_b->addChild(Blueprint::UP(new SimpleBlueprint(c)));
+        and_b->addChild(Blueprint::UP(andnot_b));  // the and-not blueprint is nested as the second child of an AND
+        Blueprint::UP bp(and_b);
+        bp->fetchPostings(true);
+        SearchIterator::UP and_cab = bp->createSearch(*md, true);
+
+        SimpleResult res;
+        res.search(*and_cab);
+        SimpleResult expect;
+        expect.addHit(1).addHit(10);
+
+        EXPECT_EQUAL(res, expect);
+    }
+    {
+    }
+}
+
+TEST("testRank") {
+    {
+        SimpleResult a;
+        SimpleResult b;
+        a.addHit(5).addHit(10).addHit(16).addHit(30);
+        b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52);
+
+        MatchData::UP md(MatchData::makeTestInstance(0, 100, 10));
+        RankBlueprint *rank_b = new RankBlueprint();
+        rank_b->addChild(Blueprint::UP(new SimpleBlueprint(a)));
+        rank_b->addChild(Blueprint::UP(new SimpleBlueprint(b)));
+        Blueprint::UP bp(rank_b);
+        bp->fetchPostings(true);
+        SearchIterator::UP rank_ab = bp->createSearch(*md, true);
+
+        SimpleResult res;
+        res.search(*rank_ab);
+        SimpleResult expect;
+        expect.addHit(5).addHit(10).addHit(16).addHit(30);  // exactly the hits of the first child a
+
+        EXPECT_EQUAL(res, expect);
+    }
+}
+
+TEST("testDump") {  // Builds one iterator tree containing each iterator type and checks that asString() yields more than 50 characters.
+    typedef SourceBlenderSearch::Child Source;
+    SearchIterator::UP search(
+            AndSearch::create(
+                    Collect()
+                    .add(AndNotSearch::create(search2("+", "-"), true))
+                    .add(AndSearch::create(search2("and_a", "and_b"), true))
+                    .add(new BooleanMatchIteratorWrapper(SearchIterator::UP(simple("wrapped")), TermFieldMatchDataArray()))
+                    .add(new NearSearch(search2("near_a", "near_b"),
+                                        TermFieldMatchDataArray(),
+                                        5u, true))
+                    .add(new ONearSearch(search2("onear_a", "onear_b"),
+                                         TermFieldMatchDataArray(), 10, true))
+                    .add(OrSearch::create(search2("or_a", "or_b"), false))
+                    .add(RankSearch::create(search2("rank_a", "rank_b"),false))
+                    .add(SourceBlenderSearch::create(selector(), Collect()
+                                                     .add(Source(simple("blend_a"), 2))
+                                                     .add(Source(simple("blend_b"), 4)), true))
+                    , true));
+    vespalib::string sas = search->asString();
+    EXPECT_TRUE(sas.size() > 50);
+    // fprintf(stderr, "%s", search->asString().c_str());
+}
+
+TEST("testFieldSpec") {
+    EXPECT_EQUAL(8u, sizeof(FieldSpecBase));
+    EXPECT_EQUAL(72u, sizeof(FieldSpec));
+}
+
+
+const size_t unpack_child_cnt = 500;  // child count handed to UnpackInfo::each() in vectorize()
+const size_t max_unpack_size = 31;  // the tests below expect "unpack all" once more than this many indexes are added
+const size_t max_unpack_index = 255;  // the tests below expect "unpack all" once an index above this is added
+
+std::vector vectorize(const UnpackInfo &unpack) {  // Collects every index that each() reports into a vector.
+    std::vector list;
+    unpack.each([&](size_t i){list.push_back(i);}, unpack_child_cnt);
+    return list;
+}
+
+std::vector fill_vector(size_t begin, size_t end) {  // Returns begin, begin+1, ..., end-1.
+    std::vector list;
+    for (size_t i = begin; i < end; ++i) {
+        list.push_back(i);
+    }
+    return list;
+}
+
+void verify_unpack(const UnpackInfo &unpack, const std::vector &expect) {  // Checks empty(), unpackAll(), each() and needUnpack() against the expected index list.
+    std::vector actual = vectorize(unpack);
+    EXPECT_EQUAL(unpack.empty(), expect.empty());
+    EXPECT_EQUAL(unpack.unpackAll(), (expect.size() == unpack_child_cnt));
+    EXPECT_EQUAL(expect, actual);
+    size_t child_idx = 0;
+    for (size_t next_unpack: expect) {
+        while (child_idx < next_unpack) {
+            EXPECT_FALSE(unpack.needUnpack(child_idx++));  // indexes between two expected entries must not need unpacking
+        }
+        EXPECT_TRUE(unpack.needUnpack(child_idx++));
+    }
+}
+
+TEST("require that unpack info has expected memory footprint") {
+    EXPECT_EQUAL(32u, sizeof(UnpackInfo));
+}
+
+TEST("require that unpack info starts out empty") {
+    verify_unpack(UnpackInfo(), {});
+}
+
+TEST("require that unpack info force all unpacks all children") {
+    verify_unpack(UnpackInfo().forceAll(), fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding a large index to unpack info forces unpack all") {
+    UnpackInfo unpack;
+    unpack.add(0);
+    unpack.add(max_unpack_index);
+    verify_unpack(unpack, {0, max_unpack_index});
+    unpack.add(max_unpack_index + 1);  // one past the largest index that is still tracked individually
+    verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding too many children to unpack info forces unpack all") {
+    UnpackInfo unpack;
+    std::vector expect;
+    for (size_t i = 0; i < max_unpack_size; ++i) {
+        unpack.add(i);
+        expect.push_back(i);
+    }
+    verify_unpack(unpack, expect);
+    unpack.add(100);  // entry number max_unpack_size + 1
+    verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+}
+
+TEST("require that adding normal unpack info indexes works") {
+    UnpackInfo unpack;
+    unpack.add(3).add(5).add(7).add(14).add(50);
+    verify_unpack(unpack, {3,5,7,14,50});
+}
+
+TEST("require that adding unpack info indexes out of order works") {
+    UnpackInfo unpack;
+    unpack.add(5).add(3).add(7).add(50).add(14);
+    verify_unpack(unpack, {3,5,7,14,50});
+}
+
+TEST("require that basic insert remove of unpack info works") {
+    UnpackInfo unpack;
+    unpack.insert(1).insert(3);
+    verify_unpack(unpack, {1, 3});
+    unpack.insert(0);  // existing indexes at or after the insert position are expected to shift up by one
+    verify_unpack(unpack, {0, 2, 4});
+    unpack.insert(3);
+    verify_unpack(unpack, {0, 2, 3, 5});
+    unpack.remove(1);  // indexes after the removed position are expected to shift down by one
+    verify_unpack(unpack, {0, 1, 2, 4});
+    unpack.remove(1);
+    verify_unpack(unpack, {0, 1, 3});
+    unpack.remove(1);
+    verify_unpack(unpack, {0, 2});
+    unpack.remove(2);
+    verify_unpack(unpack, {0});
+    unpack.remove(0);
+    verify_unpack(unpack, {});
+}
+
+TEST("require that inserting too many indexs into unpack info forces unpack all") {
+    for (bool unpack_inserted: {true, false}) {
+        UnpackInfo unpack;
+        for (size_t i = 0; i < max_unpack_size; ++i) {
+            unpack.add(i);
+        }
+        EXPECT_FALSE(unpack.unpackAll());
+        unpack.insert(0, unpack_inserted);
+        if (unpack_inserted) {
+            verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+        } else {
+            verify_unpack(unpack, fill_vector(1, max_unpack_size + 1));  // nothing new to unpack; the existing indexes are only shifted up by one
+        }
+    }
+}
+
+TEST("require that implicitly overflowing indexes during insert in unpack info forces unpack all") {
+    for (bool unpack_inserted: {true, false}) {
+        UnpackInfo unpack;
+        unpack.insert(max_unpack_index);
+        EXPECT_FALSE(unpack.unpackAll());
+        unpack.insert(5, unpack_inserted);  // expected to force "unpack all" for both values of unpack_inserted
+        verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+    }
+}
+
+TEST("require that inserting a too high index into unpack info forces unpack all") {
+    for (bool unpack_inserted: {true, false}) {
+        UnpackInfo unpack;
+        for (size_t i = 0; i < 10; ++i) {
+            unpack.add(i);
+        }
+        EXPECT_FALSE(unpack.unpackAll());
+        unpack.insert(max_unpack_index + 1, unpack_inserted);
+        if (unpack_inserted) {
+            verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
+        } else {
+            verify_unpack(unpack, fill_vector(0, 10));  // the existing indexes are expected to stay as they were
+        }
+    }
+}
+
+TEST("require that we can insert indexes into unpack info that we do not unpack") {
+    UnpackInfo unpack;
+    unpack.add(10).add(20).add(30);
+    verify_unpack(unpack, {10, 20, 30});
+    unpack.insert(5, false).insert(15, false).insert(25, false).insert(35, false);
+    verify_unpack(unpack, {11, 22, 33});  // each original index moved up by the number of inserts made before it
+}
+
+TEST("testTrueSearch") {
+    EXPECT_EQUAL(16u, sizeof(EmptySearch));
+    EXPECT_EQUAL(24u, sizeof(TrueSearch));
+
+    TermFieldMatchData tfmd;
+    TrueSearch t(tfmd);
+    EXPECT_EQUAL(0u, t.getDocId());
+    EXPECT_EQUAL(0u, t.getEndId());
+    t.initRange(7, 10);
+    EXPECT_EQUAL(6u, t.getDocId());  // after initRange(begin, end) the doc id is expected to be begin - 1
+    EXPECT_EQUAL(10u, t.getEndId());
+    EXPECT_TRUE(t.seek(9));
+    EXPECT_EQUAL(9u, t.getDocId());
+    EXPECT_FALSE(t.isAtEnd());
+    EXPECT_TRUE(t.seek(10));
+    EXPECT_EQUAL(10u, t.getDocId());
+    EXPECT_TRUE(t.isAtEnd());
+    t.resetRange();
+    t.initRange(4, 14);
+    EXPECT_EQUAL(3u, t.getDocId());
+    EXPECT_EQUAL(14u, t.getEndId());
+    EXPECT_FALSE(t.isAtEnd());
+}
+
+TEST("test InitRangeVerifier") {
+    InitRangeVerifier ir;
+    EXPECT_EQUAL(207u, ir.getDocIdLimit());
+    EXPECT_EQUAL(41u, ir.getExpectedDocIds().size());
+    auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), 300);
+    size_t numInverted = 300 - 41 - 1;  // 300 minus the 41 expected doc ids checked above, minus 1
+    EXPECT_EQUAL(numInverted, inverted.size());
+    EXPECT_EQUAL(2u, inverted[0]);
+    EXPECT_EQUAL(299u, inverted[numInverted - 1]);
+    ir.verify(*ir.createIterator(ir.getExpectedDocIds(), false));
+    ir.verify(*ir.createIterator(ir.getExpectedDocIds(), true));
+}
+
+TEST("Test multisearch and andsearchstrict iterators adheres to initRange") {
+    InitRangeVerifier ir;
+    ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false).release(),
+                                   ir.createFullIterator().release() }, false));  // release(): the raw pointers are passed on to AndSearch::create
+
+    ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true).release(),
+                                   ir.createFullIterator().release() }, true));
+}
+
+TEST("Test andnotsearchstrict iterators adheres to initRange") {
+    InitRangeVerifier ir;
+
+    TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false).release(),
+                                             ir.createEmptyIterator().release() }, false)));
+    TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true).release(),
+                                             ir.createEmptyIterator().release() }, true)));
+
+    auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), ir.getDocIdLimit());
+    TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(),
+                                             ir.createIterator(inverted, false).release() }, false)));  // full iterator with the inverted id list as the second child
+    TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(),
+                                             ir.createIterator(inverted, false).release() }, true)));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/simple_phrase/.cvsignore b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore
new file mode 100644
index 00000000000..78f4563a999
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore
@@ -0,0 +1,3 @@
+.depend
+Makefile
+simple_phrase_test
diff --git a/searchlib/src/tests/queryeval/simple_phrase/.gitignore b/searchlib/src/tests/queryeval/simple_phrase/.gitignore
new file mode 100644
index 00000000000..bfdb1a61782
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/.gitignore
@@ -0,0 +1,4 @@
+*_test
+.depend
+Makefile
+searchlib_simple_phrase_test_app
diff --git a/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt
new file mode 100644
index 00000000000..bc9e664a8cf
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_simple_phrase_test_app
+    SOURCES
+    simple_phrase_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_simple_phrase_test_app COMMAND searchlib_simple_phrase_test_app)
diff --git a/searchlib/src/tests/queryeval/simple_phrase/DESC b/searchlib/src/tests/queryeval/simple_phrase/DESC
new file mode 100644
index 00000000000..4b1ad693a7b
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/DESC
@@ -0,0 +1 @@
+simple_phrase test. Take a look at simple_phrase_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/simple_phrase/FILES b/searchlib/src/tests/queryeval/simple_phrase/FILES
new file mode 100644
index 00000000000..31d8e1af993
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/FILES
@@ -0,0 +1 @@
+simple_phrase_test.cpp
diff --git a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
new file mode 100644
index 00000000000..f813d7203d0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp
@@ -0,0 +1,341 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +LOG_SETUP("simple_phrase_test"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; + +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldHandle; +using search::query::SimpleStringTerm; +using search::query::Weight; +using std::unique_ptr; +using std::copy; +using std::string; +using std::vector; + +namespace { + +struct MyTerm : public search::queryeval::SimpleLeafBlueprint { + MyTerm(const FieldSpec &field, uint32_t hits) + : search::queryeval::SimpleLeafBlueprint(field) + { + setEstimate(HitEstimate(hits, (hits == 0))); + } + virtual SearchIterator::UP createLeafSearch( + const search::fef::TermFieldMatchDataArray &, bool) const + { + return SearchIterator::UP(); + } +}; + +class Test : public vespalib::TestApp { + void requireThatIteratorFindsSimplePhrase(bool useBlueprint); + void requireThatIteratorFindsLongPhrase(bool useBlueprint); + void requireThatStrictIteratorFindsNextMatch(bool useBlueprint); + void requireThatPhrasesAreUnpacked(bool useBlueprint); + void requireThatTermsCanBeEvaluatedInPriorityOrder(); + void requireThatBlueprintExposesFieldWithEstimate(); + void requireThatBlueprintForcesPositionDataOnChildren(); + void requireThatIteratorHonorsFutureDoom(); + void requireThatIteratorHonorsDoom(); + void requireThatDoomIsPropagated(); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("phrasesearch_test"); + + TEST_DO(requireThatIteratorFindsSimplePhrase(false)); + TEST_DO(requireThatIteratorFindsLongPhrase(false)); + TEST_DO(requireThatStrictIteratorFindsNextMatch(false)); + TEST_DO(requireThatPhrasesAreUnpacked(false)); + TEST_DO(requireThatTermsCanBeEvaluatedInPriorityOrder()); + + TEST_DO(requireThatIteratorFindsSimplePhrase(true)); + TEST_DO(requireThatIteratorFindsLongPhrase(true)); + 
TEST_DO(requireThatStrictIteratorFindsNextMatch(true)); + TEST_DO(requireThatPhrasesAreUnpacked(true)); + TEST_DO(requireThatBlueprintExposesFieldWithEstimate()); + TEST_DO(requireThatBlueprintForcesPositionDataOnChildren()); + TEST_DO(requireThatIteratorHonorsFutureDoom()); + TEST_DO(requireThatIteratorHonorsDoom()); + TEST_DO(requireThatDoomIsPropagated()); + + TEST_DONE(); +} + +const string field = "field"; +const uint32_t fieldId = 1; +const uint32_t doc_match = 42; +const uint32_t doc_no_match = 43; +const uint32_t phrase_handle = 1; + +class PhraseSearchTest +{ +private: + FakeRequestContext _requestContext; + FakeSearchable _index; + FieldSpec _phrase_fs; + SimplePhraseBlueprint _phrase; + std::vector _children; + MatchData::UP _md; + vector _order; + uint32_t _pos; + bool _strict; + +public: + PhraseSearchTest(bool expiredDoom=false) : + _requestContext(nullptr, expiredDoom ? 0 : std::numeric_limits::max()), + _index(), + _phrase_fs(field, fieldId, phrase_handle), + _phrase(_phrase_fs, _requestContext), + _children(), + _md(MatchData::makeTestInstance(0, 100, 10)), + _order(), + _pos(1), + _strict(false) + {} + + TermFieldHandle childHandle(uint32_t idx) const { return (10 * idx + 11); } + + void setStrict(bool strict) { _strict = strict; } + void setOrder(const vector &order) { _order = order; } + const TermFieldMatchData &tmd() const { return *_md->resolveTermField(phrase_handle); } + + PhraseSearchTest &addTerm(const string &term, bool last) { + return addTerm(term, FakeResult() + .doc(doc_match).pos(_pos) + .doc(doc_no_match).pos(_pos + last)); + } + + PhraseSearchTest &addTerm(const string &term, const FakeResult &r) { + _index.addResult(field, term, r); + ++_pos; + SimpleStringTerm term_node(term, field, 0, Weight(0)); + { + // make one child blueprint for explicit use + FieldSpecList fields; + fields.add(FieldSpec(field, fieldId, + childHandle(_children.size()))); + _children.push_back(_index.createBlueprint(_requestContext, fields, term_node)); + } 
+ { + // and one to be used by the phrase blueprint + FieldSpecList fields; + fields.add(_phrase.getNextChildField(_phrase_fs)); + _phrase.addTerm(_index.createBlueprint(_requestContext, fields, term_node)); + } + _order.push_back(_order.size()); + return *this; + } + + void + fetchPostings(bool useBlueprint) + { + if (useBlueprint) { + _phrase.fetchPostings(_strict); + return; + } + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->fetchPostings(_strict); + } + } + + // NB: using blueprint will ignore eval order override + SearchIterator *createSearch(bool useBlueprint) { + SearchIterator::UP search; + if (useBlueprint) { + search = _phrase.createSearch(*_md, _strict); + } else { + search::fef::TermFieldMatchDataArray childMatch; + for (size_t i = 0; i < _children.size(); ++i) { + childMatch.add(_md->resolveTermField(childHandle(i))); + } + SimplePhraseSearch::Children children; + for (size_t i = 0; i < _children.size(); ++i) { + children.push_back(_children[i]->createSearch(*_md, _strict).release()); + } + search.reset(new SimplePhraseSearch(children, MatchData::UP(), + childMatch, _order, + *_md->resolveTermField(phrase_handle), + _strict)); + } + search->initFullRange(); + return search.release(); + } +}; + +void Test::requireThatIteratorFindsSimplePhrase(bool useBlueprint) { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(useBlueprint); + unique_ptr search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatIteratorHonorsFutureDoom() { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(false); + vespalib::Clock clock; + vespalib::Doom futureDoom(clock, std::numeric_limits::max()); + unique_ptr search(test.createSearch(false)); + static_cast(*search).setDoom(&futureDoom); + EXPECT_TRUE(!search->seek(1u)); + 
EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatIteratorHonorsDoom() { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(false); + vespalib::Clock clock; + vespalib::Doom futureDoom(clock, 0); + unique_ptr search(test.createSearch(false)); + static_cast(*search).setDoom(&futureDoom); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->beginId(), search->getDocId()); + EXPECT_TRUE(!search->seek(doc_match)); + EXPECT_TRUE(search->isAtEnd()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatDoomIsPropagated() { + PhraseSearchTest test(true); + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(true); + unique_ptr search(test.createSearch(true)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->beginId(), search->getDocId()); + EXPECT_TRUE(!search->seek(doc_match)); + EXPECT_TRUE(search->isAtEnd()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatIteratorFindsLongPhrase(bool useBlueprint) { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 0).addTerm("baz", 0) + .addTerm("qux", 1); + + test.fetchPostings(useBlueprint); + unique_ptr search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatStrictIteratorFindsNextMatch(bool useBlueprint) { + PhraseSearchTest test; + test.setStrict(true); + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(useBlueprint); + unique_ptr search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(doc_match, search->getDocId()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatPhrasesAreUnpacked(bool useBlueprint) { + PhraseSearchTest test; + 
test.addTerm("foo", FakeResult() + .doc(doc_match).pos(1).pos(11).pos(21)); + test.addTerm("bar", FakeResult() + .doc(doc_match).pos(2).pos(16).pos(22)); + test.fetchPostings(useBlueprint); + unique_ptr search(test.createSearch(useBlueprint)); + EXPECT_TRUE(search->seek(doc_match)); + search->unpack(doc_match); + + EXPECT_EQUAL(doc_match, test.tmd().getDocId()); + EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end())); + EXPECT_EQUAL(1u, test.tmd().begin()->getPosition()); + EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition()); +} + +void Test::requireThatTermsCanBeEvaluatedInPriorityOrder() { + vector order; + order.push_back(2); + order.push_back(0); + order.push_back(1); + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1).addTerm("baz", 1); + test.setOrder(order); + + test.fetchPostings(false); + unique_ptr search(test.createSearch(false)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void +Test::requireThatBlueprintExposesFieldWithEstimate() +{ + FakeRequestContext requestContext; + FieldSpec f("foo", 1, 1); + SimplePhraseBlueprint phrase(f, requestContext); + ASSERT_TRUE(phrase.getState().numFields() == 1); + EXPECT_EQUAL(f.getFieldId(), phrase.getState().field(0).getFieldId()); + EXPECT_EQUAL(f.getHandle(), phrase.getState().field(0).getHandle()); + + EXPECT_EQUAL(true, phrase.getState().estimate().empty); + EXPECT_EQUAL(0u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 10))); + EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(10u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 5))); + EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(5u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 20))); + 
EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(5u, phrase.getState().estimate().estHits); +} + +void +Test::requireThatBlueprintForcesPositionDataOnChildren() +{ + FakeRequestContext requestContext; + FieldSpec f("foo", 1, 1, true); + SimplePhraseBlueprint phrase(f, requestContext); + EXPECT_TRUE(f.isFilter()); + EXPECT_TRUE(!phrase.getNextChildField(f).isFilter()); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/sourceblender/.gitignore b/searchlib/src/tests/queryeval/sourceblender/.gitignore new file mode 100644 index 00000000000..e0d4b433d65 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +sourceblender_test +searchlib_sourceblender_test_app diff --git a/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt new file mode 100644 index 00000000000..e566cb0fdf0 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sourceblender_test_app + SOURCES + sourceblender.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_sourceblender_test_app COMMAND searchlib_sourceblender_test_app) diff --git a/searchlib/src/tests/queryeval/sourceblender/DESC b/searchlib/src/tests/queryeval/sourceblender/DESC new file mode 100644 index 00000000000..437dd818eb7 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/DESC @@ -0,0 +1 @@ +sourceblender test. Take a look at sourceblender.cpp for details. 
diff --git a/searchlib/src/tests/queryeval/sourceblender/FILES b/searchlib/src/tests/queryeval/sourceblender/FILES new file mode 100644 index 00000000000..97be7bcaf53 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/FILES @@ -0,0 +1 @@ +sourceblender.cpp diff --git a/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp new file mode 100644 index 00000000000..2cfcf4e3f1d --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP("sourceblender_test"); +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; +using std::make_unique; + +/** + * Proxy search used to verify unpack pattern + **/ +class UnpackChecker : public SearchIterator +{ +private: + SearchIterator::UP _search; + SimpleResult _unpacked; + +protected: + virtual void doSeek(uint32_t docid) { + _search->seek(docid); + setDocId(_search->getDocId()); + } + virtual void doUnpack(uint32_t docid) { + _unpacked.addHit(docid); + _search->unpack(docid); + } + +public: + UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {} + const SimpleResult &getUnpacked() const { return _unpacked; } +}; + +class MySelector : public search::FixedSourceSelector +{ +public: + MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { } + MySelector & set(Source s, uint32_t docId) { + setSource(s, docId); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +TEST("test strictness") { + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + for (uint32_t i = 0; i < 2; ++i) { + bool strict = (i == 0); + + SimpleResult a; + SimpleResult 
b; + + a.addHit(2).addHit(5).addHit(6).addHit(8); + b.addHit(3).addHit(5).addHit(6).addHit(7); + + MySelector *sel = new MySelector(5); + sel->set(2, 1).set(3, 2).set(5, 2).set(7, 1); + + SourceBlenderBlueprint *blend_b = new SourceBlenderBlueprint(*sel); + Blueprint::UP a_b(new SimpleBlueprint(a)); + Blueprint::UP b_b(new SimpleBlueprint(b)); + a_b->setSourceId(1); + b_b->setSourceId(2); + blend_b->addChild(std::move(a_b)); + blend_b->addChild(std::move(b_b)); + Blueprint::UP bp(blend_b); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + SearchIterator &blend = *search; + + EXPECT_TRUE(!blend.seek(1u)); + if (strict) { + EXPECT_EQUAL(2u, blend.getDocId()); + } else { + EXPECT_EQUAL(blend.beginId(), blend.getDocId()); + } + EXPECT_TRUE(blend.seek(5)); + EXPECT_EQUAL(5u, blend.getDocId()); + EXPECT_TRUE(!blend.seek(6)); + if (strict) { + EXPECT_TRUE(blend.isAtEnd()); + } else { + EXPECT_EQUAL(5u, blend.getDocId()); + } + delete sel; + } +} + +TEST("test full sourceblender search") { + SimpleResult a; + SimpleResult b; + SimpleResult c; + + a.addHit(2).addHit(11).addHit(21).addHit(34); + b.addHit(3).addHit(11).addHit(21).addHit(33); + c.addHit(4).addHit(11).addHit(21).addHit(32); + + // these are all handed over to the blender + UnpackChecker *ua = new UnpackChecker(new SimpleSearch(a)); + UnpackChecker *ub = new UnpackChecker(new SimpleSearch(b)); + UnpackChecker *uc = new UnpackChecker(new SimpleSearch(c)); + auto sel = make_unique(5); + + sel->set(2, 1).set(3, 2).set(11, 2).set(21, 3).set(34, 1); + SourceBlenderSearch::Children abc; + abc.push_back(SourceBlenderSearch::Child(ua, 1)); + abc.push_back(SourceBlenderSearch::Child(ub, 2)); + abc.push_back(SourceBlenderSearch::Child(uc, 3)); + + SearchIterator::UP blend(SourceBlenderSearch::create(sel->createIterator(), abc, true)); + SimpleResult result; + result.search(*blend); + + SimpleResult expect_result; + 
expect_result.addHit(2).addHit(3).addHit(11).addHit(21).addHit(34); + + SimpleResult expect_unpacked_a; + expect_unpacked_a.addHit(2).addHit(34); + + SimpleResult expect_unpacked_b; + expect_unpacked_b.addHit(3).addHit(11); + + SimpleResult expect_unpacked_c; + expect_unpacked_c.addHit(21); + + EXPECT_EQUAL(expect_result, result); + EXPECT_EQUAL(expect_unpacked_a, ua->getUnpacked()); + EXPECT_EQUAL(expect_unpacked_b, ub->getUnpacked()); + EXPECT_EQUAL(expect_unpacked_c, uc->getUnpacked()); +} + +using search::test::InitRangeVerifier; + +SourceBlenderSearch::Children +createChildren(const std::vector & indexes, const InitRangeVerifier & ir, bool strict) { + SourceBlenderSearch::Children children; + for (size_t index(0); index < indexes.size(); index++) { + children.emplace_back(ir.createIterator(indexes[index], strict).release(), index); + } + return children; +} + +TEST("test init range") { + InitRangeVerifier ir; + std::vector indexes(3); + auto sel = make_unique(ir.getDocIdLimit()); + for (uint32_t docId : ir.getExpectedDocIds()) { + const size_t indexId = docId%indexes.size(); + sel->set(docId, indexId); + indexes[indexId].push_back(docId); + } + TermFieldMatchData tfmd; + TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, false), false))); + TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, true), true))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore new file mode 100644 index 00000000000..b0ce58fa658 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore @@ -0,0 +1,6 @@ +/log.txt +/report.head +/report.html +/plot.* +/*.png +searchlib_sparse_vector_benchmark_test_app diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt 
b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt new file mode 100644 index 00000000000..8d4aa8c10be --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sparse_vector_benchmark_test_app + SOURCES + sparse_vector_benchmark_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sparse_vector_benchmark_test_app COMMAND searchlib_sparse_vector_benchmark_test_app BENCHMARK) diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES new file mode 100644 index 00000000000..48eda2416c9 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES @@ -0,0 +1 @@ +sparse_vector_benchmark_test.cpp diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp new file mode 100644 index 00000000000..68d7dec0f87 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -0,0 +1,429 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include + +#include "../weak_and/rise_wand.h" +#include "../weak_and/rise_wand.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using namespace search::queryeval; +using namespace vespalib; + +namespace { + +//----------------------------------------------------------------------------- + +struct Writer { + FILE *file; + Writer(const std::string &file_name) { + file = fopen(file_name.c_str(), "w"); + assert(file != 0); + } + void write(const char *data, size_t size) const { + fwrite(data, 1, size, file); + } + void fmt(const char *format, ...) const +#ifdef __GNUC__ + __attribute__ ((format (printf,2,3))) +#endif + { + va_list ap; + va_start(ap, format); + vfprintf(file, format, ap); + va_end(ap); + } + ~Writer() { fclose(file); } +}; + +//----------------------------------------------------------------------------- + +// top-level html report (global, used by plots and graphs directly) +class Report +{ +private: + Writer _html; + +public: + Report(const std::string &file) : _html(file) { + _html.fmt("\n"); + _html.fmt("Sparse Vector Search Benchmark Report\n"); + _html.fmt("\n"); + _html.fmt("

Sparse Vector Search Benchmark Report

\n"); + } + void addPlot(const std::string &title, const std::string &png_file) { + _html.fmt("

%s

\n", title.c_str()); + _html.fmt("\n", png_file.c_str()); + } + ~Report() { + _html.fmt("

Test Log with Numbers

\n"); + _html.fmt("
\n");
+        // html file needs external termination
+    }
+};
+
+Report report("report.head"); // global report; the Plot constructor registers every plot with it via addPlot()
+
+//-----------------------------------------------------------------------------
+
+/**
+ * A single graph within a plot. Every value added is appended as one
+ * "x y" line to the data file named on construction.
+ **/
+class Graph
+{
+private:
+    Writer _writer; // the data file this graph's values are written to
+
+public:
+    // owning handle; this is the type Plot::createGraph() hands out
+    typedef std::unique_ptr<Graph> UP;
+    Graph(const std::string &file) : _writer(file) {}
+    // append one data point using printf "%g" formatting for both coordinates
+    void addValue(double x, double y) { _writer.fmt("%g %g\n", x, y); }
+};
+
+/**
+ * A plot possibly containing multiple graphs. Construction writes the
+ * preamble of a gnuplot script named "plot.<N>.gnuplot" and registers the
+ * resulting png with the global report. Each created graph appends one
+ * clause to a single 'plot' command that the destructor terminates with a
+ * newline.
+ **/
+class Plot
+{
+private:
+    std::string _name;   // "plot.<N>"; base name for the script, png and graph files
+    int         _graphs; // number of graphs created so far
+    Writer      _writer; // the gnuplot script
+    static int  _plots;  // number of plots created so far; source of <N>
+
+public:
+    // owning handle, as returned by createPlot()
+    typedef std::unique_ptr<Plot> UP;
+
+    // _name is declared before _writer, so it is already initialized when
+    // the script file name is derived from it in the initializer list.
+    Plot(const std::string &title) : _name(vespalib::make_string("plot.%d", _plots++)), _graphs(0),
+                                     _writer(vespalib::make_string("%s.gnuplot", _name.c_str())) {
+        std::string png_file = vespalib::make_string("%s.png", _name.c_str());
+        _writer.fmt("set term png size 1200,800\n");
+        _writer.fmt("set output '%s'\n", png_file.c_str());
+        _writer.fmt("set title '%s'\n", title.c_str());
+        _writer.fmt("set xlabel 'term count'\n");
+        _writer.fmt("set ylabel 'time (ms)'\n");
+        report.addPlot(title, png_file);
+    }
+
+    ~Plot() {
+        // ends the line holding the 'plot' command built up by createGraph()
+        _writer.fmt("\n");
+    }
+
+    // Adds a graph with the given legend to this plot and returns the object
+    // used to fill in its data file ("<name>.graph.<index>").
+    Graph::UP createGraph(const std::string &legend) {
+        std::string file = vespalib::make_string("%s.graph.%d", _name.c_str(), _graphs);
+        // the first graph opens the 'plot' command, later ones are comma separated
+        _writer.fmt("%s '%s' using 1:2 title '%s' w lines",
+                    (_graphs == 0) ? "plot " : ",", file.c_str(), legend.c_str());
+        ++_graphs;
+        return Graph::UP(new Graph(file));
+    }
+
+    static UP createPlot(const std::string &title) { return UP(new Plot(title)); }
+};
+
+int Plot::_plots = 0;
+
+//-----------------------------------------------------------------------------
+
+uint32_t default_weight = 100; // weight given to every term by the wand, weighted set and dot product factories
+double max_time = 1000000.0; // time_ms of a default-constructed Result; combine() reads it as "no measurement yet"
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Interface for creating the child iterators (the terms) that a sparse
+ * vector search operator is built from.
+ **/
+struct ChildFactory {
+    ChildFactory() {}
+    virtual ~ChildFactory() {}
+
+    // human readable name; used in plot titles
+    virtual std::string name() const = 0;
+
+    // Creates child number 'idx'; 'limit' is handed on to the created
+    // iterator. Returns a raw pointer (the ModSearchFactory implementation
+    // allocates it with new).
+    virtual SearchIterator *createChild(uint32_t idx, uint32_t limit) const = 0;
+};
+
+/**
+ * Interface for creating the search operator being benchmarked on top of
+ * 'childCnt' children obtained from a ChildFactory.
+ **/
+struct SparseVectorFactory {
+    virtual ~SparseVectorFactory() {}
+
+    // human readable name; used as graph legend and in the progress log
+    virtual std::string name() const = 0;
+
+    // Builds the operator over children 0..childCnt-1, each created with
+    // childFactory.createChild(i, limit).
+    virtual SearchIterator *createSparseVector(ChildFactory &childFactory,
+                                               uint32_t childCnt,
+                                               uint32_t limit) const = 0;
+};
+
+/**
+ * Interface deciding how the sparse vector operator is combined with a
+ * filter (if any) to form the root iterator that is benchmarked.
+ **/
+struct FilterStrategy {
+    virtual ~FilterStrategy() {}
+
+    // human readable name; used in plot titles
+    virtual std::string name() const = 0;
+
+    // Creates the root iterator; the sparse vector part is obtained from
+    // vectorFactory.createSparseVector(childFactory, childCnt, limit).
+    virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory,
+                                       ChildFactory &childFactory,
+                                       uint32_t childCnt,
+                                       uint32_t limit) const = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Synthetic iterator that hits every document id that is a multiple of
+ * 'step' and below 'limit'. Unpacking does nothing.
+ **/
+struct ModSearch : SearchIterator {
+    uint32_t step;
+    uint32_t limit;
+
+    ModSearch(uint32_t step_in, uint32_t limit_in)
+        : step(step_in),
+          limit(limit_in)
+    {
+        setDocId(step);
+    }
+
+    // Moves to the smallest multiple of 'step' that is >= docid, or to the
+    // end when that multiple is not below 'limit'.
+    virtual void doSeek(uint32_t docid) {
+        assert(docid > getDocId());
+        const uint32_t rest = docid % step;
+        // round docid up to the next multiple of step (unchanged if already one)
+        const uint32_t target = (rest == 0) ? docid : (docid - rest) + step;
+        if (target >= limit) {
+            setAtEnd();
+            return;
+        }
+        assert(target >= docid);
+        setDocId(target);
+    }
+
+    virtual void doUnpack(uint32_t) {}
+};
+
+/**
+ * ChildFactory producing ModSearch iterators; child number 'idx' gets the
+ * step (bias + idx). The default bias is 1.
+ **/
+struct ModSearchFactory : ChildFactory {
+    uint32_t bias;
+
+    ModSearchFactory() : bias(1) {}
+    explicit ModSearchFactory(int b) : bias(b) {}
+
+    std::string name() const override {
+        return vespalib::make_string("ModSearch(%u)", bias);
+    }
+
+    SearchIterator *createChild(uint32_t idx, uint32_t limit) const override {
+        const uint32_t child_step = bias + idx;
+        return new ModSearch(child_step, limit);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Creates the sparse vector as a WeakAndSearch with target 'n' over
+ * terms that all carry default_weight.
+ **/
+struct VespaWandFactory : SparseVectorFactory {
+    uint32_t n;
+
+    VespaWandFactory(uint32_t n_in) : n(n_in) {}
+
+    std::string name() const override {
+        return vespalib::make_string("VespaWand(%u)", n);
+    }
+
+    SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        wand::Terms wand_terms;
+        for (size_t idx = 0; idx < childCnt; ++idx) {
+            SearchIterator *child = childFactory.createChild(idx, limit);
+            // NOTE(review): the third wand::Term argument looks like a per-term hit estimate - confirm
+            wand_terms.push_back(wand::Term(child, default_weight, limit / (idx + 1)));
+        }
+        // NOTE(review): the trailing 'true' presumably requests a strict iterator - confirm
+        return WeakAndSearch::create(wand_terms, n, true);
+    }
+};
+
+/**
+ * Creates the sparse vector as a rise::TermFrequencyRiseWand with target
+ * 'n' over terms that all carry default_weight.
+ **/
+struct RiseWandFactory : SparseVectorFactory {
+    uint32_t n;
+
+    RiseWandFactory(uint32_t n_in) : n(n_in) {}
+
+    std::string name() const override {
+        return vespalib::make_string("RiseWand(%u)", n);
+    }
+
+    SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        wand::Terms wand_terms;
+        for (size_t idx = 0; idx < childCnt; ++idx) {
+            SearchIterator *child = childFactory.createChild(idx, limit);
+            // NOTE(review): the third wand::Term argument looks like a per-term hit estimate - confirm
+            wand_terms.push_back(wand::Term(child, default_weight, limit / (idx + 1)));
+        }
+        return new rise::TermFrequencyRiseWand(wand_terms, n);
+    }
+};
+
+/**
+ * Creates the sparse vector as a WeightedSetTermSearch in which every
+ * child carries default_weight.
+ **/
+struct WeightedSetFactory : SparseVectorFactory {
+    // match data handed to every created search; mutable because
+    // createSparseVector() is const but passes it on by non-const reference
+    mutable TermFieldMatchData tfmd;
+    virtual std::string name() const {
+        return vespalib::make_string("WeightedSet");
+    }
+    virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+        // element types follow from what is pushed below: child iterators
+        // and one integer weight per child
+        std::vector<SearchIterator*> terms;
+        std::vector<int32_t> weights;
+        for (size_t i = 0; i < childCnt; ++i) {
+            terms.push_back(childFactory.createChild(i, limit));
+            weights.push_back(default_weight);
+        }
+        return WeightedSetTermSearch::create(terms, tfmd, weights);
+    }
+};
+
+/**
+ * Creates the sparse vector as a DotProductSearch in which every child
+ * carries default_weight and has its own term field slot in a freshly
+ * created MatchData that is handed over to the search.
+ **/
+struct DotProductFactory : SparseVectorFactory {
+    // match data handed to every created search; mutable because
+    // createSparseVector() is const but passes it on by non-const reference
+    mutable TermFieldMatchData tfmd;
+    virtual std::string name() const {
+        return vespalib::make_string("DotProduct");
+    }
+    virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const {
+        // allocate one term field handle per child before creating the match data
+        MatchDataLayout layout;
+        std::vector<TermFieldHandle> handles;
+        for (size_t i = 0; i < childCnt; ++i) {
+            handles.push_back(layout.allocTermField(0));
+        }
+        std::vector<SearchIterator*> terms;
+        std::vector<TermFieldMatchData*> childMatch;
+        std::vector<int32_t> weights;
+        MatchData::UP md = layout.createMatchData();
+        for (size_t i = 0; i < childCnt; ++i) {
+            terms.push_back(childFactory.createChild(i, limit));
+            childMatch.push_back(md->resolveTermField(handles[i]));
+            weights.push_back(default_weight);
+        }
+        // the match data moves into the search, keeping the childMatch pointers valid
+        return DotProductSearch::create(terms, tfmd, childMatch, weights, std::move(md)).release();
+    }
+};
+
+/**
+ * Creates the sparse vector as a plain OrSearch over the children.
+ **/
+struct OrFactory : SparseVectorFactory {
+    std::string name() const override {
+        return vespalib::make_string("Or");
+    }
+
+    SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        OrSearch::Children or_children;
+        for (size_t idx = 0; idx < childCnt; ++idx) {
+            SearchIterator *child = childFactory.createChild(idx, limit);
+            or_children.push_back(child);
+        }
+        // NOTE(review): the trailing 'true' presumably requests a strict iterator - confirm
+        return OrSearch::create(or_children, true);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Strategy without any filter: the sparse vector operator itself is the root.
+ **/
+struct NoFilterStrategy : FilterStrategy {
+    std::string name() const override {
+        return vespalib::make_string("NoFilter");
+    }
+
+    SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        SearchIterator *root = vectorFactory.createSparseVector(childFactory, childCnt, limit);
+        return root;
+    }
+};
+
+/**
+ * Strategy placing a positive filter in front of the sparse vector:
+ * AND(filter, vector), where the filter is a ModSearch with step 2.
+ **/
+struct PositiveFilterBeforeStrategy : FilterStrategy {
+    std::string name() const override {
+        return vespalib::make_string("PositiveBefore");
+    }
+
+    SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        AndSearch::Children and_children;
+        // filter first: every second document, i.e. 50% hits (hardcoded)
+        and_children.push_back(new ModSearch(2, limit));
+        SearchIterator *vector_search = vectorFactory.createSparseVector(childFactory, childCnt, limit);
+        and_children.push_back(vector_search);
+        return AndSearch::create(and_children, true);
+    }
+};
+
+/**
+ * Strategy placing a negative filter after the sparse vector:
+ * ANDNOT(vector, filter), where the filter is a ModSearch with step 2.
+ **/
+struct NegativeFilterAfterStrategy : FilterStrategy {
+    std::string name() const override {
+        return vespalib::make_string("NegativeAfter");
+    }
+
+    SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const override {
+        AndNotSearch::Children andnot_children;
+        SearchIterator *vector_search = vectorFactory.createSparseVector(childFactory, childCnt, limit);
+        andnot_children.push_back(vector_search);
+        // filter last: every second document, i.e. 50% hits (hardcoded)
+        andnot_children.push_back(new ModSearch(2, limit));
+        return AndNotSearch::create(andnot_children, true);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Outcome of a benchmark run: elapsed time and number of hits. A default
+ * constructed Result has time_ms == max_time, which combine() takes to
+ * mean that nothing has been measured yet.
+ **/
+struct Result {
+    double time_ms;
+    uint32_t num_hits;
+
+    Result() : time_ms(max_time), num_hits(0) {}
+    Result(double t, uint32_t n) : time_ms(t), num_hits(n) {}
+
+    // Folds another run of the same benchmark into this one, keeping the
+    // lowest time. All runs must have produced the same number of hits.
+    void combine(const Result &r) {
+        if (time_ms == max_time) {
+            // first measurement; adopt it as is
+            *this = r;
+            return;
+        }
+        assert(num_hits == r.num_hits);
+        if (r.time_ms < time_ms) {
+            time_ms = r.time_ms;
+        }
+    }
+
+    std::string toString() const {
+        return vespalib::make_string("%u hits, %g ms", num_hits, time_ms);
+    }
+};
+
+// Builds the root iterator for the given combination and times one full
+// pass over it: seek to each hit in turn, starting at document 1, and unpack
+// it. Returns the elapsed time in milliseconds and the number of hits seen.
+Result run_single_benchmark(FilterStrategy &filterStrategy, SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) {
+    SearchIterator::UP root(filterStrategy.createRoot(vectorFactory, childFactory, childCnt, limit));
+    uint32_t hit_count = 0;
+    FastOS_Time stopwatch;
+    stopwatch.SetNow();
+    root->seek(1);
+    while (!root->isAtEnd()) {
+        ++hit_count;
+        root->unpack(root->getDocId());
+        root->seek(root->getDocId() + 1);
+    }
+    return Result(stopwatch.MilliSecsToNow(), hit_count);
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * One setup is used to produce all graphs in a single plot: it fixes the
+ * filter strategy, the child factory and the document limit, and owns the
+ * plot that each benchmarked search operator adds a graph to.
+ **/
+class Setup
+{
+private:
+    FilterStrategy &_filterStrategy;
+    ChildFactory &_childFactory;
+    uint32_t _limit;
+    Plot::UP _plot;
+
+    // title shared by the plot and the progress output
+    std::string make_title() const {
+        return vespalib::make_string("%u docs, filter:%s, terms:%s", _limit, _filterStrategy.name().c_str(), _childFactory.name().c_str());
+    }
+
+public:
+    // Keeps references to 'fs' and 'cf'; both must outlive this object.
+    Setup(FilterStrategy &fs, ChildFactory &cf, uint32_t lim) : _filterStrategy(fs), _childFactory(cf), _limit(lim) {
+        _plot = Plot::createPlot(make_title());
+        fprintf(stderr, "benchmark setup: %s\n", make_title().c_str());
+    }
+
+    // Benchmarks one search operator for every child count in 'child_counts'
+    // and records (child count, time) in a new graph named after the
+    // operator. Progress is logged to stderr.
+    void benchmark(SparseVectorFactory &svf, const std::vector<uint32_t> &child_counts) {
+        Graph::UP graph = _plot->createGraph(svf.name());
+        fprintf(stderr, "  search operator: %s\n", svf.name().c_str());
+        for (size_t i = 0; i < child_counts.size(); ++i) {
+            uint32_t childCnt = child_counts[i];
+            Result result;
+            // run 5 times; Result::combine() keeps the fastest run
+            for (int j = 0; j < 5; ++j) {
+                result.combine(run_single_benchmark(_filterStrategy, svf, _childFactory, childCnt, _limit));
+            }
+            graph->addValue(childCnt, result.time_ms);
+            fprintf(stderr, "    %u children => %s\n", childCnt, result.toString().c_str());
+        }
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+// Runs the given setup against every search operator under test, in a fixed
+// order: Vespa wand (256), RISE wand (256), weighted set, dot product and
+// plain OR. Each operator contributes one graph to the setup's plot.
+void benchmark_all_operators(Setup &setup, const std::vector<uint32_t> &child_counts) {
+    VespaWandFactory       vespaWand256(256);
+    RiseWandFactory        riseWand256(256);
+    WeightedSetFactory     weightedSet;
+    DotProductFactory      dotProduct;
+    OrFactory              plain_or;
+    setup.benchmark(vespaWand256, child_counts);
+    setup.benchmark(riseWand256, child_counts);
+    setup.benchmark(weightedSet, child_counts);
+    setup.benchmark(dotProduct, child_counts);
+    setup.benchmark(plain_or, child_counts);
+}
+
+//-----------------------------------------------------------------------------
+
+// The child counts (number of terms) benchmarked for every operator: 30
+// values from 10 to 2000, with the step size growing as the count grows.
+Box<uint32_t> make_full_child_counts() {
+    return Box<uint32_t>()
+        .add(10).add(20).add(30).add(40).add(50).add(60).add(70).add(80).add(90)
+        .add(100).add(125).add(150).add(175)
+        .add(200).add(250).add(300).add(350).add(400).add(450)
+        .add(500).add(600).add(700).add(800).add(900)
+        .add(1000).add(1200).add(1400).add(1600).add(1800)
+        .add(2000);
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace 
+
+TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { // no filter, default ModSearch bias (1); f1/f2/f3 = strategy/child factory/setup
+    benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(8), Setup(f1, f2, 5000000)) { // no filter, ModSearch bias 8
+    benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", PositiveFilterBeforeStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { // AND(filter, vector), default bias
+    benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_FFF("benchmark", NegativeFilterAfterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { // ANDNOT(vector, filter), default bias
+    benchmark_all_operators(f3, make_full_child_counts());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/termwise_eval/.gitignore b/searchlib/src/tests/queryeval/termwise_eval/.gitignore
new file mode 100644
index 00000000000..b6b345775f6
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/.gitignore
@@ -0,0 +1 @@
+searchlib_termwise_eval_test_app
diff --git a/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt
new file mode 100644
index 00000000000..ab9362f6e99
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_termwise_eval_test_app
+    SOURCES
+    termwise_eval_test.cpp
+    DEPENDS
+    searchlib
+    searchlib_test
+)
+vespa_add_test(NAME searchlib_termwise_eval_test_app COMMAND searchlib_termwise_eval_test_app)
diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
new file mode 100644
index 00000000000..625d9928048
--- /dev/null
+++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
@@ -0,0 +1,641 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace vespalib;
+using namespace search;
+using namespace search::fef;
+using namespace search::queryeval;
+
+//-----------------------------------------------------------------------------
+
+const uint32_t my_field = 0; // field id given to FieldSpecBase by the MyBlueprint constructor that takes a handle
+
+//-----------------------------------------------------------------------------
+
+struct MyTerm : public SearchIterator { // test iterator backed by an explicit list of hit docids
+    size_t pos;      // index into 'hits' of the first entry not yet skipped by doSeek
+    bool is_strict;  // when set, doSeek settles on the next hit at or after the target
+    std::vector<uint32_t> hits; // hit docids; the forward scan in doSeek expects ascending order
+    MyTerm(const std::vector<uint32_t> &hits_in, bool is_strict_in)
+        : pos(0), is_strict(is_strict_in), hits(hits_in) {}
+    void initRange(uint32_t beginid, uint32_t endid) override {
+        SearchIterator::initRange(beginid, endid);
+        if (is_strict) {
+            doSeek(beginid); // a strict term is positioned on its first hit right away
+        }
+    }
+    void resetRange() override {
+        SearchIterator::resetRange();
+        pos = 0; // rewind the scan so a later range may start at a lower docid
+    }
+    void doSeek(uint32_t docid) override {
+        while ((pos < hits.size()) && (hits[pos] < docid)) {
+            ++pos; // skip hits below the target
+        }
+        if (is_strict) {
+            if ((pos == hits.size()) || isAtEnd(hits[pos])) {
+                setAtEnd();
+            } else {
+                setDocId(hits[pos]); // strict: report the next hit even when it lies past docid
+            }
+        } else {
+            if (isAtEnd(docid)) {
+                setAtEnd();
+            } else if ((pos < hits.size()) && (hits[pos] == docid)) {
+                setDocId(docid); // non-strict: only an exact match is reported
+            }
+        }
+    }
+    void doUnpack(uint32_t) override {}
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override { // 'override' so the asString() dump is guaranteed to reach this
+        visit(visitor, "hits", hits);
+        visit(visitor, "strict", is_strict);
+    }
+};
+
+struct MyBlueprint : SimpleLeafBlueprint { // leaf blueprint whose searches are MyTerm iterators over 'hits'
+    std::vector<uint32_t> hits; // docids handed to every MyTerm created by this blueprint
+    MyBlueprint(const std::vector<uint32_t> &hits_in) // empty field list, termwise setting left untouched
+        : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in)
+    {
+        setEstimate(HitEstimate(hits.size(), hits.empty()));
+    }
+    MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval) // empty field list, explicit termwise setting
+        : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in)
+    {
+        setEstimate(HitEstimate(hits.size(), hits.empty()));
+        set_allow_termwise_eval(allow_termwise_eval);
+    }
+    MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval, TermFieldHandle handle) // bound to my_field through 'handle'
+        : SimpleLeafBlueprint(FieldSpecBase(my_field, handle)), hits(hits_in)
+    {
+        setEstimate(HitEstimate(hits.size(), hits.empty()));
+        set_allow_termwise_eval(allow_termwise_eval);
+    }
+    SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &,
+                                        bool strict) const override
+    {
+        return SearchIterator::UP(new MyTerm(hits, strict)); // match data array is ignored; only strictness is forwarded
+    }
+};
+
+struct MyOr : OrBlueprint {
+    bool use_my_value;  // false: defer to IntermediateBlueprint's answer
+    bool my_value;      // answer given when use_my_value is set
+    MyOr(bool use_my_value_in, bool my_value_in = true)
+        : use_my_value(use_my_value_in), my_value(my_value_in) {}
+    // Either the configured answer or the default value for intermediate
+    // blueprints, depending on use_my_value.
+    bool supports_termwise_children() const override {
+        return use_my_value
+            ? my_value
+            : IntermediateBlueprint::supports_termwise_children();
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+UnpackInfo no_unpack() { return UnpackInfo(); } // an UnpackInfo with no index added to it
+
+UnpackInfo selective_unpack() {
+    UnpackInfo first_only;
+    first_only.add(0); // naming just the first child is what triggers selective unpack
+    return first_only;
+}
+
+SearchIterator *TERM(std::initializer_list<uint32_t> hits, bool strict) { // heap-allocated MyTerm; the caller owns the raw pointer
+    return new MyTerm(hits, strict);
+}
+
+SearchIterator *ANDNOT(std::initializer_list<SearchIterator *> children, bool strict) { // shorthand for AndNotSearch::create
+    return AndNotSearch::create(children, strict);
+}
+
+SearchIterator *AND(std::initializer_list<SearchIterator *> children, bool strict) { // AndSearch without explicit unpack info
+    return AndSearch::create(children, strict);
+}
+
+SearchIterator *ANDz(std::initializer_list<SearchIterator *> children, bool strict) { // AndSearch with the empty unpack info from no_unpack()
+    return AndSearch::create(children, strict, no_unpack());
+}
+
+SearchIterator *ANDs(std::initializer_list<SearchIterator *> children, bool strict) { // AndSearch with the unpack info from selective_unpack()
+    return AndSearch::create(children, strict, selective_unpack());
+}
+
+SearchIterator *OR(std::initializer_list<SearchIterator *> children, bool strict) { // OrSearch without explicit unpack info
+    return OrSearch::create(children, strict);
+}
+
+SearchIterator *ORz(std::initializer_list<SearchIterator *> children, bool strict) { // OrSearch with the empty unpack info from no_unpack()
+    return OrSearch::create(children, strict, no_unpack());
+}
+
+SearchIterator *ORs(std::initializer_list<SearchIterator *> children, bool strict) { // OrSearch with the unpack info from selective_unpack()
+    return OrSearch::create(children, strict, selective_unpack());
+}
+
+//-----------------------------------------------------------------------------
+
+template <typename T>
+std::unique_ptr<T> UP(T *t) { return std::unique_ptr<T>(t); } // wraps a raw pointer; the returned unique_ptr owns 't'
+
+//-----------------------------------------------------------------------------
+
+SearchIterator::UP make_search(bool strict) {
+    // AND over three OR groups; the groups only have docids 2, 4, 6 and 8
+    // in common. Only the outer AND follows the 'strict' argument.
+    SearchIterator *first  = OR({TERM({2,7}, true), TERM({4,8}, true),
+                                 TERM({5,6,9}, true)}, true);
+    SearchIterator *second = OR({TERM({1,4,7}, false), TERM({2,5,8}, true),
+                                 TERM({3,6}, false)}, false);
+    SearchIterator *third  = OR({TERM({1,2,3}, false), TERM({4,6}, false),
+                                 TERM({8,9}, false)}, false);
+    return UP(AND({first, second, third}, strict));
+}
+
+SearchIterator::UP make_filter_search(bool strict) {
+    SearchIterator *positive = TERM({1,2,3,4,5,6,7,8,9}, true);
+    // taking away 1, 9, 3, 7 and 5 leaves docids 2, 4, 6 and 8
+    return UP(ANDNOT({positive, TERM({1,9}, false), TERM({3,7}, true),
+                      TERM({5}, false)}, strict));
+}
+
+void add_if_inside(uint32_t docid, uint32_t begin, uint32_t end, std::vector<uint32_t> &expect) { // appends docid when begin <= docid < end
+    if (docid >= begin && docid < end) {
+        expect.push_back(docid);
+    }
+}
+
+std::vector<uint32_t> make_expect(uint32_t begin, uint32_t end) { // docids 2, 4, 6 and 8, restricted to [begin, end)
+    std::vector<uint32_t> expect;
+    add_if_inside(2, begin, end, expect);
+    add_if_inside(4, begin, end, expect);
+    add_if_inside(6, begin, end, expect);
+    add_if_inside(8, begin, end, expect);
+    return expect;
+}
+
+void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end) { // checks the hits of 'search' within [begin, end)
+    std::vector<uint32_t> actual;
+    search.initRange(begin, end);
+    for (uint32_t docid = begin; docid < end; ++docid) { // every docid in the range is probed, one by one
+        if (search.seek(docid)) {
+            actual.push_back(docid);
+        }
+    }
+    EXPECT_EQUAL(expect, actual);
+}
+
+//-----------------------------------------------------------------------------
+
+MatchData::UP make_match_data() {
+    // test instance with no features, 100 handles and a single field
+    const uint32_t feature_count = 0;
+    const uint32_t handle_count = 100, field_count = 1;
+    return MatchData::makeTestInstance(feature_count, handle_count, field_count);
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that pseudo term produces correct results") { // strict and non-strict MyTerm are checked over [1,6), [3,6) and [1,4)
+    TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6));
+    TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6));
+    TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6));
+    TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6));
+    TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4));
+    TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4));
+}
+
+TEST("require that normal search gives expected results") { // plain AND/OR tree, no termwise wrapper
+    auto search = make_search(true);
+    TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+}
+
+TEST("require that filter search gives expected results") { // plain ANDNOT tree, no termwise wrapper
+    auto search = make_filter_search(true);
+    TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+}
+
+TEST("require that termwise AND/OR search produces appropriate results") { // all begin/end/strictness combinations of the wrapped AND/OR tree
+    for (uint32_t begin: {1, 2, 5}) {
+        for (uint32_t end: {6, 7, 10}) {
+            for (bool strict_search: {true, false}) {
+                for (bool strict_wrapper: {true, false}) {
+                    TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+                                    begin, end, strict_search ? "true" : "false",
+                                    strict_wrapper ? "true" : "false").c_str());
+                    auto search = make_termwise(make_search(strict_search), strict_wrapper);
+                    TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+                }
+            }
+        }
+    }
+}
+
+TEST("require that termwise filter search produces appropriate results") { // same sweep as above, for the wrapped ANDNOT tree
+    for (uint32_t begin: {1, 2, 5}) {
+        for (uint32_t end: {6, 7, 10}) {
+            for (bool strict_search: {true, false}) {
+                for (bool strict_wrapper: {true, false}) {
+                    TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+                                    begin, end, strict_search ? "true" : "false",
+                                    strict_wrapper ? "true" : "false").c_str());
+                    auto search = make_termwise(make_filter_search(strict_search), strict_wrapper);
+                    TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+                }
+            }
+        }
+    }
+}
+
+TEST("require that termwise ANDNOT with single term works") { // ANDNOT with a positive child only, checked over [2,5)
+    TEST_DO(verify({2,3,4}, *make_termwise(UP(ANDNOT({TERM({1,2,3,4,5}, true)}, true)), true), 2, 5));
+}
+
+TEST("require that pseudo term is rewindable") { // the second range starts below the first one
+    auto search = UP(TERM({1,2,3,4,5}, true));
+    TEST_DO(verify({3,4,5}, *search, 3, 6));
+    search->resetRange();
+    TEST_DO(verify({1,2,3,4}, *search, 1, 5));
+}
+
+TEST("require that termwise wrapper is rewindable") { // same rewind check through the termwise wrapper
+    auto search = make_termwise(make_search(true), true);
+    TEST_DO(verify(make_expect(3, 7), *search, 3, 7));
+    search->resetRange();
+    TEST_DO(verify(make_expect(1, 5), *search, 1, 5));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that leaf blueprints allow termwise evaluation by default") { // uses the constructor that never calls set_allow_termwise_eval
+    MyBlueprint bp({});
+    EXPECT_TRUE(bp.getState().allow_termwise_eval());
+}
+
+TEST("require that leaf blueprints can enable/disable termwise evaluation") { // explicit setting must be reflected in the state
+    MyBlueprint enable({}, true);
+    MyBlueprint disable({}, false);
+    EXPECT_TRUE(enable.getState().allow_termwise_eval());
+    EXPECT_FALSE(disable.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints disallow termwise evaluation by default") { // MyOr(false) falls back to the IntermediateBlueprint answer
+    MyOr bp(false);
+    bp.addChild(UP(new MyBlueprint({}, true)));
+    bp.addChild(UP(new MyBlueprint({}, true)));
+    EXPECT_FALSE(bp.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints can enable/disable termwise evaluation") { // MyOr(true, x) answers x; all children allow termwise
+    MyOr enable(true, true);
+    enable.addChild(UP(new MyBlueprint({}, true)));
+    enable.addChild(UP(new MyBlueprint({}, true)));
+    EXPECT_TRUE(enable.getState().allow_termwise_eval());
+    MyOr disable(true, false);
+    disable.addChild(UP(new MyBlueprint({}, true)));
+    disable.addChild(UP(new MyBlueprint({}, true)));
+    EXPECT_FALSE(disable.getState().allow_termwise_eval());
+}
+
+TEST("require that intermediate blueprints cannot be termwise unless all its children are termwise") { // one child disallows termwise
+    MyOr bp(true, true);
+    bp.addChild(UP(new MyBlueprint({}, true)));
+    bp.addChild(UP(new MyBlueprint({}, false)));
+    EXPECT_FALSE(bp.getState().allow_termwise_eval());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that leafs have tree size 1") { // a single leaf blueprint on its own
+    MyBlueprint bp({});
+    EXPECT_EQUAL(1u, bp.getState().tree_size());    
+}
+
+TEST("require that tree size is accumulated correctly by intermediate nodes") { // 1 (empty OR) -> 3 (two leafs) -> 6 (nested OR with two leafs)
+    MyOr bp(false);
+    EXPECT_EQUAL(1u, bp.getState().tree_size());    
+    bp.addChild(UP(new MyBlueprint({})));
+    bp.addChild(UP(new MyBlueprint({})));
+    EXPECT_EQUAL(3u, bp.getState().tree_size());
+    auto child = UP(new MyOr(false));
+    child->addChild(UP(new MyBlueprint({})));
+    child->addChild(UP(new MyBlueprint({})));
+    bp.addChild(std::move(child));
+    EXPECT_EQUAL(6u, bp.getState().tree_size());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that any blueprint node can obtain the root") { // root() of the OR and of both children must be the OR itself
+    MyOr bp(false);
+    bp.addChild(UP(new MyBlueprint({1,2,3})));
+    bp.addChild(UP(new MyBlueprint({1,2,3,4,5,6})));
+    EXPECT_TRUE(&bp != &bp.getChild(0));
+    EXPECT_TRUE(&bp != &bp.getChild(1));
+    EXPECT_TRUE(&bp == &bp.getChild(0).root());
+    EXPECT_TRUE(&bp == &bp.getChild(1).root());
+    EXPECT_TRUE(&bp == &bp.root());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that match data keeps track of the termwise limit") { // a fresh instance reports 1.0; a set value is read back unchanged
+    auto md = make_match_data();
+    EXPECT_EQUAL(1.0, md->get_termwise_limit());
+    md->set_termwise_limit(0.03);
+    EXPECT_EQUAL(0.03, md->get_termwise_limit());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that terwise test search string dump is detailed enough") { // later tests compare asString() dumps, so each difference below must show up
+    EXPECT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(),
+                 make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString());
+
+    EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), // differs in: strictness of one term
+                     make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true)}, true)), true)->asString());
+
+    EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), // differs in: strictness of the OR
+                     make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, false)), true)->asString());
+
+    EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), // differs in: strictness of the wrapper
+                     make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), false)->asString());
+
+    EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), // differs in: order of the children
+                     make_termwise(UP(OR({TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true)}, true)), true)->asString());
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that basic termwise evaluation works") { // limit 0.0 and both handles tagged as not needed: the whole OR is wrapped
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString());
+    }
+}
+
+TEST("require that the hit rate must be high enough for termwise evaluation to be activated") { // same setup, but with limit 1.0 no TermwiseSearch may appear
+    auto md = make_match_data();
+    md->set_termwise_limit(1.0); // <-
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    for (bool strict: {true, false}) {
+        EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos);
+    }
+}
+
+TEST("require that enough unranked termwise terms are present for termwise evaluation to be activated") { // only child 1 qualifies, so no wrapper is expected
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // <- not termwise
+    my_or.addChild(UP(new MyBlueprint({3}, true, 3)));  // <- ranked
+    for (bool strict: {true, false}) {
+        EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos);
+    }
+}
+
+TEST("require that termwise evaluation can be multi-level, but not duplicated") { // nested OR ends up inside one outer wrapper, without a wrapper of its own
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));    
+    auto child = UP(new OrBlueprint());
+    child->addChild(UP(new MyBlueprint({2}, true, 2)));
+    child->addChild(UP(new MyBlueprint({3}, true, 3)));
+    my_or.addChild(std::move(child));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     make_termwise(UP(OR({TERM({1}, strict), ORz({TERM({2}, strict), TERM({3}, strict)}, strict)}, strict)), strict)->asString());
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that OR can be completely termwise") { // both handles tagged as not needed: one wrapper around the whole OR
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString());
+    }
+}
+
+TEST("require that OR can be partially termwise") { // handle 2 is not tagged, so term 2 stays outside the wrapper holding terms 1 and 3
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     UP(ORs({make_termwise(UP(OR({TERM({1}, strict), TERM({3}, strict)}, strict)), strict).release(), TERM({2}, strict)}, strict))->asString());
+    }
+}
+
+TEST("require that OR puts termwise subquery at the right place") { // handle 1 is not tagged; the wrapper for terms 2 and 3 comes after term 1
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(2)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     UP(ORs({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString());
+    }
+}
+
+TEST("require that OR can use termwise eval also when having non-termwise children") { // child 1 disallows termwise and stays outside the wrapper
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, false, 1)));
+    my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
+                     UP(ORz({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString());
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that AND can be completely termwise") { // in the expected dump only the first AND child follows 'strict'
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    AndBlueprint my_and;
+    my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+                     make_termwise(UP(AND({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString());
+    }
+}
+
+TEST("require that AND can be partially termwise") { // handle 2 is not tagged, so term 2 stays outside the wrapper holding terms 1 and 3
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    AndBlueprint my_and;
+    my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+                     UP(ANDs({make_termwise(UP(AND({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString());
+    }
+}
+
+TEST("require that AND puts termwise subquery at the right place") { // wrapper for terms 2 and 3 is not the first child, so it is expected non-strict
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(2)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    AndBlueprint my_and;
+    my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+                     UP(ANDs({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+    }
+}
+
+TEST("require that AND can use termwise eval also when having non-termwise children") { // child 1 disallows termwise and stays outside the wrapper
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    md->resolveTermField(2)->tagAsNotNeeded();
+    md->resolveTermField(3)->tagAsNotNeeded();
+    AndBlueprint my_and;
+    my_and.addChild(UP(new MyBlueprint({1}, false, 1)));
+    my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+                     UP(ANDz({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that ANDNOT can be completely termwise") { // only handle 1 is tagged here, yet the whole ANDNOT is expected inside one wrapper
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    AndNotBlueprint my_andnot;
+    my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+                     make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString());
+    }
+}
+
+TEST("require that ANDNOT can be partially termwise") { // no handle is tagged; the two negative children are expected inside a termwise OR
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    AndNotBlueprint my_andnot;
+    my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
+    my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+                     UP(ANDNOT({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString());
+    }
+}
+
+TEST("require that ANDNOT can be partially termwise with first child being termwise") { // child 2 disallows termwise; terms 1 and 3 form a wrapped inner ANDNOT
+    auto md = make_match_data();
+    md->set_termwise_limit(0.0);
+    md->resolveTermField(1)->tagAsNotNeeded();
+    AndNotBlueprint my_andnot;
+    my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
+    my_andnot.addChild(UP(new MyBlueprint({2}, false, 2)));
+    my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
+    for (bool strict: {true, false}) {
+        EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+                     UP(ANDNOT({make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString());
+    }
+}
+
+//-----------------------------------------------------------------------------
+
+TEST("require that termwise blueprint helper calculates unpack info correctly") { // five children, with unpack requested for child indexes 1 and 3
+    OrBlueprint my_or;
+    my_or.addChild(UP(new MyBlueprint({1}, false, 1))); // termwise not allowed
+    my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // termwise not allowed and ranked
+    my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
+    my_or.addChild(UP(new MyBlueprint({4}, true, 4))); // ranked
+    my_or.addChild(UP(new MyBlueprint({5}, true, 5)));
+    MultiSearch::Children dummy_searches(5, nullptr);
+    UnpackInfo unpack; // non-termwise unpack info
+    unpack.add(1);
+    unpack.add(3);
+    TermwiseBlueprintHelper helper(my_or, dummy_searches, unpack);
+    EXPECT_EQUAL(helper.children.size(), 3u);
+    EXPECT_EQUAL(helper.termwise.size(), 2u);
+    EXPECT_EQUAL(helper.first_termwise, 2u);
+    EXPECT_TRUE(!helper.termwise_unpack.needUnpack(0));
+    EXPECT_TRUE(helper.termwise_unpack.needUnpack(1));
+    EXPECT_TRUE(!helper.termwise_unpack.needUnpack(2));
+    EXPECT_TRUE(helper.termwise_unpack.needUnpack(3));
+    EXPECT_TRUE(!helper.termwise_unpack.needUnpack(4));
+    EXPECT_TRUE(!helper.termwise_unpack.needUnpack(5));
+}
+
+TEST("test that init range works for terwise too.") { // runs InitRangeVerifier on a non-strict and on a strict wrapped iterator
+    search::test::InitRangeVerifier ir;
+    ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), false), false));
+    ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), true), true));
+}
+
+//-----------------------------------------------------------------------------
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/.gitignore b/searchlib/src/tests/queryeval/weak_and/.gitignore
new file mode 100644
index 00000000000..5bbecb89249
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/.gitignore
@@ -0,0 +1,7 @@
+/weak_and_bench
+/weak_and_test_expensive
+/parallel_weak_and_bench
+searchlib_weak_and_test_app
+searchlib_parallel_weak_and_bench_app
+searchlib_weak_and_bench_app
+searchlib_weak_and_test_expensive_app
diff --git a/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt
new file mode 100644
index 00000000000..b3839ac75f3
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_test_app
+    SOURCES
+    weak_and_test.cpp
+    DEPENDS
+    searchlib
+    searchlib_test
+)
+vespa_add_test(NAME searchlib_weak_and_test_app COMMAND searchlib_weak_and_test_app)
+vespa_add_executable(searchlib_weak_and_test_expensive_app
+    SOURCES
+    weak_and_test_expensive.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_test_expensive_app NO_VALGRIND COMMAND searchlib_weak_and_test_expensive_app)
+vespa_add_executable(searchlib_weak_and_bench_app
+    SOURCES
+    weak_and_bench.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_bench_app COMMAND searchlib_weak_and_bench_app BENCHMARK) # test name now matches its own executable
+vespa_add_executable(searchlib_parallel_weak_and_bench_app
+    SOURCES
+    parallel_weak_and_bench.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_parallel_weak_and_bench_app COMMAND searchlib_parallel_weak_and_bench_app BENCHMARK) # test name now matches its own executable
diff --git a/searchlib/src/tests/queryeval/weak_and/FILES b/searchlib/src/tests/queryeval/weak_and/FILES
new file mode 100644
index 00000000000..972727bfa00
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/FILES
@@ -0,0 +1,2 @@
+weak_and_test.cpp
+weak_and_bench.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
new file mode 100644
index 00000000000..8e5b7fc7b85
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "wand_bench_setup.hpp"
+
+TEST_FF("benchmark", VespaParallelWandFactory(1000),  WandSetup(f1,    10, 10000000)) { f2.benchmark(); } // unfiltered runs: both factories at 10, 100 and 1000
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1,    10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000),  WandSetup(f1,   100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1,   100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000),  WandSetup(f1,  1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1,  1000, 10000000)) { f2.benchmark(); }
+
+TEST_FFF("benchmark", VespaParallelWandFactory(1000),  FilterFactory(f1, 2), WandSetup(f2,    10, 10000000)) { f3.benchmark(); } // same matrix, with FilterFactory(f1, 2) wrapped around the factory
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,    10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000),  FilterFactory(f1, 2), WandSetup(f2,   100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,   100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000),  FilterFactory(f1, 2), WandSetup(f2,  1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,  1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
new file mode 100644
index 00000000000..f130f0d1012
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
@@ -0,0 +1,132 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+using search::queryeval::wand::DotProductScorer;
+using search::queryeval::wand::TermFrequencyScorer;
+using namespace search::queryeval;
+
+namespace rise {
+
+struct TermFreqScorer
+{
+    // Upper bound for a term: defer to the stock term-frequency scorer.
+    static int64_t calculateMaxScore(const wand::Term &term) { return TermFrequencyScorer::calculateMaxScore(term); }
+    // Unpacks the term's iterator at docId; the score reported is simply the term's max score.
+    static int64_t calculateScore(const wand::Term &term, uint32_t docId) {
+        term.search->unpack(docId);
+        return term.maxScore;
+    }
+};
+
+template 
+class RiseWand : public search::queryeval::SearchIterator
+{   // WAND-style SearchIterator; the weak_and tests and benchmarks use it as a comparison implementation.
+public:
+    typedef uint32_t docid_t;
+    typedef uint64_t score_t;
+    typedef search::queryeval::wand::Terms Terms;
+    typedef search::queryeval::SearchIterator *PostingStreamPtr;
+
+private:
+    // Comparator ordering two streams by their current doc id. The arguments
+    // a and b are indices into the streams vector (and into _streamDocIds).
+    class StreamComparator : public std::binary_function
+    {
+    private:
+        const docid_t *_streamDocIds;
+        //const addr_t *const *_streamPayloads;
+
+    public:
+        StreamComparator(const docid_t *streamDocIds);
+        //const addr_t *const *streamPayloads);
+        inline bool operator()(const uint16_t a, const uint16_t b);
+    };
+
+    // Number of streams present in the query.
+    uint32_t _numStreams;
+
+    // We own our substreams: the destructor deletes every element.
+    std::vector _streams;
+    // Position (in _streamIndices) of the last stream sitting on the current hit; set by next().
+    size_t _lastPivotIdx;
+
+    // Array holding the current doc id of each stream.
+    docid_t *_streamDocIds;
+
+    // Two arrays of indices into the _streams vector; the second one is
+    // scratch space for a copying merge (reportedly faster than inplace_merge).
+    uint16_t *_streamIndices;
+    uint16_t *_streamIndicesAux;
+
+    // Comparator instance bound to _streamDocIds.
+    StreamComparator _streamComparator;
+
+    //-------------------------------------------------------------------------
+    // Variables used for scoring and pruning.
+
+    size_t                           _n;            // number of best scores tracked in _scores
+    score_t                          _limit;        // threshold used by next(); starts at 1, follows _scores.front() once _n scores are held
+    score_t                         *_streamScores; // max score of each stream
+    vespalib::PriorityQueue _scores;
+    Terms                            _terms;
+
+    //-------------------------------------------------------------------------
+
+    /**
+     * Find the pivot feature index.
+     *
+     * @param threshold  score threshold
+     * @param pivotIdx   receives the pivot index when one is found
+     *
+     * @return  whether a valid pivot index was found
+     */
+    bool _findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx);
+
+    /**
+     * Move the first numStreamsToMove streams in the stream
+     * vector to their next doc, then sort them.
+     *
+     * @param numStreamsToMove  the number of streams that should move
+     */
+    void _moveStreamsAndSort(const uint32_t numStreamsToMove);
+
+    /**
+     * Move the first numStreamsToMove streams in the stream
+     * vector to desiredDocId, or to the first doc id greater than
+     * desiredDocId if desiredDocId does not exist in the stream,
+     * then sort them.
+     *
+     * @param numStreamsToMove  the number of streams that should move
+     * @param desiredDocId  desired doc id
+     *
+     */
+    void _moveStreamsToDocAndSort(const uint32_t numStreamsToMove, const docid_t desiredDocId);
+
+    /**
+     * Sort and merge step for WAND.
+     *
+     * @param numStreamsToSort  the number of streams (counted from the first one) that
+     *                          should be sorted and then merged with the rest
+     *
+     */
+    void _sortMerge(const uint32_t numStreamsToSort);
+
+public:
+    RiseWand(const Terms &terms, uint32_t n);
+    virtual ~RiseWand();
+    void next();
+    virtual void doSeek(uint32_t docid);
+    virtual void doUnpack(uint32_t docid);
+};
+
+typedef RiseWand > TermFrequencyRiseWand;
+typedef RiseWand > DotProductRiseWand;
+
+} // namespace rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
new file mode 100644
index 00000000000..02420e6c35d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include "rise_wand.h"
+#include 
+#include 
+#include 
+
+using search::queryeval::wand::TermFrequencyScorer;
+
+namespace rise {
+
+template 
+RiseWand::RiseWand(const Terms &terms, uint32_t n)
+    : _numStreams(0),
+      _streams(),
+      _lastPivotIdx(0),
+      _streamDocIds(new docid_t[terms.size()]),
+      _streamIndices(new uint16_t[terms.size()]),
+      _streamIndicesAux(new uint16_t[terms.size()]),
+      _streamComparator(_streamDocIds),
+      _n(n),
+      _limit(1),
+      _streamScores(new score_t[terms.size()]),
+      _scores(),
+      _terms(terms)
+{
+    // Cache every term's max score and take over its iterator (deleted in the destructor).
+    for (size_t t = 0; t != terms.size(); ++t) {
+        _terms[t].maxScore = Scorer::calculateMaxScore(terms[t]);
+        _streamScores[t] = _terms[t].maxScore;
+        _streams.push_back(terms[t].search);
+    }
+    _numStreams = _streams.size();
+    if (_numStreams == 0) {
+        setAtEnd();
+    }
+    // Start from the identity permutation and the streams' current doc ids, then order by doc id.
+    for (uint32_t s = 0; s < _numStreams; ++s) {
+        _streamIndices[s] = s;
+        _streamDocIds[s] = _streams[s]->getDocId();
+    }
+    std::sort(_streamIndices, _streamIndices + _numStreams, _streamComparator);
+}
+
+template 
+RiseWand::~RiseWand()
+{
+    // The wand owns its child iterators ...
+    for (size_t s = 0; s != _streams.size(); ++s) { delete _streams[s]; }
+    // ... and the four helper arrays allocated by the constructor.
+    delete [] _streamScores;
+    delete [] _streamIndicesAux;
+    delete [] _streamIndices;
+    delete [] _streamDocIds;
+}
+
+template 
+void
+RiseWand::next()
+{
+    // Moves the wand to the next candidate doc given the streams' current positions; sets at-end when no pivot exists.
+    // We do not check whether the stream is already at the end
+    // here, based on the assumption that the application won't call
+    // next() for streams that are already at the end, or at least
+    // won't do this frequently.
+
+    uint32_t pivotIdx;
+    docid_t pivotDocId = search::endDocId;
+    score_t threshold = _limit;
+
+    while (true) {
+        // No pivot means the remaining streams cannot satisfy the threshold any more.
+        if (!_findPivotFeatureIdx(threshold, pivotIdx)) {
+            setAtEnd();
+            return;
+        }
+
+        pivotDocId = _streamDocIds[_streamIndices[pivotIdx]];
+
+        if (_streamDocIds[_streamIndices[0]] == _streamDocIds[_streamIndices[pivotIdx]]) {
+
+            // Found a candidate. All cursors up to the stream at pivotIdx point to
+            // the same doc, and this doc is the candidate for full evaluation.
+            setDocId(pivotDocId);
+
+            // Advance pivotIdx far enough that all streams positioned at pivotDocId are included.
+            while (pivotIdx < _numStreams-1 && _streamDocIds[_streamIndices[pivotIdx+1]] == pivotDocId) {
+                ++pivotIdx;
+            }
+
+            _lastPivotIdx = pivotIdx;
+            return; // scoring and threshold adjustment are done in doUnpack
+
+        } else { // not all cursors up to the pivot are aligned at the same doc yet
+
+            // Decrease pivotIdx to the first stream pointing at pivotDocId.
+            while (pivotIdx && _streamDocIds[_streamIndices[pivotIdx-1]] == pivotDocId) {
+                --pivotIdx;
+            }
+
+            _moveStreamsToDocAndSort(pivotIdx, pivotDocId);
+        }
+
+    }  /* while (true) */
+}
+
+template 
+bool
+RiseWand::_findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx)
+{
+    // Walk the streams in _streamIndices order, summing their max scores, until
+    // the running upper bound satisfies Cmp against the threshold.
+    score_t upperBound = 0;
+    uint32_t cursor = 0;
+    while (!Cmp()(upperBound, threshold) && cursor < _numStreams) {
+        upperBound += _streamScores[_streamIndices[cursor]];
+        ++cursor;
+    }
+    if (!Cmp()(upperBound, threshold)) {
+        return false;        // even all streams together do not satisfy the threshold
+    }
+    pivotIdx = cursor - 1;   // the stream whose max score tipped the sum over
+    return true;
+}
+
+template 
+void
+RiseWand::_moveStreamsAndSort(const uint32_t numStreamsToMove)
+{   // NOTE(review): the for-header below is damaged in this copy of the patch (text between '<' and '->' is missing); restore it from upstream.
+    for (uint32_t i=0; iseek(_streams[_streamIndices[i]]->getDocId() + 1);
+        _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();  // refresh the cached doc id after the seek
+    }
+    _sortMerge(numStreamsToMove);  // sort the moved streams and merge them with the rest
+}
+
+template 
+void
+RiseWand::_moveStreamsToDocAndSort(const uint32_t numStreamsToMove,
+                                   const docid_t desiredDocId)
+{   // NOTE(review): the for-header below is damaged in this copy of the patch (text between '<' and '->' is missing); restore it from upstream.
+    for (uint32_t i=0; iseek(desiredDocId);
+        _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();  // refresh the cached doc id after the seek
+    }
+    _sortMerge(numStreamsToMove);  // sort the moved streams and merge them with the rest
+}
+
+template 
+inline
+void RiseWand::_sortMerge(const uint32_t numStreamsToMove)
+{   // NOTE(review): the body of this function is missing from this copy of the patch (lost from the first '<' onwards); restore it from upstream.
+    for (uint32_t i=0; i
+void
+RiseWand::doSeek(uint32_t docid)
+{
+    // Seeking to the doc right after the current hit only needs to advance the
+    // streams parked on that hit; any other target moves every stream.
+    const bool stepsPastHit = (getDocId() != beginId()) && ((docid - 1) == getDocId());
+    if (stepsPastHit) { _moveStreamsAndSort(_lastPivotIdx + 1); }
+    else              { _moveStreamsToDocAndSort(_numStreams, docid); }
+    next();
+}
+
+template 
+void
+RiseWand::doUnpack(uint32_t docid)
+{
+    score_t total = 0;   // summed over every stream positioned on this hit
+    for (size_t s = 0; s <= _lastPivotIdx; ++s) {
+        total += Scorer::calculateScore(_terms[_streamIndices[s]], docid);
+    }
+    // A score that does not make the best-_n set leaves the pruning limit untouched.
+    if (_scores.size() >= _n && !(_scores.front() < total)) {
+        return;
+    }
+    _scores.push(total);
+    if (_scores.size() > _n) {
+        _scores.pop_front();
+    }
+    if (_scores.size() == _n) { _limit = _scores.front(); }
+}
+
+/**
+ ************ BEGIN STREAM COMPARATOR *********************
+ */
+template 
+RiseWand::StreamComparator::StreamComparator(const docid_t *streamDocIds)
+    // Only the pointer is stored; the array itself belongs to the wand.
+    : _streamDocIds(streamDocIds)
+{
+}
+
+template 
+inline bool
+RiseWand::StreamComparator::operator()(const uint16_t a,
+                                       const uint16_t b)
+{
+    // a and b index _streamDocIds; the stream with the lower current doc id sorts first.
+    return _streamDocIds[a] < _streamDocIds[b];
+}
+
+/**
+ ************ END STREAM COMPARATOR *********************
+ */
+
+} // namespace rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
new file mode 100644
index 00000000000..4c7116edfc4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
@@ -0,0 +1,248 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "rise_wand.h"
+#include "rise_wand.hpp"
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace vespalib;
+
+typedef ParallelWeakAndSearch::MatchParams PWMatchParams;
+typedef ParallelWeakAndSearch::RankParams PWRankParams;
+
+namespace {
+
+struct Stats {
+    size_t hitCnt;       // number of hit() calls
+    size_t seekCnt;      // number of seek() calls
+    size_t unpackCnt;    // number of unpack() calls
+    size_t skippedDocs;  // running sum of the 'docs' values passed to seek()
+    size_t skippedHits;  // running sum of the 'hits' values passed to seek()
+    Stats() : hitCnt(0), seekCnt(0), unpackCnt(0), skippedDocs(0), skippedHits(0) {}
+    void hit() {
+        hitCnt += 1;
+    }
+    void unpack() {
+        unpackCnt += 1;
+    }
+    void seek(size_t docs, size_t hits) {
+        seekCnt += 1;
+        skippedDocs += docs;
+        skippedHits += hits;
+    }
+    // Writes all five counters to stderr as one line.
+    void print() {
+        fprintf(stderr, "Stats: hits=%zu, seeks=%zu, unpacks=%zu, skippedDocs=%zu, skippedHits=%zu\n",
+                hitCnt, seekCnt, unpackCnt, skippedDocs, skippedHits);
+    }
+};
+
+struct ModSearch : SearchIterator {
+    Stats   &stats;            // shared counters, updated on every seek and unpack
+    uint32_t step;             // hits are the multiples of step ...
+    uint32_t limit;            // ... that are strictly below limit
+    MinMaxPostingInfo info;    // built as (0, maxWeight)
+    TermFieldMatchData *tfmd;  // optional unpack target, may be NULL
+    ModSearch(Stats &stats_in, uint32_t step_in, uint32_t limit_in, int32_t maxWeight, TermFieldMatchData *tfmd_in)
+        : stats(stats_in), step(step_in), limit(limit_in), info(0, maxWeight), tfmd(tfmd_in) { }
+    // After the base-class range setup the iterator is placed on doc id 'step'.
+    void initRange(uint32_t begin, uint32_t end) override {
+        SearchIterator::initRange(begin, end);
+        setDocId(step);
+    }
+    virtual void doSeek(uint32_t docid) {
+        assert(docid > getDocId());
+        // Record the docs jumped over and how many multiples of step fit in that gap.
+        const uint32_t gap = docid - getDocId() - 1;
+        stats.seek(gap, gap / step);
+        // Round docid up to the nearest multiple of step.
+        const uint32_t overshoot = docid % step;
+        const uint32_t target = (overshoot == 0) ? docid : docid + (step - overshoot);
+        if (target >= limit) {
+            setAtEnd();
+            return;
+        }
+        assert(target >= docid);
+        setDocId(target);
+    }
+    virtual void doUnpack(uint32_t docid) {
+        if (tfmd != NULL) {
+            search::fef::TermFieldMatchDataPosition weightPos;
+            weightPos.setElementWeight(info.getMaxWeight());
+            tfmd->reset(docid);
+            tfmd->appendPosition(weightPos);
+        }
+        stats.unpack();
+    }
+    virtual const PostingInfo *getPostingInfo() const { return &info; }
+};
+
+struct WandFactory {  // Interface for the objects that build the iterator a benchmark or comparison test runs.
+    virtual std::string name() const = 0;  // label that ends up in the benchmark output
+    virtual SearchIterator::UP create(const wand::Terms &terms) = 0;  // build an iterator over the given terms
+    virtual ~WandFactory() {}
+};
+
+struct VespaWandFactory : WandFactory {
+    uint32_t n;
+    VespaWandFactory(uint32_t n_in) : n(n_in) {}
+    // Label used in the benchmark output.
+    virtual std::string name() const { return make_string("VESPA WAND (n=%u)", n); }
+    // Builds the iterator through WeakAndSearch::create() with n and a trailing 'true'.
+    virtual SearchIterator::UP create(const wand::Terms &terms) { return SearchIterator::UP(WeakAndSearch::create(terms, n, true)); }
+};
+
+struct VespaArrayWandFactory : WandFactory {
+    uint32_t n;
+    VespaArrayWandFactory(uint32_t n_in) : n(n_in) {}
+    // Label used in the benchmark output.
+    virtual std::string name() const { return make_string("VESPA ARRAY WAND (n=%u)", n); }
+    // Builds the iterator through WeakAndSearch::createArrayWand() with n and a trailing 'true'.
+    virtual SearchIterator::UP create(const wand::Terms &terms) { return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true)); }
+};
+
+struct VespaHeapWandFactory : WandFactory {
+    uint32_t n;
+    VespaHeapWandFactory(uint32_t n_in) : n(n_in) {}
+    // Label used in the benchmark output.
+    virtual std::string name() const { return make_string("VESPA HEAP WAND (n=%u)", n); }
+    // Builds the iterator through WeakAndSearch::createHeapWand() with n and a trailing 'true'.
+    virtual SearchIterator::UP create(const wand::Terms &terms) { return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true)); }
+};
+
+struct VespaParallelWandFactory : public WandFactory {
+    SharedWeakAndPriorityQueue scores;   // score queue handed to every iterator this factory creates
+    TermFieldMatchData rootMatchData;    // root match data handed to every iterator this factory creates
+    VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {}
+    virtual std::string name() const { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
+    // Builds a ParallelWeakAndSearch over terms, wired to this factory's queue and root match data.
+    virtual SearchIterator::UP create(const wand::Terms &terms) {
+        return SearchIterator::UP(ParallelWeakAndSearch::create(
+                terms, PWMatchParams(scores, 0, 1, 1), PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct VespaParallelArrayWandFactory : public VespaParallelWandFactory {
+    VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+    virtual std::string name() const { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
+    // Same inputs as the base factory, but built through createArrayWand().
+    virtual SearchIterator::UP create(const wand::Terms &terms) {
+        return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(
+                terms, PWMatchParams(scores, 0, 1, 1), PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct VespaParallelHeapWandFactory : public VespaParallelWandFactory {
+    VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+    virtual std::string name() const { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
+    // Same inputs as the base factory, but built through createHeapWand().
+    virtual SearchIterator::UP create(const wand::Terms &terms) {
+        return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(
+                terms, PWMatchParams(scores, 0, 1, 1), PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct TermFrequencyRiseWandFactory : WandFactory {
+    uint32_t n;
+    TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {}
+    // Label used in the benchmark output.
+    virtual std::string name() const { return make_string("RISE WAND TF (n=%u)", n); }
+    // Builds a rise::TermFrequencyRiseWand over the given terms.
+    virtual SearchIterator::UP create(const wand::Terms &terms) { return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n)); }
+};
+
+struct DotProductRiseWandFactory : WandFactory {
+    uint32_t n;
+    DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {}
+    // Label used in the benchmark output.
+    virtual std::string name() const { return make_string("RISE WAND DP (n=%u)", n); }
+    // Builds a rise::DotProductRiseWand over the given terms.
+    virtual SearchIterator::UP create(const wand::Terms &terms) { return SearchIterator::UP(new rise::DotProductRiseWand(terms, n)); }
+};
+
+struct FilterFactory : WandFactory {
+    WandFactory &factory;  // factory producing the wand that gets wrapped
+    Stats stats;           // counters for the ModSearch children created below
+    uint32_t n;            // step (and max weight) of the ModSearch child
+    FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {}
+    virtual std::string name() const { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
+    virtual SearchIterator::UP create(const wand::Terms &terms) {
+        AndNotSearch::Children parts;
+        parts.push_back(factory.create(terms).release());                    // child 0: the wrapped wand
+        parts.push_back(new ModSearch(stats, n, search::endDocId, n, NULL)); // child 1: every n-th doc, no match data
+        return SearchIterator::UP(AndNotSearch::create(parts, true));
+    }
+};
+
+struct Setup {
+    // Base class for one benchmark case: subclasses supply name() and create().
+    Stats    stats;      // accumulated over all perform() calls
+    double   minTimeMs;  // fastest perform() pass seen so far
+    Setup() : stats(), minTimeMs(10000000.0) {}
+    virtual ~Setup() {}
+    virtual std::string name() const = 0;
+    virtual SearchIterator::UP create() = 0;
+    // One timed pass: create a fresh iterator, then visit and unpack every hit from doc id 1 on.
+    void perform() {
+        SearchIterator::UP iterator = create();
+        FastOS_Time stopwatch;
+        stopwatch.SetNow();
+        iterator->seek(1);
+        while (!iterator->isAtEnd()) {
+            stats.hit();
+            iterator->unpack(iterator->getDocId());
+            iterator->seek(iterator->getDocId() + 1);
+        }
+        const double elapsedMs = stopwatch.MilliSecsToNow();
+        if (elapsedMs < minTimeMs) { minTimeMs = elapsedMs; }
+    }
+    // Five passes; the counters are printed after the first one, the best time after the last.
+    void benchmark() {
+        fprintf(stderr, "running benchmark for %s...\n", name().c_str());
+        for (size_t pass = 0; pass < 5; ++pass) {
+            perform();
+            if (pass == 0) { stats.print(); }
+        }
+        fprintf(stderr, "time (ms): %g\n", minTimeMs);
+    }
+};
+
+struct WandSetup : Setup {
+    WandFactory &factory;     // builds the iterator under test from the generated terms
+    uint32_t childCnt;        // number of terms to generate
+    uint32_t limit;           // exclusive upper doc id bound given to every ModSearch
+    uint32_t weight;          // weight given to every term (always 100)
+    MatchData::UP matchData;  // replaced on every create() call
+    WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {}
+    virtual std::string name() const {
+        return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str());
+    }
+    virtual SearchIterator::UP create() {
+        MatchDataLayout layout;
+        std::vector handles;
+        for (size_t h = 0; h < childCnt; ++h) { handles.push_back(layout.allocTermField(0)); }
+        matchData = layout.createMatchData();
+        // Term k (1-based) hits every k-th doc below limit and uses k as its max weight.
+        wand::Terms terms;
+        for (size_t idx = 0; idx < childCnt; ++idx) {
+            const size_t mod = idx + 1;
+            TermFieldMatchData *tfmd = matchData->resolveTermField(handles[idx]);
+            terms.push_back(wand::Term(new ModSearch(stats, mod, limit, mod, tfmd), weight, limit / mod, tfmd));
+        }
+        return factory.create(terms);
+    }
+};
+
+} // namespace 
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
new file mode 100644
index 00000000000..1eba66a524f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "wand_bench_setup.hpp"
+// Unfiltered runs: Vespa WAND vs. RISE term-frequency WAND (both created with 1000) over 10, 100 and 1000 terms and a doc limit of 10000000.
+TEST_FF("benchmark", VespaWandFactory(1000),             WandSetup(f1,    10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1,    10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000),             WandSetup(f1,   100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1,   100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000),             WandSetup(f1,  1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1,  1000, 10000000)) { f2.benchmark(); }
+// Same matrix, but each wand factory (f1) is wrapped in a FilterFactory with mod=2 (f2) before it reaches the setup (f3).
+TEST_FFF("benchmark", VespaWandFactory(1000),             FilterFactory(f1, 2), WandSetup(f2,    10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,    10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000),             FilterFactory(f1, 2), WandSetup(f2,   100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,   100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000),             FilterFactory(f1, 2), WandSetup(f2,  1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2,  1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
new file mode 100644
index 00000000000..3c64db1eb84
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace search::queryeval::test;
+using search::test::InitRangeVerifier;
+
+typedef SearchHistory History;
+
+namespace {
+
+struct MyWandSpec : public WandSpec  // WandSpec whose create() yields a WeakAndSearch wrapped in a TrackedSearch labelled "WAND"
+{
+    uint32_t n;
+    // n_ is stored and later forwarded unchanged as the second argument of WeakAndSearch::create().
+    MyWandSpec(uint32_t n_) : WandSpec(), n(n_) {}
+    SearchIterator *create() {
+        return new TrackedSearch("WAND", getHistory(), WeakAndSearch::create(getTerms(), n, true));
+    }
+};
+
+struct SimpleWandFixture {  // two-leaf WAND created with n=2 and searched once in the constructor
+    MyWandSpec   spec;
+    SimpleResult hits;  // outcome of that single search
+    SimpleWandFixture() : spec(2), hits() {
+        spec.leaf(LeafSpec("foo").doc(1).doc(2).doc(3).doc(4).doc(5).doc(6));
+        spec.leaf(LeafSpec("bar").doc(1).doc(3).doc(5));  // bar only covers the odd docs
+        SearchIterator::UP search(spec.create());
+        hits.search(*search);
+    }
+};
+
+struct AdvancedWandFixture {  // five leaves with pairwise disjoint docs, WAND created with n=100, searched once in the constructor
+    MyWandSpec   spec;
+    SimpleResult hits;  // outcome of that single search
+    AdvancedWandFixture() : spec(100), hits() {
+        spec.leaf(LeafSpec("1").doc(1).doc(11).doc(111));
+        spec.leaf(LeafSpec("2").doc(2).doc(12).doc(112));
+        spec.leaf(LeafSpec("3").doc(3).doc(13).doc(113));
+        spec.leaf(LeafSpec("4").doc(4).doc(14).doc(114));
+        spec.leaf(LeafSpec("5").doc(5).doc(15).doc(115));
+        SearchIterator::UP search(spec.create());
+        hits.search(*search);
+    }
+};
+
+struct WeightOrder {
+    // Comparator ordering terms by ascending weight.
+    // NOTE(review): not referenced anywhere in the visible part of this test file.
+    bool operator()(const wand::Term &t1, const wand::Term &t2) const { return t1.weight < t2.weight; }
+};
+
+} // namespace 
+
+TEST_F("require that wand prunes bad hits after enough good ones are obtained", SimpleWandFixture) {
+    EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits);  // foo-only docs 4 and 6 are dropped, foo-only doc 2 still gets through
+}
+
+TEST_F("require that wand uses subsearches as expected", SimpleWandFixture) {
+    EXPECT_EQUAL(History()  // expected call trace; each source line below starts with one seek or unpack on "WAND"
+                 .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1)
+                 .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1)
+                 .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2)
+                 .unpack("WAND", 2).unpack("foo", 2)
+                 .seek("WAND", 3).step("WAND", 3)
+                 .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3)
+                 .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5)
+                 .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5)
+                 .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId),
+                 f.spec.getHistory());
+}
+
+TEST_F("require that documents are considered in the right order", AdvancedWandFixture) {
+    EXPECT_EQUAL(SimpleResult()  // all 15 docs from the five leaves, in ascending doc id order
+                 .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
+                 .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
+                 .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits);
+}
+
+TEST("require that initial docid for subsearches are taken into account") {
+    History history;
+    wand::Terms terms;
+    terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1));  // child constructed with endDocId
+    terms.push_back(wand::Term(new TrackedSearch("bar", history, new EagerChild(10)), 100, 2));  // child constructed with doc id 10
+    SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true)));
+    SimpleResult hits;
+    hits.search(*search);
+    EXPECT_EQUAL(SimpleResult().addHit(10), hits);
+    EXPECT_EQUAL(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10)  // no child seek is expected before the first hit
+                 .seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId),
+                 history);
+}
+
+TEST("verify initRange with search iterator children") {
+    const size_t num_children = 7;
+    InitRangeVerifier ir;
+    using DocIds = InitRangeVerifier::DocIds;
+    std::vector split_lists(num_children);
+    auto full_list = ir.getExpectedDocIds();
+    for (size_t i = 0; i < full_list.size(); ++i) {
+        split_lists[i % num_children].push_back(full_list[i]);  // deal the expected doc ids round-robin over the children
+    }
+    for (bool strict: {false, true}) {  // run the same verification for both values of the strict flag
+        wand::Terms terms;
+        for (size_t i = 0; i < num_children; ++i) {
+            terms.emplace_back(ir.createIterator(split_lists[i], strict).release(),
+                               100, split_lists[i].size());
+        }
+        SearchIterator::UP itr(WeakAndSearch::create(terms, -1, strict));  // NOTE(review): n is passed as -1, presumably wrapping to the largest unsigned value - confirm
+        ir.verify(*itr);
+    }
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
new file mode 100644
index 00000000000..8f60b6dd8c7
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp
@@ -0,0 +1,102 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "wand_bench_setup.hpp"
+
+using namespace rise;
+
+namespace {
+
+template 
+void checkWandHits(WandFactory &vespa, WandFactory &rise, uint32_t step, uint32_t filter) {  // walks both iterators in lock-step and asserts they agree on every hit
+    WandSetup vespaSetup(vespa, 500, 5000000);
+    WandSetup riseSetup(rise, 500, 5000000);
+    SearchIterator::UP s1 = vespaSetup.create();
+    s1->initFullRange();
+    SearchIterator::UP s2 = riseSetup.create();
+    s2->initFullRange();
+    ASSERT_TRUE(dynamic_cast(s1.get()) != 0);  // NOTE(review): the dynamic_cast target types on these four lines were lost from this copy of the patch
+    ASSERT_TRUE(dynamic_cast(s2.get()) == 0);
+    ASSERT_TRUE(dynamic_cast(s2.get()) != 0);
+    ASSERT_TRUE(dynamic_cast(s1.get()) == 0);
+    s1->seek(1);
+    s2->seek(1);
+    while (!s1->isAtEnd() &&
+           !s2->isAtEnd())
+    {
+        ASSERT_EQUAL(s1->getDocId(), s2->getDocId());
+        if ((filter == 0) || ((s1->getDocId() % filter) != 0)) {  // hits divisible by filter are not unpacked; filter == 0 unpacks every hit
+            s1->unpack(s1->getDocId());
+            s2->unpack(s2->getDocId());
+        }
+        s1->seek(s1->getDocId() + step);  // step > 1 makes both iterators skip ahead
+        s2->seek(s2->getDocId() + step);
+    }
+    ASSERT_TRUE(s1->isAtEnd());  // both iterators must run out together
+    ASSERT_TRUE(s2->isAtEnd());
+}
+
+} // namespace 
+
+TEST("require that mod search works") {
+    Stats stats;
+    SearchIterator::UP search(new ModSearch(stats, 3, 8, 3, NULL));  // step 3, limit 8, no match data
+    SimpleResult hits;
+    hits.search(*search);
+    EXPECT_EQUAL(SimpleResult().addHit(3).addHit(6), hits);  // the multiples of 3 below 8
+}
+
+//---- WeakAndSearch ------------------------------------------------------------------------------
+// checkWandHits(vespa, rise, step, filter): step is added to the current doc id for every seek; filter == 0 unpacks every hit, otherwise hits divisible by filter are not unpacked.
+TEST_FF("require that (array) WAND and RISE WAND gives the same hits",
+        VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (heap) WAND and RISE WAND gives the same hits",
+        VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (array) WAND and RISE WAND gives the same hits with filtering and skipping",
+        VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 123, 5);
+}
+
+TEST_FF("require that (heap) WAND and RISE WAND gives the same hits with filtering and skipping",
+        VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 123, 5);
+}
+
+
+//---- ParallelWeakAndSearch ----------------------------------------------------------------------
+// Same four scenarios for the parallel wand: the array/heap PWAND factories are compared against the RISE dot-product wand.
+TEST_FF("require that (array) PWAND and RISE WAND gives the same hits",
+        VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits",
+        VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 1, 0);
+}
+
+TEST_FF("require that (array) PWAND and RISE WAND gives the same hits with filtering and skipping",
+        VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 123, 5);
+}
+
+TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits with filtering and skipping",
+        VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500))
+{
+    checkWandHits(f1, f2, 123, 5);
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/.gitignore b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore
new file mode 100644
index 00000000000..b10f1cb370d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore
@@ -0,0 +1 @@
+searchlib_weak_and_heap_test_app
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt
new file mode 100644
index 00000000000..cacf4987aff
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_heap_test_app
+    SOURCES
+    weak_and_heap_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_heap_test_app COMMAND searchlib_weak_and_heap_test_app)
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/DESC b/searchlib/src/tests/queryeval/weak_and_heap/DESC
new file mode 100644
index 00000000000..447bfc21e7c
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/DESC
@@ -0,0 +1 @@
+weak_and_heap test. Take a look at weak_and_heap_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/FILES b/searchlib/src/tests/queryeval/weak_and_heap/FILES
new file mode 100644
index 00000000000..05d3f4c5df0
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/FILES
@@ -0,0 +1 @@
+weak_and_heap_test.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp
new file mode 100644
index 00000000000..ee44abf2b27
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp
@@ -0,0 +1,101 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+
+using namespace search::queryeval;
+typedef wand::score_t score_t;
+
+struct Scores : public std::vector<score_t> { // chainable builder for a list of scores
+    Scores &s(score_t val) {
+        push_back(val);
+        return *this; // lets callers write Scores().s(1).s(2)...
+    }
+};
+
+void
+adjust(WeakAndHeap &heap, const Scores &scores)
+{
+    Scores tmp = scores; // work on a copy: the range is handed to the heap through non-const pointers
+    heap.adjust(tmp.data(), tmp.data() + tmp.size()); // data() is valid for an empty vector; &tmp[0] is not
+}
+
+void
+assertScores(const Scores &exp, SharedWeakAndPriorityQueue &heap)
+{
+    ASSERT_EQUAL(exp.size(), heap.getScores().size()); // sizes must agree before the scores are walked
+    for (size_t i = 0; i < exp.size(); ++i) {
+        score_t front = heap.getScores().front();
+        EXPECT_EQUAL(exp[i], front); // exp lists the scores in the order they appear at the front
+        heap.getScores().pop_front(); // consumes the score: this check drains the heap's score container
+    }
+}
+
+struct NullFixture { // queue constructed with 0 (the "0 size" case)
+    SharedWeakAndPriorityQueue h;
+    NullFixture() : h(0) {}
+};
+
+struct EmptyFixture { // queue constructed with 4; no scores are added by the fixture
+    SharedWeakAndPriorityQueue h;
+    EmptyFixture() : h(4) {}
+};
+
+struct FilledFixture { // queue constructed with 4 and pre-loaded with the scores 3, 5, 7 and 9
+    SharedWeakAndPriorityQueue h;
+    FilledFixture() : h(4) {
+        adjust(h, Scores().s(3).s(5).s(7).s(9));
+        EXPECT_EQUAL(3, h.getMinScore()); // sanity check: the lowest pre-loaded score is the min score
+    }
+};
+
+TEST_F("require that SharedWeakAndPriorityQueue with 0 size gives max threshold", NullFixture)
+{
+    EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore()); // max threshold before any adjust
+    adjust(f.h, Scores().s(100));
+    EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore()); // ...and unchanged after offering a score
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be filled one-by-one", EmptyFixture)
+{
+    adjust(f.h, Scores().s(4));
+    EXPECT_EQUAL(0, f.h.getMinScore()); // min score is expected to stay 0 for the first three scores
+    adjust(f.h, Scores().s(3));
+    EXPECT_EQUAL(0, f.h.getMinScore());
+    adjust(f.h, Scores().s(2));
+    EXPECT_EQUAL(0, f.h.getMinScore());
+    adjust(f.h, Scores().s(1));
+    EXPECT_EQUAL(1, f.h.getMinScore()); // fourth score added (fixture uses h(4)): min score becomes the lowest one
+    assertScores(Scores().s(1).s(2).s(3).s(4), f.h); // scores are expected in ascending order
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be filled all-at-once", EmptyFixture)
+{
+    adjust(f.h, Scores().s(4).s(3).s(2).s(1)); // same four scores as the one-by-one test, in a single call
+    EXPECT_EQUAL(1, f.h.getMinScore());
+    assertScores(Scores().s(1).s(2).s(3).s(4), f.h);
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be adjusted one-by-one", FilledFixture)
+{
+    adjust(f.h, Scores().s(2)); // below the current min score (3): min score expected to stay put
+    EXPECT_EQUAL(3, f.h.getMinScore());
+    adjust(f.h, Scores().s(3)); // equal to the current min score: min score expected to stay put
+    EXPECT_EQUAL(3, f.h.getMinScore());
+    adjust(f.h, Scores().s(6)); // above the min score: min score expected to rise to 5
+    EXPECT_EQUAL(5, f.h.getMinScore());
+    adjust(f.h, Scores().s(8)); // above the min score: min score expected to rise to 6
+    EXPECT_EQUAL(6, f.h.getMinScore());
+    adjust(f.h, Scores().s(4)); // below the current min score (6): min score expected to stay put
+    EXPECT_EQUAL(6, f.h.getMinScore());
+    assertScores(Scores().s(6).s(7).s(8).s(9), f.h);
+}
+
+TEST_F("require that SharedWeakAndPriorityQueue can be adjusted all-at-once", FilledFixture)
+{
+    adjust(f.h, Scores().s(2).s(3).s(6).s(8).s(4)); // same scores as the one-by-one test, in a single call
+    EXPECT_EQUAL(6, f.h.getMinScore());
+    assertScores(Scores().s(6).s(7).s(8).s(9), f.h);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore
new file mode 100644
index 00000000000..18fa7afeed4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore
@@ -0,0 +1 @@
+searchlib_weak_and_scorers_test_app
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt
new file mode 100644
index 00000000000..74a37c8fce8
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weak_and_scorers_test_app
+    SOURCES
+    weak_and_scorers_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_scorers_test_app COMMAND searchlib_weak_and_scorers_test_app)
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/DESC b/searchlib/src/tests/queryeval/weak_and_scorers/DESC
new file mode 100644
index 00000000000..ceaf1028aae
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/DESC
@@ -0,0 +1 @@
+weak_and_scorers test. Take a look at weak_and_scorers_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/FILES b/searchlib/src/tests/queryeval/weak_and_scorers/FILES
new file mode 100644
index 00000000000..7f3b71a9f34
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/FILES
@@ -0,0 +1 @@
+weak_and_scorers_test.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
new file mode 100644
index 00000000000..2dec1762c27
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::queryeval;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+typedef wand::Term Term;
+
+struct TestIterator : public SearchIterator // stub iterator with a fixed term weight and optional posting info
+{
+    MinMaxPostingInfo  _info;
+    int32_t            _termWeight;
+    bool               _useInfo;
+    TermFieldMatchData _tfmd;
+    uint32_t           _unpackDocId; // last docId passed to doUnpack(); 0 until then
+
+    typedef std::unique_ptr<TestIterator> UP;
+    TestIterator(int32_t maxWeight, int32_t termWeight, bool useInfo)
+        : _info(0, maxWeight),
+          _termWeight(termWeight),
+          _useInfo(useInfo),
+          _unpackDocId(0)
+    {}
+    virtual void doSeek(uint32_t docId) {
+        (void) docId; // seeking is a no-op in this stub
+    }
+    virtual void doUnpack(uint32_t docId) {
+        _unpackDocId = docId;
+        _tfmd.appendPosition(TermFieldMatchDataPosition(0, 0, _termWeight, 1)); // third argument is the term weight
+    }
+    virtual const PostingInfo *getPostingInfo() const {
+        return (_useInfo ? &_info : NULL); // no posting info is exposed when useInfo is false
+    }
+    static UP create(int32_t maxWeight, int32_t termWeight, bool useInfo) {
+        return UP(new TestIterator(maxWeight, termWeight, useInfo));
+    }
+};
+
+TEST("require that DotProductScorer calculates max score")
+{
+    TestIterator::UP itr = TestIterator::create(10, 0, true); // maxWeight 10, posting info enabled
+    Term term(itr.get(), 5, 0);
+    EXPECT_EQUAL(50, wand::DotProductScorer::calculateMaxScore(term)); // 50 == 5 (Term's 2nd argument) * 10 (maxWeight)
+}
+
+TEST("require that DotProductScorer uses default max weight when not available in search iterator")
+{
+    TestIterator::UP itr = TestIterator::create(10, 0, false); // posting info disabled: maxWeight 10 is not visible
+    Term term(itr.get(), 5, 0);
+    int64_t exp = (int64_t)5 * std::numeric_limits<int32_t>::max(); // widened first so the product cannot overflow
+    EXPECT_EQUAL(exp, wand::DotProductScorer::calculateMaxScore(term));
+}
+
+TEST("require that DotProductScorer calculates term score")
+{
+    TestIterator::UP itr = TestIterator::create(0, 7, false); // termWeight 7
+    Term term(itr.get(), 5, 0, &itr->_tfmd);
+    EXPECT_EQUAL(35, wand::DotProductScorer::calculateScore(term, 11)); // 35 == 5 (Term's 2nd argument) * 7 (termWeight)
+    EXPECT_EQUAL(11u, itr->_unpackDocId); // scoring must have unpacked docId 11 on the iterator
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/.gitignore b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore
new file mode 100644
index 00000000000..ab8cbb5bd5a
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore
@@ -0,0 +1 @@
+searchlib_weighted_set_term_test_app
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt
new file mode 100644
index 00000000000..4083762d115
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_weighted_set_term_test_app
+    SOURCES
+    weighted_set_term_test.cpp
+    DEPENDS
+    searchlib
+    searchlib_test
+)
+vespa_add_test(NAME searchlib_weighted_set_term_test_app COMMAND searchlib_weighted_set_term_test_app)
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/DESC b/searchlib/src/tests/queryeval/weighted_set_term/DESC
new file mode 100644
index 00000000000..040554bdd0e
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/DESC
@@ -0,0 +1 @@
+weighted_set_term test. Take a look at weighted_set_term_test.cpp for details.
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/FILES b/searchlib/src/tests/queryeval/weighted_set_term/FILES
new file mode 100644
index 00000000000..9912bc9a4a2
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/FILES
@@ -0,0 +1 @@
+weighted_set_term_test.cpp
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
new file mode 100644
index 00000000000..7436913b642
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("weighted_set_term_test");
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search;
+using namespace search::query;
+using namespace search::fef;
+using namespace search::queryeval;
+using search::test::InitRangeVerifier;
+using search::test::DocumentWeightAttributeHelper;
+
+namespace {
+
+void setupFakeSearchable(FakeSearchable &fake) { // registers docs 1..9 under "field" and "multi-field"
+    for (size_t docid = 1; docid < 10; ++docid) {
+        std::string token1 = vespalib::make_string("%zu", docid); // "<docid>"
+        std::string token2 = vespalib::make_string("1%zu", docid); // "1<docid>"
+        std::string token3 = vespalib::make_string("2%zu", docid); // "2<docid>"
+
+        fake.addResult("field", token1, FakeResult().doc(docid)); // "field" only knows the plain token
+        fake.addResult("multi-field", token1, FakeResult().doc(docid)); // "multi-field" knows all three tokens
+        fake.addResult("multi-field", token2, FakeResult().doc(docid));
+        fake.addResult("multi-field", token3, FakeResult().doc(docid));
+    }
+}
+
+struct WS { // builds a weighted set term from (token, weight) pairs and runs it against a Searchable
+    static const uint32_t fieldId = 42;
+    MatchDataLayout layout;
+    TermFieldHandle handle;
+    std::vector<std::pair<std::string, uint32_t> > tokens; // (token, weight) in insertion order
+
+    WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() {
+        MatchData::UP tmp = layout.createMatchData();
+        ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); // sanity check of the allocated handle
+    }
+
+    WS &add(const std::string &token, uint32_t weight) {
+        tokens.push_back(std::make_pair(token, weight));
+        return *this; // chainable
+    }
+
+    Node::UP createNode() const { // one SimpleStringTerm child per added token, all under the "view" view
+        SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0));
+        for (size_t i = 0; i < tokens.size(); ++i) {
+            node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second))));
+        }
+        return Node::UP(node);
+    }
+
+    bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+        FakeRequestContext requestContext;
+        MatchData::UP md = layout.createMatchData();
+        Node::UP node = createNode();
+        FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+        queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+        bp->fetchPostings(strict);
+        SearchIterator::UP sb = bp->createSearch(*md, strict);
+        return (dynamic_cast<WeightedSetTermSearch *>(sb.get()) != 0); // true when the generic iterator was created
+    }
+
+    FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
+        FakeRequestContext requestContext;
+        MatchData::UP md = layout.createMatchData();
+        Node::UP node = createNode();
+        FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+        queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
+        bp->fetchPostings(strict);
+        SearchIterator::UP sb = bp->createSearch(*md, strict);
+        sb->initFullRange();
+        FakeResult result;
+        for (uint32_t docId = 1; docId < 10; ++docId) { // same docid range as setupFakeSearchable()
+            if (sb->seek(docId)) {
+                sb->unpack(docId);
+                result.doc(docId);
+                TermFieldMatchData &data = *md->resolveTermField(handle);
+                FieldPositionsIterator itr = data.getIterator();
+                for (; itr.valid(); itr.next()) { // copy every unpacked position into the result
+                    result.elem(itr.getElementId());
+                    result.weight(itr.getElementWeight());
+                    result.pos(itr.getPosition());
+                }
+            }
+        }
+        return result;
+    }
+};
+
+struct MockSearch : public SearchIterator { // child stub that counts doSeek() calls
+    int seekCnt;
+    uint32_t _initial; // docId set right after initRange(); unsigned to match the constructor argument
+    MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { }
+    void initRange(uint32_t begin, uint32_t end) override {
+        SearchIterator::initRange(begin, end);
+        setDocId(_initial);
+    }
+    void doSeek(uint32_t) override {
+        ++seekCnt;
+        setAtEnd(); // any real seek exhausts this iterator
+    }
+    void doUnpack(uint32_t) override {}
+};
+
+struct MockFixture { // wraps a single MockSearch child in a WeightedSetTermSearch
+    MockSearch *mock; // kept so tests can read seekCnt; the same pointer is passed to create() via children
+    TermFieldMatchData tfmd;
+    std::unique_ptr<SearchIterator> search;
+    MockFixture(uint32_t initial) : mock(0), tfmd(), search() {
+        std::vector<SearchIterator*> children;
+        std::vector<int32_t> weights;
+        mock = new MockSearch(initial);
+        children.push_back(mock);
+        weights.push_back(1);
+        search.reset(WeightedSetTermSearch::create(children, tfmd, weights));
+    }
+};
+
+} // namespace 
+
+TEST("testSimple") {
+    FakeSearchable index;
+    setupFakeSearchable(index);
+    FakeResult expect = FakeResult()
+                        .doc(3).elem(0).weight(30).pos(0)
+                        .doc(5).elem(0).weight(50).pos(0)
+                        .doc(7).elem(0).weight(70).pos(0);
+    WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); // "100" is not a token in the fake index
+    EXPECT_TRUE(ws.isGenericSearch(index, "field", true));
+    EXPECT_TRUE(ws.isGenericSearch(index, "field", false));
+    EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
+    EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
+
+    EXPECT_EQUAL(expect, ws.search(index, "field", true)); // same hits expected for both fields, strict or not
+    EXPECT_EQUAL(expect, ws.search(index, "field", false));
+    EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+    EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST("testMulti") {
+    FakeSearchable index;
+    setupFakeSearchable(index);
+    FakeResult expect = FakeResult() // doc 3 matches "23", "13" and "3"; doc 5 matches "15" and "5"; doc 7 matches "7"
+                        .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0)
+                        .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0)
+                        .doc(7).elem(0).weight(70).pos(0);
+    WS ws = WS().add("7", 70).add("5", 50).add("3", 30)
+            .add("15", 150).add("13", 130)
+            .add("23", 230).add("100", 1000); // "100" is not a token in the fake index
+    EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
+    EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
+
+    EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
+    EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+}
+
+TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) {
+    MockSearch *mock = f1.mock;
+    SearchIterator &search = *f1.search;
+    search.initFullRange();
+    EXPECT_EQUAL(search.beginId(), search.getDocId());
+    EXPECT_TRUE(!search.seek(1));
+    EXPECT_TRUE(search.isAtEnd());
+    EXPECT_EQUAL(0, mock->seekCnt); // the child reports endDocId right after initRange, so it must never be seeked
+}
+
+TEST_F("test Eager Matching Child", MockFixture(5)) {
+    MockSearch *mock = f1.mock;
+    SearchIterator &search = *f1.search;
+    search.initFullRange();
+    EXPECT_EQUAL(search.beginId(), search.getDocId());
+    EXPECT_TRUE(!search.seek(3)); // the child already sits at doc 5 after initRange
+    EXPECT_EQUAL(5u, search.getDocId());
+    EXPECT_EQUAL(0, mock->seekCnt); // ...so no child seek was needed
+    EXPECT_TRUE(search.seek(5));
+    EXPECT_EQUAL(5u, search.getDocId());
+    EXPECT_EQUAL(0, mock->seekCnt);
+    EXPECT_TRUE(!search.seek(7));
+    EXPECT_TRUE(search.isAtEnd());
+    EXPECT_EQUAL(1, mock->seekCnt); // first real child seek; MockSearch::doSeek() moves the child to the end
+}
+
+TEST("verify initRange with search iterator children") {
+    const size_t num_children = 7;
+    InitRangeVerifier ir;
+    using DocIds = InitRangeVerifier::DocIds;
+    std::vector<DocIds> split_lists(num_children);
+    auto full_list = ir.getExpectedDocIds();
+    for (size_t i = 0; i < full_list.size(); ++i) {
+        split_lists[i % num_children].push_back(full_list[i]); // deal the expected docids round-robin to the children
+    }
+    bool strict = true;
+    std::vector<SearchIterator*> children;
+    for (size_t i = 0; i < num_children; ++i) {
+        children.push_back(ir.createIterator(split_lists[i], strict).release()); // raw pointers are passed on to create()
+    }
+    TermFieldMatchData tfmd;
+    std::vector<int32_t> weights(num_children, 1);
+    SearchIterator::UP itr(WeightedSetTermSearch::create(children, tfmd, weights));
+    ir.verify(*itr);
+}
+
+TEST("verify initRange with document weight iterator children") {
+    const size_t num_children = 7;
+    InitRangeVerifier ir;
+    DocumentWeightAttributeHelper helper;
+    helper.add_docs(ir.getDocIdLimit());
+    auto full_list = ir.getExpectedDocIds();
+    for (size_t i = 0; i < full_list.size(); ++i) {
+        helper.set_doc(full_list[i], i % num_children, 1); // spread the expected docids over keys 0..num_children-1
+    }
+    TermFieldMatchData tfmd;
+    std::vector<int32_t> weights(num_children, 1);
+    std::vector<DocumentWeightIterator> children;
+    for (size_t i = 0; i < num_children; ++i) {
+        auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str()); // key i, looked up as a string
+        helper.dwa().create(dict_entry.posting_idx, children);
+    }
+    SearchIterator::UP itr(WeightedSetTermSearch::create(tfmd, weights, std::move(children)));
+    ir.verify(*itr);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
new file mode 100644
index 00000000000..88c86c1720e
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore
@@ -0,0 +1 @@
+searchlib_feature_name_extractor_test_app
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
new file mode 100644
index 00000000000..b1b81efd840
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_feature_name_extractor_test_app
+    SOURCES
+    feature_name_extractor_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_feature_name_extractor_test_app COMMAND searchlib_feature_name_extractor_test_app)
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES
new file mode 100644
index 00000000000..6f6f6c1df43
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES
@@ -0,0 +1 @@
+feature_name_extractor_test.cpp
diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
new file mode 100644
index 00000000000..12ce67a586a
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp
@@ -0,0 +1,79 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+
+using search::features::rankingexpression::FeatureNameExtractor;
+
+void verify_extract(const vespalib::string &input, // text handed to the extractor
+                    const vespalib::string &expect_symbol, // symbol the extractor should produce
+                    const vespalib::string &expect_after) // part of input that should be left unconsumed
+{
+    FeatureNameExtractor extractor;
+    const char *pos_in = input.data();
+    const char *end_in = input.data() + input.size();
+    vespalib::string symbol_out;
+    const char *pos_out = nullptr; // expected to be set by extract_symbol(); checked right after the call
+    extractor.extract_symbol(pos_in, end_in, pos_out, symbol_out);
+    ASSERT_TRUE(pos_out != nullptr);
+    vespalib::string after(pos_out, end_in); // everything from pos_out to the end of the input
+    EXPECT_EQUAL(expect_symbol, symbol_out);
+    EXPECT_EQUAL(expect_after, after);
+}
+
+TEST("require that basic names are extracted correctly") {
+    TEST_DO(verify_extract("foo+", "foo", "+")); // arguments: input, expected symbol, expected remainder
+    TEST_DO(verify_extract("foo.out+", "foo.out", "+"));
+    TEST_DO(verify_extract("foo(p1,p2)+", "foo(p1,p2)", "+"));
+    TEST_DO(verify_extract("foo(p1,p2).out+", "foo(p1,p2).out", "+"));
+}
+
+TEST("require that special characters are allowed in prefix and suffix") {
+    TEST_DO(verify_extract("_@$+", "_@$", "+")); // '_', '@' and '$' are the special characters covered here
+    TEST_DO(verify_extract("_@$.$@_+", "_@$.$@_", "+"));
+    TEST_DO(verify_extract("_@$(p1,p2)+", "_@$(p1,p2)", "+"));
+    TEST_DO(verify_extract("_@$(p1,p2).$@_+", "_@$(p1,p2).$@_", "+"));
+}
+
+TEST("require that dot is only allowed in suffix") {
+    TEST_DO(verify_extract("foo.bar+", "foo.bar", "+"));
+    TEST_DO(verify_extract("foo.bar.out+", "foo.bar.out", "+"));
+    TEST_DO(verify_extract("foo.bar(p1,p2)+", "foo.bar", "(p1,p2)+")); // a parameter list after a dot is not consumed
+    TEST_DO(verify_extract("foo.bar(p1,p2).out+", "foo.bar", "(p1,p2).out+"));
+    TEST_DO(verify_extract("foo(p1,p2).out.bar+", "foo(p1,p2).out.bar", "+")); // dots after the parameter list are fine
+}
+
+TEST("require that parameters can be nested") {
+    TEST_DO(verify_extract("foo(p1(a,b),p2(c,d(e,f))).out+", "foo(p1(a,b),p2(c,d(e,f))).out", "+")); // three levels deep
+}
+
+TEST("require that space is allowed among parameters") {
+    TEST_DO(verify_extract("foo( p1 ( a , b ) ).out+", "foo( p1 ( a , b ) ).out", "+")); // spaces are kept in the symbol
+}
+
+TEST("require that space is not allowed outside parameters") {
+    TEST_DO(verify_extract("foo +", "foo", " +")); // extraction stops at the first space outside a parameter list
+    TEST_DO(verify_extract("foo . out+", "foo", " . out+"));
+    TEST_DO(verify_extract("foo. out+", "foo.", " out+")); // the dot before the space is still consumed
+    TEST_DO(verify_extract("foo (p1,p2)+", "foo", " (p1,p2)+"));
+    TEST_DO(verify_extract("foo(p1,p2) +", "foo(p1,p2)", " +"));
+    TEST_DO(verify_extract("foo(p1,p2) .out+", "foo(p1,p2)", " .out+"));
+    TEST_DO(verify_extract("foo(p1,p2).out +", "foo(p1,p2).out", " +"));
+}
+
+TEST("require that parameters can be scientific numbers") {
+    TEST_DO(verify_extract("foo(1.3E+3,-1.9e-10).out+", "foo(1.3E+3,-1.9e-10).out", "+")); // '+'/'-' inside the list do not end the symbol
+}
+
+TEST("require that quoted parenthesis are not counted") {
+    TEST_DO(verify_extract("foo(a,b,\")\").out+", "foo(a,b,\")\").out", "+")); // raw input: foo(a,b,")").out+
+}
+
+TEST("require that escaped quotes does not unquote") {
+    TEST_DO(verify_extract("foo(a,b,\"\\\")\").out+", "foo(a,b,\"\\\")\").out", "+")); // raw input: foo(a,b,"\")").out+
+}
+
+TEST("require that escaped escape does not hinder unquote") {
+    TEST_DO(verify_extract("foo(a,b,\"\\\\\")\").out+", "foo(a,b,\"\\\\\")", "\").out+")); // raw input: foo(a,b,"\\")").out+ ; symbol ends at the first ')'
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/rankingexpression/rankingexpressionlist b/searchlib/src/tests/rankingexpression/rankingexpressionlist
new file mode 100644
index 00000000000..2ff1350025b
--- /dev/null
+++ b/searchlib/src/tests/rankingexpression/rankingexpressionlist
@@ -0,0 +1,160 @@
+# This file is a list of semicolon separated strings. The first string is the expression to be parsed, whereas all
+# following strings are allowed ways to print the parsed expression. If no alternatives are given, the expression can be
+# printed as the original. Note that all strings are trimmed before they are parsed / compared.
+  1
+1.0; 1.0; 1
+1e1; 1e1; 10
+1e-1; 1e-1; 0.1
+1.0e1; 1.0e1; 10
+1.0e-1; 1.0e-1; 0.1
+-1; -1
+1 + -1; 1 + -1; 1 - 1
+-1 + 1; -1 + 1
+tan(10)
+1
+ 1
+ 1 + 2
+ 1 - 2
+ 1 * 2
+ 1 / 2
+ 1 + 2 - 3
+ 1 + 2 - 3 * 4
+ 1 + 2 - 3 * 4 / 5
+1+2-3*4/5; 1 + 2 - 3 * 4 / 5
+(1)
+(1)+ 2; (1) + 2
+(1)+(2); (1) + (2)
+(1)+(2)-3; (1) + (2) - 3
+(1)+(2)-(3); (1) + (2) - (3)
+(1)+(2)-(3)*4; (1) + (2) - (3) * 4
+(1)+(2)-(3)*(4); (1) + (2) - (3) * (4)
+(1)+(2)-(3)*(4)/5; (1) + (2) - (3) * (4) / 5
+(1)+(2)-(3)*(4)/(5); (1) + (2) - (3) * (4) / (5)
+ 1 +(2)-(3)*(4)/(5); 1 + (2) - (3) * (4) / (5)
+ 1 + 2 -(3)*(4)/(5); 1 + 2 - (3) * (4) / (5)
+ 1 + 2 - 3 *(4)/(5); 1 + 2 - 3 * (4) / (5)
+ 1 + 2 - 3 * 4 /(5); 1 + 2 - 3 * 4 / (5)
+ 1 + 2 - 3 * 4 / 5 ; 1 + 2 - 3 * 4 / 5
+(1 + 2)
+(1 + 2)- 3; (1 + 2) - 3
+(1 + 2 - 3)
+(1 + 2 - 3)* 4; (1 + 2 - 3) * 4
+(1 + 2 - 3 * 4)
+(1 + 2 - 3 * 4)/ 5; (1 + 2 - 3 * 4) / 5
+(1 + 2 - 3 * 4 / 5)
+ 1 +(2 - 3 * 4 / 5); 1 + (2 - 3 * 4 / 5)
+ 1 + 2 -(3 * 4 / 5); 1 + 2 - (3 * 4 / 5)
+ 1 + 2 - 3 *(4 / 5); 1 + 2 - 3 * (4 / 5)
+1+2-3*(4/5); 1 + 2 - 3 * (4 / 5)
+log(1)
+log( 1 ); log(1)
+log( 1 + 2 ); log(1 + 2)
+log( 1 + 2 - 3 ); log(1 + 2 - 3)
+log( 1 + 2 - 3 * 4 ); log(1 + 2 - 3 * 4)
+log( 1 + 2 - 3 * 4 / 5 ); log(1 + 2 - 3 * 4 / 5)
+log((1 + 2)- 3 * 4 / 5 ); log((1 + 2) - 3 * 4 / 5)
+log( 1 +(2 - 3)* 4 / 5 ); log(1 + (2 - 3) * 4 / 5)
+log( 1 + 2 -(3 * 4)/ 5 ); log(1 + 2 - (3 * 4) / 5)
+log( 1 + 2 - 3 *(4 / 5)); log(1 + 2 - 3 * (4 / 5))
+log(1+2-3*4/5); log(1 + 2 - 3 * 4 / 5)
+""; ""
+"foo"
+"foo\""
+(1+"foo"); (1 + "foo")
+if("foo" == "bar", 1, 2); if ("foo" == "bar", 1, 2)
+cosh(1); cosh(1)
+cosh (1); cosh(1)
+cosh ( 1 ); cosh(1)
+cosh ( foo ); cosh(foo)
+cosh ( foo.out ); cosh(foo.out)
+cosh ( foo ( bar ) . out ); cosh(foo(bar).out)
+sin(10)
+cos(10)
+tan(10)
+acos(10)
+asin(10)
+atan(10)
+cosh(10)
+sinh(10)
+tanh(10)
+exp(10)
+log(10)
+log10(10)
+sqrt(10)
+ceil(10)
+fabs(10)
+floor(10)
+atan2(10, 20); atan2(10,20)
+ldexp(10, 20); ldexp(10,20)
+pow(10, 20); pow(10,20)
+fmod(10, 20); fmod(10,20)
+min(0, 1); min(0,1)
+max(1, 0); max(1,0)
+if(1<2,3,4); if (1 < 2, 3, 4)
+if(1>2,3,4); if (1 > 2, 3, 4)
+if(1==2,3,4); if (1 == 2, 3, 4)
+if(1~=2,3,4); if (1 ~= 2, 3, 4)
+if(1<=2,3,4); if (1 <= 2, 3, 4)
+if(1>=2,3,4); if (1 >= 2, 3, 4)
+if(1>=2,3,4,0.3); if (1 >= 2, 3, 4, 0.3)
+if(1>=2,3,4,0.5); if (1 >= 2, 3, 4, 0.5)
+if (1   <   2,   3,   4); if (1 < 2, 3, 4)
+if (1+2 <   3,   4,   5); if (1 + 2 < 3, 4, 5)
+if (1   < 2+3,   4,   5); if (1 < 2 + 3, 4, 5)
+if (1   <   2, 3+4,   5); if (1 < 2, 3 + 4, 5)
+if (1   <   2,   3, 4+5); if (1 < 2, 3, 4 + 5)
+if (foo in [bar], 6, 9); if (foo in [bar], 6, 9)
+if (foo in [bar,baz], 6, 9); if (foo in [bar, baz], 6, 9)
+if (foo in [bar,baz,cox], 6, 9); if (foo in [bar, baz, cox], 6, 9)
+if (foo in [bar], 6, 9)
+if (foo in [bar, baz], 6, 9)
+if (foo in [bar, baz, cox], 6, 9)
+if (foo in [ bar ], 6, 9); if (foo in [bar], 6, 9)
+if (foo in [ bar,  baz ], 6, 9); if (foo in [bar, baz], 6, 9)
+if (foo in [ bar,  baz,  cox ], 6, 9); if (foo in [bar, baz, cox], 6, 9)
+feature;                                    feature
+fe@ture;                                    fe@ture
+featur@;                                    featur@
+fe$ture;                                    fe$ture
+featur$;                                    featur$
+feature.out;                                feature.out
+feature .out;                               feature.out
+feature . out;                              feature.out
+feature.out.out;                            feature.out.out
+feature.if
+feature.in
+feature(arg1);                              feature(arg1)
+feature (arg1);                             feature(arg1)
+feature ( arg1);                            feature(arg1)
+feature ( arg1 );                           feature(arg1)
+feature(arg1,arg2);                         feature(arg1,arg2)
+feature(arg1 ,arg2);                        feature(arg1,arg2)
+feature(arg1 , arg2);                       feature(arg1,arg2)
+feature(arg1 , arg2).out;                   feature(arg1,arg2).out
+feature(arg1 , arg2) .  out;                feature(arg1,arg2).out
+feature("\",difficult","\")arguments\\").out
+feature(arg1,arg2).out;                     feature(arg1,arg2).out
+feature(if)
+feature(in)
+feature(cos)
+feature("cos(1,2)")
+feature(cos,sin,tan,cosh,sinh,tanh,acos,asin,atan,exp,log10,log,sqrt,ceil,fabs,floor)
+feature(cos,"sin(1,2)",3)
+rankingExpression(foo@92c9e83e1b665d2c.fe5dbbcea5ce7e29).rankingScript
+rankingExpression(foo@92c9e83e1b665d2c.2e5dbbcea5ce7e29).rankingScript
+mysum ( mysum(4,  4), value( 4 ), value(4) ); mysum(mysum(4,4),value(4),value(4))
+"\\"
+"\""
+"\f"
+"\female"
+"\n"
+"\nude"
+"\r"
+"fa\rt"
+"\t"
+"fe\tish"
+"\x10081977"
+"10\x081977"
+"1008\x1977"
+"100819\x77"
+if(1.09999~=1.1,2,3); if (1.09999 ~= 1.1, 2, 3)
diff --git a/searchlib/src/tests/ranksetup/.gitignore b/searchlib/src/tests/ranksetup/.gitignore
new file mode 100644
index 00000000000..754597f65f8
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/.gitignore
@@ -0,0 +1,5 @@
+.depend
+Makefile
+ranksetup_test
+/.gdbinit
+searchlib_ranksetup_test_app
diff --git a/searchlib/src/tests/ranksetup/CMakeLists.txt b/searchlib/src/tests/ranksetup/CMakeLists.txt
new file mode 100644
index 00000000000..712f1ffefa4
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ranksetup_test_app
+    SOURCES
+    ranksetup_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_ranksetup_test_app COMMAND searchlib_ranksetup_test_app)
diff --git a/searchlib/src/tests/ranksetup/DESC b/searchlib/src/tests/ranksetup/DESC
new file mode 100644
index 00000000000..37f7cc6f2c5
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/DESC
@@ -0,0 +1 @@
+ranksetup test. Take a look at ranksetup_test.cpp for details.
diff --git a/searchlib/src/tests/ranksetup/FILES b/searchlib/src/tests/ranksetup/FILES
new file mode 100644
index 00000000000..f1fce1d28ff
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/FILES
@@ -0,0 +1 @@
+ranksetup_test.cpp
diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
new file mode 100644
index 00000000000..aee04ef4cb7
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp
@@ -0,0 +1,922 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("ranksetup_test");
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::fef;
+using namespace search::features;
+using namespace search::fef::test;
+using search::feature_t;
+
+typedef FeatureNameBuilder FNB;
+
+//-----------------------------------------------------------------------------
+// DumpFeatureVisitor
+//-----------------------------------------------------------------------------
+class DumpFeatureVisitor : public IDumpFeatureVisitor // prints every visited dump feature name to stdout
+{
+public:
+    DumpFeatureVisitor() {}
+    virtual void visitDumpFeature(const vespalib::string & name) {
+        std::cout << "dump feature: " << name << std::endl; // one line per feature
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+// RankEnvironment
+//-----------------------------------------------------------------------------
+class RankEnvironment // bundles the factory and the two environments a RankExecutor needs
+{
+private:
+    const BlueprintFactory & _factory; // all three members are references: the referenced objects must outlive this one
+    const IIndexEnvironment & _indexEnv;
+    const IQueryEnvironment & _queryEnv;
+
+public:
+    RankEnvironment(const BlueprintFactory & bfactory,
+                    const IIndexEnvironment & indexEnv, const IQueryEnvironment & queryEnv) :
+        _factory(bfactory), _indexEnv(indexEnv), _queryEnv(queryEnv) {}
+
+    const BlueprintFactory & factory() const { return _factory; }
+    const IIndexEnvironment & indexEnvironment() const { return _indexEnv; }
+    const IQueryEnvironment & queryEnvironment() const { return _queryEnv; }
+};
+
+
+//-----------------------------------------------------------------------------
+// RankExecutor
+//-----------------------------------------------------------------------------
+/**
+ * Compiles a first phase rank feature, and optionally a second phase
+ * rank feature, against a RankEnvironment and runs the resulting rank
+ * programs for a document.
+ *
+ * setup() must have returned true before execute() is called, since
+ * the rank programs are created by setup().
+ */
+class RankExecutor
+{
+private:
+    // Name of the first phase rank feature.
+    vespalib::string _initRank;
+    // Name of the second phase rank feature; empty means no second phase.
+    vespalib::string _finalRank;
+    const RankEnvironment & _rankEnv;
+    MatchDataLayout _layout;
+    // Rank setup; created by setup().
+    std::unique_ptr _rs;
+    RankProgram::UP _firstPhaseProgram;
+    RankProgram::UP _secondPhaseProgram;
+
+public:
+    RankExecutor(const vespalib::string &initRank,
+                 const vespalib::string &finalRank, const RankEnvironment &rankEnv) :
+        _initRank(initRank), _finalRank(finalRank), _rankEnv(rankEnv), _layout(),
+        _rs(), _firstPhaseProgram(), _secondPhaseProgram() {}
+    // Compiles the rank features and creates the rank programs; returns false on failure.
+    bool setup();
+    // Runs the rank programs for docId and returns the score of each phase.
+    RankResult execute(uint32_t docId = 0);
+};
+
+/**
+ * Creates the rank setup, compiles the configured rank features and
+ * creates and sets up the rank program(s).
+ *
+ * @return false if the first phase rank name is empty or compilation
+ *         fails (no rank program has been created in either case),
+ *         true otherwise.
+ */
+bool
+RankExecutor::setup()
+{
+    _rs = std::unique_ptr(new RankSetup(_rankEnv.factory(), _rankEnv.indexEnvironment()));
+    if (_initRank.empty()) {
+        return false;
+    }
+    _rs->setFirstPhaseRank(_initRank);
+
+    // The second phase is optional and only configured when a name was given.
+    if (!_finalRank.empty()) {
+        _rs->setSecondPhaseRank(_finalRank);
+    }
+
+    if (!_rs->compile()) {
+        return false;
+    }
+
+    _firstPhaseProgram = _rs->create_first_phase_program();
+    _firstPhaseProgram->setup(_layout, _rankEnv.queryEnvironment());
+    if (!_finalRank.empty()) {
+        _secondPhaseProgram = _rs->create_second_phase_program();
+        _secondPhaseProgram->setup(_layout, _rankEnv.queryEnvironment());
+    }
+    return true;
+}
+
+/**
+ * Runs the first phase program, and the second phase program when one
+ * was created, for the given document. The score of each phase is
+ * stored in the result under the rank feature name of that phase.
+ */
+RankResult
+RankExecutor::execute(uint32_t docId)
+{
+    RankResult scores;
+
+    _firstPhaseProgram->run(docId);
+    scores.addScore(_initRank, *Utils::getScoreFeature(*_firstPhaseProgram));
+
+    // No second phase program was created: only the first phase score is reported.
+    if (_secondPhaseProgram.get() == nullptr) {
+        return scores;
+    }
+
+    _secondPhaseProgram->run(docId);
+    scores.addScore(_finalRank, *Utils::getScoreFeature(*_secondPhaseProgram));
+    return scores;
+}
+
+
+//-----------------------------------------------------------------------------
+// FeatureDumper
+//-----------------------------------------------------------------------------
+/**
+ * Compiles a set of dump features against a RankEnvironment and runs
+ * the resulting dump program.
+ *
+ * Dump features are either added one by one with addDumpFeature() or
+ * taken from the index environment with configure(). setup() must
+ * have returned true before dump() is called, since the rank program
+ * is created by setup().
+ */
+class FeatureDumper
+{
+private:
+    const RankEnvironment & _rankEnv;
+    RankSetup _setup;
+    MatchDataLayout _layout;
+    RankProgram::UP _rankProgram;
+
+public:
+    // 'explicit' prevents a RankEnvironment from silently converting to a FeatureDumper.
+    explicit FeatureDumper(const RankEnvironment & rankEnv) :
+        _rankEnv(rankEnv),
+        _setup(_rankEnv.factory(), _rankEnv.indexEnvironment()),
+        _layout(),
+        _rankProgram() {}
+    // Adds a single dump feature by name.
+    void addDumpFeature(const vespalib::string &name);
+    // Configures the rank setup from the index environment.
+    void configure();
+    // Compiles the rank setup and creates the dump program; returns false on failure.
+    bool setup();
+    // Runs the dump program and returns its seed features.
+    RankResult dump();
+};
+
+// Forwards the named dump feature to the underlying rank setup.
+void FeatureDumper::addDumpFeature(const vespalib::string &name)
+{
+    _setup.addDumpFeature(name);
+}
+
+// Lets the underlying rank setup configure itself.
+void FeatureDumper::configure()
+{
+    _setup.configure();
+}
+
+/**
+ * Compiles the rank setup and, when compilation succeeds, creates and
+ * sets up the dump program.
+ *
+ * @return whether compilation succeeded.
+ */
+bool
+FeatureDumper::setup()
+{
+    const bool compiled = _setup.compile();
+    if (compiled) {
+        _rankProgram = _setup.create_dump_program();
+        _rankProgram->setup(_layout, _rankEnv.queryEnvironment());
+    }
+    return compiled;
+}
+
+/**
+ * Runs the dump program for document 1 and returns the features
+ * reported by Utils::getSeedFeatures() as a RankResult.
+ *
+ * The rank program is created by setup(), so this must only be called
+ * after setup() has returned true.
+ */
+RankResult
+FeatureDumper::dump()
+{
+    _rankProgram->run(1);
+    std::map features = Utils::getSeedFeatures(*_rankProgram);
+    RankResult retval;
+    // Copy every (name, value) pair into the result.
+    for (auto itr = features.begin(); itr != features.end(); ++itr) {
+        retval.addScore(itr->first, itr->second);
+    }
+    return retval;
+}
+
+
+//-----------------------------------------------------------------------------
+// RankSetupTest
+//-----------------------------------------------------------------------------
+/**
+ * Test application covering the test blueprints, rank setup
+ * compilation and configuration, rank program execution, feature
+ * dumping and feature name normalization.
+ */
+class RankSetupTest : public vespalib::TestApp
+{
+private:
+    // Shared fixture state; populated and wired together by the constructor.
+    BlueprintFactory _factory;
+    search::AttributeManager _manager;
+    IndexEnvironment _indexEnv;
+    QueryEnvironment _queryEnv;
+    RankEnvironment  _rankEnv;
+    DumpFeatureVisitor _visitor;
+
+    void testValueBlueprint();
+    void testDoubleBlueprint();
+    void testSumBlueprint();
+    void testStaticRankBlueprint();
+    void testChainBlueprint();
+    void testCfgValueBlueprint();
+    void testCompilation();
+    void testRankSetup();
+    // Executes the given rank features using the shared rank environment
+    // and compares the resulting scores with the expected ones.
+    bool testExecution(const vespalib::string & initRank, feature_t initScore,
+                       const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0);
+    // Same as above, but with an explicitly supplied rank environment.
+    bool testExecution(const RankEnvironment &rankEnv,
+                       const vespalib::string & initRank, feature_t initScore,
+                       const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0);
+    void testExecution();
+    void testFeatureDump();
+
+    // Compares two feature maps entry by entry (names exactly, values approximately).
+    void checkFeatures(std::map &exp, std::map &actual);
+    void testFeatureNormalization();
+
+public:
+    RankSetupTest();
+    int Main();
+};
+
+
+/**
+ * Verifies ValueBlueprint: each parameter becomes one output named by
+ * its position, no inputs are declared, the executor holds the parsed
+ * values, and setup fails when no parameters are given.
+ */
+void
+RankSetupTest::testValueBlueprint()
+{
+    ValueBlueprint prototype;
+    prototype.visitDumpFeatures(_indexEnv, _visitor);
+    { // basic test
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        bp->setName("value");
+        EXPECT_EQUAL(bp->getName(), "value");
+        std::vector params;
+        params.push_back("5.5");
+        params.push_back("10.5");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 0u);
+        EXPECT_EQUAL(deps.output.size(), 2u);
+        EXPECT_EQUAL(deps.output[0], "0");
+        EXPECT_EQUAL(deps.output[1], "1");
+
+        // The created executor is expected to carry the two configured values.
+        FeatureExecutor::LP fe = bp->createExecutor(_queryEnv);
+        ValueExecutor * vfe = static_cast(fe.get());
+        EXPECT_EQUAL(vfe->getValues().size(), 2u);
+        EXPECT_EQUAL(vfe->getValues()[0], 5.5f);
+        EXPECT_EQUAL(vfe->getValues()[1], 10.5f);
+    }
+    { // invalid params
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        EXPECT_TRUE(!bp->setup(_indexEnv, params));
+    }
+}
+
+/**
+ * Verifies DoubleBlueprint: every parameter is declared as an input
+ * and yields one output named by its position.
+ */
+void
+RankSetupTest::testDoubleBlueprint()
+{
+    DoubleBlueprint prototype;
+    prototype.visitDumpFeatures(_indexEnv, _visitor);
+    { // basic test
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("value(5.5).0");
+        params.push_back("value(10.5).0");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 2u);
+        EXPECT_EQUAL(deps.input[0], "value(5.5).0");
+        EXPECT_EQUAL(deps.input[1], "value(10.5).0");
+        EXPECT_EQUAL(deps.output.size(), 2u);
+        EXPECT_EQUAL(deps.output[0], "0");
+        EXPECT_EQUAL(deps.output[1], "1");
+   }
+}
+
+/**
+ * Verifies SumBlueprint: every parameter is declared as an input and
+ * a single output named "out" is produced.
+ */
+void
+RankSetupTest::testSumBlueprint()
+{
+    SumBlueprint prototype;
+    prototype.visitDumpFeatures(_indexEnv, _visitor);
+    { // basic test
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("value(5.5, 10.5).0");
+        params.push_back("value(5.5, 10.5).1");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 2u);
+        EXPECT_EQUAL(deps.input[0], "value(5.5, 10.5).0");
+        EXPECT_EQUAL(deps.input[1], "value(5.5, 10.5).1");
+        EXPECT_EQUAL(deps.output.size(), 1u);
+        EXPECT_EQUAL(deps.output[0], "out");
+    }
+}
+
+/**
+ * Verifies StaticRankBlueprint: exactly one parameter is accepted,
+ * giving no inputs and a single output named "out"; zero or two
+ * parameters make setup fail.
+ */
+void
+RankSetupTest::testStaticRankBlueprint()
+{
+    StaticRankBlueprint prototype;
+    { // basic test
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("sr1");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 0u);
+        EXPECT_EQUAL(deps.output.size(), 1u);
+        EXPECT_EQUAL(deps.output[0], "out");
+    }
+    { // invalid params
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        // no parameters
+        EXPECT_TRUE(!bp->setup(_indexEnv, params));
+        // too many parameters
+        params.push_back("sr1");
+        params.push_back("sr2");
+        EXPECT_TRUE(!bp->setup(_indexEnv, params));
+    }
+}
+
+/**
+ * Verifies which single input ChainBlueprint declares for the
+ * parameter triples used below, and that setup fails for no
+ * parameters and for a count of 0.
+ */
+void
+RankSetupTest::testChainBlueprint()
+{
+    ChainBlueprint prototype;
+    { // chaining
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("basic");
+        params.push_back("2");
+        params.push_back("4");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 1u);
+        // count 2 depends on the same chain with count 1
+        EXPECT_EQUAL(deps.input[0], "chain(basic,1,4)");
+    }
+    { // leaf node
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("basic");
+        params.push_back("1");
+        params.push_back("4");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 1u);
+        // a basic chain with count 1 ends in the value feature
+        EXPECT_EQUAL(deps.input[0], "value(4)");
+    }
+    { // cycle
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        params.push_back("cycle");
+        params.push_back("1");
+        params.push_back("4");
+        EXPECT_TRUE(bp->setup(_indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 1u);
+        // a cycle chain with count 1 points back into the chain instead of ending
+        EXPECT_EQUAL(deps.input[0], "chain(cycle,4,4)");
+    }
+    { // invalid params
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        std::vector params;
+        // no parameters
+        EXPECT_TRUE(!bp->setup(_indexEnv, params));
+        // a count of 0 is rejected
+        params.push_back("basic");
+        params.push_back("0");
+        params.push_back("4");
+        EXPECT_TRUE(!bp->setup(_indexEnv, params));
+    }
+}
+
+/**
+ * Verifies CfgValueBlueprint: the values added under the property
+ * "test_cfgvalue(foo).value" in a local index environment become one
+ * output each, and the executor holds them in insertion order.
+ */
+void
+RankSetupTest::testCfgValueBlueprint()
+{
+    CfgValueBlueprint     prototype;
+    // Local index environment carrying three values for the same property key.
+    IndexEnvironment      indexEnv;
+    indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+    indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0");
+    indexEnv.getProperties().add("test_cfgvalue(foo).value", "3.0");
+
+    { // basic test
+        Blueprint::UP bp = prototype.createInstance();
+        DummyDependencyHandler deps(*bp);
+        bp->setName("test_cfgvalue(foo)");
+        std::vector params;
+        params.push_back("foo");
+
+        EXPECT_TRUE(bp->setup(indexEnv, params));
+        EXPECT_EQUAL(deps.input.size(), 0u);
+        EXPECT_EQUAL(deps.output.size(), 3u);
+        EXPECT_EQUAL(deps.output[0], "0");
+        EXPECT_EQUAL(deps.output[1], "1");
+        EXPECT_EQUAL(deps.output[2], "2");
+
+        FeatureExecutor::LP fe = bp->createExecutor(_queryEnv);
+        ValueExecutor *vfe = static_cast(fe.get());
+        EXPECT_EQUAL(vfe->getValues().size(), 3u);
+        EXPECT_EQUAL(vfe->getValues()[0], 1.0f);
+        EXPECT_EQUAL(vfe->getValues()[1], 2.0f);
+        EXPECT_EQUAL(vfe->getValues()[2], 3.0f);
+    }
+}
+
+
+/**
+ * Verifies which first phase rank expressions RankSetup::compile()
+ * accepts. Each scope uses a fresh RankSetup built from the shared
+ * factory and index environment.
+ */
+void
+RankSetupTest::testCompilation()
+{
+    { // unknown blueprint
+        RankSetup rs(_factory, _indexEnv);
+        rs.setFirstPhaseRank("unknown");
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // unknown output for initial rank
+        RankSetup rs(_factory, _indexEnv);
+        rs.setFirstPhaseRank("value(2).1");
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // unknown output for dependency
+        RankSetup rs(_factory, _indexEnv);
+        rs.setFirstPhaseRank(FNB().baseName("mysum").parameter("value(2).1").buildName());
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // illegal input parameters
+        RankSetup rs(_factory, _indexEnv);
+        rs.setFirstPhaseRank("value.0");
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // illegal feature name
+        RankSetup rs(_factory, _indexEnv);
+        rs.setFirstPhaseRank("value(2).");
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // almost too deep dependency graph
+        RankSetup rs(_factory, _indexEnv);
+        std::ostringstream oss;
+        oss << "chain(basic," << (BlueprintResolver::MAX_DEP_DEPTH - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH
+        rs.setFirstPhaseRank(oss.str());
+        EXPECT_TRUE(rs.compile());
+    }
+    { // too deep dependency graph
+        RankSetup rs(_factory, _indexEnv);
+        std::ostringstream oss;
+        oss << "chain(basic," << BlueprintResolver::MAX_DEP_DEPTH << ",4)"; // gives tree height == MAX_DEP_DEPTH + 1
+        rs.setFirstPhaseRank(oss.str());
+        EXPECT_TRUE(!rs.compile());
+    }
+    { // cycle
+        RankSetup rs(_factory, _indexEnv);
+        // c(c,4,2) -> c(c,3,2) -> c(c,2,2) -> c(c,1,2) -> c(c,2,2)
+        rs.setFirstPhaseRank("chain(cycle,4,2)");
+        EXPECT_TRUE(!rs.compile());
+    }
+}
+
+/**
+ * Verifies that RankSetup::configure() picks up every property set in
+ * the index environment and exposes it through the matching getter.
+ */
+void
+RankSetupTest::testRankSetup()
+{
+    using namespace search::fef::indexproperties;
+
+    // One value for every property that is read back below.
+    IndexEnvironment env;
+    auto &props = env.getProperties();
+    props.add(rank::FirstPhase::NAME, "firstphase");
+    props.add(rank::SecondPhase::NAME, "secondphase");
+    props.add(dump::Feature::NAME, "foo");
+    props.add(dump::Feature::NAME, "bar");
+    props.add(matching::NumThreadsPerSearch::NAME, "3");
+    props.add(matchphase::DegradationAttribute::NAME, "mystaticrankattr");
+    props.add(matchphase::DegradationAscendingOrder::NAME, "true");
+    props.add(matchphase::DegradationMaxHits::NAME, "12345");
+    props.add(matchphase::DegradationMaxFilterCoverage::NAME, "0.19");
+    props.add(matchphase::DegradationSamplePercentage::NAME, "0.9");
+    props.add(matchphase::DegradationPostFilterMultiplier::NAME, "0.7");
+    props.add(matchphase::DiversityAttribute::NAME, "mycategoryattr");
+    props.add(matchphase::DiversityMinGroups::NAME, "37");
+    props.add(matchphase::DiversityCutoffFactor::NAME, "7.1");
+    props.add(matchphase::DiversityCutoffStrategy::NAME, "strict");
+    props.add(hitcollector::HeapSize::NAME, "50");
+    props.add(hitcollector::ArraySize::NAME, "60");
+    props.add(hitcollector::EstimatePoint::NAME, "70");
+    props.add(hitcollector::EstimateLimit::NAME, "80");
+    props.add(hitcollector::RankScoreDropLimit::NAME, "90.5");
+
+    RankSetup rs(_factory, env);
+    rs.configure();
+
+    // rank and dump features
+    EXPECT_EQUAL(rs.getFirstPhaseRank(), vespalib::string("firstphase"));
+    EXPECT_EQUAL(rs.getSecondPhaseRank(), vespalib::string("secondphase"));
+    ASSERT_TRUE(rs.getDumpFeatures().size() == 2);
+    EXPECT_EQUAL(rs.getDumpFeatures()[0], vespalib::string("foo"));
+    EXPECT_EQUAL(rs.getDumpFeatures()[1], vespalib::string("bar"));
+
+    // matching and match phase settings
+    EXPECT_EQUAL(rs.getNumThreadsPerSearch(), 3u);
+    EXPECT_EQUAL(rs.getDegradationAttribute(), "mystaticrankattr");
+    EXPECT_EQUAL(rs.isDegradationOrderAscending(), true);
+    EXPECT_EQUAL(rs.getDegradationMaxHits(), 12345u);
+    EXPECT_EQUAL(rs.getDegradationSamplePercentage(), 0.9);
+    EXPECT_EQUAL(rs.getDegradationMaxFilterCoverage(), 0.19);
+    EXPECT_EQUAL(rs.getDegradationPostFilterMultiplier(), 0.7);
+    EXPECT_EQUAL(rs.getDiversityAttribute(), "mycategoryattr");
+    EXPECT_EQUAL(rs.getDiversityMinGroups(), 37u);
+    EXPECT_EQUAL(rs.getDiversityCutoffFactor(), 7.1);
+    EXPECT_EQUAL(rs.getDiversityCutoffStrategy(), "strict");
+
+    // hit collector settings
+    EXPECT_EQUAL(rs.getHeapSize(), 50u);
+    EXPECT_EQUAL(rs.getArraySize(), 60u);
+    EXPECT_EQUAL(rs.getEstimatePoint(), 70u);
+    EXPECT_EQUAL(rs.getEstimateLimit(), 80u);
+    EXPECT_EQUAL(rs.getRankScoreDropLimit(), 90.5);
+}
+
+// Convenience overload: runs the execution check against the shared rank environment.
+bool
+RankSetupTest::testExecution(const vespalib::string & initRank, feature_t initScore,
+                             const vespalib::string & finalRank, feature_t finalScore, uint32_t docId)
+{
+    const bool passed = testExecution(_rankEnv, initRank, initScore, finalRank, finalScore, docId);
+    return passed;
+}
+
+/**
+ * Sets up a RankExecutor for the given rank features, executes it for
+ * docId and compares the resulting scores with the expected ones.
+ *
+ * @param rankEnv    environment to compile and execute against
+ * @param initRank   first phase rank feature name
+ * @param initScore  expected first phase score
+ * @param finalRank  second phase rank feature name; empty means no second phase
+ * @param finalScore expected second phase score (ignored when finalRank is empty)
+ * @param docId      document to execute for
+ * @return true if setup succeeded and the scores matched
+ */
+bool
+RankSetupTest::testExecution(const RankEnvironment &rankEnv, const vespalib::string & initRank, feature_t initScore,
+                             const vespalib::string & finalRank, feature_t finalScore, uint32_t docId)
+{
+    RankExecutor re(initRank, finalRank, rankEnv);
+    const bool setupOk = re.setup();
+    EXPECT_TRUE(setupOk);
+    if (!setupOk) {
+        // No rank program exists when setup fails; execute() would
+        // dereference a null program and crash the whole test run
+        // instead of reporting a plain failure.
+        return false;
+    }
+    RankResult exp;
+    exp.addScore(initRank, initScore);
+    if (!finalRank.empty()) {
+        exp.addScore(finalRank, finalScore);
+    }
+    RankResult rs = re.execute(docId);
+    const bool scoresMatch = (exp == rs);
+    EXPECT_EQUAL(exp, rs);
+    return scoresMatch;
+}
+
+/**
+ * Executes a range of rank feature combinations and checks the
+ * resulting first and second phase scores.
+ */
+void
+RankSetupTest::testExecution()
+{
+    { // value executor
+        vespalib::string v = FNB().baseName("value").parameter("5.5").parameter("10.5").buildName();
+        EXPECT_TRUE(testExecution(v + ".0", 5.5f));
+        EXPECT_TRUE(testExecution(v + ".0", 5.5f, v + ".1", 10.5f));
+        // without an explicit output name the first output is expected
+        EXPECT_TRUE(testExecution(v, 5.5f));
+    }
+    { // double executor
+        vespalib::string d1 = FNB().baseName("double").parameter("value(2).0").parameter("value(8).0").buildName();
+        vespalib::string d2 = FNB().baseName("double").parameter("value(2)").parameter("value(8)").buildName();
+        EXPECT_TRUE(testExecution(d1 + ".0", 4.0f));
+        EXPECT_TRUE(testExecution(d1 + ".0", 4.0f, d1 + ".1", 16.0f));
+        EXPECT_TRUE(testExecution(d2, 4.0f));
+    }
+    { // sum executor
+        vespalib::string s1 = FNB().baseName("mysum").parameter("value(2).0").parameter("value(4).0").output("out").buildName();
+        vespalib::string s2 = FNB().baseName("mysum").parameter("value(2)").parameter("value(4)").buildName();
+        EXPECT_TRUE(testExecution(s1, 6.0f));
+        EXPECT_TRUE(testExecution(s2, 6.0f));
+    }
+    { // static rank executor
+        vespalib::string sr1 = "staticrank(staticrank1)";
+        vespalib::string sr2 = "staticrank(staticrank2)";
+        // expected scores match the attribute values written by the constructor (i + 100, i + 200)
+        for (uint32_t i = 0; i < 5; ++i) {
+            EXPECT_TRUE(testExecution(sr1, static_cast(i + 100),
+                                     sr2, static_cast(i + 200), i));
+        }
+    }
+    { // test topologic sorting
+        vespalib::string v1 = "value(2)";
+        vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+        vespalib::string d2 = FNB().baseName("double").parameter(d1).buildName();
+
+        // the same dependencies listed in both orders must give the same sum
+        {
+            vespalib::string s1 = FNB().baseName("mysum").parameter(v1).parameter(d1).parameter(d2).buildName();
+            EXPECT_TRUE(testExecution(s1, 14.0f));
+        }
+        {
+            vespalib::string s1 = FNB().baseName("mysum").parameter(d2).parameter(d1).parameter(v1).buildName();
+            EXPECT_TRUE(testExecution(s1, 14.0f));
+        }
+    }
+    { // output used by more than one
+        vespalib::string v1 = "value(2)";
+        vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+        vespalib::string d2 = FNB().baseName("double").parameter(v1).buildName();
+        vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName();
+        EXPECT_TRUE(testExecution(s1, 8.0f));
+    }
+    { // output not shared between phases
+        vespalib::string v1 = "value(2)";
+        vespalib::string v2 = "value(8)";
+        vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+        vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName();
+        EXPECT_TRUE(testExecution(d1, 4.0f, d2, 16.0f));
+    }
+    { // output shared between phases
+        vespalib::string v1 = "value(2)";
+        vespalib::string v2 = "value(8)";
+        vespalib::string v3 = "value(32)";
+        vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName();
+        vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName();
+        vespalib::string d3 = FNB().baseName("double").parameter(v3).buildName();
+        vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName();
+        vespalib::string s2 = FNB().baseName("mysum").parameter(d2).parameter(d3).buildName();
+        EXPECT_TRUE(testExecution(s1, 20.0f, s2, 80.0f));
+    }
+    { // max dependency depth
+        uint32_t maxDepth = BlueprintResolver::MAX_DEP_DEPTH;
+        std::ostringstream oss;
+        oss << "chain(basic," << (maxDepth - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH;
+        EXPECT_TRUE(testExecution(oss.str(), 4.0f));
+    }
+    { // config values read from a dedicated index environment
+        IndexEnvironment indexEnv;
+        indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0");
+        indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0");
+        indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0");
+
+        vespalib::string s = FNB().baseName("mysum")
+                        .parameter("test_cfgvalue(foo).0")
+                        .parameter("test_cfgvalue(foo).1")
+                        .buildName();
+
+        EXPECT_TRUE(testExecution(RankEnvironment(_factory, indexEnv, _queryEnv),
+                                 s, 3.0f, "test_cfgvalue(bar).0", 5.0f));
+    }
+}
+
+/**
+ * Verifies feature dumping: explicitly added dump features (plain,
+ * nested and ranking expressions) as well as dump features that come
+ * from index environment properties via configure().
+ */
+void
+RankSetupTest::testFeatureDump()
+{
+    { // explicitly added features, some with nested dependencies
+        FeatureDumper dumper(_rankEnv);
+        dumper.addDumpFeature("value(2)");
+        dumper.addDumpFeature("value(4)");
+        dumper.addDumpFeature("double(value(4))");
+        dumper.addDumpFeature("double(value(8))");
+        dumper.addDumpFeature("mysum(value(4),value(16))");
+        dumper.addDumpFeature("mysum(double(value(8)),double(value(32)))");
+        EXPECT_TRUE(dumper.setup());
+
+        // Expected feature names are composed with the name builder.
+        const vespalib::string double4  = FNB().baseName("double").parameter("value(4)").buildName();
+        const vespalib::string double8  = FNB().baseName("double").parameter("value(8)").buildName();
+        const vespalib::string double32 = FNB().baseName("double").parameter("value(32)").buildName();
+        const vespalib::string sumPlain = FNB().baseName("mysum").parameter("value(4)").parameter("value(16)").buildName();
+        const vespalib::string sumNested = FNB().baseName("mysum").parameter(double8).parameter(double32).buildName();
+
+        RankResult exp;
+        exp.addScore("value(2)", 2.0f);
+        exp.addScore("value(4)", 4.0f);
+        exp.addScore(double4, 8.0f);
+        exp.addScore(double8, 16.0f);
+        exp.addScore(sumPlain, 20.0f);
+        exp.addScore(sumNested, 80.0f);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+    { // two plain value features
+        FeatureDumper dumper(_rankEnv);
+        dumper.addDumpFeature("value(50)");
+        dumper.addDumpFeature("value(100)");
+        EXPECT_TRUE(dumper.setup());
+
+        RankResult exp;
+        exp.addScore("value(50)", 50.0f);
+        exp.addScore("value(100)", 100.0f);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+    { // ranking expression over constants
+        const vespalib::string expression =
+            FNB().baseName("rankingExpression").parameter("if(4<2,3,4)").buildName();
+
+        FeatureDumper dumper(_rankEnv);
+        dumper.addDumpFeature(expression);
+        EXPECT_TRUE(dumper.setup());
+
+        RankResult exp;
+        exp.addScore(expression, 4.0f);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+
+    { // ranking expression referring to another feature
+        const vespalib::string expression =
+            FNB().baseName("rankingExpression").parameter("if(mysum(value(12),value(10))>2,3,4)").buildName();
+
+        FeatureDumper dumper(_rankEnv);
+        dumper.addDumpFeature(expression);
+        EXPECT_TRUE(dumper.setup());
+
+        RankResult exp;
+        exp.addScore(expression, 3.0f);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+    { // dump features indicated by visitation
+        IndexEnvironment indexEnv;
+        auto &props = indexEnv.getProperties();
+        props.add("test_cfgvalue(foo).value", "1.0");
+        props.add("test_cfgvalue(bar).value", "5.0");
+        props.add("test_cfgvalue.dump", "test_cfgvalue(foo)");
+        props.add("test_cfgvalue.dump", "test_cfgvalue(bar)");
+        props.add(indexproperties::rank::FirstPhase::NAME, "");
+        props.add(indexproperties::rank::SecondPhase::NAME, "");
+
+        RankEnvironment rankEnv(_factory, indexEnv, _queryEnv);
+        FeatureDumper dumper(rankEnv);
+        dumper.configure();
+        EXPECT_TRUE(dumper.setup());
+
+        RankResult exp;
+        exp.addScore("test_cfgvalue(foo)", 1.0);
+        exp.addScore("test_cfgvalue(bar)", 5.0);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+    { // ignore features indicated by visitation
+        IndexEnvironment indexEnv;
+        auto &props = indexEnv.getProperties();
+        props.add("test_cfgvalue(foo).value", "1.0");
+        props.add("test_cfgvalue(bar).value", "5.0");
+        props.add("test_cfgvalue.dump", "test_cfgvalue(foo)");
+        props.add("test_cfgvalue.dump", "test_cfgvalue(bar)");
+        props.add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true");
+        props.add(indexproperties::dump::Feature::NAME, "test_cfgvalue(foo)");
+        props.add(indexproperties::rank::FirstPhase::NAME, "");
+        props.add(indexproperties::rank::SecondPhase::NAME, "");
+
+        RankEnvironment rankEnv(_factory, indexEnv, _queryEnv);
+        FeatureDumper dumper(rankEnv);
+        dumper.configure();
+        EXPECT_TRUE(dumper.setup());
+
+        // Only the explicitly listed dump feature is expected.
+        RankResult exp;
+        exp.addScore("test_cfgvalue(foo)", 1.0);
+        EXPECT_EQUAL(exp, dumper.dump());
+    }
+}
+
+/**
+ * Compares two feature maps. Returns early if the sizes differ;
+ * otherwise walks both maps in iteration order and expects equal
+ * names and values within a tolerance of 0.001.
+ */
+void
+RankSetupTest::checkFeatures(std::map &exp, std::map &actual)
+{
+    typedef std::map::const_iterator ITR;
+    if (!EXPECT_EQUAL(exp.size(), actual.size())) {
+        return;
+    }
+    ITR exp_itr    = exp.begin();
+    ITR exp_end    = exp.end();
+    ITR actual_itr = actual.begin();
+    ITR actual_end = actual.end();
+    for (; exp_itr != exp_end && actual_itr != actual_end; ++exp_itr, ++actual_itr) {
+        EXPECT_EQUAL(exp_itr->first, actual_itr->first);
+        EXPECT_APPROX(exp_itr->second, actual_itr->second, 0.001);
+    }
+    // Both iterators must have reached their end together.
+    EXPECT_EQUAL(exp_itr == exp_end, actual_itr == actual_end);
+}
+
+/**
+ * Verifies that feature names given with extra whitespace are
+ * reported without it, while whitespace inside quoted parameters is
+ * kept. Checked for the first phase, second phase, summary and dump
+ * programs, for both the seed features and all features.
+ */
+void
+RankSetupTest::testFeatureNormalization()
+{
+    // Uses its own factory with only the value and sum blueprints registered.
+    BlueprintFactory factory;
+    factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+    factory.addPrototype(Blueprint::SP(new SumBlueprint()));
+
+    IndexEnvironment idxEnv;
+    RankSetup rankSetup(factory, idxEnv);
+
+    rankSetup.setFirstPhaseRank(" mysum ( value ( 1 ) , value ( 1 ) ) ");
+    rankSetup.setSecondPhaseRank(" mysum ( value ( 2 ) , value ( 2 ) ) ");
+    rankSetup.addSummaryFeature(" mysum ( value ( 5 ) , value ( 5 ) ) ");
+    rankSetup.addSummaryFeature(" mysum ( \"value( 5 )\" , \"value( 5 )\" ) ");
+    rankSetup.addDumpFeature(" mysum ( value ( 10 ) , value ( 10 ) ) ");
+    rankSetup.addDumpFeature(" mysum ( \"value( 10 )\" , \"value( 10 )\" ) ");
+
+    ASSERT_TRUE(rankSetup.compile());
+
+    { // RANK context
+        MatchDataLayout layout;
+        QueryEnvironment queryEnv;
+        RankProgram::UP firstPhaseProgram = rankSetup.create_first_phase_program();
+        RankProgram::UP secondPhaseProgram = rankSetup.create_second_phase_program();
+        RankProgram::UP summaryProgram = rankSetup.create_summary_program();
+        firstPhaseProgram->setup(layout, queryEnv);
+        secondPhaseProgram->setup(layout, queryEnv);
+        summaryProgram->setup(layout, queryEnv);
+
+        firstPhaseProgram->run(1);
+        EXPECT_APPROX(2.0, *Utils::getScoreFeature(*firstPhaseProgram), 0.001);
+        secondPhaseProgram->run(1);
+        EXPECT_APPROX(4.0, *Utils::getScoreFeature(*secondPhaseProgram), 0.001);
+        summaryProgram->run(1);
+
+        { // rank seed features
+            std::map actual = Utils::getSeedFeatures(*summaryProgram);
+            std::map exp;
+            exp["mysum(value(5),value(5))"] = 10.0;
+            exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0;
+            TEST_DO(checkFeatures(exp, actual));
+        }
+        { // all rank features (1. phase)
+            std::map actual = Utils::getAllFeatures(*firstPhaseProgram);
+            std::map exp;
+            exp["value(1)"] = 1.0;
+            exp["value(1).0"] = 1.0;
+            exp["mysum(value(1),value(1))"] = 2.0;
+            exp["mysum(value(1),value(1)).out"] = 2.0;
+            TEST_DO(checkFeatures(exp, actual));
+        }
+        { // all rank features (2. phase)
+            std::map actual = Utils::getAllFeatures(*secondPhaseProgram);
+            std::map exp;
+            exp["value(2)"] = 2.0;
+            exp["value(2).0"] = 2.0;
+            exp["mysum(value(2),value(2))"] = 4.0;
+            exp["mysum(value(2),value(2)).out"] = 4.0;
+            TEST_DO(checkFeatures(exp, actual));
+        }
+        { // all rank features (summary)
+            std::map actual = Utils::getAllFeatures(*summaryProgram);
+            std::map exp;
+            exp["value(5)"] = 5.0;
+            exp["value(5).0"] = 5.0;
+            exp["mysum(value(5),value(5))"] = 10.0;
+            exp["mysum(value(5),value(5)).out"] = 10.0;
+            exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0;
+            exp["mysum(\"value( 5 )\",\"value( 5 )\").out"] = 10.0;
+            TEST_DO(checkFeatures(exp, actual));
+        }
+    }
+
+    { // DUMP context
+        MatchDataLayout layout;
+        QueryEnvironment queryEnv;
+        RankProgram::UP rankProgram = rankSetup.create_dump_program();
+        rankProgram->setup(layout, queryEnv);
+        rankProgram->run(1);
+
+        { // dump seed features
+            std::map actual = Utils::getSeedFeatures(*rankProgram);
+            std::map exp;
+            exp["mysum(value(10),value(10))"] = 20.0;
+            exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0;
+            TEST_DO(checkFeatures(exp, actual));
+        }
+
+        { // all dump features
+            std::map actual = Utils::getAllFeatures(*rankProgram);
+            std::map exp;
+
+            exp["value(10)"] = 10.0;
+            exp["value(10).0"] = 10.0;
+
+            exp["mysum(value(10),value(10))"] = 20.0;
+            exp["mysum(value(10),value(10)).out"] = 20.0;
+
+            exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0;
+            exp["mysum(\"value( 10 )\",\"value( 10 )\").out"] = 20.0;
+
+            TEST_DO(checkFeatures(exp, actual));
+        }
+    }
+}
+
+
+/**
+ * Builds the shared fixture: registers the blueprints used by the
+ * tests, creates two single-value INT32 attributes with five
+ * documents each and connects the query environment to the index
+ * environment and the attribute manager.
+ */
+RankSetupTest::RankSetupTest() :
+    _factory(),
+    _manager(),
+    _indexEnv(),
+    _queryEnv(),
+    _rankEnv(_factory, _indexEnv, _queryEnv),
+    _visitor()
+{
+    // register blueprints
+    setup_fef_test_plugin(_factory);
+    _factory.addPrototype(Blueprint::SP(new ValueBlueprint()));
+    _factory.addPrototype(Blueprint::SP(new RankingExpressionBlueprint()));
+
+    // setup an original attribute manager with two attributes
+    search::attribute::Config cfg(search::attribute::BasicType::INT32,
+                                        search::attribute::CollectionType::SINGLE);
+    search::AttributeVector::SP av1 =
+        search::AttributeFactory::createAttribute("staticrank1", cfg);
+    search::AttributeVector::SP av2 =
+        search::AttributeFactory::createAttribute("staticrank2", cfg);
+    av1->addDocs(5);
+    av2->addDocs(5);
+    // document i gets value i + 100 in staticrank1 and i + 200 in staticrank2
+    for (uint32_t i = 0; i < 5; ++i) {
+        (static_cast(av1.get()))->update(i, i + 100);
+        (static_cast(av2.get()))->update(i, i + 200);
+    }
+    av1->commit();
+    av2->commit();
+    _manager.add(av1);
+    _manager.add(av2);
+
+    // set the index environment
+    _queryEnv.setIndexEnv(&_indexEnv);
+
+    // set the manager
+    _queryEnv.overrideAttributeManager(&_manager);
+}
+
+
+/**
+ * Test entry point: runs the blueprint tests first and then the tests
+ * that exercise the rank setup as a whole.
+ */
+int RankSetupTest::Main()
+{
+    TEST_INIT("ranksetup_test");
+
+    // individual test blueprints
+    testValueBlueprint();
+    testDoubleBlueprint();
+    testSumBlueprint();
+    testStaticRankBlueprint();
+    testChainBlueprint();
+    testCfgValueBlueprint();
+
+    // compilation, configuration, execution, dumping and normalization
+    testCompilation();
+    testRankSetup();
+    testExecution();
+    testFeatureDump();
+    testFeatureNormalization();
+
+    TEST_DONE();
+}
+
+// Hooks RankSetupTest up as the test application.
+// NOTE(review): presumably expands to the program's main(); confirm against the testkit.
+TEST_APPHOOK(RankSetupTest);
diff --git a/searchlib/src/tests/ranksetup/verify_feature/.gitignore b/searchlib/src/tests/ranksetup/verify_feature/.gitignore
new file mode 100644
index 00000000000..69a39cd13f2
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/.gitignore
@@ -0,0 +1 @@
+searchlib_verify_feature_test_app
diff --git a/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt
new file mode 100644
index 00000000000..8ffd79fe327
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_verify_feature_test_app
+    SOURCES
+    verify_feature_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_verify_feature_test_app COMMAND searchlib_verify_feature_test_app)
diff --git a/searchlib/src/tests/ranksetup/verify_feature/FILES b/searchlib/src/tests/ranksetup/verify_feature/FILES
new file mode 100644
index 00000000000..652373e33da
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/FILES
@@ -0,0 +1 @@
+verify_feature_test.cpp
diff --git a/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp
new file mode 100644
index 00000000000..1e49cd4aae6
--- /dev/null
+++ b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::features;
+using namespace search::fef::test;
+using namespace search::fef;
+
+struct RankFixture {
+    BlueprintFactory factory;   // holds the fef test plugin blueprints plus ValueBlueprint
+    IndexEnvironment indexEnv;  // index environment handed to verifyFeature()
+    RankFixture() : factory(), indexEnv() {
+        setup_fef_test_plugin(factory);
+        Blueprint::SP valuePrototype(new ValueBlueprint());
+        factory.addPrototype(valuePrototype);
+    }
+    // Returns the result of verifyFeature() for the given feature name in this fixture's setup.
+    bool verify(const std::string &feature) const {
+        return verifyFeature(factory, indexEnv, feature, "feature verification test");
+    }
+};
+
+TEST_F("verify valid rank feature", RankFixture) {
+    EXPECT_TRUE(f1.verify("value(1, 2, 3).0"));  // each of the three outputs of value(1, 2, 3) must verify
+    EXPECT_TRUE(f1.verify("value(1, 2, 3).1"));
+    EXPECT_TRUE(f1.verify("value(1, 2, 3).2"));
+}
+
+TEST_F("verify unknown feature", RankFixture) {
+    EXPECT_FALSE(f1.verify("unknown"));
+}
+
+TEST_F("verify unknown output", RankFixture) {
+    EXPECT_FALSE(f1.verify("value(1, 2, 3).3"));  // output index 3 is one past the three values given
+}
+
+TEST_F("verify illegal input parameters", RankFixture) {
+    EXPECT_FALSE(f1.verify("value.0"));  // value without any parameter list
+}
+
+TEST_F("verify illegal feature name", RankFixture) {
+    EXPECT_FALSE(f1.verify("value(2)."));  // trailing '.' with no output name after it
+}
+
+TEST_F("verify too deep dependency graph", RankFixture) {
+    EXPECT_TRUE(f1.verify("chain(basic, 63, 4)"));  // presumably the 2nd parameter is the chain length - TODO confirm in the fef test plugin
+    EXPECT_FALSE(f1.verify("chain(basic, 64, 4)"));  // one step more is expected to be rejected
+}
+
+TEST_F("verify dependency cycle", RankFixture) {
+    EXPECT_FALSE(f1.verify("chain(cycle, 4, 2)"));
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/sha1/.gitignore b/searchlib/src/tests/sha1/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/searchlib/src/tests/sort/.gitignore b/searchlib/src/tests/sort/.gitignore
new file mode 100644
index 00000000000..7207ff4596d
--- /dev/null
+++ b/searchlib/src/tests/sort/.gitignore
@@ -0,0 +1,8 @@
+.depend
+Makefile
+sort_test
+uca_stress
+/sortbenchmark
+searchlib_sort_test_app
+searchlib_sortbenchmark_app
+searchlib_uca_stress_app
diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt
new file mode 100644
index 00000000000..1830952bffd
--- /dev/null
+++ b/searchlib/src/tests/sort/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sortbenchmark_app
+    SOURCES
+    sortbenchmark.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_sortbenchmark_app COMMAND searchlib_sortbenchmark_app BENCHMARK)
+vespa_add_executable(searchlib_sort_test_app
+    SOURCES
+    sort_test.cpp
+    DEPENDS
+    searchlib
+)
+#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app)
+vespa_add_executable(searchlib_uca_stress_app
+    SOURCES
+    uca.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK)
diff --git a/searchlib/src/tests/sort/DESC b/searchlib/src/tests/sort/DESC
new file mode 100644
index 00000000000..ad8ab11f5ba
--- /dev/null
+++ b/searchlib/src/tests/sort/DESC
@@ -0,0 +1 @@
+Testing templatized radixsort.
diff --git a/searchlib/src/tests/sort/FILES b/searchlib/src/tests/sort/FILES
new file mode 100644
index 00000000000..e2ef9d3c1ab
--- /dev/null
+++ b/searchlib/src/tests/sort/FILES
@@ -0,0 +1 @@
+sort.cpp
diff --git a/searchlib/src/tests/sort/javaorder.zh b/searchlib/src/tests/sort/javaorder.zh
new file mode 100644
index 00000000000..0d29efc99bd
--- /dev/null
+++ b/searchlib/src/tests/sort/javaorder.zh
@@ -0,0 +1,158 @@
+
+
+
+30雜誌30雜誌
+asiatwnewsasiatwnews
+AZ時尚旅遊AZ時尚旅遊
+bobo小天才養成誌bobo小天才養成誌
+Career職場情報誌Career職場情報誌
+CheersCheers雜誌
+EMBAEMBA雜誌
+ETtodayETtoday
+FASHION QUEEN時尚女王FASHION QUEEN時尚女王
+iLOOKiLOOK電影雜誌
+men&#39;s uno男人誌men&#39;s uno男人誌
+Money 錢Money 錢
+NOWnewsNOWnews
+NOWnews今日新聞網
+PAR表演藝術PAR表演藝術雜誌
+Press Association ImagesPress Association Images
+Smart智富月刊Smart智富月刊
+Taipei WalkerTaipei Walker
+TSNATSNA
+TVBSTVBS
+Yahoo! Taiwan Specials without layoutYahoo! Taiwan Specials without layout
+Yahoo奇摩video.yahoo.com(勿用)
+Yahoo奇摩Yahoo奇摩(爆新聞)
+Yahoo奇摩Yahoo奇摩(新聞)
+Yahoo奇摩Yahoo奇摩(影音)
+Yahoo奇摩新聞Yahoo奇摩新聞(報氣象)
+YourNewsYourNews
+Y特別企畫Y特別企畫
+愛爾達愛爾達
+愛爾達愛爾達電視
+財訊快報財訊快報
+財訊快報財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊快報季刊財訊快報季刊
+財訊雙週刊財訊雙週刊
+常春月刊常春月刊
+朝鮮日報朝鮮日報
+達志達志
+達志達志
+達志達志
+大家健康雜誌大家健康雜誌
+大師輕鬆讀大師輕鬆讀
+大台灣旅遊網大台灣旅遊網
+東森新聞東森新聞
+東森新聞東森新聞
+東星東星
+俄羅斯新聞網俄羅斯新聞網
+法新社法新社
+非凡新聞非凡新聞
+非凡新聞節目非凡新聞節目
+富爾特消費新聞富爾特消費新聞
+公共電視公共電視
+公視公視
+古美術古美術
+管理雜誌管理雜誌
+光華雜誌台灣光華雜誌
+廣編特輯廣編特輯
+廣告雜誌廣告雜誌
+國際商情雙周刊國際商情雙周刊
+哈佛商業評論哈佛商業評論
+韓國朝鮮日報韓國朝鮮日報
+韓國中央日報韓國中央日報
+韓國中央日報韓國中央日報
+韓星網韓星網
+韓星網韓星網
+華人健康網華人健康網
+華視華視
+華視華視
+華視華視
+環境資訊中心環境資訊中心
+健康醫療網健康醫療網
+健康醫療網健康醫療網
+講義雜誌講義雜誌
+教育廣播電台國立教育廣播電台
+今藝術今藝術
+今周刊今周刊
+今周刊今周刊
+經理人經理人月刊
+鉅亨網鉅亨網
+軍聞社軍聞社
+卡優新聞網卡優新聞網
+康健雜誌康健雜誌
+科學人科學人雜誌
+客家電視客家電視台
+酷搜圖聞酷搜圖聞
+理財周刊理財周刊
+麗台運動報麗台運動報
+聯合文學聯合文學
+聯合新聞網聯合新聞網
+路透社路透社
+路透社路透社
+旅遊經旅遊經
+羅開Golf 頻道羅開Golf 頻道
+媽媽寶寶媽媽寶寶
+美麗佳人美麗佳人雜誌
+美聯社美聯社
+美通社美通社
+民視民視
+民視民視
+明報周刊明報周刊
+男人幫男人幫
+能力雜誌能力雜誌
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+年代新聞年代新聞
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網.紐約時報中文網.
+紐約時報中文網紐約時報中文網
+儂儂雜誌儂儂雜誌
+蓬勃網球蓬勃網球
+蘋果日報蘋果日報
+親子天下親子天下
+全國廣播全國廣播
+全球中央全球中央雜誌
+商業周刊商業周刊
+數位家庭數位家庭
+數位時代數位時代
+台灣立報台灣立報
+台灣新生報台灣新生報
+台灣醒報台灣醒報
+台灣醒報台灣醒報
+臺灣時報臺灣時報
+天下雜誌天下雜誌
+統一獅 Video統一獅 Video
+玩高爾夫玩高爾夫
+旺報旺報
+先探投資週刊先探投資週刊
+現代保險健康理財雜誌現代保險健康理財雜誌
+香港中文大學EMBA Videos香港中文大學EMBA
+新頭殼新頭殼
+新新聞周刊新新聞周刊
+兄弟象 Video兄弟象 Video
+訊息快遞Yahoo 奇摩新聞訊息快遞
+野球人野球人
+壹電視壹電視
+壹電視壹電視
+壹蘋果壹蘋果
+義大犀牛 Video義大犀牛 Video
+優活健康網優活健康網
+原視原視
+遠見雜誌遠見雜誌
+張老師月刊張老師月刊
+整形達人整形達人
+中廣中廣新聞網
+中華日報中華日報
+中華職棒中華職棒
+中時電子報中時電子報
+中央廣播電台中央廣播電台
+中央日報中央日報
+中央社中央社
+中央社中央社
+自立晚報自立晚報
+自由時報自由時報
+自由時報自由時報
diff --git a/searchlib/src/tests/sort/sort_test.cpp b/searchlib/src/tests/sort/sort_test.cpp
new file mode 100644
index 00000000000..cf5e1a1cb1f
--- /dev/null
+++ b/searchlib/src/tests/sort/sort_test.cpp
@@ -0,0 +1,295 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using namespace search::common;
+using vespalib::ConstBufferRef;
+
+class Test : public vespalib::TestApp
+{
+public:
+    int Main();                            // runs all test cases declared below
+    void testUnsignedIntegerSort();        // numeric radix sort: empty, single element, 2^20 pseudo-random values
+    template 
+    void testSignedIntegerSort();          // same checks with values of alternating sign
+    void testStringSort();                 // search::radix_sort over LoadedStrings
+    void testIcu();                        // u_strFromUTF8 into a buffer smaller than the input
+    void testStringCaseInsensitiveSort();  // currently an empty placeholder
+    void testSortSpec();                   // SortSpec parsing of field, direction and converter
+    void testSameAsJavaOrder();            // "zh" collation against the order stored in javaorder.zh
+};
+
+struct LoadedStrings
+{
+    LoadedStrings(const char * v=NULL) : _value(v), _currRadix(_value) { }
+    // Radix functor: returns the byte at the cursor and advances the cursor unless that byte is the terminating NUL.
+    class ValueRadix
+    {
+    public:
+        char operator () (LoadedStrings & x) const {
+            unsigned char c(*x._currRadix);  // dereferences the cursor, so x must not hold NULL
+            if (c) {
+                x._currRadix++;
+            }
+            return c;
+        }
+    };
+    // Ordering functor: strcmp() order of the complete strings (both operands must be non-NULL).
+    class ValueCompare : public std::binary_function {
+    public:
+        bool operator() (const LoadedStrings & x, const LoadedStrings & y) const {
+            return strcmp(x._value, y._value) < 0;
+        }
+    };
+    const char * _value;      // start of the string
+    const char * _currRadix;  // cursor used by ValueRadix; starts at _value
+};
+
+void Test::testIcu()
+{
+    {  // NOTE(review): "string\this" in the literal below contains the escape \t (a tab) - confirm that is intended
+        const std::string src("Creation of Bob2007 this is atumated string\this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string;this is atumated string; _ 12345567890-=,./;'[;");
+        std::vector u16Buffer(100);  // deliberately much smaller than src
+        UErrorCode status = U_ZERO_ERROR;
+        int32_t u16Wanted(0);
+        u_strFromUTF8(&u16Buffer[0], u16Buffer.size(), &u16Wanted, src.c_str(), -1, &status);
+        ASSERT_TRUE(U_SUCCESS(status) || (status == U_INVALID_CHAR_FOUND) || ((status == U_BUFFER_OVERFLOW_ERROR) && (u16Wanted > (int)u16Buffer.size())));  // overflow is accepted only when more space than the buffer holds is reported as wanted
+    }
+}
+
+void Test::testUnsignedIntegerSort()
+{
+    search::NumericRadixSorter S;
+    S(NULL, 0);  // sorting zero elements must be accepted
+    // A single element must come back unchanged.
+    Array array1(1);
+    array1[0] = 1567;
+    S(&array1[0], 1);
+    ASSERT_TRUE(array1[0] == 1567);
+    // 0x100000 pseudo-random values from a fixed seed must end up in non-decreasing order.
+    unsigned int N(0x100000);
+    Array array(N);
+    unsigned seed(1);
+    for(size_t i(0); i < N; i++) {
+        array[i] = rand_r(&seed);
+    }
+    S(&array[0], N);
+    for (size_t i(1); i < N; i++) {
+        ASSERT_TRUE(array[i] >= array[i-1]);
+    }
+}
+
+template
+class IntOrder {
+public:
+    uint64_t operator () (T v) const { return v ^ (std::numeric_limits::max() + 1); }  // XORs v with max()+1; NOTE(review): max()+1 overflows if T is a signed type of int width or wider - confirm the instantiations
+};
+
+template 
+void Test::testSignedIntegerSort()
+{
+    search::NumericRadixSorter S;
+    S(NULL, 0);  // sorting zero elements must be accepted
+    // A single element must come back unchanged.
+    Array array1(1);
+    array1[0] = 1567;
+    S(&array1[0], 1);
+    ASSERT_TRUE(array1[0] == 1567);
+    // 0x100000 pseudo-random values from a fixed seed, negated at even indexes, must end up in non-decreasing order.
+    unsigned int N(0x100000);
+    Array array(N);
+    unsigned seed(1);
+    for(size_t i(0); i < N; i++) {
+        T v = rand_r(&seed);
+        array[i] = (i%2) ? v : -v;  // odd index keeps v, even index stores -v
+    }
+    S(&array[0], N);
+    for (size_t i(1); i < N; i++) {
+        ASSERT_TRUE(array[i] >= array[i-1]);
+    }
+}
+
+void Test::testStringSort()
+{
+    Array array1(1);
+    // Case 1: zero elements.
+    unsigned int N(0x1000);
+    Array loaded(N);
+    std::vector radixScratchPad(N);
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof(), 1, static_cast(NULL), 0, &radixScratchPad[0], 0);
+    // Case 2: a single element must come back unchanged.
+    array1[0] = LoadedStrings("a");
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof(), 1, &array1[0], 1, &radixScratchPad[0], 0);
+    ASSERT_TRUE(strcmp(array1[0]._value, "a") == 0);
+    // Case 3: N elements - one "a" followed by N-1 empty strings.
+    loaded[0] = LoadedStrings("a");
+    for(size_t i(1); i < N; i++) {
+        loaded[i] = LoadedStrings("");
+    }
+
+    search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof(), 1, &loaded[0], N, &radixScratchPad[0], 0);
+    LoadedStrings::ValueCompare vc;
+    for(size_t i(1); i < N; i++) {
+        ASSERT_TRUE( ! vc(loaded[i], loaded[i-1]));  // no element may compare less than its predecessor
+    }
+}
+
+void Test::testStringCaseInsensitiveSort()
+{  // Placeholder: no case-insensitive sort checks are implemented here.
+}
+
+void Test::testSortSpec()
+{
+    {  // plain "-name": one descending entry without a converter
+        SortSpec sortspec("-name");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() == NULL);
+    }
+
+    {  // lowercase(name): a converter must be attached
+        SortSpec sortspec("-lowercase(name)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+
+    {  // uca(name,locale) without an explicit strength
+        SortSpec sortspec("-uca(name,nn_no)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {  // uca with each accepted strength keyword: PRIMARY ... IDENTICAL
+        SortSpec sortspec("-uca(name,nn_no,PRIMARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,SECONDARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,TERTIARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,QUATERNARY)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {
+        SortSpec sortspec("-uca(name,nn_no,IDENTICAL)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {  // a different locale ("zh")
+        SortSpec sortspec("-uca(name,zh)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {  // "finnes_ikke" (Norwegian for "does not exist"): a converter is still expected for this locale name
+        SortSpec sortspec("-uca(name,finnes_ikke)");
+        EXPECT_EQUAL(sortspec.size(), 1u);
+        EXPECT_EQUAL(sortspec[0]._field, "name");
+        EXPECT_TRUE( ! sortspec[0]._ascending);
+        EXPECT_TRUE(sortspec[0]._converter.get() != NULL);
+        EXPECT_TRUE(dynamic_cast(sortspec[0]._converter.get()) != NULL);
+    }
+    {  // an unrecognised strength keyword must throw std::runtime_error with the message checked below
+        try {
+            SortSpec sortspec("-uca(name,nn_no,NTERTIARY)");
+            EXPECT_TRUE(false);  // reaching this line means no exception was thrown
+        } catch (const std::runtime_error & e) {
+            EXPECT_TRUE(true);
+            EXPECT_TRUE(strcmp(e.what(), "Illegal uca collation strength : NTERTIARY") == 0);
+        }
+    }
+}
+
+void Test::testSameAsJavaOrder()
+{
+    // Checks that the "zh" PRIMARY UCA sort keys keep the lines of javaorder.zh in their stored order.
+    std::vector javaOrder;
+    std::ifstream is("javaorder.zh");
+    // Drive the loop by getline()'s result: testing eof() alone never terminates when the file cannot
+    // be opened (only failbit is set), and it drops a final line that lacks a trailing newline.
+    for (std::string line; std::getline(is, line); ) {
+        javaOrder.push_back(line);
+    }
+    EXPECT_EQUAL(158u, javaOrder.size());
+    ASSERT_TRUE( ! javaOrder.empty());  // javaOrder[0] is read unconditionally below
+    search::common::UcaConverter uca("zh", "PRIMARY");
+    vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size()));
+    vespalib::string prev(fkey.c_str(), fkey.size());
+    for (size_t i(1); i < javaOrder.size(); i++) {
+        vespalib::ConstBufferRef key = uca.convert(vespalib::ConstBufferRef(javaOrder[i].c_str(), javaOrder[i].size()));
+        vespalib::HexDump dump(key.c_str(), key.size());
+        vespalib::string current(key.c_str(), key.size());
+        UErrorCode status(U_ZERO_ERROR);
+        UCollationResult cr = uca.getCollator().compareUTF8(javaOrder[i-1].c_str(), javaOrder[i].c_str(), status);
+        std::cout << std::setw(3) << i << ": " << status << "(" << u_errorName(status) << ") - " << cr << " '" << dump << "'  : '" << javaOrder[i] << "'" << std::endl;
+        EXPECT_TRUE(prev <= current);  // sort keys must be non-decreasing
+        EXPECT_TRUE(U_SUCCESS(status));
+        EXPECT_TRUE(cr == UCOL_LESS || cr == UCOL_EQUAL);  // the collator itself must agree with the key order
+        prev = current;
+    }
+}
+
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+    TEST_INIT("sort_test");
+    // Run every test case in sequence.
+    testUnsignedIntegerSort();
+    testSignedIntegerSort();
+    testSignedIntegerSort();
+    testStringSort();
+    testStringCaseInsensitiveSort();
+    testSortSpec();
+    testIcu();
+    testSameAsJavaOrder();  // reads javaorder.zh from the current working directory
+
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/sortbenchmark.cpp b/searchlib/src/tests/sort/sortbenchmark.cpp
new file mode 100644
index 00000000000..1309cf57d5d
--- /dev/null
+++ b/searchlib/src/tests/sort/sortbenchmark.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+LOG_SETUP("sort_test");
+
+using vespalib::Array;
+using vespalib::ConstBufferRef;
+
+class Test : public vespalib::TestApp
+{
+public:
+    typedef std::vector V;
+    std::vector< std::vector > _data;  // input vectors; replaced by merged vectors in twoWayMerge()
+    int Main();
+    void generateVectors(size_t numVectors, size_t values);  // fills _data with numVectors ascending vectors
+    V merge();           // runs twoWayMerge() and returns _data[0]
+    void twoWayMerge();  // merges neighbouring pairs of _data, recursing until at most one vector is left
+    V cat() const;       // returns all vectors of _data concatenated in order
+};
+
+void Test::generateVectors(size_t numVectors, size_t values)
+{
+    _data.resize(numVectors);  // every vector below receives the same ascending content 0 .. values-1
+    for (size_t vecIdx = 0; vecIdx < numVectors; ++vecIdx) {
+        V & target = _data[vecIdx];
+        target.resize(values);
+        for (size_t pos = 0; pos < values; ++pos) {
+            target[pos] = pos;
+        }
+    }
+}
+
+Test::V Test::merge()
+{
+    twoWayMerge();  // leaves at most one, fully merged, vector in _data
+    return _data.empty() ? V() : _data[0];  // no input vectors: return an empty result instead of indexing out of range
+}
+
+void Test::twoWayMerge()
+{
+    std::vector n((_data.size()+1)/2);
+    // Merge neighbouring pairs (0,1), (2,3), ... of _data into n[0], n[1], ...
+    for ( size_t i(0), m(_data.size()/2); i < m; i++) {
+        const V & a = _data[i*2 + 0];
+        const V & b = _data[i*2 + 1];
+        n[i].resize(a.size() + b.size());
+        std::merge(a.begin(), a.end(), b.begin(), b.end(), n[i].begin());
+    }
+    if (_data.size()%2) {  // odd count: the unpaired last vector is swapped into the last slot of n unmerged
+        n[n.size()-1].swap(_data[_data.size() - 1]);
+    }
+    _data.swap(n);
+    if (_data.size() > 1) {  // repeat on the halved set until at most one vector remains
+        twoWayMerge();
+    }
+}
+
+Test::V Test::cat() const
+{
+    // Returns one vector holding the elements of every vector in _data, in order.
+    const size_t numVectors = _data.size();
+    size_t totalValues = 0;
+    for (size_t idx = 0; idx < numVectors; ++idx) {
+        totalValues += _data[idx].size();
+    }
+    V joined;
+    joined.reserve(totalValues);  // reserve the full size once, before appending
+    for (size_t idx = 0; idx < numVectors; ++idx) {
+        joined.insert(joined.end(), _data[idx].begin(), _data[idx].end());
+    }
+    return joined;
+}
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+    TEST_INIT("sortbenchmark");
+    size_t numVectors(11);
+    size_t values(10000000);
+    vespalib::string type("radix");
+    if (_argc > 1) {  // optional arguments: values [numVectors [type]]
+        values = strtol(_argv[1], NULL, 0);
+        if (_argc > 2) {
+            numVectors = strtol(_argv[2], NULL, 0);
+            if (_argc > 3) {  // _argv[3] exists only when a third argument was given
+                type = _argv[3];
+            }
+        }
+    }
+    // Build the input, concatenate it, then process it with the selected algorithm (size_t is printed with %zu).
+    printf("Start with %zu vectors with %zu values and type '%s'(radix, qsort, merge)\n", numVectors, values, type.c_str());
+    generateVectors(numVectors, values);
+    printf("Start cat\n");
+    V v = cat();
+    printf("Cat %zu values\n", v.size());
+    if (type == "merge") {
+        V m = merge();
+        printf("Merged %zu values\n", m.size());
+    } else if (type == "qsort") {
+        std::sort(v.begin(), v.end());
+        printf("sorted %zu value with std::sort\n", v.size());
+    } else {
+        search::NumericRadixSorter S;
+        S(v.empty() ? NULL : &v[0], v.size());  // do not take &v[0] of an empty vector
+        printf("sorted %zu value with radix::sort\n", v.size());
+    }
+
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp
new file mode 100644
index 00000000000..b9225c94a66
--- /dev/null
+++ b/searchlib/src/tests/sort/uca.cpp
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+LOG_SETUP("uca_stress");
+
+using icu::Collator;
+
+class Test : public vespalib::TestApp
+{
+public:
+    int Main();
+    void testFromDat();  // checks Collator::getSortKey() against the records read from sort-blobs.dat
+};
+
+
+void Test::testFromDat()
+{
+    size_t badnesses = 0;
+    // Each record read below: start marker, 4-byte length, mid marker, length 2-byte units, end marker.
+    std::string startMark("abc");
+    std::string midMark("def");
+    std::string endMark("ghi");
+
+    UErrorCode status = U_ZERO_ERROR;
+    auto coll = std::unique_ptr(Collator::createInstance(icu::Locale("en"), status));
+    ASSERT_TRUE(U_SUCCESS(status) && coll.get() != NULL);  // do not dereference a collator that failed to load
+    coll->setStrength(Collator::PRIMARY);
+
+    std::vector u16buffer(100);
+    std::vector  u8buffer(10);
+
+    int fd = open("sort-blobs.dat", O_RDONLY);
+    char sbuf[5] = { 0 };  // 4 marker bytes plus a guaranteed NUL, so the C-string comparisons below cannot over-read
+    // NOTE(review): if the file cannot be opened, read() fails at once and the test passes without checking anything - confirm.
+    int num=0;
+    uint32_t atleast = 0;
+
+    while (read(fd, sbuf, 4) == 4) {
+        if (startMark == sbuf) {
+            uint32_t len = 0;
+            int r = read(fd, &len, 4);
+
+            EXPECT_EQUAL(4, r);
+            r = read(fd, sbuf, 4);
+            EXPECT_EQUAL(4, r);
+            EXPECT_EQUAL(midMark, sbuf);
+
+            if (u16buffer.size() < len) {
+                u16buffer.resize(len);
+            }
+            r = read(fd, &u16buffer[0], len*2);
+            EXPECT_EQUAL((int)len*2, r);
+
+            r = read(fd, sbuf, 4);
+            EXPECT_EQUAL(4, r);
+            EXPECT_EQUAL(endMark, sbuf);
+            // Ask for the required key size first (NULL destination, capacity 0).
+            uint32_t wanted = coll->getSortKey(&u16buffer[0], len, NULL, 0);
+
+            EXPECT_TRUE(wanted > 0);
+            EXPECT_TRUE(wanted >= len);
+            EXPECT_TRUE(wanted < len*6);
+
+            if (wanted + 20 > u8buffer.size()) {
+                u8buffer.resize(wanted+20);
+            }
+            // Claim every capacity from 1 to wanted+7 and detect writes past the claimed capacity via the 0x99 fill.
+            for (uint32_t pretend = 1; pretend < wanted+8; ++pretend) {
+                memset(&u8buffer[0], 0x99, u8buffer.size());
+                uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], pretend);
+                EXPECT_EQUAL(wanted, got);
+
+                if (u8buffer[pretend+1] != 0x99) {
+                    printf("wrote 2 bytes too far: wanted space %u, pretend allocated %u, last good=%02x, bad=%02x %02x\n",
+                           wanted, pretend, u8buffer[pretend-1],
+                           u8buffer[pretend], u8buffer[pretend+1]);
+                } else if (u8buffer[pretend] != 0x99) {
+                    ++badnesses;
+                    if (wanted > atleast) {  // report only when a new largest key size is affected
+                        atleast = wanted;
+                        printf("wrote 1 byte too far: wanted space %u, pretend allocated %u, last good=%02x, bad=%02x\n",
+                               wanted, pretend, u8buffer[pretend-1], u8buffer[pretend]);
+                    }
+                }
+            }
+            // With the real capacity the key must end in NUL and leave the byte after it untouched.
+            memset(&u8buffer[0], 0x99, u8buffer.size());
+            uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], u8buffer.size());
+            EXPECT_EQUAL(wanted, got);
+
+            EXPECT_EQUAL('\0', u8buffer[got-1]);
+            EXPECT_EQUAL((uint8_t)0x99, u8buffer[got]);
+        }
+        if (++num >= 10000) {
+            TEST_FLUSH();
+            num=0;
+        }
+    }
+    if (fd >= 0) { close(fd); }  // release the descriptor; it was previously never closed
+    EXPECT_EQUAL(0u, badnesses);
+}
+
+TEST_APPHOOK(Test);
+
+int Test::Main()
+{
+    TEST_INIT("uca_stress");
+    testFromDat();  // the only test case; reads sort-blobs.dat from the current working directory
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/sortresults/.gitignore b/searchlib/src/tests/sortresults/.gitignore
new file mode 100644
index 00000000000..1b191671559
--- /dev/null
+++ b/searchlib/src/tests/sortresults/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+sortresults
+searchlib_sortresults_app
diff --git a/searchlib/src/tests/sortresults/CMakeLists.txt b/searchlib/src/tests/sortresults/CMakeLists.txt
new file mode 100644
index 00000000000..6889e4c517d
--- /dev/null
+++ b/searchlib/src/tests/sortresults/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sortresults_app
+    SOURCES
+    sorttest.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_sortresults_app COMMAND searchlib_sortresults_app)
diff --git a/searchlib/src/tests/sortresults/sorttest.cpp b/searchlib/src/tests/sortresults/sorttest.cpp
new file mode 100644
index 00000000000..4c3326f4cbc
--- /dev/null
+++ b/searchlib/src/tests/sortresults/sorttest.cpp
@@ -0,0 +1,99 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include 
+#include 
+#include 
+
+using search::RankedHit;
+
+unsigned int
+myrandom()
+{
+    // Combines the low 16 bits of two consecutive random() results into one 32-bit value;
+    // the first call supplies the upper half, the second the lower half.
+    const unsigned int highHalf = random() & 0xffff;
+    const unsigned int lowHalf = random() & 0xffff;
+    return (highHalf << 16) + lowHalf;
+}
+
+
+bool
+test_sort(unsigned int caseNum, unsigned int n, unsigned int ntop)
+{
+    // Fills n hits with random rank values, runs FastS_SortResults(hits, n, ntop) and checks the
+    // result. Prints one "CASE" line with PASS or FAIL and returns whether the case passed.
+    // ntop == 0 passes trivially; ntop is capped at n.
+    if (ntop == 0) {
+        printf("CASE %03d: [%d/%d] PASS\n", caseNum, ntop, n);
+        return true;
+    }
+    if (ntop > n) {
+        ntop = n;
+    }
+
+    // Document ids follow the input position; rank values are random.
+    RankedHit *hits = new RankedHit[n];
+    assert(hits != NULL);
+    for (unsigned int idx = 0; idx < n; ++idx) {
+        hits[idx]._docId = idx;
+        hits[idx]._rankValue = myrandom();
+    }
+    FastS_SortResults(hits, n, ntop);
+
+    // The first ntop hits must be in non-increasing rank order, and no hit after them
+    // may outrank the last of the top hits. Checking stops at the first violation.
+    bool passed = true;
+    const double lowestTopRank = hits[ntop - 1]._rankValue;
+    for (unsigned int idx = 0; passed && idx < n; ++idx) {
+        if (idx < ntop) {
+            if (idx > 0 && hits[idx]._rankValue > hits[idx - 1]._rankValue) {
+                printf("ERROR: rank(%d) > rank(%d)\n",
+                       idx, idx - 1);
+                passed = false;
+            }
+        } else if (hits[idx]._rankValue > lowestTopRank) {
+            printf("ERROR: rank(%d) > rank(%d)\n",
+                   idx, ntop - 1);
+            passed = false;
+        }
+    }
+
+    delete [] hits;
+    printf("CASE %03d: [%d/%d] %s\n", caseNum, ntop, n,
+           (passed)? "PASS" : "FAIL");
+    return passed;
+}
+
+
+int
+main(int argc, char **argv)
+{
+    (void) argc;
+    (void) argv;
+
+    // ntop values run against 50000 hits, in execution order; every group below is repeated 5 times.
+    static const unsigned int topCounts[] = { 1, 500, 1000, 2000, 5000, 10000, 50000 };
+    const unsigned int numTopCounts = sizeof(topCounts) / sizeof(topCounts[0]);
+    const unsigned int repeats = 5;
+    bool allPassed = true;
+    unsigned int caseNum = 0;
+
+    allPassed &= test_sort(++caseNum, 1, 1);
+    for (unsigned int rep = 0; rep < repeats; ++rep) {
+        allPassed &= test_sort(++caseNum, 2, 2);
+    }
+    for (unsigned int rep = 0; rep < repeats; ++rep) {
+        allPassed &= test_sort(++caseNum, 50, 50);
+    }
+    for (unsigned int rep = 0; rep < repeats; ++rep) {
+        for (unsigned int k = 0; k < numTopCounts; ++k) {
+            allPassed &= test_sort(++caseNum, 50000, topCounts[k]);
+        }
+    }
+
+    printf("CONCLUSION: TEST %s\n", (allPassed)? "PASSED" : "FAILED");
+    return (allPassed)? 0 : 1;
+}
diff --git a/searchlib/src/tests/sortspec/.gitignore b/searchlib/src/tests/sortspec/.gitignore
new file mode 100644
index 00000000000..dff7336208a
--- /dev/null
+++ b/searchlib/src/tests/sortspec/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+multilevelsort_test
+searchlib_multilevelsort_test_app
diff --git a/searchlib/src/tests/sortspec/CMakeLists.txt b/searchlib/src/tests/sortspec/CMakeLists.txt
new file mode 100644
index 00000000000..a436bb744c5
--- /dev/null
+++ b/searchlib/src/tests/sortspec/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_multilevelsort_test_app
+    SOURCES
+    multilevelsort.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_multilevelsort_test_app COMMAND searchlib_multilevelsort_test_app)
diff --git a/searchlib/src/tests/sortspec/multilevelsort.cpp b/searchlib/src/tests/sortspec/multilevelsort.cpp
new file mode 100644
index 00000000000..f151bfaf132
--- /dev/null
+++ b/searchlib/src/tests/sortspec/multilevelsort.cpp
@@ -0,0 +1,413 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("multilevelsort_test");
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search;
+
+typedef FastS_SortSpec::VectorRef VectorRef;
+typedef IntegerAttributeTemplate  Uint8;
+typedef IntegerAttributeTemplate   Int8;
+typedef IntegerAttributeTemplate Uint16;
+typedef IntegerAttributeTemplate  Int16;
+typedef IntegerAttributeTemplate Uint32;
+typedef IntegerAttributeTemplate  Int32;
+typedef IntegerAttributeTemplate Uint64;
+typedef IntegerAttributeTemplate  Int64;
+typedef FloatingPointAttributeTemplate  Float;
+typedef FloatingPointAttributeTemplate Double;
+typedef std::map VectorMap;
+typedef AttributeVector::SP             AttributePtr;
+using search::attribute::Config;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+
+class MultilevelSortTest : public vespalib::TestApp
+{
+public:
+    enum AttrType {  // kind of sort key named by a Spec
+        INT8,
+        INT16,
+        INT32,
+        INT64,
+        FLOAT,
+        DOUBLE,
+        STRING,
+        RANK,
+        DOCID,
+        NONE
+    };
+    struct Spec {  // one sort level: name, type and direction (ascending unless stated otherwise)
+        Spec() : _name("unknown"), _type(NONE), _asc(true) {}
+        Spec(const std::string &name, AttrType type) : _name(name), _type(type), _asc(true) {}
+        Spec(const std::string &name, AttrType type, bool asc) : _name(name), _type(type), _asc(asc) {}
+        std::string _name;
+        AttrType _type;
+        bool _asc;
+    };
+private:
+    int _sortMethod;
+    template
+    T getRandomValue() {  // maps rand() linearly onto [min, max]
+        T min = std::numeric_limits::min();
+        T max = std::numeric_limits::max();
+        return min + static_cast((max - min) * (((float)rand() / (float)RAND_MAX)));  // NOTE(review): (max - min) overflows for signed types of int width or wider - confirm the instantiations
+    }
+    template
+    void fill(IntegerAttribute *attr, uint32_t size, uint32_t unique = 0);  // unique == 0: random values
+    template
+    void fill(FloatingPointAttribute *attr, uint32_t size, uint32_t unique = 0);  // unique == 0: random values
+    void fill(StringAttribute *attr, uint32_t size, const std::vector &values);  // empty values: random strings
+    template
+    int compareTemplate(T *vector, uint32_t a, uint32_t b);  // returns -1, 0 or 1
+    int compare(AttributeVector *vector, AttrType type,
+                uint32_t a, uint32_t b);
+    void sortAndCheck(const std::vector &spec, uint32_t num,
+                      uint32_t unique, const std::vector &strValues);
+    void testSortMethod(int method);
+public:
+    MultilevelSortTest() : _sortMethod(0) { srand(time(NULL)); }  // time-based seed: test data differs between runs
+    int Main();
+};
+
+template
+void MultilevelSortTest::fill(IntegerAttribute *attr, uint32_t size,
+                              uint32_t unique)
+{
+    ASSERT_TRUE(attr->addDocs(size));
+    std::vector values;
+    for (uint32_t j = 0; j < unique; ++j) {  // candidate pool: alternately min()+j and max()-j
+        if (j % 2 == 0) {
+            values.push_back(std::numeric_limits::min() + static_cast(j));
+        } else {
+            values.push_back(std::numeric_limits::max() - static_cast(j));
+        }
+    }
+    for (uint32_t i = 0; i < size; ++i) {
+        if (unique == 0) {  // no pool requested: every document gets a random value
+            attr->update(i, getRandomValue());
+        } else {  // otherwise pick a random member of the pool
+            uint32_t idx = rand() % values.size();
+            attr->update(i, values[idx]);
+        }
+    }
+}
+
+template
+void MultilevelSortTest::fill(FloatingPointAttribute *attr, uint32_t size,
+                              uint32_t unique)
+{
+    ASSERT_TRUE(attr->addDocs(size));
+    std::vector values;
+    for (uint32_t j = 0; j < unique; ++j) {  // candidate pool: alternately min()+j and max()-j
+        if (j % 2 == 0) {
+            values.push_back(std::numeric_limits::min() + static_cast(j));  // NOTE(review): for floating-point types min() is the smallest positive value, not the most negative - confirm intent
+        } else {
+            values.push_back(std::numeric_limits::max() - static_cast(j));  // NOTE(review): max() - small j presumably rounds back to max(), so these entries may coincide - confirm
+        }
+    }
+    for (uint32_t i = 0; i < size; ++i) {
+        if (unique == 0) {  // no pool requested: every document gets a random value
+            attr->update(i, getRandomValue());
+        } else {  // otherwise pick a random member of the pool
+            uint32_t idx = rand() % values.size();
+            //LOG(info, "fill vector<%s>::doc<%d> = %f (idx=%d)", attr->getName().c_str(), i, values[idx], idx);
+            attr->update(i, values[idx]);
+        }
+    }
+}
+
+void
+MultilevelSortTest::fill(StringAttribute *attr, uint32_t size, // adds 'size' documents to attr and gives each one a string value
+                         const std::vector &values) // strings to draw from; if empty, random strings are generated
+{
+    ASSERT_TRUE(attr->addDocs(size));
+    for (uint32_t i = 0; i < size; ++i) {
+        if (values.empty()) {
+            uint32_t len = 1 + static_cast(127 * (((float)rand() / (float)RAND_MAX))); // random length, at least 1
+            std::string value;
+            for (uint32_t j = 0; j < len; ++j) {
+                char c = 'a' + static_cast(('Z' - 'a') * (((float)rand() / (float)RAND_MAX))); // NOTE(review): 'Z' - 'a' is negative in ASCII, so c is offset downwards from 'a' - confirm 'z' was not intended
+                value.append(1, c);
+            }
+            attr->update(i, value.c_str());
+        } else {
+            uint32_t idx = rand() % values.size();
+            // pick one of the supplied strings at random, so duplicates across
+            // documents are expected
+            attr->update(i, values[idx].c_str());
+        }
+    }
+}
+
+template
+int
+MultilevelSortTest::compareTemplate(T *vector, uint32_t a, uint32_t b) // three-way compare of the values of documents a and b: returns -1, 0 or 1
+{
+    V va;
+    V vb;
+    vector->getAll(a, &va, 1); // read into a one-element buffer
+    vector->getAll(b, &vb, 1);
+    if (va == vb) {
+        return 0;
+    } else if (va < vb) {
+        return -1;
+    }
+    return 1; // va > vb
+}
+
+int
+MultilevelSortTest::compare(AttributeVector *vector, AttrType type, // three-way compare of documents a and b in 'vector'; 'type' selects how the vector is interpreted
+                            uint32_t a, uint32_t b) // returns -1, 0 or 1
+{
+    if (type == INT8) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == INT16) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == INT32) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == INT64) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == FLOAT) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == DOUBLE) {
+        return compareTemplate(static_cast(vector), a, b);
+    } else if (type == STRING) {
+        StringAttribute *vString = static_cast(vector);
+        const char *va = vString->get(a);
+        const char *vb = vString->get(b);
+        std::string sa(va); // compared with std::string's == and <
+        std::string sb(vb);
+        if (sa == sb) {
+            return 0;
+        } else if (sa < sb) {
+            return -1;
+        }
+        return 1;
+    } else { // any other type (e.g. RANK, DOCID) is a test error here
+        ASSERT_TRUE(false);
+        return 0;
+    }
+}
+
+void
+MultilevelSortTest::sortAndCheck(const std::vector &spec, uint32_t num, // builds one attribute per spec entry, sorts 'num' hits and verifies the resulting order
+                                 uint32_t unique, const std::vector &strValues) // 'unique' and 'strValues' are forwarded to fill()
+{
+    VectorMap vec;
+    // generate and fill one attribute vector per spec entry; RANK and DOCID match no branch below
+    for (uint32_t i = 0; i < spec.size(); ++i) {
+        std::string name = spec[i]._name;
+        AttrType type = spec[i]._type;
+        if (type == INT8) {
+            Config cfg(BasicType::INT8, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == INT16) {
+            Config cfg(BasicType::INT16, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == INT32) {
+            Config cfg(BasicType::INT32, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == INT64) {
+            Config cfg(BasicType::INT64, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == FLOAT) {
+            Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == DOUBLE) {
+            Config cfg(BasicType::DOUBLE, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, unique);
+        } else if (type == STRING) {
+            Config cfg(BasicType::STRING, CollectionType::SINGLE);
+            vec[name] = AttributeFactory::createAttribute(name, cfg);
+            fill(static_cast(vec[name].get()), num, strValues);
+        }
+        if (vec[name].get() != NULL) // no attribute was created for RANK/DOCID entries
+            vec[name]->commit();
+    }
+
+    RankedHit *hits = new RankedHit[num]; // one hit per document, in docid order, with a random rank value
+    for (uint32_t i = 0; i < num; ++i) {
+        hits[i]._docId = i;
+        hits[i]._rankValue = getRandomValue();
+    }
+
+    vespalib::Clock clock;
+    vespalib::Doom doom(clock, std::numeric_limits::max());
+    FastS_SortSpec sorter(doom, _sortMethod);
+    // init sorter with one VectorRef per sort level, in spec order
+    for(uint32_t i = 0; i < spec.size(); ++i) {
+        AttributeGuard ag; // NOTE(review): not referenced anywhere in this loop
+        if (spec[i]._type == RANK) {
+            sorter._vectors.push_back
+                (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_RANK :
+                           FastS_SortSpec::DESC_RANK, NULL, NULL));
+        } else if (spec[i]._type == DOCID) {
+            sorter._vectors.push_back
+                (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_DOCID :
+                           FastS_SortSpec::DESC_DOCID, NULL, NULL));
+        } else {
+            const search::attribute::IAttributeVector * v = vec[spec[i]._name].get();
+            sorter._vectors.push_back
+                (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_VECTOR :
+                           FastS_SortSpec::DESC_VECTOR, v, NULL));
+        }
+    }
+
+    FastOS_Time timer;
+    timer.SetNow();
+    sorter.sortResults(hits, num, num);
+    LOG(info, "sort time = %f ms", timer.MilliSecsToNow());
+
+    uint32_t *offsets = new uint32_t[num + 1]; // num + 1 entries: offsets[i+1] - offsets[i] is used as the length of entry i
+    char *buf = new char[sorter.getSortDataSize(0, num)];
+    sorter.copySortData(0, num, offsets, buf);
+
+    // check results: every adjacent pair of hits must respect the sort spec
+    for (uint32_t i = 0; i < num - 1; ++i) { // NOTE(review): num - 1 wraps around if num == 0; testSortMethod() passes 50 or 5000
+        for (uint32_t j = 0; j < spec.size(); ++j) {
+            int cmp = 0;
+            if (spec[j]._type == RANK) {
+                if (hits[i]._rankValue < hits[i+1]._rankValue) {
+                    cmp = -1;
+                } else if (hits[i]._rankValue > hits[i+1]._rankValue) {
+                    cmp = 1;
+                }
+            } else if (spec[j]._type == DOCID) {
+                if (hits[i]._docId < hits[i+1]._docId) {
+                    cmp = -1;
+                } else if (hits[i]._docId > hits[i+1]._docId) {
+                    cmp = 1;
+                }
+            } else {
+                AttributeVector *av = vec[spec[j]._name].get();
+                cmp = compare(av, spec[j]._type,
+                              hits[i]._docId, hits[i+1]._docId);
+            }
+            if (spec[j]._asc) {
+                EXPECT_TRUE(cmp <= 0);
+                if (cmp < 0) { // strictly ordered at this level, lower levels do not matter
+                    break;
+                }
+            } else {
+                EXPECT_TRUE(cmp >= 0);
+                if (cmp > 0) { // strictly ordered at this level, lower levels do not matter
+                    break;
+                }
+            }
+        }
+        // check binary sort data: keys must not decrease over their common length and must equal the copy in buf
+        uint32_t minLen = std::min(sorter._sortDataArray[i]._len,
+                          sorter._sortDataArray[i+1]._len);
+        int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx,
+                         &sorter._binarySortData[0] + sorter._sortDataArray[i+1]._idx,
+                         minLen);
+        EXPECT_TRUE(cmp <= 0);
+        EXPECT_TRUE(sorter._sortDataArray[i]._len == (offsets[i+1] - offsets[i]));
+        cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx,
+                     buf + offsets[i], sorter._sortDataArray[i]._len);
+        EXPECT_TRUE(cmp == 0);
+    }
+    EXPECT_TRUE(sorter._sortDataArray[num-1]._len == (offsets[num] - offsets[num-1])); // the loop stops at num - 2, so check the last entry here
+    int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[num-1]._idx,
+                 buf + offsets[num-1], sorter._sortDataArray[num-1]._len);
+    EXPECT_TRUE(cmp == 0);
+
+    delete [] hits;
+    delete [] offsets;
+    delete [] buf;
+}
+
+int
+MultilevelSortTest::Main() // test entry point: runs the full suite once per sort method
+{
+    TEST_INIT("multilevelsort_test");
+
+    testSortMethod(0); // the value is stored in _sortMethod and handed to FastS_SortSpec
+    testSortMethod(1);
+    testSortMethod(2);
+
+    TEST_DONE();
+}
+
+void MultilevelSortTest::testSortMethod(int method) // runs multi-level and single-level sort checks with the given sort method
+{
+    _sortMethod = method;
+    { // multi-level: nine ascending levels, few distinct values per attribute
+        std::vector spec;
+        spec.push_back(Spec("int8", INT8));
+        spec.push_back(Spec("int16", INT16));
+        spec.push_back(Spec("int32", INT32));
+        spec.push_back(Spec("int64", INT64));
+        spec.push_back(Spec("float", FLOAT));
+        spec.push_back(Spec("double", DOUBLE));
+        spec.push_back(Spec("string", STRING));
+        spec.push_back(Spec("rank", RANK));
+        spec.push_back(Spec("docid", DOCID));
+
+        std::vector strValues;
+        strValues.push_back("applications");
+        strValues.push_back("places");
+        strValues.push_back("system");
+        strValues.push_back("vespa search core");
+
+        srand(12345); // fixed seed first ...
+        sortAndCheck(spec, 5000, 4, strValues);
+        srand(time(NULL)); // ... then a time-based seed
+        sortAndCheck(spec, 5000, 4, strValues);
+
+        strValues.push_back("multilevelsort");
+        strValues.push_back("trondheim");
+        strValues.push_back("ubuntu");
+        strValues.push_back("fastserver4");
+
+        srand(56789); // same again with 8 distinct values per attribute
+        sortAndCheck(spec, 5000, 8, strValues);
+        srand(time(NULL));
+        sortAndCheck(spec, 5000, 8, strValues);
+    }
+    { // single-level: one type at a time, random values (unique == 0, no string pool)
+        std::vector none;
+        uint32_t num = 50;
+        sortAndCheck(std::vector(1, Spec("int8", INT8, true)), num, 0, none); // ascending
+        sortAndCheck(std::vector(1, Spec("int16", INT16, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("int32", INT32, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("int64", INT64, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("float", FLOAT, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("double", DOUBLE, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("string", STRING, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("rank", RANK, true)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("docid", DOCID, true)), num, 0, none);
+
+        sortAndCheck(std::vector(1, Spec("int8", INT8, false)), num, 0, none); // descending
+        sortAndCheck(std::vector(1, Spec("int16", INT16, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("int32", INT32, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("int64", INT64, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("float", FLOAT, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("double", DOUBLE, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("string", STRING, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("rank", RANK, false)), num, 0, none);
+        sortAndCheck(std::vector(1, Spec("docid", DOCID, false)), num, 0, none);
+    }
+
+}
+
+
+TEST_APPHOOK(MultilevelSortTest);
diff --git a/searchlib/src/tests/stackdumpiterator/.gitignore b/searchlib/src/tests/stackdumpiterator/.gitignore
new file mode 100644
index 00000000000..6f8239c6cc1
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+stackdumpiteratortest
+searchlib_stackdumpiteratortest_app
diff --git a/searchlib/src/tests/stackdumpiterator/CMakeLists.txt b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt
new file mode 100644
index 00000000000..dceeaad3f26
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stackdumpiteratortest_app
+    SOURCES
+    stackdumpiteratortest.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_stackdumpiteratortest_app COMMAND searchlib_stackdumpiteratortest_app - *1000)
diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp
new file mode 100644
index 00000000000..19ce69550f7
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp
@@ -0,0 +1,316 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include 
+#include 
+#include 
+#include "stackdumpiteratortest.h"
+
+#define NUMTESTS 5
+
+// Parses the decimal digits at p, advancing p past all of them. The value
+// stops growing once it reaches limit, so it can never overflow.
+static int
+ParseNumber(char *&p, int limit)
+{
+    int value = 0;
+    for (; *p >= '0' && *p <= '9'; p++) {
+        if (value < limit)
+            value = value * 10 + (*p - '0');
+    }
+    return value;
+}
+
+// Runs the tests selected on the command line: "*N" repeats the run N
+// times, "a", "a-b", "a-" and "-b" select tests, a leading '/' deselects.
+// Malformed arguments are ignored. Returns 1 on failure or missing args.
+int
+StackDumpIteratorTest::Main()
+{
+    int doTest[NUMTESTS];
+    int low, high, accnum;
+    int indicator;
+    int multiplier = 1;
+    bool failed = false;
+
+    if (_argc == 1) {
+        Usage(_argv[0]);
+        return 1;
+    }
+
+    // default initialize to not run any tests.
+    for (int n = 0; n < NUMTESTS; n++)
+        doTest[n] = 0;
+
+    // parse the command line arguments
+    for (int i = 1; i < _argc; i++) {
+        low = 0;
+        high = NUMTESTS - 1;
+        char *p = _argv[i];
+
+        // Check if a multiplier is specified
+        if (*p == '*') {
+            p++;
+            // the limit keeps the parsed value well inside the int range
+            accnum = ParseNumber(p, 100000000);
+            if (*p == '\0')
+                multiplier = accnum;
+            continue;
+        }
+
+        // Default is to run the tests specified, unless the first char is '/'
+        indicator = 1;
+        if (*p == '/') {
+            p++;
+            indicator = 0;
+        }
+
+        // Find the first number. Skip the argument if anything but digits
+        // precedes '-' or the end; it could index doTest out of bounds.
+        accnum = ParseNumber(p, NUMTESTS);
+        if ((*p != '-' && *p != '\0') || accnum >= NUMTESTS)
+            continue;
+        low = accnum;
+        // Check for range operator
+        if (*p == '-') {
+            p++;
+            // Find the second number; empty or 0 keeps the range open
+            accnum = ParseNumber(p, NUMTESTS);
+            if (*p != '\0')
+                continue;
+            if (accnum > 0)
+                high = accnum < NUMTESTS ? accnum : NUMTESTS-1;
+        } else
+            high = low;
+
+        // Indicate the runrequest for the desired range.
+        for (int j = low; j <= high; j++)
+            doTest[j] = indicator;
+    }
+
+    // Results are only verified for a single pass; repeated passes are
+    // meant for timing.
+    bool verify = (multiplier <= 1);
+    if (!verify)
+        printf("Running all tests %d times.\n", multiplier);
+
+    int testCnt = 0;
+
+    FastOS_Time timer;
+    timer.SetNow();
+
+    // Actually run the tests that we wanted.
+    for (int j = 0; j < multiplier; j++)
+        for (int k = 0; k < NUMTESTS; k++)
+            if (doTest[k] == 1) {
+                if (!RunTest(k, verify))
+                    failed = true;
+                testCnt++;
+            }
+
+    // Print time taken
+    double timeTaken = timer.MilliSecsToNow();
+
+    printf("Time taken : %f ms\n", timeTaken);
+    printf("Number of tests run: %d\n", testCnt);
+    double avgTestPrMSec = static_cast<double>(testCnt) / timeTaken;
+    printf("Tests pr Sec: %f\n", avgTestPrMSec * 1000.0);
+
+    return failed ? 1 : 0;
+}
+
+#define ITERATOR_NOERROR                0x0
+#define ITERATOR_ERROR_WRONG_NUM        0x1
+#define ITERATOR_ERROR_WRONG_TYPE       0x2
+#define ITERATOR_ERROR_WRONG_ARITY      0x4
+#define ITERATOR_ERROR_WRONG_INDEX     0x10
+#define ITERATOR_ERROR_WRONG_TERM      0x20
+#define ITERATOR_ERROR_WRONG_GETINDEX  0x40
+#define ITERATOR_ERROR_WRONG_GETTERM   0x80
+#define ITERATOR_ERROR_WRONG_SIZE     0x100
+
+/**
+ * Walks the dumped query in 'actual' and compares each item against the
+ * items popped from 'correct'. Comparison stops at the first mismatch and
+ * the outcome is printed as "ok" or "fail" after the test number.
+ * Only the first mismatch is recorded, plus ITERATOR_ERROR_WRONG_SIZE when
+ * expected items are left over or the iterator yields too many.
+ *
+ *
+ * @param testNo   test number, used in the printed report
+ * @param actual   iterator over the dumped stack
+ * @param correct  stack holding the expected items; items are popped from it
+ * @param expected bitmask of ITERATOR_* flags the comparison should yield
+ * @return true if the observed flags equal 'expected'
+ */
+bool
+StackDumpIteratorTest::ShowResult(int testNo,
+                                  search::SimpleQueryStackDumpIterator &actual,
+                                  search::SimpleQueryStack &correct,
+                                  unsigned int expected)
+{
+    unsigned int results = ITERATOR_NOERROR;
+    const char *idx_ptr;
+    const char *term_ptr;
+    size_t idx_len;
+    size_t term_len;
+
+    int num = 0;
+
+    printf("%03d: ", testNo);
+
+    while (actual.next()) {
+        actual.getIndexName(&idx_ptr, &idx_len);
+        actual.getTerm(&term_ptr, &term_len);
+
+        search::ParseItem *item = correct.Pop();
+        if (item == NULL) {
+            // The iterator produced more items than were expected. This
+            // assumes Pop() yields NULL on an empty stack - TODO confirm.
+            results |= ITERATOR_ERROR_WRONG_SIZE;
+            break;
+        }
+
+        // NOTE(review): strncmp() only checks the first idx_len/term_len
+        // characters, so a longer expected string is still accepted.
+        if (num++ != actual.getNum()) {
+            results |= ITERATOR_ERROR_WRONG_NUM;
+        } else if (item->Type() != actual.getType()) {
+            results |= ITERATOR_ERROR_WRONG_TYPE;
+        } else if (item->_arity != actual.getArity()) {
+            results |= ITERATOR_ERROR_WRONG_ARITY;
+        } else if (strncmp(item->_indexName.c_str(), idx_ptr, idx_len) != 0) {
+            results |= ITERATOR_ERROR_WRONG_INDEX;
+        } else if (strncmp(item->_term.c_str(), term_ptr, term_len) != 0) {
+            results |= ITERATOR_ERROR_WRONG_TERM;
+        }
+
+        // The popped item is deleted on every path, match or mismatch.
+        delete item;
+        if (results != ITERATOR_NOERROR)
+            break;
+    }
+    if (correct.GetSize() != 0) results |= ITERATOR_ERROR_WRONG_SIZE;
+
+    if (results == expected)
+        printf("ok\n");
+    else
+        printf("fail. exp: %x, result: %x\n", expected, results);
+
+    return results == expected;
+}
+
+/**
+ * Builds the query stack for one test case, dumps it into a buffer and optionally verifies the dump; returns false only if verification fails.
+ * @param testno The test to run; unknown numbers only print a message.
+ * @param verify Verify the result of the test with ShowResult(); if false the stack is only built and dumped.
+ */
+bool
+StackDumpIteratorTest::RunTest(int testno, bool verify)
+{
+    search::SimpleQueryStack stack;
+    search::RawBuf buf(32768);
+
+    switch (testno) {
+    case 0:
+    {
+        // Simple term query
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar"));
+
+        stack.AppendBuffer(&buf);
+        search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+        if (verify)
+            return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+        break;
+    }
+
+    case 1:
+    {
+        // multi term query
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "xyzzy"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar", "baz"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2));
+
+        stack.AppendBuffer(&buf);
+        search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+        if (verify)
+            return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+        break;
+    }
+
+    case 2:
+    {
+        // all stack items
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_NUMTERM, "foo", "[0;22]"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_PREFIXTERM, "bar", "baz"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "bar"));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 3));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_RANK, 5));
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_NOT, 3));
+
+        stack.AppendBuffer(&buf);
+        search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+
+        if (verify)
+            return ShowResult(testno, si, stack, ITERATOR_NOERROR);
+        break;
+    }
+
+    case 3:
+    {
+        // malicious type in buffer: the byte at offset 0 of the dump is overwritten
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+        stack.AppendBuffer(&buf);
+        *buf.GetWritableDrainPos(0) = 0x1e;
+        search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+        if (verify)
+            return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE);
+        break;
+    }
+
+    case 4:
+    {
+        // malicious length in buffer: the byte at offset 1 of the dump is overwritten
+        stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar"));
+        stack.AppendBuffer(&buf);
+        *buf.GetWritableDrainPos(1) = 0xba;
+        search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen()));
+        if (verify)
+            return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE);
+        break;
+    }
+
+
+    default:
+    {
+        printf("%03d: no such test\n", testno);
+    }
+    }
+
+    return true; // reached when not verifying, or for an unknown test number
+}
+
+void
+StackDumpIteratorTest::Usage(char *progname) // prints the command line syntax and the valid test number range
+{ // NOTE: Main() also accepts a "*N" multiplier and a leading '/' to deselect tests; neither is listed here
+    printf("%s {testnospec}+\n\
+    Where testnospec is:\n\
+      num:     single test\n\
+      num-num: inclusive range (open range permitted)\n",progname);
+    printf("There are tests from %d to %d\n\n", 0, NUMTESTS-1);
+}
+
+int
+main(int argc, char** argv) // program entry: hands the command line to the test application
+{
+    StackDumpIteratorTest tester;
+    return tester.Entry(argc, argv); // the value returned by Entry() becomes the process exit code
+}
+
diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h
new file mode 100644
index 00000000000..63d41005932
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include 
+#include 
+
+class StackDumpIteratorTest : public FastOS_Application // test application for search::SimpleQueryStackDumpIterator
+{
+    int Main(); // selects tests from the command line and runs them
+    void Usage(char *progname); // prints the command line syntax
+    bool ShowResult(int testNo, search::SimpleQueryStackDumpIterator &actual, search::SimpleQueryStack &correct, unsigned int expected); // compares the iterator output with the expected stack and prints the verdict
+    bool RunTest(int i, bool verify); // builds test case i and, if 'verify' is set, checks it with ShowResult()
+};
+
diff --git a/searchlib/src/tests/stackdumpiterator/testowner.ATS b/searchlib/src/tests/stackdumpiterator/testowner.ATS
new file mode 100644
index 00000000000..6d03b0836a4
--- /dev/null
+++ b/searchlib/src/tests/stackdumpiterator/testowner.ATS
@@ -0,0 +1 @@
+vlarsen
diff --git a/searchlib/src/tests/stringenum/.gitignore b/searchlib/src/tests/stringenum/.gitignore
new file mode 100644
index 00000000000..7a2f1dd659f
--- /dev/null
+++ b/searchlib/src/tests/stringenum/.gitignore
@@ -0,0 +1,8 @@
+*.core
+.depend
+Makefile
+core
+core.*
+stringenum
+tmp.enum
+searchlib_stringenum_test_app
diff --git a/searchlib/src/tests/stringenum/CMakeLists.txt b/searchlib/src/tests/stringenum/CMakeLists.txt
new file mode 100644
index 00000000000..b59e739be47
--- /dev/null
+++ b/searchlib/src/tests/stringenum/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stringenum_test_app
+    SOURCES
+    stringenum_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_stringenum_test_app COMMAND searchlib_stringenum_test_app)
diff --git a/searchlib/src/tests/stringenum/stringenum_test.cpp b/searchlib/src/tests/stringenum/stringenum_test.cpp
new file mode 100644
index 00000000000..8c6ef64dbe3
--- /dev/null
+++ b/searchlib/src/tests/stringenum/stringenum_test.cpp
@@ -0,0 +1,147 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include 
+#include 
+LOG_SETUP("stringenum");
+#include 
+#include 
+
+
+#include 
+
+using namespace vespalib;
+
+class MyApp : public vespalib::TestApp // test application for search::util::StringEnum
+{
+public:
+    void CheckLookup( search::util::StringEnum *strEnum, const char *str, int value); // checks that str and value map to each other in strEnum
+    int Main(); // test entry point
+
+    MyApp(void) {}
+};
+
+
+void
+MyApp::CheckLookup( search::util::StringEnum *strEnum, const char *str, int value) // checks the mapping between str and value in both directions
+{
+    EXPECT_EQUAL(0, strcmp(str, strEnum->Lookup(value))); // value -> string
+    EXPECT_EQUAL(value, strEnum->Lookup(str)); // string -> value
+}
+
+
+int
+MyApp::Main() // exercises Add, Lookup, Save, Load and Clear of StringEnum
+{
+    TEST_INIT("stringenum_test");
+
+    search::util::StringEnum enum1;
+    search::util::StringEnum enum2;
+
+    // check number of entries
+    EXPECT_EQUAL(enum1.GetNumEntries(), 0u);
+    EXPECT_EQUAL(enum2.GetNumEntries(), 0u);
+
+    // check add non-duplicates: values are expected to be handed out in insertion order, starting at 0
+    EXPECT_EQUAL(enum1.Add("zero"),   0);
+    EXPECT_EQUAL(enum1.Add("one"),    1);
+    EXPECT_EQUAL(enum1.Add("two"),    2);
+    EXPECT_EQUAL(enum1.Add("three"),  3);
+    EXPECT_EQUAL(enum1.Add("four"),   4);
+    EXPECT_EQUAL(enum1.Add("five"),   5);
+    EXPECT_EQUAL(enum1.Add("six"),    6);
+    EXPECT_EQUAL(enum1.Add("seven"),  7);
+    EXPECT_EQUAL(enum1.Add("eight"),  8);
+    EXPECT_EQUAL(enum1.Add("nine"),   9);
+
+    // check add duplicates: re-adding a known string must return its existing value
+    EXPECT_EQUAL(enum1.Add("four"),   4);
+    EXPECT_EQUAL(enum1.Add("eight"),  8);
+    EXPECT_EQUAL(enum1.Add("six"),    6);
+    EXPECT_EQUAL(enum1.Add("seven"),  7);
+    EXPECT_EQUAL(enum1.Add("one"),    1);
+    EXPECT_EQUAL(enum1.Add("nine"),   9);
+    EXPECT_EQUAL(enum1.Add("five"),   5);
+    EXPECT_EQUAL(enum1.Add("zero"),   0);
+    EXPECT_EQUAL(enum1.Add("two"),    2);
+    EXPECT_EQUAL(enum1.Add("three"),  3);
+
+    // check add non-duplicate: numbering must continue after the duplicates above
+    EXPECT_EQUAL(enum1.Add("ten"),   10);
+
+    // check mapping and reverse mapping
+    EXPECT_EQUAL(enum1.GetNumEntries(), 11u);
+    TEST_DO(CheckLookup(&enum1, "zero",   0));
+    TEST_DO(CheckLookup(&enum1, "one",    1));
+    TEST_DO(CheckLookup(&enum1, "two",    2));
+    TEST_DO(CheckLookup(&enum1, "three",  3));
+    TEST_DO(CheckLookup(&enum1, "four",   4));
+    TEST_DO(CheckLookup(&enum1, "five",   5));
+    TEST_DO(CheckLookup(&enum1, "six",    6));
+    TEST_DO(CheckLookup(&enum1, "seven",  7));
+    TEST_DO(CheckLookup(&enum1, "eight",  8));
+    TEST_DO(CheckLookup(&enum1, "nine",   9));
+    TEST_DO(CheckLookup(&enum1, "ten",   10));
+
+    TEST_FLUSH();
+
+    // save enum1 to a file and load that file into enum2
+    EXPECT_TRUE(enum1.Save("tmp.enum"));
+    EXPECT_TRUE(enum2.Load("tmp.enum"));
+
+    // check mapping and reverse mapping of the loaded copy
+    EXPECT_EQUAL(enum2.GetNumEntries(), 11u);
+    TEST_DO(CheckLookup(&enum2, "zero",   0));
+    TEST_DO(CheckLookup(&enum2, "one",    1));
+    TEST_DO(CheckLookup(&enum2, "two",    2));
+    TEST_DO(CheckLookup(&enum2, "three",  3));
+    TEST_DO(CheckLookup(&enum2, "four",   4));
+    TEST_DO(CheckLookup(&enum2, "five",   5));
+    TEST_DO(CheckLookup(&enum2, "six",    6));
+    TEST_DO(CheckLookup(&enum2, "seven",  7));
+    TEST_DO(CheckLookup(&enum2, "eight",  8));
+    TEST_DO(CheckLookup(&enum2, "nine",   9));
+    TEST_DO(CheckLookup(&enum2, "ten",   10));
+
+    // add garbage that is not part of the saved file
+    enum2.Add("sfsdffgdfh");
+    enum2.Add("sf24dfsgg3");
+    enum2.Add("sfwertfgdh");
+    enum2.Add("sfewrgtsfh");
+    enum2.Add("sfgdsdgdfh");
+
+    TEST_FLUSH();
+
+    // reload; this is expected to replace the current content
+    EXPECT_TRUE(enum2.Load("tmp.enum"));
+
+    // check garbage lost
+    EXPECT_EQUAL(enum2.GetNumEntries(), 11u);
+    EXPECT_EQUAL(-1, enum2.Lookup("sfewrgtsfh"));
+    // check mapping and reverse mapping
+    TEST_DO(CheckLookup(&enum2, "zero",   0));
+    TEST_DO(CheckLookup(&enum2, "one",    1));
+    TEST_DO(CheckLookup(&enum2, "two",    2));
+    TEST_DO(CheckLookup(&enum2, "three",  3));
+    TEST_DO(CheckLookup(&enum2, "four",   4));
+    TEST_DO(CheckLookup(&enum2, "five",   5));
+    TEST_DO(CheckLookup(&enum2, "six",    6));
+    TEST_DO(CheckLookup(&enum2, "seven",  7));
+    TEST_DO(CheckLookup(&enum2, "eight",  8));
+    TEST_DO(CheckLookup(&enum2, "nine",   9));
+    TEST_DO(CheckLookup(&enum2, "ten",   10));
+
+    // clear
+    enum1.Clear();
+    enum2.Clear();
+
+    // check number of entries
+    EXPECT_EQUAL(enum1.GetNumEntries(), 0u);
+    EXPECT_EQUAL(enum2.GetNumEntries(), 0u);
+
+    TEST_DONE();
+}
+
+TEST_APPHOOK(MyApp);
diff --git a/searchlib/src/tests/transactionlog/.gitignore b/searchlib/src/tests/transactionlog/.gitignore
new file mode 100644
index 00000000000..a7bdcf0397d
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/.gitignore
@@ -0,0 +1,7 @@
+.depend
+Makefile
+translogserver_test
+translogclient_test
+
+searchlib_translogclient_test_app
+searchlib_translogserver_test_app
diff --git a/searchlib/src/tests/transactionlog/CMakeLists.txt b/searchlib/src/tests/transactionlog/CMakeLists.txt
new file mode 100644
index 00000000000..545c81ba45f
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_translogserver_test_app
+    SOURCES
+    translogserver_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_translogserver_test_app COMMAND searchlib_translogserver_test_app)
+vespa_add_executable(searchlib_translogclient_test_app
+    SOURCES
+    translogclient_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_translogclient_test_app COMMAND sh translogclient_test.sh)
diff --git a/searchlib/src/tests/transactionlog/DESC b/searchlib/src/tests/transactionlog/DESC
new file mode 100644
index 00000000000..db53d59fb6c
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/DESC
@@ -0,0 +1 @@
+This is a test of the tls/tlc interface.
diff --git a/searchlib/src/tests/transactionlog/FILES b/searchlib/src/tests/transactionlog/FILES
new file mode 100644
index 00000000000..babcf181807
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/FILES
@@ -0,0 +1,2 @@
+translogclient.cpp
+translogserver.cpp
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp
new file mode 100644
index 00000000000..775654d23fc
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp
@@ -0,0 +1,926 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+LOG_SETUP("translogclient_test");
+
+using namespace search;
+using namespace transactionlog;
+using namespace document;
+using namespace vespalib;
+using search::index::DummyFileHeaderContext;
+
+vespalib::string myhex(const void * b, size_t sz) // returns the sz bytes at b as an uppercase hex string, two characters per byte
+{
+    static const char * hextab="0123456789ABCDEF";
+    const unsigned char * c = static_cast(b);
+    vespalib::string s;
+    s.reserve(sz*2);
+    for (size_t i=0; i < sz; i++) {
+        s += hextab[c[i] >> 4]; // high nibble first
+        s += hextab[c[i] & 0x0f]; // then low nibble
+    }
+    return s;
+}
+
+class Test : public vespalib::TestApp // test application of translogclient_test; Main() is the entry point
+{
+public:
+    int Main();
+private: // test steps and helpers; their definitions are not part of this class body
+    bool createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains=0);
+    TransLogClient::Session::UP openDomainTest(TransLogClient & tls, const vespalib::string & name);
+    bool fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name);
+    void fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries);
+    void fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries, size_t entrySize);
+    uint32_t countFiles(const vespalib::string &dir);
+    void checkFilledDomainTest(const TransLogClient::Session::UP &s1, size_t numEntries);
+    bool visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name);
+    bool subscribeDomainTest(TransLogClient & tls, const vespalib::string & name);
+    bool partialUpdateTest();
+    bool test1();
+    bool testRemove();
+    void createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains);
+    void verifyDomain(const vespalib::string & name);
+    void testCrcVersions();
+    bool test2();
+    void testMany();
+    void testErase();
+    void testSync();
+    void testTruncateOnShortRead();
+    void testTruncateOnVersionMismatch();
+};
+
+TEST_APPHOOK(Test);
+
+class CallBackTest : public TransLogClient::Subscriber::Callback // records the payload of every received entry by serial number
+{
+private:
+    virtual RPC::Result receive(const Packet & packet);
+    virtual void inSync() { _inSync = true; }
+    virtual void eof()    { _eof = true; }
+    typedef std::map<SerialNum, ByteBuffer> PacketMap;
+    PacketMap _packetMap;
+public:
+    CallBackTest() : _inSync(false), _eof(false) { }
+    size_t size() const { return _packetMap.size(); }
+    bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); }
+    void clear() { _inSync = false; _eof = false; _packetMap.clear(); }
+    const ByteBuffer & packet(SerialNum n) { assert(hasSerial(n)); return (_packetMap.find(n)->second); } // n must have been received
+    // Flags raised by the inSync()/eof() callbacks; the tests poll them directly.
+    bool      _inSync;
+    bool      _eof;
+};
+
+RPC::Result CallBackTest::receive(const Packet & p)
+{   // Walks every entry in p, logs it, and stores a ByteBuffer built from its payload under the entry's serial.
+    vespalib::nbostream  stream(p.getHandle().c_str(), p.getHandle().size(), true);
+    LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", stream.rp(), stream.size(), stream.capacity(), myhex(stream.peek(), stream.size()).c_str());
+    while (stream.size() != 0) {
+        Packet::Entry entry;
+        entry.deserialize(stream);
+        LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", stream.rp(), stream.size(), stream.capacity(), myhex(entry.data().c_str(), entry.data().size()).c_str());
+        _packetMap[entry.serial()] = ByteBuffer(entry.data().c_str(), entry.data().size());
+    }
+    return RPC::OK;
+}
+
+class CallBackManyTest : public TransLogClient::Subscriber::Callback // validates a long run of entries instead of storing them
+{
+private:
+    virtual RPC::Result receive(const Packet & packet);
+    virtual void inSync() { _inSync = true; }
+    virtual void eof()    { _eof = true; }
+public:
+    CallBackManyTest(size_t start) : _inSync(false), _eof(false), _count(start), _value(start) { } // start seeds both the entry counter and the expected payload
+    void clear() { _inSync = false; _eof = false; _count = 0; _value = 0; } // note: resets the counters to 0, not to the constructor's start value
+    bool      _inSync;
+    bool      _eof;
+    size_t    _count; // receive() asserts each entry's serial equals _count+1, then increments
+    size_t    _value; // receive() asserts each entry's payload equals _value, then increments
+};
+
+RPC::Result CallBackManyTest::receive(const Packet & p)
+{   // Asserts that entries arrive with consecutive serials and consecutive payload values, counting them as it goes.
+    nbostream h(p.getHandle().c_str(), p.getHandle().size(), true);
+    for(;h.size() > 0; _count++, _value++) {
+        Packet::Entry e;
+        e.deserialize(h);
+        assert(e.data().size() == sizeof(size_t));
+        size_t v = 0;
+        memcpy(&v, e.data().c_str(), sizeof(v)); // byte copy: the payload pointer carries no size_t alignment guarantee
+        assert(_count+1 == e.serial());
+        assert(v == _value);
+    }
+    return RPC::OK;
+}
+
+class CallBackUpdate : public TransLogClient::Subscriber::Callback // rebuilds received entries as Identifiable objects and owns them
+{
+public:
+    typedef std::map<SerialNum, Identifiable *> PacketMap;
+private:
+    virtual RPC::Result receive(const Packet & packet);
+    virtual void inSync() { _inSync = true; }
+    virtual void eof()    { _eof = true; }
+    PacketMap _packetMap; // serial -> object stored by receive(); the destructor deletes the objects
+public:
+    CallBackUpdate() : _inSync(false), _eof(false) { }
+    virtual ~CallBackUpdate()         { for (PacketMap::value_type & entry : _packetMap) { delete entry.second; } }
+    bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); }
+    const PacketMap & map() const { return _packetMap; }
+    bool      _inSync;
+    bool      _eof;
+};
+
+
+RPC::Result CallBackUpdate::receive(const Packet & packet)
+{   // Recreates the object behind each entry from its class id and payload, and keeps it in _packetMap by serial.
+    nbostream h(packet.getHandle().c_str(), packet.getHandle().size(), true);
+    while (h.size() > 0) {
+        Packet::Entry e;
+        e.deserialize(h);
+        const vespalib::Identifiable::RuntimeClass * cl(vespalib::Identifiable::classFromId(e.type()));
+        if (cl) {
+            std::unique_ptr<vespalib::Identifiable> obj(cl->create()); // held here so the error and "not Identifiable" paths free it
+            if (obj->inherits(Identifiable::classId)) {
+                Identifiable * ser = static_cast<Identifiable *>(obj.get());
+                nbostream is(e.data().c_str(), e.data().size());
+                try {
+                    is >> *ser;
+                } catch (std::exception & ex) {
+                    LOG(warning, "Failed deserializing (%" PRId64 ", %s) bb(%zu, %zu, %zu)=%s what=%s", e.serial(), cl->name(), is.rp(), is.size(), is.capacity(), myhex(is.peek(), is.size()).c_str(), ex.what());
+                    assert(false);
+                    return RPC::ERROR;
+                }
+                assert(is.state() == nbostream::ok);
+                assert(is.size() == 0);
+                _packetMap[e.serial()] = obj.release(); // ownership passes to the map
+            } else {
+                LOG(warning, "Packet::Entry(%" PRId64 ", %s) is not a Identifiable", e.serial(), cl->name());
+            }
+        } else {
+            LOG(warning, "Packet::Entry(%" PRId64 ", %d) is not recognized by vespalib::Identifiable", e.serial(), e.type());
+        }
+    }
+    return RPC::OK;
+}
+
+class CallBackStatsTest : public TransLogClient::Session::Callback // gathers count / first / last / in-order statistics over received serials
+{
+private:
+    virtual RPC::Result receive(const Packet & packet);
+    virtual void inSync() { _inSync = true; }
+    virtual void eof()    { _eof = true; }
+public:
+    CallBackStatsTest() : _inSync(false), _eof(false),
+                          _count(0), _inOrder(0),
+                          _firstSerial(0), _lastSerial(0),
+                          _prevSerial(0) { }
+    void clear() { _inSync = false; _eof = false; _count = 0; _inOrder = 0;
+        _firstSerial = 0; _lastSerial = 0; _prevSerial = 0; } // back to the constructor's state
+    bool      _inSync;
+    bool      _eof;
+    uint64_t  _count;
+    uint64_t  _inOrder; // increase when next entry is one above previous
+    SerialNum _firstSerial;
+    SerialNum _lastSerial;
+    SerialNum _prevSerial;
+};
+
+RPC::Result CallBackStatsTest::receive(const Packet & p)
+{
+    // Folds the serial of every entry in p into the count, lowest/highest and in-order statistics.
+    nbostream stream(p.getHandle().c_str(), p.getHandle().size(), true);
+    for (; stream.size() > 0; ++_count) {
+        Packet::Entry entry;
+        entry.deserialize(stream);
+        const SerialNum serial = entry.serial();
+        // The first counted entry seeds both extremes; later entries can only widen them.
+        const bool firstEntry = (_count == 0);
+        if (firstEntry || serial < _firstSerial) {
+            _firstSerial = serial;
+        }
+        if (firstEntry || serial > _lastSerial) {
+            _lastSerial = serial;
+        }
+        // "In order" means exactly one above the previously received serial.
+        if (serial == _prevSerial + 1) {
+            ++_inOrder;
+        }
+        _prevSerial = serial;
+    }
+    return RPC::OK;
+}
+
+#define CID_TestIdentifiable 0x5762314 // class id for TestIdentifiable; presumably picked up by the IDENTIFIABLE macros below - confirm
+
+class TestIdentifiable : public Identifiable // minimal Identifiable with no fields; partialUpdateTest() serializes one as entry payload
+{
+public:
+    DECLARE_IDENTIFIABLE(TestIdentifiable);
+    TestIdentifiable() { }
+};
+
+IMPLEMENT_IDENTIFIABLE(TestIdentifiable, Identifiable);
+
+bool Test::partialUpdateTest()
+{
+    // Opens domain "test1" under "test7" without creating it, so test1() must have run first.
+    DummyFileHeaderContext fileHeaderContext;
+    TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000);
+    TransLogClient tls("tcp/localhost:18377");
+
+    TransLogClient::Session::UP s1 = openDomainTest(tls, "test1");
+    TransLogClient::Session & session = *s1;
+    // Serialize one TestIdentifiable and commit it as a single entry with serial 7.
+    TestIdentifiable du;
+
+    nbostream os;
+    os << du;
+
+    vespalib::ConstBufferRef bb(os.c_str(), os.size());
+    LOG(info, "DU : %s", myhex(bb.c_str(), bb.size()).c_str());
+    Packet::Entry e(7, du.getClass().id(), bb);
+    Packet pa;
+    pa.add(e);
+    pa.close();
+    ASSERT_TRUE(session.commit(vespalib::ConstBufferRef(pa.getHandle().c_str(), pa.getHandle().size())));
+    // Visiting from 5 to 7 is expected to deliver exactly that entry, then eof (polled for up to ~10 s).
+    CallBackUpdate ca;
+    TransLogClient::Visitor::UP visitor = tls.createVisitor("test1", ca);
+    ASSERT_TRUE(visitor.get());
+    ASSERT_TRUE( visitor->visit(5, 7) );
+    for (size_t i(0); ! ca._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+    ASSERT_TRUE( ! ca._inSync );
+    ASSERT_TRUE( ca._eof );
+    ASSERT_TRUE( ca.map().size() == 1);
+    ASSERT_TRUE( ca.hasSerial(7) );
+    // Ranges that end before serial 7 are expected to deliver nothing.
+    CallBackUpdate ca1;
+    TransLogClient::Visitor::UP visitor1 = tls.createVisitor("test1", ca1);
+    ASSERT_TRUE(visitor1.get());
+    ASSERT_TRUE( visitor1->visit(4, 5) );
+    for (size_t i(0); ! ca1._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+    ASSERT_TRUE( ! ca1._inSync );
+    ASSERT_TRUE( ca1._eof );
+    ASSERT_TRUE( ca1.map().size() == 0);
+
+    CallBackUpdate ca2;
+    TransLogClient::Visitor::UP visitor2 = tls.createVisitor("test1", ca2);
+    ASSERT_TRUE(visitor2.get());
+    ASSERT_TRUE( visitor2->visit(5, 6) );
+    for (size_t i(0); ! ca2._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+    ASSERT_TRUE( ! ca2._inSync );
+    ASSERT_TRUE( ca2._eof );
+    ASSERT_TRUE( ca2.map().size() == 0);
+    // A range reaching far past the last serial is still expected to deliver just entry 7.
+    CallBackUpdate ca3;
+    TransLogClient::Visitor::UP visitor3 = tls.createVisitor("test1", ca3);
+    ASSERT_TRUE(visitor3.get());
+    ASSERT_TRUE( visitor3->visit(5, 1000) );
+    for (size_t i(0); ! ca3._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); }
+    ASSERT_TRUE( ! ca3._inSync );
+    ASSERT_TRUE( ca3._eof );
+    ASSERT_TRUE( ca3.map().size() == 1);
+    ASSERT_TRUE( ca3.hasSerial(7) );
+
+    return true;
+}
+
+bool Test::createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains)
+{   // Checks that `name` cannot be opened yet, creates it, and checks the listed domain count went up by one.
+    bool retval(true);
+    std::vector<vespalib::string> dir;
+    tls.listDomains(dir);
+    EXPECT_EQUAL (dir.size(), preExistingDomains);
+    TransLogClient::Session::UP s1 = tls.open(name);
+    ASSERT_TRUE (s1.get() == NULL);
+    retval = tls.create(name);
+    ASSERT_TRUE (retval);
+    dir.clear();
+    tls.listDomains(dir);
+    EXPECT_EQUAL (dir.size(), preExistingDomains+1);
+//    ASSERT_TRUE (dir[0] == name);
+    return retval;
+}
+
+TransLogClient::Session::UP Test::openDomainTest(TransLogClient & tls, const vespalib::string & name)
+{ // Opens a session on domain `name` and asserts that one was returned before handing it to the caller.
+    TransLogClient::Session::UP s1 = tls.open(name);
+    ASSERT_TRUE (s1.get() != NULL);
+    return s1;
+}
+
+bool Test::fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name)
+{ // Commits packet a (serial 1) and packet b (serials 2, 3), then checks rejection of a repeat commit and Packet bookkeeping.
+    bool retval(true);
+    Packet::Entry e1(1, 1, vespalib::ConstBufferRef("Content in buffer A", 20)); // 20 = 19 characters plus the terminating NUL
+    Packet::Entry e2(2, 2, vespalib::ConstBufferRef("Content in buffer B", 20));
+    Packet::Entry e3(3, 1, vespalib::ConstBufferRef("Content in buffer C", 20));
+
+    Packet a;
+    ASSERT_TRUE (a.add(e1));
+    Packet b;
+    ASSERT_TRUE (b.add(e2));
+    ASSERT_TRUE (b.add(e3));
+    ASSERT_TRUE (!b.add(e1)); // add() must refuse e1 here - presumably because serial 1 is below the packet's last serial (3)
+    a.close();
+    b.close();
+    ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size())));
+    ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(b.getHandle().c_str(), b.getHandle().size())));
+    try {
+        s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size())); // committing a again must throw
+        ASSERT_TRUE(false);
+    } catch (const std::exception & e) {
+        EXPECT_EQUAL(vespalib::string("commit failed with code -2. server says: Exception during commit on " + name + " : Incomming serial number(1) must be bigger than the last one (3)."), e.what());
+    }
+    EXPECT_EQUAL(a.size(), 1u);
+    EXPECT_EQUAL(a.range().from(), 1u);
+    EXPECT_EQUAL(a.range().to(), 1u);
+    EXPECT_EQUAL(b.size(), 2u);
+    EXPECT_EQUAL(b.range().from(), 2u);
+    EXPECT_EQUAL(b.range().to(), 3u);
+    EXPECT_TRUE(a.merge(b));
+    EXPECT_EQUAL(a.size(), 3u);
+    EXPECT_EQUAL(a.range().from(), 1u);
+    EXPECT_EQUAL(a.range().to(), 3u);
+    // The merged handle must deserialize into exactly three entries with nothing left over.
+    Packet::Entry e;
+    vespalib::nbostream h(a.getHandle().c_str(), a.getHandle().size());
+    e.deserialize(h);
+    e.deserialize(h);
+    e.deserialize(h);
+    EXPECT_EQUAL(h.size(), 0u);
+
+    return retval;
+}
+
+void Test::fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries)
+{   // Commits numPackets packets of numEntries entries; serials count up from 1 and each payload is the running counter.
+    size_t value(0);
+    for(size_t i=0; i < numPackets; i++) {
+        std::unique_ptr<Packet> p(new Packet());
+        for(size_t j=0; j < numEntries; j++, value++) {
+            Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&value, sizeof(value)));
+            if ( ! p->add(e) ) { // add() failed: commit what has been collected and retry in a fresh packet
+                p->close();
+                ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+                p.reset(new Packet());
+                ASSERT_TRUE(p->add(e));
+            }
+        }
+        p->close();
+        ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+    }
+}
+
+
+// Commits numPackets packets of numEntries entries; every payload is the same entrySize-byte buffer.
+void
+Test::fillDomainTest(TransLogClient::Session * s1,
+                     size_t numPackets, size_t numEntries, size_t entrySize)
+{
+    size_t value(0);
+    std::vector<char> entryBuffer(entrySize);
+    for(size_t i=0; i < numPackets; i++) {
+        std::unique_ptr<Packet> p(new Packet());
+        for(size_t j=0; j < numEntries; j++, value++) {
+            Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef(entryBuffer.data(), entryBuffer.size())); // data() stays valid when entrySize is 0
+            if ( ! p->add(e) ) { // add() failed: commit what has been collected and retry in a fresh packet
+                p->close();
+                ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+                p.reset(new Packet());
+                ASSERT_TRUE(p->add(e));
+            }
+        }
+        p->close();
+        ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size())));
+    }
+}
+
+
+// Returns how many entries the directory scan of dir yields, not counting "." and "..".
+uint32_t Test::countFiles(const vespalib::string &dir)
+{
+    uint32_t numEntries = 0;
+    FastOS_DirectoryScan scanner(dir.c_str());
+    while (scanner.ReadNext()) {
+        const char *entryName = scanner.GetName();
+        const bool isDotEntry = (strcmp(entryName, ".") == 0) || (strcmp(entryName, "..") == 0);
+        if (!isDotEntry) {
+            ++numEntries;
+        }
+    }
+    return numEntries;
+}
+
+
+void
+Test::checkFilledDomainTest(const TransLogClient::Session::UP &s1,
+                            size_t numEntries)
+{ // Expects status() to report first serial 1, last serial numEntries and numEntries entries.
+    SerialNum b(0), e(0);
+    size_t c(0);
+    EXPECT_TRUE(s1->status(b, e, c));
+    EXPECT_EQUAL(b, 1u);
+    EXPECT_EQUAL(e, numEntries);
+    EXPECT_EQUAL(c, numEntries);
+}
+
+
+bool Test::visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name)
+{
+    bool retval(true);
+    // The domain is expected to hold exactly serials 1..3.
+    SerialNum b(0), e(0);
+    size_t c(0);
+    EXPECT_TRUE(s1->status(b, e, c));
+    EXPECT_EQUAL(b, 1u);
+    EXPECT_EQUAL(e, 3u);
+    EXPECT_EQUAL(c, 3u);
+    // Each stanza visits one range, polls for eof (up to 60000 x 10 ms) and checks which serials arrived.
+    CallBackTest ca;
+    TransLogClient::Visitor::UP visitor = tls.createVisitor(name, ca);
+    ASSERT_TRUE(visitor.get());
+    EXPECT_TRUE( visitor->visit(0, 1) );
+    for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+    EXPECT_TRUE( ! ca._inSync );
+    EXPECT_TRUE( ca._eof );
+    EXPECT_TRUE( ! ca.hasSerial(0) );
+    EXPECT_TRUE( ca.hasSerial(1) );
+    EXPECT_TRUE( ! ca.hasSerial(2) );
+    ca.clear();
+    // visit(1, 2): only serial 2 is expected, i.e. the start serial itself is not delivered.
+    visitor = tls.createVisitor(name, ca);
+    ASSERT_TRUE(visitor.get());
+    EXPECT_TRUE( visitor->visit(1, 2) );
+    for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+    EXPECT_TRUE( ! ca._inSync );
+    EXPECT_TRUE( ca._eof );
+    EXPECT_TRUE( ! ca.hasSerial(0) );
+    EXPECT_TRUE( ! ca.hasSerial(1) );
+    EXPECT_TRUE( ca.hasSerial(2) );
+    EXPECT_TRUE( ! ca.hasSerial(3) );
+    ca.clear();
+    // visit(0, 3): all three serials are expected.
+    visitor = tls.createVisitor(name, ca);
+    ASSERT_TRUE(visitor.get()); // must not continue on failure: visitor is dereferenced on the next line
+    EXPECT_TRUE( visitor->visit(0, 3) );
+    for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+    EXPECT_TRUE( ! ca._inSync );
+    EXPECT_TRUE( ca._eof );
+    EXPECT_TRUE( ! ca.hasSerial(0) );
+    EXPECT_TRUE( ca.hasSerial(1) );
+    EXPECT_TRUE( ca.hasSerial(2) );
+    EXPECT_TRUE( ca.hasSerial(3) );
+    ca.clear();
+    // visit(2, 3): only serial 3 is expected.
+    visitor = tls.createVisitor(name, ca);
+    ASSERT_TRUE(visitor.get());
+    EXPECT_TRUE( visitor->visit(2, 3) );
+    for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+    EXPECT_TRUE( ! ca._inSync );
+    EXPECT_TRUE( ca._eof );
+    EXPECT_TRUE( ! ca.hasSerial(0) );
+    EXPECT_TRUE( !ca.hasSerial(1) );
+    EXPECT_TRUE( !ca.hasSerial(2) );
+    EXPECT_TRUE( ca.hasSerial(3) );
+    ca.clear();
+
+    return retval;
+}
+
+bool Test::subscribeDomainTest(TransLogClient & tls, const vespalib::string & name)
+{ // Subscribes from serial 0, polls until the callback reports inSync, then expects serials 1..3 and no eof.
+    bool retval(true);
+    CallBackTest ca;
+    TransLogClient::Subscriber::UP subscriber = tls.createSubscriber(name, ca);
+    ASSERT_TRUE(subscriber.get());
+    ASSERT_TRUE( subscriber->subscribe(0) );
+    for (size_t i(0); ! ca._inSync && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } // up to 60000 polls of 10 ms
+    ASSERT_TRUE( ca._inSync );
+    ASSERT_TRUE( ! ca.hasSerial(0) );
+    ASSERT_TRUE( ! ca._eof );
+    ASSERT_TRUE( ca.hasSerial(1) );
+    ASSERT_TRUE( ca.hasSerial(2) );
+    ASSERT_TRUE( ca.hasSerial(3) );
+    return retval;
+}
+
+bool Test::test1()
+{
+    // Full round trip on domain "test1" under "test7": create, open, fill, visit, subscribe.
+    DummyFileHeaderContext headerContext;
+    TransLogServer server("test7", 18377, ".", headerContext, 0x10000);
+    TransLogClient client("tcp/localhost:18377");
+    const vespalib::string domain("test1");
+    createDomainTest(client, domain);
+    TransLogClient::Session::UP session = openDomainTest(client, domain);
+    fillDomainTest(session.get(), domain);
+    visitDomainTest(client, session.get(), domain);
+    subscribeDomainTest(client, domain);
+    return true;
+}
+
+void Test::createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains)
+{
+    // Starts a server in "test13" using crcMethod, then creates domain `name` and fills it with serials 1..3.
+    DummyFileHeaderContext headerContext;
+    TransLogServer server("test13", 18377, ".", headerContext, 0x10000, false, 4, crcMethod);
+    TransLogClient client("tcp/localhost:18377");
+    createDomainTest(client, name, preExistingDomains);
+    TransLogClient::Session::UP session = openDomainTest(client, name);
+    fillDomainTest(session.get(), name);
+}
+
+void Test::verifyDomain(const vespalib::string & name)
+{   // Restarts a server on "test13" without naming a crc method and re-runs the visit checks on domain `name`.
+    DummyFileHeaderContext headerContext;
+    TransLogServer server("test13", 18377, ".", headerContext, 0x10000);
+    TransLogClient client("tcp/localhost:18377");
+    TransLogClient::Session::UP session = openDomainTest(client, name);
+    visitDomainTest(client, session.get(), name);
+}
+
+void Test::testCrcVersions()
+{ // Writes one domain per crc method, then reads both back through a server started without an explicit crc method.
+    createAndFillDomain("ccitt_crc32", DomainPart::ccitt_crc32, 0);
+    createAndFillDomain("xxh64", DomainPart::xxh64, 1); // 1: the ccitt_crc32 domain already exists at this point
+
+    verifyDomain("ccitt_crc32");
+    verifyDomain("xxh64");
+}
+
+bool Test::testRemove()
+{ // Runs the create/fill/visit/subscribe round trip on domain "test-delete", then asserts the domain can be removed.
+    DummyFileHeaderContext fileHeaderContext;
+    TransLogServer tlss("testremove", 18377, ".", fileHeaderContext, 0x10000);
+    TransLogClient tls("tcp/localhost:18377");
+
+    vespalib::string name("test-delete");
+    createDomainTest(tls, name);
+    TransLogClient::Session::UP s1 = openDomainTest(tls, name);
+    fillDomainTest(s1.get(), name);
+    visitDomainTest(tls, s1.get(), name);
+    subscribeDomainTest(tls, name);
+    ASSERT_TRUE(tls.remove(name));
+
+    return true;
+}
+
+bool Test::test2()
+{
+    // Restarts a server on "test7" and repeats the visit/subscribe checks on domain "test1" without creating or filling it.
+    DummyFileHeaderContext headerContext;
+    TransLogServer server("test7", 18377, ".", headerContext, 0x10000);
+    TransLogClient client("tcp/localhost:18377");
+    const vespalib::string domain("test1");
+    TransLogClient::Session::UP session = openDomainTest(client, domain);
+    visitDomainTest(client, session.get(), domain);
+    subscribeDomainTest(client, domain);
+    return true;
+}
+
+namespace {
+// assertVisitStats: visits [visitStart, visitEnd] of `domain`, waits for eof and compares the collected statistics.
+void
+assertVisitStats(TransLogClient &tls, const vespalib::string &domain,
+                 SerialNum visitStart, SerialNum visitEnd,
+                 SerialNum expFirstSerial, SerialNum expLastSerial,
+                 uint64_t expCount, uint64_t expInOrder)
+{
+    CallBackStatsTest ca;
+    TransLogClient::Visitor::UP visitor = tls.createVisitor(domain, ca);
+    ASSERT_TRUE(visitor.get());
+    ASSERT_TRUE( visitor->visit(visitStart, visitEnd) );
+    for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { // poll for eof, up to 60000 x 10 ms
+        FastOS_Thread::Sleep(10);
+    }
+    ASSERT_TRUE(!ca._inSync);
+    ASSERT_TRUE(ca._eof);
+    EXPECT_EQUAL(expFirstSerial, ca._firstSerial);
+    EXPECT_EQUAL(expLastSerial, ca._lastSerial);
+    EXPECT_EQUAL(expCount, ca._count);
+    EXPECT_EQUAL(expInOrder, ca._inOrder);
+}
+// assertStatus: compares the session's status() triple (first serial, last serial, count) with the expected values.
+void
+assertStatus(TransLogClient::Session &s,
+             SerialNum expFirstSerial, SerialNum expLastSerial,
+             uint64_t expCount)
+{
+    SerialNum b(0), e(0);
+    size_t c(0);
+    EXPECT_TRUE(s.status(b, e, c));
+    EXPECT_EQUAL(expFirstSerial, b);
+    EXPECT_EQUAL(expLastSerial, e);
+    EXPECT_EQUAL(expCount, c);
+}
+
+}
+
+
+void Test::testMany()
+{ // Fills domain "many" with NUM_PACKETS*NUM_ENTRIES entries, visits them, then repeats the checks after a server restart.
+    const unsigned int NUM_PACKETS = 1000;
+    const unsigned int NUM_ENTRIES = 100;
+    const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+    {
+        DummyFileHeaderContext fileHeaderContext;
+        TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x80000);
+        TransLogClient tls("tcp/localhost:18377");
+
+        createDomainTest(tls, "many", 0);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "many");
+        fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+        SerialNum b(0), e(0);
+        size_t c(0);
+        EXPECT_TRUE(s1->status(b, e, c));
+        EXPECT_EQUAL(b, 1u);
+        EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES);
+        EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES);
+        CallBackManyTest ca(2); // start=2: the first visited entry is expected to carry serial 3 and payload 2
+        TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca);
+        ASSERT_TRUE(visitor.get());
+        ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) );
+        for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+        ASSERT_TRUE( ! ca._inSync );
+        ASSERT_TRUE( ca._eof );
+        EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES);
+        EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES);
+    }
+    { // second pass: a new server on the same directory, constructed with 0x1000000 where the first pass used 0x80000
+        DummyFileHeaderContext fileHeaderContext;
+        TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x1000000);
+        TransLogClient tls("tcp/localhost:18377");
+
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "many");
+        SerialNum b(0), e(0);
+        size_t c(0);
+        EXPECT_TRUE(s1->status(b, e, c));
+        EXPECT_EQUAL(b, 1u);
+        EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES);
+        EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES);
+        CallBackManyTest ca(2);
+        TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca);
+        ASSERT_TRUE(visitor.get());
+        ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) );
+        for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); }
+        ASSERT_TRUE( ! ca._inSync );
+        ASSERT_TRUE( ca._eof );
+        EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES);
+        EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES);
+    }
+}
+
+void Test::testErase()
+{ // Fills domain "erase", restarts the server, then erases up to various serials and checks what visit and status report.
+    const unsigned int NUM_PACKETS = 1000;
+    const unsigned int NUM_ENTRIES = 100;
+    const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+    {
+        DummyFileHeaderContext fileHeaderContext;
+        TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x80000);
+        TransLogClient tls("tcp/localhost:18377");
+
+        createDomainTest(tls, "erase", 0);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "erase");
+        fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+    }
+    {
+        DummyFileHeaderContext fileHeaderContext;
+        TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x1000000);
+        TransLogClient tls("tcp/localhost:18377");
+
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "erase");
+
+        // Before erase
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 3, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES -2, TOTAL_NUM_ENTRIES - 3));
+        DomainStats domainStats = tlss.getDomainStats();
+        DomainInfo domainInfo = domainStats["erase"]; // snapshot taken before any erase; the part counts below come from it
+        size_t numParts = domainInfo.parts.size();
+        LOG(info, "%zu parts", numParts);
+        for (uint32_t partId = 0; partId < numParts; ++partId) {
+            const PartInfo &part = domainInfo.parts[partId];
+            LOG(info,
+                "part %u from %" PRIu64 " to %" PRIu64 ", "
+                "count %zu, numBytes %zu",
+                partId,
+                (uint64_t) part.range.from(), (uint64_t) part.range.to(),
+                part.count, part.byteSize);
+        }
+        ASSERT_LESS_EQUAL(2u, numParts); // the steps below index parts[numParts - 2]
+        // Erase everything before second to last domainpart file
+        SerialNum eraseSerial = domainInfo.parts[numParts - 2].range.from();
+        s1->erase(eraseSerial);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 2].count +
+                             domainInfo.parts[numParts - 1].count));
+        // No apparent effect of erasing just first entry in 2nd to last part
+        s1->erase(eraseSerial + 1);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 2].count +
+                             domainInfo.parts[numParts - 1].count));
+        // No apparent effect of erasing almost all of 2nd to last part
+        SerialNum eraseSerial2 = domainInfo.parts[numParts - 2].range.to();
+        s1->erase(eraseSerial2);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 2].count +
+                             domainInfo.parts[numParts - 1].count));
+        // Erase everything before last domainpart file
+        eraseSerial = domainInfo.parts[numParts - 1].range.from();
+        s1->erase(eraseSerial);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 1].count));
+        // No apparent effect of erasing just first entry in last part
+        s1->erase(eraseSerial + 1);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 1].count));
+        // No apparent effect of erasing almost all of last part
+        eraseSerial2 = domainInfo.parts[numParts - 1].range.to();
+        s1->erase(eraseSerial2);
+        TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES,
+                                 eraseSerial, TOTAL_NUM_ENTRIES,
+                                 TOTAL_NUM_ENTRIES + 1 - eraseSerial,
+                                 TOTAL_NUM_ENTRIES - eraseSerial));
+        TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES,
+                             domainInfo.parts[numParts - 1].count));
+    }
+}
+
+
+void
+Test::testSync()
+{ // Fills domain "sync" under "test9" with 12 entries and expects sync(2, ...) to report the last serial as synced.
+    const unsigned int NUM_PACKETS = 3;
+    const unsigned int NUM_ENTRIES = 4;
+    const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+
+    DummyFileHeaderContext fileHeaderContext;
+    TransLogServer tlss("test9", 18377, ".", fileHeaderContext, 0x1000000);
+    TransLogClient tls("tcp/localhost:18377");
+
+    createDomainTest(tls, "sync", 0);
+    TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+    fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+
+    SerialNum syncedTo(0);
+
+    EXPECT_TRUE(s1->sync(2, syncedTo));
+    EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+}
+
+
+void
+Test::testTruncateOnVersionMismatch()
+{
+    const unsigned int NUM_PACKETS = 3;
+    const unsigned int NUM_ENTRIES = 4;
+    const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+    // Status recorded before the file is tampered with; it must read the same after the restart.
+    uint64_t fromOld(0), toOld(0);
+    size_t countOld(0);
+    DummyFileHeaderContext fileHeaderContext;
+    {
+        TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000);
+        TransLogClient tls("tcp/localhost:18377");
+
+        createDomainTest(tls, "sync", 0);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+        fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES);
+        EXPECT_TRUE(s1->status(fromOld, toOld, countOld));
+        SerialNum syncedTo(0);
+
+        EXPECT_TRUE(s1->sync(2, syncedTo));
+        EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+    }
+    FastOS_File f("test11/sync/sync-0000000000000000");
+    EXPECT_TRUE(f.OpenWriteOnlyExisting());
+    EXPECT_TRUE(f.SetPosition(f.GetSize()));
+    // Append 100 zero bytes to the end of the domain part file while no server is running.
+    char tmp[100];
+    memset(tmp, 0, sizeof(tmp));
+    EXPECT_EQUAL(static_cast<ssize_t>(sizeof(tmp)), f.Write2(tmp, sizeof(tmp)));
+    EXPECT_TRUE(f.Close());
+    {
+        TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000);
+        TransLogClient tls("tcp/localhost:18377");
+        TransLogClient::Session::UP s1 = openDomainTest(tls, "sync");
+        uint64_t from(0), to(0);
+        size_t count(0);
+        EXPECT_TRUE(s1->status(from, to, count));
+        ASSERT_EQUAL(fromOld, from);
+        ASSERT_EQUAL(toOld, to);
+        ASSERT_EQUAL(countOld, count);
+    }
+}
+
+void
+Test::testTruncateOnShortRead()
+{
+    const unsigned int NUM_PACKETS = 17;
+    const unsigned int NUM_ENTRIES = 1;
+    const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES;
+    const unsigned int ENTRYSIZE = 4080;
+    vespalib::string topdir("test10");
+    vespalib::string domain("truncate");
+    vespalib::string dir(topdir + "/" + domain);
+    vespalib::string tlsspec("tcp/localhost:18377");
+    // Each braced scope below is one server lifetime or one on-disk check between lifetimes.
+    DummyFileHeaderContext fileHeaderContext;
+    {
+        TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+        TransLogClient tls(tlsspec);
+
+        createDomainTest(tls, domain, 0);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+        fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES, ENTRYSIZE);
+
+        SerialNum syncedTo(0);
+
+        EXPECT_TRUE(s1->sync(TOTAL_NUM_ENTRIES, syncedTo));
+        EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES);
+    }
+    {
+        EXPECT_EQUAL(2u, countFiles(dir));
+    }
+    {
+        TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+        TransLogClient tls(tlsspec);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+        checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES);
+    }
+    {
+        EXPECT_EQUAL(2u, countFiles(dir));
+    }
+    {
+        vespalib::string filename(dir + "/truncate-0000000000000017");
+        FastOS_File trfile(filename.c_str());
+        EXPECT_TRUE(trfile.OpenReadWrite(NULL));
+        EXPECT_TRUE(trfile.SetSize(trfile.getSize() - 1)); // chop one byte off the file; the restart below must then see one entry less
+        EXPECT_TRUE(trfile.Close());
+    }
+    {
+        TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000);
+        TransLogClient tls(tlsspec);
+        TransLogClient::Session::UP s1 = openDomainTest(tls, domain);
+        checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES - 1);
+    }
+    {
+        EXPECT_EQUAL(2u, countFiles(dir));
+    }
+}
+
+
+int Test::Main()
+{
+    TEST_INIT("translogclient_test");
+    // Hand the executable name to DummyFileHeaderContext as creator before any test runs.
+    if (_argc > 0) {
+        DummyFileHeaderContext::setCreator(_argv[0]);
+    }
+    test1();
+    test2();
+    testMany();
+    testErase();
+    partialUpdateTest();
+
+    testRemove();
+    
+    testSync();
+
+    testTruncateOnShortRead();
+    testTruncateOnVersionMismatch();
+
+    testCrcVersions();
+    
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/transactionlog/translogclient_test.sh b/searchlib/src/tests/transactionlog/translogclient_test.sh
new file mode 100755
index 00000000000..bf7ddab1fcf
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogclient_test.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+rm -rf test7 test8 test9 test10 test11 test12 test13 testremove
+$VALGRIND ./searchlib_translogclient_test_app; status=$?  # remember the test result before cleaning up
+rm -rf test7 test8 test9 test10 test11 test12 test13 testremove; exit $status
diff --git a/searchlib/src/tests/transactionlog/translogserver_test.cpp b/searchlib/src/tests/transactionlog/translogserver_test.cpp
new file mode 100644
index 00000000000..fa03e4f5291
--- /dev/null
+++ b/searchlib/src/tests/transactionlog/translogserver_test.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+
+using search::index::DummyFileHeaderContext;
+using search::transactionlog::TransLogServer;
+
+// Starts a transaction log server on port 18377 with base directory "test7" and exits after 60 seconds.
+int main(int argc, char *argv[])
+{
+    if ((argc > 0) && (argv[0] != NULL)) {  // argv[0] exists whenever argc > 0; "argc > 1" skipped it on a plain run
+        DummyFileHeaderContext::setCreator(argv[0]);
+    }
+    DummyFileHeaderContext fileHeaderContext;
+    TransLogServer tls("test7", 18377, ".", fileHeaderContext, 0x10000);
+    sleep(60);
+    return 0;
+}
diff --git a/searchlib/src/tests/transactionlogstress/.gitignore b/searchlib/src/tests/transactionlogstress/.gitignore
new file mode 100644
index 00000000000..5913613b455
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+server
+translogstress
diff --git a/searchlib/src/tests/transactionlogstress/CMakeLists.txt b/searchlib/src/tests/transactionlogstress/CMakeLists.txt
new file mode 100644
index 00000000000..c91b0e34b40
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_translogstress_app
+    SOURCES
+    translogstress.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_translogstress_app COMMAND searchlib_translogstress_app BENCHMARK)
diff --git a/searchlib/src/tests/transactionlogstress/DESC b/searchlib/src/tests/transactionlogstress/DESC
new file mode 100644
index 00000000000..47dac6a9342
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/DESC
@@ -0,0 +1 @@
+This is a stress test of the transaction log server.
diff --git a/searchlib/src/tests/transactionlogstress/FILES b/searchlib/src/tests/transactionlogstress/FILES
new file mode 100644
index 00000000000..68cc8402652
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/FILES
@@ -0,0 +1 @@
+translogstress.cpp
diff --git a/searchlib/src/tests/transactionlogstress/translogstress.cpp b/searchlib/src/tests/transactionlogstress/translogstress.cpp
new file mode 100644
index 00000000000..1c51c81e633
--- /dev/null
+++ b/searchlib/src/tests/transactionlogstress/translogstress.cpp
@@ -0,0 +1,875 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+LOG_SETUP("translogstress");
+
+using document::ByteBuffer;
+using search::Runnable;
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using std::shared_ptr;
+using vespalib::make_string;
+using vespalib::ConstBufferRef;
+using search::index::DummyFileHeaderContext;
+
+namespace search {
+namespace transactionlog {
+
+using ClientSession = TransLogClient::Session;
+using Subscriber = TransLogClient::Subscriber;
+using Visitor = TransLogClient::Visitor;
+
+//-----------------------------------------------------------------------------
+// BufferGenerator
+//-----------------------------------------------------------------------------
+// Produces random lower-case strings, including their terminating NUL, wrapped in a ByteBuffer.
+class BufferGenerator
+{
+private:
+    Rand48 _rnd;
+    uint32_t _minStrLen;
+    uint32_t _maxStrLen;
+
+public:
+    BufferGenerator() : _rnd(), _minStrLen(0), _maxStrLen(0) {}
+    BufferGenerator(uint32_t minStrLen, uint32_t maxStrLen) :
+        _rnd(), _minStrLen(minStrLen), _maxStrLen(maxStrLen) {}
+    void setSeed(long seed) { _rnd.srand48(seed); }
+    ByteBuffer getRandomBuffer();
+};
+
+ByteBuffer
+BufferGenerator::getRandomBuffer()
+{
+    // Length is drawn from [_minStrLen, _maxStrLen); an empty or inverted range gives _minStrLen instead of "% 0" or a wrap.
+    size_t len = _minStrLen + ((_maxStrLen > _minStrLen) ? (_rnd.lrand48() % (_maxStrLen - _minStrLen)) : 0);
+    std::string str;
+    for (size_t i = 0; i < len; ++i) {
+        str.push_back(static_cast<char>('a' + _rnd.lrand48() % ('z' - 'a' + 1)));
+    }
+    ByteBuffer buf(str.size() + 1);
+    buf.putBytes(str.c_str(), str.size() + 1);
+    buf.flip();
+    return buf;
+}
+
+
+//-----------------------------------------------------------------------------
+// EntryGenerator
+//-----------------------------------------------------------------------------
+// Builds log entries from a base seed and a serial number, using a pre-generated buffer pool when one is set.
+class EntryGenerator
+{
+private:
+    Rand48 _rnd;
+    long _baseSeed;
+    BufferGenerator _bufferGenerator;
+    const std::vector<ByteBuffer> * _buffers; // optional pool, not owned; NULL means "generate on demand"
+    ByteBuffer _lastGeneratedBuffer;          // backs the most recent on-demand entry
+
+public:
+    EntryGenerator(long baseSeed, const BufferGenerator & bufferGenerator) :
+        _rnd(), _baseSeed(baseSeed), _bufferGenerator(bufferGenerator), _buffers(NULL),
+        _lastGeneratedBuffer() {}
+    EntryGenerator(const EntryGenerator & rhs) :
+        _rnd(), _baseSeed(rhs._baseSeed), _bufferGenerator(rhs._bufferGenerator),
+        _buffers(rhs._buffers), _lastGeneratedBuffer(rhs._lastGeneratedBuffer) {}
+    // Unlike the copy constructor, assignment copies _rnd and leaves _lastGeneratedBuffer as it was.
+    EntryGenerator & operator=(const EntryGenerator & rhs) {
+        _rnd = rhs._rnd;
+        _baseSeed = rhs._baseSeed;
+        _bufferGenerator = rhs._bufferGenerator;
+        _buffers = rhs._buffers;
+        return *this;
+    }
+    SerialNum getRandomSerialNum(SerialNum begin, SerialNum end);
+    Packet::Entry getRandomEntry(SerialNum num);
+    Rand48 & getRnd() { return _rnd; }
+    void setBuffers(const std::vector<ByteBuffer> & buffers) { _buffers = &buffers; }
+};
+
+SerialNum
+EntryGenerator::getRandomSerialNum(SerialNum begin, SerialNum end)
+{
+    // Picks a serial number from the closed interval [begin, end]; a one-element
+    // interval is answered without drawing a random number.
+    assert(begin <= end);
+    if (begin != end) {
+        return SerialNum(begin + _rnd.lrand48() % (end - begin + 1));
+    }
+    return SerialNum(begin);
+}
+
+Packet::Entry
+EntryGenerator::getRandomEntry(SerialNum num)
+{
+    // Both random sources are reseeded with (_baseSeed + num) before use, so the entry is tied to its serial number.
+    _rnd.srand48(_baseSeed + num);
+    if (_buffers == NULL) {
+        _bufferGenerator.setSeed(_baseSeed + num);
+        _lastGeneratedBuffer = _bufferGenerator.getRandomBuffer();
+        const ByteBuffer & fresh = _lastGeneratedBuffer;
+        return Packet::Entry(num, 1024, ConstBufferRef(fresh.getBuffer(), fresh.getLength()));
+    }
+    const ByteBuffer & pooled = (*_buffers)[_rnd.lrand48() % _buffers->size()];
+    return Packet::Entry(num, 1024, ConstBufferRef(pooled.getBuffer(), pooled.getLength()));
+}
+
+
+//-----------------------------------------------------------------------------
+// EntryComparator
+//-----------------------------------------------------------------------------
+// Equality predicate for transaction log entries.
+class EntryComparator
+{
+public:
+    // Two entries are equal when serial number, type, payload length and
+    // payload bytes all match; serial number and type are tested first.
+    static bool cmp(const Packet::Entry & lhs, const Packet::Entry & rhs) {
+        const bool sameHeader = (lhs.serial() == rhs.serial()) && (lhs.type() == rhs.type());
+        if (!sameHeader) {
+            return false;
+        }
+        if (lhs.data().size() != rhs.data().size()) {
+            return false;
+        }
+        // Same length on both sides, so one memcmp decides.
+        const int diff = memcmp(lhs.data().c_str(), rhs.data().c_str(), lhs.data().size());
+        return diff == 0;
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+// EntryPrinter
+//-----------------------------------------------------------------------------
+class EntryPrinter
+{
+public:
+    // Renders an entry for log and exception text; the last payload byte is not printed.
+    static std::string toStr(const Packet::Entry & e) {
+        std::stringstream ss;
+        ss << "Entry(serial(" << e.serial() << "), type(" << e.type() << "), bufferSize(" << e.data().size() << "), buffer(";
+        for (size_t i = 0; i + 1 < e.data().size(); ++i) {  // "i + 1 <" cannot wrap for an empty payload; "size() - 1" did
+            ss << e.data().c_str()[i];
+        }
+        ss << "))";  // closes buffer( and the outer Entry(
+        return ss.str();
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+// PacketPrinter
+//-----------------------------------------------------------------------------
+class PacketPrinter
+{
+public:
+    static std::string toStr(const Packet & p) {
+        std::stringstream ss;
+        ss << "Packet(entries(" << p.size() << "), range([" << p.range().from() << ", " << p.range().to()
+            << "]), bytes(" << p.getHandle().size() << "))";
+        return ss.str();
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+// FeederThread
+//-----------------------------------------------------------------------------
+// Runnable that feeds generated entries into one domain and remembers the last serial number it committed.
+class FeederThread : public Runnable
+{
+private:
+    std::string _tlsSpec;
+    std::string _domain;
+    TransLogClient _client;
+    std::unique_ptr<ClientSession> _session;  // opened in doRun()
+    EntryGenerator _generator;
+    uint32_t _feedRate;                       // entries per second; 0 selects the unthrottled branch of doRun()
+    Packet _packet;
+    SerialNum _current;                       // serial number of the next entry to generate
+    SerialNum _lastCommited;
+    FastOS_Time _timer;
+    void commitPacket();
+    bool addEntry(const Packet::Entry & e);
+
+public:
+    FeederThread(const std::string & tlsSpec, const std::string & domain,
+                 const EntryGenerator & generator, uint32_t feedRate, size_t packetSize) :
+        _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec), _session(),
+        _generator(generator), _feedRate(feedRate), _packet(packetSize), _current(1), _lastCommited(1), _timer() {}
+    virtual void doRun();
+    SerialNumRange getRange() const { return SerialNumRange(1, _lastCommited); }
+};
+
+void
+FeederThread::commitPacket()
+{
+    // Sends the pending packet, then records the highest serial number that packet contained.
+    _packet.close();
+    const vespalib::nbostream& stream = _packet.getHandle();
+    if (!_session->commit(ConstBufferRef(stream.c_str(), stream.size()))) {
+        throw std::runtime_error(vespalib::make_string("FeederThread: Failed commiting %s", PacketPrinter::toStr(_packet).c_str()));
+    }
+    LOG(info, "FeederThread: commited %s", PacketPrinter::toStr(_packet).c_str());
+    // _current may already be one past an entry that did not fit, so take the value from the packet itself.
+    _lastCommited = _packet.range().to();
+    _packet.clear();
+}
+
+bool
+FeederThread::addEntry(const Packet::Entry & e)
+{
+    // Forwards to _packet.add(); doRun() reacts to false by committing the packet and retrying once.
+    return _packet.add(e);
+}
+
+void
+FeederThread::doRun()
+{
+    // Feeds generated entries until _done is set. With a non-zero feed rate, _feedRate entries are
+    // added and committed per pass and the pass is padded to one second; with feed rate 0 entries
+    // are added back-to-back and a packet is committed only when the next entry no longer fits.
+    _session = _client.open(_domain);
+    if (_session.get() == NULL) {
+        throw std::runtime_error(vespalib::make_string("FeederThread: Could not open session to %s", _tlsSpec.c_str()));
+    }
+    // Adds the next generated entry; when the packet rejects it, commit and retry exactly once.
+    auto feedNext = [this]() {
+        Packet::Entry entry = _generator.getRandomEntry(_current++);
+        if (addEntry(entry)) {
+            return;
+        }
+        commitPacket();
+        if (!addEntry(entry)) {
+            throw std::runtime_error(vespalib::make_string
+                                     ("FeederThread: Could not add %s", EntryPrinter::toStr(entry).c_str()));
+        }
+    };
+
+    while (!_done) {
+        if (_feedRate == 0) {
+            feedNext();
+            continue;
+        }
+        _timer.SetNow();
+        for (uint32_t i = 0; i < _feedRate; ++i) {
+            feedNext();
+        }
+        commitPacket();
+
+        uint64_t milliSecsUsed = static_cast<uint64_t>(_timer.MilliSecsToNow());
+        if (milliSecsUsed < 1000) {
+            FastOS_Thread::Sleep(1000 - milliSecsUsed);
+        } else {
+            LOG(info, "FeederThread: max throughput");
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Agent
+//-----------------------------------------------------------------------------
+class Agent : public ClientSession::Callback  // shared state for the subscriber and visitor agents
+{
+protected:
+    std::string _tlsSpec;
+    std::string _domain;
+    TransLogClient _client;
+    EntryGenerator _generator; // own copy, used by subclasses to regenerate the expected entries
+    std::string _name;
+    uint32_t _id;              // printed in the subclasses' log and error messages
+    bool _validate;            // when true, subclasses compare received entries with regenerated ones
+
+public:
+    Agent(const std::string & tlsSpec, const std::string & domain,
+          const EntryGenerator & generator, const std::string & name, uint32_t id, bool validate) :
+        ClientSession::Callback(),
+        _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec),
+        _generator(generator), _name(name), _id(id), _validate(validate) {}
+    virtual ~Agent() {}
+    virtual RPC::Result receive(const Packet & packet) = 0;
+    virtual void inSync() {}  // no-op default
+    virtual void eof() {}     // no-op default
+    virtual void failed() {}  // no-op default
+};
+
+
+//-----------------------------------------------------------------------------
+// SubscriberAgent
+//-----------------------------------------------------------------------------
+// Agent that subscribes from a fixed serial number and checks each received entry against the generator.
+class SubscriberAgent : public Agent
+{
+private:
+    std::unique_ptr<Subscriber> _subscriber;
+    SerialNum _from;
+    SerialNum _next;     // next serial number expected; guarded by _monitor
+    Monitor _monitor;
+
+    SerialNum getNext() {
+        MonitorGuard guard(_monitor);
+        return _next++;
+    }
+public:
+    SubscriberAgent(const std::string & tlsSpec, const std::string & domain,
+                    const EntryGenerator & generator, SerialNum from, uint32_t id, bool validate) :
+        Agent(tlsSpec, domain, generator, "SubscriberAgent", id, validate),
+        _subscriber(), _from(from), _next(from + 1) {}
+    virtual ~SubscriberAgent() {}
+    void start();
+    void stop();
+    SerialNum getExpectedNext() const {
+        MonitorGuard guard(_monitor);
+        return _next;
+    }
+    SerialNumRange getRange() const { return SerialNumRange(_from, getExpectedNext() - 1); } // locked read of _next
+    virtual RPC::Result receive(const Packet & packet);
+};
+
+void
+SubscriberAgent::start()
+{
+    // Creates the subscriber with this agent as callback and subscribes from _from; either failure throws.
+    _subscriber = _client.createSubscriber(_domain, *this);
+    if (!_subscriber) {
+        throw std::runtime_error(vespalib::make_string("SubscriberAgent[%u]: Could not open subscriber to %s",
+                                                       _id, _tlsSpec.c_str()));
+    }
+    if (!_subscriber->subscribe(_from)) {
+        throw std::runtime_error(vespalib::make_string("SubscriberAgent[%u]: Could not subscribe to %s from serialnumber %" PRIu64,
+                                                       _id, _tlsSpec.c_str(), _from));
+    }
+}
+
+void
+SubscriberAgent::stop()
+{
+    _subscriber.reset(); // releases the subscriber object created by start()
+}
+
+RPC::Result
+SubscriberAgent::receive(const Packet & packet)
+{
+    // Walks a private copy of the packet's stream entry by entry. The expected entry is always
+    // regenerated (which advances _next); it is only compared when validation is switched on.
+    auto stream = packet.getHandle();
+    while (stream.size() > 0) {
+        Packet::Entry received;
+        received.deserialize(stream);
+        Packet::Entry wanted = _generator.getRandomEntry(getNext());
+        const bool mismatch = _validate && !EntryComparator::cmp(received, wanted);
+        if (mismatch) {
+            throw std::runtime_error(vespalib::make_string
+                                     ("SubscriberAgent[%u]: Got %s, expected %s", _id,
+                                      EntryPrinter::toStr(received).c_str(),
+                                      EntryPrinter::toStr(wanted).c_str()));
+        }
+    }
+    LOG(info, "SubscriberAgent[%u]: received %s", _id, PacketPrinter::toStr(packet).c_str());
+
+    return RPC::OK;
+}
+
+//-----------------------------------------------------------------------------
+// VisitorAgent
+//-----------------------------------------------------------------------------
+// Agent that visits one range at a time; state goes IDLE -> RUNNING (start) -> FINISHED (eof) -> IDLE (setIdle).
+class VisitorAgent : public Agent
+{
+private:
+    enum State {
+        IDLE, RUNNING, FINISHED
+    };
+    std::unique_ptr<Visitor> _visitor;
+    SerialNum _from;
+    SerialNum _to;
+    SerialNum _next;
+    bool _running;
+    State _state;       // guarded by _monitor
+    Monitor _monitor;
+
+    void setState(State newState) {
+        MonitorGuard guard(_monitor);
+        _state = newState;
+    }
+    SerialNum getNext();
+
+public:
+    // _running is not read by this class, but is initialized so that no member is left indeterminate.
+    VisitorAgent(const std::string & tlsSpec, const std::string & domain,
+                 const EntryGenerator & generator, uint32_t id, bool validate) :
+        Agent(tlsSpec, domain, generator, "VisitorAgent", id, validate),
+        _visitor(), _from(0), _to(0), _next(0), _running(false), _state(IDLE) {}
+    virtual ~VisitorAgent() {}
+    void start(SerialNum from, SerialNum to);
+    void setIdle();
+    bool idle() {
+        MonitorGuard guard(_monitor);
+        return _state == IDLE;
+    }
+    bool running() {
+        MonitorGuard guard(_monitor);
+        return _state == RUNNING;
+    }
+    bool finished() {
+        MonitorGuard guard(_monitor);
+        return _state == FINISHED;
+    }
+    std::string getState() {
+        MonitorGuard guard(_monitor);
+        if (_state == IDLE) {
+            return std::string("idle");
+        } else if (_state == FINISHED) {
+            return std::string("finished");
+        } else {
+            return std::string("running");
+        }
+    }
+    SerialNum getFrom() { return _from; }
+    virtual RPC::Result receive(const Packet & packet);
+    virtual void eof() {
+        LOG(info, "VisitorAgent[%u]: eof", _id);
+        setState(FINISHED);
+    }
+};
+
+SerialNum
+VisitorAgent::getNext()
+{
+    const SerialNum current = _next++;  // _next advances even when the value turns out to be out of range
+    if (current <= _to) {
+        return current;
+    }
+    throw std::runtime_error(make_string("VisitorAgent[%u]: SerialNum (%" PRIu64 ") outside "
+                                         "expected range <%" PRIu64 ", %" PRIu64 "]", _id,
+                                         current, _from, _to));
+}
+
+void
+VisitorAgent::start(SerialNum from, SerialNum to)
+{
+    assert(idle());  // may only be started from the IDLE state
+    LOG(info, "VisitorAgent[%u]: start<%" PRIu64 ", %" PRIu64 "]", _id, from, to);
+    _from = from;
+    _to = to;
+    _next = from + 1;  // first serial number expected in receive(); the range is logged as <from, to]
+    _visitor = _client.createVisitor(_domain, *this);
+    if (_visitor.get() == NULL) {
+        throw std::runtime_error(vespalib::make_string
+                                 ("VisitorAgent[%u]: Could not open visitor to %s", _id, _tlsSpec.c_str()));
+    }
+    setState(RUNNING);  // set before visit() is issued; eof() later moves the state to FINISHED
+    if (!_visitor->visit(_from, _to)) {
+        throw std::runtime_error(vespalib::make_string
+                                 ("VisitorAgent[%u]: Could not visit from %s with range <%" PRIu64 ", %" PRIu64 "]",
+                                  _id, _tlsSpec.c_str(), _from, _to));
+    }
+}
+
+void
+VisitorAgent::setIdle()
+{
+    assert(finished());  // only a FINISHED visitor may be recycled
+    _visitor.reset();    // release the visitor object created by start()
+    setState(IDLE);
+}
+
+RPC::Result
+VisitorAgent::receive(const Packet & packet)
+{
+    // Regenerates the expected entry for every received one (getNext() throws past _to) and compares if validating.
+    auto handle = packet.getHandle();
+    while (handle.size() > 0) {
+        Packet::Entry entry;
+        entry.deserialize(handle);
+        Packet::Entry expected = _generator.getRandomEntry(getNext());
+        if (_validate && !EntryComparator::cmp(entry, expected)) {
+            throw std::runtime_error(vespalib::make_string
+                                     ("VisitorAgent[%u]: Got %s, expected %s", _id,
+                                      EntryPrinter::toStr(entry).c_str(),
+                                      EntryPrinter::toStr(expected).c_str()));
+        }
+    }
+
+    // Final range check; the stray "u" that used to follow the third PRIu64 in this message has been removed.
+    if (_next > _to + 1) {
+        throw std::runtime_error(vespalib::make_string
+                                 ("VisitorAgent[%u]: Visited range <%" PRIu64 ", %" PRIu64 "], expected "
+                                  "range <%" PRIu64 ", %" PRIu64 "]", _id,
+                                  _from, _next - 1, _from, _to));
+    }
+
+    return RPC::OK;
+}
+
+
+//-----------------------------------------------------------------------------
+// ControllerThread
+//-----------------------------------------------------------------------------
+// Runnable that owns the subscriber and visitor agents, starts random visits and prunes the domain periodically.
+class ControllerThread : public Runnable
+{
+private:
+    std::string _tlsSpec;
+    std::string _domain;
+    TransLogClient _client;
+    std::unique_ptr<ClientSession> _session;
+    EntryGenerator _generator;
+    std::vector<std::shared_ptr<SubscriberAgent> > _subscribers;
+    std::vector<std::shared_ptr<VisitorAgent> > _visitors;
+    std::vector<std::shared_ptr<VisitorAgent> > _rndVisitors;
+    uint64_t _visitorInterval; // in milliseconds
+    uint64_t _pruneInterval;   // in milliseconds
+    FastOS_Time _pruneTimer;
+    SerialNum _begin;
+    SerialNum _end;
+    size_t _count;
+
+    void getStatus();
+    void makeRandomVisitorVector();
+
+public:
+    ControllerThread(const std::string & tlsSpec, const std::string & domain,
+                     const EntryGenerator & generator, uint32_t numSubscribers, uint32_t numVisitors,
+                     uint64_t visitorInterval, uint64_t pruneInterval) :
+        _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec.c_str()), _session(),
+        _generator(generator), _subscribers(), _visitors(), _rndVisitors(), _visitorInterval(visitorInterval),
+        _pruneInterval(pruneInterval), _pruneTimer(), _begin(0), _end(0), _count(0)
+    {
+        for (uint32_t i = 0; i < numSubscribers; ++i) {
+            _subscribers.push_back(std::shared_ptr<SubscriberAgent>
+                                  (new SubscriberAgent(tlsSpec, domain, generator, 0, i, true)));
+        }
+
+        for (uint32_t i = 0; i < numVisitors; ++i) {
+            _visitors.push_back(std::shared_ptr<VisitorAgent>(new VisitorAgent(tlsSpec, domain, generator, i, true)));
+        }
+    }
+    void startSubscribers();
+    uint32_t runningVisitors();
+    std::vector<std::shared_ptr<SubscriberAgent> > & getSubscribers() { return _subscribers; }
+    std::vector<std::shared_ptr<VisitorAgent> > & getVisitors() { return _visitors; }
+    virtual void doRun();
+};
+
+void
+ControllerThread::getStatus()
+{
+    if (!_session->status(_begin, _end, _count)) {  // refreshes _begin, _end and _count from the session
+        throw std::runtime_error(vespalib::make_string("ControllerThread: Could not get status from %s", _tlsSpec.c_str()));
+    }
+}
+
+void
+ControllerThread::makeRandomVisitorVector()
+{
+    std::vector<std::shared_ptr<VisitorAgent> > tmp(_visitors);  // working copy that is drained below
+    _rndVisitors.clear();
+    while (tmp.size() > 0) {
+        size_t i = _generator.getRnd().lrand48() % tmp.size();  // pick one of the remaining visitors
+        _rndVisitors.push_back(tmp[i]);
+        tmp.erase(tmp.begin() + i);
+    }
+}
+
+void
+ControllerThread::startSubscribers()
+{
+    for (auto & subscriber : _subscribers) {  // start() is called in creation order
+        subscriber->start();
+    }
+}
+
+void
+ControllerThread::doRun()
+{
+    _session = _client.open(_domain);
+    if (_session.get() == NULL) {
+        throw std::runtime_error(vespalib::make_string("ControllerThread: Could not open session to %s", _tlsSpec.c_str()));
+    }
+    // Each pass: recycle finished visitors, start at most one idle visitor, prune when due, then sleep.
+    _pruneTimer.SetNow();
+    while (!_done) {
+        // move visitors that have seen eof back to the idle state
+        for (size_t i = 0; i < _visitors.size(); ++i) {
+            if (_visitors[i]->finished()) {
+                _visitors[i]->setIdle();
+            }
+        }
+        // start the first idle visitor found in a freshly shuffled order
+        makeRandomVisitorVector();
+        for (size_t i = 0; i < _rndVisitors.size(); ++i) {
+            if (_rndVisitors[i]->idle()) {
+                getStatus();
+                SerialNum from = _generator.getRandomSerialNum(_begin, _end) - 1; // NOTE(review): wraps if the pick is 0
+                SerialNum to = _generator.getRandomSerialNum(from + 1, _end);
+                _rndVisitors[i]->start(from, to);
+                break;
+            }
+        }
+        // prune the domain, but never past what a running visitor or a subscriber may still need
+        if (_pruneTimer.MilliSecsToNow() > _pruneInterval) {
+            getStatus();
+            SerialNum safePrune = _end;
+            for (size_t i = 0; i < _visitors.size(); ++i) {
+                if (_visitors[i]->running() && _visitors[i]->getFrom() < safePrune) {
+                    safePrune = _visitors[i]->getFrom();
+                }
+            }
+            for (size_t i = 0; i < _subscribers.size(); ++i) {
+                SerialNum next = _subscribers[i]->getExpectedNext();
+                if (next < safePrune) {
+                    safePrune = next;
+                }
+            }
+            LOG(info, "ControllerThread: status: begin(%" PRIu64 "), end(%" PRIu64 "), count(%zu)", _begin, _end, _count);
+            LOG(info, "ControllerThread: prune [%" PRIu64 ", %" PRIu64 ">", _begin, safePrune);
+            if (!_session->erase(safePrune)) {
+                throw std::runtime_error(vespalib::make_string("ControllerThread: Could not erase up to %" PRIu64, safePrune));
+            }
+            _pruneTimer.SetNow();
+        }
+        FastOS_Thread::Sleep(_visitorInterval);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// TransLogStress
+//-----------------------------------------------------------------------------
+// Command-line application that runs the transaction log stress scenario described by Config.
+class TransLogStress : public FastOS_Application
+{
+private:
+    class Config {
+    public:
+        uint64_t domainPartSize;
+        size_t packetSize;
+        uint64_t stressTime;      // milliseconds
+        uint32_t feedRate;
+        uint32_t numSubscribers;
+        uint32_t numVisitors;
+        uint64_t visitorInterval; // milliseconds
+        uint64_t pruneInterval;   // milliseconds
+
+        uint32_t numPreGeneratedBuffers;
+        uint32_t minStrLen;
+        uint32_t maxStrLen;
+        long baseSeed;
+
+        Config() :
+            domainPartSize(0), packetSize(0), stressTime(0), feedRate(0), numSubscribers(0), numVisitors(0),
+            visitorInterval(0), pruneInterval(0), numPreGeneratedBuffers(0), minStrLen(0), maxStrLen(0), baseSeed(0) {}
+    };
+
+    Config _cfg;
+
+    void printConfig();
+    void usage();
+
+public:
+    int Main();
+};
+
+void
+TransLogStress::printConfig()
+{
+    std::cout << "######## Config ########" << std::endl;
+    std::cout << "stressTime:             " << _cfg.stressTime / 1000 << " s" << std::endl;  // kept in ms, shown in s
+    std::cout << "feedRate:               " << _cfg.feedRate << " per/sec" << std::endl;
+    std::cout << "numSubscribers:         " << _cfg.numSubscribers << std::endl;
+    std::cout << "numVisitors:            " << _cfg.numVisitors << std::endl;
+    std::cout << "visitorInterval:        " << _cfg.visitorInterval << " ms" << std::endl;
+    std::cout << "pruneInterval:          " << _cfg.pruneInterval / 1000 << " s" << std::endl;  // kept in ms, shown in s
+    std::cout << "numPreGeneratedBuffers: " << _cfg.numPreGeneratedBuffers << std::endl;
+    std::cout << "minStrLen:              " << _cfg.minStrLen << std::endl;
+    std::cout << "maxStrLen:              " << _cfg.maxStrLen << std::endl;
+    std::cout << "baseSeed:               " << _cfg.baseSeed << std::endl;
+    std::cout << "domainPartSize:         " << _cfg.domainPartSize << " bytes" << std::endl;
+    std::cout << "packetSize:             " << _cfg.packetSize << " bytes" << std::endl;
+}
+
+void
+TransLogStress::usage()
+{
+    std::cout << "usage: translogstress [-t stressTime(s)] [-f feedRate] [-s numSubscribers]" << std::endl;  // letters match the GetOpt string in Main()
+    std::cout << "                      [-v numVisitors] [-c visitorInterval(ms)] [-e pruneInterval(s)]" << std::endl;
+    std::cout << "                      [-g numPreGeneratedBuffers] [-i minStrLen] [-a maxStrLen] [-b baseSeed]" << std::endl;
+    std::cout << "                      [-d domainPartSize] [-p packetSize]" << std::endl;
+}
+
+int
+TransLogStress::Main()
+{
+    // Parses options, starts a local server with feeder, controller and agents, runs for stressTime, then reports.
+    std::string tlsSpec("tcp/localhost:17897");
+    std::string domain("translogstress");
+    _cfg.domainPartSize = 8000000; // ~8MB
+    _cfg.packetSize = 0x10000;
+
+    _cfg.stressTime = 1000 * 60;
+    _cfg.feedRate = 10000;
+    _cfg.numSubscribers = 1;
+    _cfg.numVisitors = 1;
+    _cfg.visitorInterval = 1000 * 1;
+    _cfg.pruneInterval = 1000 * 12;
+
+    _cfg.numPreGeneratedBuffers = 0;
+    _cfg.minStrLen = 40;
+    _cfg.maxStrLen = 80;
+    _cfg.baseSeed = 100;
+
+    uint64_t sleepTime = 4000;
+    int idx = 1;
+    char opt;
+    const char * arg = NULL;
+    bool optError = false;
+    while ((opt = GetOpt("d:p:t:f:s:v:c:e:g:i:a:b:h", arg, idx)) != -1) {
+        switch (opt) {
+        case 'd':
+            _cfg.domainPartSize = atol(arg);
+            break;
+        case 'p':
+            _cfg.packetSize = atol(arg);
+            break;
+        case 't':
+            _cfg.stressTime = 1000 * atol(arg);
+            break;
+        case 'f':
+            _cfg.feedRate = atoi(arg);
+            break;
+        case 's':
+            _cfg.numSubscribers = atoi(arg);
+            break;
+        case 'v':
+            _cfg.numVisitors = atoi(arg);
+            break;
+        case 'c':
+            _cfg.visitorInterval = atol(arg);
+            break;
+        case 'e':
+            _cfg.pruneInterval = 1000 * atol(arg);
+            break;
+        case 'g':
+            _cfg.numPreGeneratedBuffers = atoi(arg);
+            break;
+        case 'i':
+            _cfg.minStrLen = atoi(arg);
+            break;
+        case 'a':
+            _cfg.maxStrLen = atoi(arg);
+            break;
+        case 'b':
+            _cfg.baseSeed = atol(arg);
+            break;
+        case 'h':
+            usage();
+            return -1;
+        default:
+            optError = true;
+            break;
+        }
+    }
+
+    if (_argc != idx || optError) {  // reject a bad command line before printing the config or sleeping
+        usage();
+        return -1;
+    }
+
+    printConfig();
+    FastOS_Thread::Sleep(sleepTime);
+
+    // start transaction log server
+    DummyFileHeaderContext fileHeaderContext;
+    TransLogServer tls("server", 17897, ".", fileHeaderContext, _cfg.domainPartSize);
+    TransLogClient client(tlsSpec);
+    client.create(domain);
+
+    FastOS_ThreadPool threadPool(256000);
+
+    BufferGenerator bufferGenerator(_cfg.minStrLen, _cfg.maxStrLen);
+    bufferGenerator.setSeed(_cfg.baseSeed);
+    std::vector<ByteBuffer> buffers;
+    for (uint32_t i = 0; i < _cfg.numPreGeneratedBuffers; ++i) {
+        buffers.push_back(bufferGenerator.getRandomBuffer());
+    }
+    EntryGenerator generator(_cfg.baseSeed, bufferGenerator);
+    if (buffers.size() > 0) {
+        generator.setBuffers(buffers);
+    }
+
+
+    // start feeder and controller
+    FeederThread feeder(tlsSpec, domain, generator, _cfg.feedRate, _cfg.packetSize);
+    threadPool.NewThread(&feeder);
+
+    FastOS_Thread::Sleep(sleepTime);
+
+    ControllerThread controller(tlsSpec, domain, generator, _cfg.numSubscribers, _cfg.numVisitors,
+                     _cfg.visitorInterval, _cfg.pruneInterval);
+    threadPool.NewThread(&controller);
+
+    // start subscribers
+    controller.startSubscribers();
+
+    // stop feeder and controller
+    FastOS_Thread::Sleep(_cfg.stressTime);
+    printConfig();
+    LOG(info, "Stop feeder...");
+    feeder.stop();
+    feeder.join();
+    std::cout << "" << std::endl;
+    std::cout << "  " << feeder.getRange().from() << "" << std::endl;
+    std::cout << "  " << feeder.getRange().to() << "" << std::endl;
+    std::cout << "  " << 1000 * (feeder.getRange().to() - feeder.getRange().from()) / (sleepTime + _cfg.stressTime)
+        << "" << std::endl;
+    std::cout << "" << std::endl;
+
+    LOG(info, "Stop controller...");
+    controller.stop();
+    controller.join();
+
+    FastOS_Thread::Sleep(sleepTime);
+    auto & visitors = controller.getVisitors();
+    for (size_t i = 0; i < visitors.size(); ++i) {
+        std::cout << "" << std::endl;
+        std::cout << "" << visitors[i]->getState() << "" << std::endl;
+        std::cout << "" << std::endl;
+    }
+
+    // stop subscribers
+    LOG(info, "Stop subscribers...");
+    auto & subscribers = controller.getSubscribers();
+    for (size_t i = 0; i < subscribers.size(); ++i) {
+        subscribers[i]->stop();
+        std::cout << "" << std::endl;
+        std::cout << "  " << subscribers[i]->getRange().from() << "" << std::endl;
+        std::cout << "  " << subscribers[i]->getRange().to() << "" << std::endl;
+        std::cout << "" << std::endl;
+    }
+
+    threadPool.Close();
+
+    return 0;
+}
+
+}
+}
+
+int main(int argc, char ** argv)
+{
+    search::transactionlog::TransLogStress stressApp;  // the stress-test application object
+    return stressApp.Entry(argc, argv);                // Entry()'s result becomes the exit status
+}
diff --git a/searchlib/src/tests/true/.gitignore b/searchlib/src/tests/true/.gitignore
new file mode 100644
index 00000000000..9ab22a26cdc
--- /dev/null
+++ b/searchlib/src/tests/true/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+true_test
+searchlib_true_test_app
diff --git a/searchlib/src/tests/true/CMakeLists.txt b/searchlib/src/tests/true/CMakeLists.txt
new file mode 100644
index 00000000000..f7c80db8f92
--- /dev/null
+++ b/searchlib/src/tests/true/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_true_test_app
+    SOURCES
+    true.cpp
+    DEPENDS   # no dependencies are listed for this target
+)
+vespa_add_test(NAME searchlib_true_test_app COMMAND searchlib_true_test_app)   # the test command is the executable itself
diff --git a/searchlib/src/tests/true/DESC b/searchlib/src/tests/true/DESC
new file mode 100644
index 00000000000..55b708ce96a
--- /dev/null
+++ b/searchlib/src/tests/true/DESC
@@ -0,0 +1 @@
+A very simple test. A good starting point for writing new tests.
diff --git a/searchlib/src/tests/true/FILES b/searchlib/src/tests/true/FILES
new file mode 100644
index 00000000000..7d324c5824d
--- /dev/null
+++ b/searchlib/src/tests/true/FILES
@@ -0,0 +1 @@
+true.cpp
diff --git a/searchlib/src/tests/true/true.cpp b/searchlib/src/tests/true/true.cpp
new file mode 100644
index 00000000000..d052f3ca4e4
--- /dev/null
+++ b/searchlib/src/tests/true/true.cpp
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("true_test");
+#include 
+
+TEST_SETUP(Test)
+
+int
+Test::Main()   // body of the test; Test is the name handed to TEST_SETUP in this file
+{
+    TEST_INIT("true_test");   // same name as the one given to LOG_SETUP
+    EXPECT_TRUE(true);        // the only expectation, and its argument is the literal true
+    TEST_DONE();              // NOTE(review): presumably yields the int result of Main — confirm in the testkit
+}
diff --git a/searchlib/src/tests/url/.gitignore b/searchlib/src/tests/url/.gitignore
new file mode 100644
index 00000000000..6d6a20d8270
--- /dev/null
+++ b/searchlib/src/tests/url/.gitignore
@@ -0,0 +1,7 @@
+*.core
+.depend
+Makefile
+core
+core.*
+testurl
+searchlib_testurl_app
diff --git a/searchlib/src/tests/url/CMakeLists.txt b/searchlib/src/tests/url/CMakeLists.txt
new file mode 100644
index 00000000000..aa52f3d8374
--- /dev/null
+++ b/searchlib/src/tests/url/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_testurl_app
+    SOURCES
+    testurl.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_testurl_app COMMAND sh dotest.sh)   # run through dotest.sh, which invokes ./searchlib_testurl_app
diff --git a/searchlib/src/tests/url/dotest.sh b/searchlib/src/tests/url/dotest.sh
new file mode 100755
index 00000000000..f7ac1fd1e69
--- /dev/null
+++ b/searchlib/src/tests/url/dotest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+# Run test
+echo "Testing the FastS_URL class..."
+$VALGRIND ./searchlib_testurl_app
+if [ $? -eq 0 ]; then
+    echo "SUCCESS: Test on FastS_URL passed!"
+else
+    echo "FAILURE: Test on FastS_URL failed!"
+    exit 1
+fi
+exit 0
diff --git a/searchlib/src/tests/url/testurl.cpp b/searchlib/src/tests/url/testurl.cpp
new file mode 100644
index 00000000000..4ed28453890
--- /dev/null
+++ b/searchlib/src/tests/url/testurl.cpp
@@ -0,0 +1,750 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include 
+#include 
+
+
+// needed to resolve external symbol from httpd.h on AIX
+void FastS_block_usr2() {}   // deliberately empty: it only has to exist as a symbol
+
+
+static search::util::URL GlobalURL;   // shared URL object: set by CheckURL, dumped by CheckString/CheckInt on a mismatch
+
+static bool
+CheckString(const char *name,
+            const unsigned char *test1,
+            const unsigned char *test2)
+{
+    // Returns true when both strings are equal; otherwise reports the pair and dumps GlobalURL.
+    assert(test1 != NULL);
+    assert(test2 != NULL);
+    const bool same = strcmp((const char*)test1, (const char*)test2) == 0;
+    if (same) {
+        return true;
+    }
+    printf("FAILED: %s: '%s' != '%s'!\n", name, test1, test2);
+    GlobalURL.Dump();
+    return false;
+}
+
+static bool
+CheckInt(const char *name,
+         int test1,
+         int test2)
+{
+    // Equal values pass silently; a mismatch is reported together with a dump of GlobalURL.
+    if (test1 == test2) {
+        return true;
+    }
+    printf("FAILED: %s: %d != %d!\n", name, test1, test2);
+    GlobalURL.Dump();
+    return false;
+}
+
+// Joins every token of url as "CONTEXT:token,CONTEXT:token,..."; the result lives in a static buffer reused by each call.
+const char *
+GetTokenString(search::util::URL &url)
+{
+    static char tokenbuffer[1000];
+    size_t used = 0;   // characters stored so far, not counting the terminating NUL
+    const unsigned char *token;
+    search::util::URL::URL_CONTEXT ctx;
+
+    tokenbuffer[0] = '\0';
+    while ((token = url.GetToken(ctx)) != NULL) {
+        const size_t room = sizeof(tokenbuffer) - used;   // space left, including the NUL
+        if (room <= 1) continue;   // buffer full: keep draining tokens, but store nothing more
+        // snprintf is bounded by room, so an over-long token list is truncated instead of overrunning the buffer
+        const int n = snprintf(tokenbuffer + used, room, "%s%s:%s", used > 0 ? "," : "",
+                               url.ContextName(ctx), (const char*)token);
+        if (n > 0) used += ((size_t)n < room) ? (size_t)n : room - 1;
+    }
+    return tokenbuffer;
+}
+
+
+static bool
+CheckURL(const char *url,
+         const char *scheme,
+         const char *host,
+         const char *domain,
+         const char *siteowner,
+         const char *tld,
+         const char *maintld,
+         const char */* tldregion */,
+         const char *port,
+         const char *path,
+         int pathdepth,
+         const char *filename,
+         const char *extension,
+         const char *params,
+         const char *query,
+         const char *fragment,
+         const char *address,
+         const char *tokens,
+         int verbose=0)
+{
+    // Parses url into GlobalURL, then compares each accessor with its expected value.
+    // Evaluation short-circuits: once a check fails (CheckString/CheckInt print the
+    // FAILED line and dump GlobalURL) no later accessor is queried. The tldregion
+    // expectation is unnamed and unused because its check is compiled out (#if 0).
+    typedef const unsigned char *UStr;
+    const bool chatty = verbose > 0;
+
+    if (chatty) {
+        printf("Checking with URL: '%s'\n", url);
+    }
+    GlobalURL.SetURL((UStr)url);
+    if (chatty) {
+        GlobalURL.Dump();
+    }
+
+    // Whole URL and scheme.
+    bool ok = CheckString("URL", (UStr)url, GlobalURL.GetURL());
+    ok = ok && CheckString("urltype", (UStr)scheme, GlobalURL.GetScheme());
+
+    // Host, its derived parts, and port.
+    ok = ok && CheckString("host", (UStr)host, GlobalURL.GetHost());
+    ok = ok && CheckString("domain", (UStr)domain, GlobalURL.GetDomain());
+    ok = ok && CheckString("siteowner", (UStr)siteowner, GlobalURL.GetSiteOwner());
+    ok = ok && CheckString("tld", (UStr)tld, GlobalURL.GetTLD());
+    ok = ok && CheckString("maintld", (UStr)maintld, GlobalURL.GetMainTLD());
+#if 0
+    ok = ok && CheckString("tldregion", (UStr)tldregion, GlobalURL.GetTLDRegion());
+#endif
+    ok = ok && CheckString("port", (UStr)port, GlobalURL.GetPort());
+
+    // Path and file name.
+    ok = ok && CheckString("path", (UStr)path, GlobalURL.GetPath());
+    ok = ok && CheckInt("pathdepth", pathdepth, GlobalURL.GetPathDepth());
+    ok = ok && CheckString("filename", (UStr)filename, GlobalURL.GetFilename());
+    ok = ok && CheckString("extension", (UStr)extension, GlobalURL.GetExtension());
+
+    // Params, query, fragment and address.
+    ok = ok && CheckString("params", (UStr)params, GlobalURL.GetParams());
+    ok = ok && CheckString("query", (UStr)query, GlobalURL.GetQuery());
+    ok = ok && CheckString("fragment", (UStr)fragment, GlobalURL.GetFragment());
+    ok = ok && CheckString("address", (UStr)address, GlobalURL.GetAddress());
+
+    // The token stream is compared last, as one comma-separated string.
+    ok = ok && CheckString("TOKENS", (UStr)tokens,
+                           (UStr)GetTokenString(GlobalURL));
+
+    return ok;
+}
+
+
+int main(int, char **)
+{
+    bool success = true;
+
+    success = success &&
+              CheckURL("", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "");// Tokenstring
+    success = success &&
+              CheckURL(".", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       ".", // path
+                       1, // pathdepth
+                       ".", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "");// Tokenstring
+    success = success &&
+              CheckURL("..", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "..", // path
+                       1, // pathdepth
+                       "..", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "");// Tokenstring
+    success = success &&
+              CheckURL("CHANGES_2.0a", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "CHANGES_2.0a", // path
+                       1, // pathdepth
+                       "CHANGES_2.0a", // filename
+                       "0a", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "FILENAME:CHANGES_2,EXTENSION:0a");// Tokenstring
+    success = success &&
+              CheckURL("patches/patch-cvs-1.9.10", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "patches/patch-cvs-1.9.10", // path
+                       2, // pathdepth
+                       "patch-cvs-1.9.10", // filename
+                       "10", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:patches,FILENAME:patch-cvs-1,FILENAME:9,EXTENSION:10");// Tokenstring
+    success = success &&
+              CheckURL("http:patches/patch-ssh-1.2.14", // URL
+                       "http", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "patches/patch-ssh-1.2.14", // path
+                       2, // pathdepth
+                       "patch-ssh-1.2.14", // filename
+                       "14", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,PATH:patches,FILENAME:patch-ssh-1,FILENAME:2,EXTENSION:14");// Tokenstring
+    success = success &&
+              CheckURL("http://180.uninett.no/servlet/online.Bransje", // URL
+                       "http", // scheme
+                       "180.uninett.no", // host
+                       "uninett.no", // domain
+                       "uninett", // siteowner
+                       "no", // tld
+                       "no", // maintld
+                       "europe", // tldregion
+                       "", // port
+                       "/servlet/online.Bransje", // path
+                       2, // pathdepth
+                       "online.Bransje", // filename
+                       "Bransje", // extension
+                       "", // query
+                       "", // params
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:180,DOMAIN:uninett,MAINTLD:no,PATH:servlet,FILENAME:online,EXTENSION:Bransje");// Tokenstring
+    success = success &&
+              CheckURL("Bilder.gif/rule11.GIF", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "Bilder.gif/rule11.GIF", // path
+                       2, // pathdepth
+                       "rule11.GIF", // filename
+                       "GIF", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:Bilder,PATH:gif,FILENAME:rule11,EXTENSION:GIF");// Tokenstring
+    success = success &&
+              CheckURL("bilder/meny/Buer/bue_o.GIF", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "bilder/meny/Buer/bue_o.GIF", // path
+                       4, // pathdepth
+                       "bue_o.GIF", // filename
+                       "GIF", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:bilder,PATH:meny,PATH:Buer,FILENAME:bue_o,EXTENSION:GIF");// Tokenstring
+    success = success &&
+              CheckURL("./fakadm/grafikk/indus_bilde.JPG", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "./fakadm/grafikk/indus_bilde.JPG", // path
+                       4, // pathdepth
+                       "indus_bilde.JPG", // filename
+                       "JPG", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:fakadm,PATH:grafikk,FILENAME:indus_bilde,EXTENSION:JPG");// Tokenstring
+    success = success &&
+              CheckURL("linux-2.0.35.tar.bz2", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "linux-2.0.35.tar.bz2", // path
+                       1, // pathdepth
+                       "linux-2.0.35.tar.bz2", // filename
+                       "bz2", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "FILENAME:linux-2,FILENAME:0,FILENAME:35,FILENAME:tar,EXTENSION:bz2");// Tokenstring
+    success = success &&
+              CheckURL("http://www.underdusken.no", // URL
+                       "http", // scheme
+                       "www.underdusken.no", // host
+                       "underdusken.no", // domain
+                       "underdusken", // siteowner
+                       "no", // tld
+                       "no", // maintld
+                       "europe", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
+    success = success &&
+              CheckURL("http://www.underdusken.no/?page=dusker/html/0008/Uholdbar.html", // URL
+                       "http", // scheme
+                       "www.underdusken.no", // host
+                       "underdusken.no", // domain
+                       "underdusken", // siteowner
+                       "no", // tld
+                       "no", // maintld
+                       "europe", // tldregion
+                       "", // port
+                       "/", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "page=dusker/html/0008/Uholdbar.html", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no,QUERY:page,QUERY:dusker,QUERY:html,QUERY:0008,QUERY:Uholdbar,QUERY:html");// Tokenstring
+    success = success &&
+              CheckURL("http://www.uni-karlsruhe.de/~ig25/ssh-faq/", // URL
+                       "http", // scheme
+                       "www.uni-karlsruhe.de", // host
+                       "uni-karlsruhe.de", // domain
+                       "uni-karlsruhe", // siteowner
+                       "de", // tld
+                       "de", // maintld
+                       "", // tldregion
+                       "", // port
+                       "/~ig25/ssh-faq/", // path
+                       2, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:www,DOMAIN:uni-karlsruhe,MAINTLD:de,PATH:ig25,PATH:ssh-faq");// Tokenstring
+    success = success &&
+              CheckURL("java/", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "java/", // path
+                       1, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:java");// Tokenstring
+    success = success &&
+              CheckURL("javascript:OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // URL
+                       "javascript", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // address
+                       "SCHEME:javascript,ADDRESS:OpenWindow,ADDRESS:survey,ADDRESS:faq,ADDRESS:html,ADDRESS:Issues,ADDRESS:width,ADDRESS:635,ADDRESS:height,ADDRESS:400,ADDRESS:toolbars,ADDRESS:no,ADDRESS:location,ADDRESS:no,ADDRESS:menubar,ADDRESS:yes,ADDRESS:status,ADDRESS:no,ADDRESS:resizable,ADDRESS:yes,ADDRESS:scrollbars,ADDRESS:yes");// Tokenstring
+    success = success &&
+              CheckURL("mailto: dmf-post@medisin.ntnu.no", // URL
+                       "mailto", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       " dmf-post@medisin.ntnu.no", // address
+                       "SCHEME:mailto,ADDRESS:dmf-post,ADDRESS:medisin,ADDRESS:ntnu,ADDRESS:no");// Tokenstring
+    success = success &&
+              CheckURL("mailto:%20Harald%20Danielsen@energy.sintef.no", // URL
+                       "mailto", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "%20Harald%20Danielsen@energy.sintef.no", // address
+                       "SCHEME:mailto,ADDRESS:20Harald,ADDRESS:20Danielsen,ADDRESS:energy,ADDRESS:sintef,ADDRESS:no");// Tokenstring
+    success = success &&
+              CheckURL("www.underdusken.no", // URL
+                       "", // scheme
+                       "www.underdusken.no", // host
+                       "underdusken.no", // domain
+                       "underdusken", // siteowner
+                       "no", // tld
+                       "no", // maintld
+                       "europe", // tldregion
+                       "", // port
+                       "", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring
+    success = success &&
+              CheckURL("~janie/", // URL
+                       "", // scheme
+                       "", // host
+                       "", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "~janie/", // path
+                       1, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "PATH:janie");// Tokenstring
+    success = success &&
+              CheckURL("https://dette.er.en:2020/~janie/index.htm?param1=q¶m2=r", // URL
+                       "https", // scheme
+                       "dette.er.en", // host
+                       "er.en", // domain
+                       "er", // siteowner
+                       "en", // tld
+                       "en", // maintld
+                       "", // tldregion
+                       "2020", // port
+                       "/~janie/index.htm", // path
+                       2, // pathdepth
+                       "index.htm", // filename
+                       "htm", // extension
+                       "", // params
+                       "param1=q¶m2=r", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:https,HOST:dette,DOMAIN:er,MAINTLD:en,PORT:2020,PATH:janie,FILENAME:index,EXTENSION:htm,QUERY:param1,QUERY:q,QUERY:param2,QUERY:r");// Tokenstring
+#if 0
+    success = success &&
+              CheckURL("http://www.sony.co.uk/", // URL
+                       "http", // scheme
+                       "www.sony.co.uk", // host
+                       "sony.co.uk", // domain
+                       "sony", // siteowner
+                       "co.uk", // tld
+                       "uk", // maintld
+                       "unitedkingdom", // tldregion
+                       "", // port
+                       "/", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:www,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
+    success = success &&
+              CheckURL("http://sony.co.uk/", // URL
+                       "http", // scheme
+                       "sony.co.uk", // host
+                       "sony.co.uk", // domain
+                       "sony", // siteowner
+                       "co.uk", // tld
+                       "uk", // maintld
+                       "unitedkingdom", // tldregion
+                       "", // port
+                       "/", // path
+                       0, // pathdepth
+                       "", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring
+#endif
+    // Test fixes for bugs reported in cvs commit:
+    // tegge       2000/10/27 22:42:59 CEST
+    success = success &&
+              CheckURL("http://somehost.somedomain/this!is!it/boom", // URL
+                       "http", // scheme
+                       "somehost.somedomain", // host
+                       "somehost.somedomain", // domain
+                       "somehost", // siteowner
+                       "somedomain", // tld
+                       "somedomain", // maintld
+                       "", // tldregion
+                       "", // port
+                       "/this!is!it/boom", // path
+                       2, // pathdepth
+                       "boom", // filename
+                       "", // extension
+                       "", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,DOMAIN:somehost,MAINTLD:somedomain,PATH:this,PATH:is,PATH:it,FILENAME:boom");// Tokenstring
+    success = success &&
+              CheckURL("http://test.com/index.htm?p1=q%20test&p2=r%10d", // URL
+                       "http", // scheme
+                       "test.com", // host
+                       "test.com", // domain
+                       "test", // siteowner
+                       "com", // tld
+                       "com", // maintld
+                       "northamerica", // tldregion
+                       "", // port
+                       "/index.htm", // path
+                       1, // pathdepth
+                       "index.htm", // filename
+                       "htm", // extension
+                       "", // params
+                       "p1=q%20test&p2=r%10d", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,DOMAIN:test,MAINTLD:com,FILENAME:index,EXTENSION:htm,QUERY:p1,QUERY:q,QUERY:20test,QUERY:p2,QUERY:r,QUERY:10d");// Tokenstring
+
+    // Test bugs found 2001/06/25
+    success = success &&
+              CheckURL("http://arthur/qm/images/qm1.gif", // URL
+                       "http", // scheme
+                       "arthur", // host
+                       "arthur", // domain
+                       "", // siteowner
+                       "", // tld
+                       "", // maintld
+                       "", // tldregion
+                       "", // port
+                       "/qm/images/qm1.gif", // path
+                       3, // pathdepth
+                       "qm1.gif", // filename
+                       "gif", // extension
+                       "", // params
+                       "", // query
+                       "", // address
+                       "", // fragment
+                       "SCHEME:http,MAINTLD:arthur,PATH:qm,PATH:images,FILENAME:qm1,EXTENSION:gif");// Tokenstring
+
+    // Test Orjan's hypothesis 2003/02/17
+    success = success &&
+              CheckURL("http://foo.com/ui;.gif", // URL
+                       "http", // scheme
+                       "foo.com", // host
+                       "foo.com", // domain
+                       "foo", // siteowner
+                       "com", // tld
+                       "com", // maintld
+                       "northamerica", // tldregion
+                       "", // port
+                       "/ui;.gif", // path
+                       1, // pathdepth
+                       "ui", // filename
+                       "", // extension
+                       ".gif", // params
+                       "", // query
+                       "", // address
+                       "", // fragment
+                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring
+
+    // Test Orjan's hypothesis 2003/02/17
+    success = success &&
+              CheckURL("http://foo.com/ui;.gif", // URL
+                       "http", // scheme
+                       "foo.com", // host
+                       "foo.com", // domain
+                       "foo", // siteowner
+                       "com", // tld
+                       "com", // maintld
+                       "northamerica", // tldregion
+                       "", // port
+                       "/ui;.gif", // path
+                       1, // pathdepth
+                       "ui", // filename
+                       "", // extension
+                       ".gif", // params
+                       "", // query
+                       "", // address
+                       "", // fragment
+                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring
+
+    // Verify params handling
+    success = success &&
+              CheckURL("http://foo.com/ui;par1=1/par2=2", // URL
+                       "http", // scheme
+                       "foo.com", // host
+                       "foo.com", // domain
+                       "foo", // siteowner
+                       "com", // tld
+                       "com", // maintld
+                       "northamerica", // tldregion
+                       "", // port
+                       "/ui;par1=1/par2=2", // path
+                       1, // pathdepth
+                       "ui", // filename
+                       "", // extension
+                       "par1=1/par2=2", // params
+                       "", // query
+                       "", // fragment
+                       "", // address
+                       "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:par1,PARAMS:1,PARAMS:par2,PARAMS:2");// Tokenstring
+
+    // Verify synthetic url
+    success = success &&
+              CheckURL("http://www.foo.no:8080/path/filename.ext;par1=hello/par2=world?query=test#fragment", // URL
+                       "http", // scheme
+                       "www.foo.no", // host
+                       "foo.no", // domain
+                       "foo", // siteowner
+                       "no", // tld
+                       "no", // maintld
+                       "europe", // tldregion
+                       "8080", // port
+                       "/path/filename.ext;par1=hello/par2=world", // path
+                       2, // pathdepth
+                       "filename.ext", // filename
+                       "ext", // extension
+                       "par1=hello/par2=world", // params
+                       "query=test", // query
+                       "fragment", // fragment
+                       "", // address
+                       "SCHEME:http,HOST:www,DOMAIN:foo,MAINTLD:no,PORT:8080,PATH:path,FILENAME:filename,EXTENSION:ext,PARAMS:par1,PARAMS:hello,PARAMS:par2,PARAMS:world,QUERY:query,QUERY:test,FRAGMENT:fragment");// Tokenstring
+
+    // '&' should be allowed in path according to RFC 1738, 2068 and 2396
+    success = success &&
+              CheckURL("http://canonsarang.com/zboard/data/gallery04/HU&BANG.jpg", // URL
+                       "http", // scheme
+                       "canonsarang.com", // host
+                       "canonsarang.com", // domain
+                       "canonsarang", // siteowner
+                       "com", // tld
+                       "com", // maintld
+                       "northamerica", // tldregion
+                       "", // port
+                       "/zboard/data/gallery04/HU&BANG.jpg", // path
+                       4, // pathdepth
+                       "HU&BANG.jpg", // filename
+                       "jpg", // extension
+                       "", // params
+                       "", // query
+                       "", // address
+                       "", // fragment
+                       "SCHEME:http,DOMAIN:canonsarang,MAINTLD:com,PATH:zboard,PATH:data,PATH:gallery04,FILENAME:HU,FILENAME:BANG,EXTENSION:jpg");// Tokenstring
+
+    return !success;
+}
diff --git a/searchlib/src/tests/util/.gitignore b/searchlib/src/tests/util/.gitignore
new file mode 100644
index 00000000000..a0da2dd3333
--- /dev/null
+++ b/searchlib/src/tests/util/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+*_test
+searchlib_rawbuf_test_app
diff --git a/searchlib/src/tests/util/CMakeLists.txt b/searchlib/src/tests/util/CMakeLists.txt
new file mode 100644
index 00000000000..43fec57c182
--- /dev/null
+++ b/searchlib/src/tests/util/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_rawbuf_test_app
+    SOURCES
+    rawbuf_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_rawbuf_test_app COMMAND searchlib_rawbuf_test_app)
diff --git a/searchlib/src/tests/util/bufferwriter/.gitignore b/searchlib/src/tests/util/bufferwriter/.gitignore
new file mode 100644
index 00000000000..171db45593c
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/.gitignore
@@ -0,0 +1,3 @@
+bufferwriter_bm
+searchlib_bufferwriter_test_app
+searchlib_bufferwriter_bm_app
diff --git a/searchlib/src/tests/util/bufferwriter/CMakeLists.txt b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt
new file mode 100644
index 00000000000..a57749ff853
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_bufferwriter_test_app
+    SOURCES
+    bufferwriter_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_bufferwriter_test_app COMMAND searchlib_bufferwriter_test_app)
+vespa_add_executable(searchlib_bufferwriter_bm_app
+    SOURCES
+    work.cpp
+    bm.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_bufferwriter_bm_app COMMAND searchlib_bufferwriter_bm_app BENCHMARK)
diff --git a/searchlib/src/tests/util/bufferwriter/bm.cpp b/searchlib/src/tests/util/bufferwriter/bm.cpp
new file mode 100644
index 00000000000..228ce4adcdc
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/bm.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP("bufferwriter_bm");
+#include 
+#include 
+#include "work.h"
+#include 
+
+using search::DrainingBufferWriter;
+
+double getTime() { fastos::TimeStamp now(fastos::ClockSystem::now()); return now.sec(); } // current fastos system-clock reading as TimeStamp::sec()
+
+constexpr size_t million = 1000000;
+
+enum class WorkFuncDispatch // selects which write-loop variant callWork() times
+{
+    DIRECT, // callWork() calls work()
+    LAMBDA, // callWork() calls workLambda()
+    FUNCTOR, // callWork() calls workFunctor()
+    FUNCTOR2 // callWork() calls workFunctor2()
+};
+
+
+template // element type parameter; its declaration is not visible in this view
+void
+callWork(size_t size, WorkFuncDispatch dispatch)
+{   // Times one pass that writes `size` elements through a DrainingBufferWriter, using the loop variant chosen by `dispatch`.
+    std::vector foo;
+    DrainingBufferWriter writer;
+    foo.resize(size); // input is allocated before the timed region starts
+    std::cout << "will write " << size << " elements of size " << sizeof(T) <<
+        std::endl;
+    double before = getTime();
+    switch (dispatch) {
+    case WorkFuncDispatch::DIRECT:
+        work(foo, writer);
+        break;
+    case WorkFuncDispatch::LAMBDA:
+        workLambda(foo, writer);
+        break;
+    case WorkFuncDispatch::FUNCTOR:
+        workFunctor(foo, writer);
+        break;
+    case WorkFuncDispatch::FUNCTOR2:
+        workFunctor2(foo, writer);
+        break;
+    default:
+        abort(); // dispatch value outside the enum
+    }
+    double after = getTime();
+    double delta = (after - before);
+    double writeSpeed = writer.getBytesWritten() / delta; // bytes per getTime() unit; NOTE(review): delta is not checked for zero
+    EXPECT_GREATER(writeSpeed, 1000); // the test fails unless the measured speed exceeds 1000
+    std::cout << "written is " << writer.getBytesWritten() << std::endl;
+    std::cout << "time used is " << (delta * 1000.0) << " ms" << std::endl;
+    std::cout << "write speed is " << writeSpeed << std::endl;
+}
+
+
+void
+callWorks(WorkFuncDispatch dispatch)
+{   // Runs callWork() four times for one dispatch variant, halving the element count each time.
+    callWork(million * 1000, dispatch); // NOTE(review): callWork is a template; any explicit element-type argument is not visible here — confirm against the original file
+    callWork(million * 500, dispatch);
+    callWork(million * 250, dispatch);
+    callWork(million * 125, dispatch);
+}
+
+TEST("simple bufferwriter speed test")
+{   // times the variant that calls work()
+    callWorks(WorkFuncDispatch::DIRECT);
+}
+
+TEST("lambda func bufferwriter speed test")
+{   // times the variant that calls workLambda()
+    callWorks(WorkFuncDispatch::LAMBDA);
+}
+
+TEST("functor bufferwriter speed test")
+{   // times the variant that calls workFunctor()
+    callWorks(WorkFuncDispatch::FUNCTOR);
+}
+
+TEST("functor2 bufferwriter speed test")
+{   // times the variant that calls workFunctor2()
+    callWorks(WorkFuncDispatch::FUNCTOR2);
+}
+
+
+TEST_MAIN()
+{   // test program entry point: runs every TEST registered in this file
+    TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp
new file mode 100644
index 00000000000..95e4db95e03
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp
@@ -0,0 +1,158 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP("bufferwriter_test");
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search
+{
+
+namespace
+{
+
+class StoreBufferWriter : public BufferWriter // test writer that keeps every flushed buffer so the written bytes can be inspected
+{
+    std::vector _buf; // buffer currently handed to BufferWriter::setup()
+    std::vector > > _bufs; // flushed buffers, in flush order
+    size_t _bytesWritten; // total bytes moved into _bufs by flush()
+    uint32_t _incompleteBuffers; // number of flushes that saw a buffer that was not full
+public:
+    static constexpr size_t BUFFER_SIZE = 262144;
+    // Sizes the first buffer and passes it to setup().
+    StoreBufferWriter();
+
+    virtual ~StoreBufferWriter() = default;
+    // Archives the used part of the current buffer and installs a fresh one.
+    virtual void flush() override;
+
+    size_t getBytesWritten() const { return _bytesWritten; }
+    // Returns all archived buffers concatenated in flush order.
+    std::vector getSingleBuffer() const;
+
+};
+
+
+StoreBufferWriter::StoreBufferWriter()
+    : BufferWriter(),
+      _buf(BUFFER_SIZE),
+      _bytesWritten(0),
+      _incompleteBuffers(0)
+{
+    // _buf already holds BUFFER_SIZE elements from the initializer list; pass it to setup().
+    setup(_buf.data(), _buf.size());
+}
+
+
+void
+StoreBufferWriter::flush() { // Moves the bytes used in _buf into a new entry of _bufs and installs a fresh BUFFER_SIZE buffer.
+    assert(_incompleteBuffers == 0); // all previous buffers must have been full
+    size_t nowLen = usedLen();
+    if (nowLen != _buf.size()) {
+        // buffer is not full, only allowed for last buffer
+        ++_incompleteBuffers;
+    }
+    if (nowLen == 0) {
+        return; // empty buffer
+    }
+    _bufs.emplace_back(std::make_unique>());
+    _bufs.back()->resize(BUFFER_SIZE); // fresh storage; becomes _buf after the swap below
+    _buf.resize(nowLen); // trim to the bytes actually used before archiving
+    _bufs.back()->swap(_buf); // _bufs.back() now holds the data, _buf the fresh storage
+    _bytesWritten += nowLen;
+    setup(&_buf[0], _buf.size()); // hand the fresh buffer to BufferWriter::setup()
+}
+
+
+std::vector
+StoreBufferWriter::getSingleBuffer() const
+{   // Returns the contents of every archived buffer in _bufs, concatenated in flush order.
+    std::vector res;
+    size_t needSize = 0;
+    for (const auto &buf : _bufs) {
+        needSize += buf->size();
+    }
+    res.reserve(needSize); // reserve the summed size before appending
+    for (const auto &buf : _bufs) {
+        res.insert(res.end(), buf->cbegin(), buf->cend());
+    }
+    return res; // plain return of the local: eligible for copy elision, and moved implicitly otherwise
+}
+
+}
+
+
+TEST("Test that bufferwriter works with no writes")
+{   // flushing a writer that received no data must report zero bytes written
+    DrainingBufferWriter writer;
+    writer.flush();
+    EXPECT_EQUAL(0u, writer.getBytesWritten());
+}
+
+TEST("Test that bufferwriter works with single byte write")
+{   // one 1-byte write followed by flush must be counted as exactly one byte
+    DrainingBufferWriter writer;
+    char a = 4;
+    writer.write(&a, sizeof(a));
+    writer.flush();
+    EXPECT_EQUAL(1u, writer.getBytesWritten());
+}
+
+TEST("Test that bufferwriter works with multiple writes")
+{   // writes of 1, 2 and 4 bytes must add up to 7 bytes written
+    DrainingBufferWriter writer;
+    char a = 4;
+    int16_t b = 5;
+    int32_t c = 6;
+    writer.write(&a, sizeof(a));
+    writer.write(&b, sizeof(b));
+    writer.write(&c, sizeof(c));
+    writer.flush();
+    EXPECT_EQUAL(7u, writer.getBytesWritten());
+}
+
+
+TEST("Test that bufferwriter works with long writes")
+{   // a single write larger than the writer's buffer must still be counted in full
+    std::vector a;
+    const size_t mysize = 10000000;
+    const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE;
+    EXPECT_GREATER(mysize, drainerBufferSize); // precondition check: mysize must exceed the buffer size
+    a.resize(mysize);
+    DrainingBufferWriter writer;
+    writer.write(&a[0], a.size()); // NOTE(review): passes the element count as the length; a's element type is not visible here
+    writer.flush();
+    EXPECT_EQUAL(a.size(), writer.getBytesWritten());
+}
+
+
+TEST("Test that bufferwriter passes on written data")
+{   // writes Rand48 values through StoreBufferWriter and checks the stored bytes equal the input
+    std::vector a;
+    const size_t mysize = 25000000;
+    const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE;
+    EXPECT_GREATER(mysize * sizeof(int), drainerBufferSize); // precondition check: payload must exceed the buffer size
+    a.reserve(mysize);
+    search::Rand48 rnd;
+    for (uint32_t i = 0; i < mysize; ++i) {
+        a.emplace_back(rnd.lrand48());
+    }
+    StoreBufferWriter writer;
+    writer.write(&a[0], a.size() * sizeof(int));
+    writer.flush();
+    EXPECT_EQUAL(a.size() * sizeof(int), writer.getBytesWritten());
+    std::vector written = writer.getSingleBuffer();
+    EXPECT_EQUAL(a.size() * sizeof(int), written.size());
+    EXPECT_TRUE(memcmp(&a[0], &written[0], written.size()) == 0); // byte-for-byte comparison of input and stored output
+}
+
+}
+
+TEST_MAIN()
+{   // test program entry point: runs every TEST registered in this file
+    TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/util/bufferwriter/work.cpp b/searchlib/src/tests/util/bufferwriter/work.cpp
new file mode 100644
index 00000000000..9835b28970e
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/work.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "work.h"
+#include 
+
+namespace search
+{
+
+template // value type parameter; its declaration is not visible in this view
+class WriteFunctor // callable that writes the raw bytes of each value it is given to a BufferWriter
+{
+    BufferWriter &_writer; // destination writer; held by reference
+public:
+    explicit WriteFunctor(BufferWriter &writer) // explicit: a writer must not convert implicitly into a functor
+        : _writer(writer)
+    {
+    }
+
+    void operator()(const T &val) { _writer.write(&val, sizeof(val)); } // writes sizeof(val) bytes starting at &val
+};
+
+template // value type parameter; its declaration is not visible in this view
+class WriteFunctor2 // same as WriteFunctor, but the call operator is marked noinline
+{
+    BufferWriter &_writer; // destination writer; held by reference
+public:
+    explicit WriteFunctor2(BufferWriter &writer) // explicit: a writer must not convert implicitly into a functor
+        : _writer(writer)
+    {
+    }
+
+    void operator()(const T &val) __attribute((noinline)) // noinline keeps the call out of line in the timed loop
+    { _writer.write(&val, sizeof(val)); }
+};
+
+template // template parameters (including Func) are not visible in this view
+void workLoop(const std::vector &v, Func &&func)
+{   // Calls func once for every element of v, in order.
+    for (const auto &val : v) {
+        func(val);
+    }
+}
+
+template // element type parameter; its declaration is not visible in this view
+void work(const std::vector &v, BufferWriter &writer)
+{   // Direct variant: writes the raw bytes of each element of v to writer in an inline loop, then flushes.
+    for (const auto &val : v) {
+        writer.write(&val, sizeof(val));
+    }
+    writer.flush();
+}
+
+template // element type parameter; its declaration is not visible in this view
+void workLambda(const std::vector &v, BufferWriter &writer)
+{   // Lambda variant: same writes as work(), but issued through workLoop() with a capturing lambda.
+    workLoop(v,
+                [&writer](const T &val) { writer.write(&val, sizeof(val)); });
+    writer.flush();
+}
+
+template // element type parameter; its declaration is not visible in this view
+void workFunctor(const std::vector &v, BufferWriter &writer)
+{   // Functor variant: same writes as work(), but issued through workLoop() with a WriteFunctor.
+    workLoop(v, WriteFunctor(writer));
+    writer.flush();
+}
+
+template // element type parameter; its declaration is not visible in this view
+void workFunctor2(const std::vector &v, BufferWriter &writer)
+{   // Functor variant using WriteFunctor2, whose call operator is marked noinline.
+    workLoop(v, WriteFunctor2(writer));
+    writer.flush();
+}
+
+template void work(const std::vector &v, BufferWriter &writer); // explicit instantiations, four per function; NOTE(review): the element types are not visible in this view
+template void work(const std::vector &v, BufferWriter &writer);
+template void work(const std::vector &v, BufferWriter &writer);
+template void work(const std::vector &v, BufferWriter &writer);
+template void workLambda(const std::vector &v, BufferWriter &writer);
+template void workLambda(const std::vector &v, BufferWriter &writer);
+template void workLambda(const std::vector &v, BufferWriter &writer);
+template void workLambda(const std::vector &v, BufferWriter &writer);
+template void workFunctor(const std::vector &v, BufferWriter &writer);
+template void workFunctor(const std::vector &v, BufferWriter &writer);
+template void workFunctor(const std::vector &v, BufferWriter &writer);
+template void workFunctor(const std::vector &v, BufferWriter &writer);
+template void workFunctor2(const std::vector &v, BufferWriter &writer);
+template void workFunctor2(const std::vector &v, BufferWriter &writer);
+template void workFunctor2(const std::vector &v, BufferWriter &writer);
+template void workFunctor2(const std::vector &v, BufferWriter &writer);
+
+} // namespace search
diff --git a/searchlib/src/tests/util/bufferwriter/work.h b/searchlib/src/tests/util/bufferwriter/work.h
new file mode 100644
index 00000000000..49a1cb8017d
--- /dev/null
+++ b/searchlib/src/tests/util/bufferwriter/work.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+namespace search { // declarations of the write-loop variants defined in work.cpp; NOTE(review): no include guard or include of the vector header is visible in this file
+
+class BufferWriter;
+
+template // element type parameter; its declaration is not visible in this view
+using WorkFunc = void (*)(const std::vector &v, BufferWriter &writer); // pointer type matching the four functions below
+template 
+void work(const std::vector &v, BufferWriter &writer); // direct loop
+template 
+void workLambda(const std::vector &v, BufferWriter &writer); // loop body passed as a lambda
+template 
+void workFunctor(const std::vector &v, BufferWriter &writer); // loop body passed as a WriteFunctor
+template 
+void workFunctor2(const std::vector &v, BufferWriter &writer); // loop body passed as a WriteFunctor2
+
+} // namespace search
+
diff --git a/searchlib/src/tests/util/ioerrorhandler/.gitignore b/searchlib/src/tests/util/ioerrorhandler/.gitignore
new file mode 100644
index 00000000000..2f5c2e77191
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/.gitignore
@@ -0,0 +1 @@
+searchlib_ioerrorhandler_test_app
diff --git a/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt
new file mode 100644
index 00000000000..92d6ab30d72
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_ioerrorhandler_test_app
+    SOURCES
+    ioerrorhandler_test.cpp
+    DEPENDS
+    searchlib_test
+    searchlib
+)
+vespa_add_test(NAME searchlib_ioerrorhandler_test_app COMMAND searchlib_ioerrorhandler_test_app)
diff --git a/searchlib/src/tests/util/ioerrorhandler/DESC b/searchlib/src/tests/util/ioerrorhandler/DESC
new file mode 100644
index 00000000000..3328798b936
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/DESC
@@ -0,0 +1 @@
+IOErrorHandler test. Take a look at ioerrorhandler_test.cpp for details.
diff --git a/searchlib/src/tests/util/ioerrorhandler/FILES b/searchlib/src/tests/util/ioerrorhandler/FILES
new file mode 100644
index 00000000000..2447a10e991
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/FILES
@@ -0,0 +1 @@
+ioerrorhandler_test.cpp
diff --git a/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp
new file mode 100644
index 00000000000..bcfaa5f8ac8
--- /dev/null
+++ b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp
@@ -0,0 +1,358 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("ioerrorhandler_test");
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+extern "C"
+{ // C-linkage declarations of the four I/O functions that this file defines itself further down
+
+ssize_t read(int fd, void *buf, size_t count);
+ssize_t write(int fd, const void *buf, size_t count);
+ssize_t pread(int fd, void *buf, size_t count, off_t offset);
+ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);
+
+
+}
+
+using ReadFunc = ssize_t (*)(int fd, void *buf, size_t count); // pointer type with read()'s signature
+using WriteFunc = ssize_t (*)(int fd, const void *buf, size_t count); // pointer type with write()'s signature
+using PreadFunc = ssize_t (*)(int fd, void *buf, size_t count, off_t offset); // pointer type with pread()'s signature
+using PwriteFunc = ssize_t (*)(int fd, const void *buf, size_t count, // pointer type with pwrite()'s signature
+                               off_t offset);
+
+using namespace search::test::statefile;
+using namespace search::test::statestring;
+
+namespace
+{ // next-in-chain implementations, filled in lazily via dlsym(RTLD_NEXT, ...) by the overrides below
+
+ReadFunc libc_read;
+WriteFunc libc_write;
+PreadFunc libc_pread;
+PwriteFunc libc_pwrite;
+
+}
+
+int injectErrno; // errno value stored by an injected failure
+std::atomic injectreadErrnoTrigger; // read() fails when pre-decrementing this counter yields 0
+std::atomic injectpreadErrnoTrigger; // same countdown for pread()
+std::atomic injectwriteErrnoTrigger; // same countdown for write()
+std::atomic injectpwriteErrnoTrigger; // same countdown for pwrite()
+
+ssize_t read(int fd, void *buf, size_t count)
+{   // Override of read(): fails with injectErrno when the trigger countdown hits zero, otherwise forwards to the next "read" symbol.
+    if (--injectreadErrnoTrigger == 0) { // counter is decremented on every call
+        errno = injectErrno;
+        return -1;
+    }
+    if (!libc_read) { // resolve the next implementation on first use
+        libc_read = reinterpret_cast(dlsym(RTLD_NEXT, "read"));
+    }
+    return libc_read(fd, buf, count);
+}
+
+ssize_t write(int fd, const void *buf, size_t count)
+{   // Override of write(): fails with injectErrno when the trigger countdown hits zero, otherwise forwards to the next "write" symbol.
+    if (--injectwriteErrnoTrigger == 0) { // counter is decremented on every call
+        errno = injectErrno;
+        return -1;
+    }
+    if (!libc_write) { // resolve the next implementation on first use
+        libc_write = reinterpret_cast(dlsym(RTLD_NEXT, "write"));
+    }
+    return libc_write(fd, buf, count);
+}
+
+ssize_t pread(int fd, void *buf, size_t count, off_t offset)
+{   // Override of pread(): fails with injectErrno when the trigger countdown hits zero, otherwise forwards to the next "pread" symbol.
+    if (--injectpreadErrnoTrigger == 0) { // counter is decremented on every call
+        errno = injectErrno;
+        return -1;
+    }
+    if (!libc_pread) { // resolve the next implementation on first use
+        libc_pread = reinterpret_cast(dlsym(RTLD_NEXT, "pread"));
+    }
+    return libc_pread(fd, buf, count, offset);
+}
+
+
+ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset)
+{   // Override of pwrite(): fails with injectErrno when the trigger countdown hits zero, otherwise forwards to the next "pwrite" symbol.
+    if (--injectpwriteErrnoTrigger == 0) { // counter is decremented on every call
+        errno = injectErrno;
+        return -1;
+    }
+    if (!libc_pwrite) { // resolve the next implementation on first use
+        libc_pwrite = reinterpret_cast(dlsym(RTLD_NEXT, "pwrite"));
+    }
+    return libc_pwrite(fd, buf, count, offset);
+}
+
+
+
+namespace search
+{
+
+const char *testStringBase = "This is a test\n"; // payload that Fixture copies into its aligned testString
+
+using strvec = std::vector; // used below for the expected history lines; element type not visible in this view
+
+namespace
+{
+
+bool
+assertHistory(std::vector &exp,
+              std::vector &act)
+{   // Returns true when exp and act have the same size and equal entries; stops at the first failed expectation.
+    if (!EXPECT_EQUAL(exp.size(), act.size())) {
+        return false; // sizes differ, so entries are not compared
+    }
+    for (size_t i = 0; i < exp.size(); ++i) {
+        if (!EXPECT_EQUAL(exp[i], act[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+class Fixture // per-test state: a fresh state file, a test data file handle and an aligned copy of the test string
+{
+public:
+
+    std::unique_ptr sf; // state file created by the constructor
+    std::unique_ptr file; // test file; set by openFile() or openFileDIO()
+    char buf[8192]; // backing storage for testString
+    char *testString; // points into buf at an address that is a multiple of 4096
+
+    Fixture();
+    // Opens "testfile" for reading and writing.
+    void openFile();
+    // Same as openFile(), but calls EnableDirectIO() first.
+    void openFileDIO();
+    // Writes testString to the file and seeks back to position 0.
+    void writeTestString();
+};
+
+
+Fixture::Fixture()
+    : sf(),
+      file()
+{   // Removes leftovers from earlier runs, creates the state file and places the test string at a 4096-aligned address inside buf.
+    unlink("testfile");
+    StateFile::erase("state");
+    sf.reset(new StateFile("state"));
+    testString = &buf[0];
+    int off = reinterpret_cast(testString) & 4095; // offset of buf within its 4096-byte block
+    if (off != 0) {
+        testString += 4096 - off; // advance to the next multiple of 4096; presumably needed for the direct IO test — confirm
+    }
+    assert(testString + strlen(testStringBase) < &buf[0] + sizeof(buf)); // string plus terminator must fit in buf
+    strcpy(testString, testStringBase);
+}
+
+
+void
+Fixture::openFile()
+{   // Replaces `file` with a new FastOS_File opened read/write on "testfile".
+    file.reset(new FastOS_File);
+    file->OpenReadWrite("testfile");
+}
+
+void
+Fixture::openFileDIO()
+{   // Like openFile(), but calls EnableDirectIO() on the file before opening it.
+    file.reset(new FastOS_File);
+    file->EnableDirectIO();
+    file->OpenReadWrite("testfile");
+}
+
+void
+Fixture::writeTestString()
+{   // Writes strlen(testString) bytes of testString to the file, then sets the file position to 0.
+    file->WriteBuf(testString, strlen(testString));
+    file->SetPosition(0);
+}
+
+
+}
+
+
+TEST("Test that ioerror handler can be instantiated")
+{   // a handler created on a fresh state file must not report having fired
+    StateFile::erase("state");
+    StateFile sf("state");
+    IOErrorHandler ioeh(&sf);
+    EXPECT_FALSE(ioeh.fired());
+}
+
+
+TEST_F("Test that ioerror handler can process read error", Fixture)
+{   // Injects EIO into the next read() and checks that the handler fires and the failure shows up in the state file and its history.
+    IOErrorHandler ioeh(f.sf.get());
+    EXPECT_FALSE(ioeh.fired());
+    f.openFile();
+    f.writeTestString();
+    uint64_t fileSize = f.file->GetSize();
+    EXPECT_EQUAL(strlen(f.testString), fileSize);
+    char buf[1024];
+    assert(fileSize <= sizeof(buf));
+    try {
+        f.file->SetPosition(0);
+        injectErrno = EIO;
+        injectreadErrnoTrigger = 1; // the overridden read() fails on its next call
+        f.file->ReadBuf(buf, fileSize);
+        LOG(error, "Should never get here");
+        abort(); // ReadBuf was expected to throw
+    } catch (std::runtime_error &e) {
+        LOG(info, "Caught std::runtime_error exception: %s", e.what());
+        EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+    }
+    EXPECT_TRUE(ioeh.fired());
+    {
+        vespalib::string act = readState(*f.sf);
+        normalizeTimestamp(act); // expected text below uses ts=0.0
+        vespalib::string exp = "state=down ts=0.0 operation=read "
+                               "file=testfile error=5 offset=0 len=15 "
+                               "rlen=-1\n";
+        EXPECT_EQUAL(exp, act);
+    }
+    {
+        strvec exp({ "state=down ts=0.0 operation=read "
+                            "file=testfile error=5 offset=0 len=15 "
+                            "rlen=-1\n"});
+        std::vector act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act)); // history must contain exactly this one entry
+    }
+}
+
+TEST_F("Test that ioerror handler can process pread error", Fixture)
+{   // Injects EIO into the next pread() and checks that the handler fires and the failure shows up in the state file and its history.
+    IOErrorHandler ioeh(f.sf.get());
+    EXPECT_FALSE(ioeh.fired());
+    f.openFile();
+    f.writeTestString();
+    uint64_t fileSize = f.file->GetSize();
+    EXPECT_EQUAL(strlen(f.testString), fileSize);
+    char buf[1024];
+    assert(fileSize <= sizeof(buf));
+    try {
+        f.file->SetPosition(0);
+        injectErrno = EIO;
+        injectpreadErrnoTrigger = 1; // the overridden pread() fails on its next call
+        f.file->ReadBuf(buf, fileSize, 0); // three-argument ReadBuf; presumably the positional variant that ends up in pread() — confirm
+        LOG(error, "Should never get here");
+        abort(); // ReadBuf was expected to throw
+    } catch (std::runtime_error &e) {
+        LOG(info, "Caught std::runtime_error exception: %s", e.what());
+        EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+    }
+    EXPECT_TRUE(ioeh.fired());
+    {
+        vespalib::string act = readState(*f.sf);
+        normalizeTimestamp(act); // expected text below uses ts=0.0
+        vespalib::string exp = "state=down ts=0.0 operation=read "
+                               "file=testfile error=5 offset=0 len=15 "
+                               "rlen=-1\n";
+        EXPECT_EQUAL(exp, act);
+    }
+    {
+        strvec exp({ "state=down ts=0.0 operation=read "
+                            "file=testfile error=5 offset=0 len=15 "
+                            "rlen=-1\n"});
+        std::vector act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act)); // history must contain exactly this one entry
+    }
+}
+
+TEST_F("Test that ioerror handler can process write error", Fixture)
+{   // Injects EIO into the next write() and checks that the handler fires and the failure shows up in the state file and its history.
+    IOErrorHandler ioeh(f.sf.get());
+    EXPECT_FALSE(ioeh.fired());
+    f.openFile();
+    try {
+        injectErrno = EIO;
+        injectwriteErrnoTrigger = 1; // the overridden write() fails on its next call
+        f.writeTestString();
+        LOG(error, "Should never get here");
+        abort(); // writeTestString was expected to throw
+    } catch (std::runtime_error &e) {
+        LOG(info, "Caught std::runtime_error exception: %s", e.what());
+        EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+    }
+    EXPECT_TRUE(ioeh.fired());
+    {
+        vespalib::string act = readState(*f.sf);
+        normalizeTimestamp(act); // expected text below uses ts=0.0
+        vespalib::string exp = "state=down ts=0.0 operation=write "
+                               "file=testfile error=5 offset=0 len=15 "
+                               "rlen=-1\n";
+        EXPECT_EQUAL(exp, act);
+    }
+    {
+        strvec exp({ "state=down ts=0.0 operation=write "
+                            "file=testfile error=5 offset=0 len=15 "
+                            "rlen=-1\n"});
+        std::vector act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act)); // history must contain exactly this one entry
+    }
+}
+
+
+TEST_F("Test that ioerror handler can process pwrite error", Fixture)
+{   // Injects EIO into the next pwrite() on a direct IO file and checks that the handler fires and records the failure.
+    IOErrorHandler ioeh(f.sf.get());
+    EXPECT_FALSE(ioeh.fired());
+    f.openFileDIO(); // direct IO; presumably this is what routes the write through pwrite() — confirm
+    try {
+        injectErrno = EIO;
+        injectpwriteErrnoTrigger = 1; // the overridden pwrite() fails on its next call
+        f.writeTestString();
+        LOG(error, "Should never get here");
+        abort(); // writeTestString was expected to throw
+    } catch (std::runtime_error &e) {
+        LOG(info, "Caught std::runtime_error exception: %s", e.what());
+        EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr);
+    }
+    EXPECT_TRUE(ioeh.fired());
+    {
+        vespalib::string act = readState(*f.sf);
+        normalizeTimestamp(act); // expected text below uses ts=0.0
+        vespalib::string exp = "state=down ts=0.0 operation=write "
+                               "file=testfile error=5 offset=0 len=15 "
+                               "rlen=-1\n";
+        EXPECT_EQUAL(exp, act);
+    }
+    {
+        strvec exp({ "state=down ts=0.0 operation=write "
+                            "file=testfile error=5 offset=0 len=15 "
+                            "rlen=-1\n"});
+        std::vector act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act)); // history must contain exactly this one entry
+    }
+}
+
+}
+
+TEST_MAIN()
+{   // runs every registered test, then removes the state file and the test data file
+    TEST_RUN_ALL();
+    search::StateFile::erase("state");
+    unlink("testfile");
+}
diff --git a/searchlib/src/tests/util/rawbuf_test.cpp b/searchlib/src/tests/util/rawbuf_test.cpp
new file mode 100644
index 00000000000..e9dc139bda5
--- /dev/null
+++ b/searchlib/src/tests/util/rawbuf_test.cpp
@@ -0,0 +1,198 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for rawbuf.
+
+#include 
+LOG_SETUP("rawbuf_test");
+#include 
+
+#include 
+#include 
+#include 
+
+using vespalib::string;
+using namespace search;
+
+namespace {
+
+string getString(const RawBuf &buf) { // copies GetUsedLen() bytes starting at GetDrainPos() into a string
+    return string(buf.GetDrainPos(), buf.GetUsedLen());
+}
+
+TEST("require that rawbuf can append text") { // two += appends must concatenate
+    RawBuf buf(10);
+    buf += "foo";
+    buf += "bar";
+    EXPECT_EQUAL("foobar", getString(buf));
+}
+
+TEST("require that rawbuf expands when appended beyond size") { // appending past the constructor size of 4 must keep all data
+    RawBuf buf(4);
+    buf += "foo";
+    EXPECT_EQUAL(1u, buf.GetFreeLen());
+    buf += "bar"; // does not fit in the 1 free byte
+    EXPECT_EQUAL(2u, buf.GetFreeLen()); // 6 bytes used with 2 free after growth
+    EXPECT_EQUAL("foobar", getString(buf));
+}
+
+TEST("require that a rawbuf can be appended to another") { // += with a RawBuf operand appends its contents
+    RawBuf buf1(10);
+    RawBuf buf2(10);
+    buf1 += "foo";
+    buf2 += "bar";
+    buf1 += buf2;
+    EXPECT_EQUAL("foobar", getString(buf1));
+}
+
+TEST("require that rawbufs can be tested for equality") { // a buffer equals itself and differs from one with other content
+    RawBuf buf1(10);
+    RawBuf buf2(10);
+    buf1 += "foo";
+    buf2 += "bar";
+    EXPECT_TRUE(buf1 == buf1);
+    EXPECT_FALSE(buf1 == buf2);
+}
+
+template // numeric type T taken by the member function under test; declaration not visible in this view
+void checkAddNum(void (RawBuf::*addNum)(T, size_t, char), size_t num,
+                 size_t fieldw, char fill, const string &expected) { // calls addNum(num, fieldw, fill) on a fresh buffer and compares the text
+    RawBuf buf(10);
+    (buf.*addNum)(num, fieldw, fill); // num arrives as size_t and is converted to T at this call
+    EXPECT_EQUAL(expected, getString(buf));
+}
+
+TEST("require that rawbuf can add numbers in decimal") { // checks fill/width handling of addNum, addNum32 and addNum64
+    checkAddNum(&RawBuf::addNum, 0, 4, 'x', "xxx0");
+    checkAddNum(&RawBuf::addNum, 42, 4, '0', "0042");
+    checkAddNum(&RawBuf::addNum, 12345678901234, 4, '0', "12345678901234"); // longer than the field width: not truncated
+    checkAddNum(&RawBuf::addNum, -1, 4, '0', "18446744073709551615"); // addNum is expected to print -1 as an unsigned 64-bit value
+
+    checkAddNum(&RawBuf::addNum32, 0, 4, 'x', "xxx0");
+    checkAddNum(&RawBuf::addNum32, 42, 4, '0', "0042");
+    checkAddNum(&RawBuf::addNum32, 1234567890, 4, '0', "1234567890");
+    checkAddNum(&RawBuf::addNum32, -1, 0, '0', "-1");
+    checkAddNum(&RawBuf::addNum32, -1, 4, '0', "00-1"); // fill characters are expected before the sign
+
+    checkAddNum(&RawBuf::addNum64, 0, 4, 'x', "xxx0");
+    checkAddNum(&RawBuf::addNum64, 42, 4, '0', "0042");
+    checkAddNum(&RawBuf::addNum64, 12345678901234, 4, '0', "12345678901234");
+    checkAddNum(&RawBuf::addNum64, -1, 0, '0', "-1");
+    checkAddNum(&RawBuf::addNum64, -1, 4, '0', "00-1"); // fill characters are expected before the sign
+}
+
+TEST("require that rawbuf can add hitrank") { // HitRank(4.2) must be rendered as "4.2"
+    RawBuf buf(10);
+    buf.addHitRank(HitRank(4.2));
+    EXPECT_EQUAL("4.2", getString(buf));
+}
+
+TEST("require that rawbuf can add signedhitrank") { // SignedHitRank(-4.213) must be rendered as "-4.213"
+    RawBuf buf(10);
+    buf.addHitRank(SignedHitRank(-4.213));
+    EXPECT_EQUAL("-4.213", getString(buf));
+}
+
+TEST("require that rawbuf can append data of known length") { // append(ptr, len) with 20 bytes into a buffer constructed with size 10
+    RawBuf buf(10);
+    const string data("foo bar baz qux quux");
+    buf.append(data.data(), data.size());
+    EXPECT_EQUAL(data, getString(buf));
+}
+
+TEST("require that rawbuf can be truncated shorter and longer") { // truncate() is used both to shrink and to re-extend the used length
+    RawBuf buf(10);
+    buf += "foobarbaz";
+    buf.truncate(3); // keep "foo"
+    buf += "qux";
+    buf.truncate(9); // extend again; the expected text shows the old "baz" bytes reappearing
+    EXPECT_EQUAL("fooquxbaz", getString(buf));
+}
+
+TEST("require that prealloc makes enough room") { // preAlloc(100) must keep the content and leave at least 100 free bytes
+    RawBuf buf(10);
+    buf += "foo";
+    EXPECT_EQUAL(7u, buf.GetFreeLen());
+    buf.preAlloc(100);
+    EXPECT_EQUAL("foo", getString(buf));
+    EXPECT_LESS_EQUAL(100u, buf.GetFreeLen());
+}
+
+TEST("require that rawbuf can read from file") { // readFile() appends file data after the existing buffer content
+    FastOS_File file("mytemporaryfile");
+    file.OpenReadWrite();
+    file.Write2("barbaz", 6);
+    file.SetPosition(0);
+
+    RawBuf buf(10);
+    buf += "foo";
+    buf.readFile(file, 3); // reads "bar"
+    EXPECT_EQUAL("foobar", getString(buf));
+    buf.readFile(file, 100); // asks for more than remains; only "baz" is expected to be added
+    EXPECT_EQUAL("foobarbaz", getString(buf));
+
+    file.Close();
+    file.Delete(); // remove the temporary file
+}
+
+TEST("require that compact discards drained data") { // after Drain(3) and Compact(), only "bar" remains in use
+    RawBuf buf(10);
+    buf += "foobar";
+    buf.Drain(3);
+    buf.Compact();
+    buf.Fill(3); // presumably marks 3 more bytes as used without writing them; the expected text implies they still hold "bar" — confirm
+    EXPECT_EQUAL("barbar", getString(buf));
+}
+
+TEST("require that reusing a buffer that has grown 4x will alloc new buffer") {
+    RawBuf buf(10);
+    buf.preAlloc(100);  // grows the buffer far beyond its initial size of 10
+    EXPECT_LESS_EQUAL(100u, buf.GetFreeLen());
+    buf.Reuse();
+    EXPECT_EQUAL(10u, buf.GetFreeLen());  // free space is back at the initial size after Reuse()
+}
+
+TEST("require that various length and position information can be found.") {
+    RawBuf buf(30);
+    buf += "foo bar baz qux quux corge";  // 26 characters
+    buf.Drain(7);
+    EXPECT_EQUAL(7u, buf.GetDrainLen());
+    EXPECT_EQUAL(19u, buf.GetUsedLen());  // 26 written minus 7 drained
+    EXPECT_EQUAL(26u, buf.GetUsedAndDrainLen());
+    EXPECT_EQUAL(4u, buf.GetFreeLen());  // 30 minus 26
+}
+
+TEST("require that rawbuf can 'putToInet' 16-bit numbers") {
+    RawBuf buf(1);  // starts smaller than the 2 bytes written below
+    buf.Put16ToInet(0x1234);
+    EXPECT_EQUAL(2, buf.GetFillPos() - buf.GetDrainPos());  // exactly two bytes were added
+    EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);  // most significant byte first; the mask drops any sign extension
+    EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+}
+
+TEST("require that rawbuf can 'putToInet' 32-bit numbers") {
+    RawBuf buf(1);  // starts smaller than the 4 bytes written below
+    buf.PutToInet(0x12345678);
+    EXPECT_EQUAL(4, buf.GetFillPos() - buf.GetDrainPos());  // exactly four bytes were added
+    EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);  // bytes are expected most significant first
+    EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+    EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff);
+    EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff);
+}
+
+TEST("require that rawbuf can 'putToInet' 64-bit numbers") {
+    RawBuf buf(1);  // starts smaller than the 8 bytes written below
+    buf.Put64ToInet(0x123456789abcdef0ULL);
+    EXPECT_EQUAL(8, buf.GetFillPos() - buf.GetDrainPos());  // exactly eight bytes were added
+    EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff);  // bytes are expected most significant first
+    EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff);
+    EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff);
+    EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff);
+    EXPECT_EQUAL(0x9a, (int) buf.GetDrainPos()[4] & 0xff);  // from here on the mask matters if char is signed
+    EXPECT_EQUAL(0xbc, (int) buf.GetDrainPos()[5] & 0xff);
+    EXPECT_EQUAL(0xde, (int) buf.GetDrainPos()[6] & 0xff);
+    EXPECT_EQUAL(0xf0, (int) buf.GetDrainPos()[7] & 0xff);
+}
+
+
+}  // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }  // program entry point for the TEST() cases in this file
diff --git a/searchlib/src/tests/util/searchable_stats/.gitignore b/searchlib/src/tests/util/searchable_stats/.gitignore
new file mode 100644
index 00000000000..08720bdefb5
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/.gitignore
@@ -0,0 +1,4 @@
+/.depend
+/Makefile
+/*_test
+searchlib_searchable_stats_test_app
diff --git a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt
new file mode 100644
index 00000000000..3bc0fbe9c6b
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchable_stats_test_app
+    SOURCES
+    searchable_stats_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_searchable_stats_test_app COMMAND searchlib_searchable_stats_test_app)
diff --git a/searchlib/src/tests/util/searchable_stats/DESC b/searchlib/src/tests/util/searchable_stats/DESC
new file mode 100644
index 00000000000..b8127b278f8
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/DESC
@@ -0,0 +1 @@
+searchable_stats test. Take a look at searchable_stats_test.cpp for details.
diff --git a/searchlib/src/tests/util/searchable_stats/FILES b/searchlib/src/tests/util/searchable_stats/FILES
new file mode 100644
index 00000000000..94e6ce7f4df
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/FILES
@@ -0,0 +1 @@
+searchable_stats_test.cpp
diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp
new file mode 100644
index 00000000000..83aba794824
--- /dev/null
+++ b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("searchable_stats_test");
+#include 
+#include 
+
+using namespace search;
+
+class Test : public vespalib::TestApp {
+public:
+    int Main();  // holds all SearchableStats checks, defined below
+};
+
+int
+Test::Main()
+{
+    TEST_INIT("searchable_stats_test");
+    {
+        SearchableStats stats;
+        EXPECT_EQUAL(0u, stats.memoryUsage());  // a fresh object reports zero for all three counters
+        EXPECT_EQUAL(0u, stats.docsInMemory());
+        EXPECT_EQUAL(0u, stats.sizeOnDisk());
+        {
+            SearchableStats rhs;
+            EXPECT_EQUAL(&rhs.memoryUsage(100), &rhs);  // setters return the object itself, so calls can be chained
+            EXPECT_EQUAL(&rhs.docsInMemory(10), &rhs);
+            EXPECT_EQUAL(&rhs.sizeOnDisk(1000), &rhs);
+            EXPECT_EQUAL(&stats.add(rhs), &stats);  // add() returns its receiver as well
+        }
+        EXPECT_EQUAL(100u, stats.memoryUsage());  // zero plus the values taken from rhs
+        EXPECT_EQUAL(10u, stats.docsInMemory());
+        EXPECT_EQUAL(1000u, stats.sizeOnDisk());
+        EXPECT_EQUAL(&stats.add(SearchableStats().memoryUsage(100).docsInMemory(10).sizeOnDisk(1000)), &stats);
+        EXPECT_EQUAL(200u, stats.memoryUsage());  // the second add() accumulates on top of the first
+        EXPECT_EQUAL(20u, stats.docsInMemory());
+        EXPECT_EQUAL(2000u, stats.sizeOnDisk());
+    }
+    TEST_DONE();
+}
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/util/sigbushandler/.gitignore b/searchlib/src/tests/util/sigbushandler/.gitignore
new file mode 100644
index 00000000000..ab5a59f3296
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/.gitignore
@@ -0,0 +1 @@
+searchlib_sigbushandler_test_app
diff --git a/searchlib/src/tests/util/sigbushandler/CMakeLists.txt b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt
new file mode 100644
index 00000000000..a55e1e354c7
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sigbushandler_test_app
+    SOURCES
+    sigbushandler_test.cpp
+    DEPENDS
+    searchlib_test
+    searchlib
+)
+vespa_add_test(NAME searchlib_sigbushandler_test_app COMMAND searchlib_sigbushandler_test_app)
diff --git a/searchlib/src/tests/util/sigbushandler/DESC b/searchlib/src/tests/util/sigbushandler/DESC
new file mode 100644
index 00000000000..03ee94c85ba
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/DESC
@@ -0,0 +1 @@
+SigBusHandler test. Take a look at sigbushandler_test.cpp for details.
diff --git a/searchlib/src/tests/util/sigbushandler/FILES b/searchlib/src/tests/util/sigbushandler/FILES
new file mode 100644
index 00000000000..0998cd8a784
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/FILES
@@ -0,0 +1 @@
+sigbushandler_test.cpp
diff --git a/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp
new file mode 100644
index 00000000000..af657420575
--- /dev/null
+++ b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("sigbushandler_test");
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::test::statefile;
+using namespace search::test::statestring;
+
+namespace search
+{
+
+using strvec = std::vector<vespalib::string>;  // list of state lines; built from string literals in the tests below
+
+namespace
+{
+
+bool
+assertHistory(std::vector<vespalib::string> &exp,
+              std::vector<vespalib::string> &act)
+{
+    if (!EXPECT_EQUAL(exp.size(), act.size())) {  // differing lengths: report and skip the element comparison
+        return false;
+    }
+    for (size_t i = 0; i < exp.size(); ++i) {
+        if (!EXPECT_EQUAL(exp[i], act[i])) {  // stop at the first entry that differs
+            return false;
+        }
+    }
+    return true;  // same length and every entry equal
+}
+
+}
+
+
+TEST("Test that sigbus handler can be instantated")
+{
+    StateFile::erase("state");  // start without files left behind by an earlier run
+    StateFile sf("state");
+    SigBusHandler sbh(&sf);
+    EXPECT_FALSE(sbh.fired());  // no SIGBUS has been raised yet
+}
+
+
+TEST("Test that sigbus handler can trap synthetic sigbus")
+{
+    StateFile::erase("state");
+    StateFile sf("state");
+    SigBusHandler sbh(&sf);
+    EXPECT_FALSE(sbh.fired());
+    sigjmp_buf sjb;
+    if (sigsetjmp(sjb, 1) == 0) {  // 0 is the direct call; any later return is a jump back to sjb
+        sbh.setUnwind(&sjb);  // hand the jump buffer to the handler
+        kill(getpid(), SIGBUS);  // raise the signal by hand instead of through a faulting access
+        LOG(error, "Should never get here");
+        abort();
+    }
+    EXPECT_TRUE(sbh.fired());
+    {
+        vespalib::string act = readState(sf);
+        normalizeTimestamp(act);  // the expectation below uses ts=0.0
+        EXPECT_EQUAL("state=down ts=0.0 operation=sigbus errno=0 code=0\n",
+                     act);
+    }
+    {
+        strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=0\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act));  // the history must hold exactly this one entry
+    }
+}
+
+TEST("Test that sigbus handler can trap normal sigbus")
+{
+    StateFile::erase("state");
+    StateFile sf("state");
+    SigBusHandler sbh(&sf);
+    EXPECT_FALSE(sbh.fired());
+
+    int fd = open("mmapfile", O_CREAT | O_TRUNC | O_RDWR, 0644);  // created or truncated, so the file is empty
+    assert(fd >= 0);
+    void *mmapres = mmap(nullptr, 4096, PROT_READ | PROT_WRITE,
+                         MAP_SHARED, fd, 0);  // map 4096 bytes of that empty file
+    assert(mmapres != nullptr);
+    assert(mmapres != reinterpret_cast<void *>(-1l));  // (void *) -1 is how mmap reports failure
+    char *p = reinterpret_cast<char *>(mmapres) + 42;  // address inside the mapping but past the end of the file
+    volatile char r = 0;
+    sigjmp_buf sjb;
+    if (sigsetjmp(sjb, 1) == 0) {  // 0 is the direct call; any later return is a jump back to sjb
+        sbh.setUnwind(&sjb);
+        r = *p;  // faulting read that is expected to raise SIGBUS
+        LOG(error, "Should never get here");
+        abort();
+    }
+    EXPECT_TRUE(sbh.fired());
+    EXPECT_TRUE(r == '\0');  // the read never completed, so r keeps its initial value
+    {
+        vespalib::string act = readState(sf);
+        vespalib::string exp ="state=down ts=0.0 operation=sigbus errno=0 "
+                         "code=2 addr=0x0000000000000000\n";
+        normalizeAddr(exp, p);  // presumably patches the faulting address p into the expected text
+        normalizeTimestamp(act);
+        EXPECT_EQUAL(exp, act);
+    }
+    {
+        strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=2 "
+                            "addr=0x0000000000000000\n" });
+        normalizeAddrs(exp, p);
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        normalizeTimestamps(act);
+        TEST_DO(assertHistory(exp, act));  // the history must hold exactly this one entry
+    }
+}
+
+}
+
+TEST_MAIN()
+{
+    TEST_RUN_ALL();
+    search::StateFile::erase("state");  // clean up the state files the tests created
+    unlink("mmapfile");  // and the backing file of the mmap test
+}
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore
new file mode 100644
index 00000000000..51a916d8333
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore
@@ -0,0 +1 @@
+searchlib_slime_output_raw_buf_adapter_test_app
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt
new file mode 100644
index 00000000000..0735511fe7a
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_slime_output_raw_buf_adapter_test_app
+    SOURCES
+    slime_output_raw_buf_adapter_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_slime_output_raw_buf_adapter_test_app COMMAND searchlib_slime_output_raw_buf_adapter_test_app)
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES
new file mode 100644
index 00000000000..5870aa61349
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES
@@ -0,0 +1 @@
+slime_output_raw_buf_adapter_test.cpp
diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp
new file mode 100644
index 00000000000..5d48520a92c
--- /dev/null
+++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+
+using namespace search;
+using namespace vespalib::slime::convenience;
+
+TEST("use slime with rawbuf") {
+    RawBuf buffer(4096);
+    Slime src;
+    Slime dst;
+    {
+        Cursor &c = src.setObject();  // src becomes an object with two fields; dst is left untouched
+        c.setLong("foo", 5);
+        c.setString("bar", "text");
+    }
+    EXPECT_NOT_EQUAL(src, dst);  // sanity check before the round trip
+    SlimeOutputRawBufAdapter adapter(buffer);  // the encoder writes into the RawBuf through this adapter
+    vespalib::slime::BinaryFormat::encode(src, adapter);
+    vespalib::slime::BinaryFormat::decode(Memory(buffer.GetDrainPos(), buffer.GetUsedLen()), dst);  // decode the bytes now held by the RawBuf
+    EXPECT_EQUAL(src, dst);  // the round trip must reproduce src
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }  // program entry point for the TEST() case in this file
diff --git a/searchlib/src/tests/util/statebuf/.gitignore b/searchlib/src/tests/util/statebuf/.gitignore
new file mode 100644
index 00000000000..270347c1d6b
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/.gitignore
@@ -0,0 +1 @@
+searchlib_statebuf_test_app
diff --git a/searchlib/src/tests/util/statebuf/CMakeLists.txt b/searchlib/src/tests/util/statebuf/CMakeLists.txt
new file mode 100644
index 00000000000..0b24cd1552b
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_statebuf_test_app
+    SOURCES
+    statebuf_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_statebuf_test_app COMMAND searchlib_statebuf_test_app)
diff --git a/searchlib/src/tests/util/statebuf/DESC b/searchlib/src/tests/util/statebuf/DESC
new file mode 100644
index 00000000000..6368f32bcc6
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/DESC
@@ -0,0 +1 @@
+statebuf test. Take a look at statebuf_test.cpp for details.
diff --git a/searchlib/src/tests/util/statebuf/FILES b/searchlib/src/tests/util/statebuf/FILES
new file mode 100644
index 00000000000..c651bb2758f
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/FILES
@@ -0,0 +1 @@
+statebuf_test.cpp
diff --git a/searchlib/src/tests/util/statebuf/statebuf_test.cpp b/searchlib/src/tests/util/statebuf/statebuf_test.cpp
new file mode 100644
index 00000000000..952412b9eb6
--- /dev/null
+++ b/searchlib/src/tests/util/statebuf/statebuf_test.cpp
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("statebuf_test");
+#include 
+#include 
+#include 
+#include 
+
+namespace search
+{
+
+namespace
+{
+
+
+}
+
+class Fixture : public StateBuf
+{
+    char _buf[1024];  // backing storage handed to the StateBuf base
+
+public:
+    Fixture()
+        : StateBuf(_buf, sizeof(_buf))  // NOTE(review): the base receives _buf before members are set up; fine if it only keeps the pointer — confirm
+    {
+    }
+};
+
+TEST_F("single character can be appended to stream", Fixture)
+{
+    f << 'H' << 'e' << 'l' << 'l' << 'o';  // f is the Fixture, i.e. the StateBuf under test
+    EXPECT_EQUAL("Hello", f.str());
+}
+
+
+TEST_F("strings can be appended to stream", Fixture)
+{
+    f << "Hello world";
+    EXPECT_EQUAL("Hello world", f.str());  // plain strings are expected to come back unchanged
+}
+
+TEST_F("quoted strings can be appended to stream", Fixture)
+{
+    f.appendQuoted("This is a quoting test, \\ \" \n oops");  // input holds a backslash, a double quote and a newline
+    EXPECT_EQUAL("\"This is a quoting test, \\\\ \\\" \\n oops\"", f.str());  // each is expected escaped, with the whole text wrapped in double quotes
+}
+
+TEST_F("keys can be appended to stream", Fixture)
+{
+    (f.appendKey("foo") << "fooval").appendKey("bar") << "barval";  // appendKey() is chained with operator<<
+    EXPECT_EQUAL("foo=fooval bar=barval", f.str());  // keys come out as key=, with a space before the second one
+}
+
+
+TEST_F("integers can be appended to stream", Fixture)
+{
+    f << (UINT64_C(1) << 63) << " " << -42l << " " << 0l;  // an unsigned value above the signed 64-bit range, a negative one and zero
+    EXPECT_EQUAL("9223372036854775808 -42 0", f.str());
+}
+
+TEST_F("struct timespec can be appended to stream", Fixture)
+{
+    struct timespec ts;
+    ts.tv_sec = 15;
+    ts.tv_nsec = 256;
+    f << ts;
+    EXPECT_EQUAL("15.000000256", f.str());  // seconds, a dot, then nanoseconds zero-padded to nine digits
+}
+
+TEST_F("timestamp can be appended to stream", Fixture)
+{
+    struct timespec ts;
+    ts.tv_sec = 16;
+    ts.tv_nsec = 257;
+    f.appendTimestamp(ts);
+    EXPECT_EQUAL("ts=16.000000257", f.str());  // same number format as operator<<, prefixed with the key "ts="
+}
+
+
+TEST_F("hexadecimal numbers can be appended to stream", Fixture)
+{
+    (f.appendHex(0xdeadbeefcafebabeul) << " ").appendHex(0x123456789abcdef0ul);  // two 64-bit values separated by a space
+    EXPECT_EQUAL("0xdeadbeefcafebabe 0x123456789abcdef0", f.str());  // lower-case hex with a 0x prefix is expected
+
+}
+
+TEST_F("pointer address can be appended to stream", Fixture)
+{
+    f.appendAddr(nullptr);
+    f.appendAddr(reinterpret_cast<void *>(0x12345ul));  // arbitrary pointer value, never dereferenced
+    EXPECT_EQUAL("addr=0x0000000000000000 addr=0x0000000000012345", f.str());  // key "addr=", 16 hex digits, space before the second key
+}
+
+
+TEST_F("base and size methods can be called on stream", Fixture)
+{
+    f << "Hello world\n";
+    std::string s(f.base(), f.base() + f.size());  // copy the range [base, base + size) without going through str()
+    EXPECT_EQUAL("Hello world\n", s);
+}
+
+}
+
+
+TEST_MAIN()
+{
+    TEST_RUN_ALL();  // program entry point for the TEST_F() cases in this file
+}
diff --git a/searchlib/src/tests/util/statefile/.gitignore b/searchlib/src/tests/util/statefile/.gitignore
new file mode 100644
index 00000000000..504b7431a7a
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/.gitignore
@@ -0,0 +1 @@
+searchlib_statefile_test_app
diff --git a/searchlib/src/tests/util/statefile/CMakeLists.txt b/searchlib/src/tests/util/statefile/CMakeLists.txt
new file mode 100644
index 00000000000..b90b87fce7d
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_statefile_test_app
+    SOURCES
+    statefile_test.cpp
+    DEPENDS
+    searchlib_test
+    searchlib
+)
+vespa_add_test(NAME searchlib_statefile_test_app COMMAND searchlib_statefile_test_app)
diff --git a/searchlib/src/tests/util/statefile/DESC b/searchlib/src/tests/util/statefile/DESC
new file mode 100644
index 00000000000..6368f32bcc6
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/DESC
@@ -0,0 +1 @@
+statefile test. Take a look at statefile_test.cpp for details.
diff --git a/searchlib/src/tests/util/statefile/FILES b/searchlib/src/tests/util/statefile/FILES
new file mode 100644
index 00000000000..c651bb2758f
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/FILES
@@ -0,0 +1 @@
+statefile_test.cpp
diff --git a/searchlib/src/tests/util/statefile/statefile_test.cpp b/searchlib/src/tests/util/statefile/statefile_test.cpp
new file mode 100644
index 00000000000..583d21e1cec
--- /dev/null
+++ b/searchlib/src/tests/util/statefile/statefile_test.cpp
@@ -0,0 +1,294 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP("statefile_test");
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+using namespace search::test::statefile;
+
+namespace search
+{
+
+namespace
+{
+
+bool
+hasFile(const char *name)
+{
+    return 0 == access(name, R_OK | W_OK);  // true only when the file can be both read and written
+}
+
+
+void
+addState(StateFile &sf, const char *buf)
+{
+    // Adds the NUL-terminated text buf as a new state, with the last argument set to false.
+    sf.addState(buf, strlen(buf), false);
+}
+
+void
+addSignalState(StateFile &sf, const char *buf)
+{
+    // Same as addState() but with the last argument set to true; the tests use it for the async signal safe path.
+    sf.addState(buf, strlen(buf), true);
+}
+
+
+bool
+assertHistory(std::vector<vespalib::string> &exp,
+              std::vector<vespalib::string> &act)
+{
+    if (!EXPECT_EQUAL(exp.size(), act.size())) {  // differing lengths: report and skip the element comparison
+        return false;
+    }
+    for (size_t i = 0; i < exp.size(); ++i) {
+        if (!EXPECT_EQUAL(exp[i], act[i])) {  // stop at the first entry that differs
+            return false;
+        }
+    }
+    return true;  // same length and every entry equal
+}
+
+
+int64_t
+getSize(const char *name)
+{
+    // Size of the named file in bytes; 0 when stat() fails.
+    struct stat info;
+    const bool statOk = (stat(name, &info) == 0);
+    return statOk ? info.st_size : 0;
+}
+
+
+void
+setSize(const char *name, int64_t newSize)
+{
+    int truncRes = truncate(name, newSize);  // cut (or extend) the file to exactly newSize bytes
+    assert(truncRes == 0);  // NOTE(review): compiled out under NDEBUG, leaving truncRes unused and a failed truncate() unnoticed
+}
+
+
+}
+
+
+TEST("Test lock free atomic int used by async signal safe lock primitive")
+{
+    std::atomic<int> f;  // only queried, never read, so no initial value is needed
+    ASSERT_TRUE(f.is_lock_free());  // the test name ties this property to the async signal safe lock
+}
+
+
+TEST("Test that statefile can be created")
+{
+    StateFile::erase("state");
+    EXPECT_FALSE(hasFile("state"));
+    EXPECT_FALSE(hasFile("state.history"));
+    StateFile sf("state");  // constructing the object creates both files
+    EXPECT_TRUE(hasFile("state"));
+    EXPECT_TRUE(hasFile("state.history"));
+    EXPECT_EQUAL(0, sf.getGen());  // no state has been added yet
+    StateFile::erase("state");  // removes the state file and its history
+    EXPECT_FALSE(hasFile("state"));
+    EXPECT_FALSE(hasFile("state.history"));
+    StateFile::erase("state");  // erasing when nothing exists must be harmless
+    EXPECT_FALSE(hasFile("state"));
+    EXPECT_FALSE(hasFile("state.history"));
+}
+
+
+TEST("Test that statefile can add event")
+{
+    StateFile::erase("state");  // start from scratch
+    StateFile sf("state");
+
+    addState(sf, "Hello world\n");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Hello world\n", check);  // the current state is the text just added
+    EXPECT_EQUAL(1, sf.getGen());  // one addState() call, generation 1
+}
+
+TEST("Test that history is appended to")
+{
+    StateFile::erase("state");
+    StateFile sf("state");
+
+    addState(sf, "Hello world\n");
+    addState(sf, "Foo bar\n");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Foo bar\n", check);  // the state file holds only the latest entry
+    EXPECT_EQUAL(2, sf.getGen());
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // the history keeps both entries, oldest first
+    }
+}
+
+
+TEST("Test that truncated history is truncated at event boundary")
+{
+    StateFile::erase("state");
+    int64_t histSize = 1;
+    {
+        StateFile sf("state");
+        addState(sf, "Hello world\n");
+        addState(sf, "Foo bar\n");
+        EXPECT_EQUAL(2, sf.getGen());
+        histSize = getSize("state.history");
+        EXPECT_EQUAL(20, histSize);  // 12 bytes "Hello world\n" + 8 bytes "Foo bar\n"
+        addState(sf, "zap\n");
+        EXPECT_EQUAL(3, sf.getGen());
+    }
+    // Lose 2 last events in history
+    setSize("state.history", histSize - 1);  // 19 bytes: drops "zap\n" and cuts "Foo bar\n" mid-entry
+    // Last event is restored to history from main state file
+    StateFile sf("state");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("zap\n", check);
+    EXPECT_EQUAL(0, sf.getGen());  // a newly opened StateFile starts at generation 0
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // the partial "Foo bar" entry is gone, "zap\n" is back
+    }
+}
+
+
+TEST("Test that async signal safe path adds event")
+{
+    StateFile::erase("state");
+    StateFile sf("state");
+
+    addSignalState(sf, "Hello world\n");  // same scenario as the history test, but through addSignalState()
+    addSignalState(sf, "Foo bar\n");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Foo bar\n", check);
+    EXPECT_EQUAL(2, sf.getGen());
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // both entries must be in the history, oldest first
+    }
+}
+
+
+TEST("Test that state file can be restored from history")
+{
+    StateFile::erase("state");
+    {
+        StateFile sf("state");
+        addState(sf, "Hello world\n");
+        addState(sf, "Foo bar\n");
+        EXPECT_EQUAL(2, sf.getGen());
+    }
+    // Lose event in main state file
+    setSize("state", 0);
+    EXPECT_EQUAL(0, getSize("state"));
+    // Main state file is restored from the last event in history
+    StateFile sf("state");
+    EXPECT_NOT_EQUAL(0, getSize("state"));  // reopening has written the state file again
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Foo bar\n", check);
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // the history itself is unchanged
+    }
+}
+
+
+TEST("Test that different entry is added to history")
+{
+    StateFile::erase("state");
+    {
+        StateFile sf("state");
+        addState(sf, "Hello world\n");
+        EXPECT_EQUAL(1, sf.getGen());
+    }
+    // Write changed entry to main state file
+    {
+        std::ofstream of("state");  // overwrites the state file behind StateFile's back
+        of << "zap\n";
+    }
+    // Add changed event to history
+    StateFile sf("state");
+    EXPECT_NOT_EQUAL(0, getSize("state"));
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("zap\n", check);  // the externally written entry is kept as the current state
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // and it has been appended to the history
+    }
+}
+
+
+TEST("Test that state history stops at NUL byte")
+{
+    StateFile::erase("state");
+    {
+        StateFile sf("state");
+        addState(sf, "Hello world\n");
+        addState(sf, "Foo bar\n");
+        EXPECT_EQUAL(2, sf.getGen());
+    }
+    // Corrupt history state file
+    {
+        char buf[1];
+        buf[0] = '\0';
+        std::ofstream of("state.history");  // replaces the history with a single NUL byte
+        of.write(&buf[0], 1);
+    }
+    StateFile sf("state");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Foo bar\n", check);  // the main state file was not touched
+    {
+        std::vector<vespalib::string> exp({ "Foo bar\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // the history now holds only the current state
+    }
+
+}
+
+TEST("Test that main state stops at NUL byte")
+{
+    StateFile::erase("state");
+    {
+        StateFile sf("state");
+        addState(sf, "Hello world\n");
+        addState(sf, "Foo bar\n");
+        EXPECT_EQUAL(2, sf.getGen());
+    }
+    // Corrupt main state file
+    {
+        char buf[10];
+        strcpy(buf, "zap");
+        std::ofstream of("state");
+        of.write(&buf[0], strlen(buf) + 1);  // "zap" plus its NUL terminator, with no trailing newline
+    }
+    StateFile sf("state");
+    vespalib::string check = readState(sf);
+    EXPECT_EQUAL("Foo bar\n", check);  // the corrupt content is discarded; the last history entry is the state again
+    {
+        std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" });
+        std::vector<vespalib::string> act(readHistory("state.history"));
+        TEST_DO(assertHistory(exp, act));  // the history is unchanged
+    }
+
+}
+
+}
+
+TEST_MAIN()
+{
+    TEST_RUN_ALL();
+    search::StateFile::erase("state");  // remove the state files left by the last test
+}
diff --git a/searchlib/src/vespa/searchlib/.gitignore b/searchlib/src/vespa/searchlib/.gitignore
new file mode 100644
index 00000000000..42f98e8f86d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+config.h
+/libsearchlib.so.5.1
diff --git a/searchlib/src/vespa/searchlib/CMakeLists.txt b/searchlib/src/vespa/searchlib/CMakeLists.txt
new file mode 100644
index 00000000000..6b94631cdeb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib
+    SOURCES
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    $
+    INSTALL lib64
+    DEPENDS
+    searchlib_features
+    searchlib_query
+    searchlib_queryeval
+    searchlib_queryeval_test
+    staging_vespalib
+)
diff --git a/searchlib/src/vespa/searchlib/aggregation/.gitignore b/searchlib/src/vespa/searchlib/aggregation/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt b/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt
new file mode 100644
index 00000000000..921a20bb466
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_aggregation OBJECT
+    SOURCES
+    aggregation.cpp
+    fs4hit.cpp
+    group.cpp
+    grouping.cpp
+    groupinglevel.cpp
+    hit.cpp
+    hitlist.cpp
+    hitsaggregationresult.cpp
+    modifiers.cpp
+    rawrank.cpp
+    vdshit.cpp
+    DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/aggregation/OWNERS b/searchlib/src/vespa/searchlib/aggregation/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp b/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp
new file mode 100644
index 00000000000..07c21137b1d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregation.cpp
@@ -0,0 +1,448 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "expressioncountaggregationresult.h"
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using namespace expression;
+
+namespace aggregation {
+
+namespace {
+
+bool isReady(const ResultNode *myRes, const ResultNode &ref) {
+    return (myRes != nullptr) && (myRes->getClass().id() == ref.getClass().id());  // a node exists and has the same class id as ref
+}
+
+} // namespace search::aggregation::
+
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+#define IMPLEMENT_ABSTRACT_AGGREGATIONRESULT(cclass, base) IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, cclass, base)
+#define IMPLEMENT_AGGREGATIONRESULT(cclass, base) IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, cclass, base)
+
+IMPLEMENT_ABSTRACT_AGGREGATIONRESULT(AggregationResult, ExpressionNode);  // the abstract base, in namespace search::aggregation
+IMPLEMENT_AGGREGATIONRESULT(CountAggregationResult,   AggregationResult);  // concrete result types, all with AggregationResult as base
+IMPLEMENT_AGGREGATIONRESULT(SumAggregationResult,     AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(MaxAggregationResult,     AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(MinAggregationResult,     AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(AverageAggregationResult, AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(XorAggregationResult,     AggregationResult);
+IMPLEMENT_AGGREGATIONRESULT(ExpressionCountAggregationResult,
+                            AggregationResult);
+
+bool AggregationResult::Configure::check(const vespalib::Identifiable &obj) const
+{
+    return obj.inherits(AggregationResult::classId);  // presumably true for AggregationResult and its subclasses only
+}
+
+void AggregationResult::Configure::execute(vespalib::Identifiable &obj)
+{
+    AggregationResult & a(static_cast<AggregationResult &>(obj));  // unchecked downcast; presumably only reached for objects check() accepted
+    a.prepare();
+}
+
+AggregationResult &
+AggregationResult::setExpression(const ExpressionNode::CP &expr)
+{
+    _expressionTree.reset(new ExpressionTree(expr));  // wrap expr in a new tree, replacing any previous one
+    prepare(&_expressionTree->getResult(), false);  // prepare against the tree's result; false presumably ends up as useForInit
+    return *this;  // allows call chaining
+}
+
+void CountAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    (void) result;  // nothing to prepare for a count; the casts only mark both arguments as unused
+    (void) useForInit;
+}
+
+void SumAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    if (isReady(_sum.get(), result)) {  // _sum already has the class of result: keep it
+        return;
+    }
+    _sum.reset(dynamic_cast<NumericResultNode *>(result.createBaseType().release()));  // new accumulator; a failed cast would leave _sum null
+    if ( useForInit ) {
+        _sum->set(result);  // seed the sum with the given value
+    }
+}
+
+void MinAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    if (isReady(_min.get(), result)) {  // _min already has the class of result: keep it
+        return;
+    }
+    _min.reset(dynamic_cast<SingleResultNode *>(result.createBaseType().release()));  // new holder; a failed cast would leave _min null
+    if ( !useForInit ) {
+        _min->setMax();  // start at the largest value so any aggregated value can lower it
+    } else {
+        _min->set(result);  // start from the given value
+    }
+}
+
+void MaxAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    if (isReady(_max.get(), result)) {  // _max already has the class of result: keep it
+        return;
+    }
+    _max.reset(dynamic_cast<SingleResultNode *>(result.createBaseType().release()));  // new holder; a failed cast would leave _max null
+    if ( !useForInit ) {
+        _max->setMin();  ///Should figure out how to set min too for float.
+    } else {
+        _max->set(result);  // start from the given value
+    }
+}
+
+void AverageAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    if (isReady(_sum.get(), result)) {  // _sum already has the class of result: keep it
+        return;
+    }
+    _sum.reset(dynamic_cast<NumericResultNode *>(result.createBaseType().release()));  // new accumulator; a failed cast would leave _sum null
+    if ( useForInit ) {
+        _sum->set(result);  // seed the sum; _count is not touched here
+    }
+}
+
+void XorAggregationResult::onPrepare(const ResultNode & result, bool useForInit)
+{
+    (void) result;  // nothing to prepare here; the casts only mark both arguments as unused
+    (void) useForInit;
+}
+
+void SumAggregationResult::onMerge(const AggregationResult & b)
+{
+    _sum->add(*static_cast<const SumAggregationResult &>(b)._sum);  // b is taken to be a SumAggregationResult (unchecked); add its sum to ours
+}
+
+void SumAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        static_cast<const ResultNodeVector &>(result).flattenSum(*_sum);  // multi-value: the vector folds its elements into _sum
+    } else {
+        _sum->add(result);  // single value: add it directly
+    }
+}
+
+void SumAggregationResult::onReset()
+{
+    _sum.reset(static_cast<NumericResultNode *>(_sum->getClass().create()));  // replace the sum with a newly created node of the same class
+}
+
+void CountAggregationResult::onMerge(const AggregationResult & b)
+{
+    _count.add(static_cast<const CountAggregationResult &>(b)._count);  // b is taken to be a CountAggregationResult (unchecked); add its count
+}
+
+void CountAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        _count += static_cast(result).size();   // a multi-value result counts once per element
+    } else {
+        ++_count;                               // a single value counts once
+    }
+}
+
+void CountAggregationResult::onReset()
+{
+    setCount(0);   // back to an empty count
+}
+
+void MaxAggregationResult::onMerge(const AggregationResult & b)
+{
+    _max->max(*static_cast(b)._max);   // combine with the other result's maximum; b is cast without a runtime check
+}
+
+void MaxAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        static_cast(result).flattenMax(*_max);   // multi-value: the result itself folds its elements into _max
+    } else {
+        _max->max(result);                       // single value: combine it directly
+    }
+}
+
+void MaxAggregationResult::onReset()
+{
+    _max.reset(static_cast(_max->getClass().create()));   // swap in a newly created object of the same class
+    _max->setMin();                                       // same starting state as the unseeded branch of onPrepare()
+}
+
+void MinAggregationResult::onMerge(const AggregationResult & b)
+{
+    _min->min(*static_cast(b)._min);   // combine with the other result's minimum; b is cast without a runtime check
+}
+
+void MinAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        static_cast(result).flattenMin(*_min);   // multi-value: the result itself folds its elements into _min
+    } else {
+        _min->min(result);                       // single value: combine it directly
+    }
+}
+
+void MinAggregationResult::onReset()
+{
+    _min.reset(static_cast(_min->getClass().create()));   // swap in a newly created object of the same class
+    _min->setMax();                                       // same starting state as the unseeded branch of onPrepare()
+}
+
+void AverageAggregationResult::onMerge(const AggregationResult & b)
+{
+    const AverageAggregationResult & avg(static_cast(b));   // b is cast without a runtime check
+    _sum->add(*avg._sum);     // sums and counts are merged separately; the average is derived in getAverage()
+    _count += avg._count;
+}
+
+void AverageAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        static_cast(result).flattenSum(*_sum);   // multi-value: every element is added to the sum ...
+        _count += static_cast(result).size();    // ... and counted as one sample each
+    } else {
+        _sum->add(result);
+        _count++;
+    }
+}
+
+void AverageAggregationResult::onReset()
+{
+    _count = 0;
+    _sum.reset(static_cast(_sum->getClass().create()));   // swap in a newly created object of the same class as the current sum
+}
+
+const NumericResultNode & AverageAggregationResult::getAverage() const
+{   // The result is built in the mutable scratch pad, starting from the current sum.
+    _averageScratchPad = _sum;
+    if ( _count == 0 ) {
+        _averageScratchPad->set(Int64ResultNode(0));           // no samples: report zero
+    } else {
+        _averageScratchPad->divide(Int64ResultNode(_count));   // sum divided by the sample count
+    }
+    return *_averageScratchPad;
+}
+
+void XorAggregationResult::onMerge(const AggregationResult & b)
+{
+    _xor.xorOp(static_cast(b)._xor);   // fold the other result's xor into this one; b is cast without a runtime check
+}
+
+void XorAggregationResult::onAggregate(const ResultNode & result)
+{
+    if (result.isMultiValue()) {
+        for (size_t i(0), m(static_cast(result).size()); i < m; i++) {   // multi-value: fold in every element
+            _xor.xorOp(static_cast(result).get(i));
+        }
+    } else {
+        _xor.xorOp(result);                                              // single value: fold it in directly
+    }
+}
+
+void XorAggregationResult::onReset()
+{
+    _xor = 0;   // zero is the starting value of the xor accumulator
+}
+
+static FieldBase _G_tagField("tag");   // field descriptor used when (de)serializing AggregationResult::_tag
+
+Serializer & AggregationResult::onSerialize(Serializer & os) const
+{
+    return (os << *_expressionTree).put(_G_tagField, _tag);   // wire order: expression tree, then tag
+}
+
+Deserializer & AggregationResult::onDeserialize(Deserializer & is)
+{
+    _expressionTree.reset(new ExpressionTree());              // read into a new tree instead of the current one
+    return (is >> *_expressionTree).get(_G_tagField, _tag);   // same order as onSerialize(): expression tree, then tag
+}
+
+void
+AggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    visit(visitor, "expression", _expressionTree);   // only the expression tree is exposed; _tag is not visited
+}
+
+void AggregationResult::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+    _expressionTree->select(predicate,operation);   // forward the selection to the expression tree
+}
+
+Serializer & CountAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);   // base part first
+    return _count.serialize(os);          // then the count
+}
+
+Deserializer & CountAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);   // base part first
+    return _count.deserialize(is);          // then the count, mirroring onSerialize()
+}
+
+void
+CountAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "count", _count);
+}
+
+Serializer & SumAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);   // base part first
+    return os << _sum;                    // then the sum
+}
+
+Deserializer & SumAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);   // base part first
+    return is >> _sum;                      // then the sum, mirroring onSerialize()
+}
+
+void
+SumAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "sum", _sum);
+}
+
+Serializer & MinAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);   // base part first
+    return os << _min;                    // then the minimum
+}
+
+Deserializer & MinAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);   // base part first
+    return is >> _min;                      // then the minimum, mirroring onSerialize()
+}
+
+void
+MinAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "min", _min);
+}
+
+Serializer & MaxAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);   // base part first
+    return os << _max;                    // then the maximum
+}
+
+Deserializer & MaxAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);   // base part first
+    return is >> _max;                      // then the maximum, mirroring onSerialize()
+}
+
+void
+MaxAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "max", _max);
+}
+
+static FieldBase _G_countField("count");   // field descriptor used when (de)serializing AverageAggregationResult::_count
+static FieldBase _G_sumField("sum");       // NOTE(review): not referenced in the visible part of this file - confirm it is still needed
+
+Serializer & AverageAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);             // base part first
+    return os.put(_G_countField, _count) << _sum;   // then count followed by sum; the average itself is not stored
+}
+
+Deserializer & AverageAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);           // base part first
+    return is.get(_G_countField, _count) >> _sum;   // then count followed by sum, mirroring onSerialize()
+}
+
+void
+AverageAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "count", _count);
+    visit(visitor, "sum", _sum);
+}
+
+Serializer & XorAggregationResult::onSerialize(Serializer & os) const
+{
+    AggregationResult::onSerialize(os);   // base part first
+    return _xor.serialize(os);            // then the xor value
+}
+
+Deserializer & XorAggregationResult::onDeserialize(Deserializer & is)
+{
+    AggregationResult::onDeserialize(is);   // base part first
+    return _xor.deserialize(is);            // then the xor value, mirroring onSerialize()
+}
+
+void
+XorAggregationResult::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AggregationResult::visitMembers(visitor);   // base members first
+    visit(visitor, "xor", _xor);
+}
+
+namespace {
+// Rank of a sketch: getSize() for a sparse sketch, otherwise the sum of all buckets.
+template 
+int calculateRank(const Sketch &sketch) {
+    if (sketch.getClassId() == SparseSketch::classId) {   // the class id tells the two sketch kinds apart
+        return static_cast&>(sketch)
+            .getSize();
+    }
+    const auto &normal = static_cast&>(sketch);   // bind by reference: plain 'auto' would copy the whole sketch
+    int rank = 0;
+    for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) {
+        rank += normal.bucket[i];
+    }
+    return rank;
+}
+}  // namespace
+
+void ExpressionCountAggregationResult::onMerge(const AggregationResult &r) {
+    const ExpressionCountAggregationResult &result =
+        Identifiable::cast(r);
+    _hll.merge(result._hll);                      // combine the two sketches
+    _rank.set(calculateRank(_hll.getSketch()));   // recompute the rank from the merged sketch
+}
+void ExpressionCountAggregationResult::onAggregate(const ResultNode &result) {
+    const unsigned int seed = 42;
+    const size_t valueHash = result.hash();
+    size_t sketchHash = XXH32(&valueHash, sizeof(valueHash), seed);   // re-hash before feeding the sketch
+    // The rank is a maintained sum of all buckets. This should give
+    // almost the same ordering as the actual estimates.
+    _rank += _hll.aggregate(sketchHash);
+}
+void ExpressionCountAggregationResult::onReset() {
+    _hll = HyperLogLog();   // replace the sketch with a default-constructed one
+    _rank.set(0);           // and restart the maintained rank
+}
+Serializer &ExpressionCountAggregationResult::onSerialize(
+        Serializer &os) const {
+    AggregationResult::onSerialize(os);   // base part first
+    _hll.serialize(os);                   // then the sketch; _rank is not written
+    return os;
+}
+Deserializer &ExpressionCountAggregationResult::onDeserialize(
+        Deserializer &is) {
+    AggregationResult::onDeserialize(is);         // base part first
+    _hll.deserialize(is);                         // then the sketch
+    _rank.set(calculateRank(_hll.getSketch()));   // the rank is not on the wire; rebuild it from the sketch
+    return is;
+}
+}  // namespace aggregation
+}  // namespace search
+
+// Empty marker function added by ../../forcelink.sh; it is called from forcelink_searchlib_aggregation().
+void forcelink_file_searchlib_aggregation_aggregation() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregation.h b/searchlib/src/vespa/searchlib/aggregation/aggregation.h
new file mode 100644
index 00000000000..cedb5571c6e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregation.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h
new file mode 100644
index 00000000000..69e0fbd2145
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/aggregationresult.h
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+using search::expression::ExpressionTree;
+using search::expression::ExpressionNode;
+using search::expression::ResultNode;
+using search::expression::DocId;
+using search::expression::NumericResultNode;
+using search::expression::SingleResultNode;
+using search::expression::IntegerResultNode;
+using search::expression::Int64ResultNode;
+using search::expression::ConfigureStaticParams;
+
+#define DECLARE_ABSTRACT_AGGREGATIONRESULT(cclass)                  \
+    DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, cclass); \
+    private:                                                        \
+    public:   // class-body boilerplate for abstract aggregation classes; leaves the class in a public section
+
+#define DECLARE_AGGREGATIONRESULT(cclass)                                   \
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, cclass);                  \
+    DECLARE_NBO_SERIALIZE;                                                  \
+    virtual cclass *clone() const { return new cclass(*this); }             \
+    private:                                                                \
+    virtual void onMerge(const AggregationResult & b);                      \
+    virtual void onAggregate(const ResultNode &result);                     \
+    virtual void onReset();                                                 \
+    public:   // class-body boilerplate for concrete aggregators: clone() plus private hook declarations; ends in a public section
+
+// resultNodePrimitive : countHits | hits(INTEGER) | groups(INTEGER) | xor | sum | min | max | avg
+
+
+class AggregationResult : public expression::ExpressionNode   // abstract base: owns an expression tree and feeds its results to the on*() hooks
+{
+public:
+    DECLARE_NBO_SERIALIZE;
+    DECLARE_ABSTRACT_AGGREGATIONRESULT(AggregationResult);
+    class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate   // one object serving as both predicate and operation
+    {
+    private:
+        virtual void execute(vespalib::Identifiable &obj);
+        virtual bool check(const vespalib::Identifiable &obj) const;
+    };
+
+    virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+    virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+
+    void reset() { onReset(); }                               // public non-virtual entry points forward to the private hooks
+    void merge(const AggregationResult & b) { onMerge(b); }
+    virtual void postMerge() {}                               // default: nothing to do
+    void aggregate(const document::Document & doc, HitRank rank) {   // evaluate the expression for doc, then hand the result to onAggregate()
+        bool ok(_expressionTree->execute(doc, rank));
+        if (ok) {
+            onAggregate(_expressionTree->getResult(), doc, rank);
+        } else {
+            throw std::runtime_error(vespalib::make_string("aggregate(%s, %f) failed ", doc.getId().toString().c_str(), rank));   // failed evaluation is reported as std::runtime_error
+        }
+    }
+    void aggregate(DocId docId, HitRank rank) {                      // same as above, addressed by local document id
+        bool ok(_expressionTree->execute(docId, rank));
+        if (ok) {
+            onAggregate(_expressionTree->getResult(), docId, rank);
+        } else {
+            throw std::runtime_error(vespalib::make_string("aggregate(%u, %f) failed ", docId, rank));   // failed evaluation is reported as std::runtime_error
+        }
+    }
+    AggregationResult &setExpression(const ExpressionNode::CP &expr);
+    AggregationResult &setResult(const ResultNode::CP &result) {   // prepares with useForInit == true; a null result is ignored by prepare()
+        prepare(result.get(), true);
+        return *this;
+    }
+
+    const ResultNode & getRank() const { return onGetRank(); }
+    const ResultNode & getResult() const { return onGetRank(); }   // same value as getRank()
+    virtual ResultNode & getResult() { return const_cast(onGetRank()); }   // mutable access to the same node
+    virtual AggregationResult * clone() const = 0;
+    const ExpressionNode * getExpression() const { return _expressionTree->getRoot().get(); }   // root of the owned expression tree
+    ExpressionNode * getExpression() { return _expressionTree->getRoot().get(); }
+protected:
+    AggregationResult() : _expressionTree(new ExpressionTree()), _tag(-1) { }   // _tag is uint32_t, so -1 wraps to its maximum value
+private:
+    /// from expressionnode
+    virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+    /// from expressionnode
+    virtual bool onExecute() const  { return true; }
+
+    void prepare() { if (getExpression() != NULL) { prepare(&getExpression()->getResult(), false); } }   // prepare from the expression's own result, without seeding
+    void prepare(const ResultNode * result, bool useForInit) { if (result) { onPrepare(*result, useForInit); } }   // null results are skipped
+    virtual void onPrepare(const ResultNode & result, bool useForInit) = 0;   // hooks implemented by each concrete aggregator
+    virtual void onMerge(const AggregationResult & b) = 0;
+    virtual void onReset() = 0;
+    virtual void onAggregate(const ResultNode &result) = 0;
+    virtual const ResultNode & onGetRank() const = 0;
+    virtual void onAggregate(const ResultNode &result, const document::Document & doc, HitRank rank) {   // default drops doc and rank
+        (void) doc;
+        (void) rank;
+        onAggregate(result);
+    }
+    virtual void onAggregate(const ResultNode &result, DocId docId, HitRank rank) {   // default drops docId and rank
+        (void) docId;
+        (void) rank;
+        onAggregate(result);
+    }
+    search::expression::ExpressionTree::LP _expressionTree;   // expression evaluated per hit
+    uint32_t _tag;                                            // serialized next to the expression tree
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h
new file mode 100644
index 00000000000..928594acf0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/averageaggregationresult.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class AverageAggregationResult : public AggregationResult   // keeps a running sum and a sample count; the average is derived on demand
+{
+public:
+    DECLARE_AGGREGATIONRESULT(AverageAggregationResult);
+    AverageAggregationResult() : _sum(), _count(0) {}
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const NumericResultNode & getAverage() const;                  // returns a reference into _averageScratchPad
+    const NumericResultNode & getSum() const { return *_sum; }     // dereferences _sum without a null check
+    uint64_t getCount()                const { return _count; }
+private:
+    virtual const ResultNode & onGetRank() const { return getAverage(); }   // groups are ranked by the average
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    NumericResultNode::CP _sum;
+    uint64_t              _count;
+    mutable NumericResultNode::CP _averageScratchPad;   // written by the const getAverage(), hence mutable
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h
new file mode 100644
index 00000000000..a00e14f1e78
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/countaggregationresult.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class CountAggregationResult : public AggregationResult   // aggregator that counts the values it is given
+{
+public:
+    DECLARE_AGGREGATIONRESULT(CountAggregationResult);
+    CountAggregationResult() : AggregationResult(), _count(0) { }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    uint64_t getCount() const { return _count.get(); }
+    CountAggregationResult &setCount(uint64_t c) {   // returns *this so calls can be chained
+        _count = c;
+        return *this;
+    }
+private:
+    virtual const ResultNode & onGetRank() const { return _count; }   // groups are ranked by the count itself
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    Int64ResultNode _count;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h
new file mode 100644
index 00000000000..182a28f2ec6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/expressioncountaggregationresult.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "aggregationresult.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+/**
+ * Estimates the number of unique values of an expression that has
+ * been observed. This class keeps track of the raw data needed for
+ * estimation (the sketch). Actual estimation is done on the QR
+ * server.
+ */
+class ExpressionCountAggregationResult : public AggregationResult {
+    static const int PRECISION = 10;   // NOTE(review): presumably the sketch precision parameter - confirm against HyperLogLog
+
+    HyperLogLog _hll;        // raw sketch data used for the estimate
+    Int64ResultNode _rank;   // maintained by the on*() hooks; returned from onGetRank()
+
+    virtual const ResultNode & onGetRank() const { return _rank; }
+    virtual void onPrepare(const ResultNode &, bool) {}   // nothing to prepare
+public:
+    DECLARE_AGGREGATIONRESULT(ExpressionCountAggregationResult);
+    ExpressionCountAggregationResult() : AggregationResult(), _hll() { }   // _rank is left to its default constructor
+
+    virtual void visitMembers(vespalib::ObjectVisitor &) const {}   // exposes no members to visitors
+    const Sketch &getSketch() const
+    { return _hll.getSketch(); }
+};
+
+}  // namespace aggregation
+}  // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp b/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp
new file mode 100644
index 00000000000..4696d6f5c88
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/forcelink.hpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+void forcelink_file_searchlib_aggregation_grouping();
+void forcelink_file_searchlib_aggregation_modifiers();
+void forcelink_file_searchlib_aggregation_aggregation();
+void forcelink_file_searchlib_aggregation_hitlist();
+void forcelink_file_searchlib_aggregation_fs4hit();
+void forcelink_file_searchlib_aggregation_group();
+void forcelink_file_searchlib_aggregation_rawrank();
+void forcelink_file_searchlib_aggregation_hit();
+void forcelink_file_searchlib_aggregation_vdshit();
+void forcelink_file_searchlib_aggregation_hitsaggregationresult();
+void forcelink_file_searchlib_aggregation_groupinglevel();
+
+void forcelink_searchlib_aggregation() {   // calls the marker function of every aggregation source file declared above
+    forcelink_file_searchlib_aggregation_grouping();
+    forcelink_file_searchlib_aggregation_modifiers();
+    forcelink_file_searchlib_aggregation_aggregation();
+    forcelink_file_searchlib_aggregation_hitlist();
+    forcelink_file_searchlib_aggregation_fs4hit();
+    forcelink_file_searchlib_aggregation_group();
+    forcelink_file_searchlib_aggregation_rawrank();
+    forcelink_file_searchlib_aggregation_hit();
+    forcelink_file_searchlib_aggregation_vdshit();
+    forcelink_file_searchlib_aggregation_hitsaggregationresult();
+    forcelink_file_searchlib_aggregation_groupinglevel();
+}   // NOTE(review): non-inline definition in a header - presumably meant to be included from exactly one translation unit
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp b/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp
new file mode 100644
index 00000000000..f63f44d3a12
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/fs4hit.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "fs4hit.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+static FieldBase _G_pathField("path");                         // field descriptors for FS4Hit serialization and visiting
+static FieldBase _G_docIdField("docId");                       // used by visitMembers() only; docId is not serialized
+static FieldBase _G_globalIdField("globalId");                 // reused for every byte of the global id
+static FieldBase _G_distributionKeyField("distributionKey");
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, FS4Hit, Hit);
+
+Serializer &
+FS4Hit::onSerialize(Serializer &os) const
+{
+    Hit::onSerialize(os);
+    os.put(_G_pathField, _path);
+    const unsigned char * const gidBegin = _globalId.get();   // the global id is written one raw byte at a time
+    for (const unsigned char * p = gidBegin; p != gidBegin + document::GlobalId::LENGTH; ++p) {
+        os.put(_G_globalIdField, *p);
+    }
+    os.put(_G_distributionKeyField, _distributionKey);
+    return os;
+}
+
+Deserializer &
+FS4Hit::onDeserialize(Deserializer &is)
+{
+    Hit::onDeserialize(is);
+    is.get(_G_pathField, _path);
+    unsigned char rawGid[document::GlobalId::LENGTH];   // filled byte by byte, mirroring onSerialize()
+    for (unsigned char * p = rawGid; p != rawGid + document::GlobalId::LENGTH; ++p) {
+        is.get(_G_globalIdField, *p);
+    }
+    _globalId.set(rawGid);
+    is.get(_G_distributionKeyField, _distributionKey);
+    return is;
+}
+
+void
+FS4Hit::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    Hit::visitMembers(visitor);   // base members first
+    visit(visitor, _G_pathField.getName(), _path);
+    visit(visitor, _G_docIdField.getName(), _docId);   // docId is visited here although it is not serialized
+    visit(visitor, _G_globalIdField.getName(), _globalId.toString());
+    visit(visitor, _G_distributionKeyField.getName(), _distributionKey);
+}
+
+}
+}
+
+// Empty marker function added by ../../forcelink.sh; it is called from forcelink_searchlib_aggregation().
+void forcelink_file_searchlib_aggregation_fs4hit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/fs4hit.h b/searchlib/src/vespa/searchlib/aggregation/fs4hit.h
new file mode 100644
index 00000000000..b5392b25e72
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/fs4hit.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "hit.h"
+#include "aggregationresult.h"
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+class FS4Hit : public Hit   // hit carrying a path, a local doc id, a global id and a distribution key
+{
+private:
+    uint32_t _path;
+    uint32_t _docId;
+    document::GlobalId _globalId;
+    uint32_t _distributionKey;
+
+public:
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, FS4Hit);
+    DECLARE_NBO_SERIALIZE;
+    FS4Hit() : Hit(), _path(0), _docId(0), _globalId(), _distributionKey(-1) {}   // -1 wraps to the maximum uint32_t value
+    FS4Hit(DocId docId, HitRank rank)
+        : Hit(rank), _path(0), _docId(docId), _globalId(), _distributionKey(-1) {}
+    virtual FS4Hit *clone() const { return new FS4Hit(*this); }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    uint32_t getPath() const { return _path; }
+    FS4Hit &setPath(uint32_t val) { _path = val; return *this; }   // setters return *this so calls can be chained
+    uint32_t getDocId() const { return _docId; }
+    const document::GlobalId & getGlobalId() const { return _globalId; }
+    FS4Hit &setGlobalId(const document::GlobalId & globalId) { _globalId = globalId; return *this; }
+    FS4Hit &setDistributionKey(uint32_t val) { _distributionKey = val; return *this; }
+    bool operator < (const FS4Hit &b) const { return cmp(b) < 0; }   // ordering is delegated to cmp()
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/group.cpp b/searchlib/src/vespa/searchlib/aggregation/group.cpp
new file mode 100644
index 00000000000..84f5504ccf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/group.cpp
@@ -0,0 +1,671 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+LOG_SETUP(".searchlib.aggregation.group");
+
+namespace search {
+namespace aggregation {
+
+using search::expression::FloatResultNode;
+using search::expression::AggregationRefNode;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+namespace {
+
+struct SortByGroupId {   // comparator: true when a's id compares lower than b's
+    bool operator()(const Group::ChildP & a, const Group::ChildP & b) const {
+        return (a->cmpId(*b) < 0);
+    }
+};
+
+struct SortByGroupRank {   // comparator: true when a sorts before b according to Group::cmpRank()
+    bool operator()(const Group::ChildP & a, const Group::ChildP & b) const {
+        return (a->cmpRank(*b) < 0);
+    }
+};
+
+} // namespace search::aggregation::
+
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, Group, vespalib::Identifiable);
+
+void Group::destruct(GroupList & l, size_t m)
+{   // Calls destruct() on each of the first m entries, frees the array and nulls the caller's pointer.
+    for (ChildP *entry(l), *last(l + m); entry != last; ++entry) {
+        destruct(*entry);
+    }
+    delete [] l;
+    l = NULL;
+}
+
+int Group::cmpRank(const Group &rhs) const
+{
+    // The first order-by expression that differs decides; its sign flips the comparison.
+    for (size_t i(0), m(getOrderBySize()); i < m; i++) {
+        const uint32_t index = std::abs(getOrderBy(i)) - 1;
+        const int diff = expr(index).getResult().cmp(rhs.expr(index).getResult())*getOrderBy(i);
+        if (diff != 0) {
+            return diff;
+        }
+    }
+    // All order-by expressions tie: the group with the higher rank sorts first.
+    return (_rank > rhs._rank) ? -1 : ((_rank < rhs._rank) ? 1 : 0);
+}
+
+Group & Group::addOrderBy(const ExpressionNode::CP & orderBy, bool ascending)
+{
+    assert(getOrderBySize() < sizeof(_orderBy)*2-1);
+    assert(getExprSize() < 15);
+    addExpressionResult(orderBy);   // orderBy becomes the last expression result
+    setOrderBy(getOrderBySize(), (ascending ? getExprSize() : -getExprSize()));   // stores the 1-based expression index; a negative value marks descending order
+    setOrderBySize(getOrderBySize() + 1);
+    setupAggregationReferences();
+    return *this;
+}
+
+Group & Group::addAggregationResult(const ExpressionNode::CP & aggr)
+{
+    assert(getAggrSize() < 15);
+    // Layout of the shared array: [aggregation results | expression results].
+    const size_t oldAggrSize = getAggrSize();
+    const size_t oldTotal = oldAggrSize + getExprSize();
+    ExpressionVector grown = new ExpressionNode::CP[oldTotal + 1];
+    // Aggregation results keep their slots and the new one is appended to them ...
+    std::copy(_aggregationResults, _aggregationResults + oldAggrSize, grown);
+    grown[oldAggrSize] = aggr;
+    // ... while every expression result moves one slot to the right.
+    std::copy(_aggregationResults + oldAggrSize, _aggregationResults + oldTotal,
+              grown + oldAggrSize + 1);
+    delete [] _aggregationResults;
+    _aggregationResults = grown;
+    setAggrSize(oldAggrSize + 1);
+    return *this;
+}
+
+Group & Group::addExpressionResult(const ExpressionNode::CP & expressionNode)
+{
+    // Expression results are stored after the aggregation results, so the new one goes last.
+    // The array is grown by exactly one slot and the existing entries are copied over.
+    const uint32_t oldTotal = getAggrSize() + getExprSize();
+    ExpressionVector grown = new ExpressionNode::CP[oldTotal + 1];
+    std::copy(_aggregationResults, _aggregationResults + oldTotal, grown);
+    grown[oldTotal] = expressionNode;
+    delete [] _aggregationResults;
+    _aggregationResults = grown;
+    setExprSize(getExprSize()+1);
+    return *this;
+}
+
+void Group::setupAggregationReferences()
+{
+    AggregationRefNode::Configure exprRefSetup(_aggregationResults);   // configured with this group's result array
+    select(exprRefSetup, exprRefSetup);                                // the same object is passed as predicate and as operation
+}
+
+Group & Group::addResult(const ExpressionNode::CP & aggr)
+{
+    assert(getExprSize() < 15);
+    addAggregationResult(aggr);
+    addExpressionResult(ExpressionNode::CP(new AggregationRefNode(getAggrSize() - 1)));   // expression referring to the aggregation result just added (its index)
+    setupAggregationReferences();
+    return *this;
+}
+
+void Group::addChild(Group * child)
+{
+    const size_t sz(getChildrenSize());
+    assert(sz < 0xffffff);
+    if (_children == NULL) {
+        _children = new ChildP[4];   // first child: start with room for four
+    } else if ((sz >= 4) && (vespalib::Optimized::msbIdx(sz) == vespalib::Optimized::lsbIdx(sz))) {
+        // msbIdx == lsbIdx presumably means sz has a single bit set (a power of two);
+        // the array is then treated as full and replaced by one twice as large.
+        GroupList doubled = new ChildP[sz*2];
+        std::copy(_children, _children + sz, doubled);
+        delete [] _children;
+        _children = doubled;
+    }
+    _children[sz] = child;
+    setChildrenSize(sz + 1);
+}
+
+void
+Group::selectMembers(const vespalib::ObjectPredicate &predicate,
+                     vespalib::ObjectOperation &operation)
+{
+    if (_id.get() != NULL) {
+        _id->select(predicate, operation);
+    }
+    // Aggregation results and expression results share one array; visit all of them.
+    for (uint32_t i(0), m(getAggrSize() + getExprSize()); i < m; i++) {
+        _aggregationResults[i]->select(predicate, operation);
+    }
+}
+
+void
+Group::preAggregate()
+{
+    assert(_childInfo._childMap == NULL);
+    _childInfo._childMap = new GroupHash(getChildrenSize()*2, GroupHasher(&_children), GroupEqual(&_children));
+    GroupHash & indexMap = *_childInfo._childMap;   // holds indexes into _children
+    for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+        _children[i]->preAggregate();
+        indexMap.insert(i);
+    }
+}
+
+template 
+void Group::collect(const Doc & doc, HitRank rank)
+{
+    for(size_t i(0), m(getAggrSize()); i < m; i++) {   // feed the hit to every aggregation result of this group
+        getAggr(i)->aggregate(doc, rank);
+    }
+}
+
+template 
+void
+Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const Doc & doc, HitRank rank)
+{
+    if (currentLevel >= grouping.getFirstLevel()) {      // levels below the first level do not collect
+        collect(doc, rank);
+    }
+    if (currentLevel < grouping.getLevels().size()) {    // descend while there is a deeper grouping level
+        groupNext(grouping.getLevels()[currentLevel], doc, rank);
+    }
+}
+
+template 
+void
+Group::groupNext(const GroupingLevel & level, const Doc & doc, HitRank rank)
+{
+    const ExpressionTree &selector = level.getExpression();   // the level's expression is evaluated for this hit
+    if (!selector.execute(doc, rank)) {
+        throw std::runtime_error("Does not know how to handle failed select statements");   // a failed evaluation is not recoverable here
+    }
+    const ResultNode &selectResult = selector.getResult();
+    level.group(*this, selectResult, doc, rank);               // the level takes over the actual grouping
+}
+
+Group * Group::groupSingle(const ResultNode & selectResult, HitRank rank, const GroupingLevel & level)
+{
+    if (_childInfo._childMap == NULL) {
+        assert(getChildrenSize() == 0);
+        _childInfo._childMap = new GroupHash(1, GroupHasher(&_children), GroupEqual(&_children));
+    }
+    GroupHash & childMap = *_childInfo._childMap;
+    GroupHash::iterator found = childMap.find(selectResult, GroupResult(&_children));
+    if (found != childMap.end()) {   // existing child: only a non-frozen level updates its rank
+        Group * existing = _children[(*found)];
+        if ( ! level.isFrozen()) {
+            existing->updateRank(rank);
+        }
+        return existing;
+    }
+    if ( ! level.allowMoreGroups(childMap.size())) {
+        return NULL;   // the level accepts no more groups, so none is created
+    }
+    Group * created = new Group(level.getGroupPrototype());   // copy of the level's prototype group
+    created->setId(selectResult);
+    created->setRank(rank);
+    addChild(created);
+    childMap.insert(getChildrenSize() - 1);   // the map stores the index of the new child
+    return created;
+}
+
+void
+Group::postAggregate()
+{
+    delete _childInfo._childMap;   // the lookup map is dropped once aggregation is over
+    _childInfo._childMap = NULL;
+    for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+        _children[i]->postAggregate();
+    }
+}
+
+void
+Group::executeOrderBy()
+{
+    for (size_t idx(0), count(getExprSize()); idx != count; ++idx) {
+        ExpressionNode & node(expr(idx));   // each expression result is prepared, then executed, in index order
+        node.prepare(false); // TODO: What should we do about this flag?
+        node.execute();
+    }
+}
+
+void Group::sortById()
+{   // Sort the direct children by id, then let every child sort its own children.
+    std::sort(_children, _children + getChildrenSize(), SortByGroupId());
+    for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+        _children[i]->sortById();
+    }
+}
+
+void
+Group::merge(const std::vector &levels,
+             uint32_t firstLevel, uint32_t currentLevel, Group &b)
+{
+    bool frozen  = (currentLevel < firstLevel);    // is this level frozen ?
+    _rank = std::max(_rank, b._rank);              // the merged group keeps the higher rank
+
+    if (!frozen) { // should we merge collectors for this level ?
+        for(size_t i(0), m(getAggrSize()); i < m; i++) {
+            getAggr(i)->merge(*b.getAggr(i));      // aggregation results are merged pairwise by index
+        }
+    }
+    GroupList z = new ChildP[getChildrenSize() + b.getChildrenSize()];   // room for the union of both child lists
+    size_t kept(0);
+    ChildP * px = _children;
+    ChildP * ex = _children + getChildrenSize();
+    ChildP * py = b._children;
+    ChildP * ey = b._children + b.getChildrenSize();
+    while (px != ex && py != ey) {                 // two-way merge; presumably both lists are sorted by id - confirm
+        int c = (*px)->cmpId(**py);
+        if (c == 0) {
+            (*px)->merge(levels, firstLevel, currentLevel + 1, **py);   // same id on both sides: merge recursively
+            z[kept++] = *px;
+            reset(*px);
+            ++px;
+            ++py;
+        } else if (c < 0) {
+            z[kept++] = *px;                       // only this group has the child
+            reset(*px);
+            ++px;
+        } else {
+            z[kept++] = *py;                       // only b has the child: it is taken over from b
+            reset(*py);
+            ++py;
+        }
+    }
+    for (; px != ex; ++px) {                       // remaining children of this group
+        z[kept++] = *px;
+        reset(*px);
+    }
+    for (; py != ey; ++py) {                       // remaining children of b
+        z[kept++] = *py;
+        reset(*py);
+    }
+    std::swap(_children, z);                       // z now refers to the old child array
+    destruct(z, getAllChildrenSize());             // tear down the old array
+    setChildrenSize(kept);
+    _childInfo._allChildren = 0;
+}
+
+void
+Group::prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel)
+{   // Keeps only the children whose id also occurs among b's children, recursing until lastLevel is reached.
+    if (currentLevel >= lastLevel) {
+        return;
+    }
+    GroupList keep = new ChildP[b.getChildrenSize()];    // at most one survivor per child of b
+    size_t kept(0);
+    ChildP * px = _children;
+    ChildP * ex = _children + getAllChildrenSize();
+    const ChildP * py = b._children;
+    const ChildP * ey = b._children + b.getChildrenSize();
+    // Assumes that both lists are ordered by group id
+    while (py != ey && px != ex) {
+        const int c = (*py)->cmpId(**px);    // compare once per step, as merge() does
+        if (c > 0) {
+            px++;                            // our child is not in b: leave it in the old array
+        } else if (c == 0) {
+            keep[kept++] = (*px);
+            (*px)->prune((**py), lastLevel, currentLevel + 1);
+            reset(*px);                      // moved to 'keep', so clear the old slot
+            px++;
+            py++;
+        } else {
+            py++;                            // b's child has no counterpart here
+        }
+    }
+    std::swap(_children, keep);              // 'keep' now refers to the previous child array
+    destruct(keep, getAllChildrenSize());
+    setChildrenSize(kept);
+    _childInfo._allChildren = 0;
+}
+
+void
+Group::mergePartial(const std::vector &levels,
+                    uint32_t firstLevel,
+                    uint32_t lastLevel,
+                    uint32_t currentLevel,
+                    const Group & b)
+{   // Merges b's results into the non-frozen part of this tree; at lastLevel and below, b's children are added as partial copies.
+    bool frozen  = (currentLevel < firstLevel);    // levels before firstLevel are left untouched
+
+    if (!frozen) {
+        for(size_t i(0), m(getAggrSize()); i < m; i++) {
+            getAggr(i)->merge(b.getAggr(i));
+        }
+        for(size_t i(0), m(getExprSize()); i < m; i++) {
+            expr(i).execute();
+        }
+
+        // At this level, we must create a copy of the other nodes children.
+        if (currentLevel >= lastLevel) {
+            for (ChildP *it(b._children), *mt(b._children + b.getChildrenSize()); it != mt; ++it) {
+                ChildP g(new Group(levels[currentLevel].getGroupPrototype()));
+                g->partialCopy(**it);
+                addChild(g);
+            }
+            return;
+        }
+    }
+
+    ChildP * px = _children;
+    ChildP * ex = _children + getChildrenSize();
+    const ChildP * py = b._children;
+    const ChildP * ey = b._children + b.getChildrenSize();
+    // Assumes that both lists are ordered by group id
+    while (py != ey && px != ex) {
+        const int c = (*py)->cmpId(**px);    // compare once per step instead of up to three times
+        if (c > 0) {
+            px++;
+        } else if (c == 0) {
+            (*px)->mergePartial(levels, firstLevel, lastLevel, currentLevel + 1, **py);
+            px++;
+            py++;
+        } else {
+            py++;
+        }
+    }
+}
+
+void
+Group::postMerge(const std::vector &levels,
+                 uint32_t firstLevel,
+                 uint32_t currentLevel)
+{   // Finishes a merge: post-merges aggregators, re-runs the children's order-by expressions and trims the child list.
+    bool frozen = (currentLevel < firstLevel);    // is this level frozen ?
+
+    if (!frozen) {
+        for(size_t i(0), m(getAggrSize()); i < m; i++) {
+            getAggr(i)->postMerge();
+        }
+    }
+    bool hasNext = (currentLevel < levels.size()); // is there a next level ?
+    if (!hasNext) { // we have reached the bottom of the tree
+        return;
+    }
+    for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+        (*it)->executeOrderBy();
+    }
+    int64_t maxGroups = levels[currentLevel].getPrecision();    // the level's precision is used as the child limit
+    for (size_t i(getChildrenSize()); i < _childInfo._allChildren; i++) {    // delete and clear slots beyond the active size
+        destruct(_children[i]);
+        reset(_children[i]);
+    }
+    _childInfo._allChildren = getChildrenSize();    // remember the untrimmed count before the size may shrink below
+    if (getChildrenSize() > (uint64_t)maxGroups) { // prune groups
+        std::sort(_children, _children + getChildrenSize(), SortByGroupRank());
+        setChildrenSize(maxGroups);    // entries past maxGroups stay in the array; _allChildren still counts them
+    }
+    for (ChildP *it(_children), *mt(_children + getChildrenSize()); it != mt; ++it) {
+        (*it)->postMerge(levels, firstLevel, currentLevel + 1);
+    }
+}
+
+Group & Group::setRank(RawRank r)
+{   // A NaN rank is stored as -HUGE_VAL; any other value is stored unchanged.
+    if (isnan(r)) { _rank = -HUGE_VAL; } else { _rank = r; }
+    return *this;
+}
+
+Group & Group::updateRank(RawRank r)
+{   // Stores the larger of the current rank and r; the value goes through setRank(), which replaces NaN by -HUGE_VAL.
+    return setRank(std::max(_rank, r));
+}
+
+bool Group::needResort() const
+{   // True when this group, or any active group below it, reports needFullRank().
+    if (needFullRank()) { return true; }
+    for (size_t idx = 0, childCount = getChildrenSize(); idx < childCount; ++idx) {
+        if (_children[idx]->needResort()) { return true; }
+    }
+    return false;
+}
+
+Serializer & Group::onSerialize(Serializer & os) const
+{   // Wire order: id, rank, order-by entries, aggregators, expressions, children, tag (each list preceded by its uint32 count).
+    if (getChildrenSize() > 1) {
+        for (size_t i(1), m(getChildrenSize()); i < m; i++) {
+            assert(_children[i]->cmpId(*_children[i-1]) > 0);    // children must be strictly increasing by id
+        }
+    }
+    LOG(debug, "%s", hasId() ? _id->asString().c_str() : "<no id>");    // _id may be unset (see hasId()); do not dereference it then
+    os << _id << _rank;
+    os << uint32_t(getOrderBySize());
+    for (size_t i(0), m(getOrderBySize()); i < m; i++) {
+        os << int32_t(getOrderBy(i));
+    }
+    os << uint32_t(getAggrSize());
+    for(size_t i(0), m(getAggrSize()); i < m; i++) {
+        os << getAggrCP(i);
+    }
+    os << uint32_t(getExprSize());
+    for(size_t i(0), m(getExprSize()); i < m; i++) {
+        os << getExprCP(i);
+    }
+    os << uint32_t(getChildrenSize());
+    for (size_t i(0), m(getChildrenSize()); i < m; i++) {
+        os << *_children[i];
+    }
+    return os << _tag;
+}
+
+Deserializer & Group::onDeserialize(Deserializer & is)
+{   // Reads the fields in the order onSerialize() writes them and rebuilds the packed sizes and the child array.
+    uint32_t count(0);
+    is >> _id >> _rank >> count;
+    assert(count < sizeof(_orderBy)*2);    // one 4-bit entry per order-by, two entries per byte
+    setOrderBySize(count);
+    for(uint32_t i(0); i < count; i++) {
+        int32_t tmp(0);
+        is >> tmp;
+        assert((-7<= tmp) && (tmp <= 7));    // must fit in 3 bits of magnitude plus a sign flag
+        setOrderBy(i, tmp);
+    }
+    uint32_t aggrSize(0);
+    is >> aggrSize;
+    assert(aggrSize < 16);
+    // To avoid protocol changes, we must first deserialize the aggregation
+    // results into a temporary buffer, and then reallocate the actual
+    // vector when we know the total size. Then we copy the temp buffer and
+    // deserialize the rest to the end of the vector.
+    ExpressionVector tmpAggregationResults = new ExpressionNode::CP[aggrSize];
+    setAggrSize(aggrSize);
+    for(uint32_t i(0); i < aggrSize; i++) {
+        is >> tmpAggregationResults[i];
+    }
+    uint32_t exprSize(0);
+    is >> exprSize;
+    assert(exprSize < 4);    // only two bits of _packedLength hold this count; checked before it sizes the array
+    delete [] _aggregationResults;
+
+    _aggregationResults = new ExpressionNode::CP[aggrSize + exprSize];
+    for (uint32_t i(0); i < aggrSize; i++) {
+        _aggregationResults[i] = tmpAggregationResults[i];
+    }
+    delete [] tmpAggregationResults;
+
+    setExprSize(exprSize);
+    for (uint32_t i(aggrSize); i < aggrSize + exprSize; i++) {
+        is >> _aggregationResults[i];
+    }
+    setupAggregationReferences();
+    is >> count;    // from here on 'count' is the number of children
+    destruct(_children, getAllChildrenSize());
+    _childInfo._allChildren = 0;
+    // msbIdx() is only called for a non-zero count (as in the copy constructor); an empty list still gets 4 slots.
+    _children = new ChildP[std::max(4ul, (count > 0) ? (2ul << vespalib::Optimized::msbIdx(count)) : 0ul)];
+    setChildrenSize(count);
+    for(uint32_t i(0); i < count; i++) {
+        ChildP group(new Group);
+        is >> *group;
+        _children[i] = group;
+    }
+    is >> _tag;
+    LOG(debug, "%s", hasId() ? _id->asString().c_str() : "<no id>");    // _id may be unset (see hasId())
+    if (getChildrenSize() > 1) {
+        for (size_t i(1), m(getChildrenSize()); i < m; i++) {
+            assert(_children[i]->cmpId(*_children[i-1]) > 0);    // children must arrive strictly ordered by id
+        }
+    }
+    return is;
+}
+
+void
+Group::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    visit(visitor, "id",                    _id);
+    visit(visitor, "rank",                  _rank);
+    // Order-by entries are packed, so they are emitted one by one as a "[]" struct.
+    visitor.openStruct("orderBy", "[]");
+    visit(visitor, "size", getOrderBySize());
+    for (size_t idx = 0, entryCount = getOrderBySize(); idx < entryCount; ++idx) {
+        visit(visitor, vespalib::make_vespa_string("[%lu]", idx), getOrderBy(idx));
+    }
+    visitor.closeStruct();
+    // Aggregators: the first getAggrSize() entries of the shared result array.
+    visitor.openStruct("aggregationresults", "[]");
+    visit(visitor, "size", getAggrSize());
+    for (size_t idx = 0, entryCount = getAggrSize(); idx < entryCount; ++idx) {
+        visit(visitor, vespalib::make_vespa_string("[%lu]", idx), getAggrCP(idx));
+    }
+    visitor.closeStruct();
+    // Expressions: the entries that follow the aggregators in the same array.
+    visitor.openStruct("expressionResults", "[]");
+    visit(visitor, "size", getExprSize());
+    for (size_t idx = 0, entryCount = getExprSize(); idx < entryCount; ++idx) {
+        visit(visitor, vespalib::make_vespa_string("[%lu]", idx), getExprCP(idx));
+    }
+    visitor.closeStruct();
+    // Active children only (getChildrenSize(), not the allocated count).
+    visitor.openStruct("children", "[]");
+    visit(visitor, "size", getChildrenSize());
+    for (size_t idx = 0, entryCount = getChildrenSize(); idx < entryCount; ++idx) {
+        visit(visitor, vespalib::make_vespa_string("[%lu]", idx), getChild(idx));
+    }
+    visitor.closeStruct();
+    visit(visitor, "tag",                   _tag);
+}
+
+Group::Group() :
+    _id(),
+    _rank(0),
+    _packedLength(0),    // all four packed sizes start at zero
+    _tag(-1),            // _tag is a uint32_t, so this stores the all-ones value
+    _aggregationResults(NULL),
+    _orderBy(),
+    _children(NULL),
+    _childInfo()
+{   // Creates an empty group: no id, no results, no children.
+    memset(_orderBy, 0, sizeof(_orderBy));
+    _childInfo._childMap = NULL;
+}
+
+Group::Group(const Group & rhs) :
+    Identifiable(rhs),
+    _id(rhs._id),
+    _rank(rhs._rank),
+    _packedLength(rhs._packedLength),
+    _tag(rhs._tag),
+    _aggregationResults(NULL),
+    _orderBy(),
+    _children(NULL),
+    _childInfo()
+{   // Copies rhs; every active child group is duplicated recursively into a freshly allocated array.
+    _childInfo._childMap = NULL;
+    memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy));
+    const uint32_t resultCount = rhs.getAggrSize() + rhs.getExprSize();
+    if (resultCount > 0) {
+        _aggregationResults = new ExpressionNode::CP[resultCount];
+        for (size_t idx = 0; idx < resultCount; ++idx) {
+            _aggregationResults[idx] = rhs._aggregationResults[idx];
+        }
+        setupAggregationReferences();
+    }
+
+    const uint32_t childCount = rhs.getChildrenSize();
+    if (childCount > 0) {
+        _children = new ChildP[std::max(4ul, 2ul << vespalib::Optimized::msbIdx(childCount))];
+        for (size_t idx = 0; idx < childCount; ++idx) {
+            _children[idx] = ChildP(new Group(*rhs._children[idx]));
+        }
+    }
+}
+
+Group::~Group()
+{   // Releases the children first (while the size fields are still valid), then the result array.
+    destruct(_children, getAllChildrenSize());
+    setChildrenSize(0);
+    _childInfo._allChildren = 0;
+    delete [] _aggregationResults;
+}
+
+Group & Group::operator =(const Group & rhs)
+{   // Copy-and-swap; self-assignment is a no-op.
+    if (this == &rhs) { return *this; }
+    Group copy(rhs);
+    swap(copy);
+    // 'copy' now holds the previous members of *this and releases them when it goes out of scope.
+    return *this;
+}
+
+Group &
+Group::partialCopy(const Group & rhs)
+{   // Takes id, rank, result slots, sizes and order-by spec from rhs and resets the aggregators; children are not copied.
+    setId(*rhs._id);
+    _rank = rhs._rank;
+    uint32_t totalAggrSize = getAggrSize() + getExprSize();    // NOTE(review): this group's sizes, not rhs's - presumably both come from the same prototype; confirm
+    for(size_t i(0), m(totalAggrSize); i < m; i++) {
+        _aggregationResults[i] = rhs._aggregationResults[i];
+    }
+    for(size_t i(0), m(getAggrSize()); i < m; i++) {
+        getAggr(i)->reset();    // the copied aggregators are reset right after the copy
+    }
+    setAggrSize(rhs.getAggrSize());    // sizes are taken from rhs; the result array itself is not reallocated
+    setOrderBySize(rhs.getOrderBySize());
+    setExprSize(rhs.getExprSize());
+    setupAggregationReferences();
+    memcpy(_orderBy, rhs._orderBy, sizeof(_orderBy));
+    return *this;
+}
+
+void Group::swap(Group & rhs)
+{   // Exchanges the members of the two groups one by one.
+    _id.swap(rhs._id);
+    std::swap(_rank, rhs._rank);
+    std::swap(_packedLength, rhs._packedLength);
+    std::swap(_tag, rhs._tag);
+    std::swap(_aggregationResults, rhs._aggregationResults);
+    // The order-by entries live in a small byte array; exchange it byte by byte.
+    for (size_t idx = 0; idx < sizeof(_orderBy); ++idx) {
+        std::swap(_orderBy[idx], rhs._orderBy[idx]);
+    }
+    std::swap(_children, rhs._children);
+    // _childInfo is a union and only its _childMap member is exchanged here.
+    // NOTE(review): _allChildren follows along only if both members occupy the same bytes - confirm.
+    std::swap(_childInfo._childMap, rhs._childInfo._childMap);
+}
+
+template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const DocId & doc, HitRank rank); // explicit instantiation for DocId input
+template void Group::aggregate(const Grouping & grouping, uint32_t currentLevel, const document::Document & doc, HitRank rank); // explicit instantiation for document::Document input
+
+}
+}
+
+// This function was added by ../../forcelink.sh. It is intentionally empty (presumably a symbol other code references to force this object file to be linked - confirm).
+void forcelink_file_searchlib_aggregation_group() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/group.h b/searchlib/src/vespa/searchlib/aggregation/group.h
new file mode 100644
index 00000000000..1559f53cd9b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/group.h
@@ -0,0 +1,201 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include "rawrank.h"
+
+namespace search {
+namespace aggregation {
+
+class GroupingLevel;
+class Grouping;
+
+/**
+ * Represents a Group instance. To make grouping fast, the serialization format and the group instance itself are both very compact. The format is as follows:
+ *
+ * +-------------------------------------+-----------------+
+ * | what                                | number of bytes |
+ * +-------------------------------------+-----------------+
+ * | result node id ptr                  | 8               |
+ * | group rank                          | 8               |
+ * | serialized length                   | 4               |
+ * | group tag                           | 4               |
+ * | aggregator vector                   | 8               |
+ * | orderby vector                      | 2               |
+ * | sub group vector                    | 8               |
+ * | sub group vector size/temp hash map | 8               |
+ * +-------------------------------------+-----------------+
+ *
+ * Total: 50 bytes
+ */
+class Group : public vespalib::Identifiable
+{
+public:
+    typedef Group * ChildP;                 // raw pointer; released through the static reset()/destruct() helpers below
+    typedef std::unique_ptr UP;
+    typedef ChildP * GroupList;
+    struct GroupEqual : public std::binary_function {
+        GroupEqual(const GroupList * v) : _v(v) { }
+        bool operator()(uint32_t a, uint32_t b) { return (*_v)[a]->getId().cmpFast((*_v)[b]->getId()) == 0; }
+        const GroupList *_v;
+    };
+    struct GroupHasher {
+        GroupHasher(const GroupList * v) : _v(v) { }
+        size_t operator() (uint32_t arg) const { return (*_v)[arg]->getId().hash(); }
+        const GroupList *_v;
+    };
+    struct GroupResult {
+        GroupResult(const GroupList * v) : _v(v) { }
+        const ResultNode & operator() (uint32_t arg) const { return (*_v)[arg]->getId(); }
+        const GroupList *_v;
+    };
+    struct ResultLess : public std::binary_function {
+        bool operator()(const ResultNode::CP & a, const ResultNode::CP & b) { return a->cmpFast(*b) < 0; }
+    };
+    struct ResultEqual : public std::binary_function {
+        bool operator()(const ResultNode & a, const ResultNode & b) { return a.cmpFast(b) == 0; }
+    };
+    struct ResultHash {
+        size_t operator() (const ResultNode & arg) const { return arg.hash(); }
+    };
+
+    typedef ExpressionNode::CP * ExpressionVector;
+    typedef vespalib::hash_set GroupHash;
+    typedef std::vector         GroupingLevelList;
+
+private:
+    ResultNode::CP   _id;                   // the label of this group, separating it from other groups
+    RawRank          _rank;                 // The default rank taken from the highest hit relevance.
+    uint32_t         _packedLength;         // Packed sizes: bits 0-3 aggregators, 4-5 expressions, 6-7 order-by entries, 8-31 children.
+    uint32_t         _tag;                  // Opaque tag used to identify the group by the client.
+
+   // The collectors and expressions stored by this group. Currently, both aggregation results and expressions used by orderby() are stored in this
+   // array to save 8 bytes in the Group size. This makes it important to use the getAggr() and expr() methods for accessing elements,
+   // as they will correctly offset the index to the correct place in the array.
+    ExpressionVector _aggregationResults;
+
+    uint8_t          _orderBy[2];           // How this group is ranked, negative means reverse rank. One 4-bit entry per order-by.
+    ChildP          *_children;             // the sub-groups of this group. Great care must be taken to ensure proper destruct.
+    union ChildInfo {
+        GroupHash *_childMap;               // child map used during aggregation
+        size_t     _allChildren;            // Keep real number of children.
+    }                _childInfo;
+
+    bool needFullRank() const { return getOrderBySize() != 0; }
+    Group & partialCopy(const Group & rhs);
+    void setOrderBy(uint32_t i, int32_t v) {
+        if (v < 0) {                        // negative values are stored as magnitude plus the 0x8 flag bit
+            v = -v;
+            v = v | 0x8;
+        }
+        _orderBy[i/2]  = (_orderBy[i/2] & (0xf0 >> (4*(i%2)))) | (v << (4*(i%2)));    // keep the other nibble of the byte, replace entry i's nibble
+    }
+    uint32_t getExprSize()    const { return (_packedLength >> 4) & 0x03; }
+    void setAggrSize(uint32_t v)    { _packedLength = (_packedLength & ~0x0f) | v; }    // the setters do not mask v; callers must keep it within the field width
+    void setExprSize(uint32_t v)    { _packedLength = (_packedLength & ~0x30) | (v << 4); }
+    void setOrderBySize(uint32_t v) { _packedLength = (_packedLength & ~0xc0) | (v << 6); }
+    void setChildrenSize(uint32_t v) { _packedLength = (_packedLength & ~0xffffff00) | (v << 8); }
+    AggregationResult * getAggr(size_t i) { return static_cast(_aggregationResults[i].get()); }
+    const AggregationResult & getAggr(size_t i) const { return static_cast(*_aggregationResults[i]); }
+    const ExpressionNode::CP & getAggrCP(size_t i) const { return _aggregationResults[i]; }
+    const ExpressionNode::CP & getExprCP(size_t i) const { return _aggregationResults[getExpr(i)]; }
+    ExpressionNode & expr(size_t i)  { return *_aggregationResults[getExpr(i)]; }
+    const ExpressionNode & expr(size_t i)  const { return *_aggregationResults[getExpr(i)]; }
+    static void reset(Group * & v) { v = NULL; }                 // clears the slot without deleting the group
+    static void destruct(Group * v) { if (v) { delete v; } }
+    static void destruct(GroupList & l, size_t sz);
+    void addChild(Group * child);
+    void setupAggregationReferences();
+    size_t getAllChildrenSize() const { return std::max(static_cast(getChildrenSize()), _childInfo._allChildren); }
+    template 
+    VESPA_DLL_LOCAL void groupNext(const GroupingLevel & level, const Doc & docId, HitRank rank);
+public:
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, Group);
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    Group();
+    Group(const Group & rhs);
+    Group & operator =(const Group & rhs);
+    virtual ~Group();
+    void swap(Group & rhs);
+
+    int cmpId(const Group &rhs) const { return _id->cmpFast(*rhs._id); }    // both groups must have an id; _id is dereferenced unchecked
+    int cmpRank(const Group &rhs) const;
+    Group & setRank(RawRank r);
+    Group & updateRank(RawRank r);
+    RawRank getRank() const { return _rank; }
+
+    VESPA_DLL_LOCAL Group * groupSingle(const ResultNode & result, HitRank rank, const GroupingLevel & level);
+
+    bool hasId() const { return (_id.get() != NULL); }
+    const ResultNode &getId() const { return *_id; }    // only valid when hasId() is true
+
+    Group unchain() const { return *this; }
+
+    Group &setId(const ResultNode &id)  { _id.reset(static_cast(id.clone())); return *this; }
+    Group &addAggregationResult(const ExpressionNode::CP &result);
+    Group &addResult(const ExpressionNode::CP &aggr);
+    Group &addExpressionResult(const ExpressionNode::CP &expressionNode);
+    Group &addOrderBy(const ExpressionNode::CP & orderBy, bool ascending);
+    Group &addChild(const Group &child) { addChild(new Group(child)); return *this; }
+    Group &addChild(Group::UP child) { addChild(child.release()); return *this; }
+
+    /**
+     * Prunes this tree, keeping only the nodes found in another
+     * tree.
+     *
+     * @param b The tree containing the nodes that should be kept.
+     * @param lastLevel The last level on which to perform pruning.
+     * @param currentLevel The current level on which to perform pruning.
+     **/
+    void prune(const Group & b, uint32_t lastLevel, uint32_t currentLevel);
+
+    /**
+     * Recursively checks whether this group or any of its children needs a full resort.
+     * If so, all hits must be processed, and that should be done before any hit sorting.
+     */
+    bool needResort() const;
+
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+                               vespalib::ObjectOperation &operation);
+
+    void preAggregate();
+    template 
+    VESPA_DLL_LOCAL void aggregate(const Grouping & grouping, uint32_t currentLevel, const Doc & docId, HitRank rank);
+
+    template 
+    void collect(const Doc & docId, HitRank rank);
+    void postAggregate();
+    void merge(const std::vector &levels, uint32_t firstLevel, uint32_t currentLevel, Group &b);
+    void executeOrderBy();
+
+    /**
+     * Merge children and results of another tree within the unfrozen parts of
+     * this tree.
+     *
+     * @param b The tree to pick children and results from.
+     * @param firstLevel The first level to merge.
+     * @param lastLevel The last level to merge.
+     * @param currentLevel The current level on which merging should be done.
+     **/
+    void mergePartial(const std::vector &levels, uint32_t firstLevel, uint32_t lastLevel, uint32_t currentLevel, const Group & b);
+    void postMerge(const std::vector &levels, uint32_t firstLevel, uint32_t currentLevel);
+    void sortById();
+    uint32_t getChildrenSize()   const { return (_packedLength >> 8); }
+    const Group & getChild(size_t i) const { return *_children[i]; }
+    GroupList groups() const { return _children; }
+    const AggregationResult & getAggregationResult(size_t i) const { return static_cast(*_aggregationResults[i]); }
+    AggregationResult & getAggregationResult(size_t i) { return static_cast(*_aggregationResults[i]); }
+    uint32_t getAggrSize()    const { return _packedLength & 0x0f; }
+    uint32_t getOrderBySize() const { return (_packedLength >> 6) & 0x03; }
+    uint32_t getExpr(uint32_t i) const { return getAggrSize() + i; }    // array index of expression i: expressions follow the aggregators
+    int32_t getOrderBy(uint32_t i) const {
+        int32_t v((_orderBy[i/2] >> (4*(i%2))) & 0x0f);
+        return (v & 0x8) ? -(v&0x7) : v;    // bit 0x8 is the sign flag written by setOrderBy()
+    }
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.cpp b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp
new file mode 100644
index 00000000000..22b438cd539
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/grouping.cpp
@@ -0,0 +1,357 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+LOG_SETUP(".searchlib.aggregation.grouping");
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::expression;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+namespace search {
+namespace aggregation {
+
+namespace {
+
+void selectGroups(const vespalib::ObjectPredicate &p, vespalib::ObjectOperation &op,
+                  Group &group, uint32_t first, uint32_t last, uint32_t curr)
+{
+    // Walks the group tree; only groups on levels first..last are passed to select(), deeper levels are not visited.
+    if (curr > last) { return; }
+    if (curr >= first) {
+        group.select(p, op);
+    }
+    Group::GroupList children = group.groups();
+    const uint32_t childCount = group.getChildrenSize();
+    for (uint32_t idx = 0; idx < childCount; ++idx) {
+        selectGroups(p, op, *children[idx], first, last, curr + 1);
+    }
+}
+
+using search::aggregation::Grouping;
+using search::aggregation::GroupingLevel;
+using search::aggregation::Group;
+using search::expression::ExpressionTree;
+using search::expression::ExpressionNode;
+using search::expression::AttributeNode;
+using search::expression::EnumResultNode;
+using search::expression::EnumResultNodeVector;
+using search::expression::StringResultNode;
+using search::expression::ResultNode;
+using search::StringAttribute;
+
+class EnumConverter : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+    Grouping &_grouping;
+    uint32_t _level;    // index into the grouping's level list used for the visited group
+public:
+    EnumConverter(Grouping & g, uint32_t level) : _grouping(g), _level(level) { }
+    virtual void execute(vespalib::Identifiable &obj) {    // replaces an enum-valued group id by its string value, then visits the children
+        Group &group = static_cast(obj);
+        uint32_t tmplevel = _level;
+        if (group.hasId()) {
+            if (group.getId().inherits(EnumResultNode::classId)) {
+                const EnumResultNode & er = static_cast(group.getId());
+                const Grouping::GroupingLevelList &gll = _grouping.getLevels();
+                const GroupingLevel & gl = gll[_level];
+                const ExpressionNode::LP & en = gl.getExpression().getRoot();
+                const AttributeNode & an = static_cast(*en);    // the level's root expression is treated as an attribute node
+                StringResultNode srn((static_cast(an.getAttribute()))->getFromEnum(er.getEnum()));
+                group.setId(srn);
+            }
+            tmplevel++;    // only a group that has an id moves its children to the next level
+        }
+        EnumConverter enumConverter(_grouping, tmplevel);
+        Group::GroupList list = group.groups();
+        for (uint32_t i(0), m(group.getChildrenSize()); i < m; ++i) {
+            list[i]->select(enumConverter, enumConverter);
+        }
+    }
+    virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(Group::classId); }    // applies to Group objects only
+};
+
+class GlobalIdConverter : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+    const IDocumentMetaStore &_metaStore;
+public:
+    GlobalIdConverter(const IDocumentMetaStore &metaStore) : _metaStore(metaStore) {}
+    virtual void execute(vespalib::Identifiable & obj) {    // looks up the gid for the hit's doc id in the meta store and stores it on the hit
+        FS4Hit & hit = static_cast(obj);
+        document::GlobalId gid;
+        _metaStore.getGid(hit.getDocId(), gid);
+        hit.setGlobalId(gid);
+        LOG(debug, "GlobalIdConverter: lid(%u) -> gid(%s)", hit.getDocId(), hit.getGlobalId().toString().c_str());
+    }
+    virtual bool check(const vespalib::Identifiable & obj) const {    // applies to FS4Hit objects only
+        return obj.inherits(FS4Hit::classId);
+    }
+};
+
+
+} // namespace search::aggregation::
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, Grouping, vespalib::Identifiable);
+
+Grouping::Grouping()
+    : _id(0),
+      _valid(true),
+      _all(false),
+      _topN(-1),
+      _firstLevel(0),
+      _lastLevel(0),
+      _levels(),
+      _root(),
+      _clock(NULL),    // no clock: aggregation then uses the variant without expiry checks
+      _timeOfDoom(0)
+{   // Creates a valid, empty request: no levels, an empty root group and no clock.
+}
+
+void
+Grouping::selectMembers(const vespalib::ObjectPredicate &predicate,
+                        vespalib::ObjectOperation &operation)
+{   // Offers every level, then the groups on levels _firstLevel.._lastLevel, to the predicate/operation pair.
+    for (GroupingLevelList::iterator level(_levels.begin()), end(_levels.end()); level != end; ++level) {
+        level->select(predicate, operation);
+    }
+    selectGroups(predicate, operation, _root, _firstLevel, _lastLevel, 0);
+}
+
+void
+Grouping::prune(const Grouping & b)
+{   // Prunes from the root (level 0); note that the depth limit is b's _lastLevel, not this object's.
+    _root.prune(b._root, b._lastLevel, 0);
+}
+
+void
+Grouping::mergePartial(const Grouping & b)
+{   // Partial merge of b's root into ours, using this object's levels and first/last level, starting at level 0.
+    _root.mergePartial(_levels, _firstLevel, _lastLevel, 0, b._root);
+}
+
+
+void
+Grouping::merge(Grouping & b)
+{   // b is non-const because Group::merge() moves children out of b's tree.
+    _root.merge(_levels, _firstLevel, 0, b._root);
+}
+
+void
+Grouping::postMerge()
+{   // Runs the post-merge step on the whole tree, starting at the root (level 0).
+    _root.postMerge(_levels, _firstLevel, 0);
+}
+
+void
+Grouping::preAggregate(bool isOrdered)
+{   // Prepares each level (passing this object and the level's index), then the root group.
+    for (size_t level = 0, levelCount = _levels.size(); level < levelCount; ++level) {
+        _levels[level].prepare(this, level, isOrdered);
+    }
+    _root.preAggregate();
+}
+
+void Grouping::aggregate(DocId from, DocId to)
+{   // Aggregates getMaxN(to - from) consecutive doc ids starting at 'from', each with rank 0.0.
+    preAggregate(false);
+    if (to > from) {
+        const DocId end(from + getMaxN(to - from));
+        for (DocId docId(from); docId < end; ++docId) { aggregate(docId, 0.0); }
+    }
+    // Post-processing runs even when the range is empty.
+    postProcess();
+}
+
+void Grouping::postProcess()
+{   // Finishes aggregation and merging, runs the enum converter if any level yields enum results, then sorts by id.
+    postAggregate();
+    postMerge();
+    bool foundEnum = false;
+    for (size_t levelIdx = 0, levelCount = _levels.size(); !foundEnum && (levelIdx < levelCount); ++levelIdx) {
+        const GroupingLevel & level = _levels[levelIdx];
+        const ResultNode & levelId = level.getExpression().getResult();
+        foundEnum = levelId.inherits(EnumResultNode::classId)
+                    || levelId.inherits(EnumResultNodeVector::classId);
+        const Group & prototype = level.getGroupPrototype();
+        for (size_t aggrIdx = 0, aggrCount = prototype.getAggrSize(); !foundEnum && (aggrIdx < aggrCount); ++aggrIdx) {
+            const ResultNode & aggrValue = prototype.getAggregationResult(aggrIdx).getResult();
+            foundEnum = aggrValue.inherits(EnumResultNode::classId)
+                        || aggrValue.inherits(EnumResultNodeVector::classId);
+        }
+    }
+    if (foundEnum) {
+        EnumConverter converter(*this, 0);
+        _root.select(converter, converter);
+    }
+    sortById();
+}
+
+void Grouping::aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len) {
+    for (const RankedHit * hit = rankedHit, * end = rankedHit + len; hit != end; ++hit) {
+        aggregate(hit->_docId, hit->_rankValue);    // every one of the len hits is fed; no expiry check
+    }
+}
+
+void Grouping::aggregateWithClock(const RankedHit * rankedHit, unsigned int len) {
+    for (const RankedHit * hit = rankedHit, * end = rankedHit + len; (hit != end) && !hasExpired(); ++hit) {
+        aggregate(hit->_docId, hit->_rankValue);    // stops early once hasExpired() reports true
+    }
+}
+
+void Grouping::aggregate(const RankedHit * rankedHit, unsigned int len)
+{   // Full pass over a ranked hit array: prepare, feed getMaxN(len) hits, post-process.
+    preAggregate(! needResort());
+    HitsAggregationResult::SetOrdered orderedMarker;
+    select(orderedMarker, orderedMarker);
+    const unsigned int hitCount = getMaxN(len);
+    if (_clock != NULL) {
+        aggregateWithClock(rankedHit, hitCount);
+    } else {
+        aggregateWithoutClock(rankedHit, hitCount);
+    }
+    postProcess();
+}
+
+void Grouping::aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec)
+{   // Aggregates the ranked hits first, then every set bit of bVec (if given) as a doc id with rank 0.0.
+    preAggregate(false);
+    if (_clock == NULL) {
+        aggregateWithoutClock(rankedHit, getMaxN(len));
+    } else {
+        aggregateWithClock(rankedHit, getMaxN(len));
+    }
+    if (bVec != NULL) {
+        unsigned int sz(bVec->size());
+        if (_clock == NULL) {    // the four loops below differ only in the topN cap and the expiry check
+            if (getTopN() > 0) {    // NOTE(review): this cap is getMaxN(sz) on its own, independent of the hits consumed above - confirm intended
+                for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m); d = bVec->getNextTrueBit(d+1), i++) {
+                    aggregate(d, 0.0);
+                }
+            } else {
+                for(DocId d(bVec->getFirstTrueBit()); d < sz; d = bVec->getNextTrueBit(d+1)) {
+                    aggregate(d, 0.0);
+                }
+            }
+        } else {
+            if (getTopN() > 0) {
+                for(DocId d(bVec->getFirstTrueBit()), i(0), m(getMaxN(sz)); (d < sz) && (i < m) && !hasExpired(); d = bVec->getNextTrueBit(d+1), i++) {
+                    aggregate(d, 0.0);
+                }
+            } else {
+                for(DocId d(bVec->getFirstTrueBit()); (d < sz) && !hasExpired(); d = bVec->getNextTrueBit(d+1)) {
+                    aggregate(d, 0.0);
+                }
+            }
+        }
+    }
+    postProcess();
+}
+
+void Grouping::aggregate(DocId docId, HitRank rank)
+{   // Feeds one doc id with its rank into the root group, starting at level 0.
+    _root.aggregate(*this, 0, docId, rank);
+}
+
+void Grouping::aggregate(const document::Document & doc, HitRank rank)
+{   // Feeds one document with its rank into the root group, starting at level 0.
+    _root.aggregate(*this, 0, doc, rank);
+}
+
+void Grouping::convertToGlobalId(const search::IDocumentMetaStore &metaStore)
+{   // Runs GlobalIdConverter over this object so each selected FS4Hit gets its global id from metaStore.
+    GlobalIdConverter conv(metaStore);
+    select(conv, conv);
+}
+
+void Grouping::postAggregate()
+{   // Delegates to the root group.
+    _root.postAggregate();
+}
+
+void Grouping::sortById()
+{   // Sorts the children of every group in the tree by id, starting at the root.
+    _root.sortById();
+}
+
+void Grouping::configureStaticStuff(const ConfigureStaticParams & params)
+{   // Applies the configure operations in a fixed order; the first two only when their input is supplied.
+    if (params._attrCtx != NULL) {
+        AttributeNode::Configure attributeSetup(*params._attrCtx);
+        select(attributeSetup, attributeSetup);
+    }
+    // Document accessors need a document type.
+    if (params._docType != NULL) {
+        DocumentAccessorNode::Configure documentSetup(*params._docType);
+        select(documentSetup, documentSetup);
+    }
+    ExpressionTree::Configure treeSetup;
+    select(treeSetup, treeSetup);
+    // Aggregation results are configured last.
+    AggregationResult::Configure aggregationSetup;
+    select(aggregationSetup, aggregationSetup);
+}
+
+void Grouping::cleanupAttributeReferences()
+{   // Runs AttributeNode::CleanupAttributeReferences over this object via select().
+    AttributeNode::CleanupAttributeReferences cleanupAttr;
+    select(cleanupAttr, cleanupAttr);
+}
+
+void Grouping::cleanTemporary()
+{   // Calls reset() on the root expression of every level whose root inherits FunctionNode.
+    for (GroupingLevelList::iterator it(_levels.begin()), mt(_levels.end()); it != mt; ++it) {
+        if (it->getExpression().getRoot()->inherits(FunctionNode::classId)) {
+            static_cast(*it->getExpression().getRoot()).reset();
+        }
+    }
+}
+
+bool Grouping::needResort() const
+{   // True only when the root group or some level asks for a resort and getTopN() is not positive.
+    bool anyNeedsResort = _root.needResort();
+    for (size_t idx = 0, levelCount = _levels.size(); !anyNeedsResort && (idx < levelCount); ++idx) {
+        anyNeedsResort = _levels[idx].needResort();
+    }
+    return anyNeedsResort && (getTopN() <= 0);
+}
+
+
+Serializer & Grouping::onSerialize(Serializer & os) const
+{   // Field order on the wire: id, valid, all, topN, firstLevel, lastLevel, levels, root. The clock fields are not written.
+    LOG(spam, "Grouping = %s", asString().c_str());
+    return os << _id << _valid << _all << _topN << _firstLevel << _lastLevel << _levels << _root;
+}
+
+Deserializer & Grouping::onDeserialize(Deserializer & is) // Reads this grouping from is; the clock and time of doom are not read.
+{
+    is >> _id >> _valid >> _all >> _topN >> _firstLevel >> _lastLevel >> _levels >> _root; // same field order as onSerialize()
+    LOG(spam, "Grouping = %s", asString().c_str()); // logged after reading, so it shows the received state
+    return is;
+}
+
+void
+Grouping::visitMembers(vespalib::ObjectVisitor &visitor) const // Presents the serialized members to visitor under fixed names.
+{
+    visit(visitor, "id",         _id);
+    visit(visitor, "valid",      _valid);
+    visit(visitor, "all",        _all);
+    visit(visitor, "topN",       _topN);
+    visit(visitor, "firstLevel", _firstLevel);
+    visit(visitor, "lastLevel",  _lastLevel);
+    visit(visitor, "levels",     _levels);
+    visit(visitor, "root",       _root); // _clock and _timeOfDoom are not visited
+}
+
+}
+}
+
+// This empty function was added by ../../forcelink.sh; presumably it only exists so that other code can reference this object file and force it to be linked - confirm against the script.
+void forcelink_file_searchlib_aggregation_grouping() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/grouping.h b/searchlib/src/vespa/searchlib/aggregation/grouping.h
new file mode 100644
index 00000000000..b9024c384bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/grouping.h
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "groupinglevel.h"
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+/**
+ * This class represents a top-level grouping request: request parameters, per-level settings, the root of the group tree, and an optional clock with deadline.
+ **/
+class Grouping : public vespalib::Identifiable
+{
+public:
+    typedef std::vector GroupingLevelList;
+    typedef std::unique_ptr UP;
+
+private:
+    uint32_t           _id;         // client id for this grouping
+    bool               _valid;      // is this grouping object valid?
+    bool               _all;        // if true, group all documents, not just hits (streaming only)
+    int64_t            _topN;       // hits to process per search node
+    uint32_t           _firstLevel; // first processing level this iteration (levels before considered frozen)
+    uint32_t           _lastLevel;  // last processing level this iteration
+    GroupingLevelList  _levels;     // grouping parameters per level
+    Group              _root;       // the grouping tree
+    const vespalib::Clock *_clock;      // An optional clock to be used for timeout handling.
+    fastos::TimeStamp      _timeOfDoom; // Used if clock is specified. This is the time when the request expires.
+
+    bool hasExpired() const { return _clock->getTimeNS() >= _timeOfDoom; } // dereferences _clock unconditionally; only valid once a clock has been set
+    void aggregateWithoutClock(const RankedHit * rankedHit, unsigned int len);
+    void aggregateWithClock(const RankedHit * rankedHit, unsigned int len);
+    void postProcess();
+public:
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, Grouping);
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    Grouping();
+
+    Grouping unchain() const { return *this; } // returns a copy of this object
+
+    // Setters return *this so that calls can be chained.
+    Grouping &setId(unsigned int i)                { _id = i;                  return *this; }
+    Grouping &invalidate()                         { _valid = false;           return *this; }
+    Grouping &setAll(bool v)                       { _all = v;                 return *this; }
+    Grouping &setTopN(int64_t v)                   { _topN = v;                return *this; }
+    Grouping &setFirstLevel(unsigned int level)    { _firstLevel = level;      return *this; }
+    Grouping &setLastLevel(unsigned int level)     { _lastLevel = level;       return *this; }
+    Grouping &addLevel(const GroupingLevel &level) { _levels.push_back(level); return *this; }
+    Grouping &setRoot(const Group &root_)          { _root = root_;            return *this; }
+    Grouping &setClock(const vespalib::Clock * clock) { _clock = clock; return *this; } // the pointer is stored as given, not copied
+    Grouping &setTimeOfDoom(fastos::TimeStamp timeOfDoom) { _timeOfDoom = timeOfDoom; return *this; }
+
+    unsigned int getId()     const { return _id; }
+    bool valid()             const { return _valid; }
+    bool getAll()            const { return _all; }
+    int64_t getTopN()        const { return _topN; }
+    size_t getMaxN(size_t n) const { return std::min(n, static_cast(getTopN())); } // the smaller of n and the converted topN
+    uint32_t getFirstLevel() const { return _firstLevel; }
+    uint32_t getLastLevel()  const { return _lastLevel; }
+    const GroupingLevelList &getLevels() const { return _levels; }
+    const Group &getRoot()   const { return _root; }
+    bool needResort() const;
+
+    GroupingLevelList &levels() { return _levels; } // mutable access to the level list
+    Group &root() { return _root; }                 // mutable access to the root group
+
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+                               vespalib::ObjectOperation &operation);
+
+    void merge(Grouping & b);
+    void mergePartial(const Grouping & b);
+    void postMerge();
+    void preAggregate(bool isOrdered);
+    void prune(const Grouping & b);
+    void aggregate(DocId from, DocId to);
+    void aggregate(const RankedHit * rankedHit, unsigned int len);
+    void aggregate(const RankedHit * rankedHit, unsigned int len, const BitVector * bVec);
+    void aggregate(DocId docId, HitRank rank = 0);                    // rank defaults to 0
+    void aggregate(const document::Document & doc, HitRank rank = 0); // rank defaults to 0
+    void convertToGlobalId(const search::IDocumentMetaStore &metaStore);
+    void postAggregate();
+    void sortById();
+    void cleanTemporary();
+    void configureStaticStuff(const ConfigureStaticParams & params);
+    void cleanupAttributeReferences();
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp
new file mode 100644
index 00000000000..bb801633b26
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.cpp
@@ -0,0 +1,109 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+using expression::ResultNodeVector;
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, GroupingLevel, vespalib::Identifiable);
+
+GroupingLevel::GroupingLevel() : // Creates a level with no expression, an empty group prototype and no grouper.
+    _maxGroups(-1), // -1 is the value setMaxGroups() treats specially
+    _precision(-1),
+    _isOrdered(false),
+    _frozen(false),
+    _classify(),
+    _collect(),
+    _grouper(NULL) // a grouper is installed by prepare()
+{
+}
+
+Serializer & GroupingLevel::onSerialize(Serializer & os) const // Writes the level parameters to os.
+{
+    return os << _maxGroups << _precision << _classify << _collect; // _isOrdered, _frozen and _grouper are not written
+}
+
+Deserializer & GroupingLevel::onDeserialize(Deserializer & is) // Reads the level parameters from is.
+{
+    return is >> _maxGroups >> _precision >> _classify >> _collect; // same field order as onSerialize(); other members keep their current values
+}
+
+void
+GroupingLevel::visitMembers(vespalib::ObjectVisitor &visitor) const // Presents the four serialized members to visitor under fixed names.
+{
+    visit(visitor, "maxGroups", _maxGroups);
+    visit(visitor, "precision", _precision);
+    visit(visitor, "classify",  _classify);
+    visit(visitor, "collect",   _collect); // _isOrdered, _frozen and _grouper are not visited
+}
+
+void GroupingLevel::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation) // Forwards the selection to the members of this level.
+{
+    _classify.select(predicate, operation); // the classification expression first
+    _collect.select(predicate, operation);  // then the group prototype
+}
+
+GroupingLevel::Grouper::Grouper(const Grouping * grouping, uint32_t level) :
+    _grouping(grouping),
+    _level(level),
+    _frozen(level < grouping->getFirstLevel()),      // level lies before the first processing level
+    _hasNext(level < grouping->getLevels().size()),  // level is a valid index into the level list
+    _doNext(level < grouping->getLastLevel())        // level lies before the last processing level
+{
+}
+
+bool GroupingLevel::Grouper::isFrosen(size_t level) const // True when the given level lies before the grouping's first processing level ("Frosen" is the declared spelling).
+{
+    return level < _grouping->getFirstLevel();
+}
+
+bool GroupingLevel::Grouper::hasNext(size_t level) const // True when the given level is a valid index into the grouping's level list.
+{
+    return level < _grouping->getLevels().size();
+}
+
+template
+void GroupingLevel::SingleValueGrouper::groupDoc(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const // Groups one value into g and, when a group results, descends one level.
+{
+    Group * next = g.groupSingle(result, rank, _grouping->getLevels()[_level]); // may return NULL, in which case nothing more is done
+    if ((next != NULL) && doNext()) { // do next level ?
+        next->aggregate(*_grouping, _level + 1, doc, rank); // continue in the child group at the following level
+    }
+}
+
+template
+void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const // Groups every element of a vector result into g.
+{
+    const ResultNodeVector & rv(static_cast(result)); // result is treated as a ResultNodeVector; no runtime check is made here
+    for (size_t i(0), m(rv.size()); i < m; i++) {
+        const ResultNode & sr(rv.get(i));
+        SingleValueGrouper::groupDoc(g, sr, doc, rank); // each element is grouped as if it were a single value
+    }
+}
+
+void GroupingLevel::prepare(const Grouping * grouping, uint32_t level, bool isOrdered_)
+{
+    // Record the ordering and frozen state before choosing the grouper.
+    _isOrdered = isOrdered_;
+    _frozen = (level < grouping->getFirstLevel());
+    const bool multiValued = _classify.getResult().inherits(ResultNodeVector::classId);
+    if (multiValued) { _grouper.reset(new MultiValueGrouper(grouping, level)); }
+    else             { _grouper.reset(new SingleValueGrouper(grouping, level)); }
+    // Vector results get the multi-value grouper; every other result kind the single-value one.
+}
+
+// template<> void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode::CP & result, const document::Document & doc, HitRank rank, bool isOrdered) const;
+// template<> void GroupingLevel::MultiValueGrouper::groupDoc(Group & g, const ResultNode::CP & result, DocId doc, HitRank rank, bool isOrdered) const;
+
+}
+}
+
+// This empty function was added by ../../forcelink.sh; presumably it only exists so that other code can reference this object file and force it to be linked - confirm against the script.
+void forcelink_file_searchlib_aggregation_groupinglevel() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h
new file mode 100644
index 00000000000..c4ce462b0ce
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/groupinglevel.h
@@ -0,0 +1,121 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "group.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+class Grouping;
+
+/**
+ * This class contains information about how grouping should be
+ * performed on a given level in the grouping tree. The Grouping class
+ * holds an array of these, one for each level in the tree below the
+ * root.
+ **/
+class GroupingLevel : public vespalib::Identifiable
+{
+private:
+    class Grouper { // strategy interface used by group(); one overload per document representation
+    public:
+        virtual ~Grouper() { }
+        virtual void group(Group & group, const ResultNode & result, DocId doc, HitRank rank) const = 0;
+        virtual void group(Group & group, const ResultNode & result, const document::Document & doc, HitRank rank) const = 0;
+        virtual Grouper * clone() const = 0;
+    protected:
+        Grouper(const Grouping * grouping, uint32_t level);
+        bool isFrozen() const { return _frozen; }
+        bool  hasNext() const { return _hasNext; }
+        bool   doNext() const { return _doNext; }
+        bool isFrosen(size_t level) const; // same test as isFrozen() but for an arbitrary level; the spelling is part of the declared name
+        bool  hasNext(size_t level) const;
+        const Grouping * _grouping; // not owned; stored as passed to the constructor
+        uint32_t   _level;
+        bool       _frozen;
+        bool       _hasNext;
+        bool       _doNext;
+    };
+    class SingleValueGrouper : public Grouper {
+    public:
+        SingleValueGrouper(const Grouping * grouping, uint32_t level) : Grouper(grouping, level) { }
+    protected:
+        template
+        void groupDoc(Group & group, const ResultNode & result, const Doc & doc, HitRank rank) const;
+        virtual void group(Group & g, const ResultNode & result, DocId doc, HitRank rank) const {
+            groupDoc(g, result, doc, rank);
+        }
+        virtual void group(Group & g, const ResultNode & result, const document::Document & doc, HitRank rank) const {
+            groupDoc(g, result, doc, rank);
+        }
+        virtual SingleValueGrouper * clone() const { return new SingleValueGrouper(*this); }
+    };
+    class MultiValueGrouper : public SingleValueGrouper {
+    public:
+        MultiValueGrouper(const Grouping * grouping, uint32_t level) : SingleValueGrouper(grouping, level) { }
+    private:
+        template
+        void groupDoc(Group & group, const ResultNode & result, const Doc & doc, HitRank rank) const; // hides SingleValueGrouper::groupDoc
+        virtual void group(Group & g, const ResultNode & result, DocId doc, HitRank rank) const {
+            groupDoc(g, result, doc, rank);
+        }
+        virtual void group(Group & g, const ResultNode & result, const document::Document & doc, HitRank rank) const {
+            groupDoc(g, result, doc, rank);
+        }
+        virtual MultiValueGrouper * clone() const { return new MultiValueGrouper(*this); }
+    };
+    int64_t                            _maxGroups;
+    int64_t                            _precision;
+    bool                               _isOrdered; // set by prepare()
+    bool                               _frozen;    // set by prepare() and freeze()
+    search::expression::ExpressionTree _classify;  // expression whose result selects the group
+    Group                              _collect;   // group prototype holding results, aggregators and ordering
+
+    vespalib::CloneablePtr    _grouper; // NULL after construction; installed by prepare()
+public:
+
+    GroupingLevel();
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, GroupingLevel);
+    DECLARE_NBO_SERIALIZE;
+
+    GroupingLevel unchain() const { return *this; } // returns a copy of this object
+
+    GroupingLevel &setMaxGroups(int64_t maxGroups) {
+        _maxGroups = maxGroups;
+        if ((maxGroups == -1) || (maxGroups > _precision)) { // precision follows maxGroups when that is -1 or larger than the current precision
+            _precision = maxGroups;
+        }
+        return *this;
+    }
+    GroupingLevel & freeze() { _frozen = true; return *this; }
+    GroupingLevel &setPresicion(int64_t precision) { _precision = precision; return *this; } // "Presicion" is the declared spelling
+    GroupingLevel &setExpression(const ExpressionNode::CP &root) { _classify = root; return *this; }
+    GroupingLevel &addResult(const ExpressionNode::CP &result) { _collect.addResult(result); return *this; }
+    GroupingLevel &addAggregationResult(const ExpressionNode::CP &aggr) { _collect.addAggregationResult(aggr); return *this; }
+    GroupingLevel &addOrderBy(const ExpressionNode::CP & orderBy, bool ascending) { _collect.addOrderBy(orderBy, ascending); return *this; }
+    bool needResort() const { return _collect.needResort(); }
+
+    int64_t getMaxGroups() const { return _maxGroups; }
+    int64_t getPrecision() const { return _precision; }
+    bool        isFrozen() const { return _frozen; }
+    bool    allowMoreGroups(size_t sz) const { return (!_frozen && (!_isOrdered || (sz < (uint64_t)_precision))); } // a precision of -1 becomes the largest uint64_t after the cast
+    const ExpressionTree & getExpression() const { return _classify; }
+    const       Group &getGroupPrototype() const { return _collect; }
+    void prepare(const Grouping * grouping, uint32_t level, bool isOrdered_);
+
+    Group &groupPrototype() { return _collect; }
+    const Group & groupPrototype() const { return _collect; }
+
+    template
+    void group(Group & g, const ResultNode & result, const Doc & doc, HitRank rank) const {
+        _grouper->group(g, result, doc, rank); // NOTE(review): _grouper is NULL until prepare() runs; assumes callers prepare first - confirm
+    }
+
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hit.cpp b/searchlib/src/vespa/searchlib/aggregation/hit.cpp
new file mode 100644
index 00000000000..9bb7cf95757
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hit.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "hit.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, Hit, vespalib::Identifiable);
+
+static FieldBase _G_rankField("rank");
+
+Serializer &
+Hit::onSerialize(Serializer &os) const // Writes the rank to os under the "rank" field.
+{
+    return os.put(_G_rankField, _rank);
+}
+
+Deserializer &
+Hit::onDeserialize(Deserializer &is) // Reads the rank from is under the "rank" field.
+{
+    return is.get(_G_rankField, _rank);
+}
+
+int
+Hit::onCmp(const Identifiable &b) const
+{
+    const Hit &other = static_cast<const Hit &>(b); // b is taken to be a Hit; no runtime type check is made
+    return (_rank < other._rank) ? 1 : ((_rank > other._rank) ? -1 : 0); // the higher rank orders first
+}
+
+void
+Hit::visitMembers(vespalib::ObjectVisitor &visitor) const // Presents the rank to visitor, named after the serialization field.
+{
+    visit(visitor, _G_rankField.getName(), _rank);
+}
+
+}
+}
+
+// This empty function was added by ../../forcelink.sh; presumably it only exists so that other code can reference this object file and force it to be linked - confirm against the script.
+void forcelink_file_searchlib_aggregation_hit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hit.h b/searchlib/src/vespa/searchlib/aggregation/hit.h
new file mode 100644
index 00000000000..2fbed2510f2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hit.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include "rawrank.h"
+
+
+namespace search {
+namespace aggregation {
+
+class Hit : public vespalib::Identifiable // Abstract base for hits; holds only the rank.
+{
+private:
+    RawRank _rank;
+
+public:
+    DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, aggregation, Hit);
+    DECLARE_NBO_SERIALIZE;
+    typedef vespalib::IdentifiablePtr CP;
+    typedef std::unique_ptr UP;
+
+    Hit() : _rank() {}
+    Hit(RawRank rank) : _rank(rank) {} // not explicit, so a RawRank converts implicitly
+    RawRank getRank() const { return _rank; }
+    virtual Hit *clone() const = 0;
+    virtual int onCmp(const Identifiable &b) const; // orders by rank, higher rank first
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp b/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp
new file mode 100644
index 00000000000..7994856ec46
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitlist.cpp
@@ -0,0 +1,152 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include "hitsaggregationresult.h"
+#include 
+#include "fs4hit.h"
+#include "vdshit.h"
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, HitList, ResultNode);
+
+HitList & HitList::addHit(const FS4Hit & hit, uint32_t maxHits) // Adds hit while keeping at most maxHits FS4 hits; returns *this.
+{
+    if (_fs4hits.size() < maxHits) { // still filling up: append
+        _fs4hits.push_back(hit);
+        if (_fs4hits.size() == maxHits) { // just became full: heapify so that front() is the greatest element
+            std::make_heap(_fs4hits.begin(), _fs4hits.end());
+        }
+    } else if ( ! _fs4hits.empty()) { // full; the emptiness test keeps front() valid when maxHits is 0
+        if (hit.cmp(_fs4hits.front()) < 0) { // the new hit compares lower than the current greatest: replace it
+            std::pop_heap(_fs4hits.begin(), _fs4hits.end()); // moves the greatest element to back()
+            _fs4hits.back() = hit; // overwrite the evicted hit; appending instead would grow the list past maxHits and leave a non-heap range
+            std::push_heap(_fs4hits.begin(), _fs4hits.end());
+        }
+    }
+    return *this;
+}
+
+HitList & HitList::addHit(const VdsHit & hit, uint32_t maxHits) // Adds hit while keeping at most maxHits VDS hits; returns *this.
+{
+    if (_vdshits.size() < maxHits) { // still filling up: append
+        _vdshits.push_back(hit);
+        if (_vdshits.size() == maxHits) { // just became full: heapify so that front() is the greatest element
+            std::make_heap(_vdshits.begin(), _vdshits.end());
+        }
+    } else if ( ! _vdshits.empty()) { // full; the emptiness test keeps front() valid when maxHits is 0
+        if (hit.cmp(_vdshits.front()) < 0) { // the new hit compares lower than the current greatest: replace it
+            std::pop_heap(_vdshits.begin(), _vdshits.end()); // moves the greatest element to back()
+            _vdshits.back() = hit; // overwrite the evicted hit; appending instead would grow the list past maxHits and leave a non-heap range
+            std::push_heap(_vdshits.begin(), _vdshits.end());
+        }
+    }
+    return *this;
+}
+
+void
+HitList::onMerge(const HitList & b) // Appends all hits of b; no sorting or truncation happens here (see postMerge()).
+{
+    _fs4hits.insert(_fs4hits.end(), b._fs4hits.begin(), b._fs4hits.end());
+    _vdshits.insert(_vdshits.end(), b._vdshits.begin(), b._vdshits.end());
+}
+
+void
+HitList::sort() // Sorts each of the two hit lists independently with std::sort.
+{
+    std::sort(_fs4hits.begin(), _fs4hits.end());
+    std::sort(_vdshits.begin(), _vdshits.end());
+}
+
+void
+HitList::postMerge(uint32_t maxHits)
+{
+    // Order both lists first so that truncation keeps the leading maxHits entries.
+    sort();
+    const size_t limit = maxHits;
+    // Only ever shrink; a list that is already within the limit is left untouched.
+    if (limit < _fs4hits.size()) { _fs4hits.resize(limit); }
+    if (limit < _vdshits.size()) { _vdshits.resize(limit); }
+    // Both hit kinds are capped independently, so up to 2 * maxHits hits may remain in total.
+}
+
+Serializer &
+HitList::onSerialize(Serializer & os) const // Writes the total hit count, then every FS4 hit, then every VDS hit.
+{
+    os << (uint32_t)(_fs4hits.size() + _vdshits.size()); // one combined count for both lists
+    for (uint32_t i(0); i < _fs4hits.size(); i++) {
+        Hit::CP hit(const_cast(&_fs4hits[i])); // temporarily wrap the vector element so it is written through Hit::CP
+        os << hit;
+        hit.release(); // give the pointer up again; the element stays owned by the vector
+    }
+    for (uint32_t i(0); i < _vdshits.size(); i++) {
+        Hit::CP hit(const_cast(&_vdshits[i])); // same temporary wrapping as above
+        os << hit;
+        hit.release();
+    }
+    return os;
+}
+
+Deserializer &
+HitList::onDeserialize(Deserializer & is) // Reads a hit count followed by that many hits, appending each to the matching list.
+{
+    uint32_t count(0);
+
+    is >> count;
+    for (uint32_t i(0); i < count; i++) { // existing hits are not cleared first; new ones are appended
+        Hit::CP hit;
+        is >> hit; // NOTE(review): hit is dereferenced below without a NULL check; assumes the stream always yields an object - confirm
+        if (hit->inherits(FS4Hit::classId)) {
+            _fs4hits.push_back(static_cast(*hit));
+        } else { // anything that is not an FS4Hit is stored as a VDS hit
+            _vdshits.push_back(static_cast(*hit));
+        }
+    }
+    return is;
+}
+
+void
+HitList::clear() // Removes all hits from both lists.
+{
+    _fs4hits.clear();
+    _vdshits.clear();
+}
+
+void
+HitList::visitMembers(vespalib::ObjectVisitor & visitor) const // Presents both hit lists to visitor under fixed names.
+{
+    visit(visitor, "fs4hits", _fs4hits);
+    visit(visitor, "vdshits", _vdshits);
+}
+
+void
+HitList::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation)
+{
+    // Offer every stored hit to the predicate/operation pair.
+    // FS4 hits are visited before VDS hits, each list front to back.
+    for (auto & fs4hit : _fs4hits) { fs4hit.select(predicate, operation); }
+    for (auto & vdshit : _vdshits) { vdshit.select(predicate, operation); }
+    // NOTE(review): assumes select() never resizes these lists while they
+    // are being iterated - confirm.
+}
+
+void
+HitList::set(const ResultNode & rhs) // Not supported for hit lists: always throws std::runtime_error.
+{
+    (void) rhs; // unused
+    throw std::runtime_error("HitList::set(const ResultNode & rhs) not implemented.");
+}
+
+}
+}
+
+// This empty function was added by ../../forcelink.sh; presumably it only exists so that other code can reference this object file and force it to be linked - confirm against the script.
+void forcelink_file_searchlib_aggregation_hitlist() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitlist.h b/searchlib/src/vespa/searchlib/aggregation/hitlist.h
new file mode 100644
index 00000000000..a63e923d05e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitlist.h
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include "fs4hit.h"
+#include "vdshit.h"
+
+namespace search {
+namespace aggregation {
+
+
+class HitList : public ResultNode // A result node holding two separate hit lists: FS4 hits and VDS hits.
+{
+public:
+private:
+    typedef std::vector Fs4V;
+    typedef std::vector VdsV;
+    std::vector         _fs4hits;
+    std::vector         _vdshits;
+
+    virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; } // a hit list has no integer value; always 0
+    virtual double onGetFloat(size_t index)    const { (void) index; return 0.0; } // always 0.0
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; return buf; } // returns buf as given
+    virtual size_t hash() const { return 0; } // every hit list hashes to 0
+    virtual void set(const ResultNode & rhs);
+    virtual void decode(const void * buf) { // raw buffer layout used below: the FS4 vector at offset 0, the VDS vector at offset sizeof(_fs4hits)
+        _fs4hits = *static_cast(buf);
+        _vdshits = *static_cast(static_cast(static_cast(buf)+sizeof(_fs4hits)));
+    }
+    virtual void swap(void * buf) { // swaps both vectors with the ones stored in buf
+        static_cast(buf)->swap(_fs4hits);
+        static_cast(static_cast(static_cast(buf)+sizeof(_fs4hits)))->swap(_vdshits);
+    }
+    virtual void encode(void * buf) const { // copy-assigns both vectors into the ones stored in buf
+        *static_cast(buf) = _fs4hits;
+        *static_cast(static_cast(static_cast(buf)+sizeof(_fs4hits))) = _vdshits;
+    }
+    virtual void create(void * buf) const { // placement-constructs two empty vectors in buf
+        new (buf) Fs4V();
+        new (static_cast(buf)+sizeof(_fs4hits)) VdsV();
+    }
+    virtual void destroy(void * buf) const { // runs the destructors of the two vectors stored in buf
+        static_cast(buf)->Fs4V::~Fs4V();
+        static_cast(static_cast(static_cast(buf)+sizeof(_fs4hits)))->VdsV::~VdsV();
+    }
+    virtual size_t getRawByteSize() const { return sizeof(_fs4hits) + sizeof(_vdshits); } // size of the two vector objects, not of their contents
+public:
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, HitList);
+    HitList * clone() const { return new HitList(*this); }
+    DECLARE_NBO_SERIALIZE;
+    HitList() :
+        _fs4hits(),
+        _vdshits()
+    {}
+    uint32_t size() const { return (_fs4hits.size() + _vdshits.size()); } // combined count of both lists
+    bool empty() const { return (_vdshits.empty() && _fs4hits.empty()); }
+    const Hit & front() const { return ((_fs4hits.size() > 0) ?  (static_cast(_fs4hits[0])) : (static_cast(_vdshits[0]))); } // first FS4 hit if any, else first VDS hit; must not be called when empty()
+
+    void postMerge(uint32_t maxHits);
+    void onMerge(const HitList & b);
+    void clear();
+
+    HitList & addHit(const FS4Hit & hit, uint32_t maxHits);
+    HitList & addHit(const VdsHit & hit, uint32_t maxHits);
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+                               vespalib::ObjectOperation &operation);
+    void sort();
+    HitList & sort2() { sort(); return *this; } // same as sort(), but returns *this for chaining
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp
new file mode 100644
index 00000000000..bdc705a9a7c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.cpp
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include "hitsaggregationresult.h"
+#include 
+#include "fs4hit.h"
+#include "vdshit.h"
+#include 
+#include 
+LOG_SETUP(".searchlib.aggregation.hitsaggregationresult");
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, HitsAggregationResult, AggregationResult);
+
+void HitsAggregationResult::onPrepare(const ResultNode & result, bool useForInit) // Nothing to prepare: both arguments are ignored.
+{
+    (void) result;
+    (void) useForInit;
+}
+
+void
+HitsAggregationResult::onMerge(const AggregationResult &b)
+{
+    // b is taken to be a HitsAggregationResult (no runtime check); its hits are appended to ours.
+    _hits.onMerge(static_cast<const HitsAggregationResult &>(b)._hits);
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode &result, DocId docId, HitRank rank)
+{
+    (void) result; // the expression result is not used when collecting hits
+    // Once ordered input has filled the list, further hits are dropped here.
+    const bool listIsFull = (_hits.size() >= _maxHits);
+    if ( ! (_isOrdered && listIsFull)) { _hits.addHit(FS4Hit(docId, rank), _maxHits); }
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode & result, const document::Document & doc, HitRank rank) // Collects doc as a VDS hit, including its document summary.
+{
+    (void) result; // the expression result is not used when collecting hits
+    LOG(spam, "Filling vdshit for %s hits=%lu, maxHits=%u", doc.getId().toString().c_str(), (unsigned long)_hits.size(), _maxHits);
+    if (!_isOrdered || (_hits.size() < _maxHits)) { // for ordered input, stop collecting once maxHits hits are held
+        VdsHit hit(doc.getId().toString(), rank);
+        vespalib::ConstBufferRef docsum(_summaryGenerator->fillSummary(0, _summaryClass)); // NOTE(review): dereferenced without a NULL check; assumes setSummaryGenerator() was called first - confirm
+        hit.setSummary(docsum.c_str(), docsum.size());
+        LOG(spam, "actually filled %s with summary %s with blob of size %lu", doc.getId().toString().c_str(),_summaryClass.c_str(), (unsigned long)docsum.size() ); // cast so the argument type matches %lu
+        _hits.addHit(hit, _maxHits);
+    }
+}
+
+void
+HitsAggregationResult::onAggregate(const ResultNode & result) // Aggregating without a document is not supported for hits.
+{
+    (void) result;
+    assert(false); // always fails when assertions are enabled
+}
+
+void
+HitsAggregationResult::onReset() // Drops all collected hits; the other members are left as they are.
+{
+    _hits.clear();
+}
+
+Serializer &
+HitsAggregationResult::onSerialize(Serializer & os) const // Writes the base class state, then summary class, max hits and the hit list.
+{
+    AggregationResult::onSerialize(os);
+    os << _summaryClass << _maxHits;
+    _hits.serialize(os); // _isOrdered, _bestHitRank and _summaryGenerator are not written
+    return os;
+}
+
+Deserializer &
+HitsAggregationResult::onDeserialize(Deserializer & is) // Reads the state in the order written by onSerialize().
+{
+    AggregationResult::onDeserialize(is);
+    is >> _summaryClass >> _maxHits;
+    _hits.deserialize(is);
+    if (_maxHits == 0) { // a received 0 is replaced by the numeric_limits maximum, as setMaxHits() also does
+        _maxHits = std::numeric_limits::max();
+    }
+    return is;
+}
+
+void
+HitsAggregationResult::visitMembers(vespalib::ObjectVisitor & visitor) const // Presents the base class members, then summary class, max hits and the hits.
+{
+    AggregationResult::visitMembers(visitor);
+    visit(visitor, "summaryClass", _summaryClass);
+    visit(visitor, "maxHits", _maxHits);
+    _hits.visitMembers(visitor); // the hit list's members are visited directly, without a wrapping entry
+}
+
+void
+HitsAggregationResult::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation) // Forwards the selection to the base class and then to the hits.
+{
+    AggregationResult::selectMembers(predicate, operation);
+    _hits.selectMembers(predicate, operation);
+}
+
+const ResultNode & HitsAggregationResult::onGetRank() const
+{
+    // With no hits, the previously cached rank is returned unchanged.
+    if (_hits.empty()) { return _bestHitRank; }
+    _bestHitRank = _hits.front().getRank(); // refresh the mutable cache from the front hit
+    return _bestHitRank;
+}
+
+}
+}
+
+// This empty function was added by ../../forcelink.sh; presumably it only exists so that other code can reference this object file and force it to be linked - confirm against the script.
+void forcelink_file_searchlib_aggregation_hitsaggregationresult() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h
new file mode 100644
index 00000000000..5b533360d01
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/hitsaggregationresult.h
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "aggregationresult.h"
+#include "hitlist.h"
+
+namespace search {
+namespace aggregation {
+
+using search::expression::FloatResultNode;
+
+class HitsAggregationResult : public AggregationResult // Aggregation result that collects hits (at most _maxHits per hit kind) instead of a scalar value.
+{
+public:
+    typedef vespalib::string SummaryClassType;
+
+    class SummaryGenerator // callback used to produce the summary blob stored in document-based hits
+    {
+    public:
+        virtual ~SummaryGenerator() { }
+        virtual vespalib::ConstBufferRef fillSummary(DocId lid, const SummaryClassType & summaryClass) = 0;
+    };
+
+private:
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+
+    virtual void onAggregate(const ResultNode &result, DocId docId, HitRank rank);
+    virtual void onAggregate(const ResultNode &result, const document::Document & doc, HitRank rank);
+    virtual const ResultNode & onGetRank() const;
+
+    SummaryClassType          _summaryClass;
+    uint32_t                  _maxHits;          // never stored as 0; see setMaxHits()
+    HitList                   _hits;
+    bool                      _isOrdered;        // set to true by the SetOrdered operation
+    mutable FloatResultNode   _bestHitRank;      // cache refreshed by onGetRank()
+    SummaryGenerator         *_summaryGenerator; // not owned; 0 until setSummaryGenerator() is called
+
+public:
+    class SetOrdered : public vespalib::ObjectOperation, public vespalib::ObjectPredicate // marks every HitsAggregationResult it is applied to as ordered
+    {
+    private:
+        virtual void execute(vespalib::Identifiable &obj) { static_cast(obj)._isOrdered = true; }
+        virtual bool check(const vespalib::Identifiable &obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); }
+    };
+
+    DECLARE_AGGREGATIONRESULT(HitsAggregationResult);
+    HitsAggregationResult() :
+        AggregationResult(),
+        _summaryClass("default"),
+        _maxHits(std::numeric_limits::max()),
+        _hits(),
+        _isOrdered(false),
+        _bestHitRank(),
+        _summaryGenerator(0)
+    {}
+    virtual void postMerge() { _hits.postMerge(_maxHits); }
+    void setSummaryGenerator(SummaryGenerator & summaryGenerator) { _summaryGenerator = &summaryGenerator; } // stores the address; the generator must outlive its use here
+    const SummaryClassType & getSummaryClass() const { return _summaryClass; }
+    HitsAggregationResult &setSummaryClass(const SummaryClassType & summaryClass) { _summaryClass = summaryClass; return *this; } // returns a reference like the other setters, so chained calls act on this object rather than on a copy
+    HitsAggregationResult &setMaxHits(uint32_t maxHits) {
+        _maxHits = (maxHits == 0) ? std::numeric_limits::max() : maxHits; // 0 is replaced by the numeric_limits maximum
+        return *this;
+    }
+    HitsAggregationResult & addHit(const FS4Hit &hit) { _hits.addHit(hit, _maxHits); return *this; }
+    HitsAggregationResult & addHit(const VdsHit &hit) { _hits.addHit(hit, _maxHits); return *this; }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate,
+                               vespalib::ObjectOperation &operation);
+    HitsAggregationResult & sort() { _hits.sort(); return *this; }
+    virtual const ResultNode & getResult() const { return _hits; } // the hit list itself is the result node
+    virtual ResultNode & getResult() { return _hits; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h
new file mode 100644
index 00000000000..4f63b9decec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/maxaggregationresult.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class MaxAggregationResult : public AggregationResult // Aggregation result whose value, and rank, is the node held in _max.
+{
+public:
+    DECLARE_AGGREGATIONRESULT(MaxAggregationResult);
+    MaxAggregationResult() : AggregationResult(), _max() { }
+    MaxAggregationResult(const SingleResultNode & max) : AggregationResult(), _max(max) { }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const SingleResultNode & getMax() const { return *_max; } // NOTE(review): dereferences _max; presumably onPrepare() must have set it after default construction - confirm
+private:
+    virtual const ResultNode & onGetRank() const { return getMax(); } // the held value doubles as the rank
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    SingleResultNode::CP _max;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h
new file mode 100644
index 00000000000..db909cce3f7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/minaggregationresult.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class MinAggregationResult : public AggregationResult
+{
+public:
+    DECLARE_AGGREGATIONRESULT(MinAggregationResult);
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const SingleResultNode & getMin() const { return *_min; }
+private:
+    virtual const ResultNode & onGetRank() const { return getMin(); }
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    SingleResultNode::CP _min;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp b/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp
new file mode 100644
index 00000000000..9ce8da41ef3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/modifiers.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+#include 
+
+using namespace search::expression;
+
+namespace search {
+namespace aggregation {
+
+bool Attribute2DocumentAccessor::check(const vespalib::Identifiable &obj) const
+{
+    return obj.getClass().inherits(GroupingLevel::classId) || obj.getClass().inherits(AggregationResult::classId) || obj.getClass().inherits(MultiArgFunctionNode::classId);
+}
+
+void Attribute2DocumentAccessor::execute(vespalib::Identifiable &obj)
+{
+    // Swaps AttributeNode expressions for DocumentFieldNodes of the same name; other nodes are visited via select().
+    if (obj.getClass().inherits(GroupingLevel::classId)) {
+        GroupingLevel & g(static_cast<GroupingLevel &>(obj));
+        if (g.getExpression().getRoot()->inherits(AttributeNode::classId)) {
+            g.setExpression(new DocumentFieldNode(static_cast<const AttributeNode &>(*g.getExpression().getRoot()).getAttributeName()));
+        } else {
+            g.getExpression().getRoot()->select(*this, *this);
+        }
+        g.groupPrototype().select(*this, *this);
+    } else if(obj.getClass().inherits(AggregationResult::classId)) {
+        AggregationResult & a(static_cast<AggregationResult &>(obj));
+        if (ExpressionNode * e = a.getExpression()) {
+            if (e->inherits(AttributeNode::classId)) {
+                a.setExpression(new DocumentFieldNode(static_cast<const AttributeNode &>(*e).getAttributeName()));
+            } else {
+                e->select(*this, *this);
+            }
+        }
+    } else if(obj.getClass().inherits(MultiArgFunctionNode::classId)) {
+        MultiArgFunctionNode::ExpressionNodeVector & v(static_cast<MultiArgFunctionNode &>(obj).expressionNodeVector());
+        for(size_t i(0), m(v.size()); i < m; i++) {
+            ExpressionNode::CP & e(v[i]); // reference to the vector slot, so reset() replaces the argument in place
+            if (e->inherits(AttributeNode::classId)) {
+                e.reset(new DocumentFieldNode(static_cast<const AttributeNode &>(*e).getAttributeName()));
+            } else {
+                e->select(*this, *this);
+            }
+        }
+    }
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_modifiers() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/modifiers.h b/searchlib/src/vespa/searchlib/aggregation/modifiers.h
new file mode 100644
index 00000000000..048a8db43b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/modifiers.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+class Attribute2DocumentAccessor : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+{
+private:
+    virtual void execute(vespalib::Identifiable &obj);
+    virtual bool check(const vespalib::Identifiable &obj) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h b/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h
new file mode 100644
index 00000000000..fcea0918af7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/perdocexpression.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/predicates.h b/searchlib/src/vespa/searchlib/aggregation/predicates.h
new file mode 100644
index 00000000000..c6ef6bc554a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/predicates.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fs4hit.h"
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+class CountFS4Hits : public vespalib::ObjectPredicate, // counts execute() calls; check() accepts only class id FS4Hit
+                     public vespalib::ObjectOperation
+{
+private:
+    uint32_t _hitCnt; // number of execute() calls so far
+
+public:
+    CountFS4Hits() : _hitCnt(0) {}
+    uint32_t getHitCount() const { return _hitCnt; }
+    virtual bool check(const vespalib::Identifiable &obj) const {
+        return (obj.getClass().id() == FS4Hit::classId); // exact class id comparison, not inherits()
+    }
+    virtual void execute(vespalib::Identifiable &obj) {
+        (void) obj; // the object itself is not inspected, only counted
+        ++_hitCnt;
+    }
+};
+
+// Predicate/operation pair: check() accepts objects whose class id is FS4Hit, execute() sets their distribution key.
+class FS4HitSetDistributionKey : public vespalib::ObjectPredicate,
+                                  public vespalib::ObjectOperation
+{
+private:
+    uint32_t _distributionKey; // value passed to setDistributionKey() on every executed hit
+public:
+    FS4HitSetDistributionKey(uint32_t distributionKey) : _distributionKey(distributionKey) {}
+    virtual bool check(const vespalib::Identifiable &obj) const {
+        return (obj.getClass().id() == FS4Hit::classId);
+    }
+    virtual void execute(vespalib::Identifiable &obj) {
+        static_cast<FS4Hit &>(obj).setDistributionKey(_distributionKey); // valid only for objects accepted by check()
+    }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp b/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp
new file mode 100644
index 00000000000..b570c7795c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/rawrank.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "rawrank.h"
+#include 
+#include 
+
+#if 0
+namespace search {
+namespace aggregation {
+
+RawRank::RawRank(double rank)
+    : _rank()
+{
+    _rank.resize(sizeof(double));
+    vespalib::serializeForSort >(rank, &_rank[0]);
+}
+
+RawRank::RawRank(const char *buf, uint32_t len)
+    : _rank(buf, buf + len)
+{
+}
+
+int
+RawRank::cmp(const RawRank &rhs) const
+{
+    uint32_t l = std::min(_rank.size(), rhs._rank.size());
+    int diff = memcmp(&_rank[0], &rhs._rank[0], l);
+    if (diff == 0) {
+        diff = (_rank.size() - rhs._rank.size());
+    }
+    return diff;
+}
+
+vespalib::nbostream &
+operator << (vespalib::nbostream &os, const RawRank &rr)
+{
+    return os << rr._rank;
+}
+
+vespalib::nbostream &
+operator >> (vespalib::nbostream &is, RawRank &rr)
+{
+    return is >> rr._rank;
+}
+
+} // namespace search::aggregation
+} // namespace search
+
+#endif
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_rawrank() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/rawrank.h b/searchlib/src/vespa/searchlib/aggregation/rawrank.h
new file mode 100644
index 00000000000..f9ca265d84d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/rawrank.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+
+namespace search {
+namespace aggregation {
+
+#if 0
+/**
+ * Thin wrapper around a rank value represented as a sequence of
+ * bytes.
+ **/
+class RawRank
+{
+private:
+    std::vector _rank;
+
+public:
+    RawRank() : _rank() {}
+    RawRank(double rank);
+    RawRank(const char *buf, uint32_t len);
+    int cmp(const RawRank &rhs) const;
+    const std::vector &getRank() const { return _rank; }
+    friend vespalib::nbostream &operator << (vespalib::nbostream &os, const RawRank &rr);
+    friend vespalib::nbostream &operator >> (vespalib::nbostream &is, RawRank &rr);
+};
+#else
+typedef double RawRank;
+#endif
+
+} // namespace search::aggregation
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h
new file mode 100644
index 00000000000..51a6091b794
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/sumaggregationresult.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class SumAggregationResult : public AggregationResult
+{
+public:
+    DECLARE_AGGREGATIONRESULT(SumAggregationResult);
+    SumAggregationResult() : AggregationResult(), _sum() { }
+    SumAggregationResult(const SingleResultNode & sum) : AggregationResult(), _sum(sum) { }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const SingleResultNode & getSum() const { return *_sum; }
+private:
+    virtual const ResultNode & onGetRank() const { return getSum(); }
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    SingleResultNode::CP _sum;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp b/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp
new file mode 100644
index 00000000000..a1df9646ebd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/vdshit.cpp
@@ -0,0 +1,45 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include "vdshit.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+static FieldBase _G_docIdField("docId");
+static FieldBase _G_summaryField("summary");
+
+
+IMPLEMENT_IDENTIFIABLE_NS2(search, aggregation, VdsHit, Hit);
+
+Serializer &
+VdsHit::onSerialize(Serializer &os) const
+{
+    Hit::onSerialize(os);
+    return os.put(_G_docIdField, _docId) << _summary;
+}
+
+Deserializer &
+VdsHit::onDeserialize(Deserializer &is)
+{
+    Hit::onDeserialize(is);
+    return is.get(_G_docIdField, _docId) >> _summary;
+}
+
+void
+VdsHit::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    Hit::visitMembers(visitor);
+    visit(visitor, _G_docIdField.getName(), _docId);
+    visit(visitor, _G_summaryField.getName(), _summary);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_aggregation_vdshit() {}
diff --git a/searchlib/src/vespa/searchlib/aggregation/vdshit.h b/searchlib/src/vespa/searchlib/aggregation/vdshit.h
new file mode 100644
index 00000000000..89fb1acbae6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/vdshit.h
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "hit.h"
+#include "aggregationresult.h"
+#include 
+
+namespace search {
+namespace aggregation {
+
+class VdsHit : public Hit
+{
+public:
+    typedef vespalib::Array Summary;
+    typedef vespalib::string DocId;
+    DECLARE_IDENTIFIABLE_NS2(search, aggregation, VdsHit);
+    DECLARE_NBO_SERIALIZE;
+    VdsHit() : Hit(), _docId(), _summary() {}
+    VdsHit(DocId docId, HitRank rank) : Hit(rank), _docId(docId), _summary() {}
+    virtual VdsHit *clone() const { return new VdsHit(*this); }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const   DocId &   getDocId() const { return _docId; }
+    const Summary & getSummary() const { return _summary; }
+    VdsHit &   setDocId(const DocId & docId) { _docId = docId; return *this; } // stores a copy; const ref lets temporaries bind
+    VdsHit & setSummary(const void * buf, size_t sz) {
+        const uint8_t * v(static_cast<const uint8_t *>(buf)); // view the untyped buffer as bytes
+        Summary n(v, v+sz);   // new array built from the sz bytes starting at buf
+        _summary.swap(n);     // install it; the previous content leaves scope with n
+        return *this;
+    }
+    bool operator < (const VdsHit &b) const { return cmp(b) < 0; }
+
+private:
+    DocId     _docId;
+    Summary   _summary;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h b/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h
new file mode 100644
index 00000000000..9afc196ac33
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/aggregation/xoraggregationresult.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+
+namespace search {
+namespace aggregation {
+
+class XorAggregationResult : public AggregationResult
+{
+public:
+    DECLARE_AGGREGATIONRESULT(XorAggregationResult);
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    const Int64ResultNode & getXor() const { return _xor; }
+    XorAggregationResult &setXor(const Int64ResultNode &i) {
+        _xor = i;
+        return *this;
+    }
+private:
+    virtual const ResultNode & onGetRank() const { return getXor(); }
+    virtual void onPrepare(const ResultNode & result, bool useForInit);
+    Int64ResultNode _xor;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/.gitignore b/searchlib/src/vespa/searchlib/attribute/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..b949943a42b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -0,0 +1,88 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_attribute OBJECT
+    SOURCES
+    address_space.cpp
+    address_space_usage.cpp
+    attribute.cpp
+    attribute_blueprint_factory.cpp
+    attribute_weighted_set_blueprint.cpp
+    attributecontext.cpp
+    attributefactory.cpp
+    attributefile.cpp
+    attributefilebufferwriter.cpp
+    attributefilesavetarget.cpp
+    attributefilewriter.cpp
+    attributeguard.cpp
+    attributeiterators.cpp
+    attributemanager.cpp
+    attributememoryfilebufferwriter.cpp
+    attributememoryfilewriter.cpp
+    attributememorysavetarget.cpp
+    attributesaver.cpp
+    attributevector.cpp
+    attrvector.cpp
+    changevector.cpp
+    configconverter.cpp
+    createarrayfastsearch.cpp
+    createarraystd.cpp
+    createsetfastsearch.cpp
+    createsetstd.cpp
+    createsinglefastsearch.cpp
+    createsinglestd.cpp
+    defines.cpp
+    dociditerator.cpp
+    enumattribute.cpp
+    enumattributesaver.cpp
+    enumcomparator.cpp
+    enumhintsearchcontext.cpp
+    enumstore.cpp
+    enumstorebase.cpp
+    extendableattributes.cpp
+    fixedsourceselector.cpp
+    flagattribute.cpp
+    floatbase.cpp
+    i_document_weight_attribute.cpp
+    iattributemanager.cpp
+    iattributesavetarget.cpp
+    integerbase.cpp
+    ipostinglistsearchcontext.cpp
+    iterator_pack.cpp
+    loadedenumvalue.cpp
+    loadednumericvalue.cpp
+    loadedstringvalue.cpp
+    loadedvalue.cpp
+    multienumattribute.cpp
+    multienumattributesaver.cpp
+    multinumericattribute.cpp
+    multinumericattributesaver.cpp
+    multinumericenumattribute.cpp
+    multinumericpostattribute.cpp
+    multistringattribute.cpp
+    multistringpostattribute.cpp
+    multivalueattribute.cpp
+    multivalueattributesaver.cpp
+    multivaluemapping.cpp
+    numericbase.cpp
+    postingchange.cpp
+    postinglistattribute.cpp
+    postinglistsearchcontext.cpp
+    postinglisttraits.cpp
+    postingstore.cpp
+    predicate_attribute.cpp
+    singleenumattribute.cpp
+    singleenumattributesaver.cpp
+    singlenumericattribute.cpp
+    singlenumericattributesaver.cpp
+    singlenumericenumattribute.cpp
+    singlenumericpostattribute.cpp
+    singlesmallnumericattribute.cpp
+    singlestringattribute.cpp
+    singlestringpostattribute.cpp
+    sourceselector.cpp
+    stringattribute.cpp
+    stringbase.cpp
+    tensorattribute.cpp
+    tensorattributesaver.cpp
+    tensorstore.cpp
+    DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/attribute/OWNERS b/searchlib/src/vespa/searchlib/attribute/OWNERS
new file mode 100644
index 00000000000..7066165775a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/OWNERS
@@ -0,0 +1,3 @@
+tegge
+geirst
+balder
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space.cpp b/searchlib/src/vespa/searchlib/attribute/address_space.cpp
new file mode 100644
index 00000000000..c953be81020
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space.cpp
@@ -0,0 +1,20 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "address_space.h"
+#include 
+
+namespace search {
+
+AddressSpace::AddressSpace(size_t used_, size_t limit_)
+    : _used(used_),
+      _limit(limit_)
+{
+}
+
+std::ostream &operator << (std::ostream &out, const AddressSpace &rhs)
+{
+    return out << "used=" << rhs.used() << ", limit=" << rhs.limit();
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space.h b/searchlib/src/vespa/searchlib/attribute/address_space.h
new file mode 100644
index 00000000000..f0c116785ea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+
+namespace search {
+
+/**
+ * Represents an address space with number of bytes/entries used
+ * and the limit number of bytes/entries this address space can represent.
+ */
+class AddressSpace
+{
+private:
+    size_t _used;
+    size_t _limit;
+
+public:
+    AddressSpace(size_t used_, size_t limit_);
+    size_t used() const { return _used; }
+    size_t limit() const  { return _limit; }
+    double usage() const {
+        if (_limit == 0) {
+            return 0; // no capacity: report zero usage instead of dividing by zero
+        }
+        return static_cast<double>(_used) / static_cast<double>(_limit);
+    }
+    bool operator==(const AddressSpace &rhs) const {
+        return _used == rhs._used && _limit == rhs._limit;
+    }
+};
+
+std::ostream &operator << (std::ostream &out, const AddressSpace &rhs);
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp b/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp
new file mode 100644
index 00000000000..c127b71de6b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space_usage.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "address_space_usage.h"
+#include "enumstorebase.h"
+#include "multivaluemapping.h"
+
+namespace search {
+
+AddressSpaceUsage::AddressSpaceUsage()
+        : _enumStoreUsage(defaultEnumStoreUsage()),
+          _multiValueUsage(defaultMultiValueUsage()) {
+}
+
+AddressSpaceUsage::AddressSpaceUsage(const AddressSpace &enumStoreUsage_,
+                                     const AddressSpace &multiValueUsage_)
+        : _enumStoreUsage(enumStoreUsage_),
+          _multiValueUsage(multiValueUsage_) {
+}
+
+AddressSpace
+AddressSpaceUsage::defaultEnumStoreUsage()
+{
+    return AddressSpace(0, EnumStoreBase::DataStoreType::RefType::offsetSize());
+}
+
+AddressSpace
+AddressSpaceUsage::defaultMultiValueUsage()
+{
+    return AddressSpace(0, multivalue::Index32::offsetSize());
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/address_space_usage.h b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h
new file mode 100644
index 00000000000..e1c32d9fc48
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/address_space_usage.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "address_space.h"
+
+namespace search {
+
+/**
+ * Represents the address space usage for enum store and multi value mapping.
+ */
+class AddressSpaceUsage
+{
+private:
+    AddressSpace _enumStoreUsage;
+    AddressSpace _multiValueUsage;
+
+public:
+    AddressSpaceUsage();
+    AddressSpaceUsage(const AddressSpace &enumStoreUsage_,
+                      const AddressSpace &multiValueUsage_);
+    static AddressSpace defaultEnumStoreUsage();
+    static AddressSpace defaultMultiValueUsage();
+    const AddressSpace &enumStoreUsage() const { return _enumStoreUsage; }
+    const AddressSpace &multiValueUsage() const { return _multiValueUsage; }
+
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute.cpp b/searchlib/src/vespa/searchlib/attribute/attribute.cpp
new file mode 100644
index 00000000000..649ac1c9786
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attribute.h"
+#include 
+
+LOG_SETUP(".searchlib.attribute.attribute");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute.h b/searchlib/src/vespa/searchlib/attribute/attribute.h
new file mode 100644
index 00000000000..ca14034c4bf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute.h
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
new file mode 100644
index 00000000000..517998af50f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -0,0 +1,636 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.attribute_blueprint_factory");
+
+#include "attribute_blueprint_factory.h"
+#include "attribute_weighted_set_blueprint.h"
+#include "i_document_weight_attribute.h"
+#include "iterator_pack.h"
+#include "predicate_attribute.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using search::AttributeVector;
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchDataPosition;
+using search::query::Location;
+using search::query::LocationTerm;
+using search::query::Node;
+using search::query::NumberTerm;
+using search::query::PredicateQuery;
+using search::query::PrefixTerm;
+using search::query::RangeTerm;
+using search::query::StackDumpCreator;
+using search::query::StringTerm;
+using search::query::SubstringTerm;
+using search::query::SuffixTerm;
+using search::query::RegExpTerm;
+using search::queryeval::AndBlueprint;
+using search::queryeval::AndSearchStrict;
+using search::queryeval::Blueprint;
+using search::queryeval::CreateBlueprintVisitorHelper;
+using search::queryeval::DotProductBlueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::FieldSpecBaseList;
+using search::queryeval::MultiSearch;
+using search::queryeval::OrLikeSearch;
+using search::queryeval::OrSearch;
+using search::queryeval::ParallelWeakAndBlueprint;
+using search::queryeval::PredicateBlueprint;
+using search::queryeval::SearchIterator;
+using search::queryeval::Searchable;
+using search::queryeval::NoUnpack;
+using search::queryeval::IRequestContext;
+using search::queryeval::WeightedSetTermBlueprint;
+using vespalib::geo::ZCurve;
+using vespalib::string;
+
+namespace search {
+namespace {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Blueprint for creating regular, stack-based attribute iterators.
+ **/
+class AttributeFieldBlueprint :
+        public search::queryeval::SimpleLeafBlueprint
+{
+private:
+    AttributeVector::SearchContext::UP _search_context;
+
+    // Shared constructor: creates the search context and takes the hit estimate from it.
+    AttributeFieldBlueprint(const FieldSpec &field, const AttributeVector &attribute,
+                            const string &query_stack,
+                            const AttributeVector::SearchContext::Params &params)
+        : SimpleLeafBlueprint(field),
+          _search_context(attribute.getSearch(query_stack, params).release())
+    {
+        uint32_t estHits = _search_context->approximateHits();
+        HitEstimate estimate(estHits, estHits == 0); // flagged empty when no hits are expected
+        setEstimate(estimate);
+    }
+
+public:
+    AttributeFieldBlueprint(const FieldSpec &field,
+                            const AttributeVector &attribute,
+                            const string &query_stack)
+        : AttributeFieldBlueprint(field,
+                                  attribute,
+                                  query_stack,
+                                  AttributeVector::SearchContext::Params()
+                                  .useBitVector(field.isFilter()))
+    {
+    }
+
+    AttributeFieldBlueprint(const FieldSpec &field,
+                            const AttributeVector &attribute,
+                            const AttributeVector &diversity,
+                            const string &query_stack,
+                            size_t diversityCutoffGroups,
+                            bool diversityCutoffStrict)
+        : AttributeFieldBlueprint(field,
+                                  attribute,
+                                  query_stack,
+                                  AttributeVector::SearchContext::Params()
+                                      .diversityAttribute(&diversity)
+                                      .useBitVector(field.isFilter())
+                                      .diversityCutoffGroups(diversityCutoffGroups)
+                                      .diversityCutoffStrict(diversityCutoffStrict))
+    {
+    }
+
+    virtual SearchIterator::UP
+    createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+    {
+        assert(tfmda.size() == 1);
+        return _search_context->createIterator(tfmda[0], strict);
+    }
+
+    virtual void
+    fetchPostings(bool strict)
+    {
+        _search_context->fetchPostings(strict);
+    }
+
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+};
+
+void
+AttributeFieldBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    search::queryeval::LeafBlueprint::visitMembers(visitor);
+    visit(visitor, "attribute", _search_context->attribute().getName());
+}
+
+//-----------------------------------------------------------------------------
+
+template 
+struct LocationPreFilterIterator : public OrLikeSearch
+{
+    LocationPreFilterIterator(const MultiSearch::Children &children) : OrLikeSearch(children, NoUnpack()) {}
+    virtual void doUnpack(uint32_t) override {}
+};
+
+class LocationPreFilterBlueprint :
+        public search::queryeval::ComplexLeafBlueprint
+{
+private:
+    const AttributeVector & _attribute;
+    std::vector _rangeSearches;
+    bool _should_use;
+
+public:
+    LocationPreFilterBlueprint(const FieldSpec &field, const AttributeVector &attribute, const ZCurve::RangeVector &rangeVector)
+        : ComplexLeafBlueprint(field),
+          _attribute(attribute),
+          _rangeSearches(),
+          _should_use(false)
+    {
+        // One range search context per z-curve range; their hit estimates are summed, then capped at the doc count.
+        uint64_t estHits(0);
+        for (const ZCurve::Range &r : rangeVector) {
+            search::query::Range qr(r.min(), r.max());
+            search::query::SimpleRangeTerm rt(qr, "", 0, search::query::Weight(0));
+            string stack(StackDumpCreator::create(rt));
+            _rangeSearches.push_back(_attribute.getSearch(stack, AttributeVector::SearchContext::Params()));
+            estHits += _rangeSearches.back()->approximateHits();
+            LOG(debug, "Range '%s' estHits %llu", qr.getRangeString().c_str(), static_cast<unsigned long long>(estHits));
+        }
+        if (estHits > _attribute.getNumDocs()) {
+            estHits = _attribute.getNumDocs();
+        }
+        // The pre-filter is only marked usable when it is estimated to hit fewer than 10% of the documents.
+        if (estHits * 10 < _attribute.getNumDocs()) {
+            _should_use = true;
+        }
+        HitEstimate estimate(estHits, estHits == 0);
+        setEstimate(estimate);
+    }
+
+    bool should_use() const { return _should_use; }
+
+    virtual SearchIterator::UP
+    createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const override
+    {
+        MultiSearch::Children children; // one child iterator per range search context
+        for (auto it(_rangeSearches.begin()), mt(_rangeSearches.end()); it != mt; it++) {
+            children.push_back((*it)->createIterator(tfmda[0], // every child is given the first match data entry
+                                                     strict).release()); // ownership is released into the children list
+        }
+        if (strict) { // NOTE(review): both branches read identically here; presumably a strict/non-strict template argument was lost - confirm
+            return SearchIterator::UP(new LocationPreFilterIterator(children));
+        } else {
+            return SearchIterator::UP(new LocationPreFilterIterator(children));
+        }
+    }
+
+    virtual void fetchPostings(bool strict) override {
+        for (size_t i(0); i < _rangeSearches.size(); i++) {
+            _rangeSearches[i]->fetchPostings(strict);
+        }
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+class LocationPostFilterBlueprint :
+        public search::queryeval::ComplexLeafBlueprint
+{
+private:
+    const AttributeVector & _attribute;
+    search::common::Location _location;
+
+public:
+    LocationPostFilterBlueprint(const FieldSpec &field, const AttributeVector &attribute, const Location &loc)
+        : ComplexLeafBlueprint(field),
+          _attribute(attribute),
+          _location()
+    {
+        _location.setVec(attribute);
+        _location.parse(loc.getLocationString());
+        uint32_t estHits = _attribute.getNumDocs();
+        HitEstimate estimate(estHits, estHits == 0);
+        setEstimate(estimate);
+    }
+
+    const search::common::Location &location() const { return _location; }
+
+    virtual SearchIterator::UP
+    createLeafSearch(const TermFieldMatchDataArray &, bool strict) const override
+    {
+        unsigned int num_docs = _attribute.getNumDocs();
+        return SearchIterator::UP(FastS_AllocLocationIterator(num_docs, strict, _location));
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+Blueprint::UP make_location_blueprint(const FieldSpec &field, const AttributeVector &attribute, const Location &loc) {
+    // The post filter owns the parsed location; its bounding box drives the decisions below.
+    LocationPostFilterBlueprint *exact = new LocationPostFilterBlueprint(field, attribute, loc);
+    Blueprint::UP exact_bp(exact);
+    const search::common::Location &box = exact->location();
+    const bool box_is_inverted = (box.getMinX() > box.getMaxX()) || (box.getMinY() > box.getMaxY());
+    if (box_is_inverted) {
+        return Blueprint::UP(new queryeval::EmptyBlueprint(field));
+    }
+    ZCurve::RangeVector ranges = ZCurve::find_ranges(box.getMinX(), box.getMinY(),
+                                                     box.getMaxX(), box.getMaxY());
+    LocationPreFilterBlueprint *coarse = new LocationPreFilterBlueprint(field, attribute, ranges);
+    Blueprint::UP coarse_bp(coarse);
+    if (coarse->should_use()) {
+        // AND of the range pre-filter (added first) and the location post-filter (added second).
+        AndBlueprint *conjunction = new AndBlueprint();
+        Blueprint::UP conjunction_bp(conjunction);
+        conjunction->addChild(std::move(coarse_bp));
+        conjunction->addChild(std::move(exact_bp));
+        return conjunction_bp;
+    }
+    return exact_bp;
+}
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint for weighted term sets evaluated directly against an
+ * IDocumentWeightAttribute. Terms are looked up in the attribute as they are
+ * added; only terms with a non-empty posting list are kept. The hit estimate
+ * is the sum of the posting sizes of the kept terms.
+ *
+ * NOTE(review): the template parameter list and the std::vector element types
+ * appear to be missing from this text; SearchType is used below as if it were
+ * the template parameter - confirm against the original source.
+ **/
+template 
+class DirectWeightedSetBlueprint : public search::queryeval::ComplexLeafBlueprint
+{
+private:
+    HitEstimate                                         _estimate;
+    std::vector                                _weights;
+    std::vector _terms;
+    const IDocumentWeightAttribute                     &_attr;
+
+public:
+    // size_hint is only used to reserve space for the weights and terms.
+    DirectWeightedSetBlueprint(const FieldSpec &field,
+                              const IDocumentWeightAttribute &attr, size_t size_hint)
+        : ComplexLeafBlueprint(field),
+          _estimate(),
+          _weights(),
+          _terms(),
+          _attr(attr)
+    {
+        _weights.reserve(size_hint);
+        _terms.reserve(size_hint);
+    }
+
+    // Looks up the term in the attribute; terms without postings are dropped,
+    // so _weights and _terms stay index-aligned and hold only matching terms.
+    void addTerm(const vespalib::string &term, int32_t weight) {
+        IDocumentWeightAttribute::LookupResult result = _attr.lookup(term);
+        HitEstimate childEst(result.posting_size, (result.posting_size == 0));
+        if (!childEst.empty) {
+            if (_estimate.empty) {
+                _estimate = childEst;
+            } else {
+                _estimate.estHits += childEst.estHits;
+            }
+            setEstimate(_estimate);
+            _weights.push_back(weight);
+            _terms.push_back(result);
+        }
+    }
+
+    // Expects exactly one term field match data entry. Returns an EmptySearch
+    // when no term was kept; otherwise asks the attribute to create an
+    // iterator per kept term and hands them to SearchType::create.
+    SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+    {
+        assert(tfmda.size() == 1);
+        if (_terms.size() == 0) {
+            return SearchIterator::UP(new search::queryeval::EmptySearch());
+        }
+        std::vector iterators;
+        const size_t numChildren = _terms.size();
+        iterators.reserve(numChildren);
+        for (const IDocumentWeightAttribute::LookupResult &r : _terms) {
+            _attr.create(r.posting_idx, iterators);
+        }
+        return SearchType::create(*tfmda[0], _weights, std::move(iterators));
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint for parallel weak AND evaluated directly against an
+ * IDocumentWeightAttribute. Terms are looked up in the attribute as they are
+ * added; only terms with a non-empty posting list are kept. The hit estimate
+ * is the sum of the posting sizes of the kept terms.
+ **/
+class DirectWandBlueprint : public search::queryeval::ComplexLeafBlueprint
+{
+private:
+    HitEstimate                                         _estimate;
+    // mutable: handed to MatchParams from the const createLeafSearch().
+    mutable queryeval::SharedWeakAndPriorityQueue       _scores;
+    const queryeval::wand::score_t                      _scoreThreshold;
+    double                                              _thresholdBoostFactor;
+    const uint32_t                                      _scoresAdjustFrequency;
+    std::vector                                _weights;
+    std::vector _terms;
+    const IDocumentWeightAttribute                     &_attr;
+
+public:
+    // scoresToTrack sizes the shared score queue; size_hint is only used to
+    // reserve space for the weights and terms. The scores adjust frequency is
+    // fixed to DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY.
+    DirectWandBlueprint(const FieldSpec &field,
+                        const IDocumentWeightAttribute &attr,
+                        uint32_t scoresToTrack,
+                        queryeval::wand::score_t scoreThreshold,
+                        double thresholdBoostFactor,
+                        size_t size_hint)
+        : ComplexLeafBlueprint(field),
+          _estimate(),
+          _scores(scoresToTrack),
+          _scoreThreshold(scoreThreshold),
+          _thresholdBoostFactor(thresholdBoostFactor),
+          _scoresAdjustFrequency(queryeval::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY),
+          _weights(),
+          _terms(),
+          _attr(attr)
+    {
+        _weights.reserve(size_hint);
+        _terms.reserve(size_hint);
+    }
+
+    // Looks up the term in the attribute; terms without postings are dropped,
+    // so _weights and _terms stay index-aligned and hold only matching terms.
+    void addTerm(const vespalib::string &term, int32_t weight) {
+        IDocumentWeightAttribute::LookupResult result = _attr.lookup(term);
+        HitEstimate childEst(result.posting_size, (result.posting_size == 0));
+        if (!childEst.empty) {
+            if (_estimate.empty) {
+                _estimate = childEst;
+            } else {
+                _estimate.estHits += childEst.estHits;
+            }
+            setEstimate(_estimate);
+            _weights.push_back(weight);
+            _terms.push_back(result);
+        }
+    }
+
+    // Expects exactly one term field match data entry. Returns an EmptySearch
+    // when no term was kept; otherwise creates a ParallelWeakAndSearch over
+    // the kept terms, with the docid limit taken from this blueprint.
+    SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const
+    {
+        assert(tfmda.size() == 1);
+        if (_terms.size() == 0) {
+            return SearchIterator::UP(new search::queryeval::EmptySearch());
+        }
+        return search::queryeval::ParallelWeakAndSearch::create(*tfmda[0],
+                queryeval::ParallelWeakAndSearch::MatchParams(_scores,
+                        _scoreThreshold,
+                        _thresholdBoostFactor,
+                        _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()),
+                _weights, _terms, _attr, strict);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint for a single term evaluated directly against an
+ * IDocumentWeightAttribute. The term is looked up once at construction time
+ * and the posting size of the dictionary entry is used as hit estimate.
+ **/
+class DirectAttributeBlueprint : public search::queryeval::SimpleLeafBlueprint
+{
+private:
+    vespalib::string                        _attrName;
+    const IDocumentWeightAttribute         &_attr;
+    IDocumentWeightAttribute::LookupResult  _dict_entry;
+
+public:
+    DirectAttributeBlueprint(const FieldSpec &field,
+                             const vespalib::string & name,
+                             const IDocumentWeightAttribute &attr, const vespalib::string &term)
+        : SimpleLeafBlueprint(field),
+          _attrName(name),
+          _attr(attr),
+          _dict_entry(_attr.lookup(term))
+    {
+        const bool noPostings = (_dict_entry.posting_size == 0);
+        setEstimate(HitEstimate(_dict_entry.posting_size, noPostings));
+    }
+
+    // Expects exactly one term field match data entry; yields an EmptySearch
+    // when the term has no postings.
+    SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+    {
+        assert(tfmda.size() == 1);
+        if (_dict_entry.posting_size != 0) {
+            return SearchIterator::UP(new queryeval::DocumentWeightSearchIterator(*tfmda[0], _attr, _dict_entry));
+        }
+        return SearchIterator::UP(new search::queryeval::EmptySearch());
+    }
+
+    // Adds the attribute name to the members reported by the base class.
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const
+    {
+        search::queryeval::LeafBlueprint::visitMembers(visitor);
+        visit(visitor, "attribute", _attrName);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Tells whether an attribute can be used as diversity attribute: it must
+ * exist, be single-valued, and either have enum or be of integer or
+ * floating point type.
+ **/
+bool check_valid_diversity_attr(const AttributeVector *attr) {
+    if ((attr == nullptr) || attr->hasMultiValue()) {
+        return false;
+    }
+    const bool usable = attr->hasEnum() || attr->isIntegerType() || attr->isFloatingPointType();
+    return usable;
+}
+
+//-----------------------------------------------------------------------------
+
+
+/**
+ * Query node visitor that determines the correct Blueprint to use for a
+ * term searching the given attribute. Each visit() stores its blueprint
+ * through setResult().
+ **/
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
+{
+private:
+    const FieldSpec &_field;
+    const AttributeVector & _attr;
+    // Result of attr.asDocumentWeightAttribute(); checked against nullptr
+    // before any of the Direct* blueprints is used.
+    const IDocumentWeightAttribute *_dwa;
+
+public:
+    CreateBlueprintVisitor(Searchable &searchable,
+                           const IRequestContext &requestContext,
+                           const FieldSpec &field,
+                           const AttributeVector &attr)
+        : CreateBlueprintVisitorHelper(searchable, field, requestContext),
+          _field(field),
+          _attr(attr),
+          _dwa(attr.asDocumentWeightAttribute()) {}
+
+    // Uses a DirectAttributeBlueprint when 'simple' is set, the document
+    // weight interface is available, the field is not a filter and the term
+    // is ranked. Otherwise falls back to an AttributeFieldBlueprint built
+    // from the stack dump of the node.
+    template 
+    void visitTerm(TermNode &n, bool simple = false) {
+        if (simple && (_dwa != nullptr) && !_field.isFilter() && n.isRanked()) {
+            vespalib::string term = search::queryeval::termAsString(n);
+            setResult(make_UP(new DirectAttributeBlueprint(_field, _attr.getName(), *_dwa, term)));
+        } else {
+            const string stack = StackDumpCreator::create(n);
+            setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, stack)));
+        }
+    }
+
+    void visitLocation(LocationTerm &node) {
+        Location loc(node.getTerm());
+        setResult(make_location_blueprint(_field, _attr, loc));
+    }
+
+    // A predicate query on a non-predicate attribute is logged and answered
+    // with an EmptyBlueprint.
+    void visitPredicate(PredicateQuery &query) {
+        const PredicateAttribute *attr =
+            dynamic_cast(&_attr);
+        if (!attr) {
+            LOG(warning, "Trying to apply a PredicateQuery node to a "
+                "non-predicate attribute.");
+            setResult(Blueprint::UP(new queryeval::EmptyBlueprint(_field)));
+        } else {
+            setResult(Blueprint::UP(new PredicateBlueprint( _field, *attr, query)));
+        }
+    }
+
+    virtual void visit(NumberTerm & n) { visitTerm(n, true); }
+    virtual void visit(LocationTerm &n) { visitLocation(n); }
+    virtual void visit(PrefixTerm & n) { visitTerm(n); }
+
+    // When the parsed term asks for diversity (max per group > 0), the
+    // diversity attribute is resolved through the request context; an
+    // unusable diversity attribute yields an EmptyBlueprint.
+    virtual void visit(RangeTerm &n) {
+        const string stack = StackDumpCreator::create(n);
+        const string term = search::queryeval::termAsString(n);
+        search::QueryTermSimple parsed_term(term, search::QueryTermSimple::WORD);
+        if (parsed_term.getMaxPerGroup() > 0) {
+            const AttributeVector * diversity(getRequestContext().getAttribute(parsed_term.getDiversityAttribute()));
+            if (check_valid_diversity_attr(diversity)) {
+                setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, *diversity, stack,
+                                                              parsed_term.getDiversityCutoffGroups(),
+                                                              parsed_term.getDiversityCutoffStrict())));
+            } else {
+                setResult(Blueprint::UP(new queryeval::EmptyBlueprint(_field)));
+            }
+        } else {
+            setResult(make_UP(new AttributeFieldBlueprint(_field, _attr, stack)));
+        }
+    }
+
+    virtual void visit(StringTerm & n) { visitTerm(n, true); }
+    // Substring and suffix terms are rewritten to regular expression terms.
+    virtual void visit(SubstringTerm & n) {
+        search::query::SimpleRegExpTerm re(vespalib::Regexp::make_from_substring(n.getTerm()),
+                                           n.getView(), n.getId(), n.getWeight());
+        visitTerm(re);
+    }
+    virtual void visit(SuffixTerm & n) {
+        search::query::SimpleRegExpTerm re(vespalib::Regexp::make_from_suffix(n.getTerm()),
+                                           n.getView(), n.getId(), n.getWeight());
+        visitTerm(re);
+    }
+    virtual void visit(PredicateQuery &n) { visitPredicate(n); }
+    virtual void visit(RegExpTerm & n) { visitTerm(n); }
+
+    // Takes ownership of bp, adds every child of n as (term string, weight)
+    // and stores bp as the result.
+    template 
+    void createDirectWeightedSet(WS *bp, NODE &n) {
+        Blueprint::UP result(bp);
+        for (size_t i = 0; i < n.getChildren().size(); ++i) {
+            const search::query::Node &node = *n.getChildren()[i];
+            vespalib::string term = search::queryeval::termAsString(node);
+            uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+            bp->addTerm(term, weight);
+        }
+        setResult(std::move(result));
+    }
+
+    // Takes ownership of bp, adds one AttributeFieldBlueprint per child of n
+    // (each with its own child field from bp) and stores bp as the result.
+    template 
+    void createShallowWeightedSet(WS *bp, NODE &n, const FieldSpec &fs) {
+        Blueprint::UP result(bp);
+        for (size_t i = 0; i < n.getChildren().size(); ++i) {
+            const search::query::Node &node = *n.getChildren()[i];
+            uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+            const string stack = StackDumpCreator::create(node);
+            FieldSpec childfs = bp->getNextChildField(fs);
+            bp->addTerm(make_UP(new AttributeFieldBlueprint(childfs, _attr, stack)), weight);
+        }
+        setResult(std::move(result));
+    }
+
+    // Single-value attributes that are integer, or string with enum, use
+    // AttributeWeightedSetBlueprint. Otherwise the direct variant is used
+    // when the document weight interface is available, else the generic one.
+    virtual void visit(search::query::WeightedSetTerm &n) {
+        bool isSingleValue = !_attr.hasMultiValue();
+        bool isString = (_attr.isStringType() && _attr.hasEnum());
+        bool isInteger = _attr.isIntegerType();
+        if (isSingleValue && (isString || isInteger)) {
+            AttributeWeightedSetBlueprint *ws
+                = new AttributeWeightedSetBlueprint(_field, _attr);
+            Blueprint::UP result(ws);
+            for (size_t i = 0; i < n.getChildren().size(); ++i) {
+                const search::query::Node &node = *n.getChildren()[i];
+                uint32_t weight = search::queryeval::getWeightFromNode(node).percent();
+                vespalib::string term = search::queryeval::termAsString(node);
+                search::QueryTermSimple::UP qt;
+                if (isInteger) {
+                    qt.reset(new search::QueryTermSimple(term, search::QueryTermSimple::WORD));
+                } else {
+                    qt.reset(new search::QueryTermBase(term, search::QueryTermSimple::WORD));
+                }
+                ws->addToken(_attr.getSearch(std::move(qt), AttributeVector::SearchContext::Params()), weight);
+            }
+            setResult(std::move(result));
+        } else {
+            if (_dwa != nullptr) {
+                auto *bp = new DirectWeightedSetBlueprint(_field, *_dwa, n.getChildren().size());
+                createDirectWeightedSet(bp, n);
+            } else {
+                auto *bp = new WeightedSetTermBlueprint(_field);
+                createShallowWeightedSet(bp, n, _field);
+            }
+        }
+    }
+
+    virtual void visit(search::query::DotProduct &n) {
+        if (_dwa != nullptr) {
+            auto *bp = new DirectWeightedSetBlueprint(_field, *_dwa, n.getChildren().size());
+            createDirectWeightedSet(bp, n);
+        } else {
+            auto *bp = new DotProductBlueprint(_field);
+            createShallowWeightedSet(bp, n, _field);
+        }
+    }
+
+    virtual void visit(search::query::WandTerm &n) {
+        if (_dwa != nullptr) {
+            auto *bp = new DirectWandBlueprint(_field, *_dwa,
+                                               n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
+                                               n.getChildren().size());
+            createDirectWeightedSet(bp, n);
+        } else {
+            auto *bp = new ParallelWeakAndBlueprint(_field,
+                    n.getTargetNumHits(),
+                    n.getScoreThreshold(),
+                    n.getThresholdBoostFactor());
+            createShallowWeightedSet(bp, n, _field);
+        }
+    }
+};
+
+} // namespace
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Creates the blueprint for a query node searching the attribute named by
+ * the field.
+ *
+ * The attribute is resolved through the request context. When the lookup
+ * yields no attribute (null pointer) an EmptyBlueprint is returned instead
+ * of dereferencing the null pointer. Otherwise the node is dispatched
+ * through a CreateBlueprintVisitor, which selects the blueprint type.
+ *
+ * @param requestContext context used to resolve attribute vectors
+ * @param field          field spec naming the attribute to search
+ * @param term           query node to create a blueprint for
+ * @return the blueprint chosen by the visitor, or an empty blueprint when
+ *         the attribute is not available
+ **/
+Blueprint::UP
+AttributeBlueprintFactory::createBlueprint(const IRequestContext & requestContext,
+                                            const FieldSpec &field,
+                                            const search::query::Node &term)
+{
+    const AttributeVector * attr(requestContext.getAttribute(field.getName()));
+    if (attr == nullptr) {
+        return Blueprint::UP(new queryeval::EmptyBlueprint(field));
+    }
+    CreateBlueprintVisitor visitor(*this, requestContext, field, *attr);
+    // The visit() overloads take non-const node references.
+    const_cast<search::query::Node &>(term).accept(visitor);
+    return visitor.getResult();
+}
+
+}  // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h
new file mode 100644
index 00000000000..a8fb10da02b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * Searchable implementation that creates blueprints for query nodes
+ * searching attribute vectors.
+ **/
+class AttributeBlueprintFactory : public queryeval::Searchable
+{
+public:
+    // Implements Searchable: creates the blueprint for 'term' searching the
+    // attribute named by 'field', resolved through 'requestContext'.
+    queryeval::Blueprint::UP
+    createBlueprint(const queryeval::IRequestContext & requestContext,
+                    const queryeval::FieldSpec &field,
+                    const query::Node &term) override;
+};
+
+}  // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
new file mode 100644
index 00000000000..501c78f75b6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attribute_weighted_set_blueprint.h"
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+namespace {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Base for the token policies below: holds a reference to the attribute
+ * and exposes it to subclasses.
+ **/
+class UseAttr
+{
+public:
+    UseAttr(const attribute::IAttributeVector & attr)
+        : _attr(attr)
+    {
+    }
+
+protected:
+    const attribute::IAttributeVector &attribute() const
+    {
+        return _attr;
+    }
+
+private:
+    const attribute::IAttributeVector &_attr;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Token policy using enum handles: query terms and document values are
+ * both mapped to the enum handle of the attribute.
+ **/
+class UseStringEnum : public UseAttr
+{
+public:
+    UseStringEnum(const AttributeVector & attr)
+        : UseAttr(attr) {}
+
+    // Maps the query term of the context to its enum handle. Returns false
+    // and leaves 'token' untouched when the attribute has no enum for it.
+    bool mapToken(const AttributeVector::SearchContext &context,
+                  int64_t &token) const
+    {
+        attribute::IAttributeVector::EnumHandle enumValue;
+        if (!attribute().findEnum(context.queryTerm().getTerm(), enumValue)) {
+            return false;
+        }
+        token = enumValue;
+        return true;
+    }
+
+    // Token of a document: its enum handle.
+    int64_t getToken(uint32_t docId) const {
+        return attribute().getEnum(docId);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Token policy using integer values: query terms and document values are
+ * both mapped to the integer value itself.
+ **/
+class UseInteger : public UseAttr
+{
+public:
+    UseInteger(const AttributeVector & attr) : UseAttr(attr) {}
+
+    // Maps the integer term of the context to a token. Only point ranges
+    // map; for any other range 'token' is left untouched and false returned.
+    bool mapToken(const AttributeVector::SearchContext &context,
+                  int64_t &token) const
+    {
+        Int64Range termRange(context.getAsIntegerTerm());
+        if (!termRange.isPoint()) {
+            return false;
+        }
+        token = termRange.lower();
+        return true;
+    }
+
+    // Token of a document: its integer value.
+    int64_t getToken(uint32_t docId) const {
+        return attribute().getInt(docId);
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Search iterator that matches a document when the token of the document
+ * (as produced by the policy T) is among the tokens of the query, and
+ * unpacks the weight associated with that token.
+ *
+ * NOTE(review): the template parameter list and the hash_map/vector type
+ * arguments appear to be missing from this text; T is used below as the
+ * policy type - confirm against the original source.
+ **/
+template 
+class AttributeFilter : public queryeval::SearchIterator
+{
+private:
+    typedef vespalib::hash_map Map;
+    typedef fef::TermFieldMatchData TFMD;
+
+    TFMD    &_tfmd;
+    T        _attr;
+    Map      _map;
+    // Weight of the most recent hit found by doSeek(); used by doUnpack().
+    int32_t  _weight;
+
+public:
+    // Builds the token -> weight map; contexts whose term cannot be mapped
+    // to a token are left out. weights[i] belongs to contexts[i].
+    AttributeFilter(fef::TermFieldMatchData &tfmd,
+                    const AttributeVector & attr,
+                    const std::vector weights,
+                    const std::vector contexts)
+        : _tfmd(tfmd), _attr(attr), _map(), _weight(0)
+    {
+        for (size_t i = 0; i < contexts.size(); ++i) {
+            int64_t token(0);
+            if (_attr.mapToken(*contexts[i], token)) {
+                _map[token] = weights[i];
+            }
+        }
+    }
+    // Sets the doc id only when the token of the document is in the map.
+    virtual void doSeek(uint32_t docId) {
+        Map::const_iterator pos = _map.find(_attr.getToken(docId));
+        if (pos != _map.end()) {
+            _weight = pos->second;
+            setDocId(docId);
+        }
+    }
+    // Resets the match data for the document and appends a single position
+    // carrying the weight remembered by doSeek().
+    virtual void doUnpack(uint32_t docId) {
+        _tfmd.reset(docId);
+        fef::TermFieldMatchDataPosition pos;
+        pos.setElementWeight(_weight);
+        _tfmd.appendPosition(pos);
+    }
+    virtual void visitMembers(vespalib::ObjectVisitor &) const {}
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace search::
+
+// Starts with no tokens and a hit estimate of 0; the number of documents in
+// the attribute is captured here and used as upper bound in addToken().
+AttributeWeightedSetBlueprint::AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const AttributeVector & attr)
+    : queryeval::ComplexLeafBlueprint(field),
+      _numDocs(attr.getNumDocs()),
+      _estHits(0),
+      _weights(),
+      _attr(attr),
+      _contexts()
+{
+}
+
+// Deletes the search contexts taken over in addToken(), last added first.
+AttributeWeightedSetBlueprint::~AttributeWeightedSetBlueprint()
+{
+    for (size_t remaining = _contexts.size(); remaining > 0; --remaining) {
+        delete _contexts[remaining - 1];
+    }
+    _contexts.clear();
+}
+
+// Adds a token (search context) with its weight and takes ownership of the
+// context. The hit estimate grows by the approximate hits of the context,
+// capped at the number of documents in the attribute.
+void
+AttributeWeightedSetBlueprint::addToken(AttributeVector::SearchContext::UP context, int32_t weight)
+{
+    _estHits = std::min(_estHits + context->approximateHits(), _numDocs);
+    const bool noHits = (_estHits == 0);
+    setEstimate(HitEstimate(_estHits, noHits));
+    _weights.push_back(weight);
+    // Store the raw pointer before giving up ownership, so the context is
+    // still owned by 'context' should push_back throw.
+    AttributeVector::SearchContext *raw = context.get();
+    _contexts.push_back(raw);
+    context.release();
+}
+
+// Creates the search iterator for this weighted set. Expects exactly one
+// term field match data entry.
+//  - strict:     one strict child iterator per search context, combined
+//                by WeightedSetTermSearch.
+//  - non-strict: an AttributeFilter over the tokens; requires a
+//                single-value attribute that is string with enum or integer.
+queryeval::SearchIterator::UP
+AttributeWeightedSetBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+                                                bool strict) const
+{
+    assert(tfmda.size() == 1);
+    fef::TermFieldMatchData &tfmd = *tfmda[0];
+    if (strict) { // use generic weighted set search
+        std::vector children(_contexts.size());
+        for (size_t i = 0; i < _contexts.size(); ++i) {
+            // Ownership of each child is released into the raw pointer vector
+            // handed to WeightedSetTermSearch::create below.
+            children[i] = _contexts[i]->createIterator(&tfmd,
+                                                       true).release();
+        }
+        return queryeval::SearchIterator::UP(queryeval::WeightedSetTermSearch::create(children, tfmd, _weights));
+    } else { // use attribute filter optimization
+        bool isSingleValue = !_attr.hasMultiValue();
+        bool isString = (_attr.isStringType() && _attr.hasEnum());
+        bool isInteger = _attr.isIntegerType();
+        assert(isSingleValue);
+        // The (void) casts keep the variables used when assert is compiled out.
+        (void) isSingleValue;
+        if (isString) {
+            return queryeval::SearchIterator::UP(new AttributeFilter(tfmd, _attr, _weights, _contexts));
+        } else {
+            assert(isInteger);
+            (void) isInteger;
+            return queryeval::SearchIterator::UP(new AttributeFilter(tfmd, _attr, _weights, _contexts));
+        }
+    }
+}
+
+// Fetches postings for every search context, but only in the strict case.
+void
+AttributeWeightedSetBlueprint::fetchPostings(bool strict)
+{
+    if (!strict) {
+        return;
+    }
+    for (auto *context : _contexts) {
+        context->fetchPostings(true);
+    }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h
new file mode 100644
index 00000000000..dadb0d1a0ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * Leaf blueprint for a weighted set term searching an attribute vector.
+ * Tokens are added as search contexts, each with a weight; the blueprint
+ * owns the contexts once added.
+ **/
+class AttributeWeightedSetBlueprint : public queryeval::ComplexLeafBlueprint
+{
+private:
+    size_t                     _numDocs;   // documents in the attribute at construction time
+    size_t                     _estHits;   // accumulated hit estimate, capped at _numDocs
+    std::vector       _weights;            // weight of each token, same order as _contexts
+    const AttributeVector    & _attr;
+    std::vector _contexts;                 // search contexts, deleted by the destructor
+
+    AttributeWeightedSetBlueprint(const AttributeWeightedSetBlueprint &); // disabled
+    AttributeWeightedSetBlueprint &operator=(const AttributeWeightedSetBlueprint &); // disabled
+
+public:
+    AttributeWeightedSetBlueprint(const queryeval::FieldSpec &field, const AttributeVector & attr);
+    virtual ~AttributeWeightedSetBlueprint();
+    // Adds a token with the given weight; ownership of the context is taken over.
+    void addToken(AttributeVector::SearchContext::UP context, int32_t weight);
+    virtual queryeval::SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool strict) const;
+
+    // Fetches postings for all tokens when strict; does nothing otherwise.
+    virtual void
+    fetchPostings(bool strict);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp b/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp
new file mode 100644
index 00000000000..3c1ada3108d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributecontext.cpp
@@ -0,0 +1,72 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".attributecontext");
+#include "attributecontext.h"
+
+using namespace search;
+using namespace search::attribute;
+
+namespace search {
+
+// Returns the attribute with the given name, using 'map' as guard cache.
+// On a cache miss the guard is fetched from the manager (the stable enum
+// variant when requested) and stored in the map. Returns nullptr when the
+// manager does not provide a guard.
+const IAttributeVector *
+AttributeContext::getAttribute(AttributeMap & map, const string & name, bool stableEnum) const
+{
+    AttributeMap::const_iterator cached = map.find(name);
+    if (cached != map.end()) {
+        return cached->second->operator->();
+    }
+    AttributeGuard::UP fetched;
+    if (stableEnum) {
+        fetched = _manager.getAttributeStableEnum(name);
+    } else {
+        fetched = _manager.getAttribute(name);
+    }
+    if (!fetched) {
+        return nullptr;
+    }
+    // Take the reference before the guard is moved into the map; the guard
+    // object itself stays alive there.
+    const AttributeGuard & guard = *fetched;
+    map[name] = std::move(fetched);
+    return guard.operator->();
+}
+
+// Wraps the given attribute manager; both guard caches start out empty.
+AttributeContext::AttributeContext(const IAttributeManager & manager) :
+    _manager(manager),
+    _attributes(),
+    _enumAttributes(),
+    _cacheLock()
+{
+}
+
+// Looks up the attribute through the regular guard cache, under the cache lock.
+const IAttributeVector *
+AttributeContext::getAttribute(const string & name) const
+{
+    vespalib::LockGuard cacheGuard(_cacheLock);
+    const IAttributeVector * attr = getAttribute(_attributes, name, false);
+    return attr;
+}
+
+// Looks up the attribute through the stable enum guard cache, under the cache lock.
+const IAttributeVector *
+AttributeContext::getAttributeStableEnum(const string & name) const
+{
+    vespalib::LockGuard cacheGuard(_cacheLock);
+    const IAttributeVector * attr = getAttribute(_enumAttributes, name, true);
+    return attr;
+}
+
+// Drops all cached stable enum guards, under the cache lock.
+void
+AttributeContext::releaseEnumGuards()
+{
+    vespalib::LockGuard cacheGuard(_cacheLock);
+    _enumAttributes.clear();
+}
+
+// Appends one entry to 'list' for every attribute the manager lists. Each
+// entry is resolved by name through getAttribute(), so it goes through the
+// guard cache; an entry is whatever that lookup returns.
+void
+AttributeContext::getAttributeList(std::vector & list) const
+{
+    std::vector attributes;
+    _manager.getAttributeList(attributes);
+    for (size_t i = 0; i < attributes.size(); ++i) {
+        list.push_back(getAttribute(attributes[i]->getName()));
+    }
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributecontext.h b/searchlib/src/vespa/searchlib/attribute/attributecontext.h
new file mode 100644
index 00000000000..5d70c06d16a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributecontext.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include "iattributemanager.h"
+
+namespace search {
+
+/**
+ * This class wraps an attribute manager and implements the
+ * IAttributeContext interface to provide read access to attribute vectors.
+ * Attribute guards obtained from the manager are cached per name, in
+ * separate caches for regular and stable enum access.
+ **/
+class AttributeContext : public attribute::IAttributeContext
+{
+private:
+    typedef vespalib::hash_map AttributeMap;
+
+    const search::IAttributeManager & _manager;
+    // The caches and the lock are mutable since lookups are const.
+    mutable AttributeMap              _attributes;
+    mutable AttributeMap              _enumAttributes;
+    mutable vespalib::Lock            _cacheLock;
+
+    // Shared lookup over one of the caches; the public lookups call it with
+    // _cacheLock held.
+    const attribute::IAttributeVector *
+        getAttribute(AttributeMap & map, const string & name, bool stableEnum) const;
+
+public:
+    AttributeContext(const search::IAttributeManager & manager);
+
+    // Implements IAttributeContext
+    const attribute::IAttributeVector * getAttribute(const string & name) const override;
+    const attribute::IAttributeVector * getAttributeStableEnum(const string & name) const override;
+    void getAttributeList(std::vector & list) const override;
+    void releaseEnumGuards() override;
+
+    // Give access to the underlying manager
+    const search::IAttributeManager & getManager() const { return _manager; }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp b/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp
new file mode 100644
index 00000000000..f7d5adbe049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefactory.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attributefactory");
+
+#include "attributefactory.h"
+
+namespace search {
+
+using attribute::CollectionType;
+
+// Creates an attribute vector for the given config. The collection type
+// selects array, weighted set or single value; when fast search is
+// requested the fast search variant is tried first, with a logged fallback
+// to the standard variant if it cannot be created.
+AttributeVector::SP
+AttributeFactory::createAttribute(const vespalib::string & baseFileName, const Config & cfg)
+{
+    AttributeVector::SP attr;
+    if (cfg.collectionType().type() == CollectionType::ARRAY) {
+        if (!cfg.fastSearch()) {
+            attr = createArrayStd(baseFileName, cfg);
+            return attr;
+        }
+        attr = createArrayFastSearch(baseFileName, cfg);
+        if (attr.get() == nullptr) {
+            LOG(warning, "Cannot apply fastsearch hint on attribute %s of type array<%s>. "
+                "Falling back to normal. You should correct your .sd file.",
+                baseFileName.c_str(), cfg.basicType().asString());
+            attr = createArrayStd(baseFileName, cfg);
+        }
+        return attr;
+    }
+    if (cfg.collectionType().type() == CollectionType::WSET) {
+        // Ignore if noupdate has been set.
+        // NOTE(review): no noupdate handling is visible in this function.
+        if (!cfg.fastSearch()) {
+            attr = createSetStd(baseFileName, cfg);
+            return attr;
+        }
+        attr = createSetFastSearch(baseFileName, cfg);
+        if (attr.get() == nullptr) {
+            LOG(warning, "Cannot apply fastsearch hint on attribute %s of type set<%s>. "
+                "Falling back to normal. You should correct your .sd file.",
+                baseFileName.c_str(), cfg.basicType().asString());
+            attr = createSetStd(baseFileName, cfg);
+        }
+        return attr;
+    }
+    // Any other collection type is handled as single value.
+    if (!cfg.fastSearch()) {
+        attr = createSingleStd(baseFileName, cfg);
+        return attr;
+    }
+    attr = createSingleFastSearch(baseFileName, cfg);
+    if (attr.get() == nullptr) {
+        LOG(warning, "Cannot apply fastsearch hint on attribute %s of type %s. "
+            "Falling back to normal. You should correct your .sd file.",
+            baseFileName.c_str(), cfg.basicType().asString());
+        attr = createSingleStd(baseFileName, cfg);
+    }
+    return attr;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefactory.h b/searchlib/src/vespa/searchlib/attribute/attributefactory.h
new file mode 100644
index 00000000000..d2a1f567620
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefactory.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+
+namespace search {
+
+/**
+ * Factory for creating attribute vector instances.
+ * The private helpers create one specific variant each (collection type
+ * combined with standard, fast search or fast aggregate); createAttribute()
+ * is the public entry point that picks among them.
+ **/
+class AttributeFactory {
+private:
+    typedef attribute::Config Config;
+    static AttributeVector::SP createArrayStd(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createArrayFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createSetStd(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createSetFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createSingleStd(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createSingleFastSearch(const vespalib::string & baseFileName, const Config & cfg);
+    // NOTE(review): the fast aggregate helpers are not called from
+    // createAttribute() as shown in attributefactory.cpp - confirm usage.
+    static AttributeVector::SP createSingleFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createArrayFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+    static AttributeVector::SP createSetFastAggregate(const vespalib::string & baseFileName, const Config & cfg);
+
+public:
+    /**
+     * Create an attribute vector with the given name based on the given config.
+     * The result is empty if the selected variant cannot be created.
+     **/
+    static AttributeVector::SP createAttribute(const vespalib::string & baseFileName, const Config & cfg);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefile.cpp b/searchlib/src/vespa/searchlib/attribute/attributefile.cpp
new file mode 100644
index 00000000000..c4384459f9d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefile.cpp
@@ -0,0 +1,457 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributefile.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+LOG_SETUP(".attributefile");
+
+using vespalib::IllegalStateException;
+using search::common::FileHeaderContext;
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+using attribute::BasicType;
+
+namespace
+{
+// Reopens the named file read/write and rewrites its header with a freeze time and the "frozen" tag set to 1.
+void
+updateHeader(const vespalib::string &name)
+{
+    vespalib::FileHeader h;
+    FastOS_File f;
+    f.OpenReadWrite(name.c_str()); // NOTE(review): the result is not checked here — confirm how a failed open is handled
+    h.readFile(f);
+    FileHeaderContext::setFreezeTime(h);
+    typedef vespalib::GenericHeader::Tag Tag;
+    h.putTag(Tag("frozen", 1)); // OpenWriteOnly() wrote the header with "frozen" = 0
+    h.rewriteFile(f);
+    f.Close();
+}
+
+}
+// Opens the attribute files for reading and loads the first idx entry; the seekIdxPos() result is ignored.
+ReadAttributeFile::ReadAttributeFile(const vespalib::string & fileName,
+                                     const Config & config)
+    : AttributeFile(fileName, config)
+{
+    OpenReadOnly();
+    seekIdxPos(0);
+}
+
+// Opens (truncating) the attribute files for writing and writes their headers via OpenWriteOnly().
+WriteAttributeFile::WriteAttributeFile(const vespalib::string &fileName,
+                                       const Config &config,
+                                       const FileHeaderContext &
+                                       fileHeaderContext,
+                                       uint32_t docIdLimit)
+    : AttributeFile(fileName, config)
+{
+    OpenWriteOnly(fileHeaderContext, docIdLimit);
+}
+// Opens the .dat file and, when they exist, the .idx and .weight files for reading. Each header is read
+// and the file is positioned just past it. Throws IllegalStateException when a file cannot be opened.
+void
+AttributeFile::OpenReadOnly()
+{
+    if ( ! _datFile->OpenReadOnly() ) {
+        LOG(error, "could not open %s: %s",
+            _datFile->GetFileName(), getLastErrorString().c_str());
+        throw IllegalStateException(
+                vespalib::make_string(
+                        "Failed opening attribute data file '%s' for reading",
+                        _datFile->GetFileName()));
+    }
+    vespalib::FileHeader datHeader;
+    _datHeaderLen = datHeader.readFile(*_datFile);
+    _datFile->SetPosition(_datHeaderLen);
+    _datFileSize = _datFile->GetSize();
+    if (!FileSizeCalculator::extractFileSize(datHeader, _datHeaderLen,
+                                             _datFile->GetFileName(),
+                                             _datFileSize)) {
+        abort(); // extractFileSize() reported failure; presumably a header/size mismatch — TODO confirm
+    }
+    if (_idxFile.get()) {
+        if ( ! _idxFile->OpenReadOnly()) {
+            LOG(error, "could not open %s: %s",
+                _idxFile->GetFileName(), getLastErrorString().c_str());
+            throw IllegalStateException(
+                    vespalib::make_string(
+                            "Failed opening attribute idx file '%s'"
+                            " for reading",
+                            _idxFile->GetFileName()));
+        }
+        vespalib::FileHeader idxHeader;
+        _idxHeaderLen = idxHeader.readFile(*_idxFile);
+        _idxFile->SetPosition(_idxHeaderLen);
+        _idxFileSize = _idxFile->GetSize();
+        if (!FileSizeCalculator::extractFileSize(idxHeader, _idxHeaderLen,
+                                                 _idxFile->GetFileName(),
+                                                 _idxFileSize)) {
+            abort(); // same failure handling as for the data file
+        }
+        if (_weightFile.get()) { // the weight file is only considered when an idx file exists
+            if ( ! _weightFile->OpenReadOnly()) {
+                LOG(error, "could not open %s: %s",
+                    _weightFile->GetFileName(), getLastErrorString().c_str());
+                throw IllegalStateException(
+                        vespalib::make_string(
+                                "Failed opening attribute weight file '%s'"
+                                " for reading",
+                                _weightFile->GetFileName()));
+            }
+            vespalib::FileHeader weightHeader;
+            _weightHeaderLen = weightHeader.readFile(*_weightFile);
+            _weightFile->SetPosition(_weightHeaderLen); // note: no file-size extraction for the weight file
+        }
+    }
+}
+// Creates (truncating) the .dat file and, when they exist, the .idx and .weight files, writing a header with "frozen" = 0
+// to each. Throws IllegalStateException when a file cannot be opened or the first idx entry cannot be written.
+void
+AttributeFile::OpenWriteOnly(const FileHeaderContext &fileHeaderContext,
+                             uint32_t docIdLimit)
+{
+    if ( ! _datFile->OpenWriteOnlyTruncate() ) {
+        LOG(error, "could not open %s: %s",
+            _datFile->GetFileName(), getLastErrorString().c_str());
+        throw IllegalStateException(
+                vespalib::make_string(
+                        "Failed opening attribute data file '%s' for writing",
+                        _datFile->GetFileName()));
+    }
+    vespalib::FileHeader datHeader;
+    typedef vespalib::GenericHeader::Tag Tag;
+    fileHeaderContext.addTags(datHeader, _datFile->GetFileName());
+    datHeader.putTag(Tag("desc", "Attribute vector data file"));
+
+    datHeader.putTag(Tag("datatype", _config.basicType().asString()));
+    datHeader.putTag(Tag("collectiontype",
+                         _config.collectionType().asString()));
+    datHeader.putTag(Tag("docIdLimit", docIdLimit));
+    datHeader.putTag(Tag("frozen", 0)); // set to 1 by updateHeader() when the file is closed
+    _datHeaderLen = datHeader.writeFile(*_datFile);
+    if (_idxFile.get()) {
+        if ( ! _idxFile->OpenWriteOnlyTruncate()) {
+            LOG(error, "could not open %s: %s",
+                _idxFile->GetFileName(), getLastErrorString().c_str());
+            throw IllegalStateException(
+                    vespalib::make_string(
+                            "Failed opening attribute idx file '%s'"
+                            " for writing",
+                            _idxFile->GetFileName()));
+        }
+        vespalib::FileHeader idxHeader;
+        fileHeaderContext.addTags(idxHeader, _idxFile->GetFileName());
+        idxHeader.putTag(Tag("desc", "Attribute vector idx file"));
+        idxHeader.putTag(Tag("datatype",
+                             _config.basicType().asString()));
+        idxHeader.putTag(Tag("collectiontype",
+                             _config.collectionType().asString()));
+        idxHeader.putTag(Tag("docIdLimit", docIdLimit));
+        idxHeader.putTag(Tag("frozen", 0));
+        _idxHeaderLen = idxHeader.writeFile(*_idxFile);
+        if ( ! _idxFile->CheckedWrite(&_currIdx, sizeof(_currIdx))) { // initial idx entry, before any record
+            LOG(error, "could not write to %s: %s",
+                _idxFile->GetFileName(), getLastErrorString().c_str());
+            throw IllegalStateException(
+                    vespalib::make_string(
+                            "Failed writing first idx"
+                            " to attribute idx file '%s'",
+                            _idxFile->GetFileName())); // name the idx file; _weightFile may be NULL here
+        }
+        if (_weightFile.get()) {
+            if ( ! _weightFile->OpenWriteOnlyTruncate()) {
+                LOG(error, "could not open %s: %s",
+                    _weightFile->GetFileName(), getLastErrorString().c_str());
+                throw IllegalStateException(
+                        vespalib::make_string(
+                                "Failed opening attribute weight file '%s'"
+                                " for writing",
+                                _weightFile->GetFileName()));
+            }
+            vespalib::FileHeader weightHeader;
+            fileHeaderContext.addTags(weightHeader,
+                                      _weightFile->GetFileName());
+            weightHeader.putTag(Tag("desc", "Attribute vector weight file"));
+            weightHeader.putTag(Tag("datatype",
+                                    _config.basicType().asString()));
+            weightHeader.putTag(Tag("collectiontype",
+                                    _config.collectionType().asString()));
+            weightHeader.putTag(Tag("docIdLimit", docIdLimit));
+            weightHeader.putTag(Tag("frozen", 0));
+            _weightHeaderLen = weightHeader.writeFile(*_weightFile);
+        }
+    }
+}
+
+// Enables direct I/O on the data file and on the idx / weight files when they exist.
+void
+AttributeFile::enableDirectIO()
+{
+    _datFile->EnableDirectIO();
+    if (_idxFile.get()) {
+        _idxFile->EnableDirectIO();
+        if (_weightFile.get()) { // only reached when an idx file exists
+            _weightFile->EnableDirectIO();
+        }
+    }
+}
+
+// Flushes and closes every open file; a file that was open in write mode then gets its header marked frozen.
+void
+AttributeFile::Close()
+{
+    if (_datFile->IsOpened()) {
+        bool writeMode = _datFile->IsWriteMode(); // sampled before the file is closed
+        _datFile->Flush();
+        _datFile->Close();
+        if (writeMode) {
+            updateHeader(_datFile->GetFileName()); // reopens the file by name and rewrites its header
+        }
+    }
+    if (_idxFile.get() != NULL && _idxFile->IsOpened()) {
+        bool writeMode = _idxFile->IsWriteMode();
+        _idxFile->Flush();
+        _idxFile->Close();
+        if (writeMode) {
+            updateHeader(_idxFile->GetFileName());
+        }
+    }
+    if (_weightFile.get() != NULL && _weightFile->IsOpened()) {
+        bool writeMode = _weightFile->IsWriteMode();
+        _weightFile->Flush();
+        _weightFile->Close();
+        if (writeMode) {
+            updateHeader(_weightFile->GetFileName());
+        }
+    }
+}
+
+// Creates the file objects without opening them: .dat always, .idx for multi-value collections, .weight for weighted sets.
+AttributeFile::AttributeFile(const vespalib::string &fileName,
+                             const Config &config)
+    : _currIdx(0),
+      _datFile(new Fast_BufferedFile( new FastOS_File((fileName + ".dat").c_str()))),
+      _idxFile(config.collectionType().isMultiValue() ?
+               new Fast_BufferedFile(new FastOS_File((fileName + ".idx").c_str())) :
+               NULL),
+      _weightFile(config.collectionType().isWeightedSet() ?
+                  new Fast_BufferedFile( new FastOS_File((fileName + ".weight").c_str())) :
+                  NULL),
+      _fileName(fileName),
+      _config(config),
+      _datHeaderLen(0u),
+      _idxHeaderLen(0u),
+      _weightHeaderLen(0u),
+      _datFileSize(0),
+      _idxFileSize(0)
+{
+}
+
+// Closes any file that is still open (see Close()).
+AttributeFile::~AttributeFile(void)
+{
+    Close();
+}
+
+// Positions the idx file at entry idxPos and loads that entry into _currIdx; false without an idx file or on a short read.
+bool
+AttributeFile::seekIdxPos(size_t idxPos)
+{
+    if (!_idxFile.get()) {
+        return false;
+    }
+    const size_t offset = _idxHeaderLen + idxPos * sizeof(uint32_t);
+    _idxFile->SetPosition(offset);
+    return _idxFile->Read(&_currIdx, sizeof(_currIdx)) ==
+           sizeof(_currIdx);
+}
+
+// Reads the next record; with an idx file the value count is the difference between consecutive idx entries.
+bool
+AttributeFile::read(Record &record)
+{
+    bool retval(true);
+    uint32_t nextIdx(_currIdx + 1); // without an idx file every record holds exactly one value
+    if (_idxFile.get()) {
+        if (static_cast(_idxFile->GetPosition()) >= _idxFileSize) {
+            retval = false; // no more idx entries
+        } else {
+            retval = (_idxFile->Read(&nextIdx, sizeof(nextIdx))
+                      == sizeof(nextIdx));
+            assert(nextIdx >= _currIdx); // idx entries are expected to be non-decreasing
+        }
+    } else {
+        if (static_cast(_datFile->GetPosition()) >= _datFileSize) {
+            retval = false; // end of data file
+        }
+    }
+    if (retval) {
+        retval = record.read(*this, nextIdx - _currIdx);
+        _currIdx = nextIdx;
+    }
+
+    return retval;
+}
+// Writes the record's values; when an idx file exists, also appends the updated running value count to it.
+bool
+AttributeFile::write(const Record & record)
+{
+    if (!record.write(*this)) {
+        return false; // value write failed; the idx file is left untouched
+    }
+    if (!_idxFile.get()) {
+        return true; // no idx file: nothing more to write
+    }
+    _currIdx += record.getValueCount();
+    return _idxFile->CheckedWrite(&_currIdx, sizeof(_currIdx));
+}
+
+// Returns a new, empty record object matching the configured basic type; the result is empty (null) for unhandled types.
+std::unique_ptr
+AttributeFile::getRecord()
+{
+    std::unique_ptr record;
+    switch (_config.basicType().type()) {
+        case BasicType::UINT1: // the three small unsigned types share the INT8 case below
+        case BasicType::UINT2:
+        case BasicType::UINT4:
+        case BasicType::INT8:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::INT16:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::INT32:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::INT64:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::FLOAT:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::DOUBLE:
+            record.reset(new FixedRecord());
+            break;
+        case BasicType::STRING:
+            record.reset(new VariableRecord());
+            break;
+        default: // any other basic type: record is never reset and stays empty
+            break;
+    }
+    return record;
+}
+
+// Writes all values to dest's data file and, when dest has a weight file, all weights to that file.
+template 
+bool
+AttributeFile::FixedRecord::onWrite(AttributeFile & dest) const
+{
+    bool retval(dest._datFile->CheckedWrite(&_data[0], // NOTE(review): &_data[0] on an empty _data — confirm zero-value records never get here
+                        _data.size() * sizeof(T)));
+    if (retval && dest._weightFile.get()) {
+        retval = dest._weightFile->CheckedWrite(&_weight[0],
+                _weight.size() * sizeof(int32_t));
+    }
+    return retval;
+}
+
+// Writes the raw data bytes to dest's data file and, when dest has a weight file, all weights to that file.
+bool
+AttributeFile::VariableRecord::onWrite(AttributeFile & dest) const
+{
+    bool retval(dest._datFile->CheckedWrite(&_data[0], _data.size())); // NOTE(review): &_data[0] on an empty _data — confirm
+    if (retval && dest._weightFile.get()) {
+        retval = dest._weightFile->CheckedWrite(&_weight[0],
+                _weight.size() * sizeof(int32_t));
+    }
+    return retval;
+}
+
+// Replaces the record data with the len bytes at v and drops any weights; len == 0 leaves the record empty.
+void
+AttributeFile::VariableRecord::setValue(const void * v, size_t len)
+{
+    _data.resize(len);
+    if (len != 0) { memcpy(&_data[0], v, len); } // &_data[0] is undefined on an empty vector, so skip the copy
+    _weight.clear();
+}
+// Number of values held: the weight count when weights exist, otherwise the number of zero bytes in the data.
+size_t
+AttributeFile::VariableRecord::getValueCount() const
+{
+    if (!_weight.empty()) {
+        return _weight.size();
+    }
+    size_t terminators = 0;
+    for (const auto &ch : _data) {
+        if (ch == 0) {
+            ++terminators;
+        }
+    }
+    return terminators;
+}
+
+// Reads numValues values from src's data file and, when src has a weight file, the same number of weights.
+template 
+bool
+AttributeFile::FixedRecord::onRead(AttributeFile &src, size_t numValues)
+{
+    bool retval(true);
+    _data.resize(numValues);
+    if (numValues) { // skip the read (and &_data[0]) for an empty record
+        const int bytesRead = src._datFile->Read(&_data[0],
+                _data.size() * sizeof(T));
+        retval = (bytesRead == int(_data.size() * sizeof(T)));
+    }
+    if (src._weightFile.get()) {
+        _weight.resize(numValues);
+        if (numValues && retval) {
+            const int bytesRead = src._weightFile->Read(&_weight[0],
+                    _weight.size() * sizeof(uint32_t));
+            retval = (bytesRead == int(_weight.size() * sizeof(uint32_t)));
+        }
+    }
+    return retval;
+}
+
+// Reads numValues zero-terminated byte sequences from src's data file, then the weights when src has a weight file.
+bool
+AttributeFile::VariableRecord::onRead(AttributeFile &src, size_t numValues)
+{
+    bool retval(true);
+    _data.resize(0);
+    if (numValues) {
+        size_t stringsRead(0);
+        for (int c; (stringsRead < numValues) &&
+                 ((c = src._datFile->GetByte()) >= 0); ) { // a negative GetByte() result ends the loop early
+             _data.push_back(c);
+             if (c == 0) {
+                 stringsRead++;
+             }
+        }
+        retval = (stringsRead == numValues); // false when the loop stopped before all strings were read
+    }
+    if (src._weightFile.get()) {
+        _weight.resize(numValues);
+        if (numValues && retval) {
+            const int bytesRead = src._weightFile->Read(&_weight[0],
+                    _weight.size() * sizeof(uint32_t));
+            retval = (bytesRead == int(_weight.size() * sizeof(uint32_t)));
+        }
+    }
+    return retval;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefile.h b/searchlib/src/vespa/searchlib/attribute/attributefile.h
new file mode 100644
index 00000000000..895b2472dfb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefile.h
@@ -0,0 +1,113 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+
+namespace search {
+
+namespace common {
+    class FileHeaderContext;
+}
+
+class AttributeFile
+{
+public:
+    class Record {
+    public:
+        virtual ~Record() { }
+        virtual size_t getValueCount() const = 0;
+        virtual void setValue(const void * v, size_t len) = 0;
+    protected:
+        std::vector _weight;
+    private:
+        bool write(AttributeFile & dest) const { return onWrite(dest); }
+        bool read(AttributeFile & src, size_t numValues) { return onRead(src, numValues); }
+        virtual bool onWrite(AttributeFile & dest) const = 0;
+        virtual bool onRead(AttributeFile & src, size_t numValues) = 0;
+
+        friend class AttributeFile;
+    };
+    template 
+    class FixedRecord : public Record
+    {
+    public:
+        virtual size_t getValueCount() const { return _data.size(); }
+    private:
+        virtual void
+        setValue(const void * v, size_t len) {
+            assert(len == sizeof(T));
+            (void) len;
+            _data.resize(1);
+            _weight.clear();
+            _data[0] = * static_cast(v);
+        }
+
+        virtual bool onWrite(AttributeFile & dest) const;
+        virtual bool onRead(AttributeFile & src, size_t numValues);
+
+        std::vector       _data;
+    };
+
+    class VariableRecord : public Record
+    {
+    public:
+        virtual size_t
+        getValueCount() const;
+    private:
+        virtual void setValue(const void * v, size_t len);
+        virtual bool onWrite(AttributeFile & dest) const;
+        virtual bool onRead(AttributeFile & src, size_t numValues);
+        std::vector    _data;
+    };
+protected:
+    typedef attribute::Config Config;
+public:
+    AttributeFile(const vespalib::string & fileName, const Config & config);
+
+    ~AttributeFile(void);
+
+    std::unique_ptr getRecord();
+    bool read(Record & record);
+    bool write(const Record & toWrite);
+    void enableDirectIO();
+protected:
+    void OpenReadOnly();
+    void OpenWriteOnly(const search::common::FileHeaderContext &
+                       fileHeaderContext,
+                       uint32_t docIdLimit);
+    void Close(void);
+    bool seekIdxPos(size_t idxPos);
+private:
+    uint32_t                          _currIdx;
+    std::unique_ptr  _datFile;
+    std::unique_ptr  _idxFile;
+    std::unique_ptr  _weightFile;
+    vespalib::string                  _fileName;
+    Config                            _config;
+    uint32_t                          _datHeaderLen;
+    uint32_t                          _idxHeaderLen;
+    uint32_t                          _weightHeaderLen;
+    uint64_t                          _datFileSize;
+    uint64_t                          _idxFileSize;
+};
+
+class ReadAttributeFile : public AttributeFile
+{
+public:
+    ReadAttributeFile(const vespalib::string &fileName, const Config &config);
+};
+
+class WriteAttributeFile : public AttributeFile
+{
+public:
+    WriteAttributeFile(const vespalib::string &fileName,
+                       const Config &config,
+                       const search::common::FileHeaderContext &
+                       fileHeaderContext,
+                       uint32_t docIdLimit);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
new file mode 100644
index 00000000000..15eceae889d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.cpp
@@ -0,0 +1,48 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributefilebufferwriter.h"
+
+namespace search
+{
+// Allocates a BUFFER_SIZE buffer from the file writer and directs BufferWriter output into its free area.
+AttributeFileBufferWriter::
+AttributeFileBufferWriter(IAttributeFileWriter &fileWriter)
+    : BufferWriter(),
+      _buf(),
+      _bytesWritten(0),
+      _incompleteBuffers(0),
+      _fileWriter(fileWriter)
+{
+    _buf = _fileWriter.allocBuf(BUFFER_SIZE);
+    assert(_buf->getFreeLen() >= BUFFER_SIZE);
+    setup(_buf->getFree(), BUFFER_SIZE);
+}
+
+// All written data must have been flushed before the writer is destroyed.
+AttributeFileBufferWriter::~AttributeFileBufferWriter()
+{
+    assert(usedLen() == 0);
+}
+
+// Passes the bytes written since the last flush to onFlush() and re-arms the buffer; only the last flush may be partial.
+void
+AttributeFileBufferWriter::flush()
+{
+    assert(_incompleteBuffers == 0); // all previous buffers must have been full
+    size_t nowLen = usedLen();
+    if (nowLen != BUFFER_SIZE) {
+        // buffer is not full, only allowed for last buffer
+        ++_incompleteBuffers; // also counted for an empty flush, so any later flush() trips the assert above
+    }
+    if (nowLen == 0) {
+        return; // empty buffer
+    }
+    assert(_buf->getDataLen() == 0);
+    onFlush(nowLen);
+    assert(_buf->getFreeLen() >= BUFFER_SIZE);
+    setup(_buf->getFree(), BUFFER_SIZE); // reuse the same buffer area for the next chunk
+    _bytesWritten += nowLen;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h
new file mode 100644
index 00000000000..580e4ed9f96
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilebufferwriter.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributefilewriter.h"
+#include 
+
+namespace search
+{
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * IAttributeFileWriter.
+ */
+class AttributeFileBufferWriter : public BufferWriter
+{
+protected:
+    using BufferBuf = IAttributeFileWriter::BufferBuf;
+    using Buffer = IAttributeFileWriter::Buffer;
+    Buffer _buf;
+    size_t _bytesWritten;
+    uint32_t _incompleteBuffers;
+    IAttributeFileWriter &_fileWriter;
+
+    virtual void onFlush(size_t nowLen) = 0;
+public:
+    static constexpr size_t BUFFER_SIZE = 4 * 1024 * 1024;
+
+    AttributeFileBufferWriter(IAttributeFileWriter &fileWriter);
+
+    virtual ~AttributeFileBufferWriter();
+
+    virtual void flush() override;
+
+    size_t getBytesWritten() const { return _bytesWritten; }
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
new file mode 100644
index 00000000000..e5b5b6567f8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.cpp
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.attributefilesavetarget");
+
+#include "attributefilesavetarget.h"
+#include "attributevector.h"
+#include 
+#include 
+#include 
+#include 
+
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+using common::FileHeaderContext;
+
+// Creates the four file writers; each receives _cfg by reference and the "desc" text later written to its header.
+AttributeFileSaveTarget::
+AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
+                        const FileHeaderContext &fileHeaderContext)
+    : IAttributeSaveTarget(),
+      _datWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+                 "Attribute vector data file"),
+      _idxWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+                 "Attribute vector idx file"),
+      _weightWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+                    "Attribute vector weight file"),
+      _udatWriter(tuneFileAttributes, fileHeaderContext, _cfg,
+                  "Attribute vector unique data file")
+{
+}
+
+// Opens .dat, then .udat (enumerated only), .idx (multi-value only) and .weight (weighted set only); false on first failure.
+bool
+AttributeFileSaveTarget::setup()
+{
+    const vespalib::string & base = _cfg.getFileName();
+    const vespalib::string datName(base + ".dat");
+    if (!_datWriter.open(datName)) {
+        return false;
+    }
+    if (_cfg.getEnumerated()) {
+        const vespalib::string udatName(base + ".udat");
+        if (!_udatWriter.open(udatName)) {
+            return false;
+        }
+    }
+
+    if (!_cfg.hasMultiValue()) {
+        return true; // no idx or weight file is opened in this case
+    }
+    const vespalib::string idxName(base + ".idx");
+    if (!_idxWriter.open(idxName)) {
+        return false;
+    }
+    if (!_cfg.hasWeightedSetType()) {
+        return true;
+    }
+    const vespalib::string weightName(base + ".weight");
+    return _weightWriter.open(weightName);
+}
+// Closes all four writers; AttributeFileWriter::close() does nothing for a writer whose file is not open.
+void
+AttributeFileSaveTarget::close()
+{
+    _datWriter.close();
+    _udatWriter.close();
+    _idxWriter.close();
+    _weightWriter.close();
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::datWriter()
+{
+    return _datWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::idxWriter()
+{
+    return _idxWriter;
+}
+
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::weightWriter()
+{
+    return _weightWriter;
+}
+
+IAttributeFileWriter &
+AttributeFileSaveTarget::udatWriter()
+{
+    return _udatWriter;
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
new file mode 100644
index 00000000000..9b931be4fb8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilesavetarget.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include "attributefilewriter.h"
+
+namespace search
+{
+
+/**
+ * Class used to save an attribute vector to file(s).
+ **/
+class AttributeFileSaveTarget : public IAttributeSaveTarget
+{
+private:
+    AttributeFileWriter _datWriter;
+    AttributeFileWriter _idxWriter;
+    AttributeFileWriter _weightWriter;
+    AttributeFileWriter _udatWriter;
+
+public:
+    AttributeFileSaveTarget(const TuneFileAttributes &tuneFileAttributes,
+                            const search::common::FileHeaderContext &
+                            fileHeaderContext);
+
+    // Implements IAttributeSaveTarget
+    /** Setups this saveTarget by opening the relevant files **/
+    virtual bool setup() override;
+
+    /** Closes the files used **/
+    virtual void close() override;
+
+    virtual IAttributeFileWriter &datWriter() override;
+    virtual IAttributeFileWriter &idxWriter() override;
+    virtual IAttributeFileWriter &weightWriter() override;
+    virtual IAttributeFileWriter &udatWriter() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
new file mode 100644
index 00000000000..d74b7b09c4e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.cpp
@@ -0,0 +1,213 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.attributefilewriter");
+
+#include "attributefilewriter.h"
+#include 
+#include 
+#include 
+#include 
+#include "attributefilebufferwriter.h"
+
+using search::common::FileHeaderContext;
+using vespalib::getLastErrorString;
+
+
+namespace search
+{
+
+namespace
+{
+
+const uint32_t headerAlign = 4096;   // passed to the vespalib::FileHeader constructor in this file
+const uint32_t MIN_ALIGNMENT = 4096; // smallest chunk size in writeDirectIOAligned() and second argument when allocating buffers
+// Writes length bytes from buf in chunks of 2 MiB, then of halving sizes down to MIN_ALIGNMENT, then the remainder.
+void
+writeDirectIOAligned(FastOS_FileInterface &file, const void *buf,
+                     size_t length)
+{
+    const char * data(static_cast(buf));
+    size_t remaining(length);
+    for (size_t maxChunk(2048*1024); maxChunk >= MIN_ALIGNMENT; maxChunk >>= 1) {
+        for ( ; remaining > maxChunk; remaining -= maxChunk, data += maxChunk) { // strict '>': a tail of exactly maxChunk is left for later
+            file.WriteBuf(data, maxChunk);
+        }
+    }
+    if (remaining > 0) {
+        file.WriteBuf(data, remaining); // final piece, at most MIN_ALIGNMENT bytes
+    }
+}
+
+// Reopens the named file and rewrites its header with a freeze time, "frozen" = 1 and the given "fileBitSize".
+void
+updateHeader(const vespalib::string &name, uint64_t fileBitSize)
+{
+    vespalib::FileHeader h(headerAlign);
+    FastOS_File f;
+    f.OpenReadWrite(name.c_str()); // NOTE(review): the result is not checked here — confirm how a failed open is handled
+    h.readFile(f);
+    FileHeaderContext::setFreezeTime(h);
+    typedef vespalib::GenericHeader::Tag Tag;
+    h.putTag(Tag("frozen", 1)); // addTags() wrote "frozen" = 0 and "fileBitSize" = 0 as placeholders
+    h.putTag(Tag("fileBitSize", fileBitSize));
+    h.rewriteFile(f);
+    f.Sync();
+    f.Close();
+}
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * AttributeFileWriter.
+ */
+class FileBackedBufferWriter : public AttributeFileBufferWriter
+{
+public:
+    FileBackedBufferWriter(AttributeFileWriter &fileWriter);
+
+    virtual ~FileBackedBufferWriter();
+
+    virtual void onFlush(size_t nowLen) override;
+};
+
+
+FileBackedBufferWriter::FileBackedBufferWriter(AttributeFileWriter &fileWriter)
+    : AttributeFileBufferWriter(fileWriter)
+{
+}
+
+
+FileBackedBufferWriter::~FileBackedBufferWriter()
+{
+}
+
+// Wraps the first nowLen bytes of _buf's free area in a new buffer object and hands it to the file writer.
+void
+FileBackedBufferWriter::onFlush(size_t nowLen) {
+    // Note: Must use const ptr to indicate that buffer is pre-filled.
+    Buffer buf(std::make_unique
+               ((const char *) _buf->getFree(), nowLen));
+    assert(buf->getDataLen() == nowLen);
+    assert(buf->getData() == _buf->getFree()); // the new buffer refers to _buf's memory rather than a copy
+    _fileWriter.writeBuf(std::move(buf));
+}
+}
+
+// Stores the tuning, header context, config and description for later use; no file is opened here.
+AttributeFileWriter::
+AttributeFileWriter(const TuneFileAttributes &tuneFileAttributes,
+                    const FileHeaderContext &fileHeaderContext,
+                    const IAttributeSaveTarget::Config &cfg,
+                    const vespalib::string &desc)
+    : _tuneFileAttributes(tuneFileAttributes),
+      _fileHeaderContext(fileHeaderContext),
+      _cfg(cfg),
+      _desc(desc),
+      _fileBitSize(0)
+{
+}
+
+
+AttributeFileWriter::~AttributeFileWriter()
+{
+}
+
+// Applies the sync-write / direct-I/O tuning, opens fileName truncating and writes the header; false (after logging) on open failure.
+bool
+AttributeFileWriter::open(const vespalib::string &fileName)
+{
+    if (_tuneFileAttributes._write.getWantSyncWrites()) {
+        _file.EnableSyncWrites();
+    }
+    if (_tuneFileAttributes._write.getWantDirectIO()) {
+        _file.EnableDirectIO();
+    }
+    _file.OpenWriteOnlyTruncate(fileName.c_str());
+    if (!_file.IsOpened()) { // success is detected via IsOpened() rather than the open call's result
+        LOG(error, "Could not open attribute vector '%s' for writing: %s",
+            fileName.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    writeHeader();
+    return true;
+}
+
+// Writes the file header (context tags plus addTags()) and starts _fileBitSize at the header size in bits.
+void
+AttributeFileWriter::writeHeader()
+{
+    vespalib::FileHeader header(headerAlign);
+    _fileHeaderContext.addTags(header, _file.GetFileName());
+    addTags(header);
+    size_t headerLen = header.writeFile(_file);
+    assert((headerLen % MIN_ALIGNMENT) == 0); // the header is expected to end on a MIN_ALIGNMENT boundary
+    _fileBitSize = headerLen * 8;
+}
+
+// Adds the attribute-specific tags from _cfg and _desc to header; optional tags are added only when set.
+void
+AttributeFileWriter::addTags(vespalib::GenericHeader &header)
+{
+    typedef vespalib::GenericHeader::Tag Tag;
+    header.putTag(Tag("datatype", _cfg.getBasicType()));
+    header.putTag(Tag("collectiontype", _cfg.getCollectionType()));
+    header.putTag(Tag("uniqueValueCount", _cfg.getUniqueValueCount()));
+    header.putTag(Tag("totalValueCount", _cfg.getTotalValueCount()));
+    header.putTag(Tag("docIdLimit", _cfg.getNumDocs()));
+    header.putTag(Tag("frozen", 0)); // placeholder, replaced by updateHeader() on close
+    header.putTag(Tag("fileBitSize", 0)); // placeholder, replaced by updateHeader() on close
+    header.putTag(Tag("version", _cfg.getVersion()));
+    if (_cfg.getEnumerated()) {
+        header.putTag(Tag("enumerated", 1));
+    }
+    uint64_t createSerialNum = _cfg.getCreateSerialNum();
+    if (createSerialNum != 0u) {
+        header.putTag(Tag("createSerialNum", createSerialNum));
+    }
+    const vespalib::string &tensorType = _cfg.getTensorType();
+    if (!tensorType.empty()) {
+        header.putTag(Tag("tensortype", tensorType));
+    }
+    header.putTag(Tag("desc", _desc));
+}
+
+// Returns a new buffer created from size and MIN_ALIGNMENT (presumably the alignment of the buffer — TODO confirm).
+AttributeFileWriter::Buffer
+AttributeFileWriter::allocBuf(size_t size)
+{
+    return std::make_unique(size, MIN_ALIGNMENT);
+}
+
+// Writes the buffer's data to the file and adds its length in bits to _fileBitSize.
+void
+AttributeFileWriter::writeBuf(Buffer buf)
+{
+    size_t bufLen = buf->getDataLen();
+    // TODO: pad to DirectIO boundary when burning bridges
+    writeDirectIOAligned(_file, buf->getData(), bufLen);
+    _fileBitSize += bufLen * 8;
+}
+
+// If the file is open: syncs and closes it, then rewrites its header with "frozen" = 1 and the final bit size.
+void
+AttributeFileWriter::close()
+{
+    if (_file.IsOpened()) {
+        _file.Sync();
+        _file.Close();
+        updateHeader(_file.GetFileName(), _fileBitSize); // reopens the file by name
+    }
+}
+
+// Returns a new buffer writer constructed from *this (presumably a FileBackedBufferWriter — TODO confirm).
+std::unique_ptr
+AttributeFileWriter::allocBufferWriter()
+{
+    return std::make_unique(*this);
+}
+
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h
new file mode 100644
index 00000000000..fa0fd22b837
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributefilewriter.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include "iattributefilewriter.h"
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+class TuneFileAttributes;
+
+/*
+ * Class to write to a single attribute vector file. Used by
+ * AttributeFileSaveTarget.
+ *
+ * The tuning, file header context and config objects are held by
+ * reference, so they must outlive the writer.
+ */
+class AttributeFileWriter : public IAttributeFileWriter
+{
+    FastOS_File _file;
+    const TuneFileAttributes &_tuneFileAttributes;
+    const search::common::FileHeaderContext &_fileHeaderContext;
+    const IAttributeSaveTarget::Config &_cfg;
+    vespalib::string _desc;
+    // Number of bits written through writeBuf(); passed to the header
+    // update performed by close().
+    uint64_t _fileBitSize;
+
+    // Adds tags taken from the config and description to the header.
+    void addTags(vespalib::GenericHeader &header);
+
+    void writeHeader();
+public:
+    AttributeFileWriter(const TuneFileAttributes &tuneFileAttributes,
+                        const search::common::FileHeaderContext &
+                        fileHeaderContext,
+                        const IAttributeSaveTarget::Config &cfg,
+                        const vespalib::string &desc);
+    ~AttributeFileWriter();
+    // Allocates a buffer of the given size aligned to MIN_ALIGNMENT.
+    virtual Buffer allocBuf(size_t size) override;
+    // Writes the data in buf to the file and accounts for its size.
+    virtual void writeBuf(Buffer buf) override;
+    // Creates a buffer writer bound to this file writer.
+    virtual std::unique_ptr allocBufferWriter() override;
+    bool open(const vespalib::string &fileName);
+    // Syncs and closes the file if it is open, then updates its header.
+    void close();
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp b/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp
new file mode 100644
index 00000000000..11c473ed53f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeguard.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributeguard.h"
+
+namespace search {
+
+// Creates a guard with a default-constructed base, i.e. without a component.
+AttributeGuard::AttributeGuard()
+    : ComponentGuard()
+{ }
+
+// Creates a guard for the given attribute vector; all work is done by the
+// ComponentGuard base constructor.
+AttributeGuard::AttributeGuard(const AttributeVector::SP & attr)
+    : ComponentGuard(attr)
+{ }
+
+// Guards attr and, when the guard is valid, takes the attribute's enum lock
+// in shared mode (see takeLock()).
+AttributeEnumGuard::AttributeEnumGuard(const AttributeVector::SP & attr)
+    : AttributeGuard(attr),
+      _lock()
+{
+    takeLock();
+}
+
+// Copies an existing attribute guard and, when the copy is valid, takes the
+// attribute's enum lock in shared mode (see takeLock()).
+AttributeEnumGuard::AttributeEnumGuard(const AttributeGuard & attr)
+    : AttributeGuard(attr),
+      _lock()
+{
+    takeLock();
+}
+
+// Acquires the guarded attribute's enum lock in shared mode and keeps it in
+// _lock. Nothing is locked when the guard holds no component.
+void AttributeEnumGuard::takeLock() {
+    if (valid()) {
+        // Built deferred (not yet locked), moved into the member, and only
+        // then locked, so the member owns the lock from the moment it is held.
+        std::shared_lock take(get().getEnumLock(),
+                                                       std::defer_lock);
+        _lock = std::move(take);
+        _lock.lock();
+    }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeguard.h b/searchlib/src/vespa/searchlib/attribute/attributeguard.h
new file mode 100644
index 00000000000..42eb381a9d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeguard.h
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+
+namespace search {
+
+/**
+ * General class for guarding a component that is using an underlying generation handler.
+ *
+ * Holds a shared pointer to the component together with a generation guard
+ * taken from it. T must provide a nested SP type and takeGenerationGuard().
+ **/
+template 
+class ComponentGuard
+{
+private:
+    // Declared before _generationGuard on purpose: the initializer of
+    // _generationGuard reads _component through valid().
+    typename T::SP           _component;
+    typedef vespalib::GenerationHandler::Guard Guard;
+    Guard _generationGuard;
+public:
+    // Default-constructs both members.
+    ComponentGuard();
+    virtual ~ComponentGuard() { }
+    /**
+     * Creates a guard for the shared pointer of the given component.
+     **/
+    ComponentGuard(const typename T::SP & component);
+    // The accessors below dereference _component without checking it;
+    // use valid() first when the guard may be empty.
+    const T & get()          const { return *_component; }
+
+    const typename T::SP & getSP(void) const { return _component; }
+    const T * operator -> () const { return _component.get(); }
+    const T & operator * ()  const { return *_component.get(); }
+    T & get()                      { return *_component; }
+    T * operator -> ()             { return _component.get(); }
+    T & operator * ()              { return *_component.get(); }
+    // True when the held pointer is non-NULL.
+    bool valid()             const { return _component.get() != NULL; }
+};
+
+// Default constructor: default-constructs the component pointer and the
+// generation guard.
+template 
+ComponentGuard::ComponentGuard() :
+    _component(),
+    _generationGuard()
+{
+}
+
+// Stores the component pointer and takes a generation guard from it. When
+// the pointer is empty a default-constructed Guard is used instead.
+template 
+ComponentGuard::ComponentGuard(const typename T::SP & component) :
+    _component(component),
+    // valid() inspects _component, which is already initialized here because
+    // it is declared before _generationGuard in the class.
+    _generationGuard(valid() ? _component->takeGenerationGuard() : Guard())
+{
+}
+
+/**
+ * This class makes sure that you will have a consistent view per document in the attribute vector
+ * while the guard is held.
+ *
+ * It adds no state of its own; the guarding is done by the ComponentGuard base.
+ **/
+class AttributeGuard : public ComponentGuard
+{
+public:
+    typedef std::unique_ptr UP;
+    typedef std::shared_ptr SP;
+    // Creates a guard without a component.
+    AttributeGuard();
+    // Creates a guard for the given attribute vector.
+    AttributeGuard(const AttributeVector::SP & attribute);
+};
+
+/**
+ * This class makes sure that the attribute vector is not updated with enum changes while the guard is held.
+ *
+ * Both constructors call takeLock(), which takes the attribute's enum lock
+ * in shared mode when the guard is valid. The class is noncopyable.
+ **/
+class AttributeEnumGuard : public AttributeGuard, public boost::noncopyable
+{
+public:
+    explicit AttributeEnumGuard(const AttributeVector::SP & attribute);
+    explicit AttributeEnumGuard(const AttributeGuard & attribute);
+private:
+    // Shared lock on the attribute's enum lock; left default-constructed
+    // when the guard holds no component.
+    mutable std::shared_lock _lock;
+    void takeLock();
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp
new file mode 100644
index 00000000000..ff0b5d4514a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.cpp
@@ -0,0 +1,237 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include "attributeiterators.h"
+#include "attributeiterators.hpp"
+#include "postinglistattribute.h"
+
+namespace search {
+
+using queryeval::MinMaxPostingInfo;
+using fef::TermFieldMatchData;
+
+// Binds the iterator to matchData, appends one default-constructed position
+// to it and caches the pointer returned by getPositions(). matchData is
+// dereferenced unconditionally and must therefore not be NULL.
+AttributeIteratorBase::AttributeIteratorBase(TermFieldMatchData * matchData)
+    : _matchData(matchData),
+      _matchPosition(NULL)
+{
+    fef::TermFieldMatchDataPosition defaultPosition;
+    _matchData->appendPosition(defaultPosition);
+    // Fetch the pointer only after the append.
+    _matchPosition = _matchData->getPositions();
+}
+
+// Visits the SearchIterator members, then the field id and doc id of the
+// term field match data.
+void
+AttributeIteratorBase::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    SearchIterator::visitMembers(visitor);
+    visit(visitor, "tfmd.fieldId", _matchData->getFieldId());
+    visit(visitor, "tfmd.docId", _matchData->getDocId());
+}
+
+// Visits the base class members, then the doc id limit.
+void
+FilterAttributeIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AttributeIteratorBase::visitMembers(visitor);
+    visit(visitor, "docIdLimit", _docIdLimit);
+}
+
+// Visits the base class members, then the doc id limit and current weight.
+void
+AttributeIterator::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AttributeIteratorBase::visitMembers(visitor);
+    visit(visitor, "docIdLimit", _docIdLimit);
+    visit(visitor, "weight", _weight);
+}
+
+
+// Unpacks a hit by resetting only the doc id of the match data; no element
+// weight is written here.
+void
+FlagAttributeIterator::doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+}
+
+// Stores the caller-supplied hasWeight flag; the match data is handled by
+// the AttributeIteratorBase constructor.
+AttributePostingListIterator::
+    AttributePostingListIterator(bool hasWeight,
+                             TermFieldMatchData *matchData)
+    : AttributeIteratorBase(matchData),
+      _hasWeight(hasWeight)
+      // Earlier form, kept for reference:
+      // _hasWeight(_searchContext.attribute().hasWeightedSetType())
+{
+}
+
+// Forwards the match data to the AttributeIteratorBase constructor.
+FilterAttributePostingListIterator::
+FilterAttributePostingListIterator(TermFieldMatchData *matchData)
+    : AttributeIteratorBase(matchData)
+{
+}
+
+// Unpacks a hit: resets the doc id of the match data and writes the current
+// weight to the match position.
+void
+AttributeIterator::doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+    _matchPosition->setElementWeight(_weight);
+}
+
+
+// Unpacks a hit by resetting only the doc id; the element weight was set to
+// 1 once in the FilterAttributeIterator constructor.
+void
+FilterAttributeIterator::doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+}
+
+// Explicit specialization: always writes getWeight() as the element weight,
+// without consulting _hasWeight as the generic doUnpack() does.
+template <>
+void
+AttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+    _matchPosition->setElementWeight(getWeight());
+}
+
+
+// Explicit specialization: always writes getWeight() as the element weight,
+// without consulting _hasWeight as the generic doUnpack() does.
+template <>
+void
+AttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+    _matchPosition->setElementWeight(getWeight());
+}
+
+
+// Explicit specialization: resets only the doc id of the match data.
+template <>
+void
+FilterAttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+}
+
+
+// Explicit specialization: resets only the doc id of the match data.
+template <>
+void
+FilterAttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+AttributePostingListIteratorT::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes the
+// min and max taken from the iterator's aggregated value and marks the
+// posting info as valid.
+template <>
+void
+AttributePostingListIteratorT::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        const btree::MinMaxAggregated &a(_iterator.getAggregated());
+        _postingInfo = MinMaxPostingInfo(a.getMin(), a.getMax());
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+AttributePostingListIteratorT >::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes the
+// min and max taken from the iterator's aggregated value and marks the
+// posting info as valid.
+template <>
+void
+AttributePostingListIteratorT >::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        // Bind by const reference, as the sibling specialization does; this
+        // is valid whether getAggregated() returns a reference or a value.
+        const btree::MinMaxAggregated &a(_iterator.getAggregated());
+        _postingInfo = MinMaxPostingInfo(a.getMin(), a.getMax());
+        _postingInfoValid = true;
+    }
+}
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+FilterAttributePostingListIteratorT::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+FilterAttributePostingListIteratorT::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+FilterAttributePostingListIteratorT >::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+// Explicit specialization: when the posting iterator is valid, publishes a
+// fixed MinMaxPostingInfo(1, 1) and marks the posting info as valid.
+template <>
+void
+FilterAttributePostingListIteratorT >::
+setupPostingInfo(void)
+{
+    if (_iterator.valid()) {
+        _postingInfo = MinMaxPostingInfo(1, 1);
+        _postingInfoValid = true;
+    }
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.h b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h
new file mode 100644
index 00000000000..e2cf6a96e49
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.h
@@ -0,0 +1,567 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+#include "attributevector.h"
+#include 
+#include 
+#include 
+#include "dociditerator.h"
+
+namespace search {
+
+/**
+ * Common base class for the attribute search iterators in this file.
+ * Holds the term field match data that unpacking writes to, plus a pointer
+ * to its positions obtained by the constructor. Reports itself as
+ * non-strict; strict subclasses override is_strict().
+ **/
+class AttributeIteratorBase : public queryeval::SearchIterator
+{
+public:
+    AttributeIteratorBase(fef::TermFieldMatchData * matchData);
+    Trinary is_strict() const override { return Trinary::False; }
+
+protected:
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+    fef::TermFieldMatchData         * _matchData;
+    fef::TermFieldMatchDataPosition * _matchPosition;
+};
+
+
+/**
+ * Non-template base for iterators over documents that are results for
+ * the subquery represented by a search context whose attribute vector
+ * does not use posting lists.
+ *
+ * Keeps the doc id limit and the weight that doUnpack() writes to the
+ * match position; the weight starts out as 1. The search context itself
+ * is held by the templated subclasses.
+ */
+class AttributeIterator : public AttributeIteratorBase
+{
+public:
+    AttributeIterator(fef::TermFieldMatchData * matchData, uint32_t docIdLimit)
+        : AttributeIteratorBase(matchData), _docIdLimit(docIdLimit), _weight(1)
+    { }
+
+protected:
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+    void doUnpack(uint32_t docId) override;
+
+    uint32_t   _docIdLimit;
+    int32_t    _weight;
+};
+
+/**
+ * Non-template base for filter iterators over attribute vectors. The
+ * element weight of the match position is set to 1 once at construction;
+ * unpacking afterwards only resets the doc id.
+ */
+class FilterAttributeIterator : public AttributeIteratorBase
+{
+public:
+    FilterAttributeIterator(fef::TermFieldMatchData * matchData, uint32_t docIdLimit)
+        : AttributeIteratorBase(matchData), _docIdLimit(docIdLimit)
+    {
+        _matchPosition->setElementWeight(1);
+    }
+
+protected:
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+    void doUnpack(uint32_t docId) override;
+
+    uint32_t   _docIdLimit;
+};
+
+/**
+ * Non-strict attribute iterator bound to a search context of type SC.
+ * The search context is held by reference and must outlive the iterator.
+ */
+template 
+class AttributeIteratorT : public AttributeIterator
+{
+private:
+    void doSeek(uint32_t docId) override;
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+protected:
+    const SC & _searchContext;
+
+public:
+    AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData);
+    // Asks the search context directly whether docId matches, without
+    // updating the iterator position.
+    bool seekFast(uint32_t docId) const { return _searchContext.cmp(docId); }
+};
+
+
+/**
+ * Non-strict filter iterator bound to a search context of type SC.
+ * The search context is held by reference and must outlive the iterator.
+ */
+template 
+class FilterAttributeIteratorT : public FilterAttributeIterator
+{
+private:
+    void doSeek(uint32_t docId) override;
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+protected:
+    const SC & _searchContext;
+
+public:
+    FilterAttributeIteratorT(const SC &searchContext,
+                             fef::TermFieldMatchData *matchData);
+    // Asks the search context directly whether docId matches, without
+    // updating the iterator position.
+    bool seekFast(uint32_t docId) const { return _searchContext.cmp(docId); }
+};
+
+
+/**
+ * This class acts as a strict iterator over documents that are
+ * results for the subquery represented by the search context object
+ * associated with this iterator.  The search context object contains
+ * an attribute vector that does not use posting lists.
+ *
+ * Its doSeek() scans forward from the requested doc id until a match is
+ * found or the doc id limit is reached.
+ *
+ * @param SC the specialized search context type associated with this iterator
+ */
+template 
+class AttributeIteratorStrict : public AttributeIteratorT
+{
+private:
+    // Bring members of the dependent base class into scope.
+    using AttributeIteratorT::_docIdLimit;
+    using AttributeIteratorT::_searchContext;
+    using AttributeIteratorT::setDocId;
+    using AttributeIteratorT::setAtEnd;
+    using AttributeIteratorT::_weight;
+    using Trinary=vespalib::Trinary;
+    void doSeek(uint32_t docId) override;
+    Trinary is_strict() const override { return Trinary::True; }
+public:
+    AttributeIteratorStrict(const SC &searchContext, fef::TermFieldMatchData * matchData)
+        : AttributeIteratorT(searchContext, matchData)
+    {
+    }
+};
+
+
+/**
+ * Strict variant of FilterAttributeIteratorT: its doSeek() scans forward
+ * from the requested doc id until a match is found or the doc id limit is
+ * reached.
+ */
+template 
+class FilterAttributeIteratorStrict : public FilterAttributeIteratorT
+{
+private:
+    // Bring members of the dependent base class into scope.
+    using FilterAttributeIteratorT::_docIdLimit;
+    using FilterAttributeIteratorT::_searchContext;
+    using FilterAttributeIteratorT::setDocId;
+    using FilterAttributeIteratorT::setAtEnd;
+    using Trinary=vespalib::Trinary;
+    void doSeek(uint32_t docId) override;
+    Trinary is_strict() const override { return Trinary::True; }
+public:
+    FilterAttributeIteratorStrict(const SC &searchContext, fef::TermFieldMatchData * matchData)
+        : FilterAttributeIteratorT(searchContext, matchData)
+    {
+    }
+};
+
+
+// Non-strict seek: tests only the requested doc id. Moves to the end when
+// docId is at or beyond the limit, sets the doc id on a match, and leaves
+// the position unchanged otherwise. _weight is handed to cmp().
+template 
+void
+AttributeIteratorT::doSeek(uint32_t docId)
+{
+    // Running past the limit is hinted to the compiler as the unlikely case.
+    if (__builtin_expect(docId >= _docIdLimit, false)) {
+        setAtEnd();
+    } else if (_searchContext.cmp(docId, _weight)) {
+        setDocId(docId);
+    }
+}
+
+// Non-strict seek: tests only the requested doc id. Moves to the end when
+// docId is at or beyond the limit, sets the doc id on a match, and leaves
+// the position unchanged otherwise.
+template 
+void
+FilterAttributeIteratorT::doSeek(uint32_t docId)
+{
+    // Running past the limit is hinted to the compiler as the unlikely case.
+    if (__builtin_expect(docId >= _docIdLimit, false)) {
+        setAtEnd();
+    } else if (_searchContext.cmp(docId)) {
+        setDocId(docId);
+    }
+}
+
+// Strict seek: scans doc ids from docId up to (not including) the limit and
+// stops at the first one the search context accepts; moves to the end when
+// none is found. _weight is handed to cmp() for each candidate.
+template 
+void
+AttributeIteratorStrict::doSeek(uint32_t docId)
+{
+    for (uint32_t nextId = docId; nextId < _docIdLimit; ++nextId) {
+        if (_searchContext.cmp(nextId, _weight)) {
+            setDocId(nextId);
+            return;
+        }
+    }
+    setAtEnd();
+}
+
+// Strict seek: scans doc ids from docId up to (not including) the limit and
+// stops at the first one the search context accepts; moves to the end when
+// none is found.
+template 
+void
+FilterAttributeIteratorStrict::doSeek(uint32_t docId)
+{
+    for (uint32_t nextId = docId; nextId < _docIdLimit; ++nextId) {
+        if (_searchContext.cmp(nextId)) {
+            setDocId(nextId);
+            return;
+        }
+    }
+    setAtEnd();
+}
+
+// Visits the AttributeIterator members, then the attribute name and query
+// term of the search context.
+template 
+void
+AttributeIteratorT::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AttributeIterator::visitMembers(visitor);
+    visit(visitor, "searchcontext.attribute", _searchContext.attribute().getName());
+    visit(visitor, "searchcontext.queryterm", _searchContext.queryTerm());
+}
+
+// Visits the FilterAttributeIterator members, then the attribute name and
+// query term of the search context.
+template 
+void
+FilterAttributeIteratorT::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    FilterAttributeIterator::visitMembers(visitor);
+    visit(visitor, "searchcontext.attribute", _searchContext.attribute().getName());
+    visit(visitor, "searchcontext.queryterm", _searchContext.queryTerm());
+}
+
+// Binds the iterator to searchContext and reads the doc id limit from the
+// committed doc id limit of the context's attribute at construction time.
+template 
+AttributeIteratorT::AttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData)
+    : AttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()),
+      _searchContext(searchContext)
+{
+}
+
+
+// Binds the iterator to searchContext and reads the doc id limit from the
+// committed doc id limit of the context's attribute at construction time.
+template 
+FilterAttributeIteratorT::FilterAttributeIteratorT(const SC &searchContext, fef::TermFieldMatchData *matchData)
+    : FilterAttributeIterator(matchData, searchContext._attr.getCommittedDocIdLimit()),
+      _searchContext(searchContext)
+{
+}
+
+
+/**
+ * Non-template base for iterators over documents that are results for
+ * the subquery represented by a search context whose attribute vector
+ * uses underlying posting lists.  The posting list iterator itself is
+ * held by the templated subclass.  This iterator is always strict.
+ *
+ * _hasWeight is supplied by the caller and selects how the generic
+ * doUnpack() in the templated subclass computes the element weight.
+ */
+class AttributePostingListIterator : public AttributeIteratorBase
+{
+public:
+    AttributePostingListIterator(bool hasWeight, fef::TermFieldMatchData *matchData);
+    Trinary is_strict() const override { return Trinary::True; }
+
+protected:
+    bool  _hasWeight;
+};
+
+
+/**
+ * Non-template base for filter iterators over posting lists.
+ * This iterator is always strict.
+ */
+class FilterAttributePostingListIterator : public AttributeIteratorBase
+{
+public:
+    FilterAttributePostingListIterator(fef::TermFieldMatchData *matchData);
+    Trinary is_strict() const override { return Trinary::True; }
+};
+
+
+// Names for the B-tree const iterator types used as posting list iterators
+// by the posting list iterator templates below.
+typedef btree::BTreeConstIterator,
+                                  btree::BTreeDefaultTraits>
+InnerAttributePostingListIterator;
+
+typedef btree::BTreeConstIterator,
+                                  btree::BTreeDefaultTraits>
+WeightedInnerAttributePostingListIterator; 
+
+/**
+ * Posting list based attribute iterator that walks a posting list iterator
+ * of type PL. The iterator is taken over from the constructor argument.
+ */
+template 
+class AttributePostingListIteratorT : public AttributePostingListIterator
+{
+private:
+    PL                                     _iterator;
+    queryeval::MinMaxPostingInfo           _postingInfo;
+    // Set by the setupPostingInfo() specializations once _postingInfo has
+    // been filled in.
+    bool                                   _postingInfoValid;
+
+    void doSeek(uint32_t docId) override;
+    void doUnpack(uint32_t docId) override;
+    // Generic version does nothing; explicit specializations are declared
+    // further down in this header.
+    void setupPostingInfo() { }
+    int32_t getWeight() { return _iterator.getData(); }
+
+    // Returns NULL until the posting info has been marked valid.
+    const queryeval::PostingInfo * getPostingInfo() const override {
+        return _postingInfoValid ? &_postingInfo : NULL;
+    }
+
+    // Positions the posting iterator at the first key >= begin and either
+    // adopts that key as the current doc id or moves to the end.
+    void initRange(uint32_t begin, uint32_t end) override {
+        AttributePostingListIterator::initRange(begin, end);
+        _iterator.lower_bound(begin);
+        if (!_iterator.valid() || isAtEnd(_iterator.getKey())) {
+            setAtEnd();
+        } else {
+            setDocId(_iterator.getKey());
+        }
+    }
+
+public:
+    // Note: iterator constructor argument is destroyed
+    // (its content is swapped into _iterator by the constructor).
+    AttributePostingListIteratorT(PL &iterator,
+                                  bool hasWeight,
+                                  fef::TermFieldMatchData *matchData);
+};
+
+/**
+ * Posting list based filter iterator that walks a posting list iterator of
+ * type PL. The iterator is taken over from the constructor argument.
+ */
+template 
+class FilterAttributePostingListIteratorT
+    : public FilterAttributePostingListIterator
+{
+private:
+    PL                                     _iterator;
+    queryeval::MinMaxPostingInfo           _postingInfo;
+    // Set by the setupPostingInfo() specializations once _postingInfo has
+    // been filled in.
+    bool                                   _postingInfoValid;
+
+    void doSeek(uint32_t docId) override;
+    void doUnpack(uint32_t docId) override;
+    // Generic version does nothing; explicit specializations are declared
+    // further down in this header.
+    void setupPostingInfo() { }
+
+    // Returns NULL until the posting info has been marked valid.
+    const queryeval::PostingInfo * getPostingInfo() const override {
+        return _postingInfoValid ? &_postingInfo : NULL;
+    }
+    
+    // Positions the posting iterator at the first key >= begin and either
+    // adopts that key as the current doc id or moves to the end.
+    void initRange(uint32_t begin, uint32_t end) override {
+        FilterAttributePostingListIterator::initRange(begin, end);
+        _iterator.lower_bound(begin);
+        if (!_iterator.valid() || isAtEnd(_iterator.getKey())) {
+            setAtEnd();
+        } else {
+            setDocId(_iterator.getKey());
+        }
+    }
+
+public:
+    // Note: iterator constructor argument is destroyed
+    // (its content is swapped into _iterator by the constructor).
+    FilterAttributePostingListIteratorT(PL &iterator,
+                                        fef::TermFieldMatchData *matchData);
+};
+
+
+// Explicit specialization of getWeight() that returns a constant instead of
+// reading the posting iterator's data.
+template <>
+inline int32_t
+AttributePostingListIteratorT<
+    btree::BTreeConstIterator,
+                              btree::BTreeDefaultTraits> >::
+getWeight()
+{
+    return 1;	// default weight 1 for single value attributes
+}
+
+// Declarations of the explicit specializations of doUnpack() and
+// setupPostingInfo() whose definitions live in attributeiterators.cpp.
+// Declaring them here keeps the generic versions from being instantiated
+// for these types in other translation units.
+template <>
+void
+AttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId);
+
+
+template <>
+void
+AttributePostingListIteratorT,
+                   btree::BTreeDefaultTraits> >::
+doUnpack(uint32_t docId);
+
+
+template <>
+void
+AttributePostingListIteratorT::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT >::
+setupPostingInfo();
+
+
+template <>
+void
+AttributePostingListIteratorT >::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT >::
+setupPostingInfo();
+
+
+template <>
+void
+FilterAttributePostingListIteratorT >::
+setupPostingInfo();
+
+
+// Takes over the content of the caller's posting iterator by swapping it
+// into a default-constructed member, then lets setupPostingInfo() fill in
+// the posting info. The posting info starts as (1, 1) and marked invalid.
+template 
+AttributePostingListIteratorT::
+AttributePostingListIteratorT(PL &iterator,
+                              bool hasWeight,
+                              fef::TermFieldMatchData *matchData)
+    : AttributePostingListIterator(hasWeight, matchData),
+      _iterator(),
+      _postingInfo(1, 1),
+      _postingInfoValid(false)
+{
+    // After the swap the caller's iterator holds the default-constructed state.
+    _iterator.swap(iterator);
+    setupPostingInfo();
+}
+
+
+// Takes over the content of the caller's posting iterator by swapping it
+// into a default-constructed member, lets setupPostingInfo() fill in the
+// posting info, and sets the element weight of the match position to 1.
+template 
+FilterAttributePostingListIteratorT::
+FilterAttributePostingListIteratorT(PL &iterator,
+                              fef::TermFieldMatchData *matchData)
+    : FilterAttributePostingListIterator(matchData),
+      _iterator(),
+      _postingInfo(1, 1),
+      _postingInfoValid(false)
+{
+    // After the swap the caller's iterator holds the default-constructed state.
+    _iterator.swap(iterator);
+    setupPostingInfo();
+    // The weight is written once here; doUnpack() only resets the doc id.
+    _matchPosition->setElementWeight(1);
+}
+
+/**
+ * Non-template base for iterators over a flag attribute. Unpacking only
+ * resets the doc id of the match data.
+ */
+class FlagAttributeIterator : public AttributeIteratorBase
+{
+public:
+    FlagAttributeIterator(fef::TermFieldMatchData * matchData)
+        : AttributeIteratorBase(matchData)
+    { }
+
+protected:
+    void doUnpack(uint32_t docId) override;
+};
+
+/**
+ * Non-strict flag attribute iterator bound to a search context of type SC.
+ * The search context is held by reference and must outlive the iterator.
+ */
+template 
+class FlagAttributeIteratorT : public FlagAttributeIterator
+{
+private:
+    void doSeek(uint32_t docId) override;
+
+protected:
+    const SC & _sc;
+    // Committed doc id limit of the attribute, read at construction time.
+    uint32_t   _docIdLimit;
+
+public:
+    FlagAttributeIteratorT(const SC &sc, fef::TermFieldMatchData * matchData)
+        : FlagAttributeIterator(matchData),
+          _sc(sc),
+          _docIdLimit(static_cast
+                      (sc.attribute()).getCommittedDocIdLimit())
+    {
+    }
+
+    // Goes straight to the end when the search context is flagged as
+    // having zero hits.
+    void initRange(uint32_t begin, uint32_t end) override {
+        FlagAttributeIterator::initRange(begin, end);
+        if ( _sc._zeroHits ) {
+            setAtEnd();
+        }
+    }
+
+};
+
+/**
+ * Strict variant of FlagAttributeIteratorT: its doSeek() advances to the
+ * next matching doc id when the requested one does not match.
+ */
+template 
+class FlagAttributeIteratorStrict : public FlagAttributeIteratorT
+{
+private:
+    // Bring members of the dependent base class into scope.
+    using FlagAttributeIteratorT::_docIdLimit;
+    using FlagAttributeIteratorT::_sc;
+    using FlagAttributeIteratorT::setDocId;
+    using FlagAttributeIteratorT::setAtEnd;
+    using Trinary=vespalib::Trinary;
+    void doSeek(uint32_t docId) override;
+    Trinary is_strict() const override { return Trinary::True; }
+
+public:
+    FlagAttributeIteratorStrict(const SC &sc,
+                                fef::TermFieldMatchData *matchData)
+        : FlagAttributeIteratorT(sc, matchData)
+    {
+    }
+};
+
+// Strict seek over the bit vectors for the values sc._low..sc._high.
+// First checks whether docId itself is set in any of them; if not, picks
+// the smallest next set bit across all of them, or moves to the end when
+// that is at or beyond the doc id limit.
+template 
+void
+FlagAttributeIteratorStrict::doSeek(uint32_t docId)
+{
+    const SC & sc(_sc);
+    const typename SC::Attribute &attr =
+        static_cast(sc.attribute());
+    // Pass 1: exact hit on docId in any bit vector (missing vectors skipped).
+    for (int i = sc._low; (i <= sc._high); ++i) {
+        const BitVector * bv = attr.getBitVector(i);
+        if ((bv != NULL) && docId < _docIdLimit && bv->testBit(docId)) {
+            setDocId(docId);
+            return;
+        }
+    }
+
+    // Pass 2: minimum over the next set bit of each bit vector.
+    // NOTE(review): presumably getNextTrueBit(docId) returns the first set
+    // bit at or after docId - confirm against BitVector.
+    uint32_t minNextBit(search::endDocId);
+    for (int i = sc._low; (i <= sc._high); ++i) {
+        const BitVector * bv = attr.getBitVector(i);
+        // docId < _docIdLimit does not depend on i; when it is false,
+        // minNextBit stays at endDocId.
+        if (bv != NULL && docId < _docIdLimit) {
+            uint32_t nextBit = bv->getNextTrueBit(docId);
+            minNextBit = std::min(nextBit, minNextBit);
+        }
+    }
+    if (minNextBit < _docIdLimit) {
+        setDocId(minNextBit);
+    } else {
+        setAtEnd();
+    }
+}
+
+// Non-strict seek: sets the doc id when docId is below the limit and set in
+// any of the bit vectors for sc._low..sc._high; otherwise the position is
+// left unchanged.
+template 
+void
+FlagAttributeIteratorT::doSeek(uint32_t docId)
+{
+    const SC & sc(_sc);
+    const typename SC::Attribute &attr =
+        static_cast(sc.attribute());
+    for (int i = sc._low; (i <= sc._high); ++i) {
+        // Values without a bit vector are skipped.
+        const BitVector * bv = attr.getBitVector(i);
+        if ((bv != NULL) && docId < _docIdLimit && bv->testBit(docId)) {
+            setDocId(docId);
+            return;
+        }
+    }
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp
new file mode 100644
index 00000000000..1d7448d04b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributeiterators.hpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+
+namespace search
+{
+
+
+// Advances the posting iterator with linearSeek(docId) and adopts its key
+// as the current doc id, or moves to the end when the iterator is exhausted.
+template 
+void
+AttributePostingListIteratorT::doSeek(uint32_t docId)
+{
+    _iterator.linearSeek(docId);
+    if (_iterator.valid()) {
+        setDocId(_iterator.getKey());
+    } else {
+        setAtEnd();
+    }
+}
+
+
+// Advances the posting iterator with linearSeek(docId) and adopts its key
+// as the current doc id, or moves to the end when the iterator is exhausted.
+template 
+void
+FilterAttributePostingListIteratorT::doSeek(uint32_t docId)
+{
+    _iterator.linearSeek(docId);
+    if (_iterator.valid()) {
+        setDocId(_iterator.getKey());
+    } else {
+        setAtEnd();
+    }
+}
+
+
+// Generic unpack. With _hasWeight set, the weight of the current posting is
+// used as the element weight. Otherwise the weights of all consecutive
+// postings whose key equals docId are summed.
+template 
+void
+AttributePostingListIteratorT::doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+
+    if (_hasWeight) {
+        _matchPosition->setElementWeight(getWeight());
+    } else {
+        uint32_t numOccs(0);
+        // Intentionally empty loop body: the work is done in the increment
+        // expression. Note that this advances _iterator past the postings
+        // for docId as a side effect of unpacking.
+        for(; _iterator.valid() && (_iterator.getKey() == docId); numOccs += getWeight(), ++_iterator);
+        _matchPosition->setElementWeight(numOccs);
+    }
+}
+
+
+// Generic unpack for filter iterators: resets only the doc id; the element
+// weight was set to 1 in the constructor.
+template 
+void
+FilterAttributePostingListIteratorT::doUnpack(uint32_t docId)
+{
+    _matchData->resetOnlyDocId(docId);
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp b/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp
new file mode 100644
index 00000000000..43bf6946feb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributemanager.cpp
@@ -0,0 +1,279 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributemanager.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "interlock.h"
+
+#include 
+LOG_SETUP(".searchlib.attributemanager");
+
+using vespalib::LockGuard;
+using vespalib::string;
+using vespalib::IllegalStateException;
+using search::attribute::IAttributeContext;
+
+namespace
+{
+
+// Monitor held by waitBaseDir()/dropBaseDir() while they access baseDirSet;
+// also used to wait for and broadcast changes to the set.
+vespalib::Monitor baseDirMonitor("attributemanagerbasedir", false);
+typedef std::set BaseDirSet;
+// Base directories claimed through waitBaseDir() and not yet released
+// through dropBaseDir().
+BaseDirSet baseDirSet;
+
+/*
+ * Claims baseDir in the file-local set of base directories in use,
+ * waiting on baseDirMonitor for as long as the directory is already
+ * present in the set. An empty baseDir is ignored.
+ */
+static void
+waitBaseDir(const string &baseDir)
+{
+    if (baseDir.empty()) {
+        return;
+    }
+    vespalib::MonitorGuard guard(baseDirMonitor);
+    bool hadToWait = false;
+
+    // Re-check the set after every wakeup; log only the first time we wait.
+    while (baseDirSet.find(baseDir) != baseDirSet.end()) {
+        if (!hadToWait) {
+            hadToWait = true;
+            LOG(debug,
+                "AttributeManager: Waiting for basedir %s to be available",
+                baseDir.c_str());
+        }
+        guard.wait();
+    }
+
+    baseDirSet.insert(baseDir);
+    if (hadToWait) {
+        LOG(debug,
+            "AttributeManager: basedir %s available",
+            baseDir.c_str());
+    }
+}
+
+
+/*
+ * Releases baseDir from the file-local set of base directories in use
+ * and wakes up all waiters. Logs an error when the directory was not in
+ * the set; waiters are woken in that case as well. An empty baseDir is
+ * ignored.
+ */
+static void
+dropBaseDir(const string &baseDir)
+{
+    if (baseDir.empty()) {
+        return;
+    }
+    vespalib::MonitorGuard guard(baseDirMonitor);
+
+    BaseDirSet::iterator pos = baseDirSet.find(baseDir);
+    if (pos != baseDirSet.end()) {
+        baseDirSet.erase(pos);
+    } else {
+        LOG(error,
+            "AttributeManager: Cannot drop basedir %s, already dropped",
+            baseDir.c_str());
+    }
+    guard.broadcast();
+}
+
+
+}
+
+namespace search {
+
+// Creates a manager with an empty base directory and its own interlock
+// object. No base directory is claimed.
+AttributeManager::AttributeManager()
+    : _attributes(),
+      _loadLock(),
+      _baseDir(),
+      _snapShot(),
+      _interlock(std::make_shared())
+{
+    LOG(debug,
+        "New attributeManager %p",
+        static_cast(this));
+}
+
+
+// Creates a manager for baseDir with its own interlock object. Waits in
+// waitBaseDir() until the directory is not claimed by anyone else, then
+// claims it.
+AttributeManager::AttributeManager(const string & baseDir)
+    :  _attributes(),
+       _loadLock(),
+       _baseDir(baseDir),
+       _snapShot(),
+       _interlock(std::make_shared())
+{
+    LOG(debug,
+        "New attributeManager %p, baseDir %s",
+        static_cast(this),
+        baseDir.c_str());
+    waitBaseDir(baseDir);
+}
+
+
+// Switches to a new base directory: releases the claim on the current one,
+// then waits for and claims the new one.
+void
+AttributeManager::setBaseDir(const string & base)
+{
+    // The old directory is released before the new one is claimed.
+    dropBaseDir(_baseDir);
+    _baseDir = base;
+    LOG(debug,
+        "attributeManager %p new baseDir %s",
+        static_cast(this),
+        _baseDir.c_str());
+    waitBaseDir(base);
+}
+
+
+// Clears the attribute map and then releases the claim on the base
+// directory.
+AttributeManager::~AttributeManager(void)
+{
+    // The map is cleared before the base directory is released.
+    _attributes.clear();
+    LOG(debug,
+        "delete attributeManager %p baseDir %s",
+        static_cast(this),
+        _baseDir.c_str());
+    dropBaseDir(_baseDir);
+}
+
+
+// Returns the sum of the allocated size reported by the status of every
+// attribute in the map.
+uint64_t AttributeManager::getMemoryFootprint() const
+{
+    uint64_t total = 0;
+    for (const auto &entry : _attributes) {
+        total += entry.second->getStatus().getAllocated();
+    }
+    return total;
+}
+
+// Returns true as soon as one attribute in the map reports readers, false
+// when none does.
+bool AttributeManager::hasReaders() const
+{
+    for (const auto &entry : _attributes) {
+        if (entry.second->hasReaders()) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Looks up the attribute by name and loads it if it is not loaded yet.
+// Returns a pointer to the holder stored in the map, or NULL when no
+// attribute with that name exists. The result of load() is not inspected.
+const AttributeManager::VectorHolder *
+AttributeManager::findAndLoadAttribute(const string & name) const
+{
+    AttributeMap::const_iterator entry = _attributes.find(name);
+    if (entry == _attributes.end()) {
+        return NULL;
+    }
+    AttributeVector & attr = *entry->second;
+    if ( ! attr.isLoaded() ) {
+        // isLoaded() is checked again under _loadLock so that only one
+        // caller ends up invoking load().
+        vespalib::LockGuard loadGuard(_loadLock);
+        if (attr.isLoaded()) {
+            LOG(debug, "Multi load of %s prevented by double checked locking.", attr.getBaseFileName().c_str());
+        } else {
+            attr.load();
+        }
+    }
+    return & entry->second;
+}
+
+
+// Returns a pointer to the holder of the named attribute, loading the
+// attribute if needed, or NULL when it does not exist. Thin wrapper around
+// findAndLoadAttribute().
+const AttributeManager::VectorHolder *
+AttributeManager::getAttributeRef(const string & name) const
+{
+    return findAndLoadAttribute(name);
+}
+
+// Returns a guard for the named attribute, loading the attribute if needed.
+// The returned pointer is never NULL: when no attribute with that name
+// exists, a guard built from an empty VectorHolder is returned and valid()
+// on it is false.
+AttributeGuard::UP
+AttributeManager::getAttribute(const string & name) const
+{
+    const VectorHolder * vh = findAndLoadAttribute(name);
+    if ( vh != NULL ) {
+        return AttributeGuard::UP(new AttributeGuard(*vh));
+    }
+    // Only build the empty guard when it is actually the result, instead of
+    // allocating it up front and discarding it on the found path.
+    return AttributeGuard::UP(new AttributeGuard(VectorHolder()));
+}
+
+// Returns an enum guard for the named attribute, loading the attribute if
+// needed. The returned pointer is never NULL: when no attribute with that
+// name exists, a guard built from an empty VectorHolder is returned; it is
+// not valid and holds no enum lock.
+AttributeGuard::UP
+AttributeManager::getAttributeStableEnum(const string & name) const
+{
+    const VectorHolder * vh = findAndLoadAttribute(name);
+    if ( vh != NULL ) {
+        return AttributeGuard::UP(new AttributeEnumGuard(*vh));
+    }
+    // Only build the empty guard when it is actually the result, instead of
+    // allocating it up front and discarding it on the found path.
+    return AttributeGuard::UP(new AttributeEnumGuard(VectorHolder()));
+}
+
+// Registers vector under its name unless an attribute with that name is
+// already present; a newly registered vector is given this manager's
+// interlock. An existing entry is left untouched.
+//
+// NOTE(review): retval starts out as true and is only ever assigned true,
+// so this function returns true even when the name already exists and
+// nothing is inserted. If false was intended for that case, the initial
+// value should be false - confirm against callers before changing.
+bool
+AttributeManager::add(const AttributeManager::VectorHolder & vector)
+{
+    bool retval(true);
+    AttributeMap::iterator found = _attributes.find(vector->getName());
+    if (found == _attributes.end()) {
+        vector->setInterlock(_interlock);
+        _attributes[vector->getName()] = vector;
+        retval = true;
+    }
+    return retval;
+}
+
+/**
+ * Appends one AttributeGuard per registered attribute to the given list.
+ *
+ * @param list output list; existing content is kept.
+ **/
+void
+AttributeManager::getAttributeList(AttributeList & list) const
+{
+    list.reserve(_attributes.size());
+    for (const auto & entry : _attributes) {
+        list.push_back(AttributeGuard(entry.second));
+    }
+}
+
+/**
+ * Creates a new AttributeContext bound to this manager.
+ *
+ * @return owning pointer to the freshly allocated context.
+ **/
+IAttributeContext::UP
+AttributeManager::createContext() const
+{
+    IAttributeContext::UP context(new AttributeContext(*this));
+    return context;
+}
+
+/**
+ * Builds the base file name for an attribute from the base directory, an
+ * optional snapshot directory and the attribute name.
+ *
+ * @param name        attribute name.
+ * @param useSnapshot when true the current snapshot's dirName is included,
+ *                    otherwise an empty snapshot part is used.
+ * @return the composed base file name.
+ **/
+string
+AttributeManager::createBaseFileName(const string & name, bool useSnapshot) const
+{
+    if (useSnapshot) {
+        return AttributeVector::BaseName(getBaseDir(), getSnapshot().dirName, name);
+    }
+    return AttributeVector::BaseName(getBaseDir(), "", name);
+}
+
+/**
+ * Makes sure an attribute with the given name and configuration is
+ * registered, creating it when needed.
+ *
+ * @param name   attribute name.
+ * @param config requested configuration; only basic type and collection
+ *               type are compared against an existing vector.
+ * @return true when a matching vector already exists or when add() reports
+ *         success for a newly created one; false on a type conflict with an
+ *         already valid vector.
+ **/
+bool
+AttributeManager::addVector(const string & name, const Config & config)
+{
+    bool retval = false;
+    // getAttribute() also triggers a load attempt for a registered vector.
+    AttributeGuard::UP vector_owner(getAttribute(name));
+    AttributeGuard &vector(*vector_owner);
+
+    if (vector.valid()) {
+        if ((vector->getInternalBasicType() == config.basicType())  &&
+            (vector->getInternalCollectionType() == config.collectionType()))
+        {
+            retval = true;
+        } else {
+            LOG(error, "Attribute Vector '%s' has type conflict", name.c_str());
+        }
+    } else {
+        // The guard is not valid: check the map directly for an entry whose
+        // holder is set and whose types match.
+        AttributeMap::iterator found = _attributes.find(name);
+        if (found != _attributes.end()) {
+            const VectorHolder & vh(found->second);
+            if ( vh.get() &&
+                 (vh->getInternalBasicType() == config.basicType()) &&
+                 (vh->getInternalCollectionType() == config.collectionType()))
+            {
+                retval = true;
+            }
+        }
+        if (! retval ) {
+            // Create a new vector located under the current snapshot directory.
+            string baseFileName = createBaseFileName(name, true);
+            VectorHolder vh(AttributeFactory::createAttribute(baseFileName, config));
+            assert(vh.get());
+            // The vector is added whether or not load() succeeds; the type
+            // assertions only run after a successful load.
+            if (vh->load()) {
+                assert(vh->getInternalBasicType() == config.basicType());
+                assert(vh->getInternalCollectionType() == config.collectionType());
+                retval = add(vh);
+            } else {
+                retval = add(vh);
+            }
+        }
+    }
+    return retval;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributemanager.h b/searchlib/src/vespa/searchlib/attribute/attributemanager.h
new file mode 100644
index 00000000000..6e166fe0835
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributemanager.h
@@ -0,0 +1,73 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * The attribute manager owns a set of named attribute vectors and hands out
+ * access to them. Callers pick the kind of access they need through the
+ * different getAttributeXX methods.
+ **/
+class AttributeManager : public IAttributeManager
+{
+private:
+    typedef attribute::Config Config;
+public:
+    typedef std::vector StringVector;
+    typedef search::IndexMetaInfo::Snapshot Snapshot;
+    typedef std::vector AttributeList;
+    typedef AttributeVector::SP VectorHolder;
+    AttributeManager();
+    AttributeManager(const string & base);
+    ~AttributeManager(void);
+
+    /**
+     * This will give you a handle to an attribute vector. It
+     * guarantees that the backing attribute is valid, but gives no
+     * guarantees about the content of the attribute. If that is
+     * required, one of the other getAttributeXX methods must be used.
+     * Returns NULL when no attribute with the given name is registered.
+     **/
+    const VectorHolder * getAttributeRef(const string & name) const;
+
+    // Implements IAttributeManager
+    virtual AttributeGuard::UP getAttribute(const string & name) const;
+
+    // Implements IAttributeManager
+    virtual AttributeGuard::UP getAttributeStableEnum(const string & name) const;
+    /**
+     * Makes sure an attribute with the given name and config is registered,
+     * creating and loading it when it is missing.
+     * NOTE(review): an earlier comment here said attributes are loaded
+     * "largest first"; the implementation in attributemanager.cpp handles a
+     * single attribute and does no size ordering - confirm.
+     */
+    bool addVector(const string & name, const Config & config);
+
+    // Registers an already created vector under its own name.
+    bool add(const VectorHolder & vector);
+
+    // Implements IAttributeManager
+    virtual void getAttributeList(AttributeList & list) const;
+
+    // Implements IAttributeManager
+    virtual attribute::IAttributeContext::UP createContext() const;
+
+    const Snapshot & getSnapshot()         const { return _snapShot; }
+    const string & getBaseDir()       const { return _baseDir; }
+    void setSnapshot(const Snapshot &snap)       { _snapShot = snap; }
+    void setBaseDir(const string & base);
+    bool hasReaders(void) const;
+    uint64_t getMemoryFootprint() const;
+protected:
+    typedef vespalib::hash_map AttributeMap;
+    // Registered vectors, keyed by attribute name.
+    AttributeMap   _attributes;
+    // Held while an attribute is loaded on first access.
+    vespalib::Lock _loadLock;
+private:
+    const VectorHolder * findAndLoadAttribute(const string & name) const;
+    string createBaseFileName(const string & name, bool useSnapshot) const;
+    string    _baseDir;
+    Snapshot  _snapShot;
+    // Handed to every vector registered through add().
+    std::shared_ptr _interlock;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
new file mode 100644
index 00000000000..534a7d6ff2f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.cpp
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributememoryfilebufferwriter.h"
+
+namespace search
+{
+
+
+/*
+ * Binds the buffer writer to the file writer that will receive full buffers.
+ */
+AttributeMemoryFileBufferWriter::AttributeMemoryFileBufferWriter(
+        IAttributeFileWriter &memoryFileWriter)
+    : AttributeFileBufferWriter(memoryFileWriter)
+{}
+
+
+// Nothing beyond member and base class destruction is needed.
+AttributeMemoryFileBufferWriter::~AttributeMemoryFileBufferWriter() = default;
+
+
+/*
+ * Hands the current buffer, holding nowLen bytes of data, over to the file
+ * writer and replaces it with a freshly allocated one.
+ */
+void
+AttributeMemoryFileBufferWriter::onFlush(size_t nowLen)
+{
+    // Mark the first nowLen bytes of the buffer as data.
+    _buf->moveFreeToData(nowLen);
+    assert(_buf->getDataLen() == nowLen);
+    // Ownership of the buffer moves to the file writer, so a new buffer must
+    // be allocated before _buf is used again.
+    _fileWriter.writeBuf(std::move(_buf));
+    _buf = _fileWriter.allocBuf(BUFFER_SIZE);
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h
new file mode 100644
index 00000000000..c0ef15f571d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilebufferwriter.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributefilebufferwriter.h"
+
+namespace search
+{
+
+/*
+ * BufferWriter implementation that passes full buffers on to
+ * memory variant of IAttributeFileWriter.
+ */
+class AttributeMemoryFileBufferWriter : public AttributeFileBufferWriter
+{
+public:
+    // memoryFileWriter is the writer that receives each flushed buffer.
+    AttributeMemoryFileBufferWriter(IAttributeFileWriter &memoryFileWriter);
+
+    virtual ~AttributeMemoryFileBufferWriter();
+
+    // Passes the current buffer, with nowSize bytes of data, to the file
+    // writer and allocates a replacement buffer.
+    virtual void onFlush(size_t nowSize) override;
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
new file mode 100644
index 00000000000..a9b72350c7e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.cpp
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributememoryfilewriter.h"
+#include "attributememoryfilebufferwriter.h"
+
+namespace search
+{
+
+namespace
+{
+
+// Alignment passed along with the size for every buffer made by allocBuf().
+const uint32_t MIN_ALIGNMENT = 4096;
+
+}
+
+// Starts out with an empty list of written buffers.
+AttributeMemoryFileWriter::AttributeMemoryFileWriter()
+    : IAttributeFileWriter(), _bufs()
+{}
+
+
+// Buffers still held in _bufs are released by the member's own destructor.
+AttributeMemoryFileWriter::~AttributeMemoryFileWriter() = default;
+
+
+/*
+ * Allocates a new buffer of the requested size, using MIN_ALIGNMENT as the
+ * second construction argument.
+ */
+AttributeMemoryFileWriter::Buffer
+AttributeMemoryFileWriter::allocBuf(size_t size)
+{
+    return std::make_unique(size, MIN_ALIGNMENT);
+}
+
+
+/*
+ * Takes ownership of the buffer and appends it to the in-memory list; nothing
+ * is written to disk here.
+ */
+void
+AttributeMemoryFileWriter::writeBuf(Buffer buf)
+{
+    _bufs.emplace_back(std::move(buf));
+}
+
+
+/*
+ * Passes every stored buffer, in the order it was written, on to the given
+ * writer and then empties the local list. Ownership of each buffer moves to
+ * the target writer.
+ */
+void
+AttributeMemoryFileWriter::writeTo(IAttributeFileWriter &writer)
+{
+    for (size_t i = 0; i < _bufs.size(); ++i) {
+        writer.writeBuf(std::move(_bufs[i]));
+    }
+    // Only moved-from entries are left at this point.
+    _bufs.clear();
+}
+
+
+/*
+ * Creates a buffer writer that is constructed with this file writer as its
+ * only argument.
+ */
+std::unique_ptr
+AttributeMemoryFileWriter::allocBufferWriter()
+{
+    return std::make_unique(*this);
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h
new file mode 100644
index 00000000000..7afd6c92606
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememoryfilewriter.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributefilewriter.h"
+
+namespace search
+{
+
+/*
+ * Class to write to a memory buffer representation of a single
+ * attribute vector file (without header). Used by AttributeMemorySaveTarget.
+ */
+class AttributeMemoryFileWriter : public IAttributeFileWriter
+{
+    // Buffers received through writeBuf(), in arrival order.
+    std::vector _bufs;
+public:
+    AttributeMemoryFileWriter();
+    ~AttributeMemoryFileWriter();
+    virtual Buffer allocBuf(size_t size) override;
+    // Stores the buffer in memory.
+    virtual void writeBuf(Buffer buf) override;
+    virtual std::unique_ptr allocBufferWriter() override;
+    // Moves all stored buffers to the given writer and empties this one.
+    void writeTo(IAttributeFileWriter &writer);
+};
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
new file mode 100644
index 00000000000..f1fbfef43aa
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.attributememorysavetarget");
+
+#include "attributememorysavetarget.h"
+#include "attributefilesavetarget.h"
+#include "attributevector.h"
+
+namespace search
+{
+
+using search::common::FileHeaderContext;
+
+// All four in-memory file writers start out empty.
+AttributeMemorySaveTarget::AttributeMemorySaveTarget()
+    : _datWriter(), _idxWriter(), _weightWriter(), _udatWriter()
+{}
+
+
+// Gives access to the in-memory writer kept for the dat file.
+IAttributeFileWriter &
+AttributeMemorySaveTarget::datWriter()
+{
+    IAttributeFileWriter &writer = _datWriter;
+    return writer;
+}
+
+
+// Gives access to the in-memory writer kept for the idx file.
+IAttributeFileWriter &
+AttributeMemorySaveTarget::idxWriter()
+{
+    IAttributeFileWriter &writer = _idxWriter;
+    return writer;
+}
+
+
+// Gives access to the in-memory writer kept for the weight file.
+IAttributeFileWriter &
+AttributeMemorySaveTarget::weightWriter()
+{
+    IAttributeFileWriter &writer = _weightWriter;
+    return writer;
+}
+
+
+// Gives access to the in-memory writer kept for the udat file.
+IAttributeFileWriter &
+AttributeMemorySaveTarget::udatWriter()
+{
+    IAttributeFileWriter &writer = _udatWriter;
+    return writer;
+}
+
+
+/**
+ * Replays the buffered content into a file based save target.
+ *
+ * The dat content is always written. The udat content follows when the
+ * config is enumerated, the idx content when it is multi-value, and the
+ * weight content when it is additionally a weighted set.
+ *
+ * @return false when the file save target fails to set up (nothing is
+ *         written in that case), true otherwise.
+ **/
+bool
+AttributeMemorySaveTarget::
+writeToFile(const TuneFileAttributes &tuneFileAttributes,
+            const FileHeaderContext &fileHeaderContext)
+{
+    AttributeFileSaveTarget fileTarget(tuneFileAttributes, fileHeaderContext);
+    fileTarget.setConfig(_cfg);
+    const bool ready = fileTarget.setup();
+    if (ready) {
+        _datWriter.writeTo(fileTarget.datWriter());
+        if (_cfg.getEnumerated()) {
+            _udatWriter.writeTo(fileTarget.udatWriter());
+        }
+        if (_cfg.hasMultiValue()) {
+            _idxWriter.writeTo(fileTarget.idxWriter());
+            if (_cfg.hasWeightedSetType()) {
+                _weightWriter.writeTo(fileTarget.weightWriter());
+            }
+        }
+        fileTarget.close();
+    }
+    return ready;
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
new file mode 100644
index 00000000000..48828039d9e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributememorysavetarget.h
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "iattributesavetarget.h"
+#include 
+#include 
+#include 
+#include "attributememoryfilewriter.h"
+
+namespace search
+{
+
+namespace common
+{
+
+class FileHeaderContext;
+
+}
+
+class AttributeVector;
+
+/**
+ * Class used to save an attribute vector to memory buffer(s).
+ * Each of the dat, idx, weight and udat files has its own in-memory writer.
+ **/
+class AttributeMemorySaveTarget : public IAttributeSaveTarget
+{
+private:
+    AttributeMemoryFileWriter _datWriter;
+    AttributeMemoryFileWriter _idxWriter;
+    AttributeMemoryFileWriter _weightWriter;
+    AttributeMemoryFileWriter _udatWriter;
+
+public:
+    AttributeMemorySaveTarget();
+
+    /**
+     * Write the underlying buffer(s) to file(s).
+     * Returns false when the file save target could not be set up.
+     **/
+    bool
+    writeToFile(const TuneFileAttributes &tuneFileAttributes,
+                const search::common::FileHeaderContext &fileHeaderContext);
+
+    // Implements IAttributeSaveTarget
+    // setup() and close() have nothing to do for the in-memory target.
+    virtual bool setup() override { return true; }
+    virtual void close() override {}
+    virtual IAttributeFileWriter &datWriter() override;
+    virtual IAttributeFileWriter &idxWriter() override;
+    virtual IAttributeFileWriter &weightWriter() override;
+    virtual IAttributeFileWriter &udatWriter() override;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp
new file mode 100644
index 00000000000..cf8b9cdf1a2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributesaver.h"
+
+
+using vespalib::GenerationHandler;
+
+namespace search
+{
+
+/*
+ * Takes over the generation guard and keeps a copy of the save target
+ * configuration for the later save() call.
+ */
+AttributeSaver::AttributeSaver(GenerationHandler::Guard &&guard,
+                               const IAttributeSaveTarget::Config &cfg)
+    : _guard(std::move(guard)), _cfg(cfg)
+{}
+
+
+// The guard and the config are released by their own destructors.
+AttributeSaver::~AttributeSaver() = default;
+
+
+/*
+ * Runs a complete save against the given target: applies the stored config,
+ * sets the target up, lets the subclass write through onSave() and finally
+ * closes the target.
+ *
+ * Returns false when setup() or onSave() fails; the target is not closed in
+ * that case.
+ */
+bool
+AttributeSaver::save(IAttributeSaveTarget &saveTarget)
+{
+    saveTarget.setConfig(_cfg);
+    // onSave() is only reached when setup() succeeded.
+    const bool saved = saveTarget.setup() && onSave(saveTarget);
+    if (saved) {
+        saveTarget.close();
+    }
+    return saved;
+}
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributesaver.h b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
new file mode 100644
index 00000000000..c398e0726ec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributesaver.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include "iattributesavetarget.h"
+
+namespace search
+{
+
+/*
+ * Abstract class used to hold data outside attribute vector needed
+ * during a save operation, e.g. copy of data structure without
+ * snapshot property, and guards to protect frozen views on structures
+ * with snapshot properties.
+ */
+class AttributeSaver
+{
+private:
+    // Generation guard held for the lifetime of the saver.
+    vespalib::GenerationHandler::Guard _guard;
+    // Configuration applied to the save target at the start of save().
+    IAttributeSaveTarget::Config _cfg;
+
+protected:
+    AttributeSaver(vespalib::GenerationHandler::Guard &&guard,
+                         const IAttributeSaveTarget::Config &cfg);
+
+    // Writes the attribute content to the target; returning false makes
+    // save() fail.
+    virtual bool onSave(IAttributeSaveTarget &saveTarget) = 0;
+
+public:
+    virtual ~AttributeSaver();
+
+    // Sets up the target, calls onSave() and closes the target on success.
+    bool save(IAttributeSaveTarget &saveTarget);
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
new file mode 100644
index 00000000000..cc223a4fada
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp
@@ -0,0 +1,1110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "attributevector.h"
+#include "attributevector.hpp"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "ipostinglistsearchcontext.h"
+#include "ipostinglistattributebase.h"
+#include 
+#include "interlock.h"
+#include "attributesaver.h"
+LOG_SETUP(".searchlib.attribute.attributevector");
+
+using vespalib::getLastErrorString;
+
+using document::ValueUpdate;
+using vespalib::make_string;
+using vespalib::Array;
+using vespalib::IllegalStateException;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::queryeval::SearchIterator;
+
+namespace {
+
+// Names of the tags looked up in attribute vector file headers.
+const vespalib::string enumeratedTag = "enumerated";
+const vespalib::string dataTypeTag = "datatype";
+const vespalib::string collectionTypeTag = "collectiontype";
+const vespalib::string createSerialNumTag = "createSerialNum";
+const vespalib::string versionTag = "version";
+const vespalib::string docIdLimitTag = "docIdLimit";
+
+// When false, ReaderBase throws instead of opening an enumerated file.
+bool allowEnumeratedLoad = true;
+// Alignment value the file header objects are constructed with.
+const size_t DIRECTIO_ALIGNMENT(4096);
+
+/*
+ * A header marks an enumerated file when it carries the "enumerated" tag
+ * with a non-zero integer value.
+ */
+bool
+isEnumerated(const vespalib::GenericHeader &header)
+{
+    if (!header.hasTag(enumeratedTag)) {
+        return false;
+    }
+    return header.getTag(enumeratedTag).asInteger() != 0;
+}
+
+/*
+ * Reads the create serial number from the header; 0 when the tag is missing.
+ */
+uint64_t
+extractCreateSerialNum(const vespalib::GenericHeader &header)
+{
+    if (!header.hasTag(createSerialNumTag)) {
+        return 0u;
+    }
+    return header.getTag(createSerialNumTag).asInteger();
+}
+
+// Binary functor returning the larger of its two arguments, via std::max.
+template 
+struct FuncMax : public std::binary_function {
+    T operator() (const T & x, const T & y) const {
+        return std::max(x, y);
+    }
+};
+
+}
+
+namespace search {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT(AttributeVector, vespalib::Identifiable);
+
+/*
+ * Builds the path "<base>/<snap>/<name>". The separator after base is left
+ * out when base is empty, and the snapshot component with its separator is
+ * left out when snap is empty. The attribute name is also kept in _name.
+ */
+AttributeVector::BaseName::BaseName(const vespalib::stringref &base,
+                                    const vespalib::stringref &snap,
+                                    const vespalib::stringref &name)
+    : string(base),
+      _name(name)
+{
+    // At this point the string holds only base.
+    if (!empty()) {
+        push_back('/');
+    }
+    if ( ! snap.empty() ) {
+        append(snap);
+        push_back('/');
+    }
+    append(name);
+}
+
+
+/*
+ * Extracts a path component from the base file name by walking backwards
+ * from the last "/snapshot-" occurrence over four more '/' separators. In the
+ * example path below the result is "typetest_search".
+ *
+ * Returns an empty string when "/snapshot-" is missing or when one of the
+ * first three separators is missing or sits at position 0.
+ */
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getIndexName() const
+{
+    // "$VESPA_HOME/var/db/vespa/search/cluster.search/r0/c0/typetest_search/1.ready/attribute/stringfield/snapshot-0/stringfield"
+    string index;
+    size_t snapshotPos(rfind("/snapshot-"));
+    if (snapshotPos == string::npos)
+        return index;
+    // Separator in front of the attribute name directory.
+    size_t attrNamePos(rfind('/', snapshotPos  - 1));
+    if (attrNamePos == string::npos || attrNamePos == 0)
+        return index;
+    // Separator in front of the "attribute" directory.
+    size_t attrStrPos(rfind('/', attrNamePos - 1));
+    if (attrStrPos == string::npos || attrStrPos == 0)
+        return index;
+    // Separator in front of the sub database directory ("1.ready" above).
+    size_t subDBPos(rfind('/', attrStrPos - 1));
+    if (subDBPos == string::npos || subDBPos == 0)
+        return index;
+    // Separator in front of the component that is returned.
+    size_t indexNamePos(rfind('/', subDBPos - 1));
+    if (indexNamePos == string::npos)
+        return substr(0, subDBPos);
+    return substr(indexNamePos + 1, subDBPos - indexNamePos - 1);
+}
+
+
+/*
+ * Returns the path component that starts at the last "snapshot-" occurrence
+ * and ends before the next '/'. An empty string is returned when there is no
+ * such occurrence or no '/' after it.
+ */
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getSnapshotName() const
+{
+    const size_t start(rfind("snapshot-"));
+    if (start == string::npos) {
+        return string();
+    }
+    const string tail(substr(start));
+    const size_t slash(tail.find('/'));
+    if (slash == string::npos) {
+        return string();
+    }
+    return tail.substr(0, slash);
+}
+
+
+/*
+ * Returns what follows the last '/' in s, or all of s when it has no '/'.
+ */
+AttributeVector::BaseName::string
+AttributeVector::BaseName::createAttributeName(const vespalib::stringref & s)
+{
+    const size_t lastSlash(s.rfind('/'));
+    if (lastSlash != string::npos) {
+        return s.substr(lastSlash + 1);
+    }
+    return s;
+}
+
+
+/*
+ * Returns what precedes the last '/' of the base file name, or an empty
+ * string when it has no '/'.
+ */
+AttributeVector::BaseName::string
+AttributeVector::BaseName::getDirName() const
+{
+    const size_t lastSlash = rfind('/');
+    if (lastSlash != string::npos) {
+        return substr(0, lastSlash);
+    }
+    return "";
+}
+
+
+// Remembers the attribute whose generation is bumped by the destructor.
+AttributeVector::ValueModifier::ValueModifier(AttributeVector &attr)
+    : _attr(&attr)
+{}
+
+
+// Copy construction takes the attribute pointer from rhs via stealAttr().
+// NOTE(review): presumably stealAttr() clears the pointer in rhs so only one
+// modifier bumps the generation - confirm against its definition.
+AttributeVector::ValueModifier::ValueModifier(const ValueModifier &rhs)
+    : _attr(rhs.stealAttr())
+{
+}
+
+
+// Bumps the attribute's generation, unless this modifier holds no attribute.
+AttributeVector::ValueModifier::~ValueModifier()
+{
+    if (_attr == nullptr) {
+        return;
+    }
+    _attr->incGeneration();
+}
+
+
+/*
+ * Creates an unloaded attribute vector for the given base file name and
+ * config. The vector gets its own interlock, and all counters and limits
+ * start at zero except _highestValueCount, which starts at 1.
+ */
+AttributeVector::AttributeVector(const vespalib::stringref &baseFileName,
+                                 const Config &c)
+    : _baseFileName(baseFileName),
+      _config(c),
+      _interlock(std::make_shared()),
+      _enumLock(),
+      _genHandler(),
+      _genHolder(),
+      // Status name is built from "<index name>[.<snapshot name>]" and the
+      // attribute name; the '.' is only added when a snapshot name exists.
+      _status(Status::createName((_baseFileName.getIndexName() +
+                                  (_baseFileName.getSnapshotName().empty() ?
+                                   "" :
+                                   ".") +
+                                  _baseFileName.getSnapshotName()),
+                                 _baseFileName.getAttributeName())),
+      _highestValueCount(1),
+      _enumMax(0),
+      _committedDocIdLimit(0u),
+      _uncommittedDocIdLimit(0u),
+      _createSerialNum(0u),
+      _compactLidSpaceGeneration(0u),
+      _hasEnum(false),
+      _hasSortedEnum(false),
+      _loaded(false),
+      _enableEnumeratedSave(false)
+{
+}
+
+
+// Members clean up after themselves; nothing extra to do here.
+AttributeVector::~AttributeVector() = default;
+
+/*
+ * Refreshes the statistics through onUpdateStat().
+ *
+ * With force set the refresh always happens and the next update time is
+ * left alone. Otherwise the refresh only happens once the next update time
+ * has passed, and that time is then moved one second ahead of now.
+ */
+void AttributeVector::updateStat(bool force)
+{
+    if (force) {
+        onUpdateStat();
+        return;
+    }
+    if (!(_nextStatUpdateTime < fastos::ClockSystem::now())) {
+        return;
+    }
+    onUpdateStat();
+    _nextStatUpdateTime = fastos::ClockSystem::now() + fastos::TimeStamp::SEC;
+}
+
+
+/*
+ * Commits pending changes through onCommit(), then updates the committed
+ * document id limit and the statistics. The vector counts as loaded from
+ * here on.
+ *
+ * forceUpdateStat is passed on to updateStat().
+ */
+void
+AttributeVector::commit(bool forceUpdateStat)
+{
+    onCommit();
+    updateCommittedDocIdLimit();
+    updateStat(forceUpdateStat);
+    _loaded = true;
+}
+
+
+/*
+ * Commits and records lastSyncToken as the new last sync token.
+ *
+ * The process is aborted, after logging an error, when firstSyncToken is
+ * lower than the last sync token already recorded in the status.
+ */
+void
+AttributeVector::commit(uint64_t firstSyncToken, uint64_t lastSyncToken)
+{
+    const uint64_t previousToken = getStatus().getLastSyncToken();
+    if (firstSyncToken < previousToken) {
+        LOG(error,
+            "Expected first token to be >= %" PRIu64 ", got %" PRIu64 ".",
+            previousToken, firstSyncToken);
+        abort();
+    }
+    commit();
+    _status.setLastSyncToken(lastSyncToken);
+}
+
+
+/*
+ * Adds numDocs documents through repeated addDoc() calls.
+ *
+ * startDoc receives the id given by the first addDoc() call. lastDoc is set
+ * to startDoc and then passed to each later addDoc() call. Neither is
+ * touched when numDocs is 0.
+ *
+ * Returns false as soon as one addDoc() call fails, true otherwise.
+ */
+bool
+AttributeVector::addDocs(DocId &startDoc, DocId &lastDoc, uint32_t numDocs)
+{
+    if (numDocs == 0) {
+        return true;
+    }
+    if (!addDoc(startDoc)) {
+        return false;
+    }
+    lastDoc = startDoc;
+    // The first document is already added; add the remaining numDocs - 1.
+    for (uint32_t remaining = numDocs - 1; remaining > 0; --remaining) {
+        if (!addDoc(lastDoc)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+/*
+ * Adds numDocs documents without reporting the assigned ids back.
+ */
+bool
+AttributeVector::addDocs(uint32_t numDocs)
+{
+    // The same scratch variable is passed as both start and last id; it is
+    // only written when numDocs is non-zero.
+    DocId doc;
+    return addDocs(doc, doc, numDocs);
+}
+
+
+/*
+ * Moves the attribute on to the next generation. The subclass is told about
+ * the upcoming generation before the generation handler is incremented, and
+ * held data is cleaned up afterwards.
+ */
+void
+AttributeVector::incGeneration()
+{
+    // Freeze trees etc, to stop new readers from accessing currently held data
+    onGenerationChange(_genHandler.getNextGeneration());
+    _genHandler.incGeneration();
+    // Remove old data on hold lists that can no longer be reached by readers
+    removeAllOldGenerations();
+}
+
+
+/*
+ * Forwards the given value counts and memory figures unchanged to the
+ * status object.
+ */
+void
+AttributeVector::updateStatistics(uint64_t numValues,
+                                  uint64_t numUniqueValue,
+                                  uint64_t allocated,
+                                  uint64_t used,
+                                  uint64_t dead,
+                                  uint64_t onHold)
+{
+    _status.updateStatistics(numValues, numUniqueValue,
+                             allocated, used, dead, onHold);
+}
+
+// Base implementation: reports the default enum store usage.
+AddressSpace
+AttributeVector::getEnumStoreAddressSpaceUsage() const
+{
+    AddressSpace usage(AddressSpaceUsage::defaultEnumStoreUsage());
+    return usage;
+}
+
+// Base implementation: reports the default multi-value usage.
+AddressSpace
+AttributeVector::getMultiValueAddressSpaceUsage() const
+{
+    AddressSpace usage(AddressSpaceUsage::defaultMultiValueUsage());
+    return usage;
+}
+
+// Combines the enum store usage and the multi-value usage into one result.
+AddressSpaceUsage
+AttributeVector::getAddressSpaceUsage() const
+{
+    AddressSpaceUsage combined(getEnumStoreAddressSpaceUsage(),
+                               getMultiValueAddressSpaceUsage());
+    return combined;
+}
+
+/*
+ * Checks that a file header fits this attribute: the datatype,
+ * collectiontype and docIdLimit tags must all be present, and the first two
+ * must equal the string form of the configured basic and collection types.
+ */
+bool
+AttributeVector::headerTypeOK(const vespalib::GenericHeader &header) const
+{
+    if (!header.hasTag(dataTypeTag) ||
+        !header.hasTag(collectionTypeTag) ||
+        !header.hasTag(docIdLimitTag))
+    {
+        return false;
+    }
+    if (!(header.getTag(dataTypeTag).asString() ==
+          getConfig().basicType().asString()))
+    {
+        return false;
+    }
+    return header.getTag(collectionTypeTag).asString() ==
+           getConfig().collectionType().asString();
+}
+
+
+/*
+ * Opens the file named by the base file name followed by suffix, through
+ * FileUtil::openFile().
+ */
+std::unique_ptr
+AttributeVector::openFile(const char *suffix)
+{
+    BaseName::string fileName(getBaseFileName());
+    fileName += suffix;
+    return FileUtil::openFile(fileName);
+}
+
+
+// Opens "<base file name>.dat".
+std::unique_ptr
+AttributeVector::openDAT()
+{
+    return openFile(".dat");
+}
+
+
+// Opens "<base file name>.idx".
+std::unique_ptr
+AttributeVector::openIDX()
+{
+    return openFile(".idx");
+}
+
+
+// Opens "<base file name>.weight".
+std::unique_ptr
+AttributeVector::openWeight()
+{
+    return openFile(".weight");
+}
+
+
+/*
+ * Opens "<base file name>.udat", the file ReaderBase opens in addition to
+ * the .dat file when the attribute is stored in enumerated form.
+ *
+ * The suffix must be ".udat", matching loadUDAT() and the file that
+ * hasLoadData() checks for; opening ".dat" here would make the reader parse
+ * the .dat file a second time as if it were the udat file.
+ */
+std::unique_ptr
+AttributeVector::openUDAT()
+{
+    return openFile(".udat");
+}
+
+
+/*
+ * Opens the files that make up a saved attribute vector and validates their
+ * headers against the attribute's configuration.
+ *
+ * The .dat file is always opened. The .weight file is opened for weighted
+ * set attributes, the .idx file for multi-value attributes, and the udat
+ * file when the .dat header says the content is enumerated. A file whose
+ * header does not pass headerTypeOK() is closed again. For the .dat and
+ * .idx files the same happens when the file size cannot be extracted.
+ *
+ * Throws IllegalStateException when the content is enumerated and
+ * allowEnumeratedLoad is false.
+ *
+ * NOTE(review): _weightReader and _idxReader are bound to *_weightFile and
+ * *_idxFile even when those pointers are empty; presumably the readers are
+ * never used in that case - confirm.
+ */
+AttributeVector::ReaderBase::ReaderBase(AttributeVector &attr)
+    : _datFile(attr.openDAT()),
+      _weightFile(attr.hasWeightedSetType() ?
+                  attr.openWeight() : std::unique_ptr()),
+      _idxFile(attr.hasMultiValue() ?
+               attr.openIDX() : std::unique_ptr()),
+      _udatFile(),
+      _weightReader(*_weightFile),
+      _idxReader(*_idxFile),
+      _enumReader(*_datFile),
+      _currIdx(0),
+      _datHeaderLen(0u),
+      _idxHeaderLen(0u),
+      _weightHeaderLen(0u),
+      _udatHeaderLen(0u),
+      _createSerialNum(0u),
+      _fixedWidth(attr.getFixedWidth()),
+      _enumerated(false),
+      _hasLoadData(false),
+      _version(0),
+      _docIdLimit(0),
+      _datHeader(DIRECTIO_ALIGNMENT),
+      _datFileSize(0),
+      _idxFileSize(0)
+{
+    // Read the .dat header and position the file right after it.
+    _datHeaderLen = _datHeader.readFile(*_datFile);
+    _datFile->SetPosition(_datHeaderLen);
+    if (!attr.headerTypeOK(_datHeader) ||
+        !extractFileSize(_datHeader, *_datFile, _datFileSize)) {
+        _datFile->Close();
+    }
+    _createSerialNum = extractCreateSerialNum(_datHeader);
+    if (_datHeader.hasTag(versionTag)) {
+        _version = _datHeader.getTag(versionTag).asInteger();
+    }
+    // NOTE(review): the docIdLimit tag is read even when the header check
+    // above failed, i.e. without knowing that the tag exists - confirm that
+    // getTag() copes with a missing tag.
+    _docIdLimit = _datHeader.getTag(docIdLimitTag).asInteger();
+    if (hasIdx()) {
+        vespalib::FileHeader idxHeader(DIRECTIO_ALIGNMENT);
+        _idxHeaderLen = idxHeader.readFile(*_idxFile);
+        _idxFile->SetPosition(_idxHeaderLen);
+        if (!attr.headerTypeOK(idxHeader) ||
+            !extractFileSize(idxHeader, *_idxFile, _idxFileSize)) {
+            _idxFile->Close();
+        } else  {
+            // Prime _currIdx with the first index entry.
+            _currIdx = _idxReader.readHostOrder();
+        }
+    }
+    if (hasWeight()) {
+        vespalib::FileHeader weightHeader(DIRECTIO_ALIGNMENT);
+        _weightHeaderLen = weightHeader.readFile(*_weightFile);
+        _weightFile->SetPosition(_weightHeaderLen);
+        if (!attr.headerTypeOK(weightHeader))
+            _weightFile->Close();
+    }
+    if (hasData() && isEnumerated(_datHeader)) {
+#if 1
+        if (!allowEnumeratedLoad) {
+            /*
+             * Block loading of enumerated attribute vector files until we have
+             * working unit tests in place.
+             */
+            vespalib::string s;
+            s = vespalib::make_string("Attribute vector file '%s' is"
+                                      " enumerated."
+                                      " Install a newer version of vespa that"
+                                      " supports enumerated"
+                                      " attribute vector files, or ask" 
+                                      " vespa team to help "
+                                      " converting attribute vector to "
+                                      " non-enumerated form.",
+                                      _datFile->GetFileName());
+            LOG(error, "%s", s.c_str());
+            throw IllegalStateException(s);
+        }
+#endif
+        _enumerated = true;
+        _udatFile = attr.openUDAT();
+        vespalib::FileHeader udatHeader(DIRECTIO_ALIGNMENT);
+        _udatHeaderLen = udatHeader.readFile(*_udatFile);
+        _udatFile->SetPosition(_udatHeaderLen);
+        if (!attr.headerTypeOK(udatHeader))
+            _udatFile->Close();
+    }
+    // Load data is complete only when every file the attribute type needs
+    // is available.
+    _hasLoadData = hasData() &&
+                   (!attr.hasMultiValue() || hasIdx()) &&
+                   (!attr.hasWeightedSetType() || hasWeight()) &&
+                   (!getEnumerated() || hasUData());
+}
+
+
+// The file handles are owned by members and are released with them.
+AttributeVector::ReaderBase::~ReaderBase() = default;
+
+
+/*
+ * Stores the size of the given file in fileSize and then lets
+ * FileSizeCalculator::extractFileSize() process it together with the header.
+ * fileSize is passed by reference, so it may be changed by that call.
+ *
+ * Returns the result of FileSizeCalculator::extractFileSize().
+ */
+bool
+AttributeVector::ReaderBase::
+extractFileSize(const vespalib::GenericHeader &header,
+                FastOS_FileInterface &file, uint64_t &fileSize)
+{
+    // Start from the physical file size.
+    fileSize = file.GetSize();
+    return FileSizeCalculator::extractFileSize(header, header.getSize(),
+                                               file.GetFileName(), fileSize);
+}
+
+
+/*
+ * Positions every file in use right after its header again, so that reading
+ * restarts from the first value.
+ */
+void
+AttributeVector::ReaderBase::rewind()
+{
+    _datFile->SetPosition(_datHeaderLen);
+    _currIdx = 0;
+    if (hasIdx()) {
+        _idxFile->SetPosition(_idxHeaderLen);
+        // Re-read the first index entry, as the constructor does.
+        _currIdx = _idxReader.readHostOrder();
+    }
+    if (hasWeight()) {
+        _weightFile->SetPosition(_weightHeaderLen);
+    }
+    if (getEnumerated()) {
+        _udatFile->SetPosition(_udatHeaderLen);
+    }
+}
+
+
+/*
+ * Returns the number of values stored in the files.
+ *
+ * Enumerated content uses getEnumCount(). Fixed width content divides the
+ * data size (file size minus header length) by the width. Otherwise the
+ * count is taken from the last 4 bytes of the idx file, after which the
+ * reader is rewound.
+ */
+size_t
+AttributeVector::ReaderBase::getNumValues()
+{
+    if (getEnumerated()) {
+        return getEnumCount();
+    }
+    if (_fixedWidth > 0) {
+        const size_t payload(_datFileSize - _datHeaderLen);
+        assert((payload % _fixedWidth) == 0);
+        return payload / _fixedWidth;
+    }
+    // TODO. This limits the number of multivalues to 2^32-1
+    // This is assert during write, so this should never be a problem here.
+    _idxFile->SetPosition(_idxFileSize - 4);
+    const size_t count = _idxReader.readHostOrder();
+    rewind();
+    return count;
+}
+
+
+/*
+ * Reads the next index entry and returns how many values lie between it and
+ * the previous entry. The entry just read becomes the new current index.
+ */
+uint32_t
+AttributeVector::ReaderBase::getNextValueCount()
+{
+    const uint32_t upcomingIdx = _idxReader.readHostOrder();
+    const uint32_t valueCount = upcomingIdx - _currIdx;
+    _currIdx = upcomingIdx;
+    return valueCount;
+}
+
+
+// Loads "<base file name>.dat" into a buffer.
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadDAT()
+{
+    FileUtil::LoadedBuffer::UP buffer(loadFile(".dat"));
+    return buffer;
+}
+
+
+// Loads "<base file name>.idx" into a buffer.
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadIDX()
+{
+    FileUtil::LoadedBuffer::UP buffer(loadFile(".idx"));
+    return buffer;
+}
+
+
+// Loads "<base file name>.weight" into a buffer.
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadWeight()
+{
+    FileUtil::LoadedBuffer::UP buffer(loadFile(".weight"));
+    return buffer;
+}
+
+
+// Loads "<base file name>.udat" into a buffer.
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadUDAT()
+{
+    FileUtil::LoadedBuffer::UP buffer(loadFile(".udat"));
+    return buffer;
+}
+
+
+/*
+ * Loads the file named by the base file name followed by suffix, through
+ * FileUtil::loadFile().
+ */
+FileUtil::LoadedBuffer::UP
+AttributeVector::loadFile(const char *suffix)
+{
+    BaseName::string path(getBaseFileName());
+    path += suffix;
+    return FileUtil::loadFile(path);
+}
+
+
+/*
+ * Switches the attribute to a new base file name and saves it there. The
+ * new name stays in effect after the call.
+ */
+bool
+AttributeVector::saveAs(const vespalib::stringref &baseFileName)
+{
+    _baseFileName = baseFileName;
+    const bool saved = save();
+    return saved;
+}
+
+/*
+ * Switches the attribute to a new base file name and saves it to the given
+ * target. The new name stays in effect after the call.
+ */
+bool
+AttributeVector::saveAs(const vespalib::stringref &baseFileName,
+                        IAttributeSaveTarget & saveTarget)
+{
+    _baseFileName = baseFileName;
+    const bool saved = save(saveTarget);
+    return saved;
+}
+
+
+/*
+ * Saves the attribute to files, using default constructed tuning and a
+ * dummy file header context.
+ */
+bool
+AttributeVector::save()
+{
+    TuneFileAttributes tuning;
+    DummyFileHeaderContext headerContext;
+    AttributeFileSaveTarget fileTarget(tuning, headerContext);
+    return save(fileTarget);
+}
+
+
+/*
+ * Commits the attribute and saves it to the given target.
+ *
+ * A saver object from onInitSave() is used when one is available. Otherwise
+ * the target is configured and set up here and onSave() does the writing.
+ *
+ * Returns the saver's result, or false when the target's setup() fails on
+ * the old style path, or true otherwise.
+ */
+bool
+AttributeVector::save(IAttributeSaveTarget &saveTarget)
+{
+    commit();
+    // First check if new style save is available.
+    std::unique_ptr saver(onInitSave());
+    if (saver) {
+        // Normally, new style save happens in background, but here it
+        // will occur in the foreground.
+        return saver->save(saveTarget);
+    }
+    // New style save not available, use old style save
+    saveTarget.setConfig(createSaveTargetConfig());
+    if (!saveTarget.setup()) {
+        return false;
+    }
+    onSave(saveTarget);
+    saveTarget.close();
+    return true;
+}
+
+
+/*
+ * Builds the save target configuration from the attribute's current state.
+ * The arguments are positional, so their order must match the
+ * IAttributeSaveTarget::Config constructor.
+ */
+IAttributeSaveTarget::Config
+AttributeVector::createSaveTargetConfig() const
+{
+    return IAttributeSaveTarget::Config(getBaseFileName(),
+                                   getConfig().basicType().asString(),
+                                   getConfig().collectionType().asString(),
+                                   // Tensor type spec; empty for every basic
+                                   // type other than TENSOR.
+                                   getConfig().basicType().type() ==
+                                   BasicType::Type::TENSOR ?
+                                   getConfig().tensorType().toSpec() :
+                                   "",
+                                   hasMultiValue(),
+                                   hasWeightedSetType(),
+                                   getEnumeratedSave(),
+                                   getCommittedDocIdLimit(),
+                                   getFixedWidth(),
+                                   getUniqueValueCount(),
+                                   getTotalValueCount(),
+                                   getCreateSerialNum(),
+                                   getVersion());
+}
+
+// Old style save step called from save(); this implementation only fails an assert.
+void
+AttributeVector::onSave(IAttributeSaveTarget & saveTarget)
+{
+    (void) saveTarget;
+    assert(false);
+}
+
+
+// True when every file needed to load this attribute can be stat'ed.
+bool
+AttributeVector::hasLoadData() const
+{
+    const char *base = getBaseFileName().c_str();
+    // Stat result for one candidate file; the stat info itself is unused.
+    auto exists = [](const vespalib::string &fileName) {
+        FastOS_StatInfo statInfo;
+        return FastOS_File::Stat(fileName.c_str(), &statInfo);
+    };
+
+    // The .dat file is always required.
+    if (!exists(vespalib::make_string("%s.dat", base))) {
+        return false;
+    }
+    // Multi-value attributes also require the .idx file.
+    if (hasMultiValue() &&
+        !exists(vespalib::make_string("%s.idx", base))) {
+        return false;
+    }
+    // Weighted sets also require the .weight file.
+    if (hasWeightedSetType() &&
+        !exists(vespalib::make_string("%s.weight", base))) {
+        return false;
+    }
+    // The enumerated save format also requires the .udat file.
+    if (isEnumeratedSaveFormat() &&
+        !exists(vespalib::make_string("%s.udat", base))) {
+        return false;
+    }
+    return true;
+}
+
+// Reads the header of the .dat file and returns isEnumerated() for it; throws when the file cannot be opened.
+bool
+AttributeVector::isEnumeratedSaveFormat(void) const
+{
+    const vespalib::string path(
+            vespalib::make_string("%s.dat", getBaseFileName().c_str()));
+    Fast_BufferedFile file;
+    if (!file.OpenReadOnly(path.c_str())) {
+        LOG(error, "could not open %s: %s",
+            file.GetFileName(), getLastErrorString().c_str());
+        throw IllegalStateException(
+                vespalib::make_string(
+                        "Failed opening attribute data file '%s' for reading",
+                        file.GetFileName()));
+    }
+    // The header object is only needed once the file is open.
+    vespalib::FileHeader header(DIRECTIO_ALIGNMENT);
+    header.readFile(file);
+    return isEnumerated(header);
+}
+
+// Runs onLoad(), commits when it succeeded, and records the outcome in _loaded.
+bool
+AttributeVector::load() {
+    bool loaded = onLoad();
+    if (loaded) {
+        commit();
+    }
+    _loaded = loaded; // only updated after commit() has returned
+    return _loaded;
+}
+
+// Default load step: loads nothing and returns false.
+bool
+AttributeVector::onLoad()
+{
+    return false;
+}
+
+// Default weight lookup: ignores both arguments and returns 1.
+int32_t
+AttributeVector::getWeight(DocId doc, uint32_t idx) const
+{
+    (void) doc;
+    (void) idx;
+    return 1;
+}
+// Defaults: no diversity attribute, maximal cutoff group count, bit vector and strict cutoff both off.
+AttributeVector::SearchContext::Params::Params() :
+    _diversityAttribute(nullptr),
+    _diversityCutoffGroups(std::numeric_limits::max()),
+    _useBitVector(false),
+    _diversityCutoffStrict(false)
+{
+}
+// Binds the context to attr; the posting list search context pointer starts out as NULL.
+AttributeVector::SearchContext::SearchContext(const AttributeVector &attr) :
+    _attr(attr),
+    _plsc(NULL)
+{
+}
+// Decodes searchSpec with SearchContext::decodeQuery() and forwards to the getSearch() overload taking the decoded term.
+AttributeVector::SearchContext::UP
+AttributeVector::getSearch(const QueryPacketT & searchSpec,
+                           const SearchContext::Params & params) const
+{
+    return getSearch(SearchContext::decodeQuery(searchSpec), params);
+}
+// Out-of-line destructor; the body is empty.
+AttributeVector::SearchContext::~SearchContext()
+{
+}
+
+// Estimated number of hits; saturates at the unsigned int maximum instead of wrapping around.
+unsigned int
+AttributeVector::SearchContext::approximateHits() const
+{
+    if (_plsc != NULL) {
+        return _plsc->approximateHits();
+    }
+    uint64_t estimate = std::max(uint64_t(_attr.getNumDocs()), _attr.getStatus().getNumValues()); // 64-bit value
+    return static_cast<unsigned int>(std::min<uint64_t>(estimate, std::numeric_limits<unsigned int>::max()));
+}
+
+// Parses searchSpec into a Query and returns its root; throws IllegalStateException when the query is invalid or the root cast fails.
+QueryTermSimple::UP
+AttributeVector::SearchContext::decodeQuery(const QueryPacketT &searchSpec)
+{
+    QueryTermSimple::UP qt;
+    EmptyQueryNodeResult qnb;
+    Query q(qnb, searchSpec);
+    if (q.valid() && (dynamic_cast(q.getRoot().get()))) {
+        qt.reset(static_cast(q.getRoot().release())); // root is released from the query and handed to qt
+    } else {
+        throw IllegalStateException("Failed decoding query");
+    }
+    return qt;
+}
+
+// Returns a posting list iterator when one can be created, otherwise falls back to a filter iterator.
+SearchIterator::UP
+AttributeVector::SearchContext::
+createIterator(fef::TermFieldMatchData *matchData, bool strict)
+{
+    SearchIterator::UP postingIterator;
+    if (_plsc != NULL) {
+        postingIterator = _plsc->createPostingIterator(matchData, strict);
+    }
+    return (postingIterator.get() != NULL)
+        ? std::move(postingIterator)
+        : createFilterIterator(matchData, strict);
+}
+
+// Returns an EmptySearch when valid() is false; otherwise a filter or regular attribute iterator, strict or not as requested.
+SearchIterator::UP
+AttributeVector::SearchContext::
+createFilterIterator(fef::TermFieldMatchData *matchData, bool strict)
+{
+    if (!valid())
+        return SearchIterator::UP(new queryeval::EmptySearch());
+    if (getIsFilter()) {
+        return SearchIterator::UP(strict ?
+            new FilterAttributeIteratorStrict
+            (*this, matchData) :
+            new FilterAttributeIteratorT
+            (*this, matchData));
+    }
+    return SearchIterator::UP(strict ?
+            new AttributeIteratorStrict
+            (*this, matchData) :
+            new AttributeIteratorT
+            (*this, matchData));
+}
+
+// Forwards to the posting list search context when there is one; otherwise does nothing.
+void
+AttributeVector::SearchContext::fetchPostings(bool strict)
+{
+    if (_plsc != NULL)
+        _plsc->fetchPostings(strict);
+}
+
+// Applies a map update to doc; returns false for a doc at or beyond getNumDocs() and for non-arithmetic nested updates.
+bool
+AttributeVector::apply(DocId doc, const MapValueUpdate &map)
+{
+    bool retval(doc < getNumDocs());
+    if (retval) {
+        const ValueUpdate & vu(map.getUpdate());
+        if (vu.inherits(ArithmeticValueUpdate::classId)) {
+            const ArithmeticValueUpdate &
+                au(static_cast(vu));
+            retval = applyWeight(doc, map.getKey(), au); // the map key selects the value whose weight is adjusted
+        } else {
+            retval = false;
+        }
+    }
+    return retval;
+}
+
+// Default implementation: ignores all arguments and returns false.
+bool
+AttributeVector::applyWeight(DocId, const FieldValue &,
+                             const ArithmeticValueUpdate &)
+{
+    return false;
+}
+
+// Refreshes the first used generation, then passes it to removeOldGenerations().
+void
+AttributeVector::removeAllOldGenerations()
+{
+    _genHandler.updateFirstUsedGeneration();
+    removeOldGenerations(_genHandler.getFirstUsedGeneration());
+}
+
+// Logs a warning that a divide-by-zero arithmetic operation is being ignored.
+void
+AttributeVector::divideByZeroWarning()
+{
+    LOG(warning,
+        "applyArithmetic(): "
+        "Divide by zero is an illegal operation on integer attributes "
+        "or weighted sets. Ignoring operation.");
+}
+
+// Logs a warning that compaction could not be performed, including the current generation.
+void
+AttributeVector::performCompactionWarning()
+{
+    LOG(warning,
+        "Could not perform compaction on MultiValueMapping "
+        "with current generation = %" PRIu64,
+        _genHandler.getCurrentGeneration());
+}
+
+// Adds the reserved doc (asserted to get docId 0), clears it and, for floating point attributes, stores the undefined value.
+void
+AttributeVector::addReservedDoc(void)
+{
+    uint32_t docId = 42; // placeholder; addDoc() takes it by reference and the assert below expects 0
+    addDoc(docId);		// Reserved
+    assert(docId == 0u);
+    assert(docId < getNumDocs());
+    clearDoc(docId);
+    commit();
+    const vespalib::Identifiable::RuntimeClass &info = getClass();
+    if (info.inherits(search::FloatingPointAttribute::classId)) {
+        FloatingPointAttribute &vec =
+            static_cast(*this);
+        if (hasMultiValue()) {
+            bool appendedUndefined = vec.append(0, attribute::getUndefined(), 1);
+            assert(appendedUndefined);
+            (void) appendedUndefined; // silences unused warnings when asserts are compiled out
+        } else {
+            bool updatedUndefined = vec.update(0, attribute::getUndefined());
+            assert(updatedUndefined);
+            (void) updatedUndefined;
+        }
+        commit();
+    }
+}
+
+// Enabling only takes effect when hasEnum() is true; disabling always takes effect.
+void
+AttributeVector::enableEnumeratedSave(bool enable)
+{
+    if (hasEnum() || !enable)
+        _enableEnumeratedSave = enable;
+}
+
+// NOTE(review): allowEnumeratedLoad is not declared in this function; presumably a flag shared beyond this object — confirm its scope.
+void
+AttributeVector::enableEnumeratedLoad(void)
+{
+    allowEnumeratedLoad = true;
+}
+
+// Default: there is no posting list attribute base, so NULL is returned.
+attribute::IPostingListAttributeBase *
+AttributeVector::getIPostingListAttributeBase(void)
+{
+    return NULL;
+}
+
+// True when getIPostingListAttributeBase() returns a non-NULL pointer.
+bool
+AttributeVector::hasPostings(void)
+{
+    return getIPostingListAttributeBase() != NULL;
+}
+
+// Default: reports the total value count as the unique value count.
+uint64_t
+AttributeVector::getUniqueValueCount(void) const
+{
+    return getTotalValueCount();
+}
+
+// Default: reports the number of docs as the total value count.
+uint64_t
+AttributeVector::getTotalValueCount(void) const
+{
+    return getNumDocs();
+}
+
+// Stores the create serial number; it is passed on by createSaveTargetConfig().
+void
+AttributeVector::setCreateSerialNum(uint64_t createSerialNum)
+{
+    _createSerialNum = createSerialNum;
+}
+
+// Returns the create serial number last stored with setCreateSerialNum().
+uint64_t
+AttributeVector::getCreateSerialNum(void) const
+{
+    return _createSerialNum;
+}
+// Default version is 0.
+uint32_t
+AttributeVector::getVersion() const {
+    return 0;
+}
+// Clears docs in [wantedLidLimit, committed limit), sets the committed limit to wantedLidLimit and records the generation.
+void
+AttributeVector::compactLidSpace(uint32_t wantedLidLimit)
+{
+    commit();
+    assert(_uncommittedDocIdLimit <= wantedLidLimit);
+    if (wantedLidLimit < _committedDocIdLimit) {
+        clearDocs(wantedLidLimit, _committedDocIdLimit);
+    }
+    commit();
+    _committedDocIdLimit = wantedLidLimit; // assigned after the second commit()
+    _compactLidSpaceGeneration = _genHandler.getCurrentGeneration(); // compared in canShrinkLidSpace()
+    incGeneration();
+}
+
+// True when shrinking is wanted and the first used generation is past the one recorded by compactLidSpace().
+bool
+AttributeVector::canShrinkLidSpace(void) const
+{
+    return wantShrinkLidSpace() &&
+        _compactLidSpaceGeneration < getFirstUsedGeneration();
+}
+
+// Clears docs in [committed limit, getNumDocs()), runs the shrink hooks, bumps the generation and forces a stat update.
+void
+AttributeVector::shrinkLidSpace(void)
+{
+    commit();
+    assert(canShrinkLidSpace());
+    uint32_t committedDocIdLimit = _committedDocIdLimit; // saved so it can be restored after commit()
+    clearDocs(committedDocIdLimit, getNumDocs());
+    commit();
+    _committedDocIdLimit = committedDocIdLimit; // restore the limit saved above
+    onShrinkLidSpace();
+    attribute::IPostingListAttributeBase *pab = getIPostingListAttributeBase();
+    if (pab != NULL) {
+        pab->forwardedShrinkLidSpace(_committedDocIdLimit);
+    }
+    incGeneration();
+    updateStat(true);
+}
+
+// Hook called from shrinkLidSpace(); this implementation is empty.
+void
+AttributeVector::onShrinkLidSpace(void)
+{
+}
+
+// Calls clearDoc() for every lid in [lidLow, lidLimit); the range must lie within getNumDocs().
+void
+AttributeVector::clearDocs(DocId lidLow, DocId lidLimit)
+{
+    assert(lidLow <= lidLimit);
+    assert(lidLimit <= getNumDocs());
+    for (DocId lid = lidLow; lid < lidLimit; ++lid) {
+        clearDoc(lid);
+    }
+}
+// Creates the interlock guard first, then an EnumModifier whose lock member is constructed from _enumLock.
+AttributeVector::EnumModifier
+AttributeVector::getEnumModifier()
+{
+    attribute::InterlockGuard interlockGuard(*_interlock);
+    return EnumModifier(_enumLock, interlockGuard);
+}
+
+// Replaces the interlock that getEnumModifier() guards with.
+void
+AttributeVector::setInterlock(const std::shared_ptr &
+                              interlock)
+{
+    _interlock = interlock;
+}
+
+// Commits pending changes, then returns whatever onInitSave() provides.
+std::unique_ptr
+AttributeVector::initSave()
+{
+    commit();
+    return onInitSave();
+}
+// Default: returns an empty pointer, which makes save() fall back to old style save.
+std::unique_ptr
+AttributeVector::onInitSave()
+{
+    return std::unique_ptr();
+}
+
+// Default: there is no extend interface, so nullptr is returned.
+IExtendAttribute *
+AttributeVector::getExtendInterface()
+{
+    return nullptr;
+}
+// Estimates the summed size of the .dat, .weight, .idx and .udat files that a save would produce.
+uint64_t
+AttributeVector::getEstimatedSaveByteSize() const
+{
+    uint64_t headerSize = 4096; // presumably the size reserved for each file header — TODO confirm
+    uint64_t totalValueCount = getTotalValueCount();
+    uint64_t uniqueValueCount = getUniqueValueCount();
+    uint64_t docIdLimit = getCommittedDocIdLimit();
+    uint64_t datFileSize = 0;
+    uint64_t weightFileSize = 0;
+    uint64_t idxFileSize = 0;
+    uint64_t udatFileSize = 0;
+    AddressSpace enumAddressSpace(getEnumStoreAddressSpaceUsage());
+
+    if (hasMultiValue()) {
+        idxFileSize = headerSize + sizeof(uint32_t) * (docIdLimit + 1); // one uint32_t per doc plus one extra
+    }
+    if (hasWeightedSetType()) {
+        weightFileSize = headerSize + sizeof(int32_t) * totalValueCount; // one int32_t per value
+    }
+    if (hasEnum() && getEnumeratedSave()) {
+        datFileSize =  headerSize + 4 * totalValueCount; // 4 bytes per value
+        udatFileSize = headerSize + enumAddressSpace.used()
+                       - 8 * uniqueValueCount; // NOTE(review): unsigned math; wraps if used() < 8 * uniqueValueCount — confirm
+    } else {
+        BasicType::Type basicType(getBasicType());
+        const Status &status = getStatus();
+        int64_t memorySize = status.getUsed() - status.getDead();
+        if (memorySize < 0) {
+            memorySize = 0; // clamp: dead may exceed used
+        }
+        switch (basicType) {
+        case BasicType::Type::PREDICATE:
+        case BasicType::Type::TENSOR:
+            datFileSize = headerSize + memorySize;
+            break;
+        case BasicType::Type::STRING:
+            assert(hasEnum());
+            datFileSize = headerSize;
+            if (uniqueValueCount > 0) {
+                double avgEntrySize = (static_cast(enumAddressSpace.used()) / uniqueValueCount) - 8;
+                datFileSize += avgEntrySize * totalValueCount; // NOTE(review): avgEntrySize is negative if used() / uniqueValueCount < 8 — confirm
+            }
+            break;
+        default:
+            datFileSize = headerSize + getFixedWidth() * totalValueCount;
+            break;
+        }
+    }
+    return datFileSize + weightFileSize + idxFileSize + udatFileSize;
+}
+
+// Explicit instantiations of the append/update/remove change helpers for StringChangeData.
+template bool AttributeVector::append(ChangeVectorT< ChangeTemplate > &changes, uint32_t , const StringChangeData &, int32_t, bool);
+template bool AttributeVector::update(ChangeVectorT< ChangeTemplate > &changes, uint32_t , const StringChangeData &);
+template bool AttributeVector::remove(ChangeVectorT< ChangeTemplate > &changes, uint32_t , const StringChangeData &, int32_t);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h
new file mode 100644
index 00000000000..aab20a5136d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h
@@ -0,0 +1,845 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "address_space.h"
+#include "address_space_usage.h"
+#include "iattributesavetarget.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+using document::ArithmeticValueUpdate;
+using document::MapValueUpdate;
+using document::FieldValue;
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+ 
+namespace search {
+
+template  class ComponentGuard;
+class AttributeReadGuard;
+class AttributeWriteGuard;
+class AttributeSaver;
+class EnumStoreBase;
+class MultiValueMappingBaseBase;
+
+class IDocumentWeightAttribute;
+
+namespace fef {
+class TermFieldMatchData;
+}
+
+namespace attribute
+{
+
+class IPostingListSearchContext;
+
+class IPostingListAttributeBase;
+
+class Interlock;
+class InterlockGuard;
+
+}
+
+using search::attribute::WeightedType;
+using search::attribute::Status;
+// Value holder with getValue/getWeight accessors where the weight is always 1.
+template 
+class UnWeightedType
+{
+public:
+    UnWeightedType() : _value(T()) { }
+    UnWeightedType(T v) : _value(v) { }
+    const T & getValue() const { return _value; }
+    void setValue(const T & v) { _value = v; }
+    int32_t getWeight()  const { return 1; }
+    void setWeight(int32_t w)  { (void) w; } // the weight is fixed; the argument is ignored
+
+    bool operator==(const UnWeightedType & rhs) const {
+        return _value == rhs._value;
+    }
+
+    friend vespalib::asciistream &
+    operator << (vespalib::asciistream & os, const UnWeightedType & v) {
+        return os << "(" << v._value << ", 1)"; // printed as "(value, 1)"
+    }
+private:
+    T       _value;
+};
+// Interface for adding a value with an optional weight (default 1); every default implementation returns false.
+class IExtendAttribute
+{
+public:
+    virtual bool add(int64_t, int32_t = 1) { return false; }
+    virtual bool add(double, int32_t = 1) { return false; }
+    virtual bool add(const char *, int32_t = 1) { return false; }
+    
+    virtual ~IExtendAttribute() {}
+};
+
+class AttributeVector : public vespalib::Identifiable,
+                        public attribute::IAttributeVector
+{
+protected:
+    typedef search::attribute::Config Config;
+    typedef search::attribute::CollectionType CollectionType;
+    typedef search::attribute::BasicType BasicType;
+public:
+    typedef std::shared_ptr SP;
+    class BaseName : public vespalib::string // base file name string that also keeps the derived attribute name
+    {
+    public:
+        typedef vespalib::string string;
+        BaseName(const vespalib::stringref &s)
+            : string(s),
+              _name(createAttributeName(s)) // attribute name is derived from s once, at construction
+        {
+        }
+        BaseName & operator = (const vespalib::stringref & s) {
+            BaseName n(s);
+            std::swap(*this, n); // replaces both the string part and _name
+            return *this;
+        }
+
+        BaseName(const vespalib::stringref &base,
+                 const vespalib::stringref &snap,
+                 const vespalib::stringref &name);
+
+        string getIndexName() const;
+        string getSnapshotName() const;
+        const string & getAttributeName() const { return _name; }
+        string getDirName() const;
+    private:
+        static string createAttributeName(const vespalib::stringref & s);
+        string _name;
+    };
+    // Base class for readers of saved attribute files. NOTE(review): members are presumably set up by the out-of-line constructor — confirm.
+    class ReaderBase
+    {
+    public:
+        ReaderBase(AttributeVector & attr);
+
+        virtual ~ReaderBase();
+
+        void rewind();
+
+        bool hasWeight() const {
+            return _weightFile.get() && _weightFile->IsOpened();
+        }
+
+        bool hasIdx() const {
+            return _idxFile.get() && _idxFile->IsOpened();
+        }
+
+        bool hasData() const {
+            return _datFile.get() && _datFile->IsOpened();
+        }
+
+        bool hasUData() const {
+            return _udatFile.get() && _udatFile->IsOpened();
+        }
+
+        uint32_t getNumIdx() const {
+            return (_idxFileSize - _idxHeaderLen) /sizeof(uint32_t); // uint32_t entries after the idx header
+        }
+
+        size_t getEnumCount(void) const {
+            size_t dataSize(_datFileSize - _datHeaderLen);
+            assert((dataSize % sizeof(uint32_t)) == 0); // payload must be a whole number of uint32_t entries
+            return dataSize / sizeof(uint32_t);
+        }
+
+        static bool
+        extractFileSize(const vespalib::GenericHeader &header,
+                        FastOS_FileInterface &file, uint64_t &fileSize);
+
+        size_t getNumValues();
+        int32_t getNextWeight() { return _weightReader.readHostOrder(); }
+        uint32_t getNextEnum(void) { return _enumReader.readHostOrder(); }
+        bool getEnumerated(void) const { return _enumerated; }
+        uint32_t getNextValueCount();
+        int64_t getCreateSerialNum(void) const { return _createSerialNum; }
+        bool getHasLoadData(void) const { return _hasLoadData; }
+        uint32_t getVersion() const { return _version; }
+        uint32_t getDocIdLimit() const { return _docIdLimit; }
+        const vespalib::GenericHeader &getDatHeader() const {
+            return _datHeader;
+        }
+    protected:
+        std::unique_ptr  _datFile;
+    private:
+        std::unique_ptr  _weightFile;
+        std::unique_ptr  _idxFile;
+        std::unique_ptr  _udatFile;
+        FileReader   _weightReader;
+        FileReader  _idxReader;
+        FileReader  _enumReader;
+        uint32_t              _currIdx;
+        uint32_t	      _datHeaderLen;
+        uint32_t              _idxHeaderLen;
+        uint32_t              _weightHeaderLen;
+        uint32_t              _udatHeaderLen;
+        uint64_t              _createSerialNum;
+        size_t                _fixedWidth;
+        bool                  _enumerated;
+        bool                  _hasLoadData;
+        uint32_t              _version;
+        uint32_t              _docIdLimit;
+        vespalib::FileHeader  _datHeader;
+        uint64_t              _datFileSize;
+        uint64_t              _idxFileSize;
+    protected:
+        size_t getDataCountHelper(size_t elemSize) const {
+            size_t dataSize(_datFileSize - _datHeaderLen);
+            return dataSize / elemSize; // elements of elemSize bytes after the dat header
+        }
+    };
+    // ReaderBase extension that reads values of type T from the data file.
+    template 
+    class PrimitiveReader : public ReaderBase
+    {
+    public:
+        PrimitiveReader(AttributeVector &attr)
+            : ReaderBase(attr),
+              _datReader(*_datFile) // reads from the data file owned by ReaderBase
+        {
+        }
+
+        virtual ~PrimitiveReader() { }
+        T getNextData() { return _datReader.readHostOrder(); }
+        size_t getDataCount() const { return getDataCountHelper(sizeof(T)); }
+    private:
+        FileReader _datReader;
+    };
+
+    using GenerationHandler = vespalib::GenerationHandler;
+    using GenerationHolder = vespalib::GenerationHolder;
+    typedef GenerationHandler::generation_t generation_t;
+
+    virtual ~AttributeVector();
+protected:
+    /**
+     * Will update statistics by calling onUpdateStat if necessary.
+     */
+    void updateStat(bool forceUpdate);
+
+    void
+    updateStatistics(uint64_t numValues,
+                     uint64_t numUniqueValue,
+                     uint64_t allocated,
+                     uint64_t used,
+                     uint64_t dead,
+                     uint64_t onHold);
+
+    void performCompactionWarning();
+    // NOTE(review): if getString() returns tmp itself, v dangles once this function returns — confirm the implementations.
+    void getByType(DocId doc, const char *&v) const {
+        char tmp[1024]; v = getString(doc, tmp, sizeof(tmp));
+    }
+
+    void getByType(DocId doc, vespalib::string &v) const {
+        char tmp[1024]; v = getString(doc, tmp, sizeof(tmp));
+    }
+
+    void getByType(DocId doc, largeint_t & v) const {
+        v = getInt(doc);
+    }
+
+    void getByType(DocId doc, double &v) const {
+        v = getFloat(doc);
+    }
+
+    uint32_t getByType(DocId doc, const char **v, uint32_t sz) const {
+        return get(doc, v, sz);
+    }
+
+    uint32_t getByType(DocId doc, vespalib::string *v, uint32_t sz) const {
+        return get(doc, v, sz);
+    }
+
+    uint32_t getByType(DocId doc, largeint_t * v, uint32_t sz) const {
+        return get(doc, v, sz);
+    }
+
+    uint32_t getByType(DocId doc, double *v, uint32_t sz) const {
+        return get(doc, v, sz);
+    }
+
+
+    AttributeVector(const vespalib::stringref &baseFileName, const Config & c);
+
+    void checkSetMaxValueCount(int index) {
+        _highestValueCount = std::max(index, _highestValueCount);
+    }
+
+    void setEnumMax(uint32_t e)          { _enumMax = e; setEnum(); }
+    void setEnum(bool hasEnum_=true)     { _hasEnum = hasEnum_; }
+    void setSortedEnum(bool sorted=true) { _hasSortedEnum = sorted; }
+    void setNumDocs(uint32_t n)          { _status.setNumDocs(n); }
+    void incNumDocs()                    { _status.incNumDocs(); }
+
+    std::unique_ptr openDAT();
+
+    std::unique_ptr openIDX();
+
+    std::unique_ptr openWeight();
+
+    std::unique_ptr openUDAT();
+
+    FileUtil::LoadedBuffer::UP loadDAT();
+
+    FileUtil::LoadedBuffer::UP loadIDX();
+
+    FileUtil::LoadedBuffer::UP loadWeight();
+
+    FileUtil::LoadedBuffer::UP loadUDAT();
+    // NOTE(review): the copy constructor plus mutable _attr and stealAttr() look like ownership transfer on copy — confirm in the .cpp.
+    class ValueModifier
+    {
+    public:
+        ValueModifier(AttributeVector &attr);
+        ValueModifier(const ValueModifier &rhs);
+        ~ValueModifier();
+    private:
+        AttributeVector * stealAttr() const {
+            AttributeVector * ret(_attr);
+            _attr = NULL; // hands the pointer to the caller and leaves this object without one
+            return ret;
+        }
+
+        mutable AttributeVector * _attr;
+    };
+    // Movable holder of a lock constructed from the given mutex; the interlock guard argument is only required to exist.
+    class EnumModifier
+    {
+        std::unique_lock _enumLock;
+    public:
+        EnumModifier(std::shared_timed_mutex &lock,
+                     attribute::InterlockGuard &interlockGuard)
+            : _enumLock(lock)
+        {
+            (void) interlockGuard; // unused beyond being passed in
+        }
+        EnumModifier(EnumModifier &&rhs)
+            : _enumLock(std::move(rhs._enumLock))
+        {
+        }
+        EnumModifier &operator=(EnumModifier &&rhs)
+        {
+            _enumLock = std::move(rhs._enumLock);
+            return *this;
+        }
+        virtual ~EnumModifier()
+        {
+        }
+    };
+
+    EnumModifier getEnumModifier();
+    ValueModifier getValueModifier() { return ValueModifier(*this); }
+    // Raises the uncommitted doc id limit to doc + 1 when doc is at or above the current limit.
+    void updateUncommittedDocIdLimit(DocId doc) {
+        if (_uncommittedDocIdLimit <= doc)  {
+            _uncommittedDocIdLimit = doc + 1;
+        }
+    }
+    // Copies a higher uncommitted limit into the committed limit after a release fence, then resets the uncommitted limit to 0.
+    void updateCommittedDocIdLimit(void) {
+        if (_uncommittedDocIdLimit != 0) {
+            if (_uncommittedDocIdLimit > _committedDocIdLimit) {
+                std::atomic_thread_fence(std::memory_order_release);
+                _committedDocIdLimit = _uncommittedDocIdLimit; // the committed limit is only raised here, never lowered
+            }
+            _uncommittedDocIdLimit = 0;
+        }
+    }
+    
+public:
+    void incGeneration();
+    void removeAllOldGenerations();
+
+    generation_t getFirstUsedGeneration() const {
+        return _genHandler.getFirstUsedGeneration();
+    }
+
+    generation_t getCurrentGeneration() const {
+        return _genHandler.getCurrentGeneration();
+    }
+
+    virtual IExtendAttribute * getExtendInterface();
+
+protected:
+    /**
+     * Returns the number of readers holding a generation guard.
+     * Should be called by the writer thread.
+     */
+    uint32_t getGenerationRefCount(generation_t gen) const {
+        return _genHandler.getGenerationRefCount(gen);
+    }
+
+    const GenerationHandler & getGenerationHandler() const {
+        return _genHandler;
+    }
+
+    GenerationHandler & getGenerationHandler() {
+        return _genHandler;
+    }
+
+    GenerationHolder & getGenerationHolder() {
+        return _genHolder;
+    }
+
+    template
+    bool clearDoc(ChangeVectorT< ChangeTemplate > &changes, DocId doc);
+
+    template
+    bool update(ChangeVectorT< ChangeTemplate > &changes, DocId doc, const T & v) __attribute__((noinline));
+
+    template
+    bool append(ChangeVectorT< ChangeTemplate > &changes, DocId doc, const T &v, int32_t w, bool doCount = true) __attribute__((noinline));
+    template
+    bool append(ChangeVectorT< ChangeTemplate > &changes, DocId doc, Accessor & ac) __attribute__((noinline));
+
+    template
+    bool remove(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T &v, int32_t w);
+
+    template
+    bool adjustWeight(ChangeVectorT< ChangeTemplate > &changes, DocId doc, const T &v, const ArithmeticValueUpdate &wd);
+    // Applies an increase/multiply/divide weight change; any other change type returns the weight unchanged.
+    template 
+    static int32_t
+    applyWeightChange(int32_t weight, const ChangeTemplate &weightChange) {
+        if (weightChange._type == ChangeBase::INCREASEWEIGHT) {
+            return weight + weightChange._weight;
+        } else if (weightChange._type == ChangeBase::MULWEIGHT) {
+            return weight * weightChange._weight;
+        } else if (weightChange._type == ChangeBase::DIVWEIGHT) {
+            return weight / weightChange._weight; // NOTE(review): no zero check here; presumably callers reject zero divisors — confirm
+        }
+        return weight;
+    }
+
+    template
+    bool applyArithmetic(ChangeVectorT< ChangeTemplate > &changes, DocId doc, const T &v, const ArithmeticValueUpdate & arithm);
+    // Store v in r and return it: the double overload stores v unchanged, the largeint_t overload stores floor(v + 0.5).
+    static double round(double v, double & r) { return r = v; }
+    static largeint_t round(double v, largeint_t &r) { return r = static_cast(::floor(v+0.5)); }
+    // Applies an ADD/SUB/MUL/DIV change to value; undefined values and other change types are returned unchanged.
+    template 
+    static BaseType
+    applyArithmetic(const BaseType &value,
+                    const ChangeTemplate & arithmetic)
+    {
+        typedef typename ChangeData::DataType LargeType;
+        if (attribute::isUndefined(value)) {
+            return value;
+        } else if (arithmetic._type == ChangeBase::ADD) {
+            return value + static_cast(arithmetic._arithOperand);
+        } else if (arithmetic._type == ChangeBase::SUB) {
+            return value - static_cast(arithmetic._arithOperand);
+        } else if (arithmetic._type == ChangeBase::MUL) {
+            LargeType r; // receives the result from round(); which overload runs depends on LargeType
+            return round((static_cast(value) *
+                          arithmetic._arithOperand), r);
+        } else if (arithmetic._type == ChangeBase::DIV) {
+            LargeType r;
+            return round(static_cast(value) /
+                         arithmetic._arithOperand, r);
+        }
+        return value;
+    }
+
+    virtual AddressSpace getEnumStoreAddressSpaceUsage() const;
+
+    virtual AddressSpace getMultiValueAddressSpaceUsage() const;
+
+public:
+    DECLARE_IDENTIFIABLE_ABSTRACT(AttributeVector);
+    bool isLoaded() const { return _loaded; }
+
+    /** Return the fixed length of the attribute. If 0 then you must inquire each document. */
+    virtual size_t getFixedWidth() const override { return _config.basicType().fixedSize(); }
+    const Config &getConfig() const { return _config; }
+    BasicType getInternalBasicType() const { return _config.basicType(); }
+    CollectionType getInternalCollectionType() const { return _config.collectionType(); }
+    const BaseName & getBaseFileName() const { return _baseFileName; }
+    void setBaseFileName(const vespalib::stringref & name) { _baseFileName = name; }
+
+    // Implements IAttributeVector
+    virtual const vespalib::string & getName(void) const {
+        return _baseFileName.getAttributeName();
+    }
+
+    virtual bool hasMultiValue() const {
+        return _config.collectionType().isMultiValue();
+    }
+
+    virtual bool hasWeightedSetType() const {
+        return _config.collectionType().isWeightedSet();
+    }
+
+    bool hasArrayType() const { return _config.collectionType().isArray(); }
+    virtual bool hasEnum() const { return _hasEnum; }
+    bool hasSortedEnum() const { return _hasSortedEnum; }
+    virtual bool hasEnum2Value() const { return false; }
+    virtual uint32_t getMaxValueCount() const { return _highestValueCount; }
+    uint32_t getEnumMax() const { return _enumMax; }
+
+    // Implements IAttributeVector
+    virtual uint32_t getNumDocs(void) const { return _status.getNumDocs(); }
+    uint32_t getCommittedDocIdLimit(void) const { return _committedDocIdLimit; }
+    uint32_t & getCommittedDocIdLimitRef(void) { return _committedDocIdLimit; }
+    void setCommittedDocIdLimit(uint32_t committedDocIdLimit) {
+        _committedDocIdLimit = committedDocIdLimit;
+    }
+
+    const Status & getStatus() const { return _status; }
+    Status & getStatus() { return _status; }
+
+    AddressSpaceUsage getAddressSpaceUsage() const;
+
+    // Implements IAttributeVector
+    virtual BasicType::Type getBasicType() const {
+        return getInternalBasicType().type();
+    }
+
+    virtual CollectionType::Type getCollectionType() const {
+        return getInternalCollectionType().type();
+    }
+
+    /**
+     * Updates the base file name of this attribute vector and saves
+     * it to file(s)
+     */
+    bool saveAs(const vespalib::stringref &baseFileName);
+
+    /**
+     * Updates the base file name of this attribute vector and saves
+     * it using the given saveTarget
+     */
+    bool saveAs(const vespalib::stringref &baseFileName,
+                IAttributeSaveTarget &saveTarget);
+
+    /** Saves this attribute vector to file(s) **/
+    bool save();
+
+    /** Saves this attribute vector using the given saveTarget **/
+    bool save(IAttributeSaveTarget & saveTarget);
+
+    IAttributeSaveTarget::Config createSaveTargetConfig() const;
+
+    /** Returns whether this attribute has load data files on disk **/
+    bool hasLoadData() const;
+
+    bool isEnumeratedSaveFormat(void) const;
+    bool load();
+    void commit(bool forceStatUpdate = false);
+    void commit(uint64_t firstSyncToken, uint64_t lastSyncToken);
+    void setCreateSerialNum(uint64_t createSerialNum);
+    uint64_t getCreateSerialNum(void) const;
+    virtual uint32_t getVersion() const;
+
+////// Interface to access single documents.
+    /**
+     * The interfaces for accessing individual elements, both for update
+     * and retrieval, are type specific.  Elements are accessed by their
+     * proper type.
+     */
+    /** Get number of values per document.  */
+    virtual uint32_t getValueCount(DocId doc) const = 0;
+
+    virtual uint32_t clearDoc(DocId doc) = 0;
+    virtual largeint_t getDefaultValue() const = 0;
+    virtual EnumHandle getEnum(DocId doc)  const = 0;
+    virtual const char * getString(DocId doc, char * v, size_t sz) const = 0;
+    virtual largeint_t getInt(DocId doc) const = 0;
+    virtual double getFloat(DocId doc)   const = 0;
+    virtual void getEnumValue(const EnumHandle *v, uint32_t *e, uint32_t sz) const = 0;
+
+    uint32_t getEnumValue(EnumHandle eh) const {
+        uint32_t e(0);
+        getEnumValue(&eh, &e, 1);
+        return e;
+    }
+
+    // Implements IAttributeVector
+    virtual uint32_t get(DocId doc, EnumHandle *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, vespalib::string *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, const char **v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, largeint_t *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, double *v, uint32_t sz) const = 0;
+
+    // Implements IAttributeVector
+    virtual uint32_t get(DocId doc, WeightedEnum *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, WeightedString *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, WeightedConstChar *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, WeightedInt *v, uint32_t sz) const = 0;
+    virtual uint32_t get(DocId doc, WeightedFloat *v, uint32_t sz) const = 0;
+    virtual int32_t getWeight(DocId doc, uint32_t idx) const;
+
+    // Implements IAttributeVector
+    virtual bool findEnum(const char *value, EnumHandle &e) const {
+        (void) value;
+        (void) e;
+        return false;
+    }
+
+///// Modify API
+    virtual void onCommit() = 0;
+    virtual bool addDoc(DocId &doc) = 0;
+    virtual bool addDocs(DocId & startDoc, DocId & lastDoc, uint32_t numDocs);
+    virtual bool addDocs(uint32_t numDocs);
+    bool apply(DocId doc, const MapValueUpdate &map);
+
+////// Search API
+
+    // type-safe down-cast to attribute supporting direct document weight iterators
+    virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const { return nullptr; }
+
+    /**
+       - Search for equality
+       - Range search
+    */
+
+    /**
+     * Search context tied to a single AttributeVector, held by
+     * reference in _attr.  The public cmp() overloads forward to the
+     * private pure virtual onCmp() overloads that subclasses provide.
+     */
+    class SearchContext : public vespalib::noncopyable
+    {
+        template  friend class AttributeIteratorT;
+        template  friend class FilterAttributeIteratorT;
+        template  friend class AttributePostingListIteratorT;
+        template  friend class FilterAttributePostingListIteratorT;
+    public:
+        /**
+         * Options passed to getSearch().  Every setter stores its
+         * argument and returns *this, so calls can be chained.
+         */
+        class Params {
+            using IAttributeVector = attribute::IAttributeVector;
+        public:
+            Params();
+            bool useBitVector() const { return _useBitVector; }
+            const IAttributeVector * diversityAttribute() const { return _diversityAttribute; }
+            size_t diversityCutoffGroups() const { return _diversityCutoffGroups; }
+            bool diversityCutoffStrict() const { return _diversityCutoffStrict; }
+
+            Params & useBitVector(bool value) {
+                _useBitVector = value;
+                return *this;
+            }
+            Params & diversityAttribute(const IAttributeVector * value) {
+                _diversityAttribute = value;
+                return *this;
+            }
+            Params & diversityCutoffGroups(size_t groups) {
+                _diversityCutoffGroups = groups;
+                return *this;
+            }
+            Params & diversityCutoffStrict(bool strict) {
+                _diversityCutoffStrict = strict;
+                return *this;
+            }
+        private:
+            // Stored as a plain pointer; nothing in this class deletes it.
+            const IAttributeVector * _diversityAttribute;
+            size_t                   _diversityCutoffGroups;
+            bool                     _useBitVector;
+            bool                     _diversityCutoffStrict;
+        };
+        typedef std::unique_ptr UP;
+        virtual ~SearchContext();
+        virtual unsigned int approximateHits() const;
+        static QueryTermSimple::UP decodeQuery(const QueryPacketT & searchSpec);
+
+        /**
+         * Creates an attribute search iterator associated with this
+         * search context.
+         *
+         * @return attribute search iterator
+         *
+         * @param matchData the attribute match data used when
+         * unpacking data for a hit
+         *
+         * @param strict whether the iterator should be strict or not
+         **/
+        virtual queryeval::SearchIterator::UP
+        createIterator(fef::TermFieldMatchData *matchData, bool strict);
+
+        /**
+         * Creates an attribute search iterator associated with this
+         * search context.  Postings lists are not used.
+         *
+         * @return attribute search iterator
+         *
+         * @param matchData the attribute match data used when
+         * unpacking data for a hit
+         *
+         * @param strict whether the iterator should be strict or not
+         **/
+        virtual queryeval::SearchIterator::UP
+        createFilterIterator(fef::TermFieldMatchData *matchData, bool strict);
+
+        /*
+         * Create temporary posting lists.  Should be called before
+         * createIterator is called.
+         */
+        virtual void fetchPostings(bool strict);
+        /** Forwards to onCmp(docId, weight). */
+        bool cmp(DocId docId, int32_t &weight) const { return onCmp(docId, weight); }
+        /** Forwards to onCmp(docId). */
+        bool cmp(DocId docId) const { return onCmp(docId); }
+        /** Returns the attribute vector this context was created for. */
+        const AttributeVector & attribute() const { return _attr; }
+        /** The base implementation always reports the context as not valid. */
+        virtual bool valid() const { return false; }
+        /** The base implementation returns a default-constructed range. */
+        virtual Int64Range getAsIntegerTerm() const { return Int64Range(); }
+
+        // NOTE(review): the base implementation forms its result by
+        // dereferencing a null pointer, which is undefined behaviour.
+        // Presumably only overriding subclasses are ever asked for
+        // their query term -- confirm before calling this on the base.
+        virtual const QueryTermBase & queryTerm() const {
+            return *static_cast(NULL);
+        }
+
+    protected:
+        SearchContext(const AttributeVector &attr);
+    private:
+        virtual bool onCmp(DocId docId, int32_t &weight) const = 0;
+        virtual bool onCmp(DocId docId) const = 0;
+
+        const AttributeVector & _attr;
+    protected:
+        // NOTE(review): presumably set by subclasses that support
+        // posting lists; its initialisation is not visible here.
+        attribute::IPostingListSearchContext *_plsc;
+
+        /** Returns the filter flag from the attribute's config. */
+        bool getIsFilter(void) const { return _attr.getConfig().getIsFilter(); }
+    };
+
+    SearchContext::UP getSearch(const QueryPacketT &searchSpec, const SearchContext::Params & params) const;
+    virtual SearchContext::UP getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const = 0;
+    virtual const EnumStoreBase *getEnumStoreBase() const { return nullptr; }
+    virtual const MultiValueMappingBaseBase *getMultiValueBase() const { return nullptr; }
+private:
+    void divideByZeroWarning();
+    virtual bool applyWeight(DocId doc, const FieldValue &fv, const ArithmeticValueUpdate &wAdjust);
+    virtual void onSave(IAttributeSaveTarget & saveTarget);
+    virtual bool onLoad();
+    bool headerTypeOK(const vespalib::GenericHeader &header) const;
+    std::unique_ptr openFile(const char *suffix);
+    FileUtil::LoadedBuffer::UP loadFile(const char *suffix);
+
+
+    BaseName               _baseFileName;
+    Config                 _config;
+    std::shared_ptr _interlock;
+    std::shared_timed_mutex _enumLock;
+    GenerationHandler      _genHandler;
+    GenerationHolder       _genHolder;
+    Status                 _status;
+    int                    _highestValueCount;
+    uint32_t               _enumMax;
+    uint32_t		   _committedDocIdLimit; // docid limit for search
+    uint32_t               _uncommittedDocIdLimit; // based on queued changes
+    uint64_t               _createSerialNum;
+    uint64_t               _compactLidSpaceGeneration; 
+    bool                   _hasEnum;
+    bool                   _hasSortedEnum;
+    bool                   _loaded;
+    bool                   _enableEnumeratedSave;
+    fastos::TimeStamp      _nextStatUpdateTime;
+
+////// Locking strategy interface. only available from the Guards.
+    /**
+     * Used to guard that a value you reference will always reference
+     * a value. It might not be the same value, but at least it will
+     * be a value for that document.  The guarantee holds as long as
+     * the guard is alive.
+    */
+    GenerationHandler::Guard takeGenerationGuard() { return _genHandler.takeGuard(); }
+
+    /// Clean up [0, firstUsed>
+    virtual void removeOldGenerations(generation_t firstUsed) { (void) firstUsed; }
+    virtual void onGenerationChange(generation_t generation) { (void) generation; }
+    virtual void onUpdateStat() = 0;
+    /**
+     * Used to regulate access to critical resources. Apply the
+     * reader/writer guards.
+     */
+    std::shared_timed_mutex & getEnumLock() { return _enumLock; }
+
+    friend class ComponentGuard;
+    friend class AttributeEnumGuard;
+    friend class AttributeValueGuard;
+    friend class AttributeTest;
+    friend class AttributeManagerTest;
+public:
+    /**
+     * Should be called by the writer thread.
+     */
+    void updateFirstUsedGeneration(void) {
+        _genHandler.updateFirstUsedGeneration();
+    }
+
+    /**
+     * Returns true if we might still have readers.  False positives
+     * are possible if writer hasn't updated first used generation
+     * after last reader left.
+     */
+    bool hasReaders(void) const { return _genHandler.hasReaders(); }
+
+    /**
+     * Add reserved initial document with docId 0 and undefined value.
+     */
+    void addReservedDoc(void);
+
+    void enableEnumeratedSave(bool enable = true);
+
+    /*
+     * Temporary method, used by unit tests to enable enumerated load
+     * until it can be enabled by default.
+     */
+    static void enableEnumeratedLoad(void);
+
+    bool getEnumeratedSave(void) const { return _hasEnum && _enableEnumeratedSave; }
+
+    virtual attribute::IPostingListAttributeBase * getIPostingListAttributeBase();
+    bool hasPostings(void);
+    virtual uint64_t getUniqueValueCount(void) const;
+    virtual uint64_t getTotalValueCount(void) const;
+    virtual void compactLidSpace(uint32_t wantedLidLimit);
+    virtual void clearDocs(DocId lidLow, DocId lidLimit);
+    bool wantShrinkLidSpace(void) const { return _committedDocIdLimit < getNumDocs(); }
+    virtual bool canShrinkLidSpace(void) const;
+    void shrinkLidSpace(void);
+    virtual void onShrinkLidSpace(void);
+
+    void setInterlock(const std::shared_ptr &interlock);
+
+    const std::shared_ptr &getInterlock() const
+    {
+        return _interlock;
+    }
+
+    std::unique_ptr initSave();
+
+    virtual std::unique_ptr onInitSave();
+
+    virtual uint64_t getEstimatedSaveByteSize() const;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.hpp b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp
new file mode 100644
index 00000000000..20edc0826ad
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attributevector.hpp
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * NaN test usable with any value type.  This generic version ignores
+ * its argument and always returns false; the float and double
+ * specializations below defer to std::isnan.
+ */
+template 
+inline bool myIsNan(T v) { (void)v; return false; }
+
+/** Specialization for float: true if v is NaN. */
+template <>
+inline bool
+myIsNan(float v)
+{
+    return std::isnan(v);
+}
+
+/** Specialization for double: true if v is NaN. */
+template <>
+inline bool
+myIsNan(double v)
+{
+    return std::isnan(v);
+}
+
+/**
+ * Queues a weight adjustment for value v of document doc.
+ *
+ * The arithmetic operator of wd selects the change type: Add and Sub
+ * both queue INCREASEWEIGHT (Sub with the negated operand), Mul queues
+ * MULWEIGHT and Div queues DIVWEIGHT.  A division by zero queues
+ * nothing and calls divideByZeroWarning(), but still returns true.
+ *
+ * @param changes change vector receiving the new change
+ * @param doc     document to adjust
+ * @param v       value whose weight is adjusted
+ * @param wd      operator and operand describing the adjustment
+ * @return false if the attribute is not a weighted set, doc is not
+ *         below getNumDocs(), or the operator is not one of the four above
+ */
+template
+bool
+AttributeVector::adjustWeight(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T & v,
+                              const ArithmeticValueUpdate & wd)
+{
+    bool retval(hasWeightedSetType() && (doc < getNumDocs()));
+    if (retval) {
+        size_t oldSz(changes.size());
+        ArithmeticValueUpdate::Operator op(wd.getOperator());
+        // The operand is narrowed to a 32-bit integer weight.
+        int32_t w(static_cast(wd.getOperand()));
+        if (op == ArithmeticValueUpdate::Add) {
+            changes.push_back(ChangeTemplate(ChangeBase::INCREASEWEIGHT, doc, v, w));
+        } else if (op == ArithmeticValueUpdate::Sub) {
+            changes.push_back(ChangeTemplate(ChangeBase::INCREASEWEIGHT, doc, v, -w));
+        } else if (op == ArithmeticValueUpdate::Mul) {
+            changes.push_back(ChangeTemplate(ChangeBase::MULWEIGHT, doc, v, w));
+        } else if (op == ArithmeticValueUpdate::Div) {
+            if (w != 0) {
+                changes.push_back(ChangeTemplate(ChangeBase::DIVWEIGHT, doc, v, w));
+            } else {
+                divideByZeroWarning();
+            }
+        } else {
+            retval = false;
+        }
+        if (retval) {
+            // diff is the number of changes actually queued above
+            // (zero for a rejected division by zero).
+            const size_t diff = changes.size() - oldSz;
+            _status.incNonIdempotentUpdates(diff);
+            _status.incUpdates(diff);
+        }
+    }
+    return retval;
+}
+
+/**
+ * Queues an arithmetic update (ADD, SUB, MUL or DIV) for document doc.
+ *
+ * The change is created with zero placeholders and the real operand is
+ * stored afterwards in its _arithOperand field.  Division by zero is
+ * rejected only for attributes whose class inherits IntegerAttribute;
+ * in that case nothing is queued, divideByZeroWarning() is called and
+ * the function still returns true.
+ *
+ * @param changes change vector receiving the new change
+ * @param doc     document to update
+ * @param v       unused
+ * @param arithm  operator and operand of the update
+ * @return false if the attribute is multi-value, doc is not below
+ *         getNumDocs(), or the operator is not one of the four above
+ */
+template
+bool
+AttributeVector::applyArithmetic(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T & v,
+                                 const ArithmeticValueUpdate & arithm)
+{
+    (void) v;
+    bool retval(!hasMultiValue() && (doc < getNumDocs()));
+    if (retval) {
+        size_t oldSz(changes.size());
+        ArithmeticValueUpdate::Operator op(arithm.getOperator());
+        double aop = arithm.getOperand();
+        if (op == ArithmeticValueUpdate::Add) {
+            changes.push_back(ChangeTemplate(ChangeBase::ADD, doc, 0, 0));
+        } else if (op == ArithmeticValueUpdate::Sub) {
+            changes.push_back(ChangeTemplate(ChangeBase::SUB, doc, 0, 0));
+        } else if (op == ArithmeticValueUpdate::Mul) {
+            changes.push_back(ChangeTemplate(ChangeBase::MUL, doc, 0, 0));
+        } else if (op == ArithmeticValueUpdate::Div) {
+            if (this->getClass().inherits(IntegerAttribute::classId) && aop == 0) {
+                divideByZeroWarning();
+            } else {
+                changes.push_back(ChangeTemplate(ChangeBase::DIV, doc, 0, 0));
+            }
+        } else {
+            retval = false;
+        }
+        if (retval) {
+            const size_t diff = changes.size() - oldSz;
+            _status.incNonIdempotentUpdates(diff);
+            _status.incUpdates(diff);
+            // Only touch the last change if one was queued by this call.
+            if (diff > 0) {
+                changes.back()._arithOperand = aop;
+            }
+        }
+    }
+    return retval;
+}
+
+/**
+ * Queues a CLEARDOC change for the given document.
+ *
+ * @param changes change vector receiving the new change
+ * @param doc     document to clear
+ * @return false, without queuing anything, if doc is not below getNumDocs()
+ */
+template
+bool AttributeVector::clearDoc(ChangeVectorT< ChangeTemplate > & changes, DocId doc) {
+    const bool docInRange = (doc < getNumDocs());
+    if (!docInRange) {
+        return false;
+    }
+    changes.push_back(ChangeTemplate(ChangeBase::CLEARDOC, doc, T()));
+    _status.incUpdates();
+    updateUncommittedDocIdLimit(doc);
+    return true;
+}
+
+/**
+ * Queues a replacement of the content of document doc with value v.
+ *
+ * Multi-value attributes clear the document and then append v with
+ * weight 1 (not counted as a non-idempotent update); single-value
+ * attributes queue one UPDATE change.
+ *
+ * @return false if doc is not below getNumDocs(), otherwise the result
+ *         of the append (multi-value) or true (single-value)
+ */
+template
+bool AttributeVector::update(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T & v) {
+    const bool docInRange = (doc < getNumDocs());
+    if (!docInRange) {
+        return false;
+    }
+    if (hasMultiValue()) {
+        clearDoc(doc);
+        return append(changes, doc, v, 1, false);
+    }
+    changes.push_back(ChangeTemplate(ChangeBase::UPDATE, doc, v));
+    _status.incUpdates();
+    updateUncommittedDocIdLimit(doc);
+    return true;
+}
+
+/**
+ * Queues an APPEND of value v with weight w to document doc.
+ *
+ * @param doCount when set, and the attribute is of array type, the
+ *                append is also counted as a non-idempotent update
+ * @return false, without queuing anything, if the attribute is not
+ *         multi-value or doc is not below getNumDocs()
+ */
+template
+bool AttributeVector::append(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T & v, int32_t w, bool doCount) {
+    if (!hasMultiValue()) {
+        return false;
+    }
+    if (!(doc < getNumDocs())) {
+        return false;
+    }
+    changes.push_back(ChangeTemplate(ChangeBase::APPEND, doc, v, w));
+    _status.incUpdates();
+    updateUncommittedDocIdLimit(doc);
+    if (hasArrayType()) {
+        if (doCount) {
+            _status.incNonIdempotentUpdates();
+        }
+    }
+    return true;
+}
+
+/**
+ * Queues the values exposed by accessor ac as appends to document doc.
+ *
+ * The update counter is increased by ac.size(); for array attributes
+ * the non-idempotent counter is increased by the same amount.
+ *
+ * @return false, without queuing anything, if the attribute is not
+ *         multi-value or doc is not below getNumDocs()
+ */
+template
+bool AttributeVector::append(ChangeVectorT< ChangeTemplate > & changes, DocId doc, Accessor & ac) {
+    if (!hasMultiValue()) {
+        return false;
+    }
+    if (!(doc < getNumDocs())) {
+        return false;
+    }
+    changes.push_back(doc, ac);
+    _status.incUpdates(ac.size());
+    updateUncommittedDocIdLimit(doc);
+    if (hasArrayType()) {
+        _status.incNonIdempotentUpdates(ac.size());
+    }
+    return true;
+}
+
+/**
+ * Queues a REMOVE of value v (with weight w) from document doc.
+ *
+ * For array attributes the removal is also counted as a
+ * non-idempotent update.
+ *
+ * @return false, without queuing anything, if the attribute is not
+ *         multi-value or doc is not below getNumDocs()
+ */
+template
+bool AttributeVector::remove(ChangeVectorT< ChangeTemplate > & changes, DocId doc, const T & v, int32_t w) {
+    if (!hasMultiValue()) {
+        return false;
+    }
+    if (!(doc < getNumDocs())) {
+        return false;
+    }
+    changes.push_back(ChangeTemplate(ChangeBase::REMOVE, doc, v, w));
+    _status.incUpdates();
+    updateUncommittedDocIdLimit(doc);
+    if (hasArrayType()) {
+        _status.incNonIdempotentUpdates();
+    }
+    return true;
+}
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.cpp b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
new file mode 100644
index 00000000000..727cfcc3efe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.cpp
@@ -0,0 +1,188 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include "attrvector.h"
+#include "attrvector.hpp"
+#include 
+#include 
+#include 
+
+LOG_SETUP(".attrvector");
+
+namespace search {
+
+/**
+ * Creates an empty attribute: the string buffer, offsets and index
+ * start out empty and are filled by onLoad().
+ *
+ * @param baseFileName forwarded to the StringAttribute base class
+ * @param c            forwarded to the StringAttribute base class
+ */
+StringDirectAttribute::
+StringDirectAttribute(const vespalib::string & baseFileName, const Config & c)
+    : search::StringAttribute(baseFileName, c),
+      _buffer(),
+      _offsets(),
+      _idx()
+{
+}
+
+/**
+ * Looks up key among the stored strings with a binary search that
+ * steps by powers of two over the positions below getEnumMax().
+ *
+ * NOTE(review): the search is only meaningful when the strings
+ * referenced by _offsets are in strcmp order; the visible onLoad()
+ * sorts them only when hasEnum() is set -- confirm callers rely on that.
+ *
+ * @param key zero-terminated string to look for
+ * @param e   set to the matching position on success.  On a miss it is
+ *            set to the final search position, advanced by one when
+ *            the last comparison found key greater and the position is
+ *            below the enum max.  Set to 0 when there are no offsets.
+ * @return true if a string equal to key was found
+ */
+bool StringDirectAttribute::findEnum(const char * key, EnumHandle & e) const
+{
+    if (_offsets.size() < 1) {
+        e = 0;
+        return false;
+    }
+    int delta;
+    const int eMax = getEnumMax();
+    // Find the smallest power of two above eMax, then halve it: delta
+    // ends up as the largest power of two not exceeding eMax.
+    for (delta = 1; delta <= eMax; delta <<= 1) { }
+    delta >>= 1;
+    int pos = delta - 1;
+    int cmpres(0);
+
+    while (delta != 0) {
+        delta >>= 1;
+        if (pos >= eMax) {
+            // Probe is past the end; step back without comparing.
+            pos -= delta;
+        } else {
+            const char *name = &_buffer[_offsets[pos]];
+            cmpres = strcmp(key, name);
+            if (cmpres == 0) {
+                e = pos;
+                return true;
+            }
+            pos += (cmpres < 0) ? -delta : +delta;
+        }
+    }
+    e = ((cmpres > 0) && (pos < eMax)) ? pos + 1 : pos;
+    return false;
+}
+
+/**
+ * Writes the string values and, for multi-value attributes, the
+ * per-document index to the given save target.
+ *
+ * With enums enabled every value of every document is fetched through
+ * the generic AttributeVector::get() interface and written as a
+ * zero-terminated string; otherwise the raw string buffer is written
+ * unchanged.  Enumerated save targets are not supported.
+ *
+ * @param saveTarget target providing the dat and idx writers
+ */
+void StringDirectAttribute::onSave(IAttributeSaveTarget & saveTarget)
+{
+    assert(!saveTarget.getEnumerated());
+    using Buffer = IAttributeSaveTarget::Buffer;
+    if ( hasEnum() ) {
+        const uint32_t sz(getMaxValueCount());
+        Buffer dat(saveTarget.datWriter().allocBuf(sz*getNumDocs()*11));
+        // Scratch array owned by a vector so it is released even if
+        // get() or writeBytes() throws.
+        std::vector<const char *> values(sz);
+        // Go through the base class to reach its public get() overload.
+        const AttributeVector & self(*this);
+        for (size_t doc(0), numDocs(getNumDocs()); doc < numDocs; doc++) {
+            const size_t available(self.get(doc, values.data(), sz));
+            // get() fills at most sz entries but reports how many exist;
+            // never read entries it did not fill.
+            const size_t count((available < sz) ? available : sz);
+            for (size_t j(0); j < count; j++) {
+                dat->writeBytes(values[j], strlen(values[j]) + 1);
+            }
+        }
+        // Hand the filled buffer to the writer; without this the
+        // collected values never reach the dat file.
+        saveTarget.datWriter().writeBuf(std::move(dat));
+    } else if ( ! _buffer.empty() ) {
+        Buffer dat(saveTarget.datWriter().allocBuf(_buffer.size()));
+        dat->writeBytes(&_buffer[0], _buffer.size());
+        saveTarget.datWriter().writeBuf(std::move(dat));
+    }
+
+    if (hasMultiValue()) {
+        const size_t idxBytes(sizeof(uint32_t) * _idx.size());
+        Buffer buf(saveTarget.idxWriter().allocBuf(idxBytes));
+        // data() is well defined for an empty vector, unlike &_idx[0].
+        buf->writeBytes(_idx.data(), idxBytes);
+        saveTarget.idxWriter().writeBuf(std::move(buf));
+    }
+}
+
+/**
+ * Ordering for offsets into a buffer of zero-terminated strings:
+ * offset x sorts before offset y when the string starting at x
+ * compares less than the string starting at y according to strcmp.
+ *
+ * Written as a plain functor.  It does not derive from
+ * std::binary_function, which is deprecated since C++11 and removed in
+ * C++17, and which std::sort does not require.
+ */
+class stringComp {
+public:
+    /** @param buffer start of the string buffer the offsets refer to */
+    stringComp(const char * buffer) : _buffer(buffer) { }
+    /** @return true if the string at offset x orders before the one at y */
+    bool operator()(uint32_t x, uint32_t y) const { return strcmp(_buffer+x, _buffer+y) < 0; }
+private:
+    const char * _buffer;
+};
+
+/**
+ * Appends the zero-terminated string v, terminator included, to buffer
+ * and records in offsets the position at which the copy starts.
+ *
+ * @param v       zero-terminated string to append
+ * @param offsets receives the start position of the appended string
+ * @param buffer  character storage the string is appended to
+ */
+void addString(const char * v, StringAttribute::OffsetVector & offsets, std::vector & buffer)
+{
+    const size_t start = buffer.size();
+    offsets.push_back(start);
+    // One range insert covers the characters and the trailing '\0'.
+    buffer.insert(buffer.end(), v, v + strlen(v) + 1);
+}
+
+/**
+ * Loads the attribute from its dat file, and from its idx file for
+ * multi-value attributes, replacing any previously loaded content.
+ *
+ * With enums enabled the strings are sorted and de-duplicated into
+ * _buffer/_offsets; otherwise the file content is copied unchanged and
+ * _offsets holds the generated offsets for it.
+ *
+ * @return true if every required file was loaded.  On failure the
+ *         attribute is left empty with zero documents.
+ */
+bool StringDirectAttribute::onLoad()
+{
+    {
+        // Swap with empty containers so old storage is released, not
+        // merely cleared.
+        decltype(_buffer) noBuffer;
+        decltype(_idx) noIdx;
+        OffsetVector noOffsets;
+        std::swap(noBuffer, _buffer);
+        std::swap(noIdx, _idx);
+        std::swap(noOffsets, _offsets);
+        setNumDocs(0);
+        setCommittedDocIdLimit(0);
+    }
+
+    FileUtil::LoadedBuffer::UP tmpBuffer(loadDAT());
+    bool rc(tmpBuffer.get());
+    if (rc) {
+        if ( ! tmpBuffer->empty()) {
+            OffsetVector tmpOffsets;
+            tmpOffsets.reserve(countZero(tmpBuffer->c_str(), tmpBuffer->size()) + 1);
+            generateOffsets(tmpBuffer->c_str(), tmpBuffer->size(), tmpOffsets);
+
+            if ( hasEnum() ) {
+                // Sort the offsets by the strings they refer to, then
+                // copy each distinct string once, in sorted order.
+                std::sort(tmpOffsets.begin(), tmpOffsets.end(), stringComp(tmpBuffer->c_str()));
+                _offsets.clear();
+                _buffer.clear();
+                if (!tmpOffsets.empty()) {
+                    const char * prev(tmpBuffer->c_str() + tmpOffsets[0]);
+                    addString(prev, _offsets, _buffer);
+                    for(OffsetVector::const_iterator it(tmpOffsets.begin()+1), mt(tmpOffsets.end()); it != mt; it++) {
+                        if (strcmp(tmpBuffer->c_str() + *it, prev) != 0) {
+                            prev = tmpBuffer->c_str() + *it;
+                            addString(prev, _offsets, _buffer);
+                        }
+                    }
+                }
+                setEnumMax(_offsets.size());
+                // NOTE(review): tmpOffsets is not read again in this
+                // function; the call is kept in case generateOffsets
+                // has effects that are not visible here -- confirm.
+                generateOffsets(tmpBuffer->c_str(), tmpBuffer->size(), tmpOffsets);
+            } else {
+                _buffer.clear();
+                _buffer.reserve(tmpBuffer->size());
+                for (size_t i=0; i < tmpBuffer->size(); i++) {
+                    _buffer.push_back(tmpBuffer->c_str()[i]);
+                }
+                std::swap(tmpOffsets, _offsets);
+            }
+        }
+
+        if (hasMultiValue()) {
+            FileUtil::LoadedBuffer::UP tmpIdx(loadIDX());
+            // Check the load result before touching the buffer.
+            rc = (tmpIdx.get() != nullptr);
+            if (rc) {
+                const size_t tmpIdxLen(tmpIdx->size(sizeof(uint32_t)));
+                const uint32_t * idxPtr(static_cast<const uint32_t *>(tmpIdx->buffer()));
+                _idx.clear();
+                _idx.reserve(tmpIdxLen);
+                uint32_t prev(0);
+                for (size_t i=0; i < tmpIdxLen; i++) {
+                    // Consecutive entries delimit one document's values.
+                    checkSetMaxValueCount(idxPtr[i] - prev);
+                    prev = idxPtr[i];
+                    _idx.push_back(prev);
+                }
+            }
+        }
+    }
+
+    if (rc) {
+        // An empty idx means no documents; avoid wrapping size() - 1.
+        uint32_t numDocs(hasMultiValue() ? (_idx.empty() ? 0 : (_idx.size()-1)) : _offsets.size());
+        setNumDocs(numDocs);
+        setCommittedDocIdLimit(numDocs);
+    } else {
+        // Drop anything loaded before the failure so the attribute is
+        // consistently empty; the document count was reset above.
+        decltype(_buffer) noBuffer;
+        decltype(_idx) noIdx;
+        OffsetVector noOffsets;
+        std::swap(noBuffer, _buffer);
+        std::swap(noIdx, _idx);
+        std::swap(noOffsets, _offsets);
+    }
+
+    // update statistics
+    uint64_t numValues = _offsets.size();
+    uint64_t numUniqueValues = _offsets.size();
+    uint64_t allocated = _buffer.size() * sizeof(char) + _offsets.size() * sizeof(uint32_t)
+        + _idx.size() * sizeof(uint32_t);
+    this->updateStatistics(numValues, numUniqueValues, allocated, allocated, 0, 0);
+    return rc;
+}
+
+/** Committing is not supported by this attribute: the call aborts the process. */
+void StringDirectAttribute::onCommit()
+{
+    abort();
+}
+
+/**
+ * Adding documents is not supported by this attribute.
+ *
+ * @param doc ignored and left unmodified
+ * @return always false
+ */
+bool StringDirectAttribute::addDoc(DocId & doc)
+{
+    (void) doc;
+    return false;
+}
+
+}  // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.h b/searchlib/src/vespa/searchlib/attribute/attrvector.h
new file mode 100644
index 00000000000..a13c2a98002
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.h
@@ -0,0 +1,235 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+//TODO: This one should go.
+//
+using search::AttributeVector;
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Holder for the Features trait used by the direct attribute vector
+ * templates in this file (they query F::IsMultiValue()).
+ */
+class AttrVector
+{
+public:
+    /**
+     * Compile-time feature description.  IsMultiValue() reports the
+     * MULTI template argument.
+     */
+    template 
+    struct Features
+    {
+        typedef uint32_t EnumType;
+        static bool IsMultiValue() { return MULTI; }
+    };
+};
+
+namespace search {
+
+/**
+ * Numeric attribute on top of base class B that keeps its values in a
+ * plain vector (_data) and, for multi-value use, an index vector (_idx).
+ * Copying is disabled by declaring the copy operations private.
+ */
+template 
+class NumericDirectAttribute : public B
+{
+private:
+    typedef typename B::EnumHandle EnumHandle;
+    NumericDirectAttribute(const NumericDirectAttribute &);
+    NumericDirectAttribute & operator=(const NumericDirectAttribute &);
+    virtual bool onLoad();
+    /** The enum handle is used directly as an index into _data. */
+    virtual typename B::BaseType getFromEnum(EnumHandle e) const { return _data[e]; }
+    /** Copies the sz handles in v to e unchanged. */
+    virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+        for (size_t i(0); i < sz; i++) {
+            e[i] = v[i];
+        }
+    }
+protected:
+    typedef typename B::BaseType   BaseType;
+    typedef typename B::DocId      DocId;
+    typedef typename B::Change     Change;
+    typedef typename B::largeint_t largeint_t;
+    typedef typename B::Config     Config;
+
+    NumericDirectAttribute(const vespalib::string & baseFileName, const Config & c);
+
+    virtual bool findEnum(BaseType value, EnumHandle & e) const;
+    virtual void onCommit();
+    /** No statistics are maintained here; intentionally empty. */
+    virtual void onUpdateStat() { }
+    virtual bool addDoc(DocId & );
+
+    std::vector   _data;
+    std::vector   _idx;
+};
+
+}
+
+/**
+ * Concrete numeric direct attribute.  The feature type F decides via
+ * F::IsMultiValue() whether a document holds exactly one value, stored
+ * at _data[doc], or a range of values delimited by consecutive entries
+ * of _idx.
+ */
+template 
+class NumericDirectAttrVector : public search::NumericDirectAttribute
+{
+protected:
+    typedef typename B::DocId         DocId;
+    typedef NumericDirectAttrVector NumDirectAttrVec;
+private:
+    typedef typename B::largeint_t    largeint_t;
+public:
+    NumericDirectAttrVector(const vespalib::string & baseFileName);
+    NumericDirectAttrVector(const vespalib::string & baseFileName, const AttributeVector::Config & c);
+    virtual largeint_t getInt(DocId doc)   const { return static_cast(getHelper(doc, 0)); }
+    virtual double getFloat(DocId doc)     const { return getHelper(doc, 0); }
+    virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz)     const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, double * v, uint32_t sz)         const { return getAllHelper(doc, v, sz); }
+private:
+    typedef typename B::EnumHandle    EnumHandle;
+    typedef typename B::BaseType      BaseType;
+    typedef typename B::Weighted      Weighted;
+    typedef typename B::WeightedEnum  WeightedEnum;
+    typedef typename B::WeightedInt   WeightedInt;
+    typedef typename B::WeightedFloat WeightedFloat;
+    virtual BaseType get(DocId doc)        const { return getHelper(doc, 0); }
+    virtual EnumHandle getEnum(DocId doc)  const { return getEnumHelper(doc, 0); }
+    virtual uint32_t getAll(DocId doc, BaseType * v, uint32_t sz)    const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { return getAllEnumHelper(doc, e, sz); }
+
+    virtual uint32_t getValueCount(DocId doc) const { return getValueCountHelper(doc); }
+    virtual bool hasEnum2Value() const { return false; }
+
+    /** Number of values for doc: the _idx span when multi-value, else 1. */
+    uint32_t getValueCountHelper(DocId doc) const {
+        if (F::IsMultiValue()) {
+            return this->_idx[doc+1] - this->_idx[doc];
+        } else {
+            return 1;
+        }
+    }
+
+    /** Ignores its arguments and always returns uint32_t(-1). */
+    EnumHandle getEnumHelper(DocId doc, int idx) const {
+        (void) doc;
+        (void) idx;
+        return uint32_t(-1);
+    }
+
+    /**
+     * Returns value number idx of doc.  For single-value attributes
+     * idx is ignored.  No bounds checking is done.
+     */
+    BaseType getHelper(DocId doc, int idx) const {
+        if (F::IsMultiValue()) {
+            return this->_data[this->_idx[doc] + idx];
+        } else {
+            return this->_data[doc];
+        }
+    }
+    /**
+     * Copies at most sz values of doc into v, converted to T.
+     *
+     * @return the number of values available, which may exceed sz
+     */
+    template 
+    uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const {
+        uint32_t available(getValueCountHelper(doc));
+        uint32_t num2Read(std::min(available, sz));
+        for (uint32_t i(0); i < num2Read; i++) {
+            v[i] = T(static_cast(getHelper(doc, i)));
+        }
+        return available;
+    }
+    /**
+     * Fills at most sz entries of v from getEnumHelper().
+     *
+     * @return the number of values available, which may exceed sz
+     */
+    template 
+    uint32_t getAllEnumHelper(DocId doc, T * v, uint32_t sz) const {
+        uint32_t available(getValueCountHelper(doc));
+        uint32_t num2Read(std::min(available, sz));
+        for (uint32_t i(0); i < num2Read; i++) {
+            v[i] = T(getEnumHelper(doc, i));
+        }
+        return available;
+    }
+
+    virtual uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const { return getAllEnumHelper(doc, v, sz); }
+    virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz)      const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz)      const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz)    const { return getAllHelper(doc, v, sz); }
+};
+
+//-----------------------------------------------------------------------------
+
+namespace search {
+/**
+ * String attribute that keeps all strings zero-terminated in one
+ * character buffer (_buffer), with _offsets giving string start
+ * positions and _idx the per-document index for multi-value use.
+ * Copying is disabled by declaring the copy operations private.
+ */
+class StringDirectAttribute : public StringAttribute
+{
+private:
+    StringDirectAttribute(const StringDirectAttribute &);
+    StringDirectAttribute & operator=(const StringDirectAttribute &);
+    virtual void onSave(IAttributeSaveTarget & saveTarget);
+    virtual bool onLoad();
+    /** The enum handle is used directly as a position in _buffer. */
+    virtual const char * getFromEnum(EnumHandle e) const { return &_buffer[e]; }
+protected:
+    StringDirectAttribute(const vespalib::string & baseFileName, const Config & c);
+    virtual bool findEnum(const char * value, EnumHandle & e) const;
+    /** Copies the sz handles in v to e unchanged. */
+    virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const {
+        for (size_t i(0); i < sz; i++) {
+            e[i] = v[i];
+        }
+    }
+    virtual void onCommit();
+    /** No statistics are maintained here; intentionally empty. */
+    virtual void onUpdateStat() { }
+    virtual bool addDoc(DocId & );
+
+protected:
+    std::vector        _buffer;
+    OffsetVector             _offsets;
+    std::vector    _idx;
+};
+
+}
+
+/**
+ * Concrete string direct attribute.  The feature type F decides via
+ * F::IsMultiValue() whether a document holds exactly one string,
+ * located through _offsets[doc], or a range of strings delimited by
+ * consecutive entries of _idx.
+ */
+template 
+class StringDirectAttrVector : public search::StringDirectAttribute
+{
+
+public:
+    StringDirectAttrVector(const vespalib::string & baseFileName);
+    StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c);
+    /** Returns a pointer into the internal buffer; v and sz are unused. */
+    virtual const char * getString(DocId doc, char * v, size_t sz) const { (void) v; (void) sz; return getHelper(doc, 0); }
+    virtual uint32_t get(DocId doc, const char ** v, uint32_t sz)  const { return getAllHelper(doc, v, sz); }
+private:
+    virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz)  const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { return getAllEnumHelper(doc, e, sz); }
+    virtual const char * get(DocId doc)  const { return getHelper(doc, 0); }
+    virtual EnumHandle getEnum(DocId doc)  const { return getEnumHelper(doc, 0); }
+    virtual uint32_t getValueCount(DocId doc) const { return getValueCountHelper(doc); }
+    virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz)  const { return getAllEnumHelper(doc, e, sz); }
+    virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz)    const { return getAllHelper(doc, v, sz); }
+    virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const { return getAllHelper(doc, v, sz); }
+    virtual bool hasEnum2Value() const { return true; }
+
+    /** Number of values for doc: the _idx span when multi-value, else 1. */
+    uint32_t getValueCountHelper(DocId doc) const {
+        if (F::IsMultiValue()) {
+            return this->_idx[doc+1] - this->_idx[doc];
+        } else {
+            return 1;
+        }
+    }
+
+    /**
+     * Returns the buffer offset of value number idx of doc, used as
+     * its enum handle.  For single-value attributes idx is ignored.
+     */
+    EnumHandle getEnumHelper(DocId doc, int idx) const {
+        if (F::IsMultiValue()) {
+            return  this->_offsets[this->_idx[doc] + idx];
+        } else {
+            return this->_offsets[doc];
+        }
+        // Not reached: both branches above return.
+        return uint32_t(-1);
+    }
+
+    /**
+     * Returns a pointer to value number idx of doc inside _buffer.
+     * For single-value attributes any idx other than 0 yields NULL.
+     */
+    const char *getHelper(DocId doc, int idx) const {
+        if (F::IsMultiValue()) {
+            return & this->_buffer[this->_offsets[this->_idx[doc] + idx]];
+        } else if (idx == 0) {
+            return & this->_buffer[this->_offsets[doc]];
+        }
+        return NULL;
+    }
+    /**
+     * Fills at most sz entries of v, each constructed from the string
+     * pointer returned by getHelper().
+     *
+     * @return the number of values available, which may exceed sz
+     */
+    template 
+    uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const
+    {
+        uint32_t available(getValueCountHelper(doc));
+        uint32_t num2Read(std::min(available, sz));
+        for (uint32_t i(0); i < num2Read; i++) {
+            v[i] = T(getHelper(doc, i));
+        }
+        return available;
+    }
+    /**
+     * Fills at most sz entries of v from getEnumHelper().
+     *
+     * @return the number of values available, which may exceed sz
+     */
+    template 
+    uint32_t getAllEnumHelper(DocId doc, T * v, uint32_t sz) const
+    {
+        uint32_t available(getValueCountHelper(doc));
+        uint32_t num2Read(std::min(available, sz));
+        for (uint32_t i(0); i < num2Read; i++) {
+            v[i] = T(getEnumHelper(doc, i));
+        }
+        return available;
+    }
+};
+
diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
new file mode 100644
index 00000000000..592ed6fe737
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * Creates an empty attribute: the value and index vectors start out
+ * empty.
+ *
+ * @param baseFileName forwarded to the base class B
+ * @param c            forwarded to the base class B
+ */
+template 
+NumericDirectAttribute::
+NumericDirectAttribute(const vespalib::string & baseFileName, const Config & c)
+    : B(baseFileName, c),
+      _data(),
+      _idx()
+{
+}
+
+// Loads the attribute from disk.
+//
+// The DAT buffer is copied into _data; when the attribute has enum enabled the
+// values are sorted and de-duplicated first and the enum max is set to the
+// number of unique values.  For multi-value attributes the IDX buffer is then
+// copied into _idx while tracking the largest per-document value count.
+//
+// On success the document count and committed doc id limit are published; on
+// failure _data and _idx are emptied.  Statistics are updated in both cases.
+//
+// Returns true when all required buffers could be loaded.
+template 
+bool NumericDirectAttribute::onLoad()
+{
+    FileUtil::LoadedBuffer::UP dataBuffer(B::loadDAT());
+    bool rc(dataBuffer.get());
+    if (rc) {
+        const BaseType * tmpData(static_cast (dataBuffer->buffer()));
+        size_t tmpDataLen(dataBuffer->size(sizeof(BaseType)));
+        if (this->hasEnum() ) {
+            // Build the sorted list of unique values.
+            std::vector tmp;
+            tmp.reserve(tmpDataLen);
+            for(size_t i(0); i < tmpDataLen; i++) {
+                tmp.push_back(tmpData[i]);
+            }
+            std::sort(tmp.begin(), tmp.end());
+            _data.clear();
+            if (!tmp.empty()) {
+                BaseType prev = tmp[0];
+                _data.push_back(prev);
+                for(typename std::vector::const_iterator it(tmp.begin()+1), mt(tmp.end()); it != mt; it++) {
+                    if (*it != prev) {
+                        prev = *it;
+                        _data.push_back(prev);
+                    }
+                }
+            }
+            this->setEnumMax(_data.size());
+        } else {
+            _data.clear();
+            _data.reserve(tmpDataLen);
+            for (size_t i=0; i < tmpDataLen; i++) {
+                _data.push_back(tmpData[i]);
+            }
+        }
+        // Release the file buffer before loading the index file.
+        dataBuffer.reset();
+        if (this->hasMultiValue()) {
+            FileUtil::LoadedBuffer::UP idxBuffer(B::loadIDX());
+            rc = idxBuffer.get();
+            if (rc) {
+                const uint32_t * tmpIdx(static_cast(idxBuffer->buffer()));
+                size_t tmpIdxLen(idxBuffer->size(sizeof(uint32_t)));
+                _idx.clear();
+                _idx.reserve(tmpIdxLen);
+                uint32_t prev(0);
+                for (size_t i=0; i < tmpIdxLen; i++) {
+                    // Difference between consecutive entries is the value count.
+                    this->checkSetMaxValueCount(tmpIdx[i] - prev);
+                    prev = tmpIdx[i];
+                    _idx.push_back(prev);
+                }
+            }
+        }
+    }
+    if (rc) {
+        // _idx holds one more entry than there are documents.  An index file
+        // without entries describes zero documents; without the guard
+        // _idx.size() - 1 would wrap around to a huge document count.
+        uint32_t numDocs(this->hasMultiValue() ? (_idx.empty() ? 0 : (_idx.size() - 1)) : _data.size());
+        this->setNumDocs(numDocs);
+        this->setCommittedDocIdLimit(numDocs);
+    } else {
+        // Swap with empty containers so that the memory is released as well.
+        std::vector emptyData;
+        std::vector empty2;
+        std::swap(emptyData, _data);
+        std::swap(empty2, _idx);
+    }
+
+    // update statistics
+    uint64_t numValues = _data.size();
+    uint64_t numUniqueValues = _data.size();
+    uint64_t allocated = _data.size() * sizeof(BaseType) + _idx.size() * sizeof(uint32_t);
+    this->updateStatistics(numValues, numUniqueValues, allocated, allocated, 0, 0);
+
+    return rc;
+}
+
+// Looks up key in _data using a power-of-two stepping binary search over the
+// first getEnumMax() entries.
+//
+// Returns true and stores the matching position in e when key is found.
+// Returns false otherwise; e is then set to 0 when _data is empty, or to the
+// position where the search ended (pos + 1 when key is greater than the last
+// inspected value and pos is still below the enum max).
+//
+// NOTE(review): the search assumes _data is sorted ascending, which onLoad()
+// only establishes when enum is enabled - confirm for other callers.
+template 
+bool NumericDirectAttribute::findEnum(typename B::BaseType key, EnumHandle & e) const
+{
+    if (_data.empty()) {
+        e = 0;
+        return false;
+    }
+    int delta;
+    const int eMax = B::getEnumMax();
+    // Find the largest power of two that is <= eMax: the loop overshoots by
+    // one doubling, the shift below steps back.
+    for (delta = 1; delta <= eMax; delta <<= 1) { }
+    delta >>= 1;
+    int pos = delta - 1;
+    typename B::BaseType value = key;
+
+    while (delta != 0) {
+        delta >>= 1;
+        if (pos >= eMax) {
+            // Probe position is outside the valid range; step back.
+            pos -= delta;
+        } else {
+            value = _data[pos];
+            if (value == key) {
+                e = pos;
+                return true;
+            } else if (value < key) {
+                pos += delta;
+            } else {
+                pos -= delta;
+            }
+        }
+    }
+    e = ((key > value) && (pos < eMax)) ? pos + 1 : pos;
+    return false;
+}
+
+// Discards all pending changes and then terminates the process via abort().
+// NOTE(review): presumably direct attributes are load-only and committing to
+// one is considered a programming error - confirm.
+template 
+void NumericDirectAttribute::onCommit()
+{
+    B::_changes.clear();
+    abort();
+}
+
+// Adding documents is not supported: the argument is ignored and the call
+// always returns false.
+template 
+bool NumericDirectAttribute::addDoc(DocId & )
+{
+    return false;
+}
+
+}
+
+// Constructs the vector with an explicit config.  For multi-value formats the
+// index is seeded with a single 0 entry.
+template 
+NumericDirectAttrVector::
+NumericDirectAttrVector(const vespalib::string & baseFileName, const AttributeVector::Config & c)
+    : search::NumericDirectAttribute(baseFileName, c)
+{
+    if (F::IsMultiValue()) {
+        this->_idx.push_back(0);
+    }
+}
+
+// Constructs the vector with a config derived from the template arguments:
+// the basic type comes from BaseType and the collection type is ARRAY when
+// F::IsMultiValue() is true, otherwise SINGLE.  For multi-value formats the
+// index is seeded with a single 0 entry.
+template 
+NumericDirectAttrVector::
+NumericDirectAttrVector(const vespalib::string & baseFileName)
+    : search::NumericDirectAttribute(baseFileName, AttributeVector::Config(AttributeVector::BasicType::fromType(BaseType()), F::IsMultiValue() ? search::attribute::CollectionType::ARRAY : search::attribute::CollectionType::SINGLE))
+{
+    if (F::IsMultiValue()) {
+        this->_idx.push_back(0);
+    }
+}
+
+// Constructs the string vector with an explicit config.  For multi-value
+// formats the index is seeded with a single 0 entry.  Enum is always switched
+// on and flagged as sorted.
+template 
+StringDirectAttrVector::
+StringDirectAttrVector(const vespalib::string & baseFileName, const Config & c) :
+    search::StringDirectAttribute(baseFileName, c)
+{
+    if (F::IsMultiValue()) {
+        _idx.push_back(0);
+    }
+    setEnum();
+    setSortedEnum(true);
+}
+
+// Constructs the string vector with a STRING config whose collection type is
+// ARRAY when F::IsMultiValue() is true, otherwise SINGLE.  For multi-value
+// formats the index is seeded with a single 0 entry.  Enum is always switched
+// on and flagged as sorted.
+template 
+StringDirectAttrVector::
+StringDirectAttrVector(const vespalib::string & baseFileName) :
+    search::StringDirectAttribute(baseFileName, Config(BasicType::STRING, F::IsMultiValue() ? search::attribute::CollectionType::ARRAY : search::attribute::CollectionType::SINGLE))
+{
+    if (F::IsMultiValue()) {
+        _idx.push_back(0);
+    }
+    setEnum();
+    setSortedEnum(true);
+}
+
diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.cpp b/searchlib/src/vespa/searchlib/attribute/changevector.cpp
new file mode 100644
index 00000000000..028ae98d985
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/changevector.cpp
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include 
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.changevector");
+
+namespace search {
+
+// Stores a copy of s.  When s contains embedded zero bytes a warning is
+// logged and only the part before the first zero byte is kept.
+StringChangeData::StringChangeData(const vespalib::string & s)
+    : _s(s)
+{
+    if (StringAttribute::countZero(s.data(), s.size()) > 0) {
+        LOG(warning,
+            "StringChangeData(): "
+            "Input string contains NUL byte(s); "
+            "truncating. (ticket #3079131)");
+        _s.assign(s.data()); // keep data up to (not including) first '\0' byte
+    }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/changevector.h b/searchlib/src/vespa/searchlib/attribute/changevector.h
new file mode 100644
index 00000000000..a714e502588
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/changevector.h
@@ -0,0 +1,230 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include 
+#include 
+
+namespace search {
+
+/**
+ * Value-independent part of a single attribute change: the operation type,
+ * the document it applies to, a weight, an arithmetic operand and the link
+ * (_next) to the following change when changes are chained in a vector.
+ */
+struct ChangeBase {
+    /// Kind of operation a change represents.
+    enum Type {
+        NOOP,
+        UPDATE,
+        APPEND,
+        REMOVE,
+        INCREASEWEIGHT,
+        MULWEIGHT,
+        DIVWEIGHT,
+        ADD,
+        SUB,
+        MUL,
+        DIV,
+        CLEARDOC
+    };
+    /// TAIL is the initial link value; UNSET_ENUM marks an unset scratch pad.
+    enum {TAIL=0, UNSET_ENUM = 0xffffffffu};
+
+    /// Creates a NOOP change for document 0 with weight 1.
+    ChangeBase() :
+        _type(NOOP),
+        _next(TAIL),
+        _doc(0),
+        _weight(1),
+        _enumScratchPad(UNSET_ENUM),
+        _arithOperand(0)
+    { }
+
+    /// Creates a change of the given type for document d with weight w.
+    ChangeBase(Type type, uint32_t d, int32_t w = 1) :
+        _type(type),
+        _next(TAIL),
+        _doc(d),
+        _weight(w),
+        _enumScratchPad(UNSET_ENUM),
+        _arithOperand(0)
+    { }
+
+    /**
+     * Three-way comparison on document id.
+     *
+     * Returns a negative value, zero or a positive value when this change's
+     * document id is smaller than, equal to or larger than b's.  The ids are
+     * compared directly: subtracting the unsigned ids and narrowing to int
+     * gives the wrong sign once they differ by more than INT_MAX.
+     */
+    int cmp(const ChangeBase &b) const {
+        if (_doc < b._doc) {
+            return -1;
+        }
+        return (_doc > b._doc) ? 1 : 0;
+    }
+    bool operator <(const ChangeBase & b) const { return cmp(b) < 0; }
+    bool isAtEnd() const { return _next == TAIL; }
+    uint32_t getNext() const { return _next; }
+    void setNext(uint32_t next) { _next = next; }
+    uint32_t getEnum() const { return _enumScratchPad; }
+    // The scratch pad is mutable, so it can be updated through const access.
+    void setEnum(uint32_t value) const { _enumScratchPad = value; }
+    bool isEnumValid() const { return _enumScratchPad != UNSET_ENUM; }
+    void invalidateEnum() const { _enumScratchPad = UNSET_ENUM; }
+
+    Type               _type;
+private:
+    uint32_t           _next;           // link to the next change, see setNext()
+public:
+    uint32_t           _doc;            // document the change applies to
+    int32_t            _weight;
+    mutable uint32_t   _enumScratchPad; // UNSET_ENUM until setEnum() is called
+    double             _arithOperand;
+};
+
+/**
+ * Change payload wrapping a single numeric value of type T.
+ * The wrapper converts implicitly to T (by value on const objects, by
+ * reference on non-const objects) and orders by the wrapped value.
+ */
+template 
+class NumericChangeData {
+private:
+    T _v;
+public:
+    typedef T DataType;
+
+    NumericChangeData(T v) : _v(v) { }
+    // Value-initializes the wrapped value.
+    NumericChangeData() : _v(T()) { }
+
+    // get() and raw() both return the wrapped value.
+    T get() const { return _v; }
+    T raw() const { return _v; }
+    operator T() const { return _v; }
+    operator T&() { return _v; }
+    bool operator<(const NumericChangeData &rhs) const { return _v < rhs._v; }
+};
+
+/**
+ * Change payload wrapping a string value.  The wrapper converts implicitly
+ * to the wrapped string and orders by it.
+ */
+class StringChangeData {
+public:
+    typedef vespalib::string DataType;
+
+    StringChangeData() : _s() { }
+    // Defined out of line.
+    StringChangeData(const DataType & s);
+
+    /// Ordering follows the wrapped string.
+    bool operator <(const StringChangeData & rhs) const {
+        return _s < rhs._s;
+    }
+
+    operator DataType&() {
+        return _s;
+    }
+    operator const DataType&() const {
+        return _s;
+    }
+
+    /// The wrapped string.
+    const DataType & get() const {
+        return _s;
+    }
+    /// The wrapped string as a C string.
+    const char * raw() const {
+        return _s.c_str();
+    }
+private:
+    DataType _s;
+};
+
+/**
+ * A complete change: the ChangeBase bookkeeping plus a payload of type T
+ * stored in _data.
+ */
+template
+struct ChangeTemplate : public ChangeBase {
+    typedef T DataType;
+
+    // Default change; _data is left to T's default initialization.
+    ChangeTemplate() : ChangeBase() { }
+    // Change of the given type for document d carrying value v and weight w.
+    ChangeTemplate(Type type, uint32_t d, const T & v, int32_t w = 1) :
+        ChangeBase(type, d, w), _data(v)
+    { }
+
+    T          _data;
+};
+
+// Specialized constructor taking a double: a value for which
+// attribute::isUndefined(v) holds is replaced by attribute::getUndefined(),
+// any other value is stored as given.
+template <>
+inline
+NumericChangeData::NumericChangeData(double v) :
+    _v(attribute::isUndefined(v) ?  attribute::getUndefined() : v)
+{
+}
+
+// Specialized ordering that handles NaN: a NaN value is less than every
+// non-NaN value, two NaN values are not less than each other, and non-NaN
+// values compare with the built-in operator.
+template <>
+inline bool
+NumericChangeData::operator<(const NumericChangeData &rhs) const
+{
+    if (std::isnan(_v)) {
+        return !std::isnan(rhs._v);
+    }
+    if (std::isnan(rhs._v)) {
+        return false;
+    }
+    return _v < rhs._v;
+}
+
+// Empty common base class of the ChangeVectorT instantiations.
+class ChangeVectorBase {
+protected:
+};
+
+/**
+ * Maintains a list of changes where changes to the same docid are adjacent, but ordered by insertion order.
+ * Apart from that no ordering by docid.
+ *
+ * Changes are stored in a vector in insertion order; iteration follows the
+ * next-links stored in each change, starting at index 0 and ending when the
+ * link equals size().
+ */
+template 
+class ChangeVectorT : public ChangeVectorBase {
+private:
+    typedef vespalib::hash_map Map;
+    typedef vespalib::Array Vector;
+public:
+    ChangeVectorT() : _tail(0) { }
+    /**
+     * Forward iterator that walks the changes by following their next-links.
+     */
+    class const_iterator {
+    public:
+        const_iterator(const Vector & vector, uint32_t next) : _v(&vector), _next(next) { }
+        bool operator == (const const_iterator & rhs) const { return _v == rhs._v && _next == rhs._next; }
+        bool operator != (const const_iterator & rhs) const { return _v != rhs._v || _next != rhs._next; }
+        const_iterator& operator++()    { advance(); return *this; }
+        const_iterator  operator++(int) { const_iterator other(*this); advance(); return other; }
+        const T & operator *  ()  const { return v()[_next]; }
+        const T * operator -> ()  const { return &v()[_next]; }
+    private:
+        void  advance()          { _next = v()[_next].getNext(); }
+        const Vector & v() const { return *_v; }
+        const Vector * _v;
+        uint32_t       _next;
+    };
+
+    /// Appends a single change and links it into the iteration order.
+    void push_back(const T & c);
+    /// Appends one APPEND change per value delivered by the accessor.
+    template 
+    void push_back(uint32_t doc, Accessor & ac);
+    const T & back()       const { return _v.back(); }
+    T & back()                   { return _v.back(); }
+    size_t size()          const { return _v.size(); }
+    /**
+     * Removes all changes.  _tail is reset as well so that the vector is in
+     * the same state as a freshly constructed one; a stale tail index would
+     * otherwise be dereferenced by linkIn() on the next push_back().
+     */
+    void clear()                 { _v.clear(); _docs.clear(); _tail = 0; }
+    const_iterator begin() const { return const_iterator(_v, 0); }
+    const_iterator end()   const { return const_iterator(_v, size()); }
+private:
+    void linkIn(uint32_t doc, size_t index, size_t last);
+    Vector _v;       // changes in insertion order
+    Map    _docs;    // docid -> index of the last change linked for that docid
+    uint32_t _tail;  // index of the change that currently ends the chain
+};
+
+// Appends a copy of c at the end of the vector and links it into the chain
+// for its document (c._doc).
+template 
+void
+ChangeVectorT::push_back(const T & c)
+{
+    size_t index(size());
+    _v.push_back(c);
+    linkIn(c._doc, index, index);
+}
+
+// Appends one APPEND change for document doc per element delivered by the
+// accessor ac (ac.size() elements, read via ac.value() / ac.weight() and
+// advanced with ac.next()).  The new changes are linked to each other in
+// order and then linked into the chain as one run.  Does nothing when the
+// accessor is empty.
+template 
+template 
+void
+ChangeVectorT::push_back(uint32_t doc, Accessor & ac)
+{
+    if (ac.size() <= 0) { return; }
+
+    size_t index(size());
+    _v.reserve(vespalib::roundUp2inN(index + ac.size()));
+    for (size_t i(0), m(ac.size()); i < m; i++, ac.next()) {
+        _v.push_back(T(ChangeBase::APPEND, doc, typename T::DataType(ac.value()), ac.weight()));
+        // Pre-link each new change to the one that follows it.
+        _v.back().setNext(index + i + 1);
+    }
+    linkIn(doc, index, size() - 1);
+}
+
+// Links the already appended run of changes [first, last] for document doc
+// into the iteration chain.
+//
+//  - If the run is not the very first entry and the current tail belongs to
+//    the same document, the run is appended after the tail.
+//  - Otherwise, if the document has not been seen before, its last index is
+//    recorded in _docs and the run is appended after the tail.
+//  - Otherwise the document has earlier changes that are not at the tail: the
+//    run is spliced in directly after the last change of that document.
+//
+// Finally the tail's link is set to size(), which is the end() marker.
+//
+// NOTE(review): when first == 0 the second branch dereferences _v[_tail] if
+// _tail != 0; this relies on _tail being 0 whenever the vector is empty -
+// confirm that clear() leaves _tail consistent.
+template 
+void
+ChangeVectorT::linkIn(uint32_t doc, size_t first, size_t last)
+{
+    if (first != 0 && (_v[_tail]._doc == doc)) {
+        _v[_tail].setNext(first);
+        _tail = last;
+    } else {
+        Map::iterator found(_docs.find(doc));
+        if (found == _docs.end()) {
+            _docs[doc] = last;
+            if (_tail != first) {
+                _v[_tail].setNext(first);
+            }
+            _tail = last;
+        } else {
+            uint32_t prev(found->second);
+            // Advance to the last change in the chain that still belongs to doc.
+            for (; _v[_v[prev].getNext()]._doc == doc; prev = _v[prev].getNext());
+            _v[last].setNext(_v[prev].getNext());
+            _v[prev].setNext(first);
+            found->second = last;
+        }
+    }
+    _v[_tail].setNext(size());
+}
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
new file mode 100644
index 00000000000..250cad79357
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+LOG_SETUP(".searchlib.attribute.configconverter");
+#include "configconverter.h"
+
+using namespace vespa::config::search;
+using namespace search;
+
+
+namespace {
+
+using search::attribute::CollectionType;
+using search::attribute::BasicType;
+using vespalib::tensor::TensorType;
+
+typedef std::map DataTypeMap;
+typedef std::map CollectionTypeMap;
+
+// Builds the table translating the data type of the attributes config into
+// the corresponding BasicType.
+DataTypeMap
+getDataTypeMap()
+{
+    DataTypeMap types;
+    // string
+    types[AttributesConfig::Attribute::STRING] = BasicType::STRING;
+    // small unsigned integers
+    types[AttributesConfig::Attribute::UINT1] = BasicType::UINT1;
+    types[AttributesConfig::Attribute::UINT2] = BasicType::UINT2;
+    types[AttributesConfig::Attribute::UINT4] = BasicType::UINT4;
+    // signed integers
+    types[AttributesConfig::Attribute::INT8] = BasicType::INT8;
+    types[AttributesConfig::Attribute::INT16] = BasicType::INT16;
+    types[AttributesConfig::Attribute::INT32] = BasicType::INT32;
+    types[AttributesConfig::Attribute::INT64] = BasicType::INT64;
+    // floating point
+    types[AttributesConfig::Attribute::FLOAT] = BasicType::FLOAT;
+    types[AttributesConfig::Attribute::DOUBLE] = BasicType::DOUBLE;
+    // other
+    types[AttributesConfig::Attribute::PREDICATE] = BasicType::PREDICATE;
+    types[AttributesConfig::Attribute::TENSOR] = BasicType::TENSOR;
+    types[AttributesConfig::Attribute::NONE] = BasicType::NONE;
+    return types;
+}
+
+// Builds the table translating the collection type of the attributes config
+// into the corresponding CollectionType.
+CollectionTypeMap
+getCollectionTypeMap()
+{
+    CollectionTypeMap collections;
+    collections[AttributesConfig::Attribute::SINGLE] = CollectionType::SINGLE;
+    collections[AttributesConfig::Attribute::ARRAY] = CollectionType::ARRAY;
+    collections[AttributesConfig::Attribute::WEIGHTEDSET] = CollectionType::WSET;
+    return collections;
+}
+
+// Lookup tables built once during static initialization and consulted by
+// ConfigConverter::convert().
+static DataTypeMap _dataTypeMap = getDataTypeMap();
+static CollectionTypeMap _collectionTypeMap = getCollectionTypeMap();
+
+}
+
+namespace search {
+namespace attribute {
+
+// Converts one attribute entry of the attributes config into the Config used
+// by the AttributeVector implementations.  Basic type and collection type are
+// translated through the lookup tables; the remaining settings are copied
+// field by field.  The tensor type is only parsed for TENSOR attributes.
+Config
+ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
+{
+    BasicType basic(_dataTypeMap[cfg.datatype]);
+
+    CollectionType collection(_collectionTypeMap[cfg.collectiontype]);
+    collection.removeIfZero(cfg.removeifzero);
+    collection.createIfNonExistant(cfg.createifnonexistent);
+
+    Config result(basic, collection);
+    result.setFastSearch(cfg.fastsearch);
+    result.setHuge(cfg.huge);
+    result.setEnableBitVectors(cfg.enablebitvectors);
+    result.setEnableOnlyBitVector(cfg.enableonlybitvector);
+    // NOTE(review): the filter flag is taken from enableonlybitvector as well;
+    // confirm that this is intended and not a copy/paste slip.
+    result.setIsFilter(cfg.enableonlybitvector);
+    result.setFastAccess(cfg.fastaccess);
+    result.setArity(cfg.arity);
+    result.setBounds(cfg.lowerbound, cfg.upperbound);
+    result.setDensePostingListThreshold(cfg.densepostinglistthreshold);
+
+    const bool isTensor = (result.basicType().type() == BasicType::Type::TENSOR);
+    if (isTensor) {
+        result.setTensorType(TensorType::fromSpec(cfg.tensortype));
+    }
+    return result;
+}
+
+
+} // namespace attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.h b/searchlib/src/vespa/searchlib/attribute/configconverter.h
new file mode 100644
index 00000000000..796c40c2eb1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "attributevector.h"
+#include 
+
+namespace search {
+namespace attribute {
+
+/**
+ * Class used to convert from attributes config to the config used by the AttributeVector implementation.
+ **/
+class ConfigConverter {
+public:
+    /**
+     * Converts a single attribute entry of the attributes config.
+     *
+     * @param cfg the attribute entry to convert
+     * @return the equivalent attribute vector config
+     **/
+    static Config convert(const vespa::config::search::AttributesConfig::Attribute & cfg);
+};
+
+} // namespace attribute
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp
new file mode 100644
index 00000000000..1312fd2b331
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createarrayfastsearch.cpp
@@ -0,0 +1,69 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "defines.h"
+
+#include 
+LOG_SETUP(".createarrayfastsearch");
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using attribute::BasicType;
+
+// INTARRAY / FLOATARRAY name the fast-search array attribute classes for
+// integer and floating point values using multi-value index type I.
+// CREATEINTARRAY / CREATEFLOATARRAY allocate such an attribute, selecting the
+// multivalue::Index64 variant when H is true and multivalue::Index32 otherwise.
+// The expansion is an unparenthesized conditional expression, so it should
+// only be used as a complete function argument.
+#define INTARRAY(T, I)   MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate), MULTIVALUE_ENUM_ARG(I) >
+#define FLOATARRAY(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate), MULTIVALUE_ENUM_ARG(I) >
+#define CREATEINTARRAY(T, H, fname, info) H ? static_cast(new INTARRAY(T, multivalue::Index64)(fname, info)) : static_cast(new INTARRAY(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATARRAY(T, H, fname, info) H ? static_cast(new FLOATARRAY(T, multivalue::Index64)(fname, info)) : static_cast(new FLOATARRAY(T, multivalue::Index32)(fname, info))
+
+// Creates a fast-search array attribute for the basic type given in info.
+// The config must describe an ARRAY collection with fast-search enabled
+// (asserted).  info.huge() selects the huge variant of each implementation.
+// INT8 is served by the flag attribute classes.  For UINT1/UINT2/UINT4 and
+// any type not listed, ret is returned without having been reset.
+AttributeVector::SP
+AttributeFactory::createArrayFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::ARRAY);
+    assert(info.fastSearch());
+    AttributeVector::SP ret;
+    switch(info.basicType().type()) {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+        // no array implementation for the small unsigned types
+        break;
+    case BasicType::INT8:
+        ret.reset(info.huge() ? static_cast(new HugeFlagAttribute(baseFileName, info)) : static_cast(new FlagAttribute(baseFileName, info)));
+        break;
+    case BasicType::INT16:
+        ret.reset(CREATEINTARRAY(int16_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT32:
+        ret.reset(CREATEINTARRAY(int32_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT64:
+        ret.reset(CREATEINTARRAY(int64_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::FLOAT:
+        ret.reset(CREATEFLOATARRAY(float, info.huge(), baseFileName, info));
+        break;
+    case BasicType::DOUBLE:
+        ret.reset(CREATEFLOATARRAY(double, info.huge(), baseFileName, info));
+        break;
+    case BasicType::STRING:
+        ret.reset(info.huge() ? static_cast(new HugeArrayStringPostingAttribute(baseFileName, info)) : static_cast(new ArrayStringPostingAttribute(baseFileName, info)));
+        break;
+    default:
+        break;
+    }
+    return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp b/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp
new file mode 100644
index 00000000000..9a1bb04cbac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createarraystd.cpp
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include "defines.h"
+
+#include 
+LOG_SETUP(".createarraystd");
+
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using attribute::BasicType;
+
+// INTARRAY / FLOATARRAY name the plain (non fast-search) array attribute
+// classes for value type T and multi-value index type I.
+// CREATEINTARRAY / CREATEFLOATARRAY allocate such an attribute, selecting the
+// multivalue::Index64 variant when H is true and multivalue::Index32 otherwise.
+// The expansion is an unparenthesized conditional expression, so it should
+// only be used as a complete function argument.
+#define INTARRAY(T, I)   MultiValueNumericAttribute< IntegerAttributeTemplate, MULTIVALUE_ARG(T, I) >
+#define FLOATARRAY(T, I) MultiValueNumericAttribute< FloatingPointAttributeTemplate, MULTIVALUE_ARG(T, I) >
+
+#define CREATEINTARRAY(T, H, fname, info) H ? static_cast(new INTARRAY(T, multivalue::Index64)(fname, info)) : static_cast(new INTARRAY(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATARRAY(T, H, fname, info) H ? static_cast(new FLOATARRAY(T, multivalue::Index64)(fname, info)) : static_cast(new FLOATARRAY(T, multivalue::Index32)(fname, info))
+
+
+// Creates a plain (non fast-search) array attribute for the basic type given
+// in info.  The config must describe an ARRAY collection (asserted).
+// info.huge() selects the huge variant of each implementation.  For
+// UINT1/UINT2/UINT4 and any type not listed, ret is returned without having
+// been reset.
+AttributeVector::SP
+AttributeFactory::createArrayStd(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::ARRAY);
+    AttributeVector::SP ret;
+    switch(info.basicType().type()) {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+        // no array implementation for the small unsigned types
+        break;
+    case BasicType::INT8:
+        ret.reset(CREATEINTARRAY(int8_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT16:
+        ret.reset(CREATEINTARRAY(int16_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT32:
+        ret.reset(CREATEINTARRAY(int32_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT64:
+        ret.reset(CREATEINTARRAY(int64_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::FLOAT:
+        ret.reset(CREATEFLOATARRAY(float, info.huge(), baseFileName, info));
+        break;
+    case BasicType::DOUBLE:
+        ret.reset(CREATEFLOATARRAY(double, info.huge(), baseFileName, info));
+        break;
+    case BasicType::STRING:
+        ret.reset(info.huge() ? static_cast(new HugeArrayStringAttribute(baseFileName, info)) : static_cast(new ArrayStringAttribute(baseFileName, info)));
+        break;
+    default:
+        break;
+    }
+    return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp
new file mode 100644
index 00000000000..0a15b252adc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsetfastsearch.cpp
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include 
+#include 
+#include "defines.h"
+
+#include 
+LOG_SETUP(".createsetfastsearch");
+
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using attribute::BasicType;
+
+// INTSET / FLOATSET name the fast-search weighted set attribute classes for
+// integer and floating point values using multi-value index type I.
+// CREATEINTSET / CREATEFLOATSET allocate such an attribute, selecting the
+// multivalue::Index64 variant when H is true and multivalue::Index32 otherwise.
+// The expansion is an unparenthesized conditional expression, so it should
+// only be used as a complete function argument.
+#define INTSET(T, I)   MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate), WEIGHTED_MULTIVALUE_ENUM_ARG(I) >
+#define FLOATSET(T, I) MultiValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate), WEIGHTED_MULTIVALUE_ENUM_ARG(I) >
+
+#define CREATEINTSET(T, H, fname, info) H ? static_cast(new INTSET(T, multivalue::Index64)(fname, info)) : static_cast(new INTSET(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATSET(T, H, fname, info) H ? static_cast(new FLOATSET(T, multivalue::Index64)(fname, info)) : static_cast(new FLOATSET(T, multivalue::Index32)(fname, info))
+
+
+// Creates a fast-search weighted set attribute for the basic type given in
+// info.  The config must describe a WSET collection with fast-search enabled
+// (asserted).  info.huge() selects the huge variant of each implementation.
+// For UINT1/UINT2/UINT4 and any type not listed, ret is returned without
+// having been reset.
+AttributeVector::SP
+AttributeFactory::createSetFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::WSET);
+    assert(info.fastSearch());
+    AttributeVector::SP ret;
+    switch(info.basicType().type()) {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+        // no weighted set implementation for the small unsigned types
+        break;
+    case BasicType::INT8:
+        ret.reset(CREATEINTSET(int8_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT16:
+        ret.reset(CREATEINTSET(int16_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT32:
+        ret.reset(CREATEINTSET(int32_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT64:
+        ret.reset(CREATEINTSET(int64_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::FLOAT:
+        ret.reset(CREATEFLOATSET(float, info.huge(), baseFileName, info));
+        break;
+    case BasicType::DOUBLE:
+        ret.reset(CREATEFLOATSET(double, info.huge(), baseFileName, info));
+        break;
+    case BasicType::STRING:
+        ret.reset(info.huge() ? static_cast(new HugeWeightedSetStringPostingAttribute(baseFileName, info)) : static_cast(new WeightedSetStringPostingAttribute(baseFileName, info)));
+        break;
+    default:
+        break;
+    }
+    return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp b/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp
new file mode 100644
index 00000000000..b52593228b8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsetstd.cpp
@@ -0,0 +1,62 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include "defines.h"
+
+#include 
+LOG_SETUP(".createsetstd");
+
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using attribute::BasicType;
+
+// INTSET / FLOATSET name the plain (non fast-search) weighted set attribute
+// classes for value type T and multi-value index type I.
+// CREATEINTSET / CREATEFLOATSET allocate such an attribute, selecting the
+// multivalue::Index64 variant when H is true and multivalue::Index32 otherwise.
+// The expansion is an unparenthesized conditional expression, so it should
+// only be used as a complete function argument.
+#define INTSET(T, I)   MultiValueNumericAttribute< IntegerAttributeTemplate, WEIGHTED_MULTIVALUE_ARG(T, I) >
+#define FLOATSET(T, I) MultiValueNumericAttribute< FloatingPointAttributeTemplate, WEIGHTED_MULTIVALUE_ARG(T, I) >
+#define CREATEINTSET(T, H, fname, info) H ? static_cast(new INTSET(T, multivalue::Index64)(fname, info)) : static_cast(new INTSET(T, multivalue::Index32)(fname, info))
+#define CREATEFLOATSET(T, H, fname, info) H ? static_cast(new FLOATSET(T, multivalue::Index64)(fname, info)) : static_cast(new FLOATSET(T, multivalue::Index32)(fname, info))
+
+
+// Creates a plain (non fast-search) weighted set attribute for the basic type
+// given in info.  The config must describe a WSET collection (asserted).
+// info.huge() selects the huge variant of each implementation.  For
+// UINT1/UINT2/UINT4 and any type not listed, ret is returned without having
+// been reset.
+AttributeVector::SP
+AttributeFactory::createSetStd(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::WSET);
+    AttributeVector::SP ret;
+    switch(info.basicType().type()) {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+        // no weighted set implementation for the small unsigned types
+        break;
+    case BasicType::INT8:
+        ret.reset(CREATEINTSET(int8_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT16:
+        ret.reset(CREATEINTSET(int16_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT32:
+        ret.reset(CREATEINTSET(int32_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::INT64:
+        ret.reset(CREATEINTSET(int64_t, info.huge(), baseFileName, info));
+        break;
+    case BasicType::FLOAT:
+        ret.reset(CREATEFLOATSET(float, info.huge(), baseFileName, info));
+        break;
+    case BasicType::DOUBLE:
+        ret.reset(CREATEFLOATSET(double, info.huge(), baseFileName, info));
+        break;
+    case BasicType::STRING:
+        ret.reset(info.huge() ? static_cast(new HugeWeightedSetStringAttribute(baseFileName, info)) : static_cast(new WeightedSetStringAttribute(baseFileName, info)));
+        break;
+    default:
+        break;
+    }
+    return ret;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp
new file mode 100644
index 00000000000..02890eb8789
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsinglefastsearch.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+#include 
+#include 
+#include "defines.h"
+
+#include 
+LOG_SETUP(".createsinglefastsearch");
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+// INTPOSTING / FLOATPOSTING name the single-value fast-search attribute
+// classes for integer and floating point values; they are instantiated by
+// AttributeFactory::createSingleFastSearch().
+#define INTPOSTING(T)   SingleValueNumericPostingAttribute< ENUM_ATTRIBUTE(IntegerAttributeTemplate) >
+#define FLOATPOSTING(T) SingleValueNumericPostingAttribute< ENUM_ATTRIBUTE(FloatingPointAttributeTemplate) >
+
+namespace search {
+
+using attribute::BasicType;
+
+// Creates a single-value fast-search attribute for the basic type given in
+// info.  The config must describe a SINGLE collection with fast-search
+// enabled (asserted).  For UINT1/UINT2/UINT4 and any type not listed the
+// returned pointer has not been reset.
+AttributeVector::SP
+AttributeFactory::createSingleFastSearch(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::SINGLE);
+    assert(info.fastSearch());
+
+    AttributeVector::SP attr;
+    switch (info.basicType().type()) {
+    // signed integers
+    case BasicType::INT8: {
+        attr.reset(new INTPOSTING(int8_t)(baseFileName, info));
+        break;
+    }
+    case BasicType::INT16: {
+        attr.reset(new INTPOSTING(int16_t)(baseFileName, info));
+        break;
+    }
+    case BasicType::INT32: {
+        attr.reset(new INTPOSTING(int32_t)(baseFileName, info));
+        break;
+    }
+    case BasicType::INT64: {
+        attr.reset(new INTPOSTING(int64_t)(baseFileName, info));
+        break;
+    }
+    // floating point
+    case BasicType::FLOAT: {
+        attr.reset(new FLOATPOSTING(float)(baseFileName, info));
+        break;
+    }
+    case BasicType::DOUBLE: {
+        attr.reset(new FLOATPOSTING(double)(baseFileName, info));
+        break;
+    }
+    // string
+    case BasicType::STRING: {
+        attr.reset(new SingleValueStringPostingAttribute(baseFileName, info));
+        break;
+    }
+    // no fast-search implementation: attr is left as constructed
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+    default:
+        break;
+    }
+    return attr;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
new file mode 100644
index 00000000000..3d4ebaabc92
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/createsinglestd.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include 
+
+#include 
+LOG_SETUP(".createsinglestd");
+
+#include "predicate_attribute.h"
+#include "singlesmallnumericattribute.h"
+#include 
+#include 
+#include 
+#include 
+
+namespace search {
+
+using attribute::BasicType;
+
+// Creates a plain (non fast-search) single-value attribute for the basic type
+// given in info.  The config must describe a SINGLE collection (asserted).
+// The small unsigned types (UINT1/UINT2/UINT4) are constructed from the file
+// name only; all other implementations also receive info.  PREDICATE and
+// TENSOR attributes are created here as well.  For any type not listed, ret
+// is returned without having been reset.
+AttributeVector::SP
+AttributeFactory::createSingleStd(const vespalib::string & baseFileName, const Config & info)
+{
+    assert(info.collectionType().type() == attribute::CollectionType::SINGLE);
+    AttributeVector::SP ret;
+    switch(info.basicType().type()) {
+    case BasicType::UINT1:
+        ret.reset(new SingleValueBitNumericAttribute(baseFileName));
+        break;
+    case BasicType::UINT2:
+        ret.reset(new SingleValueSemiNibbleNumericAttribute(baseFileName));
+        break;
+    case BasicType::UINT4:
+        ret.reset(new SingleValueNibbleNumericAttribute(baseFileName));
+        break;
+    case BasicType::INT8:
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::INT16:
+        // XXX: Unneeded since we don't have short document fields in java.
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::INT32:
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::INT64:
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::FLOAT:
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::DOUBLE:
+        ret.reset(new SingleValueNumericAttribute >(baseFileName, info));
+        break;
+    case BasicType::STRING:
+        ret.reset(new SingleValueStringAttribute(baseFileName, info));
+        break;
+    case BasicType::PREDICATE:
+        ret.reset(new PredicateAttribute(baseFileName, info));
+        break;
+    case BasicType::TENSOR:
+        ret.reset(new attribute::TensorAttribute(baseFileName, info));
+        break;
+    default:
+        break;
+    }
+    return ret;
+}
+}  // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/defines.cpp b/searchlib/src/vespa/searchlib/attribute/defines.cpp
new file mode 100644
index 00000000000..cbe69920ac3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/defines.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "defines.h"
+#include 
+
+LOG_SETUP(".searchlib.attribute.defines");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/defines.h b/searchlib/src/vespa/searchlib/attribute/defines.h
new file mode 100644
index 00000000000..355a841c0b9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/defines.h
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+// Shorthand macros for attribute-related template instantiations.
+//
+// NOTE(review): the expansions below look truncated - the macro parameters
+// B and T never appear in the replacement text, and the text ends in
+// ", I>" without a matching "<". The template argument lists appear to have
+// been lost; confirm against the original header before relying on them.
+#define ENUM_ATTRIBUTE(B) EnumAttribute
+
+#define MULTIVALUE_ARG(T, I) multivalue::MVMTemplateArg, I>
+#define MULTIVALUE_ENUM_ARG(I) multivalue::MVMTemplateArg, I>
+#define WEIGHTED_MULTIVALUE_ARG(T, I) multivalue::MVMTemplateArg, I>
+#define WEIGHTED_MULTIVALUE_ENUM_ARG(I) multivalue::MVMTemplateArg, I>
+
diff --git a/searchlib/src/vespa/searchlib/attribute/diversity.h b/searchlib/src/vespa/searchlib/attribute/diversity.h
new file mode 100644
index 00000000000..68440d05cea
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/diversity.h
@@ -0,0 +1,226 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include 
+#include "singleenumattribute.h"
+#include "singlenumericattribute.h"
+
+/**
+ * This file contains low-level code used to implement diversified
+ * limited attribute range searches. Terms of the form [;;100;foo;3]
+ * are used to specify unbounded range searches in an attribute that
+ * produce a limited number of results while also ensuring
+ * diversified results based on a secondary attribute.
+ **/
+
+namespace search {
+namespace attribute {
+namespace diversity {
+
+/**
+ * Cursor over the iterator range [lower, upper) that is consumed from
+ * the lower end.
+ *
+ * Elements are visited through the nested Next guard: constructing a
+ * Next exposes the current front position, and destroying it advances
+ * the range by one step.
+ **/
+template <typename ITR>
+class ForwardRange
+{
+private:
+    ITR _lower;
+    ITR _upper;
+public:
+    /**
+     * Scoped accessor for the current position. The owning range is
+     * advanced when this object is destroyed, so has_next() must be
+     * true before a Next is created.
+     **/
+    class Next {
+    private:
+        ITR &_lower; // refers to the owning range's cursor
+    public:
+        Next(const Next &) = delete;
+        explicit Next(ForwardRange &range) : _lower(range._lower) {}
+        const ITR &get() const { return _lower; }
+        ~Next() { ++_lower; }
+    };
+    ForwardRange(const ITR &lower, const ITR &upper) : _lower(lower), _upper(upper) {}
+    /** Returns true while the cursor has not reached the upper bound. **/
+    bool has_next() const { return _lower != _upper; }
+};
+
+/**
+ * Cursor over the iterator range [lower, upper) that is consumed from
+ * the upper end.
+ *
+ * Elements are visited through the nested Next guard: constructing a
+ * Next first steps the upper bound back by one and then exposes that
+ * position. Nothing happens on destruction.
+ **/
+template <typename ITR>
+class ReverseRange
+{
+private:
+    ITR _lower;
+    ITR _upper;
+public:
+    /**
+     * Scoped accessor for the current position. The owning range is
+     * stepped back in the constructor, so has_next() must be true
+     * before a Next is created.
+     **/
+    class Next {
+    private:
+        ITR &_upper; // refers to the owning range's cursor
+    public:
+        Next(const Next &) = delete;
+        explicit Next(ReverseRange &range) : _upper(range._upper) { --_upper; }
+        const ITR &get() const { return _upper; }
+    };
+    ReverseRange(const ITR &lower, const ITR &upper) : _lower(lower), _upper(upper) {}
+    /** Returns true while the cursor has not reached the lower bound. **/
+    bool has_next() const { return _lower != _upper; }
+};
+
+/**
+ * Fetches per-document values through the concrete attribute type T.
+ *
+ * The generic attribute is down-cast to T at construction. The cast
+ * yields nullptr when the attribute is not a T, so valid() must be
+ * checked before get() is called.
+ **/
+template <typename T>
+struct FetchNumberFast {
+    const T * const attr;
+    typedef typename T::LoadedValueType ValueType;
+    FetchNumberFast(const IAttributeVector &attr_in) : attr(dynamic_cast<const T *>(&attr_in)) {}
+    ValueType get(uint32_t docid) const { return attr->getFast(docid); }
+    bool valid() const { return (attr != nullptr); }
+};
+
+/**
+ * Fetches per-document enum values through SingleValueEnumAttributeBase.
+ *
+ * The generic attribute is down-cast at construction. The cast yields
+ * nullptr when the attribute is not a SingleValueEnumAttributeBase, so
+ * valid() must be checked before get() is called.
+ **/
+struct FetchEnumFast {
+    const SingleValueEnumAttributeBase * const attr;
+    typedef uint32_t ValueType;
+    FetchEnumFast(const IAttributeVector &attr_in) : attr(dynamic_cast<const SingleValueEnumAttributeBase *>(&attr_in)) {}
+    ValueType get(uint32_t docid) const { return attr->getE(docid); }
+    bool valid() const { return (attr != nullptr); }
+};
+
+/**
+ * Fetches per-document enum values through the generic attribute
+ * interface (IAttributeVector::getEnum).
+ **/
+struct FetchEnum {
+    using ValueType = uint32_t;
+
+    const IAttributeVector &attr;
+
+    FetchEnum(const IAttributeVector &source)
+        : attr(source)
+    {
+    }
+
+    ValueType get(uint32_t docid) const
+    {
+        return attr.getEnum(docid);
+    }
+};
+
+/**
+ * Fetches per-document integer values through the generic attribute
+ * interface (IAttributeVector::getInt).
+ **/
+struct FetchInteger {
+    using ValueType = int64_t;
+
+    const IAttributeVector &attr;
+
+    FetchInteger(const IAttributeVector &source)
+        : attr(source)
+    {
+    }
+
+    ValueType get(uint32_t docid) const
+    {
+        return attr.getInt(docid);
+    }
+};
+
+/**
+ * Fetches per-document floating point values through the generic
+ * attribute interface (IAttributeVector::getFloat).
+ **/
+struct FetchFloat {
+    using ValueType = double;
+
+    const IAttributeVector &attr;
+
+    FetchFloat(const IAttributeVector &source)
+        : attr(source)
+    {
+    }
+
+    ValueType get(uint32_t docid) const
+    {
+        return attr.getFloat(docid);
+    }
+};
+
+/**
+ * Filters items on their way into a result container so that no
+ * diversity group contributes more than a fixed number of items.
+ *
+ * Fetcher maps an item key to its group value (Fetcher::ValueType) and
+ * Result is any container offering push_back(). The filter keeps
+ * references to both, so they must outlive it.
+ **/
+template <typename Fetcher, typename Result>
+class DiversityFilter {
+private:
+    size_t _total_count;
+    size_t _max_total;
+    const Fetcher &_diversity;
+    size_t _max_per_group;
+    size_t _cutoff_max_groups;
+    bool   _cutoff_strict;
+
+    // group value -> number of items accepted for that group
+    typedef vespalib::hash_map<typename Fetcher::ValueType, uint32_t> Diversity;
+    Diversity _seen;
+    Result &_result;
+public:
+    /**
+     * @param diversity         maps an item key to its group value
+     * @param max_per_group     max items accepted per tracked group
+     * @param cutoff_max_groups max number of groups that are tracked
+     * @param cutoff_strict     behavior once the group cutoff is reached,
+     *                          see push_back()
+     * @param result            container receiving accepted items
+     * @param max_total         max items accepted in total
+     **/
+    DiversityFilter(const Fetcher &diversity, size_t max_per_group,
+                    size_t cutoff_max_groups, bool cutoff_strict,
+                    Result &result, size_t max_total)
+        : _total_count(0), _max_total(max_total), _diversity(diversity), _max_per_group(max_per_group),
+          _cutoff_max_groups(cutoff_max_groups), _cutoff_strict(cutoff_strict), _seen(std::min(cutoff_max_groups, 10000ul)*3), _result(result) {}
+    /**
+     * Offers an item to the filter; item._key is used to look up its group.
+     *
+     * - Nothing is accepted once max_total items have been accepted.
+     * - While fewer than cutoff_max_groups groups are tracked, the item
+     *   is accepted only if its group holds fewer than max_per_group
+     *   items (a new group starts being tracked).
+     * - After the cutoff, non-strict mode accepts every item without
+     *   looking up its group. Strict mode keeps enforcing the per-group
+     *   limit for tracked groups and accepts items from untracked
+     *   groups without tracking them.
+     **/
+    template <typename Item>
+    void push_back(Item item) {
+        if (_total_count < _max_total) {
+            if ((_seen.size() < _cutoff_max_groups) || _cutoff_strict) {
+                typename Fetcher::ValueType group = _diversity.get(item._key);
+                if (_seen.size() < _cutoff_max_groups) {
+                    conditional_add(_seen[group], item);
+                } else {
+                    auto found = _seen.find(group);
+                    if (found == _seen.end()) {
+                        add(item);
+                    } else {
+                        conditional_add(found->second, item);
+                    }
+                }
+            } else if ( !_cutoff_strict) {
+                add(item);
+            }
+        }
+    }
+private:
+    // Unconditionally accepts the item and counts it against the total.
+    template <typename Item>
+    void add(Item item) {
+        ++_total_count;
+        _result.push_back(item);
+    }
+    // Accepts the item only if its group is still below max_per_group.
+    template <typename Item>
+    void conditional_add(uint32_t & group_count, Item item) {
+        if (group_count  < _max_per_group) {
+            ++group_count;
+            add(item);
+        }
+    }
+};
+
+/**
+ * Walks the dictionary range and pushes postings through a
+ * DiversityFilter into result until the range is exhausted or result
+ * holds at least wanted_hits items.
+ *
+ * After each dictionary entry, the current result size is appended to
+ * fragments if it exceeds the last recorded value.
+ *
+ * NOTE(review): fragments.back() is read before anything is pushed, so
+ * fragments must be non-empty on entry - presumably seeded by the
+ * caller; confirm.
+ **/
+template <typename DictRange, typename PostingStore, typename Fetcher, typename Result>
+void diversify_3(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
+                 const Fetcher &diversity, size_t max_per_group,
+                 size_t cutoff_max_groups, bool cutoff_strict,
+                 Result &result, std::vector<size_t> &fragments)
+{
+    DictRange range(range_in); // local copy; the Next guard mutates the range
+    using DataType = typename PostingStore::DataType;
+    using KeyDataType = typename PostingStore::KeyDataType;
+    DiversityFilter<Fetcher, Result> filter(diversity, max_per_group, cutoff_max_groups, cutoff_strict, result, wanted_hits);
+    while (range.has_next() && (result.size() < wanted_hits)) {
+        typename DictRange::Next dict_entry(range);
+        posting.foreach_frozen(dict_entry.get().getData(),
+                               [&](uint32_t key, const DataType &data)
+                               { filter.push_back(KeyDataType(key, data)); });
+        if (fragments.back() < result.size()) {
+            fragments.push_back(result.size());
+        }
+    }
+}
+
+/**
+ * Selects how group values are fetched from the diversity attribute and
+ * forwards to diversify_3.
+ *
+ * Enum attributes are handled first, then integer and floating point
+ * attributes. Within each kind, a fetcher bound to a concrete attribute
+ * class is used when the down-cast succeeds; otherwise the generic
+ * IAttributeVector accessor is used. If the attribute is none of these
+ * kinds, nothing is done and result is left untouched.
+ **/
+template <typename DictRange, typename PostingStore, typename Result>
+void diversify_2(const DictRange &range_in, const PostingStore &posting, size_t wanted_hits,
+                 const IAttributeVector &diversity_attr, size_t max_per_group,
+                 size_t cutoff_max_groups, bool cutoff_strict,
+                 Result &result, std::vector<size_t> &fragments)
+{
+    if (diversity_attr.hasEnum()) { // must handle enum first
+        FetchEnumFast fastEnum(diversity_attr);
+        if (fastEnum.valid()) {
+            diversify_3(range_in, posting, wanted_hits, fastEnum, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        } else {
+            diversify_3(range_in, posting, wanted_hits, FetchEnum(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        }
+    } else if (diversity_attr.isIntegerType()) {
+        FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int32_t> > > fastInt32(diversity_attr);
+        FetchNumberFast<SingleValueNumericAttribute<IntegerAttributeTemplate<int64_t> > > fastInt64(diversity_attr);
+        if (fastInt32.valid()) {
+            diversify_3(range_in, posting, wanted_hits, fastInt32, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        } else if (fastInt64.valid()) {
+            diversify_3(range_in, posting, wanted_hits, fastInt64, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        } else {
+            diversify_3(range_in, posting, wanted_hits, FetchInteger(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        }
+    } else if (diversity_attr.isFloatingPointType()) {
+        FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<float> > > fastFloat(diversity_attr);
+        FetchNumberFast<SingleValueNumericAttribute<FloatingPointAttributeTemplate<double> > > fastDouble(diversity_attr);
+        if (fastFloat.valid()) {
+            diversify_3(range_in, posting, wanted_hits, fastFloat, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        } else if (fastDouble.valid()) {
+            diversify_3(range_in, posting, wanted_hits, fastDouble, max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        } else {
+            diversify_3(range_in, posting, wanted_hits, FetchFloat(diversity_attr), max_per_group, cutoff_max_groups, cutoff_strict, result, fragments);
+        }
+    }
+}
+
+/**
+ * Entry point for diversified range collection.
+ *
+ * Wraps the dictionary iterators [lower, upper) in a ForwardRange when
+ * forward is true and in a ReverseRange otherwise, then forwards all
+ * remaining arguments unchanged to diversify_2.
+ **/
+template <typename DictItr, typename PostingStore, typename Result>
+void diversify(bool forward, const DictItr &lower, const DictItr &upper, const PostingStore &posting, size_t wanted_hits,
+               const IAttributeVector &diversity_attr, size_t max_per_group,
+               size_t cutoff_max_groups, bool cutoff_strict,
+               Result &array, std::vector<size_t> &fragments)
+{
+    if (forward) {
+        diversify_2(ForwardRange<DictItr>(lower, upper), posting, wanted_hits,
+                    diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+    } else {
+        diversify_2(ReverseRange<DictItr>(lower, upper), posting, wanted_hits,
+                    diversity_attr, max_per_group, cutoff_max_groups, cutoff_strict, array, fragments);
+    }
+}
+
+} // namespace search::attribute::diversity
+} // namespace search::attribute
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp b/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp
new file mode 100644
index 00000000000..f7429070f34
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/dociditerator.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include 
+#include "dociditerator.h"
+#include 
+
+LOG_SETUP(".searchlib.attribute.dociditerator");
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/dociditerator.h b/searchlib/src/vespa/searchlib/attribute/dociditerator.h
new file mode 100644
index 00000000000..c1d33d5769e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/dociditerator.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "postinglisttraits.h"
+
+namespace search
+{
+
+/**
+ * Inner attribute iterator used for temporary posting lists (range
+ * searches).
+ */
+
+/**
+ * Iterator over a contiguous array of postings of type P.
+ *
+ * P must expose a _key member (the document id) and, unless getData()
+ * is specialized for it, a getData() member. The iterator does not own
+ * the array; it only keeps pointers into [begin, end).
+ **/
+template <typename P>
+class DocIdIterator
+{
+public:
+    /** Creates an empty iterator; valid() is false until set() is called. **/
+    DocIdIterator() : _cur(nullptr), _end(nullptr), _begin(nullptr) { }
+
+    const P * operator->() const { return _cur; }
+
+    DocIdIterator & operator++() {
+        ++_cur;
+        return *this;
+    }
+
+    /** Returns true while the current position is not the end. **/
+    bool valid() const { return _cur != _end; }
+
+    /**
+     * Advances from the current position to the first posting whose key
+     * is not less than docId, or to the end if there is none.
+     **/
+    void linearSeek(uint32_t docId) {
+        while (_cur != _end && _cur->_key < docId) {
+            ++_cur;
+        }
+    }
+
+    // Both accessors dereference the current position; only call them
+    // while valid() is true.
+    uint32_t getKey(void) const { return _cur->_key; }
+    inline int32_t getData(void) const { return _cur->getData(); }
+
+    /** Binds the iterator to [begin, end) and positions it at begin. **/
+    void set(const P *begin, const P *end) {
+        _cur = begin;
+        _end = end;
+        _begin = begin;
+    }
+
+    /**
+     * Positions the iterator at the first posting whose key is not less
+     * than docId. Seeks forward from the current position when docId is
+     * beyond the current key; otherwise restarts from the beginning.
+     **/
+    void lower_bound(uint32_t docId) {
+        if (valid() && (docId > getKey())) {
+            linearSeek(docId);
+        } else {
+            _cur = _begin;
+            linearSeek(docId);
+        }
+    }
+
+    void swap(DocIdIterator &rhs) {
+        std::swap(_cur, rhs._cur);
+        std::swap(_end, rhs._end);
+        std::swap(_begin, rhs._begin);
+    }
+protected:
+    const P *_cur;
+    const P *_end;
+    const P *_begin;
+};
+
+/**
+ * Specialization of getData() that does not read the posting and
+ * always reports a weight of 1.
+ **/
+template <>
+inline int32_t
+DocIdIterator<AttributePosting>::getData(void) const
+{
+    return 1;	// default weight 1 for single value attributes
+}
+
+
+/**
+ * Inner attribute iterator used for short posting lists (8 or fewer
+ * documents).
+ */
+
+template 
+class DocIdMinMaxIterator : public DocIdIterator

+{ +public: + DocIdMinMaxIterator(void) + : DocIdIterator

() + { } + + inline btree::MinMaxAggregated + getAggregated(void) const { + return btree::MinMaxAggregated(1, 1); + } +}; + + +template<> +inline btree::MinMaxAggregated +DocIdMinMaxIterator::getAggregated(void) const +{ + btree::MinMaxAggregated a; + for (const AttributeWeightPosting *cur = _cur, *end = _end; cur != end; ++cur) { + a.add(cur->getData()); + } + return a; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp new file mode 100644 index 00000000000..396c50eba65 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "enumattribute.h" +#include "enumattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.enumattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.h b/searchlib/src/vespa/searchlib/attribute/enumattribute.h new file mode 100644 index 00000000000..75d9e44b43f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.h @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include "attributevector.h" +#include "loadedenumvalue.h" +#include + +namespace search { + +namespace attribute +{ + +template class PostingSearchContext; + +} + +template +class EnumAttribute : public B +{ + template + friend class attribute::PostingSearchContext; // getEnumStore() +protected: + typedef B BaseClass; + typedef typename B::DocId DocId; + typedef typename B::EnumHandle EnumHandle; + typedef typename B::EnumEntryType EnumEntryType; // Template argument for enum store + typedef typename B::EnumEntryType::Type EnumType; // Type stored in enum store (integer, float, string) + typedef typename B::Change Change; + typedef typename B::Change::DataType ChangeDataType; + typedef typename B::ChangeVector ChangeVector; + typedef typename B::ChangeVector::const_iterator ChangeVectorIterator; + typedef typename B::EnumModifier EnumModifier; + typedef typename B::ValueModifier ValueModifier; +public: + typedef typename B::LoadedVector LoadedVector; + typedef typename B::EnumIndexVector EnumIndexVector; + typedef typename B::EnumVector EnumVector; + typedef typename B::LoadedValueType LoadedValueType; +protected: + typedef typename B::generation_t generation_t; + typedef std::set UniqueSet; + typedef attribute::LoadedEnumAttributeVector + LoadedEnumAttributeVector; + using B::getGenerationHolder; + using B::getStatus; + +public: + typedef EnumStoreT EnumStore; +protected: + typedef EnumStoreBase::Index EnumIndex; + + EnumStore _enumStore; + + EnumStore & getEnumStore() { return _enumStore; } + const EnumStore & getEnumStore() const { return _enumStore; } + + virtual const EnumStoreBase * getEnumStoreBase() const { return &_enumStore; } + virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const { _enumStore.getEnumValue(v, e, sz); } + virtual EnumType getFromEnum(EnumHandle e) const { return _enumStore.getValue(e); } + + virtual void fillPostings(LoadedVector & loaded) { (void) loaded; } + virtual void 
fillEnum(LoadedVector & loaded); + + virtual void + fillEnum0(const void *src, + size_t srcLen, + EnumIndexVector &eidxs); + + virtual void + fixupEnumRefCounts(const EnumVector &enumHist); + + virtual uint64_t + getUniqueValueCount(void) const; + + static EnumType getDefaultEnumTypeValue() { return B::defaultValue(); } + + /* + * Iterate through the change vector and find new unique values. + * Perform compaction if necessary and insert the new unique values into the EnumStore. + */ + void insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes); + virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques) = 0; + virtual void reEnumerate() = 0; + virtual bool hasEnum2Value() const { return true; } + virtual AddressSpace getEnumStoreAddressSpaceUsage() const override; + +public: + EnumAttribute(const vespalib::string & baseFileName, + const AttributeVector::Config & cfg); + + virtual ~EnumAttribute(); + virtual bool findEnum(EnumType v, EnumHandle & e) const { return _enumStore.findEnum(v, e); } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp new file mode 100644 index 00000000000..7159dc42860 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumattribute.hpp @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include + +namespace search { + +template +EnumAttribute:: +EnumAttribute(const vespalib::string &baseFileName, + const AttributeVector::Config &cfg) + : B(baseFileName, cfg), + _enumStore(0, cfg.fastSearch()) +{ + this->setEnum(true); +} + +template +EnumAttribute::~EnumAttribute() +{ +} + +template +void EnumAttribute::fillEnum(LoadedVector & loaded) +{ + typename EnumStore::Builder builder; + if (!loaded.empty()) { + typename LoadedVector::Type v = loaded.read(); + LoadedValueType prev = v.getValue(); + uint32_t prevRefCount(0); + EnumIndex index = builder.insert(v.getValue(), v._pidx.ref()); + for(size_t i(0), m(loaded.size()); i < m; ++i, loaded.next()) { + v = loaded.read(); + if (EnumStore::ComparatorType::compare(prev, v.getValue()) != 0) { + builder.updateRefCount(prevRefCount); + index = builder.insert(v.getValue(), v._pidx.ref()); + prev = v.getValue(); + prevRefCount = 1; + } else { + prevRefCount++; + } + v.setEidx(index); + loaded.write(v); + } + builder.updateRefCount(prevRefCount); + } + _enumStore.reset(builder); + this->setEnumMax(_enumStore.getLastEnum()); +} + + +template +void +EnumAttribute::fillEnum0(const void *src, + size_t srcLen, + EnumIndexVector &eidxs) +{ + ssize_t sz = _enumStore.deserialize(src, srcLen, eidxs); + assert(static_cast(sz) == srcLen); + (void) sz; + this->setEnumMax(_enumStore.getLastEnum()); +} + + +template +void +EnumAttribute::fixupEnumRefCounts( + const EnumVector &enumHist) +{ + _enumStore.fixupRefCounts(enumHist); +} + + +template +uint64_t +EnumAttribute::getUniqueValueCount(void) const +{ + return _enumStore.getNumUniques(); +} + + + +template +void +EnumAttribute::insertNewUniqueValues(EnumStoreBase::IndexVector & newIndexes) +{ + UniqueSet newUniques; + + // find new unique strings + for (const auto & data : this->_changes) { + considerAttributeChange(data, newUniques); + } + + uint64_t extraBytesNeeded = 0; + for (const auto & data : newUniques) { + extraBytesNeeded += 
_enumStore.getEntrySize(data.raw()); + } + + do { + // perform compaction on EnumStore if necessary + if (extraBytesNeeded > this->_enumStore.getRemaining() || + this->_enumStore.getPendingCompact()) { + this->_enumStore.clearPendingCompact(); + this->removeAllOldGenerations(); + if (!this->_enumStore.performCompaction(extraBytesNeeded)) { + // fallback to resize strategy + this->_enumStore.fallbackResize(extraBytesNeeded); + if (extraBytesNeeded > this->_enumStore.getRemaining()) { + fprintf(stderr, "Cannot fallbackResize enumStore\n"); + abort(); + } + break; // fallback resize performed instead of compaction. + } + + // update underlying structure with new EnumIndex values. + reEnumerate(); + // Clear scratch enumeration + for (auto & data : this->_changes) { + data._enumScratchPad = ChangeBase::UNSET_ENUM; + } + + // clear mapping from old enum value to new index + _enumStore.clearIndexMap(); + } + } while (0); + + // insert new unique values in EnumStore + for (const auto & data : newUniques) { + EnumIndex idx; + _enumStore.addEnum(data.raw(), idx); + newIndexes.push_back(idx); + } +} + + +template +AddressSpace +EnumAttribute::getEnumStoreAddressSpaceUsage() const +{ + return _enumStore.getAddressSpaceUsage(); +} + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp new file mode 100644 index 00000000000..37c66213134 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "enumattributesaver.h" +#include "iattributesavetarget.h" +#include + +namespace search { + +EnumAttributeSaver:: +EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate) + : _enumStore(enumStore), + _disableReEnumerate(disableReEnumerate), + _rootRef() +{ + if (_disableReEnumerate) { + // Prevent enum store from re-enumerating enum values during compaction + _enumStore.disableReEnumerate(); + } + const EnumStoreDictBase &enumDict = enumStore.getEnumStoreDict(); + _rootRef = enumDict.getFrozenRootRef(); +} + +EnumAttributeSaver::~EnumAttributeSaver() +{ + enableReEnumerate(); +} + +void +EnumAttributeSaver::enableReEnumerate() +{ + if (_disableReEnumerate) { + // compaction of enumstore can now re-enumerate enum values + _enumStore.enableReEnumerate(); + _disableReEnumerate = false; + } +} + +void +EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget) +{ + if (saveTarget.getEnumerated()) { + std::unique_ptr + udatWriter(saveTarget.udatWriter().allocBufferWriter()); + const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict(); + enumDict.writeAllValues(*udatWriter, _rootRef); + udatWriter->flush(); + } +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h new file mode 100644 index 00000000000..c30a416c9fe --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "enumstorebase.h" + +namespace search { + +class IAttributeSaveTarget; + +/* + * Helper class for saving an enumerated multivalue attribute. + * + * It handles writing to the udat file. 
+ */ +class EnumAttributeSaver +{ + const EnumStoreBase &_enumStore; + bool _disableReEnumerate; + btree::BTreeNode::Ref _rootRef; + +public: + EnumAttributeSaver(const EnumStoreBase &enumStore, bool disableReEnumerate); + + ~EnumAttributeSaver(); + + void enableReEnumerate(); + + void writeUdat(IAttributeSaveTarget &saveTarget); + + const EnumStoreBase &getEnumStore() const { return _enumStore; } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp new file mode 100644 index 00000000000..fde5666371b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "enumcomparator.h" +#include +#include "enumstore.hpp" + +namespace search { + +namespace { + +FoldedStringCompare _strCmp; + +} + +template <> +int +EnumStoreComparatorT >::compare(EntryValue lhs, EntryValue rhs) +{ + return FloatingPointCompareHelper::compare(lhs, rhs); +} + +template <> +int +EnumStoreComparatorT >::compare(EntryValue lhs, EntryValue rhs) +{ + return FloatingPointCompareHelper::compare(lhs, rhs); +} + +template <> +EnumStoreFoldedComparatorT:: +EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, + EntryValue value, bool prefix) + : ParentType(enumStore, value), + _prefix(prefix), + _prefixLen(0u) +{ + if (getUsePrefix()) + _prefixLen = _strCmp.size(value); +} + +template <> +int +EnumStoreComparatorT::compare(EntryValue lhs, EntryValue rhs) +{ + return _strCmp.compare(lhs, rhs); +} + +template <> +int +EnumStoreFoldedComparatorT::compareFolded(EntryValue lhs, + EntryValue rhs) +{ + return _strCmp.compareFolded(lhs, rhs); +} + +template <> +int +EnumStoreFoldedComparatorT:: +compareFoldedPrefix(EntryValue lhs, + EntryValue rhs, + size_t prefixLen) +{ + return _strCmp.compareFoldedPrefix(lhs, rhs, 
prefixLen); +} + +template class EnumStoreComparatorT; +template class EnumStoreComparatorT >; +template class EnumStoreComparatorT >; +template class EnumStoreComparatorT >; +template class EnumStoreComparatorT >; +template class EnumStoreComparatorT >; +template class EnumStoreComparatorT >; +template class EnumStoreFoldedComparatorT; +template class EnumStoreFoldedComparatorT >; +template class EnumStoreFoldedComparatorT >; +template class EnumStoreFoldedComparatorT >; +template class EnumStoreFoldedComparatorT >; +template class EnumStoreFoldedComparatorT >; +template class EnumStoreFoldedComparatorT >; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h new file mode 100644 index 00000000000..1a6dcb4c4e1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h @@ -0,0 +1,195 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "enumstore.h" + +namespace search { + +/** + * Template comparator class for the various entry types. + **/ +template +class EnumStoreComparatorT : public EnumStoreComparator { +public: + typedef EnumStoreT EnumStoreType; +protected: + typedef typename EntryType::Type EntryValue; + const EnumStoreType & _enumStore; + EntryValue _value; + EntryValue getValue(const EnumIndex & idx) const { + if (idx.valid()) { + return _enumStore.getValue(idx); + } + return _value; + } +public: + /** + * Creates a comparator using the given enum store. + **/ + EnumStoreComparatorT(const EnumStoreType & enumStore); + /** + * Creates a comparator using the given enum store and that uses the + * given value during compare if the enum index is invalid. 
+ **/ + EnumStoreComparatorT(const EnumStoreType & enumStore, + EntryValue value); + + static int compare(EntryValue lhs, EntryValue rhs) { + if (lhs < rhs) { + return -1; + } else if (lhs == rhs) { + return 0; + } + return 1; + } + virtual bool operator() (const EnumIndex & lhs, const EnumIndex & rhs) const { + return compare(getValue(lhs), getValue(rhs)) < 0; + } +}; + + +/** + * Template comparator class for the various entry types that uses folded compare. + **/ +template +class EnumStoreFoldedComparatorT : public EnumStoreComparatorT { +private: + typedef EnumStoreComparatorT ParentType; + typedef typename ParentType::EnumStoreType EnumStoreType; + typedef typename ParentType::EnumIndex EnumIndex; + typedef typename ParentType::EntryValue EntryValue; + using ParentType::getValue; + bool _prefix; + size_t _prefixLen; +public: + /** + * Creates a comparator using the given enum store. + * @param prefix whether we should perform prefix compare. + **/ + EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, bool prefix = false); + /** + * Creates a comparator using the given enum store and that uses the + * given value during compare if the enum index is invalid. + * @param prefix whether we should perform prefix compare. 
+ **/ + EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, + EntryValue value, bool prefix = false); + + inline bool + getUsePrefix(void) const + { + return false; + } + + static int + compareFolded(EntryValue lhs, EntryValue rhs) + { + return ParentType::compare(lhs, rhs); + } + + static int + compareFoldedPrefix(EntryValue lhs, EntryValue rhs, size_t prefixLen) + { + (void) prefixLen; + return ParentType::compare(lhs, rhs); + } + + virtual bool + operator() (const EnumIndex & lhs, const EnumIndex & rhs) const + { + if (getUsePrefix()) + return compareFoldedPrefix(getValue(lhs), + getValue(rhs), _prefixLen) < 0; + return compareFolded(getValue(lhs), getValue(rhs)) < 0; + } +}; + + +template +EnumStoreComparatorT::EnumStoreComparatorT(const EnumStoreType & enumStore) : + _enumStore(enumStore), + _value() +{ +} + +template +EnumStoreComparatorT::EnumStoreComparatorT(const EnumStoreType & enumStore, + EntryValue value) : + _enumStore(enumStore), + _value(value) +{ +} + +template <> +int +EnumStoreComparatorT >::compare(EntryValue lhs, EntryValue rhs); + +template <> +int +EnumStoreComparatorT >::compare(EntryValue lhs, EntryValue rhs); + +template <> +int +EnumStoreComparatorT::compare(EntryValue lhs, EntryValue rhs); + + +template +EnumStoreFoldedComparatorT:: +EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, bool prefix) + : ParentType(enumStore), + _prefix(prefix), + _prefixLen(0u) +{ +} + +template +EnumStoreFoldedComparatorT:: +EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, + EntryValue value, bool prefix) + : ParentType(enumStore, value), + _prefix(prefix), + _prefixLen(0u) +{ +} + +template <> +EnumStoreFoldedComparatorT:: +EnumStoreFoldedComparatorT(const EnumStoreType & enumStore, + EntryValue value, bool prefix); + +template <> +int +EnumStoreFoldedComparatorT::compareFolded(EntryValue lhs, + EntryValue rhs); + +template <> +int +EnumStoreFoldedComparatorT:: +compareFoldedPrefix(EntryValue lhs, EntryValue rhs, size_t 
prefixLen); + +template <> +inline bool +EnumStoreFoldedComparatorT::getUsePrefix(void) const +{ + return _prefix; +} + + +extern template class EnumStoreComparatorT; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreComparatorT >; +extern template class EnumStoreFoldedComparatorT; +extern template class EnumStoreFoldedComparatorT >; +extern template class EnumStoreFoldedComparatorT >; +extern template class EnumStoreFoldedComparatorT >; +extern template class EnumStoreFoldedComparatorT >; +extern template class EnumStoreFoldedComparatorT >; +extern template class EnumStoreFoldedComparatorT >; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp new file mode 100644 index 00000000000..3b01d02b166 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "enumhintsearchcontext.h" +#include + +namespace search +{ + +using queryeval::SearchIterator; + +namespace attribute +{ + +using btree::BTreeNode; +using fef::TermFieldMatchData; + +EnumHintSearchContext:: +EnumHintSearchContext(const EnumStoreDictBase &dictionary, + uint32_t docIdLimit, + uint64_t numValues) + : _dictionary(dictionary), + _frozenRootRef(dictionary.getFrozenRootRef()), + _uniqueValues(0u), + _docIdLimit(docIdLimit), + _numValues(numValues) +{ +} + + +EnumHintSearchContext::~EnumHintSearchContext(void) +{ +} + + +void +EnumHintSearchContext::lookupTerm(const EnumStoreComparator &comp) +{ + _uniqueValues = _dictionary.lookupFrozenTerm(_frozenRootRef, comp); +} + + +void +EnumHintSearchContext::lookupRange(const EnumStoreComparator &low, + const EnumStoreComparator &high) +{ + _uniqueValues = _dictionary.lookupFrozenRange(_frozenRootRef, low, high); +} + +void +EnumHintSearchContext::fetchPostings(bool strict) +{ + (void) strict; +} + +SearchIterator::UP +EnumHintSearchContext::createPostingIterator(TermFieldMatchData *matchData, + bool strict) +{ + (void) matchData; + (void) strict; + + return (_uniqueValues == 0u) + ? SearchIterator::UP(new queryeval::EmptySearch()) + : SearchIterator::UP(); +} + + +unsigned int +EnumHintSearchContext::approximateHits(void) const +{ + return (_uniqueValues == 0u) + ? 0u + : std::max(uint64_t(_docIdLimit), _numValues); +} + +} // namespace attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h new file mode 100644 index 00000000000..b77db84b520 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "enumstore.h" +#include "postinglisttraits.h" +#include "ipostinglistsearchcontext.h" +#include + +namespace search +{ + +namespace attribute +{ + +/** + * Search context helper for enumerated attributes, used to eliminate + * searches for values that are not present at all. + * + * lookupTerm()/lookupRange() store the count returned by the dictionary + * lookup; while that count is zero createPostingIterator() returns an + * EmptySearch and approximateHits() returns 0. + */ + +class EnumHintSearchContext : public IPostingListSearchContext +{ + const EnumStoreDictBase &_dictionary; /* dictionary queried by lookupTerm()/lookupRange() */ + const btree::BTreeNode::Ref _frozenRootRef; /* dictionary.getFrozenRootRef() as read by the constructor */ + uint32_t _uniqueValues; /* result of the latest lookup; 0 until one has been made */ + uint32_t _docIdLimit; /* constructor argument, input to approximateHits() */ + uint64_t _numValues; // attr.getStatus().getNumValues(); + +protected: + EnumHintSearchContext(const EnumStoreDictBase &dictionary, + uint32_t docIdLimit, + uint64_t numValues); + ~EnumHintSearchContext(void); + + void lookupTerm(const EnumStoreComparator &comp); /* stores _dictionary.lookupFrozenTerm() in _uniqueValues */ + void lookupRange(const EnumStoreComparator &low, const EnumStoreComparator &high); /* stores _dictionary.lookupFrozenRange() in _uniqueValues */ + + queryeval::SearchIterator::UP + createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override; /* EmptySearch when _uniqueValues is 0, otherwise an empty UP; both arguments are ignored */ + + void fetchPostings(bool strict) override; /* no-op in this class */ + unsigned int approximateHits(void) const override; /* 0 when _uniqueValues is 0, otherwise max(_docIdLimit, _numValues) */ +}; + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp new file mode 100644 index 00000000000..10a8b46ce80 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp @@ -0,0 +1,361 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "enumstore.h" +#include "enumstore.hpp" +#include + +namespace search { + +template <> +void +EnumStoreT:: +insertEntryValue(char * dst, Type value) +{ + strcpy(dst, value); +} + +template <> +void +EnumStoreT:: +printEntry(vespalib::asciistream & os, const Entry & e) const +{ + os << "Entry: {"; + os << "enum: " << e.getEnum(); + os << ", refcount: " << e.getRefCount(); + os << ", value: " << vespalib::string(e.getValue()); + os << "}"; +} + + +template <> +void +EnumStoreT >:: +printEntry(vespalib::asciistream & os, const Entry & e) const +{ + os << "Entry: {"; + os << "enum: " << e.getEnum(); + os << ", refcount: " << e.getRefCount(); + os << ", value: " << e.getValue(); + union + { + unsigned int _asInt; + float _asFloat; + } u; + u._asFloat = e.getValue(); + os << ", bvalue: 0x" << std::hex << u._asInt; + os << "}"; +} + + +template <> +void +EnumStoreT >:: +printEntry(vespalib::asciistream & os, const Entry & e) const +{ + os << "Entry: {"; + os << "enum: " << e.getEnum(); + os << ", refcount: " << e.getRefCount(); + os << ", value: " << e.getValue(); + union + { + unsigned long _asLong; + double _asDouble; + } u; + u._asDouble = e.getValue(); + os << ", bvalue: 0x" << std::hex << u._asLong; + os << "}"; +} + + +template <> +void +EnumStoreT::printValue(vespalib::asciistream & os, Index idx) const +{ + os << vespalib::string(getValue(idx)); +} + +template <> +void +EnumStoreT::printValue(vespalib::asciistream & os, Type value) const +{ + os << vespalib::string(value); +} + + +template <> +void +EnumStoreT::writeValues(BufferWriter &writer, + const Index *idxs, + size_t count) const +{ + for (uint32_t i = 0; i < count; ++i) { + Index idx = idxs[i]; + const char *src(_store.getBufferEntry(idx.bufferId(), + idx.offset()) + + EntryBase::size()); + size_t sz = strlen(src) + 1; + writer.write(src, sz); + } +} + + +template <> +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + size_t &initSpace) +{ + size_t slen = 
strlen(static_cast(src)); + size_t sz(StringEntryType::fixedSize() + slen); + if (available < sz) + return -1; + uint32_t entrySize(alignEntrySize(EntryBase::size() + sz)); + initSpace += entrySize; + return sz; +} + + +template <> +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + Index &idx) +{ + size_t slen = strlen(static_cast(src)); + size_t sz(StringEntryType::fixedSize() + slen); + if (available < sz) + return -1; + uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID); + btree::BufferState & buffer = _store.getBufferState(activeBufferId); + uint32_t entrySize(alignEntrySize(EntryBase::size() + sz)); + if (buffer.remaining() < entrySize) { + fprintf(stderr, "Out of enumstore bufferspace\n"); + abort(); // not enough space + } + uint64_t offset = buffer.size(); + char *dst(_store.getBufferEntry(activeBufferId, offset)); + memcpy(dst, &_nextEnum, sizeof(uint32_t)); + uint32_t pos = sizeof(uint32_t); + uint32_t refCount(0); + memcpy(dst + pos, &refCount, sizeof(uint32_t)); + pos += sizeof(uint32_t); + memcpy(dst + pos, src, sz); + buffer.pushed_back(entrySize); + ++_nextEnum; + + if (idx.valid()) { + assert(ComparatorType::compare(getValue(idx), + Entry(dst).getValue()) < 0); + } + idx = Index(offset, activeBufferId); + return sz; +} + + +template +class btree::BTreeNodeDataWrap; + +template +class btree::BTreeNodeDataWrap; + +#if 0 +template +class btree::BTreeKeyData; + +template +class btree::BTreeKeyData; +#endif + +template +class btree::BTreeNodeT; + +#if 0 +template +class btree::BTreeNodeT; +#endif + +template +class btree::BTreeNodeTT; + +template +class btree::BTreeNodeTT; + +#if 0 +template +class btree::BTreeNodeTT; +#endif + +template +class btree::BTreeInternalNode; + +template +class btree::BTreeLeafNode; + +template +class btree::BTreeLeafNode; + +template +class btree::BTreeLeafNodeTemp; + +template +class btree::BTreeLeafNodeTemp; + +template +class btree::BTreeNodeStore; + +template +class btree::BTreeNodeStore; + 
+template +class btree::BTreeIteratorBase; +template +class btree::BTreeIteratorBase; +template +class btree::BTreeIterator; +template +class btree::BTreeIterator; +template +class btree::BTree; +template +class btree::BTree; +template +class btree::BTreeRoot; + +template +class btree::BTreeRoot; +template +class btree::BTreeRootT; + +template +class btree::BTreeRootT; +template +class btree::BTreeRootBase; + +template +class btree::BTreeRootBase; + +template +class btree::BTreeNodeAllocator; + +template +class btree::BTreeNodeAllocator; + +template +class btree::BTreeBuilder; + +template +class btree::BTreeBuilder; + +template class EnumStoreT< StringEntryType >; +template class EnumStoreT >; +template class EnumStoreT >; +template class EnumStoreT >; +template class EnumStoreT >; +template class EnumStoreT >; +template class EnumStoreT >; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h new file mode 100644 index 00000000000..0f0675248a7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -0,0 +1,501 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "enumstorebase.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +template class EnumStoreComparatorT; +template class EnumStoreFoldedComparatorT; + +/** + * Class representing a numeric entry type in a enum store. + * Used as template argument for EnumStoreT. + **/ + +template +class NumericEntryType { +public: + typedef T Type; + static uint32_t size(Type) { return fixedSize(); } + static uint32_t fixedSize() { return sizeof(T); } + + static bool + hasFold(void) + { + return false; + } +}; + +/** + * Class representing a string entry type in a enum store. + * Used as template argument for EnumStoreT. 
+ **/ +class StringEntryType { +public: + typedef const char * Type; + static uint32_t size(Type value) { return strlen(value) + fixedSize(); } + static uint32_t fixedSize() { return 1; } + + static bool + hasFold(void) + { + return true; + } +}; + + +/** + * Used to determine the ordering between two floating point values that can be NAN. + **/ +struct FloatingPointCompareHelper +{ + template + static int compare(T a, T b) { + if (std::isnan(a) && std::isnan(b)) { + return 0; + } else if (std::isnan(a)) { + return -1; + } else if (std::isnan(b)) { + return 1; + } else if (a < b) { + return -1; + } else if (a == b) { + return 0; + } + return 1; + } +}; + + +//----------------------------------------------------------------------------- +// EnumStoreT +//----------------------------------------------------------------------------- +template +class EnumStoreT : public EnumStoreBase +{ + friend class EnumStoreTest; +public: + typedef typename EntryType::Type Type; + typedef EnumStoreComparatorT ComparatorType; + typedef EnumStoreFoldedComparatorT FoldedComparatorType; + using EnumStoreBase::deserialize; + using EnumStoreBase::fixupRefCounts; + using EnumStoreBase::reset; + + class Entry : public EntryBase { + public: + Entry(void * data) : EntryBase(data) {} + Type getValue() const; + static uint32_t fixedSize() { return EntryBase::size() + EntryType::fixedSize(); } + }; + static void insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value); + +private: + EnumStoreT(const EnumStoreT & rhs) = delete; + EnumStoreT & operator=(const EnumStoreT & rhs) = delete; + + static void insertEntryValue(char * dst, Type value) { + memcpy(dst, &value, sizeof(Type)); + } + +protected: + typedef EnumStoreBase::IndexSet IndexSet; + using EnumStoreBase::_store; + using EnumStoreBase::TYPE_ID; + + Entry getEntry(Index idx) const { + return Entry(const_cast(_store).getBufferEntry(idx.bufferId(), idx.offset())); + } + void printEntry(vespalib::asciistream & os, const 
Entry & e) const; + + virtual void + freeUnusedEnum(Index idx, IndexSet & unused); + +public: + EnumStoreT(uint64_t initBufferSize, bool hasPostings) + : EnumStoreBase(initBufferSize, hasPostings) + { + } + + bool getValue(Index idx, Type & value) const; + Type getValue(uint32_t idx) const { return getValue(Index(btree::EntryRef(idx))); } + Type getValue(Index idx) const { return getEntry(idx).getValue(); } + virtual uint32_t getFixedSize() const { return Entry::fixedSize(); } + + static uint32_t + getEntrySize(Type value) + { + return alignEntrySize(EntryBase::size() + EntryType::size(value)); + } + void printBuffer(vespalib::asciistream & os, uint32_t bufferIdx) const; + void printValue(vespalib::asciistream & os, Index idx) const; + void printValue(vespalib::asciistream & os, Type value) const; + + class Builder { + public: + struct UniqueEntry { + UniqueEntry(const Type & val, size_t sz, uint32_t pidx = 0) : _value(val), _sz(sz), _pidx(pidx), _refCount(1) { } + Type _value; + size_t _sz; + size_t _pidx; + uint32_t _refCount; + }; + + typedef vespalib::Array Uniques; + private: + Uniques _uniques; + uint64_t _bufferSize; + public: + Builder() : _uniques(), _bufferSize(Index::align(1)) {} + Index insert(Type value, uint32_t pidx = 0) { + uint32_t entrySize = getEntrySize(value); + _uniques.push_back(UniqueEntry(value, entrySize, pidx)); + Index index(_bufferSize, 0); // bufferId 0 should be used when resetting with a builder + _bufferSize += entrySize; + return index; + } + void updateRefCount(uint32_t refCount) { _uniques.rbegin()->_refCount = refCount; } + const Uniques & getUniques() const { return _uniques; } + uint64_t getBufferSize() const { return _bufferSize; } + }; + + virtual void + writeValues(BufferWriter &writer, + const Index *idxs, size_t count) const override; + + virtual ssize_t + deserialize(const void *src, size_t available, size_t &initSpace); + + virtual ssize_t + deserialize(const void *src, size_t available, Index &idx); + + virtual bool + 
foldedChange(const Index &idx1, const Index &idx2); + + virtual bool + findEnum(Type value, EnumStoreBase::EnumHandle &e) const; + + void + addEnum(Type value, Index &newIdx); + + virtual bool + findIndex(Type value, Index &idx) const; + + virtual void + freeUnusedEnums(bool movePostingidx); + + virtual void + freeUnusedEnums(const IndexVector &toRemove); + + void + reset(Builder &builder); + + virtual bool + performCompaction(uint64_t bytesNeeded); + + void + printCurrentContent(vespalib::asciistream &os) const; + +private: + template + void + reset(Builder &builder, Dictionary &dict); + + template + void + addEnum(Type value, Index &newIdx, Dictionary &dict); + + template + void + performCompaction(Dictionary &dict); + + template + void + printCurrentContent(vespalib::asciistream &os, + const Dictionary &dict) const; +}; + +template +inline typename EntryType::Type +EnumStoreT::Entry::getValue() const // implementation for numeric +{ + Type dst; + const char * src = _data + EntryBase::size(); + memcpy(&dst, src, sizeof(Type)); + return dst; +} + +template <> +inline StringEntryType::Type +EnumStoreT::Entry::getValue() const +{ + return (_data + EntryBase::size()); +} + + +template <> +void +EnumStoreT::writeValues(BufferWriter &writer, + const Index *idxs, + size_t count) const; + +template <> +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + size_t &initSpace); + +template <> +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + Index &idx); + + +//----------------------------------------------------------------------------- +// EnumStore +//----------------------------------------------------------------------------- + +template <> +void +EnumStoreT:: +insertEntryValue(char * dst, Type value); + +template <> +void +EnumStoreT:: +printEntry(vespalib::asciistream & os, const Entry & e) const; + +template <> +void +EnumStoreT >:: +printEntry(vespalib::asciistream & os, const Entry & e) const; + +template <> +void 
+EnumStoreT >:: +printEntry(vespalib::asciistream & os, const Entry & e) const; + +template <> +void +EnumStoreT::printValue(vespalib::asciistream & os, Index idx) const; + +template <> +void +EnumStoreT::printValue(vespalib::asciistream & os, Type value) const; + +extern template +class btree::BTreeNodeDataWrap; + +extern template +class btree::BTreeNodeDataWrap; + +#if 0 +extern template +class btree::BTreeKeyData; + +extern template +class btree::BTreeKeyData; +#endif + +extern template +class btree::BTreeNodeT; + +#if 0 +extern template +class btree::BTreeNodeT; +#endif + +extern template +class btree::BTreeNodeTT; + +extern template +class btree::BTreeNodeTT; + +#if 0 +extern template +class btree::BTreeNodeTT; +#endif + +extern template +class btree::BTreeInternalNode; + +extern template +class btree::BTreeLeafNode; + +extern template +class btree::BTreeLeafNode; + +extern template +class btree::BTreeLeafNodeTemp; + +extern template +class btree::BTreeLeafNodeTemp; + +extern template +class btree::BTreeNodeStore; + +extern template +class btree::BTreeNodeStore; + +extern template +class btree::BTreeIteratorBase; +extern template +class btree::BTreeIteratorBase; +extern template +class btree::BTreeIterator; +extern template +class btree::BTreeIterator; +extern template +class btree::BTree; +extern template +class btree::BTree; +extern template +class btree::BTreeRoot; +extern template +class btree::BTreeRoot; +extern template +class btree::BTreeRootT; +extern template +class btree::BTreeRootT; +extern template +class btree::BTreeRootBase; +extern template +class btree::BTreeRootBase; + +extern template +class btree::BTreeNodeAllocator; +extern template +class btree::BTreeNodeAllocator; + +extern template +class btree::BTreeBuilder; +extern template +class btree::BTreeBuilder; + +extern template class EnumStoreT< StringEntryType >; +extern template class EnumStoreT >; +extern template class EnumStoreT >; +extern template class EnumStoreT >; +extern template 
class EnumStoreT >; +extern template class EnumStoreT >; +extern template class EnumStoreT >; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp new file mode 100644 index 00000000000..aa0de0e8845 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -0,0 +1,502 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +template +void EnumStoreT::freeUnusedEnum(Index idx, IndexSet & unused) +{ + Entry e = getEntry(idx); + if (e.getRefCount() == 0) { + Type value = e.getValue(); + if (unused.insert(idx).second) { + _store.incDead(idx.bufferId(), getEntrySize(value)); + } + } +} + +template +void +EnumStoreT:: +insertEntry(char * dst, uint32_t enumValue, uint32_t refCount, Type value) +{ + memcpy(dst, &enumValue, sizeof(uint32_t)); + uint32_t pos = sizeof(uint32_t); + memcpy(dst + pos, &refCount, sizeof(uint32_t)); + pos += sizeof(uint32_t); + insertEntryValue(dst + pos, value); +} + +template <> +void +EnumStoreT:: +insertEntryValue(char * dst, Type value); + +template +void +EnumStoreT::printEntry(vespalib::asciistream & os, const Entry & e) const +{ + os << "Entry: {"; + os << "enum: " << e.getEnum(); + os << ", refcount: " << e.getRefCount(); + os << ", value: " << e.getValue(); + os << "}"; +} + +template +bool +EnumStoreT::getValue(Index idx, Type & value) const +{ + if (!validIndex(idx)) { + return false; + } + value = getEntry(idx).getValue(); + return true; +} + +template +void +EnumStoreT::printBuffer(vespalib::asciistream & os, uint32_t bufferIdx) const +{ + uint64_t i = 0; + while (i < _store.getBufferState(bufferIdx).size()) { + Index idx(i, bufferIdx); + + Entry e = this->getEntry(idx); + this->printEntry(os, e); + os << ", " << idx 
<< '\n'; + i += this->getEntrySize(e.getValue()); + } +} + +template +void +EnumStoreT::printValue(vespalib::asciistream & os, Index idx) const +{ + os << getValue(idx); +} + +template +void +EnumStoreT::printValue(vespalib::asciistream & os, Type value) const +{ + os << value; +} + + +template +void +EnumStoreT::writeValues(BufferWriter &writer, + const Index *idxs, size_t count) const +{ + size_t sz(EntryType::fixedSize()); + for (uint32_t i = 0; i < count; ++i) { + Index idx = idxs[i]; + const char *src(_store.getBufferEntry(idx.bufferId(), + idx.offset()) + + EntryBase::size()); + writer.write(src, sz); + } +} + + +template +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + size_t &initSpace) +{ + (void) src; + size_t sz(EntryType::fixedSize()); + if (available < sz) + return -1; + uint32_t entrySize(alignEntrySize(EntryBase::size() + sz)); + initSpace += entrySize; + return sz; +} + +template +ssize_t +EnumStoreT::deserialize(const void *src, + size_t available, + Index &idx) +{ + size_t sz(EntryType::fixedSize()); + if (available < sz) + return -1; + uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID); + btree::BufferState & buffer = _store.getBufferState(activeBufferId); + uint32_t entrySize(alignEntrySize(EntryBase::size() + sz)); + if (buffer.remaining() < entrySize) { + abort(); // not enough space + } + uint64_t offset = buffer.size(); + char *dst(_store.getBufferEntry(activeBufferId, offset)); + memcpy(dst, &_nextEnum, sizeof(uint32_t)); + uint32_t pos = sizeof(uint32_t); + uint32_t refCount(0); + memcpy(dst + pos, &refCount, sizeof(uint32_t)); + pos += sizeof(uint32_t); + memcpy(dst + pos, src, sz); + buffer.pushed_back(entrySize); + ++_nextEnum; + + if (idx.valid()) { + assert(ComparatorType::compare(getValue(idx), + Entry(dst).getValue()) < 0); + } + idx = Index(offset, activeBufferId); + return sz; +} + + +template +bool +EnumStoreT::foldedChange(const Index &idx1, const Index &idx2) +{ + int cmpres = 
FoldedComparatorType::compareFolded(getValue(idx1), + getValue(idx2)); + assert(cmpres <= 0); + return cmpres < 0; +} + + +template +bool +EnumStoreT::findEnum(Type value, + EnumStoreBase::EnumHandle &e) const +{ + ComparatorType cmp(*this, value); + Index idx; + if (_enumDict->findFrozenIndex(cmp, idx)) { + e = idx.ref(); + return true; + } + return false; +} + +template +bool +EnumStoreT::findIndex(Type value, Index &idx) const +{ + ComparatorType cmp(*this, value); + return _enumDict->findIndex(cmp, idx); +} + + +template +void +EnumStoreT::freeUnusedEnums(bool movePostingIdx) +{ + ComparatorType cmp(*this); + if (EntryType::hasFold() && movePostingIdx) { + FoldedComparatorType fcmp(*this); + _enumDict->freeUnusedEnums(cmp, &fcmp); + } else { + _enumDict->freeUnusedEnums(cmp, NULL); + } +} + + +template +void +EnumStoreT::freeUnusedEnums(const IndexVector &toRemove) +{ + ComparatorType cmp(*this); + if (EntryType::hasFold()) { + FoldedComparatorType fcmp(*this); + _enumDict->freeUnusedEnums(toRemove, cmp, &fcmp); + } else { + _enumDict->freeUnusedEnums(toRemove, cmp, NULL); + } +} + + +template +template +void +EnumStoreT::addEnum(Type value, + Index &newIdx, + Dictionary &dict) +{ + typedef typename Dictionary::Iterator DictionaryIterator; + uint32_t entrySize = this->getEntrySize(value); + uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID); + btree::BufferState & buffer = _store.getBufferState(activeBufferId); +#ifdef LOG_ENUM_STORE + LOG(info, + "addEnum(): buffer[%u]: capacity = %" PRIu64 + ", size = %" PRIu64 ", remaining = %" PRIu64 + ", dead = %" PRIu64 ", entrySize = %u", + activeBufferId, buffer.capacity(), + buffer.size(), buffer.remaining(), + buffer._deadElems, entrySize); +#endif + if (buffer.remaining() < entrySize) { + abort(); // not enough space + } + + // check if already present + ComparatorType cmp(*this, value); + DictionaryIterator it(btree::BTreeNode::Ref(), dict.getAllocator()); + it.lower_bound(dict.getRoot(), Index(), cmp); + 
if (it.valid() && !cmp(Index(), it.getKey())) { + newIdx = it.getKey(); + return; + } + + uint64_t offset = buffer.size(); + char * dst = _store.template getBufferEntry(activeBufferId, offset); + this->insertEntry(dst, this->_nextEnum++, 0, value); + buffer.pushed_back(entrySize); + assert(Index::pad(offset) == 0); + newIdx = Index(offset, activeBufferId); + + // update tree with new index + dict.insert(it, newIdx, typename Dictionary::DataType()); + + // Copy posting list idx from next entry if same + // folded value. + // Only for string posting list attributes, i.e. dictionary has + // data and entry type has folded compare. + if (DictionaryIterator::hasData() && EntryType::hasFold()) { + FoldedComparatorType foldCmp(*this); + ++it; + if (!it.valid() || foldCmp(newIdx, it.getKey())) + return; // Next entry does not use same posting list + --it; + --it; + if (it.valid() && !foldCmp(it.getKey(), newIdx)) + return; // Previous entry uses same posting list + if (it.valid()) + ++it; + else + it.begin(); + assert(it.valid() && it.getKey() == newIdx); + ++it; + typename Dictionary::DataType pidx(it.getData()); + dict.thaw(it); + it.writeData(typename Dictionary::DataType()); + --it; + assert(it.valid() && it.getKey() == newIdx); + it.writeData(pidx); + } +} + + +template +void +EnumStoreT::addEnum(Type value, Index & newIdx) +{ + if (_enumDict->hasData()) + addEnum(value, newIdx, + static_cast *>(_enumDict)-> + getDictionary()); + else + addEnum(value, newIdx, + static_cast *>(_enumDict)-> + getDictionary()); +} + + +template +struct TreeBuilderInserter { + static void insert(typename DictionaryType::Builder & builder, + EnumStoreBase::Index enumIdx, + btree::EntryRef postingIdx) + { + (void) postingIdx; + builder.insert(enumIdx, typename DictionaryType::DataType()); + } +}; + +template <> +struct TreeBuilderInserter { + static void insert(EnumPostingTree::Builder & builder, + EnumStoreBase::Index enumIdx, + btree::EntryRef postingIdx) + { + builder.insert(enumIdx, 
postingIdx); + } +}; + + +template +template +void +EnumStoreT::reset(Builder &builder, Dictionary &dict) +{ + typedef typename Dictionary::Builder DictionaryBuilder; + EnumStoreBase::reset(builder.getBufferSize()); + + DictionaryBuilder treeBuilder(dict.getAllocator()); + uint32_t activeBufferId = _store.getActiveBufferId(TYPE_ID); + btree::BufferState & state = _store.getBufferState(activeBufferId); + + // insert entries and update DictionaryBuilder + const typename Builder::Uniques & uniques = builder.getUniques(); + for (typename Builder::Uniques::const_iterator iter = uniques.begin(); + iter != uniques.end(); ++iter) + { + uint64_t offset = state.size(); + Index idx(offset, activeBufferId); + char * dst = _store.template getBufferEntry(activeBufferId, offset); + this->insertEntry(dst, this->_nextEnum++, iter->_refCount, iter->_value); + state.pushed_back(iter->_sz); + + // update DictionaryBuilder with enum index and posting index + TreeBuilderInserter::insert(treeBuilder, idx, btree::EntryRef(iter->_pidx)); + } + + // reset Dictionary + dict.assign(treeBuilder); // destructive copy of treeBuilder +} + + +template +void +EnumStoreT::reset(Builder &builder) +{ + if (_enumDict->hasData()) + reset(builder, + static_cast *>(_enumDict)-> + getDictionary()); + else + reset(builder, + static_cast *>(_enumDict)-> + getDictionary()); +} + + +template +template +void +EnumStoreT::performCompaction(Dictionary &dict) +{ + typedef typename Dictionary::Iterator DictionaryIterator; + uint32_t freeBufferIdx = _store.getActiveBufferId(TYPE_ID); + btree::BufferState & freeBuf = _store.getBufferState(freeBufferIdx); + bool disabledReEnumerate = _disabledReEnumerate; + + uint32_t newEnum = 0; + // copy entries from active buffer to free buffer + for (DictionaryIterator iter = dict.begin(); iter.valid(); ++iter) { + Index activeIdx = iter.getKey(); + + Entry e = this->getEntry(activeIdx); + + // At this point the tree shal never reference any empy stuff. 
+ assert(e.getRefCount() > 0); +#ifdef LOG_ENUM_STORE + LOG(info, "performCompaction(): copy entry: enum = %u, refCount = %u, value = %s", + e.getEnum(), e.getRefCount(), e.getValue()); +#endif + Type value = e.getValue(); + uint32_t refCount = e.getRefCount(); + uint32_t oldEnum = e.getEnum(); + uint32_t entrySize = this->getEntrySize(value); + if (disabledReEnumerate) { + newEnum = oldEnum; // use old enum value + } + + uint64_t offset = freeBuf.size(); + char * dst = _store.template getBufferEntry(freeBufferIdx, offset); + // insert entry into free buffer + this->insertEntry(dst, newEnum, refCount, value); +#ifdef LOG_ENUM_STORE + LOG(info, "performCompaction(): new entry: enum = %u, refCount = %u, value = %s", newEnum, 0, value); +#endif + if (!disabledReEnumerate) { + ++newEnum; + } + freeBuf.pushed_back(entrySize); + assert(Index::pad(offset) == 0); + Index newIdx = Index(offset, freeBufferIdx); +#ifdef LOG_ENUM_STORE + LOG(info, + "performCompaction(): new index: offset = %" PRIu64 + ", bufferIdx = %u", + offset, freeBufferIdx); +#endif + + // update tree with new index + std::atomic_thread_fence(std::memory_order_release); + iter.writeKey(newIdx); + + // update index map with new index + this->_indexMap[oldEnum] = newIdx; + } + if (disabledReEnumerate) { + newEnum = this->_nextEnum; // use old range of enum values + } + this->postCompact(newEnum); +} + + +template +bool +EnumStoreT::performCompaction(uint64_t bytesNeeded) +{ + if ( ! 
this->preCompact(bytesNeeded) ) { + return false; + } + if (_enumDict->hasData()) + performCompaction(static_cast *> + (_enumDict)->getDictionary()); + else + performCompaction(static_cast *> + (_enumDict)->getDictionary()); + return true; +} + + +template +template +void +EnumStoreT::printCurrentContent(vespalib::asciistream &os, + const Dictionary &dict) const +{ + typedef typename Dictionary::ConstIterator DictionaryConstIterator; + + for (DictionaryConstIterator iter = dict.begin(); iter.valid(); ++iter) { + Index idx = iter.getKey(); + if (!this->validIndex(idx)) { + os << "Bad entry: " << idx << '\n'; + } else { + Entry e = this->getEntry(idx); + this->printEntry(os, e); + os << ", " << idx << '\n'; + } + } +} + + +template +void +EnumStoreT::printCurrentContent(vespalib::asciistream &os) const +{ + if (_enumDict->hasData()) + printCurrentContent(os, + static_cast *> + (_enumDict)->getDictionary()); + else + printCurrentContent(os, + static_cast *> + (_enumDict)->getDictionary()); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp new file mode 100644 index 00000000000..3d5744b115b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -0,0 +1,657 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "enumstorebase.h" +#include +#include +#include +#include +#include "enumstore.h" +#include +#include +#include + +namespace search +{ + +using btree::BTreeNode; + +void +EnumStoreBase::verifyBufferSize(uint64_t initBufferSize) +{ + uint64_t alignedInitBufferSize = alignBufferSize(initBufferSize); + if (alignedInitBufferSize > Index::offsetSize()) { + failNewSize(alignedInitBufferSize, Index::offsetSize()); + } +} + +EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, + bool hasPostings) + : _enumDict(NULL), + _store(), + _type(alignBufferSize(initBufferSize)), + _nextEnum(0), + _indexMap(), + _toHoldBuffers(), + _disabledReEnumerate(false) +{ + if (hasPostings) + _enumDict = new EnumStoreDict(*this); + else + _enumDict = new EnumStoreDict(*this); + verifyBufferSize(initBufferSize); + _store.addType(&_type); + _store.initActiveBuffers(); +} + +EnumStoreBase::~EnumStoreBase() +{ + _store.clearHoldLists(); + _store.dropBuffers(); + delete _enumDict; +} + +void +EnumStoreBase::reset(uint64_t initBufferSize) +{ + verifyBufferSize(initBufferSize); + _store.clearHoldLists(); + _store.dropBuffers(); + _type.setInitBufferSize(alignBufferSize(initBufferSize)); + _store.initActiveBuffers(); + clearIndexMap(); + _enumDict->onReset(); + _nextEnum = 0; +} + +uint32_t +EnumStoreBase::getBufferIndex(btree::BufferState::State status) +{ + for (uint32_t i = 0; i < _store.getNumBuffers(); ++i) { + if (_store.getBufferState(i)._state == status) { + return i; + } + } + return Index::numBuffers(); +} + +bool +EnumStoreBase::getCurrentIndex(Index oldIdx, Index & newIdx) const +{ + uint32_t oldEnum = getEnum(oldIdx); + if (oldEnum >= _indexMap.size()) { + return false; + } + newIdx = _indexMap[oldEnum]; + return true; +} + +MemoryUsage +EnumStoreBase::getMemoryUsage() const +{ + return _store.getMemoryUsage(); +} + +AddressSpace +EnumStoreBase::getAddressSpaceUsage() const +{ + const btree::BufferState &activeState = + 
_store.getBufferState(_store.getActiveBufferId(TYPE_ID)); + return AddressSpace(activeState.size() - activeState.getDeadElems(), + DataStoreType::RefType::offsetSize()); +} + +void +EnumStoreBase::getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const +{ + for(size_t i(0); i < sz; i++) { + e[i] = getEnum(Index(v[i])); + } +} + +void +EnumStoreBase::transferHoldLists(generation_t generation) +{ + _enumDict->onTransferHoldLists(generation); + _store.transferHoldLists(generation); +} + +void +EnumStoreBase::trimHoldLists(generation_t firstUsed) +{ + // remove generations in the range [0, firstUsed> + _enumDict->onTrimHoldLists(firstUsed); + _store.trimHoldLists(firstUsed); +} + +bool +EnumStoreBase::preCompact(uint64_t bytesNeeded) +{ + if (getBufferIndex(btree::BufferState::FREE) == Index::numBuffers()) { + return false; + } + btree::BufferState & activeBuf = _store.getBufferState(_store.getActiveBufferId(TYPE_ID)); + + // allocate enough space in free buffer + uint64_t newSize = computeNewSize(activeBuf.size(), activeBuf._deadElems, bytesNeeded); + _type.setInitBufferSize(newSize); + _toHoldBuffers = _store.startCompact(TYPE_ID); + + _indexMap.resize(_nextEnum); + return true; +} + + +void +EnumStoreBase::fallbackResize(uint64_t bytesNeeded) +{ + uint32_t activeBufId = _store.getActiveBufferId(TYPE_ID); + btree::BufferState &activeBuf = _store.getBufferState(activeBufId); + + // allocate enough space in free buffer + uint64_t newSize = computeNewSize(activeBuf.size(), + activeBuf._deadElems, + bytesNeeded); + + uint64_t maxSize = Index::offsetSize(); + + uint64_t fallbackNewSize = newSize + activeBuf._deadElems + 16384; + fallbackNewSize = alignBufferSize(fallbackNewSize); + if (fallbackNewSize > maxSize) + fallbackNewSize = maxSize; + if (fallbackNewSize <= activeBuf._allocElems || + fallbackNewSize < activeBuf._usedElems + bytesNeeded) + failNewSize(activeBuf._usedElems + bytesNeeded, maxSize); + + 
_type.setInitBufferSize(alignBufferSize(fallbackNewSize)); + _type.setWantCompact(); + _store.fallbackResize(activeBufId, fallbackNewSize); +} + + +void +EnumStoreBase::disableReEnumerate() const +{ + assert(!_disabledReEnumerate); + _disabledReEnumerate = true; +} + + +void +EnumStoreBase::enableReEnumerate() const +{ + assert(_disabledReEnumerate); + _disabledReEnumerate = false; +} + + +void +EnumStoreBase::postCompact(uint32_t newEnum) +{ + _store.finishCompact(_toHoldBuffers); + _nextEnum = newEnum; +} + +void +EnumStoreBase::failNewSize(uint64_t minNewSize, uint64_t maxSize) +{ + throw vespalib::IllegalStateException(vespalib::make_string("EnumStoreBase::failNewSize: Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", minNewSize, maxSize)); +} + +uint64_t +EnumStoreBase::computeNewSize(uint64_t used, uint64_t dead, uint64_t needed) +{ + double growRatio = 1.5f; + uint64_t maxSize = Index::offsetSize(); + uint64_t newSize = static_cast + ((used - dead + needed) * growRatio); + newSize = alignBufferSize(newSize); + if (newSize <= maxSize) + return newSize; + newSize = used - dead + needed + 1000000; + newSize = alignBufferSize(newSize); + if (newSize <= maxSize) + return maxSize; + failNewSize(newSize, maxSize); + return 0; +} + + +template +void +EnumStoreBase::reEnumerate(const Tree &tree) +{ + typedef typename Tree::Iterator Iterator; + Iterator it(tree.begin()); + uint32_t enumValue = 0; + while (it.valid()) { + EntryBase eb(getEntryBase(it.getKey())); + eb.setEnum(enumValue); + ++enumValue; + ++it; + } + _nextEnum = enumValue; + std::atomic_thread_fence(std::memory_order_release); +} + + +ssize_t +EnumStoreBase::deserialize0(const void *src, + size_t available, + IndexVector &idx) +{ + size_t left = available; + size_t initSpace = Index::align(1); + const char * p = static_cast(src); + while (left > 0) { + ssize_t sz = deserialize(p, left, initSpace); + if (sz < 0) + return sz; + p += sz; + left -= sz; + } + reset(initSpace); + left = 
available; + p = static_cast(src); + Index idx1; + while (left > 0) { + ssize_t sz = deserialize(p, left, idx1); + if (sz < 0) + return sz; + p += sz; + left -= sz; + idx.push_back(idx1); + } + return available - left; +} + + +template +ssize_t +EnumStoreBase::deserialize(const void *src, + size_t available, + IndexVector &idx, + Tree &tree) +{ + ssize_t sz(deserialize0(src, available, idx)); + if (sz >= 0) { + typename Tree::Builder builder(tree.getAllocator()); + typedef IndexVector::const_iterator IT; + for (IT i(idx.begin()), ie(idx.end()); i != ie; ++i) { + builder.insert(*i, typename Tree::DataType()); + } + tree.assign(builder); + } + return sz; +} + + +template +void +EnumStoreBase::fixupRefCounts(const EnumVector &hist, Tree &tree) +{ + if ( hist.empty() ) + return; + typename Tree::Iterator ti(tree.begin()); + typedef EnumVector::const_iterator HistIT; + + for (HistIT hi(hist.begin()), hie(hist.end()); hi != hie; ++hi, ++ti) { + assert(ti.valid()); + fixupRefCount(ti.getKey(), *hi); + } + assert(!ti.valid()); + freeUnusedEnums(false); +} + + +void +EnumStoreBase::writeEnumValues(BufferWriter &writer, + const Index *idxs, size_t count) const +{ + for (uint32_t i = 0; i < count; ++i) { + uint32_t enumValue = getEnum(idxs[i]); + writer.write(&enumValue, sizeof(uint32_t)); + } +} + + +vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx) { + return os << "offset(" << idx.offset() << "), bufferId(" << idx.bufferId() << "), idx(" << idx.ref() << ")"; +} + + +EnumStoreDictBase::EnumStoreDictBase(EnumStoreBase &enumStore) + : _enumStore(enumStore) +{ +} + + +EnumStoreDictBase::~EnumStoreDictBase(void) +{ +} + + +template +EnumStoreDict::EnumStoreDict(EnumStoreBase &enumStore) + : EnumStoreDictBase(enumStore), + _dict() +{ +} + +template +EnumStoreDict::~EnumStoreDict(void) +{ +} + + +template +void +EnumStoreDict::freezeTree(void) +{ + _dict.getAllocator().freeze(); +} + +template +uint32_t 
+EnumStoreDict::getNumUniques() const +{ + return _dict.size(); +} + + +template +MemoryUsage +EnumStoreDict::getTreeMemoryUsage() const +{ + return _dict.getMemoryUsage(); +} + +template +void +EnumStoreDict::reEnumerate(void) +{ + _enumStore.reEnumerate(_dict); +} + + +template +void +EnumStoreDict:: +writeAllValues(BufferWriter &writer, + btree::BTreeNode::Ref rootRef) const +{ + constexpr size_t BATCHSIZE = 1000; + std::vector idxs; + idxs.reserve(BATCHSIZE); + typename Dictionary::Iterator it(rootRef, _dict.getAllocator()); + while (it.valid()) { + if (idxs.size() >= idxs.capacity()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + idxs.clear(); + } + idxs.push_back(it.getKey()); + ++it; + } + if (!idxs.empty()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + } +} + + +template +ssize_t +EnumStoreDict::deserialize(const void *src, + size_t available, + IndexVector &idx) +{ + return _enumStore.deserialize(src, available, idx, _dict); +} + + +template +void +EnumStoreDict::fixupRefCounts(const EnumVector & hist) +{ + _enumStore.fixupRefCounts(hist, _dict); +} + + +template +void +EnumStoreDict::removeUnusedEnums(const IndexSet &unused, + const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp) +{ + typedef typename Dictionary::Iterator Iterator; + if (unused.empty()) + return; + Iterator it(BTreeNode::Ref(), _dict.getAllocator()); + for (IndexSet::const_iterator iter(unused.begin()), mt(unused.end()); + iter != mt; ++iter) { + it.lower_bound(_dict.getRoot(), *iter, cmp); + assert(it.valid() && !cmp(*iter, it.getKey())); + if (Iterator::hasData() && fcmp != NULL) { + typename Dictionary::DataType pidx(it.getData()); + _dict.remove(it); + if (!it.valid() || (*fcmp)(*iter, it.getKey())) + continue; // Next entry does not use same posting list + --it; + if (it.valid() && !(*fcmp)(it.getKey(), *iter)) + continue; // Previous entry uses same posting list + if (it.valid()) + ++it; + else + it.begin(); + _dict.thaw(it); + 
it.writeData(pidx); + } else { + _dict.remove(it); + } + } +} + +template +void +EnumStoreDict::freeUnusedEnums(const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp) +{ + IndexSet unused; + + // find unused enums + for (typename Dictionary::Iterator iter(_dict.begin()); iter.valid(); + ++iter) { + _enumStore.freeUnusedEnum(iter.getKey(), unused); + } + removeUnusedEnums(unused, cmp, fcmp); +} + +template +void +EnumStoreDict::freeUnusedEnums(const IndexVector &toRemove, + const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp) +{ + IndexSet unused; + for(IndexVector::const_iterator it(toRemove.begin()), mt(toRemove.end()); + it != mt; it++) { + _enumStore.freeUnusedEnum(*it, unused); + } + + removeUnusedEnums(unused, cmp, fcmp); +} + + +template +bool +EnumStoreDict::findIndex(const EnumStoreComparator &cmp, + Index &idx) const +{ + typename Dictionary::Iterator itr = _dict.find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + + +template +bool +EnumStoreDict::findFrozenIndex(const EnumStoreComparator &cmp, + Index &idx) const +{ + typename Dictionary::ConstIterator itr = + _dict.getFrozenView().find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + + +template +void +EnumStoreDict::onReset(void) +{ + _dict.clear(); +} + + +template +void +EnumStoreDict::onTransferHoldLists(generation_t generation) +{ + _dict.getAllocator().transferHoldLists(generation); +} + + +template +void +EnumStoreDict::onTrimHoldLists(generation_t firstUsed) +{ + _dict.getAllocator().trimHoldLists(firstUsed); +} + + +template +BTreeNode::Ref +EnumStoreDict::getFrozenRootRef(void) const +{ + return _dict.getFrozenView().getRoot(); +} + + +template +uint32_t +EnumStoreDict:: +lookupFrozenTerm(BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &comp) const +{ + typename Dictionary::ConstIterator itr(BTreeNode::Ref(), + _dict.getAllocator()); + 
itr.lower_bound(frozenRootRef, Index(), comp); + if (itr.valid() && !comp(Index(), itr.getKey())) { + return 1u; + } + return 0u; +} + + +template +uint32_t +EnumStoreDict:: +lookupFrozenRange(BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &low, + const EnumStoreComparator &high) const +{ + typename Dictionary::ConstIterator lowerDictItr(BTreeNode::Ref(), + _dict.getAllocator()); + lowerDictItr.lower_bound(frozenRootRef, Index(), low); + typename Dictionary::ConstIterator upperDictItr = lowerDictItr; + if (upperDictItr.valid() && !high(Index(), upperDictItr.getKey())) + upperDictItr.seekPast(Index(), high); + return upperDictItr - lowerDictItr; +} + + +template <> +EnumPostingTree & +EnumStoreDict::getPostingDictionary(void) +{ + abort(); +} + + +template <> +EnumPostingTree & +EnumStoreDict::getPostingDictionary(void) +{ + return _dict; +} + + +template <> +const EnumPostingTree & +EnumStoreDict::getPostingDictionary(void) const +{ + abort(); +} + + +template <> +const EnumPostingTree & +EnumStoreDict::getPostingDictionary(void) const +{ + return _dict; +} + + +template +bool +EnumStoreDict::hasData(void) const +{ + return Dictionary::LeafNodeType::hasData(); +} + + +template class btree::DataStoreT >; + +template +void +EnumStoreBase::reEnumerate(const EnumTree &tree); + +template +void +EnumStoreBase::reEnumerate(const EnumPostingTree &tree); + +template +ssize_t +EnumStoreBase::deserialize(const void *src, + size_t available, + IndexVector &idx, + EnumTree &tree); + +template +ssize_t +EnumStoreBase::deserialize(const void *src, + size_t available, + IndexVector &idx, + EnumPostingTree &tree); + +template +void +EnumStoreBase::fixupRefCounts(const EnumVector &hist, + EnumTree &tree); + +template +void +EnumStoreBase::fixupRefCounts( + const EnumVector &hist, + EnumPostingTree &tree); + +template class EnumStoreDict; + +template class EnumStoreDict; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h 
b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h new file mode 100644 index 00000000000..3b8b9823d87 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -0,0 +1,622 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "address_space.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + +class BufferWriter; + +namespace attribute +{ + +class Status; + +} + +class EnumStoreBase; + +class EnumStoreComparator; +class EnumStoreComparatorWrapper; + +typedef btree::DataStoreT > +EnumStoreDataStoreType; +typedef EnumStoreDataStoreType::RefType EnumStoreIndex; +typedef vespalib::Array EnumStoreIndexVector; +typedef vespalib::Array EnumStoreEnumVector; + +typedef btree::BTreeTraits<32, 32, 7, true> EnumTreeTraits; + +typedef btree::BTree EnumTree; +typedef btree::BTree EnumPostingTree; + +struct CompareEnumIndex +{ + typedef EnumStoreIndex Index; + + inline bool + operator()(const Index &lhs, const Index &rhs) const + { + return lhs.ref() < rhs.ref(); + } +}; + +class EnumStoreDictBase +{ +public: + typedef EnumStoreIndex Index; + typedef EnumStoreIndexVector IndexVector; + typedef EnumStoreEnumVector EnumVector; + typedef std::set IndexSet; + typedef vespalib::GenerationHandler::generation_t generation_t; + +protected: + EnumStoreBase &_enumStore; + +public: + EnumStoreDictBase(EnumStoreBase &enumStore); + + virtual + ~EnumStoreDictBase(void); + + virtual void + freezeTree(void) = 0; + + virtual uint32_t + getNumUniques(void) const = 0; + + virtual MemoryUsage + getTreeMemoryUsage(void) const = 0; + + virtual void + reEnumerate(void) = 0; + + virtual void + writeAllValues(BufferWriter &writer, + btree::BTreeNode::Ref rootRef) const = 0; + + virtual ssize_t + deserialize(const void *src, size_t available, + IndexVector &idx) = 0; + + virtual void + fixupRefCounts(const 
EnumVector &hist) = 0; + + virtual void + freeUnusedEnums(const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp) = 0; + + virtual void + freeUnusedEnums(const IndexVector &toRemove, + const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp) = 0; + + virtual bool + findIndex(const EnumStoreComparator &cmp, Index &idx) const = 0; + + virtual bool + findFrozenIndex(const EnumStoreComparator &cmp, Index &idx) const = 0; + + virtual void + onReset(void) = 0; + + virtual void + onTransferHoldLists(generation_t generation) = 0; + + virtual void + onTrimHoldLists(generation_t firstUsed) = 0; + + virtual btree::BTreeNode::Ref + getFrozenRootRef(void) const = 0; + + virtual uint32_t + lookupFrozenTerm(btree::BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &comp) const = 0; + + virtual uint32_t + lookupFrozenRange(btree::BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &low, + const EnumStoreComparator &high) const = 0; + + virtual EnumPostingTree & + getPostingDictionary(void) = 0; + + virtual const EnumPostingTree & + getPostingDictionary(void) const = 0; + + virtual bool + hasData(void) const = 0; +}; + + +template +class EnumStoreDict : public EnumStoreDictBase +{ +protected: + Dictionary _dict; + +public: + EnumStoreDict(EnumStoreBase &enumStore); + + virtual + ~EnumStoreDict(void); + + const Dictionary & + getDictionary() const + { + return _dict; + } + + Dictionary & + getDictionary() + { + return _dict; + } + + virtual void + freezeTree(void); + + virtual uint32_t + getNumUniques(void) const; + + virtual MemoryUsage + getTreeMemoryUsage(void) const; + + virtual void + reEnumerate(void); + + virtual void + writeAllValues(BufferWriter &writer, + btree::BTreeNode::Ref rootRef) const override; + + virtual ssize_t + deserialize(const void *src, size_t available, + IndexVector &idx); + + virtual void + fixupRefCounts(const EnumVector &hist); + + void + removeUnusedEnums(const IndexSet &unused, + const EnumStoreComparator &cmp, + 
const EnumStoreComparator *fcmp); + + virtual void + freeUnusedEnums(const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp); + + virtual void + freeUnusedEnums(const IndexVector &toRemove, + const EnumStoreComparator &cmp, + const EnumStoreComparator *fcmp); + + virtual bool + findIndex(const EnumStoreComparator &cmp, Index &idx) const; + + virtual bool + findFrozenIndex(const EnumStoreComparator &cmp, Index &idx) const; + + virtual void + onReset(void); + + virtual void + onTransferHoldLists(generation_t generation); + + virtual void + onTrimHoldLists(generation_t firstUsed); + + virtual btree::BTreeNode::Ref + getFrozenRootRef(void) const; + + virtual uint32_t + lookupFrozenTerm(btree::BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &comp) const; + + virtual uint32_t + lookupFrozenRange(btree::BTreeNode::Ref frozenRootRef, + const EnumStoreComparator &low, + const EnumStoreComparator &high) const; + + virtual EnumPostingTree & + getPostingDictionary(void); + + virtual const EnumPostingTree & + getPostingDictionary(void) const; + + virtual bool + hasData(void) const; +}; + + +class EnumStoreBase +{ +public: + typedef vespalib::GenerationHandler::generation_t generation_t; + typedef attribute::IAttributeVector::EnumHandle EnumHandle; + typedef EnumStoreDataStoreType DataStoreType; + typedef EnumStoreIndex Index; + typedef EnumStoreIndexVector IndexVector; + typedef EnumStoreEnumVector EnumVector; + + class EntryBase { + protected: + char * _data; + public: + EntryBase(void * data) + : + _data(static_cast(data)) + { + } + + uint32_t + getEnum() const + { + uint32_t *src = reinterpret_cast(_data); + return *src; + } + + uint32_t + getRefCount(void) const + { + uint32_t *src = reinterpret_cast(_data) + 1; + return *src; + } + + void + incRefCount(void) + { + uint32_t *dst = reinterpret_cast(_data) + 1; + ++(*dst); + } + + void + decRefCount(void) + { + uint32_t *dst = reinterpret_cast(_data) + 1; + --(*dst); + } + + void + setEnum(uint32_t 
enumValue) + { + uint32_t *dst = reinterpret_cast(_data); + *dst = enumValue; + } + + void + setRefCount(uint32_t refCount) + { + uint32_t *dst = reinterpret_cast(_data) + 1; + *dst = refCount; + } + + static uint32_t size() { return 2*sizeof(uint32_t); } + }; + + typedef std::set IndexSet; + +private: + void verifyBufferSize(uint64_t initBufferSize); + +protected: + + class EnumBufferType : public btree::BufferType { + private: + uint64_t _initBufferSize; // in bytes + bool _pendingCompact; + bool _wantCompact; + public: + EnumBufferType(uint64_t initBufferSize) + : btree::BufferType(Index::align(1), + Index::offsetSize() / Index::align(1), + Index::offsetSize() / Index::align(1)), + _initBufferSize(initBufferSize), + _pendingCompact(false), + _wantCompact(false) + { + } + virtual size_t calcClustersToAlloc(size_t sizeNeeded, + uint64_t clusterRefSize) const { + (void) sizeNeeded; + uint64_t clusterSize = elementSize() * getClusterSize(); + uint64_t wantedClustersToAlloc = _initBufferSize / clusterSize; + assert(_initBufferSize % clusterSize == 0); + ++wantedClustersToAlloc; // Index(0,0) is illegal + return std::min(wantedClustersToAlloc, clusterRefSize); + } + void setInitBufferSize(uint64_t newSize) { _initBufferSize = newSize; } + + virtual void + onFree(size_t usedElems) + { + btree::BufferType::onFree(usedElems); + _pendingCompact = _wantCompact; + _wantCompact = false; + } + + void + setWantCompact(void) + { + _wantCompact = true; + } + + bool + getPendingCompact(void) const + { + return _pendingCompact; + } + + void + clearPendingCompact(void) + { + _pendingCompact = false; + } + }; + + EnumStoreDictBase *_enumDict; + DataStoreType _store; + EnumBufferType _type; + uint32_t _nextEnum; + IndexVector _indexMap; + std::vector _toHoldBuffers; // used during compaction + // set before backgound flush, cleared during background flush + mutable std::atomic _disabledReEnumerate; + + static const uint32_t TYPE_ID = 0; + + EnumStoreBase(uint64_t initBufferSize, + 
bool hasPostings); + + virtual ~EnumStoreBase(); + + EntryBase getEntryBase(Index idx) const { + return EntryBase(const_cast(_store).getBufferEntry(idx.bufferId(), idx.offset())); + } + btree::BufferState & getBuffer(uint32_t bufferIdx) { + return _store.getBufferState(bufferIdx); + } + const btree::BufferState & getBuffer(uint32_t bufferIdx) const { + return _store.getBufferState(bufferIdx); + } + bool validIndex(Index idx) const { + return (idx.valid() && idx.offset() < _store.getBufferState(idx.bufferId()).size()); + } + + uint32_t getBufferIndex(btree::BufferState::State status); + void postCompact(uint32_t newEnum); + bool preCompact(uint64_t bytesNeeded); + +public: + void + reset(uint64_t initBufferSize); + + virtual uint32_t getFixedSize() const = 0; + size_t getMaxEnumOffset() const { + return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).size(); + } + void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const; + uint32_t getRefCount(Index idx) const { return getEntryBase(idx).getRefCount(); } + uint32_t getEnum(Index idx) const { return getEntryBase(idx).getEnum(); } + void incRefCount(Index idx) { getEntryBase(idx).incRefCount(); } + void decRefCount(Index idx) { getEntryBase(idx).decRefCount(); } + + // Only use when reading from enumerated attribute save files + void + fixupRefCount(Index idx, uint32_t refCount) + { + getEntryBase(idx).setRefCount(refCount); + } + + template + void + fixupRefCounts(const EnumVector &hist, + Tree &tree); + + void clearIndexMap() { IndexVector().swap(_indexMap); } + uint32_t getLastEnum() const { return _nextEnum ? 
_nextEnum - 1 : _nextEnum; } + + inline uint32_t + getNumUniques() const + { + return _enumDict->getNumUniques(); + } + + uint32_t getRemaining() const { + return _store.getBufferState(_store.getActiveBufferId(TYPE_ID)).remaining(); + } + MemoryUsage getMemoryUsage() const; + + inline MemoryUsage + getTreeMemoryUsage() const + { + return _enumDict->getTreeMemoryUsage(); + } + + AddressSpace getAddressSpaceUsage() const; + + bool getCurrentIndex(Index oldIdx, Index & newIdx) const; + + void + transferHoldLists(generation_t generation); + + void trimHoldLists(generation_t firstUsed); + + static void failNewSize(uint64_t minNewSize, uint64_t maxSize); + static uint64_t computeNewSize(uint64_t used, uint64_t dead, uint64_t needed); + + // Align buffers and entries to 4 bytes boundary. + static uint64_t alignBufferSize(uint64_t val) { + return Index::align(val); + } + static uint32_t alignEntrySize(uint32_t val) { + return Index::align(val); + } + + void + fallbackResize(uint64_t bytesNeeded); + + bool + getPendingCompact(void) const + { + return _type.getPendingCompact(); + } + + void + clearPendingCompact(void) + { + _type.clearPendingCompact(); + } + + template + void + reEnumerate(const Tree &tree); + + inline void + reEnumerate(void) + { + _enumDict->reEnumerate(); + } + + // Disable reenumeration during compaction. + void disableReEnumerate() const; + + // Allow reenumeration during compaction. 
+ void enableReEnumerate() const; + + virtual void writeValues(BufferWriter &writer, + const Index *idxs, size_t count) const = 0; + + void writeEnumValues(BufferWriter &writer, + const Index *idxs, size_t count) const; + + virtual ssize_t + deserialize(const void *src, size_t available, size_t &initSpace) = 0; + + virtual ssize_t + deserialize(const void *src, size_t available, Index &idx) = 0; + + virtual bool + foldedChange(const Index &idx1, const Index &idx2) = 0; + + ssize_t + deserialize0(const void *src, size_t available, IndexVector &idx); + + template + ssize_t + deserialize(const void *src, size_t available, IndexVector &idx, + Tree &tree); + + inline ssize_t + deserialize(const void *src, size_t available, + IndexVector &idx) + { + return _enumDict->deserialize(src, available, idx); + } + + virtual void + freeUnusedEnum(Index idx, IndexSet &unused) = 0; + + virtual void + freeUnusedEnums(bool movePostingIdx) = 0; + + virtual void + freeUnusedEnums(const IndexVector &toRemove) = 0; + + inline void + fixupRefCounts(const EnumVector &hist) + { + _enumDict->fixupRefCounts(hist); + } + + inline void + freezeTree(void) + { + _enumDict->freezeTree(); + } + + virtual bool + performCompaction(uint64_t bytesNeeded) = 0; + + EnumStoreDictBase & + getEnumStoreDict(void) + { + return *_enumDict; + } + + const EnumStoreDictBase & + getEnumStoreDict(void) const + { + return *_enumDict; + } + + EnumPostingTree & + getPostingDictionary(void) + { + return _enumDict->getPostingDictionary(); + } + + const EnumPostingTree & + getPostingDictionary(void) const + { + return _enumDict->getPostingDictionary(); + } +}; + + +vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx); + + +/** + * Base comparator class needed by the btree. + **/ +class EnumStoreComparator { +public: + typedef EnumStoreBase::Index EnumIndex; + virtual ~EnumStoreComparator() {} + /** + * Compare the values represented by the given enum indexes. 
+ * Uses the enum store to map from enum index to actual value. + **/ + virtual bool operator() (const EnumIndex & lhs, const EnumIndex & rhs) const = 0; +}; + + +class EnumStoreComparatorWrapper +{ + const EnumStoreComparator &_comp; +public: + typedef EnumStoreBase::Index EnumIndex; + EnumStoreComparatorWrapper(const EnumStoreComparator &comp) + : _comp(comp) + { + } + + inline bool operator()(const EnumIndex &lhs, const EnumIndex &rhs) const + { + return _comp(lhs, rhs); + } +}; + +extern template class +btree::DataStoreT >; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp b/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp new file mode 100644 index 00000000000..ae011bbcb0a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/extendableattributes.cpp @@ -0,0 +1,162 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "extendableattributes.h" +#include + +namespace search { + +//******************** CollectionType::SINGLE ********************// + + +SingleStringExtAttribute::SingleStringExtAttribute(const vespalib::string & name) : + StringDirectAttrVector< AttrVector::Features >(name, Config(BasicType::STRING, CollectionType::SINGLE)) +{ + setEnum(false); + setSortedEnum(false); +} + +bool SingleStringExtAttribute::addDoc(DocId & docId) +{ + size_t offset(_buffer.size()); + _buffer.push_back('\0'); + _buffer.push_back(0); + docId = _offsets.size(); + _offsets.push_back(offset); + incNumDocs(); + setCommittedDocIdLimit(getNumDocs()); + return true; +} + +bool SingleStringExtAttribute::add(const char * v, int32_t) +{ + const size_t start(_offsets.back()); + const size_t sz(strlen(v) + 1); + _buffer.resize(start+sz); + strcpy(&_buffer[start], v); + return true; +} + + +//******************** CollectionType::ARRAY ********************// + + +MultiStringExtAttribute::MultiStringExtAttribute(const vespalib::string & name, 
const CollectionType & ctype) : + StringDirectAttrVector< AttrVector::Features > + (name, Config(BasicType::STRING, ctype)) +{ + setEnum(false); + setSortedEnum(false); +} + +MultiStringExtAttribute::MultiStringExtAttribute(const vespalib::string & name) : + StringDirectAttrVector< AttrVector::Features > + (name, Config(BasicType::STRING, CollectionType::ARRAY)) +{ + setEnum(false); + setSortedEnum(false); +} + +bool MultiStringExtAttribute::addDoc(DocId & docId) +{ + docId = _idx.size() - 1; + _idx.push_back(_idx.back()); + incNumDocs(); + setCommittedDocIdLimit(getNumDocs()); + return true; +} + +bool MultiStringExtAttribute::add(const char * v, int32_t) +{ + const size_t start(_buffer.size()); + const size_t sz(strlen(v) + 1); + _buffer.resize(start+sz); + strcpy(&_buffer[start], v); + + _offsets.push_back(start); + + _idx.back()++; + checkSetMaxValueCount(_idx.back() - _idx[_idx.size() - 2]); + return true; +} + + +//******************** CollectionType::WSET ********************// + +WeightedSetIntegerExtAttribute::WeightedSetIntegerExtAttribute(const vespalib::string & name) : + WeightedSetExtAttributeBase(name) +{ +} + +bool +WeightedSetIntegerExtAttribute::add(int64_t v, int32_t w) +{ + addWeight(w); + MultiIntegerExtAttribute::add(v); + return true; +} + +uint32_t +WeightedSetIntegerExtAttribute::get(DocId doc, AttributeVector::WeightedInt * v, uint32_t sz) const +{ + uint32_t valueCount = _idx[doc + 1] - _idx[doc]; + uint32_t num2Read = std::min(valueCount, sz); + for (uint32_t i = 0; i < num2Read; ++i) { + v[i] = AttributeVector::WeightedInt(_data[_idx[doc] + i], getWeightHelper(doc, i)); + } + return valueCount; +} + +WeightedSetFloatExtAttribute::WeightedSetFloatExtAttribute(const vespalib::string & name) : + WeightedSetExtAttributeBase(name) +{ +} + +bool +WeightedSetFloatExtAttribute::add(double v, int32_t w) +{ + addWeight(w); + MultiFloatExtAttribute::add(v); + return true; +} + +uint32_t +WeightedSetFloatExtAttribute::get(DocId doc, 
AttributeVector::WeightedFloat * v, uint32_t sz) const +{ + uint32_t valueCount = _idx[doc + 1] - _idx[doc]; + uint32_t num2Read = std::min(valueCount, sz); + for (uint32_t i = 0; i < num2Read; ++i) { + v[i] = AttributeVector::WeightedFloat(_data[_idx[doc] + i], getWeightHelper(doc, i)); + } + return valueCount; +} + +WeightedSetStringExtAttribute::WeightedSetStringExtAttribute(const vespalib::string & name) : + WeightedSetExtAttributeBase(name) +{ + setEnum(false); + setSortedEnum(false); +} + +bool +WeightedSetStringExtAttribute::add(const char * v, int32_t w) +{ + addWeight(w); + MultiStringExtAttribute::add(v); + return true; +} + +uint32_t +WeightedSetStringExtAttribute::get(DocId doc, AttributeVector::WeightedString * v, uint32_t sz) const +{ + return getAllHelper(doc, v, sz); +} + +uint32_t +WeightedSetStringExtAttribute::get(DocId doc, AttributeVector::WeightedConstChar * v, uint32_t sz) const +{ + return getAllHelper(doc, v, sz); +} + + +} diff --git a/searchlib/src/vespa/searchlib/attribute/extendableattributes.h b/searchlib/src/vespa/searchlib/attribute/extendableattributes.h new file mode 100644 index 00000000000..ba5776d6b0c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/extendableattributes.h @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * @class search::SearchVisitor + * + * @brief Visitor that applies a search query to visitor data and converts them to a SearchResultCommand + */ +#pragma once + +#include +#include + +namespace search { + +// Translates the actual value type to the type required by IExtendAttribute. 
+template struct AddValueType { + typedef int64_t Type; +}; +template <> struct AddValueType { + typedef double Type; +}; + +//******************** CollectionType::SINGLE ********************// + +/* Selects the numeric attribute base class: IntegerAttributeTemplate in the primary template, FloatingPointAttributeTemplate in the specialization below. */ template struct AttributeTemplate { + typedef search::IntegerAttributeTemplate Type; +}; +template <> struct AttributeTemplate { + typedef search::FloatingPointAttributeTemplate Type; +}; + +/* Single value numeric attribute that is filled through IExtendAttribute. getSearch() hands back an empty SearchContext::UP and onLoad() reports false. */ template +class SingleExtAttribute + : public NumericDirectAttrVector, + typename AttributeTemplate::Type>, + public IExtendAttribute +{ + typedef typename SingleExtAttribute::NumDirectAttrVec Super; + typedef typename Super::Config Config; + typedef typename Super::BasicType BasicType; + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override + { + (void) term; + (void) params; + return AttributeVector::SearchContext::UP(); + } + IExtendAttribute * getExtendInterface() override { return this; } +public: + SingleExtAttribute(const vespalib::string &name) + : Super(name, Config(BasicType::fromType(T()), + attribute::CollectionType::SINGLE)) {} + + /* Appends a document holding attribute::getUndefined(), reports its id through docId and sets the committed doc id limit to the new document count. Always returns true. */ virtual bool addDoc(typename Super::DocId &docId) { + docId = this->_data.size(); + this->_data.push_back(attribute::getUndefined()); + this->incNumDocs(); + this->setCommittedDocIdLimit(this->getNumDocs()); + return true; + } + /* Overwrites the value of the last added document; the weight argument is ignored. NOTE(review): uses _data.back(), so presumably addDoc() must have been called first - confirm against callers. */ virtual bool add(typename AddValueType::Type v, int32_t = 1) { + this->_data.back() = v; + return true; + } + virtual bool onLoad() override { + return false; // Emulate that this attribute is never loaded + } +}; + +typedef SingleExtAttribute SingleInt8ExtAttribute; +typedef SingleExtAttribute SingleInt16ExtAttribute; +typedef SingleExtAttribute SingleInt32ExtAttribute; +typedef SingleExtAttribute SingleInt64ExtAttribute; +typedef SingleExtAttribute SingleFloatExtAttribute; + +typedef SingleInt64ExtAttribute SingleIntegerExtAttribute; + +/* Single value string attribute filled through IExtendAttribute; addDoc() and add() are defined out of line. */ class SingleStringExtAttribute + : public StringDirectAttrVector< AttrVector::Features >, 
public IExtendAttribute +{ + IExtendAttribute * getExtendInterface() override { return this; } +public: + SingleStringExtAttribute(const vespalib::string & name); + virtual bool addDoc(DocId & docId); + virtual bool add(const char * v, int32_t w = 1); + virtual bool onLoad() override { + return false; // Emulate that this attribute is never loaded + } +}; + +//******************** CollectionType::ARRAY ********************// + +/* Array numeric attribute filled through IExtendAttribute. Values are appended to _data; _idx holds one end offset per document, so the values of document d span _idx[d] up to _idx[d + 1]. */ template +class MultiExtAttribute + : public NumericDirectAttrVector, + typename AttributeTemplate::Type>, + public IExtendAttribute +{ + typedef typename MultiExtAttribute::NumDirectAttrVec Super; + typedef typename Super::Config Config; + typedef typename Super::BasicType BasicType; + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override + { + (void) term; + (void) params; + return AttributeVector::SearchContext::UP(); + } + IExtendAttribute * getExtendInterface() override { return this; } +protected: + /* Lets subclasses pick the collection type (used by the weighted set base further down). */ MultiExtAttribute(const vespalib::string &name, const attribute::CollectionType &ctype) + : Super(name, Config(BasicType::fromType(T()), ctype)) {} +public: + MultiExtAttribute(const vespalib::string &name) + : Super(name, Config(BasicType::fromType(static_cast(0)), + attribute::CollectionType::ARRAY)) {} + + /* Starts a new, empty document: its id is the index of the current last entry of _idx, and the pushed end offset is a copy of the previous one. Always returns true. */ virtual bool addDoc(typename Super::DocId &docId) { + docId = this->_idx.size() - 1; + this->_idx.push_back(this->_idx.back()); + this->incNumDocs(); + this->setCommittedDocIdLimit(this->getNumDocs()); + return true; + } + /* Appends v to the last added document, bumps the end offset of that document and passes its value count to checkSetMaxValueCount(). The weight argument is ignored. NOTE(review): reads idx[idx.size() - 2], so presumably addDoc() must have been called first - confirm against callers. */ virtual bool add(typename AddValueType::Type v, int32_t = 1) { + this->_data.push_back(v); + std::vector &idx = this->_idx; + idx.back()++; + this->checkSetMaxValueCount(idx.back() - idx[idx.size() - 2]); + return true; + } + virtual bool onLoad() override { + return false; // Emulate that this attribute is never loaded + } +}; + +typedef MultiExtAttribute MultiInt8ExtAttribute; +typedef MultiExtAttribute 
MultiInt16ExtAttribute; +typedef MultiExtAttribute MultiInt32ExtAttribute; +typedef MultiExtAttribute MultiInt64ExtAttribute; +typedef MultiExtAttribute MultiFloatExtAttribute; + +typedef MultiInt64ExtAttribute MultiIntegerExtAttribute; + +/* Array string attribute filled through IExtendAttribute; addDoc() and add() are defined out of line. The protected constructor takes the collection type from the subclass. */ class MultiStringExtAttribute : + public StringDirectAttrVector< AttrVector::Features >, + public IExtendAttribute +{ + IExtendAttribute * getExtendInterface() override { return this; } +protected: + MultiStringExtAttribute(const vespalib::string & name, const attribute::CollectionType & ctype); + +public: + MultiStringExtAttribute(const vespalib::string & name); + virtual bool addDoc(DocId & docId); + virtual bool add(const char * v, int32_t w = 1); + virtual bool onLoad() override { + return false; // Emulate that this attribute is never loaded + } +}; + + +//******************** CollectionType::WSET ********************// + +/* Adds a weight store to the multi value base class B, which is constructed with CollectionType::WSET. */ template +class WeightedSetExtAttributeBase : public B +{ +private: + std::vector _weights; + +protected: + /* Appends one weight; the add() implementations visible in this patch call it before appending the value itself. */ void addWeight(int32_t w) { + _weights.push_back(w); + } + /* Weight of value number idx of document docId, read at position _idx[docId] + idx of _weights. */ int32_t getWeightHelper(AttributeVector::DocId docId, uint32_t idx) const { + return _weights[this->_idx[docId] + idx]; + } + WeightedSetExtAttributeBase(const vespalib::string & name) : + B(name, attribute::CollectionType::WSET), + _weights() + { + } +}; + +/* Weighted set of integers; getSearch() hands back an empty SearchContext::UP. */ class WeightedSetIntegerExtAttribute + : public WeightedSetExtAttributeBase +{ + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override + { + (void) term; + (void) params; + return AttributeVector::SearchContext::UP(); + } +public: + WeightedSetIntegerExtAttribute(const vespalib::string & name); + virtual bool add(int64_t v, int32_t w = 1); + virtual uint32_t get(DocId doc, AttributeVector::WeightedInt * v, uint32_t sz) const; +}; + +/* Weighted set of floating point values; getSearch() hands back an empty SearchContext::UP. */ class WeightedSetFloatExtAttribute + : public WeightedSetExtAttributeBase +{ + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const 
AttributeVector::SearchContext::Params & params) const override + { + (void) term; + (void) params; + return AttributeVector::SearchContext::UP(); + } +public: + WeightedSetFloatExtAttribute(const vespalib::string & name); + virtual bool add(double v, int32_t w = 1); + virtual uint32_t get(DocId doc, AttributeVector::WeightedFloat * v, uint32_t sz) const; +}; + +/* Weighted set of strings. */ class WeightedSetStringExtAttribute + : public WeightedSetExtAttributeBase +{ +private: + /* Pointer into _buffer at the offset stored for value number idx of document doc. */ const char * getHelper(DocId doc, int idx) const { + return &_buffer[_offsets[_idx[doc] + idx]]; + } + /* Copies at most sz (string, weight) pairs of document doc into v. Returns the full value count of the document, which can be larger than the number of entries written. */ template + uint32_t getAllHelper(DocId doc, T * v, uint32_t sz) const + { + uint32_t valueCount = _idx[doc + 1] - _idx[doc]; + uint32_t num2Read = std::min(valueCount, sz); + for (uint32_t i = 0; i < num2Read; ++i) { + v[i] = T(getHelper(doc, i), getWeightHelper(doc, i)); + } + return valueCount; + } + +public: + WeightedSetStringExtAttribute(const vespalib::string & name); + virtual bool add(const char * v, int32_t w = 1); + virtual uint32_t get(DocId doc, AttributeVector::WeightedString * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, AttributeVector::WeightedConstChar * v, uint32_t sz) const; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp new file mode 100644 index 00000000000..ba36774bf29 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "fixedsourceselector.h" +#include "singlenumericattribute.hpp" + +namespace search { + +namespace { + /* Config used for the backing source store: an attribute of BasicType::INT8. */ attribute::Config getConfig() { return attribute::Config(attribute::BasicType::INT8); } +} + +/* Iterator over the selector's source store; holds an AttributeGuard on the underlying attribute (_realSource). */ FixedSourceSelector::Iterator::Iterator(const FixedSourceSelector & sourceSelector) : + ISourceSelector::Iterator(sourceSelector._source), + _attributeGuard(sourceSelector._realSource) +{ +} + +/* Creates the backing SourceStore on attrBaseFileName, reserves room for initialNumDocs documents and commits. */ FixedSourceSelector::FixedSourceSelector(queryeval::Source defaultSource, + const vespalib::string & attrBaseFileName, + uint32_t initialNumDocs) : + SourceSelector(defaultSource, AttributeVector::SP(new SourceStore(attrBaseFileName, getConfig()))), + _source(static_cast(*_realSource)) +{ + reserve(initialNumDocs); + _source.commit(); +} + +FixedSourceSelector::~FixedSourceSelector() +{ +} + +/* Builds a new selector on attrBaseFileName in which the default source and every per-document source have been mapped through getNewSource(source, diff), and whose base id is getBaseId() + diff. */ FixedSourceSelector::UP +FixedSourceSelector::cloneAndSubtract(const vespalib::string & attrBaseFileName, + uint32_t diff) +{ + queryeval::Source newDefault = getNewSource(getDefaultSource(), diff); + FixedSourceSelector::UP selector(new FixedSourceSelector(newDefault, attrBaseFileName, _source.getNumDocs()-1)); + for (uint32_t docId = 0; docId < _source.getNumDocs(); ++docId) { + queryeval::Source src = _source.get(docId); + src = getNewSource(src, diff); + assert(src < SOURCE_LIMIT); + selector->_source.set(docId, src); + } + selector->_source.commit(); + selector->setBaseId(getBaseId() + diff); + return selector; +} + +/* Reads the load info for baseFileName, constructs a selector from its header (default source, attribute base file name, base id) and loads the source attribute. */ FixedSourceSelector::UP +FixedSourceSelector::load(const vespalib::string & baseFileName) +{ + LoadInfo::UP info = extractLoadInfo(baseFileName); + info->load(); + FixedSourceSelector::UP selector(new FixedSourceSelector( + info->header()._defaultSource, + info->header()._baseFileName, + 0)); + selector->setBaseId(info->header()._baseId); + selector->_source.load(); + return selector; +} + +/* Grows the source attribute until doc id numDocs exists (numDocs + 1 documents) and gives every added document the default source. Does nothing when enough documents exist already. NOTE(review): presumably addDoc() reports the id of the added document through its argument - confirm. */ void FixedSourceSelector::reserve(uint32_t numDocs) +{ + const uint32_t maxDoc(_source.getNumDocs()); + const uint32_t newMaxDocIdPlussOne(numDocs + 1); + if 
(newMaxDocIdPlussOne > maxDoc) { + uint32_t newDocId(0); + for (_source.addDoc(newDocId); newDocId < numDocs; _source.addDoc(newDocId)); + for (uint32_t i = maxDoc; i < newMaxDocIdPlussOne; ++i) { + _source.set(i, getDefaultSource()); + } + } +} + +/* Stores source for docId, growing the attribute first when needed, and commits. */ void +FixedSourceSelector::setSource(uint32_t docId, queryeval::Source source) +{ + assert(source < SOURCE_LIMIT); + /** + * Because the matching loop advances 1 past the end, we need to initialize + * data that far too. + **/ + reserve(docId+1); + _source.update(docId, source); + _source.commit(); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h new file mode 100644 index 00000000000..3e787523909 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/fixedsourceselector.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributeguard.h" +#include "sourceselector.h" + +namespace search { + +/* Source selector that keeps the source of every document in a SourceStore attribute (_source). */ class FixedSourceSelector : public SourceSelector +{ +private: + SourceStore & _source; + queryeval::Source getSource(uint32_t docId) const { + return _source.getFast(docId); + } + void reserve(uint32_t numDocs); + +public: + typedef std::unique_ptr UP; + class Iterator : public ISourceSelector::Iterator { + private: + AttributeGuard _attributeGuard; + public: + Iterator(const FixedSourceSelector & sourceSelector); + }; + +public: + FixedSourceSelector(queryeval::Source defaultSource, + const vespalib::string & attrBaseFileName, + uint32_t initialNumDocs = 0); + virtual ~FixedSourceSelector(); + + FixedSourceSelector::UP cloneAndSubtract(const vespalib::string & attrBaseFileName, uint32_t diff); + static FixedSourceSelector::UP load(const vespalib::string & baseFileName); + + // Inherit doc from ISourceSelector + virtual void setSource(uint32_t docId, queryeval::Source source); + virtual uint32_t getDocIdLimit() 
const { + return _source.getNumDocs() - 1; + } + virtual ISourceSelector::Iterator::UP createIterator() const { + return ISourceSelector::Iterator::UP(new Iterator(*this)); + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp new file mode 100644 index 00000000000..069a936c8ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp @@ -0,0 +1,283 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "flagattribute.h" +#include + +#include +LOG_SETUP(".searchlib.attribute.flagattribute"); + +#include +#include +#include "multivaluemapping.hpp" +#include + +namespace search { + +using queryeval::SearchIterator; +namespace +{ + + +/* Callback object handed to fillMapped() by onLoadEnumerated(): save() looks up enum index e in _map and passes the value to setNewBVValue() for docId. */ template +class SaveBits +{ + const T *_map; + const size_t _mapSize; + FA &_fa; + +public: + SaveBits(const T *map, + const size_t mapSize, + FA &fa) + : _map(map), + _mapSize(mapSize), + _fa(fa) + { + } + + void + save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight) + { + (void) vci; + (void) weight; + assert(e < _mapSize); + _fa.setNewBVValue(docId, _map[e]); + } +}; + +} + + +/* Sets up 256 bit vector slots and takes the initial bit vector size from the initial document capacity of the grow strategy. */ template +FlagAttributeT::FlagAttributeT(const vespalib::string & baseFileName, const AttributeVector::Config & cfg) : + B(baseFileName, cfg), + _bitVectorHolder(), + _bitVectorStore(256), + _bitVectors(256), + _bitVectorSize(cfg.getGrowStrategy().getDocsInitialCapacity()) +{ +} + +/* Creates a SearchContext for qTerm on this attribute; params is unused. */ template +AttributeVector::SearchContext::UP +FlagAttributeT::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + return AttributeVector::SearchContext::UP (new SearchContext(std::move(qTerm), *this)); +} + +/* Clears bit doc in the bit vector of every value currently stored for doc; values without a bit vector are skipped. */ template +void FlagAttributeT::clearOldValues(DocId doc) +{ + const typename B::WType * values(NULL); + for (uint32_t i(0), m(this->get(doc, values)); i < m; i++) { + BitVector * bv = 
_bitVectors[getOffset(values[i].value())]; + if (bv != NULL) { + bv->clearBit(doc); + } + } +} + +/* Loads an enumerated attribute: takes the document count from the reader, uses the buffer from loadUDAT() as the enum index to value map and lets fillMapped() populate the multi value mapping, with a SaveBits callback updating the bit vectors. Always returns true. */ template +bool +FlagAttributeT::onLoadEnumerated(typename B::ReaderBase &attrReader) +{ + typedef typename B::WType::ValueType TT; + + uint32_t numDocs = attrReader.getNumIdx() - 1; + uint64_t numValues = attrReader.getNumValues(); + uint64_t enumCount = attrReader.getEnumCount(); + assert(numValues == enumCount); + (void) enumCount; + + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + + if (numValues > 0) + _bitVectorSize = numDocs; + + FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT()); + const TT *map = reinterpret_cast(udatBuffer->buffer()); + assert((udatBuffer->size() % sizeof(TT)) == 0); + size_t mapSize = udatBuffer->size() / sizeof(TT); + SaveBits, TT> saver(map, mapSize, *this); + uint32_t maxvc = this->_mvMapping.fillMapped(attrReader, + numValues, + map, + mapSize, + saver, + this->getNumDocs(), + this->hasWeightedSetType()); + this->checkSetMaxValueCount(maxvc); + + return true; +} + +/* Drops all bit vectors and resets _bitVectorSize to 0, the value setNewValues() treats as the being-loaded marker, then delegates to B::onLoad(). */ template +bool FlagAttributeT::onLoad() +{ + for (size_t i(0), m(_bitVectors.size()); i < m; i++) { + _bitVectorStore[i].reset(); + _bitVectors[i] = NULL; + } + _bitVectorSize = 0; + return B::onLoad(); +} + +/* Stores values for doc in the base class and sets bit doc in the bit vector of each value, creating that bit vector with _bitVectorSize bits on first use. */ template +void FlagAttributeT::setNewValues(DocId doc, const std::vector & values) +{ + B::setNewValues(doc, values); + if (_bitVectorSize == 0) { // attribute being loaded + _bitVectorSize = this->getNumDocs(); + } + for (uint32_t i(0), m(values.size()); i < m; i++) { + typename B::WType value = values[i]; + uint32_t offset = getOffset(value); + BitVector * bv = _bitVectors[offset]; + if (bv == NULL) { + assert(_bitVectorSize >= this->getNumDocs()); + _bitVectorStore[offset] = BitVector::create(_bitVectorSize); + _bitVectors[offset] = _bitVectorStore[offset].get(); + bv = _bitVectors[offset]; + bv->invalidateCachedCount(); + ensureGuardBit(*bv); + } + bv->setBit(doc); + } +} + +template +void +FlagAttributeT::setNewBVValue(DocId doc, typename 
B::WType::ValueType value) +{ + uint32_t offset = getOffset(value); + BitVector * bv = _bitVectors[offset]; + if (bv == NULL) { + assert(_bitVectorSize >= this->getNumDocs()); + _bitVectorStore[offset] = BitVector::create(_bitVectorSize); + _bitVectors[offset] = _bitVectorStore[offset].get(); + bv = _bitVectors[offset]; + bv->invalidateCachedCount(); + ensureGuardBit(*bv); + } + bv->setBit(doc); +} + + +template +bool +FlagAttributeT::onAddDoc(DocId doc) +{ + bool retval = false; + if (doc >= _bitVectorSize) { + resizeBitVectors(this->getNumDocs()); + retval = true; + } else { + ensureGuardBit(); + } + std::atomic_thread_fence(std::memory_order_release); + clearGuardBit(doc); + return retval; +} + +template +void +FlagAttributeT::ensureGuardBit(BitVector & bv) +{ + if (this->getNumDocs() < bv.size()) { + bv.setBit(this->getNumDocs()); // add guard bit to avoid scanning to the end during search + } +} + +template +void +FlagAttributeT::ensureGuardBit() +{ + for (uint32_t i = 0; i < _bitVectors.size(); ++i) { + BitVector * bv = _bitVectors[i]; + if (bv != NULL) { + ensureGuardBit(*bv); + } + } +} + +template +void +FlagAttributeT::clearGuardBit(DocId doc) +{ + for (uint32_t i = 0; i < _bitVectors.size(); ++i) { + BitVector * bv = _bitVectors[i]; + if (bv != NULL) { + bv->clearBit(doc); // clear guard bit and start using this doc id + } + } +} + +template +void +FlagAttributeT::resizeBitVectors(uint32_t neededSize) +{ + const GrowStrategy & gs = this->getConfig().getGrowStrategy(); + uint32_t newSize = neededSize + (neededSize * gs.getDocsGrowPercent() / 100) + gs.getDocsGrowDelta(); + for (uint32_t i = 0; i < _bitVectors.size(); ++i) { + BitVector * bv = _bitVectors[i]; + if (bv != NULL) { + vespalib::GenerationHeldBase::UP hold(bv->grow(newSize)); + ensureGuardBit(*bv); + _bitVectorHolder.hold(std::move(hold)); + } + } + _bitVectorSize = newSize; + _bitVectorHolder.transferHoldLists(this->getCurrentGeneration()); +} + + +template +void 
+FlagAttributeT::removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed) +{ + B::removeOldGenerations(firstUsed); + _bitVectorHolder.trimHoldLists(firstUsed); +} + +template +FlagAttributeT::SearchContext::SearchContext(QueryTermSimple::UP qTerm, const FlagAttributeT & toBeSearched) : + BaseSC(std::move(qTerm), toBeSearched), + _zeroHits(false) +{ +} + +template +SearchIterator::UP +FlagAttributeT::SearchContext::createIterator(fef::TermFieldMatchData * + matchData, + bool strict) +{ + if (valid()) { + if (_low == _high) { + const Attribute & attr(static_cast(attribute())); + const BitVector * bv(attr.getBitVector(_low)); + if (bv != NULL) { + return BitVectorIterator::create(bv, attr.getCommittedDocIdLimit(), *matchData, strict); + } else { + return SearchIterator::UP(new queryeval::EmptySearch()); + } + } else { + SearchIterator::UP flagIterator( + strict + ? new FlagAttributeIteratorStrict::SearchContext>(*this, matchData) + : new FlagAttributeIteratorT::SearchContext>(*this, matchData)); + return flagIterator; + } + } else { + return SearchIterator::UP(new queryeval::EmptySearch()); + } +} + +template class FlagAttributeT; +template class FlagAttributeT; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.h b/searchlib/src/vespa/searchlib/attribute/flagattribute.h new file mode 100644 index 00000000000..998cc54a44b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.h @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { + +typedef MultiValueNumericAttribute< IntegerAttributeTemplate, multivalue::MVMTemplateArg, multivalue::Index32> > FlagBaseImpl; +typedef MultiValueNumericAttribute< IntegerAttributeTemplate, multivalue::MVMTemplateArg, multivalue::Index64> > HugeFlagBaseImpl; + +template +class FlagAttributeT : public B { +public: + FlagAttributeT(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); +private: + typedef AttributeVector::DocId DocId; + typedef FlagBaseImpl::ArraySearchContext BaseSC; + class SearchContext : public BaseSC { + public: + typedef FlagAttributeT Attribute; + SearchContext(QueryTermSimple::UP qTerm, const FlagAttributeT & toBeSearched); + + virtual std::unique_ptr + createIterator(fef::TermFieldMatchData * matchData, + bool strict); + + private: + bool _zeroHits; + + template friend class FlagAttributeIteratorT; + template friend class FlagAttributeIteratorStrict; + }; + virtual bool onLoad(); + + virtual bool + onLoadEnumerated(typename B::ReaderBase &attrReader); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + virtual void clearOldValues(DocId doc); + virtual void setNewValues(DocId doc, const std::vector & values); + +public: + void + setNewBVValue(DocId doc, typename B::WType::ValueType value); + +private: + virtual bool onAddDoc(DocId doc); + void ensureGuardBit(BitVector & bv); + void ensureGuardBit(); + void clearGuardBit(DocId doc); + void resizeBitVectors(uint32_t neededSize); + void removeOldGenerations(vespalib::GenerationHandler::generation_t firstUsed); + uint32_t getOffset(int8_t value) const { return value + 128; } + BitVector * getBitVector(typename B::BaseType value) const { + return _bitVectors[value + 128]; + } + + vespalib::GenerationHolder _bitVectorHolder; + std::vector > _bitVectorStore; + std::vector _bitVectors; + uint32_t _bitVectorSize; 
+ template friend class FlagAttributeIteratorT; + template friend class FlagAttributeIteratorStrict; +}; + +typedef FlagAttributeT FlagAttribute; +typedef FlagAttributeT HugeFlagAttribute; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.cpp b/searchlib/src/vespa/searchlib/attribute/floatbase.cpp new file mode 100644 index 00000000000..8532302a70c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.cpp @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include "floatbase.h" +#include + +LOG_SETUP(".searchlib.attribute.floatbase"); + +#include + + +namespace search { + +IMPLEMENT_IDENTIFIABLE_ABSTRACT(FloatingPointAttribute, NumericAttribute); + +FloatingPointAttribute::FloatingPointAttribute(const vespalib::string & name, const Config & c) : + NumericAttribute(name, c), + _changes() +{ +} + +uint32_t FloatingPointAttribute::clearDoc(DocId doc) +{ + uint32_t removed(0); + if (hasMultiValue() && (doc < getNumDocs())) { + removed = getValueCount(doc); + } + AttributeVector::clearDoc(_changes, doc); + + return removed; +} + +uint32_t FloatingPointAttribute::get(DocId doc, WeightedString * s, uint32_t sz) const +{ + WeightedFloat * v = new WeightedFloat[sz]; + unsigned num(static_cast(this)->get(doc, v, sz)); + for(unsigned i(0); i < num; i++) { + char tmp[32]; + snprintf(tmp, sizeof(tmp), "%g", v[i].getValue()); + s[i] = WeightedString(tmp, v[i].getWeight()); + } + delete [] v; + return num; +} + +uint32_t FloatingPointAttribute::get(DocId doc, WeightedConstChar * v, uint32_t sz) const +{ + (void) doc; + (void) v; + (void) sz; + return 0; +} + +uint32_t FloatingPointAttribute::get(DocId doc, vespalib::string * s, uint32_t sz) const +{ + double * v = new double[sz]; + unsigned num(static_cast(this)->get(doc, v, sz)); + for(unsigned i(0); i < num; i++) { + char tmp[32]; + snprintf(tmp, sizeof(tmp), "%g", 
v[i]); + s[i] = tmp; + } + delete [] v; + return num; +} + +uint32_t FloatingPointAttribute::get(DocId doc, const char ** v, uint32_t sz) const +{ + (void) doc; + (void) v; + (void) sz; + return 0; +} + +bool FloatingPointAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) +{ + double v = fv.getAsDouble(); + return AttributeVector::adjustWeight(_changes, doc, NumericChangeData(v), wAdjust); +} + +bool FloatingPointAttribute::apply(DocId doc, const ArithmeticValueUpdate & op) +{ + bool retval(doc < getNumDocs()); + if (retval) { + retval = AttributeVector::applyArithmetic(_changes, doc, NumericChangeData(0), op); + } + return retval; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h new file mode 100644 index 00000000000..e802fd9675a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include + +namespace search { + +// forward declaration of class in enumstore.h +template +class NumericEntryType; + +class FloatingPointAttribute : public NumericAttribute +{ +public: + DECLARE_IDENTIFIABLE_ABSTRACT(FloatingPointAttribute); + template + bool append(DocId doc, Accessor & ac) { + return AttributeVector::append(_changes, doc, ac); + } + bool append(DocId doc, double v, int32_t weight) { + return AttributeVector::append(_changes, doc, NumericChangeData(v), weight); + } + bool remove(DocId doc, double v, int32_t weight) { + return AttributeVector::remove(_changes, doc, NumericChangeData(v), weight); + } + bool update(DocId doc, double v) { + return AttributeVector::update(_changes, doc, NumericChangeData(v)); + } + bool apply(DocId doc, const ArithmeticValueUpdate & op); + virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust); + virtual uint32_t clearDoc(DocId doc); +protected: + virtual const char * getString(DocId doc, char * s, size_t sz) const { double v = getFloat(doc); snprintf(s, sz, "%g", v); return s; } + FloatingPointAttribute(const vespalib::string & name, const Config & c); + typedef ChangeTemplate > Change; + typedef ChangeVectorT< Change > ChangeVector; + ChangeVector _changes; + +private: + virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const; + virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const; + virtual double getFloatFromEnum(EnumHandle e) const = 0; +}; + +template +class FloatingPointAttributeTemplate : public FloatingPointAttribute +{ +public: + typedef WeightedType Weighted; + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const = 0; + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const = 0; +protected: + typedef NumericEntryType 
EnumEntryType; + + typedef attribute::LoadedNumericValue LoadedNumericValueT; + +public: + typedef T BaseType; + typedef T LoadedValueType; + typedef SequentialReadModifyWriteInterface LoadedVector; + virtual uint32_t getRawValues(DocId doc, const multivalue::Value * & values) const { + (void) doc; + (void) values; + throw std::runtime_error(getNativeClassName() + "::getRawValues() not implemented."); + } + +protected: + FloatingPointAttributeTemplate(const vespalib::string & name) : + FloatingPointAttribute(name, BasicType::fromType(T())), + _defaultValue(ChangeBase::UPDATE, 0, attribute::getUndefined()) + { } + FloatingPointAttributeTemplate(const vespalib::string & name, const Config & c) : + FloatingPointAttribute(name, c), + _defaultValue(ChangeBase::UPDATE, 0, attribute::getUndefined()) + { assert(c.basicType() == BasicType::fromType(T())); } + static T defaultValue() { return attribute::getUndefined(); } + virtual bool findEnum(T v, EnumHandle & e) const = 0; + virtual largeint_t getDefaultValue() const { return static_cast(-std::numeric_limits::max()); } + Change _defaultValue; +private: + virtual bool findEnum(const char *value, EnumHandle &e) const { + vespalib::asciistream iss(value); + T fvalue = 0; + try { + iss >> fvalue; + } catch (const vespalib::IllegalArgumentException &) { + } + return findEnum(fvalue, e); + } + virtual bool isUndefined(DocId doc) const { return attribute::isUndefined(get(doc)); } + virtual T get(DocId doc) const = 0; + virtual T getFromEnum(EnumHandle e) const = 0; + + virtual double getFloatFromEnum(EnumHandle e) const { return getFromEnum(e); } + virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { + (void) bc; + if (available >= long(sizeof(T))) { + T origValue(get(doc)); + vespalib::serializeForSort< vespalib::convertForSort >(origValue, serTo); + } else { + return -1; + } + return sizeof(T); + } + virtual long onSerializeForDescendingSort(DocId doc, void 
* serTo, long available, const common::BlobConverter * bc) const { + (void) bc; + if (available >= long(sizeof(T))) { + T origValue(get(doc)); + vespalib::serializeForSort< vespalib::convertForSort >(origValue, serTo); + } else { + return -1; + } + return sizeof(T); + } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp new file mode 100644 index 00000000000..2b1d4a01838 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.cpp @@ -0,0 +1,4 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "i_document_weight_attribute.h" diff --git a/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h new file mode 100644 index 00000000000..71d6e94bb87 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/i_document_weight_attribute.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include + +namespace search { + +namespace query { class Node; } + +typedef btree::BTreeConstIterator, btree::BTreeDefaultTraits> DocumentWeightIterator; + +struct IDocumentWeightAttribute +{ + struct LookupResult { + const btree::EntryRef posting_idx; + const uint32_t posting_size; + const int32_t min_weight; + const int32_t max_weight; + LookupResult() : posting_idx(), posting_size(0), min_weight(0), max_weight(0) {} + LookupResult(btree::EntryRef posting_idx_in, uint32_t posting_size_in, int32_t min_weight_in, int32_t max_weight_in) + : posting_idx(posting_idx_in), posting_size(posting_size_in), min_weight(min_weight_in), max_weight(max_weight_in) {} + }; + virtual LookupResult lookup(const vespalib::string &term) const = 0; + virtual void create(btree::EntryRef idx, std::vector &dst) const = 0; + virtual DocumentWeightIterator create(btree::EntryRef idx) const = 0; + virtual ~IDocumentWeightAttribute() {} +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h new file mode 100644 index 00000000000..ec0fbf8e6a4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iattributefilewriter.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +class BufferWriter; + +/* + * Interface class to write to a single attribute vector file. Used by + * IAttributSaver. + */ +class IAttributeFileWriter +{ +public: + using BufferBuf = vespalib::MMapDataBuffer; + using Buffer = std::unique_ptr; + + virtual ~IAttributeFileWriter() = default; + + /* + * Allocate a buffer that can later be passed on to writeBuf. + */ + virtual Buffer allocBuf(size_t size) = 0; + + /** + * Writes the given data. Multiple calls are allowed, but only the + * last call can provide an unaligned buffer. 
+ **/ + virtual void writeBuf(Buffer buf) = 0; + + virtual std::unique_ptr allocBufferWriter() = 0; +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp b/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp new file mode 100644 index 00000000000..0141534b1ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iattributemanager.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "iattributemanager.h" +#include + +LOG_SETUP(".searchlib.attribute.iattributemanager"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/iattributemanager.h b/searchlib/src/vespa/searchlib/attribute/iattributemanager.h new file mode 100644 index 00000000000..7e3ab9c164c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iattributemanager.h @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributeguard.h" +#include +#include + +namespace search { + +/** + * This is an interface used to access all registered attribute vectors. + **/ +class IAttributeManager { +public: + typedef std::shared_ptr SP; + typedef vespalib::string string; + + /** + * Returns a view of the attribute vector with the given name. + * + * @param name name of the attribute vector. + * @return view of the attribute vector or empty view if the attribute vector does not exist. + **/ + virtual AttributeGuard::UP getAttribute(const string & name) const = 0; + + /** + * Returns a view of the attribute vector with the given name. + * Makes sure that the underlying enum values are stable during the use of this attribute vector. + * + * @param name name of the attribute vector. + * @return view of the attribute vector or empty view if the attribute vector does not exist. 
+ **/ + virtual AttributeGuard::UP getAttributeStableEnum(const string & name) const = 0; + + /** + * Fill the given list with all attribute vectors registered in this manager. + * + * @param list the list to fill in attribute vectors. + **/ + virtual void getAttributeList(std::vector & list) const = 0; + + /** + * Creates a per thread attribute context used to provide read access to attributes. + * + * @return the attribute context + **/ + virtual attribute::IAttributeContext::UP createContext() const = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~IAttributeManager() {} +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp new file mode 100644 index 00000000000..e0f240dc15f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "iattributesavetarget.h" +#include + +LOG_SETUP(".searchlib.attribute.iattributesavetarget"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h new file mode 100644 index 00000000000..d3e74ef4885 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iattributesavetarget.h @@ -0,0 +1,161 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "iattributefilewriter.h" + +namespace search { + +/** + * Interface used for saving an attribute vector. + **/ +class IAttributeSaveTarget { +public: + /** + * Config class used by actual saveTarget implementations. 
+ **/ + class Config { + private: + vespalib::string _fileName; + vespalib::string _basicType; + vespalib::string _collectionType; + vespalib::string _tensorType; + bool _hasMultiValue; + bool _hasWeightedSetType; + bool _enumerated; + uint32_t _numDocs; + uint32_t _fixedWidth; + uint64_t _uniqueValueCount; + uint64_t _totalValueCount; + uint64_t _createSerialNum; + uint32_t _version; + public: + Config() + : _fileName(""), + _basicType(""), + _collectionType(""), + _hasMultiValue(false), + _hasWeightedSetType(false), + _enumerated(false), + _numDocs(0), + _fixedWidth(0), + _uniqueValueCount(0), + _totalValueCount(0), + _createSerialNum(0u), + _version(0) + { + } + + Config(const vespalib::string &fileName, + const vespalib::string &basicType, + const vespalib::string &collectionType, + const vespalib::string &tensorType, + bool multiValue, bool weightedSetType, + bool enumerated, + uint32_t numDocs, + uint32_t fixedWidth, + uint64_t uniqueValueCount, + uint64_t totalValueCount, + uint64_t createSerialNum, + uint32_t version + ) + : _fileName(fileName), + _basicType(basicType), + _collectionType(collectionType), + _tensorType(tensorType), + _hasMultiValue(multiValue), + _hasWeightedSetType(weightedSetType), + _enumerated(enumerated), + _numDocs(numDocs), + _fixedWidth(fixedWidth), + _uniqueValueCount(uniqueValueCount), + _totalValueCount(totalValueCount), + _createSerialNum(createSerialNum), + _version(version) + { + } + const vespalib::string & getFileName() const { return _fileName; } + + const vespalib::string & + getBasicType() const + { + return _basicType; + } + + const vespalib::string & + getCollectionType() const + { + return _collectionType; + } + + const vespalib::string &getTensorType() const { + return _tensorType; + } + + bool hasMultiValue() const { return _hasMultiValue; } + bool hasWeightedSetType() const { return _hasWeightedSetType; } + uint32_t getNumDocs() const { return _numDocs; } + size_t getFixedWidth() const { return _fixedWidth; } + + 
uint64_t + getUniqueValueCount(void) const + { + return _uniqueValueCount; + } + + uint64_t + getTotalValueCount(void) const + { + return _totalValueCount; + } + + bool + getEnumerated(void) const + { + return _enumerated; + } + + uint64_t + getCreateSerialNum(void) const + { + return _createSerialNum; + } + + uint32_t getVersion() const { return _version; } + }; + using Buffer = IAttributeFileWriter::Buffer; +protected: + Config _cfg; +public: + IAttributeSaveTarget() : _cfg() {} + void setConfig(const Config & cfg) { _cfg = cfg; } + + bool + getEnumerated(void) const + { + return _cfg.getEnumerated(); + } + + /** + * Sets up this saveTarget before any data is written. Returns true + * on success. + **/ + virtual bool setup() = 0; + /** + * Closes this saveTarget when all data is written. + **/ + virtual void close() = 0; + + virtual IAttributeFileWriter &datWriter() = 0; + virtual IAttributeFileWriter &idxWriter() = 0; + virtual IAttributeFileWriter &weightWriter() = 0; + virtual IAttributeFileWriter &udatWriter() = 0; + + virtual ~IAttributeSaveTarget() {} +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.cpp b/searchlib/src/vespa/searchlib/attribute/integerbase.cpp new file mode 100644 index 00000000000..af49b222b04 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "integerbase.h" +#include +#include + +LOG_SETUP(".searchlib.attribute.integerbase"); + +#include + +namespace search { + +IMPLEMENT_IDENTIFIABLE_ABSTRACT(IntegerAttribute, NumericAttribute); + +IntegerAttribute::IntegerAttribute(const vespalib::string & name, const Config & c) : + NumericAttribute(name, c), + _changes() +{ +} + +uint32_t IntegerAttribute::clearDoc(DocId doc) +{ + uint32_t removed(0); + if (hasMultiValue() && (doc < getNumDocs())) { + removed = getValueCount(doc); + } + AttributeVector::clearDoc(_changes, doc); + + return removed; +} + +uint32_t IntegerAttribute::get(DocId doc, WeightedString * s, uint32_t sz) const +{ + WeightedInt * v = new WeightedInt[sz]; + unsigned num(static_cast(this)->get(doc, v, sz)); + for(unsigned i(0); i < num; i++) { + char tmp[32]; + snprintf(tmp, sizeof(tmp), "%" PRId64, v[i].getValue()); + s[i] = WeightedString(tmp, v[i].getWeight()); + } + delete [] v; + return num; +} + +uint32_t IntegerAttribute::get(DocId doc, WeightedConstChar * v, uint32_t sz) const +{ + (void) doc; + (void) v; + (void) sz; + return 0; +} + +uint32_t IntegerAttribute::get(DocId doc, vespalib::string * s, uint32_t sz) const +{ + largeint_t * v = new largeint_t[sz]; + unsigned num(static_cast(this)->get(doc, v, sz)); + for(unsigned i(0); i < num; i++) { + char tmp[32]; + snprintf(tmp, sizeof(tmp), "%" PRId64, v[i]); + s[i] = tmp; + } + delete [] v; + return num; +} + +uint32_t IntegerAttribute::get(DocId doc, const char ** v, uint32_t sz) const +{ + (void) doc; + (void) v; + (void) sz; + return 0; +} + +bool IntegerAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) +{ + largeint_t v = fv.getAsLong(); + return AttributeVector::adjustWeight(_changes, doc, NumericChangeData(v), wAdjust); +} + +bool IntegerAttribute::apply(DocId doc, const ArithmeticValueUpdate & op) +{ + bool retval(doc < getNumDocs()); + if (retval) { + retval = AttributeVector::applyArithmetic(_changes, doc, 
NumericChangeData(0), op); + } + return retval; +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h new file mode 100644 index 00000000000..702bab0772c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h @@ -0,0 +1,136 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { + +// forward declaration of class in enumstore.h +template +class NumericEntryType; + +class IntegerAttribute : public NumericAttribute +{ +public: + DECLARE_IDENTIFIABLE_ABSTRACT(IntegerAttribute); + bool update(DocId doc, largeint_t v) { + return AttributeVector::update(_changes, doc, NumericChangeData(v)); + } + template + bool append(DocId doc, Accessor & ac) { + return AttributeVector::append(_changes, doc, ac); + } + bool append(DocId doc, largeint_t v, int32_t weight) { + return AttributeVector::append(_changes, doc, NumericChangeData(v), weight); + } + bool remove(DocId doc, largeint_t v, int32_t weight) { + return AttributeVector::remove(_changes, doc, NumericChangeData(v), weight); + } + bool apply(DocId doc, const ArithmeticValueUpdate & op); + virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust); + virtual uint32_t clearDoc(DocId doc); +protected: + IntegerAttribute(const vespalib::string & name, const Config & c); + typedef ChangeTemplate > Change; + typedef ChangeVectorT< Change > ChangeVector; + ChangeVector _changes; + +private: + virtual const char * getString(DocId doc, char * s, size_t sz) const { largeint_t v = getInt(doc); snprintf(s, sz, "%" PRId64, v); return s; } + virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const; + virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const; + virtual 
uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const; + virtual largeint_t getIntFromEnum(EnumHandle e) const = 0; +}; + +template +class IntegerAttributeTemplate : public IntegerAttribute +{ +public: + typedef WeightedType Weighted; + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const = 0; + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const = 0; +protected: + typedef NumericEntryType EnumEntryType; + typedef attribute::LoadedNumericValue LoadedNumericValueT; +public: + typedef T BaseType; + typedef T LoadedValueType; + typedef SequentialReadModifyWriteInterface LoadedVector; + virtual uint32_t getRawValues(DocId doc, const multivalue::Value * & values) const { + (void) doc; + (void) values; + throw std::runtime_error(getNativeClassName() + "::getRawValues() not implemented."); + } + +protected: + IntegerAttributeTemplate(const vespalib::string & name) : + IntegerAttribute(name, BasicType::fromType(T())), + _defaultValue(ChangeBase::UPDATE, 0, defaultValue()) + { } + IntegerAttributeTemplate(const vespalib::string & name, const Config & c) : + IntegerAttribute(name, c), + _defaultValue(ChangeBase::UPDATE, 0, defaultValue()) + { assert(c.basicType() == BasicType::fromType(T())); } + IntegerAttributeTemplate(const vespalib::string & name, + const Config & c, + const BasicType &realType) + : IntegerAttribute(name, c), + _defaultValue(ChangeBase::UPDATE, 0, 0u) + { + assert(c.basicType() == realType); + (void) realType; + assert(BasicType::fromType(T()) == BasicType::INT8); + } + static T defaultValue() { return attribute::getUndefined(); } + virtual bool findEnum(T v, EnumHandle & e) const = 0; + virtual largeint_t getDefaultValue() const { return defaultValue(); } + virtual bool isUndefined(DocId doc) const { return get(doc) == defaultValue(); } + Change _defaultValue; +private: + virtual bool findEnum(const char *value, EnumHandle &e) const { + vespalib::asciistream iss(value); + int64_t ivalue = 0; + try { + iss >> ivalue; + 
} catch (const vespalib::IllegalArgumentException &) { + } + return findEnum(ivalue, e); + } + virtual T get(DocId doc) const = 0; + virtual T getFromEnum(EnumHandle e) const = 0; + virtual largeint_t getIntFromEnum(EnumHandle e) const { + T v(getFromEnum(e)); + if (attribute::isUndefined(v)) { + return attribute::getUndefined(); + } + return v; + } + virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { + (void) bc; + if (available >= long(sizeof(T))) { + T origValue(get(doc)); + vespalib::serializeForSort< vespalib::convertForSort >(origValue, serTo); + } else { + return -1; + } + return sizeof(T); + } + virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { + (void) bc; + if (available >= long(sizeof(T))) { + T origValue(get(doc)); + vespalib::serializeForSort< vespalib::convertForSort >(origValue, serTo); + } else { + return -1; + } + return sizeof(T); + } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/interlock.h b/searchlib/src/vespa/searchlib/attribute/interlock.h new file mode 100644 index 00000000000..3600fe70bce --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/interlock.h @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +namespace attribute +{ + +class InterlockGuard; + +/** + * Class used to serialize getting enum change exclusive lock. This + * eliminates the need for defining a locking order when getting enum + * change shared locks. 
Scenario avoided is: + * + * Threads T1, T2: Grouping queries + * Threads T3, T4: Attribute writer threads + * + * Thread T1 gets shared lock on A1 + * Thread T2 gets shared lock on A2 + * Thread T3 tries to get exclusive lock on A1 + * Thread T4 tries to get exclusive lock on A2 + * Thread T1 tries to get shared lock on A2 + * Thread T2 tries to get shared lock on A1 + * + * With the interlock properly used, thread T3 will hold the + * interlock, preventing thread T4 from registering intent to get + * write lock on A2, thus thread T1 can get a shared lock on A2 and complete. + */ +class Interlock { + std::mutex _mutex; + friend class InterlockGuard; +public: + Interlock() + : _mutex() + { + } + + virtual ~Interlock() { } +}; + +/** + * Class used to serialize getting enum change exclusive lock. The guard + * is passed to EnumModifier constructor to signal that interlock is held. + */ +class InterlockGuard +{ + std::lock_guard _guard; +public: + InterlockGuard(Interlock &interlock) + : _guard(interlock._mutex) + { + } + + ~InterlockGuard() { } +}; + + +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h new file mode 100644 index 00000000000..b4dbf78f64c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + + +namespace search +{ + +namespace attribute +{ + +class IPostingListAttributeBase +{ +public: + virtual + ~IPostingListAttributeBase() + { + } + + virtual void + clearPostings(IAttributeVector::EnumHandle eidx, + uint32_t fromLid, + uint32_t toLid) = 0; + + virtual void + forwardedShrinkLidSpace(uint32_t newSize) = 0; +}; + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp new file mode 100644 index 00000000000..89927def124 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "ipostinglistsearchcontext.h" +#include + +LOG_SETUP(".searchlib.attribute.ipostinglistsearchcontext"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h new file mode 100644 index 00000000000..48f1d1f509a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistsearchcontext.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + + +namespace search +{ + +namespace queryeval +{ + +class SearchIterator; +} + +namespace fef +{ + +class TermFieldMatchData; +} + + +namespace attribute +{ + + +/** + * Interface for search context helper classes to create attribute + * search iterators based on posting lists and using dictionary + * information to better estimate number of hits. Also used for + * enumerated attributes without posting lists to eliminate brute + * force searches for nonexisting values. 
+ */ + +class IPostingListSearchContext +{ +protected: + + IPostingListSearchContext(void) + { + } + + virtual + ~IPostingListSearchContext(void) + { + } + +public: + virtual void + fetchPostings(bool strict) = 0; + + virtual std::unique_ptr + createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) = 0; + + virtual unsigned int + approximateHits(void) const = 0; +}; + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp new file mode 100644 index 00000000000..31844ec3a02 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.cpp @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "iterator_pack.h" +#include + +LOG_SETUP(".searchlib.attribute.iterator_pack"); +namespace search { + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/iterator_pack.h b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h new file mode 100644 index 00000000000..fa4b64599f6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/iterator_pack.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_document_weight_attribute.h" +#include +#include + +namespace search { + +class AttributeIteratorPack +{ +private: + std::vector _children; + +public: + AttributeIteratorPack() : _children() {} + AttributeIteratorPack(AttributeIteratorPack &&rhs) + : _children(std::move(rhs._children)) {} + + AttributeIteratorPack &operator=(AttributeIteratorPack &&rhs) { + _children = std::move(rhs._children); + return *this; + } + + explicit AttributeIteratorPack(std::vector &&children) + : _children(std::move(children)) {} + + uint32_t get_docid(uint16_t ref) const { + return _children[ref].valid() ? 
_children[ref].getKey() : endDocId; + } + + uint32_t seek(uint16_t ref, uint32_t docid) { + _children[ref].linearSeek(docid); + if (__builtin_expect(_children[ref].valid(), true)) { + return _children[ref].getKey(); + } + return endDocId; + } + + int32_t get_weight(uint16_t ref, uint32_t) { + return _children[ref].getData(); + } + + size_t size() const { return _children.size(); } + void initRange(uint32_t begin, uint32_t end) { + (void) end; + for (auto &child: _children) { + child.lower_bound(begin); + } + } +}; + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp new file mode 100644 index 00000000000..d86e41d332e --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.cpp @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "loadedenumvalue.h" +#include + + +namespace search +{ + +namespace attribute +{ + +void +sortLoadedByEnum(LoadedEnumAttributeVector &loaded) +{ + ShiftBasedRadixSorter:: + radix_sort(LoadedEnumAttribute::EnumRadix(), + LoadedEnumAttribute::EnumCompare(), + &loaded[0], loaded.size(), 16); +} + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h new file mode 100644 index 00000000000..47b7eab1b83 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadedenumvalue.h @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search +{ + +namespace attribute +{ + +/** + * Temporary representation of enumerated attribute loaded from enumerated + * save file. 
+ */
+
+class LoadedEnumAttribute
+{
+private:
+    uint32_t _enum;    // enum value; primary sort key
+    uint32_t _docId;   // document id; secondary sort key
+    uint32_t _idx;
+    int32_t _weight;   // the default constructor sets this to 1
+
+public:
+    class EnumRadix    // radix key functor: _enum in the upper 32 bits, _docId in the lower 32 bits
+    {
+    public:
+        uint64_t
+        operator()(const LoadedEnumAttribute &v) const  // const: stateless functor that only reads v
+        {
+            return (static_cast(v._enum) << 32) | v._docId;
+        }
+    };
+
+    class EnumCompare : public std::binary_function  // less-than on (enum, docId), same order as the EnumRadix key
+    {
+    public:
+        bool
+        operator()(const LoadedEnumAttribute &x,
+                   const LoadedEnumAttribute &y) const
+        {
+            if (x.getEnum() != y.getEnum())
+                return x.getEnum() < y.getEnum();
+            return x.getDocId() < y.getDocId();  // equal enums: fall back to docId
+        }
+    };
+
+    LoadedEnumAttribute(void)  // zeroed entry with weight 1
+        : _enum(0),
+          _docId(0),
+          _idx(0),
+          _weight(1)
+    {
+    }
+
+    LoadedEnumAttribute(uint32_t e,
+                        uint32_t docId,
+                        uint32_t idx,
+                        int32_t weight)
+        : _enum(e),
+          _docId(docId),
+          _idx(idx),
+          _weight(weight)
+    {
+    }
+
+    uint32_t
+    getEnum(void) const
+    {
+        return _enum;
+    }
+
+    uint32_t
+    getDocId(void) const
+    {
+        return _docId;
+    }
+
+    uint32_t
+    getIdx(void) const
+    {
+        return _idx;
+    }
+
+    int32_t
+    getWeight(void) const
+    {
+        return _weight;
+    }
+};
+
+typedef vespalib::Array LoadedEnumAttributeVector;
+
+
+/**
+ * Helper class used to populate temporary vector representing loaded
+ * enumerated attribute with posting lists loaded from enumerated save
+ * file.
+ */
+
+class SaveLoadedEnum
+{
+private:
+    LoadedEnumAttributeVector &_loaded;  // destination vector; referenced, not owned
+
+public:
+    SaveLoadedEnum(LoadedEnumAttributeVector &loaded)
+        : _loaded(loaded)
+    {
+    }
+
+    void
+    save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)
+    {
+        _loaded.push_back(LoadedEnumAttribute(e, docId, vci, weight));  // vci is stored as the entry's idx
+    }
+};
+
+/**
+ * Helper class used when loading non-enumerated attribute from
+ * enumerated save file.
+ */
+
+class NoSaveLoadedEnum
+{
+public:
+    static void
+    save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)  // no-op sink: every argument is ignored
+    {
+        (void) e;
+        (void) docId;
+        (void) vci;
+        (void) weight;
+    }
+};
+
+/**
+ * Helper class used to populate temporary vector representing loaded
+ * enumerated attribute without posting lists loaded from enumerated
+ * save file.
+ */
+
+class SaveEnumHist
+{
+    uint32_t *const _hist;   // address of element 0 of the caller's vector; not owned
+    const size_t _histSize;  // element count captured at construction
+
+public:
+    SaveEnumHist(EnumStoreBase::EnumVector &enumHist)
+        : _hist(&enumHist[0]),  // NOTE(review): indexes element 0 unconditionally - confirm callers never pass an empty vector
+          _histSize(enumHist.size())
+    {
+    }
+
+    void
+    save(uint32_t e, uint32_t docId, uint32_t vci, int32_t weight)  // only e is used
+    {
+        (void) docId;
+        (void) vci;
+        (void) weight;
+        assert(e < _histSize);  // e must index inside the histogram
+        ++_hist[e];             // count one more occurrence of enum e
+    }
+};
+
+void
+sortLoadedByEnum(LoadedEnumAttributeVector &loaded);
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
new file mode 100644
index 00000000000..bea214ed8bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.cpp
@@ -0,0 +1,124 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "loadednumericvalue.h" + + +namespace search +{ + +namespace attribute +{ + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded) +{ + ShiftBasedRadixSorter, + typename LoadedNumericValue::ValueRadix, + typename LoadedNumericValue::ValueCompare, 56>:: + radix_sort(typename LoadedNumericValue::ValueRadix(), + typename LoadedNumericValue::ValueCompare(), + &loaded[0], + loaded.size(), + 16); +} + + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded) +{ + ShiftBasedRadixSorter, + typename LoadedNumericValue::DocRadix, + typename LoadedNumericValue::DocOrderCompare, 56>:: + radix_sort(typename LoadedNumericValue::DocRadix(), + typename LoadedNumericValue::DocOrderCompare(), + &loaded[0], + loaded.size(), + 16); +} + + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void 
+sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h new file mode 100644 index 00000000000..a8855ba5f15 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadednumericvalue.h @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "loadedvalue.h" + + +namespace search +{ + +namespace attribute +{ + +/** + * Temporary representation of enumerated attribute loaded from non-enumerated + * save file (i.e. old save format). For numeric data types. + */ + +template +struct LoadedNumericValue : public LoadedValue +{ + LoadedNumericValue() + : LoadedValue() + { + } + + class ValueCompare : public std::binary_function, + LoadedNumericValue, + bool> + { + public: + bool + operator()(const LoadedNumericValue &x, + const LoadedNumericValue &y) const + { + return x < y; + } + }; + + class ValueRadix + { + public: + uint64_t + operator()(const LoadedValue &v) const + { + return vespalib::convertForSort::convert(v.getValue()); + } + }; +}; + + +template +void +sortLoadedByValue(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +template +void +sortLoadedByDocId(SequentialReadModifyWriteVector, + vespalib::DefaultAlloc> & + loaded); + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp new file mode 100644 index 00000000000..335abb799b0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.cpp @@ -0,0 +1,49 @@ +// Copyright 
2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "loadedstringvalue.h" + + +namespace search +{ + +namespace attribute +{ + +void +sortLoadedByValue(LoadedStringVectorReal &loaded) +{ + vespalib::Array + radixScratchPad(loaded.size()); + for(size_t i(0), m(loaded.size()); i < m; i++) { + loaded[i].prepareRadixSort(); + } + radix_sort(LoadedStringValue::ValueRadix(), + LoadedStringValue::ValueCompare(), + AlwaysEof(), + 1, + &loaded[0], + loaded.size(), + &radixScratchPad[0], + 0, + 96); +} + +void +sortLoadedByDocId(LoadedStringVectorReal &loaded) +{ + ShiftBasedRadixSorter:: + radix_sort(LoadedStringValue::DocRadix(), + LoadedStringValue::DocOrderCompare(), + &loaded[0], + loaded.size(), + 16); +} + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h new file mode 100644 index 00000000000..87e2574bdb6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadedstringvalue.h @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include "loadedvalue.h" + +namespace search +{ + +namespace attribute +{ + +/** + * Temporary representation of enumerated attribute loaded from non-enumerated + * save file (i.e. old save format). For string data types. 
+ */
+
+template
+struct RadixSortable : public B
+{
+    RadixSortable()
+        : B(),
+          _currRadix(NULL),
+          _currRadixFolding(false)
+    {
+    }
+
+    class ValueRadix  // yields the next radix digit (one character) of x; mutates x's cursor state
+    {
+    public:
+        uint32_t
+        operator ()(RadixSortable &x) const
+        {
+            vespalib::Utf8ReaderForZTS u8reader(x._currRadix);  // NOTE(review): presumably advances x._currRadix as it reads - confirm in Utf8ReaderForZTS
+            uint32_t val = u8reader.getChar();
+            if (x._currRadixFolding) {  // first pass: digits are case-folded characters
+                if (val != 0) {
+                    val = vespalib::LowerCase::convert(val);
+                } else {
+                    // switch to returning unfolded values
+                    x._currRadix = x.getValue();  // restart from the beginning of the value
+                    x._currRadixFolding = false;
+                    val = 1;  // non-zero digit returned at the point where the folded pass hit 0
+                }
+            }
+            return val;
+        }
+    };
+
+    class ValueCompare : public std::binary_function  // less-than as decided by FoldedStringCompare::compare
+    {
+        FoldedStringCompare _compareHelper;
+    public:
+        bool
+        operator()(const B &x, const B &y) const
+        {
+            return _compareHelper.compare(x.getValue(), y.getValue()) < 0;
+        }
+    };
+
+    void
+    prepareRadixSort()  // reset the cursor to the start of the value and enable the folded pass
+    {
+        _currRadix = this->getValue();
+        _currRadixFolding = true;
+    }
+private:
+    const char * _currRadix;  // read cursor used by ValueRadix; NULL until prepareRadixSort() is called
+    bool _currRadixFolding;   // true while ValueRadix returns case-folded characters
+};
+
+typedef RadixSortable > LoadedStringValue;
+
+typedef SequentialReadModifyWriteInterface LoadedStringVector;
+
+typedef SequentialReadModifyWriteVector
+LoadedStringVectorReal;
+
+
+void
+sortLoadedByValue(LoadedStringVectorReal &loaded);
+
+void
+sortLoadedByDocId(LoadedStringVectorReal &loaded);
+
+
+} // namespace attribute
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp b/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp
new file mode 100644
index 00000000000..cf2e611459b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/loadedvalue.cpp
@@ -0,0 +1,11 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "loadedvalue.h" +#include + +LOG_SETUP(".searchlib.attribute.loadedvalue"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/loadedvalue.h b/searchlib/src/vespa/searchlib/attribute/loadedvalue.h new file mode 100644 index 00000000000..08c7833f394 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/loadedvalue.h @@ -0,0 +1,163 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search +{ + +namespace attribute +{ + +/* + * Temporary representation of enumerated attribute loaded from non-enumerated + * save file (i.e. old save format). + */ +template +class LoadedValue +{ +public: + LoadedValue() + : _docId(0), + _idx(0), + _pidx(0), + _weight(1) + { + memset(&_value, 0, sizeof(_value)); + } + + class DocRadix + { + public: + uint64_t + operator() (const LoadedValue & v) const + { + uint64_t tmp(v._docId); + return tmp << 32 | v._idx; + } + }; + + class DocOrderCompare : public std::binary_function, + LoadedValue, + bool> + { + public: + bool + operator()(const LoadedValue &x, + const LoadedValue &y) const + { + int32_t diff(x._docId - y._docId); + if (diff == 0) { + diff = x._idx - y._idx; + } + return diff < 0; + } + }; + + EnumStoreBase::Index + getEidx() const + { + return EnumStoreBase::Index(btree::EntryRef(_value._eidx)); + } + + void + setEidx(EnumStoreBase::Index v) + { + _value._eidx = v.ref(); + } + + T + getValue() const + { + return _value._value; + } + + inline void + setValue(T v) + { + _value._value = v; + } + + int32_t + getWeight() const + { + return _weight; + } + + void + setWeight(int32_t v) + { + _weight = v; + } + + inline bool + operator<(const LoadedValue &rhs) const + { + return _value._value < rhs._value._value; + } + + union Value { + T _value; + uint32_t _eidx; + }; + uint32_t _docId; + uint32_t _idx; + btree::EntryRef _pidx; 
+private: + int32_t _weight; + Value _value; +}; + + +template <> +inline void +LoadedValue::setValue(float v) +{ + // Consolidate nans during load to avoid sort order issues + _value._value = isUndefined(v) ? getUndefined() : v; +} + +template <> +inline void +LoadedValue::setValue(double v) +{ + // Consolidate nans during load to avoid sort order issues + _value._value = isUndefined(v) ? getUndefined() : v; +} + + +template <> +inline bool +LoadedValue::operator<(const LoadedValue &rhs) const +{ + if (std::isnan(_value._value)) { + return !std::isnan(rhs._value._value); + } + if (std::isnan(rhs._value._value)) { + return false; + } + return _value._value < rhs._value._value; +} + + +template <> +inline bool +LoadedValue::operator<(const LoadedValue &rhs) const +{ + if (std::isnan(_value._value)) { + return !std::isnan(rhs._value._value); + } + if (std::isnan(rhs._value._value)) { + return false; + } + return _value._value < rhs._value._value; +} + + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp new file mode 100644 index 00000000000..33588c94694 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multienumattribute.h" +#include "multienumattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multienumattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.h b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h new file mode 100644 index 00000000000..b4da7d495f9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.h @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { + +/* + * Implementation of multi value enum attribute that uses an underlying enum store + * to store unique values and a multi value mapping to store enum indices for each document. + * + * B: EnumAttribute + * M: MultiValueType (MultiValueMapping template argument) + */ +template +class MultiValueEnumAttribute : public MultiValueAttribute +{ +protected: + typedef typename B::UniqueSet UniqueSet; + + typedef typename B::BaseClass::Change Change; + typedef typename B::BaseClass::DocId DocId; + typedef typename B::BaseClass::EnumHandle EnumHandle; + typedef typename B::BaseClass::EnumModifier EnumModifier; + typedef typename B::BaseClass::generation_t generation_t; + typedef typename B::BaseClass::LoadedVector LoadedVector; + typedef typename B::BaseClass::ValueModifier ValueModifier; + typedef typename B::BaseClass::WeightedEnum WeightedEnum; + + typedef typename EnumStoreBase::Index EnumIndex; + typedef typename EnumStoreBase::IndexVector EnumIndexVector; + typedef typename EnumStoreBase::EnumVector EnumVector; + typedef typename MultiValueAttribute::MultiValueType WeightedIndex; + typedef typename MultiValueAttribute::ValueVector WeightedIndexVector; + typedef typename MultiValueAttribute::Histogram Histogram; + typedef typename MultiValueAttribute::DocumentValues DocIndices; + typedef AttributeVector::ReaderBase ReaderBase; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; + + // from MultiValueAttribute + virtual bool extractChangeData(const Change & c, EnumIndex & idx); // EnumIndex is ValueType. 
Use EnumStore + + // from EnumAttribute + virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques); // same for both string and numeric + virtual void reEnumerate(); // same for both string and numeric + + virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused); + + void incRefCount(const WeightedIndex & idx) { this->_enumStore.incRefCount(idx); } + void decRefCount(const WeightedIndex & idx) { this->_enumStore.decRefCount(idx); } + + virtual void + freezeEnumDictionary() + { + this->getEnumStore().freezeTree(); + } + + virtual void fillValues(LoadedVector & loaded); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + LoadedEnumAttributeVector &loaded); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + EnumVector &enumHist); + + virtual void mergeMemoryStats(MemoryUsage & total) { (void) total; } + +public: + MultiValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); + + virtual void onCommit(); + virtual void onUpdateStat(); + + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + + //----------------------------------------------------------------------------------------------------------------- + // Attribute read API + //----------------------------------------------------------------------------------------------------------------- + virtual EnumHandle getEnum(DocId doc) const { + if (this->getValueCount(doc) == 0) { + return std::numeric_limits::max(); + } else { + WeightedIndex idx; + this->_mvMapping.get(doc, 0, idx); + return idx.value().ref(); + } + } + virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t valueCount = this->_mvMapping.get(doc, indices); + for (uint32_t i = 0, m = std::min(sz, valueCount); i 
< m; ++i) { + e[i] = indices[i].value().ref(); + } + return valueCount; + } + virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t valueCount = this->_mvMapping.get(doc, indices); + for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) { + e[i] = WeightedEnum(indices[i].value().ref(), indices[i].weight()); + } + return valueCount; + } + + virtual std::unique_ptr onInitSave() override; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp new file mode 100644 index 00000000000..961fc3b1ee9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multienumattribute.hpp @@ -0,0 +1,238 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "multienumattributesaver.h" + +#include + +namespace search { + +template +bool +MultiValueEnumAttribute::extractChangeData(const Change & c, EnumIndex & idx) +{ + if (c._enumScratchPad == Change::UNSET_ENUM) { + return this->_enumStore.findIndex(c._data.raw(), idx); + } + idx = EnumIndex(c._enumScratchPad); + return true; +} + +template +void +MultiValueEnumAttribute::considerAttributeChange(const Change & c, UniqueSet & newUniques) +{ + if (c._type == ChangeBase::APPEND || + (this->getInternalCollectionType().createIfNonExistant() && + (c._type >= ChangeBase::INCREASEWEIGHT && c._type <= ChangeBase::DIVWEIGHT))) + { + EnumIndex idx; + if (!this->_enumStore.findIndex(c._data.raw(), idx)) { + newUniques.insert(c._data); + } else { + c._enumScratchPad = idx.ref(); + } + } +} + +template +void +MultiValueEnumAttribute::reEnumerate() +{ + // update MultiValueMapping with new EnumIndex values. 
+ EnumModifier enumGuard(this->getEnumModifier()); + for (DocId doc = 0; doc < this->getNumDocs(); ++doc) { + uint32_t valueCount = this->_mvMapping.getValueCount(doc); + WeightedIndexVector indices(valueCount); + this->_mvMapping.get(doc, &indices[0], valueCount); + + for (uint32_t i = 0; i < indices.size(); ++i) { + EnumIndex oldIndex = indices[i].value(); + EnumIndex newIndex; + this->_enumStore.getCurrentIndex(oldIndex, newIndex); + std::atomic_thread_fence(std::memory_order_release); + indices[i] = WeightedIndex(newIndex, indices[i].weight()); + } + + std::atomic_thread_fence(std::memory_order_release); + this->_mvMapping.replace(doc, indices); + } +} + +template +void +MultiValueEnumAttribute::applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused) +{ + // set new set of indices for documents with changes + ValueModifier valueGuard(this->getValueModifier()); + for (typename DocIndices::const_iterator iter = docIndices.begin(); iter != docIndices.end(); ++iter) { + const WeightedIndex * oldIndices = NULL; + uint32_t valueCount(this->_mvMapping.get(iter->first, oldIndices)); + this->_mvMapping.set(iter->first, iter->second); + for (uint32_t i = 0; i < iter->second.size(); ++i) { + incRefCount(iter->second[i]); + } + for (uint32_t i = 0; i < valueCount; ++i) { + decRefCount(oldIndices[i]); + if (this->_enumStore.getRefCount(oldIndices[i]) == 0) { + unused.push_back(oldIndices[i].value()); + } + } + } +} + +template +void +MultiValueEnumAttribute::fillValues(LoadedVector & loaded) +{ + Histogram capacityNeeded = this->_mvMapping.getEmptyHistogram(); + uint32_t numDocs(this->getNumDocs()); + size_t numValues = loaded.size(); + size_t count = 0; + for (DocId doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount(0); + for(;(count < numValues) && (loaded.read()._docId == doc); count++, loaded.next()) { + valueCount++; + } + if (valueCount < this->_mvMapping.maxValues()) { + capacityNeeded[valueCount] += 1; + } else { + 
capacityNeeded[this->_mvMapping.maxValues()] += 1; + } + } + + this->_mvMapping.reset(numDocs, capacityNeeded); + + loaded.rewind(); + count = 0; + WeightedIndexVector indices; + for (DocId doc = 0; doc < numDocs; ++doc) { + for(const typename LoadedVector::Type * v = & loaded.read();(count < numValues) && (v->_docId == doc); count++, loaded.next(), v = & loaded.read()) { + indices.push_back(WeightedIndex(v->getEidx(), v->getWeight())); + } + this->checkSetMaxValueCount(indices.size()); + this->_mvMapping.set(doc, indices); + indices.clear(); + } +} + + +template +void +MultiValueEnumAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + LoadedEnumAttributeVector &loaded) +{ + attribute::SaveLoadedEnum saver(loaded); + uint32_t maxvc = this->_mvMapping.fillMapped(attrReader, + numValues, + &eidxs[0], + eidxs.size(), + saver, + this->getNumDocs(), + this->hasWeightedSetType()); + this->checkSetMaxValueCount(maxvc); +} + +template +void +MultiValueEnumAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + EnumVector &enumHist) +{ + attribute::SaveEnumHist saver(enumHist); + uint32_t maxvc = this->_mvMapping.fillMapped(attrReader, + numValues, + &eidxs[0], + eidxs.size(), + saver, + this->getNumDocs(), + this->hasWeightedSetType()); + this->checkSetMaxValueCount(maxvc); +} + +template +MultiValueEnumAttribute:: +MultiValueEnumAttribute(const vespalib::string &baseFileName, + const AttributeVector::Config & cfg) + : MultiValueAttribute(baseFileName, cfg) +{ +} + +template +void +MultiValueEnumAttribute::onCommit() +{ + // update enum store + EnumStoreBase::IndexVector possiblyUnused; + this->insertNewUniqueValues(possiblyUnused); + DocIndices docIndices; + this->applyAttributeChanges(docIndices); + applyValueChanges(docIndices, possiblyUnused); + this->_changes.clear(); + this->_enumStore.freeUnusedEnums(possiblyUnused); + this->freezeEnumDictionary(); + 
this->setEnumMax(this->_enumStore.getLastEnum()); + std::atomic_thread_fence(std::memory_order_release); + this->removeAllOldGenerations(); +} +template +void +MultiValueEnumAttribute::onUpdateStat() +{ + // update statistics + MemoryUsage total; + total.merge(this->_enumStore.getMemoryUsage()); + total.merge(this->_enumStore.getTreeMemoryUsage()); + total.merge(this->_mvMapping.getMemoryUsage()); + mergeMemoryStats(total); + this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_enumStore.getNumUniques(), total.allocatedBytes(), + total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold()); +} + +template +void +MultiValueEnumAttribute::removeOldGenerations(generation_t firstUsed) +{ + this->_enumStore.trimHoldLists(firstUsed); + this->_mvMapping.trimHoldLists(firstUsed); +} + +template +void +MultiValueEnumAttribute::onGenerationChange(generation_t generation) +{ + /* + * Freeze tree before generation is increased in attribute vector + * but after generation is increased in tree. This ensures that + * unlocked readers accessing a frozen tree will access a + * sufficiently new frozen tree. + */ + freezeEnumDictionary(); + this->_mvMapping.transferHoldLists(generation - 1); + this->_enumStore.transferHoldLists(generation - 1); +} + +template +std::unique_ptr +MultiValueEnumAttribute::onInitSave() +{ + { + EnumModifier enumGuard(this->getEnumModifier()); + this->_enumStore.reEnumerate(); + } + vespalib::GenerationHandler::Guard guard(this->getGenerationHandler(). 
+ takeGuard()); + return std::make_unique> + (std::move(guard), this->createSaveTargetConfig(), this->_mvMapping, + this->_enumStore); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp new file mode 100644 index 00000000000..0b2d8cc00b9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.cpp @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multienumattributesaver.h" +#include +#include "multivalueattributesaverutils.h" + +using vespalib::GenerationHandler; +using search::multivalueattributesaver::CountWriter; +using search::multivalueattributesaver::WeightWriter; + +namespace search { + +namespace +{ + +/* + * Class to write enum indexes mapped over to either enum values + * or values, depending on the requirements of the save target. 
+ */ +class DatWriter +{ + std::vector _indexes; + const EnumStoreBase &_enumStore; + std::unique_ptr _datWriter; + bool _enumerated; +public: + DatWriter(IAttributeSaveTarget &saveTarget, + const EnumStoreBase &enumStore) + : _indexes(), + _enumStore(enumStore), + _datWriter(saveTarget.datWriter().allocBufferWriter()), + _enumerated(saveTarget.getEnumerated()) + { + _indexes.reserve(1000); + } + + ~DatWriter() + { + assert(_indexes.empty()); + _datWriter->flush(); + } + + void flush() + { + if (!_indexes.empty()) { + if (_enumerated) { + _enumStore.writeEnumValues(*_datWriter, + &_indexes[0], _indexes.size()); + } else { + _enumStore.writeValues(*_datWriter, + &_indexes[0], _indexes.size()); + } + _indexes.clear(); + } + } + + template + void + writeValues(const MultiValueT *values, uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + if (_indexes.size() >= _indexes.capacity()) { + flush(); + } + _indexes.push_back(values[i].value()); + } + } +}; + +} + +template +MultiValueEnumAttributeSaver:: +MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMapping &mvMapping, + const EnumStoreBase &enumStore) + : Parent(std::move(guard), cfg, mvMapping), + _mvMapping(mvMapping), + _enumSaver(enumStore, true) +{ +} + + + +template +MultiValueEnumAttributeSaver:: +~MultiValueEnumAttributeSaver() +{ +} + +template +bool +MultiValueEnumAttributeSaver:: +onSave(IAttributeSaveTarget &saveTarget) +{ + CountWriter countWriter(saveTarget); + WeightWriter weightWriter(saveTarget); + DatWriter datWriter(saveTarget, _enumSaver.getEnumStore()); + _enumSaver.writeUdat(saveTarget); + for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) { + Index idx = _frozenIndices[docId]; + const MultiValueType *handle; + uint32_t count = _mvMapping.getDataForIdx(idx, handle); + countWriter.writeCount(count); + weightWriter.writeWeights(handle, count); + datWriter.writeValues(handle, count); + } + 
datWriter.flush(); + _enumSaver.enableReEnumerate(); + return true; +} + +using EnumIdxArray = multivalue::Value; +using EnumIdxWset = multivalue::WeightedValue; + +template class MultiValueEnumAttributeSaver; +template class MultiValueEnumAttributeSaver; +template class MultiValueEnumAttributeSaver; +template class MultiValueEnumAttributeSaver; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h new file mode 100644 index 00000000000..7d7b4f581ed --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multienumattributesaver.h @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multivalueattributesaver.h" +#include "enumattributesaver.h" + +namespace search { + +/* + * Class for saving an enumerated multivalue attribute. + * + * Template argument MultiValueT is either multivalue::Value or + * multivalue::WeightedValue + * Template argument IndexT is either multivalue::Index32 or multivalue::Index64 + */ +template +class MultiValueEnumAttributeSaver : public MultiValueAttributeSaver +{ + using Parent = MultiValueAttributeSaver; + using Index = IndexT; + using MultiValueType = MultiValueT; + using ValueType = typename MultiValueType::ValueType; + using GenerationHandler = vespalib::GenerationHandler; + using Parent::_frozenIndices; + using MultiValueMapping = MultiValueMappingT; + + const MultiValueMapping &_mvMapping; + EnumAttributeSaver _enumSaver; +public: + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; + MultiValueEnumAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMapping &mvMapping, + const EnumStoreBase &enumStore); + virtual ~MultiValueEnumAttributeSaver(); +}; + + +} // namespace search diff --git 
a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp new file mode 100644 index 00000000000..b6021881ee6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multinumericattribute.h" +#include "multinumericattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multinumericattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h new file mode 100644 index 00000000000..63cf52a42bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h @@ -0,0 +1,333 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +/* + * Implementation of multi value numeric attribute that uses an underlying + * multi value mapping from MultiValueAttribute. 
+ * + * B: Base class + * M: MultiValueType (MultiValueMapping template argument) + */ +template +class MultiValueNumericAttribute : public MultiValueAttribute +{ +private: + typedef typename B::BaseType T; + typedef typename B::DocId DocId; + typedef typename B::EnumHandle EnumHandle; + typedef typename B::largeint_t largeint_t; + typedef typename B::Weighted Weighted; + typedef typename B::WeightedInt WeightedInt; + typedef typename B::WeightedFloat WeightedFloat; + typedef typename B::WeightedEnum WeightedEnum; + + typedef typename MultiValueAttribute::MultiValueMapping MultiValueMapping; + typedef typename MultiValueAttribute::DocumentValues DocumentValues; + typedef typename MultiValueAttribute::Change Change; + typedef typename MultiValueAttribute::ValueType MValueType; // = B::BaseType + typedef typename MultiValueAttribute::MultiValueType MultiValueType; // = B::BaseType + + virtual bool extractChangeData(const Change & c, MValueType & data) { + data = static_cast(c._data.get()); + return true; + } + + virtual T getFromEnum(EnumHandle e) const; + virtual bool findEnum(T value, EnumHandle & e) const; + virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const { + (void) v; + (void) e; + (void) sz; + } + + +protected: + typedef typename B::generation_t generation_t; + typedef MultiValueType WType; + uint32_t get(DocId doc, const WType * & values) const { return this->_mvMapping.get(doc, values); } + +public: + virtual uint32_t getRawValues(DocId doc, const WType * & values) const { return get(doc, values); } + /* + * Specialization of SearchContext for weighted set type + */ + class SetSearchContext : public NumericAttribute::Range, public AttributeVector::SearchContext + { + private: + const MultiValueNumericAttribute & _toBeSearched; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + virtual bool valid() const { 
return this->isValid(); } + + public: + SetSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + NumericAttribute::Range(*qTerm), + AttributeVector::SearchContext(toBeSearched), + _toBeSearched(static_cast &>(toBeSearched)) + { + } + + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + bool + cmp(DocId doc, int32_t & weight) const + { + const MultiValueType * buffer; + for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer); + i < m; i++) { + T v(buffer[i].value()); + if (this->match(v)) { + weight = buffer[i].weight(); + return true; + } + } + return false; + } + + bool + cmp(DocId doc) const + { + const MultiValueType * buffer; + for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer); + i < m; i++) { + T v(buffer[i].value()); + if (this->match(v)) { + return true; + } + } + return false; + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? 
new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + /* + * Specialization of SearchContext for array type + */ + class ArraySearchContext : public NumericAttribute::Range, public AttributeVector::SearchContext + { + private: + const MultiValueNumericAttribute & _toBeSearched; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + protected: + virtual bool valid() const { return this->isValid(); } + + public: + ArraySearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + NumericAttribute::Range(*qTerm), + AttributeVector::SearchContext(toBeSearched), + _toBeSearched(static_cast &>(toBeSearched)) + { + } + + bool + cmp(DocId doc, int32_t & weight) const + { + uint32_t hitCount = 0; + const MultiValueType * buffer; + for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer); + i < m; i++) { + T v = buffer[i].value(); + if (this->match(v)) { + hitCount++; + } + } + weight = hitCount; + + return hitCount != 0; + } + + bool + cmp(DocId doc) const + { + const MultiValueType * buffer; + for (uint32_t i = 0, m = _toBeSearched._mvMapping.get(doc, buffer); + i < m; i++) { + T v = buffer[i].value(); + if (this->match(v)) { + return true; + } + } + return false; + } + + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? 
new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector::BasicType::fromType(T()), + attribute::CollectionType::ARRAY)); + virtual uint32_t getValueCount(DocId doc) const; + virtual void onCommit(); + virtual void onUpdateStat(); + virtual void removeOldGenerations(generation_t firstUsed); + + virtual void onGenerationChange(generation_t generation); + + virtual bool onLoad(); + + virtual bool + onLoadEnumerated(typename B::ReaderBase &attrReader); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + virtual void clearOldValues(DocId doc); + virtual void setNewValues(DocId doc, const std::vector & values); + + //------------------------------------------------------------------------- + // new read api + //------------------------------------------------------------------------- + virtual T get(DocId doc) const { + MultiValueType value; + this->_mvMapping.get(doc, 0, value); + return value; + } + virtual largeint_t getInt(DocId doc) const { + MultiValueType value; + this->_mvMapping.get(doc, 0, value); + return static_cast(value.value()); + } + virtual double getFloat(DocId doc) const { + MultiValueType value; + this->_mvMapping.get(doc, 0, value); + return static_cast(value.value()); + } + virtual EnumHandle getEnum(DocId doc) const { + (void) doc; + return std::numeric_limits::max(); // does not have enum + } + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + template + uint32_t getHelper(DocId doc, BufferType * 
buffer, uint32_t sz) const { + const MultiValueType * handle; + uint32_t ret = this->_mvMapping.get(doc, handle); + for(size_t i(0), m(std::min(sz, ret)); i < m; i++) { + buffer[i] = static_cast(handle[i].value()); + } + return ret; + } + virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { + return getEnumHelper(doc, e, sz); + } + virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const { + return getEnumHelper(doc, e, sz); + } + template + uint32_t getEnumHelper(DocId doc, E * e, uint32_t sz) const { + uint32_t available = getValueCount(doc); + uint32_t num2Read = std::min(available, sz); + for (uint32_t i = 0; i < num2Read; ++i) { + e[i] = E(std::numeric_limits::max()); // does not have enum + } + return available; + } + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + template + uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const { + const MultiValueType * handle; + uint32_t ret = this->_mvMapping.get(doc, handle); + for(size_t i(0), m(std::min(sz, ret)); i < m; i++) { + buffer[i] = WeightedType(static_cast(handle[i].value()), + handle[i].weight()); + } + return ret; + } + + virtual std::unique_ptr onInitSave() override; +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp new file mode 100644 index 00000000000..25f0ed88e20 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -0,0 +1,197 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include +#include +#include "multinumericattributesaver.h" + +namespace search { + +template +typename MultiValueNumericAttribute::T +MultiValueNumericAttribute::getFromEnum(EnumHandle e) const +{ + (void) e; + return 0; +} + +template +bool MultiValueNumericAttribute::findEnum(T value, EnumHandle & e) const +{ + (void) value; (void) e; + return false; +} + +template +MultiValueNumericAttribute:: +MultiValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c) : + MultiValueAttribute(baseFileName, c) +{ +} + +template +uint32_t MultiValueNumericAttribute::getValueCount(DocId doc) const +{ + if (doc >= B::getNumDocs()) { + return 0; + } + return this->_mvMapping.getValueCount(doc); +} + +template +void +MultiValueNumericAttribute::onCommit() +{ + DocumentValues docValues; + this->applyAttributeChanges(docValues); + { + typename B::ValueModifier valueGuard(this->getValueModifier()); + for (const auto & value : docValues) { + clearOldValues(value.first); + setNewValues(value.first, value.second); + } + } + + std::atomic_thread_fence(std::memory_order_release); + this->removeAllOldGenerations(); + + this->_changes.clear(); +} + +template +void MultiValueNumericAttribute::onUpdateStat() +{ + MemoryUsage usage = this->_mvMapping.getMemoryUsage(); + this->updateStatistics(this->_mvMapping.getTotalValueCnt(), this->_mvMapping.getTotalValueCnt(), usage.allocatedBytes(), + usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold()); +} + + +template +void MultiValueNumericAttribute::clearOldValues(DocId doc) +{ + (void) doc; +} + +template +void MultiValueNumericAttribute::setNewValues(DocId doc, const std::vector & values) +{ + this->_mvMapping.set(doc, values); +} + +template +void MultiValueNumericAttribute::removeOldGenerations(generation_t firstUsed) +{ + this->_mvMapping.trimHoldLists(firstUsed); +} + + +template +void MultiValueNumericAttribute::onGenerationChange(generation_t 
generation) +{ + this->_mvMapping.transferHoldLists(generation - 1); +} + +template +bool +MultiValueNumericAttribute::onLoadEnumerated(typename B::ReaderBase & + attrReader) +{ + uint32_t numDocs = attrReader.getNumIdx() - 1; + uint64_t numValues = attrReader.getNumValues(); + uint64_t enumCount = attrReader.getEnumCount(); + assert(numValues == enumCount); + (void) enumCount; + + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + + FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT()); + const T *map = reinterpret_cast(udatBuffer->buffer()); + assert((udatBuffer->size() % sizeof(T)) == 0); + size_t mapSize = udatBuffer->size() / sizeof(T); + attribute::NoSaveLoadedEnum saver; + uint32_t maxvc = this->_mvMapping.fillMapped(attrReader, + numValues, + map, + mapSize, + saver, + this->getNumDocs(), + this->hasWeightedSetType()); + this->checkSetMaxValueCount(maxvc); + + return true; +} + +template +bool +MultiValueNumericAttribute::onLoad() +{ + typename B::template PrimitiveReader attrReader(*this); + bool ok(attrReader.getHasLoadData()); + + if (!ok) + return false; + + this->setCreateSerialNum(attrReader.getCreateSerialNum()); + + if (attrReader.getEnumerated()) + return onLoadEnumerated(attrReader); + + bool hasWeight(attrReader.hasWeight()); + size_t numDocs = attrReader.getNumIdx() - 1; + + typename MultiValueMappingBaseBase::Histogram capacityNeeded = + this->_mvMapping.getHistogram(attrReader); + this->_mvMapping.reset(numDocs, capacityNeeded); + // set values + std::vector values; + B::setNumDocs(numDocs); + B::setCommittedDocIdLimit(numDocs); + attrReader.rewind(); + for (DocId doc = 0; doc < numDocs; ++doc) { + const uint32_t valueCount(attrReader.getNextValueCount()); + for (uint32_t i(0); i < valueCount; i++) { + MValueType currData = attrReader.getNextData(); + values.push_back(MultiValueType(currData, + hasWeight ? 
+ attrReader.getNextWeight() : 1)); + } + this->checkSetMaxValueCount(valueCount); + setNewValues(doc, values); + values.clear(); + } + return true; +} + +template +AttributeVector::SearchContext::UP +MultiValueNumericAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + if (this->hasArrayType()) { + return std::unique_ptr + (new ArraySearchContext(std::move(qTerm), *this)); + } else { + return std::unique_ptr + (new SetSearchContext(std::move(qTerm), *this)); + } +} + + +template +std::unique_ptr +MultiValueNumericAttribute::onInitSave() +{ + vespalib::GenerationHandler::Guard guard(this->getGenerationHandler(). + takeGuard()); + return std::make_unique> + (std::move(guard), this->createSaveTargetConfig(), this->_mvMapping); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp new file mode 100644 index 00000000000..fc45530594b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.cpp @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "multinumericattributesaver.h" +#include +#include "multivalueattributesaverutils.h" + +using vespalib::GenerationHandler; +using search::multivalueattributesaver::CountWriter; +using search::multivalueattributesaver::WeightWriter; + +namespace search { + +namespace +{ + +class DatWriter +{ + std::unique_ptr _datWriter; +public: + DatWriter(IAttributeSaveTarget &saveTarget) + : _datWriter(saveTarget.datWriter().allocBufferWriter()) + { + } + + ~DatWriter() + { + _datWriter->flush(); + } + + template + void + writeValues(const MultiValueT *values, uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + typename MultiValueT::ValueType value(values[i]); + _datWriter->write(&value, sizeof(typename MultiValueT::ValueType)); + } + } +}; + +} + +template +MultiValueNumericAttributeSaver:: +MultiValueNumericAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMapping &mvMapping) + : Parent(std::move(guard), cfg, mvMapping), + _mvMapping(mvMapping) +{ +} + + + +template +MultiValueNumericAttributeSaver:: +~MultiValueNumericAttributeSaver() +{ +} + +template +bool +MultiValueNumericAttributeSaver:: +onSave(IAttributeSaveTarget &saveTarget) +{ + CountWriter countWriter(saveTarget); + WeightWriter weightWriter(saveTarget); + DatWriter datWriter(saveTarget); + + for (uint32_t docId = 0; docId < _frozenIndices.size(); ++docId) { + Index idx = _frozenIndices[docId]; + const MultiValueType *handle; + uint32_t count = _mvMapping.getDataForIdx(idx, handle); + countWriter.writeCount(count); + weightWriter.writeWeights(handle, count); + datWriter.writeValues(handle, count); + } + return true; +} + +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class 
MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index32>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; +template class MultiValueNumericAttributeSaver, + multivalue::Index64>; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h new file mode 100644 index 00000000000..4b20f94cad0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattributesaver.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "multivalueattributesaver.h" + +namespace search { + +/* + * Class for saving a multivalue attribute. + * + * Template argument MultiValueT is either multivalue::Value or + * multivalue::WeightedValue + * Template argument IndexT is either multivalue::Index32 or multivalue::Index64 + */ +template +class MultiValueNumericAttributeSaver : public MultiValueAttributeSaver +{ + using Parent = MultiValueAttributeSaver; + using Index = IndexT; + using MultiValueType = MultiValueT; + using ValueType = typename MultiValueType::ValueType; + using GenerationHandler = vespalib::GenerationHandler; + using Parent::_frozenIndices; + using MultiValueMapping = MultiValueMappingT; + + const MultiValueMapping &_mvMapping; +public: + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; + MultiValueNumericAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMapping &mvMapping); + + virtual ~MultiValueNumericAttributeSaver(); +}; + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp new file mode 100644 index 00000000000..1f62445a2b2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "multinumericenumattribute.h" +#include "multinumericenumattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multinumericenumattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h new file mode 100644 index 00000000000..8ef3d28549b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h @@ -0,0 +1,289 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { + +/* + * Implementation of multi value numeric attribute that uses an underlying enum store + * to store unique numeric values and a multi value mapping to store enum indices for each document. + * This class is used for both array and weighted set types. + * + * B: EnumAttribute + * M: MultiValueType (MultiValueMapping template argument) + */ +template +class MultiValueNumericEnumAttribute : public MultiValueEnumAttribute +{ +protected: + typedef typename B::BaseClass::DocId DocId; + typedef typename B::BaseClass::EnumHandle EnumHandle; +public: + typedef typename B::BaseClass::BaseType T; +protected: + typedef typename B::BaseClass::largeint_t largeint_t; + typedef typename B::BaseClass::LoadedNumericValueT LoadedNumericValueT; + typedef typename B::BaseClass::LoadedVector LoadedVector; + typedef SequentialReadModifyWriteVector LoadedVectorR; + typedef typename B::BaseClass::Weighted Weighted; + typedef typename B::BaseClass::WeightedInt WeightedInt; + typedef typename B::BaseClass::WeightedFloat WeightedFloat; + typedef typename B::BaseClass::WeightedEnum WeightedEnum; + + typedef typename MultiValueEnumAttribute::MultiValueType WeightedIndex; + typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; + typedef attribute::LoadedEnumAttributeVector 
LoadedEnumAttributeVector; + typedef EnumStoreBase::IndexVector EnumIndexVector; + typedef EnumStoreBase::EnumVector EnumVector; + typedef EnumStoreBase::Index EnumIndex; + +protected: + /* + * Specialization of SearchContext for weighted set type + */ + class SetSearchContext : public NumericAttribute::Range, public AttributeVector::SearchContext + { + protected: + const MultiValueNumericEnumAttribute & _toBeSearched; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + virtual bool valid() const { return this->isValid(); } + + public: + SetSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + NumericAttribute::Range(*qTerm), + SearchContext(toBeSearched), + _toBeSearched(static_cast &>(toBeSearched)) + { + } + + bool + cmp(DocId doc, int32_t & weight) const + { + const WeightedIndex * indices; + uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices); + for (uint32_t i = 0; i < valueCount; ++i) { + T v = _toBeSearched._enumStore.getValue(indices[i].value()); + if (this->match(v)) { + weight = indices[i].weight(); + return true; + } + } + return false; + } + + bool + cmp(DocId doc) const + { + const WeightedIndex * indices; + uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices); + for (uint32_t i = 0; i < valueCount; ++i) { + T v = _toBeSearched._enumStore.getValue(indices[i].value()); + if (this->match(v)) { + return true; + } + } + return false; + } + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? 
new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + /* + * Specialization of SearchContext for array type + */ + class ArraySearchContext : public NumericAttribute::Range, public AttributeVector::SearchContext + { + protected: + const MultiValueNumericEnumAttribute & _toBeSearched; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + virtual bool valid() const { return this->isValid(); } + + public: + ArraySearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + NumericAttribute::Range(*qTerm), + SearchContext(toBeSearched), + _toBeSearched(static_cast &>(toBeSearched)) + { + } + + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + bool + cmp(DocId doc, int32_t & weight) const + { + uint32_t hitCount = 0; + const WeightedIndex * indices; + uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices); + for (uint32_t i = 0; i < valueCount; ++i) { + T v = _toBeSearched._enumStore.getValue(indices[i].value()); + if (this->match(v)) { + hitCount++; + } + } + weight = hitCount; + + return hitCount != 0; + } + + bool + cmp(DocId doc) const + { + const WeightedIndex * indices; + uint32_t valueCount = _toBeSearched._mvMapping.get(doc, indices); + for (uint32_t i = 0; i < valueCount; ++i) { + T v = _toBeSearched._enumStore.getValue(indices[i].value()); + if (this->match(v)) { + return true; + } + } + + return false; + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + 
(strict + ? new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + +public: + MultiValueNumericEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); + + virtual bool onLoad(); + + bool + onLoadEnumerated(typename B::ReaderBase &attrReader); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + //------------------------------------------------------------------------- + // Attribute read API + //------------------------------------------------------------------------- + virtual T get(DocId doc) const { + if (this->getValueCount(doc) == 0) { + return T(); + } else { + WeightedIndex idx; + this->_mvMapping.get(doc, 0, idx); + return this->_enumStore.getValue(idx.value()); + } + } + virtual largeint_t getInt(DocId doc) const { + return static_cast(get(doc)); + } + virtual double getFloat(DocId doc) const { + return static_cast(get(doc)); + } + + template + uint32_t getHelper(DocId doc, BufferType * buffer, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t valueCount = this->_mvMapping.get(doc, indices); + for(uint32_t i = 0, m = std::min(sz, valueCount); i < m; i++) { + buffer[i] = static_cast(this->_enumStore.getValue(indices[i].value())); + } + return valueCount; + } + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + + template + uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t 
valueCount = this->_mvMapping.get(doc, indices); + for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) { + buffer[i] = WeightedType(static_cast(this->_enumStore.getValue(indices[i].value())), indices[i].weight()); + } + return valueCount; + } + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + +private: + typedef typename B::template PrimitiveReader AttributeReader; + void loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues); +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp new file mode 100644 index 00000000000..52aef8bf54a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -0,0 +1,145 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace search { + +template +MultiValueNumericEnumAttribute:: +MultiValueNumericEnumAttribute(const vespalib::string & baseFileName, + const AttributeVector::Config & cfg) + : MultiValueEnumAttribute(baseFileName, cfg) +{ +} + +template +void +MultiValueNumericEnumAttribute::loadAllAtOnce(AttributeReader & attrReader, size_t numDocs, size_t numValues) +{ + LoadedVectorR loaded(numValues); + + bool hasWeight(attrReader.hasWeight()); + for (uint32_t docIdx(0), valueIdx(0); docIdx < numDocs; ++docIdx) { + const uint32_t currValueCount = attrReader.getNextValueCount(); + for (uint32_t subIdx = 0; subIdx < currValueCount; ++subIdx) { + loaded[valueIdx]._docId = docIdx; + loaded[valueIdx]._idx = subIdx; + loaded[valueIdx].setValue(attrReader.getNextData()); + loaded[valueIdx].setWeight(hasWeight ? attrReader.getNextWeight() : 1); + valueIdx++; + } + } + + attribute::sortLoadedByValue(loaded); + this->fillPostings(loaded); + loaded.rewind(); + this->fillEnum(loaded); + attribute::sortLoadedByDocId(loaded); + + loaded.rewind(); + this->fillValues(loaded); +} + +template +bool +MultiValueNumericEnumAttribute::onLoadEnumerated(typename B::ReaderBase & + attrReader) +{ + FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT()); + + uint32_t numDocs = attrReader.getNumIdx() - 1; + uint64_t numValues = attrReader.getNumValues(); + uint64_t enumCount = attrReader.getEnumCount(); + assert(numValues == enumCount); + (void) enumCount; + + EnumIndexVector eidxs; + this->fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs); + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + LoadedEnumAttributeVector loaded; + EnumVector enumHist; + if (this->hasPostings()) { + loaded.reserve(numValues); + this->fillEnumIdx(attrReader, + numValues, + eidxs, + loaded); + } else { + EnumVector(eidxs.size(), 0).swap(enumHist); + this->fillEnumIdx(attrReader, + numValues, + eidxs, + enumHist); + } + 
EnumIndexVector().swap(eidxs); + if (this->hasPostings()) { + if (numDocs > 0) { + this->onAddDoc(numDocs - 1); + } + attribute::sortLoadedByEnum(loaded); + this->fillPostingsFixupEnum(loaded); + } else { + this->fixupEnumRefCounts(enumHist); + } + return true; +} + + +template +bool +MultiValueNumericEnumAttribute::onLoad() +{ + AttributeReader attrReader(*this); + bool ok(attrReader.getHasLoadData()); + + if (!ok) + return false; + + this->setCreateSerialNum(attrReader.getCreateSerialNum()); + + if (attrReader.getEnumerated()) + return onLoadEnumerated(attrReader); + + size_t numDocs = attrReader.getNumIdx() - 1; + uint32_t numValues = attrReader.getNumValues(); + + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + if (numDocs > 0) { + this->onAddDoc(numDocs - 1); + } + loadAllAtOnce(attrReader, numDocs, numValues); + + return true; +} + +template +AttributeVector::SearchContext::UP +MultiValueNumericEnumAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + QueryTermSimple::RangeResult res = qTerm->getRange(); + if (this->hasArrayType()) { + if (res.isEqual()) { + return AttributeVector::SearchContext::UP(new ArraySearchContext(std::move(qTerm), *this)); + } else { + return AttributeVector::SearchContext::UP(new ArraySearchContext(std::move(qTerm), *this)); + } + } else { + if (res.isEqual()) { + return AttributeVector::SearchContext::UP(new SetSearchContext(std::move(qTerm), *this)); + } else { + return AttributeVector::SearchContext::UP(new SetSearchContext(std::move(qTerm), *this)); + } + } +} + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp new file mode 100644 index 00000000000..277fdafbf42 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multinumericpostattribute.h" +#include "multinumericpostattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multinumericpostattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h new file mode 100644 index 00000000000..cc42f1e4445 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.h @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "postinglistattribute.h" +#include "i_document_weight_attribute.h" + +namespace search { + +/* + * Implementation of multi value numeric attribute that in addition to enum store and + * multi value mapping uses an underlying posting list to provide faster search. + * This class is used for both array and weighted set types. 
+ * + * B: EnumAttribute + * M: multivalue::Value (array) or + * multivalue::WeightedValue (weighted set) + * M specifies the type stored in the MultiValueMapping + */ +template +class MultiValueNumericPostingAttribute + : public MultiValueNumericEnumAttribute, + protected PostingListAttributeSubBase +{ +private: + struct DocumentWeightAttributeAdapter : IDocumentWeightAttribute + { + const MultiValueNumericPostingAttribute &self; + DocumentWeightAttributeAdapter(const MultiValueNumericPostingAttribute &self_in) : self(self_in) {} + virtual LookupResult lookup(const vespalib::string &term) const override final; + virtual void create(btree::EntryRef idx, std::vector &dst) const override final; + virtual DocumentWeightIterator create(btree::EntryRef idx) const override final; + }; + DocumentWeightAttributeAdapter _document_weight_attribute_adapter; + + friend class PostingListAttributeTest; + template + friend class attribute::PostingSearchContext; // getEnumStore() + typedef MultiValueNumericPostingAttribute SelfType; +public: + typedef typename B::EnumStore EnumStore; + typedef typename EnumStore::Index EnumIndex; +private: + typedef typename B::DocId DocId; + typedef typename B::LoadedVector LoadedVector; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef PostingListAttributeSubBase PostingParent; + typedef typename PostingParent::PostingList PostingList; + typedef typename PostingParent::PostingMap PostingMap; + typedef typename PostingParent::Posting Posting; + typedef EnumPostingTree Dictionary; + typedef typename Dictionary::Iterator DictionaryIterator; + typedef typename Dictionary::ConstIterator DictionaryConstIterator; + typedef typename Dictionary::FrozenView FrozenDictionary; + typedef typename EnumStore::ComparatorType ComparatorType; + + typedef typename MultiValueNumericEnumAttribute::DocIndices DocIndices; + typedef typename MultiValueNumericEnumAttribute::generation_t generation_t; + typedef typename 
MultiValueNumericEnumAttribute::WeightedIndex WeightedIndex; + + typedef typename MultiValueNumericEnumAttribute::ArraySearchContext ArraySearchContext; + typedef typename MultiValueNumericEnumAttribute::SetSearchContext SetSearchContext; + typedef ArraySearchContext ArrayNumericSearchContext; + typedef SetSearchContext SetNumericSearchContext; + typedef attribute::NumericPostingSearchContext + ArrayPostingSearchContext; + typedef attribute::NumericPostingSearchContext + SetPostingSearchContext; + using PostingParent::_postingList; + using PostingParent::clearAllPostings; + using PostingParent::handleFillPostings; + using PostingParent::fillPostingsFixupEnumBase; + using PostingParent::forwardedOnAddDoc; + + virtual void freezeEnumDictionary(); + virtual void mergeMemoryStats(MemoryUsage & total); + virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused); + +public: + MultiValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg); + + virtual + ~MultiValueNumericPostingAttribute(); + + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + virtual const IDocumentWeightAttribute *asDocumentWeightAttribute() const override; + + virtual bool + onAddDoc(DocId doc) + { + return forwardedOnAddDoc(doc, + this->_mvMapping.getNumKeys(), + this->_mvMapping.getCapacityKeys()); + } + + virtual void + fillPostings(LoadedVector & loaded) + { + handleFillPostings(loaded); + } + + virtual attribute::IPostingListAttributeBase * + getIPostingListAttributeBase(void) + { + return this; + } + + virtual void + fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) + { + fillPostingsFixupEnumBase(loaded); + } +}; + + +} // namespace search + diff --git 
a/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp new file mode 100644 index 00000000000..c3e06976316 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multinumericpostattribute.hpp @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +template +void +MultiValueNumericPostingAttribute::freezeEnumDictionary() +{ + this->getEnumStore().freezeTree(); +} + +template +void +MultiValueNumericPostingAttribute::mergeMemoryStats(MemoryUsage & total) +{ + total.merge(this->getPostingList().getMemoryUsage()); +} + +template +void +MultiValueNumericPostingAttribute::applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused) +{ + typedef PostingChangeComputerT PostingChangeComputer; + EnumStore & enumStore = this->getEnumStore(); + ComparatorType compare(enumStore); + + EnumIndexMapper mapper; + PostingMap changePost(PostingChangeComputer::compute(this->getMultiValueMapping(), docIndices, compare, mapper)); + this->updatePostings(changePost); + MultiValueNumericEnumAttribute::applyValueChanges(docIndices, unused); +} + + +template +MultiValueNumericPostingAttribute::MultiValueNumericPostingAttribute(const vespalib::string & name, + const AttributeVector::Config & cfg) + : MultiValueNumericEnumAttribute(name, cfg), + PostingParent(*this, this->getEnumStore()), + _document_weight_attribute_adapter(*this) +{ +} + +template +MultiValueNumericPostingAttribute:: +~MultiValueNumericPostingAttribute(void) +{ + this->disableFreeLists(); + this->disableElemHoldList(); + clearAllPostings(); +} + + +template +void +MultiValueNumericPostingAttribute::removeOldGenerations(generation_t firstUsed) +{ + MultiValueNumericEnumAttribute::removeOldGenerations(firstUsed); + _postingList.trimHoldLists(firstUsed); +} + +template +void 
+MultiValueNumericPostingAttribute::onGenerationChange(generation_t generation) +{ + _postingList.freeze(); + MultiValueNumericEnumAttribute::onGenerationChange(generation); + _postingList.transferHoldLists(generation - 1); +} + +template +AttributeVector::SearchContext::UP +MultiValueNumericPostingAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + std::unique_ptr sc; + sc.reset(new typename std::conditional:: + type(std::move(qTerm), params, *this)); + return sc; +} + + +template +IDocumentWeightAttribute::LookupResult +MultiValueNumericPostingAttribute::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term) const +{ + const Dictionary &dictionary = self._enumStore.getPostingDictionary(); + const FrozenDictionary frozenDictionary(dictionary.getFrozenView()); + DictionaryConstIterator dictItr(btree::BTreeNode::Ref(), dictionary.getAllocator()); + + char *end = nullptr; + int64_t int_term = strtoll(term.c_str(), &end, 10); + if (*end == '\0') { + ComparatorType comp(self._enumStore, int_term); + + dictItr.lower_bound(frozenDictionary.getRoot(), EnumIndex(), comp); + if (dictItr.valid() && !comp(EnumIndex(), dictItr.getKey())) { + btree::EntryRef pidx = dictItr.getData(); + if (pidx.valid()) { + const PostingList &plist = self.getPostingList(); + auto minmax = plist.getAggregated(pidx); + return LookupResult(pidx, plist.frozenSize(pidx), minmax.getMin(), minmax.getMax()); + } + } + } + return LookupResult(); +} + +template +void +MultiValueNumericPostingAttribute::DocumentWeightAttributeAdapter::create(btree::EntryRef idx, std::vector &dst) const +{ + assert(idx.valid()); + self.getPostingList().beginFrozen(idx, dst); +} + +template +DocumentWeightIterator +MultiValueNumericPostingAttribute::DocumentWeightAttributeAdapter::create(btree::EntryRef idx) const +{ + assert(idx.valid()); + return self.getPostingList().beginFrozen(idx); +} + +template +const IDocumentWeightAttribute * 
+MultiValueNumericPostingAttribute::asDocumentWeightAttribute() const +{ + if (this->hasWeightedSetType() && + this->getBasicType() == AttributeVector::BasicType::INT64 && + !this->getConfig().getIsFilter()) { + return &_document_weight_attribute_adapter; + } + return nullptr; +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp new file mode 100644 index 00000000000..9ffc62f219d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.cpp @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multistringattribute.h" +#include "multistringattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multistringattribute"); +namespace search { + +template class MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index32>>; +template class MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index32>>; +template class MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index64>>; +template class MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index64>>; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h new file mode 100644 index 00000000000..2f740cd6b30 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h @@ -0,0 +1,173 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include "enumhintsearchcontext.h" + +namespace search { + +/* + * Implementation of multi value string attribute that uses an underlying enum store + * to store unique string values and a multi value mapping to store the enum store indices + * for each document. + * This class is used for both array and weighted set types. + * + * B: Base class: EnumAttribute + * M: multivalue::Value (array) or + * multivalue::WeightedValue (weighted set) + * M specifies the type stored in the MultiValueMapping + */ +template +class MultiValueStringAttributeT : public MultiValueEnumAttribute +{ +protected: + typedef typename B::EnumStore EnumStore; + typedef typename MultiValueAttribute::MultiValueType WeightedIndex; + typedef typename MultiValueAttribute::ValueType EnumIndex; + typedef typename MultiValueAttribute::MultiValueMapping MultiValueMapping; + typedef typename MultiValueAttribute::ValueVector WeightedIndexVector; + typedef typename MultiValueAttribute::DocumentValues DocIndices; + + typedef StringAttribute::DocId DocId; + typedef StringAttribute::EnumHandle EnumHandle; + typedef StringAttribute::LoadedVector LoadedVector; + typedef StringAttribute::generation_t generation_t; + typedef StringAttribute::WeightedString WeightedString; + typedef StringAttribute::WeightedConstChar WeightedConstChar; + typedef StringAttribute::SearchContext SearchContext; + typedef StringAttribute::Change Change; + typedef StringAttribute::ChangeVector ChangeVector; + typedef StringAttribute::ValueModifier ValueModifier; + typedef StringAttribute::EnumModifier EnumModifier; + typedef StringAttribute::WeightedEnum WeightedEnum; + typedef attribute::EnumHintSearchContext EnumHintSearchContext; + +private: + friend class StringAttributeTest; + +public: + typedef typename MultiValueMappingBaseBase::Histogram Histogram; + + MultiValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c = + 
AttributeVector::Config(AttributeVector::BasicType::STRING, + attribute::CollectionType::ARRAY)); + ~MultiValueStringAttributeT(); + + virtual void + freezeEnumDictionary(void); + + //------------------------------------------------------------------------- + // new read api + //------------------------------------------------------------------------- + virtual const char * get(DocId doc) const { + if (this->getValueCount(doc) == 0) { + return NULL; + } else { + WeightedIndex idx; + this->_mvMapping.get(doc, 0, idx); + return this->_enumStore.getValue(idx.value()); + } + } + template + uint32_t getHelper(DocId doc, BufferType * buffer, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t valueCount = this->_mvMapping.get(doc, indices); + for(uint32_t i = 0, m = std::min(sz, valueCount); i < m; i++) { + buffer[i] = this->_enumStore.getValue(indices[i].value()); + } + return valueCount; + } + virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const { + return getHelper(doc, v, sz); + } + + /// Weighted interface + template + uint32_t getWeightedHelper(DocId doc, WeightedType * buffer, uint32_t sz) const { + const WeightedIndex * indices; + uint32_t valueCount = this->_mvMapping.get(doc, indices); + for (uint32_t i = 0, m = std::min(sz, valueCount); i < m; ++i) { + buffer[i] = WeightedType(this->_enumStore.getValue(indices[i].value()), indices[i].weight()); + } + return valueCount; + } + virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const { + return getWeightedHelper(doc, v, sz); + } + + /* + * Specialization of SearchContext for weighted set type + */ + class StringImplSearchContext : public StringAttribute::StringSearchContext { + public: + StringImplSearchContext(QueryTermSimple::UP qTerm, 
const StringAttribute & toBeSearched) : + StringAttribute::StringSearchContext(std::move(qTerm), toBeSearched) + { } + protected: + const MultiValueStringAttributeT & myAttribute() const { + return static_cast< const MultiValueStringAttributeT & > (attribute()); + } + bool onCmp(DocId docId) const override; + + template + bool collectWeight(DocId doc, int32_t & weight, Collector & collector) const; + }; + + /* + * Specialization of SearchContext for weighted set type + */ + class StringSetImplSearchContext : public StringImplSearchContext { + public: + StringSetImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) : + StringImplSearchContext(std::move(qTerm), toBeSearched) + { } + protected: + bool onCmp(DocId docId, int32_t & weight) const override; + }; + + /* + * Specialization of SearchContext for array type + */ + class StringArrayImplSearchContext : public StringImplSearchContext { + public: + StringArrayImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) : + StringImplSearchContext(std::move(qTerm), toBeSearched) + { } + protected: + bool onCmp(DocId docId, int32_t & weight) const override; + }; + + template + class StringTemplSearchContext : public BT, + public EnumHintSearchContext + { + using BT::queryTerm; + typedef MultiValueStringAttributeT AttrType; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + public: + StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched); + }; + + SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; +}; + + +typedef MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index32> > ArrayStringAttribute; +typedef MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index32> > WeightedSetStringAttribute; +typedef MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index64> > HugeArrayStringAttribute; 
+typedef MultiValueStringAttributeT, multivalue::MVMTemplateArg, multivalue::Index64> > HugeWeightedSetStringAttribute; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp new file mode 100644 index 00000000000..e791adb3231 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -0,0 +1,146 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +//----------------------------------------------------------------------------- +// MultiValueStringAttributeT public +//----------------------------------------------------------------------------- +template +MultiValueStringAttributeT:: +MultiValueStringAttributeT(const vespalib::string &name, + const AttributeVector::Config &c) + : MultiValueEnumAttribute(name, c) +{ +} + +template +MultiValueStringAttributeT::~MultiValueStringAttributeT() +{ +} + + +template +void +MultiValueStringAttributeT::freezeEnumDictionary(void) +{ + this->getEnumStore().freezeTree(); +} + + +template +AttributeVector::SearchContext::UP +MultiValueStringAttributeT::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + std::unique_ptr sc; + if (this->getCollectionType() == attribute::CollectionType::WSET) { + sc.reset(new StringTemplSearchContext(std::move(qTerm), *this)); + } else { + sc.reset(new StringTemplSearchContext(std::move(qTerm), *this)); + } + return sc; +} + +namespace { + +template +class EnumAccessor { +public: + EnumAccessor(const E & enumStore) : _enumStore(enumStore) { } + const char * get(typename E::Index index) const { return _enumStore.getValue(index); } +private: + const E & _enumStore; +}; + +} + +template 
+bool +MultiValueStringAttributeT::StringSetImplSearchContext::onCmp(DocId doc, int32_t & weight) const +{ + StringAttribute::StringSearchContext::CollectWeight collector; + return this->collectWeight(doc, weight, collector); +} + +template +bool +MultiValueStringAttributeT::StringArrayImplSearchContext::onCmp(DocId doc, int32_t & weight) const +{ + StringAttribute::StringSearchContext::CollectHitCount collector; + return this->collectWeight(doc, weight, collector); +} + +template +template +bool +MultiValueStringAttributeT::StringImplSearchContext::collectWeight(DocId doc, int32_t & weight, Collector & collector) const +{ + const WeightedIndex * indices; + uint32_t valueCount = myAttribute()._mvMapping.get(doc, indices); + + EnumAccessor accessor(myAttribute()._enumStore); + collectMatches(indices, valueCount, accessor, collector); + weight = collector.getWeight(); + return collector.hasMatch(); +} + +template +bool +MultiValueStringAttributeT::StringImplSearchContext::onCmp(DocId doc) const +{ + const MultiValueStringAttributeT & attr(static_cast< const MultiValueStringAttributeT & > (attribute())); + const WeightedIndex * indices; + uint32_t valueCount = attr._mvMapping.get(doc, indices); + + for (uint32_t i(0); (i < valueCount); i++) { + if (isMatch(attr._enumStore.getValue(indices[i].value()))) { + return true; + } + } + + return false; +} + +template +template +MultiValueStringAttributeT::StringTemplSearchContext:: +StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) : + BT(std::move(qTerm), toBeSearched), + EnumHintSearchContext(toBeSearched.getEnumStore().getEnumStoreDict(), + toBeSearched.getCommittedDocIdLimit(), + toBeSearched.getStatus().getNumValues()) +{ + const EnumStore &enumStore(toBeSearched.getEnumStore()); + + this->_plsc = static_cast(this); + if (this->valid()) { + if (this->isPrefix()) { + FoldedComparatorType comp(enumStore, queryTerm().getTerm(), true); + lookupRange(comp, comp); + } else if (this->isRegex()) 
{ + vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm().getTerm())); + FoldedComparatorType comp(enumStore, prefix.c_str(), true); + lookupRange(comp, comp); + } else { + FoldedComparatorType comp(enumStore, queryTerm().getTerm()); + lookupTerm(comp); + } + } +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp new file mode 100644 index 00000000000..a447ad1bd8d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.cpp @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multistringpostattribute.h" +#include "multistringpostattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multistringpostattribute"); +namespace search { + +EnumStoreBase::Index +StringEnumIndexMapper::map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const +{ + return _dictionary.find(original, compare).getKey(); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h new file mode 100644 index 00000000000..638e9b870e5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "i_document_weight_attribute.h" + +namespace search { + +/* + * Implementation of multi value string attribute that in addition to enum store and + * multi value mapping uses an underlying posting list to provide faster search. + * This class is used for both array and weighted set types. 
+ * + * B: EnumAttribute + * T: multivalue::Value (array) or + * multivalue::WeightedValue (weighted set) + * T specifies the type stored in the MultiValueMapping + */ +template +class MultiValueStringPostingAttributeT + : public MultiValueStringAttributeT, + protected PostingListAttributeSubBase +{ +private: + /* Adapter that implements IDocumentWeightAttribute for the owning attribute; it stores only a const reference (self) to that attribute. */ struct DocumentWeightAttributeAdapter : IDocumentWeightAttribute + { + const MultiValueStringPostingAttributeT &self; + DocumentWeightAttributeAdapter(const MultiValueStringPostingAttributeT &self_in) : self(self_in) {} + virtual LookupResult lookup(const vespalib::string &term) const override final; + virtual void create(btree::EntryRef idx, std::vector &dst) const override final; + virtual DocumentWeightIterator create(btree::EntryRef idx) const override final; + }; + DocumentWeightAttributeAdapter _document_weight_attribute_adapter; + + friend class PostingListAttributeTest; + template + friend class attribute::PostingSearchContext; // getEnumStore() + friend class StringAttributeTest; + typedef MultiValueStringPostingAttributeT SelfType; + typedef typename B::LoadedVector LoadedVector; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef PostingListAttributeSubBase PostingParent; + typedef typename MultiValueStringAttributeT::DocId DocId; +public: + typedef typename MultiValueStringAttributeT::EnumStore EnumStore; +private: + typedef typename MultiValueStringAttributeT::WeightedIndex WeightedIndex; + typedef typename MultiValueStringAttributeT::DocIndices DocIndices; + typedef typename MultiValueStringAttributeT::generation_t generation_t; + typedef typename PostingParent::PostingList PostingList; + typedef typename PostingParent::PostingMap PostingMap; + typedef typename PostingParent::Posting Posting; + + typedef typename MultiValueStringAttributeT::StringSetImplSearchContext StringSetImplSearchContext; + typedef typename MultiValueStringAttributeT::StringArrayImplSearchContext StringArrayImplSearchContext; + typedef
attribute::StringPostingSearchContext StringSetPostingSearchContext; + typedef attribute::StringPostingSearchContext StringArrayPostingSearchContext; + + typedef EnumPostingTree Dictionary; + typedef typename EnumStore::Index EnumIndex; + typedef typename EnumStore::ComparatorType ComparatorType; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + typedef typename Dictionary::Iterator DictionaryIterator; + typedef typename Dictionary::ConstIterator DictionaryConstIterator; + typedef typename Dictionary::FrozenView FrozenDictionary; + /* Bring the PostingParent members that this class uses unqualified into scope. */ using PostingParent::_postingList; + using PostingParent::clearAllPostings; + using PostingParent::handleFillPostings; + using PostingParent::fillPostingsFixupEnumBase; + using PostingParent::forwardedOnAddDoc; + + /* The three virtual hooks below are defined in multistringpostattribute.hpp. */ virtual void freezeEnumDictionary(); + virtual void mergeMemoryStats(MemoryUsage & total); + virtual void applyValueChanges(const DocIndices & docIndices, EnumStoreBase::IndexVector & unused); + +public: + typedef MultiValueMappingBaseBase::Histogram Histogram; + + /* When no config is passed, the default is a STRING attribute with ARRAY collection type. */ MultiValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector::BasicType::STRING, + attribute::CollectionType::ARRAY)); + ~MultiValueStringPostingAttributeT(); + + void removeOldGenerations(generation_t firstUsed) override; + void onGenerationChange(generation_t generation) override; + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + const IDocumentWeightAttribute *asDocumentWeightAttribute() const override; + + /* Forwards to PostingParent::forwardedOnAddDoc, passing the current key count and key capacity of the multi value mapping; returns its result. */ bool onAddDoc(DocId doc) override { + return forwardedOnAddDoc(doc, this->_mvMapping.getNumKeys(), this->_mvMapping.getCapacityKeys()); + } + + /* Delegates to PostingParent::handleFillPostings. */ void fillPostings(LoadedVector & loaded) override { + handleFillPostings(loaded); + } + + /* Returns this object itself as the IPostingListAttributeBase interface. */ attribute::IPostingListAttributeBase * getIPostingListAttributeBase(void) override { + return this; + } + + /* Delegates to PostingParent::fillPostingsFixupEnumBase. */ void
fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) override { + fillPostingsFixupEnumBase(loaded); + } +}; + +/* Named instantiations of the class above; the Huge variants use multivalue::Index64 where the other two use multivalue::Index32. */ typedef MultiValueStringPostingAttributeT, multivalue::MVMTemplateArg, multivalue::Index32> > ArrayStringPostingAttribute; +typedef MultiValueStringPostingAttributeT, multivalue::MVMTemplateArg, multivalue::Index32> > WeightedSetStringPostingAttribute; +typedef MultiValueStringPostingAttributeT, multivalue::MVMTemplateArg, multivalue::Index64> > HugeArrayStringPostingAttribute; +typedef MultiValueStringPostingAttributeT, multivalue::MVMTemplateArg, multivalue::Index64> > HugeWeightedSetStringPostingAttribute; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp new file mode 100644 index 00000000000..4db2ce1da2c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { + +/* Constructor: initializes the string attribute base with name and config, the posting list base with *this and the enum store, and the document weight adapter with *this. */ template +MultiValueStringPostingAttributeT::MultiValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c) + : MultiValueStringAttributeT(name, c), + PostingParent(*this, this->getEnumStore()), + _document_weight_attribute_adapter(*this) +{ +} + +/* Destructor: disables the free lists and the element hold list, then clears all postings. */ template +MultiValueStringPostingAttributeT::~MultiValueStringPostingAttributeT() +{ + this->disableFreeLists(); + this->disableElemHoldList(); + clearAllPostings(); +} + +/* EnumIndexMapper that keeps a reference to the posting dictionary; map() is only declared here, its definition is not part of this header text. */ class StringEnumIndexMapper : public EnumIndexMapper +{ +public: + StringEnumIndexMapper(const EnumPostingTree & dictionary) : _dictionary(dictionary) { } + EnumStoreBase::Index map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const override; +private: + const EnumPostingTree & _dictionary; +}; + +/* Computes the posting changes for docIndices using a folded comparator and a StringEnumIndexMapper over the posting dictionary, applies them with updatePostings(), and then forwards to the base class applyValueChanges(). */ template +void +MultiValueStringPostingAttributeT:: +applyValueChanges(const DocIndices &docIndices, EnumStoreBase::IndexVector &unused) +{ + typedef PostingChangeComputerT PostingChangeComputer; + EnumStore &enumStore(this->getEnumStore()); + Dictionary &dict(enumStore.getPostingDictionary()); + FoldedComparatorType compare(enumStore); + + StringEnumIndexMapper mapper(dict); + PostingMap changePost(PostingChangeComputer::compute(this->getMultiValueMapping(), docIndices, compare, mapper)); + this->updatePostings(changePost); + MultiValueStringAttributeT::applyValueChanges(docIndices, unused); +} + +/* Calls freezeTree() on the enum store. */ template +void +MultiValueStringPostingAttributeT::freezeEnumDictionary() +{ + this->getEnumStore().freezeTree(); +} + +/* Merges the memory usage reported by the posting list into total. */ template +void +MultiValueStringPostingAttributeT::mergeMemoryStats(MemoryUsage &total) +{ + total.merge(this->_postingList.getMemoryUsage()); +} + + +/* Runs the base class removeOldGenerations(firstUsed) first, then calls trimHoldLists(firstUsed) on the posting list. */ template +void +MultiValueStringPostingAttributeT::removeOldGenerations(generation_t firstUsed) +{ + MultiValueStringAttributeT::removeOldGenerations(firstUsed); + _postingList.trimHoldLists(firstUsed); +} + +/* Freezes the posting list, forwards the generation change to the base class, then calls transferHoldLists(generation - 1) on the posting list. */ template +void 
+MultiValueStringPostingAttributeT::onGenerationChange(generation_t generation) +{ + _postingList.freeze(); + MultiValueStringAttributeT::onGenerationChange(generation); + _postingList.transferHoldLists(generation - 1); +} + + +/* Creates the search context: the concrete class is chosen at compile time through std::conditional (its template arguments are not visible in this text) and is constructed from the moved query term, params.useBitVector() and *this. */ template +AttributeVector::SearchContext::UP +MultiValueStringPostingAttributeT::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + std::unique_ptr sc; + sc.reset(new typename std::conditional:: + type(std::move(qTerm), params.useBitVector(), *this)); + return sc; +} + + +/* Looks up term in the frozen view of the posting dictionary with a folded comparator built from the term. A hit requires a valid iterator for which comp(EnumIndex(), key) is false, and a valid posting reference; the result then carries that reference, its frozen size and the aggregated min and max. In every other case a default constructed LookupResult is returned. */ template +IDocumentWeightAttribute::LookupResult +MultiValueStringPostingAttributeT::DocumentWeightAttributeAdapter::lookup(const vespalib::string &term) const +{ + const Dictionary &dictionary = self._enumStore.getPostingDictionary(); + const FrozenDictionary frozenDictionary(dictionary.getFrozenView()); + DictionaryConstIterator dictItr(btree::BTreeNode::Ref(), dictionary.getAllocator()); + FoldedComparatorType comp(self._enumStore, term.c_str()); + + dictItr.lower_bound(frozenDictionary.getRoot(), EnumIndex(), comp); + if (dictItr.valid() && !comp(EnumIndex(), dictItr.getKey())) { + btree::EntryRef pidx = dictItr.getData(); + if (pidx.valid()) { + const PostingList &plist = self.getPostingList(); + auto minmax = plist.getAggregated(pidx); + return LookupResult(pidx, plist.frozenSize(pidx), minmax.getMin(), minmax.getMax()); + } + } + return LookupResult(); +} + +/* Asserts that idx is valid and passes idx and dst on to beginFrozen() of the posting list. */ template +void +MultiValueStringPostingAttributeT::DocumentWeightAttributeAdapter::create(btree::EntryRef idx, std::vector &dst) const +{ + assert(idx.valid()); + self.getPostingList().beginFrozen(idx, dst); +} + +/* Asserts that idx is valid and returns the result of beginFrozen(idx) on the posting list. */ template +DocumentWeightIterator +MultiValueStringPostingAttributeT::DocumentWeightAttributeAdapter::create(btree::EntryRef idx) const +{ + assert(idx.valid()); + return self.getPostingList().beginFrozen(idx); +} + +/* Returns the document weight adapter only when the attribute has weighted set type, has basic type STRING and is not configured as a filter; otherwise returns nullptr. */ template +const IDocumentWeightAttribute * +MultiValueStringPostingAttributeT::asDocumentWeightAttribute() const +{ + if 
(this->hasWeightedSetType() && + this->getBasicType() == AttributeVector::BasicType::STRING && + !this->getConfig().getIsFilter()) { + return &_document_weight_attribute_adapter; + } + return nullptr; +} + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/attribute/multivalue.h b/searchlib/src/vespa/searchlib/attribute/multivalue.h new file mode 100644 index 00000000000..ae8a96ff6dc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalue.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +namespace multivalue { + +/* Multi value element that stores no weight: weight() always returns 1, the weight argument of the two argument constructor is ignored, and setWeight() and incWeight() do nothing. */ template +class Value { +public: + typedef T ValueType; + Value() + : _v() + { + } + Value(T v) : _v(v) { } + Value(T v, int32_t w) : _v(v) { (void) w; } + T value() const { return _v; } + operator T () const { return _v; } + operator T & () { return _v; } + int32_t weight() const { return 1; } + void setWeight(int32_t w) { (void) w; } + void incWeight(int32_t w) { (void) w; } + bool operator ==(const Value & rhs) const { return _v == rhs._v; } + bool operator <(const Value & rhs) const { return _v < rhs._v; } + bool operator >(const Value & rhs) const { return _v > rhs._v; } + static bool hasWeight(void) { return false; } + + static const bool _hasWeight = false; +private: + T _v; +}; + +/* Multi value element that stores an int32_t weight next to the value; the default constructor sets the weight to 1. The comparison operators look at the value only and never at the weight. */ template +class WeightedValue { +public: + typedef T ValueType; + WeightedValue() : _v(), _w(1) { } + WeightedValue(T v, int32_t w) : _v(v), _w(w) { } + T value() const { return _v; } + operator T () const { return _v; } + operator T & () { return _v; } + int32_t weight() const { return _w; } + void setWeight(int32_t w) { _w = w; } + void incWeight(int32_t w) { _w += w; } + + bool operator==(const WeightedValue & rhs) const { return _v == rhs._v; } + bool operator <(const WeightedValue & rhs) const { return _v < rhs._v; } + bool operator >(const WeightedValue & rhs) const { return _v > rhs._v; } + static 
bool hasWeight(void) { return true; } + + /* Constant with the same value that hasWeight() returns. */ static const bool _hasWeight = true; +private: + T _v; + int32_t _w; +}; + +} +} + + diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp new file mode 100644 index 00000000000..9db883f7ad2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "multivalueattribute.h" +#include "multivalueattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.multivalueattribute"); +/* The namespace body below is empty; this file only includes headers and sets up logging. */ namespace search { + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h new file mode 100644 index 00000000000..4d94e74d37e --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.h @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { + +/* + * Implementation of multi value attribute using an underlying multi value mapping + * + * B: Base class + * M: MultiValueType (MultiValueMapping template argument) + */ +template +class MultiValueAttribute : public B +{ +protected: + typedef typename B::DocId DocId; + typedef typename B::Change Change; + typedef typename B::ChangeVector ChangeVector; + typedef typename B::ChangeVector::const_iterator ChangeVectorIterator; + + typedef typename M::Value MultiValueType; + typedef MultiValueMappingT MultiValueMapping; + typedef typename MultiValueMappingBaseBase::Histogram Histogram; + typedef typename MultiValueType::ValueType ValueType; + typedef std::vector ValueVector; + typedef typename ValueVector::iterator ValueVectorIterator; + typedef std::vector > DocumentValues; + + MultiValueMapping _mvMapping; + + MultiValueMapping & getMultiValueMapping() { return _mvMapping; } + const MultiValueMapping & getMultiValueMapping() const { return _mvMapping; } + + /* + * Iterate through the change vector and calculate new values for documents with changes + */ + void applyAttributeChanges(DocumentValues & docValues); + + /* + * Extracts the value carried by change c into data. applyAttributeChanges() + * applies append, remove and weight changes only when this returns true. + */ + virtual bool extractChangeData(const Change & c, ValueType & data) = 0; + + /** + * Called when a new document has been added. + * Can be overridden by subclasses that need to resize structures as a result of this. + * Should return true if underlying structures were resized. 
+ **/ + virtual bool onAddDoc(DocId doc) { (void) doc; return false; } + + /* Address space usage as reported by the multi value mapping. */ + virtual AddressSpace getMultiValueAddressSpaceUsage() const override; + +public: + MultiValueAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); + virtual ~MultiValueAttribute(); + + /* + * Adds a key for a new document to the mapping and hands the new document + * id back through doc. The implementation always returns true. + */ + virtual bool addDoc(DocId & doc); + /* Number of values stored for doc; 0 when doc >= getNumDocs(). */ + virtual uint32_t getValueCount(DocId doc) const; + virtual const MultiValueMappingBaseBase *getMultiValueBase() const override { + return &getMultiValueMapping(); + } + +private: + /* Weight of value number idx of doc, as read from the mapping. */ + virtual int32_t getWeight(DocId doc, uint32_t idx) const; + + /* Total value count as tracked by the mapping. */ + virtual uint64_t + getTotalValueCount(void) const; + +public: + /* Forwards to the mapping's clearDocs() for the range [lidLow, lidLimit). */ + virtual void + clearDocs(DocId lidLow, DocId lidLimit); + + /* + * Shrinks the mapping's keys to the committed docid limit and sets the + * number of documents to that limit. + */ + virtual void + onShrinkLidSpace(); +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp new file mode 100644 index 00000000000..759364b4f93 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattribute.hpp @@ -0,0 +1,203 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { + +/* + * Constructs the base class B and sets up the multi value mapping with the + * committed docid limit reference and the configured grow strategy. + */ +template +MultiValueAttribute:: +MultiValueAttribute(const vespalib::string &baseFileName, + const AttributeVector::Config &cfg) + : B(baseFileName, cfg), + _mvMapping(this->getCommittedDocIdLimitRef(), cfg.getGrowStrategy()) +{ +} + +template +MultiValueAttribute::~MultiValueAttribute() +{ +} + +/* + * Reads value number idx of doc from the mapping and returns its weight. + */ +template +int32_t MultiValueAttribute::getWeight(DocId doc, uint32_t idx) const +{ + MultiValueType value; + this->_mvMapping.get(doc, idx, value); + return (value.weight()); +} + + +/* + * Walks the change vector document by document. For each document the + * currently stored values are loaded, changes before the last CLEARDOC are + * skipped, and the remaining changes are applied in order. The resulting + * value vector is appended to docValues together with the document id. + * While doing so a histogram of needed vector sizes is built; if the mapping + * does not have enough capacity for it, old generations are removed and the + * mapping is compacted. + */ +template +void +MultiValueAttribute::applyAttributeChanges(DocumentValues & docValues) +{ + Histogram capacityNeeded = _mvMapping.getEmptyHistogram(); + + // compute new values for each document with changes + for (ChangeVectorIterator current(this->_changes.begin()), end(this->_changes.end()); (current != end); ) { + DocId doc = current->_doc; + + ValueVector newValues(_mvMapping.getValueCount(doc)); + _mvMapping.get(doc, newValues); + + // find last clear doc + ChangeVectorIterator lastClearDoc = end; + for (ChangeVectorIterator iter = current; (iter != end) && (iter->_doc == doc); ++iter) { + if (iter->_type == ChangeBase::CLEARDOC) { + lastClearDoc = iter; + } + } + + // use last clear doc if found + if (lastClearDoc != end) { + current = lastClearDoc; + } + + // iterate through all changes for this document + for (; (current != end) && (current->_doc == doc); ++current) { + + if (current->_type == ChangeBase::CLEARDOC) { + newValues.clear(); + continue; + } + + ValueType data; + bool hasData = extractChangeData(*current, data); + + if (current->_type == ChangeBase::APPEND) { + if (hasData) { + /* array: always append; weighted set: overwrite the weight of an existing value, else append */ + if (this->hasArrayType()) { + newValues.push_back(MultiValueType(data, current->_weight)); + } else if (this->hasWeightedSetType()) { + ValueVectorIterator witer; + for (witer = newValues.begin(); witer != newValues.end(); ++witer) { + if (witer->value() == data) { + break; + } + } + if (witer != newValues.end()) { + 
witer->setWeight(current->_weight); + } else { + newValues.push_back(MultiValueType(data, current->_weight)); + } + } + } + } else if (current->_type == ChangeBase::REMOVE) { + if (hasData) { + /* erase every entry holding this value */ + for (ValueVectorIterator witer = newValues.begin(); witer != newValues.end(); ) { + if (witer->value() == data) { + witer = newValues.erase(witer); + } else { + ++witer; + } + } + } + } else if ((current->_type >= ChangeBase::INCREASEWEIGHT) && (current->_type <= ChangeBase::DIVWEIGHT)) { + /* weight arithmetic is only applied to weighted sets */ + if (this->hasWeightedSetType() && hasData) { + ValueVectorIterator witer; + for (witer = newValues.begin(); witer != newValues.end(); ++witer) { + if (witer->value() == data) { + break; + } + } + if (witer != newValues.end()) { + witer->setWeight(this->applyWeightChange(witer->weight(), *current)); + if (witer->weight() == 0 && this->getInternalCollectionType().removeIfZero()) { + newValues.erase(witer); + } + } else if (this->getInternalCollectionType().createIfNonExistant()) { + /* missing value: start from weight 0, and skip creation if the result is 0 and removeIfZero is set */ + int32_t weight = this->applyWeightChange(0, *current); + if (weight != 0 || !this->getInternalCollectionType().removeIfZero()) { + newValues.push_back(MultiValueType(data, weight)); + } + } + } + } + } + + // update histogram + uint32_t maxValues = MultiValueMapping::maxValues(); + if (newValues.size() < maxValues) { + capacityNeeded[newValues.size()] += 1; + } else { + capacityNeeded[maxValues] += 1; + } + + this->checkSetMaxValueCount(newValues.size()); + + docValues.push_back(std::make_pair(doc, ValueVector())); + docValues.back().second.swap(newValues); + } + + if (!_mvMapping.enoughCapacity(capacityNeeded)) { + this->removeAllOldGenerations(); + _mvMapping.performCompaction(capacityNeeded); + } +} + + +/* + * Returns the address space usage reported by the mapping. + */ +template +AddressSpace +MultiValueAttribute::getMultiValueAddressSpaceUsage() const +{ + return _mvMapping.getAddressSpaceUsage(); +} + + +/* + * Adds a key for a new document; the new document id is written to doc. + * The generation is incremented when the mapping was full before the key was + * added or when onAddDoc() returns true; otherwise old generations are + * removed directly. Always returns true. + */ +template +bool +MultiValueAttribute::addDoc(DocId & doc) +{ + bool incGen = this->_mvMapping.isFull(); + this->_mvMapping.addKey(doc); + this->incNumDocs(); + 
this->updateUncommittedDocIdLimit(doc); + incGen |= onAddDoc(doc); + if (incGen) { + this->incGeneration(); + } else + this->removeAllOldGenerations(); + return true; +} + +/* + * Returns the number of values stored for doc, or 0 when doc >= getNumDocs(). + */ +template +uint32_t +MultiValueAttribute::getValueCount(DocId doc) const +{ + if (doc >= this->getNumDocs()) { + return 0; + } + return this->_mvMapping.getValueCount(doc); +} + + +/* + * Returns the total value count tracked by the mapping. + */ +template +uint64_t +MultiValueAttribute::getTotalValueCount(void) const +{ + return _mvMapping.getTotalValueCnt(); +} + + +/* + * Forwards to the mapping's clearDocs(), passing this attribute along. + */ +template +void +MultiValueAttribute::clearDocs(DocId lidLow, DocId lidLimit) +{ + _mvMapping.clearDocs(lidLow, lidLimit, *this); +} + + +/* + * Shrinks the mapping's keys to the committed docid limit and sets the number + * of documents to that limit. + */ +template +void +MultiValueAttribute::onShrinkLidSpace() +{ + uint32_t committedDocIdLimit = this->getCommittedDocIdLimit(); + _mvMapping.shrinkKeys(committedDocIdLimit); + this->setNumDocs(committedDocIdLimit); +} + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp new file mode 100644 index 00000000000..b3d38f60546 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "multivalueattributesaver.h" + +using vespalib::GenerationHandler; +using search::IAttributeSaveTarget; + +namespace search { + +/* + * Passes the generation guard and config on to AttributeSaver and stores the + * result of mvMapping.getIndicesCopy(), taken at construction time. + */ +template +MultiValueAttributeSaver:: +MultiValueAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMappingBase &mvMapping) + : AttributeSaver(std::move(guard), cfg), + _frozenIndices(mvMapping.getIndicesCopy()) +{ +} + + +template +MultiValueAttributeSaver::~MultiValueAttributeSaver() +{ +} + +template class MultiValueAttributeSaver; + +template class MultiValueAttributeSaver; + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h new file mode 100644 index 00000000000..5332d01c980 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaver.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributesaver.h" +#include "iattributesavetarget.h" +#include "multivaluemapping.h" + +namespace search { + +/* + * Base class for saving a multivalue attribute (e.g. weighted set of int). 
+ * + * Template argument IndexT is either multivalue::Index32 or + * multivalue::Index64 + */ +template +class MultiValueAttributeSaver : public AttributeSaver +{ +protected: + using Index = IndexT; + using GenerationHandler = vespalib::GenerationHandler; + using IndexCopyVector = + typename MultiValueMappingBase::IndexCopyVector; + /* Copy of the mapping's indices, filled in by the constructor. */ + IndexCopyVector _frozenIndices; + +public: + MultiValueAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + const MultiValueMappingBase &mvMapping); + + virtual ~MultiValueAttributeSaver(); +}; + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h new file mode 100644 index 00000000000..92c9a169404 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivalueattributesaverutils.h @@ -0,0 +1,97 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { + +namespace multivalueattributesaver { + +/* + * Class to write to count files for multivalue attributes (.idx suffix). + */ +class CountWriter +{ + std::unique_ptr _countWriter; + uint64_t _cnt; + +public: + /* Allocates a buffer writer from the idx writer and writes an initial 32 bit zero. */ + CountWriter(IAttributeSaveTarget &saveTarget) + : _countWriter(saveTarget.idxWriter().allocBufferWriter()), + _cnt(0) + { + uint32_t initialCount = 0; + _countWriter->write(&initialCount, sizeof(uint32_t)); + } + + /* Flushes the buffer writer. */ + ~CountWriter() + { + _countWriter->flush(); + } + + /* + * Adds count to the running 64 bit total and writes the total, cast down + * to 32 bits, to the count file. + */ + void + writeCount(uint32_t count) { + _cnt += count; + uint32_t cnt32 = static_cast(_cnt); + _countWriter->write(&cnt32, sizeof(uint32_t)); + } +}; + +/* + * Class to write to weight files (or not) for multivalue attributes. + */ +template +class WeightWriter; + +/* + * Class to write to weight files for multivalue attributes (.weight suffix). 
+ */ +template <> +class WeightWriter +{ + std::unique_ptr _weightWriter; + +public: + WeightWriter(IAttributeSaveTarget &saveTarget) + : _weightWriter(saveTarget.weightWriter().allocBufferWriter()) + { + } + + /* Flushes the buffer writer. */ + ~WeightWriter() + { + _weightWriter->flush(); + } + + /* Writes weight() of each of the count values as an int32_t, in order. */ + template + void + writeWeights(const MultiValueT *values, uint32_t count) { + for (uint32_t i = 0; i < count; ++i) { + int32_t weight = values[i].weight(); + _weightWriter->write(&weight, sizeof(int32_t)); + } + } +}; + +/* + * Class to not write to weight files for multivalue attributes. + */ +template <> +class WeightWriter +{ +public: + WeightWriter(IAttributeSaveTarget &) + { + } + + ~WeightWriter() + { + } + + /* Intentionally empty: no weights are written by this specialisation. */ + template + void + writeWeights(const MultiValueT *, uint32_t) { + } +}; + +} // namespace search::multivalueattributesaver + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp new file mode 100644 index 00000000000..e8e21073323 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.cpp @@ -0,0 +1,858 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".searchlib.attribute.multivaluemapping"); +#include "multivaluemapping.h" +#include "multivaluemapping.hpp" +#include "attributevector.h" +#include "loadedenumvalue.h" + +namespace search { + +using vespalib::GenerationHeldBase; + +/* + * Sizes the single vector status table to maxValues * maxAlternatives entries + * and the vector vector status table to maxAlternatives entries; all pending + * compaction flags start cleared and the total value count starts at 0. + */ +MultiValueMappingBaseBase::MultiValueMappingBaseBase(size_t maxValues, + size_t maxAlternatives) + : _singleVectorsStatus(maxValues * maxAlternatives), + _vectorVectorsStatus(maxAlternatives), + _genHolder(), + _pendingCompactSingleVector(), + _pendingCompactVectorVector(false), + _pendingCompact(false), + _totalValueCnt(0) +{ +} + +MultiValueMappingBaseBase::~MultiValueMappingBaseBase() +{ +} + +/* + * Logs a fatal message with both sizes and aborts the process. + */ +void +MultiValueMappingBaseBase::failNewSize(uint64_t minNewSize, uint64_t maxSize) +{ + LOG(fatal, + "MultiValueMappingBase::failNewSize: " + "Minimum new size (%" PRIu64 ") exceeds max size (%" PRIu64 ")", + minNewSize, maxSize); + abort(); +} + +/* + * Returns 1.5 times (used - dead + needed) when that fits within maxSize. + * Otherwise returns maxSize as long as (used - dead + needed) + 1000000 still + * fits within maxSize. Otherwise calls failNewSize(), which aborts. + */ +size_t +MultiValueMappingBaseBase:: +computeNewSize(size_t used, size_t dead, size_t needed, size_t maxSize) +{ + float growRatio = 1.5f; + size_t newSize = static_cast + ((used - dead + needed) * growRatio); + if (newSize <= maxSize) + return newSize; + newSize = (used - dead + needed) + 1000000; + if (newSize <= maxSize) + return maxSize; + failNewSize(newSize, maxSize); + return 0; +} + +MultiValueMappingBaseBase::Histogram::Histogram(size_t maxValues) : + _maxValues(maxValues), + _histogram() +{ +} + +MultiValueMappingBaseBase::Histogram +MultiValueMappingBaseBase::getEmptyHistogram(size_t maxValues) const +{ + return Histogram(maxValues); +} + +/* + * Reads one value count per document from reader, for getNumIdx() - 1 + * documents, and counts how many documents have each value count. + */ +MultiValueMappingBaseBase::Histogram +MultiValueMappingBaseBase::getHistogram(AttributeVector::ReaderBase &reader) + const +{ + Histogram capacityNeeded = getEmptyHistogram(); + uint32_t numDocs(reader.getNumIdx() - 1); + for (AttributeVector::DocId doc = 0; doc < numDocs; ++doc) { + const uint32_t valueCount(reader.getNextValueCount()); + capacityNeeded[valueCount] += 1; + } + return capacityNeeded; +} + + +/* + * Clears _pendingCompact, but only when no vector vector compaction and no + * single vector compaction is pending any more. + */ +void 
+MultiValueMappingBaseBase::clearPendingCompact(void) +{ + if (!_pendingCompact || _pendingCompactVectorVector || + !_pendingCompactSingleVector.empty()) + return; + _pendingCompact = false; +} + + +/* + * Hold list entry for a whole vector: when the entry is destroyed it calls + * doneHoldVector() on the mapping for the stored index. + */ +template +class MultiValueMappingHeldVector : public GenerationHeldBase +{ + typedef I Index; + + MultiValueMappingBase &_mvmb; + Index _idx; + +public: + MultiValueMappingHeldVector(size_t size, + MultiValueMappingBase &mvmb, + Index &idx) + : GenerationHeldBase(size), + _mvmb(mvmb), + _idx(idx) + { + } + + virtual + ~MultiValueMappingHeldVector(void) + { + _mvmb.doneHoldVector(_idx); + } +}; + + +/* + * Clears the vector identified by idx and marks its status slot FREE: the + * single vector slot when idx.values() is below Index::maxValues(), the + * vector vector slot when it equals Index::maxValues(). + */ +template +void MultiValueMappingBase::doneHoldVector(Index idx) +{ +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "free vector: idx.values() = %u, idx.alternative() = %u", + idx.values(), idx.alternative()); +#endif + clearVector(idx); + if (idx.values() < Index::maxValues()) { + _singleVectorsStatus[idx.vectorIdx()] = FREE; + } else if (idx.values() == Index::maxValues()) { + _vectorVectorsStatus[idx.alternative()] = FREE; + } +} + + +/* + * Merges the memory usage of the index vector with that of every vector not + * in HOLD state, then adds the bytes held by the generation holder as + * allocated bytes on hold. + */ +template +MemoryUsage +MultiValueMappingBase::getMemoryUsage() const +{ + MemoryUsage retval = _indices.getMemoryUsage(); + + for (uint32_t i = 0; i < _singleVectorsStatus.size(); ++i) { + if (_singleVectorsStatus[i] == HOLD) + continue; + const MemoryUsage & memUsage(getSingleVectorUsage(i)); + retval.merge(memUsage); + } + for (uint32_t i = 0; i < _vectorVectorsStatus.size(); ++i) { + if (_vectorVectorsStatus[i] == HOLD) + continue; + const MemoryUsage & memUsage(getVectorVectorUsage(i)); + retval.merge(memUsage); + } + retval.incAllocatedBytesOnHold(_genHolder.getHeldBytes()); + return retval; +} + +/* + * Reports the largest address space use among ACTIVE vectors against the + * limit Index::offsetSize(). + */ +template +AddressSpace +MultiValueMappingBase::getAddressSpaceUsage() const +{ + size_t addressSpaceUsed = 0; + for (uint32_t i = 0; i < _singleVectorsStatus.size(); ++i) { + if (_singleVectorsStatus[i] == ACTIVE) { + addressSpaceUsed = std::max(addressSpaceUsed, getSingleVectorAddressSpaceUsed(i)); + } + } + for (uint32_t i = 0; i < 
_vectorVectorsStatus.size(); ++i) { + if (_vectorVectorsStatus[i] == ACTIVE) { + addressSpaceUsed = std::max(addressSpaceUsed, getVectorVectorAddressSpaceUsed(i)); + } + } + return AddressSpace(addressSpaceUsed, Index::offsetSize()); +} + +/* + * Sets up the index vector from the grow strategy and the generation holder, + * then reserves and resizes it to numKeys entries. + */ +template +MultiValueMappingBase::MultiValueMappingBase(uint32_t &committedDocIdLimit, + uint32_t numKeys, + const GrowStrategy & gs) + : MultiValueMappingBaseBase(Index::maxValues(), Index::alternativeSize()), + _indices(gs.getDocsInitialCapacity(), + gs.getDocsGrowPercent(), + gs.getDocsGrowDelta(), + _genHolder), + _committedDocIdLimit(committedDocIdLimit) +{ + _indices.unsafe_reserve(numKeys); + _indices.unsafe_resize(numKeys); +} + +template +MultiValueMappingBase::~MultiValueMappingBase() +{ +} + +/* + * Marks the vector identified by idx as HOLD and hands a + * MultiValueMappingHeldVector, sized by the vector's allocated bytes, to the + * generation holder. + */ +template +void MultiValueMappingBase::insertIntoHoldList(Index idx) +{ + size_t holdBytes = 0u; + if (idx.values() < Index::maxValues()) { + _singleVectorsStatus[idx.vectorIdx()] = HOLD; + holdBytes = getSingleVectorUsage(idx.vectorIdx()).allocatedBytes(); + } else { + _vectorVectorsStatus[idx.alternative()] = HOLD; + holdBytes = getVectorVectorUsage(idx.alternative()).allocatedBytes(); + } + GenerationHeldBase::UP hold(new MultiValueMappingHeldVector(holdBytes, + *this, + idx)); + _genHolder.hold(std::move(hold)); +} + + +/* + * Marks the vector identified by idx as ACTIVE. + */ +template +void MultiValueMappingBase::setActiveVector(Index idx) +{ + if (idx.values() < Index::maxValues()) { + _singleVectorsStatus[idx.vectorIdx()] = ACTIVE; + } else { + _vectorVectorsStatus[idx.alternative()] = ACTIVE; + } +} + +/* + * Clears the hold lists, resets the index vector and refills it with numKeys + * default constructed indices. + */ +template +void +MultiValueMappingBase::reset(uint32_t numKeys) +{ + _genHolder.clearHoldLists(); + _indices.reset(); + _indices.unsafe_reserve(numKeys); + for (size_t i = 0; i < numKeys; ++i) { + _indices.push_back(Index()); + } +} + + +/* + * Appends a default constructed index and returns its position through key. + */ +template +void +MultiValueMappingBase::addKey(uint32_t & key) +{ + uint32_t retval = _indices.size(); + _indices.push_back(Index()); + key = retval; +} + + +/* + * Shrinks the index vector to newSize. Asserts that newSize is at least the + * committed docid limit and strictly smaller than the current size. + */ +template +void +MultiValueMappingBase::shrinkKeys(uint32_t newSize) +{ + assert(newSize >= 
_committedDocIdLimit); + assert(newSize < _indices.size()); + _indices.shrink(newSize); +} + + +/* + * Calls v.clearDoc(lid) for every lid in [lidLow, lidLimit) whose index is + * non-zero. + */ +template +void +MultiValueMappingBase::clearDocs(uint32_t lidLow, uint32_t lidLimit, + AttributeVector &v) +{ + assert(lidLow <= lidLimit); + assert(lidLimit <= v.getNumDocs()); + assert(lidLimit <= _indices.size()); + for (uint32_t lid = lidLow; lid < lidLimit; ++lid) { + if (_indices[lid].idx() != 0) { + v.clearDoc(lid); + } + } +} + +/* + * Hold list entry for a single element: when the entry is destroyed it calls + * doneHoldElem() on the mapping for the stored index. + */ +template +class MultiValueMappingHoldElem : public GenerationHeldBase +{ + typedef I Index; + + MultiValueMappingBase &_mvmb; + Index _idx; +public: + MultiValueMappingHoldElem(size_t size, + MultiValueMappingBase &mvmb, + Index idx) + : GenerationHeldBase(size), + _mvmb(mvmb), + _idx(idx) + { + } + + virtual ~MultiValueMappingHoldElem() { + _mvmb.doneHoldElem(_idx); + } +}; + + +/* + * Hands a MultiValueMappingHoldElem for idx, accounted with size bytes, to + * the generation holder. + */ +template +void +MultiValueMappingBase::holdElem(Index idx, size_t size) +{ + GenerationHeldBase::UP hold(new MultiValueMappingHoldElem(size, *this, + idx)); + _genHolder.hold(std::move(hold)); +} + + +template class MultiValueMappingBase; +template class MultiValueMappingBase; + +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class 
MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; + +template class MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::Value >; +template class 
MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::Value >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + multivalue::WeightedValue >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; +template class MultiValueMappingVector< + vespalib::Array > >; + +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT< + multivalue::Value >; 
+template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT >; +template class MultiValueMappingT< + multivalue::WeightedValue >; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT< + multivalue::Value, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT, + multivalue::Index64>; +template class MultiValueMappingT< + multivalue::WeightedValue, + multivalue::Index64>; + +using attribute::SaveLoadedEnum; +using attribute::NoSaveLoadedEnum; +using attribute::SaveEnumHist; +typedef EnumStoreBase::Index EnumIndex; + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + 
size_t mapSize, + SaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveEnumHist &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveEnumHist &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveEnumHist &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const EnumIndex *map, + size_t mapSize, + SaveEnumHist &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int8_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int16_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int32_t *map, + size_t 
mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int64_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const float *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const double *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int8_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int16_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int32_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int64_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const float *map, + size_t 
mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index32>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const double *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int8_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int16_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int32_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int64_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const float *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const double *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int8_t *map, + size_t 
mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int16_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int32_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const int64_t *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const float *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +template +uint32_t +MultiValueMappingT, + multivalue::Index64>:: +fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const double *map, + size_t mapSize, + NoSaveLoadedEnum &saver, + uint32_t numDocs, + bool hasWeights); + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h new file mode 100644 index 00000000000..3134f826774 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.h @@ -0,0 +1,1498 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "address_space.h" +#include "enumstorebase.h" +#include + +namespace search { + +namespace multivalue { + +template +class Index { +private: + // unused X | values (NUM_VALUE_BITS bit) | + // alternative (NUM_ALT_BITS bit) | offset (NUM_OFFSET_BITS bit) + T _idx; +public: + Index() : _idx(0) {} + Index(uint32_t values_, uint32_t alternative_, uint32_t offset_) + : _idx(0) + { + _idx += static_cast(values_) << (NUM_ALT_BITS+NUM_OFFSET_BITS); + _idx += static_cast((alternative_) & + ((1<> (NUM_ALT_BITS+NUM_OFFSET_BITS); + } + + uint32_t + alternative(void) const + { + return (_idx >> NUM_OFFSET_BITS) & ((1<> NUM_OFFSET_BITS; + } + + uint32_t offset(void) const + { + return (_idx & ((1u << NUM_OFFSET_BITS) - 1)); + } + + T idx() const { return _idx; } + + static uint32_t + maxValues(void) + { + return (1 << NUM_VALUE_BITS) - 1; + } + + static uint32_t + alternativeSize(void) + { + return 1 << NUM_ALT_BITS; + } + + static T + offsetSize(void) + { + return 1 << (NUM_OFFSET_BITS); + } +}; + +typedef Index Index32; +typedef Index Index64; + +template +struct MVMTemplateArg { + typedef T Value; + typedef I Index; +}; + +} + +class MultiValueMappingVectorBaseBase +{ +public: + MultiValueMappingVectorBaseBase() + : _used(0), + _dead(0), + _wantCompact(false), + _usage() + { + } + + uint32_t used() const { return _used; } + uint32_t dead() const { return _dead; } + void incUsed(uint32_t inc) { _used += inc; } + void incDead(uint32_t inc) { _dead += inc; } + + void + setWantCompact(void) + { + _wantCompact = true; + } + + bool + getWantCompact(void) const + { + return _wantCompact; + } + + MemoryUsage & getUsage() { return _usage; } + const MemoryUsage & getUsage() const { return _usage; } +protected: + void reset() { _used = 0; _dead = 0; } +private: + uint32_t _used; + uint32_t _dead; + bool _wantCompact; + MemoryUsage _usage; +}; + + +class 
MultiValueMappingBaseBase +{ +public: + class Histogram + { + private: + typedef vespalib::hash_map HistogramM; + public: + typedef HistogramM::const_iterator const_iterator; + Histogram(size_t maxValues); + uint32_t & operator [] (uint32_t i) { return _histogram[std::min(i, _maxValues)]; } + const_iterator begin() const { return _histogram.begin(); } + const_iterator end() const { return _histogram.end(); } + private: + uint32_t _maxValues; + HistogramM _histogram; + }; +protected: + MultiValueMappingBaseBase(size_t maxValues, size_t maxAlternatives); + virtual ~MultiValueMappingBaseBase(); + //------------------------------------------------------------------------- + // private inner classes + //------------------------------------------------------------------------- + + enum VectorStatus { + ACTIVE, FREE, HOLD + }; + + typedef AttributeVector::generation_t generation_t; + typedef vespalib::Array StatusVector; + typedef vespalib::GenerationHolder GenerationHolder; + + // active -> hold + void incValueCnt(uint32_t cnt) { _totalValueCnt += cnt; } + void decValueCnt(uint32_t cnt) { _totalValueCnt -= cnt; } + + StatusVector _singleVectorsStatus; + StatusVector _vectorVectorsStatus; + GenerationHolder _genHolder; + std::set _pendingCompactSingleVector; + bool _pendingCompactVectorVector; + bool _pendingCompact; + Histogram getEmptyHistogram(size_t maxValues) const; + virtual const MemoryUsage & getSingleVectorUsage(size_t i) const = 0; + virtual const MemoryUsage & getVectorVectorUsage(size_t i) const = 0; + virtual size_t getSingleVectorAddressSpaceUsed(size_t i) const = 0; + virtual size_t getVectorVectorAddressSpaceUsed(size_t i) const = 0; + +private: + size_t _totalValueCnt; + +public: + virtual Histogram getEmptyHistogram() const = 0; + virtual MemoryUsage getMemoryUsage() const = 0; + Histogram getHistogram(AttributeVector::ReaderBase & reader) const; + size_t getTotalValueCnt() const { return _totalValueCnt; } + static void failNewSize(uint64_t minNewSize, 
uint64_t maxSize); + + void + clearPendingCompact(void); + + static size_t + computeNewSize(size_t used, size_t dead, size_t needed, size_t maxSize); + + void + transferHoldLists(generation_t generation) + { + _genHolder.transferHoldLists(generation); + } + + void + trimHoldLists(generation_t firstUsed) + { + _genHolder.trimHoldLists(firstUsed); + } +}; + + +template +class MultiValueMappingBase : public MultiValueMappingBaseBase +{ +protected: + typedef I Index; + MultiValueMappingBase(uint32_t &committedDocIdLimit, + uint32_t numKeys = 0, + const GrowStrategy &gs = GrowStrategy()); + virtual ~MultiValueMappingBase(); + + typedef search::attribute::RcuVectorBase IndexVector; + IndexVector _indices; + uint32_t &_committedDocIdLimit; + + // active -> hold + void insertIntoHoldList(Index idx); + void setActiveVector(Index idx); + + void reset(uint32_t numKeys=0); +private: + virtual void clearVector(Index idx) = 0; + +public: + using IndexCopyVector = vespalib::Array; + + void + doneHoldVector(Index idx); + + virtual Histogram getEmptyHistogram() const override { + return MultiValueMappingBaseBase::getEmptyHistogram(Index::maxValues()); + } + + virtual MemoryUsage getMemoryUsage() const override; + + AddressSpace getAddressSpaceUsage() const; + + size_t getNumKeys(void) const + { + return _indices.size(); + } + + size_t getCapacityKeys(void) const + { + return _indices.capacity(); + } + + IndexCopyVector + getIndicesCopy() const + { + uint32_t size = _committedDocIdLimit; + assert(size <= _indices.size()); + return std::move(IndexCopyVector(&_indices[0], &_indices[0] + size)); + } + + bool + hasKey(uint32_t key) const + { + return key < _indices.size(); + } + + bool + hasReaderKey(uint32_t key) const + { + return key < _committedDocIdLimit && key < _indices.size(); + } + + bool + isFull(void) const + { + return _indices.isFull(); + } + + static size_t + maxValues(void) + { + return Index::maxValues(); + } + + void + addKey(uint32_t & key); + + void + 
shrinkKeys(uint32_t newSize); + + void + clearDocs(uint32_t lidLow, uint32_t lidLimit, AttributeVector &v); + + void holdElem(Index idx, size_t size); + + virtual void doneHoldElem(Index idx) = 0; +}; + +extern template class MultiValueMappingBase; +extern template class MultiValueMappingBase; + +template +class MultiValueMappingFallbackVectorHold + : public vespalib::GenerationHeldBase +{ + V _hold; +public: + MultiValueMappingFallbackVectorHold(size_t size, + V &rhs) + : vespalib::GenerationHeldBase(size), + _hold() + { + _hold.swap(rhs); + } + + virtual + ~MultiValueMappingFallbackVectorHold(void) + { + } +}; + + +template +class MultiValueMappingVector : public vespalib::Array, + public MultiValueMappingVectorBaseBase +{ +public: + typedef vespalib::Array VectorBase; + typedef MultiValueMappingFallbackVectorHold FallBackHold; + MultiValueMappingVector(); + MultiValueMappingVector(uint32_t n); + MultiValueMappingVector(const MultiValueMappingVector & rhs); + MultiValueMappingVector & + operator=(const MultiValueMappingVector & rhs); + + ~MultiValueMappingVector(); + void reset(uint32_t n); + uint32_t remaining() const { return this->size() - used(); } + void swapVector(MultiValueMappingVector & rhs); + + vespalib::GenerationHeldBase::UP + fallbackResize(uint64_t newSize); +}; + + +template +class MultiValueMappingT : public MultiValueMappingBase +{ +public: + friend class MultiValueMappingTest; + typedef MultiValueMappingVectorBaseBase VectorBaseBase; + typedef MultiValueMappingBaseBase::Histogram Histogram; + typedef MultiValueMappingBaseBase::VectorStatus VectorStatus; + typedef typename MultiValueMappingBase::Index Index; + +private: + using MultiValueMappingBase::_pendingCompactSingleVector; + using MultiValueMappingBaseBase::_pendingCompactVectorVector; + using MultiValueMappingBaseBase::_pendingCompact; + using MultiValueMappingBaseBase::clearPendingCompact; + using MultiValueMappingBaseBase::failNewSize; + using MultiValueMappingBase::_genHolder; + using 
MultiValueMappingBase::holdElem; + + typedef MultiValueMappingVector SingleVector; + typedef std::pair SingleVectorPtr; + typedef typename SingleVector::VectorBase VectorBase; + typedef MultiValueMappingVector VectorVector; + typedef std::pair VectorVectorPtr; + + //------------------------------------------------------------------------- + // private variables + //------------------------------------------------------------------------- + std::vector _singleVectors; + std::vector _vectorVectors; + + //------------------------------------------------------------------------- + // private methods + //------------------------------------------------------------------------- + virtual void clearVector(Index idx); + virtual const MemoryUsage & getSingleVectorUsage(size_t i) const override; + virtual const MemoryUsage & getVectorVectorUsage(size_t i) const override; + virtual size_t getSingleVectorAddressSpaceUsed(size_t i) const override; + virtual size_t getVectorVectorAddressSpaceUsed(size_t i) const override; + void initVectors(uint32_t initSize); + void initVectors(const Histogram & initCapacity); + bool getValidIndex(Index & newIdx, uint32_t numValues); + + void + compactSingleVector(SingleVectorPtr &activeVector, + uint32_t valueCnt, + uint64_t newSize, + uint64_t neededEntries, + uint64_t maxSize); + + void + compactVectorVector(VectorVectorPtr &activeVector, + uint64_t newSize, + uint64_t neededEntries, + uint64_t maxSize); + + SingleVectorPtr getSingleVector(uint32_t numValues, VectorStatus status); + VectorVectorPtr getVectorVector(VectorStatus status); + Index getIndex(uint32_t numValues, VectorStatus status); + + void incUsed(SingleVector & vec, uint32_t numValues) { + vec.incUsed(numValues); + vec.getUsage().incUsedBytes(numValues * sizeof(T)); + } + void incDead(SingleVector & vec, uint32_t numValues) { + vec.incDead(numValues); + vec.getUsage().incDeadBytes(numValues * sizeof(T)); + } + void swapVector(SingleVector & vec, uint32_t initSize) { + 
SingleVector(initSize).swapVector(vec); + vec.getUsage().setAllocatedBytes(initSize * sizeof(T)); + } + void incUsed(VectorVector & vec, uint32_t numValues) { + vec.incUsed(1); + vec.getUsage().incUsedBytes(numValues * sizeof(T) + + sizeof(VectorBase)); + vec.getUsage().incAllocatedBytes(numValues * sizeof(T)); + } + void incDead(VectorVector & vec) { + vec.incDead(1); + } + void swapVector(VectorVector & vec, uint32_t initSize) { + VectorVector(initSize).swapVector(vec); + vec.getUsage().setAllocatedBytes(initSize * sizeof(VectorBase)); + } + + +public: + MultiValueMappingT(uint32_t &committedDocIdLimit, + const GrowStrategy & gs = GrowStrategy()); + MultiValueMappingT(uint32_t &committedDocIdLimit, + uint32_t numKeys, uint32_t initSize = 0, + const GrowStrategy & gs = GrowStrategy()); + MultiValueMappingT(uint32_t &committedDocIdLimit, + uint32_t numKeys, const Histogram & initCapacity, + const GrowStrategy & gs = GrowStrategy()); + ~MultiValueMappingT(); + void reset(uint32_t numKeys, uint32_t initSize = 0); + void reset(uint32_t numKeys, const Histogram & initCapacity); + uint32_t get(uint32_t key, std::vector & buffer) const; + template + uint32_t get(uint32_t key, BufferType * buffer, uint32_t sz) const; + bool get(uint32_t key, uint32_t index, T & value) const; + uint32_t getDataForIdx(Index idx, const T * & handle) const { + if (__builtin_expect(idx.values() < Index::maxValues(), true)) { + // We do not need to specialcase 0 as _singleVectors will refer to valid stuff + // and handle SHALL not be used as the number of values returned shall be obeyed. 
+ const SingleVector & vec = _singleVectors[idx.vectorIdx()]; + handle = &vec[idx.offset() * idx.values()]; + __builtin_prefetch(handle, 0, 0); + return idx.values(); + } else { + const VectorBase & vec = + _vectorVectors[idx.alternative()][idx.offset()]; + handle = &vec[0]; + return vec.size(); + } + } + uint32_t get(uint32_t key, const T * & handle) const { + return getDataForIdx(this->_indices[key], handle); + } + inline uint32_t getValueCount(uint32_t key) const; + void set(uint32_t key, const std::vector & values); + void set(uint32_t key, const T * values, uint32_t numValues); + + /* XXX: Unsafe operation, reader gets inconsistent view */ + void replace(uint32_t key, const std::vector & values); + + /* XXX: Unsafe operation, reader gets inconsistent view */ + void replace(uint32_t key, const T * values, uint32_t numValues); + + Histogram getRemaining(); + bool enoughCapacity(const Histogram & capacityNeeded); + void performCompaction(Histogram & capacityNeeded); + + template + uint32_t + fillMapped(AttributeVector::ReaderBase &attrReader, + uint64_t numValues, + const V *map, + size_t mapSize, + Saver &saver, + uint32_t numDocs, + bool hasWeights); + + virtual void doneHoldElem(Index idx) override; + +#ifdef DEBUG_MULTIVALUE_MAPPING + void printContent() const; + void printVectorVectors() const; +#endif +}; + +//----------------------------------------------------------------------------- +// implementation of private methods +//----------------------------------------------------------------------------- +template +MultiValueMappingVector::MultiValueMappingVector() + : VectorBase(), + MultiValueMappingVectorBaseBase() +{ +} + +template +MultiValueMappingVector::~MultiValueMappingVector() +{ +} + +template +MultiValueMappingVector::MultiValueMappingVector(uint32_t n) + : VectorBase(), + MultiValueMappingVectorBaseBase() +{ + reset(n); +} + +template +MultiValueMappingVector::MultiValueMappingVector( + const MultiValueMappingVector & rhs) + : VectorBase(rhs), 
+ MultiValueMappingVectorBaseBase(rhs) +{ +} + +template +MultiValueMappingVector & +MultiValueMappingVector::operator=(const MultiValueMappingVector & rhs) +{ + if (this != & rhs) { + VectorBase::operator=(rhs); + MultiValueMappingVectorBaseBase::operator=(rhs); + } + return *this; +} + +template +void +MultiValueMappingVector::reset(uint32_t n) +{ + this->resize(n); + MultiValueMappingVectorBaseBase::reset(); +} + +template +void +MultiValueMappingVector::swapVector(MultiValueMappingVector & rhs) +{ + MultiValueMappingVectorBaseBase tmp(rhs); + rhs.MultiValueMappingVectorBaseBase::operator=(*this); + MultiValueMappingVectorBaseBase::operator=(tmp); + this->swap(rhs); +} + +template +vespalib::GenerationHeldBase::UP +MultiValueMappingVector::fallbackResize(uint64_t newSize) +{ + VectorBase tmp(newSize); + VectorBase &old(*this); + size_t oldSize = old.size(); + size_t oldCapacity = old.capacity(); + for (size_t i = 0; i < oldSize; ++i) { + tmp[i] = old[i]; + } + std::atomic_thread_fence(std::memory_order_release); + this->swap(tmp); + return vespalib::GenerationHeldBase::UP( + new MultiValueMappingFallbackVectorHold + (oldCapacity * sizeof(VT), + tmp)); +} + +template +void +MultiValueMappingT::initVectors(uint32_t initSize) +{ + for (uint32_t i = 0; i < this->_singleVectorsStatus.size(); ++i) { + if (i % Index::alternativeSize() == 0) { + swapVector(_singleVectors[i], initSize); + this->_singleVectorsStatus[i] = MultiValueMappingBaseBase::ACTIVE; + } else { + swapVector(_singleVectors[i], 0); + this->_singleVectorsStatus[i] = MultiValueMappingBaseBase::FREE; + } + } + for (uint32_t i = 0; i < this->_vectorVectorsStatus.size(); ++i) { + if (i % Index::alternativeSize() == 0) { + swapVector(_vectorVectors[i], initSize); + this->_vectorVectorsStatus[i] = MultiValueMappingBaseBase::ACTIVE; + } else { + swapVector(_vectorVectors[i], 0); + this->_vectorVectorsStatus[i] = MultiValueMappingBaseBase::FREE; + } + } +} + +template +void 
+MultiValueMappingT::initVectors(const Histogram &initCapacity) +{ + for (typename Histogram::const_iterator it(initCapacity.begin()), mt(initCapacity.end()); it != mt; ++it) { + uint32_t valueCnt = it->first; + uint64_t numEntries = it->second; + if (valueCnt != 0 && valueCnt < Index::maxValues()) { + uint64_t maxSize = Index::offsetSize() * valueCnt; + if (maxSize > std::numeric_limits::max()) { + maxSize = std::numeric_limits::max(); + maxSize -= (maxSize % valueCnt); + } + if (numEntries * valueCnt > maxSize) { + failNewSize(numEntries * valueCnt, maxSize); + } + swapVector(_singleVectors[valueCnt * 2], valueCnt * numEntries); + } else if (valueCnt == Index::maxValues()) { + uint64_t maxSize = Index::offsetSize(); + if (maxSize > std::numeric_limits::max()) + maxSize = std::numeric_limits::max(); + if (numEntries > maxSize) { + failNewSize(numEntries, maxSize); + } + swapVector(_vectorVectors[0], numEntries); + } + } +} + +template +bool +MultiValueMappingT::getValidIndex(Index &newIdx, uint32_t numValues) +{ + if (numValues == 0) { + newIdx = Index(); + } else if (numValues < Index::maxValues()) { + SingleVectorPtr active = + getSingleVector(numValues, MultiValueMappingBaseBase::ACTIVE); + + if (active.first->remaining() < numValues) { + return false; + } + + uint32_t used = active.first->used(); + assert(used % numValues == 0); + incUsed(*active.first, numValues); + newIdx = Index(active.second.values(), active.second.alternative(), + used / numValues); + } else { + VectorVectorPtr active = + getVectorVector(MultiValueMappingBaseBase::ACTIVE); + + if (active.first->remaining() == 0) { + return false; + } + + uint32_t used = active.first->used(); + incUsed(*active.first, numValues); + (*active.first)[used].resize(numValues); + newIdx = Index(active.second.values(), active.second.alternative(), + used); + } + return true; +} + +template +void +MultiValueMappingT:: +compactSingleVector(SingleVectorPtr &activeVector, + uint32_t valueCnt, + uint64_t newSize, + 
uint64_t neededEntries, + uint64_t maxSize) +{ + _pendingCompactSingleVector.erase(activeVector.second.values()); + clearPendingCompact(); + SingleVectorPtr freeVector = + getSingleVector(valueCnt, MultiValueMappingBaseBase::FREE); + if (freeVector.first == NULL) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(warning, "did not find any free '%u-vector'", valueCnt); +#endif + uint64_t dead = activeVector.first->dead(); + uint64_t fallbackNewSize = newSize + dead * valueCnt + 1024 * valueCnt; + if (fallbackNewSize > maxSize) + fallbackNewSize = maxSize; + if (fallbackNewSize <= activeVector.first->size() || + fallbackNewSize < activeVector.first->used() + + neededEntries * valueCnt) { + fprintf(stderr, "did not find any free '%u-vector'\n", valueCnt); + abort(); + } + _genHolder.hold(activeVector.first->fallbackResize(fallbackNewSize)); + // When held buffer is freed then pending compact should be set + SingleVectorPtr holdVector = + getSingleVector(valueCnt, MultiValueMappingBaseBase::HOLD); + assert(holdVector.first != NULL); + holdVector.first->setWantCompact(); + return; + } + swapVector(*freeVector.first, newSize); +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "compacting from '%u-vector(%u)' " + "(s = %u, u = %u, d = %u) to " + "'%u-vector(%u)' (s = %u)", + valueCnt, activeVector.second.alternative(), + activeVector.first->size(), + activeVector.first->used() , activeVector.first->dead(), + valueCnt, freeVector.second.alternative(), newSize); +#endif + uint32_t activeVectorIdx = activeVector.second.vectorIdx(); + for (uint32_t i = 0; i < this->_indices.size(); ++i) { + Index & idx = this->_indices[i]; + if (activeVectorIdx == idx.vectorIdx()) { + for (uint32_t j = idx.offset() * idx.values(), + k = freeVector.first->used(); + j < (idx.offset() + 1) * idx.values() && + k < freeVector.first->used() + valueCnt; ++j, ++k) + { + (*freeVector.first)[k] = (*activeVector.first)[j]; + } + assert(freeVector.first->used() % valueCnt == 0); + 
std::atomic_thread_fence(std::memory_order_release); + this->_indices[i] = Index(freeVector.second.values(), + freeVector.second.alternative(), + freeVector.first->used() / valueCnt); + incUsed(*freeVector.first, valueCnt); + } + } + // active -> hold + this->insertIntoHoldList(activeVector.second); + // free -> active + this->setActiveVector(freeVector.second); + activeVector = freeVector; +} + + +template +void +MultiValueMappingT:: +compactVectorVector(VectorVectorPtr &activeVector, + uint64_t newSize, + uint64_t neededEntries, + uint64_t maxSize) +{ + _pendingCompactVectorVector = false; + clearPendingCompact(); + VectorVectorPtr freeVector = + getVectorVector(MultiValueMappingBaseBase::FREE); + if (freeVector.first == NULL) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(error, "did not find any free vectorvector"); +#endif + uint64_t dead = activeVector.first->dead(); + uint64_t fallbackNewSize = newSize + dead + 1024; + if (fallbackNewSize > maxSize) + fallbackNewSize = maxSize; + if (fallbackNewSize <= activeVector.first->size() || + fallbackNewSize < activeVector.first->used() + neededEntries) { + fprintf(stderr, "did not find any free vectorvector\n"); + abort(); + } + _genHolder.hold(activeVector.first->fallbackResize(fallbackNewSize)); + // When held buffer is freed then pending compact should be set + VectorVectorPtr holdVector = + getVectorVector(MultiValueMappingBaseBase::HOLD); + assert(holdVector.first != NULL); + holdVector.first->setWantCompact(); + return; + } + swapVector(*freeVector.first, newSize); +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "compacting from 'vectorvector(%u)' " + "(s = %u, u = %u, d = %u) to " + "'vectorvector(%u)' (s = %u)", + activeVector.second.alternative(), activeVector.first->size(), + activeVector.first->used(), activeVector.first->dead(), + freeVector.second.alternative(), newSize); +#endif + uint32_t activeVectorIdx = activeVector.second.vectorIdx(); + for (uint32_t i = 0; i < this->_indices.size(); ++i) { + Index & idx = 
this->_indices[i]; + if (activeVectorIdx == idx.vectorIdx()) { + uint32_t activeOffset = idx.offset(); + uint32_t vecSize = (*activeVector.first)[activeOffset].size(); + uint32_t freeOffset = freeVector.first->used(); + (*freeVector.first)[freeOffset].resize(vecSize); + for (uint32_t j = 0; j < vecSize; ++j) { + (*freeVector.first)[freeOffset][j] = + (*activeVector.first)[activeOffset][j]; + } + std::atomic_thread_fence(std::memory_order_release); + this->_indices[i] = Index(freeVector.second.values(), + freeVector.second.alternative(), + freeVector.first->used()); + incUsed(*freeVector.first, vecSize); + } + } + // active -> hold + this->insertIntoHoldList(activeVector.second); + // free -> active + this->setActiveVector(freeVector.second); + activeVector = freeVector; +} + +template +typename MultiValueMappingT::SingleVectorPtr +MultiValueMappingT::getSingleVector(uint32_t numValues, + VectorStatus status) +{ + for (uint32_t i = numValues * Index::alternativeSize(); + i < (numValues + 1) * Index::alternativeSize(); ++i) + { + if (this->_singleVectorsStatus[i] == status) { + return SingleVectorPtr(&_singleVectors[i], + Index(numValues, + i % Index::alternativeSize(), + 0)); + } + } + return SingleVectorPtr(static_cast(NULL), Index()); +} + +template +typename MultiValueMappingT::VectorVectorPtr +MultiValueMappingT::getVectorVector(VectorStatus status) +{ + for (uint32_t i = 0; i < _vectorVectors.size(); ++i) { + if (this->_vectorVectorsStatus[i] == status) { + return VectorVectorPtr(&_vectorVectors[i], + Index(Index::maxValues(), i, 0)); + } + } + return VectorVectorPtr(static_cast(NULL), Index()); +} + +template +typename MultiValueMappingT::Index +MultiValueMappingT::getIndex(uint32_t numValues, VectorStatus status) +{ + if (numValues < Index::maxValues()) { + return getSingleVector(numValues, status).second; + } else { + return getVectorVector(status).second; + } +} + + +//----------------------------------------------------------------------------- +// 
implementation of public methods +//----------------------------------------------------------------------------- + +template +MultiValueMappingT::MultiValueMappingT(uint32_t &committedDocIdLimit, + const GrowStrategy & gs) + : MultiValueMappingBase(committedDocIdLimit, 0, gs), + _singleVectors((Index::maxValues()) * Index::alternativeSize()), + _vectorVectors(Index::alternativeSize()) +{ + initVectors(0); +} + +template +MultiValueMappingT::MultiValueMappingT(uint32_t &committedDocIdLimit, + uint32_t numKeys, + uint32_t initSize, + const GrowStrategy & gs) + : MultiValueMappingBase(committedDocIdLimit, numKeys, gs), + _singleVectors((Index::maxValues()) * Index::alternativeSize()), + _vectorVectors(Index::alternativeSize()) +{ + initVectors(initSize); +} + +template +MultiValueMappingT::MultiValueMappingT(uint32_t &committedDocIdLimit, + uint32_t numKeys, + const Histogram & initCapacity, + const GrowStrategy & gs) + : MultiValueMappingBase(committedDocIdLimit, numKeys, gs), + _singleVectors((Index::maxValues()) * Index::alternativeSize()), + _vectorVectors(Index::alternativeSize()) +{ + initVectors(0); + initVectors(initCapacity); +} + +template +MultiValueMappingT::~MultiValueMappingT() +{ + _genHolder.clearHoldLists(); +} + +template +void +MultiValueMappingT::reset(uint32_t numKeys, uint32_t initSize) +{ + MultiValueMappingBase::reset(numKeys); + initVectors(initSize); +} + +template +void +MultiValueMappingT::reset(uint32_t numKeys, + const Histogram &initCapacity) +{ + MultiValueMappingBase::reset(numKeys); + initVectors(0); + initVectors(initCapacity); +} + + +template +uint32_t +MultiValueMappingT::get(uint32_t key, std::vector & buffer) const +{ + return get(key, &buffer[0], buffer.size()); +} + +template +template +uint32_t +MultiValueMappingT::get(uint32_t key, + BufferType * buffer, + uint32_t sz) const +{ + Index idx = this->_indices[key]; + if (idx.values() < Index::maxValues()) { + uint32_t available = idx.values(); + uint32_t num2Read = 
std::min(available, sz); + const SingleVector & vec = _singleVectors[idx.vectorIdx()]; + for (uint32_t i = 0, j = idx.offset() * idx.values(); + i < num2Read && j < (idx.offset() + 1) * idx.values(); ++i, ++j) { + buffer[i] = static_cast(vec[j]); + } + return available; + } else { + const VectorBase & vec = + _vectorVectors[idx.alternative()][idx.offset()]; + uint32_t available = vec.size(); + uint32_t num2Read = std::min(available, sz); + for (uint32_t i = 0; i < num2Read; ++i) { + buffer[i] = static_cast(vec[i]); + } + return available; + } +} + +template +bool +MultiValueMappingT::get(uint32_t key, uint32_t index, T & value) const +{ + if (!this->hasReaderKey(key)) { + return false; + } + Index idx = this->_indices[key]; + if (idx.values() < Index::maxValues()) { + if (index >= idx.values()) { + return false; + } + uint32_t offset = idx.offset() * idx.values() + index; + value = _singleVectors[idx.vectorIdx()][offset]; + return true; + } else { + if (index >= _vectorVectors[idx.alternative()][idx.offset()].size()) { + return false; + } + value = _vectorVectors[idx.alternative()][idx.offset()][index]; + return true; + } + return false; +} + +template +inline uint32_t +MultiValueMappingT::getValueCount(uint32_t key) const +{ + if (!this->hasReaderKey(key)) { + return 0; + } + Index idx = this->_indices[key]; + if (idx.values() < Index::maxValues()) { + return idx.values(); + } else { + return _vectorVectors[idx.alternative()][idx.offset()].size(); + } +} + +template +void +MultiValueMappingT::set(uint32_t key, const std::vector & values) +{ + set(key, &values[0], values.size()); +} + +template +void +MultiValueMappingT::set(uint32_t key, + const T * values, + uint32_t numValues) +{ + if (!this->hasKey(key)) { + abort(); + } + + Index oldIdx = this->_indices[key]; + Index newIdx; + if (!getValidIndex(newIdx, numValues)) { + abort(); + } +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "newIdx: values = %u, alternative = %u, offset = %u", + newIdx.values(), 
newIdx.alternative(), newIdx.offset()); +#endif + + if (newIdx.values() != 0 && newIdx.values() < Index::maxValues()) { + SingleVector & vec = _singleVectors[newIdx.vectorIdx()]; + for (uint32_t i = newIdx.offset() * newIdx.values(), j = 0; + i < (newIdx.offset() + 1) * newIdx.values() && j < numValues; + ++i, ++j) + { + vec[i] = values[j]; + } +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "inserted in '%u-vector(%u)': " + "key = %u, size = %u, used = %u, dead = %u, offset = %u", + newIdx.values(), newIdx.alternative(), + key, vec.size(), + vec.used(), vec.dead(), newIdx.offset() * newIdx.values()); +#endif + } else if (newIdx.values() == Index::maxValues()) { + VectorVector & vec = _vectorVectors[newIdx.alternative()]; + for (uint32_t i = 0; i < numValues; ++i) { + vec[newIdx.offset()][i] = values[i]; + } +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "inserted %u values in 'vector-vector(%u)': " + "key = %u, size = %u, used = %u, dead = %u, offset = %u", + numValues, newIdx.alternative(), + key, vec.size(), vec.used(), vec.dead(), newIdx.offset()); +#endif + } + + std::atomic_thread_fence(std::memory_order_release); + this->_indices[key] = newIdx; + this->incValueCnt(numValues); + + // mark space in oldIdx as dead; + if (oldIdx.values() != 0 && oldIdx.values() < Index::maxValues()) { + SingleVector & vec = _singleVectors[oldIdx.vectorIdx()]; + incDead(vec, oldIdx.values()); + this->decValueCnt(oldIdx.values()); +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "mark space dead in '%u-vector(%u)': " + "size = %u, used = %u, dead = %u", + oldIdx.values(), oldIdx.alternative(), + vec.size(), vec.used(), vec.dead()); +#endif + } else if (oldIdx.values() == Index::maxValues()) { + VectorVector & vec = _vectorVectors[oldIdx.alternative()]; + uint32_t oldNumValues = vec[oldIdx.offset()].size(); + incDead(vec); + this->decValueCnt(oldNumValues); + holdElem(oldIdx, sizeof(VectorBase) + sizeof(T) * oldNumValues); +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, + "mark space dead in 
'vector-vector(%u)': " + "size = %u, used = %u, dead = %u", + oldIdx.alternative(), vec.size(), vec.used(), vec.dead()); +#endif + } +} + +template +void +MultiValueMappingT::replace(uint32_t key, const std::vector & values) +{ + /* XXX: Unsafe operation, reader gets inconsistent view */ + replace(key, &values[0], values.size()); +} + +template +void +MultiValueMappingT::replace(uint32_t key, + const T * values, uint32_t numValues) +{ + /* XXX: Unsafe operation, reader gets inconsistent view */ + if (!this->hasKey(key)) { + abort(); + } + + Index currIdx = this->_indices[key]; + + if (currIdx.values() != 0 && currIdx.values() < Index::maxValues()) { + SingleVector & vec = _singleVectors[currIdx.vectorIdx()]; + for (uint32_t i = currIdx.offset() * currIdx.values(), j = 0; + i < (currIdx.offset() + 1) * currIdx.values() && j < numValues; + ++i, ++j) + { + vec[i] = values[j]; + } + } else if (currIdx.values() == Index::maxValues()) { + VectorBase & vec = + _vectorVectors[currIdx.alternative()][currIdx.offset()]; + for (uint32_t i = 0; i < vec.size() && i < numValues; ++i) { + vec[i] = values[i]; + } + } +} + + +template +void MultiValueMappingT::clearVector(Index idx) +{ + if (idx.values() < Index::maxValues()) { + SingleVector &vec = _singleVectors[idx.vectorIdx()]; + if (vec.getWantCompact()) { + _pendingCompactSingleVector.insert(idx.values()); + _pendingCompact = true; + } + vec = SingleVector(); + } else { + VectorVector &vec = _vectorVectors[idx.alternative()]; + if (vec.getWantCompact()) { + _pendingCompactVectorVector = true; + _pendingCompact = true; + } + vec = VectorVector(); + } +} + + +template +void +MultiValueMappingT::doneHoldElem(Index idx) +{ + assert(idx.values() == Index::maxValues()); + VectorVector &vv = _vectorVectors[idx.alternative()]; + VectorBase &v = vv[idx.offset()]; + uint32_t numValues = v.size(); + VectorBase().swap(v); + vv.getUsage().decAllocatedBytes(numValues * sizeof(T)); + vv.getUsage().incDeadBytes(sizeof(VectorBase)); +} + + 
+template
+const MemoryUsage &
+MultiValueMappingT::getSingleVectorUsage(size_t i) const
+{
+    return _singleVectors[i].getUsage(); // usage record of single vector i
+}
+/** Returns the usage record of vector-vector i. */
+template
+const MemoryUsage &
+MultiValueMappingT::getVectorVectorUsage(size_t i) const
+{
+    return _vectorVectors[i].getUsage();
+}
+/**
+ * Returns (used - dead) of single vector i divided by
+ * i / Index::alternativeSize(), or 0 when i < Index::alternativeSize().
+ *
+ * NOTE(review): i / alternativeSize() presumably is the number of values
+ * per entry held by vector i, which would make the result an entry count;
+ * confirm against the layout of _singleVectors.
+ */
+template
+size_t
+MultiValueMappingT::getSingleVectorAddressSpaceUsed(size_t i) const
+{
+    if (i < Index::alternativeSize()) {
+        return 0; // also keeps numValues below from being zero
+    }
+    size_t numValues = i / Index::alternativeSize();
+    size_t actualUsed = _singleVectors[i].used() - _singleVectors[i].dead();
+    return (actualUsed / numValues);
+}
+/** Returns used() minus dead() of vector-vector i. */
+template
+size_t
+MultiValueMappingT::getVectorVectorAddressSpaceUsed(size_t i) const
+{
+    return _vectorVectors[i].used() - _vectorVectors[i].dead();
+}
+/**
+ * Builds a histogram, keyed by value count, of what is left in the ACTIVE
+ * vectors: entry 0 is 0, entry k in [1, maxValues) is remaining() of the
+ * active single vector for k divided by k, and entry maxValues is
+ * remaining() of the active vector-vector.
+ */
+template
+typename MultiValueMappingT::Histogram
+MultiValueMappingT::getRemaining()
+{
+    Histogram result(Index::maxValues());
+    result[0] = 0;
+    for (uint32_t key = 1; key < Index::maxValues(); ++key) {
+        SingleVectorPtr active =
+            getSingleVector(key, MultiValueMappingBaseBase::ACTIVE);
+        result[key] = active.first->remaining() / key;
+    }
+    VectorVectorPtr active =
+        getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+    result[Index::maxValues()] = active.first->remaining();
+    return result;
+}
+/**
+ * Tells whether the ACTIVE vectors can take 'capacityNeeded' as they are.
+ *
+ * Returns false when a compaction is pending, or when for some
+ * (valueCnt, numEntries) pair the matching active vector has less
+ * remaining() than required (numEntries * valueCnt elements for single
+ * vectors, numEntries for the vector-vector).  Pairs with valueCnt above
+ * Index::maxValues() are ignored.
+ */
+template
+bool
+MultiValueMappingT::enoughCapacity(const Histogram & capacityNeeded)
+{
+    if (_pendingCompact)
+        return false;
+    for (typename Histogram::const_iterator it(capacityNeeded.begin()), mt(capacityNeeded.end()); it != mt; ++it) {
+        uint32_t valueCnt = it->first;
+        uint64_t numEntries = it->second;
+        if (valueCnt < Index::maxValues()) { // NOTE(review): unlike performCompaction(), valueCnt == 0 is not excluded here
+            SingleVectorPtr active =
+                getSingleVector(valueCnt, MultiValueMappingBaseBase::ACTIVE);
+            if (active.first->remaining() < numEntries * valueCnt) {
+                return false;
+            }
+        } else if (valueCnt == Index::maxValues()) {
+            VectorVectorPtr active =
+                getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+            if (active.first->remaining() < numEntries) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+/**
+ * Compacts every ACTIVE vector that is short of the capacity requested in
+ * 'capacityNeeded' or that has a compaction pending.
+ *
+ * Value counts with a pending compaction are first touched in the histogram
+ * so that the main loop visits them.  For each visited value count the new
+ * size comes from computeNewSize(used, dead, needed, maxSize), where maxSize
+ * is derived from Index::offsetSize() and capped at a numeric_limits
+ * maximum.  The work is done by compactSingleVector() /
+ * compactVectorVector(), which are not visible here.
+ */
+template 
+void
+MultiValueMappingT::performCompaction(Histogram & capacityNeeded)
+{
+#ifdef LOG_MULTIVALUE_MAPPING
+    LOG(info, "performCompaction()");
+#endif
+    if (_pendingCompact) {
+        // Further populate histogram to ensure pending compactions are carried out.
+        for (std::set::const_iterator
+                 pit(_pendingCompactSingleVector.begin()),
+                 pmt(_pendingCompactSingleVector.end());
+             pit != pmt; ++pit) {
+            (void) capacityNeeded[*pit]; // used for its side effect only; presumably inserts a zero entry - TODO confirm Histogram::operator[]
+        }
+        if (_pendingCompactVectorVector) {
+            (void) capacityNeeded[Index::maxValues()];
+        }
+    }
+    for (typename Histogram::const_iterator it(capacityNeeded.begin()), mt(capacityNeeded.end()); it != mt; ++it) {
+        uint32_t valueCnt = it->first;
+        uint64_t numEntries = it->second;
+        if (valueCnt != 0 && valueCnt < Index::maxValues()) {
+            SingleVectorPtr active =
+                getSingleVector(valueCnt, MultiValueMappingBaseBase::ACTIVE);
+
+            if (active.first->remaining() < valueCnt * numEntries ||
+                _pendingCompactSingleVector.find(valueCnt) !=
+                _pendingCompactSingleVector.end()) {
+                uint64_t maxSize = Index::offsetSize() * valueCnt;
+                if (maxSize > std::numeric_limits::max()) {
+                    maxSize = std::numeric_limits::max();
+                    maxSize -= (maxSize % valueCnt); // round down to a whole number of entries
+                }
+                uint64_t newSize = this->computeNewSize(active.first->used(),
+                                                        active.first->dead(),
+                                                        valueCnt * numEntries,
+                                                        maxSize);
+                compactSingleVector(active, valueCnt, newSize,
+                                    numEntries, maxSize);
+            }
+        } else if (valueCnt == Index::maxValues()) {
+            VectorVectorPtr active =
+                getVectorVector(MultiValueMappingBaseBase::ACTIVE);
+
+            if (active.first->remaining() < numEntries ||
+                _pendingCompactVectorVector) {
+                uint64_t maxSize = Index::offsetSize();
+                if (maxSize > std::numeric_limits::max())
+                    maxSize = std::numeric_limits::max();
+                uint64_t newSize = this->computeNewSize(active.first->used(),
+                                                        active.first->dead(),
+                                                        numEntries,
+                                                        maxSize);
+                compactVectorVector(active, newSize,
+                                    numEntries, maxSize);
+            }
+        }
+    }
+    assert(!_pendingCompact); // NOTE(review): relies on the compact helpers clearing the pending state; not visible here
+}
+
+#ifdef DEBUG_MULTIVALUE_MAPPING
+template
+void 
+MultiValueMappingT::printContent() const
+{
+    // Debug helper: writes one line per key to std::cout with the key, its
+    // value count and all of its values.
+    for (uint32_t key = 0; key < this->_indices.size(); ++key) {
+        std::vector buffer(getValueCount(key));
+        get(key, buffer);
+        std::cout << "key = " << key << ", count = " <<
+            getValueCount(key) << ": ";
+        for (uint32_t i = 0; i < buffer.size(); ++i) {
+            std::cout << buffer[i] << ", ";
+        }
+        std::cout << '\n';
+    }
+}
+/**
+ * Debug helper: writes every vector of every vector-vector alternative to
+ * std::cout, one "Vector j: [...]" line per entry.
+ */
+template
+void
+MultiValueMappingT::printVectorVectors() const
+{
+    for (uint32_t i = 0; i < _vectorVectors.size(); ++i) {
+        std::cout << "Alternative " << i << '\n';
+        for (uint32_t j = 0; j < _vectorVectors[i].size(); ++j) {
+            std::cout << "Vector " << j << ": [";
+            uint32_t size = _vectorVectors[i][j].size();
+            for (uint32_t k = 0; k < size; ++k) {
+                std::cout << _vectorVectors[i][j][k] << ", ";
+            }
+            std::cout << "]\n";
+        }
+    }
+}
+#endif
+// Explicit instantiation declarations: these specializations are not to be instantiated implicitly by includers; presumably they are instantiated explicitly in a .cpp file.
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class MultiValueMappingFallbackVectorHold<
+    MultiValueMappingVector >::VectorBase >;
+extern template class 
MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; +extern template class MultiValueMappingFallbackVectorHold< + MultiValueMappingVector > >::VectorBase >; + +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class 
MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::Value >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + multivalue::WeightedValue >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; +extern template class MultiValueMappingVector< + vespalib::Array > >; + +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern 
template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT< + multivalue::Value >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT >; +extern template class MultiValueMappingT< + multivalue::WeightedValue >; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT< + multivalue::Value, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT, + multivalue::Index64>; +extern template class MultiValueMappingT< + multivalue::WeightedValue, + multivalue::Index64>; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp new file mode 100644 index 00000000000..a1e06ee4759 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multivaluemapping.hpp @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+namespace search
+{
+
+/**
+ * Populates the mapping for 'numDocs' documents from 'attrReader'.
+ *
+ * The mapping is first reset() with the histogram obtained from
+ * getHistogram(attrReader) and the reader is rewound.  Then, per document,
+ * the value count is read, followed by one enum value per value (and one
+ * weight per value when 'hasWeights' is set, otherwise the weight is 1).
+ * Each enum value e is translated through map[e], stored as T(map[e], weight)
+ * and reported to saver.save(e, doc, valueIndex, weight).  The collected
+ * values are finally stored with set(doc, ...).
+ *
+ * @param numValues total number of values expected; only checked by assert
+ * @param mapSize   number of entries in 'map'; only checked by assert
+ * @return the largest value count seen for a single document
+ */
+template
+template
+uint32_t
+MultiValueMappingT::fillMapped(AttributeVector::ReaderBase &attrReader,
+                               uint64_t numValues,
+                               const V *map,
+                               size_t mapSize,
+                               Saver &saver,
+                               uint32_t numDocs,
+                               bool hasWeights)
+{
+    typedef AttributeVector::DocId DocId;
+    Histogram capacityNeeded = this->getHistogram(attrReader);
+    reset(numDocs, capacityNeeded);
+    attrReader.rewind();
+    std::vector indices;
+    uint64_t di = 0; // running count of values read across all documents
+    uint32_t maxvc = 0;
+    for (DocId doc = 0; doc < numDocs; ++doc) {
+        indices.clear();
+        uint32_t vc = attrReader.getNextValueCount();
+        indices.reserve(vc);
+        for (uint32_t vci = 0; vci < vc; ++vci, ++di) {
+            uint32_t e = attrReader.getNextEnum();
+            assert(e < mapSize);
+            (void) mapSize; // silences the unused warning when assert() compiles away
+            int32_t weight = hasWeights ? attrReader.getNextWeight() : 1;
+            indices.push_back(T(map[e], weight));
+            saver.save(e, doc, vci, weight);
+        }
+        if (maxvc < indices.size())
+            maxvc = indices.size();
+        set(doc, indices);
+    }
+    assert(di == numValues);
+    (void) numValues; // silences the unused warning when assert() compiles away
+    return maxvc;
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h
new file mode 100644
index 00000000000..73f0491d808
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h
@@ -0,0 +1,182 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include "attributevector.h"
+
+namespace search {
+/**
+ * AttributeVector whose member functions all report "not implemented".
+ *
+ * Every function below calls notImplemented() and then returns a neutral
+ * value (0, NULL, false or an empty SearchContext::UP).  With the default
+ * notImplemented() that return statement is never reached, since the call
+ * either trips the assert or throws.  Constructors are inherited from
+ * AttributeVector.
+ */
+struct NotImplementedAttribute : AttributeVector {
+    using AttributeVector::AttributeVector;
+    /**
+     * Asserts in builds with assertions enabled, otherwise throws
+     * vespalib::IllegalStateException.  Virtual, so a subclass may replace it.
+     */
+    virtual void notImplemented() const __attribute__((noinline)) {
+        assert(false);
+        throw vespalib::IllegalStateException(
+                "The function is not implemented.");
+    }
+
+    virtual uint32_t
+    getValueCount(DocId) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual largeint_t
+    getInt(DocId) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual double
+    getFloat(DocId) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual const char *
+    getString(DocId, char *, size_t) const
+    {
+        notImplemented();
+        return NULL;
+    }
+
+    virtual uint32_t
+    get(DocId, largeint_t *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, double *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, vespalib::string *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, const char **, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, EnumHandle *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, WeightedInt *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, WeightedFloat *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, WeightedString *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, WeightedConstChar *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    get(DocId, WeightedEnum *, uint32_t) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual bool
+    findEnum(const char *, EnumHandle &) const
+    {
+        notImplemented();
+        return false;
+    }
+
+    virtual long
+    onSerializeForAscendingSort(DocId, void *, long,
+                                const common::BlobConverter *) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual long
+    onSerializeForDescendingSort(DocId, void *, long,
+                                 const common::BlobConverter *) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    clearDoc(DocId)
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual int64_t
+    getDefaultValue() const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual uint32_t
+    getEnum(DocId) const
+    {
+        notImplemented();
+        return 0;
+    }
+
+    virtual void
+    getEnumValue(const EnumHandle *, uint32_t *, uint32_t) const
+    {
+        notImplemented();
+    }
+
+    virtual bool
+    addDoc(DocId &)
+    {
+        notImplemented();
+        return false;
+    }
+
+    SearchContext::UP
+    getSearch(QueryTermSimple::UP, const SearchContext::Params &) const override
+    {
+        notImplemented();
+        return SearchContext::UP();
+    }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/numericbase.cpp b/searchlib/src/vespa/searchlib/attribute/numericbase.cpp
new file mode 100644
index 00000000000..cbc8bde655e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/numericbase.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "numericbase.h" +#include +#include "enumstorebase.h" +#include +#include + +LOG_SETUP(".searchlib.attribute.numericbase"); + +namespace search +{ + +IMPLEMENT_IDENTIFIABLE_ABSTRACT(NumericAttribute, AttributeVector); + +using attribute::LoadedEnumAttributeVector; + +void +NumericAttribute::fillEnum0(const void *src, + size_t srcLen, + EnumIndexVector &eidxs) +{ + (void) src; + (void) srcLen; + (void) eidxs; + fprintf(stderr, "NumericAttribute::fillEnum0\n"); +} + + +void +NumericAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + LoadedEnumAttributeVector &loaded) +{ + (void) attrReader; + (void) numValues; + (void) eidxs; + (void) loaded; + fprintf(stderr, "NumericAttribute::fillEnumIdx (loaded)\n"); +} + + +void +NumericAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + EnumVector &enumHist) +{ + (void) attrReader; + (void) numValues; + (void) eidxs; + (void) enumHist; + fprintf(stderr, "NumericAttribute::fillEnumIdx (enumHist)\n"); +} + + +void +NumericAttribute::fillPostingsFixupEnum(const LoadedEnumAttributeVector & + loaded) +{ + (void) loaded; + fprintf(stderr, "NumericAttribute::fillPostingsFixupEnum\n"); +} + +void +NumericAttribute::fixupEnumRefCounts(const EnumVector &enumHist) +{ + (void) enumHist; + fprintf(stderr, "NumericAttribute::fixupEnumRefCounts\n"); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/numericbase.h b/searchlib/src/vespa/searchlib/attribute/numericbase.h new file mode 100644 index 00000000000..d7b45b73574 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/numericbase.h @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include "attributevector.h"
+#include
+#include
+#include "loadedenumvalue.h"
+
+namespace search {
+/**
+ * Abstract base for numeric attribute vectors.
+ *
+ * Declares the enum-loading hooks (fillEnum0, fillEnumIdx,
+ * fillPostingsFixupEnum, fixupEnumRefCounts) and provides two small
+ * query-term matchers, Equal and Range, for use by subclasses.
+ */
+class NumericAttribute : public AttributeVector
+{
+protected:
+    typedef EnumStoreBase::Index       EnumIndex;
+    typedef EnumStoreBase::IndexVector EnumIndexVector;
+    typedef EnumStoreBase::EnumVector  EnumVector;
+
+    NumericAttribute(const vespalib::string & name,
+                     const AttributeVector::Config & cfg)
+        : AttributeVector(name, cfg)
+    {
+    }
+
+    virtual void
+    fillEnum0(const void *src,
+              size_t srcLen,
+              EnumIndexVector &eidxs);
+
+    virtual void
+    fillEnumIdx(ReaderBase &attrReader,
+                uint64_t numValues,
+                const EnumIndexVector &eidxs,
+                attribute::LoadedEnumAttributeVector &loaded);
+
+    virtual void
+    fillEnumIdx(ReaderBase &attrReader,
+                uint64_t numValues,
+                const EnumIndexVector &eidxs,
+                EnumVector &enumHist);
+
+    virtual void
+    fillPostingsFixupEnum(const attribute::LoadedEnumAttributeVector &loaded);
+
+    virtual void
+    fixupEnumRefCounts(const EnumVector &enumHist);
+    /**
+     * Matches values equal to the single value of a query term.
+     *
+     * The matcher is valid only when the term's range result is valid, is an
+     * equality (isEqual()) and was not adjusted.  The compared value is taken
+     * from the upper bound of the range result.
+     */
+    template
+    class Equal
+    {
+    private:
+        T _value;
+        bool _valid;
+    protected:
+        Equal(QueryTermSimple &queryTerm, bool avoidUndefinedInRange = false)
+            : _value(0),
+              _valid(false)
+        {
+            (void) avoidUndefinedInRange; // accepted for signature parity with Range, not used
+            QueryTermSimple::RangeResult res = queryTerm.getRange();
+            _valid = res.valid && res.isEqual() && !res.adjusted;
+            _value = res.high;
+        }
+        bool isValid() const { return _valid; }
+        bool match(T v) const { return v == _value; }
+        Int64Range getRange() const {
+            return Int64Range(static_cast(_value));
+        }
+    };
+    /**
+     * Matches values inside the closed interval [_low, _high] of a query term.
+     *
+     * For an equality term the matcher is valid only if the range result is
+     * valid and not adjusted; otherwise validity follows res.valid.  The
+     * range limit and max-per-group settings of the term are kept as well.
+     */
+    template
+    class Range
+    {
+    protected:
+        T _low;
+        T _high;
+    private:
+        bool _valid;
+        int _limit;
+        size_t _max_per_group;
+    protected:
+        Range(const QueryTermSimple & queryTerm,
+              bool avoidUndefinedInRange=false)
+            : _low(0),
+              _high(0),
+              _valid(false)
+        {
+            QueryTermSimple::RangeResult res = queryTerm.getRange();
+            _valid = res.isEqual() ? (res.valid && !res.adjusted) : res.valid;
+            _low = res.low;
+            _high = res.high;
+            _limit = queryTerm.getRangeLimit();
+            _max_per_group = queryTerm.getMaxPerGroup();
+            if (_valid && avoidUndefinedInRange &&
+                _low == std::numeric_limits::min()) {
+                _low += 1; // step past the minimum; presumably the "undefined value" mentioned in cappedRange() - TODO confirm
+            }
+        }
+        Int64Range
+        getRange() const
+        {
+            return Int64Range(static_cast(_low),
+                              static_cast(_high));
+        }
+        bool isValid() const { return _valid; }
+        bool match(T v) const { return (_low <= v) && (v <= _high); }
+        int getRangeLimit() const { return _limit; }
+        size_t getMaxPerGroup() const { return _max_per_group; }
+        /**
+         * Returns the interval converted to BaseType and clamped to what
+         * BaseType can represent.
+         *
+         * Lower bound: for floats it is raised to -max when at or below it;
+         * otherwise, when at or below min, it becomes min for unsigned types
+         * and min + 1 for signed ones.  Upper bound: lowered to max when at
+         * or above it.
+         */
+        template
+        search::Range
+        cappedRange(bool isFloat, bool isUnsigned)
+        {
+            BaseType low = static_cast(_low);
+            BaseType high = static_cast(_high);
+
+            BaseType numMin = std::numeric_limits::min();
+            BaseType numMax = std::numeric_limits::max();
+
+            if (isFloat)
+            {
+                if (_low <= (-numMax)) {
+                    low = -numMax;
+                }
+            } else {
+                if (_low <= (numMin)) {
+                    if (isUnsigned) {
+                        low = numMin;
+                    } else {
+                        low = numMin + 1; // we must avoid the undefined value
+                    }
+                }
+            }
+
+            if (_high >= (numMax)) {
+                high = numMax;
+            }
+            return search::Range(low, high);
+        }
+
+    };
+public:
+    DECLARE_IDENTIFIABLE_ABSTRACT(NumericAttribute);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postingchange.cpp b/searchlib/src/vespa/searchlib/attribute/postingchange.cpp
new file mode 100644
index 00000000000..2731fb0157d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingchange.cpp
@@ -0,0 +1,275 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "postingchange.h" +#include "multivaluemapping.h" +#include "postinglistattribute.h" +#include +#include + +namespace search { + +namespace +{ + +void +removeDupAdditions(PostingChange::A &additions) +{ + typedef PostingChange::A::iterator Iterator; + if (additions.empty()) + return; + if (additions.size() == 1) + return; + std::sort(additions.begin(), additions.end()); + Iterator i = additions.begin(); + Iterator ie = additions.end(); + Iterator d = i; + for (++i; i != ie; ++i, ++d) { + if (d->_key == i->_key) + break; + } + if (i == ie) + return; // no dups found + for (++i; i != ie; ++i) { + if (d->_key != i->_key) { + ++d; + *d = *i; + } + } + additions.resize(d - additions.begin() + 1); +} + + +void +removeDupAdditions(PostingChange::A &additions) +{ + typedef PostingChange::A::iterator Iterator; + if (additions.empty()) + return; + if (additions.size() == 1u) + return; + std::sort(additions.begin(), additions.end()); + Iterator i = additions.begin(); + Iterator ie = additions.end(); + Iterator d = i; + for (++i; i != ie; ++i, ++d) { + if (d->_key == i->_key) + break; + } + if (i == ie) + return; // no dups found + // sum weights together + d->setData(d->getData() + i->getData()); + for (++i; i != ie; ++i) { + if (d->_key != i->_key) { + ++d; + *d = *i; + } else { + // sum weights together + d->setData(d->getData() + i->getData()); + } + } + additions.resize(d - additions.begin() + 1); +} + +void +removeDupRemovals(std::vector &removals) +{ + typedef std::vector::iterator Iterator; + if (removals.empty()) + return; + if (removals.size() == 1u) + return; + std::sort(removals.begin(), removals.end()); + Iterator i = removals.begin(); + Iterator ie = removals.end(); + Iterator d = i; + for (++i; i != ie; ++i, ++d) { + if (*d == *i) + break; + } + if (i == ie) + return; // no dups found + for (++i; i != ie; ++i) { + if (*d != *i) { + ++d; + *d = *i; + } + } + removals.resize(d - removals.begin() + 1); +} + +} + +EnumStoreBase::Index 
+EnumIndexMapper::map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const +{ + (void) compare; + return original; +} + +template <> +void +PostingChange::removeDups(void) +{ + removeDupAdditions(_additions); + removeDupRemovals(_removals); +} + + +template <> +void +PostingChange::removeDups(void) +{ + removeDupAdditions(_additions); + removeDupRemovals(_removals); +} + + +template +void +PostingChange

::apply(GrowableBitVector &bv) +{ + P *a = &_additions[0]; + P *ae = &_additions[0] + _additions.size(); + uint32_t *r = &_removals[0]; + uint32_t *re = &_removals[0] + _removals.size(); + + while (a != ae || r != re) { + if (r != re && (a == ae || *r < a->_key)) { + // remove + assert(*r < bv.size()); + bv.slowClearBit(*r); + ++r; + } else { + if (r != re && !(a->_key < *r)) { + // update or add + assert(a->_key < bv.size()); + bv.slowSetBit(a->_key); + ++r; + } else { + assert(a->_key < bv.size()); + bv.slowSetBit(a->_key); + } + ++a; + } + } +} + +template +class ActualChangeComputer { +public: + typedef std::vector V; + void compute(const WeightedIndex * entriesNew, size_t szNew, + const WeightedIndex * entriesOld, size_t szOld, + V & added, V & changed, V & removed) const; +private: + mutable V _oldEntries; + mutable V _newEntries; +}; + +template +void +ActualChangeComputer::compute(const WeightedIndex * entriesNew, size_t szNew, + const WeightedIndex * entriesOld, size_t szOld, + V & added, V & changed, V & removed) const +{ + _newEntries.reserve(szNew); + _oldEntries.reserve(szOld); + _newEntries.clear(); + _oldEntries.clear(); + _newEntries.insert(_newEntries.begin(), entriesNew, entriesNew + szNew); + _oldEntries.insert(_oldEntries.begin(), entriesOld, entriesOld + szOld); + std::sort(_newEntries.begin(), _newEntries.end()); + std::sort(_oldEntries.begin(), _oldEntries.end()); + auto newIt(_newEntries.begin()), oldIt(_oldEntries.begin()); + while (newIt != _newEntries.end() && oldIt != _oldEntries.end()) { + if (newIt->value() == oldIt->value()) { + if (newIt->weight() != oldIt->weight()) { + changed.push_back(*newIt); + } + newIt++, oldIt++; + } else if (newIt->value() < oldIt->value()) { + added.push_back(*newIt++); + } else { + removed.push_back(*oldIt++); + } + } + added.insert(added.end(), newIt, _newEntries.end()); + removed.insert(removed.end(), oldIt, _oldEntries.end()); +} + +template +template +PostingMap +PostingChangeComputerT:: +compute(const 
MultivalueMapping & mvm, const DocIndices & docIndices, + const EnumStoreComparator & compare, const EnumIndexMapper & mapper) +{ + typedef ActualChangeComputer AC; + AC actualChange; + typename AC::V added, changed, removed; + PostingMap changePost; + + // generate add postings and remove postings + for (const auto & docIndex : docIndices) { + const WeightedIndex * oldIndices = NULL; + uint32_t valueCount = mvm.get(docIndex.first, oldIndices); + added.clear(), changed.clear(), removed.clear(); + actualChange.compute(&docIndex.second[0], docIndex.second.size(), oldIndices, valueCount, + added, changed, removed); + for (const WeightedIndex & wi : added) { + changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].add(docIndex.first, wi.weight()); + } + for (const WeightedIndex & wi : removed) { + changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].remove(docIndex.first); + } + for (const WeightedIndex & wi : changed) { + changePost[EnumPostingPair(mapper.map(wi.value(), compare), &compare)].remove(docIndex.first).add(docIndex.first, wi.weight()); + } + } + return changePost; +} + +template class PostingChange; + +template class PostingChange; + +typedef PostingChange > WeightedPostingChange; +typedef std::map WeightedPostingChangeMap; +typedef EnumStoreBase::Index EnumIndex; +typedef multivalue::WeightedValue WeightedIndex; +typedef multivalue::Value ValueIndex; + +typedef MultiValueMappingT NormalWeightedMultiValueMapping; +typedef MultiValueMappingT HugeWeightedMultiValueMapping; +typedef MultiValueMappingT NormalValueMultiValueMapping; +typedef MultiValueMappingT HugeValueMultiValueMapping; +typedef std::vector>> DocIndicesWeighted; +typedef std::vector>> DocIndicesValue; + +template WeightedPostingChangeMap PostingChangeComputerT + ::compute(const NormalWeightedMultiValueMapping &, + const DocIndicesWeighted &, + const EnumStoreComparator &, + const EnumIndexMapper &); + +template WeightedPostingChangeMap PostingChangeComputerT + 
::compute(const HugeWeightedMultiValueMapping &,
+                       const DocIndicesWeighted &,
+                       const EnumStoreComparator &,
+                       const EnumIndexMapper &);
+
+template WeightedPostingChangeMap PostingChangeComputerT
+             ::compute(const NormalValueMultiValueMapping &,
+                       const DocIndicesValue &,
+                       const EnumStoreComparator &,
+                       const EnumIndexMapper &);
+
+template WeightedPostingChangeMap PostingChangeComputerT
+             ::compute(const HugeValueMultiValueMapping &,
+                       const DocIndicesValue &,
+                       const EnumStoreComparator &,
+                       const EnumIndexMapper &);
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/postingchange.h b/searchlib/src/vespa/searchlib/attribute/postingchange.h
new file mode 100644
index 00000000000..8309cf91516
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postingchange.h
@@ -0,0 +1,86 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include "postinglisttraits.h"
+#include "enumstorebase.h"
+
+namespace search
+{
+
+class GrowableBitVector;
+
+/**
+ * Class representing changes to a posting list for a single value.
+ *
+ * Additions are posting entries, removals are plain document ids.  Both
+ * are collected unsorted; removeDups() sorts and de-duplicates them.
+ */
+template
+class PostingChange
+{
+public:
+    typedef vespalib::Array A;
+    typedef std::vector R;
+    A _additions; // entries to add
+    R _removals;  // document ids to remove
+
+    inline void add(uint32_t docId, int32_t weight); // specialized per posting type further down
+
+    PostingChange & remove(uint32_t docId) { // returns *this so that add() can be chained
+        _removals.push_back(docId);
+        return *this;
+    }
+
+    void clear(void) {
+        _additions.clear();
+        _removals.clear();
+    }
+
+    /*
+     * Remove duplicates in additions and removals vectors, since new
+     * posting list tree doesn't support duplicate entries. 
+     */
+    void removeDups(void);
+    void apply(GrowableBitVector &bv); // applies the recorded changes to a bit vector
+};
+/**
+ * Hook for translating enum indices while posting changes are computed.
+ * Virtual, so subclasses can supply a different mapping.
+ */
+class EnumIndexMapper
+{
+public:
+    virtual ~EnumIndexMapper() { }
+    virtual EnumStoreBase::Index map(EnumStoreBase::Index original, const EnumStoreComparator & compare) const;
+};
+/**
+ * Holder of the static compute() function that turns a list of
+ * (document, new values) pairs into a map of posting changes.
+ */
+template
+class PostingChangeComputerT
+{
+private:
+    typedef std::vector>> DocIndices;
+public:
+    template
+    static PostingMap compute(const MultivalueMapping & mvm, const DocIndices & docIndices,
+                              const EnumStoreComparator & compare, const EnumIndexMapper & mapper);
+};
+/** Records an addition for 'docId'; this posting type carries no data, so 'weight' is ignored. */
+template <>
+inline void
+PostingChange::add(uint32_t docId, int32_t weight)
+{
+    (void) weight;
+    _additions.push_back(AttributePosting(docId,
+                                 btree::BTreeNoLeafData()));
+}
+
+/** Records an addition for 'docId' carrying 'weight'. */
+template <>
+inline void
+PostingChange::add(uint32_t docId, int32_t weight)
+{
+    _additions.push_back(AttributeWeightPosting(docId, weight));
+}
+
+
+} // namespace search
+
+
+
+
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
new file mode 100644
index 00000000000..2d79e80142a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -0,0 +1,451 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "postinglistattribute.h"
+#include "loadednumericvalue.h"
+#include "loadedstringvalue.h"
+#include "enumcomparator.h"
+
+namespace search
+{
+
+using attribute::LoadedNumericValue;
+
+template
+PostingListAttributeBase

::
+PostingListAttributeBase(AttributeVector &attr,
+                         EnumStoreBase &enumStore)
+    : attribute::IPostingListAttributeBase(),
+      _postingList(enumStore.getPostingDictionary(), attr.getStatus(),
+                   attr.getConfig()),
+      _attr(attr),
+      _dict(enumStore.getPostingDictionary()),
+      _esb(enumStore)
+{
+    // Everything is set up in the initializer list: the posting list store
+    // is created from the enum store's posting dictionary plus the status
+    // and config of 'attr'.
+}
+
+
+template
+PostingListAttributeBase::~PostingListAttributeBase()
+{
+}
+
+
+/**
+ * Drops every posting list and leaves all dictionary entries with an empty
+ * EntryRef.
+ *
+ * Consecutive dictionary entries carrying the same reference are cleared
+ * only once.  The attribute generation is bumped before and after the walk.
+ */
+template
+void
+PostingListAttributeBase::clearAllPostings()
+{
+    _postingList.clearBuilder();
+    _attr.incGeneration(); // Force freeze
+    EnumPostingTree::Iterator itr(_dict.begin());
+    EntryRef prev;
+    while (itr.valid()) {
+        EntryRef ref = itr.getData();
+        if (ref.ref() != prev.ref()) {
+            if (ref.valid()) {
+                _postingList.clear(ref);
+            }
+            prev = ref;
+        }
+        itr.writeData(EntryRef());
+        ++itr;
+    }
+    _attr.incGeneration(); // Force freeze
+}
+
+
+/**
+ * Rebuilds all posting lists and enum reference counts from 'loaded'.
+ *
+ * Existing postings are cleared first.  'loaded' is then walked in step
+ * with the dictionary: the enum value of an entry says how far the
+ * dictionary iterator has to advance (enum values are asserted never to
+ * decrease).  Whenever the enum value changes, the reference count gathered
+ * for the previous dictionary entry is written with fixupRefCount().  When
+ * enumStore.foldedChange() reports a change between the entry the pending
+ * postings belong to and the new entry, the pending postings are
+ * de-duplicated, applied as a new posting list and stored on that earlier
+ * entry.  The last group is flushed after the loop, and
+ * freeUnusedEnums(false) is called at the end.
+ *
+ * Nothing but the initial clearing happens when 'loaded' is empty.
+ */
+template
+void
+PostingListAttributeBase::fillPostingsFixupEnumBase(
+        const LoadedEnumAttributeVector &loaded)
+{
+    clearAllPostings();
+    uint32_t docIdLimit = _attr.getNumDocs();
+    EnumStoreBase &enumStore = _esb;
+    EntryRef newIndex;
+    PostingChange postings;
+    if ( loaded.empty() )
+        return;
+    typedef LoadedEnumAttributeVector::const_iterator LoadedIT;
+    uint32_t preve = 0;    // enum value the dictionary iterator 'di' stands at
+    uint32_t refCount = 0; // entries seen so far for the current enum value
+    typedef EnumPostingTree::Iterator DictIT;
+
+    DictIT di(_dict.begin());
+    DictIT pdi(di);        // dictionary entry the pending postings belong to
+    assert(di.valid());
+    for(LoadedIT i(loaded.begin()), ie(loaded.end()); i != ie; ++i) {
+        if (preve != i->getEnum()) {
+            assert(preve < i->getEnum());
+            enumStore.fixupRefCount(di.getKey(), refCount);
+            refCount = 0;
+            while (preve != i->getEnum()) {
+                ++di;
+                assert(di.valid());
+                ++preve;
+            }
+            assert(di.valid());
+            if (enumStore.foldedChange(pdi.getKey(), di.getKey())) {
+                postings.removeDups();
+                newIndex = EntryRef();
+                // _removals is a std::vector that is never filled in this
+                // function; data() avoids indexing element 0 of an empty
+                // vector.  _additions is a vespalib::Array whose API is not
+                // visible here, so its access is left as it was.
+                _postingList.apply(newIndex,
+                                   &postings._additions[0],
+                                   &postings._additions[0] +
+                                   postings._additions.size(),
+                                   postings._removals.data(),
+                                   postings._removals.data() +
+                                   postings._removals.size());
+                pdi.writeData(newIndex);
+                while (pdi != di) {
+                    ++pdi;
+                }
+                postings.clear();
+            }
+        }
+        ++refCount;
+        assert(i->getDocId() < docIdLimit);
+        (void) docIdLimit;
+        postings.add(i->getDocId(), i->getWeight());
+    }
+    assert(refCount != 0);
+    enumStore.fixupRefCount(di.getKey(), refCount);
+    postings.removeDups();
+    newIndex = EntryRef();
+    _postingList.apply(newIndex,
+                       &postings._additions[0],
+                       &postings._additions[0] + postings._additions.size(),
+                       postings._removals.data(),
+                       postings._removals.data() + postings._removals.size());
+    pdi.writeData(newIndex);
+    enumStore.freeUnusedEnums(false);
+}
+
+
+/**
+ * Applies the posting changes in 'changePost' to the posting lists.
+ *
+ * For every (enum value, change) pair the dictionary entry of the enum
+ * index is looked up with 'cmp' (asserted to exist), the change is
+ * de-duplicated and applied to the posting list referenced by that entry,
+ * and the resulting reference is written back after thawing the dictionary
+ * at that position.
+ */
+template
+void
+PostingListAttributeBase::updatePostings(PostingMap &changePost,
+                                         EnumStoreComparator &cmp)
+{
+    for (typename PostingMap::iterator
+             it(changePost.begin()), mt(changePost.end()); it != mt; it++) {
+        PostingChange &change(it->second);
+        EnumIndex idx(it->first.getEnumIdx());
+        typename EnumPostingTree::Iterator dictItr =
+            _dict.lowerBound(idx, cmp);
+        assert(dictItr.valid() && dictItr.getKey() == idx);
+        EntryRef newPosting = dictItr.getData();
+
+        change.removeDups();
+        // data() rather than &_removals[0]: a change that only adds has an
+        // empty removal vector, and indexing element 0 of it is undefined.
+        _postingList.apply(newPosting,
+                           &change._additions[0],
+                           &change._additions[0] + change._additions.size(),
+                           change._removals.data(),
+                           change._removals.data() + change._removals.size());
+
+        _dict.thaw(dictItr);
+        dictItr.writeData(newPosting);
+    }
+}
+
+
+template
+bool
+PostingListAttributeBase

::forwardedOnAddDoc(DocId doc, + size_t wantSize, + size_t wantCapacity) +{ + if (!_postingList._enableBitVectors) + return false; + if (doc >= wantSize) + wantSize = doc + 1; + if (doc >= wantCapacity) + wantCapacity = doc + 1; + return _postingList.resizeBitVectors(wantSize, wantCapacity); +} + + +template +void +PostingListAttributeBase

:: +clearPostings(attribute::IAttributeVector::EnumHandle eidx, + uint32_t fromLid, + uint32_t toLid, + EnumStoreComparator &cmp) +{ + PostingChange

postings; + + for (uint32_t lid = fromLid; lid < toLid; ++lid) { + postings.remove(lid); + } + + typedef EnumPostingTree::Iterator DictIT; + EntryRef er(eidx); + DictIT di(_dict.lowerBound(er, cmp)); + assert(di.valid()); + + EntryRef newPosting = di.getData(); + assert(newPosting.valid()); + + _postingList.apply(newPosting, + &postings._additions[0], + &postings._additions[0] + + postings._additions.size(), + &postings._removals[0], + &postings._removals[0] + + postings._removals.size()); + _dict.thaw(di); + di.writeData(newPosting); +} + + +template +void +PostingListAttributeBase

::forwardedShrinkLidSpace(uint32_t newSize) +{ + (void) _postingList.resizeBitVectors(newSize, newSize); +} + + +template +PostingListAttributeSubBase:: +PostingListAttributeSubBase(AttributeVector &attr, + EnumStore &enumStore) + : Parent(attr, enumStore), + _es(enumStore) +{ +} + + +template +PostingListAttributeSubBase:: +~PostingListAttributeSubBase(void) +{ +} + + +template +void +PostingListAttributeSubBase:: +handleFillPostings(LoadedVector &loaded) +{ + clearAllPostings(); + EntryRef newIndex; + PostingChange

postings; + uint32_t docIdLimit = _attr.getNumDocs(); + _postingList.resizeBitVectors(docIdLimit, docIdLimit); + if ( ! loaded.empty() ) { + vespalib::Array similarValues; + typename LoadedVector::Type v = loaded.read(); + LoadedValueType prev = v.getValue(); + for(size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) { + v = loaded.read(); + if (FoldedComparatorType::compareFolded(prev, v.getValue()) == 0) { + // for single value attributes loaded[numDocs] is used + // for default value but we don't want to add an + // invalid docId to the posting list. + if (v._docId < docIdLimit) { + postings.add(v._docId, v.getWeight()); + similarValues.push_back(v); + } + } else { + postings.removeDups(); + + newIndex = EntryRef(); + _postingList.apply(newIndex, + &postings._additions[0], + &postings._additions[0] + + postings._additions.size(), + &postings._removals[0], + &postings._removals[0] + + postings._removals.size()); + postings.clear(); + if (v._docId < docIdLimit) { + postings.add(v._docId, v.getWeight()); + } + similarValues[0]._pidx = newIndex; + for(size_t j(0), k(similarValues.size()); j < k; j++) { + loaded.write(similarValues[j]); + } + similarValues.clear(); + similarValues.push_back(v); + prev = v.getValue(); + } + } + + postings.removeDups(); + newIndex = EntryRef(); + _postingList.apply(newIndex, + &postings._additions[0], + &postings._additions[0] + + postings._additions.size(), + &postings._removals[0], + &postings._removals[0] + postings._removals.size()); + similarValues[0]._pidx = newIndex; + for(size_t i(0), m(similarValues.size()); i < m; i++) { + loaded.write(similarValues[i]); + } + } +} + + +template +void +PostingListAttributeSubBase:: +updatePostings(PostingMap &changePost) +{ + FoldedComparatorType cmpa(_es); + + updatePostings(changePost, cmpa); +} + + +template +void +PostingListAttributeSubBase:: +printPostingListContent(vespalib::asciistream & os) const +{ + for (DictionaryIterator itr = _es.getPostingDictionary().begin(); + 
itr.valid(); ++itr) { + EnumIndex enumIdx = itr.getKey(); + os << "PostingList["; + _es.printValue(os, enumIdx); + os << "]: {"; + + EntryRef postIdx = itr.getData(); + PostingIterator postings = _postingList.begin(postIdx); + for (; postings.valid(); ++postings) { + os << postings.getKey() << ", "; + } + os << "}\n"; + } +} + + +template +void +PostingListAttributeSubBase:: +clearPostings(attribute::IAttributeVector::EnumHandle eidx, + uint32_t fromLid, + uint32_t toLid) +{ + FoldedComparatorType cmp(_es); + clearPostings(eidx, fromLid, toLid, cmp); +} + + + +template class PostingListAttributeBase; +template class PostingListAttributeBase; + +typedef SequentialReadModifyWriteInterface > +LoadedInt8Vector; + +typedef SequentialReadModifyWriteInterface > +LoadedInt16Vector; + +typedef SequentialReadModifyWriteInterface > +LoadedInt32Vector; + +typedef SequentialReadModifyWriteInterface > +LoadedInt64Vector; + +typedef SequentialReadModifyWriteInterface > +LoadedFloatVector; + +typedef SequentialReadModifyWriteInterface > +LoadedDoubleVector; + + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase > >; + +template class +PostingListAttributeSubBase >; + + +} diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h new file mode 100644 index 00000000000..3632e6b5f35 --- /dev/null 
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h @@ -0,0 +1,165 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "dociditerator.h" +#include "postinglistsearchcontext.h" +#include "postingchange.h" +#include "ipostinglistattributebase.h" + +namespace search { + +class EnumPostingPair +{ +private: + EnumStoreBase::Index _idx; + const EnumStoreComparator *_cmp; +public: + // EnumPostingPair() : _itr() {} + EnumPostingPair(EnumStoreBase::Index idx, + const EnumStoreComparator *cmp) + : _idx(idx), + _cmp(cmp) + { + } + + bool + operator<(const EnumPostingPair &rhs) const + { + return (*_cmp)(_idx, rhs._idx); + } + + EnumStoreBase::Index + getEnumIdx() const + { + return _idx; + } +}; + + +template +class PostingListAttributeBase : public attribute::IPostingListAttributeBase +{ +protected: + typedef P Posting; + typedef typename Posting::DataType DataType; + typedef attribute::PostingListTraits AggregationTraits; + typedef typename AggregationTraits::PostingList PostingList; + typedef AttributeVector::DocId DocId; + typedef std::map > PostingMap; + typedef btree::EntryRef EntryRef; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef EnumStoreBase::Index EnumIndex; + PostingList _postingList; + AttributeVector &_attr; + EnumPostingTree &_dict; + EnumStoreBase &_esb; + + PostingListAttributeBase(AttributeVector &attr, + EnumStoreBase &enumStore); + virtual ~PostingListAttributeBase(); + + virtual void + updatePostings(PostingMap & changePost) = 0; + + void + updatePostings(PostingMap &changePost, + EnumStoreComparator &cmp); + + void + clearAllPostings(void); + + void disableFreeLists() { _postingList.disableFreeLists(); } + + void + disableElemHoldList() + { + _postingList.disableElemHoldList(); + } + + void + fillPostingsFixupEnumBase(const 
LoadedEnumAttributeVector &loaded); + + bool + forwardedOnAddDoc(DocId doc, + size_t wantSize, + size_t wantCapacity); + + void + clearPostings(attribute::IAttributeVector::EnumHandle eidx, + uint32_t fromLid, + uint32_t toLid, + EnumStoreComparator &cmp); + + virtual void + forwardedShrinkLidSpace(uint32_t newSize); + +public: + const PostingList & getPostingList() const { return _postingList; } + PostingList & getPostingList() { return _postingList; } +}; + + +template +class PostingListAttributeSubBase : public PostingListAttributeBase

+{ +public: + typedef PostingListAttributeBase

Parent; + typedef EnumStoreType EnumStore; + typedef EnumPostingTree Dictionary; + typedef typename Dictionary::Iterator DictionaryIterator; + typedef EnumStoreBase::Index EnumIndex; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + typedef btree::EntryRef EntryRef; + typedef typename Parent::PostingMap PostingMap; + typedef typename Parent::PostingList PostingList; + typedef typename PostingList::Iterator PostingIterator; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + + using Parent::clearAllPostings; + using Parent::updatePostings; + using Parent::fillPostingsFixupEnumBase; + using Parent::clearPostings; + using Parent::_postingList; + using Parent::_attr; + using Parent::_dict; + +private: + EnumStore &_es; + + +public: + PostingListAttributeSubBase(AttributeVector &attr, + EnumStore &enumStore); + + virtual + ~PostingListAttributeSubBase(void); + + void + handleFillPostings(LoadedVector &loaded); + + virtual void + updatePostings(PostingMap &changePost); + + void + printPostingListContent(vespalib::asciistream & os) const; + + virtual void + clearPostings(attribute::IAttributeVector::EnumHandle eidx, + uint32_t fromLid, + uint32_t toLid); +}; + + +extern template class PostingListAttributeBase; +extern template class PostingListAttributeBase; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp new file mode 100644 index 00000000000..2a64a4d2bdc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "postinglistsearchcontext.h" +#include "postinglistsearchcontext.hpp" +#include +#include +#include "attributeiterators.hpp" + + +namespace search +{ + +namespace attribute +{ + +using btree::BTreeNode; + +PostingListSearchContext:: +PostingListSearchContext(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const EnumStoreBase &esb, + uint32_t minBvDocFreq, + bool useBitVector) + : _frozenDictionary(dictionary.getFrozenView()), + _lowerDictItr(BTreeNode::Ref(), dictionary.getAllocator()), + _upperDictItr(BTreeNode::Ref(), dictionary.getAllocator()), + _uniqueValues(0u), + _docIdLimit(docIdLimit), + _dictSize(_frozenDictionary.size()), + _numValues(numValues), + _hasWeight(hasWeight), + _useBitVector(useBitVector), + _pidx(), + _frozenRoot(), + _FSTC(0.0), + _PLSTC(0.0), + _esb(esb), + _minBvDocFreq(minBvDocFreq), + _gbv(nullptr) +{ +} + + +PostingListSearchContext::~PostingListSearchContext(void) +{ +} + + +void +PostingListSearchContext::lookupTerm(const EnumStoreComparator &comp) +{ + _lowerDictItr.lower_bound(_frozenDictionary.getRoot(), EnumIndex(), comp); + _upperDictItr = _lowerDictItr; + if (_upperDictItr.valid() && !comp(EnumIndex(), _upperDictItr.getKey())) { + ++_upperDictItr; + _uniqueValues = 1u; + } +} + + +void +PostingListSearchContext::lookupRange(const EnumStoreComparator &low, + const EnumStoreComparator &high) +{ + _lowerDictItr.lower_bound(_frozenDictionary.getRoot(), EnumIndex(), low); + _upperDictItr = _lowerDictItr; + if (_upperDictItr.valid() && !high(EnumIndex(), _upperDictItr.getKey())) { + _upperDictItr.seekPast(EnumIndex(), high); + } + _uniqueValues = _upperDictItr - _lowerDictItr; +} + + +void +PostingListSearchContext::lookupSingle(void) +{ + if (_lowerDictItr.valid()) { + _pidx = _lowerDictItr.getData(); + } +} + +template class PostingListSearchContextT; +template class PostingListSearchContextT; +template class PostingListFoldedSearchContextT; +template class 
PostingListFoldedSearchContextT; + + +} // namespace attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h new file mode 100644 index 00000000000..f857c93049b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -0,0 +1,388 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "enumstore.h" +#include "postinglisttraits.h" +#include "postingstore.h" +#include "ipostinglistsearchcontext.h" +#include +#include "attributevector.h" +#include + +namespace search +{ + +namespace attribute +{ + + +/** + * Search context helper for posting list attributes, used to instantiate + * iterators based on posting lists instead of brute force filtering search. + */ + +class PostingListSearchContext : public IPostingListSearchContext +{ +protected: + typedef EnumPostingTree Dictionary; + typedef Dictionary::ConstIterator DictionaryConstIterator; + typedef Dictionary::FrozenView FrozenDictionary; + typedef EnumStoreBase::Index EnumIndex; + + const FrozenDictionary _frozenDictionary; + DictionaryConstIterator _lowerDictItr; + DictionaryConstIterator _upperDictItr; + uint32_t _uniqueValues; + uint32_t _docIdLimit; + uint32_t _dictSize; + uint64_t _numValues; // attr.getStatus().getNumValues(); + bool _hasWeight; + bool _useBitVector; + search::btree::EntryRef _pidx; + search::btree::EntryRef _frozenRoot; // Posting list in tree form + float _FSTC; // Filtering Search Time Constant + float _PLSTC; // Posting List Search Time Constant + const EnumStoreBase &_esb; + uint32_t _minBvDocFreq; + const GrowableBitVector *_gbv; // bitvector if _useBitVector has been set + + + PostingListSearchContext(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const EnumStoreBase &esb, + uint32_t 
minBvDocFreq, + bool useBitVector); + + ~PostingListSearchContext(void); + + void lookupTerm(const EnumStoreComparator &comp); + void lookupRange(const EnumStoreComparator &low, const EnumStoreComparator &high); + void lookupSingle(void); + virtual bool useThis(const DictionaryConstIterator & it) const { + (void) it; + return true; + } + + float calculateFilteringCost(void) const { + // filtering search time (ms) ~ FSTC * numValues; (FSTC = + // Filtering Search Time Constant) + return _FSTC * _numValues; + } + + float calculatePostingListCost(uint32_t approxNumHits) const { + // search time (ms) ~ PLSTC * numHits * log(numHits); (PLSTC = + // Posting List Search Time Constant) + return _PLSTC * approxNumHits; + } + + uint32_t calculateApproxNumHits(void) const { + float docsPerUniqueValue = static_cast(_docIdLimit) / + static_cast(_dictSize); + return static_cast(docsPerUniqueValue * _uniqueValues); + } + + virtual bool fallbackToFiltering(void) const { + uint32_t numHits = calculateApproxNumHits(); + // numHits > 1000: make sure that posting lists are unit tested. 
+ return (numHits > 1000) && + (calculateFilteringCost() < calculatePostingListCost(numHits)); + } + +public: +}; + + +template +class PostingListSearchContextT : public PostingListSearchContext +{ +protected: + typedef DataT DataType; + typedef PostingListTraits Traits; + typedef typename Traits::PostingList PostingList; + typedef typename Traits::Posting Posting; + typedef std::vector PostingVector; + typedef btree::EntryRef EntryRef; + typedef typename PostingList::ConstIterator PostingConstIterator; + + const PostingList &_postingList; + /* + * Synthetic posting lists for range search, in array or bitvector form + */ + PostingVector _array; + BitVector::UP _bitVector; + bool _fetchPostingsDone; + bool _arrayValid; + + static const long MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION = 100; + static const long MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 20; + static const long MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION = 10; + + PostingListSearchContextT(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const PostingList &postingList, + const EnumStoreBase &esb, + uint32_t minBvCocFreq, + bool useBitVector); + + void lookupSingle(void); + size_t countHits(void) const; + void fillArray(size_t numDocs); + void fillBitVector(void); + + PostingVector & + merge(PostingVector &v, PostingVector &temp, + const std::vector & startPos) __attribute__((noinline)); + + void fetchPostings(bool strict) override; + // this will be called instead of the fetchPostings function in some cases + void diversify(bool forward, size_t wanted_hits, + const IAttributeVector &diversity_attr, size_t max_per_group, + size_t cutoff_groups, bool cutoff_strict); + + queryeval::SearchIterator::UP + createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override; + + unsigned int singleHits(void) const; + unsigned int approximateHits(void) const override; + void applyRangeLimit(int rangeLimit); +}; + + +template +class 
PostingListFoldedSearchContextT : public PostingListSearchContextT +{ +protected: + typedef PostingListSearchContextT Parent; + typedef typename Parent::Dictionary Dictionary; + typedef typename Parent::PostingList PostingList; + using Parent::_lowerDictItr; + using Parent::_uniqueValues; + using Parent::_postingList; + using Parent::_docIdLimit; + using Parent::countHits; + using Parent::singleHits; + + PostingListFoldedSearchContextT(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const PostingList &postingList, + const EnumStoreBase &esb, + uint32_t minBvCocFreq, + bool useBitVector); + + unsigned int approximateHits(void) const override; +}; + + +template +class PostingSearchContext: public BaseSC, + public BaseSC2 +{ +public: + typedef typename AttrT::EnumStore EnumStore; +protected: + const AttrT &_toBeSearched; + const EnumStore &_enumStore; + + PostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched); +}; + +template +class StringPostingSearchContext + : public PostingSearchContext, AttrT> +{ +private: + typedef PostingListTraits AggregationTraits; + typedef typename AggregationTraits::PostingList PostingList; + typedef typename PostingList::Iterator PostingIterator; + typedef typename PostingList::ConstIterator PostingConstIterator; + typedef PostingSearchContext, AttrT> + Parent; + typedef typename Parent::EnumStore EnumStore; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + typedef vespalib::Regexp Regexp; + using Parent::_toBeSearched; + using Parent::_enumStore; + using Parent::getRegex; + bool useThis(const PostingListSearchContext::DictionaryConstIterator & it) const override { + return getRegex() ? 
getRegex()->match(_enumStore.getValue(it.getKey())) : true; + } +public: + StringPostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched); +}; + +template +class NumericPostingSearchContext + : public PostingSearchContext, AttrT> +{ +private: + typedef PostingSearchContext, AttrT> Parent; + typedef PostingListTraits AggregationTraits; + typedef typename AggregationTraits::PostingList PostingList; + typedef typename PostingList::Iterator PostingIterator; + typedef typename PostingList::ConstIterator PostingConstIterator; + typedef typename Parent::EnumStore EnumStore; + typedef typename EnumStore::ComparatorType ComparatorType; + typedef typename AttrT::T BaseType; + typedef typename Parent::Params Params; + using Parent::_low; + using Parent::_high; + using Parent::_toBeSearched; + using Parent::_enumStore; + Params _params; + + void getIterators(bool shouldApplyRangeLimit); + bool valid() const override { return this->isValid(); } + + bool fallbackToFiltering(void) const override { + return (this->getRangeLimit() != 0) + ? false + : Parent::fallbackToFiltering(); + } + unsigned int approximateHits(void) const override { + const unsigned int estimate = PostingListSearchContextT::approximateHits(); + const unsigned int limit = std::abs(this->getRangeLimit()); + return ((limit > 0) && (limit < estimate)) + ? 
limit + : estimate; + } + void fetchPostings(bool strict) override { + if (params().diversityAttribute() != nullptr) { + bool forward = (this->getRangeLimit() > 0); + size_t wanted_hits = std::abs(this->getRangeLimit()); + PostingListSearchContextT::diversify(forward, wanted_hits, + *(params().diversityAttribute()), this->getMaxPerGroup(), + params().diversityCutoffGroups(), params().diversityCutoffStrict()); + } else { + PostingListSearchContextT::fetchPostings(strict); + } + } + +public: + NumericPostingSearchContext(QueryTermSimple::UP qTerm, const Params & params, const AttrT &toBeSearched); + const Params ¶ms() const { return _params; } +}; + + +template +PostingSearchContext:: +PostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched) + : BaseSC(std::move(qTerm), toBeSearched), + BaseSC2(toBeSearched.getEnumStore().getPostingDictionary(), + toBeSearched.getCommittedDocIdLimit(), + toBeSearched.getStatus().getNumValues(), + toBeSearched.hasWeightedSetType(), + toBeSearched.getPostingList(), + toBeSearched.getEnumStore(), + toBeSearched._postingList._minBvDocFreq, + useBitVector), + _toBeSearched(toBeSearched), + _enumStore(_toBeSearched.getEnumStore()) +{ + this->_plsc = static_cast(this); +} + + +template +StringPostingSearchContext:: +StringPostingSearchContext(QueryTermSimple::UP qTerm, bool useBitVector, const AttrT &toBeSearched) + : Parent(std::move(qTerm), useBitVector, toBeSearched) +{ + // after benchmarking prefix search performance on single, array, and weighted set fast-aggregate string attributes + // with 1M values the following constant has been derived: + this->_FSTC = 0.000028; + + // after benchmarking prefix search performance on single, array, and weighted set fast-search string attributes + // with 1M values the following constant has been derived: + this->_PLSTC = 0.000000; + + if (this->valid()) { + if (this->isPrefix()) { + FoldedComparatorType comp(_enumStore, this->queryTerm().getTerm(), true); + 
this->lookupRange(comp, comp); + } else if (this->isRegex()) { + vespalib::string prefix(Regexp::get_prefix(this->queryTerm().getTerm())); + FoldedComparatorType comp(_enumStore, prefix.c_str(), true); + this->lookupRange(comp, comp); + } else { + FoldedComparatorType comp(_enumStore, this->queryTerm().getTerm()); + this->lookupTerm(comp); + } + if (this->_uniqueValues == 1u) { + this->lookupSingle(); + } + } +} + + +template +NumericPostingSearchContext:: +NumericPostingSearchContext(QueryTermSimple::UP qTerm, const Params & params_in, const AttrT &toBeSearched) + : Parent(std::move(qTerm), params_in.useBitVector(), toBeSearched), + _params(params_in) +{ + // after simplifying the formula and simple benchmarking and thumbs in the air + // a ratio of 8 between numvalues and estimated number of hits has been found. + this->_FSTC = 1; + + this->_PLSTC = 8; + if (valid()) { + if (_low == _high) { + ComparatorType comp(_enumStore, _low); + this->lookupTerm(comp); + } else if (_low < _high) { + bool shouldApplyRangeLimit = (params().diversityAttribute() == nullptr) && + (this->getRangeLimit() != 0); + getIterators( shouldApplyRangeLimit ); + } + if (this->_uniqueValues == 1u) { + this->lookupSingle(); + } + } +} + + +template +void +NumericPostingSearchContext:: +getIterators(bool shouldApplyRangeLimit) +{ + bool isFloat = + _toBeSearched.getBasicType() == BasicType::FLOAT || + _toBeSearched.getBasicType() == BasicType::DOUBLE; + bool isUnsigned = _toBeSearched.getInternalBasicType().isUnsigned(); + search::Range capped = this->template cappedRange(isFloat, isUnsigned); + + ComparatorType compLow(_enumStore, capped.lower()); + ComparatorType compHigh(_enumStore, capped.upper()); + + this->lookupRange(compLow, compHigh); + if (shouldApplyRangeLimit) { + this->applyRangeLimit(this->getRangeLimit()); + } + + if (this->_lowerDictItr != this->_upperDictItr) { + _low = _enumStore.getValue(this->_lowerDictItr.getKey()); + auto last = this->_upperDictItr; + --last; + _high = 
_enumStore.getValue(last.getKey()); + } +} + + + +extern template class PostingListSearchContextT; +extern template class PostingListSearchContextT; +extern template class PostingListFoldedSearchContextT; +extern template class PostingListFoldedSearchContextT; + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp new file mode 100644 index 00000000000..295feae75a6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -0,0 +1,388 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "dociditerator.h" +#include "attributeiterators.h" +#include +#include "diversity.h" + +namespace search +{ + +using queryeval::EmptySearch; +using queryeval::SearchIterator; + +namespace attribute +{ + +template +PostingListSearchContextT:: +PostingListSearchContextT(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const PostingList &postingList, + const EnumStoreBase &esb, + uint32_t minBvDocFreq, + bool useBitVector) + : PostingListSearchContext(dictionary, docIdLimit, numValues, hasWeight, + esb, minBvDocFreq, useBitVector), + _postingList(postingList), + _array(), + _bitVector(), + _fetchPostingsDone(false), + _arrayValid(false) +{ +} + + +template +void +PostingListSearchContextT::lookupSingle(void) +{ + PostingListSearchContext::lookupSingle(); + if (!_pidx.valid()) + return; + uint32_t typeId = _postingList.getTypeId(_pidx); + if (!_postingList.isSmallArray(typeId)) { + if (_postingList.isBitVector(typeId)) { + const BitVectorEntry *bve = _postingList.getBitVectorEntry(_pidx); + const GrowableBitVector *bv = bve->_bv.get(); + if (_useBitVector) { + _gbv = bv; + } else { + _pidx = bve->_tree; + if (_pidx.valid()) { + typename 
PostingList::BTreeType::FrozenView + frozenView(_postingList.getTreeEntry(_pidx)-> + getFrozenView(_postingList.getAllocator())); + _frozenRoot = frozenView.getRoot(); + if (!_frozenRoot.valid()) { + _pidx = btree::EntryRef(); + } + } else { + _gbv = bv; + } + } + } else { + typename PostingList::BTreeType::FrozenView + frozenView(_postingList.getTreeEntry(_pidx)-> + getFrozenView(_postingList.getAllocator())); + _frozenRoot = frozenView.getRoot(); + if (!_frozenRoot.valid()) { + _pidx = btree::EntryRef(); + } + } + } +} + + +template +size_t +PostingListSearchContextT::countHits(void) const +{ + size_t sum(0); + for (auto it(_lowerDictItr); it != _upperDictItr; ++it) { + if (useThis(it)) { + sum += _postingList.frozenSize(it.getData()); + } + } + return sum; +} + + +template +void +PostingListSearchContextT::fillArray(size_t numDocs) +{ + _array.clear(); + _array.reserve(numDocs); + std::vector startPos; + startPos.reserve(_uniqueValues + 1); + startPos.push_back(0); + for (auto it(_lowerDictItr); it != _upperDictItr; ++it) { + if (useThis(it)) { + _postingList.foreach_frozen(it.getData(), + [&](uint32_t key, const DataT &data) + { _array.push_back(Posting(key, data)); + }); + startPos.push_back(_array.size()); + } + } + if (startPos.size() > 2) { + PostingVector temp(_array.size()); + _array.swap(merge(_array, temp, startPos)); + } + _arrayValid = true; +} + + +template +void +PostingListSearchContextT::fillBitVector(void) +{ + _bitVector = BitVector::create(_docIdLimit); + BitVector &bv(*_bitVector); + uint32_t limit = bv.size(); + for (auto it(_lowerDictItr); it != _upperDictItr; ++it) { + if (useThis(it)) { + _postingList.foreach_frozen_key(it.getData(), + [&](uint32_t key) + { if (key < limit) { + bv.setBit(key); + } + }); + } + } + bv.invalidateCachedCount(); +} + + +template +typename PostingListSearchContextT::PostingVector & +PostingListSearchContextT:: +merge(PostingVector &v, PostingVector &temp, + const std::vector &startPos) +{ + std::vector 
nextStartPos; + nextStartPos.reserve((startPos.size() + 1) / 2); + nextStartPos.push_back(0); + for (size_t i(0), m((startPos.size() - 1) / 2); i < m; i++) { + size_t aStart = startPos[i * 2 + 0]; + size_t aEnd = startPos[i * 2 + 1]; + size_t bStart = aEnd; + size_t bEnd = startPos[i * 2 + 2]; + typename PostingVector::const_iterator it = v.begin(); + std::merge(it + aStart, it + aEnd, + it + bStart, it + bEnd, + temp.begin() + aStart); + nextStartPos.push_back(bEnd); + } + if ((startPos.size() - 1) % 2) { + for (size_t i(startPos[startPos.size() - 2]), m(v.size()); i < m; i++) { + temp[i] = v[i]; + } + nextStartPos.push_back(temp.size()); + } + return (nextStartPos.size() > 2) ? merge(temp, v, nextStartPos) : temp; +} + + +template +void +PostingListSearchContextT::fetchPostings(bool strict) +{ + assert(!_fetchPostingsDone); + _fetchPostingsDone = true; + if (_uniqueValues < 2u) { + return; + } + if (strict && !fallbackToFiltering()) { + size_t sum(countHits()); + if (sum < _docIdLimit / 64) { + fillArray(sum); + } else { + fillBitVector(); + } + } +} + + +template +void +PostingListSearchContextT::diversify(bool forward, size_t wanted_hits, + const IAttributeVector &diversity_attr, size_t max_per_group, + size_t cutoff_groups, bool cutoff_strict) +{ + assert(!_fetchPostingsDone); + _fetchPostingsDone = true; + _array.clear(); + _array.reserve(wanted_hits); + std::vector fragments; + fragments.push_back(0); + diversity::diversify(forward, _lowerDictItr, _upperDictItr, _postingList, wanted_hits, + diversity_attr, max_per_group, cutoff_groups, cutoff_strict, + _array, fragments); + if (fragments.size() > 2) { + PostingVector temp(_array.size()); + _array.swap(merge(_array, temp, fragments)); + } + _arrayValid = true; +} + + +template +SearchIterator::UP +PostingListSearchContextT:: +createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) +{ + assert(_fetchPostingsDone); + if (_uniqueValues == 0u) { + return SearchIterator::UP(new EmptySearch()); + } + 
if (_arrayValid || (_bitVector.get() != nullptr)) { // synthetic results are available + if (!_array.empty()) { + assert(_arrayValid); + typedef DocIdIterator DocIt; + DocIt postings; + postings.set(&_array[0], &_array[_array.size()]); + return (_postingList._isFilter) + ? SearchIterator::UP(new FilterAttributePostingListIteratorT(postings, matchData)) + : SearchIterator::UP(new AttributePostingListIteratorT(postings, _hasWeight, matchData)); + } + if (_arrayValid) { + return SearchIterator::UP(new EmptySearch()); + } + BitVector *bv(_bitVector.get()); + assert(bv != nullptr); + return search::BitVectorIterator::create(bv, bv->size(), *matchData, strict); + } + if (_uniqueValues == 1) { + if (_gbv != nullptr) { + return BitVectorIterator::create(_gbv, std::min(_gbv->size(), _docIdLimit), *matchData, strict); + } + if (!_pidx.valid()) { + return SearchIterator::UP(new EmptySearch()); + } + const PostingList &postingList = _postingList; + if (!_frozenRoot.valid()) { + uint32_t clusterSize = _postingList.getClusterSize(_pidx); + assert(clusterSize != 0); + typedef DocIdMinMaxIterator DocIt; + DocIt postings; + const Posting *array = postingList.getKeyDataEntry(_pidx, clusterSize); + postings.set(array, array + clusterSize); + return (postingList._isFilter) + ? SearchIterator::UP(new FilterAttributePostingListIteratorT(postings, matchData)) + : SearchIterator::UP(new AttributePostingListIteratorT(postings, _hasWeight, matchData)); + } + typename PostingList::BTreeType::FrozenView frozenView(_frozenRoot, postingList.getAllocator()); + PostingConstIterator postings = frozenView.begin(); + return (_postingList._isFilter) + ? 
SearchIterator::UP(new FilterAttributePostingListIteratorT (postings, matchData)) + : SearchIterator::UP(new AttributePostingListIteratorT (postings, _hasWeight, matchData)); + } + // returning nullptr will trigger fallback to filter iterator + return SearchIterator::UP(); +} + + +template +unsigned int +PostingListSearchContextT::singleHits(void) const +{ + if (_gbv) { + const GrowableBitVector *bv = _gbv; + uint32_t extraGuards = bv->capacity() - bv->size(); + // Some inaccuracy is expected, data changes underfeet + int32_t res = bv->countTrueBits() - extraGuards; + if (res < 1) + res = 1; + return res; + } + if (!_pidx.valid()) + return 0u; + if (!_frozenRoot.valid()) + return _postingList.getClusterSize(_pidx); + typename PostingList::BTreeType::FrozenView + frozenView(_frozenRoot, _postingList.getAllocator()); + return frozenView.size(); +} + +template +unsigned int +PostingListSearchContextT::approximateHits(void) const +{ + unsigned int numHits = 0; + if (_uniqueValues == 0u) { + } else if (_uniqueValues == 1u) { + numHits = singleHits(); + } else { + if (this->fallbackToFiltering()) { + numHits = _docIdLimit; + } else if (_uniqueValues > MIN_UNIQUE_VALUES_BEFORE_APPROXIMATION) { + if ((_uniqueValues * + MIN_UNIQUE_VALUES_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > + static_cast(_docIdLimit)) || + (this->calculateApproxNumHits() * + MIN_APPROXHITS_TO_NUMDOCS_RATIO_BEFORE_APPROXIMATION > + _docIdLimit)) { + numHits = this->calculateApproxNumHits(); + } else { + // XXX: Unsafe + numHits = countHits(); + } + } else { + // XXX: Unsafe + numHits = countHits(); + } + } + return numHits; +} + + +template +void +PostingListSearchContextT::applyRangeLimit(int rangeLimit) +{ + if (rangeLimit > 0) { + DictionaryConstIterator middle = _lowerDictItr; + for (int n(0); (n < rangeLimit) && (middle != _upperDictItr); ++middle) { + n += _postingList.frozenSize(middle.getData()); + } + _upperDictItr = middle; + _uniqueValues = _upperDictItr - _lowerDictItr; + } else if 
((rangeLimit < 0) && (_lowerDictItr != _upperDictItr)) { + rangeLimit = -rangeLimit; + DictionaryConstIterator middle = _upperDictItr; + for (int n(0); (n < rangeLimit) && (middle != _lowerDictItr); ) { + --middle; + n += _postingList.frozenSize(middle.getData()); + } + _lowerDictItr = middle; + _uniqueValues = _upperDictItr - _lowerDictItr; + } +} + + +template +PostingListFoldedSearchContextT:: +PostingListFoldedSearchContextT(const Dictionary &dictionary, + uint32_t docIdLimit, + uint64_t numValues, + bool hasWeight, + const PostingList &postingList, + const EnumStoreBase &esb, + uint32_t minBvDocFreq, + bool useBitVector) + : Parent(dictionary, docIdLimit, numValues, hasWeight, postingList, + esb, minBvDocFreq, useBitVector) +{ +} + + +template +unsigned int +PostingListFoldedSearchContextT::approximateHits(void) const +{ + unsigned int numHits = 0; + if (_uniqueValues == 0u) { + } else if (_uniqueValues == 1u) { + numHits = singleHits(); + } else { + if (this->fallbackToFiltering()) { + numHits = _docIdLimit; + } else { + // XXX: Unsafe + numHits = countHits(); + } + } + return numHits; +} + +} // namespace attribute + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp new file mode 100644 index 00000000000..38bf41d4c49 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "postinglisttraits.h" +#include + +LOG_SETUP(".searchlib.attribute.postinglisttraits"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h new file mode 100644 index 00000000000..c2504f779a1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postinglisttraits.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +namespace attribute +{ + +template class PostingListTraits; +template class PostingStore; + +template <> +class PostingListTraits +{ +public: + typedef btree::NoAggregated AggregatedType; + typedef btree::NoAggrCalc AggrCalcType; + typedef btree::BTreeStore, + btree::BTreeDefaultTraits, + AggrCalcType> PostingStoreBase; + typedef PostingStore PostingList; + typedef PostingStoreBase::KeyDataType Posting; +}; + + +template <> +class PostingListTraits +{ +public: + typedef btree::MinMaxAggregated AggregatedType; + typedef btree::MinMaxAggrCalc AggrCalcType; + typedef btree::BTreeStore, + btree::BTreeDefaultTraits, + AggrCalcType> PostingStoreBase; + typedef PostingStore PostingList; + typedef PostingStoreBase::KeyDataType Posting; +}; + + +} // namespace attribute + +typedef btree::BTreeKeyData AttributePosting; + +typedef btree::BTreeKeyData AttributeWeightPosting; + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp new file mode 100644 index 00000000000..7902d3f8d87 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -0,0 +1,638 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "postingstore.h" +#include +#include +#include +#include + +namespace search +{ + +namespace attribute +{ + +using btree::BTreeNoLeafData; + +// #define FORCE_BITVECTORS + + +PostingStoreBase2::PostingStoreBase2(EnumPostingTree &dict, Status &status, + const Config &config) + : +#ifdef FORCE_BITVECTORS + _enableBitVectors(true), +#else + _enableBitVectors(config.getEnableBitVectors()), +#endif + _enableOnlyBitVector(config.getEnableOnlyBitVector()), + _isFilter(config.getIsFilter()), + _bvSize(64u), + _bvCapacity(128u), + _minBvDocFreq(64), + _maxBvDocFreq(std::numeric_limits::max()), + _bvs(), + _dict(dict), + _status(status), + _bvExtraBytes(0) +{ +} + + +PostingStoreBase2::~PostingStoreBase2() +{ +} + + +bool +PostingStoreBase2::resizeBitVectors(uint32_t newSize, uint32_t newCapacity) +{ + assert(newCapacity >= newSize); + newSize = (newSize + 63) & ~63; + if (newSize >= newCapacity) + newSize = newCapacity; + if (newSize == _bvSize && newCapacity == _bvCapacity) + return false; + _minBvDocFreq = std::max(newSize >> 6, 64u); + _maxBvDocFreq = std::max(newSize >> 5, 128u); + if (_bvs.empty()) { + _bvSize = newSize; + _bvCapacity = newCapacity; + return false; + } + _bvSize = newSize; + _bvCapacity = newCapacity; + return removeSparseBitVectors(); +} + + +template +PostingStore::PostingStore(EnumPostingTree &dict, Status &status, + const Config &config) + : Parent(false), + PostingStoreBase2(dict, status, config), + _bvType(1, 1024u, RefType::offsetSize()) +{ + // TODO: Add type for bitvector + _store.addType(&_bvType); + _store.initActiveBuffers(); + _store.enableFreeLists(); +} + + +template +PostingStore::~PostingStore() +{ + _builder.clear(); + _store.dropBuffers(); // Drop buffers before type handlers are dropped +} + + +template +bool +PostingStore::removeSparseBitVectors() +{ + bool res = false; + bool needscan = false; + for (auto &i : _bvs) { + RefType iRef(i); + uint32_t typeId = getTypeId(iRef); + assert(isBitVector(typeId)); + 
BitVectorEntry *bve = getWBitVectorEntry(iRef); + GrowableBitVector &bv = *bve->_bv.get(); + uint32_t docFreq = bv.countTrueBits(); + if (bve->_tree.valid()) { + RefType iRef2(bve->_tree); + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + assert(tree->size(_allocator) == docFreq); + } + if (docFreq < _minBvDocFreq) + needscan = true; + unsigned int oldExtraSize = bv.extraByteSize(); + if (bv.size() > _bvSize) { + bv.shrink(_bvSize); + res = true; + } + if (bv.capacity() < _bvCapacity) { + bv.reserve(_bvCapacity); + res = true; + } + if (bv.size() < _bvSize) { + bv.extend(_bvSize); + } + unsigned int newExtraSize = bv.extraByteSize(); + if (oldExtraSize != newExtraSize) { + _bvExtraBytes = _bvExtraBytes + newExtraSize - oldExtraSize; + } + } + if (needscan) { + typedef EnumPostingTree::Iterator EnumIterator; + for (EnumIterator dictItr = _dict.begin(); dictItr.valid(); ++dictItr) { + if (!isBitVector(getTypeId(dictItr.getData()))) + continue; + EntryRef ref(dictItr.getData()); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + assert(isBitVector(typeId)); + assert(_bvs.find(ref.ref() )!= _bvs.end()); + BitVectorEntry *bve = getWBitVectorEntry(iRef); + BitVector &bv = *bve->_bv.get(); + uint32_t docFreq = bv.countTrueBits(); + if (bve->_tree.valid()) { + RefType iRef2(bve->_tree); + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + assert(tree->size(_allocator) == docFreq); + } + if (docFreq < _minBvDocFreq) { + dropBitVector(ref); + if (ref.valid()) { + iRef = ref; + typeId = getTypeId(iRef); + if (isBTree(typeId)) { + BTreeType *tree = getWTreeEntry(iRef); + normalizeTree(ref, tree, false); + } + } + _dict.thaw(dictItr); + dictItr.writeData(ref); + res = true; + } + } + } + return res; +} + + +template +void +PostingStore::applyNew(EntryRef &ref, + AddIter a, + AddIter ae) +{ + // No old data + assert(!ref.valid()); + size_t additionSize(ae - a); + uint32_t clusterSize = additionSize; + if (clusterSize <= 
clusterLimit) { + applyNewArray(ref, a, ae); + } else if (_enableBitVectors && clusterSize >= _maxBvDocFreq) { + applyNewBitVector(ref, a, ae); + } else { + applyNewTree(ref, a, ae, CompareT()); + } +} + + +template +void +PostingStore::makeDegradedTree(EntryRef &ref, + const BitVector &bv) +{ + assert(!ref.valid()); + BTreeTypeRefPair tPair(allocBTree()); + BTreeType *tree = tPair.second; + Builder &builder = _builder; + builder.reuse(); + uint32_t docIdLimit = _bvSize; + assert(_bvSize == bv.size()); + uint32_t docId = bv.getFirstTrueBit(); + while (docId < docIdLimit) { + builder.insert(docId, bitVectorWeight()); + docId = bv.getNextTrueBit(docId + 1); + } + tree->assign(builder, _allocator); + assert(tree->size(_allocator) == bv.countTrueBits()); + // barrier ? + ref = tPair.first; +} + + +template +void +PostingStore::dropBitVector(EntryRef &ref) +{ + assert(ref.valid()); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + assert(isBitVector(typeId)); + BitVectorEntry *bve = getWBitVectorEntry(iRef); + AllocatedBitVector *bv = bve->_bv.get(); + assert(bv); + uint32_t docFreq = bv->countTrueBits(); + EntryRef ref2(bve->_tree); + if (!ref2.valid()) { + makeDegradedTree(ref2, *bv); + } + assert(ref2.valid()); + assert(isBTree(ref2)); + const BTreeType *tree = getTreeEntry(ref2); + assert(tree->size(_allocator) == docFreq); + _bvs.erase(ref.ref()); + _store.holdElem(iRef, 1); + _status.decBitVectors(); + _bvExtraBytes -= bv->extraByteSize(); + ref = ref2; +} + + +template +void +PostingStore::makeBitVector(EntryRef &ref) +{ + assert(ref.valid()); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + assert(isBTree(typeId)); + std::shared_ptr bvsp; + vespalib::GenerationHolder &genHolder = _store.getGenerationHolder(); + bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder)); + AllocatedBitVector &bv = *bvsp.get(); + uint32_t docIdLimit = _bvSize; + Iterator it = begin(ref); + uint32_t expDocFreq = it.size(); + (void) expDocFreq; + for (; 
it.valid(); ++it) { + uint32_t docId = it.getKey(); + assert(docId < docIdLimit); + bv.setBit(docId); + } + bv.invalidateCachedCount(); + assert(bv.countTrueBits() == expDocFreq); + BitVectorRefPair bPair(allocBitVector()); + BitVectorEntry *bve = bPair.second; + if (_enableOnlyBitVector) { + BTreeType *tree = getWTreeEntry(iRef); + tree->clear(_allocator); + _store.holdElem(ref, 1); + } else { + bve->_tree = ref; + } + bve->_bv = bvsp; + _bvs.insert(bPair.first.ref()); + _status.incBitVectors(); + _bvExtraBytes += bv.extraByteSize(); + // barrier ? + ref = bPair.first; +} + + +template +void +PostingStore::applyNewBitVector(EntryRef &ref, + AddIter aOrg, + AddIter ae) +{ + assert(!ref.valid()); + RefType iRef(ref); + std::shared_ptr bvsp; + vespalib::GenerationHolder &genHolder = _store.getGenerationHolder(); + bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder)); + AllocatedBitVector &bv = *bvsp.get(); + uint32_t docIdLimit = _bvSize; + uint32_t expDocFreq = ae - aOrg; + (void) expDocFreq; + for (AddIter a = aOrg; a != ae; ++a) { + uint32_t docId = a->_key; + assert(docId < docIdLimit); + bv.setBit(docId); + } + bv.invalidateCachedCount(); + assert(bv.countTrueBits() == expDocFreq); + BitVectorRefPair bPair(allocBitVector()); + BitVectorEntry *bve = bPair.second; + if (!_enableOnlyBitVector) { + applyNewTree(bve->_tree, aOrg, ae, CompareT()); + } + bve->_bv = bvsp; + _bvs.insert(bPair.first.ref()); + _status.incBitVectors(); + _bvExtraBytes += bv.extraByteSize(); + // barrier ? 
+ ref = bPair.first; +} + + +template +void +PostingStore::apply(BitVector &bv, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re) +{ + while (a != ae || r != re) { + if (r != re && (a == ae || *r < a->_key)) { + // remove + assert(*r < bv.size()); + bv.slowClearBit(*r); + ++r; + } else { + if (r != re && !(a->_key < *r)) { + // update or add + assert(a->_key < bv.size()); + bv.slowSetBit(a->_key); + ++r; + } else { + assert(a->_key < bv.size()); + bv.slowSetBit(a->_key); + } + ++a; + } + } +} + + +template +void +PostingStore::apply(EntryRef &ref, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re) +{ + if (!ref.valid()) { + // No old data + applyNew(ref, a, ae); + return; + } + RefType iRef(ref); + bool wasArray = false; + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize != 0) { + wasArray = true; + if (applyCluster(ref, clusterSize, a, ae, r, re, CompareT())) + return; + iRef = ref; + typeId = getTypeId(iRef); + } + // Old data was tree or has been converted to a tree + // ... 
or old data was bitvector + if (isBitVector(typeId)) { + BitVectorEntry *bve = getWBitVectorEntry(iRef); + EntryRef ref2(bve->_tree); + RefType iRef2(ref2); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + BTreeType *tree = getWTreeEntry(iRef2); + applyTree(tree, a, ae, r, re, CompareT()); + } + BitVector *bv = bve->_bv.get(); + assert(bv); + apply(*bv, a, ae, r, re); + uint32_t docFreq = bv->countTrueBits(); + if (docFreq < _minBvDocFreq) { + dropBitVector(ref); + if (ref.valid()) { + iRef = ref; + typeId = getTypeId(iRef); + if (isBTree(typeId)) { + BTreeType *tree = getWTreeEntry(iRef); + assert(tree->size(_allocator) == docFreq); + normalizeTree(ref, tree, wasArray); + } + } + } + } else { + BTreeType *tree = getWTreeEntry(iRef); + applyTree(tree, a, ae, r, re, CompareT()); + if (_enableBitVectors) { + uint32_t docFreq = tree->size(_allocator); + if (docFreq >= _maxBvDocFreq) { + makeBitVector(ref); + return; + } + } + normalizeTree(ref, tree, wasArray); + } +} + + +template +size_t +PostingStore::internalSize(uint32_t typeId, const RefType & iRef) const +{ + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + return tree->size(_allocator); + } else { + const BitVector *bv = bve->_bv.get(); + return bv->countTrueBits(); + } + } else { + const BTreeType *tree = getTreeEntry(iRef); + return tree->size(_allocator); + } +} + + +template +size_t +PostingStore::internalFrozenSize(uint32_t typeId, const RefType & iRef) const +{ + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + return tree->frozenSize(_allocator); + } else { + const BitVector *bv = bve->_bv.get(); + // Some inaccuracy is expected, data changes underfeet + int32_t res = 
bv->countTrueBits(); + if (res < 1) + res = 1; + return res; + } + } else { + const BTreeType *tree = getTreeEntry(iRef); + return tree->frozenSize(_allocator); + } +} + + +template +typename PostingStore::Iterator +PostingStore::begin(const EntryRef ref) const +{ + if (!ref.valid()) + return Iterator(); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + return tree->begin(_allocator); + } + return Iterator(); + } + const BTreeType *tree = getTreeEntry(iRef); + return tree->begin(_allocator); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + return Iterator(shortArray, clusterSize, _allocator, _aggrCalc); +} + + +template +typename PostingStore::ConstIterator +PostingStore::beginFrozen(const EntryRef ref) const +{ + if (!ref.valid()) + return ConstIterator(); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + return tree->getFrozenView(_allocator).begin(); + } + return ConstIterator(); + } + const BTreeType *tree = getTreeEntry(iRef); + return tree->getFrozenView(_allocator).begin(); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc); +} + + +template +void +PostingStore::beginFrozen(const EntryRef ref, + std::vector &where) const +{ + if (!ref.valid()) { + where.emplace_back(); + return; + } + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize 
= getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + tree->getFrozenView(_allocator).begin(where); + return; + } + where.emplace_back(); + return; + } + const BTreeType *tree = getTreeEntry(iRef); + tree->getFrozenView(_allocator).begin(where); + return; + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + where.emplace_back(shortArray, clusterSize, _allocator, _aggrCalc); +} + + +template +typename PostingStore::AggregatedType +PostingStore::getAggregated(const EntryRef ref) const +{ + if (!ref.valid()) + return AggregatedType(); + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + return tree->getAggregated(_allocator); + } + return AggregatedType(); + } + const BTreeType *tree = getTreeEntry(iRef); + return tree->getAggregated(_allocator); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + AggregatedType a; + for (uint32_t i = 0; i < clusterSize; ++i) { + _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData())); + } + return a; +} + + +template +void +PostingStore::clear(const EntryRef ref) +{ + if (!ref.valid()) + return; + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + RefType iRef2(bve->_tree); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + BTreeType *tree = getWTreeEntry(iRef2); + tree->clear(_allocator); + _store.holdElem(iRef2, 1); + } + 
_bvs.erase(ref.ref()); + _status.decBitVectors(); + _bvExtraBytes -= bve->_bv->extraByteSize(); + _store.holdElem(ref, 1); + } else { + BTreeType *tree = getWTreeEntry(iRef); + tree->clear(_allocator); + _store.holdElem(ref, 1); + } + } else { + _store.holdElem(ref, clusterSize); + } +} + + +template +MemoryUsage +PostingStore::getMemoryUsage() const +{ + MemoryUsage usage; + usage.merge(_allocator.getMemoryUsage()); + usage.merge(_store.getMemoryUsage()); + uint64_t bvExtraBytes = _bvExtraBytes; + usage.incUsedBytes(bvExtraBytes); + usage.incAllocatedBytes(bvExtraBytes); + return usage; +} + + +template class PostingStore; + +template class PostingStore; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.h b/searchlib/src/vespa/searchlib/attribute/postingstore.h new file mode 100644 index 00000000000..cc379731bf9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.h @@ -0,0 +1,361 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "postinglisttraits.h" +#include +#include +#include +#include "enumstorebase.h" + +namespace search +{ + +namespace attribute +{ + +class Status; +class Config; + +class BitVectorEntry +{ +public: + btree::EntryRef _tree; // Daisy chained reference to tree based posting list + std::shared_ptr _bv; // bitvector + +public: + BitVectorEntry() + : _tree(), + _bv() + { + } +}; + + +class PostingStoreBase2 +{ +public: + bool _enableBitVectors; + bool _enableOnlyBitVector; + bool _isFilter; +protected: + uint32_t _bvSize; + uint32_t _bvCapacity; +public: + uint32_t _minBvDocFreq; // Less than this ==> destroy bv + uint32_t _maxBvDocFreq; // Greater than or equal to this ==> create bv +protected: + std::set _bvs; // Current bitvectors + EnumPostingTree &_dict; + Status &_status; + uint64_t _bvExtraBytes; + + static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u; + +public: + PostingStoreBase2(EnumPostingTree &dict, Status &status, + const Config &config); + + virtual + ~PostingStoreBase2(); + + bool + resizeBitVectors(uint32_t newSize, uint32_t newCapacity); + + virtual bool + removeSparseBitVectors() = 0; +}; + +template +class PostingStore : public PostingListTraits::PostingStoreBase, + public PostingStoreBase2 +{ + btree::BufferType _bvType; +public: + typedef DataT DataType; + typedef typename PostingListTraits::PostingStoreBase Parent; + typedef typename Parent::AddIter AddIter; + typedef typename Parent::RemoveIter RemoveIter; + typedef typename Parent::RefType RefType; + typedef typename Parent::BTreeType BTreeType; + typedef typename Parent::Iterator Iterator; + typedef typename Parent::ConstIterator ConstIterator; + typedef typename Parent::KeyDataType KeyDataType; + typedef typename Parent::AggregatedType AggregatedType; + typedef typename Parent::BTreeTypeRefPair BTreeTypeRefPair; + typedef typename Parent::Builder Builder; + typedef btree::EntryRef EntryRef; + typedef std::less CompareT; + using Parent::applyNewArray; + using 
Parent::applyNewTree; + using Parent::applyCluster; + using Parent::applyTree; + using Parent::normalizeTree; + using Parent::getTypeId; + using Parent::getClusterSize; + using Parent::getWTreeEntry; + using Parent::getTreeEntry; + using Parent::getKeyDataEntry; + using Parent::clusterLimit; + using Parent::allocBTree; + using Parent::_builder; + using Parent::_store; + using Parent::_allocator; + using Parent::_aggrCalc; + using Parent::BUFFERTYPE_BTREE; + typedef std::pair BitVectorRefPair; + + + PostingStore(EnumPostingTree &dict, Status &status, const Config &config); + + virtual + ~PostingStore(); + + virtual bool + removeSparseBitVectors(); + + static bool + isBitVector(uint32_t typeId) + { + return typeId == BUFFERTYPE_BITVECTOR; + } + + static bool + isBTree(uint32_t typeId) + { + return typeId == BUFFERTYPE_BTREE; + } + + bool + isBTree(RefType ref) const + { + return isBTree(getTypeId(ref)); + } + + void + applyNew(EntryRef &ref, + AddIter a, + AddIter ae); + + BitVectorRefPair + allocBitVector(void) + { + return _store.template allocEntry >(BUFFERTYPE_BITVECTOR); + } + + /* + * Recreate btree from bitvector. Weight information is not recreated. + */ + void + makeDegradedTree(EntryRef &ref, const BitVector &bv); + + void + dropBitVector(EntryRef &ref); + + void + makeBitVector(EntryRef &ref); + + void + applyNewBitVector(EntryRef &ref, + AddIter aOrg, + AddIter ae); + + void + apply(BitVector &bv, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re); + + /** + * Apply multiple changes at once. + * + * additions and removals should be sorted on key without duplicates. + * Overlap between additions and removals indicates updates. 
+ */ + void + apply(EntryRef &ref, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re); + + void + clear(const EntryRef ref); + + size_t + size(const EntryRef ref) const { + if (!ref.valid()) + return 0; + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + return internalSize(typeId, iRef); + } + return clusterSize; + } + + size_t + frozenSize(const EntryRef ref) const { + if (!ref.valid()) + return 0; + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + return internalFrozenSize(typeId, iRef); + } + return clusterSize; + } + + Iterator + begin(const EntryRef ref) const; + + ConstIterator + beginFrozen(const EntryRef ref) const; + + void + beginFrozen(const EntryRef ref, std::vector &where) const; + + template + VESPA_DLL_LOCAL void + foreach_frozen_key(EntryRef ref, FunctionType func) const; + + template + VESPA_DLL_LOCAL void + foreach_frozen(EntryRef ref, FunctionType func) const; + + AggregatedType + getAggregated(const EntryRef ref) const; + + const BitVectorEntry * + getBitVectorEntry(RefType ref) const + { + return _store.template getBufferEntry(ref.bufferId(), + ref.offset()); + } + + BitVectorEntry * + getWBitVectorEntry(RefType ref) + { + return _store.template getBufferEntry(ref.bufferId(), + ref.offset()); + } + + static inline DataT + bitVectorWeight(); + + MemoryUsage + getMemoryUsage() const; + +private: + size_t internalSize(uint32_t typeId, const RefType & iRef) const; + size_t internalFrozenSize(uint32_t typeId, const RefType & iRef) const; +}; + +template <> +inline btree::BTreeNoLeafData +PostingStore::bitVectorWeight() +{ + return btree::BTreeNoLeafData(); +} + +template <> +inline int32_t +PostingStore::bitVectorWeight() +{ + return 1; +} + +template +template +void +PostingStore::foreach_frozen_key(EntryRef ref, FunctionType func) const +{ + if (!ref.valid()) + return; + 
RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + EntryRef ref2(bve->_tree); + RefType iRef2(ref2); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + _allocator.getNodeStore().foreach_key(tree->getFrozenRoot(), func); + } else { + const BitVector *bv = bve->_bv.get(); + uint32_t docIdLimit = bv->size(); + uint32_t docId = bv->getFirstTrueBit(1); + while (docId < docIdLimit) { + func(docId); + docId = bv->getNextTrueBit(docId + 1); + } + } + } else { + assert(isBTree(typeId)); + const BTreeType *tree = getTreeEntry(iRef); + _allocator.getNodeStore().foreach_key(tree->getFrozenRoot(), func); + } + } else { + const KeyDataType *p = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *pe = p + clusterSize; + for (; p != pe; ++p) { + func(p->_key); + } + } +} + + +template +template +void +PostingStore::foreach_frozen(EntryRef ref, FunctionType func) const +{ + if (!ref.valid()) + return; + RefType iRef(ref); + uint32_t typeId = getTypeId(iRef); + uint32_t clusterSize = getClusterSize(typeId); + if (clusterSize == 0) { + if (isBitVector(typeId)) { + const BitVectorEntry *bve = getBitVectorEntry(iRef); + EntryRef ref2(bve->_tree); + RefType iRef2(ref2); + if (iRef2.valid()) { + assert(isBTree(iRef2)); + const BTreeType *tree = getTreeEntry(iRef2); + _allocator.getNodeStore().foreach(tree->getFrozenRoot(), func); + } else { + const BitVector *bv = bve->_bv.get(); + uint32_t docIdLimit = bv->size(); + uint32_t docId = bv->getFirstTrueBit(1); + while (docId < docIdLimit) { + func(docId, bitVectorWeight()); + docId = bv->getNextTrueBit(docId + 1); + } + } + } else { + const BTreeType *tree = getTreeEntry(iRef); + _allocator.getNodeStore().foreach(tree->getFrozenRoot(), func); + } + } else { + const KeyDataType *p = getKeyDataEntry(iRef, clusterSize); + const 
// Rounds a non-negative bound up to the smallest value of the form
// arity^k - 1 (k >= 1) that is greater than or equal to it.
//
// If the next power of arity would not fit in an int64_t, the bound is
// returned unchanged. An arity below 2 is also answered with the unchanged
// bound: 0 would divide by zero and 1 would never shrink `value`, so the
// loop below would not terminate.
int64_t adjustBound(int32_t arity, int64_t bound) {
    if (arity < 2) {
        return bound;
    }
    int64_t adjusted = arity;
    int64_t value = bound;
    const int64_t max = LLONG_MAX / arity;  // largest value that can still be multiplied by arity
    while ((value /= arity) > 0) {
        if (adjusted > max) {
            return bound;
        }
        adjusted *= arity;
    }
    return adjusted - 1;
}

// Adjusts the lower bound of the value range.
// LLONG_MIN is passed through untouched (it cannot be negated), a positive
// lower bound is clamped to 0, and any other value is mirrored through
// adjustBound().
int64_t adjustLowerBound(int32_t arity, int64_t lower_bound) {
    if (lower_bound == LLONG_MIN) {
        return lower_bound;
    } else if (lower_bound > 0) {
        return 0ll;
    } else {
        return -adjustBound(arity, -lower_bound);
    }
}

// Adjusts the upper bound of the value range.
// LLONG_MAX is passed through untouched, a negative upper bound is clamped
// to -1, and any other value is rounded up by adjustBound().
int64_t adjustUpperBound(int32_t arity, int64_t upper_bound) {
    if (upper_bound == LLONG_MAX) {
        return upper_bound;
    } else if (upper_bound < 0) {
        return -1ll;  // 0 belongs to the positive range.
    } else {
        return adjustBound(arity, upper_bound);
    }
}
getGenerationHolder().transferHoldLists(generation - 1); + _index->transferHoldLists(generation - 1); +} + +void +PredicateAttribute::onSave(IAttributeSaveTarget &saveTarget) { + LOG(info, "Saving predicate attribute version %d", getVersion()); + IAttributeSaveTarget::Buffer buffer(saveTarget.datWriter().allocBuf(4096)); + _index->serialize(*buffer); + uint32_t highest_doc_id = static_cast(_min_feature.size() - 1); + buffer->writeInt32(highest_doc_id); + for (size_t i = 1; i <= highest_doc_id; ++i) { + buffer->writeInt8(_min_feature[i]); + } + for (size_t i = 1; i <= highest_doc_id; ++i) { + buffer->writeInt16(_interval_range_vector[i]); + } + buffer->writeInt16(_max_interval_range); + saveTarget.datWriter().writeBuf(std::move(buffer)); +} + + +uint32_t +PredicateAttribute::getVersion() const { + return PREDICATE_ATTRIBUTE_VERSION; +} + +namespace { + +template +struct DocIdLimitFinderAndMinFeatureFiller : SimpleIndexDeserializeObserver<> { + uint32_t _highest_doc_id; + V & _min_feature; + PredicateIndex &_index; + DocIdLimitFinderAndMinFeatureFiller(V & min_feature, + PredicateIndex &index) : + _highest_doc_id(0), + _min_feature(min_feature), + _index(index) + {} + void notifyInsert(uint64_t, uint32_t doc_id, uint32_t min_feature) override { + if (doc_id > _highest_doc_id) { + _highest_doc_id = doc_id; + _min_feature.ensure_size(doc_id + 1, PredicateAttribute::MIN_FEATURE_FILL); + } + _min_feature[doc_id] = min_feature; + } +}; + +struct DummyObserver : SimpleIndexDeserializeObserver<> { + DummyObserver() {} + void notifyInsert(uint64_t, uint32_t, uint32_t) override {} +}; + +} + +bool PredicateAttribute::onLoad() +{ + FileUtil::LoadedBuffer::UP loaded_buffer = loadDAT(); + char *rawBuffer = const_cast(static_cast(loaded_buffer->buffer())); + size_t size = loaded_buffer->size(); + MMapDataBuffer buffer(rawBuffer, size); + buffer.moveFreeToData(size); + + const GenericHeader &header = loaded_buffer->getHeader(); + uint32_t version = static_cast( + 
header.hasTag("version") ? header.getTag("version").asInteger() : 0); + LOG(info, "Loading predicate attribute version %d. getVersion() = %d", version, getVersion()); + + DocId highest_doc_id; + if (version == 0) { + DocIdLimitFinderAndMinFeatureFiller observer(_min_feature, *_index); + _index.reset(new PredicateIndex(getGenerationHandler(), getGenerationHolder(), + _limit_provider, createSimpleIndexConfig(getConfig()), + buffer, observer, 0)); + highest_doc_id = observer._highest_doc_id; + } else { + DummyObserver observer; + _index.reset( + new PredicateIndex(getGenerationHandler(), getGenerationHolder(), _limit_provider, + createSimpleIndexConfig(getConfig()), buffer, observer, version)); + highest_doc_id = buffer.readInt32(); + // Deserialize min feature vector + _min_feature.ensure_size(highest_doc_id + 1, PredicateAttribute::MIN_FEATURE_FILL); + for (uint32_t docId = 1; docId <= highest_doc_id; ++docId) { + _min_feature[docId] = buffer.readInt8(); + } + } + _interval_range_vector.ensure_size(highest_doc_id + 1); + // Interval ranges are only stored in version >= 2 + for (uint32_t docId = 1; docId <= highest_doc_id; ++docId) { + _interval_range_vector[docId] = version < 2 ? MAX_INTERVAL_RANGE : buffer.readInt16(); + } + _max_interval_range = version < 2 ? 
MAX_INTERVAL_RANGE : buffer.readInt16(); + _index->adjustDocIdLimit(highest_doc_id); + setNumDocs(highest_doc_id + 1); + setCommittedDocIdLimit(highest_doc_id + 1); + _index->onDeserializationCompleted(); + return true; +} + +bool +PredicateAttribute::addDoc(DocId &doc_id) +{ + doc_id = getNumDocs(); + incNumDocs(); + updateUncommittedDocIdLimit(doc_id); + _index->adjustDocIdLimit(doc_id); + _interval_range_vector.ensure_size(doc_id + 1); + _min_feature.ensure_size(doc_id + 1); + return true; +} +uint32_t +PredicateAttribute::clearDoc(DocId doc_id) +{ + _index->removeDocument(doc_id); + _min_feature[doc_id] = MIN_FEATURE_FILL; + _interval_range_vector[doc_id] = 0; + return 0; +} + +void +PredicateAttribute::updateValue(uint32_t doc_id, const PredicateFieldValue &value) +{ + const auto &inspector = value.getSlime().get(); + + _index->removeDocument(doc_id); + updateUncommittedDocIdLimit(doc_id); + + long root_type = inspector[Predicate::NODE_TYPE].asLong(); + if (root_type == Predicate::TYPE_FALSE) { // never match + _min_feature[doc_id] = MIN_FEATURE_FILL; + _interval_range_vector[doc_id] = 0; + return; + } else if (root_type == Predicate::TYPE_TRUE) { + _min_feature[doc_id] = 0; + _interval_range_vector[doc_id] = 0x1; + _index->indexEmptyDocument(doc_id); + return; + } + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(inspector, result, + _lower_bound, _upper_bound); + _index->indexDocument(doc_id, result); + assert(result.min_feature <= MAX_MIN_FEATURE); + uint8_t minFeature = static_cast(result.min_feature); + _min_feature[doc_id] = minFeature; + _interval_range_vector[doc_id] = result.interval_range; + _max_interval_range = std::max(result.interval_range, _max_interval_range); + assert(result.interval_range > 0); +} + +IMPLEMENT_IDENTIFIABLE_ABSTRACT(PredicateAttribute, AttributeVector); + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h 
b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h new file mode 100644 index 00000000000..c5af5893305 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/predicate_attribute.h @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "not_implemented_attribute.h" +#include +#include + +namespace document { class PredicateFieldValue; } + +namespace search { + +struct AttributeVectorDocIdLimitProvider : public predicate::DocIdLimitProvider { + AttributeVectorDocIdLimitProvider(const AttributeVector &attribute_vector) : + _attribute_vector(attribute_vector) {} + + virtual uint32_t getDocIdLimit() const { return _attribute_vector.getNumDocs(); }; + virtual uint32_t getCommittedDocIdLimit() const { + return _attribute_vector.getCommittedDocIdLimit(); + } +private: + const AttributeVector &_attribute_vector; +}; + +/** + * Attribute that manages a predicate index. It is not a traditional + * attribute in that it doesn't store values for each document, but + * rather keeps an index for boolean search. Summaries are not fetched + * from the attribute, but rather using the summary store like a + * non-index field. 
+ */ +class PredicateAttribute : public NotImplementedAttribute { +public: + typedef uint8_t MinFeature; + typedef std::pair MinFeatureHandle; + using IntervalRange = uint16_t; + using IntervalRangeVector = attribute::RcuVectorBase; + + DECLARE_IDENTIFIABLE_ABSTRACT(PredicateAttribute); + + PredicateAttribute(const vespalib::string &base_file_name, + const Config &config); + + virtual ~PredicateAttribute(); + + predicate::PredicateIndex &getIndex() { return *_index; } + + void onSave(IAttributeSaveTarget & saveTarget) override; + bool onLoad() override; + void onCommit() override; + void removeOldGenerations(generation_t firstUsed) override; + void onGenerationChange(generation_t generation) override; + void onUpdateStat() override; + bool addDoc(DocId &doc_id) override; + uint32_t clearDoc(DocId doc_id) override; + uint32_t getValueCount(DocId doc) const override; + + void updateValue(uint32_t doc_id, + const document::PredicateFieldValue &value); + + /** + * Will return a handle with a pointer to the min_features and how many there are. + * The pointer is only guaranteed to be valid for as long as you hold the attribute guard. 
+ **/ + MinFeatureHandle getMinFeatureVector() const { + return MinFeatureHandle(&_min_feature[0], getNumDocs()); + } + + const IntervalRange * getIntervalRangeVector() const { + return &_interval_range_vector[0]; + } + + IntervalRange getMaxIntervalRange() const { + return _max_interval_range; + } + + void updateMaxIntervalRange(IntervalRange intervalRange) { + _max_interval_range = std::max(intervalRange, _max_interval_range); + } + + void populateIfNeeded() { + _index->populateIfNeeded(getNumDocs()); + } +private: + vespalib::string _base_file_name; + const AttributeVectorDocIdLimitProvider _limit_provider; + predicate::PredicateIndex::UP _index; + int64_t _lower_bound; + int64_t _upper_bound; + + typedef attribute::RcuVectorBase MinFeatureVector; + MinFeatureVector _min_feature; + + IntervalRangeVector _interval_range_vector; + IntervalRange _max_interval_range; +public: + static constexpr uint8_t MIN_FEATURE_FILL = 255; + static constexpr uint32_t PREDICATE_ATTRIBUTE_VERSION = 2; + + virtual uint32_t getVersion() const override; + +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp new file mode 100644 index 00000000000..fe987a35d60 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.cpp @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "singleenumattribute.h" +#include "singleenumattribute.hpp" + +namespace search +{ + +using attribute::Config; + +SingleValueEnumAttributeBase:: +SingleValueEnumAttributeBase(const Config & c, GenerationHolder &genHolder) + : _enumIndices(c.getGrowStrategy().getDocsInitialCapacity(), + c.getGrowStrategy().getDocsGrowPercent(), + c.getGrowStrategy().getDocsGrowDelta(), + genHolder) +{ +} + + +SingleValueEnumAttributeBase::~SingleValueEnumAttributeBase() +{ +} + + +AttributeVector::DocId +SingleValueEnumAttributeBase::addDoc(bool &incGeneration) +{ + incGeneration = _enumIndices.isFull(); + _enumIndices.push_back(EnumStoreBase::Index()); + return _enumIndices.size() - 1; +} + + +SingleValueEnumAttributeBase::EnumIndexCopyVector +SingleValueEnumAttributeBase::getIndicesCopy(uint32_t size) const +{ + assert(size <= _enumIndices.size()); + return EnumIndexCopyVector(&_enumIndices[0], &_enumIndices[0] + size); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h new file mode 100644 index 00000000000..ad0cc2a98a3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.h @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +/* + * Implementation of single value enum attribute that uses an underlying enum store + * to store unique values. 
+ * + * B: EnumAttribute + */ + +class SingleValueEnumAttributeBase +{ +protected: + typedef EnumStoreBase::Index EnumIndex; + typedef search::attribute::RcuVectorBase EnumIndexVector; + typedef AttributeVector::DocId DocId; + typedef AttributeVector::EnumHandle EnumHandle; + typedef vespalib::GenerationHolder GenerationHolder; + +public: + using EnumIndexCopyVector = vespalib::Array; + + EnumStoreBase::Index getEnumIndex(DocId docId) const { return _enumIndices[docId]; } + EnumHandle getE(DocId doc) const { return _enumIndices[doc].ref(); } +protected: + SingleValueEnumAttributeBase(const attribute::Config & c, + GenerationHolder &genHolder); + ~SingleValueEnumAttributeBase(); + AttributeVector::DocId addDoc(bool & incGeneration); + + EnumIndexVector _enumIndices; + + EnumIndexCopyVector + getIndicesCopy(uint32_t size) const; +}; + +template +class SingleValueEnumAttribute : public B, public SingleValueEnumAttributeBase +{ +protected: + typedef typename B::DocId DocId; + typedef typename B::WeightedEnum WeightedEnum; + typedef typename B::Change Change; + typedef typename B::ChangeVector ChangeVector; + typedef typename B::ChangeVector::const_iterator ChangeVectorIterator; + typedef typename B::generation_t generation_t; + typedef typename B::EnumModifier EnumModifier; + typedef typename B::ValueModifier ValueModifier; + typedef typename B::EnumStore EnumStore; + typedef typename B::LoadedVector LoadedVector; + typedef typename B::UniqueSet UniqueSet; + typedef AttributeVector::ReaderBase ReaderBase; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; + using B::getGenerationHolder; + +private: + void considerUpdateAttributeChange(const Change & c, UniqueSet & newUniques); + void applyUpdateValueChange(const Change & c, EnumStoreBase::IndexVector & unused); + +protected: + // from EnumAttribute + virtual void considerAttributeChange(const Change & c, UniqueSet & newUniques); + 
virtual void reEnumerate(); + + // implemented by single value numeric enum attribute. + virtual void considerUpdateAttributeChange(const Change & c) { (void) c; } + virtual void considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques) { (void) c; (void) newUniques; } + + // update enum index vector with new values according to change vector + virtual void applyValueChanges(EnumStoreBase::IndexVector & unused); + virtual void applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused) { + (void) c; (void) unused; + } + void updateEnumRefCounts(const Change & c, EnumIndex newIdx, EnumIndex oldIdx, EnumStoreBase::IndexVector & unused); + + virtual void + freezeEnumDictionary() + { + this->getEnumStore().freezeTree(); + } + + virtual void mergeMemoryStats(MemoryUsage & total) { (void) total; } + + virtual void fillValues(LoadedVector & loaded); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumStoreBase::IndexVector &eidxs, + LoadedEnumAttributeVector &loaded); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumStoreBase::IndexVector &eidxs, + EnumStoreBase::EnumVector &enumHist); + + /** + * Called when a new document has been added. + * + * Can be overridden by subclasses that need to resize structures + * as a result of this. + * + * Should return true if underlying structures were resized. 
+ **/ + virtual bool onAddDoc(DocId doc) { (void) doc; return false; } + +public: + SingleValueEnumAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & cfg); + virtual ~SingleValueEnumAttribute(); + + virtual bool addDoc(DocId & doc); + virtual uint32_t getValueCount(DocId doc) const; + virtual void onCommit(); + virtual void onUpdateStat(); + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + virtual EnumHandle getEnum(DocId doc) const { + return getE(doc); + } + virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { + if (sz > 0) { + e[0] = getE(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const { + if (sz > 0) { + e[0] = WeightedEnum(getE(doc), 1); + } + return 1; + } + + virtual void + clearDocs(DocId lidLow, DocId lidLimit); + + virtual void + onShrinkLidSpace(); + + virtual std::unique_ptr onInitSave() override; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp new file mode 100644 index 00000000000..df48fe949fe --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -0,0 +1,310 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include "ipostinglistattributebase.h" +#include "singleenumattributesaver.h" + +namespace search { + +template +SingleValueEnumAttribute:: +SingleValueEnumAttribute(const vespalib::string &baseFileName, + const AttributeVector::Config &cfg) + : B(baseFileName, cfg), + SingleValueEnumAttributeBase(cfg, getGenerationHolder()) +{ +} + +template +SingleValueEnumAttribute::~SingleValueEnumAttribute() +{ +} + +template +bool +SingleValueEnumAttribute::addDoc(DocId & doc) +{ + bool incGen = false; + doc = SingleValueEnumAttributeBase::addDoc(incGen); + if (doc > 0u) { + // Make sure that a valid value(magic default) is referenced, + // even between addDoc and commit(). + if (_enumIndices[0].valid()) { + _enumIndices[doc] = _enumIndices[0]; + this->_enumStore.incRefCount(_enumIndices[0]); + } + } + this->incNumDocs(); + this->updateUncommittedDocIdLimit(doc); + incGen |= onAddDoc(doc); + if (incGen) { + this->incGeneration(); + } else + this->removeAllOldGenerations(); + return true; +} + +template +uint32_t +SingleValueEnumAttribute::getValueCount(DocId doc) const +{ + if (doc >= this->getNumDocs()) { + return 0; + } + return 1; +} + +template +void +SingleValueEnumAttribute::onCommit() +{ + this->checkSetMaxValueCount(1); + + // update enum store + EnumStoreBase::IndexVector possiblyUnused; + this->insertNewUniqueValues(possiblyUnused); + // apply updates + applyValueChanges(possiblyUnused); + this->_changes.clear(); + this->_enumStore.freeUnusedEnums(possiblyUnused); + freezeEnumDictionary(); + this->setEnumMax(this->_enumStore.getLastEnum()); + std::atomic_thread_fence(std::memory_order_release); + this->removeAllOldGenerations(); +} + +template +void +SingleValueEnumAttribute::onUpdateStat() +{ + // update statistics + MemoryUsage total = _enumIndices.getMemoryUsage(); + total.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes()); + total.merge(this->_enumStore.getMemoryUsage()); + 
total.merge(this->_enumStore.getTreeMemoryUsage()); + mergeMemoryStats(total); + this->updateStatistics(_enumIndices.size(), this->_enumStore.getNumUniques(), total.allocatedBytes(), + total.usedBytes(), total.deadBytes(), total.allocatedBytesOnHold()); +} + +template +void +SingleValueEnumAttribute::considerUpdateAttributeChange(const Change & c, UniqueSet & newUniques) +{ + EnumIndex idx; + if (!this->_enumStore.findIndex(c._data.raw(), idx)) { + newUniques.insert(c._data); + } + considerUpdateAttributeChange(c); // for numeric +} + +template +void +SingleValueEnumAttribute::considerAttributeChange(const Change & c, UniqueSet & newUniques) +{ + if (c._type == ChangeBase::UPDATE) { + considerUpdateAttributeChange(c, newUniques); + } else if (c._type >= ChangeBase::ADD && c._type <= ChangeBase::DIV) { + considerArithmeticAttributeChange(c, newUniques); // for numeric + } else if (c._type == ChangeBase::CLEARDOC) { + this->_defaultValue._doc = c._doc; + considerUpdateAttributeChange(this->_defaultValue, newUniques); + } +} + +template +void +SingleValueEnumAttribute::reEnumerate() +{ + EnumModifier enumGuard(this->getEnumModifier()); + for (uint32_t i = 0; i < _enumIndices.size(); ++i) { + EnumIndex oldIdx = _enumIndices[i]; + if (oldIdx.valid()) { + EnumIndex newIdx; + this->_enumStore.getCurrentIndex(oldIdx, newIdx); + std::atomic_thread_fence(std::memory_order_release); + _enumIndices[i] = newIdx; + } + } +} + +template +void +SingleValueEnumAttribute::applyUpdateValueChange(const Change & c, EnumStoreBase::IndexVector & unused) +{ + EnumIndex oldIdx = _enumIndices[c._doc]; + EnumIndex newIdx; + this->_enumStore.findIndex(c._data.raw(), newIdx); + updateEnumRefCounts(c, newIdx, oldIdx, unused); +} + +template +void +SingleValueEnumAttribute::applyValueChanges(EnumStoreBase::IndexVector & unused) +{ + ValueModifier valueGuard(this->getValueModifier()); + for (ChangeVectorIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) { + 
if (iter->_type == ChangeBase::UPDATE) { + applyUpdateValueChange(*iter, unused); + } else if (iter->_type >= ChangeBase::ADD && iter->_type <= ChangeBase::DIV) { + applyArithmeticValueChange(*iter, unused); + } else if (iter->_type == ChangeBase::CLEARDOC) { + this->_defaultValue._doc = iter->_doc; + applyUpdateValueChange(this->_defaultValue, unused); + } + } +} + +template +void +SingleValueEnumAttribute::updateEnumRefCounts(const Change & c, EnumIndex newIdx, EnumIndex oldIdx, + EnumStoreBase::IndexVector & unused) +{ + // increase and decrease refcount + this->_enumStore.incRefCount(newIdx); + + _enumIndices[c._doc] = newIdx; + + if (oldIdx.valid()) { + this->_enumStore.decRefCount(oldIdx); + if (this->_enumStore.getRefCount(oldIdx) == 0) { + unused.push_back(oldIdx); + } + } +} + +template +void +SingleValueEnumAttribute::fillValues(LoadedVector & loaded) +{ + uint32_t numDocs = this->getNumDocs(); + getGenerationHolder().clearHoldLists(); + _enumIndices.reset(); + _enumIndices.unsafe_reserve(numDocs); + for (DocId doc = 0; doc < numDocs; ++doc, loaded.next()) { + _enumIndices.push_back(loaded.read().getEidx()); + } +} + + +template +void +SingleValueEnumAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumStoreBase::IndexVector &eidxs, + LoadedEnumAttributeVector &loaded) +{ + attribute::SaveLoadedEnum saver(loaded); + _enumIndices.fillMapped(getGenerationHolder(), + attrReader, + numValues, + &eidxs[0], + eidxs.size(), + saver, + this->getNumDocs()); +} + + +template +void +SingleValueEnumAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumStoreBase::IndexVector &eidxs, + EnumStoreBase::EnumVector &enumHist) +{ + attribute::SaveEnumHist saver(enumHist); + _enumIndices.fillMapped(getGenerationHolder(), + attrReader, + numValues, + &eidxs[0], + eidxs.size(), + saver, + this->getNumDocs()); +} + + + +template +void +SingleValueEnumAttribute::removeOldGenerations(generation_t firstUsed) +{ + 
this->_enumStore.trimHoldLists(firstUsed); + getGenerationHolder().trimHoldLists(firstUsed); +} + +template +void +SingleValueEnumAttribute::onGenerationChange(generation_t generation) +{ + /* + * Freeze tree before generation is increased in attribute vector + * but after generation is increased in tree. This ensures that + * unlocked readers accessing a frozen tree will access a + * sufficiently new frozen tree. + */ + freezeEnumDictionary(); + getGenerationHolder().transferHoldLists(generation - 1); + this->_enumStore.transferHoldLists(generation - 1); +} + + +template +void +SingleValueEnumAttribute::clearDocs(DocId lidLow, DocId lidLimit) +{ + EnumHandle e; + bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e)); + if (!findDefaultEnumRes) { + e = EnumHandle(); + } + assert(lidLow <= lidLimit); + assert(lidLimit <= this->getNumDocs()); + for (DocId lid = lidLow; lid < lidLimit; ++lid) { + if (_enumIndices[lid] != e) { + this->clearDoc(lid); + } + } +} + + +template +void +SingleValueEnumAttribute::onShrinkLidSpace(void) +{ + EnumHandle e; + bool findDefaultEnumRes(this->findEnum(this->getDefaultEnumTypeValue(), e)); + assert(findDefaultEnumRes); + uint32_t committedDocIdLimit = this->getCommittedDocIdLimit(); + assert(_enumIndices.size() >= committedDocIdLimit); + attribute::IPostingListAttributeBase *pab = + this->getIPostingListAttributeBase(); + if (pab != NULL) { + pab->clearPostings(e, committedDocIdLimit, _enumIndices.size()); + } + _enumIndices.shrink(committedDocIdLimit); + this->setNumDocs(committedDocIdLimit); +} + +template +std::unique_ptr +SingleValueEnumAttribute::onInitSave() +{ + { + EnumModifier enumGuard(this->getEnumModifier()); + this->_enumStore.reEnumerate(); + } + vespalib::GenerationHandler::Guard guard(this->getGenerationHandler(). 
+ takeGuard()); + return std::make_unique + (std::move(guard), + this->createSaveTargetConfig(), + getIndicesCopy(this->getCommittedDocIdLimit()), + this->_enumStore); +} + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp new file mode 100644 index 00000000000..d8185a0b614 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.cpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singleenumattributesaver.h" +#include + +using vespalib::GenerationHandler; + +namespace search { + +SingleValueEnumAttributeSaver:: +SingleValueEnumAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + EnumIndexCopyVector &&indices, + const EnumStoreBase &enumStore) + : AttributeSaver(std::move(guard), cfg), + _indices(std::move(indices)), + _enumSaver(enumStore, false) +{ +} + + +SingleValueEnumAttributeSaver::~SingleValueEnumAttributeSaver() +{ +} + + +bool +SingleValueEnumAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + _enumSaver.writeUdat(saveTarget); + const EnumStoreBase &enumStore = _enumSaver.getEnumStore(); + std::unique_ptr datWriter(saveTarget.datWriter(). 
+ allocBufferWriter()); + if (saveTarget.getEnumerated()) { + enumStore.writeEnumValues(*datWriter, + &_indices[0], _indices.size()); + } else { + enumStore.writeValues(*datWriter, + &_indices[0], _indices.size()); + } + datWriter->flush(); + _enumSaver.enableReEnumerate(); + return true; +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h new file mode 100644 index 00000000000..7e7de3ef84e --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattributesaver.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributesaver.h" +#include "iattributesavetarget.h" +#include "singleenumattribute.h" +#include "enumattributesaver.h" + +namespace search { + +/* + * Class for saving a single value enumerated attribute. + */ +class SingleValueEnumAttributeSaver : public AttributeSaver +{ +private: + using EnumIndexCopyVector = + SingleValueEnumAttributeBase::EnumIndexCopyVector; + EnumIndexCopyVector _indices; + EnumAttributeSaver _enumSaver; + + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; +public: + SingleValueEnumAttributeSaver(vespalib::GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + EnumIndexCopyVector &&indices, + const EnumStoreBase &enumStore); + + virtual ~SingleValueEnumAttributeSaver(); +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp new file mode 100644 index 00000000000..b08931f36fc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "singlenumericattribute.h" +#include "singlenumericattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.singlenumericattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h new file mode 100644 index 00000000000..9cc2a90da32 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.h @@ -0,0 +1,235 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +template +class SingleValueNumericAttribute : public B +{ +private: + typedef typename B::BaseType T; + typedef typename B::DocId DocId; + typedef typename B::EnumHandle EnumHandle; + typedef typename B::largeint_t largeint_t; + typedef typename B::Weighted Weighted; + typedef typename B::WeightedInt WeightedInt; + typedef typename B::WeightedFloat WeightedFloat; + typedef typename B::WeightedEnum WeightedEnum; + typedef typename B::generation_t generation_t; + using B::getGenerationHolder; + + typedef attribute::RcuVectorBase DataVector; + DataVector _data; + + virtual T getFromEnum(EnumHandle e) const { + (void) e; + return T(); + } + + /* + * Specialization of SearchContext + */ + template + class SingleSearchContext : public M, public AttributeVector::SearchContext + { + private: + const T * _data; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + virtual bool valid() const { return M::isValid(); } + + public: + SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + M(*qTerm, true), + AttributeVector::SearchContext(toBeSearched), + _data(&static_cast &>(toBeSearched)._data[0]) + { + 
} + + bool + cmp(DocId docId, int32_t & weight) const + { + const T v = _data[docId]; + weight = 1; + return this->match(v); + } + + bool + cmp(DocId docId) const + { + const T v = _data[docId]; + return this->match(v); + } + + virtual Int64Range getAsIntegerTerm() const { + return M::getRange(); + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? new FilterAttributeIteratorStrict >(*this, matchData) + : new FilterAttributeIteratorT >(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? new AttributeIteratorStrict >(*this, matchData) + : new AttributeIteratorT >(*this, matchData)); + } + }; + + +protected: + virtual bool findEnum(T value, EnumHandle & e) const { + (void) value; (void) e; + return false; + } + +public: + SingleValueNumericAttribute(const vespalib::string & baseFileName, + const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector:: + BasicType::fromType(T()), + attribute::CollectionType::SINGLE)); + + + virtual + ~SingleValueNumericAttribute(void); + + virtual uint32_t getValueCount(DocId doc) const { + if (doc >= B::getNumDocs()) { + return 0; + } + return 1; + } + virtual void onCommit(); + virtual void onUpdateStat(); + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + virtual bool addDoc(DocId & doc) { + bool incGen = _data.isFull(); + _data.push_back(attribute::getUndefined()); + std::atomic_thread_fence(std::memory_order_release); + B::incNumDocs(); + doc = B::getNumDocs() - 1; + this->updateUncommittedDocIdLimit(doc); + if (incGen) { + this->incGeneration(); + } else + this->removeAllOldGenerations(); + return true; + } + virtual bool onLoad(); + + bool + onLoadEnumerated(typename B::ReaderBase 
&attrReader); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + void set(DocId doc, T v) { + _data[doc] = v; + } + + T getFast(DocId doc) const { + return _data[doc]; + } + + //------------------------------------------------------------------------- + // new read api + //------------------------------------------------------------------------- + virtual T get(DocId doc) const { + return getFast(doc); + } + virtual largeint_t getInt(DocId doc) const { + return static_cast(getFast(doc)); + } + virtual void getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const { + (void) v; + (void) e; + (void) sz; + } + virtual double getFloat(DocId doc) const { + return static_cast(_data[doc]); + } + virtual uint32_t getEnum(DocId doc) const { + (void) doc; + return std::numeric_limits::max(); // does not have enum + } + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const { + (void) sz; + v[0] = _data[doc]; + return 1; + } + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { + (void) sz; + v[0] = static_cast(_data[doc]); + return 1; + } + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { + (void) sz; + v[0] = static_cast(_data[doc]); + return 1; + } + virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { + (void) sz; + e[0] = getEnum(doc); + return 1; + } + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { + (void) doc; (void) v; (void) sz; + return 0; + } + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { + (void) sz; + v[0] = WeightedInt(static_cast(_data[doc])); + return 1; + } + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { + (void) sz; + v[0] = WeightedFloat(static_cast(_data[doc])); + return 1; + } + virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const { + (void) doc; (void) e; (void) sz; + return 0; + } + + virtual void + 
clearDocs(DocId lidLow, DocId lidLimit); + + virtual void + onShrinkLidSpace(); + + virtual std::unique_ptr onInitSave() override; +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp new file mode 100644 index 00000000000..5c04375c31f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattribute.hpp @@ -0,0 +1,188 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include "singlenumericattributesaver.h" + +namespace search { + +template +SingleValueNumericAttribute:: +SingleValueNumericAttribute(const vespalib::string & baseFileName, const AttributeVector::Config & c) : + B(baseFileName, c), + _data(c.getGrowStrategy().getDocsInitialCapacity(), + c.getGrowStrategy().getDocsGrowPercent(), + c.getGrowStrategy().getDocsGrowDelta(), + getGenerationHolder()) +{ +} + + +template +SingleValueNumericAttribute::~SingleValueNumericAttribute(void) +{ + getGenerationHolder().clearHoldLists(); +} + + +template +void +SingleValueNumericAttribute::onCommit() +{ + this->checkSetMaxValueCount(1); + + { + // apply updates + typename B::ValueModifier valueGuard(this->getValueModifier()); + for (const auto & change : this->_changes) { + if (change._type == ChangeBase::UPDATE) { + std::atomic_thread_fence(std::memory_order_release); + _data[change._doc] = change._data; + } else if (change._type >= ChangeBase::ADD && change._type <= ChangeBase::DIV) { + std::atomic_thread_fence(std::memory_order_release); + _data[change._doc] = this->applyArithmetic(_data[change._doc], change); + } else if (change._type == ChangeBase::CLEARDOC) { + std::atomic_thread_fence(std::memory_order_release); + _data[change._doc] = this->_defaultValue._data; + } + } + } + + std::atomic_thread_fence(std::memory_order_release); + this->removeAllOldGenerations(); + + 
this->_changes.clear(); +} + +template +void +SingleValueNumericAttribute::onUpdateStat() +{ + MemoryUsage usage = _data.getMemoryUsage(); + usage.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes()); + this->updateStatistics(_data.size(), _data.size(), + usage.allocatedBytes(), usage.usedBytes(), usage.deadBytes(), usage.allocatedBytesOnHold()); +} + +template +void +SingleValueNumericAttribute::removeOldGenerations(generation_t firstUsed) +{ + getGenerationHolder().trimHoldLists(firstUsed); +} + +template +void +SingleValueNumericAttribute::onGenerationChange(generation_t generation) +{ + getGenerationHolder().transferHoldLists(generation - 1); +} + +template +bool +SingleValueNumericAttribute::onLoadEnumerated(typename B::ReaderBase & + attrReader) +{ + uint64_t numValues = attrReader.getEnumCount(); + uint32_t numDocs = numValues; + + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + + FileUtil::LoadedBuffer::UP udatBuffer(this->loadUDAT()); + const T *map = reinterpret_cast(udatBuffer->buffer()); + assert((udatBuffer->size() % sizeof(T)) == 0); + size_t mapSize = udatBuffer->size() / sizeof(T); + attribute::NoSaveLoadedEnum saver; + _data.fillMapped(getGenerationHolder(), + attrReader, + numValues, + map, + mapSize, + saver, + numDocs); + return true; +} + + +template +bool +SingleValueNumericAttribute::onLoad() +{ + typename B::template PrimitiveReader attrReader(*this); + bool ok(attrReader.getHasLoadData()); + + if (!ok) + return false; + + this->setCreateSerialNum(attrReader.getCreateSerialNum()); + + if (attrReader.getEnumerated()) + return onLoadEnumerated(attrReader); + + const size_t sz(attrReader.getDataCount()); + getGenerationHolder().clearHoldLists(); + _data.reset(); + _data.unsafe_reserve(sz); + for (uint32_t i = 0; i < sz; ++i) { + _data.push_back(attrReader.getNextData()); + } + + B::setNumDocs(sz); + B::setCommittedDocIdLimit(sz); + + return true; +} + +template +AttributeVector::SearchContext::UP 
+SingleValueNumericAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + QueryTermSimple::RangeResult res = qTerm->getRange(); + if (res.isEqual()) { + return AttributeVector::SearchContext::UP(new SingleSearchContext< NumericAttribute::Equal >(std::move(qTerm), *this)); + } else { + return AttributeVector::SearchContext::UP(new SingleSearchContext< NumericAttribute::Range >(std::move(qTerm), *this)); + } +} + + +template +void +SingleValueNumericAttribute::clearDocs(DocId lidLow, DocId lidLimit) +{ + assert(lidLow <= lidLimit); + assert(lidLimit <= this->getNumDocs()); + for (DocId lid = lidLow; lid < lidLimit; ++lid) { + if (!attribute::isUndefined(_data[lid])) { + this->clearDoc(lid); + } + } +} + +template +void +SingleValueNumericAttribute::onShrinkLidSpace() +{ + uint32_t committedDocIdLimit = this->getCommittedDocIdLimit(); + assert(_data.size() >= committedDocIdLimit); + _data.shrink(committedDocIdLimit); + this->setNumDocs(committedDocIdLimit); +} + +template +std::unique_ptr +SingleValueNumericAttribute::onInitSave() +{ + const uint32_t numDocs(this->getCommittedDocIdLimit()); + assert(numDocs <= _data.size()); + return std::make_unique + (this->createSaveTargetConfig(), &_data[0], numDocs * sizeof(T)); +} + + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp new file mode 100644 index 00000000000..3320dd977d2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.cpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "singlenumericattributesaver.h" + +using vespalib::GenerationHandler; +using search::IAttributeSaveTarget; + +namespace search { + +namespace +{ +/* Passed together with the byte size when the constructor below allocates _buf. */ +const uint32_t MIN_ALIGNMENT = 4096; + +} + +/* Copies size bytes from data into a newly allocated buffer; the data pointer itself is not kept, and the copy is skipped when size is 0. */ +SingleValueNumericAttributeSaver:: +SingleValueNumericAttributeSaver(const IAttributeSaveTarget::Config &cfg, + const void *data, size_t size) + : AttributeSaver(vespalib::GenerationHandler::Guard(), cfg), + _buf() +{ + _buf = std::make_unique(size, MIN_ALIGNMENT); + assert(_buf->getFreeLen() >= size); + if (size > 0) { + memcpy(_buf->getFree(), data, size); + _buf->moveFreeToData(size); + } + assert(_buf->getDataLen() == size); +} + + +SingleValueNumericAttributeSaver::~SingleValueNumericAttributeSaver() +{ +} + +/* Passes _buf (via std::move) to the dat writer of saveTarget and always returns true. */ +bool +SingleValueNumericAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + saveTarget.datWriter().writeBuf(std::move(_buf)); + return true; +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h new file mode 100644 index 00000000000..585e5c49dab --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericattributesaver.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributesaver.h" +#include "iattributefilewriter.h" + +namespace search { + +/* + * Class for saving a plain attribute (i.e. single value numeric + * attribute). 
+ */ +class SingleValueNumericAttributeSaver : public AttributeSaver +{ +public: + using Buffer = IAttributeFileWriter::Buffer; + +private: + Buffer _buf; /* filled by the constructor, handed to the dat writer in onSave() */ + using BufferBuf = IAttributeFileWriter::BufferBuf; + + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; +public: + SingleValueNumericAttributeSaver(const IAttributeSaveTarget::Config &cfg, + const void *data, size_t size); + + virtual ~SingleValueNumericAttributeSaver(); +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp new file mode 100644 index 00000000000..df86159833d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singlenumericenumattribute.h" +#include "singlenumericenumattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.singlenumericenumattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h new file mode 100644 index 00000000000..3793431f75b --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.h @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { + +/* + * Implementation of single value numeric enum attribute that uses an underlying enum store + * to store unique numeric values. 
+ * + * B: EnumAttribute + */ +template +class SingleValueNumericEnumAttribute : public SingleValueEnumAttribute +{ +protected: + typedef typename B::BaseClass::BaseType T; + typedef typename B::BaseClass::Change Change; + typedef typename B::BaseClass::DocId DocId; + typedef typename B::BaseClass::EnumHandle EnumHandle; + typedef typename B::BaseClass::largeint_t largeint_t; + typedef typename B::BaseClass::Weighted Weighted; + typedef typename B::BaseClass::WeightedInt WeightedInt; + typedef typename B::BaseClass::WeightedFloat WeightedFloat; + typedef typename B::BaseClass::generation_t generation_t; + typedef typename B::BaseClass::LoadedNumericValueT LoadedNumericValueT; + typedef typename B::BaseClass::LoadedVector LoadedVector; + typedef SequentialReadModifyWriteVector LoadedVectorR; + + typedef typename SingleValueEnumAttribute::EnumStore EnumStore; + typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex; + typedef typename SingleValueEnumAttribute::UniqueSet UniqueSet; + typedef EnumStoreBase::IndexVector EnumIndexVector; + typedef EnumStoreBase::EnumVector EnumVector; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef attribute::LoadedEnumAttribute LoadedEnumAttribute; + +private: + // used to make sure several arithmetic operations on the same document in a single commit works + std::map _currDocValues; + +protected: + + // from SingleValueEnumAttribute + virtual void considerUpdateAttributeChange(const Change & c); + virtual void considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques); + virtual void applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused); + + /* + * Specialization of SearchContext + */ + class SingleSearchContext : public NumericAttribute::Range, public AttributeVector::SearchContext + { + protected: + const SingleValueNumericEnumAttribute & _toBeSearched; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, 
weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + virtual bool valid() const { return this->isValid(); } + + public: + SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) : + NumericAttribute::Range(*qTerm, true), + AttributeVector::SearchContext(toBeSearched), + _toBeSearched(static_cast &>(toBeSearched)) + { + } + + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + bool + cmp(DocId docId, int32_t & weight) const + { + T v = _toBeSearched._enumStore.getValue( + _toBeSearched.getEnumIndex(docId)); + weight = 1; + return this->match(v); + } + + bool + cmp(DocId docId) const + { + T v = _toBeSearched._enumStore.getValue( + _toBeSearched.getEnumIndex(docId)); + return this->match(v); + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? 
new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + +public: + SingleValueNumericEnumAttribute(const vespalib::string & baseFileName, + const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector::BasicType::fromType(T()), + attribute::CollectionType::SINGLE)); + + virtual void onCommit(); + virtual bool onLoad(); + + bool + onLoadEnumerated(typename B::ReaderBase &attrReader); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + //------------------------------------------------------------------------- + // Attribute read API + //------------------------------------------------------------------------- + virtual T get(DocId doc) const { + return this->_enumStore.getValue(this->_enumIndices[doc]); + } + virtual largeint_t getInt(DocId doc) const { + return static_cast(get(doc)); + } + virtual double getFloat(DocId doc) const { + return static_cast(get(doc)); + } + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const { + if (sz > 0) { + v[0] = get(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { + if (sz > 0) { + v[0] = getInt(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { + if (sz > 0) { + v[0] = getFloat(doc); + } + return 1; + } + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { + if (sz > 0) { + v[0] = Weighted(get(doc)); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedInt(getInt(doc)); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedFloat(getFloat(doc)); + } + return 1; + } +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp 
b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp new file mode 100644 index 00000000000..f4447e7c6b7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericenumattribute.hpp @@ -0,0 +1,172 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { + +template +void +SingleValueNumericEnumAttribute::considerUpdateAttributeChange(const Change & c) +{ + _currDocValues[c._doc] = c._data.get(); +} + +template +void +SingleValueNumericEnumAttribute::considerArithmeticAttributeChange(const Change & c, UniqueSet & newUniques) +{ + T oldValue; + typename std::map::const_iterator iter = _currDocValues.find(c._doc); + if (iter != _currDocValues.end()) { + oldValue = iter->second; + } else { + oldValue = get(c._doc); + } + + T newValue = this->applyArithmetic(oldValue, c); + + EnumIndex idx; + if (!this->_enumStore.findIndex(newValue, idx)) { + newUniques.insert(newValue); + } + + _currDocValues[c._doc] = newValue; +} + +template +void +SingleValueNumericEnumAttribute::applyArithmeticValueChange(const Change & c, EnumStoreBase::IndexVector & unused) +{ + EnumIndex oldIdx = this->_enumIndices[c._doc]; + EnumIndex newIdx; + T newValue = this->applyArithmetic(get(c._doc), c); + this->_enumStore.findIndex(newValue, newIdx); + + this->updateEnumRefCounts(c, newIdx, oldIdx, unused); +} + +template +SingleValueNumericEnumAttribute:: +SingleValueNumericEnumAttribute(const vespalib::string & baseFileName, + const AttributeVector::Config & c) + : SingleValueEnumAttribute(baseFileName, c), + _currDocValues() +{ +} + + +template +void +SingleValueNumericEnumAttribute::onCommit() +{ + SingleValueEnumAttribute::onCommit(); + _currDocValues.clear(); +} + + +template +bool +SingleValueNumericEnumAttribute::onLoadEnumerated(typename B::ReaderBase & + attrReader) +{ + FileUtil::LoadedBuffer::UP 
udatBuffer(this->loadUDAT()); + + uint64_t numValues = attrReader.getEnumCount(); + uint32_t numDocs = numValues; + + EnumIndexVector eidxs; + this->fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs); + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + LoadedEnumAttributeVector loaded; + EnumVector enumHist; + if (this->hasPostings()) { + loaded.reserve(numValues); + this->fillEnumIdx(attrReader, + numValues, + eidxs, + loaded); + } else { + EnumVector(eidxs.size(), 0).swap(enumHist); + this->fillEnumIdx(attrReader, + numValues, + eidxs, + enumHist); + } + EnumIndexVector().swap(eidxs); + if (this->hasPostings()) { + if (numDocs > 0) { + this->onAddDoc(numDocs - 1); + } + attribute::sortLoadedByEnum(loaded); + this->fillPostingsFixupEnum(loaded); + } else { + this->fixupEnumRefCounts(enumHist); + } + return true; +} + + +template +bool +SingleValueNumericEnumAttribute::onLoad() +{ + typename B::template PrimitiveReader attrReader(*this); + bool ok(attrReader.getHasLoadData()); + + if (!ok) + return false; + + this->setCreateSerialNum(attrReader.getCreateSerialNum()); + + if (attrReader.getEnumerated()) + return onLoadEnumerated(attrReader); + + const uint32_t numDocs(attrReader.getDataCount()); + LoadedVectorR loaded(numDocs); + + this->setNumDocs(numDocs); + this->setCommittedDocIdLimit(numDocs); + if (numDocs > 0) { + this->onAddDoc(numDocs - 1); + } + for (uint32_t docIdx = 0; docIdx < numDocs; ++docIdx) { + loaded[docIdx]._docId = docIdx; + loaded[docIdx]._idx = 0; + loaded[docIdx].setValue(attrReader.getNextData()); + } + + attribute::sortLoadedByValue(loaded); + this->fillPostings(loaded); + loaded.rewind(); + this->fillEnum(loaded); + attribute::sortLoadedByDocId(loaded); + loaded.rewind(); + this->fillValues(loaded); + + return true; +} + + +template +AttributeVector::SearchContext::UP +SingleValueNumericEnumAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) 
params; + QueryTermSimple::RangeResult res = qTerm->getRange(); + if (res.isEqual()) { + return AttributeVector::SearchContext::UP (new SingleSearchContext(std::move(qTerm), *this)); + } else { + return AttributeVector::SearchContext::UP (new SingleSearchContext(std::move(qTerm), *this)); + } +} + + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp new file mode 100644 index 00000000000..3eb6f61101d --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singlenumericpostattribute.h" +#include "singlenumericpostattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.singlenumericpostattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h new file mode 100644 index 00000000000..55072b62d5a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "postinglistsearchcontext.h" + +namespace search { + +/* + * Implementation of single value numeric attribute that in addition to enum store + * uses an underlying posting list to provide faster search. 
+ * + * B: EnumAttribute + */ +template +class SingleValueNumericPostingAttribute + : public SingleValueNumericEnumAttribute, + protected PostingListAttributeSubBase +{ +private: + friend class PostingListAttributeTest; + template + friend class attribute::PostingSearchContext; // getEnumStore() + typedef SingleValueNumericPostingAttribute SelfType; + typedef typename B::LoadedVector LoadedVector; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef PostingListAttributeSubBase PostingParent; +public: + typedef typename SingleValueNumericEnumAttribute::EnumStore EnumStore; +private: + typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex; + typedef typename SingleValueNumericEnumAttribute::generation_t generation_t; +public: + typedef typename SingleValueNumericEnumAttribute::T T; +private: + + typedef typename SingleValueNumericEnumAttribute::SingleSearchContext SingleSearchContext; + typedef SingleSearchContext SingleNumericSearchContext; + typedef attribute::NumericPostingSearchContext + SinglePostingSearchContext; + + typedef typename PostingParent::PostingMap PostingMap; + typedef typename B::BaseClass::Change Change; + typedef typename B::BaseClass::ChangeVector ChangeVector; + typedef typename B::BaseClass::ChangeVector::const_iterator ChangeVectorIterator; + typedef typename B::BaseClass::DocId DocId; + typedef typename B::BaseClass::ValueModifier ValueModifier; + +public: + typedef EnumPostingTree Dictionary; +private: + typedef typename Dictionary::Iterator DictionaryIterator; + typedef typename Dictionary::ConstIterator DictionaryConstIterator; + typedef typename EnumStore::ComparatorType ComparatorType; + using PostingParent::_postingList; + using PostingParent::clearAllPostings; + using PostingParent::handleFillPostings; + using PostingParent::fillPostingsFixupEnumBase; + using PostingParent::forwardedOnAddDoc; + + virtual void freezeEnumDictionary(); + virtual void mergeMemoryStats(MemoryUsage & total); + 
void applyUpdateValueChange(const Change & c, + EnumStore & enumStore, + std::map & currEnumIndices); + void + makePostingChange(const EnumStoreComparator *cmp, + const std::map &currEnumIndices, + PostingMap &changePost); + + virtual void applyValueChanges(EnumStoreBase::IndexVector & unused); + +public: + SingleValueNumericPostingAttribute(const vespalib::string & name, const AttributeVector::Config & cfg); + virtual ~SingleValueNumericPostingAttribute(); + + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + virtual bool + onAddDoc(DocId doc) + { + return forwardedOnAddDoc(doc, + this->_enumIndices.size(), + this->_enumIndices.capacity()); + } + + virtual void + fillPostings(LoadedVector & loaded) + { + handleFillPostings(loaded); + } + + virtual attribute::IPostingListAttributeBase * + getIPostingListAttributeBase(void) + { + return this; + } + + virtual void + fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) + { + fillPostingsFixupEnumBase(loaded); + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp new file mode 100644 index 00000000000..ebfdbe9b066 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.hpp @@ -0,0 +1,153 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include + +namespace search { + +template +SingleValueNumericPostingAttribute::~SingleValueNumericPostingAttribute() +{ + this->disableFreeLists(); + this->disableElemHoldList(); + clearAllPostings(); +} + +template +SingleValueNumericPostingAttribute::SingleValueNumericPostingAttribute(const vespalib::string & name, + const AttributeVector::Config & c) : + SingleValueNumericEnumAttribute(name, c), + PostingParent(*this, this->getEnumStore()) +{ +} + +template +void +SingleValueNumericPostingAttribute::freezeEnumDictionary() +{ + this->getEnumStore().freezeTree(); +} + +template +void +SingleValueNumericPostingAttribute::mergeMemoryStats(MemoryUsage & total) +{ + total.merge(this->_postingList.getMemoryUsage()); +} + +template +void +SingleValueNumericPostingAttribute::applyUpdateValueChange(const Change & c, + EnumStore & enumStore, + std::map & currEnumIndices) +{ + EnumIndex newIdx; + enumStore.findIndex(c._data.raw(), newIdx); + currEnumIndices[c._doc] = newIdx; +} + +template +void +SingleValueNumericPostingAttribute:: +makePostingChange(const EnumStoreComparator *cmpa, + const std::map &currEnumIndices, + PostingMap &changePost) +{ + typedef typename std::map::const_iterator EnumIter; + for (EnumIter iter = currEnumIndices.begin(), end = currEnumIndices.end(); + iter != end; ++iter) { + uint32_t docId = iter->first; + EnumIndex oldIdx = this->_enumIndices[docId]; + EnumIndex newIdx = iter->second; + + // add new posting + changePost[EnumPostingPair(newIdx, cmpa)].add(docId, 1); + + // remove old posting + if ( oldIdx.valid()) { + changePost[EnumPostingPair(oldIdx, cmpa)].remove(docId); + } + } +} + + +template +void +SingleValueNumericPostingAttribute::applyValueChanges(EnumStoreBase::IndexVector & unused) +{ + EnumStore & enumStore = this->getEnumStore(); + Dictionary & dict = enumStore.getPostingDictionary(); + ComparatorType cmpa(enumStore); + PostingMap changePost; + + // used to make sure several 
arithmetic operations on the same document in a single commit works + std::map currEnumIndices; + + for (ChangeVectorIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) { + typename std::map::const_iterator enumIter = currEnumIndices.find(iter->_doc); + EnumIndex oldIdx; + if (enumIter != currEnumIndices.end()) { + oldIdx = enumIter->second; + } else { + oldIdx = this->_enumIndices[iter->_doc]; + } + + if (iter->_type == ChangeBase::UPDATE) { + applyUpdateValueChange(*iter, enumStore, + currEnumIndices); + } else if (iter->_type >= ChangeBase::ADD && iter->_type <= ChangeBase::DIV) { + if (oldIdx.valid()) { + T oldValue = enumStore.getValue(oldIdx); + T newValue = this->applyArithmetic(oldValue, *iter); + + DictionaryIterator addItr = dict.find(EnumIndex(), ComparatorType(enumStore, newValue)); + EnumIndex newIdx = addItr.getKey(); + currEnumIndices[iter->_doc] = newIdx; + } + } else if(iter->_type == ChangeBase::CLEARDOC) { + this->_defaultValue._doc = iter->_doc; + applyUpdateValueChange(this->_defaultValue, enumStore, + currEnumIndices); + } + } + + makePostingChange(&cmpa, currEnumIndices, changePost); + + this->updatePostings(changePost); + SingleValueNumericEnumAttribute::applyValueChanges(unused); +} + +template +void +SingleValueNumericPostingAttribute::removeOldGenerations(generation_t firstUsed) +{ + SingleValueNumericEnumAttribute::removeOldGenerations(firstUsed); + _postingList.trimHoldLists(firstUsed); +} + +template +void +SingleValueNumericPostingAttribute::onGenerationChange(generation_t generation) +{ + _postingList.freeze(); + SingleValueNumericEnumAttribute::onGenerationChange(generation); + _postingList.transferHoldLists(generation - 1); +} + +template +AttributeVector::SearchContext::UP +SingleValueNumericPostingAttribute::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + return std::unique_ptr + (new SinglePostingSearchContext(std::move(qTerm), + 
params, + *this)); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp new file mode 100644 index 00000000000..a855adfdbf9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -0,0 +1,242 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singlesmallnumericattribute.h" +#include + +namespace search +{ + +SingleValueSmallNumericAttribute:: +SingleValueSmallNumericAttribute(const vespalib::string & baseFileName, + const Config & c, + Word valueMask, + uint32_t valueShiftShift, + uint32_t valueShiftMask, + uint32_t wordShift) + : B(baseFileName, c, c.basicType()), + _valueMask(valueMask), + _valueShiftShift(valueShiftShift), + _valueShiftMask(valueShiftMask), + _wordShift(wordShift), + _wordData(c.getGrowStrategy().getDocsInitialCapacity(), + c.getGrowStrategy().getDocsGrowPercent(), + c.getGrowStrategy().getDocsGrowDelta(), + getGenerationHolder()) +{ + assert(_valueMask + 1 == (1u << (1u << valueShiftShift))); + assert((_valueShiftMask + 1) * (1u << valueShiftShift) == + 8 * sizeof(Word)); + assert(_valueShiftMask + 1 == (1u << wordShift)); +} + + +SingleValueSmallNumericAttribute::~SingleValueSmallNumericAttribute(void) +{ + getGenerationHolder().clearHoldLists(); +} + + +void +SingleValueSmallNumericAttribute::onCommit() +{ + checkSetMaxValueCount(1); + + { + // apply updates + B::ValueModifier valueGuard(getValueModifier()); + for (const auto & change : _changes) { + if (change._type == ChangeBase::UPDATE) { + std::atomic_thread_fence(std::memory_order_release); + set(change._doc, change._data); + } else if (change._type >= ChangeBase::ADD && + change._type <= ChangeBase::DIV) { + std::atomic_thread_fence(std::memory_order_release); + set(change._doc, applyArithmetic(getFast(change._doc), change)); 
+ } else if (change._type == ChangeBase::CLEARDOC) { + std::atomic_thread_fence(std::memory_order_release); + set(change._doc, 0u); + } + } + } + + std::atomic_thread_fence(std::memory_order_release); + removeAllOldGenerations(); + + _changes.clear(); +} + + +void +SingleValueSmallNumericAttribute::onUpdateStat() +{ + MemoryUsage usage = _wordData.getMemoryUsage(); + usage.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes()); + uint32_t numDocs = B::getNumDocs(); + updateStatistics(numDocs, numDocs, + usage.allocatedBytes(), usage.usedBytes(), + usage.deadBytes(), usage.allocatedBytesOnHold()); +} + + +void +SingleValueSmallNumericAttribute::removeOldGenerations(generation_t firstUsed) +{ + getGenerationHolder().trimHoldLists(firstUsed); +} + + +void +SingleValueSmallNumericAttribute::onGenerationChange(generation_t generation) +{ + getGenerationHolder().transferHoldLists(generation - 1); +} + + +bool +SingleValueSmallNumericAttribute::onLoad() +{ + B::PrimitiveReader attrReader(*this); + bool ok(attrReader.hasData()); + if (ok) { + setCreateSerialNum(attrReader.getCreateSerialNum()); + const size_t sz(attrReader.getDataCount()); + getGenerationHolder().clearHoldLists(); + _wordData.reset(); + _wordData.unsafe_reserve(sz - 1); + Word numDocs = attrReader.getNextData(); + for (uint32_t i = 1; i < sz; ++i) { + _wordData.push_back(attrReader.getNextData()); + } + assert(((numDocs + _valueShiftMask) >> _wordShift) + 1 == sz); + B::setNumDocs(numDocs); + B::setCommittedDocIdLimit(numDocs); + } + + return ok; +} + + +void +SingleValueSmallNumericAttribute::onSave(IAttributeSaveTarget &saveTarget) +{ + assert(!saveTarget.getEnumerated()); + const size_t numDocs(getCommittedDocIdLimit()); + const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift); + const size_t sz((numDataWords + 1) * sizeof(Word)); + IAttributeSaveTarget::Buffer buf(saveTarget.datWriter().allocBuf(sz)); + + char *p = buf->getFree(); + const char *e = p + sz; + Word numDocs2 = 
numDocs; + memcpy(p, &numDocs2, sizeof(Word)); + p += sizeof(Word); + memcpy(p, &_wordData[0], numDataWords * sizeof(Word)); + p += numDataWords * sizeof(Word); + assert(p == e); + (void) e; + buf->moveFreeToData(sz); + saveTarget.datWriter().writeBuf(std::move(buf)); + assert(numDocs == getCommittedDocIdLimit()); +} + + +AttributeVector::SearchContext::UP +SingleValueSmallNumericAttribute::getSearch(QueryTermSimple::UP qTerm, + const SearchContext::Params & params) const +{ + (void) params; + return SearchContext::UP(new SingleSearchContext(std::move(qTerm), *this)); +} + + +void +SingleValueSmallNumericAttribute::clearDocs(DocId lidLow, DocId lidLimit) +{ + assert(lidLow <= lidLimit); + assert(lidLimit <= getNumDocs()); + for (DocId lid = lidLow; lid < lidLimit; ++lid) { + if (getFast(lid) != 0) { + clearDoc(lid); + } + } +} + + +void +SingleValueSmallNumericAttribute::onShrinkLidSpace() +{ + uint32_t committedDocIdLimit = getCommittedDocIdLimit(); + assert(committedDocIdLimit < getNumDocs()); + const size_t numDocs(committedDocIdLimit); + const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift); + _wordData.shrink(numDataWords); + setNumDocs(committedDocIdLimit); +} + + +uint64_t +SingleValueSmallNumericAttribute::getEstimatedSaveByteSize() const +{ + uint64_t headerSize = 4096; + const size_t numDocs(getCommittedDocIdLimit()); + const size_t numDataWords((numDocs + _valueShiftMask) >> _wordShift); + const size_t sz((numDataWords + 1) * sizeof(Word)); + return headerSize + sz; +} + + +namespace +{ + +template +uint32_t +log2bits(void); + +template <> +uint32_t +log2bits(void) +{ + return 0x05u; +} + +} + + +SingleValueBitNumericAttribute:: +SingleValueBitNumericAttribute(const vespalib::string &baseFileName) + : SingleValueSmallNumericAttribute(baseFileName, + Config(BasicType::UINT1, CollectionType::SINGLE), + 0x01u /* valueMask */, + 0x00u /* valueShiftShift */, + 8 * sizeof(Word) - 1 /* valueShiftMask */, + log2bits() /* wordShift */) +{ +} + + 
+SingleValueSemiNibbleNumericAttribute:: +SingleValueSemiNibbleNumericAttribute(const vespalib::string &baseFileName) + : SingleValueSmallNumericAttribute(baseFileName, + Config(BasicType::UINT2, CollectionType::SINGLE), + 0x03u /* valueMask */, + 0x01u /* valueShiftShift */, + 4 * sizeof(Word) - 1 /* valueShiftMask */, + log2bits() - 1/* wordShift */) +{ +} + + +SingleValueNibbleNumericAttribute:: +SingleValueNibbleNumericAttribute(const vespalib::string &baseFileName) + : SingleValueSmallNumericAttribute(baseFileName, + Config(BasicType::UINT4, CollectionType::SINGLE), /* 4-bit values: basic type must agree with the 0x0f value mask (was UINT1, the bit attribute's type) */ + 0x0fu /* valueMask */, + 0x02u /* valueShiftShift */, + 2 * sizeof(Word) - 1 /* valueShiftMask */, + log2bits() - 2/* wordShift */) +{ +} + + +} diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h new file mode 100644 index 00000000000..548f612a6f6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h @@ -0,0 +1,313 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +class SingleValueSmallNumericAttribute : + public IntegerAttributeTemplate +{ +private: +// friend class AttributeVector::SearchContext; + typedef IntegerAttributeTemplate B; + typedef B::BaseType T; + typedef B::DocId DocId; + typedef B::EnumHandle EnumHandle; + typedef B::largeint_t largeint_t; + typedef B::Weighted Weighted; + typedef B::WeightedInt WeightedInt; + typedef B::WeightedFloat WeightedFloat; + typedef B::WeightedEnum WeightedEnum; + typedef B::generation_t generation_t; + +protected: + typedef uint32_t Word; // Large enough to contain numDocs.
+private: + Word _valueMask; // 0x01, 0x03 or 0x0f + uint32_t _valueShiftShift; // 0x00, 0x01 or 0x02 + uint32_t _valueShiftMask; // 0x1f, 0x0f or 0x07 + uint32_t _wordShift; // 0x05, 0x04 or 0x03 + + typedef search::attribute::RcuVectorBase DataVector; + DataVector _wordData; + + virtual T getFromEnum(EnumHandle e) const { + (void) e; + return T(); + } + +protected: + virtual bool + findEnum(T value, EnumHandle & e) const + { + (void) value; (void) e; + return false; + } + + void + set(DocId doc, T v) + { + Word &word = _wordData[doc >> _wordShift]; + uint32_t valueShift = (doc & _valueShiftMask) << _valueShiftShift; + word = (word & ~(_valueMask << valueShift)) | + ((v & _valueMask) << valueShift); + } + + +public: + /* + * Specialization of SearchContext + */ + class SingleSearchContext : public NumericAttribute::Range, public SearchContext + { + private: + const Word *_wordData; + Word _valueMask; + uint32_t _valueShiftShift; + uint32_t _valueShiftMask; + uint32_t _wordShift; + + virtual bool + onCmp(DocId docId, int32_t & weight) const + { + return cmp(docId, weight); + } + + virtual bool + onCmp(DocId docId) const + { + return cmp(docId); + } + + virtual bool valid() const { return this->isValid(); } + + public: + SingleSearchContext(QueryTermSimple::UP qTerm, const NumericAttribute & toBeSearched) + : NumericAttribute::Range(*qTerm), + SearchContext(toBeSearched), + _wordData(&static_cast + (toBeSearched)._wordData[0]), + _valueMask(static_cast + (toBeSearched)._valueMask), + _valueShiftShift( + static_cast + (toBeSearched)._valueShiftShift), + _valueShiftMask( + static_cast + (toBeSearched)._valueShiftMask), + _wordShift(static_cast + (toBeSearched)._wordShift) + { + } + + bool + cmp(DocId docId, int32_t & weight) const + { + const Word &word = _wordData[docId >> _wordShift]; + uint32_t valueShift = + (docId & _valueShiftMask) << _valueShiftShift; + T v = (word >> valueShift) & _valueMask; + weight = 1; + return match(v); + } + + bool + cmp(DocId docId) 
const + { + const Word &word = _wordData[docId >> _wordShift]; + uint32_t valueShift = + (docId & _valueShiftMask) << _valueShiftShift; + T v = (word >> valueShift) & _valueMask; + return match(v); + } + + virtual Int64Range getAsIntegerTerm() const { + return this->getRange(); + } + + virtual std::unique_ptr + createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) + { + if (!valid()) { + return queryeval::SearchIterator::UP( + new queryeval::EmptySearch()); + } + if (getIsFilter()) { + return queryeval::SearchIterator::UP + (strict + ? new FilterAttributeIteratorStrict(*this, matchData) + : new FilterAttributeIteratorT(*this, matchData)); + } + return queryeval::SearchIterator::UP + (strict + ? new AttributeIteratorStrict(*this, matchData) + : new AttributeIteratorT(*this, matchData)); + } + }; + + SingleValueSmallNumericAttribute(const vespalib::string & baseFileName, + const Config &c, + Word valueMask, + uint32_t valueShiftShift, + uint32_t valueShiftMask, + uint32_t wordShift); + + virtual + ~SingleValueSmallNumericAttribute(void); + + virtual uint32_t + getValueCount(DocId doc) const + { + if (doc >= B::getNumDocs()) { + return 0; + } + return 1; + } + virtual void onCommit(); + virtual void onUpdateStat(); + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + virtual bool addDoc(DocId & doc) { + if ((B::getNumDocs() & _valueShiftMask) == 0) { + bool incGen = _wordData.isFull(); + _wordData.push_back(Word()); + std::atomic_thread_fence(std::memory_order_release); + B::incNumDocs(); + doc = B::getNumDocs() - 1; + updateUncommittedDocIdLimit(doc); + if (incGen) { + this->incGeneration(); + } else + this->removeAllOldGenerations(); + } else { + B::incNumDocs(); + doc = B::getNumDocs() - 1; + updateUncommittedDocIdLimit(doc); + } + return true; + } + virtual bool onLoad(); + + virtual void + onSave(IAttributeSaveTarget &saveTarget); + + SearchContext::UP + 
getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const override; + + T getFast(DocId doc) const { + const Word &word = _wordData[doc >> _wordShift]; + uint32_t valueShift = (doc & _valueShiftMask) << _valueShiftShift; + return (word >> valueShift) & _valueMask; + } + + //------------------------------------------------------------------------- + // new read api + //------------------------------------------------------------------------- + virtual T get(DocId doc) const { + return getFast(doc); + } + virtual largeint_t getInt(DocId doc) const { + return static_cast(getFast(doc)); + } + virtual void + getEnumValue(const EnumHandle * v, uint32_t *e, uint32_t sz) const { + (void) v; + (void) e; + (void) sz; + } + virtual double getFloat(DocId doc) const { + return static_cast(getFast(doc)); + } + virtual uint32_t getEnum(DocId doc) const { + (void) doc; + return std::numeric_limits::max(); // does not have enum + } + virtual uint32_t getAll(DocId doc, T * v, uint32_t sz) const { + if (sz > 0) { + v[0] = getFast(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const { + if (sz > 0) { + v[0] = static_cast(getFast(doc)); + } + return 1; + } + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const { + if (sz > 0) { + v[0] = static_cast(getFast(doc)); + } + return 1; + } + virtual uint32_t get(DocId doc, EnumHandle * e, uint32_t sz) const { + if (sz > 0) { + e[0] = getEnum(doc); + } + return 1; + } + virtual uint32_t getAll(DocId doc, Weighted * v, uint32_t sz) const { + (void) doc; (void) v; (void) sz; + return 0; + } + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedInt(static_cast(getFast(doc))); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedFloat(static_cast(getFast(doc))); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedEnum * e, uint32_t sz) const 
{ + (void) doc; (void) e; (void) sz; + return 0; + } + + virtual void + clearDocs(DocId lidLow, DocId lidLimit); + + virtual void + onShrinkLidSpace(); + + virtual uint64_t getEstimatedSaveByteSize() const override; +}; + + +class SingleValueBitNumericAttribute : public SingleValueSmallNumericAttribute +{ +public: + SingleValueBitNumericAttribute(const vespalib::string & baseFileName); +}; + + +class SingleValueSemiNibbleNumericAttribute : + public SingleValueSmallNumericAttribute +{ +public: + SingleValueSemiNibbleNumericAttribute(const vespalib::string & + baseFileName); +}; + +class SingleValueNibbleNumericAttribute : + public SingleValueSmallNumericAttribute +{ +public: + SingleValueNibbleNumericAttribute(const vespalib::string & + baseFileName); +}; + +} + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp new file mode 100644 index 00000000000..0c6c6d8ee06 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.cpp @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singlestringattribute.h" +#include "singlestringattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.singlestringattribute"); +namespace search { + +template class SingleValueStringAttributeT>; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h new file mode 100644 index 00000000000..805850839a6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include "enumhintsearchcontext.h" + +namespace search { + +/* + * Implementation of single value string attribute that uses an underlying enum store + * to store unique string values. + * + * B: EnumAttribute + */ +template +class SingleValueStringAttributeT : public SingleValueEnumAttribute +{ +protected: + typedef StringAttribute::DocId DocId; + typedef StringAttribute::EnumHandle EnumHandle; + typedef StringAttribute::generation_t generation_t; + typedef StringAttribute::WeightedString WeightedString; + typedef StringAttribute::WeightedConstChar WeightedConstChar; + typedef StringAttribute::WeightedEnum WeightedEnum; + typedef StringAttribute::SearchContext SearchContext; + typedef StringAttribute::ChangeVector ChangeVector; + typedef StringAttribute::Change Change; + typedef StringAttribute::ValueModifier ValueModifier; + typedef StringAttribute::EnumModifier EnumModifier; + typedef StringAttribute::LoadedVector LoadedVector; + + typedef typename SingleValueEnumAttribute::EnumStore EnumStore; + typedef typename SingleValueEnumAttributeBase::EnumIndex EnumIndex; + typedef typename SingleValueEnumAttributeBase::EnumIndexVector EnumIndexVector; + typedef attribute::EnumHintSearchContext EnumHintSearchContext; + +public: + SingleValueStringAttributeT(const vespalib::string & name, const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector::BasicType::STRING)); + ~SingleValueStringAttributeT(); + + virtual void + freezeEnumDictionary(void); + + //------------------------------------------------------------------------- + // Attribute read API + //------------------------------------------------------------------------- + virtual bool isUndefined(DocId doc) const { return get(doc)[0] == '\0'; } + virtual const char * get(DocId doc) const { + return this->_enumStore.getValue(this->_enumIndices[doc]); + } + virtual uint32_t get(DocId doc, vespalib::string * v, uint32_t sz) const { + if (sz > 0) { + 
v[0] = get(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, const char ** v, uint32_t sz) const { + if (sz > 0) { + v[0] = get(doc); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedString * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedString(get(doc), 1); + } + return 1; + } + virtual uint32_t get(DocId doc, WeightedConstChar * v, uint32_t sz) const { + if (sz > 0) { + v[0] = WeightedConstChar(get(doc), 1); + } + return 1; + } + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + class StringSingleImplSearchContext : public StringAttribute::StringSearchContext { + public: + StringSingleImplSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) : + StringSearchContext(std::move(qTerm), toBeSearched) + { } + protected: + bool onCmp(DocId doc, int32_t & weight) const override { + weight = 1; + return onCmp(doc); + } + + bool onCmp(DocId doc) const override { + const SingleValueStringAttributeT & attr(static_cast &>(attribute())); + return isMatch(attr._enumStore.getValue(attr._enumIndices[doc])); + } + + }; + + class StringTemplSearchContext : public StringSingleImplSearchContext, + public EnumHintSearchContext + { + using StringSingleImplSearchContext::queryTerm; + typedef SingleValueStringAttributeT AttrType; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + public: + StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched); + }; +}; + +typedef SingleValueStringAttributeT > SingleValueStringAttribute; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp new file mode 100644 index 00000000000..42859d0d862 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +//----------------------------------------------------------------------------- +// SingleValueStringAttributeT public +//----------------------------------------------------------------------------- +template +SingleValueStringAttributeT:: +SingleValueStringAttributeT(const vespalib::string &name, + const AttributeVector::Config & c) + : SingleValueEnumAttribute(name, c) +{ +} + +template +SingleValueStringAttributeT::~SingleValueStringAttributeT() +{ +} + +template +void +SingleValueStringAttributeT::freezeEnumDictionary(void) +{ + this->getEnumStore().freezeTree(); +} + + +template +AttributeVector::SearchContext::UP +SingleValueStringAttributeT::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + (void) params; + return std::unique_ptr + (new StringTemplSearchContext(std::move(qTerm), *this)); +} + +template +SingleValueStringAttributeT::StringTemplSearchContext::StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) : + StringSingleImplSearchContext(std::move(qTerm), toBeSearched), + EnumHintSearchContext(toBeSearched.getEnumStore().getEnumStoreDict(), + toBeSearched.getCommittedDocIdLimit(), + toBeSearched.getStatus().getNumValues()) +{ + const EnumStore &enumStore(toBeSearched.getEnumStore()); + + this->_plsc = static_cast(this); + if (this->valid()) { + if (this->isPrefix()) { + FoldedComparatorType comp(enumStore, queryTerm().getTerm(), true); + lookupRange(comp, comp); + } else if (this->isRegex()) { + vespalib::string prefix(vespalib::Regexp::get_prefix(this->queryTerm().getTerm())); + FoldedComparatorType comp(enumStore, prefix.c_str(), true); + lookupRange(comp, comp); + } else { + FoldedComparatorType comp(enumStore, 
queryTerm().getTerm()); + lookupTerm(comp); + } + } +} + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp new file mode 100644 index 00000000000..e0ac10c10af --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "singlestringpostattribute.h" +#include "singlestringpostattribute.hpp" +#include + +LOG_SETUP(".searchlib.attribute.singlestringpostattribute"); +namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h new file mode 100644 index 00000000000..449c75cadc4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +/* + * Implementation of single value string attribute that in addition to enum store + * uses an underlying posting list to provide faster search. 
+ * + * B: EnumAttribute + */ +template +class SingleValueStringPostingAttributeT + : public SingleValueStringAttributeT, + protected PostingListAttributeSubBase +{ +private: + friend class PostingListAttributeTest; + template + friend class attribute::PostingSearchContext; // getEnumStore() + friend class StringAttributeTest; + typedef SingleValueStringPostingAttributeT SelfType; + typedef typename B::LoadedVector LoadedVector; + typedef attribute::LoadedEnumAttributeVector LoadedEnumAttributeVector; + typedef PostingListAttributeSubBase PostingParent; + typedef typename SingleValueStringAttributeT::DocId DocId; +public: + typedef typename SingleValueStringAttributeT::EnumStore EnumStore; +private: + typedef typename SingleValueStringAttributeT::EnumIndex EnumIndex; + typedef typename SingleValueStringAttributeT::generation_t generation_t; + typedef typename SingleValueStringAttributeT::ValueModifier ValueModifier; + + typedef typename SingleValueStringAttributeT::StringSingleImplSearchContext StringSingleImplSearchContext; + typedef attribute::StringPostingSearchContext + StringSinglePostingSearchContext; + + typedef StringAttribute::Change Change; + typedef StringAttribute::ChangeVector ChangeVector; + + typedef typename PostingParent::PostingList PostingList; + typedef typename PostingParent::PostingMap PostingMap; + // typedef typename PostingParent::Posting Posting; + + typedef EnumPostingTree Dictionary; + typedef typename EnumStore::ComparatorType ComparatorType; + typedef typename EnumStore::FoldedComparatorType FoldedComparatorType; + typedef typename Dictionary::Iterator DictionaryIterator; + typedef typename Dictionary::ConstIterator DictionaryConstIterator; + typedef typename Dictionary::FrozenView FrozenDictionary; + using PostingParent::_postingList; + using PostingParent::clearAllPostings; + using PostingParent::handleFillPostings; + using PostingParent::fillPostingsFixupEnumBase; + using PostingParent::forwardedOnAddDoc; +public: + using 
PostingParent::getPostingList; + +private: + virtual void freezeEnumDictionary(); + virtual void mergeMemoryStats(MemoryUsage & total); + void applyUpdateValueChange(const Change & c, + EnumStore & enumStore, + std::map &currEnumIndices); + + void + makePostingChange(const EnumStoreComparator *cmp, + Dictionary &dict, + const std::map &currEnumIndices, + PostingMap &changePost); + + virtual void applyValueChanges(EnumStoreBase::IndexVector & unused); +public: + SingleValueStringPostingAttributeT(const vespalib::string & name, const AttributeVector::Config & c = + AttributeVector::Config(AttributeVector::BasicType::STRING)); + ~SingleValueStringPostingAttributeT(); + + virtual void removeOldGenerations(generation_t firstUsed); + virtual void onGenerationChange(generation_t generation); + + AttributeVector::SearchContext::UP + getSearch(QueryTermSimple::UP term, const AttributeVector::SearchContext::Params & params) const override; + + virtual bool + onAddDoc(DocId doc) + { + return forwardedOnAddDoc(doc, + this->_enumIndices.size(), + this->_enumIndices.capacity()); + } + + virtual void + fillPostings(LoadedVector & loaded) + { + handleFillPostings(loaded); + } + + virtual attribute::IPostingListAttributeBase * + getIPostingListAttributeBase(void) + { + return this; + } + + virtual void + fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) + { + fillPostingsFixupEnumBase(loaded); + } +}; + +typedef SingleValueStringPostingAttributeT > SingleValueStringPostingAttribute; + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp new file mode 100644 index 00000000000..a2fe36b2b16 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -0,0 +1,150 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { + +template +SingleValueStringPostingAttributeT::SingleValueStringPostingAttributeT(const vespalib::string & name, + const AttributeVector::Config & c) : + SingleValueStringAttributeT(name, c), + PostingParent(*this, this->getEnumStore()) +{ +} + +template +SingleValueStringPostingAttributeT::~SingleValueStringPostingAttributeT() +{ + this->disableFreeLists(); + this->disableElemHoldList(); + clearAllPostings(); +} + +template +void +SingleValueStringPostingAttributeT::freezeEnumDictionary() +{ + this->getEnumStore().freezeTree(); +} + +template +void +SingleValueStringPostingAttributeT::mergeMemoryStats(MemoryUsage & total) +{ + total.merge(this->_postingList.getMemoryUsage()); +} + +template +void +SingleValueStringPostingAttributeT::applyUpdateValueChange(const Change & c, + EnumStore & enumStore, + std::map &currEnumIndices) +{ + EnumIndex newIdx; + enumStore.findIndex(c._data.raw(), newIdx); + + currEnumIndices[c._doc] = newIdx; + +} + + +template +void +SingleValueStringPostingAttributeT:: +makePostingChange(const EnumStoreComparator *cmpa, + Dictionary &dict, + const std::map &currEnumIndices, + PostingMap &changePost) +{ + typedef typename std::map::const_iterator EnumIter; + for (EnumIter iter = currEnumIndices.begin(), end = currEnumIndices.end(); + iter != end; ++iter) { + + uint32_t docId = iter->first; + EnumIndex oldIdx = this->_enumIndices[docId]; + EnumIndex newIdx = iter->second; + + // add new posting + DictionaryIterator addItr = dict.find(newIdx, *cmpa); + changePost[EnumPostingPair(addItr.getKey(), cmpa)].add(docId, 1); + + // remove old posting + if ( oldIdx.valid()) { + DictionaryIterator rmItr = dict.find(oldIdx, *cmpa); + changePost[EnumPostingPair(rmItr.getKey(), cmpa)].remove(docId); + } + } +} + + +template +void +SingleValueStringPostingAttributeT::applyValueChanges(EnumStoreBase::IndexVector & unused) +{ + EnumStore & enumStore = this->getEnumStore(); + Dictionary & dict = 
enumStore.getPostingDictionary(); + FoldedComparatorType cmpa(enumStore); + PostingMap changePost; + + // used to make sure several arithmetic operations on the same document in a single commit works + std::map currEnumIndices; + + typedef ChangeVector::const_iterator CVIterator; + for (CVIterator iter = this->_changes.begin(), end = this->_changes.end(); iter != end; ++iter) { + typename std::map::const_iterator enumIter = currEnumIndices.find(iter->_doc); + EnumIndex oldIdx; + if (enumIter != currEnumIndices.end()) { + oldIdx = enumIter->second; + } else { + oldIdx = this->_enumIndices[iter->_doc]; + } + if (iter->_type == ChangeBase::UPDATE) { + applyUpdateValueChange(*iter, enumStore, + currEnumIndices); + } else if (iter->_type == ChangeBase::CLEARDOC) { + this->_defaultValue._doc = iter->_doc; + applyUpdateValueChange(this->_defaultValue, enumStore, + currEnumIndices); + } + } + + makePostingChange(&cmpa, dict, currEnumIndices, changePost); + + this->updatePostings(changePost); + + SingleValueStringAttributeT::applyValueChanges(unused); +} + +template +void +SingleValueStringPostingAttributeT::removeOldGenerations(generation_t firstUsed) +{ + SingleValueStringAttributeT::removeOldGenerations(firstUsed); + _postingList.trimHoldLists(firstUsed); +} + +template +void +SingleValueStringPostingAttributeT::onGenerationChange(generation_t generation) +{ + _postingList.freeze(); + SingleValueStringAttributeT::onGenerationChange(generation); + _postingList.transferHoldLists(generation - 1); +} + +template +AttributeVector::SearchContext::UP +SingleValueStringPostingAttributeT::getSearch(QueryTermSimple::UP qTerm, + const AttributeVector::SearchContext::Params & params) const +{ + return std::unique_ptr + (new StringSinglePostingSearchContext(std::move(qTerm), + params.useBitVector(), + *this)); +} + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp new file 
mode 100644 index 00000000000..bac7dcfa7f7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp @@ -0,0 +1,136 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "sourceselector.h" +#include +#include +#include +#include + +using search::queryeval::Source; +using vespalib::FileHeader; +using vespalib::GenericHeader; +using search::common::FileHeaderContext; + +namespace search { + +namespace { + +const vespalib::string defaultSourceTag = "Default source"; +const vespalib::string baseIdTag = "Base id"; +const vespalib::string docIdLimitTag = "Doc id limit"; + +class AddMyHeaderTags : public FileHeaderContext +{ + const SourceSelector::HeaderInfo &_hi; + const FileHeaderContext &_parent; + +public: + AddMyHeaderTags(const SourceSelector::HeaderInfo &hi, + const FileHeaderContext &parent) + : _hi(hi), + _parent(parent) + { + } + + virtual void + addTags(GenericHeader &header, const vespalib::string &name) const + { + typedef GenericHeader::Tag Tag; + _parent.addTags(header, name); + header.putTag(Tag(defaultSourceTag, _hi._defaultSource)); + header.putTag(Tag(baseIdTag, _hi._baseId)); + header.putTag(Tag(docIdLimitTag, _hi._docIdLimit)); + } +}; + +} // namespace + +SourceSelector::HeaderInfo::HeaderInfo(const vespalib::string & baseFileName, + Source defaultSource, + uint32_t baseId, + uint32_t docIdLimit) : + _baseFileName(baseFileName), + _defaultSource(defaultSource), + _baseId(baseId), + _docIdLimit(docIdLimit) +{ +} + +SourceSelector::SaveInfo::SaveInfo(const vespalib::string & baseFileName, + Source defaultSource, + uint32_t baseId, + uint32_t docIdLimit, + AttributeVector & sourceStore) + : _header(baseFileName, defaultSource, baseId, docIdLimit), + _memSaver() +{ + vespalib::string attrName = sourceStore.getBaseFileName(); + sourceStore.saveAs(_header._baseFileName, _memSaver); + sourceStore.setBaseFileName(attrName); +} + +bool 
+SourceSelector::SaveInfo::save(const TuneFileAttributes &tuneFileAttributes, + const FileHeaderContext &fileHeaderContext) +{ + AddMyHeaderTags fh(_header, fileHeaderContext); + return _memSaver.writeToFile(tuneFileAttributes, fh); +} + +SourceSelector::LoadInfo::LoadInfo(const vespalib::string &baseFileName) + : _header(baseFileName, 0, 0, 0) +{ +} + +void +SourceSelector::LoadInfo::load() +{ + const vespalib::string fileName = _header._baseFileName + ".dat"; + Fast_BufferedFile file; + // XXX no checking for success + file.ReadOpen(fileName.c_str()); + + FileHeader fileHeader(4096); + fileHeader.readFile(file); + if (fileHeader.hasTag(defaultSourceTag)) { + _header._defaultSource = fileHeader.getTag(defaultSourceTag).asInteger(); + } + if (fileHeader.hasTag(baseIdTag)) { + _header._baseId = fileHeader.getTag(baseIdTag).asInteger(); + } + if (fileHeader.hasTag(docIdLimitTag)) { + _header._docIdLimit = fileHeader.getTag(docIdLimitTag).asInteger(); + } + file.Close(); +} + +SourceSelector::SourceSelector(Source defaultSource, AttributeVector::SP realSource) : + ISourceSelector(defaultSource), + _realSource(realSource) +{ +} + +SourceSelector::SaveInfo::UP +SourceSelector::extractSaveInfo(const vespalib::string & baseFileName) +{ + return SaveInfo::UP(new SaveInfo(baseFileName, getDefaultSource(), getBaseId(), + getDocIdLimit(), *_realSource)); +} + +SourceSelector::LoadInfo::UP +SourceSelector::extractLoadInfo(const vespalib::string & baseFileName) +{ + return LoadInfo::UP(new LoadInfo(baseFileName)); +} + +SourceSelector::Histogram SourceSelector::getDistribution() const +{ + Histogram h; + ISourceSelector::Iterator::UP it = createIterator(); + for (size_t i(0), m(getDocIdLimit()); i < m; i++) { + h.inc(it->getSource(i)); + } + return h; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.h b/searchlib/src/vespa/searchlib/attribute/sourceselector.h new file mode 100644 index 00000000000..424839c7495 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/attribute/sourceselector.h @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributememorysavetarget.h" +#include "attributevector.h" +#include + +namespace search { + +/* ISourceSelector that holds a shared AttributeVector (_realSource) and adds save/load helper types plus a per-source histogram. setSource() and createIterator() stay pure virtual. */ class SourceSelector : public queryeval::ISourceSelector +{ +private: +protected: + AttributeVector::SP _realSource; + + /* Returns src - diff, or 0 when src is not greater than diff. */ queryeval::Source getNewSource(queryeval::Source src, uint32_t diff) { + return src > diff ? src - diff : 0; + } + +public: + /* Base file name plus the three header values: default source, base id and doc id limit. */ struct HeaderInfo { + vespalib::string _baseFileName; + queryeval::Source _defaultSource; + uint32_t _baseId; + uint32_t _docIdLimit; + HeaderInfo(const vespalib::string & baseFileName, + queryeval::Source defaultSource, + uint32_t baseId, + uint32_t docIdLimit); + }; + + /* Header values together with an in-memory save target; save() writes them out. */ class SaveInfo { + private: + HeaderInfo _header; + AttributeMemorySaveTarget _memSaver; + public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + SaveInfo(const vespalib::string & baseFileName, + queryeval::Source defaultSource, + uint32_t baseId, + uint32_t docIdLimit, + AttributeVector & sourceStore); + const HeaderInfo & getHeader() const { return _header; } + bool save(const TuneFileAttributes &tuneFileAttributes, + const search::common::FileHeaderContext &fileHeaderContext); + }; + + /* Header values for a base file name; load() fills them in and header() exposes them. */ class LoadInfo { + private: + HeaderInfo _header; + public: + typedef std::unique_ptr UP; + LoadInfo(const vespalib::string & baseFileName); + void load(); + const HeaderInfo & header() const { return _header; } + }; + + /* 256 zero-initialised uint32_t counters indexed by source; the index is not range-checked. */ class Histogram { + public: + Histogram() { memset(_h, 0, sizeof(_h)); } + uint32_t operator [] (queryeval::Source s) const { return _h[s]; } + void inc(queryeval::Source s) { _h[s]++; } + private: + uint32_t _h[256]; + }; + +public: + typedef std::unique_ptr UP; + SourceSelector(queryeval::Source defaultSource, AttributeVector::SP realSource); + /** + * This will compute the distribution of the sources used over the whole lid 
space. + */ + Histogram getDistribution() const; + /* Creates a SaveInfo for baseFileName. */ SaveInfo::UP extractSaveInfo(const vespalib::string & baseFileName); + /* Creates a LoadInfo for baseFileName; LoadInfo::load() is a separate call. */ static LoadInfo::UP extractLoadInfo(const vespalib::string & baseFileName); + + // Inherit doc from ISourceSelector + virtual void setSource(uint32_t docId, queryeval::Source source) = 0; + virtual ISourceSelector::Iterator::UP createIterator() const = 0; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp new file mode 100644 index 00000000000..95f38484fae --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "stringattribute.h" +#include + +LOG_SETUP(".searchlib.attribute.stringattribute"); + +/* This translation unit defines nothing inside the namespace. */ namespace search { + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.h b/searchlib/src/vespa/searchlib/attribute/stringattribute.h new file mode 100644 index 00000000000..8d38f5f1910 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/stringattribute.h @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +/* This header declares nothing inside the namespace; it only pulls in its includes. */ namespace search { + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp new file mode 100644 index 00000000000..911905aaf83 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -0,0 +1,542 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "stringbase.h" +#include +#include +#include +#include +#include + +LOG_SETUP(".searchlib.attribute.stringbase"); + +#include + +namespace search +{ + +IMPLEMENT_IDENTIFIABLE_ABSTRACT(StringAttribute, AttributeVector); + +using attribute::LoadedEnumAttribute; +using attribute::LoadedEnumAttributeVector; +using vespalib::Regexp; + +/* Creates a StringSearchContext for term over this attribute; params is unused. */ AttributeVector::SearchContext::UP +StringAttribute::getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const +{ + (void) params; + return SearchContext::UP(new StringSearchContext(std::move(term), *this)); +} + +/* Sort key: a C-string pointer plus a byte cursor (_pos) that SortDataCharRadix advances. The default constructor leaves both members uninitialised. */ class SortDataChar { +public: + SortDataChar() { } + SortDataChar(const char *s) : _data(s), _pos(0) { } + operator const char * () const { return _data; } + bool operator != (const vespalib::string & b) const { return b != _data; } + const char * _data; + uint32_t _pos; +}; + +/* Packs up to four bytes starting at _pos into a uint32_t with the first byte in the most significant position, stopping at the first NUL, and advances _pos by the number of non-NUL bytes consumed. Returns 0 without advancing when the cursor is already at the NUL. */ class SortDataCharRadix +{ +public: + uint32_t operator () (SortDataChar & a) const { + uint32_t r(0); + const uint8_t *u((const uint8_t *)(a._data)); + if (u[a._pos]) { + r |= u[a._pos + 0] << 24; + if (u[a._pos + 1]) { + r |= u[a._pos + 1] << 16; + if (u[a._pos + 2]) { + r |= u[a._pos + 2] << 8; + if (u[a._pos + 3]) { + r |= u[a._pos + 3]; + a._pos += 4; + } else { + a._pos += 3; + } + } else { + a._pos += 2; + } + } else { + a._pos += 1; + } + } + return r; + } +}; + +/* Less-than ordering by strcmp over the whole string; _pos is not consulted. */ class StdSortDataCharCompare : public std::binary_function +{ +public: + bool operator() (const SortDataChar & x, const SortDataChar & y) const { + return cmp(x, y) < 0; + } + int cmp(const SortDataChar & a, const SortDataChar & b) const { + int retval = strcmp(a._data, b._data); + return retval; + } +}; + + +/* True when the byte at the key's cursor is the terminating NUL. */ class SortDataCharEof +{ +public: + bool operator () (const SortDataChar & a) const { return a._data[a._pos] == 0; } + static bool alwaysEofOnCheck() { return false; } +}; + +/* Sorts the sz keys beginning at start via search::radix_sort, allocating a scratch array of sz elements for the call. */ class StringSorter { +public: + typedef const char * constcharp; + void operator() (SortDataChar * start, size_t sz) const { + vespalib::Array radixScratchPad(sz); + 
search::radix_sort(SortDataCharRadix(), StdSortDataCharCompare(), SortDataCharEof(), 1, start, sz, &radixScratchPad[0], 0, 32); + } +}; + +/* Returns the number of NUL bytes in bt[0, sz). */ size_t StringAttribute::countZero(const char * bt, size_t sz) +{ + size_t size(0); + for(size_t i(0); i < sz; i++) { + if (bt[i] == '\0') { + size++; + } + } + return size; +} + +/* Replaces the contents of offsets with the start offset of every NUL-terminated string in bt[0, sz). Trailing bytes that are not followed by a NUL produce no entry. The running offset is held in a uint32_t. */ void StringAttribute::generateOffsets(const char * bt, size_t sz, OffsetVector & offsets) +{ + offsets.clear(); + uint32_t start(0); + for (size_t i(0); i < sz; i++) { + if (bt[i] == '\0') { + offsets.push_back(start); + start = i + 1; + } + } +} + +/* Creates a string attribute with a default STRING config, an empty change list and an empty-string default value. */ StringAttribute::StringAttribute(const vespalib::string & name) : + AttributeVector(name, Config(BasicType::STRING)), + _changes(), + _defaultValue(ChangeBase::UPDATE, 0, vespalib::string("")) +{ +} + +/* Same as above but with the caller's config c. */ StringAttribute::StringAttribute(const vespalib::string & name, const Config & c) : + AttributeVector(name, c), + _changes(), + _defaultValue(ChangeBase::UPDATE, 0, vespalib::string("")) +{ +} + +/* NOTE(review): from here up to the memcpy-based serializer the text lost every span between a less-than and a greater-than sign during extraction (the static_cast targets, the loop bodies of the get() overloads, and the head of onSerializeForAscendingSort). What remains is not compilable as shown; restore it from the upstream file before relying on it. */ uint32_t StringAttribute::get(DocId doc, WeightedInt * v, uint32_t sz) const +{ + WeightedConstChar * s = new WeightedConstChar[sz]; + uint32_t n = static_cast(this)->get(doc, s, sz); + for(uint32_t i(0),m(std::min(n,sz)); i(this)->get(doc, s, sz); + for(uint32_t i(0),m(std::min(n,sz)); i(this)->get(doc, s, sz); + for(uint32_t i(0),m(std::min(n,sz)); i(this)->get(doc, s, sz); + for(uint32_t i(0),m(std::min(n,sz)); i(serTo); + const char *value(get(doc)); + int size = strlen(value) + 1; + vespalib::ConstBufferRef buf(value, size); + if (bc != 0) { + buf = bc->convert(buf); + } + if (available >= (long)buf.size()) { + memcpy(dst, buf.data(), buf.size()); + } else { + return -1; + } + return buf.size(); +} + +/* Takes the document's string including its terminating NUL, runs it through bc when bc is non-null, and writes each byte as 0xff minus the byte into serTo. Returns the number of bytes written, or -1 when available is smaller than that. */ long StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const +{ + (void) bc; + unsigned char *dst = static_cast(serTo); + const char *value(get(doc)); + int size = strlen(value) + 1; + vespalib::ConstBufferRef buf(value, size); + if (bc != 0) { + 
buf = bc->convert(buf); + } + if (available >= (long)buf.size()) { + const uint8_t * src(static_cast(buf.data())); + for (size_t i(0), m(buf.size()); i < m; ++i) { + dst[i] = 0xff - src[i]; + } + } else { + return -1; + } + return buf.size(); +} + +/* Captures the prefix and regex flags from qTerm, takes ownership of it, and sizes the value buffer from the attribute's max value count (the buffer itself starts out null). For regex terms a case-insensitive Regexp is compiled from the term text. */ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm, + const StringAttribute & toBeSearched) : + SearchContext(toBeSearched), + _isPrefix(qTerm->isPrefix()), + _isRegex(qTerm->isRegex()), + _queryTerm(std::move(qTerm)), + _bufferLen(toBeSearched.getMaxValueCount()), + _buffer() +{ + queryTerm().term(_termUCS4); + if (isRegex()) { + _regex.reset(new Regexp(_queryTerm->getTerm(), Regexp::Flags().enableICASE())); + } +} + +/* Frees the value buffer if one was allocated. */ StringAttribute::StringSearchContext::~StringSearchContext() +{ + if (_buffer != NULL) { + delete [] _buffer; + } +} + + +/* Forwards the clear to AttributeVector::clearDoc with the change list. Returns the document's value count when the attribute is multi-value and doc is below getNumDocs(), otherwise 0. */ uint32_t StringAttribute::clearDoc(DocId doc) +{ + uint32_t removed(0); + if (hasMultiValue() && (doc < getNumDocs())) { + removed = getValueCount(doc); + } + AttributeVector::clearDoc(_changes, doc); + + return removed; +} + +namespace { + +/* Accessor that returns the pointer it is given unchanged. */ class DirectAccessor { +public: + DirectAccessor() { } + const char * get(const char * v) const { return v; } +}; + +} + +/* Fetches at most _bufferLen values for docId, adds up the weights of the matching ones into weight, and returns whether any value matched. */ bool +StringAttribute::StringSearchContext::onCmp(DocId docId, int32_t & weight) const +{ + WeightedConstChar * buffer = getBuffer(); + uint32_t valueCount = attribute().get(docId, buffer, _bufferLen); + + CollectWeight collector; + DirectAccessor accessor; + collectMatches(buffer, std::min(valueCount, _bufferLen), accessor, collector); + weight = collector.getWeight(); + return collector.hasMatch(); +} + +/* Returns true as soon as one of the (at most _bufferLen) values fetched for docId matches. */ bool +StringAttribute::StringSearchContext::onCmp(DocId docId) const +{ + WeightedConstChar * buffer = getBuffer(); + uint32_t valueCount = attribute().get(docId, buffer, _bufferLen); + for (uint32_t i = 0, m = std::min(valueCount, _bufferLen); (i < m); i++) { + if (isMatch(buffer[i].getValue())) { + return true; + } + } + + return false; +} + +/* Converts fv to a string and forwards the weight adjustment for that value to AttributeVector::adjustWeight. */ bool StringAttribute::applyWeight(DocId doc, const 
FieldValue & fv, const ArithmeticValueUpdate & wAdjust) +{ + vespalib::string v = fv.getAsString(); + return AttributeVector::adjustWeight(_changes, doc, StringChangeData(v), wAdjust); +} + +/* Always returns false; both arguments are ignored. */ bool StringAttribute::apply(DocId, const ArithmeticValueUpdate & ) +{ + return false; +} + +/* Walks the NUL-separated strings in dataBuffer and records doc id, index within the doc, value pointer and weight for each value in loaded. Value count per doc and weight both default to 1 when hasIdx / hasWeight are false. Afterwards: sort by value, fillPostings, fillEnum, reset dataBuffer, sort by doc id, fillValues. */ template +void StringAttribute::loadAllAtOnce(T & loaded, FileUtil::LoadedBuffer::UP dataBuffer, uint32_t numDocs, ReaderBase & attrReader, bool hasWeight, bool hasIdx) +{ + if (dataBuffer->c_str()) { + const char *value = dataBuffer->c_str(); + for(uint32_t docIdx(0), valueIdx(0); docIdx < numDocs; docIdx++) { + uint32_t currValueCount(hasIdx ? attrReader.getNextValueCount() : 1); + for(uint32_t subIdx(0); subIdx < currValueCount; subIdx++) { + loaded[valueIdx]._docId = docIdx; + loaded[valueIdx]._idx = subIdx; + loaded[valueIdx].setValue(value); + loaded[valueIdx].setWeight(hasWeight ? attrReader.getNextWeight() : 1); + valueIdx++; + /* skip past this value's terminating NUL */ while(*value++) { } + } + } + } + + attribute::sortLoadedByValue(loaded); + fillPostings(loaded); + loaded.rewind(); + fillEnum(loaded); + + dataBuffer.reset(); + + attribute::sortLoadedByDocId(loaded); + loaded.rewind(); + fillValues(loaded); +} + +/* Loads an attribute stored in enumerated form. Reads the buffer from loadUDAT(), derives doc and value counts from the reader, builds the enum index vector with fillEnum0, then fills either a loaded vector (when hasPostings()) or an enum histogram via fillEnumIdx and finishes with fillPostingsFixupEnum or fixupEnumRefCounts respectively. Timing of each phase is logged at debug level. Always returns true. */ bool +StringAttribute::onLoadEnumerated(ReaderBase &attrReader) +{ + FileUtil::LoadedBuffer::UP udatBuffer(loadUDAT()); + + bool hasIdx(attrReader.hasIdx()); + size_t numDocs(0); + uint64_t numValues(0); + if (hasIdx) { + numDocs = attrReader.getNumIdx() - 1; + numValues = attrReader.getNumValues(); + uint64_t enumCount = attrReader.getEnumCount(); + assert(numValues == enumCount); + (void) enumCount; + } else { + numValues = attrReader.getEnumCount(); + numDocs = numValues; + } + + LOG(debug, + "StringAttribute::onLoadEnumerated: attribute '%s' %u docs, %u values", + getBaseFileName().c_str(), + (unsigned int) numDocs, + (unsigned int) numValues); + EnumIndexVector eidxs; + FastOS_Time timer; + FastOS_Time timer0; + timer0.SetNow(); + LOG(debug, "start fillEnum0"); + timer.SetNow(); + 
fillEnum0(udatBuffer->buffer(), udatBuffer->size(), eidxs); + LOG(debug, "done fillEnum0, %u unique values, %8.3f s elapsed", + (unsigned int) eidxs.size(), timer.MilliSecsToNow() / 1000); + setNumDocs(numDocs); + setCommittedDocIdLimit(numDocs); + LoadedEnumAttributeVector loaded; + EnumVector enumHist; + if (hasPostings()) { + loaded.reserve(numValues); + } else { + EnumVector(eidxs.size(), 0).swap(enumHist); + } + timer.SetNow(); + LOG(debug, "start fillEnumIdx"); + if(hasPostings()) { + fillEnumIdx(attrReader, + numValues, + eidxs, + loaded); + } else { + fillEnumIdx(attrReader, + numValues, + eidxs, + enumHist); + } + LOG(debug, "done fillEnumIdx, %8.3f s elapsed", + timer.MilliSecsToNow() / 1000); + + /* swap with an empty temporary so eidxs gives up its contents here */ EnumIndexVector().swap(eidxs); + + if (hasPostings()) { + LOG(debug, "start sort loaded"); + timer.SetNow(); + + attribute::sortLoadedByEnum(loaded); + + LOG(debug, "done sort loaded, %8.3f s elapsed", + timer.MilliSecsToNow() / 1000); + + LOG(debug, "start fillPostingsFixupEnum"); + timer.SetNow(); + + if (numDocs > 0) { + onAddDoc(numDocs - 1); + } + fillPostingsFixupEnum(loaded); + + LOG(debug, "done fillPostingsFixupEnum, %8.3f s elapsed", + timer.MilliSecsToNow() / 1000); + } else { + LOG(debug, "start fixupEnumRefCounts"); + timer.SetNow(); + + fixupEnumRefCounts(enumHist); + + LOG(debug, "done fixupEnumRefCounts, %8.3f s elapsed", + timer.MilliSecsToNow() / 1000); + } + + LOG(debug, "attribute '%s', loaded, %8.3f s elapsed", + getBaseFileName().c_str(), + timer0.MilliSecsToNow() / 1000); + return true; +} + +/* Loads the attribute through a ReaderBase. Returns false when the reader reports no load data. Enumerated files are handed to onLoadEnumerated(). Otherwise the buffer from loadDAT() is used: counts come from the reader when it has idx data, else from counting NUL bytes (one value per doc), and everything is loaded with loadAllAtOnce(). */ bool StringAttribute::onLoad() +{ + ReaderBase attrReader(*this); + bool ok(attrReader.getHasLoadData()); + + if (!ok) + return false; + + setCreateSerialNum(attrReader.getCreateSerialNum()); + + if (attrReader.getEnumerated()) + return onLoadEnumerated(attrReader); + + FileUtil::LoadedBuffer::UP dataBuffer(loadDAT()); + + bool hasIdx(attrReader.hasIdx()); + size_t numDocs(0); + uint32_t numValues(0); + if (hasIdx) { + numDocs = attrReader.getNumIdx() 
- 1; + numValues = attrReader.getNumValues(); + } else if (dataBuffer->c_str()) { + numValues = countZero(dataBuffer->c_str(), dataBuffer->size()); + numDocs = numValues; + } + + setNumDocs(numDocs); + setCommittedDocIdLimit(numDocs); + if (numDocs > 0) { + onAddDoc(numDocs - 1); + } + + LoadedVectorR loaded(numValues); + loadAllAtOnce(loaded, std::move(dataBuffer), numDocs, attrReader, + hasWeightedSetType(), hasIdx); + + return true; +} + + +/* Base implementation: ignores doc and returns false. */ bool +StringAttribute::onAddDoc(DocId doc) +{ + (void) doc; + return false; +} + + +/* The three fill hooks below ignore their argument and do nothing in this base class. */ void StringAttribute::fillPostings(LoadedVector & loaded) +{ + (void) loaded; +} + +void StringAttribute::fillEnum(LoadedVector & loaded) +{ + (void) loaded; +} + +void StringAttribute::fillValues(LoadedVector & loaded) +{ + (void) loaded; +} + +/* The enumerated-load hooks below ignore their arguments and only print their own name to stderr in this base class. */ void +StringAttribute::fillEnum0(const void *src, + size_t srcLen, + EnumIndexVector &eidxs) +{ + (void) src; + (void) srcLen; + (void) eidxs; + fprintf(stderr, "StringAttribute::fillEnum0\n"); +} + + +void +StringAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + LoadedEnumAttributeVector &loaded) +{ + (void) attrReader; + (void) numValues; + (void) eidxs; + (void) loaded; + fprintf(stderr, "StringAttribute::fillEnumIdx (loaded)\n"); +} + + +void +StringAttribute::fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + EnumVector &enumHist) +{ + (void) attrReader; + (void) numValues; + (void) eidxs; + (void) enumHist; + fprintf(stderr, "StringAttribute::fillEnumIdx (enumHist)\n"); +} + + +void +StringAttribute::fillPostingsFixupEnum(const LoadedEnumAttributeVector &loaded) +{ + (void) loaded; + fprintf(stderr, "StringAttribute::fillPostingsFixupEnum\n"); +} + +void +StringAttribute::fixupEnumRefCounts(const EnumVector &enumHist) +{ + (void) enumHist; + fprintf(stderr, "StringAttribute::fixupEnumRefCounts\n"); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h 
b/searchlib/src/vespa/searchlib/attribute/stringbase.h new file mode 100644 index 00000000000..a70cc6ecfab --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +class StringEntryType; + +/* Abstract base for string attribute vectors. append/remove/update forward to AttributeVector together with the change list _changes; get(DocId), findEnum() and getFromEnum() are pure virtual. */ class StringAttribute : public AttributeVector +{ +public: + typedef vespalib::Array OffsetVector; + typedef const char * LoadedValueType; + typedef EnumStoreBase::Index EnumIndex; + typedef EnumStoreBase::IndexVector EnumIndexVector; + typedef EnumStoreBase::EnumVector EnumVector; + typedef attribute::LoadedStringVector LoadedVector; +public: + DECLARE_IDENTIFIABLE_ABSTRACT(StringAttribute); + bool append(DocId doc, const vespalib::string & v, int32_t weight) { + return AttributeVector::append(_changes, doc, StringChangeData(v), weight); + } + template + bool append(DocId doc, Accessor & ac) { + return AttributeVector::append(_changes, doc, ac); + } + bool remove(DocId doc, const vespalib::string & v, int32_t weight) { + return AttributeVector::remove(_changes, doc, StringChangeData(v), weight); + } + bool update(DocId doc, const vespalib::string & v) { + return AttributeVector::update(_changes, doc, StringChangeData(v)); + } + bool apply(DocId doc, const ArithmeticValueUpdate & op); + virtual bool applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust); + virtual bool findEnum(const char * value, EnumHandle & e) const = 0; + virtual uint32_t get(DocId doc, largeint_t * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, double * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const; + virtual uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const; + virtual const char *get(DocId doc) const = 0; + virtual uint32_t 
clearDoc(DocId doc); + virtual largeint_t getDefaultValue() const { return 0; } + /* countZero and generateOffsets operate on a buffer of consecutive NUL-terminated strings. */ static size_t countZero(const char * bt, size_t sz); + static void generateOffsets(const char * bt, size_t sz, OffsetVector & offsets); + virtual const char * getFromEnum(EnumHandle e) const = 0; + +protected: + StringAttribute(const vespalib::string & name); + StringAttribute(const vespalib::string & name, const Config & c); + static const char * defaultValue() { return ""; } + typedef ChangeTemplate Change; + typedef ChangeVectorT< Change > ChangeVector; + typedef StringEntryType EnumEntryType; + ChangeVector _changes; + Change _defaultValue; + virtual bool onLoad(); + + bool onLoadEnumerated(ReaderBase &attrReader); + + virtual bool + onAddDoc(DocId doc); +private: + typedef attribute::LoadedStringVectorReal LoadedVectorR; + /* Load-time hooks; all of the fill and fixup functions below are virtual. */ virtual void fillPostings(LoadedVector & loaded); + virtual void fillEnum(LoadedVector & loaded); + virtual void fillValues(LoadedVector & loaded); + + virtual void + fillEnum0(const void *src, + size_t srcLen, + EnumIndexVector &eidxs); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + attribute::LoadedEnumAttributeVector &loaded); + + virtual void + fillEnumIdx(ReaderBase &attrReader, + uint64_t numValues, + const EnumIndexVector &eidxs, + EnumVector &enumHist); + + virtual void + fillPostingsFixupEnum(const attribute::LoadedEnumAttributeVector &loaded); + + virtual void + fixupEnumRefCounts(const EnumVector &enumHist); + + /* Numeric views parse the stored string: getInt uses strtoll with base 0, getFloat uses strtod; the end pointer is not inspected, so parse failures are not reported. getString ignores the caller's buffer and returns get(doc). */ virtual largeint_t getInt(DocId doc) const { return strtoll(get(doc), NULL, 0); } + virtual double getFloat(DocId doc) const { return strtod(get(doc), NULL); } + virtual const char * getString(DocId doc, char * v, size_t sz) const { (void) v; (void) sz; return get(doc); } + + virtual long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const; + virtual long onSerializeForDescendingSort(DocId doc, void * serTo, long available, 
const common::BlobConverter * bc) const; + + template + void loadAllAtOnce(T & loaded, FileUtil::LoadedBuffer::UP dataBuffer, uint32_t numDocs, ReaderBase & attrReader, bool hasWeight, bool hasIdx); + + /* Search context for one query term over a string attribute; supports exact, prefix and regex matching. */ class StringSearchContext : public SearchContext { + public: + StringSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched); + virtual ~StringSearchContext(); + private: + bool _isPrefix; + bool _isRegex; + protected: + /* Valid when a query term is held and it is not empty. */ bool valid() const override { + return (_queryTerm.get() && (!_queryTerm->empty())); + } + + const QueryTermBase & queryTerm() const override { + return static_cast(*_queryTerm); + } + /* Regex terms delegate to the compiled regex. Otherwise src is read as UTF-8, each code point is lower-cased and compared with _termUCS4 until the term ends or a code point differs. The result is true when the whole term was consumed and either src ended at the same position or this is a prefix term. */ bool isMatch(const char *src) const { + if (__builtin_expect(isRegex(), false)) { + return getRegex()->match(src); + } + vespalib::Utf8ReaderForZTS u8reader(src); + uint32_t j = 0; + uint32_t val; + for (;; ++j) { + val = u8reader.getChar(); + val = vespalib::LowerCase::convert(val); + if (_termUCS4[j] == 0 || _termUCS4[j] != val) { + break; + } + } + return (_termUCS4[j] == 0 && (val == 0 || isPrefix())); + } + /* Collector that ignores the weights and reports the number of matches as the weight. */ class CollectHitCount { + public: + CollectHitCount() : _hitCount(0) { } + void addWeight(int32_t w) { + (void) w; + _hitCount++; + } + int32_t getWeight() const { return _hitCount; } + bool hasMatch() const { return _hitCount != 0; } + private: + uint32_t _hitCount; + }; + /* Collector that sums the weights of all matches and counts them. */ class CollectWeight { + public: + CollectWeight() : _hitCount(0), _weight(0) { } + void addWeight(int32_t w) { + _weight += w; + _hitCount++; + } + int32_t getWeight() const { return _weight; } + bool hasMatch() const { return _hitCount != 0; } + private: + uint32_t _hitCount; + int32_t _weight; + }; + + /* Calls collector.addWeight with the weight of each of the first sz entries of w whose value, seen through ac, matches the term. */ template + void collectMatches(const WeightedT * w, size_t sz, const Accessor & ac, Collector & collector) const { + for (uint32_t i(0); i < sz; i++) { + if (isMatch(ac.get(w[i].value()))) { + collector.addWeight(w[i].weight()); + } + } + } + + + bool onCmp(DocId docId, int32_t & weight) const override; + bool onCmp(DocId docId) const override; + + bool isPrefix() 
const { return _isPrefix; } + bool isRegex() const { return _isRegex; } + QueryTermSimple::UP _queryTerm; + const ucs4_t * _termUCS4; + const vespalib::Regexp * getRegex() const { return _regex.get(); } + private: + /* Allocates the _bufferLen-element value buffer on first use and returns it; the destructor frees it. */ WeightedConstChar * getBuffer() const { + if (_buffer == NULL) { + _buffer = new WeightedConstChar[_bufferLen]; + } + return _buffer; + } + unsigned _bufferLen; + mutable WeightedConstChar * _buffer; + std::unique_ptr _regex; + }; + SearchContext::UP getSearch(QueryTermSimple::UP term, const SearchContext::Params & params) const override; +}; + +} + + diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp b/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp new file mode 100644 index 00000000000..0e83749847f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorattribute.cpp @@ -0,0 +1,270 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "tensorattribute.h" +#include +#include "tensorattributesaver.h" + +using vespalib::tensor::Tensor; + +namespace search { + +namespace attribute { + +namespace { + +constexpr uint32_t TENSOR_ATTRIBUTE_VERSION = 0; + +// minimum dead bytes in tensor attribute before compaction is considered +constexpr size_t DEAD_SLACK = 0x10000u; + + +/* ReaderBase extension: getNextTensorSize() reads a host-order value through a FileReader on _datFile, readTensor() reads len raw bytes from _datFile. */ class TensorReader : public AttributeVector::ReaderBase +{ +private: + FileReader _tensorSizeReader; +public: + TensorReader(AttributeVector &attr) + : AttributeVector::ReaderBase(attr), + _tensorSizeReader(*_datFile) + { + } + uint32_t getNextTensorSize() { return _tensorSizeReader.readHostOrder(); } + void readTensor(void *buf, size_t len) { _datFile->ReadBuf(buf, len); } +}; + +} + +TensorAttribute::TensorAttribute(const vespalib::stringref &baseFileName, + const Config &cfg) + : NotImplementedAttribute(baseFileName, cfg), + _refVector(cfg.getGrowStrategy().getDocsInitialCapacity(), + cfg.getGrowStrategy().getDocsGrowPercent(), + 
cfg.getGrowStrategy().getDocsGrowDelta(), + getGenerationHolder()), + _tensorStore(), + _tensorMapper(cfg.tensorType()), + _compactGeneration(0) +{ +} + + +/* Clears the hold lists of the generation holder and of the tensor store. */ TensorAttribute::~TensorAttribute() +{ + getGenerationHolder().clearHoldLists(); + _tensorStore.clearHoldLists(); +} + + + +/* Replaces the reference stored for docId with a default-constructed RefType. When the previous reference was valid it is passed to the store's holdTensor() and 1 is returned; otherwise 0. docId is not range-checked here. */ uint32_t +TensorAttribute::clearDoc(DocId docId) +{ + RefType oldRef(_refVector[docId]); + updateUncommittedDocIdLimit(docId); + _refVector[docId] = RefType(); + if (oldRef.valid()) { + _tensorStore.holdTensor(oldRef); + return 1u; + } + return 0u; +} + + +/* Asks the store which buffer to compact, moves every tensor whose reference points into that buffer, and stores the new reference in the ref vector after a release fence. Then finishes the compaction, records the current generation in _compactGeneration, bumps the generation and refreshes statistics. */ void +TensorAttribute::compactWorst() +{ + uint32_t bufferId = _tensorStore.startCompactWorstBuffer(); + size_t lidLimit = _refVector.size(); + for (uint32_t lid = 0; lid < lidLimit; ++lid) { + RefType ref = _refVector[lid]; + if (ref.valid() && ref.bufferId() == bufferId) { + RefType newRef = _tensorStore.move(ref); + // TODO: validate if following fence is sufficient. + std::atomic_thread_fence(std::memory_order_release); + _refVector[lid] = newRef; + } + } + _tensorStore.finishCompactWorstBuffer(bufferId); + _compactGeneration = getCurrentGeneration(); + incGeneration(); + updateStat(true); +} + +/* Bumps the generation. If the first used generation is newer than the last compaction, compacts the worst buffer when dead bytes are at least DEAD_SLACK and more than one fifth of used bytes. */ void +TensorAttribute::onCommit() +{ + // Note: Cost can be reduced if unneeded generation increments are dropped + incGeneration(); + if (getFirstUsedGeneration() > _compactGeneration) { + // No data held from previous compact operation + Status &status = getStatus(); + size_t used = status.getUsed(); + size_t dead = status.getDead(); + if ((dead >= DEAD_SLACK) && (dead * 5 > used)) { + compactWorst(); + } + } +} + + +/* Merges the memory usage of the ref vector and the tensor store, adds the bytes held by the generation holder as on-hold bytes, and passes the totals to updateStatistics(); _refVector.size() is used for both leading count arguments. */ void +TensorAttribute::onUpdateStat() +{ + // update statistics + MemoryUsage total = _refVector.getMemoryUsage(); + total.merge(_tensorStore.getMemoryUsage()); + total.incAllocatedBytesOnHold(getGenerationHolder().getHeldBytes()); + this->updateStatistics(_refVector.size(), + _refVector.size(), + total.allocatedBytes(), + total.usedBytes(), + total.deadBytes(), + total.allocatedBytesOnHold()); +} + + +void 
+TensorAttribute::removeOldGenerations(generation_t firstUsed) +{ + _tensorStore.trimHoldLists(firstUsed); + getGenerationHolder().trimHoldLists(firstUsed); +} + +void +TensorAttribute::onGenerationChange(generation_t generation) +{ + getGenerationHolder().transferHoldLists(generation - 1); + _tensorStore.transferHoldLists(generation - 1); +} + + +bool +TensorAttribute::addDoc(DocId &docId) +{ + bool incGen = _refVector.isFull(); + _refVector.push_back(RefType()); + AttributeVector::incNumDocs(); + docId = AttributeVector::getNumDocs() - 1; + updateUncommittedDocIdLimit(docId); + if (incGen) { + incGeneration(); + } else { + removeAllOldGenerations(); + } + return true; +} + + +void +TensorAttribute::setTensor(DocId docId, const Tensor &tensor) +{ + assert(docId < _refVector.size()); + updateUncommittedDocIdLimit(docId); + // TODO: Handle generic tensor attribute in a better way ? + RefType ref = _tensorStore.setTensor( + getConfig().tensorType().is_tensor() ? + *_tensorMapper.map(tensor) : tensor); + // TODO: validate if following fence is sufficient. + std::atomic_thread_fence(std::memory_order_release); + // TODO: Check if refVector must consist of std::atomic + _refVector[docId] = ref; +} + + +std::unique_ptr +TensorAttribute::getTensor(DocId docId) const +{ + RefType ref; + if (docId < getCommittedDocIdLimit()) { + ref = _refVector[docId]; + } + if (!ref.valid()) { + return std::unique_ptr(); + } + return _tensorStore.getTensor(ref); +} + + +void +TensorAttribute::clearDocs(DocId lidLow, DocId lidLimit) +{ + assert(lidLow <= lidLimit); + assert(lidLimit <= this->getNumDocs()); + for (DocId lid = lidLow; lid < lidLimit; ++lid) { + RefType &ref = _refVector[lid]; + if (ref.valid()) { + _tensorStore.holdTensor(ref); + ref = RefType(); + } + } +} + + +void +TensorAttribute::onShrinkLidSpace() +{ + // Tensors for lids > committedDocIdLimit have been cleared. 
+ uint32_t committedDocIdLimit = getCommittedDocIdLimit(); + assert(_refVector.size() >= committedDocIdLimit); + _refVector.shrink(committedDocIdLimit); + setNumDocs(committedDocIdLimit); +} + + +bool +TensorAttribute::onLoad() +{ + TensorReader tensorReader(*this); + if (!tensorReader.hasData()) { + return false; + } + setCreateSerialNum(tensorReader.getCreateSerialNum()); + assert(tensorReader.getVersion() == TENSOR_ATTRIBUTE_VERSION); + uint32_t numDocs(tensorReader.getDocIdLimit()); + _refVector.reset(); + _refVector.unsafe_reserve(numDocs); + for (uint32_t lid = 0; lid < numDocs; ++lid) { + uint32_t tensorSize = tensorReader.getNextTensorSize(); + auto raw = _tensorStore.allocRawBuffer(tensorSize); + if (tensorSize != 0) { + tensorReader.readTensor(raw.first, tensorSize); + } + _refVector.push_back(raw.second); + } + setNumDocs(numDocs); + setCommittedDocIdLimit(numDocs); + return true; +} + + +uint32_t +TensorAttribute::getVersion() const +{ + return TENSOR_ATTRIBUTE_VERSION; +} + + +TensorAttribute::RefCopyVector +TensorAttribute::getRefCopy() const +{ + uint32_t size = getCommittedDocIdLimit(); + assert(size <= _refVector.size()); + return RefCopyVector(&_refVector[0], &_refVector[0] + size); +} + +std::unique_ptr +TensorAttribute::onInitSave() +{ + vespalib::GenerationHandler::Guard guard(getGenerationHandler(). + takeGuard()); + return std::make_unique + (std::move(guard), + this->createSaveTargetConfig(), + getRefCopy(), + _tensorStore); +} + +} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattribute.h b/searchlib/src/vespa/searchlib/attribute/tensorattribute.h new file mode 100644 index 00000000000..954d211d13f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorattribute.h @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "not_implemented_attribute.h" +#include "tensorstore.h" +#include +#include + +namespace vespalib { namespace tensor { class Tensor; } } + +namespace search { + +namespace attribute { + +/** + * Attribute vector class used to store tensors for all documents in memory. + */ +class TensorAttribute : public NotImplementedAttribute +{ +private: + using RefType = TensorStore::RefType; + using RefVector = RcuVectorBase; + + RefVector _refVector; // docId -> ref in data store for serialized tensor + TensorStore _tensorStore; // data store for serialized tensors + vespalib::tensor::TensorMapper _tensorMapper; // mapper to our tensor type + uint64_t _compactGeneration; // Generation when last compact occurred + + void compactWorst(); +public: + using RefCopyVector = vespalib::Array; + using Tensor = vespalib::tensor::Tensor; + TensorAttribute(const vespalib::stringref &baseFileName, const Config &cfg); + ~TensorAttribute(); + virtual uint32_t clearDoc(DocId docId) override; + virtual void onCommit() override; + virtual void onUpdateStat() override; + virtual void removeOldGenerations(generation_t firstUsed) override; + virtual void onGenerationChange(generation_t generation) override; + virtual bool addDoc(DocId &docId) override; + void setTensor(DocId docId, const Tensor &tensor); + std::unique_ptr getTensor(DocId docId) const; + virtual void clearDocs(DocId lidLow, DocId lidLimit) override; + virtual void onShrinkLidSpace() override; + virtual bool onLoad() override; + virtual uint32_t getVersion() const override; + RefCopyVector getRefCopy() const; + virtual std::unique_ptr onInitSave() override; +}; + + +} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp new file mode 100644 index 00000000000..6c27689a0c9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.cpp @@ 
-0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "tensorattributesaver.h" +#include + +using vespalib::GenerationHandler; +using search::IAttributeSaveTarget; + +namespace search { + +namespace attribute { + +TensorAttributeSaver:: +TensorAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + RefCopyVector &&refs, + const TensorStore &tensorStore) + : AttributeSaver(std::move(guard), cfg), + _refs(std::move(refs)), + _tensorStore(tensorStore) +{ +} + + +TensorAttributeSaver::~TensorAttributeSaver() +{ +} + + +bool +TensorAttributeSaver::onSave(IAttributeSaveTarget &saveTarget) +{ + std::unique_ptr + datWriter(saveTarget.datWriter().allocBufferWriter()); + const uint32_t docIdLimit(_refs.size()); + for (uint32_t lid = 0; lid < docIdLimit; ++lid) { + auto raw = _tensorStore.getRawBuffer(_refs[lid]); + datWriter->write(&raw.second, sizeof(raw.second)); + if (raw.second != 0) { + datWriter->write(raw.first, raw.second); + } + } + datWriter->flush(); + return true; +} + + +} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h new file mode 100644 index 00000000000..e988e1b05ec --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorattributesaver.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "attributesaver.h" +#include "iattributesavetarget.h" +#include "tensorattribute.h" + +namespace search { + +namespace attribute { + +/* + * Class for saving a tensor attribute. 
+ */ +class TensorAttributeSaver : public AttributeSaver +{ +public: + using RefCopyVector = TensorAttribute::RefCopyVector; +private: + RefCopyVector _refs; + const TensorStore &_tensorStore; + using GenerationHandler = vespalib::GenerationHandler; + + virtual bool onSave(IAttributeSaveTarget &saveTarget) override; +public: + TensorAttributeSaver(GenerationHandler::Guard &&guard, + const IAttributeSaveTarget::Config &cfg, + RefCopyVector &&refs, + const TensorStore &tensorStore); + + virtual ~TensorAttributeSaver(); +}; + +} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp b/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp new file mode 100644 index 00000000000..83b870cfaeb --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorstore.cpp @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "tensorstore.h" +#include +#include +#include +#include +#include +#include +#include + +using vespalib::tensor::Tensor; +using vespalib::tensor::TypedBinaryFormat; +using document::DeserializeException; + +namespace search { + +namespace attribute { + +constexpr size_t MIN_BUFFER_CLUSTERS = 1024; + +TensorStore::TensorStore() + : _store(), + _type(RefType::align(1), + MIN_BUFFER_CLUSTERS, + RefType::offsetSize() / RefType::align(1)), + _typeId(0) +{ + _store.addType(&_type); + _store.initActiveBuffers(); +} + + +TensorStore::~TensorStore() +{ + _store.dropBuffers(); +} + + +std::pair +TensorStore::getRawBuffer(RefType ref) const +{ + if (!ref.valid()) { + return std::make_pair(nullptr, 0u); + } + const char *buf = _store.getBufferEntry(ref.bufferId(), + ref.offset()); + uint32_t len = *reinterpret_cast(buf); + return std::make_pair(buf + sizeof(uint32_t), len); +} + + +std::pair +TensorStore::allocRawBuffer(uint32_t size) +{ + if (size == 0) { + return std::make_pair(nullptr, 
RefType()); + } + size_t extSize = size + sizeof(uint32_t); + size_t bufSize = RefType::align(extSize); + _store.ensureBufferCapacity(_typeId, bufSize); + uint32_t activeBufferId = _store.getActiveBufferId(_typeId); + btree::BufferState &state = _store.getBufferState(activeBufferId); + size_t oldSize = state.size(); + char *bufferEntryWritePtr = + _store.getBufferEntry(activeBufferId, oldSize); + *reinterpret_cast(bufferEntryWritePtr) = size; + char *padWritePtr = bufferEntryWritePtr + extSize; + for (size_t i = extSize; i < bufSize; ++i) { + *padWritePtr++ = 0; + } + state.pushed_back(bufSize); + return std::make_pair(bufferEntryWritePtr + sizeof(uint32_t), + RefType(oldSize, activeBufferId)); +} + +void +TensorStore::hold(RefType ref) +{ + if (!ref.valid()) { + return; + } + const char *buf = _store.getBufferEntry(ref.bufferId(), + ref.offset()); + uint32_t len = *reinterpret_cast(buf); + _store.holdElem(ref, len + sizeof(uint32_t)); +} + + +TensorStore::RefType +TensorStore::move(RefType ref) { + if (!ref.valid()) { + return RefType(); + } + auto oldraw = getRawBuffer(ref); + auto newraw = allocRawBuffer(oldraw.second); + memcpy(newraw.first, oldraw.first, oldraw.second); + _store.holdElem(ref, oldraw.second + sizeof(uint32_t)); + return newraw.second; +} + +std::unique_ptr +TensorStore::getTensor(RefType ref) const +{ + auto raw = getRawBuffer(ref); + if (raw.second == 0u) { + return std::unique_ptr(); + } + vespalib::nbostream wrapStream(raw.first, raw.second); + auto tensor = TypedBinaryFormat::deserialize(wrapStream); + if (wrapStream.size() != 0) { + throw DeserializeException("Leftover bytes deserializing " + "tensor attribute value.", + VESPA_STRLOC); + } + return std::move(tensor); +} + + +TensorStore::RefType +TensorStore::setTensor(const Tensor &tensor) +{ + vespalib::nbostream stream; + TypedBinaryFormat::serialize(stream, tensor); + auto raw = allocRawBuffer(stream.size()); + memcpy(raw.first, stream.peek(), stream.size()); + return raw.second; +} + 
+} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/tensorstore.h b/searchlib/src/vespa/searchlib/attribute/tensorstore.h new file mode 100644 index 00000000000..669362ea57f --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/tensorstore.h @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace vespalib { namespace tensor { class Tensor; } } + +namespace search { + +namespace attribute { + +/** + * Class for storing serialized tensors in memory, used by TensorAttribute. + * + * Serialization format is subject to change. Changes to serialization format + * might also require corresponding changes to implemented optimized tensor + * operations that use the serialized tensor as argument. + */ +class TensorStore +{ +public: + using RefType = btree::AlignedEntryRefT<22, 2>; + using DataStoreType = btree::DataStoreT; + typedef vespalib::GenerationHandler::generation_t generation_t; + using Tensor = vespalib::tensor::Tensor; + +private: + DataStoreType _store; + btree::BufferType _type; + const uint32_t _typeId; + +public: + TensorStore(); + + ~TensorStore(); + + // Inherit doc from DataStoreBase + void + trimHoldLists(generation_t usedGen) + { + _store.trimHoldLists(usedGen); + } + + // Inherit doc from DataStoreBase + void + transferHoldLists(generation_t generation) + { + _store.transferHoldLists(generation); + } + + void + clearHoldLists(void) + { + _store.clearHoldLists(); + } + + MemoryUsage + getMemoryUsage() const + { + return _store.getMemoryUsage(); + } + + + std::pair getRawBuffer(RefType ref) const; + + std::pair allocRawBuffer(uint32_t size); + + void hold(RefType ref); + + RefType move(RefType ref); + + std::unique_ptr getTensor(RefType ref) const; + + void holdTensor(RefType ref) { hold(ref); } + + RefType setTensor(const Tensor &tensor); + + uint32_t 
startCompactWorstBuffer() { + return _store.startCompactWorstBuffer(_typeId); + } + + void finishCompactWorstBuffer(uint32_t bufferId) { + _store.holdBuffer(bufferId); + } +}; + + +} // namespace search::attribute + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/bitcompression/.gitignore b/searchlib/src/vespa/searchlib/bitcompression/.gitignore new file mode 100644 index 00000000000..0b3af54ee50 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt new file mode 100644 index 00000000000..51d299bacfa --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_searchlib_bitcompression OBJECT + SOURCES + compression.cpp + countcompression.cpp + pagedict4.cpp + posocccompression.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/bitcompression/OWNERS b/searchlib/src/vespa/searchlib/bitcompression/OWNERS new file mode 100644 index 00000000000..64735d11d93 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/OWNERS @@ -0,0 +1 @@ +tegge diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.cpp b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp new file mode 100644 index 00000000000..06c96dc96ee --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.cpp @@ -0,0 +1,450 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP(".compression"); +#include "compression.h" +#include +#include +#include +#include + +namespace search +{ + +namespace bitcompression +{ + +using vespalib::nbostream; + +uint8_t CodingTables::_log2Table[65536]; + +CodingTables tables; // Static initializer + +CodingTables::CodingTables() +{ + unsigned int x; + uint8_t log2Val; + + for(x=0; x<65536; x++) { + unsigned int val = x; + for (log2Val = 0; (val >>= 1) != 0; log2Val++); + _log2Table[x] = log2Val; + } +} + +uint64_t CodingTables::_intMask64[65] = +{ + (UINT64_C(1) << 0) - 1, (UINT64_C(1) << 1) - 1, + (UINT64_C(1) << 2) - 1, (UINT64_C(1) << 3) - 1, + (UINT64_C(1) << 4) - 1, (UINT64_C(1) << 5) - 1, + (UINT64_C(1) << 6) - 1, (UINT64_C(1) << 7) - 1, + (UINT64_C(1) << 8) - 1, (UINT64_C(1) << 9) - 1, + (UINT64_C(1) << 10) - 1, (UINT64_C(1) << 11) - 1, + (UINT64_C(1) << 12) - 1, (UINT64_C(1) << 13) - 1, + (UINT64_C(1) << 14) - 1, (UINT64_C(1) << 15) - 1, + (UINT64_C(1) << 16) - 1, (UINT64_C(1) << 17) - 1, + (UINT64_C(1) << 18) - 1, (UINT64_C(1) << 19) - 1, + (UINT64_C(1) << 20) - 1, (UINT64_C(1) << 21) - 1, + (UINT64_C(1) << 22) - 1, (UINT64_C(1) << 23) - 1, + (UINT64_C(1) << 24) - 1, (UINT64_C(1) << 25) - 1, + (UINT64_C(1) << 26) - 1, (UINT64_C(1) << 27) - 1, + (UINT64_C(1) << 28) - 1, (UINT64_C(1) << 29) - 1, + (UINT64_C(1) << 30) - 1, (UINT64_C(1) << 31) - 1, + (UINT64_C(1) << 32) - 1, (UINT64_C(1) << 33) - 1, + (UINT64_C(1) << 34) - 1, (UINT64_C(1) << 35) - 1, + (UINT64_C(1) << 36) - 1, (UINT64_C(1) << 37) - 1, + (UINT64_C(1) << 38) - 1, (UINT64_C(1) << 39) - 1, + (UINT64_C(1) << 40) - 1, (UINT64_C(1) << 41) - 1, + (UINT64_C(1) << 42) - 1, (UINT64_C(1) << 43) - 1, + (UINT64_C(1) << 44) - 1, (UINT64_C(1) << 45) - 1, + (UINT64_C(1) << 46) - 1, (UINT64_C(1) << 47) - 1, + (UINT64_C(1) << 48) - 1, (UINT64_C(1) << 49) - 1, + (UINT64_C(1) << 50) - 1, (UINT64_C(1) << 51) - 1, + 
(UINT64_C(1) << 52) - 1, (UINT64_C(1) << 53) - 1, + (UINT64_C(1) << 54) - 1, (UINT64_C(1) << 55) - 1, + (UINT64_C(1) << 56) - 1, (UINT64_C(1) << 57) - 1, + (UINT64_C(1) << 58) - 1, (UINT64_C(1) << 59) - 1, + (UINT64_C(1) << 60) - 1, (UINT64_C(1) << 61) - 1, + (UINT64_C(1) << 62) - 1, (UINT64_C(1) << 63) - 1, + static_cast(-1), +}; + + +uint64_t +CodingTables::_intMask64le[65] = +{ + /**/ 0, -(UINT64_C(1) << 63), + -(UINT64_C(1) << 62), -(UINT64_C(1) << 61), + -(UINT64_C(1) << 60), -(UINT64_C(1) << 59), + -(UINT64_C(1) << 58), -(UINT64_C(1) << 57), + -(UINT64_C(1) << 56), -(UINT64_C(1) << 55), + -(UINT64_C(1) << 54), -(UINT64_C(1) << 53), + -(UINT64_C(1) << 52), -(UINT64_C(1) << 51), + -(UINT64_C(1) << 50), -(UINT64_C(1) << 49), + -(UINT64_C(1) << 48), -(UINT64_C(1) << 47), + -(UINT64_C(1) << 46), -(UINT64_C(1) << 45), + -(UINT64_C(1) << 44), -(UINT64_C(1) << 43), + -(UINT64_C(1) << 42), -(UINT64_C(1) << 41), + -(UINT64_C(1) << 40), -(UINT64_C(1) << 39), + -(UINT64_C(1) << 38), -(UINT64_C(1) << 37), + -(UINT64_C(1) << 36), -(UINT64_C(1) << 35), + -(UINT64_C(1) << 34), -(UINT64_C(1) << 33), + -(UINT64_C(1) << 32), -(UINT64_C(1) << 31), + -(UINT64_C(1) << 30), -(UINT64_C(1) << 29), + -(UINT64_C(1) << 28), -(UINT64_C(1) << 27), + -(UINT64_C(1) << 26), -(UINT64_C(1) << 25), + -(UINT64_C(1) << 24), -(UINT64_C(1) << 23), + -(UINT64_C(1) << 22), -(UINT64_C(1) << 21), + -(UINT64_C(1) << 20), -(UINT64_C(1) << 19), + -(UINT64_C(1) << 18), -(UINT64_C(1) << 17), + -(UINT64_C(1) << 16), -(UINT64_C(1) << 15), + -(UINT64_C(1) << 14), -(UINT64_C(1) << 13), + -(UINT64_C(1) << 12), -(UINT64_C(1) << 11), + -(UINT64_C(1) << 10), -(UINT64_C(1) << 9), + -(UINT64_C(1) << 8), -(UINT64_C(1) << 7), + -(UINT64_C(1) << 6), -(UINT64_C(1) << 5), + -(UINT64_C(1) << 4), -(UINT64_C(1) << 3), + -(UINT64_C(1) << 2), -(UINT64_C(1) << 1), + static_cast(-1), +}; + + +void +EncodeContext64Base::checkPointWrite(nbostream &out) +{ + out << _cacheInt << _cacheFree; +} + + +void 
+EncodeContext64Base::checkPointRead(nbostream &in) +{ + in >> _cacheInt >> _cacheFree; +} + + +void +DecodeContext64Base::checkPointWrite(nbostream &out) +{ + (void) out; +} + + +void +DecodeContext64Base::checkPointRead(nbostream &in) +{ + (void) in; +} + +} // namespace bitcompression + + +namespace +{ + +vespalib::string noFeatures = "NoFeatures"; + +} + +namespace bitcompression +{ + +template +void +FeatureDecodeContext:: +readBytes(uint8_t *buf, size_t len) +{ + while (len > 0) { + // Ensure that buffer to read from isn't empty + if (__builtin_expect(_valI >= _valE, false)) + _readContext->readComprBuffer(); + uint64_t readOffset = getReadOffset(); + // Validate that read offset is byte aligned + assert((readOffset & 7) == 0); + // Get start and end of buffer to read from, then calculate size + const uint8_t *rbuf = reinterpret_cast(getCompr()) + + (getBitOffset() >> 3); + const uint8_t *rbufE = reinterpret_cast(_realValE); + size_t rbufSize = rbufE - rbuf; // Size of buffer to read from + // How much to copy in this iteration of the loop + size_t copySize = std::min(rbufSize, len); + // Something must be copied during each iteration + assert(copySize > 0); + memcpy(buf, rbuf, copySize); + buf += copySize; + len -= copySize; + // Adjust read position to account for bytes read + _readContext->setPosition(readOffset + copySize * 8); + } + if (__builtin_expect(_valI >= _valE, false)) + _readContext->readComprBuffer(); +} + + +template +uint32_t +FeatureDecodeContext:: +readHeader(vespalib::GenericHeader &header, int64_t fileSize) +{ + size_t hhSize = vespalib::GenericHeader::getMinSize(); + assert(static_cast(hhSize) <= fileSize); + vespalib::DataBuffer dataBuffer(32768u); + dataBuffer.ensureFree(hhSize); + readBytes(reinterpret_cast(dataBuffer.getFree()), + hhSize); + dataBuffer.moveFreeToData(hhSize); + vespalib::GenericHeader::BufferReader bufferReader(dataBuffer); + uint32_t headerLen = vespalib::GenericHeader::readSize(bufferReader); + // Undo read from 
buffer + dataBuffer.moveDeadToData(hhSize - dataBuffer.getDataLen()); + assert(headerLen <= fileSize); + (void) fileSize; + if (headerLen > hhSize) { + // Read remaining header into buffer + dataBuffer.ensureFree(headerLen - hhSize); + readBytes(reinterpret_cast(dataBuffer.getFree()), + headerLen - hhSize); + dataBuffer.moveFreeToData(headerLen - hhSize); + } + uint32_t len = header.read(bufferReader); + assert(len >= header.getSize()); + assert(len == headerLen); + return headerLen; +} + + +template +void +FeatureEncodeContext:: +writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength) +{ + typedef FeatureEncodeContext EC; + UC64_ENCODECONTEXT_CONSTRUCTOR(o, _); + + if (bitOffset + bitLength < 64) { + uint32_t length = bitLength; + if (bigEndian) { + uint64_t data = (EC::bswap(*bits) >> + (64 - bitOffset - length)) & + CodingTables::_intMask64[length]; + UC64BE_WRITEBITS_NS(o, EC); + } else { + uint64_t data = (EC::bswap(*bits) >> bitOffset) & + CodingTables::_intMask64[length]; + UC64LE_WRITEBITS_NS(o, EC); + } + } else { + uint32_t bitsLeft = bitLength; + do { + uint32_t length = 64 - bitOffset; + bitsLeft -= length; + if (bigEndian) { + uint64_t data = EC::bswap(*bits) & + CodingTables::_intMask64[length]; + UC64BE_WRITEBITS_NS(o, EC); + } else { + uint64_t data = (EC::bswap(*bits) >> bitOffset) & + CodingTables::_intMask64[length]; + UC64LE_WRITEBITS_NS(o, EC); + } + ++bits; + } while (0); + while (bitsLeft >= 64) { + uint32_t length = 64; + uint64_t data = EC::bswap(*bits); + UC64_WRITEBITS_NS(o, EC); + ++bits; + bitsLeft -= 64; + if (__builtin_expect(oBufI >= _valE, false)) { + UC64_ENCODECONTEXT_STORE(o, _); + _writeContext->writeComprBuffer(false); + UC64_ENCODECONTEXT_LOAD(o, _); + } + } + if (bitsLeft > 0) { + uint32_t length = bitsLeft; + if (bigEndian) { + uint64_t data = EC::bswap(*bits) >> (64 - length); + UC64BE_WRITEBITS_NS(o, EC); + } else { + uint64_t data = EC::bswap(*bits) & + CodingTables::_intMask64[length]; + 
UC64LE_WRITEBITS_NS(o, EC); + } + } + } + UC64_ENCODECONTEXT_STORE(o, _); + if (__builtin_expect(oBufI >= _valE, false)) { + _writeContext->writeComprBuffer(false); + } +} + + +template +void +FeatureEncodeContext:: +writeString(const vespalib::stringref &buf) +{ + size_t len = buf.size(); + for (unsigned int i = 0; i < len; ++i) { + writeBits(static_cast(buf[i]), 8); + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + } + writeBits(0, 8); +} + + +template +void +FeatureEncodeContext:: +writeHeader(const vespalib::GenericHeader &header) +{ + vespalib::DataBuffer dataBuffer(32768u); + vespalib::GenericHeader::BufferWriter bufferWriter(dataBuffer); + dataBuffer.ensureFree(header.getSize()); + header.write(bufferWriter); + const uint8_t *data = reinterpret_cast + (dataBuffer.getData()); + uint32_t offset = (reinterpret_cast(data) & 7); + data -= offset; + uint32_t bitOffset = offset * 8; + uint32_t bitLen = dataBuffer.getDataLen() * 8; + writeBits(reinterpret_cast(data), bitOffset, bitLen); +} + + +template +void +FeatureDecodeContext:: +readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + (void) header; + (void) prefix; +} + + +template +const vespalib::string & +FeatureDecodeContext::getIdentifier(void) const +{ + return noFeatures; +} + + +template +void +FeatureDecodeContext::readFeatures(DocIdAndFeatures &features) +{ + (void) features; +} + + +template +void +FeatureDecodeContext::skipFeatures(unsigned int count) +{ + (void) count; +} + + +template +void +FeatureDecodeContext:: +unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId) +{ + if (matchData.size() == 1) { + matchData[0]->reset(docId); + } +} + + +template +void +FeatureDecodeContext:: +setParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +template +void +FeatureDecodeContext:: +getParams(PostingListParams ¶ms) const +{ + params.clear(); +} + + +template +void +FeatureEncodeContext:: 
+readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + (void) header; + (void) prefix; +} + + +template +void +FeatureEncodeContext:: +writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const +{ + (void) header; + (void) prefix; +} + + +template +const vespalib::string & +FeatureEncodeContext::getIdentifier(void) const +{ + return noFeatures; +} + + +template +void +FeatureEncodeContext::writeFeatures(const DocIdAndFeatures &features) +{ + (void) features; +} + + +template +void +FeatureEncodeContext:: +setParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +template +void +FeatureEncodeContext:: +getParams(PostingListParams ¶ms) const +{ + params.clear(); +} + + +template class FeatureDecodeContext; +template class FeatureDecodeContext; + +template class FeatureEncodeContext; +template class FeatureEncodeContext; + + +} // namespace bitcompression + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h new file mode 100644 index 00000000000..a2d1eaff93e --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -0,0 +1,1933 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+// NOTE(review): the five #include directives below carry no header name
+// in this copy (the <...> targets appear to have been stripped); restore
+// them from the upstream file.
+#include
+#include
+#include
+#include
+#include
+
+namespace vespalib
+{
+
+class nbostream;
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace index
+{
+
+class DocIdAndFeatures;
+
+} // namespace index
+
+namespace fef
+{
+
+class TermFieldMatchDataArray;
+
+} // namespace fef
+
+
+namespace bitcompression
+{
+
+/**
+ * Immutable (word pointer, bit offset) pair identifying a position in a
+ * stream of 64-bit words.
+ */
+class Position {
+public:
+    Position(const uint64_t * occurences, int bitOffset) : _occurences(occurences), _bitOffset(bitOffset) { }
+    const uint64_t * getOccurences() const { return _occurences; }
+    int getBitOffset() const { return _bitOffset; }
+private:
+    const uint64_t * _occurences;
+    int _bitOffset;
+};
+
+// Use inline assembly for log2 calculations
+#define DO_ASMLOG
+
+/*
+ * The so-called rice2 code is very similar to the well known exp
+ * golomb code. One difference is that the first bits are inverted.
+ * rice code is a special case of golomb code, with M being a power of
+ * two (2^k). rice coding uses unary coding for quotient, while remainder
+ * bits are just written as they are.
+ *
+ * Rice2 (k=0) starts with: 0, 100, 101, 11000, 11001, 11010, 11011
+ * Rice2 (k=1) starts with: 00, 01, 1000, 1001, 1010, 1011, 110000
+ * Exp golomb (k=0) starts with: 1, 010, 011, 00100, 00101, 00110, 00111
+ * Exp golomb (k=1) starts with: 10, 11, 0100, 0101, 0110, 0111, 001000
+ * unary coding: 0, 10, 110, 1110, 11110, 111110, 1111110
+ * rice coding (k=0) 0, 10, 110, 1110, 11110, 111110, 1111110
+ * rice coding (k=1) 00, 01, 100, 101, 1100, 1101, 11100
+ *
+ * For k=0, exp golomb coding is the same as elias gamma coding.
+ * For k=0, rice coding is the same as unary coding.
+ *
+ * k values up to and including 63 are supported for exp golomb coding
+ * and decoding.
+ *
+ * The *SMALL* macros only support k values up to and including 62
+ * (trading flexibility for a minor speed improvement) and numbers
+ * that can be encoded within 64 bits.
+ */
+
+// Masks selecting the top 1, 2 and 4 bits of a 64-bit word.
+#define TOP_BIT64 UINT64_C(0x8000000000000000)
+#define TOP_2_BITS64 UINT64_C(0xC000000000000000)
+#define TOP_4_BITS64 UINT64_C(0xF000000000000000)
+
+// Compression parameters for zcposting file word headers.
+#define K_VALUE_ZCPOSTING_NUMDOCS 0
+#define K_VALUE_ZCPOSTING_LASTDOCID 22
+#define K_VALUE_ZCPOSTING_DOCIDSSIZE 22
+#define K_VALUE_ZCPOSTING_L1SKIPSIZE 12
+#define K_VALUE_ZCPOSTING_L2SKIPSIZE 10
+#define K_VALUE_ZCPOSTING_L3SKIPSIZE 8
+#define K_VALUE_ZCPOSTING_L4SKIPSIZE 6
+#define K_VALUE_ZCPOSTING_FEATURESSIZE 25
+#define K_VALUE_ZCPOSTING_DELTA_DOCID 22
+
+/**
+ * Lookup tables used for compression / decompression.
+ *
+ * _log2Table is indexed by a 16-bit value, the two mask tables by a bit
+ * count in the range 0..64. The table contents are filled in elsewhere.
+ * NOTE(review): presumably _intMask64[n] selects the n low bits and
+ * _intMask64le[n] the n high bits -- confirm against the definition.
+ */
+class CodingTables
+{
+public:
+    static uint8_t _log2Table[65536];
+    static uint64_t _intMask64[65];
+    static uint64_t _intMask64le[65];
+
+    CodingTables(void);
+};
+
+// The UC64_DECODECONTEXT* macros keep a decode state (word pointer, work
+// word, cache word, number of unread cache bits) in local variables named
+// <prefix>Compr, <prefix>Val, <prefix>CacheInt and <prefix>PreRead.
+// "ctx" is token-pasted in front of valI / val / cacheInt / preRead, so it
+// must be the leading part of those member names.
+
+// Declare the four local variables, uninitialized.
+#define UC64_DECODECONTEXT(prefix) \
+    const uint64_t * prefix ## Compr; \
+    uint64_t prefix ## Val; \
+    uint64_t prefix ## CacheInt; \
+    uint32_t prefix ## PreRead;
+
+// Declare the four local variables, initialized from the context members.
+#define UC64_DECODECONTEXT_CONSTRUCTOR(prefix, ctx) \
+    const uint64_t * prefix ## Compr = ctx ## valI; \
+    uint64_t prefix ## Val = ctx ## val; \
+    uint64_t prefix ## CacheInt = ctx ## cacheInt; \
+    uint32_t prefix ## PreRead = ctx ## preRead;
+
+// Copy the full state from the context members into existing locals.
+#define UC64_DECODECONTEXT_LOAD(prefix, ctx) \
+    prefix ## Compr = ctx ## valI; \
+    prefix ## Val = ctx ## val; \
+    prefix ## CacheInt = ctx ## cacheInt; \
+    prefix ## PreRead = ctx ## preRead;
+
+// Copy only the word pointer from the context into the local.
+#define UC64_DECODECONTEXT_LOAD_PARTIAL(prefix, ctx) \
+    prefix ## Compr = ctx ## valI;
+
+// Copy the full state from the locals back into the context members.
+#define UC64_DECODECONTEXT_STORE(prefix, ctx) \
+    ctx ## valI = prefix ## Compr; \
+    ctx ## val = prefix ## Val; \
+    ctx ## cacheInt = prefix ## CacheInt; \
+    ctx ## preRead = prefix ## PreRead;
+
+
+// Copy only the word pointer from the local back into the context.
+#define UC64_DECODECONTEXT_STORE_PARTIAL(prefix, ctx) \
+    ctx ## valI = prefix ## Compr;
+
+// Big-endian bit reader macros.
+//
+// All macros in this family use a variable named "length" from the
+// enclosing scope (and modify it); the decode macros additionally assign
+// the decoded number to a variable named "val64".
+
+// Refill: OR "length" fresh bits into val. The bits are taken from
+// cacheInt while it has enough unread bits (preRead); otherwise the rest
+// of cacheInt is used and the next word is loaded through EC::bswap from
+// *valI. Callers shift the consumed bits out of val before invoking this.
+#define UC64BE_READBITS(val, valI, preRead, cacheInt, EC) \
+    do { \
+        if (__builtin_expect(length <= preRead, true)) { \
+            val |= ((cacheInt >> (preRead - length)) & \
+                    ::search::bitcompression::CodingTables::_intMask64[length]); \
+            preRead -= length; \
+        } else { \
+            if (__builtin_expect(preRead > 0, true)) { \
+                length -= preRead; \
+                val |= ((cacheInt & \
+                         ::search::bitcompression::CodingTables:: \
+                         _intMask64[preRead]) << length); \
+            } \
+            cacheInt = EC::bswap(*valI++); \
+            preRead = 64 - length; \
+            val |= cacheInt >> preRead; \
+        } \
+    } while (0)
+
+// Same, operating on the local variables <prefix>Val, <prefix>Compr,
+// <prefix>PreRead and <prefix>CacheInt.
+#define UC64BE_READBITS_NS(prefix, EC) \
+    UC64BE_READBITS(prefix ## Val, prefix ## Compr, \
+                    prefix ## PreRead, prefix ## CacheInt, EC)
+
+// Same, operating on the members of a decode context object.
+// Note: the expansion already ends with a semicolon.
+#define UC64BE_READBITS_CTX(ctx, EC) \
+    UC64BE_READBITS(ctx._val, ctx._valI, \
+                    ctx._preRead, ctx._cacheInt, EC);
+
+
+// Start reading at bit "bitOffset" of the word at *valI: loads that word
+// into cacheInt and then fills val with the next 64 bits.
+#define UC64BE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+    do { \
+        cacheInt = EC::bswap(*valI++); \
+        preRead = 64 - bitOffset; \
+        val = 0; \
+        length = 64; \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+// The _NS and _CTX setup variants expand to two statements that are not
+// wrapped in do/while, so they must not be used as the body of an
+// unbraced if/else/loop.
+#define UC64BE_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+    ns ## Compr = comprData; \
+    UC64BE_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+                     ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64BE_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+    ctx._valI = comprData; \
+    UC64BE_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+                     ctx._preRead, ctx._cacheInt, EC);
+
+// Decode one exp golomb number with parameter k into val64 and refill val.
+// The number of leading zero bits of val gives the prefix length; when the
+// whole code does not fit in 64 bits the prefix is consumed first with an
+// extra refill.
+#define UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        length = \
+            63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+        unsigned int olength = length; \
+        val <<= length; \
+        if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
+            UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+            length = 0; \
+        } \
+        val64 = (val >> (63 - olength - (k))) - (UINT64_C(1) << (k)); \
+        val <<= olength + 1 + (k); \
+        if (__builtin_expect(olength + 1 + (k) == 64, false)) \
+            val = 0; \
+        length += olength + 1 + (k); \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+#define UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64BE_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                               prefix ## PreRead, prefix ## CacheInt, \
+                               k, EC); \
+    } while (0)
+
+// As UC64BE_DECODEEXPGOLOMB but without the handling of codes longer than
+// 64 bits (no intermediate refill, no shift-by-64 guard).
+#define UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+                                     EC) \
+    do { \
+        length = \
+            63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+        val <<= length; \
+        val64 = (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
+        val <<= length + 1 + (k); \
+        length += length + 1 + (k); \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64BE_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                     prefix ## PreRead, prefix ## CacheInt, \
+                                     k, EC); \
+    } while (0)
+
+#define UC64BE_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+    do { \
+        UC64BE_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+                                     ctx._preRead, ctx._cacheInt, \
+                                     k, EC); \
+    } while (0)
+
+// As the _SMALL decoder, but instead of assigning to val64 the decoded
+// expression is placed directly after the "resop" tokens, so resop must
+// be the left part of a statement (e.g. an assignment such as "dst =").
+#define UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt,\
+                                           k, EC, resop) \
+    do { \
+        length = \
+            63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+        val <<= length; \
+        resop (val >> (63 - length - (k))) - (UINT64_C(1) << (k)); \
+        val <<= length + 1 + (k); \
+        length += length + 1 + (k); \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+// Skip one exp golomb number: same bit consumption as
+// UC64BE_DECODEEXPGOLOMB but val64 is not computed.
+#define UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        length = \
+            63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+        unsigned int olength = length; \
+        val <<= length; \
+        if (__builtin_expect(length * 2 + 1 + (k) > 64, false)) { \
+            UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+            length = 0; \
+        } \
+        val <<= olength + 1 + (k); \
+        if (__builtin_expect(olength + 1 + (k) == 64, false)) \
+            val = 0; \
+        length += olength + 1 + (k); \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+// Skip one exp golomb number using the local variables <prefix>Val,
+// <prefix>Compr, <prefix>PreRead and <prefix>CacheInt.
+#define UC64BE_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64BE_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                             prefix ## PreRead, prefix ## CacheInt, \
+                             k, EC); \
+    } while (0)
+
+// Skip variant without the handling of codes longer than 64 bits.
+#define UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+                                   EC) \
+    do { \
+        length = \
+            63 - ::search::bitcompression::EncodeContext64BE::log2(val); \
+        val <<= length; \
+        val <<= length + 1 + (k); \
+        length += length + 1 + (k); \
+        UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+#define UC64BE_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64BE_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                   prefix ## PreRead, prefix ## CacheInt, \
+                                   k, EC); \
+    } while (0)
+
+// Big-endian bit writer. Uses the variables "data" (the bits) and
+// "length" (the bit count) from the enclosing scope and modifies length.
+// When the cache word becomes full it is stored through EC::bswap to
+// *bufI and a new, empty cache word is started.
+#define UC64BE_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+    do { \
+        if (length >= cacheFree) { \
+            cacheInt |= ((data >> (length - cacheFree)) & \
+                         ::search::bitcompression::CodingTables:: \
+                         _intMask64[cacheFree]); \
+            *bufI++ = EC::bswap(cacheInt); \
+            length -= cacheFree; \
+            cacheInt = 0; \
+            cacheFree = 64; \
+        } \
+        if (length > 0) { \
+            uint64_t dataFragment = \
+                (data & ::search::bitcompression::CodingTables:: \
+                 _intMask64[length]); \
+            cacheInt |= (dataFragment << (cacheFree - length)); \
+            cacheFree -= length; \
+        } \
+    } while (0)
+
+
+// Writer operating on the local variables <prefix>CacheInt,
+// <prefix>CacheFree and <prefix>BufI.
+#define UC64BE_WRITEBITS_NS(prefix, EC) \
+    do { \
+        UC64BE_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+                         prefix ## BufI, EC); \
+    } while (0)
+
+// Writer operating on context members; "ctx" is token-pasted in front of
+// cacheInt / cacheFree / valI.
+#define UC64BE_WRITEBITS_CTX(ctx, EC) \
+    do { \
+        UC64BE_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \
+                         ctx ## valI, EC); \
+    } while (0)
+
+// Decode a number written with a two-level escape in front of the exp
+// golomb code: bit "0" gives 0, bits "10" give 1, bits "11" are followed
+// by an exp golomb number to which 2 is added. Result is left in val64.
+#define UC64BE_DECODEDEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        if ((prefix ## Val & TOP_BIT64) == 0) { \
+            length = 1; \
+            prefix ## Val <<= 1; \
+            val64 = 0; \
+            UC64BE_READBITS_NS(prefix, EC); \
+        } else { \
+            if ((prefix ## Val & TOP_2_BITS64) != TOP_2_BITS64) { \
+                length = 2; \
+                prefix ## Val <<= 2; \
+                val64 = 1; \
+                UC64BE_READBITS_NS(prefix, EC); \
+            } else { \
+                length = 2; \
+                prefix ## Val <<= 2; \
+                UC64BE_READBITS_NS(prefix, EC); \
+                UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC); \
+                val64 += 2; \
+            } \
+        } \
+    } while (0)
+
+// Decode a number written with a one-level escape: bit "0" gives 0, bit
+// "1" is followed by an exp golomb number to which 1 is added.
+#define UC64BE_DECODED0EXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        if ((prefix ## Val & TOP_BIT64) == 0) { \
+            length = 1; \
+            prefix ## Val <<= 1; \
+            val64 = 0; \
+            UC64BE_READBITS_NS(prefix, EC); \
+        } else { \
+            length = 1; \
+            prefix ## Val <<= 1; \
+            UC64BE_READBITS_NS(prefix, EC); \
+            UC64BE_DECODEEXPGOLOMB_NS(prefix, k, EC); \
+            val64 += 1; \
+        } \
+    } while (0)
+
+// Little-endian bit reader macros. They mirror the UC64BE_* family with
+// the shift directions reversed and use the _intMask64le table. The same
+// rules apply: "length" (and "val64" for the decoders) must exist in the
+// enclosing scope.
+#define UC64LE_READBITS(val, valI, preRead, cacheInt, EC) \
+    do { \
+        if (__builtin_expect(length <= preRead, true)) { \
+            val |= ((cacheInt << (preRead - length)) & \
+                    ::search::bitcompression::CodingTables::_intMask64le[length]); \
+            preRead -= length; \
+        } else { \
+            if (__builtin_expect(preRead > 0, true)) { \
+                length -= preRead; \
+                val |= ((cacheInt & \
+                         ::search::bitcompression::CodingTables:: \
+                         _intMask64le[preRead]) >> length); \
+            } \
+            cacheInt = EC::bswap(*valI++); \
+            preRead = 64 - length; \
+            val |= cacheInt << preRead; \
+        } \
+    } while (0)
+
+#define UC64LE_READBITS_NS(prefix, EC) \
+    UC64LE_READBITS(prefix ## Val, prefix ## Compr, \
+                    prefix ## PreRead, prefix ## CacheInt, EC)
+
+// Note: the expansion already ends with a semicolon.
+#define UC64LE_READBITS_CTX(ctx, EC) \
+    UC64LE_READBITS(ctx._val, ctx._valI, \
+                    ctx._preRead, ctx._cacheInt, EC);
+
+
+// Start reading at bit "bitOffset" of the word at *valI.
+#define UC64LE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+    do { \
+        cacheInt = EC::bswap(*valI++); \
+        preRead = 64 - bitOffset; \
+        val = 0; \
+        length = 64; \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+// The _NS and _CTX setup variants expand to two unwrapped statements.
+#define UC64LE_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+    ns ## Compr = comprData; \
+    UC64LE_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+                     ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64LE_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+    ctx._valI = comprData; \
+    UC64LE_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+                     ctx._preRead, ctx._cacheInt, EC);
+
+// Decode one exp golomb number with parameter k into val64. The index of
+// the lowest set bit of val gives the prefix length.
+#define UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        unsigned int olength = \
+            ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+        length = olength + 1; \
+        val >>= length; \
+        if (__builtin_expect(length == 64, false)) \
+            val = 0; \
+        if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
+            UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+            length = 0; \
+        } \
+        val64 = (val & ((UINT64_C(1) << (olength + (k))) - 1)) + \
+                (UINT64_C(1) << (olength + (k))) - (UINT64_C(1) << (k)); \
+        val >>= olength + (k); \
+        length += olength + (k); \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+#define UC64LE_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64LE_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                               prefix ## PreRead, prefix ## CacheInt, \
+                               k, EC); \
+    } while (0)
+
+// Decoder without the handling of codes longer than 64 bits.
+#define UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+                                     EC) \
+    do { \
+        length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+        val >>= length + 1; \
+        val64 = (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
+                (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
+        val >>= length + (k); \
+        length += length + 1 + (k); \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64LE_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                     prefix ## PreRead, prefix ## CacheInt, \
+                                     k, EC); \
+    } while (0)
+
+#define UC64LE_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+    do { \
+        UC64LE_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+                                     ctx._preRead, ctx._cacheInt, \
+                                     k, EC); \
+    } while (0)
+
+// As the _SMALL decoder, but the decoded expression is placed directly
+// after the "resop" tokens instead of being assigned to val64.
+#define UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt,\
+                                           k, EC, resop) \
+    do { \
+        length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+        val >>= length + 1; \
+        resop (val & ((UINT64_C(1) << (length + (k))) - 1)) + \
+              (UINT64_C(1) << (length + (k))) - (UINT64_C(1) << (k)); \
+        val >>= length + (k); \
+        length += length + 1 + (k); \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+// Skip one exp golomb number: same bit consumption as the decoder but
+// val64 is not computed.
+#define UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        unsigned int olength = \
+            ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+        length = olength + 1; \
+        val >>= length; \
+        if (__builtin_expect(length == 64, false)) \
+            val = 0; \
+        if (__builtin_expect(olength * 2 + 1 + (k) > 64, false)) { \
+            UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+            length = 0; \
+        } \
+        val >>= olength + (k); \
+        length += olength + (k); \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+
+#define UC64LE_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64LE_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                             prefix ## PreRead, prefix ## CacheInt, \
+                             k, EC); \
+    } while (0)
+
+#define UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+                                   EC) \
+    do { \
+        length = ::search::bitcompression::EncodeContext64LE::ffsl(val); \
+        val >>= length + 1; \
+        val >>= length + (k); \
+        length += length + 1 + (k); \
+        UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+    } while (0)
+
+#define UC64LE_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64LE_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                   prefix ## PreRead, prefix ## CacheInt, \
+                                   k, EC); \
+    } while (0)
+
+// Little-endian bit writer. Uses and modifies the variables "data" and
+// "length" from the enclosing scope.
+// NOTE(review): when cacheFree is 64 (empty cache word, 64-bit write) the
+// statement "data >>= cacheFree" shifts by the full word width, which is
+// undefined behaviour in C++; data is not used afterwards in that case.
+#define UC64LE_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+    do { \
+        if (length >= cacheFree) { \
+            cacheInt |= (data << (64 - cacheFree)); \
+            *bufI++ = EC::bswap(cacheInt); \
+            data >>= cacheFree; \
+            length -= cacheFree; \
+            cacheInt = 0; \
+            cacheFree = 64; \
+        } \
+        if (length > 0) { \
+            uint64_t dataFragment = \
+                (data & ::search::bitcompression::CodingTables:: \
+                 _intMask64[length]); \
+            cacheInt |= (dataFragment << (64 - cacheFree)); \
+            cacheFree -= length; \
+        } \
+    } while (0)
+
+
+#define UC64LE_WRITEBITS_NS(prefix, EC) \
+    do { \
+        UC64LE_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+                         prefix ## BufI, EC); \
+    } while (0)
+
+#define UC64LE_WRITEBITS_CTX(ctx, EC) \
+    do { \
+        UC64LE_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \
+                         ctx ## valI, EC); \
+    } while (0)
+
+// Endian-neutral macros. Each one selects the UC64BE_* or UC64LE_*
+// implementation based on a value named "bigEndian" that must be visible
+// where the macro is expanded.
+#define UC64_READBITS(val, valI, preRead, cacheInt, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_READBITS(val, valI, preRead, cacheInt, EC); \
+        } else { \
+            UC64LE_READBITS(val, valI, preRead, cacheInt, EC); \
+        } \
+    } while (0)
+
+#define UC64_READBITS_NS(prefix, EC) \
+    UC64_READBITS(prefix ## Val, prefix ## Compr, \
+                  prefix ## PreRead, prefix ## CacheInt, EC)
+
+#define UC64_READBITS_CTX(ctx, EC) \
+    UC64_READBITS(ctx._val, ctx._valI, \
+                  ctx._preRead, ctx._cacheInt, EC)
+
+
+#define UC64_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC); \
+        } else { \
+            UC64LE_SETUPBITS(bitOffset, val, valI, preRead, cacheInt, EC); \
+        } \
+    } while (0)
+
+// The _NS and _CTX setup variants expand to two unwrapped statements.
+#define UC64_SETUPBITS_NS(ns, comprData, bitOffset, EC) \
+    ns ## Compr = comprData; \
+    UC64_SETUPBITS((bitOffset), ns ## Val, ns ## Compr, \
+                   ns ## PreRead, ns ## CacheInt, EC);
+
+#define UC64_SETUPBITS_CTX(ctx, comprData, bitOffset, EC) \
+    ctx._valI = comprData; \
+    UC64_SETUPBITS((bitOffset), ctx._val, ctx._valI, \
+                   ctx._preRead, ctx._cacheInt, EC);
+
+#define UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+        } else { \
+            UC64LE_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+        } \
+    } while (0)
+
+#define UC64_DECODEEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64_DECODEEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                             prefix ## PreRead, prefix ## CacheInt, \
+                             k, EC); \
+    } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC);\
+        } else { \
+            UC64LE_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC);\
+        } \
+    } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64_DECODEEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                   prefix ## PreRead, prefix ## CacheInt, \
+                                   k, EC); \
+    } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_CTX(ctx, k, EC) \
+    do { \
+        UC64_DECODEEXPGOLOMB_SMALL(ctx._val, ctx._valI, \
+                                   ctx._preRead, ctx._cacheInt, \
+                                   k, EC); \
+    } while (0)
+
+#define UC64_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+                                         k, EC, resop) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+                                               k, EC, resop); \
+        } else { \
+            UC64LE_DECODEEXPGOLOMB_SMALL_APPLY(val, valI, preRead, cacheInt, \
+                                               k, EC, resop); \
+        } \
+    } while (0)
+
+#define UC64_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+        } else { \
+            UC64LE_SKIPEXPGOLOMB(val, valI, preRead, cacheInt, k, EC); \
+        } \
+    } while (0)
+
+#define UC64_SKIPEXPGOLOMB_NS(prefix, k, EC) \
+    do { \
+        UC64_SKIPEXPGOLOMB(prefix ## Val, prefix ## Compr, \
+                           prefix ## PreRead, prefix ## CacheInt, \
+                           k, EC); \
+    } while (0)
+
+#define UC64_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, \
+                                 EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC); \
+        } else { \
+            UC64LE_SKIPEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, k, EC); \
+        } \
+    } while (0)
+
+#define UC64_SKIPEXPGOLOMB_SMALL_NS(prefix, k, EC) \
+    do { \
+        UC64_SKIPEXPGOLOMB_SMALL(prefix ## Val, prefix ## Compr, \
+                                 prefix ## PreRead, prefix ## CacheInt, \
+                                 k, EC); \
+    } while (0)
+
+#define UC64_WRITEBITS(cacheInt, cacheFree, bufI, EC) \
+    do { \
+        if (bigEndian) { \
+            UC64BE_WRITEBITS(cacheInt, cacheFree, bufI, EC); \
+        } else { \
+            UC64LE_WRITEBITS(cacheInt, cacheFree, bufI, EC); \
+        } \
+    } while (0)
+
+
+#define UC64_WRITEBITS_NS(prefix, EC) \
+    do { \
+        UC64_WRITEBITS(prefix ## CacheInt, prefix ## CacheFree, \
+                       prefix ## BufI, EC); \
+    } while (0)
+
+#define UC64_WRITEBITS_CTX(ctx, EC) \ + do { \ + UC64_WRITEBITS(ctx ## cacheInt, ctx ## cacheFree, \ + ctx ## valI, EC); \ + } while (0) + +#define UC64_ENCODECONTEXT(prefix) \ + uint64_t *prefix ## BufI; \ + uint64_t prefix ## CacheInt; \ + uint32_t prefix ## CacheFree; + +#define UC64_ENCODECONTEXT_CONSTRUCTOR(prefix, ctx) \ + uint64_t *prefix ## BufI = ctx ## valI; \ + uint64_t prefix ## CacheInt = ctx ## cacheInt; \ + uint32_t prefix ## CacheFree = ctx ## cacheFree; + +#define UC64_ENCODECONTEXT_LOAD(prefix, ctx) \ + prefix ## BufI = ctx ## valI; \ + prefix ## CacheInt = ctx ## cacheInt; \ + prefix ## CacheFree = ctx ## cacheFree; + +#define UC64_ENCODECONTEXT_LOAD_PARTIAL(prefix, ctx) \ + prefix ## BufI = ctx ## valI; + +#define UC64_ENCODECONTEXT_STORE(prefix, ctx) \ + ctx ## valI = prefix ## BufI; \ + ctx ## cacheInt = prefix ## CacheInt; \ + ctx ## cacheFree = prefix ## CacheFree; + +#define UC64_ENCODECONTEXT_STORE_PARTIAL(prefix, ctx) \ + ctx ## valI = prefix ## BufI; + + +class EncodeContext64Base : public search::ComprFileEncodeContext +{ +public: + enum Constants { + END_BUFFER_SAFETY = 4 + }; + + typedef uint64_t UnitType; + + // Pointers to compressed data + uint64_t *_valI; + const uint64_t *_valE; + + // Cached integers + + // _cacheInt is the second level of integer cache. It holds the + // next bits (_cacheFree bits of this integer is free) + uint64_t _cacheInt; + uint32_t _cacheFree; + + // File position for start of buffer minus byte address of start of buffer + // plus sizeof uint64_t. Then shifted left by 3 to represent bits. 
+ uint64_t _fileWriteBias; + + EncodeContext64Base(void) + : search::ComprFileEncodeContext(), + _valI(NULL), + _valE(NULL), + _cacheInt(0), + _cacheFree(64), + _fileWriteBias(64) + { + } + + EncodeContext64Base(const EncodeContext64Base &other) + : search::ComprFileEncodeContext(other), + _valI(other._valI), + _valE(other._valE), + _cacheInt(other._cacheInt), + _cacheFree(other._cacheFree), + _fileWriteBias(other._fileWriteBias) + { + } + + virtual + ~EncodeContext64Base(void) + { + } + + EncodeContext64Base & + operator=(const EncodeContext64Base &rhs) + { + search::ComprFileEncodeContext::operator=(rhs); + _valI = rhs._valI; + _valE = rhs._valE; + _cacheInt = rhs._cacheInt; + _cacheFree = rhs._cacheFree; + _fileWriteBias = rhs._fileWriteBias; + return *this; + } + + /** + * Get number of used units (e.g. _valI - start) + */ + virtual int + getUsedUnits(void *start) + { + return _valI - static_cast(start); + } + + /** + * Get normal full buffer size (e.g. _valE - start) + */ + virtual int + getNormalMaxUnits(void *start) + { + return _valE - static_cast(start); + } + + /** + * Adjust buffer after write (e.g. _valI, _fileWriteBias) + */ + virtual void + afterWrite(search::ComprBuffer &cbuf, + uint32_t remainingUnits, + uint64_t bufferStartFilePos) + { + _valI = static_cast(cbuf._comprBuf) + remainingUnits; + _fileWriteBias = (bufferStartFilePos - + reinterpret_cast(cbuf._comprBuf) + + sizeof(uint64_t)) << 3; + adjustBufSize(cbuf); + } + + /** + * Adjust buffer size to align end of buffer. 
+ */ + virtual void + adjustBufSize(search::ComprBuffer &cbuf) + { + uint64_t fileWriteOffset = + (_fileWriteBias + + ((reinterpret_cast(cbuf._comprBuf) - + sizeof(uint64_t)) << 3)) >> 3; + _valE = static_cast(cbuf._comprBuf) + + cbuf._aligner.adjustElements( + fileWriteOffset / sizeof(uint64_t), + cbuf._comprBufSize); + } + + virtual uint32_t + getUnitByteSize(void) const + { + return sizeof(uint64_t); + } + + void + setupWrite(search::ComprBuffer &cbuf) + { + _valI = static_cast(cbuf._comprBuf); + + _fileWriteBias = + (sizeof(uint64_t) - + reinterpret_cast(cbuf._comprBuf)) << 3; + // Buffer for compressed data now has padding after it + adjustBufSize(cbuf); + _cacheInt = 0; + _cacheFree = 64; + } + + void + reload(const EncodeContext64Base &other) + { + _valI = other._valI; + _valE = other._valE; + _cacheInt = other._cacheInt; + _cacheFree = other._cacheFree; + _fileWriteBias = other._fileWriteBias; + } + + void + pushBack(EncodeContext64Base &other) const + { + other._valI = _valI; + other._cacheInt = _cacheInt; + other._cacheFree = _cacheFree; + } + + virtual void + checkPointWrite(vespalib::nbostream &out); + + virtual void + checkPointRead(vespalib::nbostream &in); + + uint64_t + getWriteOffset(void) const + { + return _fileWriteBias + + (reinterpret_cast(_valI) << 3) - _cacheFree; + } + + void + defineWriteOffset(uint64_t writeOffset) + { + _fileWriteBias = writeOffset - + (reinterpret_cast(_valI) << 3) + + _cacheFree; + } + + virtual uint64_t + getBitPosV(void) const + { + return getWriteOffset(); + } + + /* + * Return max value that can be exp golomb encoded with our implementation + * ot the encoding method. Handling of larger numbers would require changes + * to both decode macros (making them slower) and encoding method (making + * it slower). 
+ */ + static uint64_t + maxExpGolombVal(uint32_t kValue) + { + return static_cast + (- (UINT64_C(1) << kValue) - 1); + } + + /* + * Return max value that can be exp golomb encoded within maxBits + * using kValue encoding parameter. + * + * maxBits must be larger than kValue + */ + static uint64_t + maxExpGolombVal(uint32_t kValue, uint32_t maxBits) + { + if ((maxBits + kValue + 1) / 2 > 64) { + return static_cast(-1); + } + if ((maxBits + kValue + 1) / 2 == 64) { + return static_cast + (- (UINT64_C(1) << kValue) - 1); + } + return static_cast + ((UINT64_C(1) << ((maxBits + kValue + 1) / 2)) - + (UINT64_C(1) << kValue) - 1); + } + +}; + + +template +class EncodeContext64EBase : public EncodeContext64Base +{ +public: + static inline uint64_t + bswap(uint64_t val); + + /** + * Write bits + * + * @param data The bits to be written to file. + * @param length The number of bits to be written to file. + */ + void inline + writeBits(uint64_t data, uint32_t length); + + /** + * Flushes the last integer to disk if there are remaining bits left in + * the _cacheInt. Padding of trailing 0-bits is automatically added. 
+ */ + void + flush(void) + { + if (_cacheFree < 64) { + *_valI++ = bswap(_cacheInt); + _cacheInt = 0; + _cacheFree = 64; + } + } + + void + smallPadBits(uint32_t length) + { + if (length > 0) + writeBits(0, length); + } + + virtual void + padBits(uint32_t length) + { + while (length > 64) { + writeBits(0, 64); + length -= 64; + } + smallPadBits(length); + } + + void + align(uint32_t alignment) + { + uint64_t length = (- getWriteOffset()) & (alignment - 1); + padBits(length); + } + + void + alignDirectIO() + { + align(4096*8); + } + + /* + * Small alignment (max 64 bits alignment) + */ + void + smallAlign(uint32_t alignment) + { + uint64_t length = _cacheFree & (alignment - 1); + smallPadBits(length); + } +}; + + +template <> +inline uint64_t +EncodeContext64EBase::bswap(uint64_t val) +{ + __asm__("bswap %0" : "=r" (val) : "0" (val)); + return val; +} + + +template <> +inline void +EncodeContext64EBase::writeBits(uint64_t data, uint32_t length) +{ + // While there are enough bits remaining in "data", + // fill the cacheInt and flush it to vector + if (length >= _cacheFree) { + // Shift new bits into cacheInt + _cacheInt |= ((data >> (length - _cacheFree)) & + CodingTables::_intMask64[_cacheFree]); + *_valI++ = bswap(_cacheInt); + + // Initialize variables for receiving new bits + length -= _cacheFree; + _cacheInt = 0; + _cacheFree = 64; + } + + if (length > 0) { + uint64_t dataFragment = (data & CodingTables::_intMask64[length]); + _cacheInt |= (dataFragment << (_cacheFree - length)); + _cacheFree -= length; + } +} + + +template <> +inline uint64_t +EncodeContext64EBase::bswap(uint64_t val) +{ + return val; +} + + +template <> +inline void +EncodeContext64EBase::writeBits(uint64_t data, uint32_t length) +{ + // While there are enough bits remaining in "data", + // fill the cacheInt and flush it to vector + if (length >= _cacheFree) { + // Shift new bits into cacheInt + _cacheInt |= (data << (64 - _cacheFree)); + *_valI++ = bswap(_cacheInt); + + data >>= _cacheFree; 
+ // Initialize variables for receiving new bits + length -= _cacheFree; + _cacheInt = 0; + _cacheFree = 64; + } + + if (length > 0) { + uint64_t dataFragment = (data & CodingTables::_intMask64[length]); + _cacheInt |= (dataFragment << (64 - _cacheFree)); + _cacheFree -= length; + } +} + +typedef EncodeContext64EBase EncodeContext64BEBase; + +typedef EncodeContext64EBase EncodeContext64LEBase; + + +template +class EncodeContext64 : public EncodeContext64EBase +{ +public: + typedef EncodeContext64EBase BaseClass; + using BaseClass::writeBits; + + /** + * Calculate floor(log2(x)) + */ + static inline uint32_t + log2(uint64_t x) + { + uint64_t retVal; + +#if (defined(__x86_64__)) && defined(DO_ASMLOG) + __asm("bsrq %1,%0" : "=r" (retVal) : "r" (x)); + +#else + register uint64_t lower = x; + uint32_t upper32 = lower >> 32; + if (upper32 != 0) { + uint32_t upper16 = upper32 >> 16; + if (upper16 != 0) + retVal = 48 + CodingTables::_log2Table[upper16]; + else + retVal = 32 + CodingTables::_log2Table[upper32]; + } else { + uint32_t lower32 = static_cast(x); + uint32_t upper16 = lower32 >> 16; + + if (upper16 != 0) + retVal = 16 + CodingTables::_log2Table[upper16]; + else + retVal = CodingTables::_log2Table[lower32]; + } +#endif + + return retVal; + } + + static inline uint64_t + ffsl(uint64_t x) + { + uint64_t retVal; + __asm("bsfq %1,%0" : "=r" (retVal) : "r" (x)); + return retVal; + } + + /** + * ExpGolomb-encode an integer + * @param x integer to be encoded (lowest value is 0). + * @param k k parameter + * + * Note: This method doesn't work when x > maxExpGolombVal(k). 
+ */ + void + encodeExpGolomb(uint64_t x, uint32_t k) + { + if (bigEndian) { + uint32_t log2qx2 = log2((x >> k) + 1) * 2; + uint64_t expGolomb = x + (UINT64_C(1) << k); + + if (log2qx2 < 64 - k) + writeBits(expGolomb, k + log2qx2 + 1); + else { + writeBits(0, k + log2qx2 + 1 - 64); + writeBits(expGolomb, 64); + } + } else { + uint32_t log2q = log2((x >> k) + 1); + uint32_t log2qx2 = log2q * 2; + uint64_t expGolomb = x + (UINT64_C(1) << k) - + (UINT64_C(1) << (k + log2q)); + + if (log2qx2 < 64 - k) + writeBits(((expGolomb << 1) | 1) << log2q, k + log2qx2 + 1); + else { + writeBits(0, log2q); + writeBits((expGolomb << 1) | 1, log2q + k + 1); + } + } + } + + static uint32_t + encodeExpGolombSpace(uint64_t x, uint32_t k) + { + return k + log2((x >> k) + 1) * 2 + 1; + } + + void + encodeDExpGolomb(uint64_t x, uint32_t k) + { + if (x == 0) { + writeBits(0, 1); + return; + } + if (x == 1) { + writeBits(bigEndian ? 2 : 1, 2); + return; + } + writeBits(3, 2); + encodeExpGolomb(x - 2, k); + } + + static uint32_t + encodeDExpGolombSpace(uint64_t x, uint32_t k) + { + if (x == 0) + return 1; + if (x == 1) + return 2; + return 2 + encodeExpGolombSpace(x, k); + } + + void + encodeD0ExpGolomb(uint64_t x, uint32_t k) + { + if (x == 0) { + writeBits(0, 1); + return; + } + writeBits(1, 1); + encodeExpGolomb(x - 1, k); + } + + static uint32_t + encodeD0ExpGolombSpace(uint64_t x, uint32_t k) + { + if (x == 0) + return 1; + return 1 + encodeExpGolombSpace(x, k); + } + + static uint64_t + convertToUnsigned(int64_t val) + { + if (val < 0) + return ((- val) << 1) - 1; + else + return (val << 1); + } +}; + + +typedef EncodeContext64 EncodeContext64BE; + +typedef EncodeContext64 EncodeContext64LE; + +class DecodeContext64Base : public search::ComprFileDecodeContext +{ +private: + DecodeContext64Base(const DecodeContext64Base &); + +public: + enum Constants { + END_BUFFER_SAFETY = 4 + }; + + // Pointers to compressed data + const uint64_t *_valI; + const uint64_t *_valE; + const uint64_t 
*_realValE;
+
+    // NOTE(review): the target types of the casts in this class were
+    // missing in this copy and have been reconstructed from the types of
+    // the variables involved -- confirm against upstream.
+
+    // Cached integers
+
+    // _val is the work-integer which is by convention always filled
+    // with the next 64 bits (big-endian streams: the first bit is #63,
+    // little-endian streams: the first bit is #0)
+    uint64_t _val;
+
+    // _cacheInt is the second level of integer cache. It holds the
+    // next bits (_preRead bits of this integer is valid)
+    uint64_t _cacheInt;
+    uint32_t _preRead;
+
+    // File position for end of buffer minus byte address of end of buffer
+    // minus sizeof uint64_t. Then shifted left by 3 to represent bits.
+    uint64_t _fileReadBias;
+
+    /** Create a context with no buffer and no cached bits. */
+    DecodeContext64Base(void)
+        : search::ComprFileDecodeContext(),
+          _valI(NULL),
+          _valE(NULL),
+          _realValE(NULL),
+          _val(0),
+          _cacheInt(0),
+          _preRead(0),
+          _fileReadBias(0)
+    {
+    }
+
+
+    /** Create a context from an explicit read state; the bias is 0. */
+    DecodeContext64Base(const uint64_t *valI,
+                        const uint64_t *valE,
+                        const uint64_t *realValE,
+                        uint64_t val,
+                        uint64_t cacheInt,
+                        uint32_t preRead)
+        : search::ComprFileDecodeContext(),
+          _valI(valI),
+          _valE(valE),
+          _realValE(realValE),
+          _val(val),
+          _cacheInt(cacheInt),
+          _preRead(preRead),
+          _fileReadBias(0)
+    {
+    }
+
+    virtual
+    ~DecodeContext64Base(void)
+    {
+    }
+
+    /** Member-wise assignment; both contexts then refer to the same buffer. */
+    DecodeContext64Base &
+    operator=(const DecodeContext64Base &rhs)
+    {
+        search::ComprFileDecodeContext::operator=(rhs);
+        _valI = rhs._valI;
+        _valE = rhs._valE;
+        _realValE = rhs._realValE;
+        _val = rhs._val;
+        _cacheInt = rhs._cacheInt;
+        _preRead = rhs._preRead;
+        _fileReadBias = rhs._fileReadBias;
+        return *this;
+    }
+
+    /**
+     *
+     * Check if the chunk referenced by the decode context was the
+     * last chunk in the file (e.g. _valE > _realValE)
+     */
+    virtual bool
+    lastChunk(void) const
+    {
+        return _valE > _realValE;
+    }
+
+    /**
+     * Check if we're at the end of the current chunk (e.g. _valI >= _valE)
+     */
+    virtual bool
+    endOfChunk(void) const
+    {
+        return _valI >= _valE;
+    }
+
+    /**
+     * Get remaining units in buffer (e.g. _realValE - _valI)
+     */
+
+    virtual int32_t
+    remainingUnits(void) const
+    {
+        return _realValE - _valI;
+    }
+
+    /**
+     * Get unit ptr (e.g. _valI) from decode context.
+     */
+    virtual const void *
+    getUnitPtr(void) const
+    {
+        return _valI;
+    }
+
+    /**
+     * Adjust the context after the buffer has been (re)filled.
+     *
+     * @param start            first unit of the buffer
+     * @param bufferUnits      number of units in the buffer
+     * @param bufferEndFilePos file position of the end of the buffer
+     * @param isMore           set if more data follows this buffer
+     */
+    virtual void
+    afterRead(const void *start,
+              size_t bufferUnits,
+              uint64_t bufferEndFilePos,
+              bool isMore)
+    {
+        _valI = static_cast<const uint64_t *>(start);
+        setEnd(bufferUnits, isMore);
+        _fileReadBias = (bufferEndFilePos -
+                         reinterpret_cast<uintptr_t>(_realValE + 1)) << 3;
+    }
+
+    /**
+     * Compute a file bit position from the end-of-buffer file position
+     * and the number of units left in the buffer. A bitOffset of -1
+     * means "derive the offset from the current read state".
+     */
+    virtual uint64_t
+    getBitPos(int bitOffset,
+              uint64_t bufferEndFilePos) const
+    {
+        int intOffset = _realValE - _valI;
+        if (bitOffset == -1)
+            bitOffset = -64 - _preRead;
+
+        return (bufferEndFilePos << 3) -
+            (static_cast<uint64_t>(intOffset) << 6) + bitOffset;
+    }
+
+    /**
+     * Current read offset in bits, computed from the bias, the address
+     * of the next input word and the number of unread cache bits.
+     */
+    uint64_t
+    getReadOffset(void) const
+    {
+        return _fileReadBias +
+            (reinterpret_cast<uintptr_t>(_valI) << 3) - _preRead;
+    }
+
+    /**
+     * Set the bias such that getReadOffset() returns readOffset for the
+     * current read state.
+     */
+    void
+    defineReadOffset(uint64_t readOffset)
+    {
+        _fileReadBias = readOffset -
+                        (reinterpret_cast<uintptr_t>(_valI) << 3) +
+                        _preRead;
+    }
+
+    virtual uint64_t
+    getBitPosV(void) const
+    {
+        return getReadOffset();
+    }
+
+    /** Reposition so that newRemainingUnits units are left before _realValE. */
+    virtual void
+    adjUnitPtr(int newRemainingUnits)
+    {
+        _valI = _realValE - newRemainingUnits;
+    }
+
+    /**
+     * Drop the buffer: all pointers become NULL, no bits are cached and
+     * the bias is set to newBitPosition.
+     */
+    virtual void
+    emptyBuffer(uint64_t newBitPosition)
+    {
+        _fileReadBias = newBitPosition;
+        _valI = NULL;
+        _valE = NULL;
+        _realValE = NULL;
+        _preRead = 0;
+    }
+
+    /** Size in bytes of one unit (a 64-bit word). */
+    virtual uint32_t
+    getUnitByteSize(void) const
+    {
+        return sizeof(uint64_t);
+    }
+
+    /**
+     * Set the end of the buffer
+     * @param unitCount Number of units (64-bit words) in buffer
+     * @param moreData Set if there is more data available
+     *
+     * _realValE marks the real end of the buffer. _valE is placed
+     * END_BUFFER_SAFETY units before it when more data follows and the
+     * same distance after it otherwise.
+     */
+    void
+    setEnd(unsigned int unitCount, bool moreData)
+    {
+        _valE = _realValE = _valI + unitCount;
+        if (moreData)
+            _valE -= END_BUFFER_SAFETY;
+        else
+            _valE += END_BUFFER_SAFETY;
+    }
+
+    /**
+     * Pointer to the word holding the first bit of _val, taking the
+     * words prefetched into _val and _cacheInt into account.
+     */
+    const uint64_t *
+    getCompr(void) const
+    {
+        return (_preRead == 0) ? (_valI - 1) : (_valI - 2);
+    }
+
+    int
+    getBitOffset(void) const
+    {
+        return (_preRead == 0) ?
0 : 64 - _preRead;
+    }
+    /* checkPointWrite/checkPointRead are only declared here; no body is given in this class. */
+    virtual void
+    checkPointWrite(vespalib::nbostream &out);
+
+    virtual void
+    checkPointRead(vespalib::nbostream &in);
+    /*
+     * Map an unsigned code to a signed value: even inputs give val / 2,
+     * odd inputs give -(val / 2) - 1, so 0, 1, 2, 3 become 0, -1, 1, -2.
+     * The negation is done in unsigned arithmetic and converted on return.
+     */
+    static int64_t
+    convertToSigned(uint64_t val)
+    {
+        if ((val & 1) != 0)
+            return - (val >> 1) - 1;
+        else
+            return (val >> 1);
+    }
+};
+
+/*
+ * Decode context layered on DecodeContext64Base; its member functions
+ * branch on a bigEndian flag to pick the bit order.
+ * NOTE(review): the parameter list after "template" and the argument of
+ * EncodeContext64 in the EC typedef are missing in this text (angle
+ * brackets appear to have been stripped) - restore from upstream.
+ */
+template
+class DecodeContext64 : public DecodeContext64Base
+{
+private:
+    DecodeContext64(const DecodeContext64 &);   // copy constructor declared private
+
+public:
+    typedef EncodeContext64 EC;
+    /* Default constructor: everything comes from the base default constructor. */
+    DecodeContext64(void)
+        : DecodeContext64Base()
+    {
+    }
+
+    /*
+     * Start decoding at bit bitOffset of the word at compr.
+     * The base is initialised with _valI = compr + 1, NULL end pointers,
+     * _val = 0, _cacheInt = EC::bswap(*compr) and _preRead = 64 - bitOffset.
+     * UC64_READBITS is then invoked; the local `length` is not passed
+     * explicitly, so it is presumably read by the macro - confirm.
+     */
+    DecodeContext64(const uint64_t *compr,
+                    int bitOffset)
+        : DecodeContext64Base(compr + 1,
+                              NULL,
+                              NULL,
+                              0,
+                              EC::bswap(*compr),
+                              64 - bitOffset)
+    {
+        uint32_t length = 64;
+        UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+    }
+
+    /*
+     * Setup decode context without read context, all data is in memory.
+     * Assumes that last word is fully readable, and that some extra
+     * data beyond is available, to avoid issues when prefetching bits
+     * into two registers (_val and _cacheInt).
+     */
+    DecodeContext64(const uint64_t *compr,
+                    int bitOffset,
+                    uint64_t bitLength)
+        : DecodeContext64Base(compr + 1,
+                              NULL,
+                              NULL,
+                              0,
+                              EC::bswap(*compr),
+                              64 - bitOffset)
+    {
+        uint32_t length = 64;   // not passed explicitly; presumably read by UC64_READBITS
+        UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+        _realValE = compr + (bitOffset + bitLength + 63) / 64;   // bit span rounded up to whole words
+        _valE = _realValE + END_BUFFER_SAFETY;   // same placement setEnd() uses when no more data follows
+    }
+    /* Assignment forwards to DecodeContext64Base::operator=; this class adds no members of its own. */
+    DecodeContext64 &
+    operator=(const DecodeContext64 &rhs)
+    {
+        DecodeContext64Base::operator=(rhs);
+        return *this;
+    }
+
+    /**
+     * Read [length] bits from a bitstream and OR them into val.
+     *
+     * If cacheInt still holds at least length valid bits they are taken
+     * from it. Otherwise the remaining preRead bits are used first, the
+     * next word is loaded through EC::bswap, and the rest comes from it.
+     * Bits are only OR-ed in, so the caller must have cleared the
+     * receiving bit positions of val beforehand.
+     *
+     * @param length   Number of bits to read (documented as 0 < length < 64;
+     *                 skipBits() in this class also passes 64)
+     * @param val      Current integer holding bits
+     * @param cacheInt 2nd level integer cache
+     * @param preRead  Number of valid bits in cacheInt
+     * @param valI     Pointer to next integer in bitstream
+     */
+    static void
+    ReadBits(unsigned int length, uint64_t &val,
+             uint64_t &cacheInt, unsigned int &preRead,
+             const uint64_t * &valI)
+    {
+        if (length <= preRead) {
+            if (bigEndian) {
+                val |= ((cacheInt >> (preRead - length)) &
+                        CodingTables::_intMask64[length]);
+            } else {
+                val |= ((cacheInt << (preRead - length)) &
+                        CodingTables::_intMask64le[length]);
+            }
+            preRead -= length;
+            return;
+        }
+
+        if (preRead > 0) {
+            length -= preRead;   // bits still needed after the cache is drained
+            if (bigEndian) {
+                val |= ((cacheInt &
+                         CodingTables::_intMask64[preRead]) << length);
+            } else {
+                val |= ((cacheInt &
+                         CodingTables::_intMask64le[preRead]) >> length);
+            }
+        }
+
+        cacheInt = EC::bswap(*valI++);
+        preRead = 64 - length;   // bits of the new word left over after this read
+        if (bigEndian)
+            val |= (cacheInt >> preRead);
+        else
+            val |= (cacheInt << preRead);
+    };   // trailing ';' after the function body is an empty declaration
+    /*
+     * Skip `bits` bits. Each whole 64-bit step clears _val and reloads it
+     * with ReadBits(64, ...). A remainder shifts the consumed bits out of
+     * _val (left when bigEndian, right otherwise) and refills via ReadBits.
+     */
+    virtual void
+    skipBits(int bits)
+    {
+        while (bits >= 64) {
+            _val = 0;
+            ReadBits(64, _val, _cacheInt, _preRead, _valI);
+            bits -= 64;
+        }
+        if (bits > 0) {
+            if (bigEndian)
+                _val <<= bits;
+            else
+                _val >>= bits;
+            ReadBits(bits, _val, _cacheInt, _preRead, _valI);
+        }
+    }
+
+    /**
+     * Setup for bitwise reading.
+     */
+    virtual void
+    setupBits(int bitOffset)
+    {
+        unsigned int length;   // left uninitialised; presumably assigned inside UC64_SETUPBITS - confirm
+        UC64_SETUPBITS(bitOffset, _val, _valI, _preRead, _cacheInt, EC);
+    }
+    /* Move to pos: word pointer from pos.getOccurences(), then setupBits() with pos.getBitOffset(). */
+    void setPosition(Position pos) {
+        _valI = pos.getOccurences();
+        setupBits(pos.getBitOffset());
+    }
+
+    /**
+     * Used by iterators when switching from bitwise to bytewise decoding.
+     * Asserts that _preRead is a multiple of 8, then returns getCompr()
+     * advanced by getBitOffset() / 8.
+     * NOTE(review): the reinterpret_cast has lost its target type in this
+     * text; the return type suggests a byte pointer - confirm.
+     */
+    const uint8_t *
+    getByteCompr(void) const
+    {
+        assert((_preRead & 7) == 0);
+        return reinterpret_cast(getCompr()) +
+            (getBitOffset() >> 3);
+    }
+
+    /**
+     * Used by iterators when switching from bytewise to bitwise decoding.
+     * byteOffset is the low three bits of the address of bCompr; _valI is
+     * set to bCompr moved back by that many bytes, and setupBits() gets
+     * the offset converted to bits.
+     * NOTE(review): both reinterpret_casts have lost their target types.
+     */
+    void
+    setByteCompr(const uint8_t *bCompr)
+    {
+        int byteOffset = reinterpret_cast(bCompr) & 7;
+        _valI = reinterpret_cast(bCompr - byteOffset);
+        setupBits(byteOffset * 8);
+    }
+    /*
+     * Return the next `length` bits and refill _val through UC64_READBITS.
+     * length < 64: when bigEndian the top bits of _val are returned and
+     * _val shifts left; otherwise the low bits are masked out and _val
+     * shifts right. length >= 64 returns all of _val and clears it.
+     * NOTE(review): length == 0 with bigEndian would shift _val by 64,
+     * which is undefined - callers presumably never pass 0.
+     */
+    uint64_t
+    readBits(uint32_t length)
+    {
+        uint64_t res;
+        if (length < 64) {
+            if (bigEndian) {
+                res = _val >> (64 - length);
+                _val <<= length;
+            } else {
+                res = _val & CodingTables::_intMask64[length];
+                _val >>= length;
+            }
+        } else {
+            res = _val;
+            _val = 0;
+        }
+        UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+        return res;
+    }
+    /*
+     * Read and discard bits until getReadOffset() is a multiple of
+     * alignment. The padding is computed with the mask (alignment - 1),
+     * which is only meaningful when alignment is a power of two.
+     */
+    void
+    align(uint32_t alignment)
+    {
+        uint64_t pad = (- getReadOffset()) & (alignment - 1);
+        while (pad > 64) {
+            (void) readBits(64);
+            pad -= 64;
+        }
+        if (pad > 0)
+            (void) readBits(pad);
+    }
+
+    /*
+     * Small alignment (max 64 bits alignment).
+     * The padding is taken from _preRead masked with (alignment - 1), not
+     * from getReadOffset(); at most one readBits() call is made.
+     */
+    void
+    smallAlign(uint32_t alignment)
+    {
+        uint64_t pad = _preRead & (alignment - 1);
+        if (pad > 0)
+            (void) readBits(pad);
+    }
+};
+/* NOTE(review): the BE and LE typedefs read identically here; their template arguments appear to be missing. */
+typedef DecodeContext64 DecodeContext64BE;
+
+typedef DecodeContext64 DecodeContext64LE;
+/*
+ * Decode context that additionally holds a pointer to a
+ * search::ComprFileReadContext (_readContext).
+ * NOTE(review): the template parameter list and the DecodeContext64
+ * template arguments are missing in this text - restore from upstream.
+ */
+template
+class FeatureDecodeContext : public DecodeContext64
+{
+public:
+    search::ComprFileReadContext *_readContext;
+    typedef DecodeContext64 ParentClass;
+    typedef index::DocIdAndFeatures DocIdAndFeatures;
+    typedef index::PostingListParams PostingListParams;
+    using ParentClass::_val;
+    using ParentClass::_valI;
+    using ParentClass::_valE;
+    using ParentClass::_realValE;
+    using ParentClass::_cacheInt;
+
using ParentClass::_preRead; + using ParentClass::getReadOffset; + using ParentClass::getCompr; + using ParentClass::getBitOffset; + using ParentClass::readBits; + using ParentClass::ReadBits; + + FeatureDecodeContext(void) + : ParentClass(), + _readContext(NULL) + { + } + + FeatureDecodeContext(const uint64_t *compr, + int bitOffset) + : ParentClass(compr, bitOffset), + _readContext(NULL) + { + } + + FeatureDecodeContext(const uint64_t *compr, + int bitOffset, + uint64_t bitLength) + : ParentClass(compr, bitOffset, bitLength), + _readContext(NULL) + { + } + + FeatureDecodeContext & + operator=(const FeatureDecodeContext &rhs) + { + ParentClass::operator=(rhs); + _readContext = rhs._readContext; + return *this; + } + + void + setReadContext(search::ComprFileReadContext *readContext) + { + _readContext = readContext; + } + + search::ComprFileReadContext * + getReadContext(void) const + { + return _readContext; + } + + void + readComprBuffer(void) + { + _readContext->readComprBuffer(); + } + + void + readComprBufferIfNeeded(void) + { + if (__builtin_expect(_valI >= _valE, false)) + readComprBuffer(); + } + + void + readBytes(uint8_t *buf, size_t len); + + virtual uint32_t + readHeader(vespalib::GenericHeader &header, int64_t fileSize); + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual const vespalib::string & + getIdentifier(void) const; + + virtual void + readFeatures(DocIdAndFeatures &features); + + virtual void + skipFeatures(unsigned int count); + + virtual void + unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. 
+ */ + virtual void + getParams(PostingListParams ¶ms) const; + + virtual void + skipBits(int bits) + { + readComprBufferIfNeeded(); + while (bits >= 64) { + _val = 0; + ReadBits(64, _val, _cacheInt, _preRead, _valI); + bits -= 64; + readComprBufferIfNeeded(); + } + if (bits > 0) { + if (bigEndian) + _val <<= bits; + else + _val >>= bits; + ReadBits(bits, _val, _cacheInt, _preRead, _valI); + readComprBufferIfNeeded(); + } + } + + void + align(uint32_t alignment) + { + readComprBufferIfNeeded(); + uint64_t pad = (- getReadOffset()) & (alignment - 1); + while (pad > 64) { + (void) readBits(64); + pad -= 64; + readComprBufferIfNeeded(); + } + if (pad > 0) + (void) readBits(pad); + readComprBufferIfNeeded(); + } +}; + +typedef FeatureDecodeContext FeatureDecodeContextBE; + +typedef FeatureDecodeContext FeatureDecodeContextLE; + +template +class FeatureEncodeContext : public EncodeContext64 +{ +public: + search::ComprFileWriteContext *_writeContext; + typedef EncodeContext64 ParentClass; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListParams PostingListParams; + using ParentClass::_cacheInt; + using ParentClass::_cacheFree; + using ParentClass::smallPadBits; + +public: + FeatureEncodeContext(void) + : ParentClass(), + _writeContext(NULL) + { + } + + FeatureEncodeContext & + operator=(const FeatureEncodeContext &rhs) + { + ParentClass::operator=(rhs); + _writeContext = rhs._writeContext; + return *this; + } + + void + setWriteContext(search::ComprFileWriteContext *writeContext) + { + _writeContext = writeContext; + } + + using ParentClass::log2; + using ParentClass::_valI; + using ParentClass::_valE; + + static int + calcDocIdK(uint32_t numDocs, uint32_t docIdLimit) + { + uint32_t avgDelta = docIdLimit / (numDocs + 1); + uint32_t docIdK = (avgDelta < 4) ? 
1 : (log2(avgDelta)); + return docIdK; + } + + using ParentClass::writeBits; + + void + writeBits(const uint64_t *bits, uint32_t bitOffset, uint32_t bitLength); + + void + writeString(const vespalib::stringref &buf); + + virtual void + writeHeader(const vespalib::GenericHeader &header); + + void + writeComprBufferIfNeeded(void) + { + if (_valI >= _valE) + _writeContext->writeComprBuffer(false); + } + + void + writeComprBuffer(void) + { + _writeContext->writeComprBuffer(true); + } + + virtual void + padBits(uint32_t length) + { + while (length > 64) { + writeBits(0, 64); + length -= 64; + writeComprBufferIfNeeded(); + } + smallPadBits(length); + writeComprBufferIfNeeded(); + } + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual void + writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const; + + virtual const vespalib::string & + getIdentifier(void) const; + + virtual void + writeFeatures(const DocIdAndFeatures &features); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms) const; +}; + +typedef FeatureEncodeContext FeatureEncodeContextBE; + +typedef FeatureEncodeContext FeatureEncodeContextLE; + +extern template class FeatureDecodeContext; +extern template class FeatureDecodeContext; + +extern template class FeatureEncodeContext; +extern template class FeatureEncodeContext; + +} // namespace bitcompression + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp new file mode 100644 index 00000000000..9c0b7115a72 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.cpp @@ -0,0 +1,241 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "compression.h" +#include "countcompression.h" +#include +#include + +namespace search +{ + +namespace bitcompression +{ + +using vespalib::nbostream; +/* k values handed to encodeExpGolomb() and UC64BE_DECODEEXPGOLOMB_NS() for the fields coded in this file. */ +#define K_VALUE_COUNTFILE_LASTDOCID 22 +#define K_VALUE_COUNTFILE_NUMCHUNKS 1 +#define K_VALUE_COUNTFILE_CHUNKNUMDOCS 18 +#define K_VALUE_COUNTFILE_WORDNUMDELTA 0 +#define K_VALUE_COUNTFILE_SPNUMDOCS 0 + +/* Checkpoint: write the parent state, then the four coding parameters and _minWordNum, to out. */ +void +PostingListCountFileDecodeContext::checkPointWrite(nbostream &out) +{ + ParentClass::checkPointWrite(out); + out << _avgBitsPerDoc << _minChunkDocs << _docIdLimit << _numWordIds; + out << _minWordNum; +} + +/* Checkpoint restore: read the fields back in the order checkPointWrite() emits them. */ +void +PostingListCountFileDecodeContext::checkPointRead(nbostream &in) +{ + ParentClass::checkPointRead(in); + in >> _avgBitsPerDoc >> _minChunkDocs >> _docIdLimit >> _numWordIds; + in >> _minWordNum; +} + +/* Decode one entry into counts: numDocs, bit length and, when numDocs >= _minChunkDocs, the chunk segments. Calls _readContext->readComprBuffer() at the points where oCompr has reached valE. */ +void +PostingListCountFileDecodeContext:: +readCounts(PostingListCounts &counts) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + uint32_t numDocs; + + counts._segments.clear(); + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_SPNUMDOCS, + EC); + numDocs = static_cast(val64) + 1; /* stored value is numDocs - 1, see writeCounts() */ + counts._numDocs = numDocs; + if (numDocs != 0) { + uint64_t expVal = numDocs * static_cast(_avgBitsPerDoc); + uint32_t kVal = (expVal < 4) ? 
1 : EC::log2(expVal); + UC64BE_DECODEEXPGOLOMB_NS(o, + kVal, + EC); + counts._bitLength = val64; + } else + counts._bitLength = 0; + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + uint32_t numChunks = 0; /* a chunk count is only decoded when numDocs >= _minChunkDocs */ + if (numDocs >= _minChunkDocs) { + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_NUMCHUNKS, + EC); + numChunks = static_cast(val64); + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + } + if (numChunks != 0) { + uint32_t prevLastDoc = 0u; + for (uint32_t chunk = 0; chunk < numChunks; ++chunk) { + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + PostingListCounts::Segment seg; + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_CHUNKNUMDOCS, + EC); + seg._numDocs = static_cast(val64) + 1; + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_POSOCCBITS, + EC); + seg._bitLength = val64; + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_LASTDOCID, + EC); + seg._lastDoc = + static_cast(val64) + seg._numDocs + prevLastDoc; /* undoes the delta coding applied by writeCounts() */ + prevLastDoc = seg._lastDoc; + counts._segments.push_back(seg); + } + } + UC64_DECODECONTEXT_STORE(o, _); + if (__builtin_expect(oCompr >= valE, false)) + _readContext->readComprBuffer(); +} + +/* Decode the next word number as _minWordNum plus a decoded delta, then set _minWordNum to one past it. 
*/ +void +PostingListCountFileDecodeContext:: +readWordNum(uint64_t &wordNum) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_WORDNUMDELTA, + EC); + wordNum = _minWordNum + val64; + UC64_DECODECONTEXT_STORE(o, _); + if (__builtin_expect(oCompr >= valE, false)) + _readContext->readComprBuffer(); + _minWordNum = wordNum + 1; +} + +/* Copy the four coding parameters from rhs; _minWordNum is left untouched. */ +void +PostingListCountFileDecodeContext:: 
+copyParams(const PostingListCountFileDecodeContext &rhs) +{ + _avgBitsPerDoc = rhs._avgBitsPerDoc; + _minChunkDocs = rhs._minChunkDocs; + _docIdLimit = rhs._docIdLimit; + _numWordIds = rhs._numWordIds; +} + +/* Checkpoint: write the parent state, then the four coding parameters and _minWordNum, to out. */ +void +PostingListCountFileEncodeContext::checkPointWrite(nbostream &out) +{ + ParentClass::checkPointWrite(out); + out << _avgBitsPerDoc << _minChunkDocs << _docIdLimit << _numWordIds; + out << _minWordNum; +} + +/* Checkpoint restore: read the fields back in the order checkPointWrite() emits them. */ +void +PostingListCountFileEncodeContext::checkPointRead(nbostream &in) +{ + ParentClass::checkPointRead(in); + in >> _avgBitsPerDoc >> _minChunkDocs >> _docIdLimit >> _numWordIds; + in >> _minWordNum; +} + +/* Encode counts in the field order readCounts() decodes; segments are only accepted when numDocs >= _minChunkDocs. */ +void +PostingListCountFileEncodeContext:: +writeCounts(const PostingListCounts &counts) +{ + assert(counts._segments.empty() || + counts._numDocs >= _minChunkDocs); + uint32_t numDocs = counts._numDocs; + assert(numDocs > 0); + encodeExpGolomb(numDocs - 1, K_VALUE_COUNTFILE_SPNUMDOCS); + if (numDocs == 0) { /* not reachable while the assert above is compiled in */ + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + return; + } + uint64_t encodeVal = counts._bitLength; + uint64_t expVal = numDocs * static_cast(_avgBitsPerDoc); + uint32_t kVal = (expVal < 4) ? 
1 : log2(expVal); + encodeExpGolomb(encodeVal, kVal); + uint32_t numChunks = counts._segments.size(); + if (numDocs >= _minChunkDocs) + encodeExpGolomb(numChunks, K_VALUE_COUNTFILE_NUMCHUNKS); + if (numChunks != 0) { + typedef std::vector::const_iterator segit; + + segit ite = counts._segments.end(); + + uint32_t prevLastDoc = 0u; + for (segit it = counts._segments.begin(); it != ite; ++it) { + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + encodeExpGolomb(it->_numDocs - 1, + K_VALUE_COUNTFILE_CHUNKNUMDOCS); + encodeExpGolomb(it->_bitLength, + K_VALUE_COUNTFILE_POSOCCBITS); + encodeExpGolomb(it->_lastDoc - prevLastDoc - it->_numDocs, + K_VALUE_COUNTFILE_LASTDOCID); /* last-doc delta with the chunk's own numDocs subtracted */ + prevLastDoc = it->_lastDoc; + } + } + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); +} + +/* Encode wordNum as its distance from _minWordNum, then set _minWordNum to wordNum + 1. */ +void +PostingListCountFileEncodeContext:: +writeWordNum(uint64_t wordNum) +{ + assert(wordNum >= _minWordNum); + assert(wordNum <= _numWordIds); + encodeExpGolomb(wordNum - _minWordNum, + K_VALUE_COUNTFILE_WORDNUMDELTA); + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + _minWordNum = wordNum + 1; +} + +/* Copy the four coding parameters from rhs; _minWordNum is left untouched. 
*/ +void +PostingListCountFileEncodeContext:: +copyParams(const PostingListCountFileEncodeContext &rhs) +{ + _avgBitsPerDoc = rhs._avgBitsPerDoc; + _minChunkDocs = rhs._minChunkDocs; + _docIdLimit = rhs._docIdLimit; + _numWordIds = rhs._numWordIds; +} + + +} // namespace bitcompression + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/bitcompression/countcompression.h b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h new file mode 100644 index 00000000000..284b441aa3b --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/countcompression.h @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include + +#define K_VALUE_COUNTFILE_POSOCCBITS 6 + +namespace search +{ + +namespace bitcompression +{ + +class PostingListCountFileDecodeContext : public FeatureDecodeContext +{ +public: + typedef FeatureDecodeContext ParentClass; + typedef index::PostingListCounts PostingListCounts; + uint32_t _avgBitsPerDoc; // Average number of bits per document + uint32_t _minChunkDocs; // Minimum number of documents for chunking + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + uint64_t _numWordIds; // Number of words in dictionary + uint64_t _minWordNum; // Minimum word number + + PostingListCountFileDecodeContext(void) + : ParentClass(), + _avgBitsPerDoc(10), + _minChunkDocs(262144), + _docIdLimit(10000000), + _numWordIds(0), + _minWordNum(0u) + { + } + + virtual void + checkPointWrite(vespalib::nbostream &out); + + virtual void + checkPointRead(vespalib::nbostream &in); + + void + readCounts(PostingListCounts &counts); + + void + readWordNum(uint64_t &wordNum); + + static uint64_t + noWordNum(void) + { + return std::numeric_limits::max(); + } + + void + copyParams(const PostingListCountFileDecodeContext &rhs); +}; + + +class PostingListCountFileEncodeContext : public FeatureEncodeContext +{ +public: + typedef FeatureEncodeContext ParentClass; + typedef index::PostingListCounts PostingListCounts; + uint32_t _avgBitsPerDoc; // Average number of bits per document + uint32_t _minChunkDocs; // Minimum number of documents for chunking + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + uint64_t _numWordIds; // Number of words in dictionary + uint64_t _minWordNum; // Mininum word number + + PostingListCountFileEncodeContext(void) + : ParentClass(), + _avgBitsPerDoc(10), + _minChunkDocs(262144), + _docIdLimit(10000000), + _numWordIds(0), + _minWordNum(0u) + { + } + + virtual void + 
checkPointWrite(vespalib::nbostream &out); + + virtual void + checkPointRead(vespalib::nbostream &in); + + void + writeCounts(const PostingListCounts &counts); + + void + writeWordNum(uint64_t wordNum); + + static uint64_t + noWordNum(void) + { + return std::numeric_limits::max(); + } + + void + copyParams(const PostingListCountFileEncodeContext &rhs); +}; + + +} // namespace bitcompression + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp new file mode 100644 index 00000000000..7a9875c1316 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp @@ -0,0 +1,2586 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include "compression.h" +#include "countcompression.h" +#include +#include +#include "pagedict4.h" +#include +#include +#include +#include +#include + +LOG_SETUP(".pagedict4"); + +namespace search +{ + +namespace bitcompression +{ + +namespace +{ + +void +setDecoderPositionHelper(PostingListCountFileDecodeContext &ctx, + const void *buffer, + uint64_t offset) +{ + const uint64_t *p = static_cast(buffer); + ctx._valI = p + offset / 64; + ctx.setupBits(offset & 63); + ctx.defineReadOffset(offset); +} + +void +setDecoderPositionInPage(PostingListCountFileDecodeContext &ctx, + const void *buffer, + uint64_t offset) +{ + ctx.afterRead(buffer, + PageDict4PageParams::getPageBitSize() / 64, + PageDict4PageParams::getPageBitSize() / 8, + false); + setDecoderPositionHelper(ctx, buffer, offset); +} + +void +setDecoderPosition(PostingListCountFileDecodeContext &ctx, + const ComprBuffer &cb, + uint64_t offset) +{ + ctx.afterRead(cb._comprBuf, + cb._comprBufSize, + cb._comprBufSize * sizeof(uint64_t), + false); + setDecoderPositionHelper(ctx, cb._comprBuf, offset); +} + + +} + + +uint32_t +PageDict4PageParams::getFileHeaderPad(uint32_t offset) +{ + 
uint32_t pad = (- offset & getPageBitSize()); + return pad > getMaxFileHeaderPad() ? 0u : pad; +} + + +std::ostream & +operator<<(std::ostream &stream, const index::PostingListCounts &counts) +{ + stream << "(d=" << counts._numDocs << ",b=" << counts._bitLength << ")"; + return stream; +} + +vespalib::nbostream & +operator<<(vespalib::nbostream &stream, + const PageDict4StartOffset &startOffset) +{ + stream << startOffset._fileOffset << startOffset._accNumDocs; + return stream; +} + +vespalib::nbostream & +operator>>(vespalib::nbostream &stream, PageDict4StartOffset &startOffset) +{ + stream >> startOffset._fileOffset >> startOffset._accNumDocs; + return stream; +} + + +vespalib::nbostream & +operator<<(vespalib::nbostream &stream, + const PageDict4SSReader::L7Entry &l7Entry) +{ + stream << l7Entry._l7Word << l7Entry._l7StartOffset << l7Entry._l7WordNum; + stream << l7Entry._l6Offset << l7Entry._sparsePageNum << l7Entry._pageNum; + stream << l7Entry._l7Ref; + return stream; +} + + +vespalib::nbostream & +operator>>(vespalib::nbostream &stream, + PageDict4SSReader::L7Entry &l7Entry) +{ + stream >> l7Entry._l7Word >> l7Entry._l7StartOffset >> l7Entry._l7WordNum; + stream >> l7Entry._l6Offset >> l7Entry._sparsePageNum >> l7Entry._pageNum; + stream >> l7Entry._l7Ref; + return stream; +} + + +vespalib::nbostream & +operator<<(vespalib::nbostream &stream, + const PageDict4SSReader::OverflowRef &oref) +{ + stream << oref._wordNum << oref._l7Ref; + return stream; +} + + +vespalib::nbostream & +operator>>(vespalib::nbostream &stream, PageDict4SSReader::OverflowRef &oref) +{ + stream >> oref._wordNum >> oref._l7Ref; + return stream; +} + +typedef index::PostingListCounts Counts; +typedef PageDict4StartOffset StartOffset; + +#define K_VALUE_COUNTFILE_L1_FILEOFFSET 7 +#define K_VALUE_COUNTFILE_L2_FILEOFFSET 11 +#define K_VALUE_COUNTFILE_L3_FILEOFFSET 13 +#define K_VALUE_COUNTFILE_L4_FILEOFFSET 15 +#define K_VALUE_COUNTFILE_L5_FILEOFFSET 17 +#define 
K_VALUE_COUNTFILE_L6_FILEOFFSET 19 + +#define K_VALUE_COUNTFILE_L1_WORDOFFSET 7 +#define K_VALUE_COUNTFILE_L2_WORDOFFSET 10 +#define K_VALUE_COUNTFILE_L4_WORDOFFSET 7 +#define K_VALUE_COUNTFILE_L5_WORDOFFSET 10 + +#define K_VALUE_COUNTFILE_L1_COUNTOFFSET 8 +#define K_VALUE_COUNTFILE_L2_COUNTOFFSET 11 +#define K_VALUE_COUNTFILE_L2_L1OFFSET 8 + +#define K_VALUE_COUNTFILE_L4_L3OFFSET 8 +#define K_VALUE_COUNTFILE_L5_L3OFFSET 11 +#define K_VALUE_COUNTFILE_L5_L4OFFSET 8 + +#define K_VALUE_COUNTFILE_L6_PAGENUM 7 + +#define K_VALUE_COUNTFILE_L3_WORDNUM 7 +#define K_VALUE_COUNTFILE_L4_WORDNUM 11 +#define K_VALUE_COUNTFILE_L5_WORDNUM 14 +#define K_VALUE_COUNTFILE_L6_WORDNUM 17 + +#define K_VALUE_COUNTFILE_L1_ACCNUMDOCS 4 +#define K_VALUE_COUNTFILE_L2_ACCNUMDOCS 8 +#define K_VALUE_COUNTFILE_L3_ACCNUMDOCS 10 +#define K_VALUE_COUNTFILE_L4_ACCNUMDOCS 12 +#define K_VALUE_COUNTFILE_L5_ACCNUMDOCS 14 +#define K_VALUE_COUNTFILE_L6_ACCNUMDOCS 16 + +static uint32_t +getLCP(const vespalib::stringref &word, + const vespalib::stringref &prevWord) +{ + size_t len1 = word.size(); + size_t len2 = prevWord.size(); + + size_t res = 0; + while (res < len1 && + res < len2 && + res < 254u && + word[res] == prevWord[res]) + ++res; + return res; +} + + +static void +addLCPWord(const vespalib::stringref &word, size_t lcp, std::vector &v) +{ + v.push_back(lcp); + size_t pos = lcp; + size_t len = word.size(); + while (pos < len) { + v.push_back(word[pos]); + ++pos; + } + v.push_back(0); +} + + +static void +writeStartOffset(PostingListCountFileEncodeContext &e, + const StartOffset &startOffset, + const StartOffset &prevStartOffset, + uint32_t fileOffsetK, + uint32_t accNumDocsK) +{ + e.encodeExpGolomb(startOffset._fileOffset - + prevStartOffset._fileOffset, + fileOffsetK); + e.encodeExpGolomb(startOffset._accNumDocs - + prevStartOffset._accNumDocs, + accNumDocsK); + e.writeComprBufferIfNeeded(); +} + + +static void +readStartOffset(PostingListCountFileDecodeContext &d, + StartOffset &startOffset, + 
uint32_t fileOffsetK, + uint32_t accNumDocsK) +{ + typedef PostingListCountFileEncodeContext EC; + + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, d._); + UC64_DECODEEXPGOLOMB_NS(o, + fileOffsetK, + EC); + startOffset._fileOffset += val64; + UC64_DECODEEXPGOLOMB_NS(o, + accNumDocsK, + EC); + startOffset._accNumDocs += val64; + UC64_DECODECONTEXT_STORE(o, d._); + d.readComprBufferIfNeeded(); +} + + +PageDict4SSWriter::PageDict4SSWriter(SSEC &sse) + : _eL6(sse), + _l6Word(), + _l6StartOffset(), + _l6PageNum(0u), + _l6SparsePageNum(0u), + _l6WordNum(1u) +{ +} + +PageDict4SSWriter::~PageDict4SSWriter(void) +{ +} + +void +PageDict4SSWriter::addL6Skip(const vespalib::stringref &word, + const StartOffset &startOffset, + uint64_t wordNum, + uint64_t pageNum, + uint32_t sparsePageNum) +{ +#if 0 + LOG(info, + "addL6SKip, \"%s\" -> wordnum %d, page (%d,%d) startOffset %" PRId64 + ", SS bitOffset %" PRIu64, + word.c_str(), + (int) wordNum, + (int) pageNum, + (int) sparsePageNum, + startOffset.empty() ? 
+ static_cast(0) : + startOffset[0]._fileOffset, + _eL6.getWriteOffset()); +#endif + _eL6.writeBits(0, 1); // Selector bit + writeStartOffset(_eL6, + startOffset, + _l6StartOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + _eL6.encodeExpGolomb(wordNum - _l6WordNum, + K_VALUE_COUNTFILE_L6_WORDNUM); + _eL6.writeComprBufferIfNeeded(); + size_t lcp = getLCP(word, _l6Word); + vespalib::stringref wordSuffix = word.substr(lcp); + _eL6.smallAlign(8); +#if 0 + LOG(info, + "lcp=%d, at offset %" PRIu64 , + (int) lcp, + _eL6.getWriteOffset()); +#endif + _eL6.writeBits(lcp, 8); + _eL6.writeComprBufferIfNeeded(); + _eL6.writeString(wordSuffix); + assert(pageNum >= _l6PageNum); + _eL6.encodeExpGolomb(pageNum - _l6PageNum, + K_VALUE_COUNTFILE_L6_PAGENUM); + _eL6.writeComprBufferIfNeeded(); + assert(_l6PageNum < pageNum); + assert(_l6SparsePageNum + 1 == sparsePageNum); + _l6SparsePageNum = sparsePageNum; + _l6PageNum = pageNum; + _l6StartOffset = startOffset; + _l6Word = word; + _l6WordNum = wordNum; +#if 0 + LOG(info, "after .. 
SS bit Offset %" PRId64, + _eL6.getWriteOffset()); +#endif +} + + +void +PageDict4SSWriter:: +addOverflowCounts(const vespalib::stringref &word, + const Counts &counts, + const StartOffset &startOffset, + uint64_t wordNum) +{ +#if 0 + std::ostringstream txtCounts; + std::ostringstream txtStartOffset; + std::ostringstream txtL6StartOffset; + txtCounts << counts; + txtStartOffset << startOffset; + txtL6StartOffset << _l6StartOffset; + LOG(info, + "addL6Overflow, \"%s\" wordNum %d, counts %s fileoffset %s l6startOffset %s", + word.c_str(), + (int) wordNum, + txtCounts.str().c_str(), + txtStartOffset.str().c_str(), + txtL6StartOffset.str().c_str()); +#endif + _eL6.writeBits(1, 1); // Selector bit + writeStartOffset(_eL6, + startOffset, + _l6StartOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + _eL6.encodeExpGolomb(wordNum - _l6WordNum, + K_VALUE_COUNTFILE_L6_WORDNUM); + _eL6.writeComprBufferIfNeeded(); + _eL6.smallAlign(8); + size_t lcp = getLCP(word, _l6Word); + vespalib::stringref wordSuffix = word.substr(lcp); + _eL6.writeBits(lcp, 8); + _eL6.writeComprBufferIfNeeded(); + _eL6.writeString(wordSuffix); + _eL6.writeCounts(counts); + _l6StartOffset = startOffset; + _l6StartOffset.adjust(counts); + _l6Word = word; + _l6WordNum = wordNum; +} + + +void +PageDict4SSWriter::flush(void) +{ +} + + +void +PageDict4SSWriter::checkPointWrite(vespalib::nbostream &out) +{ + out << _l6Word; + out << _l6StartOffset; + out << _l6PageNum; + out << _l6SparsePageNum; + out << _l6WordNum; +} + + +void +PageDict4SSWriter::checkPointRead(vespalib::nbostream &in) +{ + in >> _l6Word; + in >> _l6StartOffset; + in >> _l6PageNum; + in >> _l6SparsePageNum; + in >> _l6WordNum; +} + + +PageDict4SPWriter::PageDict4SPWriter(SSWriter &ssWriter, + EC &spe) + : _eL3(), + _wcL3(_eL3), + _eL4(), + _wcL4(_eL4), + _eL5(), + _wcL5(_eL5), + _l3Word(), + _l4Word(), + _l5Word(), + _l6Word(), + _l3WordOffset(0u), + _l4WordOffset(0u), + _l5WordOffset(0u), + _l3StartOffset(), + 
_l4StartOffset(), + _l5StartOffset(), + _l6StartOffset(), + _l3WordNum(1u), + _l4WordNum(1u), + _l5WordNum(1u), + _l6WordNum(1u), + _curL3OffsetL4(0u), + _curL3OffsetL5(0u), + _curL4OffsetL5(0u), + _headerSize(getPageHeaderBitSize()), + _l3Entries(0u), + _l4StrideCheck(0u), + _l5StrideCheck(0u), + _l3Size(0u), + _l4Size(0u), + _l5Size(0u), + _prevL3Size(0u), + _prevL4Size(0u), + _prevL5Size(0u), + _prevWordsSize(0u), + _sparsePageNum(0u), + _l3PageNum(0u), + _ssWriter(ssWriter), + _spe(spe) +{ +} + + +void +PageDict4SPWriter::setup() +{ + _eL3.copyParams(_spe); + _eL4.copyParams(_spe); + _eL5.copyParams(_spe); + _l6Word.clear(); + _wcL3.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _wcL4.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _wcL5.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _eL3.setWriteContext(&_wcL3); + _eL4.setWriteContext(&_wcL4); + _eL5.setWriteContext(&_wcL5); + _l3Word = _l6Word; + _l4Word = _l6Word; + _l5Word = _l6Word; + _l3WordOffset = 0u; + _l4WordOffset = 0u; + _l5WordOffset = 0u; + _l3StartOffset = _l6StartOffset; + // Handle extra padding after file header + _spe.padBits(getFileHeaderPad(_spe.getWriteOffset())); + resetPage(); + _headerSize += _spe.getWriteOffset() & (getPageBitSize() - 1); +} + + +PageDict4SPWriter::~PageDict4SPWriter(void) +{ +} + + +void +PageDict4SPWriter::flushPage(void) +{ + assert(_l3Entries > 0); + assert(_l3Size > 0); + assert(_headerSize >= getPageHeaderBitSize()); + uint32_t wordsSize = _prevWordsSize; + assert(_prevL3Size + _prevL4Size + _prevL5Size + _headerSize + + wordsSize * 8 <= getPageBitSize()); + assert(_prevL5Size < (1u << 15)); + assert(_prevL4Size < (1u << 15)); + assert(_prevL3Size < (1u << 15)); + assert(_l3Entries < (1u << 15)); + assert(wordsSize < (1u << 12)); + assert(wordsSize <= _words.size()); + + uint32_t l4Residue = getL4Entries(_l3Entries); + uint32_t l5Residue = getL5Entries(l4Residue); + + assert((l4Residue == 0) == (_prevL4Size == 0)); + 
assert((l5Residue == 0) == (_prevL5Size == 0)); + (void) l5Residue; + + EC &e = _spe; + e.writeBits(_prevL5Size, 15); + e.writeBits(_prevL4Size, 15); + e.writeBits(_l3Entries, 15); + e.writeBits(wordsSize, 12); + e.writeComprBufferIfNeeded(); + if (_prevL5Size > 0) { + _eL5.flush(); + const uint64_t *l5Buf = static_cast(_wcL5._comprBuf); + e.writeBits(l5Buf, 0, _prevL5Size); + } + if (_prevL4Size > 0) { + _eL4.flush(); + const uint64_t *l4Buf = static_cast(_wcL4._comprBuf); + e.writeBits(l4Buf, 0, _prevL4Size); + } + _eL3.flush(); + const uint64_t *l3Buf = static_cast(_wcL3._comprBuf); + e.writeBits(l3Buf, 0, _prevL3Size); + uint32_t padding = getPageBitSize() - _headerSize - _prevL5Size - _prevL4Size - + _prevL3Size - wordsSize * 8; + e.padBits(padding); + if (wordsSize > 0) { + // Pad with 7 NUL bytes to silence testing tools. + _words.reserve(_words.size() + 7); + memset(&*_words.end(), '\0', 7); + const char *wordsBufX = static_cast(&_words[0]); + size_t wordsBufXOff = reinterpret_cast(wordsBufX) & 7; + const uint64_t *wordsBuf = reinterpret_cast + (wordsBufX - wordsBufXOff); + e.writeBits(wordsBuf, 8 * wordsBufXOff, wordsSize * 8); + } + assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0); + _l6Word = _l3Word; + _l6StartOffset = _l3StartOffset; + _l6WordNum = _l3WordNum; + ++_sparsePageNum; +} + + +void +PageDict4SPWriter::flush(void) +{ + if (!empty()) { + flushPage(); + _ssWriter.addL6Skip(_l6Word, + _l6StartOffset, + _l6WordNum, + _l3PageNum, getSparsePageNum()); + } + _ssWriter.flush(); +} + + +void +PageDict4SPWriter::resetPage(void) +{ + _eL3.setupWrite(_wcL3); + _eL4.setupWrite(_wcL4); + _eL5.setupWrite(_wcL5); + assert(_eL3.getWriteOffset() == 0); + assert(_eL4.getWriteOffset() == 0); + assert(_eL5.getWriteOffset() == 0); + _l3Word = _l6Word; + _l4Word = _l6Word; + _l5Word = _l6Word; + _l3WordOffset = 0u; + _l4WordOffset = 0u; + _l5WordOffset = 0u; + _l3StartOffset = _l6StartOffset; + _l4StartOffset = _l6StartOffset; + _l5StartOffset = 
_l6StartOffset; + _l3WordNum = _l6WordNum; + _l4WordNum = _l6WordNum; + _l5WordNum = _l6WordNum; + _curL3OffsetL4 = 0u; + _curL3OffsetL5 = 0u; + _curL4OffsetL5 = 0u; + _l3Entries = 0u; + _l4StrideCheck = 0u; + _l5StrideCheck = 0u; + _l3Size = 0u; + _l4Size = 0u; + _l5Size = 0u; + _prevL3Size = 0u; + _prevL4Size = 0u; + _prevL5Size = 0u; + _prevWordsSize = 0u; + _words.clear(); + _headerSize = getPageHeaderBitSize(); +} + + +void +PageDict4SPWriter::addL3Skip(const vespalib::stringref &word, + const StartOffset &startOffset, + uint64_t wordNum, + uint64_t pageNum) +{ +#if 0 + LOG(info, + "addL3Skip(\"%s\"), wordNum=%d pageNum=%d", + word.c_str(), (int) wordNum, (int) pageNum); +#endif + assert(_l3WordOffset == _words.size()); + /* + * Update notion of previous size, converting tentative writes to + * full writes. This is used when flushing page, since last entry + * on each page (possibly overflowing page) is elided, in practice + * promoted to an L6 entry at SS level. + */ + _prevL3Size = _l3Size; + _prevL4Size = _l4Size; + _prevL5Size = _l5Size; + _prevWordsSize = _l3WordOffset; + + /* + * Tentative write of counts, word and skip info. Converted to full + * write when new entry is tentatively added to same page. 
+ */ + writeStartOffset(_eL3, + startOffset, + _l3StartOffset, + K_VALUE_COUNTFILE_L3_FILEOFFSET, + K_VALUE_COUNTFILE_L3_ACCNUMDOCS); +#if 0 + LOG(info, + "Adding l3 delta %d", (int) (wordNum - _l3WordNum)); +#endif + _eL3.encodeExpGolomb(wordNum - _l3WordNum, + K_VALUE_COUNTFILE_L3_WORDNUM); + _eL3.writeComprBufferIfNeeded(); + _l3Size = static_cast(_eL3.getWriteOffset()); + size_t lcp = getLCP(word, _l3Word); + _l3Word = word; + _l3StartOffset = startOffset; + _l3WordNum = wordNum; + ++_l3Entries; + ++_l4StrideCheck; + if (_l4StrideCheck >= getL4SkipStride()) + addL4Skip(lcp); + addLCPWord(word, lcp, _words); + _l3WordOffset = _words.size(); + _l3PageNum = pageNum; + if (_l3Size + _l4Size + _l5Size + _headerSize + 8 * _l3WordOffset > + getPageBitSize()) { + // Cannot convert tentative writes to full writes due to overflow. + // Flush existing full writes. + flushPage(); + + // Compensate for elided entry. + _l6Word = word; + _l6StartOffset = startOffset; + _l6WordNum = wordNum; + + _ssWriter.addL6Skip(_l6Word, + _l6StartOffset, + _l6WordNum, + _l3PageNum, getSparsePageNum()); + resetPage(); + } +} + + +void +PageDict4SPWriter::addL4Skip(size_t &lcp) +{ +#if 0 + LOG(info, + "addL4Skip(\"%s\")", + _l3Word.c_str()); +#endif + size_t tlcp = getLCP(_l3Word, _l4Word); + assert(tlcp <= lcp); + if (tlcp < lcp) + lcp = tlcp; + _l4StrideCheck = 0u; + _eL4.encodeExpGolomb(_l3WordOffset - _l4WordOffset, + K_VALUE_COUNTFILE_L4_WORDOFFSET); + _eL4.writeComprBufferIfNeeded(); + writeStartOffset(_eL4, + _l3StartOffset, + _l4StartOffset, + K_VALUE_COUNTFILE_L4_FILEOFFSET, + K_VALUE_COUNTFILE_L4_ACCNUMDOCS); + _eL4.encodeExpGolomb(_l3WordNum - _l4WordNum, + K_VALUE_COUNTFILE_L4_WORDNUM); + _eL4.writeComprBufferIfNeeded(); + _eL4.encodeExpGolomb(_l3Size - _curL3OffsetL4, + K_VALUE_COUNTFILE_L4_L3OFFSET); + _eL4.writeComprBufferIfNeeded(); + _l4StartOffset = _l3StartOffset; + _l4WordNum = _l3WordNum; + _curL3OffsetL4 = _l3Size; + _l4Size = _eL4.getWriteOffset(); + _l4Word = _l3Word; 
+ ++_l5StrideCheck; + if (_l5StrideCheck >= getL5SkipStride()) { + addL5Skip(lcp); + _l5StrideCheck = 0; + } + _l4WordOffset = _l3WordOffset + 2 + _l3Word.size() - lcp; +} + + +void +PageDict4SPWriter::addL5Skip(size_t &lcp) +{ +#if 0 + LOG(info, + "addL5Skip(\"%s\")", + _l3Word.c_str()); +#endif + size_t tlcp = getLCP(_l3Word, _l5Word); + assert(tlcp <= lcp); + if (tlcp < lcp) + lcp = tlcp; + _eL5.encodeExpGolomb(_l3WordOffset - _l5WordOffset, + K_VALUE_COUNTFILE_L5_WORDOFFSET); + _eL5.writeComprBufferIfNeeded(); + writeStartOffset(_eL5, + _l3StartOffset, + _l5StartOffset, + K_VALUE_COUNTFILE_L5_FILEOFFSET, + K_VALUE_COUNTFILE_L5_ACCNUMDOCS); + _eL5.encodeExpGolomb(_l3WordNum - _l5WordNum, + K_VALUE_COUNTFILE_L5_WORDNUM); + _eL5.writeComprBufferIfNeeded(); + _eL5.encodeExpGolomb(_l3Size - _curL3OffsetL5, + K_VALUE_COUNTFILE_L5_L3OFFSET); + _eL5.encodeExpGolomb(_l4Size - _curL4OffsetL5, + K_VALUE_COUNTFILE_L5_L4OFFSET); + _eL5.writeComprBufferIfNeeded(); + _l5StartOffset = _l3StartOffset; + _l5WordNum = _l3WordNum; + _curL3OffsetL5 = _l3Size; + _curL4OffsetL5 = _l4Size; + _l5Size = _eL5.getWriteOffset(); + _l5Word = _l3Word; + _l5WordOffset = _l3WordOffset + 2 + _l3Word.size() - lcp; +} + + +void +PageDict4SPWriter::checkPointWrite(vespalib::nbostream &out) +{ + _wcL3.checkPointWrite(out); + _wcL4.checkPointWrite(out); + _wcL5.checkPointWrite(out); + out << _l3Word << _l4Word << _l5Word << _l6Word; + out << _l3WordOffset << _l4WordOffset << _l5WordOffset; + out << _l3StartOffset << _l4StartOffset << _l5StartOffset << _l6StartOffset; + out << _l3WordNum << _l4WordNum << _l5WordNum << _l6WordNum; + out << _curL3OffsetL4 << _curL3OffsetL5 << _curL4OffsetL5; + out << _headerSize; + out << _l3Entries; + out << _l4StrideCheck << _l5StrideCheck; + out << _l3Size << _l4Size << _l5Size; + out << _prevL3Size << _prevL4Size << _prevL5Size << _prevWordsSize; + out << _sparsePageNum << _l3PageNum; + out << _words; +} + + +void 
+PageDict4SPWriter::checkPointRead(vespalib::nbostream &in) +{ /* the fields are read in exactly the order checkPointWrite() stores them */ + _wcL3.checkPointRead(in); + _wcL4.checkPointRead(in); + _wcL5.checkPointRead(in); + in >> _l3Word >> _l4Word >> _l5Word >> _l6Word; + in >> _l3WordOffset >> _l4WordOffset >> _l5WordOffset; + in >> _l3StartOffset >> _l4StartOffset >> _l5StartOffset >> _l6StartOffset; + in >> _l3WordNum >> _l4WordNum >> _l5WordNum >> _l6WordNum; + in >> _curL3OffsetL4 >> _curL3OffsetL5 >> _curL4OffsetL5; + in >> _headerSize; + in >> _l3Entries; + in >> _l4StrideCheck >> _l5StrideCheck; + in >> _l3Size >> _l4Size >> _l5Size; + in >> _prevL3Size >> _prevL4Size >> _prevL5Size >> _prevWordsSize; + in >> _sparsePageNum >> _l3PageNum; + in >> _words; +} + + /* + * Bind the page writer to the sparse page writer it feeds (spWriter) + * and to the encoder of the page file (pe). Word numbers start at 1, + * all other counters at zero. No buffers are allocated here; that is + * done by setup(). + */ +PageDict4PWriter::PageDict4PWriter(SPWriter &spWriter, + EC &pe) + : _eCounts(), + _wcCounts(_eCounts), + _eL1(), + _wcL1(_eL1), + _eL2(), + _wcL2(_eL2), + _countsWord(), + _l1Word(), + _l2Word(), + _l3Word(), + _pendingCountsWord(), + _countsWordOffset(0u), + _l1WordOffset(0u), + _l2WordOffset(0u), + _countsStartOffset(), + _l1StartOffset(), + _l2StartOffset(), + _l3StartOffset(), + _curCountOffsetL1(0u), + _curCountOffsetL2(0u), + _curL1OffsetL2(0u), + _headerSize(getPageHeaderBitSize()), + _countsEntries(0u), + _l1StrideCheck(0u), + _l2StrideCheck(0u), + _countsSize(0u), + _l1Size(0u), + _l2Size(0u), + _prevL1Size(0u), + _prevL2Size(0u), + _pageNum(0u), + _l3WordNum(1u), + _wordNum(1u), + _words(), + _spWriter(spWriter), + _pe(pe) +{ +} + + /* + * Prepare for writing: copy the coding parameters of _pe to the counts, + * L1 and L2 encoders, give each of them a buffer of twice the page byte + * size, pad _pe after the file header and reset the per-page state. + * Finally _headerSize is enlarged by the bit position _pe has reached + * within its current page. + */ +void +PageDict4PWriter::setup() +{ + _eCounts.copyParams(_pe); + _eL1.copyParams(_pe); + _eL2.copyParams(_pe); + _l3Word.clear(); + _wcCounts.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _wcL1.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _wcL2.allocComprBuf(getPageByteSize() * 2, getPageByteSize() * 2); + _eCounts.setWriteContext(&_wcCounts); + _eL1.setWriteContext(&_wcL1); + _eL2.setWriteContext(&_wcL2); + _countsWord = _l3Word; + _l1Word = _l3Word; + _l2Word = _l3Word; +
_pendingCountsWord.clear(); + _countsWordOffset = 0u; + _l1WordOffset = 0u; + _l2WordOffset = 0u; + _countsStartOffset = _l3StartOffset; + // Handle extra padding after file header + _pe.padBits(getFileHeaderPad(_pe.getWriteOffset())); + resetPage(); + _headerSize += _pe.getWriteOffset() & (getPageBitSize() - 1); /* resetPage() just set the plain page header size; add the bits already used in this page (presumably file header plus padding) */ +} + + /* Empty destructor body. */ +PageDict4PWriter::~PageDict4PWriter(void) +{ +} + + /* + * Write the current page to _pe. Layout, in order: four header fields + * (L2 size in 15 bits, L1 size in 15 bits, number of counts entries in + * 15 bits, byte length of the word data in 12 bits), the L2 stream, the + * L1 stream, the counts stream, padding, and last the word bytes, so + * that the page ends exactly on a page boundary (asserted). + * + * Afterwards the L3 state (_l3Word, _l3StartOffset, _l3WordNum) is moved + * to the pending word and _pageNum is incremented. The per-page state + * is not cleared here; callers use resetPage() for that. + */ +void +PageDict4PWriter::flushPage(void) +{ + assert(_countsEntries > 0); + assert(_countsSize > 0); + assert(_headerSize >= getPageHeaderBitSize()); + assert(_countsSize + _l1Size + _l2Size + _headerSize + + 8 * _countsWordOffset <= getPageBitSize()); + assert(_l2Size < (1u << 15)); + assert(_l1Size < (1u << 15)); + assert(_countsEntries < (1u << 15)); + assert(_countsWordOffset < (1u << 12)); + + uint32_t l1Residue = getL1Entries(_countsEntries); + uint32_t l2Residue = getL2Entries(l1Residue); + + assert((l1Residue == 0) == (_l1Size == 0)); + assert((l2Residue == 0) == (_l2Size == 0)); + (void) l2Residue; /* only referenced by the assert above */ + + EC &e = _pe; + e.writeBits(_l2Size, 15); + e.writeBits(_l1Size, 15); + e.writeBits(_countsEntries, 15); + e.writeBits(_countsWordOffset, 12); + e.writeComprBufferIfNeeded(); + if (_l2Size > 0) { + _eL2.flush(); + const uint64_t *l2Buf = static_cast(_wcL2._comprBuf); + e.writeBits(l2Buf, 0, _l2Size); + } + if (_l1Size > 0) { + _eL1.flush(); + const uint64_t *l1Buf = static_cast(_wcL1._comprBuf); + e.writeBits(l1Buf, 0, _l1Size); + } + _eCounts.flush(); + const uint64_t *countsBuf = static_cast + (_wcCounts._comprBuf); + e.writeBits(countsBuf, 0, _countsSize); + uint32_t padding = getPageBitSize() - _headerSize - _l2Size - _l1Size - + _countsSize - _countsWordOffset * 8; + e.padBits(padding); + if (_countsWordOffset > 0) { + // Pad with 7 NUL bytes to silence testing tools.
+ const size_t wordsLen = _words.size(); + _words.resize(wordsLen + 7, '\0'); /* pad inside the vector; the old code wrote through a dereferenced end() iterator */ + const char *wordsBufX = static_cast(&_words[0]); + size_t wordsBufXOff = reinterpret_cast(wordsBufX) & 7; + const uint64_t *wordsBuf = reinterpret_cast + (wordsBufX - wordsBufXOff); + e.writeBits(wordsBuf, 8 * wordsBufXOff, _countsWordOffset * 8); + _words.resize(wordsLen); /* drop the padding again so the buffer size is unchanged for callers */ + } + assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0); + _l3Word = _pendingCountsWord; + _l3StartOffset = _countsStartOffset; + _l3WordNum = _wordNum; + ++_pageNum; +} + + /* + * Finish writing: unless empty() holds, flush the current page and + * register it with the sparse page writer as an L3 skip entry. The + * sparse page writer is flushed in either case. + */ +void +PageDict4PWriter::flush(void) +{ + if (!empty()) { + flushPage(); + _spWriter.addL3Skip(_l3Word, + _l3StartOffset, + _l3WordNum, + getPageNum()); + } + _spWriter.flush(); +} + + /* + * Start a fresh page: rewind the counts, L1 and L2 encoders to offset 0, + * clear the word buffer and every per-page counter, and re-base the + * per-level word and start-offset trackers on the current L3 state. + * _headerSize goes back to the plain page header size. + */ +void +PageDict4PWriter::resetPage(void) +{ + _eCounts.setupWrite(_wcCounts); + _eL1.setupWrite(_wcL1); + _eL2.setupWrite(_wcL2); + assert(_eCounts.getWriteOffset() == 0); + assert(_eL1.getWriteOffset() == 0); + assert(_eL2.getWriteOffset() == 0); + _countsWord = _l3Word; + _l1Word = _l3Word; + _l2Word = _l3Word; + _pendingCountsWord.clear(); + _countsWordOffset = 0u; + _l1WordOffset = 0u; + _l2WordOffset = 0u; + _countsStartOffset = _l3StartOffset; + _l1StartOffset = _l3StartOffset; + _l2StartOffset = _l3StartOffset; + _curCountOffsetL1 = 0u; + _curCountOffsetL2 = 0u; + _curL1OffsetL2 = 0u; + _countsEntries = 0u; + _l1StrideCheck = 0u; + _l2StrideCheck = 0u; + _countsSize = 0u; + _l1Size = 0u; + _l2Size = 0u; + _prevL1Size = 0u; + _prevL2Size = 0u; + _words.clear(); + _headerSize = getPageHeaderBitSize(); +} + + /* + * Add the counts for word to the dictionary. Words lag one call behind + * their counts: this call appends the previously added word + * (_pendingCountsWord) to the page word buffer and encodes counts + * tentatively. If the page would then exceed the page bit size, the + * page is flushed, an L3 skip entry is handed to the sparse page writer + * and the counts are encoded again on a fresh page. Counts that do not + * fit in a page on their own are written as an overflow page instead. + */ +void +PageDict4PWriter:: +addCounts(const vespalib::stringref &word, + const Counts &counts) +{ +#if 0 + std::ostringstream txtcounts; + txtcounts << counts; + LOG(info, + "addCounts(\"%s\", %s), wordNum=%d", + word.c_str(), + txtcounts.str().c_str(), + (int) _wordNum); +#endif + assert(_countsWordOffset == _words.size()); + size_t lcp = getLCP(_pendingCountsWord, _countsWord); + if (_l1StrideCheck >= getL1SkipStride()) + addL1Skip(lcp); + if (_countsEntries > 0) +
addLCPWord(_pendingCountsWord, lcp, _words); + _eCounts.writeCounts(counts); + uint32_t eCountsOffset = static_cast(_eCounts.getWriteOffset()); + if (eCountsOffset + _l1Size + _l2Size + _headerSize + + 8 * (_countsWordOffset + 2 + _pendingCountsWord.size() - lcp) > + getPageBitSize()) { +#if 0 + LOG(info, + "Backtrack: eCountsOffset=%d, l1size=%d, l2size=%d, hdrsize=%d", + (int) eCountsOffset, + (int) _l1Size, + (int) _l2Size, + (int) _headerSize); +#endif + if (_l1StrideCheck == 0u) { /* zero here: addL1Skip() ran earlier in this call, or the page was just reset and the saved sizes are zero as well */ + _l1Size = _prevL1Size; // Undo L1 + _l2Size = _prevL2Size; // Undo L2 + } + if (_countsEntries > 0) { + flushPage(); + _spWriter.addL3Skip(_l3Word, + _l3StartOffset, + _l3WordNum, + getPageNum()); + resetPage(); + _eCounts.writeCounts(counts); + eCountsOffset = static_cast(_eCounts.getWriteOffset()); + } + if (eCountsOffset + _headerSize > getPageBitSize()) { + // overflow page. + addOverflowCounts(word, counts); + _spWriter.addOverflowCounts(word, counts, _countsStartOffset, + _l3WordNum); + _spWriter.addL3Skip(_l3Word, + _l3StartOffset, + _l3WordNum, + getPageNum()); + resetPage(); +#if 0 + std::ostringstream txtoffsets; + txtoffsets << _countsStartOffset; + LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str()); +#endif + return; + } + } + _countsSize = eCountsOffset; + ++_countsEntries; + ++_l1StrideCheck; + _countsStartOffset.adjust(counts); +#if 0 + std::ostringstream txtoffsets; + txtoffsets << _countsStartOffset; + LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str()); +#endif + _countsWord = _pendingCountsWord; + _countsWordOffset = _words.size(); + _pendingCountsWord = word; /* written to the word buffer by the next call, or taken as the L3 word by flushPage() */ + _wordNum++; +} + + +/* + * Private use. + * + * Write an overflow page for word: the four page header fields are all + * zero, and after alignment to 64 bits the current word number is stored + * as a 64-bit value. The counts are not stored in this page; they are + * only used to advance _l3StartOffset. Requires an empty page state + * (asserted). + */ +void +PageDict4PWriter::addOverflowCounts(const vespalib::stringref &word, + const Counts &counts) +{ + assert(_countsEntries == 0); + assert(_countsSize == 0); + assert(_headerSize >= getPageHeaderBitSize()); + assert(_countsSize + _l1Size + _l2Size + _headerSize <= getPageBitSize()); + assert(_l2Size == 0); + assert(_l1Size == 0); +
assert(_countsSize == 0); + assert(_countsWordOffset == 0); + + EC &e = _pe; + e.writeBits(0, 15); + e.writeBits(0, 15); + e.writeBits(0, 15); + e.writeBits(0, 12); + e.smallAlign(64); + e.writeComprBufferIfNeeded(); + e.writeBits(_wordNum, 64); // Identifies overflow for later read +#if 0 + LOG(info, + "AddOverflowCounts wordnum %d", (int) _wordNum); +#endif + uint32_t alignedHeaderSize = (_headerSize + 63) & -64; /* header size rounded up to a multiple of 64 bits, presumably mirroring smallAlign(64) above */ + uint32_t padding = getPageBitSize() - alignedHeaderSize - 64; + e.padBits(padding); + assert((e.getWriteOffset() & (getPageBitSize() - 1)) == 0); + _l3Word = word; + _l3StartOffset = _countsStartOffset; + _l3StartOffset.adjust(counts); + ++_pageNum; + ++_wordNum; + _l3WordNum = _wordNum; +} + + /* + * Append one L1 skip entry for the current counts position. The word + * offset, start offset and counts stream offset are encoded into _eL1 + * relative to the values saved at the previous L1 entry. The L1 and L2 + * sizes from before the call are saved first so that addCounts() can + * undo the entry. + * + * lcp is in/out: it is lowered to the prefix length that + * _pendingCountsWord shares with _l1Word. Calls addL2Skip() once every + * getL2SkipStride() invocations. + */ +void +PageDict4PWriter::addL1Skip(size_t &lcp) +{ + _prevL1Size = _l1Size; // Prepare for undo + _prevL2Size = _l2Size; // Prepare for undo + size_t tlcp = getLCP(_pendingCountsWord, _l1Word); + assert(tlcp <= lcp); + if (tlcp < lcp) + lcp = tlcp; + _l1StrideCheck = 0u; +#if 0 + LOG(info, + "addL1SKip(\"%s\"), lcp=%d, offset=%d -> %d", + _pendingCountsWord.c_str(), + (int) lcp, + (int) _l1WordOffset, + (int) _countsWordOffset); +#endif + _eL1.encodeExpGolomb(_countsWordOffset - _l1WordOffset, + K_VALUE_COUNTFILE_L1_WORDOFFSET); + _eL1.writeComprBufferIfNeeded(); + writeStartOffset(_eL1, + _countsStartOffset, + _l1StartOffset, + K_VALUE_COUNTFILE_L1_FILEOFFSET, + K_VALUE_COUNTFILE_L1_ACCNUMDOCS); + _eL1.encodeExpGolomb(_countsSize - _curCountOffsetL1, + K_VALUE_COUNTFILE_L1_COUNTOFFSET); + _eL1.writeComprBufferIfNeeded(); + _l1StartOffset = _countsStartOffset; + _curCountOffsetL1 = _countsSize; + _l1Size = _eL1.getWriteOffset(); + ++_l2StrideCheck; + if (_l2StrideCheck >= getL2SkipStride()) + addL2Skip(lcp); + _l1WordOffset = _countsWordOffset + 2 + _pendingCountsWord.size() - lcp; +} + + /* + * Append one L2 skip entry for the current counts position. Same scheme + * as addL1Skip(), with deltas against the previous L2 entry plus the + * growth of the L1 stream (_l1Size - _curL1OffsetL2). + * + * lcp is in/out: it is lowered to the prefix length that + * _pendingCountsWord shares with _l2Word. + */ +void +PageDict4PWriter::addL2Skip(size_t &lcp) +{ + size_t tlcp = getLCP(_pendingCountsWord, _l2Word); + assert(tlcp <= lcp); + if (tlcp < lcp) + lcp = tlcp; + _l2StrideCheck
= 0; +#if 0 + LOG(info, + "addL2SKip(\"%s\"), lcp=%d, offset=%d -> %d", + _pendingCountsWord.c_str(), + (int) lcp, + (int) _l2WordOffset, + (int) _countsWordOffset); +#endif + _eL2.encodeExpGolomb(_countsWordOffset - _l2WordOffset, + K_VALUE_COUNTFILE_L2_WORDOFFSET); + _eL2.writeComprBufferIfNeeded(); + writeStartOffset(_eL2, + _countsStartOffset, + _l2StartOffset, + K_VALUE_COUNTFILE_L2_FILEOFFSET, + K_VALUE_COUNTFILE_L2_ACCNUMDOCS); + _eL2.encodeExpGolomb(_countsSize - _curCountOffsetL2, + K_VALUE_COUNTFILE_L2_COUNTOFFSET); + _eL2.encodeExpGolomb(_l1Size - _curL1OffsetL2, + K_VALUE_COUNTFILE_L2_L1OFFSET); + _eL2.writeComprBufferIfNeeded(); + _l2StartOffset = _countsStartOffset; + _curCountOffsetL2 = _countsSize; + _curL1OffsetL2 = _l1Size; + _l2Size = _eL2.getWriteOffset(); + _l2WordOffset = _countsWordOffset + 2 + _pendingCountsWord.size() - lcp; /* offset just past the pending word once it has been appended */ +} + + /* + * Serialize the writer state to out: the counts, L1 and L2 write + * contexts first, then the member fields. checkPointRead() consumes + * the fields in the same order. + */ +void +PageDict4PWriter::checkPointWrite(vespalib::nbostream &out) +{ + _wcCounts.checkPointWrite(out); + _wcL1.checkPointWrite(out); + _wcL2.checkPointWrite(out); + out << _countsWord << _l1Word << _l2Word << _l3Word; + out << _pendingCountsWord; + out << _countsWordOffset << _l1WordOffset << _l2WordOffset; + out << _countsStartOffset << _l1StartOffset << _l2StartOffset; + out << _l3StartOffset; + out << _curCountOffsetL1 << _curCountOffsetL2 << _curL1OffsetL2; + out << _headerSize; + out << _countsEntries; + out << _l1StrideCheck << _l2StrideCheck; + out << _countsSize << _l1Size << _l2Size; + out << _prevL1Size << _prevL2Size; + out << _pageNum; + out << _l3WordNum << _wordNum; + out << _words; +} + + /* Restore the writer state from a stream produced by checkPointWrite(). */ +void +PageDict4PWriter::checkPointRead(vespalib::nbostream &in) +{ + _wcCounts.checkPointRead(in); + _wcL1.checkPointRead(in); + _wcL2.checkPointRead(in); + in >> _countsWord >> _l1Word >> _l2Word >> _l3Word; + in >> _pendingCountsWord; + in >> _countsWordOffset >> _l1WordOffset >> _l2WordOffset; + in >> _countsStartOffset >> _l1StartOffset >> _l2StartOffset; + in >> _l3StartOffset; + in >> _curCountOffsetL1 >>
_curCountOffsetL2 >> _curL1OffsetL2; + in >> _headerSize; + in >> _countsEntries; + in >> _l1StrideCheck >> _l2StrideCheck; + in >> _countsSize >> _l1Size >> _l2Size; + in >> _prevL1Size >> _prevL2Size; + in >> _pageNum; + in >> _l3WordNum >> _wordNum; + in >> _words; +} + + /* + * Result of a lookup at the sparse sparse level. Starts out as "not + * found, no overflow" with page numbers 0 and L6 word number 1. + */ +PageDict4SSLookupRes:: +PageDict4SSLookupRes(void) + : _l6Word(), + _lastWord(), + _l6StartOffset(), + _counts(), + _pageNum(0u), + _sparsePageNum(0u), + _l6WordNum(1u), + _startOffset(), + _res(false), + _overflow(false) +{ +} + + /* Empty destructor body. */ +PageDict4SSLookupRes:: +~PageDict4SSLookupRes(void) +{ +} + + /* + * Create a reader over the sparse sparse data held in cb. The buffer + * is not copied: _cb only takes over cb's buffer pointer and size. The + * three header sizes are multiplied by 8 to become bit offsets (so they + * are presumably byte counts); the file lengths are given in bits. + * The L7 and overflow tables stay empty until setup() is called. + */ +PageDict4SSReader:: +PageDict4SSReader(ComprBuffer &cb, + uint32_t ssFileHeaderSize, + uint64_t ssFileBitLen, + uint32_t spFileHeaderSize, + uint64_t spFileBitLen, + uint32_t pFileHeaderSize, + uint64_t pFileBitLen) + : _cb(sizeof(uint64_t)), + _ssFileBitLen(ssFileBitLen), + _ssStartOffset(ssFileHeaderSize * 8), + _l7(), + _ssd(), + _spFileBitLen(spFileBitLen), + _pFileBitLen(pFileBitLen), + _spStartOffset(spFileHeaderSize * 8), + _pStartOffset(pFileHeaderSize * 8), + _spFirstPageNum(0u), + _spFirstPageOffset(0u), + _pFirstPageNum(0u), + _pFirstPageOffset(0u), + _overflows() +{ + // Reference existing compressed buffer + _cb._comprBuf = cb._comprBuf; + _cb._comprBufSize = cb._comprBufSize; +} + + /* Empty destructor body. */ +PageDict4SSReader:: +~PageDict4SSReader(void) +{ +} + + /* + * Take the coding parameters from ssd, work out the number of the first + * page and the offset within it for the sparse page file and the page + * file, and then scan the whole L6+overflow stream once to build the + * in-memory L7 skip table (_l7) and the overflow table (_overflows). + */ +void +PageDict4SSReader::setup(DC &ssd) +{ + _ssd.copyParams(ssd); + // Handle extra padding after file header + uint32_t offset = _spStartOffset + getFileHeaderPad(_spStartOffset); + _spFirstPageNum = offset / getPageBitSize(); + _spFirstPageOffset = offset & (getPageBitSize() - 1); + offset = _pStartOffset + getFileHeaderPad(_pStartOffset); + _pFirstPageNum = offset / getPageBitSize(); + _pFirstPageOffset = offset & (getPageBitSize() - 1); + // setup(); + + DC dL6; + +#if 0 + LOG(info, + "comprBuf=%p, comprBufSize=%d", + static_cast(_cb._comprBuf), + (int) _cb._comprBufSize); +#endif + setDecoderPosition(dL6, _cb, _ssStartOffset); + +
dL6.copyParams(_ssd); + + _l7.clear(); + + vespalib::string word; + Counts counts; + StartOffset startOffset; + uint64_t pageNum = _pFirstPageNum; + uint32_t sparsePageNum = _spFirstPageNum; + uint32_t l7StrideCheck = 0; + uint32_t l7Ref = noL7Ref(); // Last L6 entry not after this L7 entry + + uint32_t l6Offset = dL6.getReadOffset(); /* NOTE(review): 32-bit, while _ssFileBitLen is 64-bit -- confirm the stream never exceeds 2^32 bits */ + uint64_t l6WordNum = 1; + bool forceL7Entry = false; + bool overflow = false; + + while (l6Offset < _ssFileBitLen) { +#if 0 + LOG(info, + "L6Offset=%" PRIu32 ", bitLen=%" PRIu64, + l6Offset, + _ssFileBitLen); +#endif + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL6._); + overflow = ((oVal & TOP_BIT64) != 0); + oVal <<= 1; + length = 1; + UC64_READBITS_NS(o, EC); /* consume the one-bit overflow flag tested above */ + UC64_DECODECONTEXT_STORE(o, dL6._); + + /* + * L7 entry for each 16th L6 entry and right before and after any + * overflow entry. + */ + if (l7StrideCheck >= getL7SkipStride() || + (l7StrideCheck > 0 && (overflow || forceL7Entry))) { + // Don't update l7Ref if this L7 entry points to an overflow entry + if (!forceL7Entry) + l7Ref = _l7.size(); // Self-ref if referencing L6 entry + _l7.push_back(L7Entry(word, startOffset, l6WordNum, + l6Offset, sparsePageNum, pageNum, l7Ref)); + l7StrideCheck = 0; + forceL7Entry = false; + } + readStartOffset(dL6, + startOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL6._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L6_WORDNUM, + EC); +#if 0 + LOG(info, + "Bumping l6wordnum from %d to %d (delta %d)", + (int) l6WordNum, (int) (l6WordNum + val64) , (int) val64); +#endif + l6WordNum += val64; + UC64_DECODECONTEXT_STORE(o, dL6._); + dL6.smallAlign(8); + const uint8_t *bytes = dL6.getByteCompr(); + size_t lcp = *bytes; + ++bytes; + assert(lcp <= word.size()); + word.resize(lcp); /* keep the prefix shared with the previous entry; the stored suffix is appended next */ + word += reinterpret_cast(bytes); + dL6.setByteCompr(bytes + word.size() + 1 - lcp); + if (overflow) { +#if 0 + LOG(info, +
"AddOverflowRef2 wordnum %d", (int) (l6WordNum - 1)); +#endif + _overflows.push_back(OverflowRef(l6WordNum - 1, _l7.size())); + dL6.readCounts(counts); + startOffset.adjust(counts); + forceL7Entry = true; // Add new L7 entry as soon as possible + } else { + UC64_DECODECONTEXT_LOAD(o, dL6._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L6_PAGENUM, + EC); + pageNum += val64; + ++sparsePageNum; + UC64_DECODECONTEXT_STORE(o, dL6._); + } +#if 0 + std::ostringstream txtfileoffset; + txtfileoffset << startOffset; + LOG(info, + "ssreader::setup " + "word=%s, l6offset=%d->%d, startOffsets=%s overflow=%s", + word.c_str(), + (int) l6Offset, + (int) dL6.getReadOffset(), + txtfileoffset.str().c_str(), + overflow ? "true" : "false"); +#endif + ++l7StrideCheck; + l6Offset = dL6.getReadOffset(); + } + if (l7StrideCheck > 0) { + if (!forceL7Entry) + l7Ref = _l7.size(); // Self-ref if referencing L6 entry + _l7.push_back(L7Entry(word, startOffset, l6WordNum, + l6Offset, sparsePageNum, pageNum, l7Ref)); + } + assert(l6Offset == _ssFileBitLen); +} + + /* + * Look up key at the sparse sparse level. A binary search in the L7 + * table selects where decoding of the L6+overflow stream starts; entries + * are then decoded until one is found whose word is not less than key. + * + * In the result, _res is true when such an entry exists or when key + * matched an overflow entry exactly (then _overflow, _counts and + * _startOffset are set as well). _pageNum and _sparsePageNum tell where + * to continue, and _l6Word, _l6StartOffset and _l6WordNum describe the + * last L6 entry before key. _lastWord is cleared when the end of the + * stream was reached. + */ +PageDict4SSLookupRes +PageDict4SSReader:: +lookup(const vespalib::stringref &key) +{ + PageDict4SSLookupRes res; + + DC dL6; + + dL6.copyParams(_ssd); + + uint32_t l7Pos = 0; + uint32_t l7Ref = noL7Ref(); + + L7Vector::const_iterator l7lb; + l7lb = std::lower_bound(_l7.begin(), _l7.end(), key); + + l7Pos = l7lb - _l7.begin(); /* iterator distance; never dereferences end() and is valid for an empty table */ + StartOffset startOffset; + uint64_t pageNum = _pFirstPageNum; + uint32_t sparsePageNum = _spFirstPageNum; + uint32_t l6Offset = _ssStartOffset; + uint64_t l6WordNum = 1; + uint64_t wordNum = l6WordNum; + + vespalib::string l6Word; // Last L6 entry word + vespalib::string word; + StartOffset l6StartOffset; // Last L6 entry file offset + + // Setup for decoding of L6+overflow stream + if (l7Pos > 0) { + L7Entry &l7e = _l7[l7Pos - 1]; + l7Ref = l7e._l7Ref; + startOffset = l7e._l7StartOffset; + word = l7e._l7Word; + l6Offset = l7e._l6Offset; + wordNum = l7e._l7WordNum; + } + + /* + * Setup L6 only variables, used
when no overflow matches. + * + * l7Ref == l7Pos - 1, when _l7[l7Pos -1] references end of L6 + * entry in L6+overflow stream. + * + * l7Ref != l7Pos - 1, when _l7[l7Pos -1] references end of overflow + * entry in L6+overflow stream, and is used for backtracking to end + * of previous L6 entry in L6+overflow stream. + */ + if (l7Ref != noL7Ref()) { + L7Entry &l7e = _l7[l7Ref]; + sparsePageNum = l7e._sparsePageNum; + pageNum = l7e._pageNum; + l6Word = l7e._l7Word; + l6StartOffset = l7e._l7StartOffset; + l6WordNum = l7e._l7WordNum; + } + +#if 0 + LOG(info, + "sslookup1: l6WordNum=%d, l6Word=\"%s\", key=\"%s\", l6Offset=%d", + (int) l6WordNum, + l6Word.c_str(), + key.c_str(), + (int) l6Offset); +#endif + + setDecoderPosition(dL6, _cb, l6Offset); + + Counts counts; + + while (l6Offset < _ssFileBitLen) { + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL6._); + bool overflow = ((oVal & TOP_BIT64) != 0); /* the top bit of each entry flags an overflow entry */ + oVal <<= 1; + length = 1; + UC64_READBITS_NS(o, EC); + UC64_DECODECONTEXT_STORE(o, dL6._); + + readStartOffset(dL6, + startOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL6._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L6_WORDNUM, + EC); + wordNum += val64; + UC64_DECODECONTEXT_STORE(o, dL6._); + dL6.smallAlign(8); + const uint8_t *bytes = dL6.getByteCompr(); + size_t lcp = *bytes; + ++bytes; + assert(lcp <= word.size()); + word.resize(lcp); + word += reinterpret_cast(bytes); + dL6.setByteCompr(bytes + word.size() + 1 - lcp); /* step over the suffix bytes and their terminating NUL */ + if (overflow) { +#if 0 + LOG(info, + "sslookup: wordNum=%d, word=\"%s\", key=\"%s\"", + (int) wordNum, + word.c_str(), + key.c_str()); +#endif + bool l6NotLessThanKey = !(word < key); + if (l6NotLessThanKey) { + if (key == word) { + dL6.readCounts(counts); + res._overflow = true; + res._counts = counts; + res._startOffset = startOffset; + l6WordNum = wordNum - 1; // overloaded meaning + } + break; // key
< counts + } + LOG(error, "FATAL: Missing L7 entry for overflow entry"); + abort(); // counts < key, should not happen (missing L7 entry) + } else { + bool l6NotLessThanKey = !(word < key); + if (l6NotLessThanKey) + break; // key <= counts + UC64_DECODECONTEXT_LOAD(o, dL6._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L6_PAGENUM, + EC); + pageNum += val64; + ++sparsePageNum; + UC64_DECODECONTEXT_STORE(o, dL6._); + l6Word = word; + l6StartOffset = startOffset; + l6WordNum = wordNum; + } + l6Offset = dL6.getReadOffset(); /* reached only for entries that sort before key; both break paths leave l6Offset untouched */ + } + assert(l6Offset <= _ssFileBitLen); + res._l6Word = l6Word; + if (l6Offset >= _ssFileBitLen) + res._lastWord.clear(); // Mark that word is beyond end of dictionary + else + res._lastWord = word; + res._l6StartOffset = l6StartOffset; + res._pageNum = pageNum; + res._sparsePageNum = sparsePageNum; + res._l6WordNum = l6WordNum; + // Lookup succeeded if not run to end of L6 info or if overflow was found + // Failed lookup means we want keys larger than the highest present.
+ res._res = l6Offset < _ssFileBitLen || res._overflow; + return res; +} + + /* + * Fetch the overflow entry that has word number wordNum. The entry + * must exist in _overflows (asserted). Decoding starts at the L6 offset + * stored in the L7 entry just before the one the overflow refers to, or + * at the start of the stream when there is no earlier L7 entry, and + * exactly one entry, which must be an overflow entry, is decoded. + * + * The result has _res and _overflow set, _counts and _startOffset for + * the word, _lastWord set to the word, and _l6Word / _l6StartOffset + * taken from the last L6 entry before it, if there is one. + */ +PageDict4SSLookupRes +PageDict4SSReader:: +lookupOverflow(uint64_t wordNum) const +{ + PageDict4SSLookupRes res; + + assert(!_overflows.empty()); + + OverflowVector::const_iterator lb = + std::lower_bound(_overflows.begin(), + _overflows.end(), + wordNum); + + assert(lb != _overflows.end()); + assert(lb->_wordNum == wordNum); + uint32_t l7Ref = lb->_l7Ref; + assert(l7Ref < _l7.size()); + + const vespalib::string &word = _l7[l7Ref]._l7Word; +#if 0 + LOG(info, + "lookupOverflow: wordNum %d -> word %s, next l7 Pos is %d", + (int) wordNum, + word.c_str(), + (int) l7Ref); +#endif + uint64_t l6Offset = _ssStartOffset; + StartOffset startOffset; + if (l7Ref > 0) { + l6Offset = _l7[l7Ref - 1]._l6Offset; + startOffset = _l7[l7Ref - 1]._l7StartOffset; + } + + StartOffset l6StartOffset; + vespalib::string l6Word; + + uint32_t l7Ref2 = _l7[l7Ref]._l7Ref; + if (l7Ref2 != noL7Ref()) { + // last L6 entry before overflow entry + const L7Entry &l6Ref = _l7[l7Ref2]; + l6Word = l6Ref._l7Word; + l6StartOffset = l6Ref._l7StartOffset; + } + + DC dL6; + + dL6.copyParams(_ssd); + setDecoderPosition(dL6, _cb, l6Offset); + +#if 0 + std::ostringstream txtStartOffset; + std::ostringstream txtL6StartOffset; + txtStartOffset << startOffset; + txtL6StartOffset << l6StartOffset; + LOG(info, + "Lookupoverflow l6Offset=%d, l6fileoffset=%s, fileoffset=%s", + (int) l6Offset, + txtL6StartOffset.str().c_str(), + txtStartOffset.str().c_str()); +#endif + + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL6._); + bool overflow = ((oVal & TOP_BIT64) != 0); + oVal <<= 1; + length = 1; + UC64_READBITS_NS(o, EC); + assert(overflow); + (void) overflow; /* only referenced by the assert above */ + UC64_DECODECONTEXT_STORE(o, dL6._); + + readStartOffset(dL6, + startOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL6._); + UC64_SKIPEXPGOLOMB_NS(o, +
K_VALUE_COUNTFILE_L6_WORDNUM, + EC); + UC64_DECODECONTEXT_STORE(o, dL6._); + + dL6.smallAlign(8); + const uint8_t *bytes = dL6.getByteCompr(); + size_t lcp = *bytes; + ++bytes; + assert(lcp <= word.size()); + vespalib::stringref suffix = reinterpret_cast(bytes); + dL6.setByteCompr(bytes + suffix.size() + 1); + assert(lcp + suffix.size() == word.size()); + assert(suffix == word.substr(lcp)); + (void) lcp; /* only referenced by the asserts above */ + Counts counts; + dL6.readCounts(counts); +#if 0 + std::ostringstream txtCounts; + txtStartOffset.str(""); + txtStartOffset << startOffset; + txtCounts << counts; + LOG(info, + "Lookupoverflow fileoffset=%s, counts=%s", + txtStartOffset.str().c_str(), + txtCounts.str().c_str()); +#endif + res._overflow = true; + res._counts = counts; + res._startOffset = startOffset; + res._l6StartOffset = l6StartOffset; + res._l6Word = l6Word; + res._lastWord = word; + res._res = true; + return res; +} + + /* + * Serialize the reader state to out, including the L7 and overflow + * tables. The buffer reference _cb is not written. + */ +void +PageDict4SSReader::checkPointWrite(vespalib::nbostream &out) +{ + out << _ssFileBitLen << _ssStartOffset; + out << _l7; + _ssd.checkPointWrite(out); + out << _spFileBitLen << _pFileBitLen; + out << _spStartOffset << _pStartOffset; + out << _spFirstPageNum << _spFirstPageOffset; + out << _pFirstPageNum << _pFirstPageOffset; + out << _overflows; +} + + /* + * Restore the reader state from a stream produced by checkPointWrite(); + * the fields are read in the order they were written. + */ +void +PageDict4SSReader::checkPointRead(vespalib::nbostream &in) +{ + in >> _ssFileBitLen >> _ssStartOffset; + in >> _l7; + _ssd.checkPointRead(in); + in >> _spFileBitLen >> _pFileBitLen; + in >> _spStartOffset >> _pStartOffset; + in >> _spFirstPageNum >> _spFirstPageOffset; + in >> _pFirstPageNum >> _pFirstPageOffset; + in >> _overflows; +} + + /* Result of a sparse page lookup; page number 0 and L3 word number 1 initially. */ +PageDict4SPLookupRes:: +PageDict4SPLookupRes(void) + : _l3Word(), + _lastWord(), + _l3StartOffset(), + _pageNum(0u), + _l3WordNum(1u) +{ +} + + /* Empty destructor body. */ +PageDict4SPLookupRes:: +~PageDict4SPLookupRes(void) +{ +} + + /* + * Search one sparse page for key. The page header gives the L5 size, + * the L4 size, the number of L3 entries and the byte length of the word + * data; the L5, L4 and L3 levels are then walked in turn, advancing + * while the candidate word is less than key. + * + * On return _l3Word, _l3StartOffset and _l3WordNum describe the last L3 + * entry before key, _lastWord is the first word not less than key, and + * _pageNum is lowestPageNum plus the number of L3 entries passed. The + * word of the final L3 entry is not stored in the page; lastSPWord + * supplies it. + */ +void +PageDict4SPLookupRes:: +lookup(const SSReader &ssReader, + const void *sparsePage, + const vespalib::stringref &key, + const vespalib::stringref &l6Word, + const
vespalib::stringref &lastSPWord, + const StartOffset &l6StartOffset, + uint64_t l6WordNum, + uint64_t lowestPageNum) +{ +// const uint64_t *p = static_cast(sparsePage); + + DC dL3; // L3 stream + DC dL4; // L4 stream + DC dL5; // L5 stream + + dL3.copyParams(ssReader.getSSD()); + dL4.copyParams(ssReader.getSSD()); + dL5.copyParams(ssReader.getSSD()); + uint32_t spStartOffset = 0; + if (l6WordNum == 1) + spStartOffset = ssReader._spFirstPageOffset; /* presumably: word number 1 means the first sparse page, which starts after the padded file header -- confirm */ + setDecoderPositionInPage(dL5, sparsePage, spStartOffset); + + uint32_t l5Size = dL5.readBits(15); + uint32_t l4Size = dL5.readBits(15); + uint32_t l3Entries = dL5.readBits(15); + uint32_t wordsSize = dL5.readBits(12); + uint32_t l3Residue = l3Entries; + + assert(l3Entries > 0); + uint32_t l4Residue = getL4Entries(l3Entries); + uint32_t l5Residue = getL5Entries(l4Residue); + + assert((l4Residue == 0) == (l4Size == 0)); + assert((l5Residue == 0) == (l5Size == 0)); + + uint32_t l5Offset = getPageHeaderBitSize() + spStartOffset; + uint32_t l4Offset = l5Offset + l5Size; + uint32_t l3Offset = l4Offset + l4Size; + + assert(l5Offset == dL5.getReadOffset()); + + uint32_t wordOffset = getPageByteSize() - wordsSize; /* the word data occupies the last wordsSize bytes of the page */ + const char *wordBuf = static_cast(sparsePage) + wordOffset; + + _l3Word = l6Word; + _l3StartOffset = l6StartOffset; + vespalib::string word; + uint32_t l3WordOffset = 0; + uint32_t l5WordOffset = l3WordOffset; + uint64_t l3WordNum = l6WordNum; + + while (l5Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL5._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC); + l5WordOffset += val64; + UC64_DECODECONTEXT_STORE(o, dL5._); + const char *l5WordBuf = wordBuf + l5WordOffset; + size_t lcp = *reinterpret_cast(l5WordBuf); + ++l5WordBuf; + assert(lcp <= _l3Word.size()); + word = _l3Word.substr(0, lcp) + l5WordBuf; + bool l3NotLessThanKey = !(word < key); + if (l3NotLessThanKey) + break; + _l3Word = word; + l3WordOffset =
l5WordOffset + 2 + word.size() - lcp; + l5WordOffset = l3WordOffset; + readStartOffset(dL5, + _l3StartOffset, + K_VALUE_COUNTFILE_L5_FILEOFFSET, + K_VALUE_COUNTFILE_L5_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL5._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L5_WORDNUM, + EC); + l3WordNum += val64; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC); + l3Offset += val64; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC); + l4Offset += val64; + UC64_DECODECONTEXT_STORE(o, dL5._); + --l5Residue; + assert(l4Residue >= getL5SkipStride()); + l4Residue -= getL5SkipStride(); + assert(l3Residue > getL5SkipStride() * getL4SkipStride()); + l3Residue -= getL5SkipStride() * getL4SkipStride(); /* one L5 step passes this many L3 entries */ + } + setDecoderPositionInPage(dL4, sparsePage, l4Offset); + uint32_t l4WordOffset = l3WordOffset; + while (l4Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL4._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC); + l4WordOffset += val64; + UC64_DECODECONTEXT_STORE(o, dL4._); + const char *l4WordBuf = wordBuf + l4WordOffset; + size_t lcp = *reinterpret_cast(l4WordBuf); + ++l4WordBuf; + assert(lcp <= _l3Word.size()); + word = _l3Word.substr(0, lcp) + l4WordBuf; + bool l3NotLessThanKey = !(word < key); + if (l3NotLessThanKey) + break; + _l3Word = word; + l3WordOffset = l4WordOffset + 2 + word.size() - lcp; + l4WordOffset = l3WordOffset; + readStartOffset(dL4, + _l3StartOffset, + K_VALUE_COUNTFILE_L4_FILEOFFSET, + K_VALUE_COUNTFILE_L4_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL4._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L4_WORDNUM, + EC); + l3WordNum += val64; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC); + l3Offset += val64; + UC64_DECODECONTEXT_STORE(o, dL4._); + --l4Residue; + assert(l3Residue > getL4SkipStride()); + l3Residue -= getL4SkipStride(); /* one L4 step passes this many L3 entries */ + } + + setDecoderPositionInPage(dL3, sparsePage, l3Offset); +
assert(l3Residue > 0); + while (l3Residue > 0) { + if (l3Residue > 1) { + const char *l3WordBuf = wordBuf + l3WordOffset; + size_t lcp = *reinterpret_cast(l3WordBuf); + ++l3WordBuf; + assert(lcp <= _l3Word.size()); + word = _l3Word.substr(0, lcp) + l3WordBuf; + bool l3NotLessThanKey = !(word < key); + if (l3NotLessThanKey) + break; + _l3Word = word; + l3WordOffset += 2 + word.size() - lcp; + } else { + word = lastSPWord; /* the final L3 entry has no word stored in the page; the caller supplies it */ + assert(!word.empty()); // Should've stopped at SS level + bool l3NotLessThanKey = !(word < key); + if (l3NotLessThanKey) + break; + abort(); /* the last word of the page sorts before key: treated as fatal, so the assignment below is never reached */ + _l3Word = word; + } + readStartOffset(dL3, + _l3StartOffset, + K_VALUE_COUNTFILE_L3_FILEOFFSET, + K_VALUE_COUNTFILE_L3_ACCNUMDOCS); + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL3._); + UC64_DECODEEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L3_WORDNUM, + EC); + UC64_DECODECONTEXT_STORE(o, dL3._); + l3WordNum += val64; + --l3Residue; + } + _lastWord = word; + _pageNum = lowestPageNum + l3Entries - l3Residue; + _l3WordNum = l3WordNum; + // Lookup succeeded if not run to end of L3 info. + // Shouldn't have tried to look at page if word < key, i.e. lookup at this + // level should always succeed.
+ assert(l3Residue > 0); +} + + /* + * Result of a lookup in a dictionary page. Starts out as "not found" + * with word number 1 and no next word. + */ +PageDict4PLookupRes:: +PageDict4PLookupRes(void) + : _counts(), + _startOffset(), + _wordNum(1u), + _res(false), + _nextWord(NULL) +{ +} + + /* Empty destructor body. */ +PageDict4PLookupRes:: +~PageDict4PLookupRes(void) +{ +} + +bool +PageDict4PLookupRes:: +lookup(const SSReader &ssReader, + const void *page, + const vespalib::stringref &key, + const vespalib::stringref &l3Word, + const vespalib::stringref &lastPWord, + const StartOffset &l3StartOffset, + uint64_t l3WordNum) +{ + DC dCounts; // counts stream (sparse counts) + DC dL1; // L1 stream + DC dL2; // L2 stream + + dCounts.copyParams(ssReader.getSSD()); + dL1.copyParams(ssReader.getSSD()); + dL2.copyParams(ssReader.getSSD()); + + uint32_t pStartOffset = 0; + if (l3WordNum == 1) + pStartOffset = ssReader._pFirstPageOffset; + setDecoderPositionInPage(dL2, page, pStartOffset); + + uint32_t l2Size = dL2.readBits(15); + uint32_t l1Size = dL2.readBits(15); + uint32_t countsEntries = dL2.readBits(15); + uint32_t wordsSize = dL2.readBits(12); + uint32_t countsResidue = countsEntries; + + if (countsEntries == 0) { + /* + * Tried to lookup word that is between an overflow word and + * the previous word in the dictionary.
+ */ + _startOffset = l3StartOffset; + _wordNum = l3WordNum; + return false; + } + + uint32_t l1Residue = getL1Entries(countsEntries); + uint32_t l2Residue = getL2Entries(l1Residue); + + assert((l1Residue == 0) == (l1Size == 0)); + assert((l2Residue == 0) == (l2Size == 0)); + + uint32_t l2Offset = getPageHeaderBitSize() + pStartOffset; + uint32_t l1Offset = l2Offset + l2Size; + uint32_t countsOffset = l1Offset + l1Size; + + assert(l2Offset == dL2.getReadOffset()); + + uint32_t wordOffset = getPageByteSize() - wordsSize; + const char *wordBuf = static_cast(page) + wordOffset; + + vespalib::string countsWord = l3Word; + StartOffset countsStartOffset = l3StartOffset; + vespalib::string word; + Counts counts; + + uint32_t countsWordOffset = 0; + uint32_t l2WordOffset = countsWordOffset; + uint64_t wordNum = l3WordNum; + while (l2Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL2._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC); + l2WordOffset += val64; + UC64_DECODECONTEXT_STORE(o, dL2._); + const char *l2WordBuf = wordBuf + l2WordOffset; + size_t lcp = *reinterpret_cast(l2WordBuf); + ++l2WordBuf; + assert(lcp <= countsWord.size()); + word = countsWord.substr(0, lcp) + l2WordBuf; + bool countsNotLessThanKey = !(word < key); + if (countsNotLessThanKey) + break; + countsWord = word; + countsWordOffset = l2WordOffset + 2 + word.size() - lcp; + l2WordOffset = countsWordOffset; + + readStartOffset(dL2, + countsStartOffset, + K_VALUE_COUNTFILE_L2_FILEOFFSET, + K_VALUE_COUNTFILE_L2_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL2._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC); + countsOffset += val64; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC); + l1Offset += val64; + UC64_DECODECONTEXT_STORE(o, dL2._); + --l2Residue; + assert(l1Residue >= getL2SkipStride()); + l1Residue -= getL2SkipStride(); + assert(countsResidue > 
getL2SkipStride() * getL1SkipStride()); + countsResidue -= getL2SkipStride() * getL1SkipStride(); + wordNum += getL2SkipStride() * getL1SkipStride(); + } + setDecoderPositionInPage(dL1, page, l1Offset); + uint32_t l1WordOffset = countsWordOffset; + while (l1Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + uint64_t val64; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, dL1._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC); + l1WordOffset += val64; + UC64_DECODECONTEXT_STORE(o, dL1._); + const char *l1WordBuf = wordBuf + l1WordOffset; + size_t lcp = *reinterpret_cast(l1WordBuf); + ++l1WordBuf; + assert(lcp <= countsWord.size()); + word = countsWord.substr(0, lcp) + l1WordBuf; + bool countsNotLessThanKey = !(word < key); + if (countsNotLessThanKey) + break; + countsWord = word; + countsWordOffset = l1WordOffset + 2 + word.size() - lcp; + l1WordOffset = countsWordOffset; + + readStartOffset(dL1, + countsStartOffset, + K_VALUE_COUNTFILE_L1_FILEOFFSET, + K_VALUE_COUNTFILE_L1_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, dL1._); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC); + countsOffset += val64; + UC64_DECODECONTEXT_STORE(o, dL1._); + --l1Residue; + assert(countsResidue > getL1SkipStride()); + countsResidue -= getL1SkipStride(); + wordNum += getL1SkipStride(); + } + + setDecoderPositionInPage(dCounts, page, countsOffset); + assert(countsResidue > 0); + while (countsResidue > 0) { + dCounts.readCounts(counts); + if (countsResidue > 1) { + const char *countsWordBuf = wordBuf + countsWordOffset; + size_t lcp = + *reinterpret_cast(countsWordBuf); + ++countsWordBuf; + assert(lcp <= countsWord.size()); + word = countsWord.substr(0, lcp) + countsWordBuf; + bool countsNotLessThanKey = !(word < key); + if (countsNotLessThanKey) + break; + countsWordOffset += 2 + word.size() - lcp; + countsWord = word; + } else { + word = lastPWord; + assert(!word.empty()); // Should've stopped at SS level + bool countsNotLessThanKey 
= !(word < key); + if (countsNotLessThanKey) + break; + } + countsStartOffset.adjust(counts); + ++wordNum; + --countsResidue; + } + _startOffset = countsStartOffset; + _wordNum = wordNum; + // Lookup succeeded if word found. + if (key == word) { + _counts = counts; + _res = true; + } else { + // Shouldn't have tried to look at page if word < key, and we know + // that key != word. Thus we can assert that key < word. + assert(key < word); + } + return _res; +} + +PageDict4Reader::PageDict4Reader(const SSReader &ssReader, + DC &spd, + DC &pd) + : _pd(pd), + _countsResidue(0), + _ssReader(ssReader), + _pFileBitLen(ssReader._pFileBitLen), + _startOffset(), + _overflowPage(false), + _counts(), + _cc(), + _ce(), + _words(), + _wc(), + _we(), + _lastWord(), + _lastSSWord(), + _spd(spd), + _l3Residue(0u), + _spwords(), + _spwc(), + _spwe(), + _ssd(), + _wordNum(1u) +{ +} + + +/* + * Prepare the reader for sequential reading: copy decoder parameters from + * the sparse sparse reader, skip the padding after the file headers, load + * the first page and sparse page when the page file holds any data, and + * position the sparse sparse decoder at _ssReader._ssStartOffset. + */ +void +PageDict4Reader::setup() +{ + _ssd.copyParams(_ssReader.getSSD()); + _spd.copyParams(_ssReader.getSSD()); + _pd.copyParams(_ssReader.getSSD()); + assert(_pd.getReadOffset() == _ssReader._pStartOffset); + assert(_spd.getReadOffset() == _ssReader._spStartOffset); + // Handle extra padding after file header + _pd.skipBits(getFileHeaderPad(_ssReader._pStartOffset)); + _spd.skipBits(getFileHeaderPad(_ssReader._spStartOffset)); + assert(_pFileBitLen >= _pd.getReadOffset()); + if (_pFileBitLen > _pd.getReadOffset()) { + setupPage(); + setupSPage(); + } + + const ComprBuffer &sscb = _ssReader._cb; + uint32_t ssStartOffset = _ssReader._ssStartOffset; + setDecoderPosition(_ssd, sscb, ssStartOffset); +} + + +PageDict4Reader::~PageDict4Reader(void) +{ +} + + +/* + * Read the page header at the current _pd position, decode past the L2 + * and L1 skip data, and buffer the counts entries and word bytes of the + * page. A header with zero counts entries and zero L1/L2 sizes marks an + * overflow page: _overflowPage is set and nothing is buffered. + */ +void +PageDict4Reader::setupPage(void) +{ +#if 0 + LOG(info, + "setupPage(%ld), " + (long int) _pd.getReadOffset()); +#endif + uint32_t l2Size = _pd.readBits(15); + uint32_t l1Size = _pd.readBits(15); + uint32_t countsEntries = _pd.readBits(15); + uint32_t wordsSize = _pd.readBits(12); + _countsResidue = countsEntries; + +#if 0 + _pd.skipBits(l2Size + 
l1Size); + Counts counts; +#else + /* No counts entries and no L1/L2 data: this is an overflow page. */ + if (countsEntries == 0 && l1Size == 0 && l2Size == 0) { + _pd.smallAlign(64); + _overflowPage = true; + return; + } + _overflowPage = false; + assert(countsEntries > 0); + uint32_t l1Residue = getL1Entries(countsEntries); + uint32_t l2Residue = getL2Entries(l1Residue); + + uint64_t beforePos = _pd.getReadOffset(); + Counts counts; + StartOffset startOffset; + while (l2Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _pd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _pd._); + readStartOffset(_pd, + startOffset, + K_VALUE_COUNTFILE_L2_FILEOFFSET, + K_VALUE_COUNTFILE_L2_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, _pd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _pd._); + --l2Residue; + } + assert(_pd.getReadOffset() == beforePos + l2Size); + while (l1Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _pd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _pd._); + readStartOffset(_pd, + startOffset, + K_VALUE_COUNTFILE_L1_FILEOFFSET, + K_VALUE_COUNTFILE_L1_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, _pd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _pd._); + --l1Residue; + } + assert(_pd.getReadOffset() == beforePos + l2Size + l1Size); + (void) beforePos; +#endif + /* Buffer all counts entries of the page. */ + _counts.clear(); + while (countsEntries > 0) { + _pd.readCounts(counts); + _counts.push_back(counts); + --countsEntries; + } + _cc = _counts.begin(); + _ce = _counts.end(); + uint32_t pageOffset = _pd.getReadOffset() & (getPageBitSize() - 1); + uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset; + _pd.skipBits(padding); + /* + * NOTE(review): the read that follows takes &_words[0] even when + * wordsSize is 0; confirm that is safe for this container type. + */ + _words.resize(wordsSize); + 
_pd.readBytes(reinterpret_cast(&_words[0]), wordsSize); + _wc = _words.begin(); + _we = _words.end(); +} + + +/* + * Read the sparse page header at the current _spd position, decode past + * the L5 and L4 skip data and the L3 entries, and buffer the word bytes + * of the sparse page. _l3Residue is set to the number of L3 entries. + */ +void +PageDict4Reader::setupSPage(void) +{ +#if 0 + LOG(info, "setupSPage(%d),", (int) _spd.getReadOffset()); +#endif + uint32_t l5Size = _spd.readBits(15); + uint32_t l4Size = _spd.readBits(15); + uint32_t l3Entries = _spd.readBits(15); + uint32_t wordsSize = _spd.readBits(12); + _l3Residue = l3Entries; + +#if 0 + _spd.skipBits(l5Size + l4Size); +#else + + assert(l3Entries > 0); + uint32_t l4Residue = getL4Entries(l3Entries); + uint32_t l5Residue = getL5Entries(l4Residue); + + uint64_t beforePos = _spd.getReadOffset(); + StartOffset startOffset; + while (l5Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _spd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _spd._); + readStartOffset(_spd, + startOffset, + K_VALUE_COUNTFILE_L5_FILEOFFSET, + K_VALUE_COUNTFILE_L5_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, _spd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDNUM, EC); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _spd._); + --l5Residue; + } + assert(_spd.getReadOffset() == beforePos + l5Size); + while (l4Residue > 0) { + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _spd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _spd._); + readStartOffset(_spd, + startOffset, + K_VALUE_COUNTFILE_L4_FILEOFFSET, + K_VALUE_COUNTFILE_L4_ACCNUMDOCS); + UC64_DECODECONTEXT_LOAD(o, _spd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDNUM, EC); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC); + UC64_DECODECONTEXT_STORE(o, _spd._); + --l4Residue; + } + assert(_spd.getReadOffset() == beforePos + l5Size + l4Size); + (void) 
l4Size; + (void) l5Size; + (void) beforePos; +#endif + while (l3Entries > 1) { + readStartOffset(_spd, + startOffset, + K_VALUE_COUNTFILE_L3_FILEOFFSET, + K_VALUE_COUNTFILE_L3_ACCNUMDOCS); + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _spd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L3_WORDNUM, EC); + UC64_DECODECONTEXT_STORE(o, _spd._); + --l3Entries; + } + uint32_t pageOffset = _spd.getReadOffset() & (getPageBitSize() - 1); + uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset; + _spd.skipBits(padding); + _spwords.resize(wordsSize); + _spd.readBytes(reinterpret_cast(&_spwords[0]), wordsSize); + _spwc = _spwords.begin(); + _spwe = _spwords.end(); +} + + +/* + * Decode the next word from the page word buffer into word. + * + * An entry is one LCP byte (number of leading characters shared with + * _lastWord) followed by a NUL terminated suffix. _wc is left just past + * the terminator. + */ +void +PageDict4Reader::decodePWord(vespalib::string &word) +{ + assert(_wc != _we); + size_t lcp = static_cast(*_wc); + ++_wc; + assert(lcp <= _lastWord.size()); + assert(_wc != _we); + word = _lastWord.substr(0, lcp); + while (*_wc != 0) { + word += *_wc; + ++_wc; + assert(_wc != _we); /* checked before the next dereference */ + } + assert(_wc != _we); + ++_wc; +} + + +/* + * Decode the next word from the sparse page word buffer into word. + * + * Same entry format as the page word buffer: one LCP byte relative to + * _lastWord followed by a NUL terminated suffix. _spwc is left just past + * the terminator. + */ +void +PageDict4Reader::decodeSPWord(vespalib::string &word) +{ + assert(_spwc != _spwe); + size_t lcp = static_cast(*_spwc); + ++_spwc; + assert(lcp <= _lastWord.size()); + assert(_spwc != _spwe); + word = _lastWord.substr(0, lcp); + while (*_spwc != 0) { + word += *_spwc; + ++_spwc; + assert(_spwc != _spwe); /* checked before the next dereference */ + } + assert(_spwc != _spwe); + ++_spwc; +} + + +/* + * Decode the next L6 word from the sparse sparse stream into word. + * + * Overflow entries met on the way are decoded, recorded in _lastSSWord + * and stepped over together with their counts. Nothing is decoded when + * the stream is already at its end on entry. + */ +void +PageDict4Reader::decodeSSWord(vespalib::string &word) +{ + uint32_t l6Offset = _ssd.getReadOffset(); + + while (l6Offset < _ssReader._ssFileBitLen) { + UC64_DECODECONTEXT(o); + uint32_t length; + const bool bigEndian = true; + UC64_DECODECONTEXT_LOAD(o, _ssd._); + bool overflow = ((oVal & TOP_BIT64) != 0); + oVal <<= 1; + length = 1; + UC64_READBITS_NS(o, EC); + UC64_DECODECONTEXT_STORE(o, _ssd._); + + StartOffset startOffset; + readStartOffset(_ssd, + startOffset, + K_VALUE_COUNTFILE_L6_FILEOFFSET, + K_VALUE_COUNTFILE_L6_ACCNUMDOCS); + 
UC64_DECODECONTEXT_LOAD(o, _ssd._); + UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L6_WORDNUM, EC); + UC64_DECODECONTEXT_STORE(o, _ssd._); + + _ssd.smallAlign(8); + const uint8_t *bytes = _ssd.getByteCompr(); + size_t lcp = *bytes; + ++bytes; + assert(lcp <= _lastSSWord.size()); + word = _lastSSWord.substr(0, lcp); + word += reinterpret_cast(bytes); + _ssd.setByteCompr(bytes + word.size() + 1 - lcp); + _lastSSWord = word; +#if 0 + LOG(info, + "word is %s LCP %d, overflow=%s", + word.c_str(), + (int) lcp, + overflow ? "true" : "false"); +#endif + if (overflow) { + Counts counts; + _ssd.readCounts(counts); + } else { + UC64_DECODECONTEXT_LOAD(o, _ssd._); + UC64_SKIPEXPGOLOMB_NS(o, + K_VALUE_COUNTFILE_L6_PAGENUM, + EC); + UC64_DECODECONTEXT_STORE(o, _ssd._); + break; + } + l6Offset = _ssd.getReadOffset(); + } +} + +/* + * Fetch the next dictionary entry in sequence. + * + * word, wordNum and counts receive the entry. When neither page counts + * nor an overflow page remain, word and counts are cleared and wordNum + * is set to DictionaryFileSeqRead::noWordNumHigh() to mark end of file. + */ +void +PageDict4Reader::readCounts(vespalib::string &word, + uint64_t &wordNum, + Counts &counts) +{ + if (_countsResidue > 0) { + assert(_cc != _ce); + counts = *_cc; + ++_cc; + if (_countsResidue > 1) { + assert(_cc != _ce); + } else { + assert(_cc == _ce); + } + _startOffset.adjust(counts); + if (_countsResidue > 1) { + decodePWord(word); + _lastWord = word; + if (_countsResidue == 2) { + assert(_wc == _we); + } else { + assert(_wc != _we); + } + } else { + /* Last entry on the page: its word comes from the sparse page + * or sparse sparse level instead of the page word buffer. */ + assert(_l3Residue > 0); + if (_l3Residue > 1) + decodeSPWord(word); + else + decodeSSWord(word); + _lastWord = word; + --_l3Residue; + } + --_countsResidue; + if (_countsResidue == 0) { + assert((_pd.getReadOffset() & (getPageBitSize() - 1)) == 0); + if (_pd.getReadOffset() < _pFileBitLen) { + setupPage(); + if (_l3Residue == 0) + setupSPage(); + } else { + assert(_pd.getReadOffset() == _pFileBitLen); + } + } + wordNum = _wordNum++; + } else if (_overflowPage) { + readOverflowCounts(word, counts); + _overflowPage = false; + assert(_l3Residue > 0); + vespalib::string tword; + if (_l3Residue > 1) + decodeSPWord(tword); + else + decodeSSWord(tword); + assert(tword == word); + --_l3Residue; + _lastWord = 
word; + _pd.align(getPageBitSize()); + if (_pd.getReadOffset() < _pFileBitLen) { + setupPage(); + if (_l3Residue == 0) + setupSPage(); + } else { + assert(_pd.getReadOffset() == _pFileBitLen); + } + wordNum = _wordNum++; + } else { + // Mark end of file. + word.clear(); + counts.clear(); + wordNum = search::index::DictionaryFileSeqRead::noWordNumHigh(); + } +} + + +/* + * Read the 64 bit word number stored in an overflow page and fetch the + * matching word and counts from the sparse sparse reader. The start + * offset reported by that lookup must equal _startOffset, which is then + * advanced by the counts. + */ +void +PageDict4Reader::readOverflowCounts(vespalib::string &word, + Counts &counts) +{ + uint64_t wordNum = _pd.readBits(64); + + PageDict4SSLookupRes wtsslr; + wtsslr = _ssReader.lookupOverflow(wordNum); + assert(wtsslr._overflow); + assert(wtsslr._res); + + word = wtsslr._lastWord; + counts = wtsslr._counts; + +#if 0 + std::ostringstream txtCounts; + std::ostringstream txtStartOffset; + std::ostringstream txtLRStartOffset; + + txtCounts << counts; + txtStartOffset << _startOffset; + txtLRStartOffset << wtsslr._startOffset; + LOG(info, + "readOverflowCounts _wordNum=%" PRIu64 + ", counts=%s, startOffset=%s (should be %s)", + _wordNum, + txtCounts.str().c_str(), + txtLRStartOffset.str().c_str(), + txtStartOffset.str().c_str()); +#endif + + assert(wtsslr._startOffset == _startOffset); + _startOffset.adjust(counts); +} + +/* + * Write the reader state to out. Iterators are saved as offsets from the + * start of their buffers; each end offset must equal the buffer size. + */ +void +PageDict4Reader::checkPointWrite(vespalib::nbostream &out) +{ + out << _countsResidue; + out << _overflowPage; + out << _counts; + size_t ccOff = _cc - _counts.begin(); + size_t ceOff = _ce - _counts.begin(); + assert(ceOff == _counts.size()); + out << ccOff << ceOff; + out << _words; + size_t wcOff = _wc - _words.begin(); + size_t weOff = _we - _words.begin(); + assert(weOff == _words.size()); + out << wcOff << weOff; + out << _lastWord; + out << _lastSSWord; + out << _l3Residue; + out << _spwords; + size_t spwcOff = _spwc - _spwords.begin(); + size_t spweOff = _spwe - _spwords.begin(); + assert(spweOff == _spwords.size()); + out << spwcOff << spweOff; + _ssd.checkPointWrite(out); + out << _ssd.getReadOffset(); + out << _wordNum; +} + +void 
+PageDict4Reader::checkPointRead(vespalib::nbostream &in) +{ + /* + * Fields are read back in the order checkPointWrite stores them. + * Iterators are rebuilt from the saved offsets and the sparse sparse + * decoder is repositioned at the saved read offset. + */ + in >> _countsResidue; + in >> _overflowPage; + in >> _counts; + size_t ccOff; + size_t ceOff; + in >> ccOff >> ceOff; + _cc = _counts.begin() + ccOff; + _ce = _counts.begin() + ceOff; + in >> _words; + size_t wcOff; + size_t weOff; + in >> wcOff >> weOff; + _wc = _words.begin() + wcOff; + _we = _words.begin() + weOff; + in >> _lastWord; + in >> _lastSSWord; + in >> _l3Residue; + in >> _spwords; + size_t spwcOff; + size_t spweOff; + in >> spwcOff >> spweOff; + _spwc = _spwords.begin() + spwcOff; + _spwe = _spwords.begin() + spweOff; + _ssd.checkPointRead(in); + int64_t ssReadOffset; + in >> ssReadOffset; + const ComprBuffer &sscb = _ssReader._cb; + setDecoderPosition(_ssd, sscb, ssReadOffset); + in >> _wordNum; +} + +} // namespace bitcompression + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h new file mode 100644 index 00000000000..895a15aa9f3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h @@ -0,0 +1,836 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include "countcompression.h" + +namespace search +{ + +namespace bitcompression +{ + + +/* + * File offset and accumulated document count for a position in the + * posting data. Comparisons are decided by _fileOffset; the asserts check + * that _accNumDocs is ordered the same way. + */ +class PageDict4StartOffset +{ +public: + uint64_t _fileOffset; + uint64_t _accNumDocs; + + PageDict4StartOffset(void) + : _fileOffset(0u), + _accNumDocs(0u) + { + } + + PageDict4StartOffset(uint64_t fileOffset, uint64_t accNumDocs) + : _fileOffset(fileOffset), + _accNumDocs(accNumDocs) + { + } + + bool + operator>=(const PageDict4StartOffset &rhs) const + { + if (_fileOffset >= rhs._fileOffset) { + assert(_accNumDocs >= rhs._accNumDocs); + return true; + } + assert(_accNumDocs < rhs._accNumDocs); + return false; + } + + bool + operator>(const PageDict4StartOffset &rhs) const + { + if (_fileOffset > rhs._fileOffset) { + assert(_accNumDocs > rhs._accNumDocs); + return true; + } + assert(_accNumDocs <= rhs._accNumDocs); + return false; + } + + bool + operator==(const PageDict4StartOffset &rhs) const + { + if (_fileOffset == rhs._fileOffset) { + assert(_accNumDocs == rhs._accNumDocs); + return true; + } + assert(_accNumDocs != rhs._accNumDocs); + if (_fileOffset < rhs._fileOffset) { + assert(_accNumDocs < rhs._accNumDocs); + } else { + assert(_accNumDocs > rhs._accNumDocs); + } + return false; + } + + /* Advance past one word: add its bit length and document count. */ + void + adjust(const index::PostingListCounts &counts) + { + _fileOffset += counts._bitLength; + _accNumDocs += counts._numDocs; + } +}; + +std::ostream & +operator<<(std::ostream &stream, const index::PostingListCounts &counts); + +/* + * Page geometry and skip stride constants: 4096 byte pages and a page + * header of 15 + 15 + 15 + 12 bits. + */ +class PageDict4PageParams +{ +public: + using Counts = index::PostingListCounts; + typedef PageDict4StartOffset StartOffset; + + static uint32_t + getPageByteSize(void) + { + return 4096; + } + + static uint32_t + getPageBitSize(void) + { + return getPageByteSize() * 8; + } + + static uint32_t + getPageHeaderBitSize(void) + { + return 15u + 15u + 15u + 12u; + } + + static uint32_t + getMaxFileHeaderPad(void) + { + return 999u; + } + + static uint32_t + getFileHeaderPad(uint32_t offset); + + static uint32_t + 
getL1SkipStride(void) + { + return 16; + } + + static uint32_t + getL2SkipStride(void) + { + return 8; + } + + static uint32_t + getL4SkipStride(void) + { + return 16; + } + + static uint32_t + getL5SkipStride(void) + { + return 8; + } + + static uint32_t + getL7SkipStride(void) + { + return 8; + } + + static uint32_t + noL7Ref(void) + { + return std::numeric_limits::max(); + } + + /* + * Number of L1 skip entries for a page with countsEntries counts. + * The argument is unsigned, so 0 wraps around in the subtraction; + * callers must pass at least 1. + */ + static uint32_t + getL1Entries(uint32_t countsEntries) + { + return (countsEntries - 1) / getL1SkipStride(); + } + + static uint32_t + getL2Entries(uint32_t l1Entries) + { + return l1Entries / getL2SkipStride(); + } + + /* + * Number of L4 skip entries for a sparse page with l3Entries entries. + * As with getL1Entries, callers must pass at least 1. + */ + static uint32_t + getL4Entries(uint32_t l3Entries) + { + return (l3Entries - 1) / getL4SkipStride(); + } + + static uint32_t + getL5Entries(uint32_t l4Entries) + { + return l4Entries / getL5SkipStride(); + } +}; +/* + * Sparse sparse layout for random access word counts: + * + * selector bit + * 0 => L6 entry, with word, data file deltas + * 1 => overflow entry, with word, data file deltas, sparse counts + * + * Read from file to memory (compressed mix of L6 entries and overflow entries) + * + * Uncompressed L7 array in memory, usable for binary search. + * + * File header should contain number of entries + */ + +class PageDict4SSWriter : public PageDict4PageParams +{ + typedef PostingListCountFileEncodeContext EC; + typedef EC SSEC; + +private: + EC &_eL6; // L6 stream + vespalib::string _l6Word; // L6 word + StartOffset _l6StartOffset; // file offsets + accnum + uint64_t _l6PageNum; // Page number for last L6 entry + uint32_t _l6SparsePageNum; // Sparse page number for last L6 entry + uint64_t _l6WordNum; + +public: + PageDict4SSWriter(SSEC &sse); + + ~PageDict4SSWriter(void); + + /* + * Add L6 skip entry. + * + * startOffset represents file position / accNumDocs after word. + */ + void + addL6Skip(const vespalib::stringref &word, + const StartOffset &startOffset, + uint64_t wordNum, + uint64_t pageNum, + uint32_t sparsePageNum); + + /* + * Add overflow counts entry. 
+ * + * startOffset represents file position / accNumDocs at start of entry. + */ + void + addOverflowCounts(const vespalib::stringref &word, + const Counts &counts, + const StartOffset &startOffset, + uint64_t wordNum); + + void + flush(void); + + + void + checkPointWrite(vespalib::nbostream &out); + + void + checkPointRead(vespalib::nbostream &in); +}; + + +/* + * Sparse page layout for random access word counts: + * + * 15 bits L5 size + * 15 bits L4 size + * 15 bits number of L3 entries in page + * this can be used to derive number of L4 and L5 entries, using + * skip stride info. + * 12 bits word string size + * L5 data (word ref delta, offset to L4 and L3 data, data file delta) + * L4 data (word ref delta, offset to L3 data, data file delta) + * L3 data (word ref delta, offset to full page file is implicit, data file delta) + * padding + * word strings (LCP + suffix + NUL) + * + * File header should be defined + */ + +class PageDict4SPWriter : public PageDict4PageParams +{ + typedef PostingListCountFileEncodeContext EC; + typedef PageDict4SSWriter SSWriter; + +private: + EC _eL3; // L3 stream + ComprFileWriteContext _wcL3;// L3 buffer + EC _eL4; // L4 stream + ComprFileWriteContext _wcL4;// L4 buffer + EC _eL5; // L5 stream + ComprFileWriteContext _wcL5;// L5 buffer + vespalib::string _l3Word; // last L3 word written + vespalib::string _l4Word; // last L4 word written + vespalib::string _l5Word; // last L5 word written + vespalib::string _l6Word; // word before this sparse page + uint32_t _l3WordOffset; // Offset for next L3 word to write + uint32_t _l4WordOffset; // Offset for last L4 word written + uint32_t _l5WordOffset; // Offset for last L5 word written + + // file offsets + StartOffset _l3StartOffset; + + // Offsets in data files for last L4 entry + StartOffset _l4StartOffset; + + // Offsets in data files for last L5 entry + StartOffset _l5StartOffset; + + // Offsets in data files for last L6 entry + StartOffset _l6StartOffset; + + uint64_t _l3WordNum; 
// word number next L3 entry to write + uint64_t _l4WordNum; // word number last L4 entry + uint64_t _l5WordNum; // word number last L5 entry + uint64_t _l6WordNum; // word number last L6 entry + + uint32_t _curL3OffsetL4; // Offset in L3 for last L4 entry + uint32_t _curL3OffsetL5; // Offset in L3 for last L5 entry + uint32_t _curL4OffsetL5; // Offset in L4 for last L5 entry + + uint32_t _headerSize; // Size of page header + + uint32_t _l3Entries; // Number of L3 entries on page + uint32_t _l4StrideCheck; // L3 entries since last L4 entry + uint32_t _l5StrideCheck; // L4 entries since last L5 entry + + uint32_t _l3Size; // Size of L3 entries + uint32_t _l4Size; // Size of L4 entries + uint32_t _l5Size; // Size of L5 entries + uint32_t _prevL3Size; // Previous size of L3 entries + uint32_t _prevL4Size; // Previous size of L4 entries + uint32_t _prevL5Size; // Previous size of L5 entries + uint32_t _prevWordsSize; // previous size of words + uint32_t _sparsePageNum; + uint32_t _l3PageNum; // Page number for last L3 entry + std::vector _words; // Word buffer + + // Sparse sparse entries and counts that don't fit in a page + SSWriter &_ssWriter; + // Encode context where paged sparse counts go + EC &_spe; + +public: + PageDict4SPWriter(SSWriter &sparseSparsewriter, + EC &spe); + + ~PageDict4SPWriter(void); + + void + setup(); + + void + flushPage(void); + + void + flush(void); + + void + resetPage(void); + + void + addL3Skip(const vespalib::stringref &word, + const StartOffset &startOffset, + uint64_t wordNum, + uint64_t pageNum); + + + void + addL4Skip(size_t &lcp); + + void + addL5Skip(size_t &lcp); + + /* True when the current sparse page holds no L3 entries. */ + bool + empty(void) const + { + return _l3Entries == 0; + } + + uint32_t + getSparsePageNum(void) const + { + return _sparsePageNum; + } + + /* + * Add overflow counts entry. + * + * startOffset represents file position / accNumDocs at start of entry. 
+ */ + void + addOverflowCounts(const vespalib::stringref &word, + const Counts &counts, + const StartOffset &startOffset, + uint64_t wordNum) + { + _ssWriter.addOverflowCounts(word, counts, startOffset, wordNum); + } + + void + checkPointWrite(vespalib::nbostream &out); + + void + checkPointRead(vespalib::nbostream &in); +}; + +/* + * Page layout for random access word counts: + * + * 15 bits L2 size + * 15 bits L1 size + * 15 bits number of words in page + * this can be used to derive number of L1 and L2 entries, using + * skip stride info. + * 12 bits word string size + * L2 data (word ref delta, offset to L1 and counts data, data file delta) + * L1 data (word ref delta, offset to counts, data file delta) + * counts (sparse count) + * padding + * word strings (LCP + suffix + NUL) + * + * Alternate layout for overflow page: + * + * 15 bits L2 size hardcoded to 0 + * 15 bits L1 size hardcoded to 0 + * 15 bits number of words in page, hardcoded to 0 + * 12 bits word string size, hardcoded to 0 + * More info in sparse sparse file.
+ *
+ * File header should be defined
+ */
+
+class PageDict4PWriter : public PageDict4PageParams
+{
+public:
+    typedef PageDict4SPWriter SPWriter;
+    typedef PostingListCountFileEncodeContext EC;
+
+private:
+    EC _eCounts;                    // counts stream (sparse counts)
+    ComprFileWriteContext _wcCounts;// counts buffer
+    EC _eL1;                        // L1 stream
+    ComprFileWriteContext _wcL1;    // L1 buffer
+    EC _eL2;                        // L2 stream
+    ComprFileWriteContext _wcL2;    // L2 buffer
+    vespalib::string _countsWord;   // last counts on page
+    vespalib::string _l1Word;       // Last L1 word written
+    vespalib::string _l2Word;       // Last L2 word written
+    vespalib::string _l3Word;       // word before this page
+    vespalib::string _pendingCountsWord; // pending counts word (counts written)
+    uint32_t _countsWordOffset;     // Offset for next counts word to write
+    uint32_t _l1WordOffset;         // Offset of last L1 word written
+    uint32_t _l2WordOffset;         // Offset of last L2 word written
+
+    // file offsets
+    StartOffset _countsStartOffset;
+
+    // Offsets in data files for last L1 entry
+    StartOffset _l1StartOffset;
+
+    // Offsets in data files for last L2 entry
+    StartOffset _l2StartOffset;
+
+    // Offsets in data files for last L3 entry
+    StartOffset _l3StartOffset;
+
+    uint32_t _curCountOffsetL1;     // Offset in eCounts for last L1 entry
+    uint32_t _curCountOffsetL2;     // Offset in eCounts for last L2 entry
+    uint32_t _curL1OffsetL2;        // Offset in eL1 for last L2 entry
+
+    uint32_t _headerSize;           // Size of page header
+
+    uint32_t _countsEntries;        // Number of count entries on page
+    uint32_t _l1StrideCheck;        // Count entries since last L1 entry
+    uint32_t _l2StrideCheck;        // L1 entries since last L2 entry
+
+    uint32_t _countsSize;           // Size of counts
+    uint32_t _l1Size;               // Size of L1 entries
+    uint32_t _l2Size;               // Size of L2 entries
+    uint32_t _prevL1Size;           // Previous size of L1 entries
+    uint32_t _prevL2Size;           // Previous size of L2 entries
+    uint64_t _pageNum;              // Page number.
+    uint64_t _l3WordNum;            // last L3 word num written
+    uint64_t _wordNum;              // current word number
+    // NOTE(review): the element type of this std::vector appears to have been
+    // lost when the patch text was extracted; restore it from the upstream header.
+    std::vector _words;             // Word buffer
+    SPWriter &_spWriter;
+    // Encode context where paged counts go
+    EC &_pe;
+
+    // Private overflow path; presumably used for counts that do not fit in a
+    // page -- the definition is not visible in this header.
+    void
+    addOverflowCounts(const vespalib::stringref &word,
+                      const Counts &counts);
+
+public:
+    // The two arguments presumably initialize _spWriter and _pe; the
+    // definition is not part of this header -- confirm in the .cpp file.
+    PageDict4PWriter(SPWriter &spWriter,
+                     EC &pe);
+
+    ~PageDict4PWriter(void);
+
+    void
+    setup();
+
+    void
+    flushPage(void);
+
+    void
+    flush(void);
+
+    void
+    resetPage(void);
+
+    void
+    addCounts(const vespalib::stringref &word,
+              const Counts &counts);
+
+    void
+    addL1Skip(size_t &lcp);
+
+    void
+    addL2Skip(size_t &lcp);
+
+    /*
+     * Returns true when the current page holds no count entries.
+     */
+    bool
+    empty(void) const
+    {
+        return _countsEntries == 0;
+    }
+
+    /*
+     * Returns the stored page number (_pageNum).
+     */
+    uint64_t
+    getPageNum(void) const
+    {
+        return _pageNum;
+    }
+
+    /*
+     * Returns _wordNum - 1, i.e. one less than the "current word number"
+     * member; presumably the number of the most recently added word.
+     */
+    uint64_t
+    getWordNum() const
+    {
+        return _wordNum - 1;
+    }
+
+    // Checkpoint hooks; presumably serialize the writer state to, and restore
+    // it from, the given stream -- definitions are not visible in this header.
+    void
+    checkPointWrite(vespalib::nbostream &out);
+
+    void
+    checkPointRead(vespalib::nbostream &in);
+};
+
+
+/*
+ * Plain result record for a lookup in the sparse sparse file (it is the
+ * return type of PageDict4SSReader::lookup() and lookupOverflow()).
+ */
+class PageDict4SSLookupRes
+{
+public:
+    using Counts = index::PostingListCounts;
+    typedef PageDict4StartOffset StartOffset;
+
+    vespalib::string _l6Word;       // last L6 word before key
+    vespalib::string _lastWord;     // L6 or overflow word >= key
+    StartOffset _l6StartOffset;     // File offsets
+    Counts _counts;                 // Counts valid if overflow
+    uint64_t _pageNum;
+    uint64_t _sparsePageNum;
+    uint64_t _l6WordNum;            // wordnum if overflow
+    StartOffset _startOffset;       // valid if overflow
+    bool _res;
+    bool _overflow;
+
+    PageDict4SSLookupRes(void);
+
+    ~PageDict4SSLookupRes(void);
+};
+
+/* Reader for sparse sparse file.
+ *
+ * Read from file to memory (compressed mix of L6 entries and overflow entries)
+ *
+ * Uncompressed L7 array in memory, usable for binary search.
+ */
+
+class PageDict4SSReader : public PageDict4PageParams
+{
+    typedef PostingListCountFileEncodeContext EC;
+    typedef PostingListCountFileDecodeContext DC;
+public:
+    /*
+     * One entry in the uncompressed L7 array.
+     */
+    class L7Entry
+    {
+    public:
+        vespalib::string _l7Word;
+        StartOffset _l7StartOffset; // Offsets in data files
+        uint64_t _l7WordNum;
+        uint32_t _l6Offset;         // Offset in L6+overflow stream
+        uint32_t _sparsePageNum;    // page number for sparse file
+        uint64_t _pageNum;          // page number in full file
+        uint32_t _l7Ref;    // L7 entry before overflow, or self-ref if L6
+
+        // NOTE(review): _l7Word is constructed from a literal 0 (a null
+        // pointer constant); presumably an empty string is intended --
+        // consider value-initializing with _l7Word() instead.
+        L7Entry()
+            : _l7Word(0),
+              _l7StartOffset(),
+              _l7WordNum(0),
+              _l6Offset(0),
+              _sparsePageNum(0),
+              _pageNum(0),
+              _l7Ref(0)
+        {
+        }
+
+        // Member-wise initialization from the given values.
+        L7Entry(const vespalib::stringref &l7Word,
+                const StartOffset &l7StartOffset,
+                uint64_t l7WordNum,
+                uint32_t l6Offset,
+                uint32_t sparsePageNum,
+                uint64_t pageNum,
+                uint32_t l7Ref)
+            : _l7Word(l7Word),
+              _l7StartOffset(l7StartOffset),
+              _l7WordNum(l7WordNum),
+              _l6Offset(l6Offset),
+              _sparsePageNum(sparsePageNum),
+              _pageNum(pageNum),
+              _l7Ref(l7Ref)
+        {
+        }
+
+        // Orders an entry against a key word by comparing _l7Word only.
+        bool
+        operator<(const vespalib::stringref &word) const
+        {
+            return _l7Word < word;
+        }
+
+        friend vespalib::nbostream &
+        operator<<(vespalib::nbostream &stream, const L7Entry &l7Entry);
+
+        friend vespalib::nbostream &
+        operator>>(vespalib::nbostream &stream, L7Entry &l7Entry);
+    };
+
+    /*
+     * Maps a word number to an overflow entry in the L7 table.
+     */
+    class OverflowRef
+    {
+    public:
+        uint64_t _wordNum;
+        uint32_t _l7Ref;    // overflow entry in L7 table
+
+        OverflowRef()
+            : _wordNum(0),
+              _l7Ref(0)
+        {
+        }
+
+        OverflowRef(uint64_t wordNum, uint32_t l7Ref)
+            : _wordNum(wordNum),
+              _l7Ref(l7Ref)
+        {
+        }
+
+        // Orders a reference against a word number by comparing _wordNum only.
+        bool
+        operator<(uint64_t wordNum) const
+        {
+            return _wordNum < wordNum;
+        }
+
+        friend vespalib::nbostream &
+        operator<<(vespalib::nbostream &stream, const OverflowRef &oref);
+
+        friend vespalib::nbostream &
+        operator>>(vespalib::nbostream &stream, OverflowRef &oref);
+    };
+
+    ComprBuffer _cb;
+    uint64_t _ssFileBitLen;     // File size in bits
+    uint32_t _ssStartOffset;    // Header size in bits
+
+    // NOTE(review): the element types of the two std::vector typedefs below
+    // appear to have been lost when the patch text was extracted (presumably
+    // L7Entry and OverflowRef); restore them from the upstream header.
+    typedef std::vector L7Vector;
+    L7Vector _l7;   // Uncompressed skip list for sparse sparse file
+
+    DC _ssd;        // used to store compression parameters
+    uint64_t _spFileBitLen;
+    uint64_t _pFileBitLen;
+    uint32_t _spStartOffset;
+    uint32_t _pStartOffset;
+    uint32_t _spFirstPageNum;
+    uint32_t _spFirstPageOffset;
+    uint32_t _pFirstPageNum;
+    uint32_t _pFirstPageOffset;
+
+    typedef std::vector OverflowVector;
+    OverflowVector _overflows;
+
+    PageDict4SSReader(ComprBuffer &cb,
+                      uint32_t ssFileHeaderSize,
+                      uint64_t ssFileBitLen,
+                      uint32_t spFileHeaderSize,
+                      uint64_t spFileBitLen,
+                      uint32_t pFileHeaderSize,
+                      uint64_t pFileBitLen);
+
+    ~PageDict4SSReader(void);
+
+    void
+    setup(DC &ssd);
+
+    PageDict4SSLookupRes
+    lookup(const vespalib::stringref &key);
+
+    PageDict4SSLookupRes
+    lookupOverflow(uint64_t wordNum) const;
+
+    /*
+     * Returns the decode context used to store compression parameters.
+     */
+    const DC &
+    getSSD(void) const
+    {
+        return _ssd;
+    }
+
+    void
+    checkPointWrite(vespalib::nbostream &out);
+
+    void
+    checkPointRead(vespalib::nbostream &in);
+};
+
+
+/*
+ * Result record for a lookup in a sparse page; filled in by lookup().
+ */
+class PageDict4SPLookupRes : public PageDict4PageParams
+{
+    typedef PostingListCountFileEncodeContext EC;
+    typedef PostingListCountFileDecodeContext DC;
+    typedef PageDict4SSReader SSReader;
+
+public:
+    vespalib::string _l3Word;
+    vespalib::string _lastWord; // L3 word >= key
+    StartOffset _l3StartOffset;
+    uint64_t _pageNum;
+    uint64_t _l3WordNum;
+
+public:
+    PageDict4SPLookupRes(void);
+
+    ~PageDict4SPLookupRes(void);
+
+    void
+    lookup(const SSReader &ssReader,
+           const void *sparsePage,
+           const vespalib::stringref &key,
+           const vespalib::stringref &l6Word,
+           const vespalib::stringref &lastSPWord,
+           const StartOffset &l6StartOffset,
+           uint64_t l6WordNum,
+           uint64_t lowestPageNum);
+};
+
+
+/*
+ * Result record for a lookup in a full page; filled in by lookup().
+ */
+class PageDict4PLookupRes : public PageDict4PageParams
+{
+public:
+    typedef PostingListCountFileEncodeContext EC;
+    typedef PostingListCountFileDecodeContext DC;
+    typedef PageDict4SSReader SSReader;
+
+public:
+    Counts _counts;
+    StartOffset _startOffset;
+    uint64_t _wordNum;
+    bool _res;
+    vespalib::string *_nextWord;
+
+public:
+    PageDict4PLookupRes(void);
+
+    ~PageDict4PLookupRes(void);
+
+    bool
+    lookup(const SSReader &ssReader,
+           const void *page,
+           const vespalib::stringref &key,
+           const vespalib::stringref &l3Word,
+           const vespalib::stringref &lastPWord,
+           const StartOffset &l3StartOffset,
+           uint64_t l3WordNum);
+};
+
+
+/*
+ * Reader over the page dictionary files.  It holds decode contexts for the
+ * full page file (_pd) and the sparse page file (_spd), a reference to the
+ * sparse sparse reader, and buffers of decoded counts and words with
+ * iterator pairs into them.
+ */
+class PageDict4Reader : public PageDict4PageParams
+{
+public:
+    typedef PostingListCountFileDecodeContext DC;
+    typedef PostingListCountFileEncodeContext EC;
+    typedef PageDict4SSReader SSReader;
+
+    DC &_pd;
+    uint32_t _countsResidue;
+    const SSReader &_ssReader;
+    uint64_t _pFileBitLen;
+    StartOffset _startOffset;
+    bool _overflowPage;
+    // NOTE(review): the element types of the std::vector typedefs PCV and WV
+    // appear to have been lost when the patch text was extracted; restore
+    // them from the upstream header.
+    typedef std::vector PCV;
+    PCV _counts;
+    PCV::const_iterator _cc;
+    PCV::const_iterator _ce;
+    typedef std::vector WV;
+    WV _words;
+    WV::const_iterator _wc;
+    WV::const_iterator _we;
+    vespalib::string _lastWord;
+    vespalib::string _lastSSWord;
+
+    DC &_spd;
+    uint32_t _l3Residue;
+    WV _spwords;
+    WV::const_iterator _spwc;
+    WV::const_iterator _spwe;
+
+    DC _ssd;
+    uint64_t _wordNum;
+
+
+    PageDict4Reader(const SSReader &ssReader,
+                    DC &spd,
+                    DC &pd);
+
+    ~PageDict4Reader(void);
+
+    void
+    setup();
+
+    void
+    setupPage();
+
+    void
+    setupSPage();
+
+    void
+    decodePWord(vespalib::string &word);
+
+    void
+    decodeSPWord(vespalib::string &word);
+
+    void
+    decodeSSWord(vespalib::string &word);
+
+    void
+    readCounts(vespalib::string &word,
+               uint64_t &wordNum,
+               Counts &counts);
+
+    void
+    readOverflowCounts(vespalib::string &word,
+                       Counts &counts);
+
+    void
+    checkPointWrite(vespalib::nbostream &out);
+
+    void
+    checkPointRead(vespalib::nbostream &in);
+};
+
+} // namespace bitcompression
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp
new file mode 100644
index 00000000000..ebf4ff59889
--- /dev/null
+++ 
b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.cpp @@ -0,0 +1,1355 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP(".posocccompression"); +#include "compression.h" +#include "posocccompression.h" +#include +#include +#include +#include +#include + +using search::index::DocIdAndFeatures; +using search::index::WordDocElementFeatures; +using search::index::WordDocElementWordPosFeatures; +using search::index::PostingListParams; +using search::index::SchemaUtil; +using search::index::Schema; +using search::fef::TermFieldMatchData; +using vespalib::GenericHeader; + +namespace +{ + +vespalib::string PosOccId = "PosOcc.3"; + +vespalib::string PosOccIdCooked = "PosOcc.3.Cooked"; + +} + +namespace +{ + +vespalib::string EG64PosOccId = "EG64PosOcc.3"; // Dynamic k values +vespalib::string EG64PosOccId2 = "EG64PosOcc.2"; // Fixed k values + +} + +namespace search +{ + +namespace bitcompression +{ + + +PosOccFieldParams::PosOccFieldParams(void) + : _elemLenK(0), + _hasElements(false), + _hasElementWeights(false), + _avgElemLen(512), + _collectionType(SINGLE), + _name() +{ +} + + +bool +PosOccFieldParams::operator==(const PosOccFieldParams &rhs) const +{ + return _collectionType == rhs._collectionType && + _avgElemLen == rhs._avgElemLen && + _name == rhs._name; +} + + +vespalib::string +PosOccFieldParams::getParamsPrefix(uint32_t idx) +{ + vespalib::asciistream paramsPrefix; + paramsPrefix << "fieldParams."; + paramsPrefix << idx; + return paramsPrefix.str(); +} + + +void +PosOccFieldParams::getParams(PostingListParams ¶ms, uint32_t idx) const +{ + vespalib::string paramsPrefix = getParamsPrefix(idx); + vespalib::string collStr = paramsPrefix + ".collectionType"; + vespalib::string avgElemLenStr = paramsPrefix + ".avgElemLen"; + vespalib::string nameStr = 
paramsPrefix + ".name"; + + switch (_collectionType) { + case SINGLE: + params.setStr(collStr, "single"); + break; + case ARRAY: + params.setStr(collStr, "array"); + break; + case WEIGHTEDSET: + params.setStr(collStr, "weightedSet"); + break; + } + params.set(avgElemLenStr, _avgElemLen); + params.setStr(nameStr, _name); +} + + +void +PosOccFieldParams::setParams(const PostingListParams ¶ms, uint32_t idx) +{ + vespalib::string paramsPrefix = getParamsPrefix(idx); + vespalib::string collStr = paramsPrefix + ".collectionType"; + vespalib::string avgElemLenStr = paramsPrefix + ".avgElemLen"; + vespalib::string nameStr = paramsPrefix + ".name"; + + if (params.isSet(collStr)) { + vespalib::string collVal = params.getStr(collStr); + if (collVal == "single") { + _collectionType = SINGLE; + _hasElements = false; + _hasElementWeights = false; + } else if (collVal == "array") { + _collectionType = ARRAY; + _hasElements = true; + _hasElementWeights = false; + } else if (collVal == "weightedSet") { + _collectionType = WEIGHTEDSET; + _hasElements = true; + _hasElementWeights = true; + } + } + params.get(avgElemLenStr, _avgElemLen); + if (params.isSet(nameStr)) + _name = params.getStr(nameStr); +} + + +void +PosOccFieldParams::setSchemaParams(const Schema &schema, uint32_t fieldId) +{ + assert(fieldId < schema.getNumIndexFields()); + const Schema::IndexField &field = schema.getIndexField(fieldId); + switch (field.getCollectionType()) { + case Schema::SINGLE: + _collectionType = SINGLE; + _hasElements = false; + _hasElementWeights = false; + break; + case Schema::ARRAY: + _collectionType = ARRAY; + _hasElements = true; + _hasElementWeights = false; + break; + case Schema::WEIGHTEDSET: + _collectionType = WEIGHTEDSET; + _hasElements = true; + _hasElementWeights = true; + break; + default: + LOG(error, + "Bad collection type"); + abort(); + } + _avgElemLen = field.getAvgElemLen(); + _name = field.getName(); +} + + +void +PosOccFieldParams::readHeader(const vespalib::GenericHeader 
&header, + const vespalib::string &prefix) +{ + vespalib::string nameKey(prefix + "fieldName"); + vespalib::string collKey(prefix + "collectionType"); + vespalib::string avgElemLenKey(prefix + "avgElemLen"); + _name = header.getTag(nameKey).asString(); + Schema::CollectionType ct = + Schema::collectionTypeFromName(header.getTag(collKey).asString()); + switch (ct) { + case Schema::SINGLE: + _collectionType = SINGLE; + _hasElements = false; + _hasElementWeights = false; + break; + case Schema::ARRAY: + _collectionType = ARRAY; + _hasElements = true; + _hasElementWeights = false; + break; + case Schema::WEIGHTEDSET: + _collectionType = WEIGHTEDSET; + _hasElements = true; + _hasElementWeights = true; + break; + default: + LOG(error, + "Bad collection type when reading field param in header"); + abort(); + } + _avgElemLen = header.getTag(avgElemLenKey).asInteger(); +} + + +void +PosOccFieldParams::writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const +{ + vespalib::string nameKey(prefix + "fieldName"); + vespalib::string collKey(prefix + "collectionType"); + vespalib::string avgElemLenKey(prefix + "avgElemLen"); + header.putTag(GenericHeader::Tag(nameKey, _name)); + Schema::CollectionType ct(Schema::SINGLE); + switch (_collectionType) { + case SINGLE: + ct = Schema::SINGLE; + break; + case ARRAY: + ct = Schema::ARRAY; + break; + case WEIGHTEDSET: + ct = Schema::WEIGHTEDSET; + break; + default: + LOG(error, + "Bad collection type when writing field param in header"); + abort(); + } + header.putTag(GenericHeader::Tag(collKey, Schema::getTypeName(ct))); + header.putTag(GenericHeader::Tag(avgElemLenKey, _avgElemLen)); +} + + +PosOccFieldsParams::PosOccFieldsParams(void) + : _numFields(0u), + _fieldParams(NULL), + _params() +{ +} + +PosOccFieldsParams::PosOccFieldsParams(const PosOccFieldsParams &rhs) + : _numFields(0u), + _fieldParams(NULL), + _params(rhs._params) +{ + cacheParamsRef(); +} + +PosOccFieldsParams & 
+PosOccFieldsParams::operator=(const PosOccFieldsParams &rhs) +{ + assertCachedParamsRef(); + _params = rhs._params; + cacheParamsRef(); + return *this; +} + + +bool +PosOccFieldsParams::operator==(const PosOccFieldsParams &rhs) const +{ + return _params == rhs._params; +} + + +void +PosOccFieldsParams::getParams(PostingListParams ¶ms) const +{ + assertCachedParamsRef(); + assert(_numFields == 1u); // Only single field for now + params.set("numFields", _numFields); + // Single posting file index format will have multiple fields in file + for (uint32_t field = 0; field < _numFields; ++field) + _fieldParams[field].getParams(params, field); +} + + +void +PosOccFieldsParams::setParams(const PostingListParams ¶ms) +{ + assertCachedParamsRef(); + uint32_t numFields = _numFields; + params.get("numFields", numFields); + assert(numFields == 1u); + _params.resize(numFields); + cacheParamsRef(); + // Single posting file index format will have multiple fields in file + for (uint32_t field = 0; field < numFields; ++field) + _params[field].setParams(params, field); +} + + +void +PosOccFieldsParams::setSchemaParams(const Schema &schema, + const uint32_t indexId) +{ + assertCachedParamsRef(); + SchemaUtil::IndexIterator i(schema, indexId); + assert(i.isValid()); + _params.resize(1u); + cacheParamsRef(); + const Schema::IndexField &field = schema.getIndexField(indexId); + if (!SchemaUtil::validateIndexField(field)) + abort(); + _params[0].setSchemaParams(schema, indexId); +} + + +void +PosOccFieldsParams::readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + vespalib::string numFieldsKey(prefix + "numFields"); + assertCachedParamsRef(); + uint32_t numFields = header.getTag(numFieldsKey).asInteger(); + assert(numFields == 1u); + _params.resize(numFields); + cacheParamsRef(); + // Single posting file index format will have multiple fields in file + for (uint32_t field = 0; field < numFields; ++field) { + vespalib::asciistream as; + as << prefix << 
"field[" << field << "]."; + vespalib::string subPrefix(as.str()); + _params[field].readHeader(header, subPrefix); + } +} + + +void +PosOccFieldsParams::writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const +{ + vespalib::string numFieldsKey(prefix + "numFields"); + assertCachedParamsRef(); + assert(_numFields == 1u); + header.putTag(GenericHeader::Tag(numFieldsKey, _numFields)); + // Single posting file index format will have multiple fields in file + for (uint32_t field = 0; field < _numFields; ++field) { + vespalib::asciistream as; + as << prefix << "field[" << field << "]."; + vespalib::string subPrefix(as.str()); + _params[field].writeHeader(header, subPrefix); + } +} + + +template +void +EG2PosOccDecodeContext:: +readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + const_cast(_fieldsParams)->readHeader(header, + prefix); +} + + +template +void +EG2PosOccDecodeContext:: +readFeatures(search::index::DocIdAndFeatures &features) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + + features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); + features.setRaw(true); + const uint64_t *rawFeatures = + (oPreRead == 0) ? 
(oCompr - 1) : (oCompr - 2); + uint64_t rawFeaturesStartBitPos = + _fileReadBias + (reinterpret_cast(oCompr) << 3) - + oPreRead; + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone) { + if (fieldParams._hasElements) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + if (fieldParams._hasElementWeights) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + } + if (__builtin_expect(oCompr >= valE, false)) { + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + rawFeatures = oCompr; + } + } + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTLEN, + EC); + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + do { + if (__builtin_expect(oCompr >= valE, false)) { + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + rawFeatures = oCompr; + } + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_FIRST_WORDPOS, + EC); + } while (0); + for (uint32_t pos = 1; pos < numPositions; ++pos) { + if (__builtin_expect(oCompr >= valE, false)) { + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + rawFeatures = oCompr; + } + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_DELTA_WORDPOS, + EC); + } + } + 
UC64_DECODECONTEXT_STORE(o, _); + uint64_t rawFeaturesEndBitPos = + _fileReadBias + + (reinterpret_cast(oCompr) << 3) - + oPreRead; + features._bitLength = rawFeaturesEndBitPos - rawFeaturesStartBitPos; + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + if (__builtin_expect(oCompr >= valE, false)) { + _readContext->readComprBuffer(); + } +} + + +template +void +EG2PosOccDecodeContextCooked:: +readFeatures(search::index::DocIdAndFeatures &features) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + + features.clearFeatures(); + features.setRaw(false); + + const PosOccFieldParams &fieldParams = _fieldsParams->getFieldParams()[0]; + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + uint32_t elementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++elementId) { + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + elementId += static_cast(val64); + } + features._elements. 
+ push_back(WordDocElementFeatures(elementId)); + if (fieldParams._hasElementWeights) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + int32_t elementWeight = this->convertToSigned(val64); + features._elements.back().setWeight(elementWeight); + } + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTLEN, + EC); + uint32_t elementLen = static_cast(val64) + 1; + features._elements.back().setElementLen(elementLen); + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + uint32_t wordPos = static_cast(-1); + do { + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_FIRST_WORDPOS, + EC); + wordPos = static_cast(val64); + features._elements.back().incNumOccs(); + features._wordPositions.push_back( + WordDocElementWordPosFeatures(wordPos)); + } while (0); + for (uint32_t pos = 1; pos < numPositions; ++pos) { + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_DELTA_WORDPOS, + EC); + wordPos += 1 + static_cast(val64); + features._elements.back().incNumOccs(); + features._wordPositions.push_back( + WordDocElementWordPosFeatures(wordPos)); + } + } + UC64_DECODECONTEXT_STORE(o, _); + if (__builtin_expect(oCompr >= valE, false)) + _readContext->readComprBuffer(); +} + + +template +void +EG2PosOccDecodeContext:: +skipFeatures(unsigned int count) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + + for (unsigned int i = count; i > 0; --i) { + 
const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone) { + if (fieldParams._hasElements) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + if (fieldParams._hasElementWeights) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + } + } + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTLEN, + EC); + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_FIRST_WORDPOS, + EC); + for (uint32_t pos = 1; pos < numPositions; ++pos) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_DELTA_WORDPOS, + EC); + } + } + } + UC64_DECODECONTEXT_STORE(o, _); +} + + +template +void +EG2PosOccDecodeContext:: +unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + TermFieldMatchData *tfmd = matchData[0]; + tfmd->reset(docId); + uint32_t elementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++elementId) { + int32_t elementWeight = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + elementId += static_cast(val64); + if (fieldParams._hasElementWeights) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + elementWeight = this->convertToSigned(val64); + } + } + + 
UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTLEN, + EC); + uint32_t elementLen = static_cast(val64) + 1; + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_FIRST_WORDPOS, + EC); + uint32_t wordPos = static_cast(val64); + { + search::fef::TermFieldMatchDataPosition + pos(elementId, wordPos, elementWeight, elementLen); + tfmd->appendPosition(pos); + } + for (uint32_t wi = 1; wi < numPositions; ++wi) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_DELTA_WORDPOS, + EC); + wordPos += 1 + static_cast(val64); + { + search::fef::TermFieldMatchDataPosition + pos(elementId, wordPos, elementWeight, + elementLen); + tfmd->appendPosition(pos); + } + } + } + UC64_DECODECONTEXT_STORE(o, _); +} + + +template +void +EG2PosOccDecodeContext:: +setParams(const PostingListParams ¶ms) +{ + const_cast(_fieldsParams)->setParams(params); +} + + +template +void +EG2PosOccDecodeContext:: +getParams(PostingListParams ¶ms) const +{ + params.clear(); + params.setStr("encoding", EG64PosOccId2); + _fieldsParams->getParams(params); +} + + +template +void +EG2PosOccDecodeContextCooked:: +getParams(PostingListParams ¶ms) const +{ + ParentClass::getParams(params); + params.setStr("cookedEncoding", PosOccIdCooked); +} + + +template +void +EG2PosOccEncodeContext:: +readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + const_cast(_fieldsParams)->readHeader(header, + prefix); +} + + +template +const vespalib::string & +EG2PosOccDecodeContext::getIdentifier(void) const +{ + return EG64PosOccId2; +} + + +template +void +EG2PosOccEncodeContext:: +writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const +{ + _fieldsParams->writeHeader(header, prefix); +} + + +template +const vespalib::string & +EG2PosOccEncodeContext::getIdentifier(void) const +{ + return EG64PosOccId2; +} + + +template +void 
+EG2PosOccEncodeContext:: +writeFeatures(const search::index::DocIdAndFeatures &features) +{ + if (features.getRaw()) { + writeBits(&features._blob[0], + features._bitOffset, features._bitLength); + return; + } + typedef WordDocElementFeatures Elements; + typedef WordDocElementWordPosFeatures Positions; + + std::vector::const_iterator element = features._elements.begin(); + + std::vector::const_iterator position = + features._wordPositions.begin(); + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + + uint32_t numElements = features._elements.size(); + if (fieldParams._hasElements) { + assert(numElements > 0u); + encodeExpGolomb(numElements - 1, + K_VALUE_POSOCC_NUMELEMENTS); + } else { + assert(numElements == 1); + } + uint32_t minElementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++element) { + if (fieldParams._hasElements) { + uint32_t elementId = element->getElementId(); + assert(elementId >= minElementId); + encodeExpGolomb(elementId - minElementId, + K_VALUE_POSOCC_ELEMENTID); + minElementId = elementId + 1; + if (fieldParams._hasElementWeights) { + int32_t elementWeight = element->getWeight(); + encodeExpGolomb(this->convertToUnsigned(elementWeight), + K_VALUE_POSOCC_ELEMENTWEIGHT); + } + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + } else { + uint32_t elementId = element->getElementId(); + assert(elementId == 0); + (void) elementId; + } + + encodeExpGolomb(element->getElementLen() - 1, + K_VALUE_POSOCC_ELEMENTLEN); + uint32_t numPositions = element->getNumOccs(); + assert(numPositions > 0); + encodeExpGolomb(numPositions - 1, + K_VALUE_POSOCC_NUMPOSITIONS); + + uint32_t wordPos = static_cast(-1); + do { + uint32_t lastWordPos = wordPos; + wordPos = position->getWordPos(); + encodeExpGolomb(wordPos - lastWordPos - 1, + K_VALUE_POSOCC_FIRST_WORDPOS); + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + 
++position; + } while (0); + uint32_t positionResidue = numPositions - 1; + while (positionResidue > 0) { + uint32_t lastWordPos = wordPos; + wordPos = position->getWordPos(); + encodeExpGolomb(wordPos - lastWordPos - 1, + K_VALUE_POSOCC_DELTA_WORDPOS); + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + ++position; + --positionResidue; + } + } +} + + +template +void +EG2PosOccEncodeContext:: +setParams(const PostingListParams ¶ms) +{ + const_cast(_fieldsParams)->setParams(params); +} + + +template +void +EG2PosOccEncodeContext:: +getParams(PostingListParams ¶ms) const +{ + params.clear(); + params.setStr("encoding", EG64PosOccId2); + params.setStr("cookedEncoding", PosOccIdCooked); + _fieldsParams->getParams(params); +} + + +template +void +EGPosOccDecodeContext:: +readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + ParentClass::readHeader(header, prefix); +} + + +template +void +EGPosOccDecodeContext:: +readFeatures(search::index::DocIdAndFeatures &features) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + + features.clearFeatures((oPreRead == 0) ? 0 : 64 - oPreRead); + features.setRaw(true); + const uint64_t *rawFeatures = + (oPreRead == 0) ? 
(oCompr - 1) : (oCompr - 2); + uint64_t rawFeaturesStartBitPos = + _fileReadBias + (reinterpret_cast(oCompr) << 3) - + oPreRead; + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t elementLenK = EGPosOccEncodeContext:: + calcElementLenK(fieldParams._avgElemLen); + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone) { + if (fieldParams._hasElements) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + if (fieldParams._hasElementWeights) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + } + if (__builtin_expect(oCompr >= valE, false)) { + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + rawFeatures = oCompr; + } + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + elementLenK, + EC); + uint32_t elementLen = static_cast(val64) + 1; + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + uint32_t wordPosK = EGPosOccEncodeContext:: + calcWordPosK(numPositions, elementLen); + + for (uint32_t pos = 0; pos < numPositions; ++pos) { + if (__builtin_expect(oCompr >= valE, false)) { + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + rawFeatures = oCompr; + } + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + wordPosK, + EC); + } + } + UC64_DECODECONTEXT_STORE(o, _); + uint64_t rawFeaturesEndBitPos = + _fileReadBias + + (reinterpret_cast(oCompr) << 3) - + oPreRead; + features._bitLength = rawFeaturesEndBitPos - 
rawFeaturesStartBitPos; + while (rawFeatures < oCompr) { + features._blob.push_back(*rawFeatures); + ++rawFeatures; + } + if (__builtin_expect(oCompr >= valE, false)) { + _readContext->readComprBuffer(); + } +} + + +template +void +EGPosOccDecodeContextCooked:: +readFeatures(search::index::DocIdAndFeatures &features) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _valE; + + features.clearFeatures(); + features.setRaw(false); + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t elementLenK = EGPosOccEncodeContext:: + calcElementLenK(fieldParams._avgElemLen); + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + uint32_t elementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++elementId) { + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + elementId += static_cast(val64); + } + features._elements. 
+ push_back(WordDocElementFeatures(elementId)); + if (fieldParams._hasElementWeights) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + int32_t elementWeight = this->convertToSigned(val64); + features._elements.back().setWeight(elementWeight); + } + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + elementLenK, + EC); + uint32_t elementLen = static_cast(val64) + 1; + features._elements.back().setElementLen(elementLen); + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + features._bitLength = numPositions * 64; + + uint32_t wordPosK = EGPosOccEncodeContext:: + calcWordPosK(numPositions, elementLen); + + uint32_t wordPos = static_cast(-1); + for (uint32_t pos = 0; pos < numPositions; ++pos) { + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, _); + _readContext->readComprBuffer(); + valE = _valE; + UC64_DECODECONTEXT_LOAD(o, _); + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + wordPosK, + EC); + wordPos += 1 + static_cast(val64); + features._elements.back().incNumOccs(); + features._wordPositions.push_back( + WordDocElementWordPosFeatures(wordPos)); + } + } + UC64_DECODECONTEXT_STORE(o, _); + if (__builtin_expect(oCompr >= valE, false)) + _readContext->readComprBuffer(); +} + + +template +void +EGPosOccDecodeContext:: +skipFeatures(unsigned int count) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + + for (unsigned int i = count; i > 0; --i) { + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t elementLenK = EGPosOccEncodeContext:: + calcElementLenK(fieldParams._avgElemLen); + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = 
static_cast(val64) + 1; + } + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone) { + if (fieldParams._hasElements) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + if (fieldParams._hasElementWeights) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + } + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + elementLenK, + EC); + uint32_t elementLen = static_cast(val64) + 1; + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + uint32_t wordPosK = EGPosOccEncodeContext:: + calcWordPosK(numPositions, elementLen); + + for (uint32_t pos = 0; pos < numPositions; ++pos) { + UC64_SKIPEXPGOLOMB_SMALL_NS(o, + wordPosK, + EC); + } + } + } + UC64_DECODECONTEXT_STORE(o, _); +} + + +template +void +EGPosOccDecodeContext:: +unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId) +{ + UC64_DECODECONTEXT_CONSTRUCTOR(o, _); + uint32_t length; + uint64_t val64; + + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t elementLenK = EGPosOccEncodeContext:: + calcElementLenK(fieldParams._avgElemLen); + uint32_t numElements = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMELEMENTS, + EC); + numElements = static_cast(val64) + 1; + } + TermFieldMatchData *tfmd = matchData[0]; + tfmd->reset(docId); + uint32_t elementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++elementId) { + int32_t elementWeight = 1; + if (fieldParams._hasElements) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTID, + EC); + elementId += static_cast(val64); + if (fieldParams._hasElementWeights) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_ELEMENTWEIGHT, + EC); + elementWeight = this->convertToSigned(val64); + } + } + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + elementLenK, + EC); + uint32_t elementLen = static_cast(val64) + 1; + 
UC64_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_POSOCC_NUMPOSITIONS, + EC); + uint32_t numPositions = static_cast(val64) + 1; + + uint32_t wordPosK = EGPosOccEncodeContext:: + calcWordPosK(numPositions, elementLen); + + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + wordPosK, + EC); + uint32_t wordPos = static_cast(val64); + { + search::fef::TermFieldMatchDataPosition + pos(elementId, wordPos, elementWeight, elementLen); + tfmd->appendPosition(pos); + } + for (uint32_t wi = 1; wi < numPositions; ++wi) { + UC64_DECODEEXPGOLOMB_SMALL_NS(o, + wordPosK, + EC); + wordPos += 1 + static_cast(val64); + { + search::fef::TermFieldMatchDataPosition + pos(elementId, wordPos, elementWeight, + elementLen); + tfmd->appendPosition(pos); + } + } + } + UC64_DECODECONTEXT_STORE(o, _); +} + + +template +void +EGPosOccDecodeContext:: +setParams(const PostingListParams ¶ms) +{ + ParentClass::setParams(params); +} + + +template +void +EGPosOccDecodeContext:: +getParams(PostingListParams ¶ms) const +{ + ParentClass::getParams(params); + params.setStr("encoding", EG64PosOccId); +} + + +template +void +EGPosOccDecodeContextCooked:: +getParams(PostingListParams ¶ms) const +{ + ParentClass::getParams(params); + params.setStr("cookedEncoding", PosOccIdCooked); +} + + +template +void +EGPosOccEncodeContext:: +readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix) +{ + ParentClass::readHeader(header, prefix); +} + + +template +const vespalib::string & +EGPosOccDecodeContext::getIdentifier(void) const +{ + return EG64PosOccId; +} + + +template +void +EGPosOccEncodeContext:: +writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const +{ + ParentClass::writeHeader(header, prefix); +} + + +template +const vespalib::string & +EGPosOccEncodeContext::getIdentifier(void) const +{ + return EG64PosOccId; +} + + +template +void +EGPosOccEncodeContext:: +writeFeatures(const search::index::DocIdAndFeatures &features) +{ + if (features.getRaw()) { + 
writeBits(&features._blob[0], + features._bitOffset, features._bitLength); + return; + } + typedef WordDocElementFeatures Elements; + typedef WordDocElementWordPosFeatures Positions; + + std::vector::const_iterator element = features._elements.begin(); + + std::vector::const_iterator position = + features._wordPositions.begin(); + const PosOccFieldParams &fieldParams = + _fieldsParams->getFieldParams()[0]; + uint32_t elementLenK = calcElementLenK(fieldParams._avgElemLen); + + uint32_t numElements = features._elements.size(); + if (fieldParams._hasElements) { + assert(numElements > 0u); + encodeExpGolomb(numElements - 1, + K_VALUE_POSOCC_NUMELEMENTS); + } else { + assert(numElements == 1); + } + uint32_t minElementId = 0; + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++element) { + if (fieldParams._hasElements) { + uint32_t elementId = element->getElementId(); + assert(elementId >= minElementId); + encodeExpGolomb(elementId - minElementId, + K_VALUE_POSOCC_ELEMENTID); + minElementId = elementId + 1; + if (fieldParams._hasElementWeights) { + int32_t elementWeight = element->getWeight(); + encodeExpGolomb(this->convertToUnsigned(elementWeight), + K_VALUE_POSOCC_ELEMENTWEIGHT); + } + if (__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + } else { + uint32_t elementId = element->getElementId(); + assert(elementId == 0); + (void) elementId; + } + uint32_t elementLen = element->getElementLen(); + encodeExpGolomb(elementLen - 1, elementLenK); + uint32_t numPositions = element->getNumOccs(); + assert(numPositions > 0); + encodeExpGolomb(numPositions - 1, + K_VALUE_POSOCC_NUMPOSITIONS); + + uint32_t wordPosK = calcWordPosK(numPositions, elementLen); + uint32_t wordPos = static_cast(-1); + uint32_t positionResidue = numPositions; + while (positionResidue > 0) { + uint32_t lastWordPos = wordPos; + wordPos = position->getWordPos(); + encodeExpGolomb(wordPos - lastWordPos - 1, + wordPosK); + if 
(__builtin_expect(_valI >= _valE, false)) + _writeContext->writeComprBuffer(false); + ++position; + --positionResidue; + } + } +} + + + +template +void +EGPosOccEncodeContext:: +setParams(const PostingListParams ¶ms) +{ + ParentClass::setParams(params); +} + + +template +void +EGPosOccEncodeContext:: +getParams(PostingListParams ¶ms) const +{ + ParentClass::getParams(params); + params.setStr("encoding", EG64PosOccId); + params.setStr("cookedEncoding", PosOccIdCooked); +} + + +template class EG2PosOccDecodeContext; +template class EG2PosOccDecodeContext; + +template class EG2PosOccDecodeContextCooked; +template class EG2PosOccDecodeContextCooked; + +template class EG2PosOccEncodeContext; +template class EG2PosOccEncodeContext; + +template class EGPosOccDecodeContext; +template class EGPosOccDecodeContext; + +template class EGPosOccDecodeContextCooked; +template class EGPosOccDecodeContextCooked; + +template class EGPosOccEncodeContext; +template class EGPosOccEncodeContext; + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h new file mode 100644 index 00000000000..cdf9c73fdc0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/bitcompression/posocccompression.h @@ -0,0 +1,616 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once +#include +#include + + +#define K_VALUE_POSOCC_FIRST_DOCID 22 +#define MAXRICE2_POSOCC_FIRST_DOCID MAX_RICE2VAL_L32_K22 + +#define K_VALUE_POSOCC_DELTA_DOCID 7 +#define MAXRICE2_POSOCC_DELTA_DOCID MAX_RICE2VAL_L30_K7 + +#define K_VALUE_POSOCC_FIRST_WORDPOS 8 +#define MAXRICE2_POSOCC_FIRST_WORDPOS MAX_RICE2VAL_L32_K8 + +#define K_VALUE_POSOCC_DELTA_WORDPOS 4 +#define MAXRICE2_POSOCC_DELTA_WORDPOS MAX_RICE2VAL_L31_K4 + +// Compression parameters for EGPosOcc encode/decode context +#define K_VALUE_POSOCC_ELEMENTLEN 9 +#define K_VALUE_POSOCC_NUMPOSITIONS 0 +#define K_VALUE_POSOCC_NUMFIELDS 0 +#define K_VALUE_POSOCC_FIELDID 0 + +#define K_VALUE_POSOCC_NUMELEMENTS 0 +#define K_VALUE_POSOCC_ELEMENTID 0 +#define K_VALUE_POSOCC_ELEMENTWEIGHT 9 + +namespace search +{ + +namespace index +{ + +class DocIdAndPosOccFeatures : public DocIdAndFeatures +{ +public: + + void + addNextOcc(uint32_t elementId, + uint32_t wordPos, + int32_t elementWeight, + uint32_t elementLen) + { + assert(wordPos < elementLen); + if (_elements.empty() || + elementId > _elements.back().getElementId()) { + _elements.emplace_back(elementId, elementWeight, elementLen); + } else { + assert(elementId == _elements.back().getElementId()); + assert(elementWeight == _elements.back().getWeight()); + assert(elementLen == _elements.back().getElementLen()); + } + assert(_elements.back().getNumOccs() == 0 || + wordPos > _wordPositions.back().getWordPos()); + _elements.back().incNumOccs(); + _wordPositions.emplace_back(wordPos); + } +}; + +} // namespace search::index + +} // namespace search + + +namespace search +{ + +namespace bitcompression +{ + +class PosOccFieldParams +{ +public: + typedef index::PostingListParams PostingListParams; + typedef index::Schema Schema; + + enum CollectionType + { + SINGLE, + ARRAY, + WEIGHTEDSET + }; + + uint8_t _elemLenK; + bool _hasElements; + bool 
_hasElementWeights; + uint32_t _avgElemLen; + CollectionType _collectionType; + vespalib::string _name; + + PosOccFieldParams(void); + + bool + operator==(const PosOccFieldParams &rhs) const; + + static vespalib::string + getParamsPrefix(uint32_t idx); + + void + getParams(PostingListParams ¶ms, uint32_t idx) const; + + void + setParams(const PostingListParams ¶ms, uint32_t idx); + + void + setSchemaParams(const Schema &schema, uint32_t fieldId); + + void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + void + writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const; +}; + + +class PosOccFieldsParams +{ + // Cache pointers. + uint32_t _numFields; + const PosOccFieldParams *_fieldParams; + + // Storage + std::vector _params; + +public: + typedef index::PostingListParams PostingListParams; + typedef index::Schema Schema; + + PosOccFieldsParams(void); + + PosOccFieldsParams(const PosOccFieldsParams &rhs); + + PosOccFieldsParams & + operator=(const PosOccFieldsParams &rhs); + + bool + operator==(const PosOccFieldsParams &rhs) const; + + void + cacheParamsRef(void) + { + _numFields = _params.size(); + _fieldParams = _params.empty() ? NULL : &_params[0]; + } + + void + assertCachedParamsRef(void) const + { + assert(_numFields == _params.size()); + assert(_fieldParams == (_params.empty() ? 
NULL : &_params[0])); + } + + uint32_t + getNumFields(void) const + { + return _numFields; + } + + const PosOccFieldParams * + getFieldParams(void) const + { + return _fieldParams; + } + + void + getParams(PostingListParams ¶ms) const; + + void + setParams(const PostingListParams ¶ms); + + void + setSchemaParams(const Schema &schema, const uint32_t indexId); + + void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + void + writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const; +}; + +template +class EG2PosOccDecodeContext : public FeatureDecodeContext +{ +public: + typedef FeatureDecodeContext ParentClass; + using ParentClass::smallAlign; + using ParentClass::readBits; + using ParentClass::_valI; + using ParentClass::_val; + using ParentClass::_cacheInt; + using ParentClass::_preRead; + using ParentClass::_valE; + using ParentClass::_fileReadBias; + using ParentClass::_readContext; + using ParentClass::readHeader; + typedef EncodeContext64 EC; + typedef index::PostingListParams PostingListParams; + + const PosOccFieldsParams *_fieldsParams; + + EG2PosOccDecodeContext(const PosOccFieldsParams *fieldsParams) + : FeatureDecodeContext(), + _fieldsParams(fieldsParams) + { + } + + EG2PosOccDecodeContext(const uint64_t *compr, int bitOffset, + const PosOccFieldsParams *fieldsParams) + : FeatureDecodeContext(compr, bitOffset), + _fieldsParams(fieldsParams) + { + } + + + EG2PosOccDecodeContext(const uint64_t *compr, + int bitOffset, + uint64_t bitLength, + const PosOccFieldsParams *fieldsParams) + : FeatureDecodeContext(compr, bitOffset, bitLength), + _fieldsParams(fieldsParams) + { + } + + + EG2PosOccDecodeContext & + operator=(const EG2PosOccDecodeContext &rhs) + { + FeatureDecodeContext::operator=(rhs); + _fieldsParams = rhs._fieldsParams; + return *this; + } + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual const vespalib::string & + 
getIdentifier(void) const; + + virtual void + readFeatures(search::index::DocIdAndFeatures &features); + + virtual void + skipFeatures(unsigned int count); + + virtual void + unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms) const; +}; + + +template +class EG2PosOccDecodeContextCooked : public EG2PosOccDecodeContext +{ +public: + typedef EG2PosOccDecodeContext ParentClass; + using ParentClass::smallAlign; + using ParentClass::readBits; + using ParentClass::_valI; + using ParentClass::_val; + using ParentClass::_cacheInt; + using ParentClass::_preRead; + using ParentClass::_valE; + using ParentClass::_fileReadBias; + using ParentClass::_readContext; + using ParentClass::_fieldsParams; + typedef EncodeContext64 EC; + typedef index::PostingListParams PostingListParams; + + EG2PosOccDecodeContextCooked(const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(fieldsParams) + { + } + + EG2PosOccDecodeContextCooked(const uint64_t *compr, int bitOffset, + const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(compr, bitOffset, fieldsParams) + { + } + + + EG2PosOccDecodeContextCooked(const uint64_t *compr, + int bitOffset, + uint64_t bitLength, + const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(compr, bitOffset, bitLength, + fieldsParams) + { + } + + + EG2PosOccDecodeContextCooked & + operator=(const EG2PosOccDecodeContext &rhs) + { + EG2PosOccDecodeContext::operator=(rhs); + return *this; + } + + virtual void + readFeatures(search::index::DocIdAndFeatures &features); + + virtual void + getParams(PostingListParams ¶ms) const; +}; + + +template +class EG2PosOccEncodeContext : public FeatureEncodeContext +{ +public: + typedef FeatureEncodeContext ParentClass; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef 
index::PostingListParams PostingListParams; + using ParentClass::smallAlign; + using ParentClass::writeBits; + using ParentClass::_valI; + using ParentClass::_valE; + using ParentClass::_writeContext; + using ParentClass::encodeExpGolomb; + using ParentClass::readHeader; + using ParentClass::writeHeader; + + const PosOccFieldsParams *_fieldsParams; + + EG2PosOccEncodeContext(const PosOccFieldsParams *fieldsParams) + : FeatureEncodeContext(), + _fieldsParams(fieldsParams) + { + } + + EG2PosOccEncodeContext & + operator=(const EG2PosOccEncodeContext &rhs) + { + FeatureEncodeContext::operator=(rhs); + _fieldsParams = rhs._fieldsParams; + return *this; + } + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual void + writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const; + + virtual const vespalib::string & + getIdentifier(void) const; + + virtual void + writeFeatures(const DocIdAndFeatures &features); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. 
+ */ + virtual void + getParams(PostingListParams ¶ms) const; +}; + + +template +class EGPosOccDecodeContext : public EG2PosOccDecodeContext +{ +public: + typedef EG2PosOccDecodeContext ParentClass; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListParams PostingListParams; + using ParentClass::smallAlign; + using ParentClass::readBits; + using ParentClass::_valI; + using ParentClass::_val; + using ParentClass::_cacheInt; + using ParentClass::_preRead; + using ParentClass::_valE; + using ParentClass::_fileReadBias; + using ParentClass::_readContext; + using ParentClass::_fieldsParams; + using ParentClass::readHeader; + typedef EncodeContext64 EC; + + EGPosOccDecodeContext(const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(fieldsParams) + { + } + + EGPosOccDecodeContext(const uint64_t *compr, int bitOffset, + const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(compr, bitOffset, fieldsParams) + { + } + + + EGPosOccDecodeContext(const uint64_t *compr, + int bitOffset, + uint64_t bitLength, + const PosOccFieldsParams *fieldsParams) + : EG2PosOccDecodeContext(compr, bitOffset, bitLength, + fieldsParams) + { + } + + + EGPosOccDecodeContext & + operator=(const EGPosOccDecodeContext &rhs) + { + EG2PosOccDecodeContext::operator=(rhs); + return *this; + } + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual const vespalib::string & + getIdentifier(void) const; + + virtual void + readFeatures(search::index::DocIdAndFeatures &features); + + virtual void + skipFeatures(unsigned int count); + + virtual void + unpackFeatures(const search::fef::TermFieldMatchDataArray &matchData, + uint32_t docId); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. 
+ */ + virtual void + getParams(PostingListParams ¶ms) const; +}; + + +template +class EGPosOccDecodeContextCooked : public EGPosOccDecodeContext +{ +public: + typedef EGPosOccDecodeContext ParentClass; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListParams PostingListParams; + using ParentClass::smallAlign; + using ParentClass::readBits; + using ParentClass::_valI; + using ParentClass::_val; + using ParentClass::_cacheInt; + using ParentClass::_preRead; + using ParentClass::_valE; + using ParentClass::_fileReadBias; + using ParentClass::_readContext; + using ParentClass::_fieldsParams; + typedef EncodeContext64 EC; + + EGPosOccDecodeContextCooked(const PosOccFieldsParams *fieldsParams) + : EGPosOccDecodeContext(fieldsParams) + { + } + + EGPosOccDecodeContextCooked(const uint64_t *compr, int bitOffset, + const PosOccFieldsParams *fieldsParams) + : EGPosOccDecodeContext(compr, bitOffset, fieldsParams) + { + } + + + EGPosOccDecodeContextCooked(const uint64_t *compr, + int bitOffset, + uint64_t bitLength, + const PosOccFieldsParams *fieldsParams) + : EGPosOccDecodeContext(compr, bitOffset, bitLength, + fieldsParams) + { + } + + + EGPosOccDecodeContextCooked & + operator=(const EGPosOccDecodeContext &rhs) + { + EGPosOccDecodeContext::operator=(rhs); + return *this; + } + + virtual void + readFeatures(search::index::DocIdAndFeatures &features); + + virtual void + getParams(PostingListParams ¶ms) const; +}; + + +template +class EGPosOccEncodeContext : public EG2PosOccEncodeContext +{ +public: + typedef EG2PosOccEncodeContext ParentClass; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListParams PostingListParams; + using ParentClass::smallAlign; + using ParentClass::writeBits; + using ParentClass::_valI; + using ParentClass::_valE; + using ParentClass::_writeContext; + using ParentClass::log2; + using ParentClass::encodeExpGolomb; + using ParentClass::_fieldsParams; + using ParentClass::readHeader; + using 
ParentClass::writeHeader; + + EGPosOccEncodeContext(const PosOccFieldsParams *fieldsParams) + : EG2PosOccEncodeContext(fieldsParams) + { + } + + EGPosOccEncodeContext & + operator=(const EGPosOccEncodeContext &rhs) + { + EG2PosOccEncodeContext::operator=(rhs); + return *this; + } + + virtual void + readHeader(const vespalib::GenericHeader &header, + const vespalib::string &prefix); + + virtual void + writeHeader(vespalib::GenericHeader &header, + const vespalib::string &prefix) const; + + virtual const vespalib::string & + getIdentifier(void) const; + + virtual void + writeFeatures(const DocIdAndFeatures &features); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms) const; + + static uint32_t + calcElementLenK(uint32_t avgElementLen) + { + return (avgElementLen < 4) ? 1u : (log2(avgElementLen)); + } + + static uint32_t + calcWordPosK(uint32_t numPositions, uint32_t elementLen) + { + uint32_t avgDelta = elementLen / (numPositions + 1); + uint32_t wordPosK = (avgDelta < 4) ? 
1 : (log2(avgDelta)); + return wordPosK; + } +}; + + +extern template class EG2PosOccDecodeContext; +extern template class EG2PosOccDecodeContext; + +extern template class EG2PosOccDecodeContextCooked; +extern template class EG2PosOccDecodeContextCooked; + +extern template class EG2PosOccEncodeContext; +extern template class EG2PosOccEncodeContext; + +extern template class EGPosOccDecodeContext; +extern template class EGPosOccDecodeContext; + +extern template class EGPosOccDecodeContextCooked; +extern template class EGPosOccDecodeContextCooked; + +extern template class EGPosOccEncodeContext; +extern template class EGPosOccEncodeContext; + +} // namespace bitcompression + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/CMakeLists.txt b/searchlib/src/vespa/searchlib/btree/CMakeLists.txt new file mode 100644 index 00000000000..ce4e71729b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_btree OBJECT + SOURCES + btreeaggregator.cpp + btreebuilder.cpp + btreeinserter.cpp + btreeiterator.cpp + btreenode.cpp + btreenodeallocator.cpp + btreenodestore.cpp + btreeremover.cpp + btreeroot.cpp + btreerootbase.cpp + btreestore.cpp + bufferstate.cpp + datastore.cpp + datastorebase.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/btree/OWNERS b/searchlib/src/vespa/searchlib/btree/OWNERS new file mode 100644 index 00000000000..e6340232840 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/OWNERS @@ -0,0 +1,2 @@ +tegge +geirst diff --git a/searchlib/src/vespa/searchlib/btree/btree.h b/searchlib/src/vespa/searchlib/btree/btree.h new file mode 100644 index 00000000000..7f96e3647e2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btree.h @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include "btreeroot.h" +#include "noaggrcalc.h" +#include + +namespace search { +namespace btree { + +/** + * Class that wraps a btree root and an allocator and that provides the same API as + * a standalone btree root without needing to pass the allocator to all functions. + **/ +template , + typename TraitsT = BTreeDefaultTraits, + class AggrCalcT = NoAggrCalc> +class BTree +{ +public: + typedef BTreeRoot TreeType; + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeBuilder Builder; + typedef typename TreeType::InternalNodeType InternalNodeType; + typedef typename TreeType::LeafNodeType LeafNodeType; + typedef typename TreeType::KeyType KeyType; + typedef typename TreeType::DataType DataType; + typedef typename TreeType::Iterator Iterator; + typedef typename TreeType::ConstIterator ConstIterator; + typedef typename TreeType::FrozenView FrozenView; + typedef typename TreeType::AggrCalcType AggrCalcType; +private: + NodeAllocatorType _alloc; + TreeType _tree; + + BTree(const BTree &rhs); + + BTree & + operator=(BTree &rhs); + +public: + BTree(); + ~BTree(); + + const NodeAllocatorType &getAllocator() const { return _alloc; } + NodeAllocatorType &getAllocator() { return _alloc; } + + void + disableFreeLists() { + _alloc.disableFreeLists(); + } + + void + disableElemHoldList() + { + _alloc.disableElemHoldList(); + } + + // Inherit doc from BTreeRoot + void clear() { + _tree.clear(_alloc); + } + void assign(Builder & rhs) { + _tree.assign(rhs, _alloc); + } + bool insert(const KeyType & key, const DataType & data, CompareT comp = CompareT()) { + return _tree.insert(key, data, _alloc, comp); + } + + void + insert(Iterator &itr, + const KeyType &key, const DataType &data) + { + _tree.insert(itr, key, data); + } + + Iterator find(const KeyType & key, CompareT comp = CompareT()) const { + return _tree.find(key, _alloc, comp); + } + Iterator lowerBound(const KeyType & key, CompareT comp = CompareT()) const { + 
return _tree.lowerBound(key, _alloc, comp); + } + Iterator upperBound(const KeyType & key, CompareT comp = CompareT()) const { + return _tree.upperBound(key, _alloc, comp); + } + bool remove(const KeyType & key, CompareT comp = CompareT()) { + return _tree.remove(key, _alloc, comp); + } + + void + remove(Iterator &itr) + { + _tree.remove(itr); + } + + Iterator begin() const { + return _tree.begin(_alloc); + } + FrozenView getFrozenView() const { + return _tree.getFrozenView(_alloc); + } + size_t size() const { + return _tree.size(_alloc); + } + vespalib::string toString() const { + return _tree.toString(_alloc); + } + bool isValid(CompareT comp = CompareT()) const { + return _tree.isValid(_alloc, comp); + } + bool isValidFrozen(CompareT comp = CompareT()) const { + return _tree.isValidFrozen(_alloc, comp); + } + size_t bitSize() const { + return _tree.bitSize(_alloc); + } + size_t bitSize(BTreeNode::Ref node) const { + return _tree.bitSize(node, _alloc); + } + void setRoot(BTreeNode::Ref newRoot) { + _tree.setRoot(newRoot, _alloc); + } + BTreeNode::Ref getRoot() const { + return _tree.getRoot(); + } + MemoryUsage getMemoryUsage() const { + return _alloc.getMemoryUsage(); + } + + const AggrT & + getAggregated(void) const + { + return _tree.getAggregated(_alloc); + } + + void + thaw(Iterator &itr) + { + assert(&itr.getAllocator() == &getAllocator()); + _tree.thaw(itr); + } + + template + void + foreach_key(FunctionType func) const + { + _alloc.getNodeStore().foreach_key(_tree.getRoot(), func); + } + + template + void + foreach(FunctionType func) const + { + _alloc.getNodeStore().foreach(_tree.getRoot(), func); + } +}; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btree.hpp b/searchlib/src/vespa/searchlib/btree/btree.hpp new file mode 100644 index 00000000000..71a05a1d832 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btree.hpp @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btree.h" + +namespace search { +namespace btree { + +template +BTree::BTree() + : _alloc(), + _tree() +{ +} + +template +BTree::~BTree() +{ + clear(); + _alloc.freeze(); + _alloc.clearHoldLists(); +} + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp b/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp new file mode 100644 index 00000000000..75e07cd7514 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.cpp @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "btreetraits.h" +#include "btreeaggregator.hpp" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" + +namespace search +{ + +namespace btree +{ + +template class BTreeAggregator; +template class BTreeAggregator; +template class BTreeAggregator; + + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.h b/searchlib/src/vespa/searchlib/btree/btreeaggregator.h new file mode 100644 index 00000000000..8ba42aba42a --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.h @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreenode.h" +#include "btreenodeallocator.h" +#include "btreetraits.h" +#include "noaggrcalc.h" + +namespace search +{ + +namespace btree +{ + +template +class BTreeAggregator +{ +public: + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeInternalNode + InternalNodeType; + typedef BTreeLeafNode + LeafNodeType; + typedef AggrT AggregatedType; + + static void + recalc(LeafNodeType &node, const AggrCalcT &aggrCalc); + + static void + recalc(LeafNodeType &node, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) + { + (void) allocator; + recalc(node, aggrCalc); + } + + static void + recalc(InternalNodeType &node, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc); + + static AggregatedType + recalc(LeafNodeType &node, + LeafNodeType &splitNode, + const AggrCalcT &aggrCalc); + + static AggregatedType + recalc(InternalNodeType &node, + InternalNodeType &splitNode, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc); +}; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp b/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp new file mode 100644 index 00000000000..9f9183b72cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeaggregator.hpp @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreeaggregator.h" + +namespace search +{ + +namespace btree +{ + +template +void +BTreeAggregator:: +recalc(LeafNodeType &node, const AggrCalcT &aggrCalc) +{ + AggrT a; + for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) { + aggrCalc.add(a, aggrCalc.getVal(node.getData(i))); + } + node.getAggregated() = a; +} + + +template +void +BTreeAggregator:: +recalc(InternalNodeType &node, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) +{ + AggrT a; + for (uint32_t i = 0, ie = node.validSlots(); i < ie; ++i) { + const BTreeNode::Ref childRef = node.getChild(i); + const AggrT &ca(allocator.getAggregated(childRef)); + aggrCalc.add(a, ca); + } + node.getAggregated() = a; +} + + +template +typename BTreeAggregator::AggregatedType +BTreeAggregator:: +recalc(LeafNodeType &node, + LeafNodeType &splitNode, + const AggrCalcT &aggrCalc) +{ + AggrT a; + recalc(node, aggrCalc); + recalc(splitNode, aggrCalc); + a = node.getAggregated(); + aggrCalc.add(a, splitNode.getAggregated()); + return a; +} + + +template +typename BTreeAggregator::AggregatedType +BTreeAggregator:: + recalc(InternalNodeType &node, + InternalNodeType &splitNode, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) +{ + AggrT a; + recalc(node, allocator, aggrCalc); + recalc(splitNode, allocator, aggrCalc); + a = node.getAggregated(); + aggrCalc.add(a, splitNode.getAggregated()); + return a; +} + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp b/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp new file mode 100644 index 00000000000..418f2d8665c --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.cpp @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "btreebuilder.h" +#include "btreenode.hpp" +#include "btreebuilder.hpp" + +namespace search +{ + +namespace btree +{ + +template class BTreeBuilder; +template class BTreeBuilder; +template class BTreeBuilder; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.h b/searchlib/src/vespa/searchlib/btree/btreebuilder.h new file mode 100644 index 00000000000..b68a4e440d5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.h @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreenode.h" +#include "btreerootbase.h" +#include "btreenodeallocator.h" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" +#include "btreeaggregator.h" + +namespace search +{ + +namespace btree +{ + +template +class BTreeBuilder +{ +public: + typedef BTreeNodeAllocator NodeAllocatorType; + typedef typename NodeAllocatorType::BTreeRootBaseType BTreeRootBaseType; + typedef typename NodeAllocatorType::InternalNodeType InternalNodeType; + typedef typename NodeAllocatorType::LeafNodeType LeafNodeType; + typedef BTreeAggregator Aggregator; +private: + typedef KeyT KeyType; + typedef DataT DataType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef BTreeNode::Ref NodeRef; + + NodeAllocatorType &_allocator; + int _numInternalNodes; + int _numLeafNodes; + uint32_t _numInserts; + std::vector _inodes; + LeafNodeTypeRefPair _leaf; + AggrCalcT _defaultAggrCalc; + const AggrCalcT &_aggrCalc; + + void + normalize(void); + + void + allocNewLeafNode(void); + + InternalNodeType * + createInternalNode(void); +public: + BTreeBuilder(NodeAllocatorType &allocator); + + BTreeBuilder(NodeAllocatorType &allocator, const AggrCalcT &aggrCalc); + + ~BTreeBuilder(void); + + void + recursiveDelete(NodeRef node); + + void + insert(const 
KeyT &key, const DataT &data); + + NodeRef + handover(void); + + void + reuse(void); + + void + clear(void); +}; + +extern template class BTreeBuilder; +extern template class BTreeBuilder; +extern template class BTreeBuilder; + +} // namespace btree + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp b/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp new file mode 100644 index 00000000000..25c24a75561 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreebuilder.hpp @@ -0,0 +1,459 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreebuilder.h" + +namespace search +{ + +namespace btree +{ + +template +BTreeBuilder:: +BTreeBuilder(NodeAllocatorType &allocator) + : _allocator(allocator), + _numInternalNodes(0), + _numLeafNodes(0), + _numInserts(0), + _inodes(), + _leaf(), + _defaultAggrCalc(), + _aggrCalc(_defaultAggrCalc) +{ + _leaf = _allocator.allocLeafNode(); + ++_numLeafNodes; +} + + +template +BTreeBuilder:: +BTreeBuilder(NodeAllocatorType &allocator, const AggrCalcT &aggrCalc) + : _allocator(allocator), + _numInternalNodes(0), + _numLeafNodes(0), + _numInserts(0), + _inodes(), + _leaf(), + _defaultAggrCalc(), + _aggrCalc(aggrCalc) +{ + _leaf = _allocator.allocLeafNode(); + ++_numLeafNodes; +} + + +template +BTreeBuilder:: +~BTreeBuilder(void) +{ + clear(); +} + + +template +void +BTreeBuilder:: +recursiveDelete(NodeRef node) +{ + assert(_allocator.isValidRef(node)); + if (_allocator.isLeafRef(node)) { + _allocator.holdNode(node, _allocator.mapLeafRef(node)); + _numLeafNodes--; + return; + } + InternalNodeType *inode = _allocator.mapInternalRef(node); + for (unsigned int i = 0; i < inode->validSlots(); ++i) { + recursiveDelete(inode->getChild(i)); + } + _allocator.holdNode(node, inode); + _numInternalNodes--; +} + + +template +void +BTreeBuilder:: +normalize(void) +{ + std::vector leftInodes; // left to rightmost 
nodes in tree + LeafNodeType *leftLeaf; + NodeRef child; + unsigned int level; + LeafNodeType *leafNode = _leaf.second; + + if (_inodes.size() == 0) { + if (leafNode->validSlots() == 0) { + assert(_numLeafNodes == 1); + assert(_numInserts == 0); + _allocator.holdNode(_leaf.first, _leaf.second); + _numLeafNodes--; + _leaf = std::make_pair(NodeRef(), + static_cast(NULL)); + } + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leafNode, _aggrCalc); + } + assert(_numInserts == leafNode->validSlots()); + return; + } + + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leafNode, _aggrCalc); + } + /* Adjust validLeaves for rightmost nodes */ + for (level = 0; level < _inodes.size(); level++) { + InternalNodeType *inode = _inodes[level].second; + NodeRef lcRef(inode->getLastChild()); + assert(NodeAllocatorType::isValidRef(lcRef)); + assert((level == 0) == _allocator.isLeafRef(lcRef)); + inode->incValidLeaves(_allocator.validLeaves(inode->getLastChild())); + inode->update(inode->validSlots() - 1, + level == 0 ? 
+ _allocator.mapLeafRef(lcRef)->getLastKey() : + _allocator.mapInternalRef(lcRef)->getLastKey(), + lcRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*inode, _allocator, _aggrCalc); + } + } + for (level = 0; level + 1 < _inodes.size(); level++) { + leftInodes.push_back(NodeRef()); + } + /* Build vector of left to rightmost internal nodes (except root level) */ + level = _inodes.size() - 1; + for (;;) { + NodeRef iRef = _inodes[level].first; + InternalNodeType *inode = _inodes[level].second; + if (inode->validSlots() < 2) { + /* Use last child of left to rightmost node on level */ + assert(level + 1 < _inodes.size()); + iRef = leftInodes[level]; + inode = _allocator.mapInternalRef(iRef); + assert(inode != NULL); + assert(inode->validSlots() >= 1); + child = inode->getLastChild(); + } else { + /* Use next to last child of rightmost node on level */ + child = inode->getChild(inode->validSlots() - 2); + } + if (level == 0) + break; + level--; + assert(!_allocator.isLeafRef(child)); + leftInodes[level] = child; + } + /* Remember left to rightmost leaf node */ + assert(_allocator.isLeafRef(child)); + leftLeaf = _allocator.mapLeafRef(child); + + /* Check fanout on rightmost leaf node */ + if (leafNode->validSlots() < LeafNodeType::minSlots()) { + InternalNodeType *pnode = _inodes[0].second; + if (leftLeaf->validSlots() + leafNode->validSlots() < + 2 * LeafNodeType::minSlots()) { + leftLeaf->stealAllFromRightNode(leafNode); + if (pnode->validSlots() == 1) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[0]); + lpnode->incValidLeaves(pnode->validLeaves()); + pnode->setValidLeaves(0); + } + /* Unlink from parent node */ + pnode->remove(pnode->validSlots() - 1); + _allocator.holdNode(_leaf.first, leafNode); + _numLeafNodes--; + _leaf = std::make_pair(child, leftLeaf); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftLeaf, _aggrCalc); + } + } else { + leafNode->stealSomeFromLeftNode(leftLeaf); + if (AggrCalcT::hasAggregated()) { 
+ Aggregator::recalc(*leftLeaf, _aggrCalc); + Aggregator::recalc(*leafNode, _aggrCalc); + } + if (pnode->validSlots() == 1) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[0]); + uint32_t steal = leafNode->validLeaves() - + pnode->validLeaves(); + pnode->incValidLeaves(steal); + lpnode->decValidLeaves(steal); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*lpnode, _allocator, _aggrCalc); + Aggregator::recalc(*pnode, _allocator, _aggrCalc); + } + } + } + if (pnode->validSlots() > 0) { + uint32_t s = pnode->validSlots() - 1; + LeafNodeType *l = _allocator.mapLeafRef(pnode->getChild(s)); + pnode->writeKey(s, l->getLastKey()); + if (s > 0) { + --s; + l = _allocator.mapLeafRef(pnode->getChild(s)); + pnode->writeKey(s, l->getLastKey()); + } + } + if (!leftInodes.empty() && _allocator.isValidRef(leftInodes[0])) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[0]); + uint32_t s = lpnode->validSlots() - 1; + LeafNodeType *l = _allocator.mapLeafRef(lpnode->getChild(s)); + lpnode->writeKey(s, l->getLastKey()); + } + } + + /* Check fanout on rightmost internal nodes except root node */ + for (level = 0; level + 1 < _inodes.size(); level++) { + InternalNodeType *inode = _inodes[level].second; + NodeRef leftInodeRef = leftInodes[level]; + assert(NodeAllocatorType::isValidRef(leftInodeRef)); + InternalNodeType *leftInode = _allocator.mapInternalRef(leftInodeRef); + + InternalNodeType *pnode = _inodes[level + 1].second; + if (inode->validSlots() < InternalNodeType::minSlots()) { + if (leftInode->validSlots() + inode->validSlots() < + 2 * InternalNodeType::minSlots()) { + leftInode->stealAllFromRightNode(inode); + if (pnode->validSlots() == 1) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[level + 1]); + lpnode->incValidLeaves(pnode->validLeaves()); + pnode->setValidLeaves(0); + } + /* Unlink from parent node */ + pnode->remove(pnode->validSlots() - 1); + _allocator.holdNode(_inodes[level].first, inode); + 
_numInternalNodes--; + _inodes[level] = std::make_pair(leftInodeRef, leftInode); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftInode, _allocator, _aggrCalc); + } + } else { + inode->stealSomeFromLeftNode(leftInode, _allocator); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftInode, _allocator, _aggrCalc); + Aggregator::recalc(*inode, _allocator, _aggrCalc); + } + if (pnode->validSlots() == 1) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[level + 1]); + uint32_t steal = inode->validLeaves() - + pnode->validLeaves(); + pnode->incValidLeaves(steal); + lpnode->decValidLeaves(steal); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*lpnode, _allocator, _aggrCalc); + Aggregator::recalc(*pnode, _allocator, _aggrCalc); + } + } + } + } + if (pnode->validSlots() > 0) { + uint32_t s = pnode->validSlots() - 1; + InternalNodeType *n = + _allocator.mapInternalRef(pnode->getChild(s)); + pnode->writeKey(s, n->getLastKey()); + if (s > 0) { + --s; + n = _allocator.mapInternalRef(pnode->getChild(s)); + pnode->writeKey(s, n->getLastKey()); + } + } + if (level + 1 < leftInodes.size() && + _allocator.isValidRef(leftInodes[level + 1])) { + InternalNodeType *lpnode = + _allocator.mapInternalRef(leftInodes[level + 1]); + uint32_t s = lpnode->validSlots() - 1; + InternalNodeType *n = + _allocator.mapInternalRef(lpnode->getChild(s)); + lpnode->writeKey(s, n->getLastKey()); + } + } + /* Check fanout on root node */ + assert(level < _inodes.size()); + InternalNodeType *inode = _inodes[level].second; + assert(inode != NULL); + assert(inode->validSlots() >= 1); + if (inode->validSlots() == 1) { + /* Remove top level from proposed tree since fanout is 1 */ + NodeRef iRef = _inodes[level].first; + _inodes.pop_back(); + _allocator.holdNode(iRef, inode); + _numInternalNodes--; + } + if (!_inodes.empty()) { + assert(_numInserts == _inodes.back().second->validLeaves()); + } else { + assert(_numInserts == _leaf.second->validLeaves()); + } +} 
+ + +template +void +BTreeBuilder:: +allocNewLeafNode(void) +{ + InternalNodeType *inode; + NodeRef child; + + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*_leaf.second, _aggrCalc); + } + LeafNodeTypeRefPair lPair(_allocator.allocLeafNode()); + _numLeafNodes++; + + child = lPair.first; + + unsigned int level = 0; + for (;;) { + if (level >= _inodes.size()) { + InternalNodeTypeRefPair iPair( + _allocator.allocInternalNode(level + 1)); + inode = iPair.second; + _numInternalNodes++; + if (level > 0) { + InternalNodeType *cnode = _inodes[level - 1].second; + inode->insert(0, cnode->getLastKey(), + _inodes[level - 1].first); + inode->setValidLeaves(cnode->validLeaves()); + } else { + inode->insert(0, _leaf.second->getLastKey(), _leaf.first); + inode->setValidLeaves(_leaf.second->validLeaves()); + } + inode->insert(1, KeyType(), child); + _inodes.push_back(iPair); + break; + } + inode = _inodes[level].second; + assert(inode->validSlots() > 0); + NodeRef lcRef(inode->getLastChild()); + inode->incValidLeaves(_allocator.validLeaves(lcRef)); + inode->update(inode->validSlots() - 1, + level == 0 ? 
+ _allocator.mapLeafRef(lcRef)->getLastKey() : + _allocator.mapInternalRef(lcRef)->getLastKey(), + lcRef); + if (inode->validSlots() >= InternalNodeType::maxSlots()) { + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*inode, _allocator, _aggrCalc); + } + InternalNodeTypeRefPair iPair( + _allocator.allocInternalNode(level + 1)); + inode = iPair.second; + _numInternalNodes++; + inode->insert(0, KeyType(), child); + child = iPair.first; + level++; + continue; + } + inode->insert(inode->validSlots(), KeyType(), child); + break; + } + while (level > 0) { + assert(inode->validSlots() > 0); + child = inode->getLastChild(); + assert(!_allocator.isLeafRef(child)); + inode = _allocator.mapInternalRef(child); + level--; + _inodes[level] = std::make_pair(child, inode); + } + _leaf = lPair; +} + + +template +void +BTreeBuilder:: +insert(const KeyT &key, + const DataT &data) +{ + if (_leaf.second->validSlots() >= LeafNodeType::maxSlots()) + allocNewLeafNode(); + LeafNodeType *leaf = _leaf.second; + leaf->insert(leaf->validSlots(), key, data); + ++_numInserts; +} + + +template +typename BTreeBuilder::NodeRef +BTreeBuilder:: +handover(void) +{ + NodeRef ret; + + normalize(); + + if (!_inodes.empty()) + ret = _inodes.back().first; + else + ret = _leaf.first; + + _leaf = std::make_pair(NodeRef(), + static_cast(NULL)); + + _inodes.clear(); + _numInternalNodes = 0; + _numLeafNodes = 0; + return ret; +} + + +template +void +BTreeBuilder:: +reuse(void) +{ + clear(); + _leaf = _allocator.allocLeafNode(); + ++_numLeafNodes; + _numInserts = 0u; +} + + +template +void +BTreeBuilder:: +clear(void) +{ + if (!_inodes.empty()) { + recursiveDelete(_inodes.back().first); + _leaf = std::make_pair(NodeRef(), + static_cast(NULL)); + _inodes.clear(); + } + if (NodeAllocatorType::isValidRef(_leaf.first)) { + assert(_leaf.second != NULL); + assert(_numLeafNodes == 1); + _allocator.holdNode(_leaf.first, _leaf.second); + --_numLeafNodes; + _leaf = std::make_pair(NodeRef(), + static_cast(NULL)); + 
} else { + assert(_leaf.second == NULL); + } + assert(_numLeafNodes == 0); + assert(_numInternalNodes == 0); +} + + +} // namespace btree + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp new file mode 100644 index 00000000000..cf67831fde1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "btreeinserter.h" +#include "btreenodeallocator.h" +#include "btreerootbase.hpp" +#include "btreeinserter.hpp" +#include "btreenode.hpp" + +namespace search +{ + +namespace btree +{ + +template class BTreeInserter; +template class BTreeInserter; +template class BTreeInserter, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.h b/searchlib/src/vespa/searchlib/btree/btreeinserter.h new file mode 100644 index 00000000000..d0d01892500 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreenode.h" +#include "btreenodeallocator.h" +#include "btreerootbase.h" +#include "btreeaggregator.h" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" +#include "btreeiterator.h" + +namespace search +{ + +namespace btree +{ + +template , + typename TraitsT = BTreeDefaultTraits, + class AggrCalcT = NoAggrCalc> +class BTreeInserter +{ +public: + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeAggregator Aggregator; + typedef BTreeIterator Iterator; + typedef BTreeInternalNode + InternalNodeType; + typedef BTreeLeafNode + LeafNodeType; + typedef KeyT KeyType; + typedef DataT DataType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + + static void + insert(BTreeNode::Ref &root, + Iterator &itr, + const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc); +}; + +extern template class BTreeInserter; +extern template class BTreeInserter; +extern template class BTreeInserter, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp new file mode 100644 index 00000000000..597f75aa5ef --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeinserter.hpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreeinserter.h" +#include "btreerootbase.hpp" +#include "btreeiterator.hpp" +#include + +namespace search { +namespace btree { + + +template +void +BTreeInserter:: +insert(BTreeNode::Ref &root, + Iterator &itr, + const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc) +{ + if (!NodeAllocatorType::isValidRef(root)) { + root = itr.insertFirst(key, data, aggrCalc); + return; + } + NodeAllocatorType &allocator(itr.getAllocator()); + bool inRange = itr.valid(); + if (!inRange) { + --itr; + } + root = itr.thaw(root); + uint32_t idx = itr.getLeafNodeIdx() + (inRange ? 0 : 1); + LeafNodeType * lnode = itr.getLeafNode(); + BTreeNode::Ref splitNodeRef; + const KeyT *splitLastKey = NULL; + bool inRightSplit = false; + AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT()); + AggrT ca; + if (lnode->isFull()) { + LeafNodeTypeRefPair splitNode = allocator.allocLeafNode(); + lnode->splitInsert(splitNode.second, idx, key, data); + if (AggrCalcT::hasAggregated()) { + ca = Aggregator::recalc(*lnode, *splitNode.second, aggrCalc); + } + splitNodeRef = splitNode.first; // to signal that a split occured + splitLastKey = &splitNode.second->getLastKey(); + inRightSplit = itr.setLeafNodeIdx(idx, splitNode.second); + } else { + lnode->insert(idx, key, data); + itr.setLeafNodeIdx(idx); + if (AggrCalcT::hasAggregated()) { + aggrCalc.add(lnode->getAggregated(), aggrCalc.getVal(data)); + ca = lnode->getAggregated(); + } + } + const KeyT *lastKey = &lnode->getLastKey(); + uint32_t level = 0; + uint32_t levels = itr.getPathSize(); + for (; level < levels; ++level) { + typename Iterator::PathElement &pe = itr.getPath(level); + InternalNodeType *node(pe.getWNode()); + idx = pe.getIdx(); + AggrT olda(AggrCalcT::hasAggregated() ? 
+ node->getAggregated() : AggrT()); + BTreeNode::Ref subNode = node->getChild(idx); + node->update(idx, *lastKey, subNode); + node->incValidLeaves(1); + if (NodeAllocatorType::isValidRef(splitNodeRef)) { + idx++; // the extra node is inserted in the next slot + if (node->isFull()) { + InternalNodeTypeRefPair splitNode = + allocator.allocInternalNode(level + 1); + node->splitInsert(splitNode.second, idx, + *splitLastKey, splitNodeRef, allocator); + inRightSplit = pe.adjustSplit(inRightSplit, splitNode.second); + if (AggrCalcT::hasAggregated()) { + ca = Aggregator::recalc(*node, *splitNode.second, + allocator, aggrCalc); + } + splitNodeRef = splitNode.first; + splitLastKey = &splitNode.second->getLastKey(); + } else { + node->insert(idx, *splitLastKey, splitNodeRef); + pe.adjustSplit(inRightSplit); + inRightSplit = false; + if (AggrCalcT::hasAggregated()) { + aggrCalc.add(node->getAggregated(), oldca, ca); + ca = node->getAggregated(); + } + splitNodeRef = BTreeNode::Ref(); + splitLastKey = NULL; + } + } else { + if (AggrCalcT::hasAggregated()) { + aggrCalc.add(node->getAggregated(), oldca, ca); + ca = node->getAggregated(); + } + } + if (AggrCalcT::hasAggregated()) { + oldca = olda; + } + lastKey = &node->getLastKey(); + } + if (NodeAllocatorType::isValidRef(splitNodeRef)) { + root = itr.addLevel(root, splitNodeRef, inRightSplit, aggrCalc); + } +} + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp new file mode 100644 index 00000000000..cdc9895d8aa --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.cpp @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "btreeroot.h" +#include "btreenodeallocator.h" +#include "btreeiterator.hpp" +#include "btreenode.hpp" + +namespace search +{ + +namespace btree +{ + /* Explicit instantiations of the iterator class templates; btreeiterator.h carries matching extern template declarations. NOTE(review): the template argument lists are not present in this copy of the patch - confirm against the upstream file. */ +template class BTreeIteratorBase; +template class BTreeIteratorBase; +template class BTreeIteratorBase; +template class BTreeConstIterator; +template class BTreeConstIterator; +template class BTreeConstIterator; +template class BTreeIterator; +template class BTreeIterator; +template class BTreeIterator; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.h b/searchlib/src/vespa/searchlib/btree/btreeiterator.h new file mode 100644 index 00000000000..ef8c3babb25 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.h @@ -0,0 +1,885 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreenode.h" +#include "btreenodeallocator.h" +#include "btreetraits.h" + +namespace search { +namespace btree { + /* Forward declarations of the classes that the iterator classes in this header name as friends. */ +template +class BTreeInserter; +template +class BTreeRemoverBase; +template +class BTreeRemover; +template +class BTreeIterator; + +/** + * Helper class to provide internal or leaf node and position within node. 
+ */ +template +class NodeElement +{ + template + friend class BTreeInserter; + template + friend class BTreeRemoverBase; + template + friend class BTreeRemover; + template + friend class BTreeIterator; + + typedef NodeT NodeType; + typedef typename NodeType::KeyType KeyType; + typedef typename NodeType::DataType DataType; + const NodeType *_node; /* node referred to; NULL means the element is not valid */ + uint32_t _idx; /* slot index within _node */ + /** + * Writable access to the node: casts away the const of _node. Private, + * so only this class and the friends listed above can use it. + */ + NodeType * + getWNode(void) const + { + return const_cast(_node); + } + +public: + NodeElement(void) + : _node(NULL), + _idx(0u) + { + } + + NodeElement(const NodeType *node, uint32_t idx) + : _node(node), + _idx(idx) + { + } + + void + setNode(const NodeType *node) + { + _node = node; + } + + const NodeType * + getNode(void) const + { + return _node; + } + + void + setIdx(uint32_t idx) + { + _idx = idx; + } + + uint32_t + getIdx(void) const + { + return _idx; + } + + void + incIdx(void) + { + ++_idx; + } + + void + decIdx(void) + { + --_idx; + } + + void + setNodeAndIdx(const NodeType *node, uint32_t idx) + { + _node = node; + _idx = idx; + } + + const KeyType & + getKey() const + { + return _node->getKey(_idx); + } + + const DataType & + getData() const + { + return _node->getData(_idx); + } + + bool + valid() const + { + return _node != NULL; + } + /** + * Step the index one slot down; asserts that the index is not already 0. + * NOTE(review): presumably used when the sibling to the left was removed - confirm against the remover. + */ + void + adjustLeftVictimKilled(void) + { + assert(_idx > 0); + --_idx; + } + /** + * Advance the index by stolen slots; asserts that the result stays + * within the valid slots of the node. + */ + void + adjustSteal(uint32_t stolen) + { + assert(_idx + stolen < _node->validSlots()); + _idx += stolen; + } + /** + * Advance the index by one if inRightSplit is set. + */ + void + adjustSplit(bool inRightSplit) + { + if (inRightSplit) + ++_idx; + } + /** + * As adjustSplit(inRightSplit); if the index then lies beyond the valid + * slots of the current node it is rebased onto splitNode. + * + * @return true if the element now refers to splitNode. + */ + bool + adjustSplit(bool inRightSplit, const NodeType *splitNode) + { + adjustSplit(inRightSplit); + if (_idx >= _node->validSlots()) { + _idx -= _node->validSlots(); + _node = splitNode; + return true; + } + return false; + } + + void + swap(NodeElement &rhs) + { + std::swap(_node, rhs._node); + std::swap(_idx, rhs._idx); + } + + bool + operator!=(const NodeElement &rhs) const + { + return _node != rhs._node || + _idx != rhs._idx; + } +}; + + +/** + * Base class for B-tree iterators. 
It defines all members needed + * for the iterator and methods that don't depend on tree ordering. + */ +template +class BTreeIteratorBase +{ +protected: + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode LeafNodeType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef BTreeLeafNodeTemp LeafNodeTempType; + typedef BTreeKeyData KeyDataType; + typedef KeyT KeyType; + typedef DataT DataType; + template + friend class BTreeInserter; + template + friend class BTreeRemoverBase; + template + friend class BTreeRemover; + + typedef NodeElement LeafElement; + + /** + * Current leaf node and current index within it. + */ + LeafElement _leaf; + /** + * Pointer to internal node and index to the child used to + * traverse down the tree + */ + typedef NodeElement PathElement; + /** + * Path from current leaf node up to the root (path[0] is the + * parent of the leaf node) + */ + PathElement _path[PATH_SIZE]; /** + * Number of entries of _path in use, i.e. the number of levels of + * internal nodes in the tree the iterator is set up for. + */ + size_t _pathSize; + /** + * Allocator used to map node references to nodes. NULL for a default + * constructed iterator. + */ + const NodeAllocatorType *_allocator; + + const LeafNodeType *_leafRoot; // Root node for small tree/array + + // Temporary leaf node when iterating over short arrays + std::unique_ptr _compatLeafNode; + +private: + /* + * Find the next leaf node, called by operator++() as needed. + */ + void findNextLeafNode(); + + /* + * Find the previous leaf node, called by operator--() as needed. + */ + VESPA_DLL_LOCAL void findPrevLeafNode(); + +protected: + /* + * Report current position in tree. + * + * @param pidx Number of levels above leaf nodes to take into account. + * @return Position counted over that many levels; size() if the iterator is at end. + */ + size_t + position(uint32_t pidx) const; + + /** + * Create iterator pointing to first element in the tree referenced + * by root. + * + * @param root Reference to root of tree + * @param allocator B-tree node allocator helper class. 
+ */ + BTreeIteratorBase(BTreeNode::Ref root, const NodeAllocatorType &allocator); + + /** + * Compatibility constructor, creating a temporary tree with only a + * temporary leaf node owned by the iterator. + * + * @param shortArray Array of key/data entries used to build the temporary leaf node + * @param arraySize Number of entries in shortArray; 0 leaves the iterator empty + * @param allocator B-tree node allocator helper class. + * @param aggrCalc Calculator used to compute aggregated values for the temporary node + */ + template + BTreeIteratorBase(const KeyDataType *shortArray, + uint32_t arraySize, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc); + + /** + * Default constructor. Iterator is not associated with a tree. + */ + BTreeIteratorBase(); + + /** + * Step iterator forwards. If at end then leave it at end. + */ + BTreeIteratorBase & + operator++() { + if (_leaf.getNode() == NULL) { + return *this; + } + _leaf.incIdx(); + if (_leaf.getIdx() < _leaf.getNode()->validSlots()) { + return *this; + } + findNextLeafNode(); + return *this; + } + + /** + * Step iterator backwards. If at end then place it at last valid + * position in tree (cf. rbegin()) + */ + BTreeIteratorBase & + operator--(); + + ~BTreeIteratorBase(); + BTreeIteratorBase(const BTreeIteratorBase &other); + BTreeIteratorBase &operator=(const BTreeIteratorBase &other); + + + /** + * Set new tree height and clear portions of path that are now + * beyond new tree height. For internal use only. + * + * @param pathSize New tree height (number of levels of internal nodes) + */ + void + clearPath(uint32_t pathSize); +public: + /** + * Iterators compare equal when they refer to the same leaf node and the + * same index within it. The path is not compared. + */ + bool + operator==(const BTreeIteratorBase & rhs) const { + if (_leaf.getNode() != rhs._leaf.getNode() || + _leaf.getIdx() != rhs._leaf.getIdx()) { + return false; + } + return true; + } + + bool + operator!=(const BTreeIteratorBase & rhs) const + { + return !operator==(rhs); + } + + /** + * Swap iterator with the other. + * + * @param rhs Other iterator. + */ + void + swap(BTreeIteratorBase & rhs); + + /** + * Get key at current iterator location. + */ + const KeyType & + getKey() const + { + return _leaf.getKey(); + } + + /** + * Get data at current iterator location. 
+ */ + const DataType & + getData() const + { + return _leaf.getData(); + } + + /** + * Check if iterator is at a valid element, i.e. not at end. + */ + bool + valid() const + { + return _leaf.valid(); + } + + /** + * Return the number of elements in the tree. + */ + size_t + size() const; + + + /** + * Return the current position in the tree. This equals size() when + * the iterator is at end. + */ + size_t + position() const + { + return position(_pathSize); + } + + /** + * Return the distance between two positions in the tree, i.e. the + * position of this iterator minus the position of rhs. When neither + * iterator is at end, both are asserted to have the same path size and + * to share the nodes that are compared. + */ + ssize_t + operator-(const BTreeIteratorBase &rhs) const; + + /** + * Return if the tree has data or not (e.g. keys and data or only keys). + */ + static bool + hasData(void) + { + return LeafNodeType::hasData(); + } + + /** + * Move the iterator directly to end. Used by findHelper method in BTree. + * Only the leaf position is reset; the path is left as it is. + */ + void + setupEnd(void); + + /** + * Setup iterator to be empty and not be associated with any tree. + */ + void + setupEmpty(void); + + /** + * Move iterator to beyond last element in the current tree. + */ + void + end(void) __attribute__((noinline)); + + /** + * Move iterator to beyond last element in the given tree. + * + * @param rootRef Reference to root of tree. + */ + void + end(BTreeNode::Ref rootRef); + + /** + * Move iterator to first element in the current tree. + */ + void + begin(void); + + /** + * Move iterator to first element in the given tree. + * + * @param rootRef Reference to root of tree. + */ + void + begin(BTreeNode::Ref rootRef); + + /** + * Move iterator to last element in the current tree. + */ + void + rbegin(void); + + /* + * Get aggregated values for the current tree. 
+ */ + const AggrT & + getAggregated(void) const; + /** + * Debugging aid: calls abort() if rhs differs from this iterator in path + * size, leaf position, any path element or leaf root. + * + * @return true (the call does not return on a mismatch). + */ + bool + identical(const BTreeIteratorBase &rhs) const; + /** + * Invoke foreach_key() with func on the topmost internal node of the + * path, or on the leaf root when there are no internal levels. Does + * nothing for an empty iterator. + * NOTE(review): presumably this visits every key in the tree - confirm against the node classes. + */ + template + void + foreach_key(FunctionType func) const + { + if (_pathSize > 0) { + _path[_pathSize - 1].getNode()-> + foreach_key(_allocator->getNodeStore(), func); + } else if (_leafRoot != nullptr) { + _leafRoot->foreach_key(func); + } + } +}; + + +/** + * Iterator class for read access to B-trees. It defines methods to + * navigate in the tree, useable for implementing search iterators and + * for positioning in preparation for tree changes (cf. BTreeInserter and + * BTreeRemover). + */ +template , + typename TraitsT = BTreeDefaultTraits> +class BTreeConstIterator : public BTreeIteratorBase +{ +protected: + typedef BTreeIteratorBase ParentType; + typedef typename ParentType::NodeAllocatorType NodeAllocatorType; + typedef typename ParentType::InternalNodeType InternalNodeType; + typedef typename ParentType::LeafNodeType LeafNodeType; + typedef typename ParentType::InternalNodeTypeRefPair + InternalNodeTypeRefPair; + typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair; + typedef typename ParentType::LeafNodeTempType LeafNodeTempType; + typedef typename ParentType::KeyDataType KeyDataType; + typedef typename ParentType::KeyType KeyType; + typedef typename ParentType::DataType DataType; + typedef typename ParentType::PathElement PathElement; + + using ParentType::_leaf; + using ParentType::_path; + using ParentType::_pathSize; + using ParentType::_allocator; + using ParentType::_leafRoot; + using ParentType::_compatLeafNode; + using ParentType::clearPath; + using ParentType::setupEmpty; +public: + using ParentType::end; + +protected: + /** Pointer to seek node and path index to the parent node **/ + typedef std::pair SeekNode; + +public: + /** + * Create iterator pointing to first element in the tree referenced + * by root. + * + * @param root Reference to root of tree + * @param allocator B-tree node allocator helper class. 
+ */ + BTreeConstIterator(BTreeNode::Ref root, const NodeAllocatorType &allocator) + : ParentType(root, allocator) + { + } + + /** + * Compatibility constructor, creating a temporary tree with only a + * temporary leaf node owned by the iterator. + */ + template + BTreeConstIterator(const KeyDataType *shortArray, + uint32_t arraySize, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) + : ParentType(shortArray, arraySize, allocator, aggrCalc) + { + } + + /** + * Default constructor. Iterator is not associated with a tree. + */ + BTreeConstIterator() + : ParentType() + { + } + + /** + * Step iterator forwards. If at end then leave it at end. + */ + BTreeConstIterator & + operator++() + { + ParentType::operator++(); + return *this; + } + + /** + * Step iterator backwards. If at end then place it at last valid + * position in tree (cf. rbegin()) + */ + BTreeConstIterator & + operator--() + { + ParentType::operator--(); + return *this; + } + + /** + * Position iterator at first position with a key that is greater + * than or equal to the key argument. The iterator must be set up + * for the same tree before this method is called. The iterator is + * placed at end if there is no such key. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + lower_bound(const KeyType & key, CompareT comp = CompareT()); + + /** + * Position iterator at first position with a key that is greater + * than or equal to the key argument in the tree referenced by rootRef. + * The iterator is placed at end if there is no such key. + * + * @param rootRef Reference to root of tree to search; an invalid reference leaves the iterator empty + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + lower_bound(BTreeNode::Ref rootRef, + const KeyType & key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than or equal to the key argument. Original + * position must be valid with a key that is less than the key argument. + * + * Tree traits determine if binary or linear search is performed within + * each tree node. 
+ * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + * + * Calls binarySeek() when TraitsT::BINARY_SEEK is set, otherwise linearSeek(). + */ + void + seek(const KeyType &key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than or equal to the key argument. Original + * position must be valid with a key that is less than the key argument. + * This precondition is asserted when STRICT_BTREE_ITERATOR_SEEK is defined. + * + * Binary search is performed within each tree node. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + binarySeek(const KeyType &key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than or equal to the key argument. Original + * position must be valid with a key that is less than the key argument. + * This precondition is asserted when STRICT_BTREE_ITERATOR_SEEK is defined. + * + * Linear search is performed within each tree node. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + linearSeek(const KeyType &key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than the key argument. Original position must + * be valid with a key that is less than or equal to the key argument. + * + * Tree traits determine if binary or linear search is performed within + * each tree node. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + seekPast(const KeyType &key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than the key argument. Original position must + * be valid with a key that is less than or equal to the key argument. + * + * Binary search is performed within each tree node. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. 
+ */ + void + binarySeekPast(const KeyType &key, CompareT comp = CompareT()); + + /** + * Step iterator forwards until it is at a position with a key + * that is greater than the key argument. Original position must + * be valid with a key that is less than or equal to the key argument. + * + * Linear search is performed within each tree node. + * + * @param key Key to search for + * @param comp Comparator for the tree ordering. + */ + void + linearSeekPast(const KeyType &key, CompareT comp = CompareT()); + + /** + * Validate the iterator as a valid iterator or positioned at + * end in the tree referenced by rootRef. Validation failure + * triggers asserts. This method is for internal debugging use only. + * + * @param rootRef Reference to root of tree to operate on + * @param comp Comparator for the tree ordering. + */ + void + validate(BTreeNode::Ref rootRef, CompareT comp = CompareT()); +}; + + +/** + * Iterator class for write access to B-trees. It contains some helper + * methods used by BTreeInserter and BTreeRemover when modifying a tree. 
+ */ +template , + typename TraitsT = BTreeDefaultTraits> +class BTreeIterator : public BTreeConstIterator +{ +public: + typedef BTreeConstIterator ParentType; + typedef typename ParentType::NodeAllocatorType NodeAllocatorType; + typedef typename ParentType::InternalNodeType InternalNodeType; + typedef typename ParentType::LeafNodeType LeafNodeType; + typedef typename ParentType::InternalNodeTypeRefPair + InternalNodeTypeRefPair; + typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair; + typedef typename ParentType::LeafNodeTempType LeafNodeTempType; + typedef typename ParentType::KeyDataType KeyDataType; + typedef typename ParentType::KeyType KeyType; + typedef typename ParentType::DataType DataType; + typedef typename ParentType::PathElement PathElement; + template + friend class BTreeInserter; + template + friend class BTreeRemoverBase; + template + friend class BTreeRemover; + + using ParentType::_leaf; + using ParentType::_path; + using ParentType::_pathSize; + using ParentType::_allocator; + using ParentType::_leafRoot; + using ParentType::_compatLeafNode; + using ParentType::end; + /** + * Create iterator for the tree referenced by root; forwards to the + * parent class constructor taking the same arguments. + */ + BTreeIterator(BTreeNode::Ref root, const NodeAllocatorType &allocator) + : ParentType(root, allocator) + { + } + /** + * Constructor for iterating over a short array; forwards to the parent + * class constructor taking the same arguments. + */ + template + BTreeIterator(const KeyDataType *shortArray, + uint32_t arraySize, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) + : ParentType(shortArray, arraySize, allocator, aggrCalc) + { + } + + BTreeIterator() + : ParentType() + { + } + + BTreeIterator & + operator++() + { + ParentType::operator++(); + return *this; + } + + BTreeIterator & + operator--() + { + ParentType::operator--(); + return *this; + } + /** + * Writable access to the node allocator: casts away the const of the + * stored allocator pointer. The iterator must be associated with an + * allocator, since the pointer is dereferenced. + */ + NodeAllocatorType & + getAllocator(void) const + { + return const_cast(*_allocator); + } + + BTreeNode::Ref + moveFirstLeafNode(BTreeNode::Ref rootRef); + + void + moveNextLeafNode(); + /** + * Write data into the leaf node slot at the current iterator position. + */ + void + writeData(const DataType &data) + { + _leaf.getWNode()->writeData(_leaf.getIdx(), data); + } + + /** + * Set a new key for the 
current iterator position. + * The new key must have the same semantic meaning as the old key. + * Typically used when compacting data store containing keys. + */ + void + writeKey(const KeyType &key); + + /** + * Update data at the current iterator position. The tree should + * have been thawed. + * + * @param data New data value + * @param aggrCalc Calculator for updating aggregated information. + */ + template + void + updateData(const DataType &data, const AggrCalcT &aggrCalc); + + /** + * Thaw a path from the root node down to the current leaf node in + * the current tree, allowing for updates to be performed without + * disturbing the frozen version of the tree. + * + * @param rootRef Reference to root of tree. + * @return Root reference to use afterwards (BTreeInserter stores it back as the root). + */ + BTreeNode::Ref + thaw(BTreeNode::Ref rootRef); + +private: + /* Insert into empty tree */ + template + BTreeNode::Ref + insertFirst(const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc); + + LeafNodeType * + getLeafNode(void) const + { + return _leaf.getWNode(); + } + + bool + setLeafNodeIdx(uint32_t idx, const LeafNodeType *splitLeafNode); + + void + setLeafNodeIdx(uint32_t idx) + { + _leaf.setIdx(idx); + } + + uint32_t + getLeafNodeIdx(void) const + { + return _leaf.getIdx(); + } + + uint32_t + getPathSize(void) const + { + return _pathSize; + } + + PathElement & + getPath(uint32_t pidx) + { + return _path[pidx]; + } + + template + BTreeNode::Ref + addLevel(BTreeNode::Ref rootRef, BTreeNode::Ref splitNodeRef, + bool inRightSplit, const AggrCalcT &aggrCalc); + + BTreeNode::Ref + removeLevel(BTreeNode::Ref rootRef, InternalNodeType *rootNode); + + void + removeLast(BTreeNode::Ref rootRef); + /** + * Adjust the iterator after slots have moved between sibling nodes. + * If leftVictimKilled is set, the index at path level 'level' is stepped + * down by one. If stolen is non-zero, the index one level below (the + * leaf position when level is 0) is advanced by stolen. + * Asserts that level is within the current path. + */ + void + adjustSteal(uint32_t level, bool leftVictimKilled, uint32_t stolen) + { + assert(_pathSize > level); + if (leftVictimKilled) { + _path[level].adjustLeftVictimKilled(); + } + if (stolen != 0) { + if (level > 0) + _path[level - 1].adjustSteal(stolen); + else + _leaf.adjustSteal(stolen); + } + } +}; + + +extern template class BTreeIteratorBase; +extern template class 
BTreeIteratorBase; +extern template class BTreeIteratorBase; +extern template class BTreeConstIterator; +extern template class BTreeConstIterator; +extern template class BTreeConstIterator; +extern template class BTreeIterator; +extern template class BTreeIterator; +extern template class BTreeIterator; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp new file mode 100644 index 00000000000..d9439f94bfb --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeiterator.hpp @@ -0,0 +1,1330 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreeiterator.h" +#include "btreeaggregator.h" +#include + +namespace search { +namespace btree { + /* When defined, binarySeek() and linearSeek() assert that the starting position is valid and has a key less than the key sought. */ +#define STRICT_BTREE_ITERATOR_SEEK + +namespace { + /** + * Format key as a string by streaming it into a vespalib::asciistream. + */ +template +vespalib::string +keyToStr(const KeyT & key) +{ + vespalib::asciistream ss; + ss << key; + return ss.str(); +} + +} + /** + * Copy constructor. Only the first _pathSize path elements are copied. + * A temporary leaf node owned by other is cloned, and _leaf / _leafRoot + * are redirected to the clone when they referred to other's temporary node. + */ +template +BTreeIteratorBase:: +BTreeIteratorBase(const BTreeIteratorBase &other) + : _leaf(other._leaf), + _pathSize(other._pathSize), + _allocator(other._allocator), + _leafRoot(other._leafRoot), + _compatLeafNode() +{ + for (size_t i = 0; i < _pathSize; ++i) { + _path[i] = other._path[i]; + } + if (other._compatLeafNode.get()) { + _compatLeafNode.reset( new LeafNodeTempType(*other._compatLeafNode)); + } + if (other._leaf.getNode() == other._compatLeafNode.get()) { + _leaf.setNode(_compatLeafNode.get()); + } + if (other._leafRoot == other._compatLeafNode.get()) { + _leafRoot = _compatLeafNode.get(); + } +} + /** + * Exchange all members with other, including ownership of the temporary + * leaf node. + */ +template +void +BTreeIteratorBase:: +swap(BTreeIteratorBase & other) +{ + std::swap(_leaf, other._leaf); + std::swap(_pathSize, other._pathSize); + std::swap(_path, other._path); + std::swap(_allocator, other._allocator); + std::swap(_leafRoot, other._leafRoot); + std::swap(_compatLeafNode, other._compatLeafNode); +} + + 
+template +void +BTreeIteratorBase:: +clearPath(uint32_t pathSize) +{ /* Reset the path entries above the new height, then record the new height. Entries are only reset when the height shrinks. */ + uint32_t level = _pathSize; + while (level > pathSize) { + --level; + _path[level].setNodeAndIdx(NULL, 0u); + } + _pathSize = pathSize; +} + + /** + * Assignment by copy-and-swap; self-assignment is a no-op. + */ +template +BTreeIteratorBase & +BTreeIteratorBase:: +operator=(const BTreeIteratorBase &other) +{ + if (&other == this) { + return *this; + } + BTreeIteratorBase tmp(other); + swap(tmp); + return *this; +} + +template +BTreeIteratorBase:: +~BTreeIteratorBase() +{ +} + /** + * Mark the iterator as being at end by clearing the leaf position only. + */ +template +void +BTreeIteratorBase:: +setupEnd(void) +{ + _leaf.setNodeAndIdx(NULL, 0u); +} + + /** + * Detach the iterator from any tree: clear the whole path, the leaf + * position and the leaf root. + */ +template +void +BTreeIteratorBase:: +setupEmpty(void) +{ + clearPath(0u); + _leaf.setNodeAndIdx(NULL, 0u); + _leafRoot = NULL; +} + + /** + * Move to end in the current tree. Every path element is set to the + * last node on its level with an index equal to that node's number of + * valid slots, and the leaf position is cleared. + */ +template +void +BTreeIteratorBase:: +end(void) +{ + if (_pathSize == 0) { + if (_leafRoot == NULL) + return; + _leaf.setNodeAndIdx(NULL, 0u); + return; + } + uint32_t level = _pathSize - 1; + PathElement &pe = _path[level]; + const InternalNodeType *inode = pe.getNode(); + uint32_t idx = inode->validSlots(); + pe.setIdx(idx); + BTreeNode::Ref childRef = inode->getChild(idx - 1); + while (level > 0) { + --level; + assert(!_allocator->isLeafRef(childRef)); + inode = _allocator->mapInternalRef(childRef); + idx = inode->validSlots(); + _path[level].setNodeAndIdx(inode, idx); + childRef = inode->getChild(idx - 1); + assert(childRef.valid()); + } + assert(_allocator->isLeafRef(childRef)); + _leaf.setNodeAndIdx(NULL, 0u); +} + + /** + * Set the iterator up for the tree referenced by rootRef and move to end. + * An invalid rootRef leaves the iterator empty; a leaf root is stored in + * _leafRoot; otherwise the path is rebuilt from the root's level downwards. + */ +template +void +BTreeIteratorBase:: +end(BTreeNode::Ref rootRef) +{ + if (!rootRef.valid()) { + setupEmpty(); + return; + } + if (_allocator->isLeafRef(rootRef)) { + clearPath(0u); + const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef); + _leafRoot = lnode; + _leaf.setNodeAndIdx(NULL, 0u); + return; + } + _leafRoot = NULL; + const InternalNodeType *inode = _allocator->mapInternalRef(rootRef); + uint32_t idx = inode->validSlots(); + uint32_t pidx = inode->getLevel(); + clearPath(pidx); + --pidx; + assert(pidx < PATH_SIZE); + 
_path[pidx].setNodeAndIdx(inode, idx); + BTreeNode::Ref childRef = inode->getChild(idx - 1); + assert(childRef.valid()); + while (pidx != 0) { + --pidx; + inode = _allocator->mapInternalRef(childRef); + idx = inode->validSlots(); + assert(idx > 0u); + _path[pidx].setNodeAndIdx(inode, idx); + childRef = inode->getChild(idx - 1); + assert(childRef.valid()); + } + _leaf.setNodeAndIdx(NULL, 0u); +} + + /** + * Walk up the path until a level with a next child is found, then descend + * to the first leaf node below that child and position at its first slot. + * The index of every path element visited on the way up is incremented. + * If no level has a next child, the iterator is left at end. + */ +template +void +BTreeIteratorBase:: +findNextLeafNode() +{ + uint32_t pidx; + for (pidx = 0; pidx < _pathSize; ++pidx) { + PathElement & elem = _path[pidx]; + const InternalNodeType * inode = elem.getNode(); + elem.incIdx(); // advance to the next child + if (elem.getIdx() < inode->validSlots()) { + BTreeNode::Ref node = inode->getChild(elem.getIdx()); + while (pidx > 0) { + // find the first leaf node under this child and update path + inode = _allocator->mapInternalRef(node); + pidx--; + _path[pidx].setNodeAndIdx(inode, 0u); + node = inode->getChild(0); + } + _leaf.setNodeAndIdx(_allocator->mapLeafRef(node), 0u); + return; + } + } + _leaf.setNodeAndIdx(NULL, 0u); +} + + /** + * Walk up the path until a level with a previous child is found, then + * descend to the last leaf node below that child and position at its last + * slot. If no level has a previous child, end() is called. + */ +template +void +BTreeIteratorBase:: +findPrevLeafNode() +{ + uint32_t pidx; + for (pidx = 0; pidx < _pathSize; ++pidx) { + PathElement & elem = _path[pidx]; + const InternalNodeType * inode = elem.getNode(); + if (elem.getIdx() > 0u) { + elem.decIdx(); // step back to the previous child + BTreeNode::Ref node = inode->getChild(elem.getIdx()); + while (pidx > 0) { + // find the last leaf node under this child and update path + inode = _allocator->mapInternalRef(node); + uint16_t slot = inode->validSlots() - 1; + pidx--; + _path[pidx].setNodeAndIdx(inode, slot); + node = inode->getChild(slot); + } + const LeafNodeType *lnode(_allocator->mapLeafRef(node)); + _leaf.setNodeAndIdx(lnode, lnode->validSlots() - 1); + return; + } + } + // XXX: position wraps around for now, to end of list. 
+ end(); +} + + /** + * Move to the first element of the current tree, starting from the + * topmost node already stored in the path, or from _leafRoot when there + * are no internal levels. With no tree set up the iterator ends up at end. + */ +template +void +BTreeIteratorBase:: +begin(void) +{ + uint32_t pidx = _pathSize; + if (pidx > 0u) { + --pidx; + PathElement &elem = _path[pidx]; + elem.setIdx(0); + BTreeNode::Ref node = elem.getNode()->getChild(0); + while (pidx > 0) { + // find the first leaf node under this child and update path + const InternalNodeType * inode = _allocator->mapInternalRef(node); + pidx--; + _path[pidx].setNodeAndIdx(inode, 0u); + node = inode->getChild(0); + } + _leaf.setNodeAndIdx(_allocator->mapLeafRef(node), 0u); + } else { + _leaf.setNodeAndIdx(_leafRoot, 0u); + } +} + + /** + * Set the iterator up for the tree referenced by rootRef and move to its + * first element. An invalid rootRef leaves the iterator empty; a leaf + * root is stored in _leafRoot; otherwise the path is rebuilt from the + * root's level downwards, following the first child on every level. + */ +template +void +BTreeIteratorBase:: +begin(BTreeNode::Ref rootRef) +{ + if (!rootRef.valid()) { + setupEmpty(); + return; + } + if (_allocator->isLeafRef(rootRef)) { + clearPath(0u); + const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef); + _leafRoot = lnode; + _leaf.setNodeAndIdx(lnode, 0u); + return; + } + _leafRoot = NULL; + const InternalNodeType *inode = _allocator->mapInternalRef(rootRef); + uint32_t pidx = inode->getLevel(); + clearPath(pidx); + --pidx; + assert(pidx < PATH_SIZE); + _path[pidx].setNodeAndIdx(inode, 0); + BTreeNode::Ref childRef = inode->getChild(0); + assert(childRef.valid()); + while (pidx != 0) { + --pidx; + inode = _allocator->mapInternalRef(childRef); + _path[pidx].setNodeAndIdx(inode, 0); + childRef = inode->getChild(0); + assert(childRef.valid()); + } + _leaf.setNodeAndIdx(_allocator->mapLeafRef(childRef), 0u); +} + + /** + * Move to the last element of the current tree by following the last + * child on every level below the topmost path node. + */ +template +void +BTreeIteratorBase:: +rbegin(void) +{ + uint32_t pidx = _pathSize; + if (pidx > 0u) { + --pidx; + PathElement &elem = _path[pidx]; + const InternalNodeType * inode = elem.getNode(); + uint16_t slot = inode->validSlots() - 1; + elem.setIdx(slot); + BTreeNode::Ref node = inode->getChild(slot); + while (pidx > 0) { + // find the last leaf node under this child and update path + inode = _allocator->mapInternalRef(node); + slot = inode->validSlots() - 1; + pidx--; + _path[pidx].setNodeAndIdx(inode, slot); + node = 
inode->getChild(slot); + } + const LeafNodeType *lnode(_allocator->mapLeafRef(node)); + _leaf.setNodeAndIdx(lnode, lnode->validSlots() - 1); + } else { + _leaf.setNodeAndIdx(_leafRoot, + (_leafRoot != NULL) ? + _leafRoot->validSlots() - 1 : + 0u); + } +} + + /** + * Return the aggregated values stored in the topmost node of the tree: + * the top internal node of the path, or the leaf root when there are no + * internal levels. + */ +template +const AggrT & +BTreeIteratorBase:: +getAggregated(void) const +{ + // An empty tree (no path and no leaf root) yields LeafNodeType::getEmptyAggregated(). + uint32_t pidx = _pathSize; + if (pidx > 0u) { + return _path[pidx - 1].getNode()->getAggregated(); + } else if (_leafRoot != NULL) { + return _leafRoot->getAggregated(); + } else { + return LeafNodeType::getEmptyAggregated(); + } +} + + /** + * Compute the position of the iterator counted over the lowest 'levels' + * levels of the path. On each level the elements below the children + * before the current child are added; when the current child is past the + * middle of the node this is instead computed as the node total minus the + * elements below the current and later children. + * Returns size() when the iterator is at end. + */ +template +size_t +BTreeIteratorBase:: +position(uint32_t levels) const +{ + assert(_pathSize >= levels); + if (_leaf.getNode() == NULL) + return size(); + size_t res = _leaf.getIdx(); + if (levels == 0) + return res; + { + const PathElement & elem = _path[0]; + const InternalNodeType * inode = elem.getNode(); + uint32_t slots = inode->validSlots(); + if (elem.getIdx() * 2 > slots) { + res += inode->validLeaves(); + for (uint32_t c = elem.getIdx(); c < slots; ++c) { + BTreeNode::Ref node = inode->getChild(c); + const LeafNodeType *lnode = _allocator->mapLeafRef(node); + res -= lnode->validSlots(); + } + } else { + for (uint32_t c = 0; c < elem.getIdx(); ++c) { + BTreeNode::Ref node = inode->getChild(c); + const LeafNodeType *lnode = _allocator->mapLeafRef(node); + res += lnode->validSlots(); + } + } + } + for (uint32_t pidx = 1; pidx < levels; ++pidx) { + const PathElement & elem = _path[pidx]; + const InternalNodeType * inode = elem.getNode(); + uint32_t slots = inode->validSlots(); + if (elem.getIdx() * 2 > slots) { + res += inode->validLeaves(); + for (uint32_t c = elem.getIdx(); c < slots; ++c) { + BTreeNode::Ref node = inode->getChild(c); + const InternalNodeType *jnode = + _allocator->mapInternalRef(node); + res -= jnode->validLeaves(); + } + } else { + for (uint32_t c = 0; c < elem.getIdx(); ++c) { + BTreeNode::Ref node = inode->getChild(c); + const 
InternalNodeType *jnode = + _allocator->mapInternalRef(node); + res += jnode->validLeaves(); + } + } + } + return res; +} + + /** + * Create an iterator for the tree referenced by root and position it at + * the first element via begin(root). + */ +template +BTreeIteratorBase:: +BTreeIteratorBase(BTreeNode::Ref root, + const NodeAllocatorType &allocator) + : _leaf(NULL, 0u), + _path(), + _pathSize(0), + _allocator(&allocator), + _leafRoot(NULL), + _compatLeafNode() +{ + begin(root); +} + + /** + * Create an iterator over a short array. For a non-empty array a + * temporary leaf node owned by the iterator is built and used both as + * the current leaf and as the leaf root; its aggregated values are + * recalculated when the calculator has any. An empty array leaves the + * iterator empty. + */ +template +template +BTreeIteratorBase:: +BTreeIteratorBase(const KeyDataType *shortArray, + uint32_t arraySize, + const NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc) + : _leaf(NULL, 0u), + _path(), + _pathSize(0), + _allocator(&allocator), + _leafRoot(NULL), + _compatLeafNode() +{ + if(arraySize > 0) { + _compatLeafNode.reset(new LeafNodeTempType(shortArray, arraySize)); + _leaf.setNode(_compatLeafNode.get()); + _leafRoot = _leaf.getNode(); + typedef BTreeAggregator Aggregator; + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(const_cast(*_leaf.getNode()), + aggrCalc); + } + } +} + + /** + * Create an iterator that is not associated with any tree or allocator. + */ +template +BTreeIteratorBase:: +BTreeIteratorBase() + : _leaf(NULL, 0u), + _path(), + _pathSize(0), + _allocator(NULL), + _leafRoot(NULL), + _compatLeafNode() +{ +} + + /** + * Step backwards. From end the iterator moves to the last element via + * rbegin(); within a leaf only the index is decremented; at the first + * slot of a leaf the previous leaf node is located. + */ +template +BTreeIteratorBase & +BTreeIteratorBase:: +operator--() +{ + if (_leaf.getNode() == NULL) { + rbegin(); + return *this; + } + if (_leaf.getIdx() > 0u) { + _leaf.decIdx(); + return *this; + } + findPrevLeafNode(); + return *this; +} + + /** + * Number of elements in the tree: validLeaves() of the topmost path node, + * validSlots() of the leaf root when there are no internal levels, or 0 + * for an empty iterator. + */ +template +size_t +BTreeIteratorBase:: +size() const +{ + if (_pathSize > 0) { + return _path[_pathSize - 1].getNode()->validLeaves(); + } + if (_leafRoot != nullptr) { + return _leafRoot->validSlots(); + } + return 0u; +} + + /** + * Distance from rhs to this iterator. An iterator at end counts as + * position size() of the other iterator's tree. + */ +template +ssize_t +BTreeIteratorBase:: +operator-(const BTreeIteratorBase &rhs) const +{ + if (_leaf.getNode() == NULL) { + if (rhs._leaf.getNode() == NULL) + return 0; + // *this might not be normalized (i.e. default constructor) + return rhs.size() - rhs.position(rhs._pathSize); + } else if (rhs._leaf.getNode() == NULL) { + // rhs might not be normalized (i.e. 
default constructor) + return position(_pathSize) - size(); + } + assert(_pathSize == rhs._pathSize); + if (_pathSize != 0) { + uint32_t pidx = _pathSize; + while (pidx > 0) { /* skip the upper levels on which both iterators follow the same child; only the levels below need to be counted */ + assert(_path[pidx - 1].getNode() == rhs._path[pidx - 1].getNode()); + if (_path[pidx - 1].getIdx() != rhs._path[pidx - 1].getIdx()) + break; + --pidx; + } + return position(pidx) - rhs.position(pidx); + } else { + assert(_leaf.getNode() == NULL || rhs._leaf.getNode() == NULL || + _leaf.getNode() == rhs._leaf.getNode()); + return position(0) - rhs.position(0); + } +} + + /** + * Debug check that rhs is in exactly the same state as this iterator. + * Any difference in path size, leaf position, path elements or leaf root + * calls abort(); the 'return false' statements following abort() are + * therefore never reached. + * + * @return true when the iterators are identical. + */ +template +bool +BTreeIteratorBase:: +identical(const BTreeIteratorBase &rhs) const +{ + if (_pathSize != rhs._pathSize || _leaf != rhs._leaf) { + abort(); + return false; + } + for (uint32_t level = 0; level < _pathSize; ++level) { + if (_path[level] != rhs._path[level]) { + abort(); + return false; + } + } + if (_leafRoot != rhs._leafRoot) { + abort(); + return false; + } + return true; +} + + /** + * Position at the first element with a key not less than key in the + * current tree, descending from the topmost node already stored in the + * path (or searching the leaf root when there are no internal levels). + * The iterator is moved to end when there is no such element; nothing + * is done when no tree is set up. + */ +template +void +BTreeConstIterator:: +lower_bound(const KeyType & key, CompareT comp) +{ + if (_pathSize == 0) { + if (_leafRoot == NULL) + return; + uint32_t idx = _leafRoot->template lower_bound(key, comp); + if (idx >= _leafRoot->validSlots()) { + _leaf.setNodeAndIdx(NULL, 0u); + } else { + _leaf.setNodeAndIdx(_leafRoot, idx); + } + return; + } + uint32_t level = _pathSize - 1; + PathElement &pe = _path[level]; + const InternalNodeType *inode = pe.getNode(); + uint32_t idx = inode->template lower_bound(key, comp); + if (__builtin_expect(idx >= inode->validSlots(), false)) { + end(); + return; + } + pe.setIdx(idx); + BTreeNode::Ref childRef = inode->getChild(idx); + while (level > 0) { + --level; + assert(!_allocator->isLeafRef(childRef)); + inode = _allocator->mapInternalRef(childRef); + idx = inode->template lower_bound(key, comp); + assert(idx < inode->validSlots()); + _path[level].setNodeAndIdx(inode, idx); + childRef = inode->getChild(idx); + assert(childRef.valid()); + } + assert(_allocator->isLeafRef(childRef)); + const 
LeafNodeType *lnode = _allocator->mapLeafRef(childRef); + idx = lnode->template lower_bound(key, comp); + assert(idx < lnode->validSlots()); + _leaf.setNodeAndIdx(lnode, idx); +} + + +/** + * Position the iterator at the lower bound of key in the tree rooted at + * rootRef, rebuilding the path from the root. + * + * An invalid rootRef gives setupEmpty(). When rootRef is a leaf, the path + * is cleared, the leaf becomes the leaf root, and the leaf is set to NULL + * if the lower bound index is not a valid slot. When rootRef is an + * internal node, end(rootRef) is called if the lower bound index is not a + * valid slot in the root; otherwise the path is filled in from level + * getLevel() - 1 down to 0 and the leaf node is located. + */ +template +void +BTreeConstIterator:: +lower_bound(BTreeNode::Ref rootRef, const KeyType & key, CompareT comp) +{ + if (!rootRef.valid()) { + setupEmpty(); + return; + } + if (_allocator->isLeafRef(rootRef)) { + clearPath(0u); + const LeafNodeType *lnode = _allocator->mapLeafRef(rootRef); + _leafRoot = lnode; + uint32_t idx = lnode->template lower_bound(key, comp); + if (idx >= lnode->validSlots()) { + _leaf.setNodeAndIdx(NULL, 0u); + } else { + _leaf.setNodeAndIdx(lnode, idx); + } + return; + } + _leafRoot = NULL; + const InternalNodeType *inode = _allocator->mapInternalRef(rootRef); + uint32_t idx = inode->template lower_bound(key, comp); + if (idx >= inode->validSlots()) { + end(rootRef); + return; + } + uint32_t pidx = inode->getLevel(); + clearPath(pidx); + --pidx; + assert(pidx < TraitsT::PATH_SIZE); + _path[pidx].setNodeAndIdx(inode, idx); + BTreeNode::Ref childRef = inode->getChild(idx); + assert(childRef.valid()); + while (pidx != 0) { + --pidx; + inode = _allocator->mapInternalRef(childRef); + idx = inode->template lower_bound(key, comp); + assert(idx < inode->validSlots()); + _path[pidx].setNodeAndIdx(inode, idx); + childRef = inode->getChild(idx); + assert(childRef.valid()); + } + const LeafNodeType *lnode = _allocator->mapLeafRef(childRef); + idx = lnode->template lower_bound(key, comp); + assert(idx < lnode->validSlots()); + _leaf.setNodeAndIdx(lnode, idx); +} + + +/** + * Forward seek: dispatches to binarySeek() when TraitsT::BINARY_SEEK is + * set and to linearSeek() otherwise. + */ +template +void +BTreeConstIterator:: +seek(const KeyType & key, CompareT comp) +{ + if (TraitsT::BINARY_SEEK) { + binarySeek(key, comp); + } else { + linearSeek(key, comp); + } +} + +/** + * Move the iterator forward to the first key that is not less than key + * according to comp, using the nodes' lower_bound() functions. + * + * The search starts at the slot after the current one. When + * STRICT_BTREE_ITERATOR_SEEK is defined it is asserted that the iterator + * is valid and that the current key is less than key. If the last key of + * the current leaf is less than key, the path is climbed until a node + * whose last key is not less than key is found (end() is called when + * there is none) and the iterator then descends to the matching leaf. + */ +template +void +BTreeConstIterator:: +binarySeek(const KeyType & key, CompareT comp) +{ + const LeafNodeType *lnode = _leaf.getNode(); + uint32_t lidx = _leaf.getIdx(); +#ifdef STRICT_BTREE_ITERATOR_SEEK + assert(_leaf.valid() && 
comp(lnode->getKey(lidx), key)); +#endif + ++lidx; + if (lidx < lnode->validSlots()) { + if (!comp(lnode->getKey(lidx), key)) { + _leaf.setIdx(lidx); + return; + } else { + ++lidx; + } + } + if (comp(lnode->getLastKey(), key)) { + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels && + comp(_path[level].getNode()->getLastKey(), key)) + ++level; + if (__builtin_expect(level >= levels, false)) { + end(); + return; + } else { + const InternalNodeType *node = _path[level].getNode(); + uint32_t idx = _path[level].getIdx(); + idx = node->template lower_bound(idx + 1, key, comp); + _path[level].setIdx(idx); + while (level > 0) { + --level; + node = _allocator->mapInternalRef(node->getChild(idx)); + idx = node->template lower_bound(0, key, comp); + _path[level].setNodeAndIdx(node, idx); + } + lnode = _allocator->mapLeafRef(node->getChild(idx)); + _leaf.setNode(lnode); + lidx = 0; + } + } + lidx = lnode->template lower_bound(lidx, key, comp); + _leaf.setIdx(lidx); +} + +/** + * Same traversal as binarySeek(), but the position inside each node is + * found by stepping the index one slot at a time while comp(slot key, key) + * is true instead of calling lower_bound(). + * + * NOTE(review): the stepping loops have no explicit bound check; they + * appear to rely on the preceding getLastKey() comparisons (and on the + * precondition asserted under STRICT_BTREE_ITERATOR_SEEK) to stop inside + * the node - confirm. + */ +template +void +BTreeConstIterator:: +linearSeek(const KeyType & key, CompareT comp) +{ + const LeafNodeType *lnode = _leaf.getNode(); + uint32_t lidx = _leaf.getIdx(); +#ifdef STRICT_BTREE_ITERATOR_SEEK + assert(_leaf.valid() && comp(lnode->getKey(lidx), key)); +#endif + ++lidx; + if (lidx < lnode->validSlots()) { + if (!comp(lnode->getKey(lidx), key)) { + _leaf.setIdx(lidx); + return; + } else { + ++lidx; + } + } + if (comp(lnode->getLastKey(), key)) { + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels && + comp(_path[level].getNode()->getLastKey(), key)) + ++level; + if (__builtin_expect(level >= levels, false)) { + end(); + return; + } else { + const InternalNodeType *node = _path[level].getNode(); + uint32_t idx = _path[level].getIdx(); + do { + ++idx; + } while (comp(node->getKey(idx), key)); + _path[level].setIdx(idx); + while (level > 0) { + --level; + node = _allocator->mapInternalRef(node->getChild(idx)); + idx = 0; + while (comp(node->getKey(idx), 
key)) { + ++idx; + } + _path[level].setNodeAndIdx(node, idx); + } + lnode = _allocator->mapLeafRef(node->getChild(idx)); + _leaf.setNode(lnode); + lidx = 0; + } + } + while (comp(lnode->getKey(lidx), key)) { + ++lidx; + } + _leaf.setIdx(lidx); +} + +/** + * Forward seek past key: dispatches to binarySeekPast() when + * TraitsT::BINARY_SEEK is set and to linearSeekPast() otherwise. + */ +template +void +BTreeConstIterator:: +seekPast(const KeyType & key, CompareT comp) +{ + if (TraitsT::BINARY_SEEK) { + binarySeekPast(key, comp); + } else { + linearSeekPast(key, comp); + } +} + +/** + * Move the iterator forward to the first key k for which comp(key, k) is + * true, using the nodes' upper_bound() functions. + * + * The search starts at the slot after the current one. When + * STRICT_BTREE_ITERATOR_SEEK is defined it is asserted that the iterator + * is valid and that comp(key, current key) is false. If comp(key, last + * key of the current leaf) is false, the path is climbed until a node + * whose last key satisfies comp(key, last key) is found (end() is called + * when there is none) and the iterator then descends to the matching + * leaf. + */ +template +void +BTreeConstIterator:: +binarySeekPast(const KeyType & key, CompareT comp) +{ + const LeafNodeType *lnode = _leaf.getNode(); + uint32_t lidx = _leaf.getIdx(); +#ifdef STRICT_BTREE_ITERATOR_SEEK + assert(_leaf.valid() && !comp(key, lnode->getKey(lidx))); +#endif + ++lidx; + if (lidx < lnode->validSlots()) { + if (comp(key, lnode->getKey(lidx))) { + _leaf.setIdx(lidx); + return; + } else { + ++lidx; + } + } + if (!comp(key, lnode->getLastKey())) { + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels && + !comp(key, _path[level].getNode()->getLastKey())) + ++level; + if (__builtin_expect(level >= levels, false)) { + end(); + return; + } else { + const InternalNodeType *node = _path[level].getNode(); + uint32_t idx = _path[level].getIdx(); + idx = node->template upper_bound(idx + 1, key, comp); + _path[level].setIdx(idx); + while (level > 0) { + --level; + node = _allocator->mapInternalRef(node->getChild(idx)); + idx = node->template upper_bound(0, key, comp); + _path[level].setNodeAndIdx(node, idx); + } + lnode = _allocator->mapLeafRef(node->getChild(idx)); + _leaf.setNode(lnode); + lidx = 0; + } + } + lidx = lnode->template upper_bound(lidx, key, comp); + _leaf.setIdx(lidx); +} + +/** + * Same traversal as binarySeekPast(), but the position inside each node + * is found by stepping the index one slot at a time while + * comp(key, slot key) is false instead of calling upper_bound(). + * + * NOTE(review): the stepping loops have no explicit bound check; they + * appear to rely on the preceding getLastKey() comparisons to stop inside + * the node - confirm. + */ +template +void +BTreeConstIterator:: +linearSeekPast(const KeyType & key, CompareT comp) +{ + const LeafNodeType *lnode = _leaf.getNode(); + uint32_t lidx = _leaf.getIdx(); +#ifdef STRICT_BTREE_ITERATOR_SEEK + assert(_leaf.valid() && !comp(key, lnode->getKey(lidx))); +#endif + ++lidx; + if (lidx < 
lnode->validSlots()) { + if (comp(key, lnode->getKey(lidx))) { + _leaf.setIdx(lidx); + return; + } else { + ++lidx; + } + } + if (!comp(key, lnode->getLastKey())) { + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels && + !comp(key, _path[level].getNode()->getLastKey())) + ++level; + if (__builtin_expect(level >= levels, false)) { + end(); + return; + } else { + const InternalNodeType *node = _path[level].getNode(); + uint32_t idx = _path[level].getIdx(); + do { + ++idx; + } while (!comp(key, node->getKey(idx))); + _path[level].setIdx(idx); + while (level > 0) { + --level; + node = _allocator->mapInternalRef(node->getChild(idx)); + idx = 0; + while (!comp(key, node->getKey(idx))) { + ++idx; + } + _path[level].setNodeAndIdx(node, idx); + } + lnode = _allocator->mapLeafRef(node->getChild(idx)); + _leaf.setNode(lnode); + lidx = 0; + } + } + while (!comp(key, lnode->getKey(lidx))) { + ++lidx; + } + _leaf.setIdx(lidx); +} + + +/** + * Consistency check of the iterator state against the tree rooted at + * rootRef. All checks are assert() calls; nothing is returned. + * + * For an invalid rootRef the iterator must be empty. Otherwise the path + * is walked from the top level down: each path element must refer to the + * node reached through the parent's child reference, its index must be a + * valid slot (for an iterator without a current leaf the stored index must + * be 0 or validSlots()), a node below a frozen node must itself be frozen, + * and the keys at the path positions must be ordered consistently with + * the parent key and with the key at the current leaf position. + */ +template +void +BTreeConstIterator:: +validate(BTreeNode::Ref rootRef, CompareT comp) +{ + bool frozen = false; + if (!rootRef.valid()) { + assert(_pathSize == 0u); + assert(_leafRoot == NULL); + assert(_leaf.getNode() == NULL); + return; + } + uint32_t level = _pathSize; + BTreeNode::Ref nodeRef = rootRef; + const KeyT *parentKey = NULL; + const KeyT *leafKey = NULL; + if (_leaf.getNode() != NULL) { + leafKey = &_leaf.getNode()->getKey(_leaf.getIdx()); + } + while (level > 0) { + --level; + assert(!_allocator->isLeafRef(nodeRef)); + const PathElement &pe = _path[level]; + assert(pe.getNode() == _allocator->mapInternalRef(nodeRef)); + uint32_t idx = pe.getIdx(); + if (leafKey == NULL) { + assert(idx == 0 || + idx == pe.getNode()->validSlots()); + if (idx == pe.getNode()->validSlots()) + --idx; + } + assert(idx < pe.getNode()->validSlots()); + assert(!frozen || pe.getNode()->getFrozen()); + (void) frozen; + frozen = pe.getNode()->getFrozen(); + if (parentKey != NULL) { + assert(idx + 1 == pe.getNode()->validSlots() || + 
comp(pe.getNode()->getKey(idx), *parentKey)); + assert(!comp(*parentKey, pe.getNode()->getKey(idx))); + (void) comp; + } + if (leafKey != NULL) { + assert(idx == 0 || + comp(pe.getNode()->getKey(idx - 1), *leafKey)); + assert(idx + 1 == pe.getNode()->validSlots() || + comp(*leafKey, pe.getNode()->getKey(idx + 1))); + assert(!comp(pe.getNode()->getKey(idx), *leafKey)); + (void) comp; + } + parentKey = &pe.getNode()->getKey(idx); + nodeRef = pe.getNode()->getChild(idx); + assert(nodeRef.valid()); + } + assert(_allocator->isLeafRef(nodeRef)); + if (_pathSize == 0) { + assert(_leafRoot == _allocator->mapLeafRef(nodeRef)); + assert(_leaf.getNode() == NULL || _leaf.getNode() == _leafRoot); + } else { + assert(_leafRoot == NULL); + assert(_leaf.getNode() == _allocator->mapLeafRef(nodeRef) || + _leaf.getNode() == NULL); + } +} + + +/** + * Position the iterator at the first leaf node, moving every node on the + * way (root, internal nodes along child 0, and the leaf) for which + * allocator.getCompacting() is true by means of + * allocator.moveInternalNode() / allocator.moveLeafNode(), and updating + * the parents' child references accordingly. + * + * The leaf index is set to the last valid slot of that leaf. A release + * fence is issued when something was moved. Returns the root reference, + * which differs from rootRef when the root node itself was moved; an + * invalid rootRef is returned unchanged. + */ +template +BTreeNode::Ref +BTreeIterator:: +moveFirstLeafNode(BTreeNode::Ref rootRef) +{ + if (!NodeAllocatorType::isValidRef(rootRef)) { + assert(_pathSize == 0); + assert(_leaf.getNode() == NULL); + return rootRef; + } + + assert(_leaf.getNode() != NULL); + NodeAllocatorType &allocator = getAllocator(); + + if (_pathSize == 0) { + BTreeNode::Ref newRootRef = rootRef; + assert(_leaf.getNode() == allocator.mapLeafRef(rootRef)); + if (allocator.getCompacting(rootRef)) { + LeafNodeTypeRefPair lPair(allocator.moveLeafNode(_leaf.getNode())); + _leaf.setNode(lPair.second); + // Before updating root + std::atomic_thread_fence(std::memory_order_release); + newRootRef = lPair.first; + } + _leaf.setIdx(_leaf.getNode()->validSlots() - 1); + return newRootRef; + } + + uint32_t level = _pathSize; + BTreeNode::Ref newRootRef = rootRef; + + --level; + InternalNodeType *node = _path[level].getWNode(); + assert(node == allocator.mapInternalRef(rootRef)); + bool moved = allocator.getCompacting(rootRef); + if (moved) { + InternalNodeTypeRefPair iPair(allocator.moveInternalNode(node)); + newRootRef = iPair.first; + node = iPair.second; + } + 
_path[level].setNodeAndIdx(node, 0u); + while (level > 0) { + --level; + EntryRef nodeRef = node->getChild(0); + InternalNodeType *pnode = node; + node = allocator.mapInternalRef(nodeRef); + if (allocator.getCompacting(nodeRef)) { + InternalNodeTypeRefPair iPair = allocator.moveInternalNode(node); + nodeRef = iPair.first; + node = iPair.second; + pnode->setChild(0, nodeRef); + moved = true; + } + _path[level].setNodeAndIdx(node, 0u); + } + EntryRef nodeRef = node->getChild(0); + _leaf.setNode(allocator.mapLeafRef(nodeRef)); + if (allocator.getCompacting(nodeRef)) { + LeafNodeTypeRefPair + lPair(allocator.moveLeafNode(_leaf.getNode())); + _leaf.setNode(lPair.second); + node->setChild(0, lPair.first); + moved = true; + } + if (moved) { + // Before updating root + std::atomic_thread_fence(std::memory_order_release); + } + _leaf.setIdx(_leaf.getNode()->validSlots() - 1); + return newRootRef; +} + + +/** + * Step to the next leaf node, moving nodes that are being compacted. + * + * The path is climbed until a level with a following slot is found; + * end() is called when there is none. On the way down again each child + * node for which allocator.getCompacting() is true is moved, and after a + * release fence the new reference is stored in the parent. The leaf index + * is set to the last valid slot of the new leaf. + */ +template +void +BTreeIterator:: +moveNextLeafNode() +{ + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels && + _path[level].getNode()->validSlots() <= _path[level].getIdx() + 1) + ++level; + if (__builtin_expect(level >= levels, false)) { + end(); + return; + } else { + NodeAllocatorType &allocator = getAllocator(); + InternalNodeType *node = _path[level].getWNode(); + uint32_t idx = _path[level].getIdx(); + ++idx; + _path[level].setIdx(idx); + while (level > 0) { + --level; + EntryRef nodeRef = node->getChild(idx); + InternalNodeType *pnode = node; + node = allocator.mapInternalRef(nodeRef); + if (allocator.getCompacting(nodeRef)) { + InternalNodeTypeRefPair iPair(allocator.moveInternalNode(node)); + nodeRef = iPair.first; + node = iPair.second; + std::atomic_thread_fence(std::memory_order_release); + pnode->setChild(idx, nodeRef); + } + idx = 0; + _path[level].setNodeAndIdx(node, idx); + } + EntryRef nodeRef = node->getChild(idx); + _leaf.setNode(allocator.mapLeafRef(nodeRef)); + if (allocator.getCompacting(nodeRef)) { + LeafNodeTypeRefPair 
lPair(allocator.moveLeafNode(_leaf.getNode())); + _leaf.setNode(lPair.second); + std::atomic_thread_fence(std::memory_order_release); + node->setChild(idx, lPair.first); + } + _leaf.setIdx(_leaf.getNode()->validSlots() - 1); + } +} + + +/** + * Overwrite the key at the current leaf position. + * + * When that position is the last valid slot of the leaf, the key stored + * for this subtree is overwritten in the path nodes as well, from the + * lowest level upwards, stopping after the first node where the path + * index is not the last valid slot. + */ +template +void +BTreeIterator:: +writeKey(const KeyType & key) +{ + LeafNodeType * lnode = getLeafNode(); + lnode->writeKey(_leaf.getIdx(), key); + // must also update the key towards the root as long as the key is + // the last one in the current node + if (_leaf.getIdx() + 1 == lnode->validSlots()) { + for (uint32_t i = 0; i < _pathSize; ++i) { + const PathElement & pe = _path[i]; + InternalNodeType *inode = pe.getWNode(); + uint32_t childIdx = pe.getIdx(); + inode->writeKey(childIdx, key); + if (childIdx + 1 != inode->validSlots()) { + break; + } + } + } +} + + +/** + * Overwrite the data at the current leaf position. + * + * When AggrCalcT::hasAggregated() is true the aggregated value of the + * leaf is updated through aggrCalc.update(), with a full recalc of the + * node when update() returns true, and the old/new aggregated values are + * then propagated through every node on the path in the same way. + * Otherwise only the data is written. + */ +template +template +void +BTreeIterator:: +updateData(const DataType & data, const AggrCalcT &aggrCalc) +{ + LeafNodeType * lnode = getLeafNode(); + if (AggrCalcT::hasAggregated()) { + AggrT oldca(lnode->getAggregated()); + typedef BTreeAggregator Aggregator; + if (aggrCalc.update(lnode->getAggregated(), + aggrCalc.getVal(lnode->getData(_leaf.getIdx())), + aggrCalc.getVal(data))) { + lnode->writeData(_leaf.getIdx(), data); + Aggregator::recalc(*lnode, aggrCalc); + } else { + lnode->writeData(_leaf.getIdx(), data); + } + AggrT ca(lnode->getAggregated()); + // must also update aggregated values towards the root. 
+ for (uint32_t i = 0; i < _pathSize; ++i) { + const PathElement & pe = _path[i]; + InternalNodeType * inode = pe.getWNode(); + AggrT oldpa(inode->getAggregated()); + if (aggrCalc.update(inode->getAggregated(), + oldca, ca)) { + Aggregator::recalc(*inode, *_allocator, aggrCalc); + } + AggrT pa(inode->getAggregated()); + oldca = oldpa; + ca = pa; + } + } else { + lnode->writeData(_leaf.getIdx(), data); + } +} + + +/** + * Make sure the current leaf node and the nodes on the path above it are + * not frozen, so they can be modified. + * + * Returns rootRef unchanged when the current leaf is not frozen. + * Otherwise the leaf is replaced by the result of allocator.thawNode() + * and the path is walked upwards: the first node that is not frozen gets + * its child reference updated and rootRef is returned; frozen nodes are + * themselves replaced through allocator.thawNode(). When every node up + * to and including the root had to be thawed, the reference of the new + * root is returned. + * + * Requires a current leaf node and no temporary leaf node (asserted). + */ +template +BTreeNode::Ref +BTreeIterator:: +thaw(BTreeNode::Ref rootRef) +{ + assert(_leaf.getNode() != NULL && _compatLeafNode.get() == NULL); + if (!_leaf.getNode()->getFrozen()) + return rootRef; + NodeAllocatorType &allocator = getAllocator(); + if (_pathSize == 0) { + LeafNodeType *leafNode = allocator.mapLeafRef(rootRef); + assert(leafNode == _leaf.getNode()); + assert(leafNode == _leafRoot); + LeafNodeTypeRefPair thawedLeaf = allocator.thawNode(rootRef, + leafNode); + _leaf.setNode(thawedLeaf.second); + _leafRoot = thawedLeaf.second; + return thawedLeaf.first; + } + assert(_leafRoot == NULL); + assert(_path[_pathSize - 1].getNode() == + allocator.mapInternalRef(rootRef)); + BTreeNode::Ref childRef(_path[0].getNode()->getChild(_path[0].getIdx())); + LeafNodeType *leafNode = allocator.mapLeafRef(childRef); + assert(leafNode == _leaf.getNode()); + LeafNodeTypeRefPair thawedLeaf = allocator.thawNode(childRef, + leafNode); + _leaf.setNode(thawedLeaf.second); + childRef = thawedLeaf.first; + uint32_t level = 0; + uint32_t levels = _pathSize; + while (level < levels) { + PathElement &pe = _path[level]; + InternalNodeType *node(pe.getWNode()); + BTreeNode::Ref nodeRef = level + 1 < levels ? 
+ _path[level + 1].getNode()-> + getChild(_path[level + 1].getIdx()) : + rootRef; + assert(node == allocator.mapInternalRef(nodeRef)); + if (!node->getFrozen()) { + node->setChild(pe.getIdx(), childRef); + return rootRef; + } + InternalNodeTypeRefPair thawed = allocator.thawNode(nodeRef, node); + node = thawed.second; + pe.setNode(node); + node->setChild(pe.getIdx(), childRef); + childRef = thawed.first; + ++level; + } + return childRef; // Root node was thawed +} + + +/** + * Insert the first entry into an empty tree. + * + * Requires an empty path and no leaf root (asserted). A leaf node is + * allocated, the entry is inserted at slot 0, the aggregated value is + * initialised from the data when AggrCalcT::hasAggregated() is true, and + * the new leaf becomes both leaf root and current leaf. Returns the + * reference of the new leaf node. + */ +template +template +BTreeNode::Ref +BTreeIterator:: +insertFirst(const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc) +{ + assert(_pathSize == 0); + assert(_leafRoot == NULL); + NodeAllocatorType &allocator = getAllocator(); + LeafNodeTypeRefPair lnode = allocator.allocLeafNode(); + lnode.second->insert(0, key, data); + if (AggrCalcT::hasAggregated()) { + AggrT a; + aggrCalc.add(a, aggrCalc.getVal(data)); + lnode.second->getAggregated() = a; + } + _leafRoot = lnode.second; + _leaf.setNodeAndIdx(lnode.second, 0u); + return lnode.first; +} + + +/** + * Set the leaf index, switching to splitLeafNode when idx is not below + * the number of valid slots in the current leaf. + * + * In that case the iterator is pointed at slot idx - validSlots() of + * splitLeafNode (which also becomes the leaf root when the path is empty) + * and true is returned. Otherwise only the index is set and false is + * returned. + */ +template +bool +BTreeIterator:: +setLeafNodeIdx(uint32_t idx, const LeafNodeType *splitLeafNode) +{ + uint32_t leafSlots = _leaf.getNode()->validSlots(); + if (idx >= leafSlots) { + _leaf.setNodeAndIdx(splitLeafNode, + idx - leafSlots); + if (_pathSize == 0) { + _leafRoot = splitLeafNode; + } + return true; + } else { + _leaf.setIdx(idx); + return false; + } +} + + +/** + * Add a new top level to the tree. + * + * A new internal node at level _pathSize + 1 is allocated with rootRef as + * child 0 and splitNodeRef as child 1, each keyed by the last key of the + * respective subtree; its valid-leaves count is the sum for both + * children. The node is pushed on the path with index 1 when + * inRightSplit is true and 0 otherwise, and the leaf root is cleared when + * the path was empty. Returns the reference of the new root node. + */ +template +template +BTreeNode::Ref +BTreeIterator:: +addLevel(BTreeNode::Ref rootRef, BTreeNode::Ref splitNodeRef, + bool inRightSplit, const AggrCalcT &aggrCalc) +{ + typedef BTreeAggregator Aggregator; + + NodeAllocatorType &allocator(getAllocator()); + + InternalNodeTypeRefPair inodePair(allocator.allocInternalNode(_pathSize + 1)); + InternalNodeType *inode = inodePair.second; + inode->setValidLeaves(allocator.validLeaves(rootRef) + + allocator.validLeaves(splitNodeRef)); + inode->insert(0, allocator.getLastKey(rootRef), rootRef); + inode->insert(1, 
allocator.getLastKey(splitNodeRef), splitNodeRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*inode, allocator, aggrCalc); + } + _path[_pathSize].setNodeAndIdx(inode, inRightSplit ? 1u : 0u); + if (_pathSize == 0) { + _leafRoot = NULL; + } + ++_pathSize; + return inodePair.first; +} + + +/** + * Remove the top level of the tree. + * + * Child 0 of rootNode becomes the new root, the old root node is handed + * to allocator.holdNode(), and the top path element is cleared. When the + * path becomes empty the current leaf node becomes the leaf root. + * Returns the reference of the new root. + */ +template +BTreeNode::Ref +BTreeIterator:: +removeLevel(BTreeNode::Ref rootRef, InternalNodeType *rootNode) +{ + BTreeNode::Ref newRoot = rootNode->getChild(0); + NodeAllocatorType &allocator(getAllocator()); + allocator.holdNode(rootRef, rootNode); + --_pathSize; + _path[_pathSize].setNodeAndIdx(NULL, 0u); + if (_pathSize == 0) { + _leafRoot = _leaf.getNode(); + } + return newRoot; +} + + +/** + * Hand the current leaf node (referenced by rootRef) to + * allocator.holdNode() and clear both the leaf root and the current leaf + * node of the iterator. + */ +template +void +BTreeIterator:: +removeLast(BTreeNode::Ref rootRef) +{ + NodeAllocatorType &allocator(getAllocator()); + allocator.holdNode(rootRef, getLeafNode()); + _leafRoot = NULL; + _leaf.setNode(NULL); +} + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.cpp b/searchlib/src/vespa/searchlib/btree/btreenode.cpp new file mode 100644 index 00000000000..9d8f6d686d5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenode.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "btreenode.h" +#include "btreenode.hpp" + +namespace search { +namespace btree { + +/* Definitions of the static _instance members. */ +BTreeNoLeafData BTreeNoLeafData::_instance; + +NoAggregated BTreeNodeAggregatedWrap::_instance; +template <> +MinMaxAggregated BTreeNodeAggregatedWrap::_instance = + MinMaxAggregated(); + +/* Explicit instantiations of the node class templates. NOTE(review): the template argument lists are not visible in this copy - confirm against the original source. */ +template class BTreeNodeDataWrap; +template class BTreeNodeDataWrap; +template class BTreeKeyData; +template class BTreeKeyData; +template class BTreeNodeT; +template class BTreeNodeTT; +template class BTreeNodeTT; +template class BTreeNodeTT; +template class BTreeNodeTT; +template class BTreeInternalNode; +template class BTreeInternalNode; +template class BTreeLeafNode; +template class BTreeLeafNode; +template class BTreeLeafNode; +template class BTreeLeafNodeTemp; +template class BTreeLeafNodeTemp; +template class BTreeLeafNodeTemp; + +} // namespace search::btree +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.h b/searchlib/src/vespa/searchlib/btree/btreenode.h new file mode 100644 index 00000000000..c44743fceb0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenode.h @@ -0,0 +1,784 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "entryref.h" +#include +#include +#include +#include "noaggregated.h" +#include "minmaxaggregated.h" + +namespace search { +namespace btree { + +template +class BTreeNodeAllocator; +template class BTreeNodeStore; +template class BufferType; +template class DataStoreT; + +class NoAggregated; + +/** + * Common base class for B-tree nodes. + * + * Holds the node level (0 for a leaf node, see isLeaf()), a frozen flag + * and the number of valid slots. Assignment asserts that the target node + * is not frozen, and the destructor asserts that the node is frozen. + */ +class BTreeNode { +private: + uint8_t _level; + bool _isFrozen; +public: + static constexpr uint8_t EMPTY_LEVEL = 255; + static constexpr uint8_t LEAF_LEVEL = 0; +protected: + uint16_t _validSlots; + BTreeNode(uint8_t level) + : _level(level), + _isFrozen(false), + _validSlots(0) + { + } + + BTreeNode(const BTreeNode &rhs) + : _level(rhs._level), + _isFrozen(rhs._isFrozen), + _validSlots(rhs._validSlots) + { + } + + BTreeNode & + operator=(const BTreeNode &rhs) + { + assert(!_isFrozen); + _level = rhs._level; + _isFrozen = rhs._isFrozen; + _validSlots = rhs._validSlots; + return *this; + } + + ~BTreeNode(void) + { + assert(_isFrozen); + } + +public: + typedef EntryRef Ref; + + bool isLeaf() const { return _level == 0u; } + + bool + getFrozen(void) const + { + return _isFrozen; + } + + void + freeze(void) + { + _isFrozen = true; + } + + void + unFreeze(void) + { + _isFrozen = false; + } + + + void + setLevel(uint8_t level) + { + _level = level; + } + + uint32_t getLevel() const { return _level; } + + uint32_t validSlots() const { return _validSlots; } + + void + setValidSlots(uint16_t validSlots_) + { + _validSlots = validSlots_; + } +}; + + +/** + * Storage for the per-slot data of a node: an array of NumSlots DataT + * values. copyData() copies the first validSlots entries from another + * instance; getData()/setData() access a slot without range checking. + */ +template +class BTreeNodeDataWrap +{ +public: + DataT _data[NumSlots]; + + BTreeNodeDataWrap(void) + : _data() + { + } + + void + copyData(const BTreeNodeDataWrap &rhs, uint32_t validSlots) + { + const DataT *rdata = rhs._data; + DataT *ldata = _data; + DataT *ldatae = _data + validSlots; + for (; ldata != ldatae; ++ldata, ++rdata) + *ldata = *rdata; + } + + const DataT & + getData(uint32_t idx) const + { + return _data[idx]; + } + + void + setData(uint32_t idx, const DataT &data) + { + _data[idx] = data; + } + 
+ static bool + hasData(void) + { + return true; + } +}; + + +/** + * Empty class to use as DataT template parameter for BTree classes to + * indicate that leaf nodes have no data (similar to std::set having less + * information than std::map). Use of this class triggers the below + * partial specialization of BTreeNodeDataWrap to prevent unneeded + * storage overhead. + */ +class BTreeNoLeafData +{ +public: + static BTreeNoLeafData _instance; +}; + + +/** + * Variant of BTreeNodeDataWrap for BTreeNoLeafData: it has no data + * members, copyData() and setData() do nothing, getData() returns + * BTreeNoLeafData::_instance and hasData() returns false. + */ +template +class BTreeNodeDataWrap +{ +public: + BTreeNodeDataWrap(void) + { + } + + void + copyData(const BTreeNodeDataWrap &rhs, uint32_t validSlots) + { + (void) rhs; + (void) validSlots; + } + + const BTreeNoLeafData & + getData(uint32_t idx) const + { + (void) idx; + return BTreeNoLeafData::_instance; + } + + void + setData(uint32_t idx, const BTreeNoLeafData &data) + { + (void) idx; + (void) data; + } + + static bool + hasData(void) + { + return false; + } +}; + + +/** + * A key together with its data. Ordering (operator<) compares the keys + * only. + */ +template +class BTreeKeyData +{ +public: + typedef KeyT KeyType; + typedef DataT DataType; + + KeyT _key; + DataT _data; + + BTreeKeyData(void) + : _key(), + _data() + { + } + + BTreeKeyData(const KeyT &key, const DataT &data) + : _key(key), + _data(data) + { + } + + void + setData(const DataT &data) + { + _data = data; + } + + const DataT & + getData(void) const + { + return _data; + } + + /** + * This operator only works when using direct keys. References to + * externally stored keys will not be properly sorted. 
+ */ + bool + operator<(const BTreeKeyData &rhs) const + { + return _key < rhs._key; + } +}; + + +/** + * Variant of BTreeKeyData for BTreeNoLeafData: only the key is stored, + * setData() does nothing and getData() returns + * BTreeNoLeafData::_instance. + */ +template +class BTreeKeyData +{ +public: + typedef KeyT KeyType; + typedef BTreeNoLeafData DataType; + + KeyT _key; + + BTreeKeyData(void) + : _key() + { + } + + BTreeKeyData(const KeyT &key, const BTreeNoLeafData &data) + : _key(key) + { + (void) data; + } + + void + setData(const BTreeNoLeafData &data) + { + (void) data; + } + + const BTreeNoLeafData & + getData(void) const + { + return BTreeNoLeafData::_instance; + } + + /** + * This operator only works when using direct keys. References to + * externally stored keys will not be properly sorted. + */ + bool + operator<(const BTreeKeyData &rhs) const + { + return _key < rhs._key; + } +}; + + +/** + * Storage for the aggregated value of a node. getAggregated() gives + * access to the per-node value; getEmptyAggregated() returns the shared + * static _instance. + */ +template +class BTreeNodeAggregatedWrap +{ + typedef AggrT AggregatedType; + + AggrT _aggr; + static AggrT _instance; + +public: + BTreeNodeAggregatedWrap(void) + : _aggr() + { + } + + AggrT & + getAggregated(void) + { + return _aggr; + } + + const AggrT & + getAggregated(void) const + { + return _aggr; + } + + static const AggrT & + getEmptyAggregated(void) + { + return _instance; + } +}; + + +/** + * Specialization for NoAggregated: there is no per-node member, and all + * accessors return the shared static _instance. + */ +template <> +class BTreeNodeAggregatedWrap +{ + typedef NoAggregated AggregatedType; + + static NoAggregated _instance; +public: + BTreeNodeAggregatedWrap(void) + { + } + + NoAggregated & + getAggregated(void) + { + return _instance; + } + + const NoAggregated & + getAggregated(void) const + { + return _instance; + } + + static const NoAggregated & + getEmptyAggregated(void) + { + return _instance; + } +}; + + +/** + * BTreeNode extended with an array of NumSlots keys. Copy construction + * and assignment copy the first _validSlots keys only. + */ +template +class BTreeNodeT : public BTreeNode { +protected: + KeyT _keys[NumSlots]; + BTreeNodeT(uint8_t level) + : BTreeNode(level), + _keys() + { + } + + ~BTreeNodeT(void) + { + } + + BTreeNodeT(const BTreeNodeT &rhs) + : BTreeNode(rhs) + { + const KeyT *rkeys = rhs._keys; + KeyT *lkeys = _keys; + KeyT *lkeyse = _keys + _validSlots; + for (; lkeys != lkeyse; ++lkeys, ++rkeys) + *lkeys = *rkeys; + } + + BTreeNodeT & + 
operator=(const BTreeNodeT &rhs) + { + BTreeNode::operator=(rhs); + const KeyT *rkeys = rhs._keys; + KeyT *lkeys = _keys; + KeyT *lkeyse = _keys + _validSlots; + for (; lkeys != lkeyse; ++lkeys, ++rkeys) + *lkeys = *rkeys; + return *this; + } + +public: + /** + * Key accessors. None of them checks idx; getLastKey() reads + * _keys[validSlots() - 1] and does not check for an empty node. + */ + const KeyT & getKey(uint32_t idx) const { return _keys[idx]; } + const KeyT & getLastKey() const { return _keys[validSlots() - 1]; } + void writeKey(uint32_t idx, const KeyT & key) { _keys[idx] = key; } + + template + uint32_t + lower_bound(uint32_t sidx, const KeyT & key, CompareT comp) const; + + template + uint32_t + lower_bound(const KeyT & key, CompareT comp) const; + + template + uint32_t + upper_bound(uint32_t sidx, const KeyT & key, CompareT comp) const; + + /** + * Fill level helpers: a node is full at NumSlots valid slots and at + * least half full at NumSlots / 2 valid slots. + */ + bool isFull() const { return validSlots() == NumSlots; } + bool isAtLeastHalfFull() const { return validSlots() >= minSlots(); } + static uint32_t maxSlots() { return NumSlots; } + static uint32_t minSlots() { return NumSlots / 2; } +}; + +/** + * Node type combining the key array (BTreeNodeT), the per-slot data + * (BTreeNodeDataWrap) and the aggregated value (BTreeNodeAggregatedWrap). + * Copy construction and assignment copy the data of the first + * _validSlots slots through copyData(). + */ +template +class BTreeNodeTT : public BTreeNodeT, + public BTreeNodeDataWrap, + public BTreeNodeAggregatedWrap +{ +public: + typedef BTreeNodeT ParentType; + typedef BTreeNodeDataWrap DataWrapType; + typedef BTreeNodeAggregatedWrap AggrWrapType; + using ParentType::_validSlots; + using ParentType::validSlots; + using ParentType::getFrozen; + using ParentType::_keys; + using DataWrapType::getData; + using DataWrapType::setData; + using DataWrapType::copyData; +protected: + BTreeNodeTT(uint8_t level) + : ParentType(level), + DataWrapType() + { + } + + ~BTreeNodeTT(void) + { + } + + BTreeNodeTT(const BTreeNodeTT &rhs) + : ParentType(rhs), + DataWrapType(rhs), + AggrWrapType(rhs) + { + copyData(rhs, _validSlots); + } + + BTreeNodeTT & + operator=(const BTreeNodeTT &rhs) + { + ParentType::operator=(rhs); + AggrWrapType::operator=(rhs); + copyData(rhs, _validSlots); + return *this; + } + +public: + typedef BTreeNodeTT NodeType; + void insert(uint32_t idx, const KeyT & key, const DataT & data); + void update(uint32_t idx, 
const KeyT & key, const DataT & data) { + // assert(idx < NodeType::maxSlots()); + // assert(!getFrozen()); + _keys[idx] = key; + setData(idx, data); + } + void splitInsert(NodeType * splitNode, uint32_t idx, const KeyT & key, const DataT & data); + void remove(uint32_t idx); + void stealAllFromLeftNode(const NodeType * victim); + void stealAllFromRightNode(const NodeType * victim); + void stealSomeFromLeftNode(NodeType * victim); + void stealSomeFromRightNode(NodeType * victim); + void cleanRange(uint32_t from, uint32_t to); + void clean(void); + + void cleanFrozen(void); +}; + /* Internal node: the per-slot data is a child reference (see getChild/setChild); _validLeaves is a separately maintained leaf counter. */ +template +class BTreeInternalNode : public BTreeNodeTT +{ +public: + typedef BTreeNodeTT ParentType; + typedef BTreeInternalNode InternalNodeType; + template + friend class BTreeNodeAllocator; + template + friend class BTreeNodeStore; + template + friend class BTreeNodeDataWrap; + template + friend class BufferType; + template + friend class DataStoreT; + typedef BTreeNode::Ref Ref; + typedef std::pair RefPair; + using ParentType::_keys; + using ParentType::validSlots; + using ParentType::_validSlots; + using ParentType::getFrozen; + using ParentType::getData; + using ParentType::setData; + using ParentType::setLevel; + using ParentType::EMPTY_LEVEL; + typedef KeyT KeyType; + typedef Ref DataType; +private: + uint32_t _validLeaves; + /* Constructors, destructor and assignment are private; only this class and the friend templates declared above can use them. */ + BTreeInternalNode() + : ParentType(EMPTY_LEVEL), + _validLeaves(0u) + { + } + + + BTreeInternalNode(const BTreeInternalNode &rhs) + : ParentType(rhs), + _validLeaves(rhs._validLeaves) + { + } + + ~BTreeInternalNode(void) + { + } + + BTreeInternalNode & + operator=(const BTreeInternalNode &rhs) + { + ParentType::operator=(rhs); + _validLeaves = rhs._validLeaves; + return *this; + } +public: + BTreeNode::Ref + getChild(uint32_t idx) const + { + return getData(idx); + } + + void + setChild(uint32_t idx, BTreeNode::Ref child) + { + setData(idx, child); + } + + BTreeNode::Ref getLastChild() const { return getChild(validSlots() - 1); } + + uint32_t + 
validLeaves(void) const + { + return _validLeaves; + } + + void + setValidLeaves(uint32_t newValidLeaves) + { + _validLeaves = newValidLeaves; + } + + void + incValidLeaves(uint32_t delta) + { + _validLeaves += delta; + } + + void + decValidLeaves(uint32_t delta) + { + _validLeaves -= delta; /* plain unsigned subtraction; no underflow check is made here */ + } + + template + void + splitInsert(BTreeInternalNode *splitNode, uint32_t idx, const KeyT &key, + const BTreeNode::Ref &data, + NodeAllocatorType &allocator); + + void + stealAllFromLeftNode(const BTreeInternalNode *victim); + + void + stealAllFromRightNode(const BTreeInternalNode *victim); + + template + void + stealSomeFromLeftNode(BTreeInternalNode *victim, + NodeAllocatorType &allocator); + + template + void + stealSomeFromRightNode(BTreeInternalNode *victim, + NodeAllocatorType &allocator); + + void + clean(void); + + void + cleanFrozen(void); + /* Visits the children in slot order and forwards func to each: children are mapped as internal nodes when this node's level is above 1, as leaves otherwise. */ + template + void + foreach_key(NodeStoreType &store, FunctionType func) const + { + const BTreeNode::Ref *it = this->_data; + const BTreeNode::Ref *ite = it + _validSlots; + if (this->getLevel() > 1u) { + for (; it != ite; ++it) { + store.mapInternalRef(*it)->foreach_key(store, func); + } + } else { + for (; it != ite; ++it) { + store.mapLeafRef(*it)->foreach_key(func); + } + } + } + /* Same traversal as foreach_key(), forwarding to the children's foreach() instead. */ + template + void + foreach(NodeStoreType &store, FunctionType func) const + { + const BTreeNode::Ref *it = this->_data; + const BTreeNode::Ref *ite = it + _validSlots; + if (this->getLevel() > 1u) { + for (; it != ite; ++it) { + store.mapInternalRef(*it)->foreach(store, func); + } + } else { + for (; it != ite; ++it) { + store.mapLeafRef(*it)->foreach(func); + } + } + } +}; + /* Leaf node: constructed at LEAF_LEVEL; each slot holds a key and a DataT value. */ +template +class BTreeLeafNode : public BTreeNodeTT +{ +public: + typedef BTreeNodeTT ParentType; + typedef BTreeLeafNode LeafNodeType; + template + friend class BTreeNodeAllocator; + template + friend class BTreeNodeStore; + template + friend class BufferType; + template + friend class DataStoreT; + typedef BTreeNode::Ref Ref; + typedef std::pair RefPair; + using 
ParentType::validSlots; + using ParentType::_validSlots; + using ParentType::_keys; + using ParentType::freeze; + using ParentType::stealSomeFromLeftNode; + using ParentType::stealSomeFromRightNode; + using ParentType::LEAF_LEVEL; + typedef BTreeKeyData KeyDataType; + typedef KeyT KeyType; + typedef DataT DataType; +private: + BTreeLeafNode() : ParentType(LEAF_LEVEL) {} + +protected: + BTreeLeafNode(const BTreeLeafNode &rhs) + : ParentType(rhs) + { + } + + BTreeLeafNode(const KeyDataType *smallArray, uint32_t arraySize); + + ~BTreeLeafNode(void) + { + } + + BTreeLeafNode & + operator=(const BTreeLeafNode &rhs) + { + ParentType::operator=(rhs); + return *this; + } +public: /* The two allocator-taking overloads below ignore the allocator and forward to the one-argument ParentType versions brought in by the using-declarations above. */ + template + void + stealSomeFromLeftNode(BTreeLeafNode *victim, + NodeAllocatorType &allocator) + { + (void) allocator; + stealSomeFromLeftNode(victim); + } + + template + void + stealSomeFromRightNode(BTreeLeafNode *victim, + NodeAllocatorType &allocator) + { + (void) allocator; + stealSomeFromRightNode(victim); + } + + const DataT & + getLastData() const + { + return this->getData(validSlots() - 1); + } + + void + writeData(uint32_t idx, const DataT &data) + { + this->setData(idx, data); + } + /* For a leaf, the leaf count is simply its number of valid slots. */ + uint32_t + validLeaves(void) const + { + return validSlots(); + } + /* Calls func(key) for each valid slot, in slot order. */ + template + void + foreach_key(FunctionType func) const + { + const KeyT *it = _keys; + const KeyT *ite = it + _validSlots; + for (; it != ite; ++it) { + func(*it); + } + } + /* Calls func(key, data) for each valid slot, in slot order. */ + template + void + foreach(FunctionType func) const + { + const KeyT *it = _keys; + const KeyT *ite = it + _validSlots; + uint32_t idx = 0; + for (; it != ite; ++it) { + func(*it, this->getData(idx++)); + } + } +}; + + /* Subclass that makes the protected (smallArray, arraySize) leaf constructor publicly usable. */ +template +class BTreeLeafNodeTemp : public BTreeLeafNode +{ +public: + typedef BTreeLeafNode ParentType; + typedef typename ParentType::KeyDataType KeyDataType; + + BTreeLeafNodeTemp(const KeyDataType *smallArray, + uint32_t arraySize) + : ParentType(smallArray, arraySize) + { + } + + ~BTreeLeafNodeTemp(void) + { + } +}; + +extern template class 
BTreeNodeDataWrap; +extern template class BTreeNodeDataWrap; +extern template class BTreeKeyData; +extern template class BTreeKeyData; +extern template class BTreeNodeT; +extern template class BTreeNodeTT; +extern template class BTreeNodeTT; +extern template class BTreeNodeTT; +extern template class BTreeNodeTT; +extern template class BTreeInternalNode; +extern template class BTreeInternalNode; +extern template class BTreeLeafNode; +extern template class BTreeLeafNode; +extern template class BTreeLeafNode; +extern template class BTreeLeafNodeTemp; +extern template class BTreeLeafNodeTemp; +extern template class BTreeLeafNodeTemp; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreenode.hpp b/searchlib/src/vespa/searchlib/btree/btreenode.hpp new file mode 100644 index 00000000000..958b9e5fa5d --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenode.hpp @@ -0,0 +1,402 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreenode.h" +#include + +namespace search { +namespace btree { + +namespace { + /* Picks the split point for splitInsert(): validSlots / 2, moved one slot right when idx lies beyond it; in that case insertInSplitNode() is true and the new entry belongs in the split node. */ +class SplitInsertHelper { +private: + uint32_t _idx; + uint32_t _median; + bool _medianBumped; +public: + SplitInsertHelper(uint32_t idx, uint32_t validSlots) : + _idx(idx), + _median(validSlots / 2), + _medianBumped(false) + { + if (idx > _median) { + _median++; + _medianBumped = true; + } + } + uint32_t getMedian() const { return _median; } + bool insertInSplitNode() const { + if (_median >= _idx && !_medianBumped) { + return false; + } + return true; + } +}; + + +} + /* The three search functions below run std::lower_bound / std::upper_bound over the keys up to validSlots() and return the hit as an offset from _keys. */ +template +template +uint32_t +BTreeNodeT:: +lower_bound(uint32_t sidx, const KeyT & key, CompareT comp) const +{ + const KeyT * itr = std::lower_bound + (_keys + sidx, _keys + validSlots(), key, comp); + return itr - _keys; +} + +template +template +uint32_t +BTreeNodeT::lower_bound(const KeyT & key, CompareT comp) const +{ + + const KeyT * itr = std::lower_bound + (_keys, _keys + validSlots(), key, comp); + return itr - _keys; +} + + +template +template +uint32_t +BTreeNodeT:: +upper_bound(uint32_t sidx, const KeyT & key, CompareT comp) const +{ + const KeyT * itr = std::upper_bound + (_keys + sidx, _keys + validSlots(), key, comp); + return itr - _keys; +} + + /* Shifts entries [idx, validSlots()) one slot to the right and stores key/data at idx; asserts that the node is unfrozen and not full. */ +template +void +BTreeNodeTT::insert(uint32_t idx, + const KeyT &key, + const DataT &data) +{ + assert(validSlots() < NodeType::maxSlots()); + assert(!getFrozen()); + for (uint32_t i = validSlots(); i > idx; --i) { + _keys[i] = _keys[i - 1]; + setData(i, getData(i - 1)); + } + _keys[idx] = key; + setData(idx, data); + _validSlots++; +} + /* Moves the entries from the helper's median onward into splitNode, then inserts key/data into whichever of the two nodes the helper selects. */ +template +void +BTreeNodeTT::splitInsert(NodeType *splitNode, + uint32_t idx, + const KeyT &key, + const DataT &data) +{ + assert(!getFrozen()); + assert(!splitNode->getFrozen()); + SplitInsertHelper sih(idx, validSlots()); + splitNode->_validSlots = validSlots() - sih.getMedian(); + for (uint32_t i = sih.getMedian(); i < validSlots(); ++i) { + splitNode->_keys[i - sih.getMedian()] = _keys[i]; + 
splitNode->setData(i - sih.getMedian(), getData(i)); + } + cleanRange(sih.getMedian(), validSlots()); + _validSlots = sih.getMedian(); + if (sih.insertInSplitNode()) { + splitNode->insert(idx - sih.getMedian(), key, data); + } else { + insert(idx, key, data); + } +} + /* Closes the gap at idx by shifting the later entries one slot left, then resets the vacated last slot to default-constructed key/data. */ +template +void +BTreeNodeTT::remove(uint32_t idx) +{ + assert(!getFrozen()); + for (uint32_t i = idx + 1; i < validSlots(); ++i) { + _keys[i - 1] = _keys[i]; + setData(i - 1, getData(i)); + } + _validSlots--; + _keys[validSlots()] = KeyT(); + setData(validSlots(), DataT()); +} + /* Prepends all of victim's entries: the existing entries are first shifted right by victim->validSlots(), highest index first. victim is const and not modified. */ +template +void +BTreeNodeTT:: +stealAllFromLeftNode(const NodeType *victim) +{ + assert(validSlots() + victim->validSlots() <= NodeType::maxSlots()); + assert(!getFrozen()); + for (int i = validSlots() - 1; i >= 0; --i) { + _keys[i + victim->validSlots()] = _keys[i]; + setData(i + victim->validSlots(), getData(i)); + } + for (uint32_t i = 0; i < victim->validSlots(); ++i) { + _keys[i] = victim->_keys[i]; + setData(i, victim->getData(i)); + } + _validSlots += victim->validSlots(); +} + /* Appends all of victim's entries after the existing ones. victim is const and not modified. */ +template +void +BTreeNodeTT:: +stealAllFromRightNode(const NodeType *victim) +{ + assert(validSlots() + victim->validSlots() <= NodeType::maxSlots()); + assert(!getFrozen()); + for (uint32_t i = 0; i < victim->validSlots(); ++i) { + _keys[validSlots() + i] = victim->_keys[i]; + setData(validSlots() + i, victim->getData(i)); + } + _validSlots += victim->validSlots(); +} + /* Rebalances with the left sibling: takes the last `steal` entries of victim so that this node ends up with half (rounded down) of the combined entry count. */ +template +void +BTreeNodeTT:: +stealSomeFromLeftNode(NodeType *victim) +{ + assert(validSlots() + victim->validSlots() >= NodeType::minSlots()); + assert(!getFrozen()); + assert(!victim->getFrozen()); + uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t steal = median - validSlots(); + _validSlots += steal; + for (int32_t i = validSlots() - 1; i >= static_cast(steal); --i) { + _keys[i] = _keys[i - steal]; + setData(i, getData(i - steal)); + } + for (uint32_t i = 0; i < steal; ++i) { + _keys[i] = victim->_keys[victim->validSlots() - steal + i]; + 
setData(i, victim->getData(victim->validSlots() - steal + i)); + } + victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); + victim->_validSlots -= steal; +} + /* Rebalances with the right sibling: appends the first `steal` entries of victim, compacts victim's remaining entries to the front and clears its vacated tail. NOTE(review): cleanRange() asserts from < to, so steal == 0 would trip it -- presumably callers only call this when this node holds fewer entries than victim; confirm. */ +template +void +BTreeNodeTT:: +stealSomeFromRightNode(NodeType *victim) +{ + assert(validSlots() + victim->validSlots() >= NodeType::minSlots()); + assert(!getFrozen()); + assert(!victim->getFrozen()); + uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t steal = median - validSlots(); + for (uint32_t i = 0; i < steal; ++i) { + _keys[validSlots() + i] = victim->_keys[i]; + setData(validSlots() + i, victim->getData(i)); + } + _validSlots += steal; + for (uint32_t i = steal; i < victim->validSlots(); ++i) { + victim->_keys[i - steal] = victim->_keys[i]; + victim->setData(i - steal, victim->getData(i)); + } + victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); + victim->_validSlots -= steal; +} + + /* Resets slots [from, to) to default-constructed key/data. Asserts a non-empty range and an unfrozen node; _validSlots is not changed here. */ +template +void +BTreeNodeTT::cleanRange(uint32_t from, + uint32_t to) +{ + assert(from < to); + assert(to <= validSlots()); + assert(validSlots() <= NodeType::maxSlots()); + assert(!getFrozen()); + KeyT emptyKey = KeyT(); + for (KeyT *k = _keys + from, *ke = _keys + to; k != ke; ++k) + *k = emptyKey; + DataT emptyData = DataT(); + for (uint32_t i = from; i != to; ++i) + setData(i, emptyData); +} + + /* Clears every valid slot and sets _validSlots to 0; the early return keeps cleanRange()'s from < to assertion from firing on an empty node. */ +template +void +BTreeNodeTT::clean(void) +{ + if (validSlots() == 0) + return; + cleanRange(0, validSlots()); + _validSlots = 0; +} + + /* Same reset as clean(), but asserts that the node IS frozen (cleanRange() asserts the opposite), so the loops are written out here. */ +template +void +BTreeNodeTT::cleanFrozen(void) +{ + assert(validSlots() <= NodeType::maxSlots()); + assert(getFrozen()); + if (validSlots() == 0) + return; + KeyT emptyKey = KeyT(); + for (KeyT *k = _keys, *ke = _keys + validSlots(); k != ke; ++k) + *k = emptyKey; + DataT emptyData = DataT(); + for (uint32_t i = 0, ie = validSlots(); i != ie; ++i) + setData(i, emptyData); + _validSlots = 0; +} + + +template +template +void +BTreeInternalNode:: +splitInsert(BTreeInternalNode *splitNode, uint32_t idx, const KeyT &key, + const 
BTreeNode::Ref &data, + NodeAllocatorType &allocator) +{ + assert(!getFrozen()); + assert(!splitNode->getFrozen()); + SplitInsertHelper sih(idx, validSlots()); + splitNode->_validSlots = validSlots() - sih.getMedian(); + uint32_t splitLeaves = 0; + uint32_t newLeaves = allocator.validLeaves(data); + for (uint32_t i = sih.getMedian(); i < validSlots(); ++i) { + splitNode->_keys[i - sih.getMedian()] = _keys[i]; + splitNode->setData(i - sih.getMedian(), getData(i)); + splitLeaves += allocator.validLeaves(getData(i)); + } + splitNode->_validLeaves = splitLeaves; + this->cleanRange(sih.getMedian(), validSlots()); + _validLeaves -= splitLeaves + newLeaves; /* NOTE(review): newLeaves is subtracted here and re-added in the chosen branch below; that only balances if the caller already counted the new child in _validLeaves -- confirm against callers */ + _validSlots = sih.getMedian(); + if (sih.insertInSplitNode()) { + splitNode->insert(idx - sih.getMedian(), key, data); + splitNode->_validLeaves += newLeaves; + } else { + this->insert(idx, key, data); + _validLeaves += newLeaves; + } +} + + /* Entry movement is done by ParentType; this adds victim's leaf count to this node's. victim is const and keeps its own count. */ +template +void +BTreeInternalNode:: +stealAllFromLeftNode(const BTreeInternalNode *victim) +{ + ParentType::stealAllFromLeftNode(victim); + _validLeaves += victim->_validLeaves; +} + /* Right-hand counterpart: ParentType appends victim's entries, then victim's leaf count is added. */ +template +void +BTreeInternalNode:: +stealAllFromRightNode(const BTreeInternalNode *victim) +{ + ParentType::stealAllFromRightNode(victim); + _validLeaves += victim->_validLeaves; +} + + /* Takes the last `steal` children of victim (left sibling) and moves their leaf counts, queried through allocator, from victim to this node. */ +template +template +void +BTreeInternalNode:: +stealSomeFromLeftNode(BTreeInternalNode *victim, NodeAllocatorType &allocator) +{ + assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots()); + assert(!getFrozen()); + assert(!victim->getFrozen()); + uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t steal = median - validSlots(); + _validSlots += steal; + for (int32_t i = validSlots() - 1; i >= static_cast(steal); --i) { + _keys[i] = _keys[i - steal]; + setData(i, getData(i - steal)); + } + uint32_t stolenLeaves = 0; + for (uint32_t i = 0; i < steal; ++i) { + _keys[i] = victim->_keys[victim->validSlots() - steal + i]; + setData(i, victim->getData(victim->validSlots() - steal + 
i)); + stolenLeaves += allocator.validLeaves(getData(i)); + } + _validLeaves += stolenLeaves; + victim->_validLeaves -= stolenLeaves; + victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); + victim->_validSlots -= steal; +} + + /* Takes the first `steal` children of victim (right sibling), moves their leaf counts (queried through allocator) from victim to this node, then compacts victim. */ +template +template +void +BTreeInternalNode:: +stealSomeFromRightNode(BTreeInternalNode *victim, NodeAllocatorType &allocator) +{ + assert(validSlots() + victim->validSlots() >= BTreeInternalNode::minSlots()); + assert(!getFrozen()); + assert(!victim->getFrozen()); + uint32_t median = (validSlots() + victim->validSlots()) / 2; + uint32_t steal = median - validSlots(); + uint32_t stolenLeaves = 0; + for (uint32_t i = 0; i < steal; ++i) { + _keys[validSlots() + i] = victim->_keys[i]; + setData(validSlots() + i, victim->getData(i)); + stolenLeaves += allocator.validLeaves(victim->getData(i)); + } + _validSlots += steal; + _validLeaves += stolenLeaves; + victim->_validLeaves -= stolenLeaves; + for (uint32_t i = steal; i < victim->validSlots(); ++i) { + victim->_keys[i - steal] = victim->_keys[i]; + victim->setData(i - steal, victim->getData(i)); + } + victim->cleanRange(victim->validSlots() - steal, victim->validSlots()); + victim->_validSlots -= steal; +} + + /* clean() and cleanFrozen() below delegate the slot reset to ParentType and additionally zero the leaf counter. */ +template +void +BTreeInternalNode::clean(void) +{ + ParentType::clean(); + _validLeaves = 0; +} + + +template +void +BTreeInternalNode::cleanFrozen(void) +{ + ParentType::cleanFrozen(); + _validLeaves = 0; +} + + /* Builds a leaf from arraySize key/data pairs (at most maxSlots(), asserted) and freezes it before returning. */ +template +BTreeLeafNode:: +BTreeLeafNode(const KeyDataType *smallArray, uint32_t arraySize) + : ParentType(LEAF_LEVEL) +{ + assert(arraySize <= BTreeLeafNode::maxSlots()); + _validSlots = arraySize; + for (uint32_t idx = 0; idx < arraySize; ++idx) { + _keys[idx] = smallArray[idx]._key; + this->setData(idx, smallArray[idx].getData()); + } + freeze(); +} + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp new file mode 
100644 index 00000000000..11f371360e4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.cpp @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "btreenodeallocator.h" +#include "btreerootbase.h" +#include "btreenodeallocator.hpp" + +namespace search +{ +namespace btree +{ + +template class BTreeNodeAllocator; +template class BTreeNodeAllocator; +template class BTreeNodeAllocator; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h new file mode 100644 index 00000000000..53e949bacbe --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.h @@ -0,0 +1,271 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +#include "btreenode.h" +#include +#include +#include "btreenodestore.h" +#include +#include + +namespace search +{ + +namespace btree +{ + +template class BTreeRootBase; + +template +class BTreeNodeAllocator +{ +public: + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode LeafNodeType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef BTreeRootBase + BTreeRootBaseType; + typedef vespalib::GenerationHandler::generation_t generation_t; + typedef BTreeNodeStore + NodeStore; + +private: + BTreeNodeAllocator(const BTreeNodeAllocator &rhs); + + BTreeNodeAllocator & + operator=(const BTreeNodeAllocator &rhs); + + NodeStore _nodeStore; + + typedef vespalib::Array RefVector; + typedef vespalib::Array BTreeRootBaseTypeVector; + + // Nodes that might not be frozen. 
+ RefVector _internalToFreeze; + RefVector _leafToFreeze; + BTreeRootBaseTypeVector _treeToFreeze; + + // Nodes held until freeze is performed + RefVector _internalHoldUntilFreeze; + RefVector _leafHoldUntilFreeze; + +public: + BTreeNodeAllocator(void); + + ~BTreeNodeAllocator(void); + + void + disableFreeLists() { + _nodeStore.disableFreeLists(); + } + + void + disableElemHoldList() + { + _nodeStore.disableElemHoldList(); + } + + /** + * Allocate internal node. + */ + InternalNodeTypeRefPair + allocInternalNode(uint8_t level); + + /** + * Allocate leaf node. + */ + LeafNodeTypeRefPair + allocLeafNode(void); + /* thawNode overloads: return a reference to an unfrozen copy of the given node and pass the original node to holdNode(). */ + InternalNodeTypeRefPair + thawNode(BTreeNode::Ref nodeRef, InternalNodeType *node); + + LeafNodeTypeRefPair + thawNode(BTreeNode::Ref nodeRef, LeafNodeType *node); + + BTreeNode::Ref + thawNode(BTreeNode::Ref node); + + /** + * Hold internal node until freeze/generation constraint is satisfied. + */ + void + holdNode(BTreeNode::Ref nodeRef, InternalNodeType *node); + + /** + * Hold leaf node until freeze/generation constraint is satisfied. + */ + void + holdNode(BTreeNode::Ref nodeRef, LeafNodeType *node); + + /** + * Mark that tree needs to be frozen. Tree must be kept alive until + * freeze operation has completed. + */ + void + needFreeze(BTreeRootBaseType *tree); + + /** + * Freeze all nodes that are not already frozen. + */ + void + freeze(void); + + /** + * Try to free held nodes if nobody can be referencing them. + */ + void + trimHoldLists(generation_t usedGen); + + /** + * Transfer nodes from hold1 lists to hold2 lists, they are no + * longer referenced by new frozen structures, but readers accessing + * older versions of the frozen structure must leave before elements + * can be unheld. 
+ */ + void + transferHoldLists(generation_t generation); + + void + clearHoldLists(void); + + static bool + isValidRef(BTreeNode::Ref ref) + { + return NodeStore::isValidRef(ref); + } + /* An invalid ref is reported as non-leaf without consulting the node store. */ + bool + isLeafRef(BTreeNode::Ref ref) const + { + if (!isValidRef(ref)) + return false; + return _nodeStore.isLeafRef(ref); + } + /* The map* accessors below forward the ref unchanged to the node store; no validity check is made at this level. */ + const InternalNodeType * + mapInternalRef(BTreeNode::Ref ref) const + { + return _nodeStore.mapInternalRef(ref); + } + + InternalNodeType * + mapInternalRef(BTreeNode::Ref ref) + { + return _nodeStore.mapInternalRef(ref); + } + + const LeafNodeType * + mapLeafRef(BTreeNode::Ref ref) const + { + return _nodeStore.mapLeafRef(ref); + } + + LeafNodeType * + mapLeafRef(BTreeNode::Ref ref) + { + return _nodeStore.mapLeafRef(ref); + } + + template + const NodeType * + mapRef(BTreeNode::Ref ref) const + { + return _nodeStore.template mapRef(ref); + } + + template + NodeType * + mapRef(BTreeNode::Ref ref) + { + return _nodeStore.template mapRef(ref); + } + /* moveInternalNode / moveLeafNode: copy the node into a newly allocated entry and queue the copy for the next freeze(). */ + InternalNodeTypeRefPair + moveInternalNode(const InternalNodeType *node); + + LeafNodeTypeRefPair + moveLeafNode(const LeafNodeType *node); + + uint32_t + validLeaves(BTreeNode::Ref ref) const; + + /* + * Extract level from ref. 
+ */ + uint32_t + getLevel(BTreeNode::Ref ref) const; + + const KeyT & + getLastKey(BTreeNode::Ref node) const; + /* Returns the leaf type's empty aggregate when node is not a valid ref. */ + const AggrT & + getAggregated(BTreeNode::Ref node) const; + + MemoryUsage getMemoryUsage() const; + /* Both toString overloads return the text "NULL" for an invalid ref / null pointer. */ + vespalib::string toString(BTreeNode::Ref ref) const; + + vespalib::string toString(const BTreeNode * node) const; + /* The members from here to the end of the class forward directly to the node store. */ + bool + getCompacting(EntryRef ref) const + { + return _nodeStore.getCompacting(ref); + } + + std::vector + startCompact(void) + { + return _nodeStore.startCompact(); + } + + void + finishCompact(const std::vector &toHold) + { + return _nodeStore.finishCompact(toHold); + } + + template + void + foreach_key(EntryRef ref, FunctionType func) const + { + _nodeStore.foreach_key(ref, func); + } + + template + void + foreach(EntryRef ref, FunctionType func) const + { + _nodeStore.foreach(ref, func); + } + + const NodeStore &getNodeStore() const { return _nodeStore; } +}; + +extern template class BTreeNodeAllocator; +extern template class BTreeNodeAllocator; +extern template class BTreeNodeAllocator; + +} // namespace btree + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp new file mode 100644 index 00000000000..2b189058544 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodeallocator.hpp @@ -0,0 +1,437 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreenodeallocator.h" +#include "btreerootbase.h" +#include +#include "btreenodestore.hpp" + +namespace search { +namespace btree { + +template +BTreeNodeAllocator:: +BTreeNodeAllocator(void) + : _nodeStore(), + _internalToFreeze(), + _leafToFreeze(), + _treeToFreeze(), + _internalHoldUntilFreeze(), + _leafHoldUntilFreeze() +{ +} + + /* Destructor only runs sanity asserts: all freeze/hold lists must be empty, used bytes must equal dead bytes and no bytes may be on hold. (void) stats presumably silences an unused-variable warning when asserts are compiled out. */ +template +BTreeNodeAllocator:: +~BTreeNodeAllocator(void) +{ + assert(_internalToFreeze.empty()); + assert(_leafToFreeze.empty()); + assert(_treeToFreeze.empty()); + assert(_internalHoldUntilFreeze.empty()); + assert(_leafHoldUntilFreeze.empty()); + DataStoreBase::MemStats stats = _nodeStore.getMemStats(); + assert(stats._usedBytes == stats._deadBytes); + assert(stats._holdBytes == 0); + (void) stats; +} + + /* Reuses an unfrozen node parked in _internalHoldUntilFreeze when one exists; otherwise allocates from the node store and queues the new ref in _internalToFreeze. A reused node is not queued again -- presumably it is still on the to-freeze list from its first allocation; confirm. */ +template +typename BTreeNodeAllocator:: +InternalNodeTypeRefPair +BTreeNodeAllocator:: +allocInternalNode(uint8_t level) +{ + if (_internalHoldUntilFreeze.empty()) { + InternalNodeTypeRefPair nodeRef = _nodeStore.allocInternalNode(); + assert(nodeRef.first.valid()); + _internalToFreeze.push_back(nodeRef.first); + nodeRef.second->setLevel(level); + return nodeRef; + } + BTreeNode::Ref nodeRef = _internalHoldUntilFreeze.back(); + _internalHoldUntilFreeze.pop_back(); + InternalNodeType *node = mapInternalRef(nodeRef); + assert(!node->getFrozen()); + node->setLevel(level); + return std::make_pair(nodeRef, node); +} + + /* Leaf counterpart of allocInternalNode(): same reuse-or-allocate logic on the leaf lists, with no level to set. */ +template +typename BTreeNodeAllocator:: +LeafNodeTypeRefPair +BTreeNodeAllocator:: +allocLeafNode(void) +{ + if (_leafHoldUntilFreeze.empty()) { + LeafNodeTypeRefPair nodeRef = _nodeStore.allocLeafNode(); + _leafToFreeze.push_back(nodeRef.first); + return nodeRef; + } + BTreeNode::Ref nodeRef = _leafHoldUntilFreeze.back(); + _leafHoldUntilFreeze.pop_back(); + LeafNodeType *node = mapLeafRef(nodeRef); + assert(!node->getFrozen()); + return std::make_pair(nodeRef, node); +} + + + +template +typename BTreeNodeAllocator:: +InternalNodeTypeRefPair +BTreeNodeAllocator:: +thawNode(BTreeNode::Ref 
nodeRef, InternalNodeType *node) +{ + if (_internalHoldUntilFreeze.empty()) { + InternalNodeTypeRefPair retNodeRef = + _nodeStore.allocInternalNodeCopy(*node); + assert(retNodeRef.second->getFrozen()); /* the copy is expected to arrive frozen; it is unfrozen on the next line */ + retNodeRef.second->unFreeze(); + assert(retNodeRef.first.valid()); + _internalToFreeze.push_back(retNodeRef.first); + holdNode(nodeRef, node); + return retNodeRef; + } + BTreeNode::Ref retNodeRef = _internalHoldUntilFreeze.back(); + InternalNodeType *retNode = mapInternalRef(retNodeRef); + _internalHoldUntilFreeze.pop_back(); + assert(!retNode->getFrozen()); + *retNode = static_cast(*node); + assert(retNode->getFrozen()); + retNode->unFreeze(); + holdNode(nodeRef, node); + return std::make_pair(retNodeRef, retNode); +} + + /* Leaf overload: same two paths as the internal-node overload above (fresh copy from the node store, or overwrite of a parked unfrozen node), using the leaf lists. */ +template +typename BTreeNodeAllocator:: +LeafNodeTypeRefPair +BTreeNodeAllocator:: +thawNode(BTreeNode::Ref nodeRef, LeafNodeType *node) +{ + if (_leafHoldUntilFreeze.empty()) { + LeafNodeTypeRefPair retNodeRef = + _nodeStore.allocLeafNodeCopy(*node); + assert(retNodeRef.second->getFrozen()); + retNodeRef.second->unFreeze(); + _leafToFreeze.push_back(retNodeRef.first); + holdNode(nodeRef, node); + return retNodeRef; + } + BTreeNode::Ref retNodeRef = _leafHoldUntilFreeze.back(); + LeafNodeType *retNode = mapLeafRef(retNodeRef); + _leafHoldUntilFreeze.pop_back(); + assert(!retNode->getFrozen()); + *retNode = static_cast(*node); + assert(retNode->getFrozen()); + retNode->unFreeze(); + holdNode(nodeRef, node); + return std::make_pair(retNodeRef, retNode); +} + /* Ref-only overload: dispatches on isLeafRef() to the typed overloads and returns just the new ref. */ +template +BTreeNode::Ref +BTreeNodeAllocator:: +thawNode(BTreeNode::Ref node) +{ + if (isLeafRef(node)) + return thawNode(node, mapLeafRef(node)).first; + else + return thawNode(node, mapInternalRef(node)).first; +} + + /* A frozen node is handed to the node store's holdElem(); an unfrozen node is cleaned and parked in _internalHoldUntilFreeze, from where the alloc/thaw functions may reuse it. */ +template +void +BTreeNodeAllocator:: +holdNode(BTreeNode::Ref nodeRef, + InternalNodeType *node) +{ + if (node->getFrozen()) { + _nodeStore.holdElem(nodeRef); + } else { + node->clean(); + _internalHoldUntilFreeze.push_back(nodeRef); + } +} + + +template +void 
+BTreeNodeAllocator:: +holdNode(BTreeNode::Ref nodeRef, + LeafNodeType *node) +{ + if (node->getFrozen()) { + _nodeStore.holdElem(nodeRef); + } else { + node->clean(); + _leafHoldUntilFreeze.push_back(nodeRef); + } +} + + /* freeze() works in three ordered phases: freeze the queued nodes, freeze the registered trees, then free the nodes parked in the hold-until-freeze lists. A release fence follows each of the first two phases when it did any work. */ +template +void +BTreeNodeAllocator:: +freeze(void) +{ + // Freeze nodes. + + if (!_internalToFreeze.empty() || !_leafToFreeze.empty()) { + { + for (auto &i : _internalToFreeze) { + assert(i.valid()); + mapInternalRef(i)->freeze(); + } + _internalToFreeze.clear(); + } + { + for (auto &i : _leafToFreeze) { + assert(i.valid()); + mapLeafRef(i)->freeze(); + } + _leafToFreeze.clear(); + } + + // Tree node freezes must be visible before tree freezes to + // ensure that readers see a frozen world + std::atomic_thread_fence(std::memory_order_release); + } + + // Freeze trees. + + if (!_treeToFreeze.empty()) { + for (auto &i : _treeToFreeze) { + i->freeze(*this); + } + _treeToFreeze.clear(); + // Tree freezes must be visible before held nodes are freed + std::atomic_thread_fence(std::memory_order_release); + } + + + // Free nodes that were only held due to freezing. 
+ + { + for (auto &i : _internalHoldUntilFreeze) { + assert(!isLeafRef(i)); + InternalNodeType *inode = mapInternalRef(i); + (void) inode; /* inode is only read by the assert on the next line */ + assert(inode->getFrozen()); + _nodeStore.freeElem(i); + } + _internalHoldUntilFreeze.clear(); + } + { + for (auto &i : _leafHoldUntilFreeze) { + assert(isLeafRef(i)); + LeafNodeType *lnode = mapLeafRef(i); + (void) lnode; + assert(lnode->getFrozen()); + _nodeStore.freeElem(i); + } + _leafHoldUntilFreeze.clear(); + } +} + + /* Registers tree so that the next freeze() calls tree->freeze(*this). */ +template +void +BTreeNodeAllocator:: +needFreeze(BTreeRootBaseType *tree) +{ + _treeToFreeze.push_back(tree); +} + + /* The three hold-list functions below forward directly to the node store. */ +template +void +BTreeNodeAllocator:: +trimHoldLists(generation_t usedGen) +{ + _nodeStore.trimHoldLists(usedGen); +} + +template +void +BTreeNodeAllocator:: +transferHoldLists(generation_t generation) +{ + _nodeStore.transferHoldLists(generation); +} + + +template +void +BTreeNodeAllocator:: +clearHoldLists(void) +{ + _nodeStore.clearHoldLists(); +} + + /* Copies *node into a new entry via allocNewInternalNodeCopy() and queues the copy for the next freeze(); the source node is not touched here. */ +template +typename BTreeNodeAllocator:: +InternalNodeTypeRefPair +BTreeNodeAllocator:: +moveInternalNode(const InternalNodeType *node) +{ + InternalNodeTypeRefPair iPair; + iPair = _nodeStore.allocNewInternalNodeCopy(*node); + assert(iPair.first.valid()); + _internalToFreeze.push_back(iPair.first); + return iPair; +} + + /* Leaf counterpart of moveInternalNode(); no validity assert is made on the new ref. */ +template +typename BTreeNodeAllocator:: +LeafNodeTypeRefPair +BTreeNodeAllocator:: +moveLeafNode(const LeafNodeType *node) +{ + LeafNodeTypeRefPair lPair; + lPair = _nodeStore.allocNewLeafNodeCopy(*node); + _leafToFreeze.push_back(lPair.first); + return lPair; +} + + /* Leaf count under ref: a leaf's valid slots, or an internal node's stored count. NOTE(review): any ref that isLeafRef() rejects, including an invalid one, takes the internal-node branch -- presumably callers only pass valid refs; confirm. */ +template +uint32_t +BTreeNodeAllocator:: +validLeaves(BTreeNode::Ref ref) const +{ + if (isLeafRef(ref)) + return mapLeafRef(ref)->validSlots(); + else + return mapInternalRef(ref)->validLeaves(); +} + + +template +uint32_t +BTreeNodeAllocator:: +getLevel(BTreeNode::Ref ref) const +{ + if (isLeafRef(ref)) + return BTreeNode::LEAF_LEVEL; + else + return mapInternalRef(ref)->getLevel(); +} + + +template +const KeyT & +BTreeNodeAllocator:: 
+getLastKey(BTreeNode::Ref node) const +{ + if (isLeafRef(node)) + return mapLeafRef(node)->getLastKey(); + else + return mapInternalRef(node)->getLastKey(); +} + + /* Returns the leaf type's empty aggregate for an invalid ref; otherwise the aggregate stored in the referenced leaf or internal node. */ +template +const AggrT & +BTreeNodeAllocator:: +getAggregated(BTreeNode::Ref node) const +{ + if (!node.valid()) + return LeafNodeType::getEmptyAggregated(); + if (isLeafRef(node)) + return mapLeafRef(node)->getAggregated(); + else + return mapInternalRef(node)->getAggregated(); +} + + +template +MemoryUsage +BTreeNodeAllocator:: +getMemoryUsage() const +{ + MemoryUsage usage = _nodeStore.getMemoryUsage(); + return usage; +} + /* Ref overload: "NULL" for an invalid ref, otherwise the text produced by the node-pointer overload. */ +template +vespalib::string +BTreeNodeAllocator:: +toString(BTreeNode::Ref ref) const +{ + if (!isValidRef(ref)) { + return "NULL"; + } + if (isLeafRef(ref)) + return toString(mapLeafRef(ref)); + else + return toString(mapInternalRef(ref)); +} + /* Node overload: "NULL" for a null pointer; a leaf prints as L: keys(N)[k0,k1,...] and an internal node as I: validLeaves(M), keys(N)[k0,k1,...]. */ +template +vespalib::string +BTreeNodeAllocator:: +toString(const BTreeNode * node) const +{ + if (node == NULL) { + return "NULL"; + } + vespalib::asciistream ss; + if (node->isLeaf()) { + const LeafNodeType * lnode = static_cast(node); + ss << "L: keys(" << lnode->validSlots() << ")["; + for (uint32_t i = 0; i < lnode->validSlots(); ++i) { + if (i > 0) ss << ","; + ss << lnode->getKey(i); + } + ss << "]"; + } else { + const InternalNodeType * inode = + static_cast(node); + ss << "I: validLeaves(" << inode->validLeaves() << + "), keys(" << inode->validSlots() << ")["; + for (uint32_t i = 0; i < inode->validSlots(); ++i) { + if (i > 0) ss << ","; + ss << inode->getKey(i); + } + ss << "]"; + } + return ss.str(); +} + + +} // namespace btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp b/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp new file mode 100644 index 00000000000..07a2f60fe54 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.cpp @@ -0,0 +1,117 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include "btreenodestore.hpp" +#include "datastore.h" +#include "btreenode.h" +#include "btreerootbase.h" +#include "btreeroot.h" +#include "btreenodeallocator.h" + +namespace search +{ + +namespace btree +{ + +/* Explicit instantiations of the BTreeNodeStore class template (three parameter sets). */ template class BTreeNodeStore; +template class BTreeNodeStore; +template class BTreeNodeStore; + +/* Local shorthand names used by the explicit member-template instantiations below. */ typedef EntryRefT<22> MyRef; + +typedef BTreeNodeStore MyNodeStore1; +typedef BTreeNodeStore MyNodeStore2; +typedef BTreeNodeStore MyNodeStore3; + +typedef BTreeLeafNode MyEntry1; +typedef BTreeLeafNode MyEntry2; +typedef BTreeInternalNode MyEntry4; +typedef BTreeLeafNode MyEntry5; +typedef BTreeInternalNode MyEntry6; + +/* Explicit instantiations of DataStoreT::allocNewEntryCopy, one per MyEntry type. */ template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t, const MyEntry1 &); + +template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t, const MyEntry2 &); + +template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t, const MyEntry4 &); + +template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t, const MyEntry5 &); + +template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t, const MyEntry6 &); + +/* Explicit instantiations of DataStoreT::allocEntry (five of them). */ template +std::pair +DataStoreT::allocEntry(uint32_t); + +template +std::pair +DataStoreT::allocEntry(uint32_t); + +template +std::pair +DataStoreT::allocEntry(uint32_t); + +template +std::pair +DataStoreT::allocEntry(uint32_t); + +template +std::pair +DataStoreT::allocEntry(uint32_t); + +/* Explicit instantiations of DataStoreT::allocEntryCopy, one per MyEntry type. */ template +std::pair +DataStoreT::allocEntryCopy( + uint32_t, const MyEntry1 &); + +template +std::pair +DataStoreT::allocEntryCopy( + uint32_t, const MyEntry2 &); + +template +std::pair +DataStoreT::allocEntryCopy( + uint32_t, const MyEntry4 &); + +template +std::pair +DataStoreT::allocEntryCopy( + uint32_t, const MyEntry5 &); + + +template +std::pair +DataStoreT::allocEntryCopy( + uint32_t, const MyEntry6 &); + + + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.h b/searchlib/src/vespa/searchlib/btree/btreenodestore.h new file mode 100644 index 
00000000000..1786f3996d7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.h @@ -0,0 +1,399 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "datastore.h" +#include "btreetraits.h" + +namespace search +{ + +namespace btree +{ + +/** Reclaim policy whose reclaim() calls unFreeze() on the given node. */ class BTreeNodeReclaimer +{ +public: + static void reclaim(BTreeNode * node) + { + node->unFreeze(); + } +}; + + +/** Buffer type for B-tree nodes. The constructor passes 1 as the first argument to the parent BufferType and freezes the inherited _emptyEntry. */ template +class BTreeNodeBufferType : public BufferType +{ + typedef BufferType ParentType; + using ParentType::_emptyEntry; + using ParentType::_clusterSize; +public: + BTreeNodeBufferType(uint32_t minClusters, + uint32_t maxClusters) + : ParentType(1, minClusters, maxClusters) + { + _emptyEntry.freeze(); + } + + virtual void + cleanInitialElements(void *buffer); + + virtual void + cleanHold(void *buffer, uint64_t offset, uint64_t len); +}; + + +/** Store for B-tree nodes: one DataStoreType (_store) plus one buffer type object each for internal and leaf nodes. Most members forward to _store. */ template +class BTreeNodeStore +{ +public: + typedef DataStoreT > DataStoreType; + typedef DataStoreType::RefType RefType; + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode LeafNodeType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef vespalib::GenerationHandler::generation_t generation_t; + + /** Type ids passed to the _store alloc calls and compared against _store.getTypeId() in isLeafRef(). */ enum NodeTypes + { + NODETYPE_INTERNAL = 0, + NODETYPE_LEAF = 1 + }; + + +private: + static constexpr size_t MIN_CLUSTERS = 128u; + DataStoreType _store; + BTreeNodeBufferType _internalNodeType; + BTreeNodeBufferType _leafNodeType; + +public: + BTreeNodeStore(void); + + ~BTreeNodeStore(void); + + void + disableFreeLists() { + _store.disableFreeLists(); + } + + void + disableElemHoldList() + { + _store.disableElemHoldList(); + } + + /** Return ref.valid(). */ static bool + isValidRef(EntryRef ref) + { + return ref.valid(); + } + + /** True when the buffer that ref points into has type id NODETYPE_LEAF. */ bool + isLeafRef(EntryRef ref) const + { + RefType iRef(ref); + return _store.getTypeId(iRef.bufferId()) == NODETYPE_LEAF; + } + + /** The map*Ref() members below all resolve ref through _store.getBufferEntry() using the ref's buffer id and offset; they differ only in node type and constness. */ const InternalNodeType * + 
mapInternalRef(EntryRef ref) const + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + InternalNodeType * + mapInternalRef(EntryRef ref) + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + const LeafNodeType * + mapLeafRef(EntryRef ref) const + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + LeafNodeType * + mapLeafRef(EntryRef ref) + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + template + const NodeType * + mapRef(EntryRef ref) const + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + template + NodeType * + mapRef(EntryRef ref) + { + RefType iRef(ref); + return _store.getBufferEntry(iRef.bufferId(), + iRef.offset()); + } + + /** The alloc*() members below forward to the _store call of the matching name (allocNewEntry, allocEntry, allocNewEntryCopy, allocEntryCopy) with NODETYPE_LEAF or NODETYPE_INTERNAL as type id. */ LeafNodeTypeRefPair + allocNewLeafNode(void) { + return _store.allocNewEntry(NODETYPE_LEAF); + } + + LeafNodeTypeRefPair + allocLeafNode(void) { + return _store.allocEntry(NODETYPE_LEAF); + } + + LeafNodeTypeRefPair + allocNewLeafNodeCopy(const LeafNodeType &rhs) { + return _store.allocNewEntryCopy(NODETYPE_LEAF, rhs); + } + + LeafNodeTypeRefPair + allocLeafNodeCopy(const LeafNodeType &rhs) { + return _store.allocEntryCopy(NODETYPE_LEAF, rhs); + } + + InternalNodeTypeRefPair + allocNewInternalNode(void) { + return _store.allocNewEntry(NODETYPE_INTERNAL); + } + + InternalNodeTypeRefPair + allocInternalNode(void) { + return _store.allocEntry(NODETYPE_INTERNAL); + } + + InternalNodeTypeRefPair + allocNewInternalNodeCopy(const InternalNodeType &rhs) { + return _store.allocNewEntryCopy(NODETYPE_INTERNAL, rhs); + } + + InternalNodeTypeRefPair + allocInternalNodeCopy(const InternalNodeType &rhs) { + return _store.allocEntryCopy(NODETYPE_INTERNAL, rhs); + } + + /** Forward to _store.holdElem() with a length of 1. */ void + holdElem(EntryRef ref) + { + _store.holdElem(ref, 1); + } + + /** Forward to _store.freeElem() with a length of 1. */ void + freeElem(EntryRef ref) + { + _store.freeElem(ref, 1); + } + + std::vector + 
startCompact(void); + + void + finishCompact(const std::vector &toHold); + + void + transferHoldLists(generation_t generation) + { + _store.transferHoldLists(generation); + } + + // Inherit doc from DataStoreBase + DataStoreBase::MemStats getMemStats() const { + return _store.getMemStats(); + } + + // Inherit doc from DataStoreBase + void + trimHoldLists(generation_t usedGen) + { + _store.trimHoldLists(usedGen); + } + + void + clearHoldLists(void) + { + _store.clearHoldLists(); + } + + // Inherit doc from DataStoreBase + MemoryUsage getMemoryUsage() const { + return _store.getMemoryUsage(); + } + + // Inherit doc from DataStoreT + bool getCompacting(EntryRef ref) const { + return _store.getCompacting(ref); + } + + /** Call foreach_key(func) on the node at ref; internal nodes also receive *this. Does nothing for an invalid ref. */ template + void + foreach_key(EntryRef ref, FunctionType func) const + { + if (!ref.valid()) + return; + if (isLeafRef(ref)) { + mapLeafRef(ref)->foreach_key(func); + } else { + mapInternalRef(ref)->foreach_key(*this, func); + } + } + + /** Call foreach(func) on the node at ref; internal nodes also receive *this. Does nothing for an invalid ref. */ template + void + foreach(EntryRef ref, FunctionType func) const + { + if (!ref.valid()) + return; + if (isLeafRef(ref)) { + mapLeafRef(ref)->foreach(func); + } else { + mapInternalRef(ref)->foreach(*this, func); + } + } +}; + +/* Explicit instantiation declarations for BTreeNodeStore and for DataStoreT::allocNewEntryCopy / allocEntry / allocEntryCopy. */ extern template class BTreeNodeStore; +extern template class BTreeNodeStore; +extern template class BTreeNodeStore; + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocNewEntryCopy > +(uint32_t, const BTreeLeafNode &); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocNewEntryCopy >( + uint32_t, + const BTreeLeafNode &); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocNewEntryCopy >( + uint32_t, const BTreeInternalNode &); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocNewEntryCopy > +(uint32_t, const BTreeLeafNode &); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocNewEntryCopy >( + uint32_t, const BTreeInternalNode &); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: 
+allocEntry, + BTreeNodeReclaimer>(uint32_t); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocEntry, + BTreeNodeReclaimer>(uint32_t); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocEntry, + BTreeNodeReclaimer>(uint32_t); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocEntry, + BTreeNodeReclaimer>(uint32_t); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocEntry, + BTreeNodeReclaimer>(uint32_t); + +/* Explicit instantiation declarations of DataStoreT::allocEntryCopy follow. */ extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocEntryCopy, + BTreeNodeReclaimer>( + uint32_t, + const BTreeLeafNode &); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocEntryCopy, + BTreeNodeReclaimer>( + uint32_t, + const BTreeLeafNode &); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocEntryCopy, BTreeNodeReclaimer>( + uint32_t, const BTreeInternalNode &); + +extern template +std::pair, BTreeLeafNode *> +DataStoreT >:: +allocEntryCopy, + BTreeNodeReclaimer>( + uint32_t, + const BTreeLeafNode &); + +extern template +std::pair, BTreeInternalNode *> +DataStoreT >:: +allocEntryCopy, + BTreeNodeReclaimer>( + uint32_t, const BTreeInternalNode &); + + +} // namespace btree + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp b/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp new file mode 100644 index 00000000000..8ba828aa07f --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreenodestore.hpp @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreenodestore.h" +#include "datastore.hpp" + +namespace search +{ + +namespace btree +{ + +/** Run the parent's cleanInitialElements(), then call freeze() on the first _clusterSize entries of the buffer. */ template +void +BTreeNodeBufferType::cleanInitialElements(void *buffer) +{ + ParentType::cleanInitialElements(buffer); + EntryType *e = static_cast(buffer); + for (size_t j = _clusterSize; j != 0; --j) { + e->freeze(); + ++e; + } +} + + +/** Call cleanFrozen() on len consecutive entries of the buffer, starting at entry index offset. */ template +void +BTreeNodeBufferType::cleanHold(void *buffer, + uint64_t offset, + uint64_t len) +{ + EntryType *e = static_cast(buffer) + offset; + for (size_t j = len; j != 0; --j) { + e->cleanFrozen(); + ++e; + } +} + + + + +/** Build both buffer types with MIN_CLUSTERS and RefType::offsetSize(), register the internal type and then the leaf type with _store, initialise the active buffers and enable free lists. NOTE(review): the registration order presumably yields type ids matching the NodeTypes enum (internal = 0, leaf = 1); confirm against addType(). */ template +BTreeNodeStore:: +BTreeNodeStore(void) + : _store(), + _internalNodeType(MIN_CLUSTERS, RefType::offsetSize()), + _leafNodeType(MIN_CLUSTERS, RefType::offsetSize()) +{ + _store.addType(&_internalNodeType); + _store.addType(&_leafNodeType); + _store.initActiveBuffers(); + _store.enableFreeLists(); +} + +template +BTreeNodeStore:: +~BTreeNodeStore(void) +{ + _store.dropBuffers(); // Drop buffers before type handlers are dropped +} + + +/** Start compaction for NODETYPE_INTERNAL and then NODETYPE_LEAF, and return the internal result followed by the leaf result in one vector. */ template +std::vector +BTreeNodeStore:: +startCompact(void) +{ + std::vector iToHold = + _store.startCompact(NODETYPE_INTERNAL); + std::vector lToHold = + _store.startCompact(NODETYPE_LEAF); + std::vector ret = iToHold; + for (std::vector::const_iterator + i = lToHold.begin(), ie = lToHold.end(); i != ie; ++i) + ret.push_back(*i); + return ret; +} + + +/** Forward to _store.finishCompact() with the given vector. */ template +void +BTreeNodeStore:: +finishCompact(const std::vector &toHold) +{ + _store.finishCompact(toHold); +} + + +} // namespace btree + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.cpp b/searchlib/src/vespa/searchlib/btree/btreeremover.cpp new file mode 100644 index 00000000000..a8dceb35dac --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeremover.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "btreeremover.h" +#include "btreenodeallocator.h" +#include "btreerootbase.hpp" +#include "btreeremover.hpp" +#include "btreenode.hpp" + +namespace search +{ + +namespace btree +{ + +/* Explicit instantiations of BTreeRemover. */ template class BTreeRemover; +template class BTreeRemover; +template class BTreeRemover, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.h b/searchlib/src/vespa/searchlib/btree/btreeremover.h new file mode 100644 index 00000000000..bc78a6be3a9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeremover.h @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreenode.h" +#include "btreenodeallocator.h" +#include "btreerootbase.h" +#include "btreeaggregator.h" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" +#include "btreeiterator.h" + +namespace search +{ + +namespace btree +{ + +/** Base class holding the static steal() helper that BTreeRemover::remove() invokes through ParentType. */ template +class BTreeRemoverBase +{ +public: + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeAggregator Aggregator; + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode LeafNodeType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + + template + static void + steal(InternalNodeType *pNode, + BTreeNode::Ref sNodeRef, + NodeType *sNode, + uint32_t idx, + NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc, + Iterator &itr, + uint32_t level); +}; + +/** Provides the static remove() operation that removes the element an iterator points at. */ template , + typename TraitsT = BTreeDefaultTraits, + class AggrCalcT = NoAggrCalc> +class BTreeRemover : public BTreeRemoverBase + +{ +public: + typedef BTreeRemoverBase ParentType; + typedef BTreeNodeAllocator NodeAllocatorType; + typedef BTreeAggregator Aggregator; + typedef BTreeInternalNode + InternalNodeType; + typedef BTreeLeafNode + LeafNodeType; + typedef KeyT KeyType; + typedef DataT DataType; + typedef 
typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef BTreeIterator Iterator; + + static void + remove(BTreeNode::Ref &root, + Iterator &itr, + const AggrCalcT &aggrCalc); +}; + +extern template class BTreeRemover; +extern template class BTreeRemover; +extern template class BTreeRemover, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeremover.hpp b/searchlib/src/vespa/searchlib/btree/btreeremover.hpp new file mode 100644 index 00000000000..864a2833993 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeremover.hpp @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreeremover.h" +#include "btreerootbase.hpp" +#include + +namespace search +{ + +namespace btree +{ + +/** Refill child sNode (slot idx of parent pNode) from a neighbouring child. If sNode plus the left or right sibling fits in NodeType::maxSlots(), the whole sibling is absorbed and put on hold; otherwise some entries are taken from the left sibling when it exists and has more valid slots than the right one (or there is no right one), else from the right sibling. Parent keys are updated to match, and aggregated values are recalculated when AggrCalcT::hasAggregated(). */ template +template +void +BTreeRemoverBase:: +steal(InternalNodeType *pNode, + BTreeNode::Ref sNodeRef, + NodeType * sNode, uint32_t idx, NodeAllocatorType &allocator, + const AggrCalcT &aggrCalc, + Iterator &itr, + uint32_t level) +{ + BTreeNode::Ref leftVictimRef = BTreeNode::Ref(); + NodeType * leftVictim = NULL; + BTreeNode::Ref rightVictimRef = BTreeNode::Ref(); + NodeType * rightVictim = NULL; + if (idx > 0) { + leftVictimRef = pNode->getChild(idx - 1); + leftVictim = allocator.template mapRef(leftVictimRef); + } + if (idx + 1 < pNode->validSlots()) { + rightVictimRef = pNode->getChild(idx + 1); + rightVictim = allocator.template mapRef(rightVictimRef); + } + /* Case 1: left sibling and sNode fit in one node, so absorb the whole left sibling, drop its slot from the parent and hold it. */ if (leftVictim != NULL && + leftVictim->validSlots() + sNode->validSlots() <= + NodeType::maxSlots()) + { + uint32_t stolen = leftVictim->validSlots(); + sNode->stealAllFromLeftNode(leftVictim); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->remove(idx - 1); + allocator.holdNode(leftVictimRef, leftVictim); + itr.adjustSteal(level, 
true, stolen); + } else if (rightVictim != NULL && + rightVictim->validSlots() + sNode->validSlots() <= + NodeType::maxSlots()) + { + /* Case 2: same as case 1 with the right sibling; the iterator is not adjusted here. */ sNode->stealAllFromRightNode(rightVictim); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->remove(idx + 1); + allocator.holdNode(rightVictimRef, rightVictim); + } else if (leftVictim != NULL && + (rightVictim == NULL || + leftVictim->validSlots() > rightVictim->validSlots())) + { + /* Case 3: take only some entries from the left sibling; a frozen sibling is thawed first and the thawed ref/node replace the old ones. */ if (leftVictim->getFrozen()) { + NodeTypeRefPair thawed = + allocator.thawNode(leftVictimRef, leftVictim); + leftVictimRef = thawed.first; + leftVictim = thawed.second; + } + uint32_t oldLeftValid = leftVictim->validSlots(); + sNode->stealSomeFromLeftNode(leftVictim, allocator); + uint32_t stolen = oldLeftValid - leftVictim->validSlots(); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx - 1, leftVictim->getLastKey(), leftVictimRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*leftVictim, allocator, aggrCalc); + } + itr.adjustSteal(level, false, stolen); + } else if (rightVictim != NULL) { + /* Case 4: take only some entries from the right sibling, thawing it first if frozen. */ if (rightVictim->getFrozen()) { + NodeTypeRefPair thawed = + allocator.thawNode(rightVictimRef, rightVictim); + rightVictimRef = thawed.first; + rightVictim = thawed.second; + } + sNode->stealSomeFromRightNode(rightVictim, allocator); + pNode->update(idx, sNode->getLastKey(), sNodeRef); + pNode->update(idx + 1, rightVictim->getLastKey(), rightVictimRef); + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*rightVictim, allocator, aggrCalc); + } + } + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*sNode, allocator, aggrCalc); + } +} + + +/** Remove the element that itr points at (itr must be valid). root is replaced by the result of itr.thaw(root) first. If the leaf holds a single element, itr.removeLast(root) is called and root is reset to a default-constructed Ref. Otherwise the element is removed from the leaf and the iterator path is walked from level 0 upward to refresh keys, leaf counts and aggregated values, calling steal() for children that are less than half full. */ template +void +BTreeRemover:: +remove(BTreeNode::Ref &root, + Iterator &itr, + const AggrCalcT &aggrCalc) +{ + assert(itr.valid()); + root = itr.thaw(root); + + uint32_t idx = itr.getLeafNodeIdx(); + LeafNodeType * lnode = itr.getLeafNode(); + if (lnode->validSlots() == 1u) { + itr.removeLast(root); + root = BTreeNode::Ref(); + return; + } + NodeAllocatorType 
&allocator(itr.getAllocator()); + AggrT oldca(AggrCalcT::hasAggregated() ? lnode->getAggregated() : AggrT()); + AggrT ca; + if (AggrCalcT::hasAggregated() && + aggrCalc.remove(lnode->getAggregated(), + aggrCalc.getVal(lnode->getData(idx)))) { + lnode->remove(idx); + Aggregator::recalc(*lnode, aggrCalc); + } else { + lnode->remove(idx); + } + if (AggrCalcT::hasAggregated()) { + ca = lnode->getAggregated(); + } + /* The removed slot was the last one in the leaf: step the leaf index back now and advance the iterator with ++itr at the very end. */ bool steppedBack = idx >= lnode->validSlots(); + if (steppedBack) { + itr.setLeafNodeIdx(itr.getLeafNodeIdx() - 1); + --idx; + } + uint32_t level = 0; + uint32_t levels = itr.getPathSize(); + /* node is assigned on every loop iteration and only read after the loop when level > 0. */ InternalNodeType *node; + for (; level < levels; ++level) { + typename Iterator::PathElement &pe = itr.getPath(level); + node = pe.getWNode(); + idx = pe.getIdx(); + AggrT olda(AggrCalcT::hasAggregated() ? + node->getAggregated() : AggrT()); + BTreeNode::Ref subNode = node->getChild(idx); + node->update(idx, allocator.getLastKey(subNode), subNode); + node->decValidLeaves(1); + if (level == 0) { + LeafNodeType * sNode = allocator.mapLeafRef(subNode); + assert(sNode == lnode); + if (!sNode->isAtLeastHalfFull()) { + // too few elements in sub node, steal from left or + // right sibling + ParentType::template steal + (node, subNode, sNode, idx, allocator, aggrCalc, + itr, level); + } + } else { + InternalNodeType * sNode = allocator.mapInternalRef(subNode); + if (!sNode->isAtLeastHalfFull()) { + // too few elements in sub node, steal from left or + // right sibling + ParentType::template steal + (node, subNode, sNode, idx, allocator, aggrCalc, + itr, level); + } + } + if (AggrCalcT::hasAggregated()) { + if (aggrCalc.remove(node->getAggregated(), oldca, ca)) { + Aggregator::recalc(*node, allocator, aggrCalc); + } + ca = node->getAggregated(); + oldca = olda; + } + } + /* After the walk node is the last path element; if it is left with one slot, itr.removeLevel() supplies the new root. */ if (level > 0 && node->validSlots() == 1) { + root = itr.removeLevel(root, node); + } + if (steppedBack) + ++itr; +} + + +} // namespace search::btree +} // namespace search + diff --git 
a/searchlib/src/vespa/searchlib/btree/btreeroot.cpp b/searchlib/src/vespa/searchlib/btree/btreeroot.cpp new file mode 100644 index 00000000000..fdfa0834b95 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeroot.cpp @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "btreeroot.h" +#include "btreenodeallocator.h" +#include "btreeiterator.hpp" +#include "btreeroot.hpp" +#include "btreenode.hpp" + +namespace search +{ + +namespace btree +{ + +/* Explicit instantiations of BTreeRootT and BTreeRoot (three parameter sets each). */ template class BTreeRootT; +template class BTreeRootT; +template class BTreeRootT; +template class BTreeRoot; +template class BTreeRoot; +template class BTreeRoot, + BTreeDefaultTraits, MinMaxAggrCalc>; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreeroot.h b/searchlib/src/vespa/searchlib/btree/btreeroot.h new file mode 100644 index 00000000000..b06050904f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeroot.h @@ -0,0 +1,253 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreeiterator.h" +#include "btreenode.h" +#include "btreenodeallocator.h" +#include "btreerootbase.h" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" + +namespace search { +namespace btree { + +/* Forward declarations of class templates used by the root classes below. */ template +class BTreeNodeAllocator; +template class +BTreeBuilder; +template class +BTreeAggregator; + +/** B-tree root offering lookup (find, lowerBound, upperBound, begin), size, validation and debug-string operations. Every operation that touches nodes takes the node allocator as an argument. */ template , + typename TraitsT = BTreeDefaultTraits> +class BTreeRootT : public BTreeRootBase +{ +public: + typedef BTreeRootBase ParentType; + typedef typename ParentType::NodeAllocatorType NodeAllocatorType; + typedef BTreeKeyData KeyDataType; + typedef typename ParentType::InternalNodeType InternalNodeType; + typedef typename ParentType::LeafNodeType LeafNodeType; + typedef BTreeLeafNodeTemp + LeafNodeTempType; + typedef BTreeIterator Iterator; + typedef BTreeConstIterator + ConstIterator; + + typedef typename ParentType::KeyType KeyType; + typedef typename ParentType::DataType DataType; +protected: + typedef typename ParentType::BTreeRootBaseType BTreeRootBaseType; + typedef BTreeRootT BTreeRootTType; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + using ParentType::_root; + using ParentType::getFrozenRoot; + using ParentType::getFrozenRootRelaxed; + using ParentType::isFrozen; + + vespalib::string toString(BTreeNode::Ref node, const NodeAllocatorType &allocator) const; + bool isValid(BTreeNode::Ref node, bool ignoreMinSlots, uint32_t level, + const NodeAllocatorType &allocator, CompareT comp) const; + +public: + /** + * Read view of the frozen version of the tree. + * Should be used by reader threads. 
+ **/ + class FrozenView { + private: + BTreeNode::Ref _frozenRoot; + const NodeAllocatorType & _allocator; + public: + typedef ConstIterator Iterator; + FrozenView(BTreeNode::Ref frozenRoot, + const NodeAllocatorType & allocator); + ConstIterator find(const KeyType& key, + CompareT comp = CompareT()) const; + ConstIterator lowerBound(const KeyType &key, + CompareT comp = CompareT()) const; + ConstIterator upperBound(const KeyType &key, + CompareT comp = CompareT()) const; + ConstIterator begin() const { + return ConstIterator(_frozenRoot, _allocator); + } + /** Append an iterator constructed from (_frozenRoot, _allocator) to where. */ void begin(std::vector &where) const { + where.emplace_back(_frozenRoot, _allocator); + } + + BTreeNode::Ref + getRoot(void) const + { + return _frozenRoot; + } + + size_t + size(void) const; + + const NodeAllocatorType & + getAllocator(void) const + { + return _allocator; + } + + /** Forward to the node store's foreach_key() starting at _frozenRoot. */ template + void + foreach_key(FunctionType func) const { + _allocator.getNodeStore().foreach_key(_frozenRoot, func); + } + + /** Forward to the node store's foreach() starting at _frozenRoot. */ template + void + foreach(FunctionType func) const { + _allocator.getNodeStore().foreach(_frozenRoot, func); + } + }; + +private: + + /* Static helpers that take the root ref explicitly; the public find/lowerBound/upperBound members call them with _root. */ static Iterator findHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp = CompareT()); + + static Iterator lowerBoundHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp = CompareT()); + + static Iterator upperBoundHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp = CompareT()); + +public: + BTreeRootT(); + ~BTreeRootT(); + + void + clear(NodeAllocatorType &allocator); + + Iterator + find(const KeyType & key, const NodeAllocatorType &allocator, + CompareT comp = CompareT()) const; + + Iterator + lowerBound(const KeyType & key, const NodeAllocatorType & allocator, + CompareT comp = CompareT()) const; + + Iterator + upperBound(const KeyType & key, const NodeAllocatorType & allocator, + CompareT comp = CompareT()) const; + + Iterator 
begin(const NodeAllocatorType &allocator) const { + return Iterator(_root, allocator); + } + + /** Return a FrozenView built from getFrozenRoot() and the given allocator. */ FrozenView getFrozenView(const NodeAllocatorType & allocator) const { + return FrozenView(getFrozenRoot(), allocator); + } + + size_t + size(const NodeAllocatorType &allocator) const; + + size_t + frozenSize(const NodeAllocatorType &allocator) const; + + vespalib::string toString(const NodeAllocatorType &allocator) const; + + bool + isValid(const NodeAllocatorType &allocator, CompareT comp = CompareT()) const; + + bool + isValidFrozen(const NodeAllocatorType &allocator, CompareT comp = CompareT()) const; + + size_t + bitSize(const NodeAllocatorType &allocator) const; + + size_t + bitSize(BTreeNode::Ref node, const NodeAllocatorType &allocator) const; + + void + thaw(Iterator &itr); +}; + + +/** Extends BTreeRootT with the mutating operations assign(), insert() and remove(), each taking an AggrCalcT aggregation calculator where applicable. */ template , + typename TraitsT = BTreeDefaultTraits, + class AggrCalcT = NoAggrCalc> +class BTreeRoot : public BTreeRootT +{ +public: + typedef BTreeRootT ParentType; + typedef typename ParentType::ParentType Parent2Type; + typedef typename ParentType::NodeAllocatorType NodeAllocatorType; + typedef typename ParentType::KeyType KeyType; + typedef typename ParentType::DataType DataType; + typedef typename ParentType::LeafNodeType LeafNodeType; + typedef typename ParentType::InternalNodeType InternalNodeType; + typedef typename ParentType::LeafNodeTypeRefPair LeafNodeTypeRefPair; + typedef typename ParentType::InternalNodeTypeRefPair + InternalNodeTypeRefPair; + typedef typename ParentType::Iterator Iterator; + typedef BTreeBuilder Builder; + typedef BTreeAggregator Aggregator; + typedef AggrCalcT AggrCalcType; + using Parent2Type::_root; + using Parent2Type::getFrozenRoot; + using Parent2Type::getFrozenRootRelaxed; + using Parent2Type::isFrozen; + +public: + /** + * Create a tree from a tree builder. This is a destructive + * assignment, old content of tree is destroyed and tree + * builder is emptied when tree grabs ownership of nodes. 
+ */ + void + assign(Builder &rhs, NodeAllocatorType &allocator); + + /** Insert (key, data) unless the key is already present; returns false in that case, true otherwise. */ bool + insert(const KeyType & key, const DataType & data, + NodeAllocatorType &allocator, CompareT comp = CompareT(), + const AggrCalcT &aggrCalc = AggrCalcT()); + + void + insert(Iterator &itr, + const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc = AggrCalcT()); + + /** Remove the element with the given key; returns false when no such element is found, true otherwise. */ bool + remove(const KeyType & key, + NodeAllocatorType &allocator, CompareT comp = CompareT(), + const AggrCalcT &aggrCalc = AggrCalcT()); + + void + remove(Iterator &itr, + const AggrCalcT &aggrCalc = AggrCalcT()); +}; + + + +extern template class BTreeRootT; +extern template class BTreeRootT; +extern template class BTreeRootT; +extern template class BTreeRoot; +extern template class BTreeRoot; +extern template class BTreeRoot, + BTreeDefaultTraits, MinMaxAggrCalc>; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreeroot.hpp b/searchlib/src/vespa/searchlib/btree/btreeroot.hpp new file mode 100644 index 00000000000..6b39e142d28 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreeroot.hpp @@ -0,0 +1,486 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreeroot.h" +#include "btreebuilder.h" +#include "btreerootbase.hpp" +#include "btreeinserter.hpp" +#include "btreeremover.hpp" +#include "btreeaggregator.hpp" +#include + +namespace search { +namespace btree { + +//----------------------- BTreeRoot ------------------------------------------// + +/** Render the subtree at node recursively: "{<node>}" for a leaf, and "{<node>,children(n)[c[0]{...},c[1]{...}]}" for an internal node, where <node> is allocator.toString() of that node. */ template +vespalib::string +BTreeRootT:: +toString(BTreeNode::Ref node, + const NodeAllocatorType &allocator) const +{ + if (allocator.isLeafRef(node)) { + vespalib::asciistream ss; + ss << "{" << allocator.toString(node) << "}"; + return ss.str(); + } else { + const InternalNodeType * inode = allocator.mapInternalRef(node); + vespalib::asciistream ss; + ss << "{" << allocator.toString(inode) << ",children(" << inode->validSlots() << ")["; + for (size_t i = 0; i < inode->validSlots(); ++i) { + if (i > 0) ss << ","; + ss << "c[" << i << "]" << toString(inode->getChild(i), allocator); + } + ss << "]}"; + return ss.str(); + } +} + +/** Recursively validate the subtree at node, which is expected to sit at the given level. Checks: leaf/internal kind matches the level, the node's own getLevel() matches, slot count is within maxSlots() and (unless ignoreMinSlots) at least minSlots(), adjacent keys satisfy comp(prev, next), each internal key is equivalent under comp to the last key of its child, every child ref is valid, the children's validLeaves sum equals the node's validLeaves(), and the children are either all leaves or all internal nodes. Children are always checked with ignoreMinSlots == false. */ template +bool +BTreeRootT:: +isValid(BTreeNode::Ref node, + bool ignoreMinSlots, uint32_t level, const NodeAllocatorType &allocator, + CompareT comp) const +{ + if (allocator.isLeafRef(node)) { + if (level != 0) { + return false; + } + const LeafNodeType * lnode = allocator.mapLeafRef(node); + if (level != lnode->getLevel()) { + return false; + } + if (lnode->validSlots() > LeafNodeType::maxSlots()) + return false; + if (lnode->validSlots() < LeafNodeType::minSlots() && !ignoreMinSlots) + return false; + for (size_t i = 1; i < lnode->validSlots(); ++i) { + if (!comp(lnode->getKey(i - 1), lnode->getKey(i))) { + return false; + } + } + } else { + if (level == 0) { + return false; + } + const InternalNodeType * inode = allocator.mapInternalRef(node); + if (level != inode->getLevel()) { + return false; + } + if (inode->validSlots() > InternalNodeType::maxSlots()) + return false; + if (inode->validSlots() < InternalNodeType::minSlots() && + !ignoreMinSlots) + return false; + size_t lChildren = 0; + size_t iChildren = 0; + 
uint32_t validLeaves = 0; + for (size_t i = 0; i < inode->validSlots(); ++i) { + if (i > 0 && !comp(inode->getKey(i - 1), inode->getKey(i))) { + return false; + } + const BTreeNode::Ref childRef = inode->getChild(i); + if (!allocator.isValidRef(childRef)) + return false; + validLeaves += allocator.validLeaves(childRef); + if (allocator.isLeafRef(childRef)) + lChildren++; + else + iChildren++; + /* The two comp() checks below together require the slot key and the child's last key to be equivalent. */ if (comp(inode->getKey(i), allocator.getLastKey(childRef))) { + return false; + } + if (comp(allocator.getLastKey(childRef), inode->getKey(i))) { + return false; + } + if (!isValid(childRef, false, level - 1, allocator, comp)) { + return false; + } + } + if (validLeaves != inode->validLeaves()) { + return false; + } + /* Fails when the children are a mix of leaves and internal nodes. */ if (lChildren < inode->validSlots() && iChildren < inode->validSlots()) { + return false; + } + } + return true; +} + +/** Position an iterator with lower_bound(root, key); if the element found compares greater than key (comp(key, found)), call setupEnd() on the iterator. */ template +typename BTreeRootT::Iterator +BTreeRootT:: +findHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp) +{ + Iterator itr(BTreeNode::Ref(), allocator); + itr.lower_bound(root, key, comp); + if (itr.valid() && comp(key, itr.getKey())) { + itr.setupEnd(); + } + return itr; +} + +/** Return an iterator positioned by lower_bound(root, key, comp). */ template +typename BTreeRootT::Iterator +BTreeRootT:: +lowerBoundHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp) +{ + Iterator itr(BTreeNode::Ref(), allocator); + itr.lower_bound(root, key, comp); + return itr; +} + +/** Construct an iterator from root; if it is valid and key is not less than its key, call seekPast(key, comp). */ template +typename BTreeRootT::Iterator +BTreeRootT:: +upperBoundHelper(BTreeNode::Ref root, const KeyType & key, + const NodeAllocatorType & allocator, CompareT comp) +{ + Iterator itr(root, allocator); + if (itr.valid() && !comp(key, itr.getKey())) { + itr.seekPast(key, comp); + } + return itr; +} + + +//----------------------- BTreeRoot::FrozenView ----------------------------------// + +/** Store the frozen root ref and a reference to the allocator. */ template +BTreeRootT:: +FrozenView::FrozenView(BTreeNode::Ref frozenRoot, + const NodeAllocatorType & allocator) : + _frozenRoot(frozenRoot), + 
_allocator(allocator) +{ +} + +/** Same steps as findHelper(), applied to _frozenRoot with a ConstIterator. */ template +typename BTreeRootT::ConstIterator +BTreeRootT:: +FrozenView::find(const KeyType & key, + CompareT comp) const +{ + ConstIterator itr(BTreeNode::Ref(), _allocator); + itr.lower_bound(_frozenRoot, key, comp); + if (itr.valid() && comp(key, itr.getKey())) { + itr.setupEnd(); + } + return itr; +} + +/** Same steps as lowerBoundHelper(), applied to _frozenRoot with a ConstIterator. */ template +typename BTreeRootT::ConstIterator +BTreeRootT:: +FrozenView::lowerBound(const KeyType & key, + CompareT comp) const +{ + ConstIterator itr(BTreeNode::Ref(), _allocator); + itr.lower_bound(_frozenRoot, key, comp); + return itr; +} + +/** Same steps as upperBoundHelper(), applied to _frozenRoot with a ConstIterator. */ template +typename BTreeRootT::ConstIterator +BTreeRootT:: +FrozenView::upperBound(const KeyType & key, + CompareT comp) const +{ + ConstIterator itr(_frozenRoot, _allocator); + if (itr.valid() && !comp(key, itr.getKey())) { + itr.seekPast(key, comp); + } + return itr; +} + +/** Return the allocator's validLeaves() for _frozenRoot, or 0 when that ref is not valid. */ template +size_t +BTreeRootT:: +FrozenView::size(void) const +{ + if (NodeAllocatorType::isValidRef(_frozenRoot)) { + return _allocator.validLeaves(_frozenRoot); + } + return 0u; +} + +//----------------------- BTreeRoot ----------------------------------------------// + +template +BTreeRootT::BTreeRootT() + : ParentType() +{ +} + +template +BTreeRootT::~BTreeRootT() +{ +} + +/** If _root is valid: recursively delete the tree, reset _root to a default-constructed Ref and, when getFrozenRootRelaxed() is still a valid ref, register this tree with allocator.needFreeze(). */ template +void +BTreeRootT:: +clear(NodeAllocatorType &allocator) +{ + if (NodeAllocatorType::isValidRef(_root)) { + this->recursiveDelete(_root, allocator); + _root = BTreeNode::Ref(); + if (NodeAllocatorType::isValidRef(getFrozenRootRelaxed())) + allocator.needFreeze(this); + } +} + +template +typename BTreeRootT::Iterator +BTreeRootT:: +find(const KeyType & key, const NodeAllocatorType & allocator, + CompareT comp) const +{ + return findHelper(_root, key, allocator, comp); +} + +template +typename BTreeRootT::Iterator +BTreeRootT:: +lowerBound(const KeyType & key, const NodeAllocatorType & allocator, + CompareT comp) const +{ + return lowerBoundHelper(_root, key, allocator, comp); +} + +template +typename BTreeRootT::Iterator +BTreeRootT:: 
+upperBound(const KeyType & key, const NodeAllocatorType & allocator, + CompareT comp) const +{ + return upperBoundHelper(_root, key, allocator, comp); +} + + +/** Return allocator.validLeaves(_root), or 0 when _root is not a valid ref. */ template +size_t +BTreeRootT:: +size(const NodeAllocatorType &allocator) const +{ + if (NodeAllocatorType::isValidRef(_root)) { + return allocator.validLeaves(_root); + } + return 0u; +} + + +/** Same as size(), computed from getFrozenRoot() instead of _root. */ template +size_t +BTreeRootT:: +frozenSize(const NodeAllocatorType &allocator) const +{ + BTreeNode::Ref frozenRoot = getFrozenRoot(); + if (NodeAllocatorType::isValidRef(frozenRoot)) { + return allocator.validLeaves(frozenRoot); + } + return 0u; +} + + +/** Return "root(<subtree>)" for a valid _root, or an empty string otherwise. */ template +vespalib::string +BTreeRootT:: +toString(const NodeAllocatorType &allocator) const +{ + vespalib::asciistream ss; + if (NodeAllocatorType::isValidRef(_root)) { + ss << "root(" << toString(_root, allocator) << ")"; + } + return ss.str(); +} + +/** Validate the tree from _root, taking the expected level from the root node and skipping the minimum-slot check for the root itself. A tree without a valid root is reported as valid. */ template +bool +BTreeRootT:: +isValid(const NodeAllocatorType &allocator, + CompareT comp) const +{ + if (NodeAllocatorType::isValidRef(_root)) { + uint32_t level = allocator.getLevel(_root); + return isValid(_root, true, level, allocator, comp); + } + return true; +} + + +/** Same as isValid(allocator, comp), starting from getFrozenRoot() instead of _root. */ template +bool +BTreeRootT:: +isValidFrozen(const NodeAllocatorType &allocator, + CompareT comp) const +{ + BTreeNode::Ref frozenRoot = getFrozenRoot(); + if (NodeAllocatorType::isValidRef(frozenRoot)) { + uint32_t level = allocator.getLevel(frozenRoot); + return isValid(frozenRoot, true, level, allocator, comp); + } + return true; +} + + +/** Return sizeof(BTreeRootT) * 8 plus, for a valid _root, the bitSize() of the subtree below it. */ template +size_t +BTreeRootT:: +bitSize(const NodeAllocatorType &allocator) const +{ + size_t ret = sizeof(BTreeRootT) * 8; + if (NodeAllocatorType::isValidRef(_root)) + ret += bitSize(_root, allocator); + return ret; +} + + +/** Return sizeof(node type) * 8 summed over the node and, for internal nodes, all children in valid slots. */ template +size_t +BTreeRootT:: +bitSize(BTreeNode::Ref node, + const NodeAllocatorType &allocator) const +{ + if (allocator.isLeafRef(node)) { + return sizeof(LeafNodeType) * 8; + } else { + size_t ret = sizeof(InternalNodeType) * 8; + const InternalNodeType * inode = 
allocator.mapInternalRef(node); + size_t slots = inode->validSlots(); + for (size_t i = 0; i < slots; ++i) { + ret += bitSize(inode->getChild(i), allocator); + } + return ret; + } +} + + +/** Replace _root with itr.thaw(_root); if the tree was frozen before and is not afterwards, register it with the iterator's allocator via needFreeze(). */ template +void +BTreeRootT:: +thaw(Iterator &itr) +{ + bool oldFrozen = isFrozen(); + _root = itr.thaw(_root); + if (oldFrozen && !isFrozen()) + itr.getAllocator().needFreeze(this); +} + + +/** Clear the tree, then take the root from rhs.handover(); if the tree was frozen after the clear and is not afterwards, register it with allocator.needFreeze(). */ template +void +BTreeRoot:: +assign(Builder &rhs, + NodeAllocatorType &allocator) +{ + this->clear(allocator); + + bool oldFrozen = isFrozen(); + _root = rhs.handover(); + if (oldFrozen && !isFrozen()) + allocator.needFreeze(this); +} + + +/** Look up key with lower_bound(); return false without modifying the tree when an element not greater than key is found there (key already present), otherwise insert at that position and return true. */ template +bool +BTreeRoot:: +insert(const KeyType & key, const DataType & data, + NodeAllocatorType &allocator, CompareT comp, + const AggrCalcT &aggrCalc) +{ + Iterator itr(BTreeNode::Ref(), allocator); + itr.lower_bound(_root, key, comp); + if (itr.valid() && !comp(key, itr.getKey())) + return false; // Element already exists + insert(itr, key, data, aggrCalc); + return true; +} + + +/** Insert (key, data) at the iterator position through BTreeInserter::insert(); if the tree was frozen before and is not afterwards, register it via needFreeze(). */ template +void +BTreeRoot:: +insert(Iterator &itr, + const KeyType &key, const DataType &data, + const AggrCalcT &aggrCalc) +{ + typedef BTreeInserter Inserter; + bool oldFrozen = isFrozen(); + Inserter::insert(_root, itr, key, data, + aggrCalc); + if (oldFrozen && !isFrozen()) + itr.getAllocator().needFreeze(this); +} + + +/** Look up key with lower_bound(); return false when the iterator is invalid or the element found is greater than key, otherwise remove that element and return true. */ template +bool +BTreeRoot:: +remove(const KeyType & key, + NodeAllocatorType &allocator, CompareT comp, + const AggrCalcT &aggrCalc) +{ + Iterator itr(BTreeNode::Ref(), allocator); + itr.lower_bound(_root, key, comp); + if (!itr.valid() || comp(key, itr.getKey())) + return false; + remove(itr, aggrCalc); + return true; +} + + +/** Remove the element at the iterator position through BTreeRemover::remove(); if the tree was frozen before and is not afterwards, register it via needFreeze(). */ template +void +BTreeRoot:: +remove(Iterator &itr, + const AggrCalcT &aggrCalc) +{ + typedef BTreeRemover + Remover; + bool oldFrozen = isFrozen(); + Remover::remove(_root, itr, aggrCalc); + if (oldFrozen && !isFrozen()) + itr.getAllocator().needFreeze(this); +} + + +} // namespace search::btree +} // namespace search + diff --git 
a/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp b/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp new file mode 100644 index 00000000000..a31ea1206c9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.cpp @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "btreerootbase.h" +#include "btreerootbase.hpp" + +namespace search +{ + +namespace btree +{ + +template class BTreeRootBase; +template class BTreeRootBase; +template class BTreeRootBase; + +} // namespace btree +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreerootbase.h b/searchlib/src/vespa/searchlib/btree/btreerootbase.h new file mode 100644 index 00000000000..8d16402a030 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.h @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "btreetraits.h" +#include "btreenode.h" +#include "btreenodeallocator.h" +#include + +namespace search { +namespace btree { + +template +class BTreeRootBase +{ +protected: + typedef KeyT KeyType; + typedef DataT DataType; + typedef AggrT AggregatedType; + typedef BTreeRootBase + BTreeRootBaseType; + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode LeafNodeType; + typedef BTreeNodeAllocator NodeAllocatorType; + + BTreeNode::Ref _root; + std::atomic _frozenRoot; + + static_assert(sizeof(_root) == sizeof(_frozenRoot), + "BTree root reference size mismatch"); + + BTreeRootBase(void); + + BTreeRootBase(const BTreeRootBase &rhs); + + BTreeRootBase &operator=(const BTreeRootBase &rhs); + + ~BTreeRootBase(void); + +public: + void + freeze(NodeAllocatorType &allocator); + + bool isFrozen() const { + return (_root.ref() == _frozenRoot.load(std::memory_order_relaxed)); + } + + void + setRoot(BTreeNode::Ref newRoot, NodeAllocatorType &allocator) + { + bool 
oldFrozen = isFrozen(); + _root = newRoot; + if (oldFrozen && !isFrozen()) + allocator.needFreeze(this); + } + + void + setRoots(BTreeNode::Ref newRoot) + { + _root = newRoot; + _frozenRoot = newRoot.ref(); + } + + BTreeNode::Ref + getRoot(void) const + { + return _root; + } + + BTreeNode::Ref + getFrozenRoot(void) const + { + return BTreeNode::Ref(_frozenRoot.load(std::memory_order_acquire)); + } + + BTreeNode::Ref + getFrozenRootRelaxed(void) const + { + return BTreeNode::Ref(_frozenRoot.load(std::memory_order_relaxed)); + } + + const AggrT & + getAggregated(const NodeAllocatorType &allocator) const + { + return allocator.getAggregated(_root); + } + + void + recycle(void) + { + _root = BTreeNode::Ref(); + _frozenRoot = BTreeNode::Ref().ref(); + } + +protected: + void + recursiveDelete(BTreeNode::Ref node, NodeAllocatorType &allocator); +}; + +extern template class BTreeRootBase; +extern template class BTreeRootBase; +extern template class BTreeRootBase; + +} // namespace btree +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp b/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp new file mode 100644 index 00000000000..4641bc6dad7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreerootbase.hpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreerootbase.h" + +namespace search { +namespace btree { + + +template +BTreeRootBase::BTreeRootBase() /* both the live root and the frozen root start as a default-constructed ref */ + : _root(BTreeNode::Ref()), + _frozenRoot(BTreeNode::Ref().ref()) +{ +} + + +template +BTreeRootBase:: +BTreeRootBase(const BTreeRootBase &rhs) /* copies the live root and a snapshot of rhs's frozen root (load() with its default memory order) */ + : _root(rhs._root), + _frozenRoot(rhs._frozenRoot.load()) +{ +} + + +template +BTreeRootBase::~BTreeRootBase() /* the live root must no longer be valid when the object is destroyed; the frozen-root check below is compiled out by #if 0 */ +{ + assert(!_root.valid()); +#if 0 + assert(!_frozenRoot.valid()); +#endif +} + + +template +BTreeRootBase & +BTreeRootBase:: +operator=(const BTreeRootBase &rhs) /* copies the live root, then stores rhs's frozen root with release ordering; returns *this */ +{ + _root = rhs._root; + _frozenRoot.store(rhs._frozenRoot.load(), std::memory_order_release); + return *this; +} + + +template +void +BTreeRootBase:: +freeze(NodeAllocatorType &allocator) /* makes the current live root the frozen root */ +{ + if (NodeAllocatorType::isValidRef(_root)) { /* a valid root node (leaf or internal) is asserted to be frozen already */ + if (allocator.isLeafRef(_root)) + assert(allocator.mapLeafRef(_root)->getFrozen()); + else + assert(allocator.mapInternalRef(_root)->getFrozen()); + } + _frozenRoot.store(_root.ref(), std::memory_order_release); /* release store; getFrozenRoot() in btreerootbase.h pairs it with an acquire load */ +} + + +template +void +BTreeRootBase:: +recursiveDelete(BTreeNode::Ref node, + NodeAllocatorType &allocator) /* hands the subtree rooted at node (which must be a valid ref) to allocator.holdNode(), children before their parent */ +{ + assert(allocator.isValidRef(node)); + if (!allocator.isLeafRef(node)) { + InternalNodeType * inode = allocator.mapInternalRef(node); + for (size_t i = 0; i < inode->validSlots(); ++i) { + recursiveDelete(inode->getChild(i), allocator); + } + allocator.holdNode(node, inode); /* the internal node itself is held only after all of its children */ + } else { + allocator.holdNode(node, allocator.mapLeafRef(node)); + } +} + +} // namespace btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.cpp b/searchlib/src/vespa/searchlib/btree/btreestore.cpp new file mode 100644 index 00000000000..862d05baf55 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreestore.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "btreestore.h" +#include "datastore.h" +#include "btreenode.h" +#include "btreerootbase.h" +#include "btreeroot.h" +#include "btreenodeallocator.h" +#include "btreeiterator.hpp" +#include "btreestore.hpp" + +namespace search +{ + +namespace btree +{ + +template class BTreeStore, + BTreeDefaultTraits>; + +template class BTreeStore, + BTreeDefaultTraits>; + +template class BTreeStore, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.h b/searchlib/src/vespa/searchlib/btree/btreestore.h new file mode 100644 index 00000000000..143a491a725 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreestore.h @@ -0,0 +1,511 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "datastore.h" +#include "btreenode.h" +#include "btreebuilder.h" +#include "btreeroot.h" +#include "noaggrcalc.h" +#include "minmaxaggrcalc.h" + +namespace search +{ + +namespace btree +{ + +template +class BTreeStore +{ +public: + typedef KeyT KeyType; + typedef DataT DataType; + typedef AggrT AggregatedType; + typedef DataStoreT > DataStoreType; + typedef DataStoreType::RefType RefType; + typedef BTreeKeyData KeyDataType; + + typedef BTreeRoot BTreeType; + typedef BTreeInternalNode InternalNodeType; + typedef BTreeLeafNode + LeafNodeType; + typedef std::pair BTreeTypeRefPair; + typedef std::pair KeyDataTypeRefPair; + typedef typename InternalNodeType::RefPair InternalNodeTypeRefPair; + typedef typename LeafNodeType::RefPair LeafNodeTypeRefPair; + typedef vespalib::GenerationHandler::generation_t generation_t; + typedef BTreeNodeAllocator NodeAllocatorType; + typedef typename BTreeType::Iterator Iterator; + typedef typename BTreeType::ConstIterator ConstIterator; + typedef const KeyDataType * AddIter; + typedef const KeyType * RemoveIter; + typedef BTreeBuilder Builder; + + static constexpr 
uint32_t clusterLimit = 8; + + enum BufferTypes + { + BUFFERTYPE_ARRAY1 = 0, + BUFFERTYPE_ARRAY2 = 1, + BUFFERTYPE_ARRAY3 = 2, + BUFFERTYPE_ARRAY4 = 3, + BUFFERTYPE_ARRAY5 = 4, + BUFFERTYPE_ARRAY6 = 5, + BUFFERTYPE_ARRAY7 = 6, + BUFFERTYPE_ARRAY8 = 7, + BUFFERTYPE_BTREE = 8 + }; +protected: + struct TreeReclaimer { + static void reclaim(BTreeType * tree) { + tree->recycle(); + } + }; + + DataStoreType _store; + BufferType _treeType; + BufferType _small1Type; + BufferType _small2Type; + BufferType _small3Type; + BufferType _small4Type; + BufferType _small5Type; + BufferType _small6Type; + BufferType _small7Type; + BufferType _small8Type; + NodeAllocatorType _allocator; + AggrCalcT _aggrCalc; + Builder _builder; + + BTreeType * getWTreeEntry(RefType ref) { + return _store.getBufferEntry(ref.bufferId(), ref.offset()); + } + +public: + BTreeStore(); + + BTreeStore(bool init); + + ~BTreeStore(void); + + const NodeAllocatorType &getAllocator() const { return _allocator; } + + void + disableFreeLists() { + _store.disableFreeLists(); + _allocator.disableFreeLists(); + } + + void + disableElemHoldList() + { + _store.disableElemHoldList(); + _allocator.disableElemHoldList(); + } + + BTreeTypeRefPair + allocNewBTree(void) { + return _store.allocNewEntry(BUFFERTYPE_BTREE); + } + + BTreeTypeRefPair + allocBTree(void) { + return _store.allocEntry(BUFFERTYPE_BTREE); + } + + BTreeTypeRefPair + allocNewBTreeCopy(const BTreeType &rhs) { + return _store.allocNewEntryCopy(BUFFERTYPE_BTREE, rhs); + } + + BTreeTypeRefPair + allocBTreeCopy(const BTreeType &rhs) { + return _store.allocEntryCopy >( + BUFFERTYPE_BTREE, rhs); + } + + KeyDataTypeRefPair + allocNewKeyData(uint32_t clusterSize); + + KeyDataTypeRefPair + allocKeyData(uint32_t clusterSize); + + KeyDataTypeRefPair + allocNewKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize); + + KeyDataTypeRefPair + allocKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize); + + std::vector + startCompact(void); + + void + 
finishCompact(const std::vector &toHold); + + + const KeyDataType * + lower_bound(const KeyDataType *b, const KeyDataType *e, + const KeyType &key, CompareT comp); + + void + makeTree(EntryRef &ref, + const KeyDataType *array, uint32_t clusterSize); + + void + makeArray(EntryRef &ref, EntryRef leafRef, LeafNodeType *leafNode); + + bool + insert(EntryRef &ref, + const KeyType &key, const DataType &data, + CompareT comp = CompareT()); + + bool + remove(EntryRef &ref, + const KeyType &key, + CompareT comp = CompareT()); + + uint32_t + getNewClusterSize(const KeyDataType *o, + const KeyDataType *oe, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + void + applyCluster(const KeyDataType *o, + const KeyDataType *oe, + KeyDataType *d, + const KeyDataType *de, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + + void + applyModifyTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + void + applyBuildTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + void + applyNewArray(EntryRef &ref, + AddIter aOrg, + AddIter ae); + + void + applyNewTree(EntryRef &ref, + AddIter a, + AddIter ae, + CompareT comp); + + void + applyNew(EntryRef &ref, + AddIter a, + AddIter ae, + CompareT comp); + + + bool + applyCluster(EntryRef &ref, + uint32_t clusterSize, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + void + applyTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp); + + void + normalizeTree(EntryRef &ref, + BTreeType *tree, + bool wasArray); + /** + * Apply multiple changes at once. + * + * additions and removals should be sorted on key without duplicates. + * Overlap between additions and removals indicates updates. 
+ */ + void + apply(EntryRef &ref, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp = CompareT()); + + void + clear(const EntryRef ref); + + size_t + size(const EntryRef ref) const; + + size_t + frozenSize(const EntryRef ref) const; + + Iterator + begin(const EntryRef ref) const; + + ConstIterator + beginFrozen(const EntryRef ref) const; + + void + beginFrozen(const EntryRef ref, std::vector &where) const; + + uint32_t + getTypeId(RefType ref) const + { + return _store.getBufferState(ref.bufferId()).getTypeId(); + } + + static bool + isSmallArray(uint32_t typeId) + { + return typeId < clusterLimit; + } + + bool + isSmallArray(const EntryRef ref) const; + + /** + * Returns the cluster size for the type id. + * Cluster size == 0 means we have a tree for the given reference. + * The reference must be valid. + **/ + static uint32_t + getClusterSize(uint32_t typeId) + { + return (typeId < clusterLimit) ? typeId + 1 : 0; + } + + /** + * Returns the cluster size for the entry pointed to by the given reference. + * Cluster size == 0 means we have a tree for the given reference. + * The reference must be valid. 
+ **/ + uint32_t + getClusterSize(RefType ref) const + { + return getClusterSize(getTypeId(ref)); + } + + const BTreeType * getTreeEntry(RefType ref) const { + return _store.getBufferEntry(ref.bufferId(), ref.offset()); + } + + const KeyDataType * getKeyDataEntry(RefType ref, uint32_t clusterSize) const { + return _store.getBufferEntry(ref.bufferId(), ref.offset() * clusterSize); + } + + void freeze() { + _allocator.freeze(); + } + + // Inherit doc from DataStoreBase + void + trimHoldLists(generation_t usedGen) + { + _allocator.trimHoldLists(usedGen); + _store.trimHoldLists(usedGen); + } + + // Inherit doc from DataStoreBase + void + transferHoldLists(generation_t generation) + { + _allocator.transferHoldLists(generation); + _store.transferHoldLists(generation); + } + + void + clearHoldLists(void) + { + _allocator.clearHoldLists(); + _store.clearHoldLists(); + } + + + // Inherit doc from DataStoreBase + MemoryUsage getMemoryUsage() const { + MemoryUsage usage; + usage.merge(_allocator.getMemoryUsage()); + usage.merge(_store.getMemoryUsage()); + return usage; + } + + void + clearBuilder(void) + { + _builder.clear(); + } + + AggregatedType + getAggregated(const EntryRef ref) const; + + template + void + foreach_unfrozen_key(EntryRef ref, FunctionType func) const; + + template + void + foreach_frozen_key(EntryRef ref, FunctionType func) const; + + template + void + foreach_unfrozen(EntryRef ref, FunctionType func) const; + + template + void + foreach_frozen(EntryRef ref, FunctionType func) const; + +private: + static constexpr size_t MIN_CLUSTERS = 128u; + template + void + foreach_key(EntryRef ref, FunctionType func) const; + + template + void + foreach(EntryRef ref, FunctionType func) const; +}; + +template +template +void +BTreeStore:: +foreach_unfrozen_key(EntryRef ref, FunctionType func) const { + foreach_key(ref, func); +} + +template +template +void +BTreeStore:: +foreach_frozen_key(EntryRef ref, FunctionType func) const +{ + foreach_key(ref, func); +} + 
+template +template +void +BTreeStore:: +foreach_unfrozen(EntryRef ref, FunctionType func) const +{ + foreach(ref, func); +} + + +template +template +void +BTreeStore:: +foreach_frozen(EntryRef ref, FunctionType func) const +{ + foreach(ref, func); +} + +template +template +void +BTreeStore:: +foreach_key(EntryRef ref, FunctionType func) const +{ + if (!ref.valid()) + return; + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + _allocator.getNodeStore().foreach_key(Frozen ? tree->getFrozenRoot() : tree->getRoot(), func); + } else { + const KeyDataType *p = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *pe = p + clusterSize; + for (; p != pe; ++p) { + func(p->_key); + } + } +} + +template +template +void +BTreeStore:: +foreach(EntryRef ref, FunctionType func) const +{ + if (!ref.valid()) + return; + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + _allocator.getNodeStore().foreach(Frozen ? tree->getFrozenRoot() : tree->getRoot(), func); + } else { + const KeyDataType *p = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *pe = p + clusterSize; + for (; p != pe; ++p) { + func(p->_key, p->getData()); + } + } +} + + +extern template class BTreeStore, + BTreeDefaultTraits>; + +extern template class BTreeStore, + BTreeDefaultTraits>; + +extern template class BTreeStore, + BTreeDefaultTraits, + MinMaxAggrCalc>; + +} // namespace btree + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/btree/btreestore.hpp b/searchlib/src/vespa/searchlib/btree/btreestore.hpp new file mode 100644 index 00000000000..f6223d93731 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreestore.hpp @@ -0,0 +1,1005 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "btreestore.h" +#include +#include "btreebuilder.h" +#include "btreebuilder.hpp" +#include "datastore.hpp" + +namespace search +{ + +namespace btree +{ + +template +BTreeStore:: +BTreeStore() + : BTreeStore(true) +{ +} + +template +BTreeStore:: +BTreeStore(bool init) + : _store(), + _treeType(1, MIN_CLUSTERS, RefType::offsetSize()), + _small1Type(1, MIN_CLUSTERS, RefType::offsetSize()), + _small2Type(2, MIN_CLUSTERS, RefType::offsetSize()), + _small3Type(3, MIN_CLUSTERS, RefType::offsetSize()), + _small4Type(4, MIN_CLUSTERS, RefType::offsetSize()), + _small5Type(5, MIN_CLUSTERS, RefType::offsetSize()), + _small6Type(6, MIN_CLUSTERS, RefType::offsetSize()), + _small7Type(7, MIN_CLUSTERS, RefType::offsetSize()), + _small8Type(8, MIN_CLUSTERS, RefType::offsetSize()), + _allocator(), + _aggrCalc(), + _builder(_allocator, _aggrCalc) +{ + // XXX: order here makes typeId + 1 == clusterSize for small arrays, + // code elsewhere depends on it. + _store.addType(&_small1Type); + _store.addType(&_small2Type); + _store.addType(&_small3Type); + _store.addType(&_small4Type); + _store.addType(&_small5Type); + _store.addType(&_small6Type); + _store.addType(&_small7Type); + _store.addType(&_small8Type); + _store.addType(&_treeType); + if (init) { + _store.initActiveBuffers(); + _store.enableFreeLists(); + } +} + + +template +BTreeStore::~BTreeStore(void) +{ + _builder.clear(); + _store.dropBuffers(); // Drop buffers before type handlers are dropped +} + + +template +typename BTreeStore:: +KeyDataTypeRefPair +BTreeStore:: +allocNewKeyData(uint32_t clusterSize) +{ + assert(clusterSize >= 1 && clusterSize <= clusterLimit); + uint32_t typeId = clusterSize - 1; + _store.ensureBufferCapacity(typeId, clusterSize); + uint32_t activeBufferId = _store.getActiveBufferId(typeId); + BufferState &state = _store.getBufferState(activeBufferId); + assert(state._state == BufferState::ACTIVE); + size_t oldSize = state.size(); + KeyDataType *node = + 
_store.getBufferEntry(activeBufferId, oldSize); + for (uint32_t i = 0; i < clusterSize; ++i) + new (static_cast(node + i)) KeyDataType(); + state.pushed_back(clusterSize); + return std::make_pair(RefType(oldSize / clusterSize, activeBufferId), + node); +} + + +template +typename BTreeStore:: +KeyDataTypeRefPair +BTreeStore:: +allocKeyData(uint32_t clusterSize) +{ + assert(clusterSize >= 1 && clusterSize <= clusterLimit); + uint32_t typeId = clusterSize - 1; + BufferState::FreeListList &freeListList = _store.getFreeList(typeId); + if (freeListList._head == NULL) + return allocNewKeyData(clusterSize); + BufferState &state = *freeListList._head; + assert(state._state == BufferState::ACTIVE); + RefType ref(state.popFreeList()); + KeyDataType *node = + _store.getBufferEntry(ref.bufferId(), + ref.offset() * clusterSize); + return std::make_pair(ref, node); +} + + +template +typename BTreeStore:: +KeyDataTypeRefPair +BTreeStore:: +allocNewKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize) +{ + assert(clusterSize >= 1 && clusterSize <= clusterLimit); + uint32_t typeId = clusterSize - 1; + _store.ensureBufferCapacity(typeId, clusterSize); + uint32_t activeBufferId = _store.getActiveBufferId(typeId); + BufferState &state = _store.getBufferState(activeBufferId); + assert(state._state == BufferState::ACTIVE); + size_t oldSize = state.size(); + KeyDataType *node = + _store.getBufferEntry(activeBufferId, oldSize); + for (uint32_t i = 0; i < clusterSize; ++i) + new (static_cast(node + i)) KeyDataType(*(rhs + i)); + state.pushed_back(clusterSize); + return std::make_pair(RefType(oldSize / clusterSize, activeBufferId), + node); +} + + +template +typename BTreeStore:: +KeyDataTypeRefPair +BTreeStore:: +allocKeyDataCopy(const KeyDataType *rhs, uint32_t clusterSize) +{ + assert(clusterSize >= 1 && clusterSize <= clusterLimit); + uint32_t typeId = clusterSize - 1; + BufferState::FreeListList &freeListList = _store.getFreeList(typeId); + if (freeListList._head == NULL) + return 
allocNewKeyDataCopy(rhs, clusterSize); + BufferState &state = *freeListList._head; + assert(state._state == BufferState::ACTIVE); + RefType ref(state.popFreeList()); + KeyDataType *node = + _store.getBufferEntry(ref.bufferId(), + ref.offset() * clusterSize); + for (uint32_t i = 0; i < clusterSize; ++i) + *(node + i) = *(rhs + i); + return std::make_pair(ref, node); +} + + +template +std::vector +BTreeStore::startCompact(void) +{ + std::vector ret = _store.startCompact(clusterLimit); + for (uint32_t clusterSize = 1; clusterSize <= clusterLimit; ++clusterSize) { + uint32_t typeId = clusterSize - 1; + std::vector toHold = _store.startCompact(typeId); + for (auto i : toHold) { + ret.push_back(i); + } + } + return ret; +} + + +template +void +BTreeStore:: +finishCompact(const std::vector &toHold) +{ + _store.finishCompact(toHold); +} + + +template +const typename BTreeStore:: +KeyDataType * +BTreeStore:: +lower_bound(const KeyDataType *b, const KeyDataType *e, + const KeyType &key, CompareT comp) +{ + const KeyDataType *i = b; + for (; i != e; ++i) { + if (!comp(i->_key, key)) + break; + } + return i; +} + + +template +void +BTreeStore:: +makeTree(EntryRef &ref, + const KeyDataType *array, uint32_t clusterSize) +{ + LeafNodeTypeRefPair lPair(_allocator.allocLeafNode()); + LeafNodeType *lNode = lPair.second; + lNode->setValidSlots(clusterSize); + const KeyDataType *o = array; + for (uint32_t idx = 0; idx < clusterSize; ++idx, ++o) { + lNode->update(idx, o->_key, o->getData()); + } + typedef BTreeAggregator Aggregator; + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*lNode, _aggrCalc); + } + lNode->freeze(); + BTreeTypeRefPair tPair(allocBTree()); + tPair.second->setRoots(lPair.first); + _store.holdElem(ref, clusterSize); + ref = tPair.first; +} + + +template +void +BTreeStore:: +makeArray(EntryRef &ref, EntryRef root, LeafNodeType *leafNode) +{ + uint32_t clusterSize = leafNode->validSlots(); + KeyDataTypeRefPair kPair(allocKeyData(clusterSize)); + KeyDataType 
*kd = kPair.second; + // Copy whole leaf node + for (uint32_t idx = 0; idx < clusterSize; ++idx, ++kd) { + kd->_key = leafNode->getKey(idx); + kd->setData(leafNode->getData(idx)); + } + assert(kd == kPair.second + clusterSize); + _store.holdElem(ref, 1); + if (!leafNode->getFrozen()) { + leafNode->freeze(); + } + _allocator.holdNode(root, leafNode); + ref = kPair.first; +} + + +template +bool +BTreeStore:: +insert(EntryRef &ref, + const KeyType &key, const DataType &data, + CompareT comp) +{ +#ifdef FORCE_APPLY + bool retVal = true; + if (ref.valid()) { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + Iterator itr = tree->find(key, _allocator, comp); + if (itr.valid()) + retVal = false; + } else { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi < olde && !comp(key, oldi->_key)) + retVal = false; // key already present + } + } + KeyDataType addition(key, data); + if (retVal) { + apply(ref, &addition, &addition+1, NULL, NULL, comp); + } + return retVal; +#else + if (!ref.valid()) { + KeyDataTypeRefPair kPair(allocKeyData(1)); + KeyDataType *kd = kPair.second; + kd->_key = key; + kd->setData(data); + ref = kPair.first; + return true; + } + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + BTreeType *tree = getWTreeEntry(iRef); + return tree->insert(key, data, _allocator, comp, _aggrCalc); + } + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi < olde && !comp(key, oldi->_key)) + return false; // key already present + if (clusterSize < clusterLimit) { + // Grow array + KeyDataTypeRefPair kPair(allocKeyData(clusterSize + 1)); + KeyDataType *kd = kPair.second; + // Copy 
data before key + for (const KeyDataType *i = old; i != oldi; ++i, ++kd) { + kd->_key = i->_key; + kd->setData(i->getData()); + } + // Copy key + kd->_key = key; + kd->setData(data); + ++kd; + // Copy data after key + for (const KeyDataType *i = oldi; i != olde; ++i, ++kd) { + kd->_key = i->_key; + kd->setData(i->getData()); + } + assert(kd == kPair.second + clusterSize + 1); + _store.holdElem(ref, clusterSize); + ref = kPair.first; + return true; + } + // Convert from short array to tree + LeafNodeTypeRefPair lPair(_allocator.allocLeafNode()); + LeafNodeType *lNode = lPair.second; + uint32_t idx = 0; + lNode->setValidSlots(clusterSize + 1); + // Copy data before key + for (const KeyDataType *i = old; i != oldi; ++i, ++idx) { + lNode->update(idx, i->_key, i->getData()); + } + // Copy key + lNode->update(idx, key, data); + ++idx; + // Copy data after key + for (const KeyDataType *i = oldi; i != olde; ++i, ++idx) { + lNode->update(idx, i->_key, i->getData()); + } + assert(idx == clusterSize + 1); + typedef BTreeAggregator Aggregator; + if (AggrCalcT::hasAggregated()) { + Aggregator::recalc(*lNode, _aggrCalc); + } + lNode->freeze(); + BTreeTypeRefPair tPair(allocBTree()); + tPair.second->setRoots(lPair.first); // allow immediate access to readers + _store.holdElem(ref, clusterSize); + ref = tPair.first; + return true; +#endif +} + + +template +bool +BTreeStore:: +remove(EntryRef &ref, + const KeyType &key, + CompareT comp) +{ +#ifdef FORCE_APPLY + bool retVal = true; + if (!ref.valid()) + retVal = false; // not found + else { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + Iterator itr = tree->find(key, _allocator, comp); + if (!itr.valid()) + retVal = false; + } else { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi == olde || comp(key, 
oldi->_key)) + retVal = false; // not found + } + } + std::vector additions; + std::vector removals; + removals.push_back(key); /* ranges below use data()/data()+size(): operator[] on the empty additions vector, or at index size(), is undefined behaviour */ + apply(ref, + additions.data(), additions.data() + additions.size(), + removals.data(), removals.data() + removals.size(), + comp); + return retVal; +#else + if (!ref.valid()) + return false; // not found + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize != 0) { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi == olde || comp(key, oldi->_key)) + return false; // not found + if (clusterSize == 1) { + _store.holdElem(ref, 1); + ref = EntryRef(); + return true; + } + // Copy to smaller array + KeyDataTypeRefPair kPair(allocKeyData(clusterSize - 1)); + KeyDataType *kd = kPair.second; + // Copy data before key + for (const KeyDataType *i = old; i != oldi; ++i, ++kd) { + kd->_key = i->_key; + kd->setData(i->getData()); + } + // Copy data after key + for (const KeyDataType *i = oldi + 1; i != olde; ++i, ++kd) { + kd->_key = i->_key; + kd->setData(i->getData()); + } + assert(kd == kPair.second + clusterSize - 1); + _store.holdElem(ref, clusterSize); + ref = kPair.first; + return true; + } + BTreeType *tree = getWTreeEntry(iRef); + if (!tree->remove(key, _allocator, comp, _aggrCalc)) + return false; // not found + EntryRef root = tree->getRoot(); + assert(NodeAllocatorType::isValidRef(root)); + if (!_allocator.isLeafRef(root)) + return true; + LeafNodeType *lNode = _allocator.mapLeafRef(root); + clusterSize = lNode->validSlots(); + assert(clusterSize > 0); + if (clusterSize > clusterLimit) + return true; + // Convert from tree to short array + makeArray(ref, root, lNode); + return true; +#endif +} + + +template +uint32_t +BTreeStore:: +getNewClusterSize(const KeyDataType *o, + const KeyDataType *oe, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + uint32_t d = 0u; + 
if (o == oe && a == ae) + return 0u; + while (a != ae || r != re) { + if (r != re && (a == ae || comp(*r, a->_key))) { + // remove + while (o != oe && comp(o->_key, *r)) { + ++d; + ++o; + } + if (o != oe && !comp(*r, o->_key)) + ++o; + ++r; + } else { + // add or update + while (o != oe && comp(o->_key, a->_key)) { + ++d; + ++o; + } + if (o != oe && !comp(a->_key, o->_key)) + ++o; + ++d; + if (r != re && !comp(a->_key, *r)) + ++r; + ++a; + } + } + while (o != oe) { + ++d; + ++o; + } + return d; +} + + +template +void +BTreeStore:: +applyCluster(const KeyDataType *o, + const KeyDataType *oe, + KeyDataType *d, + const KeyDataType *de, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + while (a != ae || r != re) { + if (r != re && (a == ae || comp(*r, a->_key))) { + // remove + while (o != oe && comp(o->_key, *r)) { + d->_key = o->_key; + d->setData(o->getData()); + ++d; + ++o; + } + if (o != oe && !comp(*r, o->_key)) + ++o; + ++r; + } else { + // add or update + while (o != oe && comp(o->_key, a->_key)) { + d->_key = o->_key; + d->setData(o->getData()); + ++d; + ++o; + } + if (o != oe && !comp(a->_key, o->_key)) + ++o; + d->_key = a->_key; + d->setData(a->getData()); + ++d; + if (r != re && !comp(a->_key, *r)) + ++r; + ++a; + } + } + while (o != oe) { + d->_key = o->_key; + d->setData(o->getData()); + ++d; + ++o; + } + assert(d == de); + (void) de; +} + + +template +void +BTreeStore:: +applyModifyTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + if (a == ae && r == re) + return; + Iterator itr(BTreeNode::Ref(), _allocator); + itr.lower_bound(tree->getRoot(), + (a != ae && r != re) ? (comp(a->_key, *r) ? a->_key : *r) : + ((a != ae) ? 
a->_key : *r), + comp); + while (a != ae || r != re) { + if (r != re && (a == ae || comp(*r, a->_key))) { + // remove + if (itr.valid() && comp(itr.getKey(), *r)) { + itr.binarySeek(*r, comp); + } + if (itr.valid() && !comp(*r, itr.getKey())) { + tree->remove(itr, _aggrCalc); + } + ++r; + } else { + // update or add + if (itr.valid() && comp(itr.getKey(), a->_key)) { + itr.binarySeek(a->_key, comp); + } + if (itr.valid() && !comp(a->_key, itr.getKey())) { + tree->thaw(itr); + itr.updateData(a->getData(), _aggrCalc); + } else { + tree->insert(itr, a->_key, a->getData(), _aggrCalc); + } + if (r != re && !comp(a->_key, *r)) { + ++r; + } + ++a; + } + } +} + + +template +void +BTreeStore:: +applyBuildTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + Iterator itr = tree->begin(_allocator); + Builder &builder = _builder; + builder.reuse(); + while (a != ae || r != re) { + if (r != re && (a == ae || comp(*r, a->_key))) { + // remove + while (itr.valid() && comp(itr.getKey(), *r)) { + builder.insert(itr.getKey(), itr.getData()); + ++itr; + } + if (itr.valid() && !comp(*r, itr.getKey())) + ++itr; + ++r; + } else { + // add or update + while (itr.valid() && comp(itr.getKey(), a->_key)) { + builder.insert(itr.getKey(), itr.getData()); + ++itr; + } + if (itr.valid() && !comp(a->_key, itr.getKey())) + ++itr; + builder.insert(a->_key, a->getData()); + if (r != re && !comp(a->_key, *r)) + ++r; + ++a; + } + } + while (itr.valid()) { + builder.insert(itr.getKey(), itr.getData()); + ++itr; + } + tree->assign(builder, _allocator); +} + + +template +void +BTreeStore:: +applyNewArray(EntryRef &ref, + AddIter aOrg, + AddIter ae) +{ + assert(!ref.valid()); + if (aOrg == ae) { + // No new data + return; + } + size_t additionSize(ae - aOrg); + uint32_t clusterSize = additionSize; + assert(clusterSize <= clusterLimit); + KeyDataTypeRefPair kPair(allocKeyData(clusterSize)); + KeyDataType *kd = kPair.second; + AddIter a = aOrg; + for (;a != 
ae; ++a, ++kd) { + kd->_key = a->_key; + kd->setData(a->getData()); + } + assert(kd == kPair.second + clusterSize); + assert(a == ae); + ref = kPair.first; + } + + +template +void +BTreeStore:: +applyNewTree(EntryRef &ref, + AddIter a, + AddIter ae, + CompareT comp) +{ + assert(!ref.valid()); + size_t additionSize(ae - a); + BTreeTypeRefPair tPair(allocBTree()); + BTreeType *tree = tPair.second; + applyBuildTree(tree, a, ae, nullptr, nullptr, comp); + assert(tree->size(_allocator) == additionSize); + ref = tPair.first; +} + + +template +void +BTreeStore:: +applyNew(EntryRef &ref, + AddIter a, + AddIter ae, + CompareT comp) +{ + // No old data + assert(!ref.valid()); + size_t additionSize(ae - a); + uint32_t clusterSize = additionSize; + if (clusterSize <= clusterLimit) { + applyNewArray(ref, a, ae); + } else { + applyNewTree(ref, a, ae, comp); + } +} + + +template +bool +BTreeStore:: +applyCluster(EntryRef &ref, + uint32_t clusterSize, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + size_t additionSize(ae - a); + size_t removeSize(re - r); + uint32_t newSizeMin = + std::max(clusterSize, + static_cast(additionSize)) - + std::min(clusterSize, static_cast(removeSize)); + RefType iRef(ref); + const KeyDataType *ob = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *oe = ob + clusterSize; + if (newSizeMin <= clusterLimit) { + uint32_t newSize = getNewClusterSize(ob, oe, a, ae, r, re, comp); + if (newSize == 0) { + _store.holdElem(ref, clusterSize); + ref = EntryRef(); + return true; + } + if (newSize <= clusterLimit) { + KeyDataTypeRefPair kPair(allocKeyData(newSize)); + applyCluster(ob, oe, kPair.second, kPair.second + newSize, + a, ae, r, re, comp); + _store.holdElem(ref, clusterSize); + ref = kPair.first; + return true; + } + } + // Convert from short array to tree + makeTree(ref, ob, clusterSize); + return false; +} + + +template +void +BTreeStore:: +applyTree(BTreeType *tree, + AddIter a, + AddIter ae, + RemoveIter r, + 
RemoveIter re, + CompareT comp) +{ + // Old data was tree or has been converted to a tree + uint32_t treeSize = tree->size(_allocator); + size_t additionSize(ae - a); + size_t removeSize(re - r); + uint64_t buildCost = treeSize * 2 + additionSize; + typedef bitcompression::EncodeContext64BE EC; + uint64_t modifyCost = (EC::log2(treeSize + additionSize) + 1) * + (additionSize + removeSize); + if (modifyCost < buildCost) + applyModifyTree(tree, a, ae, r, re, comp); + else + applyBuildTree(tree, a, ae, r, re, comp); +} + + +template +void +BTreeStore:: +normalizeTree(EntryRef &ref, + BTreeType *tree, + bool wasArray) +{ + EntryRef root = tree->getRoot(); + if (!NodeAllocatorType::isValidRef(root)) { + _store.holdElem(ref, 1); + ref = EntryRef(); + return; + } + if (!_allocator.isLeafRef(root)) + return; + LeafNodeType *lNode = _allocator.mapLeafRef(root); + uint32_t treeSize = lNode->validSlots(); + assert(treeSize > 0); + if (treeSize > clusterLimit) + return; + assert(!wasArray); // Should never have used tree + (void) wasArray; + // Convert from tree to short array + makeArray(ref, root, lNode); +} + + +template +void +BTreeStore:: +apply(EntryRef &ref, + AddIter a, + AddIter ae, + RemoveIter r, + RemoveIter re, + CompareT comp) +{ + if (!ref.valid()) { + // No old data + applyNew(ref, a, ae, comp); + return; + } + RefType iRef(ref); + bool wasArray = false; + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize != 0) { + wasArray = true; + if (applyCluster(ref, clusterSize, a, ae, r, re, comp)) + return; + iRef = ref; + } + // Old data was tree or has been converted to a tree + BTreeType *tree = getWTreeEntry(iRef); + applyTree(tree, a, ae, r, re, comp); + normalizeTree(ref, tree, wasArray); +} + + +template +void +BTreeStore:: +clear(const EntryRef ref) +{ + if (!ref.valid()) + return; + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + BTreeType *tree = getWTreeEntry(iRef); + tree->clear(_allocator); + 
_store.holdElem(ref, 1); + } else { + _store.holdElem(ref, clusterSize); + } +} + + +template +size_t +BTreeStore:: +size(const EntryRef ref) const +{ + if (!ref.valid()) + return 0; + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + return tree->size(_allocator); + } + return clusterSize; +} + + +template +size_t +BTreeStore:: +frozenSize(const EntryRef ref) const +{ + if (!ref.valid()) + return 0; + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + return tree->frozenSize(_allocator); + } + return clusterSize; +} + + +template +bool +BTreeStore:: +isSmallArray(const EntryRef ref) const +{ + if (!ref.valid()) + return true; + RefType iRef(ref); + uint32_t typeId(_store.getBufferState(iRef.bufferId()).getTypeId()); + return typeId < clusterLimit; +} + + +template +typename BTreeStore:: +Iterator +BTreeStore:: +begin(const EntryRef ref) const +{ + if (!ref.valid()) + return Iterator(); + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + return tree->begin(_allocator); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + return Iterator(shortArray, clusterSize, _allocator, _aggrCalc); +} + + +template +typename BTreeStore:: +ConstIterator +BTreeStore:: +beginFrozen(const EntryRef ref) const +{ + if (!ref.valid()) + return ConstIterator(); + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + return tree->getFrozenView(_allocator).begin(); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + return ConstIterator(shortArray, clusterSize, _allocator, _aggrCalc); +} + +template +void +BTreeStore:: +beginFrozen(const EntryRef ref, std::vector &where) const +{ + if 
(!ref.valid()) { + where.emplace_back(); + return; + } + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + tree->getFrozenView(_allocator).begin(where); + return; + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + where.emplace_back(shortArray, clusterSize, _allocator, _aggrCalc); +} + +template +typename BTreeStore:: +AggregatedType +BTreeStore:: +getAggregated(const EntryRef ref) const +{ + if (!ref.valid()) + return AggregatedType(); + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + return tree->getAggregated(_allocator); + } + const KeyDataType *shortArray = getKeyDataEntry(iRef, clusterSize); + AggregatedType a; + for (uint32_t i = 0; i < clusterSize; ++i) { + _aggrCalc.add(a, _aggrCalc.getVal(shortArray[i].getData())); + } + return a; +} + +} // namespace btree + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/btree/btreetraits.h b/searchlib/src/vespa/searchlib/btree/btreetraits.h new file mode 100644 index 00000000000..3b3962fb7ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/btreetraits.h @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search +{ + +namespace btree +{ + +template +struct BTreeTraits { + static const size_t LEAF_SLOTS = LS; + static const size_t INTERNAL_SLOTS = IS; + static const size_t PATH_SIZE = PS; + static const bool BINARY_SEEK = BS; +}; + +typedef BTreeTraits<16, 16, 10, true> BTreeDefaultTraits; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/bufferstate.cpp b/searchlib/src/vespa/searchlib/btree/bufferstate.cpp new file mode 100644 index 00000000000..21f548187ee --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/bufferstate.cpp @@ -0,0 +1,351 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "bufferstate.h" +#include + +namespace search +{ + +namespace btree +{ + + +BufferTypeBase::BufferTypeBase(uint32_t clusterSize, + uint32_t minClusters, + uint32_t maxClusters) + : _clusterSize(clusterSize), + _minClusters(std::min(minClusters, maxClusters)), + _maxClusters(maxClusters), + _activeBuffers(0), + _holdBuffers(0), + _activeUsedElems(0), + _holdUsedElems(0), + _lastUsedElems(NULL) +{ +} + + +BufferTypeBase::~BufferTypeBase(void) +{ + assert(_activeBuffers == 0); + assert(_holdBuffers == 0); + assert(_activeUsedElems == 0); + assert(_holdUsedElems == 0); + assert(_lastUsedElems == NULL); +} + + +void +BufferTypeBase::flushLastUsed(void) +{ + if (_lastUsedElems != NULL) { + _activeUsedElems += *_lastUsedElems; + _lastUsedElems = NULL; + } +} + + +void +BufferTypeBase::onActive(const size_t *usedElems) +{ + flushLastUsed(); + ++_activeBuffers; + _lastUsedElems = usedElems; +} + + +void +BufferTypeBase::onHold(const size_t *usedElems) +{ + if (usedElems == _lastUsedElems) + flushLastUsed(); + --_activeBuffers; + ++_holdBuffers; + assert(_activeUsedElems >= *usedElems); + _activeUsedElems -= *usedElems; + _holdUsedElems += *usedElems; +} + + +void +BufferTypeBase::onFree(size_t usedElems) +{ + 
--_holdBuffers; + assert(_holdUsedElems >= usedElems); + _holdUsedElems -= usedElems; +} + + +size_t +BufferTypeBase::calcClustersToAlloc(size_t sizeNeeded, + uint64_t clusterRefSize) const +{ + size_t usedElems = _activeUsedElems; + if (_lastUsedElems != NULL) + usedElems += *_lastUsedElems; + assert((usedElems % _clusterSize) == 0); + uint64_t maxClusters = std::numeric_limits::max() / _clusterSize; + uint64_t maxClusters2 = clusterRefSize; + if (maxClusters > maxClusters2) + maxClusters = maxClusters2; + if (maxClusters > _maxClusters) + maxClusters = _maxClusters; + uint32_t minClusters = _minClusters; + if (minClusters > maxClusters) + minClusters = maxClusters; + size_t usedClusters = usedElems / _clusterSize; + size_t needClusters = (sizeNeeded + _clusterSize - 1) / _clusterSize; + uint64_t wantClusters = usedClusters + minClusters; + if (wantClusters < needClusters) + wantClusters = needClusters; + if (wantClusters > maxClusters) + wantClusters = maxClusters; + return wantClusters; +} + + +BufferState::FreeListList::~FreeListList(void) +{ + assert(_head == NULL); // Owner should have disabled free lists +} + + +BufferState::BufferState(void) + : _usedElems(0), + _allocElems(0), + _deadElems(0u), + _state(FREE), + _disableElemHoldList(false), + _holdElems(0u), + _freeList(), + _freeListList(NULL), + _nextHasFree(NULL), + _prevHasFree(NULL), + _typeHandler(NULL), + _typeId(0), + _clusterSize(0), + _compacting(false), + _buffer() +{ + _buffer.reset(new Alloc()); +} + + +BufferState::~BufferState(void) +{ + assert(_state == FREE); + assert(_freeListList == NULL); + assert(_nextHasFree == NULL); + assert(_prevHasFree == NULL); + assert(_holdElems == 0); + assert(_freeList.empty()); +} + + +void +BufferState::onActive(uint32_t bufferId, uint32_t typeId, + BufferTypeBase *typeHandler, + size_t sizeNeeded, + size_t maxClusters, + void *&buffer) +{ + assert(buffer == NULL); + assert(_buffer->get() == NULL); + assert(_state == FREE); + assert(_typeHandler == NULL); + 
assert(_allocElems == 0); + assert(_usedElems == 0); + assert(_deadElems == 0u); + assert(_holdElems == 0); + assert(_freeList.empty()); + assert(_nextHasFree == NULL); + assert(_prevHasFree == NULL); + assert(_freeListList == NULL || _freeListList->_head != this); + + size_t initialSizeNeeded = 0; + if (bufferId == 0) + initialSizeNeeded = typeHandler->getClusterSize(); + size_t allocClusters = + typeHandler->calcClustersToAlloc(initialSizeNeeded + sizeNeeded, + maxClusters); + size_t allocSize = allocClusters * typeHandler->getClusterSize(); + assert(allocSize >= initialSizeNeeded + sizeNeeded); + _buffer.reset(new Alloc(allocSize * typeHandler->elementSize())); + buffer = _buffer->get(); + typeHandler->onActive(&_usedElems); + assert(buffer != NULL); + _allocElems = allocSize; + _state = ACTIVE; + _typeHandler = typeHandler; + _typeId = typeId; + _clusterSize = _typeHandler->getClusterSize(); + if (bufferId == 0) { + typeHandler->cleanInitialElements(buffer); + pushed_back(_clusterSize); + _deadElems = _clusterSize; + } +} + + +void +BufferState::onHold(void) +{ + assert(_state == ACTIVE); + assert(_typeHandler != NULL); + _state = HOLD; + _compacting = false; + assert(_deadElems <= _usedElems); + assert(_holdElems <= (_usedElems - _deadElems)); + _holdElems = _usedElems - _deadElems; // Put everything not dead on hold + _typeHandler->onHold(&_usedElems); + if (!_freeList.empty()) { + removeFromFreeListList(); + FreeList().swap(_freeList); + } + assert(_nextHasFree == NULL); + assert(_prevHasFree == NULL); + assert(_freeListList == NULL || _freeListList->_head != this); + setFreeListList(NULL); +} + + +void +BufferState::onFree(void *&buffer) +{ + assert(buffer == _buffer->get()); + assert(_state == HOLD); + assert(_typeHandler != NULL); + assert(_deadElems <= _usedElems); + assert(_holdElems == _usedElems - _deadElems); + _typeHandler->destroyElements(buffer, _usedElems); + Alloc().swap(*_buffer); + _typeHandler->onFree(_usedElems); + buffer = NULL; + _usedElems 
= 0; + _allocElems = 0; + _deadElems = 0u; + _holdElems = 0u; + _state = FREE; + _typeHandler = NULL; + _clusterSize = 0; + assert(_freeList.empty()); + assert(_nextHasFree == NULL); + assert(_prevHasFree == NULL); + assert(_freeListList == NULL || _freeListList->_head != this); + setFreeListList(NULL); + _disableElemHoldList = false; +} + + +void +BufferState::dropBuffer(void *&buffer) +{ + if (_state == FREE) { + assert(buffer == NULL); + return; + } + assert(buffer != NULL); + if (_state == ACTIVE) + onHold(); + if (_state == HOLD) + onFree(buffer); + assert(_state == FREE); + assert(buffer == NULL); +} + + +void +BufferState::setFreeListList(FreeListList *freeListList) +{ + if (_state == FREE && freeListList != NULL) + return; + if (freeListList == _freeListList) + return; // No change + if (_freeListList != NULL && !_freeList.empty()) + removeFromFreeListList(); // Remove from old free list + _freeListList = freeListList; + if (!_freeList.empty()) { + if (freeListList != NULL) + addToFreeListList(); // Changed free list list + else + FreeList().swap(_freeList); // Free lists have been disabled + } +} + + +void +BufferState::addToFreeListList(void) +{ + assert(_freeListList != NULL && _freeListList->_head != this); + assert(_nextHasFree == NULL); + assert(_prevHasFree == NULL); + if (_freeListList->_head != NULL) { + _nextHasFree = _freeListList->_head; + _prevHasFree = _nextHasFree->_prevHasFree; + _nextHasFree->_prevHasFree = this; + _prevHasFree->_nextHasFree = this; + } else { + _nextHasFree = this; + _prevHasFree = this; + } + _freeListList->_head = this; +} + + +void +BufferState::removeFromFreeListList(void) +{ + assert(_freeListList != NULL); + assert(_nextHasFree != NULL); + assert(_prevHasFree != NULL); + if (_nextHasFree == this) { + assert(_prevHasFree == this); + assert(_freeListList->_head == this); + _freeListList->_head = NULL; + } else { + assert(_prevHasFree != this); + _freeListList->_head = _nextHasFree; + _nextHasFree->_prevHasFree = 
_prevHasFree; + _prevHasFree->_nextHasFree = _nextHasFree; + } + _nextHasFree = NULL; + _prevHasFree = NULL; +} + + +void +BufferState::disableElemHoldList(void) +{ + _disableElemHoldList = true; +} + + +void +BufferState::fallbackResize(uint64_t newSize, + size_t maxClusters, + void *&buffer, + Alloc &holdBuffer) +{ + assert(_state == ACTIVE); + assert(_typeHandler != NULL); + assert(holdBuffer.get() == NULL); + size_t allocClusters = _typeHandler->calcClustersToAlloc(newSize, + maxClusters); + size_t allocSize = allocClusters * _typeHandler->getClusterSize(); + assert(allocSize >= newSize); + assert(allocSize > _allocElems); + Alloc::UP newBuffer(std::make_unique + (allocSize * _typeHandler->elementSize())); + _typeHandler->fallbackCopy(newBuffer->get(), buffer, _usedElems); + holdBuffer.swap(*_buffer); + std::atomic_thread_fence(std::memory_order_release); + _buffer = std::move(newBuffer); + buffer = _buffer->get(); + _allocElems = allocSize; + std::atomic_thread_fence(std::memory_order_release); +} + +} // namespace btree + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/bufferstate.h b/searchlib/src/vespa/searchlib/btree/bufferstate.h new file mode 100644 index 00000000000..3c7a3557952 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/bufferstate.h @@ -0,0 +1,389 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include + +#include "entryref.h" +#include + +namespace search { +namespace btree { + + +class BufferTypeBase +{ +private: + BufferTypeBase(const BufferTypeBase &rhs); + + BufferTypeBase & + operator=(const BufferTypeBase &rhs); +protected: + uint32_t _clusterSize; // Number of elements in an allocation unit + uint32_t _minClusters; // Minimum number of clusters to allocate + uint32_t _maxClusters; // Maximum number of clusters to allocate + uint32_t _activeBuffers; + uint32_t _holdBuffers; + size_t _activeUsedElems; // used elements in all but last active buffer + size_t _holdUsedElems; // used elements in all held buffers + const size_t *_lastUsedElems; // used elements in last active buffer + +public: + BufferTypeBase(uint32_t clusterSize, + uint32_t minClusters, + uint32_t maxClusters); + + virtual + ~BufferTypeBase(void); + + virtual void + destroyElements(void *buffer, size_t numElements) = 0; + + virtual void + fallbackCopy(void *newBuffer, + const void *oldBuffer, + size_t numElements) = 0; + + virtual void + cleanInitialElements(void *buffer) = 0; + + virtual size_t + elementSize(void) const = 0; + + virtual void + cleanHold(void *buffer, uint64_t offset, uint64_t len) = 0; + + uint32_t + getClusterSize(void) const + { + return _clusterSize; + } + + void + flushLastUsed(void); + + void + onActive(const size_t *usedElems); + + void + onHold(const size_t *usedElems); + + virtual void + onFree(size_t usedElems); + + /** + * Calculate number of clusters to allocate for new buffer. 
+ * + * @param sizeNeeded number of elements needed now + * @param clusterRefSize number of clusters expressable via reference type + * + * @return number of clusters to allocate for new buffer + */ + virtual size_t + calcClustersToAlloc(size_t sizeNeeded, + uint64_t clusterRefSize) const; + + uint32_t getActiveBuffers() const { return _activeBuffers; } +}; + + +template +class BufferType : public BufferTypeBase +{ +private: + BufferType(const BufferType &rhs); + + BufferType & + operator=(const BufferType &rhs); +public: + EntryType _emptyEntry; + + BufferType(uint32_t clusterSize, + uint32_t minClusters, + uint32_t maxClusters) + : BufferTypeBase(clusterSize, minClusters, maxClusters), + _emptyEntry() + { + } + + virtual void + destroyElements(void *buffer, size_t numElements); + + virtual void + fallbackCopy(void *newBuffer, + const void *oldBuffer, + size_t numElements); + + virtual void + cleanInitialElements(void *buffer); + + virtual void + cleanHold(void *buffer, uint64_t offset, uint64_t len); + + virtual size_t + elementSize(void) const + { + return sizeof(EntryType); + } +}; + + +template +void +BufferType::destroyElements(void *buffer, size_t numElements) +{ + EntryType *e = static_cast(buffer); + for (size_t j = numElements; j != 0; --j) { + e->~EntryType(); + ++e; + } +} + + +template +void +BufferType::fallbackCopy(void *newBuffer, + const void *oldBuffer, + size_t numElements) +{ + EntryType *d = static_cast(newBuffer); + const EntryType *s = static_cast(oldBuffer); + for (size_t j = numElements; j != 0; --j) { + new (static_cast(d)) EntryType(*s); + ++s; + ++d; + } +} + + +template +void +BufferType::cleanInitialElements(void *buffer) +{ + EntryType *e = static_cast(buffer); + for (size_t j = _clusterSize; j != 0; --j) { + new (static_cast(e)) EntryType(_emptyEntry); + ++e; + } +} + + +template +void +BufferType::cleanHold(void *buffer, uint64_t offset, uint64_t len) +{ + EntryType *e = static_cast(buffer) + offset; + for (size_t j = len; j != 0; 
--j) { + *e = _emptyEntry; + ++e; + } +} + + +class BufferState +{ +public: + typedef vespalib::DefaultAlloc Alloc; + + class FreeListList + { + public: + BufferState *_head; + + FreeListList(void) + : _head(NULL) + { + } + + ~FreeListList(void); + }; + + typedef vespalib::Array FreeList; + + enum State + { + FREE, + ACTIVE, + HOLD + }; + + size_t _usedElems; + size_t _allocElems; + uint64_t _deadElems; + State _state; + bool _disableElemHoldList; + uint64_t _holdElems; + FreeList _freeList; + FreeListList *_freeListList; // non-NULL if free lists are enabled + + // NULL pointers if not on circular list of buffer states with free elems + BufferState *_nextHasFree; + BufferState *_prevHasFree; + + BufferTypeBase *_typeHandler; + uint32_t _typeId; + uint32_t _clusterSize; + bool _compacting; + + /* + * TODO: Check if per-buffer free lists are useful, or if + * compaction should always be used to free up whole buffers. + */ + + BufferState(void); + + ~BufferState(void); + + /** + * Transition from FREE to ACTIVE state. + * + * @param bufferId Id of buffer to be active. + * @param typeId registered data type for buffer. + * @param typeHandler type handler for registered data type. + * @param sizeNeeded Number of elements needed to be free + * @param maxSize number of clusters expressable via reference + * type + * @param buffer start of buffer. + */ + void + onActive(uint32_t bufferId, uint32_t typeId, BufferTypeBase *typeHandler, + size_t sizeNeeded, + size_t maxSize, + void *&buffer); + + /** + * Transition from ACTIVE to HOLD state. + */ + void + onHold(void); + + /** + * Transition from HOLD to FREE state. + */ + void + onFree(void *&buffer); + + /** + * Set list of buffer states with nonempty free lists. + * + * @param freeListList List of buffer states. If NULL then free lists + * are disabled. + */ + void + setFreeListList(FreeListList *freeListList); + + /** + * Add buffer state to list of buffer states with nonempty free lists. 
+ */ + void + addToFreeListList(void); + + /** + * Remove buffer state from list of buffer states with nonempty free lists. + */ + void + removeFromFreeListList(void); + + /** + * Disable hold of elements, just mark them as dead without + * cleanup. Typically used when tearing down data structure in a + * controlled manner. + */ + void + disableElemHoldList(void); + + /** + * Pop element from free list. + */ + EntryRef + popFreeList(void) + { + EntryRef ret = _freeList.back(); + _freeList.pop_back(); + if (_freeList.empty()) + removeFromFreeListList(); + _deadElems -= _clusterSize; + return ret; + } + + + size_t + size(void) const + { + return _usedElems; + } + + size_t + capacity(void) const + { + return _allocElems; + } + + size_t + remaining(void) const + { + return _allocElems - _usedElems; + } + + void + pushed_back(uint64_t len) + { + _usedElems += len; + } + + void + cleanHold(void *buffer, uint64_t offset, uint64_t len) + { + _typeHandler->cleanHold(buffer, offset, len); + } + + void + dropBuffer(void *&buffer); + + uint32_t + getTypeId(void) const + { + return _typeId; + } + + uint32_t + getClusterSize(void) const + { + return _clusterSize; + } + + uint64_t getDeadElems() const { return _deadElems; } + + bool + getCompacting(void) const + { + return _compacting; + } + + void + setCompacting(void) + { + _compacting = true; + } + + void + fallbackResize(uint64_t newSize, + size_t maxClusters, + void *&buffer, + Alloc &holdBuffer); + + bool isActive(uint32_t typeId) const { + return ((_state == ACTIVE) && (_typeId == typeId)); + } + +private: + Alloc::UP _buffer; +}; + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/datastore.cpp b/searchlib/src/vespa/searchlib/btree/datastore.cpp new file mode 100644 index 00000000000..4af74d6f861 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/datastore.cpp @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include "datastore.h" +#include "datastore.hpp" + +namespace search +{ + +namespace btree +{ + +template class DataStoreT >; + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/datastore.h b/searchlib/src/vespa/searchlib/btree/datastore.h new file mode 100644 index 00000000000..b709052f4ac --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/datastore.h @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "datastorebase.h" +#include "btreenode.h" + +namespace search +{ + +namespace btree +{ + +template +struct DefaultReclaimer { + static void reclaim(EntryType * entry) { + (void) entry; + } +}; + +template > +class DataStoreT : public DataStoreBase +{ +private: + DataStoreT(const DataStoreT &rhs); + + DataStoreT & + operator=(const DataStoreT &rhs); +public: + typedef RefT RefType; + + DataStoreT(void); + + ~DataStoreT(void); + + /** + * Increase number of dead elements in buffer. + * + * @param ref Reference to dead stored features + * @param dead Number of newly dead elements + */ + void + incDead(EntryRef ref, uint64_t dead) + { + RefType intRef(ref); + DataStoreBase::incDead(intRef.bufferId(), dead); + } + + /** + * Free element. + */ + void + freeElem(EntryRef ref, uint64_t len); + + /** + * Hold element. + */ + void + holdElem(EntryRef ref, uint64_t len); + + /** + * Trim elem hold list, freeing elements that no longer need to be held. + * + * @param usedGen lowest generation that is still used. 
+ */ + virtual void + trimElemHoldList(generation_t usedGen); + + virtual void + clearElemHoldList(void); + + bool + getCompacting(EntryRef ref) const + { + return getBufferState(RefType(ref).bufferId()).getCompacting(); + } + + template + std::pair + allocNewEntry(uint32_t typeId); + + template + std::pair + allocEntry(uint32_t typeId); + + template + std::pair + allocNewEntryCopy(uint32_t typeId, const EntryType &rhs); + + template + std::pair + allocEntryCopy(uint32_t typeId, const EntryType &rhs); + +}; + + +template > +class DataStore : public DataStoreT +{ +private: + DataStore(const DataStore &rhs); + + DataStore & + operator=(const DataStore &rhs); +protected: + typedef DataStoreT ParentType; + using ParentType::ensureBufferCapacity; + // using ParentType::activeBuffer; + using ParentType::_activeBufferIds; + using ParentType::_buffers; + using ParentType::_states; + using ParentType::_freeListLists; + using ParentType::getBufferEntry; + using ParentType::dropBuffers; + using ParentType::initActiveBuffers; + using ParentType::addType; + + BufferType _type; +public: + typedef typename ParentType::RefType RefType; + DataStore(); + + ~DataStore(void); + + EntryRef + addEntry(const EntryType &e); + + EntryRef + addEntry2(const EntryType &e); + + const EntryType & + getEntry(EntryRef ref) const; +}; + +extern template class DataStoreT >; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/datastore.hpp b/searchlib/src/vespa/searchlib/btree/datastore.hpp new file mode 100644 index 00000000000..64e55b840fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/datastore.hpp @@ -0,0 +1,248 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "datastore.h" + +namespace search +{ + +namespace btree +{ + + +template +DataStoreT::DataStoreT(void) + : DataStoreBase(RefType::numBuffers(), + RefType::offsetSize() / RefType::align(1)) +{ +} + + +template +DataStoreT::~DataStoreT(void) +{ +} + + +template +void +DataStoreT::freeElem(EntryRef ref, uint64_t len) +{ + RefType intRef(ref); + BufferState &state = _states[intRef.bufferId()]; + if (state._state == BufferState::ACTIVE) { + if (state._freeListList != NULL && len == state.getClusterSize()) { + if (state._freeList.empty()) + state.addToFreeListList(); + state._freeList.push_back(ref); + } + } else { + assert(state._state == BufferState::HOLD); + } + state._deadElems += len; + state.cleanHold(_buffers[intRef.bufferId()], + (intRef.offset() / RefType::align(1)) * + state.getClusterSize(), len); +} + + +template +void +DataStoreT::holdElem(EntryRef ref, uint64_t len) +{ + RefType intRef(ref); + uint64_t alignedLen = RefType::align(len); + BufferState &state = _states[intRef.bufferId()]; + assert(state._state == BufferState::ACTIVE); + if (state._disableElemHoldList) { + state._deadElems += alignedLen; + return; + } + _elemHold1List.push_back(ElemHold1ListElem(ref, alignedLen)); + state._holdElems += alignedLen; +} + + +template +void +DataStoreT::trimElemHoldList(generation_t usedGen) +{ + ElemHold2List &elemHold2List = _elemHold2List; + + ElemHold2List::iterator it(elemHold2List.begin()); + ElemHold2List::iterator ite(elemHold2List.end()); + uint32_t freed = 0; + for (; it != ite; ++it) { + if (static_cast(it->_generation - usedGen) >= 0) + break; + RefType intRef(it->_ref); + BufferState &state = _states[intRef.bufferId()]; + freeElem(it->_ref, it->_len); + assert(state._holdElems >= it->_len); + state._holdElems -= it->_len; + ++freed; + } + if (freed != 0) { + elemHold2List.erase(elemHold2List.begin(), it); + } +} + + +template +void +DataStoreT::clearElemHoldList(void) +{ + ElemHold2List &elemHold2List = _elemHold2List; + + 
ElemHold2List::iterator it(elemHold2List.begin()); + ElemHold2List::iterator ite(elemHold2List.end()); + for (; it != ite; ++it) { + RefType intRef(it->_ref); + BufferState &state = _states[intRef.bufferId()]; + freeElem(it->_ref, it->_len); + assert(state._holdElems >= it->_len); + state._holdElems -= it->_len; + } + elemHold2List.clear(); +} + + +/** + * Allocate a new, default constructed entry at the end of the active buffer + * for typeId. + * + * Capacity for one more element is ensured first; the entry is then + * constructed in place at the current end of the buffer and the buffer size + * is bumped by one. + * + * @param typeId type id selecting the active buffer + * @return pair of (reference to the entry, pointer to the entry) + */ +template +template +std::pair +DataStoreT::allocNewEntry(uint32_t typeId) +{ + ensureBufferCapacity(typeId, 1); + uint32_t activeBufferId = getActiveBufferId(typeId); + BufferState &state = getBufferState(activeBufferId); + assert(state._state == BufferState::ACTIVE); + size_t oldSize = state.size(); + EntryType *entry = getBufferEntry(activeBufferId, oldSize); + new (static_cast(entry)) EntryType(); + state.pushed_back(1); + return std::make_pair(RefType(oldSize, activeBufferId), entry); +} + + +/** + * Allocate an entry, preferring a previously freed slot. + * + * When the free list list for typeId is empty this is the same as + * allocNewEntry(). Otherwise a reference is popped from the free list of the + * buffer at the head of the free list list (which must be ACTIVE, asserted) + * and Reclaimer::reclaim() is called on the existing entry before it is + * returned. + * + * @param typeId type id selecting free list list and active buffer + * @return pair of (reference to the entry, pointer to the entry) + */ +template +template +std::pair +DataStoreT::allocEntry(uint32_t typeId) +{ + BufferState::FreeListList &freeListList = getFreeList(typeId); + if (freeListList._head == NULL) { + return allocNewEntry(typeId); + } + BufferState &state = *freeListList._head; + assert(state._state == BufferState::ACTIVE); + RefType ref(state.popFreeList()); + EntryType *entry = + getBufferEntry(ref.bufferId(), ref.offset()); + Reclaimer::reclaim(entry); + return std::make_pair(ref, entry); +} + + +/** + * Allocate a new entry at the end of the active buffer for typeId, copy + * constructed from rhs. Apart from the constructor used, the steps are the + * same as in allocNewEntry(). + * + * @param typeId type id selecting the active buffer + * @param rhs entry to copy + * @return pair of (reference to the entry, pointer to the entry) + */ +template +template +std::pair +DataStoreT::allocNewEntryCopy(uint32_t typeId, const EntryType &rhs) +{ + ensureBufferCapacity(typeId, 1); + uint32_t activeBufferId = getActiveBufferId(typeId); + BufferState &state = getBufferState(activeBufferId); + assert(state._state == BufferState::ACTIVE); + size_t oldSize = state.size(); + EntryType *entry = getBufferEntry(activeBufferId, oldSize); + new (static_cast(entry)) EntryType(rhs); + state.pushed_back(1); + return std::make_pair(RefType(oldSize, activeBufferId), entry); +} + + +/** + * Allocate an entry holding a copy of rhs, preferring a previously freed + * slot. + * + * With an empty free list list this is the same as allocNewEntryCopy(). + * Otherwise a slot is popped from the free list, Reclaimer::reclaim() is + * called on the existing entry and rhs is then assigned to it. + * + * @param typeId type id selecting free list list and active buffer + * @param rhs entry to copy + * @return pair of (reference to the entry, pointer to the entry) + */ +template +template +std::pair +DataStoreT::allocEntryCopy(uint32_t typeId, const EntryType &rhs) +{ + BufferState::FreeListList 
&freeListList = getFreeList(typeId); + if (freeListList._head == NULL) { + return allocNewEntryCopy(typeId, rhs); + } + BufferState &state = *freeListList._head; + assert(state._state == BufferState::ACTIVE); + RefType ref(state.popFreeList()); + EntryType *entry = + getBufferEntry(ref.bufferId(), ref.offset()); + Reclaimer::reclaim(entry); + *entry = rhs; + return std::make_pair(ref, entry); +} + + + +/** + * Construct the store: _type is registered as the only buffer type (so it + * gets type id 0) and the active buffers are initialized. + */ +template +DataStore::DataStore(void) + : ParentType(), + _type(1, RefType::offsetSize(), RefType::offsetSize()) +{ + addType(&_type); + initActiveBuffers(); +} + +/** + * Destructor; buffers are dropped explicitly here, before the _type member + * is destroyed. + */ +template +DataStore::~DataStore(void) +{ + dropBuffers(); // Drop buffers before type handlers are dropped +} + +/** + * Store a copy of e at the end of the active buffer for type id 0. + * + * Capacity for one more element is ensured, e is copy constructed in place + * at the current end of the buffer and the buffer size is bumped by one. + * The free list is not consulted; see addEntry2() for that. + * + * @param e entry to copy into the store + * @return reference to the new entry + */ +template +EntryRef +DataStore::addEntry(const EntryType &e) +{ + ensureBufferCapacity(0, 1); + uint32_t activeBufferId = _activeBufferIds[0]; + BufferState &state = _states[activeBufferId]; + size_t oldSize = state.size(); + EntryType *be = static_cast(_buffers[activeBufferId]) + + oldSize; + new (static_cast(be)) EntryType(e); + RefType ref(oldSize, activeBufferId); + state.pushed_back(1); + return ref; +} + +/** + * Store a copy of e, reusing a freed slot when possible. + * + * With an empty free list list for type id 0 this is the same as addEntry(). + * Otherwise a reference is popped from the free list of the buffer at the + * head of the free list list (which must be ACTIVE, asserted) and e is + * assigned to the entry already living in that slot. + * + * @param e entry to copy into the store + * @return reference to the stored entry + */ +template +EntryRef +DataStore::addEntry2(const EntryType &e) +{ + BufferState::FreeListList &freeListList = _freeListLists[0]; + if (freeListList._head == NULL) + return addEntry(e); + BufferState &state = *freeListList._head; + assert(state._state == BufferState::ACTIVE); + RefType ref(state.popFreeList()); + EntryType *be = + this->template + getBufferEntry(ref.bufferId(), ref.offset()); + *be = e; + return ref; +} + +/** + * Look up the entry that ref refers to. No validation of ref is done here. + * + * @param ref reference to the entry + * @return the entry stored at (buffer id, offset) of ref + */ +template +const EntryType & +DataStore::getEntry(EntryRef ref) const +{ + RefType intRef(ref); + const EntryType *be = + this->template + getBufferEntry(intRef.bufferId(), intRef.offset()); + return *be; +} + +extern template class DataStoreT >; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/datastorebase.cpp b/searchlib/src/vespa/searchlib/btree/datastorebase.cpp new file mode 100644 index 00000000000..45c68630773 
--- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/datastorebase.cpp @@ -0,0 +1,426 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "datastore.h" + +using vespalib::GenerationHeldBase; + +namespace search +{ + +namespace btree +{ + +namespace +{ + +/* + * Minimum dead bytes in active write buffer before switching to new + * active write buffer even if another active buffer has more dead + * bytes due to considering the active write buffer as too dead. + */ +constexpr size_t TOODEAD_SLACK = 0x4000u; + +/* + * Check if active write buffer is too dead for further use, i.e. if it + * is likely to be the worst buffer at next compaction. If so, filling it + * up completely will be wasted work, as data will have to be moved again + * rather soon. + * + * The buffer counts as too dead when dead elements times cluster size is + * at least TOODEAD_SLACK and the dead elements make up at least half of + * state.size(). + * + * NOTE(review): the "dead bytes" figure multiplies by the cluster size, + * while getMemStats() derives bytes from elementSize() - confirm that + * this is the intended unit. + */ +bool activeWriteBufferTooDead(const BufferState &state) +{ + size_t deadElems = state.getDeadElems(); + size_t deadBytes = deadElems * state.getClusterSize(); + return ((deadBytes >= TOODEAD_SLACK) && (deadElems * 2 >= state.size())); +} + +} + +/* + * Take over a buffer allocation (moved in) together with what is needed to + * destroy its elements later. size is passed on to GenerationHeldBase. + */ +DataStoreBase::FallbackHold::FallbackHold(size_t size, + BufferState::Alloc &&buffer, + size_t usedElems, + BufferTypeBase *typeHandler, + uint32_t typeId) + : GenerationHeldBase(size), + _buffer(std::move(buffer)), + _usedElems(usedElems), + _typeHandler(typeHandler), + _typeId(typeId) +{ +} + + +/* + * Destroy the first _usedElems elements of the held buffer through the type + * handler. + */ +DataStoreBase::FallbackHold::~FallbackHold(void) +{ + _typeHandler->destroyElements(_buffer.get(), _usedElems); +} + + +/* + * Generation held marker for a buffer in hold state. It only remembers the + * owning data store and the buffer id; when the marker is destroyed, + * doneHoldBuffer() is called for that buffer on the data store. + */ +class DataStoreBase::BufferHold : public GenerationHeldBase +{ + DataStoreBase &_dsb; + uint32_t _bufferId; + +public: + BufferHold(size_t size, + DataStoreBase &dsb, + uint32_t bufferId) + : GenerationHeldBase(size), + _dsb(dsb), + _bufferId(bufferId) + { + } + + virtual + ~BufferHold(void) + { + _dsb.doneHoldBuffer(_bufferId); + } +}; + + +/* + * Construct a data store base with numBuffers buffer slots and buffer + * states. No buffer types are registered and free lists start out disabled. + */ +DataStoreBase::DataStoreBase(uint32_t numBuffers, + size_t maxClusters) + : _buffers(numBuffers), + _activeBufferIds(), + _states(numBuffers), + 
_typeHandlers(), + _freeListLists(), + _freeListsEnabled(false), + _elemHold1List(), + _elemHold2List(), + _numBuffers(numBuffers), + _maxClusters(maxClusters), + _genHolder() +{ +} + + +/* + * Detach the free lists from all buffers. Both element hold lists must + * already be empty at this point (asserted). + */ +DataStoreBase::~DataStoreBase(void) +{ + disableFreeLists(); + + assert(_elemHold1List.empty()); + assert(_elemHold2List.empty()); +} + + +/* + * Make another buffer the active buffer for typeId. + * + * Buffers are probed cyclically, starting with the one after the current + * active buffer, until one in FREE state is found. That buffer is activated + * with sizeNeeded and recorded as the active buffer for the type. + * + * NOTE(review): the search does not terminate when no buffer is FREE - + * confirm that callers guarantee a free buffer. + */ +void +DataStoreBase::switchActiveBuffer(uint32_t typeId, size_t sizeNeeded) +{ + size_t activeBufferId = _activeBufferIds[typeId]; + do { + // start using next buffer + activeBufferId = nextBufferId(activeBufferId); + } while (_states[activeBufferId]._state != BufferState::FREE); + onActive(activeBufferId, typeId, sizeNeeded, _maxClusters); + _activeBufferIds[typeId] = activeBufferId; +} + + +/* + * Activate one buffer for each registered type. For every type the search + * starts at buffer 0 and the first buffer in FREE state is activated with a + * needed size of 0. + * + * NOTE(review): as in switchActiveBuffer(), the search does not terminate + * when no buffer is FREE. + */ +void +DataStoreBase::initActiveBuffers(void) +{ + uint32_t numTypes = _activeBufferIds.size(); + for (uint32_t typeId = 0; typeId < numTypes; ++typeId) { + size_t activeBufferId = 0; + while (_states[activeBufferId]._state != BufferState::FREE) { + // start using next buffer + activeBufferId = nextBufferId(activeBufferId); + } + onActive(activeBufferId, typeId, 0u, _maxClusters); + _activeBufferIds[typeId] = activeBufferId; + } +} + + +/* + * Register a buffer type. The type id of the handler is its index in + * _typeHandlers; a placeholder active buffer id (0) and an empty free list + * list are added at the same index. + */ +void +DataStoreBase::addType(BufferTypeBase *typeHandler) +{ + _activeBufferIds.push_back(0); + _typeHandlers.push_back(typeHandler); + _freeListLists.push_back(BufferState::FreeListList()); +} + + +/* + * Move all entries from _elemHold1List to the back of _elemHold2List, + * tagging each of them with generation. + */ +void +DataStoreBase::transferElemHoldList(generation_t generation) +{ + ElemHold2List &elemHold2List = _elemHold2List; + for (const ElemHold1ListElem & elemHold1 : _elemHold1List) { + elemHold2List.push_back(ElemHold2ListElem(elemHold1, generation)); + } + _elemHold1List.clear(); +} + + +/* + * Tag everything that was put on hold since the last transfer with + * generation: the generation holder always, the element hold list only when + * hasElemHold1() reports pending entries. + */ +void +DataStoreBase::transferHoldLists(generation_t generation) +{ + _genHolder.transferHoldLists(generation); + if (hasElemHold1()) + transferElemHoldList(generation); +} + + +/* + * Called when the hold on a buffer ends; forwards to onFree() on the buffer + * state. + */ +void +DataStoreBase::doneHoldBuffer(uint32_t bufferId) +{ + _states[bufferId].onFree(_buffers[bufferId]); +} + + +/* + * Trim the element hold list and the generation holder against usedGen. + */ +void +DataStoreBase::trimHoldLists(generation_t usedGen) 
+{ + trimElemHoldList(usedGen); // Trim entries before trimming buffers + + _genHolder.trimHoldLists(usedGen); +} + + +/* + * Release everything that is on hold, regardless of generation: entries + * still on the first element hold list are transferred with generation 0, + * all held elements are freed and the generation holder is cleared. + */ +void +DataStoreBase::clearHoldLists(void) +{ + transferElemHoldList(0); + clearElemHoldList(); + _genHolder.clearHoldLists(); +} + + +/* + * Call dropBuffer() on every buffer state and then clear the generation + * holder. + */ +void +DataStoreBase::dropBuffers(void) +{ + uint32_t numBuffers = _buffers.size(); + for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) { + _states[bufferId].dropBuffer(_buffers[bufferId]); + } + _genHolder.clearHoldLists(); +} + + +/* + * Report memory usage based on getMemStats(): the allocated, used, dead and + * hold byte counters are copied into a MemoryUsage object. + */ +MemoryUsage +DataStoreBase::getMemoryUsage(void) const +{ + MemStats stats = getMemStats(); + MemoryUsage usage; + usage.setAllocatedBytes(stats._allocBytes); + usage.setUsedBytes(stats._usedBytes); + usage.setDeadBytes(stats._deadBytes); + usage.setAllocatedBytesOnHold(stats._holdBytes); + return usage; +} + + +/* + * Put a buffer on hold. The buffer state is told via onHold() and a + * BufferHold marker for the buffer is handed to the generation holder. The + * marker is registered with a size of 0 bytes. + */ +void +DataStoreBase::holdBuffer(uint32_t bufferId) +{ + _states[bufferId].onHold(); + size_t holdBytes = 0u; // getMemStats() still accounts held buffers + GenerationHeldBase::UP hold(new BufferHold(holdBytes, *this, bufferId)); + _genHolder.hold(std::move(hold)); +} + + +/* + * Enable free lists: every buffer that is ACTIVE and not being compacted + * gets the free list list of its type attached. + */ +void +DataStoreBase::enableFreeLists(void) +{ + for (BufferState & bState : _states) { + if (bState._state != BufferState::ACTIVE || bState.getCompacting()) + continue; + bState.setFreeListList(&_freeListLists[bState._typeId]); + } + _freeListsEnabled = true; +} + + +/* + * Disable free lists: the free list list is detached from every buffer. + */ +void +DataStoreBase::disableFreeLists(void) +{ + for (BufferState & bState : _states) { + bState.setFreeListList(nullptr); + } + _freeListsEnabled = false; +} + + +/* + * Attach the free list list of its type to a single buffer. Nothing happens + * unless free lists are enabled and the buffer is ACTIVE and not being + * compacted. + */ +void +DataStoreBase::enableFreeList(uint32_t bufferId) +{ + BufferState &state = _states[bufferId]; + if (_freeListsEnabled && + state._state == BufferState::ACTIVE && + !state.getCompacting()) + state.setFreeListList(&_freeListLists[state._typeId]); +} + + +/* + * Detach the free list list from a single buffer. + */ +void +DataStoreBase::disableFreeList(uint32_t bufferId) +{ + _states[bufferId].setFreeListList(NULL); +} + + +/* + * Disable the element hold list for every buffer that is not FREE. + */ +void +DataStoreBase::disableElemHoldList(void) +{ + for (auto &state : _states) { + if (state._state != 
BufferState::FREE) + state.disableElemHoldList(); + } +} + + +/* + * Aggregate element and byte counters over all buffers. + * + * A buffer that is FREE or has no type handler only bumps the free buffer + * counter. ACTIVE and HOLD buffers add their allocated/used/dead/hold + * element counts, and the same counts multiplied by the element size of the + * type handler as bytes; they differ only in which buffer counter is + * bumped. Any other buffer state aborts the process. + * + * NOTE(review): the ACTIVE and HOLD branches repeat the same accumulation + * and are candidates for a shared helper. + */ +DataStoreBase::MemStats +DataStoreBase::getMemStats(void) const +{ + MemStats stats; + + for (const BufferState & bState: _states) { + auto typeHandler = bState._typeHandler; + BufferState::State state = bState._state; + if ((state == BufferState::FREE) || (typeHandler == nullptr)) { + ++stats._freeBuffers; + } else if (state == BufferState::ACTIVE) { + size_t elementSize = typeHandler->elementSize(); + ++stats._activeBuffers; + stats._allocElems += bState._allocElems; + stats._usedElems += bState._usedElems; + stats._deadElems += bState._deadElems; + stats._holdElems += bState._holdElems; + stats._allocBytes += bState._allocElems * elementSize; + stats._usedBytes += bState._usedElems * elementSize; + stats._deadBytes += bState._deadElems * elementSize; + stats._holdBytes += bState._holdElems * elementSize; + } else if (state == BufferState::HOLD) { + size_t elementSize = typeHandler->elementSize(); + ++stats._holdBuffers; + stats._allocElems += bState._allocElems; + stats._usedElems += bState._usedElems; + stats._deadElems += bState._deadElems; + stats._holdElems += bState._holdElems; + stats._allocBytes += bState._allocElems * elementSize; + stats._usedBytes += bState._usedElems * elementSize; + stats._deadBytes += bState._deadElems * elementSize; + stats._holdBytes += bState._holdElems * elementSize; + } else { + abort(); + } + } + return stats; +} + + +/* + * Activate a buffer for a type. typeId must be a registered type and + * bufferId a valid buffer id (both asserted). The work is delegated to + * onActive() on the buffer state, after which the free list list is + * attached to the buffer if enableFreeList() allows it. + */ +void +DataStoreBase::onActive(uint32_t bufferId, uint32_t typeId, + size_t sizeNeeded, + size_t maxClusters) +{ + assert(typeId < _typeHandlers.size()); + assert(bufferId < _numBuffers); + BufferState &state = _states[bufferId]; + state.onActive(bufferId, typeId, + _typeHandlers[typeId], + sizeNeeded, + maxClusters, + _buffers[bufferId]); + enableFreeList(bufferId); +} + +/* + * Start compaction of all buffers of a type. Every buffer that is ACTIVE, + * belongs to typeId and is not already being compacted is marked as + * compacting and gets its free list list detached. A fresh active buffer is + * then selected for the type. + * + * Returns the ids of the buffers that were marked, in the form that + * finishCompact() takes. + */ +std::vector +DataStoreBase::startCompact(uint32_t typeId) +{ + std::vector toHold; + + for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) { + BufferState &state = 
getBufferState(bufferId); + if (state._state == BufferState::ACTIVE && + state.getTypeId() == typeId && + !state.getCompacting()) { + state.setCompacting(); + toHold.push_back(bufferId); + disableFreeList(bufferId); + } + } + switchActiveBuffer(typeId, 0u); + return toHold; +} + +/* + * Finish compaction: every buffer listed in toHold is put on hold. + */ +void +DataStoreBase::finishCompact(const std::vector &toHold) +{ + for (uint32_t bufferId : toHold) { + holdBuffer(bufferId); + } +} + + +/* + * Resize a buffer through fallbackResize() on its buffer state and keep + * toHoldBuffer alive in a FallbackHold handed to the generation holder. + * + * The used and allocated element counts and the element size are sampled + * before the resize; the FallbackHold is registered with the old allocated + * size in bytes and destroys the old used elements when it is released. + * + * NOTE(review): presumably the buffer state moves the previous allocation + * into toHoldBuffer - confirm against BufferState::fallbackResize(). + */ +void +DataStoreBase::fallbackResize(uint32_t bufferId, + uint64_t newSize) +{ + BufferState &state = getBufferState(bufferId); + BufferState::Alloc toHoldBuffer; + size_t oldUsedElems = state._usedElems; + size_t oldAllocElems = state._allocElems; + size_t elementSize = state._typeHandler->elementSize(); + state.fallbackResize(newSize, + _maxClusters, + _buffers[bufferId], + toHoldBuffer); + GenerationHeldBase::UP + hold(new FallbackHold(oldAllocElems * elementSize, + std::move(toHoldBuffer), + oldUsedElems, + state._typeHandler, + state._typeId)); + _genHolder.hold(std::move(hold)); +} + + +/* + * Pick one buffer of a type for compaction and return its id. + * + * With a single active buffer for the type, that buffer is chosen and a + * fresh active buffer is selected. Otherwise all buffers are scanned and the + * buffer active for typeId with the most dead elements is chosen (the + * current active buffer when no buffer has any dead elements); for buffer 0 + * one cluster size is subtracted from its dead element count first. + * + * The chosen buffer is marked as compacting, gets its element hold list + * disabled and its free list list detached. + * + * NOTE(review): worstDead is uint32_t while dead is size_t, so the stored + * maximum is truncated for very large counts. The subtraction for buffer 0 + * wraps around if that buffer has fewer dead elements than one cluster - + * confirm that this cannot happen. + */ +uint32_t +DataStoreBase::startCompactWorstBuffer(uint32_t typeId) +{ + uint32_t activeBufferId = getActiveBufferId(typeId); + const BufferTypeBase *typeHandler = _typeHandlers[typeId]; + assert(typeHandler->getActiveBuffers() >= 1u); + if (typeHandler->getActiveBuffers() == 1u) { + // Single active buffer for type, no need for scan + _states[activeBufferId].setCompacting(); + _states[activeBufferId].disableElemHoldList(); + disableFreeList(activeBufferId); + switchActiveBuffer(typeId, 0u); + return activeBufferId; + } + // Multiple active buffers for type, must perform full scan + uint32_t worstBufferId = activeBufferId; + uint32_t worstDead = 0; + for (uint32_t bufferId = 0; bufferId < _numBuffers; ++bufferId) { + const auto &state = _states[bufferId]; + if (state.isActive(typeId)) { + size_t dead = state.getDeadElems(); + if (bufferId == 0u) { + // buffer 0 is special due to invalid ref -> (buf 0, offset 0) + dead -= state.getClusterSize(); + } + if (dead > 
worstDead) { + worstBufferId = bufferId; + worstDead = dead; + } + } + } + /* + * Move writes to a fresh buffer when the active write buffer is the one + * picked for compaction, or when it is too dead to be worth filling up. + */ + if ((worstBufferId == activeBufferId) || + activeWriteBufferTooDead(_states[activeBufferId])) + { + switchActiveBuffer(typeId, 0u); + } + _states[worstBufferId].setCompacting(); + _states[worstBufferId].disableElemHoldList(); + disableFreeList(worstBufferId); + return worstBufferId; +} + + +} // namespace btree + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/btree/datastorebase.h b/searchlib/src/vespa/searchlib/btree/datastorebase.h new file mode 100644 index 00000000000..0c44b485d18 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/datastorebase.h @@ -0,0 +1,404 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +#include "bufferstate.h" +#include +#include + + +namespace search +{ + +namespace btree +{ + +class DataStoreBase +{ +private: + /* Copy constructor and copy assignment are declared private. */ + DataStoreBase(const DataStoreBase &rhs); + + DataStoreBase & + operator=(const DataStoreBase &rhs); +protected: + typedef vespalib::GenerationHandler::generation_t generation_t; + typedef vespalib::GenerationHandler::sgeneration_t sgeneration_t; + + std::vector _buffers; // For fast mapping with known types + std::vector _activeBufferIds; // typeId -> active buffer + + // Hold list before freeze, before knowing how long elements must be held + class ElemHold1ListElem + { + public: + EntryRef _ref; + uint64_t _len; // Aligned length + + ElemHold1ListElem(EntryRef ref, uint64_t len) + : _ref(ref), + _len(len) + { + } + }; + + // Hold list at freeze, when knowing how long elements must be held + class ElemHold2ListElem : public ElemHold1ListElem + { + public: + /* Generation that was passed in when the element was transferred. */ + generation_t _generation; + + ElemHold2ListElem(const ElemHold1ListElem &hold1, + generation_t generation) + : ElemHold1ListElem(hold1), + _generation(generation) + { + } + }; + + typedef vespalib::Array ElemHold1List; + typedef std::deque ElemHold2List; + + /* + * Generation held object that owns a buffer allocation together with the + * type handler and element count needed to destroy its elements. + */ + class FallbackHold : 
public vespalib::GenerationHeldBase + { + public: + BufferState::Alloc _buffer; + size_t _usedElems; + BufferTypeBase *_typeHandler; + uint32_t _typeId; + + FallbackHold(size_t size, + BufferState::Alloc &&buffer, + size_t usedElems, + BufferTypeBase *typeHandler, + uint32_t typeId); + + virtual + ~FallbackHold(void); + }; + + class BufferHold; + +public: + class MemStats + { + public: + uint64_t _allocElems; + uint64_t _usedElems; + uint64_t _deadElems; + uint64_t _holdElems; + uint64_t _allocBytes; + uint64_t _usedBytes; + uint64_t _deadBytes; + uint64_t _holdBytes; + uint32_t _freeBuffers; + uint32_t _activeBuffers; + uint32_t _holdBuffers; + + MemStats(void) + : _allocElems(0), + _usedElems(0), + _deadElems(0), + _holdElems(0), + _allocBytes(0), + _usedBytes(0), + _deadBytes(0), + _holdBytes(0), + _freeBuffers(0), + _activeBuffers(0), + _holdBuffers(0) + { + } + + MemStats & + operator+=(const MemStats &rhs) + { + _allocElems += rhs._allocElems; + _usedElems += rhs._usedElems; + _deadElems += rhs._deadElems; + _holdElems += rhs._holdElems; + _allocBytes += rhs._allocBytes; + _usedBytes += rhs._usedBytes; + _deadBytes += rhs._deadBytes; + _holdBytes += rhs._holdBytes; + _freeBuffers += rhs._freeBuffers; + _activeBuffers += rhs._activeBuffers; + _holdBuffers += rhs._holdBuffers; + return *this; + } + }; + +protected: + std::vector _states; + std::vector _typeHandlers; // TypeId -> handler + + std::vector _freeListLists; + bool _freeListsEnabled; + + ElemHold1List _elemHold1List; + ElemHold2List _elemHold2List; + + uint32_t _numBuffers; + size_t _maxClusters; + + vespalib::GenerationHolder _genHolder; + + DataStoreBase(uint32_t numBuffers, size_t maxClusters); + + virtual + ~DataStoreBase(void); + + /** + * Get next buffer id + * + * @param bufferId current buffer id + * @return next buffer id + */ + uint32_t + nextBufferId(uint32_t bufferId) + { + uint32_t ret = bufferId + 1; + if (ret == _numBuffers) + ret = 0; + return ret; + } + + /** + * Get active buffer + * 
+ * @return active buffer + */ + void * + activeBuffer(uint32_t typeId) + { + return _buffers[_activeBufferIds[typeId]]; + } + + /** + * Trim elem hold list, freeing elements that no longer needs to be held. + * + * @param usedGen lowest generation that is still used. + */ + virtual void + trimElemHoldList(generation_t usedGen) = 0; + + virtual void + clearElemHoldList(void) = 0; + +public: + void + addType(BufferTypeBase *typeHandler); + + void + initActiveBuffers(void); + + /** + * Ensure that active buffer has a given number of elements free at end. + * Switch to new buffer if current buffer is too full. + * + * @param typeId registered data type for buffer. + * @param sizeNeeded Number of elements needed to be free + */ + void + ensureBufferCapacity(uint32_t typeId, size_t sizeNeeded) + { + if (__builtin_expect(sizeNeeded > + _states[_activeBufferIds[typeId]].remaining(), + false)) { + switchActiveBuffer(typeId, sizeNeeded); + } + } + + /** + * Put buffer on hold list, as part of compaction. + * + * @param bufferId Id of buffer to be held. + */ + void + holdBuffer(uint32_t bufferId); + + /** + * Switch to new active buffer, typically in preparation for compaction + * or when current active buffer no longer has free space. + * + * @param typeId registered data type for buffer. + * @param sizeNeeded Number of elements needed to be free + */ + void + switchActiveBuffer(uint32_t typeId, size_t sizeNeeded); + + MemoryUsage getMemoryUsage() const; + + /** + * Get active buffer id for the given type id. 
+ */ + uint32_t + getActiveBufferId(uint32_t typeId) const + { + return _activeBufferIds[typeId]; + } + + const BufferState & + getBufferState(uint32_t bufferId) const + { + return _states[bufferId]; + } + + BufferState & + getBufferState(uint32_t bufferId) + { + return _states[bufferId]; + } + + uint32_t + getNumBuffers(void) const + { + return _numBuffers; + } + + bool + hasElemHold1(void) const + { + return !_elemHold1List.empty(); + } + + /** + * Transfer element holds from hold1 list to hold2 list. + */ + void + transferElemHoldList(generation_t generation); + + /** + * Transfer holds from hold1 to hold2 lists, assigning generation. + */ + void + transferHoldLists(generation_t generation); + + /** + * Hold of buffer has ended. + */ + void + doneHoldBuffer(uint32_t bufferId); + + /** + * Trim hold lists, freeing buffers that no longer needs to be held. + * + * @param usedGen lowest generation that is still used. + */ + void + trimHoldLists(generation_t usedGen); + + void + clearHoldLists(void); + + template + EntryType * + getBufferEntry(uint32_t bufferId, uint64_t offset) + { + return static_cast(_buffers[bufferId]) + + offset; + } + + template + const EntryType * + getBufferEntry(uint32_t bufferId, uint64_t offset) const + { + return static_cast(_buffers[bufferId]) + + offset; + } + + void + dropBuffers(void); + + + void + incDead(uint32_t bufferId, uint64_t dead) + { + BufferState &state = _states[bufferId]; + state._deadElems += dead; + } + + /** + * Enable free list management. This only works for fixed size elements. + */ + void + enableFreeLists(void); + + /** + * Disable free list management. + */ + void + disableFreeLists(void); + + /** + * Enable free list management. This only works for fixed size elements. + */ + void + enableFreeList(uint32_t bufferId); + + /** + * Disable free list management. + */ + void + disableFreeList(uint32_t bufferId); + + void + disableElemHoldList(void); + + /** + * Returns the free list for the given type id. 
+ */ + BufferState::FreeListList & + getFreeList(uint32_t typeId) + { + return _freeListLists[typeId]; + } + + MemStats + getMemStats(void) const; + + /** + * Switch buffer state to active. + * + * @param bufferId Id of buffer to be active. + * @param typeId registered data type for buffer. + * @param sizeNeeded Number of elements needed to be free + * @param maxSize number of clusters expressable via reference + * type + */ + void + onActive(uint32_t bufferId, uint32_t typeId, + size_t sizeNeeded, + size_t maxSize); + + uint32_t + getTypeId(uint32_t bufferId) const + { + return _states[bufferId].getTypeId(); + } + + std::vector + startCompact(uint32_t typeId); + + void + finishCompact(const std::vector &toHold); + + void + fallbackResize(uint32_t bufferId, + uint64_t newSize); + + vespalib::GenerationHolder & + getGenerationHolder(void) + { + return _genHolder; + } + + uint32_t startCompactWorstBuffer(uint32_t typeId); +}; + + +} // namespace btree + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/entryref.h b/searchlib/src/vespa/searchlib/btree/entryref.h new file mode 100644 index 00000000000..e2d2a8d89ba --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/entryref.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { +namespace btree { + /** + * Plain 32-bit entry reference. A default-constructed reference holds 0, + * which valid() reports as invalid. Comparisons use the raw value. + */ +class EntryRef { +protected: + uint32_t _ref; +public: + EntryRef(void) : _ref(0u) { } + EntryRef(uint32_t ref_) : _ref(ref_) { } + uint32_t ref(void) const { return _ref; } + bool valid(void) const { return _ref != 0u; } + bool operator==(const EntryRef &rhs) const { return _ref == rhs._ref; } + bool operator!=(const EntryRef &rhs) const { return _ref != rhs._ref; } + bool operator <(const EntryRef &rhs) const { return _ref < rhs._ref; } +}; + +/** + * Class for entry reference where we use OffsetBits bits for offset into buffer, + * and (32 - OffsetBits) bits for buffer id. + * The buffer id occupies the low BufferBits bits and the offset the bits above. + * align() and pad() are identity / zero here. + * NOTE(review): the template parameter list is missing in this text; the body + * uses OffsetBits and BufferBits as compile-time integer constants. + **/ +template +class EntryRefT : public EntryRef { +public: + EntryRefT() : EntryRef() {} + EntryRefT(uint64_t offset_, uint32_t bufferId_) : + EntryRef((offset_ << BufferBits) + bufferId_) {} + EntryRefT(const EntryRef & ref_) : EntryRef(ref_.ref()) {} + uint64_t offset() const { return _ref >> BufferBits; } + uint32_t bufferId() const { return _ref & (numBuffers() - 1); } + static uint64_t offsetSize() { return 1ul << OffsetBits; } + static uint32_t numBuffers() { return 1 << BufferBits; } + static uint64_t align(uint64_t val) { return val; } + static uint64_t pad(uint64_t val) { (void) val; return 0ul; } +}; + +/** + * Class for entry reference that is similar to EntryRefT, + * except that we use (2^OffsetAlign) byte alignment on the offset. 
+ * The stored offset is the aligned offset shifted right by OffsetAlign, and + * offset() shifts it back. pad(val) is the amount that must be added to val to + * reach the next multiple of 2^OffsetAlign (0 if already a multiple); + * align(val) returns val plus pad(val). + * NOTE(review): the template parameter list and the arguments of the EntryRefT + * base are missing in this text. + **/ +template +class AlignedEntryRefT : public EntryRefT { +private: + typedef EntryRefT ParentType; + static const uint32_t PadConstant = ((1 << OffsetAlign) - 1); +public: + AlignedEntryRefT() : ParentType() {} + AlignedEntryRefT(uint64_t offset_, uint32_t bufferId_) : + ParentType(align(offset_) >> OffsetAlign, bufferId_) {} + AlignedEntryRefT(const EntryRef & ref_) : ParentType(ref_) {} + uint64_t offset() const { return ParentType::offset() << OffsetAlign; } + static uint64_t offsetSize() { return ParentType::offsetSize() << OffsetAlign; } + static uint64_t align(uint64_t val) { return val + pad(val); } + static uint64_t pad(uint64_t val) { return (-val & PadConstant); } +}; + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h b/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h new file mode 100644 index 00000000000..09cd8ea7d23 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/minmaxaggrcalc.h @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +namespace search +{ +namespace btree +{ + /** + * Set of static helpers, without data members, that forward add, remove + * and update calls to a MinMaxAggregated. hasAggregated() returns true and + * getVal() returns its argument unchanged. + */ +class MinMaxAggrCalc +{ +public: + MinMaxAggrCalc(void) + { + } + + static bool + hasAggregated(void) + { + return true; + } + + static int32_t + getVal(int32_t val) + { + return val; + } + + static void + add(MinMaxAggregated &a, int32_t val) + { + a.add(val); + } + + static void + add(MinMaxAggregated &a, const MinMaxAggregated &ca) + { + a.add(ca); + } + + static void + add(MinMaxAggregated &a, const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + a.add(oldca, ca); + } + + /* Returns true if recalculation is needed */ + static bool + remove(MinMaxAggregated &a, int32_t val) + { + return a.remove(val); + } + + /* Returns true if recalculation is needed */ + static bool + remove(MinMaxAggregated &a, const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + return a.remove(oldca, ca); + } + + /* Returns true if recalculation is needed */ + static bool + update(MinMaxAggregated &a, int32_t oldVal, int32_t val) + { + return a.update(oldVal, val); + } + + /* Returns true if recalculation is needed */ + static bool + update(MinMaxAggregated &a, const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + return a.update(oldca, ca); + } +}; + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h b/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h new file mode 100644 index 00000000000..1b876918d1a --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/minmaxaggregated.h @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <limits> + +namespace search +{ +namespace btree +{ + /** + * Running minimum and maximum of int32_t values. + * A default-constructed object is empty: _min starts at the largest int32_t + * and _max at the smallest, so the first add() sets both. + * remove() never changes the stored values and update() changes them only + * when it returns false; a true return means the removed or replaced value + * was the current minimum or maximum and the caller has to recalculate + * the aggregate. + */ +class MinMaxAggregated +{ + int32_t _min; + int32_t _max; + +public: + MinMaxAggregated(void) + : _min(std::numeric_limits<int32_t>::max()), + _max(std::numeric_limits<int32_t>::min()) + { + } + + MinMaxAggregated(int32_t min, int32_t max) + : _min(min), + _max(max) + { + } + + int32_t + getMin(void) const + { + return _min; + } + + int32_t + getMax(void) const + { + return _max; + } + + /* Widen the range so that it includes val */ + void + add(int32_t val) + { + if (_min > val) + _min = val; + if (_max < val) + _max = val; + } + + /* Widen the range so that it includes the range of ca */ + void + add(const MinMaxAggregated &ca) + { + if (_min > ca._min) + _min = ca._min; + if (_max < ca._max) + _max = ca._max; + } + + /* Same as add(ca); oldca is ignored */ + void + add(const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + (void) oldca; + add(ca); + } + + /* Returns true if recalculation is needed */ + bool + remove(int32_t val) + { + return (_min == val || _max == val); + } + + /* Returns true if recalculation is needed */ + bool + remove(const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + return (_min == oldca._min && _min != ca._min) || + (_max == oldca._max && _max != ca._max); + } + + /* Returns true if recalculation is needed */ + bool + update(int32_t oldVal, int32_t val) + { + if ((_min == oldVal && _min < val) || + (_max == oldVal && _max > val)) { + return true; + } + add(val); + return false; + } + + /* Returns true if recalculation is needed */ + bool + update(const MinMaxAggregated &oldca, + const MinMaxAggregated &ca) + { + if ((_min == oldca._min && _min < ca._min) || + (_max == oldca._max && _max > ca._max)) { + return true; + } + add(ca); + return false; + } +}; + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/noaggrcalc.h b/searchlib/src/vespa/searchlib/btree/noaggrcalc.h new file mode 100644 index 00000000000..57d7ccd2f45 --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/noaggrcalc.h @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search +{ +namespace btree +{ + /** + * No-op aggregation helpers for NoAggregated. hasAggregated() returns + * false, getVal() returns 0, every add() ignores its arguments, and every + * remove() and update() ignores its arguments and returns false. + */ +class NoAggrCalc +{ +public: + NoAggrCalc(void) + { + } + + static bool + hasAggregated(void) + { + return false; + } + + template + static inline int32_t + getVal(const DataT &val) + { + (void) val; + return 0; + } + + static void + add(NoAggregated &a, int32_t val) + { + (void) a; + (void) val; + } + + static void + add(NoAggregated &a, const NoAggregated &ca) + { + (void) a; + (void) ca; + } + + static void + add(NoAggregated &a, + const NoAggregated &oldca, + const NoAggregated &ca) + { + (void) a; + (void) oldca; + (void) ca; + } + + /* Returns true if recalculation is needed */ + static bool + remove(NoAggregated &a, int32_t val) + { + (void) a; + (void) val; + return false; + } + + /* Returns true if recalculation is needed */ + static bool + remove(NoAggregated &a, const NoAggregated &oldca, const NoAggregated &ca) + { + (void) a; + (void) oldca; + (void) ca; + return false; + } + + /* Returns true if recalculation is needed */ + static bool + update(NoAggregated &a, int32_t oldVal, int32_t val) + { + (void) a; + (void) oldVal; + (void) val; + return false; + } + + /* Returns true if recalculation is needed */ + static bool + update(NoAggregated &a, const NoAggregated &oldca, const NoAggregated &ca) + { + (void) a; + (void) oldca; + (void) ca; + return false; + } +}; + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/btree/noaggregated.h b/searchlib/src/vespa/searchlib/btree/noaggregated.h new file mode 100644 index 00000000000..9f710840d5a --- /dev/null +++ b/searchlib/src/vespa/searchlib/btree/noaggregated.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +namespace search +{ +namespace btree +{ + /** + * Empty aggregate type: it has no data members, only a default constructor. + */ +class NoAggregated +{ +public: + NoAggregated(void) + { + } +}; + + +} // namespace search::btree +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/.gitignore b/searchlib/src/vespa/searchlib/common/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt new file mode 100644 index 00000000000..0bad6ad9381 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_common OBJECT + SOURCES + bitvector.cpp + allocatedbitvector.cpp + partialbitvector.cpp + growablebitvector.cpp + bitvectorcache.cpp + bitvectoriterator.cpp + condensedbitvectors.cpp + documentlocations.cpp + documentsummary.cpp + featureset.cpp + fileheadercontext.cpp + foregroundtaskexecutor.cpp + indexmetainfo.cpp + location.cpp + locationiterators.cpp + mapnames.cpp + packets.cpp + resultset.cpp + sequencedtaskexecutor.cpp + serialnumfileheadercontext.cpp + sort.cpp + sortdata.cpp + sortresults.cpp + sortspec.cpp + tunefileinfo.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp new file mode 100644 index 00000000000..59d190b2b50 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.cpp @@ -0,0 +1,156 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "allocatedbitvector.h" + +namespace search +{ + +using vespalib::nbostream; +using vespalib::GenerationHeldBase; +using vespalib::GenerationHeldAlloc; +using vespalib::GenerationHolder; + /** + * Replace _alloc with a fresh allocation holding capacityWords() words + * rounded up to a multiple of 16 words, then zero the bytes from sizeBytes() + * to the end of the requested size. + * NOTE(review): the "64 byte" remark below presumably dates from 4-byte + * words; with an 8-byte Word a multiple of 16 words is 128 bytes. Confirm. + */ +void AllocatedBitVector::alloc() +{ + uint32_t words = capacityWords(); + words += (-words & 15); // Pad to 64 byte alignment + const size_t sz(words * sizeof(Word)); + Alloc(sz).swap(_alloc); + assert(_alloc.size()/sizeof(Word) >= words); + // Clear padding + memset(static_cast(_alloc.get()) + sizeBytes(), 0, sz - sizeBytes()); +} + +////////////////////////////////////////////////////////////////////// +// Parameterized Constructor +////////////////////////////////////////////////////////////////////// +AllocatedBitVector::AllocatedBitVector(Index numberOfElements) : + BitVector(), + _capacityBits(numberOfElements), + _alloc() +{ + alloc(); + init(_alloc.get(), 0, numberOfElements); + clear(); +} + +AllocatedBitVector::AllocatedBitVector(Index numberOfElements, Alloc buffer, size_t offset) : + BitVector(static_cast(buffer.get()) + offset, numberOfElements), + _capacityBits(numberOfElements), + _alloc(std::move(buffer)) +{ +} + +AllocatedBitVector::AllocatedBitVector(Index numberOfElements, Index capacityBits, const void * rhsBuf, size_t rhsSize) : + BitVector(), + _capacityBits(capacityBits), + _alloc() +{ + alloc(); + init(_alloc.get(), 0, numberOfElements); + clear(); + if (rhsSize > 0) { + size_t minCount = std::min(static_cast(numberOfElements), rhsSize); + memcpy(getStart(), rhsBuf, numBytes(minCount)); + if (minCount/8 == numberOfElements/8) { + static_cast(getStart())[numWords()-1] &= ~endBits(minCount); + } + setBit(size()); // Guard bit + } +} + +AllocatedBitVector::AllocatedBitVector(const AllocatedBitVector & rhs) : + AllocatedBitVector(rhs, rhs.capacity()) +{ +} + +AllocatedBitVector::AllocatedBitVector(const BitVector & rhs) : + 
AllocatedBitVector(rhs, rhs.size()) +{ +} + /** + * Copy rhs into a new allocation sized for capacity_ bits. + * NOTE(review): rhs.sizeBytes() bytes are copied without being checked + * against the new allocation; the two delegating constructors in this file + * pass a capacity taken from rhs itself. + */ +AllocatedBitVector::AllocatedBitVector(const BitVector & rhs, Index capacity_) : + BitVector(), + _capacityBits(capacity_), + _alloc() +{ + alloc(); + memcpy(_alloc.get(), rhs.getStart(), rhs.sizeBytes()); + init(_alloc.get(), 0, rhs.size()); +} + +////////////////////////////////////////////////////////////////////// +// Destructor +////////////////////////////////////////////////////////////////////// +AllocatedBitVector::~AllocatedBitVector(void) +{ +} + +void +AllocatedBitVector::cleanup(void) +{ + init(nullptr, 0, 0); + Alloc().swap(_alloc); + _capacityBits = 0; +} + +void +AllocatedBitVector::resize(Index newLength) +{ + _capacityBits = newLength; + alloc(); + init(_alloc.get(), 0, newLength); + clear(); +} + +AllocatedBitVector & +AllocatedBitVector::operator=(const AllocatedBitVector & rhs) +{ + AllocatedBitVector tmp(rhs); + swap(tmp); + assert(testBit(size())); + + return *this; +} +AllocatedBitVector & +AllocatedBitVector::operator=(const BitVector & rhs) +{ + AllocatedBitVector tmp(rhs); + swap(tmp); + assert(testBit(size())); + + return *this; +} + /** + * Change size to newSize bits and capacity to newCapacity bits. + * When the capacity changes, a new vector is built from the current + * content and swapped in, and the returned pointer holds a + * GenerationHeldAlloc created from the old _alloc. When the capacity is + * unchanged the size is adjusted in place and the returned pointer is + * left default-constructed. + */ +GenerationHeldBase::UP +AllocatedBitVector::grow(Index newSize, Index newCapacity) +{ + assert(newCapacity >= newSize); + GenerationHeldBase::UP ret; + if (newCapacity != capacity()) { + AllocatedBitVector tbv(newSize, newCapacity, _alloc.get(), size()); + if (newSize > size()) { + tbv.clearBit(size()); // Clear old guard bit. 
+ } + ret.reset(new GenerationHeldAlloc(_alloc)); + if (( newSize >= size()) && isValidCount()) { + tbv.setTrueBits(countTrueBits()); + } + swap(tbv); + } else { + if (newSize > size()) { + Index oldSz(size()); + setSize(newSize); + clearIntervalNoInvalidation(oldSz, newSize); + } else { + clearInterval(newSize, size()); + setSize(newSize); + } + } + return ret; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/allocatedbitvector.h b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h new file mode 100644 index 00000000000..8a52a07e29b --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/allocatedbitvector.h @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +namespace search { + +class BitVectorTest; + +/** + * search::AllocatedBitVector provides an interface to a bit vector + * internally implemented as an array of words. + */ +class AllocatedBitVector : public BitVector +{ +public: + typedef vespalib::AutoAlloc<0x800000, 0x1000> Alloc; + + /** + * Class constructor specifying size but not content. New bitvector + * is cleared. + * + * @param numberOfElements The size of the bit vector in bits. + * + */ + explicit AllocatedBitVector(Index numberOfElements); + /** + * Construct on top of an existing buffer, which is moved into this object. + * @param numberOfElements The size of the bit vector in bits. + * @param buffer The buffer backing the bit vector. + * @param offset Where bitvector image is located in the buffer. + */ + AllocatedBitVector(Index numberOfElements, Alloc buffer, size_t offset); + + /** + * Creates a new bitvector with room for numberOfElements bits. + * Copies what it can from the original vector. This is used for extending vector. 
+ */ + AllocatedBitVector(Index numberOfElements, Index capacity, const void * rhsBuf, size_t rhsSize); + + AllocatedBitVector(const BitVector &other); + AllocatedBitVector(const AllocatedBitVector &other); + virtual ~AllocatedBitVector(void); + AllocatedBitVector &operator=(const AllocatedBitVector &other); + AllocatedBitVector &operator=(const BitVector &other); + + /** + * Query the capacity of the bit vector. + * + * @return capacity in bits (_capacityBits). + */ + Index capacity() const { return _capacityBits; } + + Index extraByteSize(void) const { return _alloc.size(); } + + /** + * Set new length of bit vector, possibly destroying content. + * + * @param newLength the new length of the bit vector (in bits) + */ + void resize(Index newLength) override; + + GenerationHeldBase::UP grow(Index newLength, Index newCapacity) override; + +protected: + Index _capacityBits; + Alloc _alloc; + +private: + friend class BitVectorTest; + void alloc(); + void swap(AllocatedBitVector & rhs) { + std::swap(_capacityBits, rhs._capacityBits); + _alloc.swap(rhs._alloc); + BitVector::swap(rhs); + } + + AllocatedBitVector(const BitVector &other, Index capacity); + + /** + * Prepare for potential reuse where new value might be filled in by + * Read method. + */ + void cleanup(void); + Index capacityWords() const { return numWords(capacity()); } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/base.h b/searchlib/src/vespa/searchlib/common/base.h new file mode 100644 index 00000000000..d91acbd29f6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/base.h @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1999-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +namespace search { + +// This constant defines the illegal/undefined value for unsigned 32-bit +// integer ids. 
Use this instead of the function below to get less +// overhead with not-so-smart compilers. + +const uint32_t NoId32 = static_cast(-1); + +} + diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp new file mode 100644 index 00000000000..25edae290de --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -0,0 +1,421 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "bitvector.h" +#include "allocatedbitvector.h" +#include "growablebitvector.h" +#include "partialbitvector.h" +#include +#include + +using vespalib::make_string; +using vespalib::IllegalArgumentException; +using vespalib::hwaccelrated::IAccelrated; +using vespalib::Optimized; + +namespace { + +void verifyContains(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline)); + /** + * Throw IllegalArgumentException if a starts before b or if a.size() is + * larger than b.size(); otherwise do nothing. + */ +void verifyContains(const search::BitVector & a, const search::BitVector & b) +{ + if ((a.getStartIndex() < b.getStartIndex()) || (a.size() > b.size())) { + throw IllegalArgumentException(make_string("[%d, %d] is not contained in [%d, %d]", + a.getStartIndex(), a.size(), b.getStartIndex(), b.size()), + VESPA_STRLOC); + } +} + +} + +///////////////////////////////// +namespace search +{ + +using vespalib::nbostream; +using vespalib::GenerationHeldBase; +using vespalib::GenerationHeldAlloc; +using vespalib::GenerationHolder; + +namespace { + /** + * Set v[i] to startVal shifted left by i, for every bit position i of T + * (sizeof(T)*8 entries are written). + */ +template +void fillUp(T * v, T startVal) { + for (size_t i(0); i < (sizeof(T)*8); i++) { + v[i] = startVal << i; + } +} + +} + +BitWord::Init BitWord::_initializer; + +BitWord::Init::Init() +{ + fillUp(BitWord::_checkTab, std::numeric_limits::max()); +} + +BitWord::Word BitWord::_checkTab[BitWord::WordLen]; + + +BitVector::BitVector(void * buf, Index start, Index end) : + _words(static_cast(buf) - 
wordNum(start)), + _startOffset(start), + _sz(end), + _numTrueBits(invalidCount()) +{ + assert((reinterpret_cast(_words) & (sizeof(Word) - 1ul)) == 0); +} + /** + * Point the vector at buf. _words is set wordNum(start) words before buf, + * _startOffset and _sz are set to start and end, and the cached true-bit + * count is reset to invalidCount(). + */ +void +BitVector::init(void * buf, Index start, Index end) +{ + _words = static_cast(buf) - wordNum(start); + _startOffset = start; + _sz = end; + _numTrueBits = invalidCount(); +} + +void +BitVector::clear(void) +{ + memset(getActiveStart(), '\0', getActiveBytes()); + setBit(size()); // Guard bit + setTrueBits(0); +} + +void +BitVector::clearInterval(Index start, Index end) +{ + clearIntervalNoInvalidation(start, end); + + invalidateCachedCount(); +} + /** + * Clear the bits from start up to, but not including, min(end, size()). + * Unlike clearInterval() this does not call invalidateCachedCount(). + */ +void +BitVector::clearIntervalNoInvalidation(Index start, Index end) +{ + if (start >= end) { return; } + + Index last = std::min(end, size()) - 1; + Index startw = wordNum(start); + Index endw = wordNum(last); + + if (endw > startw) { + _words[startw++] &= startBits(start); + memset(_words+startw, 0, sizeof(*_words)*(endw-startw)); + _words[endw] &= endBits(last); + } else { + _words[startw] &= (startBits(start) | endBits(last)); + } +} + /** + * Set the bits from start up to, but not including, min(end, size()), + * then invalidate the cached true-bit count. + */ +void +BitVector::setInterval(Index start, Index end) +{ + if (start >= end) { return; } + + Index last = std::min(end, size()) - 1; + Index startw = wordNum(start); + Index endw = wordNum(last); + + if (endw > startw) { + _words[startw++] |= checkTab(start); + memset(_words + startw, 0xff, sizeof(*_words)*(endw-startw)); + _words[endw] |= ~endBits(last); + } else { + _words[startw] |= ~(startBits(start) | endBits(last)); + } + + invalidateCachedCount(); +} + +BitVector::Index +BitVector::count(void) const +{ + // Subtract by one to compensate for guard bit + return internalCount(getActiveStart(), numActiveWords()) - 1; +} + +BitVector::Index +BitVector::internalCount(const Word *tarr, size_t sz) +{ + Index count(0); + for (size_t i(0); i < sz; i++) { + count += Optimized::popCount(tarr[i]); + } + return count; +} + +BitVector::Index +BitVector::countInterval(Index start, Index end) const +{ + if (start >= end) return 0; + + 
Index last = std::min(end, size()) - 1; + // Count bits in range [start..end> + Index startw = wordNum(start); + Index endw = wordNum(last); + Word *bitValues = _words; + + if (startw == endw) { + return Optimized::popCount(bitValues[startw] & ~(startBits(start) | endBits(last))); + } + Index res = 0; + // Limit to full words + if ((start & (WordLen - 1)) != 0) { + res += Optimized::popCount(bitValues[startw] & ~startBits(start)); + ++startw; + } + // Advance start word index to a multiple of 4 words + while (startw < endw && (startw & 3) != 0) { + res += Optimized::popCount(bitValues[startw]); + ++startw; + } + bool partialEnd = (last & (WordLen - 1)) != (WordLen - 1); + if (!partialEnd) { + ++endw; + } + if (startw < endw) { + res += internalCount(bitValues + startw, endw - startw); + } + if (partialEnd) { + res += Optimized::popCount(bitValues[endw] & ~endBits(last)); + } + + return res; +} + +void +BitVector::orWith(const BitVector & right) +{ + verifyContains(*this, right); + IAccelrated::getAccelrator()->orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + + repairEnds(); + invalidateCachedCount(); +} + /** + * After a whole-word operation, clear the bits below getStartIndex() in + * the word returned by getWordIndex(start) and the bits above size() - 1 + * in the word returned by getWordIndex(last), then set the guard bit + * again. Does nothing when size() is 0. + */ +void +BitVector::repairEnds() +{ + if (size() == 0) return; + Index start(getStartIndex()); + Index last(size() - 1); + getWordIndex(start)[0] &= ~startBits(start); + getWordIndex(last)[0] &= ~endBits(last); + setGuardBit(); +} + + +void +BitVector::andWith(const BitVector & right) +{ + verifyContains(*this, right); + + IAccelrated::getAccelrator()->andBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + + setGuardBit(); + invalidateCachedCount(); +} + + +void +BitVector::andNotWith(const BitVector& right) +{ + verifyContains(*this, right); + + IAccelrated::getAccelrator()->andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + + setGuardBit(); + invalidateCachedCount(); +} + +bool +BitVector::operator==(const BitVector &rhs) const +{ + if ((size() != rhs.size()) || (getStartIndex() != 
rhs.getStartIndex())) { + return false; + } + + Index bitVectorSize = numActiveWords(); + const Word *words = getActiveStart(); + const Word *oWords = rhs.getActiveStart(); + for (Index i = 0; i < bitVectorSize; i++) { + if (words[i] != oWords[i]) { + return false; + } + } + return true; +} + +bool +BitVector::hasTrueBitsInternal(void) const +{ + Index bitVectorSizeL1(numActiveWords() - 1); + const Word *words(getActiveStart()); + for (Index i = 0; i < bitVectorSizeL1; i++) { + if (words[i] != 0) { + return true; + } + } + + // Ignore guard bit. + if ((words[bitVectorSizeL1] & ~mask(size())) != 0) + return true; + + return false; +} + +////////////////////////////////////////////////////////////////////// +// Set new length. Destruction of content +////////////////////////////////////////////////////////////////////// +void +BitVector::resize(Index newLength) +{ + (void) newLength; + assert(false); +} +/** + * Base-class placeholder: growing is not supported here, so this asserts. + * AllocatedBitVector overrides it with a working implementation. + * An empty pointer is returned after the assert so that builds with NDEBUG, + * where assert() expands to nothing, do not flow off the end of a function + * that must return a value. + */ +GenerationHeldBase::UP +BitVector::grow(Index newSize, Index newCapacity) +{ + (void) newSize; + (void) newCapacity; + assert(false); + return GenerationHeldBase::UP(); +} + +size_t +BitVector::getFileBytes(Index bits) +{ + Index bytes = numBytes(bits); + bytes += (-bytes & (getAlignment() - 1)); + return bytes; +} + +class MMappedBitVector : public BitVector +{ +public: + MMappedBitVector(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount); + +private: + void read(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount); +}; + +BitVector::UP +BitVector::create(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount) +{ + UP bv; + if (file.IsMemoryMapped()) { + bv.reset(new MMappedBitVector(numberOfElements, file, offset, doccount)); + } else { + size_t padbefore, padafter; + size_t vectorsize = getFileBytes(numberOfElements); + file.DirectIOPadding(offset, vectorsize, padbefore, padafter); + assert((padbefore & (getAlignment() - 1)) == 0); + AllocatedBitVector::Alloc alloc(padbefore + vectorsize + 
padafter); + void * alignedBuffer = alloc.get(); + file.ReadBuf(alignedBuffer, alloc.size(), offset - padbefore); + bv.reset(new AllocatedBitVector(numberOfElements, std::move(alloc), padbefore)); + bv->setTrueBits(doccount); + // Check guard bit for getNextTrueBit() + assert(bv->testBit(bv->size())); + } + return bv; +} + +BitVector::UP +BitVector::create(Index start, Index end) +{ + return (start == 0) + ? create(end) + : UP(new PartialBitVector(start, end)); +} + +BitVector::UP +BitVector::create(Index numberOfElements) +{ + return UP(new AllocatedBitVector(numberOfElements)); +} + +BitVector::UP +BitVector::create(const BitVector & rhs) +{ + return UP(new AllocatedBitVector(rhs)); +} + +BitVector::UP +BitVector::create(Index numberOfElements, Index newCapacity, GenerationHolder &generationHolder) +{ + return UP(new GrowableBitVector(numberOfElements, newCapacity, generationHolder)); +} + +MMappedBitVector::MMappedBitVector(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount) : + BitVector() +{ + read(numberOfElements, file, offset, doccount); +} + +void +MMappedBitVector::read(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount) +{ + assert((offset & (getAlignment() - 1)) == 0); + void *mapptr = file.MemoryMapPtr(offset); + assert(mapptr != NULL); + if (mapptr != NULL) { + init(mapptr, 0, numberOfElements); + } + setTrueBits(doccount); +} + +nbostream & +operator<<(nbostream &out, const BitVector &bv) +{ + size_t size = bv.size(); + size_t cachedHits = bv.countTrueBits(); + size_t fileBytes = bv.getFileBytes(); + assert(size <= std::numeric_limits::max()); + assert(cachedHits <= size || ! 
bv.isValidCount(cachedHits)); + assert(bv.testBit(size)); + out << size << cachedHits << fileBytes; + out.write(bv.getStart(), bv.getFileBytes()); + return out; +} + + +nbostream & +operator>>(nbostream &in, BitVector &bv) +{ + size_t size; + size_t cachedHits; + size_t fileBytes; + in >> size >> cachedHits >> fileBytes; + assert(size <= std::numeric_limits::max()); + assert(cachedHits <= size || ! bv.isValidCount(cachedHits)); + if (bv.size() != size) + bv.resize(size); + assert(bv.getFileBytes() == fileBytes); + in.read(bv.getStart(), bv.getFileBytes()); + assert(bv.testBit(size)); + bv.setTrueBits(cachedHits); + return in; +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h new file mode 100644 index 00000000000..70864c938d3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -0,0 +1,354 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +class BitWord { +public: + typedef uint64_t Word; + typedef uint32_t Index; + static Word checkTab(Index index) { return _checkTab[bitNum(index)]; } + static Word startBits(Index index) { return (std::numeric_limits::max() >> 1) >> (WordLen - 1 - bitNum(index)); } + static constexpr size_t WordLen = sizeof(Word)*8; + static uint8_t bitNum(Index idx) { return (idx % WordLen); } + static Word endBits(Index index) { return (std::numeric_limits::max() - 1) << bitNum(index); } + static Index wordNum(Index idx) { return idx >> numWordBits(); } + static Word mask(Index idx) { return Word(1) << bitNum(idx); } + static constexpr uint8_t size_bits(uint8_t n) { return (n > 1) ? 
(1 + size_bits(n >> 1)) : 0; } + static uint8_t numWordBits() { return size_bits(WordLen); } +private: + + static Word _checkTab[WordLen]; + struct Init { + Init(); + }; + static Init _initializer; +}; + +class BitVector : protected BitWord, private vespalib::noncopyable +{ +public: + typedef BitWord::Index Index; + typedef vespalib::GenerationHolder GenerationHolder; + typedef vespalib::GenerationHeldBase GenerationHeldBase; + typedef std::unique_ptr UP; + virtual ~BitVector() { } + bool operator == (const BitVector &right) const; + const void * getStart() const { return _words; } + void * getStart() { return _words; } + Index size() const { return _sz; } + Index sizeBytes() const { return numBytes(getActiveSize()); } + bool testBit(Index idx) const { + return ((_words[wordNum(idx)] & mask(idx)) != 0); + } + bool hasTrueBits() const { + return isValidCount() + ? (countTrueBits() != 0) + : hasTrueBitsInternal(); + } + Index countTrueBits() const { + if ( ! isValidCount()) { + _numTrueBits = count(); + } + return _numTrueBits; + } + + /** + * Will provide the first valid bit of the bitvector. + * + * @return The Index of the first valid bit of the bitvector. + */ + Index getStartIndex() const { return _startOffset; } + + /** + * Get next bit set in the bitvector (inclusive start). + * It assumes that bitvector is non-zero terminated. + * + * @param start first bit to check + * @return next bit set in the bitvector. + */ + Index getNextTrueBit(Index start) const { + Index index(wordNum(start)); + const Word *words(_words); + Word t(words[index] & checkTab(start)); + + // In order to avoid a test an extra guard bit is added + // after the bitvector as a termination. + // Also, the bitvector will normally have at least 1 bit set per 32 bits. + // So that is what we should expect. 
+ while (__builtin_expect(t == 0, false)) { + t = words[++index]; + } + + return (index << numWordBits()) + vespalib::Optimized::lsbIdx(t); + } + + /** + * Iterate over all true bits in the range [start, end>. + * + * @param func callback + * @param start first bit + * @param end limit + */ + template + void + foreach_truebit(FunctionType func, Index start=0, Index end=std::numeric_limits::max()) const + { + foreach(func, [&](Word w) { return w; }, start, end); + } + + /** + * Iterate over all false bits in the range [start, end>. + * + * @param func callback + * @param start first bit + * @param end limit + */ + template + void + foreach_falsebit(FunctionType func, Index start=0, Index end=std::numeric_limits::max()) const + { + foreach(func, [&](Word w) { return ~w; }, start, end); + } + + Index getFirstTrueBit(Index start=0) const { + return getNextTrueBit(std::max(start, getStartIndex())); + } + + Index getPrevTrueBit(Index start) const { + Index index(wordNum(start)); + const Word *words(_words); + Word t(words[index] & ~endBits(start)); + + while(t == 0 && index > getStartWordNum()) { + t = words[--index]; + } + + return (t != 0) + ? (index << numWordBits()) + vespalib::Optimized::msbIdx(t) + : getStartIndex(); + } + + void setSize(Index sz) { + clearBit(size()); + _sz = sz; + setBit(size()); + } + void setBit(Index idx) { + _words[wordNum(idx)] |= mask(idx); + } + void clearBit(Index idx) { + _words[wordNum(idx)] &= ~ mask(idx); + } + void flip(Index idx) { + _words[wordNum(idx)] ^= mask(idx); + } + void slowSetBit(Index idx) { + if ( ! testBit(idx) ) { + setBit(idx); + if ( isValidCount() ) { + _numTrueBits++; + } + } + } + + void andWith(const BitVector &right); + void orWith(const BitVector &right); + void andNotWith(const BitVector &right); + + /** + * Clear all bits in the bit vector. + */ + void clear(); + + /** + * Clear a sequence of bits [..>. 
+ * + * @param start first bit to be cleared + * @param end limit + */ + void clearInterval(Index start, Index end); + /** + * Set a sequence of bits. + * + * @param start first bit to be set [..> + * @param end limit + */ + void setInterval(Index start, Index end); + + void slowClearBit(Index idx) { + if (testBit(idx)) { + clearBit(idx); + if ( isValidCount() ) { + _numTrueBits--; + } + } + } + + /** + * Invalidate cached count of bits set in bit vector. This method + * should be called before calling Test/Clear/Flip methods. + */ + void invalidateCachedCount() const { + _numTrueBits = invalidCount(); + } + + void swap(BitVector & rhs) { + std::swap(_words, rhs._words); + std::swap(_startOffset, rhs._startOffset); + std::swap(_sz, rhs._sz); + std::swap(_numTrueBits, rhs._numTrueBits); + } + + /** + * Count bits in partial bitvector [..>. + * + * @param start first bit to be counted + * @param end limit + */ + Index countInterval(Index start, Index end) const; + + /** + * Perform an andnot with an internal array representation. + * + * @param other internal array representation + * @param otherCount number of elements in array + */ + template + void andNotWithT(T it); + + /* + * Calculate the size of a bitmap when performing file io. + */ + static size_t getFileBytes(Index bits); + + /* + * Calculate the size of a bitmap when performing file io. + */ + size_t getFileBytes(void) const { + return getFileBytes(size()); + } + + virtual void resize(Index newLength); + + virtual GenerationHeldBase::UP grow(Index newLength, Index newCapacity); + GenerationHeldBase::UP grow(Index newLength) { return grow(newLength, newLength); } + + /** + * This will create the appropriate vector. + * + * @param numberOfElements The size of the bit vector in bits. + * @param file The file from which to read the bit vector. + * @param offset Where bitvector image is located in the file. 
+ * @param doccount Number of bits set in bitvector + */ + static UP create(Index numberOfElements, + FastOS_FileInterface &file, + int64_t offset, + Index doccount); + static UP create(Index start, Index end); + static UP create(Index numberOfElements); + static UP create(const BitVector & rhs); + static UP create(Index newSize, Index newCapacity, GenerationHolder &generationHolder); +protected: + VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end); + BitVector(void * buf, Index sz) : BitVector(buf, 0, sz) { } + BitVector() : BitVector(nullptr, 0) { } + void init(void * buf, Index start, Index end); + void setTrueBits(Index numTrueBits) { _numTrueBits = numTrueBits; } + VESPA_DLL_LOCAL void clearIntervalNoInvalidation(Index start, Index end); + bool isValidCount() const { return isValidCount(_numTrueBits); } + static bool isValidCount(Index v) { return v != invalidCount(); } + static Index numWords(Index bits) { return wordNum(bits + 1 + (WordLen - 1)); } + static Index numBytes(Index bits) { return numWords(bits) * sizeof(Word); } + size_t numWords() const { return numWords(size()); } + static size_t getAlignment() { return 0x40u; } + static size_t numActiveBytes(Index start, Index end) { return numActiveWords(start, end) * sizeof(Word); } + +private: + const Word * getWordIndex(Index index) const { return static_cast(getStart()) + wordNum(index); } + Word * getWordIndex(Index index) { return static_cast(getStart()) + wordNum(index); } + const Word * getActiveStart() const { return getWordIndex(getStartIndex()); } + Word * getActiveStart() { return getWordIndex(getStartIndex()); } + Index getStartWordNum() const { return wordNum(getStartIndex()); } + Index getActiveSize() const { return size() - getStartIndex(); } + size_t getActiveBytes() const { return numActiveBytes(getStartIndex(), size()); } + size_t numActiveWords() const { return numActiveWords(getStartIndex(), size()); } + static size_t numActiveWords(Index start, Index end) { return 
(numWords(end) - wordNum(start)); } + static Index invalidCount() { return std::numeric_limits::max(); } + void setGuardBit() { setBit(size()); } + VESPA_DLL_LOCAL void repairEnds(); + VESPA_DLL_LOCAL static Index internalCount(const Word *tarr, size_t sz); + Index count(void) const; + bool hasTrueBitsInternal() const; + template + void + foreach(FunctionType func, WordConverter conv, Index start, Index end) const + { + if ((end <= start) || (size() == 0)) return; + Index last = std::min(end, size()) - 1; + if (start < getStartIndex()) start = getStartIndex(); + + Index index(wordNum(start)); + Index lastIndex(wordNum(last)); + Word word(conv(_words[index]) & checkTab(start)); + for ( ; index < lastIndex; word = conv(_words[++index])) { + foreach_bit(func, word, index << numWordBits()); + } + foreach_bit(func, word & ~endBits(last), lastIndex << numWordBits()); + } + template + static void + foreach_bit(FunctionType func, Word word, Index start) + { + while (word) { + uint32_t pos = vespalib::Optimized::lsbIdx(word); + func(start+pos); + start += pos + 1; + word >>= pos; + word >>= 1; + } + } + + + Word *_words; // This is the buffer starting at Index 0 + Index _startOffset; // This is the official start + Index _sz; // This is the official end. 
+ mutable Index _numTrueBits; + +protected: + friend vespalib::nbostream & + operator<<(vespalib::nbostream &out, const BitVector &bv); + friend vespalib::nbostream & + operator>>(vespalib::nbostream &in, BitVector &bv); +}; + +typedef BitVector ConstBitVectorReference; + +vespalib::nbostream & +operator<<(vespalib::nbostream &out, const BitVector &bv); + +vespalib::nbostream & +operator>>(vespalib::nbostream &in, BitVector &bv); + +template +void BitVector::andNotWithT(T it) { + while (it.hasNext()) { + clearBit(it.next()); + } + invalidateCachedCount(); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp b/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp new file mode 100644 index 00000000000..cf2011b1f2a --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.cpp @@ -0,0 +1,218 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include + +LOG_SETUP(".searchlib.common.bitvectorcache"); + +namespace search { + +BitVectorCache::BitVectorCache(GenerationHolder &genHolder) : + _lookupCount(0), + _needPopulation(false), + _lock(), + _keys(), + _chunks(), + _genHolder(genHolder) +{ +} + +BitVectorCache::~BitVectorCache() +{ +} + +void +BitVectorCache::computeCountVector(KeySet & keys, CountVector & v) const +{ + std::vector notFound; + std::vector keySets; + ChunkV chunks; + { + vespalib::LockGuard guard(_lock); + keySets.resize(_chunks.size()); + Key2Index::const_iterator end(_keys.end()); + for (Key k : keys) { + Key2Index::const_iterator found = _keys.find(k); + if (found != end) { + const KeyMeta & m = found->second; + keySets[m.chunkId()].insert(m.chunkIndex()); + } + } + chunks = _chunks; + } + for (Key k : notFound) { + keys.erase(k); + } + size_t index(0); + if (chunks.empty()) { + memset(&v[0], 0, v.size()); + } + for (const auto & chunk : chunks) { + if (index == 0) { + 
chunk->initializeCountVector(keySets[index++], v); + } else { + chunk->addCountVector(keySets[index++], v); + } + } +} + +BitVectorCache::KeySet +BitVectorCache::lookupCachedSet(const KeyAndCountSet & keys) +{ + KeySet cached(keys.size()*3); + vespalib::LockGuard guard(_lock); + _lookupCount++; + if (_lookupCount == 2000) { + _needPopulation = true; + } else if ((_lookupCount & 0x1fffff) == 0x100000) { + if (hasCostChanged(guard)) { + _needPopulation = true; + } + } + for (const auto & e : keys) { + auto found = _keys.find(e.first); + if (found != _keys.end()) { + KeyMeta & m = found->second; + m.lookup(); + if (m.isCached()) { + cached.insert(e.first); + } + } else { + _keys[e.first] = KeyMeta().lookup().bitCount(e.second); + } + } + return cached; +} + +BitVectorCache::SortedKeyMeta +BitVectorCache::getSorted(Key2Index & keys) +{ + std::vector> sorted; + sorted.reserve(keys.size()); + for (auto & e : keys) { + sorted.push_back({e.first, &e.second}); + } + std::sort(sorted.begin(), sorted.end(), + [&] (const auto & a, const auto & b) { + return a.second->cost() > b.second->cost(); + }); + return sorted; +} + +bool +BitVectorCache::hasCostChanged(const vespalib::LockGuard & guard) +{ + (void) guard; + if ( ! _chunks.empty()) { + SortedKeyMeta sorted(getSorted(_keys)); + double oldCached(0); + for (auto & e : sorted) { + const KeyMeta & m = *e.second; + if ( m.isCached() ) { + oldCached += m.cost(); + } + } + double newCached(0); + for (size_t i(0); i < sorted.size() && i < _chunks[0]->getKeyCapacity(); i++) { + const KeyMeta & m = *sorted[i].second; + newCached += m.cost(); + } + if (newCached > oldCached * 1.01) { // 1% change needed. 
+ return true; + } + } + return false; +} + +void +BitVectorCache::populate(Key2Index & newKeys, CondensedBitVector & chunk, const PopulateInterface & lookup) +{ + SortedKeyMeta sorted(getSorted(newKeys)); + + double sum(0); + for (auto & e : sorted) { + e.second->unCache(); + sum += e.second->cost(); + } + double accum(0.0); + uint32_t index(0); + for (const auto & e : sorted) { + KeyMeta & m = *e.second; + if (index >= chunk.getKeyCapacity()) { + assert( ! m.isCached()); + } else { + double percentage(m.cost()*100.0/sum); + accum += percentage; + m.chunkId(0); + m.chunkIndex(index); + LOG(info, "Populating bitvector %2d with feature %ld and %ld bits set. Cost is %8f = %2.2f%%, accumulated cost is %2.2f%%", + index, e.first, m.bitCount(), m.cost(), percentage, accum); + index++; + assert(m.isCached()); + assert(newKeys[e.first].isCached()); + assert(&m == &newKeys[e.first]); + PopulateInterface::Iterator::UP iterator = lookup.lookup(e.first); + for (int32_t docId(iterator->getNext()); docId >= 0; docId = iterator->getNext()) { + chunk.set(m.chunkIndex(), docId, true); + } + } + } +} + +void +BitVectorCache::populate(uint32_t sz, const PopulateInterface & lookup) +{ + vespalib::LockGuard guard1(_lock); + if (! 
_needPopulation) { + return; + } + Key2Index newKeys(_keys); + guard1.unlock(); + + CondensedBitVector::UP chunk(CondensedBitVector::create(sz, _genHolder)); + populate(newKeys, *chunk, lookup); + + vespalib::LockGuard guard2(_lock); + _chunks.push_back(std::move(chunk)); + _keys.swap(newKeys); + _needPopulation = false; +} + +void +BitVectorCache::set(Key key, uint32_t index, bool v) +{ + vespalib::LockGuard guard(_lock); + auto found = _keys.find(key); + if (found != _keys.end()) { + const KeyMeta & m(found->second); + if (m.isCached()) { + _chunks[m.chunkId()]->set(m.chunkIndex(), index, v); + } + } +} + +bool +BitVectorCache::get(Key key, uint32_t index) const +{ + (void) key; (void) index; + return false; +} + +void +BitVectorCache::removeIndex(uint32_t index) +{ + vespalib::LockGuard guard(_lock); + for (auto & chunk : _chunks) { + chunk->clearIndex(index); + } +} + + +void +BitVectorCache::adjustDocIdLimit(uint32_t docId) +{ + for (auto &chunk : _chunks) { + chunk->adjustDocIdLimit(docId); + } +} + +} diff --git a/searchlib/src/vespa/searchlib/common/bitvectorcache.h b/searchlib/src/vespa/searchlib/common/bitvectorcache.h new file mode 100644 index 00000000000..3405adaf1fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvectorcache.h @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { + +class PopulateInterface +{ +public: + class Iterator { + public: + typedef std::unique_ptr UP; + virtual ~Iterator() { } + virtual int32_t getNext() = 0; + }; + virtual ~PopulateInterface() { } + virtual Iterator::UP lookup(uint64_t key) const = 0; +}; + +class BitVectorCache +{ +public: + typedef uint64_t Key; + typedef vespalib::hash_set KeySet; + typedef std::vector> KeyAndCountSet; + typedef CondensedBitVector::CountVector CountVector; + typedef vespalib::GenerationHolder GenerationHolder; + + BitVectorCache(GenerationHolder &genHolder); + ~BitVectorCache(); + void computeCountVector(KeySet & keys, CountVector & v) const; + KeySet lookupCachedSet(const KeyAndCountSet & keys); + void set(Key key, uint32_t index, bool v); + bool get(Key key, uint32_t index) const; + void removeIndex(uint32_t index); + void adjustDocIdLimit(uint32_t docId); + void populate(uint32_t count, const PopulateInterface &); + bool needPopulation() const { return _needPopulation; } +private: + class KeyMeta { + public: + KeyMeta() : + _lookupCount(0), + _bitCount(0), + _chunkId(-1), + _chunkIndex(0) + { } + double cost() const { return _bitCount * _lookupCount; } + bool isCached() const { return _chunkId >= 0; } + size_t bitCount() const { return _bitCount; } + size_t chunkIndex() const { return _chunkIndex; } + size_t chunkId() const { return _chunkId; } + size_t lookupCount() const { return _lookupCount; } + KeyMeta & incBits() { _bitCount++; return *this; } + KeyMeta & decBits() { _bitCount--; return *this; } + KeyMeta & lookup() { _lookupCount++; return *this; } + KeyMeta & bitCount(uint32_t v) { _bitCount = v; return *this; } + KeyMeta & chunkId(uint32_t v) { _chunkId = v; return *this; } + KeyMeta & chunkIndex(uint32_t v) { _chunkIndex = v; return *this; } + KeyMeta & unCache() { _chunkId = -1; return *this; } + private: + size_t _lookupCount; + uint32_t _bitCount; + int32_t _chunkId; + uint32_t _chunkIndex; + }; 
+ typedef vespalib::hash_map Key2Index; + typedef std::vector> SortedKeyMeta; + typedef std::vector ChunkV; + + VESPA_DLL_LOCAL static SortedKeyMeta getSorted(Key2Index & keys); + VESPA_DLL_LOCAL static void populate(Key2Index & newKeys, CondensedBitVector & chunk, const PopulateInterface & lookup); + VESPA_DLL_LOCAL bool hasCostChanged(const vespalib::LockGuard &); + + uint64_t _lookupCount; + bool _needPopulation; + vespalib::Lock _lock; + Key2Index _keys; + ChunkV _chunks; + GenerationHolder &_genHolder; +}; + +} diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp new file mode 100644 index 00000000000..012fab70f51 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".bitvectoriterators"); + +#include "bitvectoriterator.h" +#include +#include +#include +#include + +namespace search +{ + +using fef::TermFieldMatchDataArray; +using fef::TermFieldMatchData; + +BitVectorIterator::BitVectorIterator(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : + _docIdLimit(std::min(docIdLimit, bv.size())), + _bv(bv), + _tfmd(matchData) +{ + assert(docIdLimit <= bv.size()); + _tfmd.reset(0); +} + +void +BitVectorIterator::initRange(uint32_t begin, uint32_t end) +{ + SearchIterator::initRange(begin, end); + if (begin >= _docIdLimit) { + setAtEnd(); + } else { + uint32_t docId = _bv.getFirstTrueBit(begin); + if (docId >= _docIdLimit) { + setAtEnd(); + } else { + setDocId(docId); + } + } +} + +void +BitVectorIterator::doSeek(uint32_t docId) +{ + if (__builtin_expect(docId >= _docIdLimit, false)) { + setAtEnd(); + } else if (_bv.testBit(docId)) { + setDocId(docId); + } +} + +void +BitVectorIterator::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + 
SearchIterator::visitMembers(visitor); + visit(visitor, "docIdLimit", _docIdLimit); + visit(visitor, "termfieldmatchdata.fieldId", _tfmd.getFieldId()); + visit(visitor, "termfieldmatchdata.docid", _tfmd.getDocId()); +} + +void +BitVectorIterator::doUnpack(uint32_t docId) +{ + _tfmd.resetOnlyDocId(docId); +} + +class BitVectorIteratorStrict : public BitVectorIterator +{ +public: + BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData); +private: + void doSeek(uint32_t docId) override; + Trinary is_strict() const override { return Trinary::True; } +}; + +BitVectorIteratorStrict::BitVectorIteratorStrict(const BitVector & bv, uint32_t docIdLimit, TermFieldMatchData & matchData) : + BitVectorIterator(bv, docIdLimit, matchData) +{ +} + +void +BitVectorIteratorStrict::doSeek(uint32_t docId) +{ + if (__builtin_expect(docId >= _docIdLimit, false)) { + setAtEnd(); + return; + } + + docId = _bv.getNextTrueBit(docId); + if (__builtin_expect(docId >= _docIdLimit, false)) { + setAtEnd(); + } else { + setDocId(docId); + } +} + +queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, const TermFieldMatchDataArray &matchData, bool strict) +{ + assert(matchData.size() == 1); + return create(bv, bv->size(), *matchData[0], strict); +} +queryeval::SearchIterator::UP BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit, TermFieldMatchData &matchData, bool strict) +{ + if (bv == NULL) { + return UP(new queryeval::EmptySearch()); + } else if (strict) { + return UP(new BitVectorIteratorStrict(*bv, docIdLimit, matchData)); + } else { + return UP(new BitVectorIterator(*bv, docIdLimit, matchData)); + } +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/bitvectoriterator.h b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h new file mode 100644 index 00000000000..ab8b506792b --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/bitvectoriterator.h @@ -0,0 +1,42 @@ 
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "bitvector.h" + + +namespace search +{ + +namespace fef { class TermFieldMatchDataArray; } +namespace fef { class TermFieldMatchData; } + +class BitVectorIterator : public queryeval::SearchIterator +{ +protected: + BitVectorIterator(const BitVector & other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData); + + uint32_t _docIdLimit; + const BitVector & _bv; +private: + void initRange(uint32_t begin, uint32_t end) override; + void visitMembers(vespalib::ObjectVisitor &visitor) const override; + void doSeek(uint32_t docId) override; + void doUnpack(uint32_t docId) override; + bool isBitVector() const override { return true; } + fef::TermFieldMatchData &_tfmd; +public: + const void * getBitValues() const { return _bv.getStart(); } + + Trinary is_strict() const override { return Trinary::False; } + virtual bool isStrict() const { return (is_strict() == Trinary::True); } + uint32_t getDocIdLimit() const { return _docIdLimit; } + static UP create(const BitVector *const other, const fef::TermFieldMatchDataArray &matchData, bool strict); + static UP create(const BitVector *const other, uint32_t docIdLimit, fef::TermFieldMatchData &matchData, bool strict); +}; + + +} // namespace search + + diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp new file mode 100644 index 00000000000..fa94eb5dca4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp @@ -0,0 +1,148 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include + +using vespalib::IllegalArgumentException; +using vespalib::make_string; +using vespalib::GenerationHolder; + +namespace search { + +namespace { + +template +class CondensedBitVectorT : public CondensedBitVector +{ +public: + CondensedBitVectorT(size_t sz, GenerationHolder &genHolder) : + _v(sz, 30, 1000, genHolder) + { + for (size_t i = 0; i < sz; ++i) { + _v.push_back(0); + } + } +private: + static uint8_t countBits(T v) { + return ((sizeof(T)) <= 4) + ? __builtin_popcount(v) + : __builtin_popcountl(v); + } + T computeMask(const KeySet & keys) const __attribute__ ((noinline)) { + T mask(0); + for (size_t i : keys) { + assert(i < getKeyCapacity()); + mask |= (B << i); + } + return mask; + } + static const uint64_t B = 1ul; + void initializeCountVector(const KeySet & keys, CountVector & cv) const override { + struct S { + void operator () (uint8_t & cv, uint8_t v) { cv = v; } + }; + computeCountVector(computeMask(keys), cv, S()); + } + void addCountVector(const KeySet & keys, CountVector & cv) const override { + struct S { + void operator () (uint8_t & cv, uint8_t v) { cv += v; } + }; + computeCountVector(computeMask(keys), cv, S()); + } + + void clearIndex(uint32_t index) override { + _v[index] = 0; + } + + template + VESPA_DLL_LOCAL void computeCountVector(T mask, CountVector & cv, F func) const __attribute__((noinline)); + + template + VESPA_DLL_LOCAL void computeTail(T mask, CountVector & cv, F func, size_t i) const __attribute__((noinline)); + + void set(Key key, uint32_t index, bool v) override { + assert(key < getKeyCapacity()); + if (v) { + _v[index] |= B << key; + } else { + _v[index] &= ~(B << key); + } + } + bool get(Key key, uint32_t index) const override { + assert(key < getKeyCapacity()); + return (_v[index] & (B << key)) != 0; + } + + size_t getKeyCapacity() const override { return sizeof(T)*8; } + size_t getCapacity() const override { return _v.capacity(); } + size_t getSize() const { return _v.size(); } + void 
adjustDocIdLimit(uint32_t docId); + attribute::RcuVectorBase _v; +}; + +template +template +VESPA_DLL_LOCAL void +CondensedBitVectorT::computeCountVector(T mask, CountVector & cv, F func) const +{ + size_t i(0); + const size_t UNROLL = 2; + uint8_t *d = &cv[0]; + const T *v = &_v[0]; + for (const size_t m(cv.size() - (UNROLL - 1)); i < m; i+=UNROLL) { + for (size_t j(0); j < UNROLL; j++) { + func(d[i+j], countBits(v[i+j] & mask)); + } + } + computeTail(mask, cv, func, i); +} + +template +template +VESPA_DLL_LOCAL void +CondensedBitVectorT::computeTail(T mask, CountVector & cv, F func, size_t i) const +{ + for (; i < cv.size(); i++) { + func(cv[i], countBits(_v[i] & mask)); + } +} + + +template +void +CondensedBitVectorT:: adjustDocIdLimit(uint32_t docId) +{ + while (_v.size() <= docId) { + _v.push_back(0); + } +} + + +void throwIllegalKey(size_t numKeys, size_t key) __attribute__((noinline)); + +void throwIllegalKey(size_t numKeys, size_t key) +{ + throw IllegalArgumentException(make_string("All %ld possible keys are used. Key %ld is not added", numKeys, key), VESPA_STRLOC); +} + +} + +CondensedBitVector::~CondensedBitVector() +{ +} + +void +CondensedBitVector::addKey(Key key) const +{ + if ( ! hasKey(key)) { + throwIllegalKey(getKeyCapacity(), key); + } +} + +CondensedBitVector::UP +CondensedBitVector::create(size_t size, GenerationHolder &genHolder) +{ + return UP(new CondensedBitVectorT(size, genHolder)); +} + +} diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h new file mode 100644 index 00000000000..b8d97cbcb07 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include + +namespace search { + +class CondensedBitVector +{ +public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + typedef uint32_t Key; + typedef std::set KeySet; + typedef vespalib::ArrayRef CountVector; + + virtual ~CondensedBitVector(); + + virtual void initializeCountVector(const KeySet & keys, CountVector & v) const = 0; + virtual void addCountVector(const KeySet & keys, CountVector & v) const = 0; + virtual void set(Key key, uint32_t index, bool v) = 0; + virtual bool get(Key key, uint32_t index) const = 0; + virtual void clearIndex(uint32_t index) = 0; + virtual size_t getKeyCapacity() const = 0; + virtual size_t getCapacity() const = 0; + virtual size_t getSize() const = 0; + virtual void adjustDocIdLimit(uint32_t docId) = 0; + bool hasKey(Key key) const { return key < getKeyCapacity(); } + void addKey(Key key) const; + static CondensedBitVector::UP create(size_t size, vespalib::GenerationHolder &genHolder); +private: + typedef vespalib::hash_map Key2Index; + Key2Index _keys; +}; + +} diff --git a/searchlib/src/vespa/searchlib/common/converters.h b/searchlib/src/vespa/searchlib/common/converters.h new file mode 100644 index 00000000000..ccd15c6105c --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/converters.h @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace common { + +class PassThroughConverter : public BlobConverter +{ +private: + virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const; +}; + +class LowercaseConverter : public BlobConverter +{ +public: + LowercaseConverter(); +private: + virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const; + mutable vespalib::string _buffer; +}; + +class UcaConverter : public BlobConverter +{ +public: + typedef icu::Collator Collator; + UcaConverter(const vespalib::string & locale, const vespalib::string & strength); + const Collator & getCollator() const { return *_collator; } +private: + struct Buffer { + vespalib::string _data; + uint8_t *ptr() { return (uint8_t *)_data.begin(); } + int32_t siz() { return _data.size(); } + Buffer() : _data() { + reserve(_data.capacity()-8); // do not cause extra malloc() by default + } + void reserve(size_t size) { + _data.reserve(size+8); + _data.resize(size); + _data[size+1] = '\0'; + _data[size+2] = '\0'; + _data[size+3] = 'd'; + _data[size+4] = 'e'; + _data[size+5] = 'a'; + _data[size+6] = 'd'; + _data[size+7] = '\0'; + } + void check() { + assert(_data[siz()+3] == 'd'); + assert(_data[siz()+4] == 'e'); + assert(_data[siz()+5] == 'a'); + assert(_data[siz()+6] == 'd'); + } + }; + int utf8ToUtf16(const vespalib::ConstBufferRef & src) const; + virtual vespalib::ConstBufferRef onConvert(const vespalib::ConstBufferRef & src) const; + mutable Buffer _buffer; + mutable std::vector _u16Buffer; + std::unique_ptr _collator; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/docstamp.h b/searchlib/src/vespa/searchlib/common/docstamp.h new file mode 100644 index 00000000000..d1f22eb5abb --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/docstamp.h @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Every document is real-time now, so the docstamp no longer changes the way
// it used to. fdispatch still treats 0 as invalid, and 42 was picked
// arbitrarily to represent a valid value. Both are defined here so that there
// is a single source of truth.
struct DocStamp {
    // Docstamp reported for valid data.
    static uint32_t good() {
        const uint32_t validStamp = 42;
        return validStamp;
    }

    // Docstamp that fdispatch interprets as invalid.
    static uint32_t bad() {
        const uint32_t invalidStamp = 0;
        return invalidStamp;
    }
};
+ */ +class DocumentLocations +{ + +private: + search::AttributeGuard::UP _vec_guard; + const search::attribute::IAttributeVector *_vec; + +public: + DocumentLocations(void); + + void setVecGuard(search::AttributeGuard::UP guard) { + _vec_guard = std::move(guard); + setVec(_vec_guard.get()->get()); + } + + void setVec(const search::attribute::IAttributeVector &vec) { + _vec = &vec; + } + + const search::attribute::IAttributeVector *getVec() const { + return _vec; + } +}; + + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/documentsummary.cpp b/searchlib/src/vespa/searchlib/common/documentsummary.cpp new file mode 100644 index 00000000000..03faf45c1a9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/documentsummary.cpp @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".searchlib.docsummary.documentsummary"); + +using vespalib::getLastErrorString; + +namespace search { +namespace docsummary { + +bool +DocumentSummary::readDocIdLimit(const vespalib::string &dir, uint32_t &count) +{ + char numbuf[20]; + Fast_BufferedFile qcntfile(4096); + unsigned int qcnt; + vespalib::string qcntname; + const char *p; + + qcntname = dir + "/docsum.qcnt"; + + count = qcnt = 0; + // XXX no checking for success + qcntfile.ReadOpen(qcntname.c_str()); + if (!qcntfile.IsOpened() || qcntfile.Eof()) + return false; + p = qcntfile.ReadLine(numbuf, sizeof(numbuf)); + while (*p >= '0' && *p <= '9') + qcnt = qcnt * 10 + *p++ - '0'; + qcntfile.Close(); + count = qcnt; + return true; +} + + +bool +DocumentSummary::writeDocIdLimit(const vespalib::string &dir, uint32_t count) +{ + vespalib::string qcntname = dir + "/docsum.qcnt"; + Fast_BufferedFile qcntfile(new FastOS_File); + + qcntfile.WriteOpen(qcntname.c_str()); + if 
(!qcntfile.IsOpened()) { + LOG(error, "Could not open %s: %s", qcntname.c_str(), getLastErrorString().c_str()); + return false; + } + qcntfile.addNum(count, 0, ' '); + qcntfile.WriteByte('\n'); + qcntfile.Sync(); + qcntfile.Close(); + return true; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/common/documentsummary.h b/searchlib/src/vespa/searchlib/common/documentsummary.h new file mode 100644 index 00000000000..86e45ca921d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/documentsummary.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +namespace search { +namespace docsummary { + +class DocumentSummary +{ +public: + static bool + readDocIdLimit(const vespalib::string &dir, uint32_t &docIdLimit); + + static bool + writeDocIdLimit(const vespalib::string &dir, uint32_t docIdLimit); +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/feature.h b/searchlib/src/vespa/searchlib/common/feature.h new file mode 100644 index 00000000000..b0eb8d3e463 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/feature.h @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { + +typedef double feature_t; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/featureset.cpp b/searchlib/src/vespa/searchlib/common/featureset.cpp new file mode 100644 index 00000000000..826382f0e8e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/featureset.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include + +namespace search { + +FeatureSet::FeatureSet() + : _names(), + _docIds(), + _values() +{ +} + +FeatureSet::FeatureSet(const StringVector &names, + uint32_t expectDocs) + : _names(names), + _docIds(), + _values() +{ + _docIds.reserve(expectDocs); + _values.reserve(expectDocs * names.size()); +} + +bool +FeatureSet::equals(const FeatureSet &rhs) const +{ + return ((_docIds == rhs._docIds) && + (_values == rhs._values) && + (_names == rhs._names)); // do names last, as they are most likely to match +} + +uint32_t +FeatureSet::addDocId(uint32_t docId) +{ + _docIds.push_back(docId); + _values.resize(_names.size() * _docIds.size()); + return (_docIds.size() - 1); +} + +bool +FeatureSet::contains(const std::vector &docIds) const +{ + typedef std::vector::const_iterator ITR; + ITR myPos = _docIds.begin(); + ITR myEnd = _docIds.end(); + ITR pos = docIds.begin(); + ITR end = docIds.end(); + + for (; pos != end; ++pos) { + while (myPos != myEnd && *myPos < *pos) { + ++myPos; + } + if (myPos == myEnd || *myPos != *pos) { + return false; + } + ++myPos; + } + return true; +} + +feature_t * +FeatureSet::getFeaturesByIndex(uint32_t idx) +{ + if (idx >= _docIds.size()) { + return 0; + } + return &(_values[idx * _names.size()]); +} + +const feature_t * +FeatureSet::getFeaturesByDocId(uint32_t docId) const +{ + uint32_t low = 0; + uint32_t hi = _docIds.size(); + while (low < hi) { + uint32_t pos = (low + hi) >> 1; + uint32_t val = _docIds[pos]; + if (val < docId) { + low = pos + 1; + } else if (val > docId) { + hi = pos; + } else { + return &(_values[pos * _names.size()]); + } + } + return 0; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/featureset.h b/searchlib/src/vespa/searchlib/common/featureset.h new file mode 100644 index 00000000000..cd1f0595da7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/featureset.h @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "feature.h" + +namespace search { + +/** + * This class holds information about a set of features for a set of + * documents. + **/ +class FeatureSet +{ +public: + typedef vespalib::string string; + typedef std::vector StringVector; +private: + StringVector _names; + std::vector _docIds; + std::vector _values; + + FeatureSet(const FeatureSet &); + FeatureSet & operator=(const FeatureSet &); + +public: + /** + * Convenience typedef for a shared pointer to an object of this + * class. + **/ + typedef std::shared_ptr SP; + + /** + * Create a new object without any feature information. + **/ + FeatureSet(); + + /** + * Create a new object that will contain information about the + * given features. + * + * @param names names of all features + * @param expectDocs the number of documents we expect to store information about + **/ + FeatureSet(const StringVector &names, uint32_t expectDocs); + + /** + * Check whether this object is equal to the given object. + * + * @return true if the objects are equal. + **/ + bool equals(const FeatureSet &rhs) const; + + /** + * Obtain the names of all the features tracked by this object. + * + * @return feature names + **/ + const StringVector &getNames() const { return _names; } + + /** + * Obtain the number of features this object contains information + * about. + * + * @return number of features + **/ + uint32_t numFeatures() const { return _names.size(); } + + /** + * Obtain the number of documents this object contains information + * about. + * + * @return number of documents. + **/ + uint32_t numDocs() const { return _docIds.size(); } + + /** + * Add a document to the set of documents this object contains + * information about. Documents must be added in ascending + * order. When a new document is added, all features are + * initialized to 0.0. 
The return value from this method can be + * used together with the @ref getFeaturesByIndex method to set + * the actual feature values. The ordering among features are + * assumed to be the same as in the name vector passed to the + * constructor. + * + * @return the index of the document just added + * @param docid the id of the document to add + **/ + uint32_t addDocId(uint32_t docid); + + /** + * Check whether this object contains information about the given + * set of documents. The given set of documents must be sorted on + * document id; lowest first. + * + * @return true if this object contains information about all the given documents + * @param docIds the documents we want information about + **/ + bool contains(const std::vector &docIds) const; + + /** + * Obtain the feature values belonging to a document based on the + * index into the internal docid array. This method is intended + * for use only when filling in the feature values during object + * initialization. + * + * @return pointer to features + * @param idx index into docid array + **/ + feature_t *getFeaturesByIndex(uint32_t idx); + + /** + * Obtain the feature values belonging to a document based on the + * docid value. This method is intended for lookup when generating + * the summary features or rank features docsum field. + * + * @return pointer to features + * @param docId docid value + **/ + const feature_t *getFeaturesByDocId(uint32_t docId) const; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp new file mode 100644 index 00000000000..c5d1399c47d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.fileheadercontext"); +#include "fileheadercontext.h" +#include + +namespace search +{ + +namespace common +{ + +using vespalib::GenericHeader; + +FileHeaderContext::FileHeaderContext(void) +{ +} + + +FileHeaderContext::~FileHeaderContext(void) +{ +} + + +void +FileHeaderContext::addCreateAndFreezeTime(GenericHeader &header) +{ + typedef GenericHeader::Tag Tag; + fastos::TimeStamp ts(fastos::ClockSystem::now()); + header.putTag(Tag("createTime", ts.us())); + header.putTag(Tag("freezeTime", 0)); +} + + +void +FileHeaderContext::setFreezeTime(GenericHeader &header) +{ + typedef GenericHeader::Tag Tag; + if (header.hasTag("freezeTime") && + header.getTag("freezeTime").getType() == Tag::TYPE_INTEGER) { + fastos::TimeStamp ts(fastos::ClockSystem::now()); + header.putTag(Tag("freezeTime", ts.us())); + } +} + + +} // namespace common + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.h b/searchlib/src/vespa/searchlib/common/fileheadercontext.h new file mode 100644 index 00000000000..cb0f31edc38 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.h @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace vespalib +{ + +class GenericHeader; + +} + +namespace search +{ + +namespace common +{ + +class FileHeaderContext +{ +public: + FileHeaderContext(void); + + virtual + ~FileHeaderContext(void); + + virtual void + addTags(vespalib::GenericHeader &header, + const vespalib::string &name) const = 0; + + static void + addCreateAndFreezeTime(vespalib::GenericHeader &header); + + static void + setFreezeTime(vespalib::GenericHeader &header); +}; + + +} // namespace common + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp new file mode 100644 index 00000000000..2c50f20df30 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.cpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".common.foregroundtaskexecutor"); + +#include "foregroundtaskexecutor.h" +#include + +using vespalib::ThreadStackExecutor; + +namespace search +{ + +namespace +{ + +constexpr uint32_t stackSize = 128 * 1024; + +} + + +ForegroundTaskExecutor::ForegroundTaskExecutor() +{ +} + +ForegroundTaskExecutor::~ForegroundTaskExecutor() +{ +} + + +void +ForegroundTaskExecutor::executeTask(uint64_t id, + vespalib::Executor::Task::UP task) +{ + (void) id; + task->run(); +} + + +void +ForegroundTaskExecutor::sync() +{ +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h new file mode 100644 index 00000000000..ee481f5e496 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/foregroundtaskexecutor.h @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Executor behind the ISequencedTaskExecutor interface, which is meant for
 * running multiple tasks in parallel while tasks with the same id are run
 * in sequence.
 *
 * Currently, this is a dummy version that runs everything in the foreground:
 * executeTask() runs the task on the calling thread before returning.
 */
class ForegroundTaskExecutor : public ISequencedTaskExecutor
{
public:
    ForegroundTaskExecutor();

    ~ForegroundTaskExecutor();

    /**
     * Run the given task right away on the calling thread.
     *
     * @param id   sequencing id of the task; ignored by this implementation
     * @param task the task to run; ownership is taken
     */
    virtual void executeTask(uint64_t id,
                             vespalib::Executor::Task::UP task) override;

    /**
     * No-op in this implementation, as no task is ever left pending.
     */
    virtual void sync() override;
};
/**
 * Local document id stored as four bytes in big-endian order, so that the
 * in-memory layout does not depend on the host byte order.
 **/
class Lid {
public:
    /** Create a lid with the value 0. */
    Lid() { memset(_lid, 0, sizeof(_lid)); }
    /** Create a lid holding the given value. */
    Lid(uint32_t l) { lid(l);}
    /** @return the stored value */
    uint32_t lid() const
    {
        // Widen each byte to uint32_t before shifting; shifting the promoted
        // int left by 24 would overflow for bytes >= 0x80.
        return (static_cast<uint32_t>(_lid[0]) << 24) |
               (static_cast<uint32_t>(_lid[1]) << 16) |
               (static_cast<uint32_t>(_lid[2]) << 8) |
                static_cast<uint32_t>(_lid[3]);
    }
    /** Store the given value, most significant byte first. */
    void lid(uint32_t v)
    {
        _lid[0] = (v >> 24) & 0xff;
        _lid[1] = (v >> 16) & 0xff;
        _lid[2] = (v >>  8) & 0xff;
        _lid[3] = v & 0xff;
    }
    /**
     * Three-way compare on the numeric value.
     *
     * @return negative, zero or positive when this lid is less than, equal
     *         to or greater than b
     **/
    int cmp(const Lid & b) const
    {
        // Compare instead of subtracting: an unsigned difference converted
        // to int has the wrong sign when the values are more than 2^31 apart.
        const uint32_t lhs = lid();
        const uint32_t rhs = b.lid();
        if (lhs < rhs) {
            return -1;
        }
        return (lhs > rhs) ? 1 : 0;
    }
private:
    typedef unsigned char LidT[4];
    LidT _lid;
};

/**
 * Global document id: an opaque 12 byte value ordered bytewise.
 **/
class Gid {
public:
    /** Create a gid with all bytes set to 0. */
    Gid() { memset(_gid, 0, sizeof(_gid)); }
    /** Copy sizeof(_gid) (12) bytes from g, which must provide at least that many. */
    Gid(const char *g) { memcpy(_gid, g, sizeof(_gid)); }
    /** @return pointer to the 12 raw bytes (not zero terminated) */
    const char * gid() const { return _gid; }
    /** Bytewise three-way compare (memcmp semantics). */
    int cmp(const Gid & b) const { return memcmp(_gid, b._gid, sizeof(_gid)); }
private:
    typedef char GidT[12];
    GidT _gid;
};

// Relational operators for Gid, all defined in terms of Gid::cmp().
inline bool operator  < (const Gid & a, const Gid & b) { return a.cmp(b)  < 0; }
inline bool operator <= (const Gid & a, const Gid & b) { return a.cmp(b) <= 0; }
inline bool operator == (const Gid & a, const Gid & b) { return a.cmp(b) == 0; }
inline bool operator != (const Gid & a, const Gid & b) { return a.cmp(b) != 0; }
inline bool operator  > (const Gid & a, const Gid & b) { return a.cmp(b)  > 0; }
inline bool operator >= (const Gid & a, const Gid & b) { return a.cmp(b) >= 0; }
mode 100644 index 00000000000..f3a4e87257c --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/growablebitvector.cpp @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "growablebitvector.h" + +///////////////////////////////// +namespace search +{ + +using vespalib::GenerationHeldBase; +using vespalib::GenerationHolder; + +GrowableBitVector::GrowableBitVector(Index newSize, + Index newCapacity, + GenerationHolder &generationHolder) + : AllocatedBitVector(newSize, newCapacity, nullptr, 0), + _generationHolder(generationHolder) +{ + assert(newSize <= newCapacity); +} + +void +GrowableBitVector::reserve(Index newCapacity) +{ + Index oldCapacity = capacity(); + assert(newCapacity >= oldCapacity); + if (newCapacity == oldCapacity) + return; + hold(grow(size(), newCapacity)); +} + +void GrowableBitVector::hold(GenerationHeldBase::UP v) +{ + if (v) { + _generationHolder.hold(std::move(v)); + } +} + +void +GrowableBitVector::shrink(Index newCapacity) +{ + Index oldCapacity = capacity(); + assert(newCapacity <= oldCapacity); + hold(grow(newCapacity, std::max(capacity(), newCapacity))); +} + +void +GrowableBitVector::extend(Index newCapacity) +{ + hold(grow(newCapacity, std::max(capacity(), newCapacity))); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/growablebitvector.h b/searchlib/src/vespa/searchlib/common/growablebitvector.h new file mode 100644 index 00000000000..799babaa78d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/growablebitvector.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * AllocatedBitVector whose size and capacity can be changed after
 * construction. The result of each internal grow() call is passed to the
 * generation holder supplied at construction when it is non-null.
 */
class GrowableBitVector : public AllocatedBitVector
{
public:
    /**
     * @param newSize           initial size; must not exceed newCapacity
     * @param newCapacity       initial capacity
     * @param generationHolder  receives buffers replaced by later resizing;
     *                          held by reference, so it must outlive this object
     */
    GrowableBitVector(Index newSize,
                      Index newCapacity,
                      GenerationHolder &generationHolder);

    // Increase the capacity to newCapacity while keeping the current size.
    // newCapacity must be >= the current capacity; equal is a no-op.
    void reserve(Index newCapacity);
    // Set the size to newCapacity, which must be <= the current capacity.
    // NOTE(review): despite its name the argument is used as the new size.
    void shrink(Index newCapacity);
    // Set the size to newCapacity, raising the capacity if it is too small.
    // NOTE(review): despite its name the argument is used as the new size.
    void extend(Index newCapacity);
private:
    // Hands v to _generationHolder unless it is empty.
    VESPA_DLL_LOCAL void hold(GenerationHeldBase::UP v);
    GenerationHolder &_generationHolder;
};
+#pragma once + +#include + +#define CID_AttributeVector SEARCHLIB_CID(1) +#define CID_NumericAttribute SEARCHLIB_CID(2) +#define CID_IntegerAttribute SEARCHLIB_CID(3) +#define CID_FloatingPointAttribute SEARCHLIB_CID(4) +#define CID_StringAttribute SEARCHLIB_CID(5) +#define CID_Int8Attribute SEARCHLIB_CID(6) +#define CID_Int16Attribute SEARCHLIB_CID(7) +#define CID_Int32Attribute SEARCHLIB_CID(8) +#define CID_Int64Attribute SEARCHLIB_CID(9) +#define CID_UInt8Attribute SEARCHLIB_CID(10) +#define CID_UInt16Attribute SEARCHLIB_CID(11) +#define CID_UInt32Attribute SEARCHLIB_CID(12) +#define CID_UInt64Attribute SEARCHLIB_CID(13) +#define CID_FloatAttribute SEARCHLIB_CID(14) +#define CID_DoubleAttribute SEARCHLIB_CID(15) +#define CID_AttributeVectorBase SEARCHLIB_CID(16) + +#define CID_Aggregator SEARCHLIB_CID(20) +#define CID_MultiScalarIntegerAggregator SEARCHLIB_CID(21) +#define CID_MultiScalarFloatingPointAggregator SEARCHLIB_CID(22) +#define CID_BucketAggregator SEARCHLIB_CID(23) +#define CID_UniqueAggregator SEARCHLIB_CID(24) +#define CID_FixedWidthAggregator SEARCHLIB_CID(25) + +#define CID_BucketList SEARCHLIB_CID(30) +#define CID_EnumBucketList SEARCHLIB_CID(31) +#define CID_IntBucketList SEARCHLIB_CID(32) +#define CID_FloatBucketList SEARCHLIB_CID(33) +#define CID_StringBucketList SEARCHLIB_CID(34) + +#define CID_search_expression_ExpressionNode SEARCHLIB_CID(40) +#define CID_search_expression_ResultNode SEARCHLIB_CID(41) +#define CID_search_expression_FunctionNode SEARCHLIB_CID(42) +#define CID_search_expression_UnaryFunctionNode SEARCHLIB_CID(43) +#define CID_search_expression_BinaryFunctionNode SEARCHLIB_CID(44) +#define CID_search_expression_MultiArgFunctionNode SEARCHLIB_CID(45) +#define CID_search_expression_UnaryBitFunctionNode SEARCHLIB_CID(46) +#define CID_search_expression_BitFunctionNode SEARCHLIB_CID(47) +#define CID_search_expression_DocumentAccessorNode SEARCHLIB_CID(48) +#define CID_search_expression_ConstantNode SEARCHLIB_CID(49) + +#define 
CID_search_expression_NumericResultNode SEARCHLIB_CID(50) +#define CID_search_expression_IntegerResultNode SEARCHLIB_CID(51) +#define CID_search_expression_FloatResultNode SEARCHLIB_CID(52) +#define CID_search_expression_StringResultNode SEARCHLIB_CID(53) +#define CID_search_expression_RawResultNode SEARCHLIB_CID(54) +#define CID_search_expression_AttributeNode SEARCHLIB_CID(55) +#define CID_search_expression_DocumentFieldNode SEARCHLIB_CID(56) +#define CID_search_expression_NullResultNode SEARCHLIB_CID(57) +#define CID_search_expression_FieldValue2ResultNode \ + SEARCHLIB_CID(58) +#define CID_search_expression_RelevanceNode SEARCHLIB_CID(59) +#define CID_search_expression_InterpolatedLookup SEARCHLIB_CID(39) +#define CID_search_expression_ArrayAtLookup SEARCHLIB_CID(38) +#define CID_search_expression_ArrayOperationNode SEARCHLIB_CID(37) + +#define CID_search_expression_NegateFunctionNode SEARCHLIB_CID(60) +#define CID_search_expression_AddFunctionNode SEARCHLIB_CID(61) +#define CID_search_expression_MultiplyFunctionNode SEARCHLIB_CID(62) +#define CID_search_expression_DivideFunctionNode SEARCHLIB_CID(63) +#define CID_search_expression_ModuloFunctionNode SEARCHLIB_CID(64) +#define CID_search_expression_MinFunctionNode SEARCHLIB_CID(65) +#define CID_search_expression_MaxFunctionNode SEARCHLIB_CID(66) +#define CID_search_expression_AndFunctionNode SEARCHLIB_CID(67) +#define CID_search_expression_OrFunctionNode SEARCHLIB_CID(68) +#define CID_search_expression_XorFunctionNode SEARCHLIB_CID(69) +#define CID_search_expression_MD5BitFunctionNode SEARCHLIB_CID(70) +#define CID_search_expression_XorBitFunctionNode SEARCHLIB_CID(71) +#define CID_search_expression_CatFunctionNode SEARCHLIB_CID(72) +#define CID_search_expression_GetDocIdNamespaceSpecificFunctionNode SEARCHLIB_CID(73) +#define CID_search_expression_GetYMUMChecksumFunctionNode SEARCHLIB_CID(74) +#define CID_search_expression_TimeStampFunctionNode SEARCHLIB_CID(75) +#define 
CID_search_expression_RangeBucketPreDefFunctionNode SEARCHLIB_CID(76) +#define CID_search_expression_FixedWidthBucketFunctionNode SEARCHLIB_CID(77) +#define CID_search_expression_NumericFunctionNode SEARCHLIB_CID(78) +#define CID_search_expression_ExpressionTree SEARCHLIB_CID(79) + +#define CID_search_aggregation_AggregationResult SEARCHLIB_CID(80) +#define CID_search_aggregation_CountAggregationResult SEARCHLIB_CID(81) +#define CID_search_aggregation_SumAggregationResult SEARCHLIB_CID(82) +#define CID_search_aggregation_MaxAggregationResult SEARCHLIB_CID(83) +#define CID_search_aggregation_MinAggregationResult SEARCHLIB_CID(84) +#define CID_search_aggregation_AverageAggregationResult SEARCHLIB_CID(85) +#define CID_search_aggregation_XorAggregationResult SEARCHLIB_CID(86) +#define CID_search_aggregation_HitsAggregationResult SEARCHLIB_CID(87) +#define CID_search_aggregation_ExpressionCountAggregationResult \ + SEARCHLIB_CID(88) + +#define CID_search_aggregation_Group SEARCHLIB_CID(90) +#define CID_search_aggregation_Grouping SEARCHLIB_CID(91) +#define CID_search_aggregation_GroupingLevel SEARCHLIB_CID(93) +#define CID_search_aggregation_Hit SEARCHLIB_CID(94) +#define CID_search_aggregation_FS4Hit SEARCHLIB_CID(95) +#define CID_search_aggregation_VdsHit SEARCHLIB_CID(96) +#define CID_search_aggregation_HitList SEARCHLIB_CID(97) + +#define CID_search_expression_BucketResultNode SEARCHLIB_CID(100) +#define CID_search_expression_IntegerBucketResultNode SEARCHLIB_CID(101) +#define CID_search_expression_FloatBucketResultNode SEARCHLIB_CID(102) +#define CID_search_expression_StringBucketResultNode SEARCHLIB_CID(103) +#define CID_search_expression_Int8ResultNode SEARCHLIB_CID(104) +#define CID_search_expression_Int16ResultNode SEARCHLIB_CID(105) +#define CID_search_expression_Int32ResultNode SEARCHLIB_CID(106) +#define CID_search_expression_Int64ResultNode SEARCHLIB_CID(107) + +#define CID_search_expression_ResultNodeVector SEARCHLIB_CID(108) +#define 
CID_search_expression_IntegerResultNodeVector SEARCHLIB_CID(109) +#define CID_search_expression_FloatResultNodeVector SEARCHLIB_CID(110) +#define CID_search_expression_StringResultNodeVector SEARCHLIB_CID(111) +#define CID_search_expression_IntegerBucketResultNodeVector SEARCHLIB_CID(112) +#define CID_search_expression_FloatBucketResultNodeVector SEARCHLIB_CID(113) +#define CID_search_expression_StringBucketResultNodeVector SEARCHLIB_CID(114) +#define CID_search_expression_RawResultNodeVector SEARCHLIB_CID(115) +#define CID_search_expression_Int8ResultNodeVector SEARCHLIB_CID(116) +#define CID_search_expression_Int16ResultNodeVector SEARCHLIB_CID(117) +#define CID_search_expression_Int32ResultNodeVector SEARCHLIB_CID(118) +#define CID_search_expression_Int64ResultNodeVector SEARCHLIB_CID(119) +#define CID_search_expression_DefaultValue SEARCHLIB_CID(120) +#define CID_search_expression_SingleResultNode SEARCHLIB_CID(121) +#define CID_search_expression_EnumResultNode SEARCHLIB_CID(122) +#define CID_search_expression_EnumResultNodeVector SEARCHLIB_CID(123) +#define CID_search_expression_PositiveInfinityResultNode SEARCHLIB_CID(124) +#define CID_search_expression_RawBucketResultNode SEARCHLIB_CID(125) +#define CID_search_expression_RawBucketResultNodeVector SEARCHLIB_CID(126) +#define CID_search_expression_AttributeResult SEARCHLIB_CID(127) +#define CID_search_expression_GeneralResultNodeVector SEARCHLIB_CID(128) +#define CID_search_expression_EnumAttributeResult SEARCHLIB_CID(129) + +#define CID_search_expression_StrLenFunctionNode SEARCHLIB_CID(130) +#define CID_search_expression_ToStringFunctionNode SEARCHLIB_CID(131) +#define CID_search_expression_NumElemFunctionNode SEARCHLIB_CID(132) +#define CID_search_expression_StrCatFunctionNode SEARCHLIB_CID(133) +#define CID_search_expression_ToFloatFunctionNode SEARCHLIB_CID(134) +#define CID_search_expression_ToIntFunctionNode SEARCHLIB_CID(135) +#define CID_search_expression_MathFunctionNode SEARCHLIB_CID(136) +#define 
CID_search_expression_SortFunctionNode SEARCHLIB_CID(137) +#define CID_search_expression_ReverseFunctionNode SEARCHLIB_CID(138) +#define CID_search_expression_ZCurveFunctionNode SEARCHLIB_CID(139) +#define CID_search_expression_UcaFunctionNode SEARCHLIB_CID(140) +#define CID_search_expression_ToRawFunctionNode SEARCHLIB_CID(141) +#define CID_search_expression_AggregationRefNode SEARCHLIB_CID(142) +#define CID_search_expression_NormalizeSubjectFunctionNode SEARCHLIB_CID(143) +#define CID_search_expression_DebugWaitFunctionNode SEARCHLIB_CID(144) + +#define CID_search_QueryNode SEARCHLIB_CID(150) +#define CID_search_Query SEARCHLIB_CID(151) +#define CID_search_QueryTerm SEARCHLIB_CID(152) +#define CID_search_QueryConnector SEARCHLIB_CID(153) +#define CID_search_AndQueryNode SEARCHLIB_CID(154) +#define CID_search_AndNotQueryNode SEARCHLIB_CID(155) +#define CID_search_OrQueryNode SEARCHLIB_CID(156) +#define CID_search_EquivQueryNode SEARCHLIB_CID(157) +#define CID_search_PhraseQueryNode SEARCHLIB_CID(158) +#define CID_search_NotQueryNode SEARCHLIB_CID(159) +#define CID_search_NearQueryNode SEARCHLIB_CID(160) +#define CID_search_ONearQueryNode SEARCHLIB_CID(161) +#define CID_search_TrueNode SEARCHLIB_CID(162) + +#define CID_PredicateAttribute SEARCHLIB_CID(163) + +#define CID_search_NormalSketch SEARCHLIB_CID(170) +#define CID_search_SparseSketch SEARCHLIB_CID(171) diff --git a/searchlib/src/vespa/searchlib/common/idestructorcallback.h b/searchlib/src/vespa/searchlib/common/idestructorcallback.h new file mode 100644 index 00000000000..bcc6a5a78bb --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/idestructorcallback.h @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +namespace search +{ + +/** + * Interface for class that performs a callback when instance is + * destroyed. 
Typically a shared pointer to an instance is passed + * around to multiple worker threads that performs portions of a + * larger task before dropping the shared pointer, triggering the + * callback when all worker threads have completed. + */ +class IDestructorCallback +{ +public: + virtual ~IDestructorCallback() { } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/idocumentmetastore.h b/searchlib/src/vespa/searchlib/common/idocumentmetastore.h new file mode 100644 index 00000000000..82db8df4947 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/idocumentmetastore.h @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "lid_usage_stats.h" +#include +#include +#include + +namespace search { + +/** + * Meta data for a single document. + **/ +struct DocumentMetaData { + typedef uint32_t DocId; + DocId lid; + storage::spi::Timestamp timestamp; + document::BucketId bucketId; + document::GlobalId gid; + bool removed; + + typedef std::vector Vector; + + DocumentMetaData() + : lid(0), + timestamp(0), + bucketId(), + gid(), + removed(false) + { + } + + DocumentMetaData(DocId lid_, + storage::spi::Timestamp timestamp_, + document::BucketId bucketId_, + const document::GlobalId &gid_) + : lid(lid_), + timestamp(timestamp_), + bucketId(bucketId_), + gid(gid_), + removed(false) + { + } + + DocumentMetaData(DocId lid_, + storage::spi::Timestamp timestamp_, + document::BucketId bucketId_, + const document::GlobalId &gid_, + bool removed_) + : lid(lid_), + timestamp(timestamp_), + bucketId(bucketId_), + gid(gid_), + removed(removed_) + { + } + + bool valid() const { + return lid != 0 && timestamp != 0 && bucketId.isSet(); + } +}; + +namespace queryeval { + +class Blueprint; + +} + + +/** + * Read interface for a document meta store that provides mapping between + * global document id (gid) and local document id (lid) with additional + * 
meta data per document. + **/ +struct IDocumentMetaStore { + typedef uint32_t DocId; + typedef document::GlobalId GlobalId; + typedef document::BucketId BucketId; + typedef storage::spi::Timestamp Timestamp; + + virtual ~IDocumentMetaStore() {} + + /** + * Retrieves the gid associated with the given lid. + * Returns true if found, false otherwise. + **/ + virtual bool getGid(DocId lid, GlobalId &gid) const = 0; + + /** + * Retrieves the lid associated with the given gid. + * Returns true if found, false otherwise. + **/ + virtual bool getLid(const GlobalId &gid, DocId &lid) const = 0; + + /** + * Retrieves the meta data for the document with the given gid. + **/ + virtual DocumentMetaData getMetaData(const GlobalId &gid) const = 0; + + /** + * Retrieves meta data for all documents contained in the given bucket. + **/ + virtual void getMetaData(const BucketId &bucketId, DocumentMetaData::Vector &result) const = 0; + + /** + * Returns the lid following the largest lid used in the store. + * + * As long as the reader holds a read guard on the document meta + * store, we guarantee that the meta store info for lids that were + * valid when calling this method will remain valid while the + * guard is held, i.e. lids for newly removed documents are not + * reused while the read guard is held. + * + * Access to lids beyond the returned limit is not safe. + * + * The return value can be used as lid range for queries when + * attribute writer threads are synced, and is propagated as such + * when visibility delay is nonzero and forceCommit() method is + * called regularly on feed views, cf. proton::FastAccessFeedView. + * + * In the future, this method might be renamed to getReaderDocIdLimit(). + **/ + virtual DocId getCommittedDocIdLimit() const = 0; + + /** + * Returns the number of used lids in this store. + */ + virtual DocId getNumUsedLids() const = 0; + + /** + * Returns the number of active lids in this store. + * This should be <= getNumUsedLids(). 
+ * Active lids correspond to documents in active buckets. + */ + virtual DocId getNumActiveLids() const = 0; + + /** + * Returns stats on the usage and availability of lids in this store. + */ + virtual LidUsageStats getLidUsageStats() const = 0; + + /** + * Creates a black list blueprint that returns a search iterator + * that gives hits for all documents that should not be visible. + **/ + virtual std::unique_ptr createBlackListBlueprint() const = 0; +}; + + +} + diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp new file mode 100644 index 00000000000..45d7f0ac82d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.cpp @@ -0,0 +1,354 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".indexmetainfo"); +#include +#include +#include +#include + +namespace { + +class Parser { +private: + vespalib::string _name; + vespalib::FilePointer _file; + uint32_t _line; + char _buf[2048]; + bool _error; + vespalib::string _lastKey; + vespalib::string _lastValue; + uint32_t _lastIdx; + bool _matched; + +public: + Parser(const vespalib::string &name) + : _name(name), + _file(fopen(name.c_str(), "r")), + _line(0), + _buf(), + _error(false), + _lastKey(), + _lastValue(), + _lastIdx(0), + _matched(true) + { + _error = !_file.valid(); + } + bool openFailed() { + LOG(warning, "could not open file for reading: %s", _name.c_str()); + _error = true; + return false; + } + bool illegalLine() { + LOG(warning, "%s:%d: illegal line: %s", + _name.c_str(), _line, _buf); + _error = true; + return false; + } + bool illegalArrayKey() { + LOG(warning, "%s:%d: illegal array key '%s'(value='%s')", + _name.c_str(), _line, _lastKey.c_str(), _lastValue.c_str()); + _error = true; + return false; + } + bool illegalValue() { + LOG(warning, "%s:%d: illegal value for '%s': %s", + _name.c_str(), _line, 
_lastKey.c_str(), + _lastValue.c_str()); + _error = true; + return false; + } + bool unknown() { + LOG(warning, "%s:%d: unknown key '%s'(value='%s')", + _name.c_str(), _line, _lastKey.c_str(), _lastValue.c_str()); + _error = true; + return false; + } + bool status() const { return !_error; } + bool next() { + if (_error) { + return false; + } + if (!_matched) { + return unknown(); + } + if (!_file.valid()) { + return openFailed(); + } + if (fgets(_buf, sizeof(_buf), _file) == NULL) { + return false; // EOF + } + ++_line; + uint32_t len = strlen(_buf); + if (len > 0 && _buf[len - 1] == '\n') { + _buf[--len] = '\0'; + } + char *split = strchr(_buf, '='); + if (split == NULL || (split - _buf) == 0) { + return illegalLine(); + } + _lastKey = vespalib::string(_buf, split - _buf); + _lastValue = vespalib::string(split + 1, (_buf + len) - (split + 1)); + _matched = false; + return true; + } + const vespalib::string key() const { return _lastKey; } + const vespalib::string value() const { return _lastValue; } + void parseBool(const vespalib::string &k, bool &v) { + if (!_matched && !_error && _lastKey == k) { + _matched = true; + if (_lastValue == "true") { + v = true; + } else if (_lastValue == "false") { + v = false; + } else { + illegalValue(); + } + } + } + void parseString(const vespalib::string &k, vespalib::string &v) { + if (!_matched && !_error && _lastKey == k) { + _matched = true; + v = _lastValue; + } + } + void parseInt64(const vespalib::string &k, uint64_t &v) { + if (!_matched && !_error && _lastKey == k) { + _matched = true; + char *end = NULL; + uint64_t val = strtoull(_lastValue.c_str(), &end, 10); + if (end == NULL || *end != '\0' || + val == static_cast(-1)) { + illegalValue(); + return; + } + v = val; + } + } + bool parseArray(const vespalib::string &name, uint32_t size) { + if (_matched || _error + || _lastKey.length() < name.length() + 1 + || strncmp(_lastKey.c_str(), name.c_str(), name.length()) != 0 + || _lastKey[name.length()] != '.') + { + return 
false; + } + vespalib::string::size_type dot2 = _lastKey.find('.', name.length() + 1); + if (dot2 == vespalib::string::npos) { + return illegalArrayKey(); + } + char *end = NULL; + const char *pt = _lastKey.c_str() + name.length() + 1; + uint32_t val = strtoul(pt, &end, 10); + if (end == NULL || end == pt || *end != '.' + || val > size || size > val + 1) + { + return illegalArrayKey(); + } + _lastIdx = val; + _lastKey = _lastKey.substr(dot2 + 1); + return true; + } + uint32_t idx() const { return _lastIdx; } +}; + +} // namespace + +namespace search { + +vespalib::string +IndexMetaInfo::makeFileName(const vespalib::string &baseName) +{ + if (_path.length() == 0 || _path == ".") { + return baseName; + } else if (_path[_path.length() - 1] == '/') { + return vespalib::make_string("%s%s", _path.c_str(), baseName.c_str()); + } + return vespalib::make_string("%s/%s", _path.c_str(), baseName.c_str()); +} + + +IndexMetaInfo::Snapshot & +IndexMetaInfo::getCreateSnapshot(uint32_t idx) +{ + while (idx >= _snapshots.size()) { + _snapshots.push_back(Snapshot()); + } + return _snapshots[idx]; +} + + +IndexMetaInfo::SnapshotList::iterator +IndexMetaInfo::findSnapshot(uint64_t syncToken) +{ + for (SnapItr it = _snapshots.begin(); it != _snapshots.end(); ++it) { + if (it->syncToken == syncToken) { + return it; + } + } + return _snapshots.end(); +} + + +IndexMetaInfo::IndexMetaInfo(const vespalib::string &path) + : _path(path), + _snapshots() +{ +} + + +IndexMetaInfo::Snapshot +IndexMetaInfo::getBestSnapshot() const +{ + int idx = _snapshots.size() - 1; + while (idx >= 0 && !_snapshots[idx].valid) { + --idx; + } + if (idx >= 0) { + return _snapshots[idx]; + } else { + return Snapshot(); + } +} + + +IndexMetaInfo::Snapshot +IndexMetaInfo::getSnapshot(uint64_t syncToken) const +{ + IndexMetaInfo *self = const_cast(this); + SnapItr itr = self->findSnapshot(syncToken); + if (itr == _snapshots.end()) { + return Snapshot(); + } + return *itr; +} + + +bool +IndexMetaInfo::addSnapshot(const 
Snapshot &snap) +{ + if (snap.dirName.empty() + || findSnapshot(snap.syncToken) != _snapshots.end()) + { + return false; + } + assert(snap.syncToken != uint64_t(-1)); + _snapshots.push_back(snap); + std::sort(_snapshots.begin(), _snapshots.end()); + return true; +} + + +bool +IndexMetaInfo::removeSnapshot(uint64_t syncToken) +{ + SnapItr itr = findSnapshot(syncToken); + if (itr == _snapshots.end()) { + return false; + } + _snapshots.erase(itr); + return true; +} + + +bool +IndexMetaInfo::validateSnapshot(uint64_t syncToken) +{ + SnapItr itr = findSnapshot(syncToken); + if (itr == _snapshots.end()) { + return false; + } + itr->valid = true; + return true; +} + + +bool +IndexMetaInfo::invalidateSnapshot(uint64_t syncToken) +{ + SnapItr itr = findSnapshot(syncToken); + if (itr == _snapshots.end()) { + return false; + } + itr->valid = false; + return true; +} + + +void +IndexMetaInfo::clear() +{ + _snapshots.resize(0); +} + + +bool +IndexMetaInfo::load(const vespalib::string &baseName) +{ + clear(); + Parser parser(makeFileName(baseName)); + while (parser.status() && parser.next()) { + if (parser.parseArray("snapshot", _snapshots.size())) { + Snapshot &snap = getCreateSnapshot(parser.idx()); + parser.parseBool("valid", snap.valid); + parser.parseInt64("syncToken", snap.syncToken); + parser.parseString("dirName", snap.dirName); + assert(snap.syncToken != static_cast(-1)); + } + } + std::sort(_snapshots.begin(), _snapshots.end()); + return parser.status(); +} + + +bool +IndexMetaInfo::save(const vespalib::string &baseName) +{ + vespalib::string fileName = makeFileName(baseName); + vespalib::string newName = fileName + ".new"; + vespalib::FilePointer f(fopen(newName.c_str(), "w")); + if (!f.valid()) { + LOG(warning, "could not open file for writing: %s", newName.c_str()); + return false; + } + for (uint32_t i = 0; i < _snapshots.size(); ++i) { + Snapshot &snap = _snapshots[i]; + fprintf(f, "snapshot.%d.valid=%s\n", i, snap.valid? 
"true" : "false"); + fprintf(f, "snapshot.%d.syncToken=%" PRIu64 "\n", i, snap.syncToken); + fprintf(f, "snapshot.%d.dirName=%s\n", i, snap.dirName.c_str()); + } + if (ferror(f) != 0) { + LOG(error, + "Could not write to file %s", + newName.c_str()); + return false; + } + if (fflush(f) != 0) { + LOG(error, + "Could not flush file %s", + newName.c_str()); + return false; + } + if (fsync(fileno(f)) != 0) { + LOG(error, + "Could not fsync file %s", + newName.c_str()); + return false; + } + if (fclose(f.release()) != 0) { + LOG(error, + "Could not close file %s", + newName.c_str()); + return false; + } + if (rename(newName.c_str(), fileName.c_str()) != 0) { + LOG(warning, "could not rename: %s->%s", + newName.c_str(), fileName.c_str()); + return false; + } + return true; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/indexmetainfo.h b/searchlib/src/vespa/searchlib/common/indexmetainfo.h new file mode 100644 index 00000000000..823f69e7a94 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/indexmetainfo.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace search { + +class IndexMetaInfo /* list of index snapshots that is loaded from and saved to a text file located via _path */ +{ +public: + struct Snapshot + { + bool valid; + uint64_t syncToken; + vespalib::string dirName; + Snapshot() : valid(false), syncToken(0), dirName() {} + Snapshot(bool valid_, uint64_t syncToken_, const vespalib::string &dirName_) + : valid(valid_), syncToken(syncToken_), dirName(dirName_) {} + bool operator==(const Snapshot &rhs) const { + return (valid == rhs.valid + && syncToken == rhs.syncToken + && dirName == rhs.dirName); + } + bool operator<(const Snapshot &rhs) const { /* ordering looks at syncToken only */ + return syncToken < rhs.syncToken; + } + }; + typedef std::vector SnapshotList; + typedef SnapshotList::iterator SnapItr; + +private: + vespalib::string _path; + SnapshotList _snapshots; + + vespalib::string makeFileName(const vespalib::string &baseName); /* joins _path and baseName; baseName alone when _path is empty or "." */ + Snapshot &getCreateSnapshot(uint32_t idx); /* appends default snapshots until index idx exists */ + + SnapItr findSnapshot(uint64_t syncToken); /* linear search; _snapshots.end() when absent */ + +public: + IndexMetaInfo(const vespalib::string &path); + vespalib::string getPath() const { return _path; } + void setPath(const vespalib::string &path) { _path = path; } + + const SnapshotList &snapshots() const { return _snapshots; } + + Snapshot getSnapshot(uint64_t syncToken) const; /* default-constructed Snapshot when not found */ + Snapshot getBestSnapshot() const; /* last snapshot in the list with valid == true, else a default-constructed Snapshot */ + bool addSnapshot(const Snapshot &snap); /* false when dirName is empty or the syncToken is already present; re-sorts the list */ + bool removeSnapshot(uint64_t syncToken); /* false when not found */ + bool validateSnapshot(uint64_t syncToken); /* sets valid = true; false when not found */ + bool invalidateSnapshot(uint64_t syncToken); /* sets valid = false; false when not found */ + + void clear(); /* drops all snapshots */ + bool load(const vespalib::string &baseName = "meta-info.txt"); /* clears, then parses the file; false on open or parse error */ + bool save(const vespalib::string &baseName = "meta-info.txt"); /* writes <file>.new, then renames it over <file>; false on any I/O error */ +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h new file mode 100644 index 00000000000..f978cb30ff5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/isequencedtaskexecutor.h @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#pragma once + +#include +#include +#include "lambdatask.h" + +namespace search +{ + +/** + * Interface class to run multiple tasks in parallel, but tasks with same + * id has to be run in sequence. + */ +class ISequencedTaskExecutor +{ +public: + virtual ~ISequencedTaskExecutor() { } + + /** + * Schedule a task to run after all previously scheduled tasks with + * same id. All tasks must be scheduled from same thread. + * + * @param id task id. + * @param task unique pointer to the task to be executed + */ + virtual void executeTask(uint64_t id, + vespalib::Executor::Task::UP task) = 0; + + /** + * Wait for all scheduled tasks to complete. + */ + virtual void sync() = 0; + + /** + * Wrap lambda function into a task and schedule it to be run. + * Caller must ensure that pointers and references are valid and + * call sync before tearing down pointed to/referenced data. + * All tasks must be scheduled from same thread. + * + * @param id task id. + * @param function function to be wrapped in a task and later executed + */ + template + inline void execute(uint64_t id, FunctionType &&function) { + executeTask(id, makeLambdaTask(std::forward(function))); + } + + /** + * Wrap lambda function into a task and schedule it to be run. + * Caller must ensure that pointers and references are valid and + * call sync before tearing down pointed to/referenced data. + * All tasks must be scheduled from same thread. + * + * @param id task id. 
+ * @param function function to be wrapped in a task and later executed + */ + template + inline void execute(const vespalib::stringref id, FunctionType &&function) { + vespalib::hash hashfun; + executeTask(hashfun(id), + makeLambdaTask(std::forward(function))); + } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/lambdatask.h b/searchlib/src/vespa/searchlib/common/lambdatask.h new file mode 100644 index 00000000000..d03d23ba3dd --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/lambdatask.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search +{ + +template +class LambdaTask : public vespalib::Executor::Task { + FunctionType _func; + +public: + LambdaTask(const FunctionType &func) : _func(func) {} + LambdaTask(FunctionType &&func) : _func(std::move(func)) {} + virtual void run() { _func(); } +}; + +template +inline vespalib::Executor::Task::UP +makeLambdaTask(FunctionType &&function) +{ + return std::make_unique>> + (std::forward(function)); +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/lid_usage_stats.h b/searchlib/src/vespa/searchlib/common/lid_usage_stats.h new file mode 100644 index 00000000000..ced4bc36c8d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/lid_usage_stats.h @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { + +/** + * Stats on the usage and availability of lids in a document meta store. 
+ */ +class LidUsageStats +{ +private: + uint32_t _lidLimit; + uint32_t _usedLids; + uint32_t _lowestFreeLid; + uint32_t _highestUsedLid; + +public: + LidUsageStats() /* all counters start at 0 */ + : _lidLimit(0), + _usedLids(0), + _lowestFreeLid(0), + _highestUsedLid(0) + { + } + LidUsageStats(uint32_t lidLimit, + uint32_t usedLids, + uint32_t lowestFreeLid, + uint32_t highestUsedLid) + : _lidLimit(lidLimit), + _usedLids(usedLids), + _lowestFreeLid(lowestFreeLid), + _highestUsedLid(highestUsedLid) + { + } + uint32_t getLidLimit() const { return _lidLimit; } + uint32_t getUsedLids() const { return _usedLids; } + uint32_t getLowestFreeLid() const { return _lowestFreeLid; } + uint32_t getHighestUsedLid() const { return _highestUsedLid; } + uint32_t getLidBloat() const { /* lid limit minus used lids minus the reserved lid, clamped at 0 */ + // Account for reserved lid 0 + int32_t lidBloat = getLidLimit() - getUsedLids() - 1; + if (lidBloat < 0) { + return 0u; + } + return lidBloat; + } + double getLidBloatFactor() const { /* lid bloat divided by the lid limit; 0 when the limit is 0 */ + if (getLidLimit() == 0) { + return 0; /* default/empty stats: avoid 0.0 / 0.0, which would yield NaN */ + } + return (double)getLidBloat() / (double)getLidLimit(); + } + double getLidFragmentationFactor() const { /* (highest used lid - used lids) / highest used lid; 0 when that difference is negative or no lid is used */ + int32_t freeLids = getHighestUsedLid() - getUsedLids(); + if (freeLids < 0) { + return 0; + } + if (getHighestUsedLid() == 0) { + return 0; + } + return (double)freeLids / (double)getHighestUsedLid(); + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/location.cpp b/searchlib/src/vespa/searchlib/common/location.cpp new file mode 100644 index 00000000000..5374870773e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/location.cpp @@ -0,0 +1,205 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS +#include +#include + +namespace search { +namespace common { + +Location::Location(void) : + _zBoundingBox(0,0,0,0), + _x(0), + _y(0), + _xAspect(0u), + _radius(std::numeric_limits::max()), + _minx(std::numeric_limits::min()), + _maxx(std::numeric_limits::max()), + _miny(std::numeric_limits::min()), + _maxy(std::numeric_limits::max()), + _rankOnDistance(false), + _pruneOnDistance(false), + _parseError(NULL) +{ +} + + +bool +Location::getDimensionality(const char **pp) +{ + if (**pp == '2') { + (*pp)++; + if (**pp != ',') { + _parseError = "Missing comma after 2D dimensionality"; + return false; + } + (*pp)++; + return true; + } + _parseError = "Bad dimensionality spec, not 2D"; + return false; +} + + +int +Location::getInt(const char **pp) +{ + const char *p = *pp; + int val; + bool isminus; + + val = 0; + isminus = false; + if (*p == '-') { + isminus = true; + p++; + } + while (*p >= '0' && *p <= '9') + val = val * 10 + *p++ - '0'; + *pp = p; + return isminus ? 
- val : val; +} + +bool Location::parse(const vespalib::string &locStr) +{ + bool hadCutoff = false; + bool hadLoc = false; + const char *p = locStr.c_str(); + while (*p != '\0') { + if (*p == '[') { + p++; + if (hadCutoff) { + _parseError = "Duplicate square cutoff"; + return false; + } + hadCutoff = true; + if (!getDimensionality(&p)) + return false; + _minx = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after minx"; + return false; + } + p++; + _miny = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after miny"; + return false; + } + p++; + _maxx = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after maxx"; + return false; + } + p++; + _maxy = getInt(&p); + if (*p != ']') { + _parseError = "Missing ']' after maxy"; + return false; + } + p++; + } else if (*p == '(') { + p++; + if (hadLoc) { + _parseError = "Duplicate location"; + return false; + } + hadLoc = true; + if (!getDimensionality(&p)) + return false; + _x = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after x position"; + return false; + } + p++; + _y = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after y position"; + return false; + } + p++; + _radius = getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after radius"; + return false; + } + p++; + /* _tableID = */ (void) getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after tableID"; + return false; + } + p++; + /* _rankMultiplier = */ (void) getInt(&p); + if (*p != ',') { + _parseError = "Missing ',' after rank multiplier"; + return false; + } + p++; + /* _rankOnlyOnDistance = */ (void) (getInt(&p) != 0); + if (*p == ',') { + p++; + _xAspect = getInt(&p); + if (*p != ')') { + _parseError = "Missing ')' after xAspect"; + return false; + } + } else { + if (*p != ')') { + _parseError = "Missing ')' after rankOnlyOnDistance flag"; + return false; + } + } + p++; + } else if (*p == ' ') + p++; + else { + _parseError = "Unexpected char in location spec"; + return false; 
+ } + } + + if (hadLoc) { + _rankOnDistance = true; + uint32_t maxdx = _radius; + if (_xAspect != 0) { + uint64_t maxdx2 = ((static_cast(_radius) << 32) + 0xffffffffu) / + _xAspect; + if (maxdx2 >= 0xffffffffu) + maxdx = 0xffffffffu; + else + maxdx = static_cast(maxdx2); + } + if (static_cast(_x - maxdx) > _minx && + static_cast(_x) - static_cast(maxdx) > + static_cast(_minx)) + _minx = _x - maxdx; + if (static_cast(_x + maxdx) < _maxx && + static_cast(_x) + static_cast(maxdx) < + static_cast(_maxx)) + _maxx = _x + maxdx; + if (static_cast(_y - _radius) > _miny && + static_cast(_y) - static_cast(_radius) > + static_cast(_miny)) + _miny = _y - _radius; + if (static_cast(_y + _radius) < _maxy && + static_cast(_y) + static_cast(_radius) < + static_cast(_maxy)) + _maxy = _y + _radius; + } + if (_minx != std::numeric_limits::min() || + _maxx != std::numeric_limits::max() || + _miny != std::numeric_limits::min() || + _maxy != std::numeric_limits::max()) + { + _pruneOnDistance = true; + } + _zBoundingBox = vespalib::geo::ZCurve::BoundingBox(_minx, _maxx, _miny, _maxy); + + return true; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/common/location.h b/searchlib/src/vespa/searchlib/common/location.h new file mode 100644 index 00000000000..9faa42d0ca2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/location.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2004 Overture Services Norway AS + +#pragma once + +#include +#include +#include "documentlocations.h" + +namespace search { +namespace common { + +class Location : public DocumentLocations /* parsed 2D location query: optional "[...]" bounding box and optional "(...)" position with radius */ +{ +private: + static int getInt(const char **pp); /* reads an optional '-' followed by decimal digits and advances *pp past them */ + bool getDimensionality(const char **pp); /* accepts "2," and advances *pp; otherwise sets _parseError and returns false */ + +public: + Location(void); + bool getRankOnDistance() const { return _rankOnDistance; } + bool getPruneOnDistance() const { return _pruneOnDistance; } + uint32_t getXAspect() const { return _xAspect; } + int32_t getX() const { return _x; } + int32_t getY() const { return _y; } + uint32_t getRadius() const { return _radius; } + const char * getParseError() const { return _parseError; } /* NULL until a parse error has been recorded */ + int32_t getMinX() const { return _minx; } + int32_t getMinY() const { return _miny; } + int32_t getMaxX() const { return _maxx; } + int32_t getMaxY() const { return _maxy; } + bool getzFailBoundingBoxTest(int64_t docxy) const { + return _zBoundingBox.getzFailBoundingBoxTest(docxy); + } + + bool parse(const vespalib::string &locStr); /* returns false and sets _parseError on malformed input */ + +private: + vespalib::geo::ZCurve::BoundingBox _zBoundingBox; + int32_t _x; /* Query X position */ + int32_t _y; /* Query Y position */ + uint32_t _xAspect; /* X distance multiplier fraction; 0 means no scaling is applied */ + uint32_t _radius; /* Radius for Euclidean distance */ + int32_t _minx; /* Min X coordinate */ + int32_t _maxx; /* Max X coordinate */ + int32_t _miny; /* Min Y coordinate */ + int32_t _maxy; /* Max Y coordinate */ + + bool _rankOnDistance; /* set by parse() when a "(...)" position was given */ + bool _pruneOnDistance; /* set by parse() when any bound differs from its initial limit value */ + const char *_parseError; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.cpp b/searchlib/src/vespa/searchlib/common/locationiterators.cpp new file mode 100644 index 00000000000..ba959114c77 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/locationiterators.cpp @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".locationiterators"); +#include + +#include + +#include "locationiterators.h" + +using namespace search::common; + +class FastS_2DZLocationIterator : public search::queryeval::SearchIterator, public vespalib::noncopyable +{ +private: + const unsigned int _numDocs; + const bool _strict; + const uint64_t _radius2; + const Location & _location; + std::vector _pos; + + virtual void doSeek(uint32_t docId); + virtual void doUnpack(uint32_t docId); +public: + FastS_2DZLocationIterator(unsigned int numDocs, + bool strict, + const Location & location); + + virtual ~FastS_2DZLocationIterator(void); +}; + + +FastS_2DZLocationIterator:: +FastS_2DZLocationIterator(unsigned int numDocs, + bool strict, + const Location & location) + : SearchIterator(), + _numDocs(numDocs), + _strict(strict), + _radius2(static_cast(location.getRadius()) * location.getRadius()), + _location(location), + _pos() +{ + _pos.resize(1); //Need at least 1 entry as the singlevalue attributes does not honour given size. +}; + + +FastS_2DZLocationIterator::~FastS_2DZLocationIterator(void) +{ +}; + + +void +FastS_2DZLocationIterator::doSeek(uint32_t docId) +{ + if (__builtin_expect(docId >= _numDocs, false)) { + setAtEnd(); + return; + } + + const Location &location = _location; + std::vector &pos = _pos; + + for (;;) { + uint32_t numValues = + location.getVec()->get(docId, &pos[0], pos.size()); + if (numValues > pos.size()) { + pos.resize(numValues); + numValues = location.getVec()->get(docId, &pos[0], pos.size()); + } + for (uint32_t i = 0; i < numValues; i++) { + int64_t docxy(pos[i]); + if ( ! location.getzFailBoundingBoxTest(docxy)) { + int32_t docx = 0; + int32_t docy = 0; + vespalib::geo::ZCurve::decode(docxy, &docx, &docy); + uint32_t dx = (location.getX() > docx) + ? location.getX() - docx + : docx - location.getX(); + if (location.getXAspect() != 0) + dx = ((uint64_t) dx * location.getXAspect()) >> 32; + + uint32_t dy = (location.getY() > docy) + ? 
location.getY() - docy + : docy - location.getY(); + uint64_t dist2 = (uint64_t) dx * dx + (uint64_t) dy * dy; + if (dist2 <= _radius2) { + setDocId(docId); + return; + } + } + } + + if (__builtin_expect(docId + 1 >= _numDocs, false)) { + setAtEnd(); + return; + } + + if (!_strict) { + return; + } + docId++; + } +} + + +void +FastS_2DZLocationIterator::doUnpack(uint32_t docId) +{ + (void) docId; +} + + +search::queryeval::SearchIterator * +FastS_AllocLocationIterator(unsigned int numDocs, + bool strict, + const Location & location) +{ + return new FastS_2DZLocationIterator(numDocs, strict, location); +} diff --git a/searchlib/src/vespa/searchlib/common/locationiterators.h b/searchlib/src/vespa/searchlib/common/locationiterators.h new file mode 100644 index 00000000000..d55cc2ff16e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/locationiterators.h @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +search::queryeval::SearchIterator * +FastS_AllocLocationIterator(unsigned int numDocs, + bool strict, + const search::common::Location & location); + diff --git a/searchlib/src/vespa/searchlib/common/mapnames.cpp b/searchlib/src/vespa/searchlib/common/mapnames.cpp new file mode 100644 index 00000000000..2597ae1c6fb --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/mapnames.cpp @@ -0,0 +1,14 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "mapnames.h" + +namespace search { + +const vespalib::string MapNames::RANK("rank"); +const vespalib::string MapNames::FEATURE("feature"); +const vespalib::string MapNames::HIGHLIGHTTERMS("highlightterms"); +const vespalib::string MapNames::MATCH("match"); +const vespalib::string MapNames::CACHES("caches"); +const vespalib::string MapNames::MODEL("model"); + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/mapnames.h b/searchlib/src/vespa/searchlib/common/mapnames.h new file mode 100644 index 00000000000..fa8d7f97578 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/mapnames.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +/** + * A simple wrapper class for the named maps of properties. + **/ +struct MapNames +{ + /** name of rank feature property collection **/ + static const vespalib::string RANK; + + /** name of feature override property collection **/ + static const vespalib::string FEATURE; + + /** name of highlightterms property collection **/ + static const vespalib::string HIGHLIGHTTERMS; + + /** name of match property collection **/ + static const vespalib::string MATCH; + + /** name of cache property collection **/ + static const vespalib::string CACHES; + + /** name of model property collection **/ + static const vespalib::string MODEL; +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/packets.cpp b/searchlib/src/vespa/searchlib/common/packets.cpp new file mode 100644 index 00000000000..769cbbeeed4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/packets.cpp @@ -0,0 +1,2198 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".searchlib.common.fs4packets"); + +using document::CompressionConfig; +using vespalib::ConstBufferRef; +using vespalib::make_string; +using vespalib::stringref; + +namespace search { +namespace fs4transport { + +/** + * Persistent packet streamer. + **/ +FS4PersistentPacketStreamer FS4PersistentPacketStreamer:: +Instance(FS4PacketFactory::CreateFS4Packet); + +//============================================================ + +bool +FS4PersistentPacketStreamer::HasChannelID(uint32_t pcode) +{ + switch(pcode & PCODE_MASK) { + case search::fs4transport::PCODE_EOL: + case search::fs4transport::PCODE_QUERYRESULT: + case search::fs4transport::PCODE_ERROR: + case search::fs4transport::PCODE_GETDOCSUMS: + case search::fs4transport::PCODE_DOCSUM: + case search::fs4transport::PCODE_MLD_QUERYRESULT: + case search::fs4transport::PCODE_MLD_GETDOCSUMS: + case search::fs4transport::PCODE_PARSEDQUERY2: + case search::fs4transport::PCODE_QUERYRESULTX: + case search::fs4transport::PCODE_QUERYX: + case search::fs4transport::PCODE_GETDOCSUMSX: + case search::fs4transport::PCODE_TRACEREPLY: + return true; + default: + return false; + } +} + +FS4PersistentPacketStreamer:: +FS4PersistentPacketStreamer(FS4PacketFactory::CreatePacket_t cp) + : _compressionLimit(0), + _compressionLevel(9), + _compressionType(CompressionConfig::LZ4), + _conservative(false), + _createPacket(cp) { +} + + +bool +FS4PersistentPacketStreamer::GetPacketInfo(FNET_DataBuffer *src, + uint32_t *plen, uint32_t *pcode, + uint32_t *chid, bool *broken) +{ + uint32_t tmpVal; + bool hasCHID; + + if (src->GetDataLen() < 2 * sizeof(uint32_t) || + ((hasCHID = HasChannelID(src->PeekInt32(sizeof(uint32_t)))) && + src->GetDataLen() < 3 * sizeof(uint32_t))) + return false; + + if 
(hasCHID) { + tmpVal = src->ReadInt32(); + if (tmpVal < 2 * sizeof(uint32_t)) { + // This is not a valid packet length. We might + // be out of sync. + *broken = _conservative; + if (*broken) { + LOG(warning, "Out of sync! Invalid packet length %u\n", tmpVal); + } + return false; + } else { + *plen = tmpVal - 2 * sizeof(uint32_t); + } + tmpVal = src->ReadInt32(); + if (!ValidPCode(tmpVal)) { + // Out of sync? + *broken = _conservative; + if (*broken) { + LOG(warning, "Out of sync! Invalid pcode %u (%u)\n", tmpVal, *plen); + } + return false; + } else { + *pcode = tmpVal; + } + *chid = src->ReadInt32(); + } else { + tmpVal = src->ReadInt32(); + if (tmpVal < sizeof(uint32_t)) { + // This is not a valid packet length. We might + // be out of sync. + *broken = _conservative; + if (*broken) { + LOG(warning, "Out of sync! Invalid length (noch) %u\n", tmpVal); + } + return false; + } else { + *plen = tmpVal - sizeof(uint32_t); + } + tmpVal = src->ReadInt32(); + if (!ValidPCode(tmpVal)) { + // Out of sync? + *broken = _conservative; + if (*broken) { + LOG(warning, "Out of sync! 
Invalid pcode (noch) %u (%u)\n", tmpVal, *plen); + } + return false; + } else { + *pcode = tmpVal; + } + *chid = FNET_NOID; + } + return true; +} + +namespace { +void decodePacket(FNET_Packet *&packet, FNET_DataBuffer &buf, uint32_t size, + uint32_t pcode) { + try { + if (!packet->Decode(&buf, size)) { + LOG(error, "could not decode packet (pcode=%u); " + "this could be caused by a protocol and/or " + "version incompatibility\n", pcode); + packet->Free(); + packet = NULL; + } + } catch (const vespalib::Exception & e) { + packet->Free(); + packet = NULL; + LOG(error, "%s", e.toString().c_str()); + } +} +} // namespace + +FNET_Packet* +FS4PersistentPacketStreamer::Decode(FNET_DataBuffer *src, uint32_t plen, uint32_t pcode, FNET_Context) +{ + FNET_Packet *packet; + + packet = _createPacket(pcode & PCODE_MASK); + if (packet != NULL) { + uint32_t compressionByte = (pcode & ~PCODE_MASK) >> 24; + CompressionConfig::Type compressionType(CompressionConfig::toType(compressionByte)); + if (compressionType != 0) { + uint32_t uncompressed_size = src->ReadInt32(); + ConstBufferRef org(src->GetData(), plen - sizeof(uint32_t)); + vespalib::DataBuffer uncompressed(uncompressed_size); + document::decompress(compressionType, uncompressed_size, org, uncompressed, false); + FNET_DataBuffer buf(uncompressed.getData(), uncompressed.getDataLen()); + decodePacket(packet, buf, uncompressed_size, pcode); + src->DataToDead(plen - sizeof(uint32_t)); + } else { + decodePacket(packet, *src, plen, pcode); + } + } else { + src->DataToDead(plen); + } + return packet; +} + + +void +FS4PersistentPacketStreamer::Encode(FNET_Packet *packet, uint32_t chid, FNET_DataBuffer *dst) +{ + uint32_t len = packet->GetLength(); + uint32_t pcode = packet->GetPCODE(); + + uint32_t packet_start = dst->GetDataLen(); + if (HasChannelID(pcode)) { + dst->EnsureFree(len + 3 * sizeof(uint32_t)); + dst->WriteInt32Fast(len + 2 * sizeof(uint32_t)); + dst->WriteInt32Fast(pcode); + dst->WriteInt32Fast(chid); + } else { + 
dst->EnsureFree(len + 2 * sizeof(uint32_t)); + dst->WriteInt32Fast(len + sizeof(uint32_t)); + dst->WriteInt32Fast(pcode); + } + uint32_t header_len = dst->GetDataLen() - packet_start; + packet->Encode(dst); + dst->AssertValid(); + uint32_t body_len = dst->GetDataLen() - packet_start - header_len; + bool isCompressable((pcode & ~PCODE_MASK) == 0); + + if (isCompressable && _compressionLimit && (body_len > _compressionLimit)) { + CompressionConfig config(_compressionType, _compressionLevel, 90); + ConstBufferRef org(dst->GetData() + packet_start + header_len, body_len); + vespalib::DataBuffer compressed(org.size()); + CompressionConfig::Type r = document::compress(config, org, compressed, false); + if (r != CompressionConfig::NONE) { + dst->DataToFree(body_len + header_len); + // sizeof(data + header + uncompressed_size) - sizeof(uint32_t) + dst->WriteInt32Fast(compressed.getDataLen() + header_len); + dst->WriteInt32Fast(pcode | (_compressionType << 24)); + if (HasChannelID(pcode)) { + dst->FreeToData(sizeof(uint32_t)); // channel + } + dst->WriteInt32Fast(body_len); + dst->WriteBytes(compressed.getData(), compressed.getDataLen()); + dst->AssertValid(); + } + } +} + +//============================================================ + +FS4Properties::FS4Properties() + : _entries(), + _name(0), + _backing() +{ +} + +FS4Properties::FS4Properties(FS4Properties && rhs) + : _entries(std::move(rhs._entries)), + _name(std::move(rhs._name)), + _backing(std::move(rhs._backing)) +{ +} + +FS4Properties & +FS4Properties::operator=(FS4Properties && rhs) +{ + _entries = std::move(rhs._entries); + _name = std::move(rhs._name); + _backing = std::move(rhs._backing); + return *this; +} + +FS4Properties::~FS4Properties() +{ +} + +void +FS4Properties::allocEntries(uint32_t cnt) +{ + _entries.resize(cnt); + _backing.reserve(cnt*2*40); // Assume strings are average 40 bytes +} + +void FS4Properties::set(StringRef & e, const vespalib::stringref & s) +{ + e.first = _backing.size(); + e.second = 
s.size(); + _backing.append(s.c_str(), s.size()); +} + +void +FS4Properties::setKey(uint32_t entry, const char *key, uint32_t keySize) +{ + set(_entries[entry].first, vespalib::stringref(key, keySize)); +} + +void +FS4Properties::setValue(uint32_t entry, const char *value, uint32_t valueSize) +{ + set(_entries[entry].second, vespalib::stringref(value, valueSize)); +} + +uint32_t +FS4Properties::getLength() +{ + uint32_t len = sizeof(uint32_t) * 2 + getNameLen(); + len += _backing.size(); + len += _entries.size() * sizeof(uint32_t) * 2; + return len; +} + +void +FS4Properties::encode(FNET_DataBuffer &dst) +{ + dst.WriteInt32Fast(_name.size()); + dst.WriteBytesFast(_name.c_str(), _name.size()); + dst.WriteInt32Fast(size()); + for (uint32_t i = 0; i < size(); ++i) { + dst.WriteInt32Fast(getKeyLen(i)); + dst.WriteBytesFast(getKey(i), getKeyLen(i)); + dst.WriteInt32Fast(getValueLen(i)); + dst.WriteBytesFast(getValue(i), getValueLen(i)); + } +} + +bool +FS4Properties::decode(FNET_DataBuffer &src, uint32_t &len) +{ + uint32_t strLen; + if (len < sizeof(uint32_t)) return false; + strLen = src.ReadInt32(); + len -= sizeof(uint32_t); + if (len < strLen) return false; + setName(src.GetData(), strLen); + src.DataToDead(strLen); + len -= strLen; + if (len < sizeof(uint32_t)) return false; + uint32_t cnt = src.ReadInt32(); + len -= sizeof(uint32_t); + allocEntries(cnt); + for (uint32_t i = 0; i < cnt; ++i) { + if (len < sizeof(uint32_t)) return false; + strLen = src.ReadInt32(); + len -= sizeof(uint32_t); + if (len < strLen) return false; + setKey(i, src.GetData(), strLen); + src.DataToDead(strLen); + len -= strLen; + if (len < sizeof(uint32_t)) return false; + strLen = src.ReadInt32(); + len -= sizeof(uint32_t); + if (len < strLen) return false; + setValue(i, src.GetData(), strLen); + src.DataToDead(strLen); + len -= strLen; + } + return true; +} + +vespalib::string +FS4Properties::toString(uint32_t indent) const +{ + vespalib::string s; + s += make_string("%*sProperties {\n", 
indent, ""); + s += make_string("%*s name: ", indent, ""); + s += _name; + s += "\n"; + for (uint32_t i = 0; i < size(); ++i) { + s += make_string("%*s Entry[%d] {\n", indent, "", i); + s += make_string("%*s key : %s\n", indent, "", vespalib::string(getKey(i), getKeyLen(i)).c_str()); + s += make_string("%*s value: %s\n", indent, "", vespalib::string(getValue(i), getValueLen(i)).c_str()); + s += make_string("%*s }\n", indent, ""); + } + s += make_string("%*s}\n", indent, ""); + return s; +} + +//============================================================ + + +/** + * Write a string in usual format to a buffer. Usual format is first + * a 32-bit integer holding the string length, then the bytes that the + * string contained. Skip checking for free space. + * + * @param buf buffer to write to + * @param str string to write, of any type that has c_str() and size() + **/ +template +void +writeLenString(FNET_DataBuffer *buf, const STR &str) +{ + buf->WriteInt32Fast(str.size()); + buf->WriteBytesFast(str.c_str(), str.size()); +} + + +//============================================================ + +FS4Packet::FS4Packet() + : FNET_Packet() +{ +} + + +FS4Packet::~FS4Packet() +{ +} + + +void +FS4Packet::Free() +{ + delete this; +} + +vespalib::string +FS4Packet::Print(uint32_t indent) +{ + return toString(indent); +} + +//============================================================ + +FS4Packet_EOL::FS4Packet_EOL() + : FS4Packet() +{ +} + + +FS4Packet_EOL::~FS4Packet_EOL() +{ +} + + +uint32_t +FS4Packet_EOL::GetLength() +{ + return 0; +} + + +void +FS4Packet_EOL::Encode(FNET_DataBuffer *dst) +{ + (void) dst; +} + + +bool +FS4Packet_EOL::Decode(FNET_DataBuffer *src, uint32_t len) +{ + src->DataToDead(len); + return (len == 0); +} + + +vespalib::string +FS4Packet_EOL::toString(uint32_t indent) const +{ + return make_string("%*sFS4Packet_EOL {}\n", indent, ""); +} + +//============================================================ + 
+/**
+ * Wrap a shared packet so that it can be handed out as an FS4Packet.
+ * Packet code, length and encoding are all forwarded to the wrapped
+ * packet.
+ *
+ * @param packet the packet to wrap
+ **/
+FS4Packet_Shared::FS4Packet_Shared(FNET_Packet::SP packet)
+    : FS4Packet(),
+      _packet(std::move(packet))
+{
+}
+
+FS4Packet_Shared::~FS4Packet_Shared()
+{
+}
+
+uint32_t
+FS4Packet_Shared::GetPCODE()
+{
+    return _packet->GetPCODE();
+}
+
+uint32_t
+FS4Packet_Shared::GetLength()
+{
+    return _packet->GetLength();
+}
+
+void
+FS4Packet_Shared::Encode(FNET_DataBuffer *dst)
+{
+    _packet->Encode(dst);
+}
+
+/**
+ * Decoding into this wrapper is not supported. The call asserts; when
+ * assertions are compiled out it reports failure instead of running off
+ * the end of a value-returning function.
+ *
+ * @return false, always
+ **/
+bool
+FS4Packet_Shared::Decode(FNET_DataBuffer *, uint32_t )
+{
+    assert(false);
+    return false;
+}
+
+vespalib::string
+FS4Packet_Shared::toString(uint32_t indent) const
+{
+    return _packet->Print(indent);
+}
+
+//============================================================
+
+/**
+ * Serialize the given packet once and keep the bytes.
+ *
+ * If the shared streamer instance has a non-zero compression limit and
+ * the packet length exceeds it, the encoded body is run through the
+ * compressor. When that yields a type other than NONE, the stored data
+ * is the uncompressed length followed by the compressed bytes and the
+ * type is remembered for GetPCODE. In all other cases the plain
+ * encoding is stored.
+ *
+ * @param packet packet to serialize
+ **/
+FS4Packet_PreSerialized::FS4Packet_PreSerialized(FNET_Packet & packet)
+    : FS4Packet(),
+      _pcode(packet.GetPCODE()),
+      _compressionType(CompressionConfig::NONE),
+      _data(packet.GetLength() + 1*sizeof(uint32_t))
+{
+    const uint32_t body_len(packet.GetLength());
+    const uint32_t compressionLimit=FS4PersistentPacketStreamer::Instance.getCompressionLimit();
+    if (compressionLimit && (body_len > compressionLimit)) {
+        FNET_DataBuffer tmp(packet.GetLength());
+        packet.Encode(&tmp);
+        tmp.AssertValid();
+        CompressionConfig config(FS4PersistentPacketStreamer::Instance.getCompressionType(),
+                                 FS4PersistentPacketStreamer::Instance.getCompressionLevel(),
+                                 90);
+        ConstBufferRef org(tmp.GetData(), tmp.GetDataLen());
+        vespalib::DataBuffer compressed(org.size());
+        _compressionType = document::compress(config, org, compressed, false);
+        if (_compressionType != CompressionConfig::NONE) {
+            _data.WriteInt32Fast(body_len);
+            _data.WriteBytes(compressed.getData(), compressed.getDataLen());
+            _data.AssertValid();
+        } else {
+            packet.Encode(&_data);
+        }
+    } else {
+        packet.Encode(&_data);
+    }
+}
+
+FS4Packet_PreSerialized::~FS4Packet_PreSerialized()
+{
+}
+
+/**
+ * @return the original packet code; when the stored data is compressed
+ *         the compression type is or'ed in, shifted left 24 bits
+ **/
+uint32_t
+FS4Packet_PreSerialized::GetPCODE()
+{
+    return ((_compressionType == CompressionConfig::NONE)
+            ? _pcode
+            : (_pcode | (_compressionType << 24)));
+}
+
+uint32_t
+FS4Packet_PreSerialized::GetLength()
+{
+    return _data.GetDataLen();
+}
+
+void
+FS4Packet_PreSerialized::Encode(FNET_DataBuffer *dst)
+{
+    dst->WriteBytes(_data.GetData(), _data.GetDataLen());
+}
+
+/**
+ * Decoding into a pre-serialized packet is not supported. The call
+ * asserts; when assertions are compiled out it reports failure instead
+ * of running off the end of a value-returning function.
+ *
+ * @return false, always
+ **/
+bool
+FS4Packet_PreSerialized::Decode(FNET_DataBuffer *, uint32_t)
+{
+    assert(false);
+    return false;
+}
+
+vespalib::string
+FS4Packet_PreSerialized::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_PreSerialized {\n", indent, "");
+    s += make_string("%*s length : %d\n", indent, "", _data.GetDataLen());
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+FS4Packet_ERROR::FS4Packet_ERROR()
+    : FS4Packet(),
+      _errorCode(0),
+      _message()
+{
+}
+
+
+FS4Packet_ERROR::~FS4Packet_ERROR()
+{
+}
+
+
+/** @return size of error code word, message length word and message **/
+uint32_t
+FS4Packet_ERROR::GetLength()
+{
+    return 2 * sizeof(uint32_t) + _message.size();
+}
+
+
+void
+FS4Packet_ERROR::Encode(FNET_DataBuffer *dst)
+{
+    dst->WriteInt32Fast(_errorCode);
+    writeLenString(dst, _message);
+}
+
+
+/**
+ * Read error code and message. The message length word must account
+ * for exactly the rest of the body; otherwise the remaining bytes are
+ * discarded and decoding fails.
+ *
+ * @param src buffer positioned at the packet body
+ * @param len body length
+ * @return true if the packet was decoded
+ **/
+bool
+FS4Packet_ERROR::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (len < sizeof(uint32_t) * 2) {
+        src->DataToDead(len);
+        return false;
+    }
+    _errorCode = src->ReadInt32();
+    uint32_t messageLen = src->ReadInt32();
+    len -= 2 * sizeof(uint32_t);
+    if (len != messageLen) {
+        src->DataToDead(len);
+        return false;
+    }
+    setErrorMessage(stringref(src->GetData(), messageLen));
+    src->DataToDead(messageLen);
+    return true;
+}
+
+
+vespalib::string
+FS4Packet_ERROR::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_ERROR {\n", indent, "");
+    s += make_string("%*s errorCode : %d\n", indent, "", _errorCode);
+    s += make_string("%*s message : %s\n", indent, "", _message.c_str());
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+/**
+ * Replace the docsum blob with a copy of the given bytes.
+ *
+ * @param buf data to copy
+ * @param len number of bytes to copy
+ **/
+void
+FS4Packet_DOCSUM::SetBuf(const char *buf, uint32_t len)
+{
+    _buf.resize(len);
+    memcpy(_buf.str(), buf, len);
+}
+
+
+FS4Packet_DOCSUM::FS4Packet_DOCSUM()
+    : FS4Packet(),
+      _gid(),
+      _buf()
+{
+}
+
+
+FS4Packet_DOCSUM::~FS4Packet_DOCSUM()
+{
+}
+
+/**
+ * Write the raw global id followed by the docsum blob. The blob has no
+ * length prefix of its own. Free space is not checked.
+ **/
+void
+FS4Packet_DOCSUM::Encode(FNET_DataBuffer *dst)
+{
+    dst->WriteBytesFast(_gid.get(), document::GlobalId::LENGTH);
+    dst->WriteBytesFast(_buf.c_str(), _buf.size());
+}
+
+
+/**
+ * Read the global id from the first GlobalId::LENGTH bytes and take
+ * the rest of the body as the docsum blob.
+ *
+ * @param src buffer positioned at the packet body
+ * @param len body length
+ * @return false if the body is too short to hold a global id
+ **/
+bool
+FS4Packet_DOCSUM::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (len < document::GlobalId::LENGTH) {
+        src->DataToDead(len);
+        return false;
+    }
+    unsigned char rawGid[document::GlobalId::LENGTH];
+    src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+    _gid.set(rawGid);
+    len -= document::GlobalId::LENGTH;
+    SetBuf(src->GetData(), len);
+    src->DataToDead(len);
+    return true;
+}
+
+
+/**
+ * Dump the packet as text. A blob that starts with the in-memory
+ * representation of SLIME_MAGIC_ID has its remainder decoded as binary
+ * slime and printed as JSON; any other blob is printed as hex bytes
+ * with a line break after every 16th byte.
+ *
+ * @param indent number of spaces put in front of every line
+ **/
+vespalib::string
+FS4Packet_DOCSUM::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_DOCSUM {\n", indent, "");
+    s += make_string("%*s gid : %s\n", indent, "", _gid.toString().c_str());
+
+    uint32_t magic = ::search::fs4transport::SLIME_MAGIC_ID;
+    if (_buf.size() >= sizeof(magic) &&
+        memcmp(_buf.c_str(), &magic, sizeof(magic)) == 0) {
+        vespalib::Slime slime;
+        vespalib::slime::Memory input(_buf.c_str() + sizeof(magic),
+                                      _buf.size() - sizeof(magic));
+        vespalib::slime::SimpleBuffer buf;
+        vespalib::slime::BinaryFormat::decode(input, slime);
+        vespalib::slime::JsonFormat::encode(slime, buf, false);
+        s += make_string("%*s json dump : ", indent, "");
+        s += buf.get().make_string();
+    } else {
+        s += make_string("%*s data dump :\n", indent, "");
+        const char *pt = _buf.c_str();
+        uint32_t i = 0;
+        if ( ! _buf.empty())
+            s += make_string("%*s ", indent, "");
+        while (i < _buf.size()) {
+            s += make_string("%x ", (unsigned char) pt[i]);
+            if ((++i % 16) == 0)
+                s += make_string("\n%*s ", indent, "");
+        }
+        if ((i % 16) != 0)
+            s += make_string("\n");
+    }
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+/**
+ * @param pcode packet code this packet is created for; the legacy
+ *              MONITORQUERY code implies a fixed feature mask
+ **/
+FS4Packet_MONITORQUERYX::FS4Packet_MONITORQUERYX(uint32_t pcode)
+    : FS4Packet(),
+      _pcode(pcode),
+      _features(0),
+      _qflags(0u)
+{
+    UpdateCompatFeatures();
+}
+
+
+FS4Packet_MONITORQUERYX::~FS4Packet_MONITORQUERYX()
+{
+}
+
+
+/**
+ * Select the packet code from the feature set: the legacy code when the
+ * features equal the legacy mask, the extended code otherwise.
+ **/
+void
+FS4Packet_MONITORQUERYX::UpdateCompatPCODE(void)
+{
+    if (_features == search::fs4transport::MQF_MONITORQUERY_MASK)
+        _pcode = search::fs4transport::PCODE_MONITORQUERY;
+    else
+        _pcode = search::fs4transport::PCODE_MONITORQUERYX;
+}
+
+
+/**
+ * Set the feature set implied by a legacy packet code. The features
+ * are left untouched for the extended packet code.
+ **/
+void
+FS4Packet_MONITORQUERYX::UpdateCompatFeatures(void)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORQUERY)
+        _features = search::fs4transport::MQF_MONITORQUERY_MASK;
+}
+
+
+uint32_t
+FS4Packet_MONITORQUERYX::GetLength()
+{
+    uint32_t plen = 0;
+
+    if (_pcode == search::fs4transport::PCODE_MONITORQUERYX)
+        plen += sizeof(uint32_t);
+    if (_features & search::fs4transport::MQF_QFLAGS)
+        plen += sizeof(uint32_t);
+    return plen;
+}
+
+
+/**
+ * Write the feature word (extended packet code only) and the optional
+ * query flags. Free space is not checked.
+ **/
+void
+FS4Packet_MONITORQUERYX::Encode(FNET_DataBuffer *dst)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORQUERYX)
+        dst->WriteInt32Fast(_features);
+
+    if ((_features & search::fs4transport::MQF_QFLAGS) != 0)
+        dst->WriteInt32Fast(_qflags);
+}
+
+
+/**
+ * Read the fields selected by the packet code and feature word. Fails,
+ * discarding the unread part of the body, if the body is too short,
+ * has trailing bytes, or announces features outside
+ * FNET_MQF_SUPPORTED_MASK.
+ *
+ * @param src buffer positioned at the packet body
+ * @param len body length
+ * @return true if the packet was decoded
+ **/
+bool
+FS4Packet_MONITORQUERYX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORQUERYX) {
+        if (len < sizeof(uint32_t))
+            goto error;
+        _features = src->ReadInt32();
+        len -= sizeof(uint32_t);
+    }
+    if ((_features & ~search::fs4transport::FNET_MQF_SUPPORTED_MASK) != 0)
+        goto error;
+
+    if ((_features & search::fs4transport::MQF_QFLAGS) != 0) {
+        if (len < sizeof(uint32_t))
+            goto error;
+        _qflags = src->ReadInt32();
+        len -= sizeof(uint32_t);
+    }
+
+    if (len != 0)
+        goto error;
+
+    SetRealPCODE();
+    return true; // OK
+ error:
+    src->DataToDead(len);
+    return false; // FAIL
+}
+
+
+vespalib::string
+FS4Packet_MONITORQUERYX::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_MONITORQUERYX {\n", indent, "");
+    s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+    s += make_string("%*s features : 0x%x\n", indent, "", _features);
+    s += make_string("%*s qflags : %d\n", indent, "", _qflags);
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+/**
+ * @param pcode packet code this packet is created for; the legacy
+ *              MONITORRESULT and MLD_MONITORRESULT codes imply fixed
+ *              feature masks
+ **/
+FS4Packet_MONITORRESULTX::
+FS4Packet_MONITORRESULTX(uint32_t pcode)
+    : FS4Packet(),
+      _pcode(pcode),
+      _features(0),
+      _partid(0),
+      _timestamp(0),
+      _totalNodes(0),
+      _activeNodes(0),
+      _totalParts(0),
+      _activeParts(0),
+      _rflags(0u),
+      _activeDocs(0)
+{
+    UpdateCompatFeatures();
+}
+
+
+FS4Packet_MONITORRESULTX::~FS4Packet_MONITORRESULTX(void)
+{
+}
+
+
+/**
+ * Select the packet code from the feature set: one of the two legacy
+ * codes when the features equal the matching legacy mask, the extended
+ * code otherwise.
+ **/
+void
+FS4Packet_MONITORRESULTX::UpdateCompatPCODE(void)
+{
+    if (_features == search::fs4transport::MRF_MONITORRESULT_MASK)
+        _pcode = search::fs4transport::PCODE_MONITORRESULT;
+    else if (_features == search::fs4transport::MRF_MLD_MONITORRESULT_MASK)
+        _pcode = search::fs4transport::PCODE_MLD_MONITORRESULT;
+    else
+        _pcode = search::fs4transport::PCODE_MONITORRESULTX;
+}
+
+
+/**
+ * Set the feature set implied by a legacy packet code. The features
+ * are left untouched for the extended packet code.
+ **/
+void
+FS4Packet_MONITORRESULTX::UpdateCompatFeatures(void)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORRESULT)
+        _features = search::fs4transport::MRF_MONITORRESULT_MASK;
+    else if (_pcode == search::fs4transport::PCODE_MLD_MONITORRESULT)
+        _features = search::fs4transport::MRF_MLD_MONITORRESULT_MASK;
+}
+
+
+/**
+ * @return body size: partid and timestamp, plus the feature word for
+ *         the extended packet code and each optional section selected
+ *         by the features
+ **/
+uint32_t
+FS4Packet_MONITORRESULTX::GetLength(void)
+{
+    uint32_t plen = 2 * sizeof(uint32_t);
+
+    if (_pcode == search::fs4transport::PCODE_MONITORRESULTX)
+        plen += sizeof(uint32_t);
+    if ((_features & search::fs4transport::MRF_MLD) != 0)
+        plen += 4 * sizeof(uint32_t);
+    if ((_features & search::fs4transport::MRF_RFLAGS) != 0)
+        plen += sizeof(uint32_t);
+    if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0)
+        plen += sizeof(uint64_t);
+
+    return plen;
+}
+
+
+/**
+ * Write the fields in the same order Decode reads them. Free space is
+ * not checked.
+ **/
+void
+FS4Packet_MONITORRESULTX::Encode(FNET_DataBuffer *dst)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORRESULTX)
+        dst->WriteInt32Fast(_features);
+
+    dst->WriteInt32Fast(_partid);
+    dst->WriteInt32Fast(_timestamp);
+    if ((_features & search::fs4transport::MRF_MLD) != 0) {
+        dst->WriteInt32Fast(_totalNodes);
+        dst->WriteInt32Fast(_activeNodes);
+        dst->WriteInt32Fast(_totalParts);
+        dst->WriteInt32Fast(_activeParts);
+    }
+    if ((_features & search::fs4transport::MRF_RFLAGS) != 0) {
+        dst->WriteInt32Fast(_rflags);
+    }
+    if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0) {
+        dst->WriteInt64Fast(_activeDocs);
+    }
+}
+
+
+/**
+ * Read the fields selected by the packet code and feature word. Fails,
+ * discarding the unread part of the body, if the body is too short,
+ * has trailing bytes, or announces features outside
+ * FNET_MRF_SUPPORTED_MASK.
+ *
+ * @param src buffer positioned at the packet body
+ * @param len body length
+ * @return true if the packet was decoded
+ **/
+bool
+FS4Packet_MONITORRESULTX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (_pcode == search::fs4transport::PCODE_MONITORRESULTX) {
+        if (len < sizeof(uint32_t)) goto error;
+        _features = src->ReadInt32();
+        len -= sizeof(uint32_t);
+    }
+    if ((_features & ~search::fs4transport::FNET_MRF_SUPPORTED_MASK) != 0)
+        goto error;
+
+    if (len < 2 * sizeof(uint32_t))
+        goto error;
+    _partid = src->ReadInt32();
+    _timestamp = src->ReadInt32();
+    len -= 2 * sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::MRF_MLD) != 0) {
+        if (len < 4 * sizeof(uint32_t))
+            goto error;
+        _totalNodes = src->ReadInt32();
+        _activeNodes = src->ReadInt32();
+        _totalParts = src->ReadInt32();
+        _activeParts = src->ReadInt32();
+        len -= 4 * sizeof(uint32_t);
+    }
+
+    if ((_features & search::fs4transport::MRF_RFLAGS) != 0) {
+        if (len < sizeof(uint32_t))
+            goto error;
+        _rflags = src->ReadInt32();
+        len -= sizeof(uint32_t);
+    }
+
+    if ((_features & search::fs4transport::MRF_ACTIVEDOCS) != 0) {
+        if (len < sizeof(uint64_t))
+            goto error;
+        _activeDocs = src->ReadInt64();
+        len -= sizeof(uint64_t);
+    }
+
+    if (len != 0)
+        goto error;
+
+    SetRealPCODE();
+    return true; // OK
+ error:
+    src->DataToDead(len);
+    return false; // FAIL
+}
+
+
+vespalib::string
+FS4Packet_MONITORRESULTX::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_MONITORRESULTX {\n", indent, "");
+    s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+    s += make_string("%*s features : 0x%x\n", indent, "", _features);
+    s += make_string("%*s partid : %d\n", indent, "", _partid);
+    s += make_string("%*s timestamp : %d\n", indent, "", _timestamp);
+    s += make_string("%*s totalnodes : %d\n", indent, "", _totalNodes);
+    s += make_string("%*s activenodes : %d\n", indent, "", _activeNodes);
+    s += make_string("%*s totalparts : %d\n", indent, "", _totalParts);
+    s += make_string("%*s activeparts : %d\n", indent, "", _activeParts);
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+FS4Packet_CLEARCACHES::FS4Packet_CLEARCACHES()
+    : FS4Packet()
+{
+}
+
+
+FS4Packet_CLEARCACHES::~FS4Packet_CLEARCACHES()
+{
+}
+
+
+/** A CLEARCACHES packet has no body. **/
+uint32_t
+FS4Packet_CLEARCACHES::GetLength()
+{
+    return 0;
+}
+
+
+void
+FS4Packet_CLEARCACHES::Encode(FNET_DataBuffer *dst)
+{
+    (void) dst;
+}
+
+
+/**
+ * Discard whatever body was received; decoding only succeeds when the
+ * body is empty.
+ **/
+bool
+FS4Packet_CLEARCACHES::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    src->DataToDead(len);
+    return (len == 0);
+}
+
+
+vespalib::string
+FS4Packet_CLEARCACHES::toString(uint32_t indent) const
+{
+    return make_string("%*sFS4Packet_CLEARCACHES {}\n", indent, "");
+}
+
+//============================================================
+
+FS4Packet_QUEUELEN::FS4Packet_QUEUELEN()
+    : FS4Packet(),
+      _queueLen(0),
+      _dispatchers(0)
+{
+}
+
+
+FS4Packet_QUEUELEN::~FS4Packet_QUEUELEN()
+{
+}
+
+
+/** @return size of the two words making up the body **/
+uint32_t
+FS4Packet_QUEUELEN::GetLength()
+{
+    return 2 * sizeof(uint32_t);
+}
+
+
+void
+FS4Packet_QUEUELEN::Encode(FNET_DataBuffer *dst)
+{
+    dst->WriteInt32Fast(_queueLen);
+    dst->WriteInt32Fast(_dispatchers);
+}
+
+
+/**
+ * Read queue length and dispatcher count. The body must be exactly two
+ * words long; otherwise it is discarded and decoding fails.
+ **/
+bool
+FS4Packet_QUEUELEN::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (len != 2 * sizeof(uint32_t)) {
+        src->DataToDead(len);
+        return false;
+    }
+    _queueLen = src->ReadInt32();
+    _dispatchers = src->ReadInt32();
+    return true;
+}
+
+
+vespalib::string
+FS4Packet_QUEUELEN::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_QUEUELEN {\n", indent, "");
+    s += make_string("%*s queue len : %d\n", indent, "", _queueLen);
+    s += make_string("%*s dispatchers : %d\n", indent, "", _dispatchers);
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+/**
+ * Allocate the sort index with room for cnt entries plus one
+ * end-of-data entry. Does nothing for cnt == 0. An index allocated
+ * earlier is not released here.
+ *
+ * @param cnt number of hits the index describes
+ **/
+void
+FS4Packet_QUERYRESULTX::AllocateSortIndex(uint32_t cnt)
+{
+    if (cnt == 0)
+        return;
+
+    cnt++; // end of data index entry
+    _sortIndex = new uint32_t[cnt];
+}
+
+
+/**
+ * Allocate len bytes of sort data with malloc. Does nothing for
+ * len == 0. The result of malloc is not checked and a buffer allocated
+ * earlier is not released here.
+ **/
+void
+FS4Packet_QUERYRESULTX::AllocateSortData(uint32_t len)
+{
+    if (len == 0)
+        return;
+
+    _sortData = (char *) malloc(len);
+}
+
+
+/**
+ * Take a private copy of the given sort index and sort data.
+ *
+ * @param cnt number of hits; nothing happens for 0
+ * @param sortIndex index with cnt + 1 entries (entry cnt marks the end
+ *                  of the data)
+ * @param sortData sort data the index refers to
+ **/
+void
+FS4Packet_QUERYRESULTX::SetSortDataRef(uint32_t cnt,
+                                       uint32_t *sortIndex,
+                                       const char *sortData)
+{
+    if (cnt == 0)
+        return;
+
+    AllocateSortIndex(cnt);
+    AllocateSortData(sortIndex[cnt] - sortIndex[0]);
+    _sortIndex[0] = 0;
+    search::common::SortData::Copy(cnt, _sortIndex, _sortData, sortIndex, sortData);
+}
+
+
+/**
+ * Allocate len bytes of aggregation data with malloc and record the
+ * length. Does nothing for len == 0. The result of malloc is not
+ * checked and a buffer allocated earlier is not released here.
+ **/
+void
+FS4Packet_QUERYRESULTX::AllocateAggrData(uint32_t len)
+{
+    if (len == 0)
+        return;
+
+    _aggrData = (char *) malloc(len);
+    _aggrDataLen = len;
+}
+
+
+/** Take a private copy of the given aggregation data. **/
+void
+FS4Packet_QUERYRESULTX::SetAggrDataRef(const char *aggrData,
+                                       uint32_t len)
+{
+    if (len == 0)
+        return;
+
+    AllocateAggrData(len);
+    memcpy(_aggrData, aggrData, len);
+}
+
+
+/**
+ * Allocate len bytes of grouping data with malloc and record the
+ * length. Does nothing for len == 0. The result of malloc is not
+ * checked and a buffer allocated earlier is not released here.
+ **/
+void
+FS4Packet_QUERYRESULTX::AllocateGroupData(uint32_t len)
+{
+    if (len == 0)
+        return;
+
+    _groupData = (char *) malloc(len);
+    _groupDataLen = len;
+}
+
+
+/** Take a private copy of the given grouping data. **/
+void
+FS4Packet_QUERYRESULTX::SetGroupDataRef(const char *groupData,
+                                        uint32_t len)
+{
+    if (len == 0)
+        return;
+
+    AllocateGroupData(len);
+    memcpy(_groupData, groupData, len);
+}
+
+
+/**
+ * Allocate the hit array and record the hit count. Does nothing for
+ * cnt == 0. An array allocated earlier is not released here.
+ **/
+void
+FS4Packet_QUERYRESULTX::AllocateHits(uint32_t cnt)
+{
+    if (cnt == 0)
+        return;
+
+    _hits = new FS4_hit[cnt];
+    _numDocs = cnt;
+}
+
+
+/**
+ * @param pcode packet code this packet is created for; the legacy
+ *              QUERYRESULT and MLD_QUERYRESULT codes imply fixed
+ *              feature masks
+ **/
+FS4Packet_QUERYRESULTX::FS4Packet_QUERYRESULTX(uint32_t pcode)
+    : FS4Packet(),
+      _pcode(pcode),
+      _distributionKey(0),
+      _features(0),
+      _offset(0),
+      _numDocs(0),
+      _totNumDocs(0),
+      _maxRank(0),
+      _sortIndex(NULL),
+      _sortData(NULL),
+      _aggrDataLen(0),
+      _aggrData(NULL),
+      _groupDataLen(0),
+      _groupData(NULL),
+      _coverageDocs(0),
+      _activeDocs(0),
+      _hits(NULL),
+      _propsVector()
+{
+    UpdateCompatFeatures();
+}
+
+
+/**
+ * Release the buffers owned by the packet; arrays created with new[]
+ * are deleted, buffers created with malloc are freed.
+ **/
+FS4Packet_QUERYRESULTX::~FS4Packet_QUERYRESULTX()
+{
+    if (_sortIndex) { delete [] _sortIndex; }
+    if (_sortData) { free(_sortData); }
+    if (_aggrData) { free(_aggrData); }
+    if (_groupData) { free(_groupData); }
+    if (_hits) { delete [] _hits; }
+}
+
+
+/**
+ * Select the packet code from the feature set: one of the two legacy
+ * codes when the features equal the matching legacy mask, the extended
+ * code otherwise.
+ **/
+void
+FS4Packet_QUERYRESULTX::UpdateCompatPCODE()
+{
+    if (_features == search::fs4transport::QRF_QUERYRESULT_MASK)
+        _pcode = search::fs4transport::PCODE_QUERYRESULT;
+    else if (_features == search::fs4transport::QRF_MLD_QUERYRESULT_MASK)
+        _pcode = search::fs4transport::PCODE_MLD_QUERYRESULT;
+    else
+        _pcode = search::fs4transport::PCODE_QUERYRESULTX;
+}
+
+
+/**
+ * Set the feature set implied by a legacy packet code. The features
+ * are left untouched for the extended packet code.
+ **/
+void
+FS4Packet_QUERYRESULTX::UpdateCompatFeatures()
+{
+    if (_pcode == search::fs4transport::PCODE_QUERYRESULT)
+        _features = search::fs4transport::QRF_QUERYRESULT_MASK;
+    else if (_pcode == search::fs4transport::PCODE_MLD_QUERYRESULT)
+        _features = search::fs4transport::QRF_MLD_QUERYRESULT_MASK;
+}
+
+
+/**
+ * @return body size: the fixed fields and one gid + rank per hit, plus
+ *         the feature word for the extended packet code and each
+ *         optional section selected by the features
+ **/
+uint32_t
+FS4Packet_QUERYRESULTX::GetLength()
+{
+    uint32_t plen = 3 * sizeof(uint32_t) +
+                    sizeof(uint64_t) + // hit count is now 64-bit
+                    sizeof(search::HitRank) +
+                    _numDocs * (sizeof(document::GlobalId) + sizeof(search::HitRank));
+
+    if (_pcode == search::fs4transport::PCODE_QUERYRESULTX)
+        plen += sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::QRF_MLD) != 0)
+        plen += _numDocs * 2 * sizeof(uint32_t);
+
+    if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+        (_numDocs > 0))
+        plen += _numDocs * sizeof(uint32_t) +
+                (_sortIndex[_numDocs] - _sortIndex[0]);
+
+    if ((_features & search::fs4transport::QRF_AGGRDATA) != 0)
+        plen += sizeof(uint32_t) +
+                _aggrDataLen;
+
+    if ((_features & search::fs4transport::QRF_GROUPDATA) != 0)
+        plen += sizeof(uint32_t) +
+                _groupDataLen;
+
+    if ((_features & search::fs4transport::QRF_COVERAGE) != 0)
+        plen += sizeof(uint64_t) +
+                2 * sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+        plen += sizeof(uint32_t);
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            plen += _propsVector[i].getLength();
+        }
+    }
+
+    return plen;
+}
+
+
+/**
+ * Write the packet body: fixed fields, then the optional sort,
+ * aggregation, grouping and coverage sections, then the hits and
+ * finally the optional properties. Doubles are written as their 64-bit
+ * representation. Free space is not checked.
+ **/
+void
+FS4Packet_QUERYRESULTX::Encode(FNET_DataBuffer *dst)
+{
+    if (_pcode == search::fs4transport::PCODE_QUERYRESULTX) {
+        // Never provide QF_WARMUP downwards
+        // NOTE(review): QF_WARMUP is named like a query flag but is
+        // masked out of the result feature word here - confirm intent.
+        dst->WriteInt32Fast(_features & ~QF_WARMUP);
+    }
+    dst->WriteInt32Fast(_offset);
+    dst->WriteInt32Fast(_numDocs);
+    dst->WriteInt64Fast(_totNumDocs);
+    union { uint64_t INT64; double DOUBLE; } mrval;
+    mrval.DOUBLE = _maxRank;
+    dst->WriteInt64Fast(mrval.INT64);
+    dst->WriteInt32Fast(_distributionKey);
+
+    if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+        (_numDocs > 0))
+    {
+        uint32_t idx0 = _sortIndex[0];
+        // implicit: first index entry always 0
+        for (uint32_t i = 1; i <= _numDocs; i++) {
+            dst->WriteInt32Fast(_sortIndex[i] - idx0);
+        }
+        dst->WriteBytesFast(_sortData + idx0,
+                            _sortIndex[_numDocs] - idx0);
+    }
+
+    if ((_features & search::fs4transport::QRF_AGGRDATA) != 0) {
+        dst->WriteInt32Fast(_aggrDataLen);
+        dst->WriteBytesFast(_aggrData, _aggrDataLen);
+    }
+
+    if ((_features & search::fs4transport::QRF_GROUPDATA) != 0) {
+        dst->WriteInt32Fast(_groupDataLen);
+        dst->WriteBytesFast(_groupData, _groupDataLen);
+    }
+
+    if ((_features & search::fs4transport::QRF_COVERAGE) != 0) {
+        dst->WriteInt64Fast(_coverageDocs);
+        dst->WriteInt64Fast(_activeDocs);
+    }
+
+    for (uint32_t i = 0; i < _numDocs; i++) {
+        dst->WriteBytesFast(_hits[i]._gid.get(), document::GlobalId::LENGTH);
+        union { uint64_t INT64; double DOUBLE; } val;
+        val.DOUBLE = _hits[i]._metric;
+        dst->WriteInt64Fast(val.INT64);
+        if ((_features & search::fs4transport::QRF_MLD) != 0) {
+            dst->WriteInt32Fast(_hits[i]._partid);
+            dst->WriteInt32Fast(_hits[i].getDistributionKey());
+        }
+    }
+
+    if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+        dst->WriteInt32Fast(_propsVector.size());
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            _propsVector[i].encode(*dst);
+        }
+    }
+
+}
+
+
+bool
+FS4Packet_QUERYRESULTX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    uint32_t i;
+    uint32_t hitSize = sizeof(document::GlobalId);
+
+    if (_pcode == search::fs4transport::PCODE_QUERYRESULTX) {
+        if (len < sizeof(uint32_t)) goto error;
+        _features = src->ReadInt32();
+        len -= sizeof(uint32_t);
+    }
+
+    if ((_features & ~search::fs4transport::FNET_QRF_SUPPORTED_MASK) != 0) {
+        throwUnsupportedFeatures(_features, search::fs4transport::FNET_QRF_SUPPORTED_MASK);
+    }
+    hitSize += sizeof(uint64_t);
+
+    if (len < 3 * sizeof(uint32_t) + sizeof(uint64_t) + sizeof(search::HitRank)) goto error;
+    _offset = src->ReadInt32();
+    _numDocs = src->ReadInt32();
+    _totNumDocs = src->ReadInt64();
+    union { uint64_t INT64; double DOUBLE; } mrval;
+    mrval.INT64 = src->ReadInt64();
+    _maxRank = mrval.DOUBLE;
+    _distributionKey = src->ReadInt32();
+    len -= 3 * sizeof(uint32_t) + sizeof(uint64_t) + sizeof(search::HitRank);
+
+    if (((_features & search::fs4transport::QRF_SORTDATA) != 0) &&
+        (_numDocs > 0)) {
+        if (len < _numDocs * sizeof(uint32_t)) goto error;
+        AllocateSortIndex(_numDocs);
+        _sortIndex[0] = 0; // implicit
+        for (i = 1; i <= _numDocs; i++)
+            _sortIndex[i] = src->ReadInt32();
+        len -= _numDocs * sizeof(uint32_t);
+        uint32_t sortDataLen = _sortIndex[_numDocs];
+
+        if (len < sortDataLen) goto error;
+        AllocateSortData(sortDataLen);
+        src->ReadBytes(_sortData, sortDataLen);
+        len -= sortDataLen;
+    }
+
+    if ((_features & search::fs4transport::QRF_AGGRDATA) != 0) {
+        if (len < sizeof(uint32_t)) goto error;
+        _aggrDataLen = src->ReadInt32();
+        len -= sizeof(uint32_t);
+
+        if (len < _aggrDataLen) goto error;
+        AllocateAggrData(_aggrDataLen);
+        src->ReadBytes(_aggrData, _aggrDataLen);
+        len -= _aggrDataLen;
+    }
+
+    if ((_features & search::fs4transport::QRF_GROUPDATA) != 0) {
+        if (len < sizeof(uint32_t)) goto error;
+        _groupDataLen = src->ReadInt32();
+        len -= sizeof(uint32_t);
+
+        if (len < _groupDataLen) goto error;
+        AllocateGroupData(_groupDataLen);
+        src->ReadBytes(_groupData, _groupDataLen);
+        len -= _groupDataLen;
+    }
+
+    if ((_features & search::fs4transport::QRF_COVERAGE) != 0) {
+        if (len < 2 * sizeof(uint64_t)) goto error;
+        _coverageDocs = src->ReadInt64();
+        _activeDocs = src->ReadInt64();
+        len -= 2 * sizeof(uint64_t);
+    }
+
+    if ((_features & search::fs4transport::QRF_MLD) != 0)
+        hitSize += 2 * sizeof(uint32_t);
+
+    if (len < _numDocs * hitSize) goto error;
+    AllocateHits(_numDocs);
+    unsigned char rawGid[document::GlobalId::LENGTH];
+    for (i = 0; i < _numDocs; i++) {
+        src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+        _hits[i]._gid.set(rawGid);
+        union { uint64_t INT64; double DOUBLE; } val;
+        val.INT64 = src->ReadInt64();
+        _hits[i]._metric = val.DOUBLE;
+        if ((_features & search::fs4transport::QRF_MLD) != 0) {
+            _hits[i]._partid = src->ReadInt32();
+            _hits[i].setDistributionKey(src->ReadInt32());
+        } else {
+            _hits[i]._partid = 0; // partid not available
+            _hits[i].setDistributionKey(getDistributionKey());
+        }
+    }
+    len -= _numDocs * hitSize;
+
+    if ((_features & search::fs4transport::QRF_PROPERTIES) != 0) {
+        uint32_t sz = src->ReadInt32();
+        _propsVector.resize(sz);
+        len -= sizeof(uint32_t);
+        for (i = 0; i < sz; ++i) {
+            if (!
_propsVector[i].decode(*src, len)) goto error; + } + } + + if (len != 0) goto error; + + SetRealPCODE(); + return true; // OK + + error: + src->DataToDead(len); + return false; // FAIL +} + + +vespalib::string +FS4Packet_QUERYRESULTX::toString(uint32_t indent) const +{ + vespalib::string s; + uint32_t i; + + s += make_string("%*sFS4Packet_QUERYRESULTX {\n", indent, ""); + s += make_string("%*s pcode : %d\n", indent, "", _pcode); + s += make_string("%*s features : 0x%x\n", indent, "", _features); + s += make_string("%*s offset : %d\n", indent, "", _offset); + s += make_string("%*s numDocs : %d\n", indent, "", _numDocs); + s += make_string("%*s totNumDocs : %" PRIu64 "\n", indent, "", _totNumDocs); + s += make_string("%*s maxRank : %f\n", indent, "", _maxRank); + s += make_string("%*s distrib key : %d\n", indent, "", getDistributionKey()); + if (_numDocs > 0 && _sortIndex != NULL) { + uint32_t offset = _sortIndex[0]; + for (i = 0; i < _numDocs; i++) { + uint32_t end = _sortIndex[i + 1]; + s += make_string("%*s sort[%d] = { 0x", indent, "", i); + for (; offset < end; offset++) + s += make_string("%02x", (unsigned char)*(_sortData + offset)); + s += make_string(" }\n"); + } + } + s += make_string("%*s aggrData : %d bytes\n", indent, "", _aggrDataLen); + s += make_string("%*s groupData : %d bytes\n", indent, "", _groupDataLen); + s += make_string("%*s coverageDocs : %" PRIu64 "\n", indent, "", _coverageDocs); + s += make_string("%*s activeDocs : %" PRIu64 "\n", indent, "", _activeDocs); + for (i = 0; i < _numDocs; i++) { + s += make_string("%*s hit {", indent, ""); + s += make_string("gid=%s, ", _hits[i]._gid.toString().c_str()); + s += make_string("metric=%f, ", _hits[i]._metric); + s += make_string("partid=%d, ", _hits[i]._partid); + s += make_string("distribkey=%d, ", _hits[i].getDistributionKey()); + } + s += make_string("%*s}\n", indent, ""); + return s; +} + +//============================================================ + + 
+// Constructs a query packet for the given packet code. For the legacy
+// PCODE_PARSEDQUERY2 code the feature mask is implied by the code itself.
+FS4Packet_QUERYX::FS4Packet_QUERYX(uint32_t pcode)
+    : FS4Packet(),
+      _pcode(pcode),
+      _timeout(0),
+      _features(0),
+      _offset(0),
+      _maxhits(0),
+      _qflags(0),
+      _ranking(),
+      _propsVector(),
+      _sortSpec(),
+      _aggrSpec(),
+      _groupSpec(),
+      _sessionId(),
+      _location(),
+      _numStackItems(0),
+      _stackDump()
+{
+    UpdateCompatFeatures();
+}
+
+
+FS4Packet_QUERYX::~FS4Packet_QUERYX()
+{
+}
+
+
+// Selects the legacy packet code when the feature mask matches it exactly,
+// otherwise the extensible PCODE_QUERYX.
+void
+FS4Packet_QUERYX::UpdateCompatPCODE()
+{
+    if (_features == search::fs4transport::QF_PARSEDQUERY2_MASK)
+        _pcode = search::fs4transport::PCODE_PARSEDQUERY2;
+    else
+        _pcode = search::fs4transport::PCODE_QUERYX;
+}
+
+
+// Derives the feature mask from a legacy packet code; other codes leave
+// _features untouched.
+void
+FS4Packet_QUERYX::UpdateCompatFeatures()
+{
+    if (_pcode == search::fs4transport::PCODE_PARSEDQUERY2)
+        _features = search::fs4transport::QF_PARSEDQUERY2_MASK;
+}
+
+
+// Returns the number of bytes Encode() will write for the current contents.
+uint32_t
+FS4Packet_QUERYX::GetLength()
+{
+    // timeout + qflags
+    uint32_t plen = 2 * sizeof(uint32_t);
+    plen += FNET_DataBuffer::getCompressedPositiveLength(_offset);
+    plen += FNET_DataBuffer::getCompressedPositiveLength(_maxhits);
+    // the feature word is only on the wire for PCODE_QUERYX
+    if (_pcode == search::fs4transport::PCODE_QUERYX)
+        plen += sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+        plen += sizeof(uint32_t)*2;
+        plen += _stackDump.size();
+    }
+    if ((_features & search::fs4transport::QF_RANKP) != 0) {
+        plen += FNET_DataBuffer::getCompressedPositiveLength(_ranking.size());
+        plen += _ranking.size();
+    }
+    if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+        plen += sizeof(uint32_t);
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            plen += _propsVector[i].getLength();
+        }
+    }
+
+    if ((_features & search::fs4transport::QF_SORTSPEC) != 0)
+        plen += sizeof(uint32_t)
+                + _sortSpec.size();
+
+    if ((_features & search::fs4transport::QF_AGGRSPEC) != 0)
+        plen += sizeof(uint32_t)
+                + _aggrSpec.size();
+
+    if ((_features & search::fs4transport::QF_GROUPSPEC) != 0)
+        plen += sizeof(uint32_t)
+                + _groupSpec.size();
+
+    if ((_features & search::fs4transport::QF_SESSIONID) != 0)
+        plen += sizeof(uint32_t)
+                + _sessionId.size();
+
+    if ((_features & search::fs4transport::QF_LOCATION) != 0)
+        plen += sizeof(uint32_t)
+                + _location.size();
+
+    return plen;
+}
+
+
+// Serializes the packet into dst. Field order here must match Decode().
+void
+FS4Packet_QUERYX::Encode(FNET_DataBuffer *dst)
+{
+    if (_pcode == search::fs4transport::PCODE_QUERYX)
+        dst->WriteInt32Fast(_features);
+
+    dst->writeCompressedPositive(_offset);
+    dst->writeCompressedPositive(_maxhits);
+    dst->WriteInt32Fast(_timeout);
+    dst->WriteInt32Fast(_qflags);
+
+    if ((_features & search::fs4transport::QF_RANKP) != 0) {
+        dst->writeCompressedPositive(_ranking.size());
+        dst->WriteBytesFast(_ranking.c_str(), _ranking.size());
+    }
+
+    if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+        dst->WriteInt32Fast(_propsVector.size());
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            _propsVector[i].encode(*dst);
+        }
+    }
+
+    if ((_features & search::fs4transport::QF_SORTSPEC) != 0) {
+        dst->WriteInt32Fast(_sortSpec.size());
+        dst->WriteBytesFast(_sortSpec.c_str(), _sortSpec.size());
+    }
+
+    if ((_features & search::fs4transport::QF_AGGRSPEC) != 0) {
+        dst->WriteInt32Fast(_aggrSpec.size());
+        dst->WriteBytesFast(_aggrSpec.c_str(), _aggrSpec.size());
+    }
+
+    if ((_features & search::fs4transport::QF_GROUPSPEC) != 0) {
+        dst->WriteInt32Fast(_groupSpec.size());
+        dst->WriteBytesFast(_groupSpec.c_str(), _groupSpec.size());
+    }
+
+    if ((_features & search::fs4transport::QF_SESSIONID) != 0) {
+        dst->WriteInt32Fast(_sessionId.size());
+        dst->WriteBytesFast(_sessionId.c_str(), _sessionId.size());
+    }
+
+    if ((_features & search::fs4transport::QF_LOCATION) != 0) {
+        dst->WriteInt32Fast(_location.size());
+        dst->WriteBytesFast(_location.c_str(), _location.size());
+    }
+
+    if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+        dst->WriteInt32Fast(_numStackItems);
+        dst->WriteInt32Fast(_stackDump.size());
+        dst->WriteBytesFast(_stackDump.c_str(), _stackDump.size());
+    }
+}
+
+// Throws IllegalArgumentException naming the property block that failed.
+void FS4Packet::throwPropertieDecodeError(size_t i)
+{
+    // %zu is the conversion for size_t; the previous %ld was a type mismatch.
+    throw vespalib::IllegalArgumentException(vespalib::make_string("Failed decoding properties[%zu]", i));
+}
+
+// Throws when a packet carries feature bits outside the supported set.
+void FS4Packet::throwUnsupportedFeatures(uint32_t features, uint32_t set)
+{
+    throw vespalib::UnderflowException(vespalib::make_string("Unsupported features(%x), supported set(%x)", features, set));
+}
+
+// Throws UnderflowException describing how many bytes were left versus needed
+// while decoding the field described by text.
+void FS4Packet::throwNotEnoughData(FNET_DataBuffer & buf, uint32_t left, uint32_t needed, const char * text)
+{
+    (void) buf;
+    // All three values are uint32_t, so they are printed with %u.
+    throw vespalib::UnderflowException(vespalib::make_string("Failed decoding packet of type %u. Only %u bytes left, needed %u from '%s'", GetPCODE(), left, needed, text));
+}
+
+// Consumes `needed` bytes from the local byte budget `len`, throwing via
+// throwNotEnoughData() when fewer remain. Expects `src` and `len` in the
+// enclosing scope. `needed` is evaluated exactly once.
+#define VERIFY_LEN(needed, text)                                      \
+    do {                                                              \
+        const uint32_t verifyLenNeeded_ = static_cast<uint32_t>(needed); \
+        if (len < verifyLenNeeded_) {                                 \
+            throwNotEnoughData(*src, len, verifyLenNeeded_, (text));  \
+        }                                                             \
+        len -= verifyLenNeeded_;                                      \
+    } while (0)
+
+// Reads one 32-bit integer from buf after checking and reducing the
+// remaining byte budget len; throws when fewer than 4 bytes remain.
+uint32_t FS4Packet::readUInt32(FNET_DataBuffer & buf, uint32_t & len, const char *text)
+{
+    if (len < sizeof(uint32_t)) {
+        throwNotEnoughData(buf, len, sizeof(uint32_t), text);
+    }
+    len -= sizeof(uint32_t);
+    return buf.ReadInt32();
+}
+
+// Stores the timeout in milliseconds; negative values are clamped to 0.
+void
+FS4Packet_GETDOCSUMSX::setTimeout(const fastos::TimeStamp & timeout)
+{
+    _timeout = std::max(0l, timeout.ms());
+}
+
+// Returns the stored millisecond timeout as a TimeStamp.
+fastos::TimeStamp
+FS4Packet_GETDOCSUMSX::getTimeout() const
+{
+    return fastos::TimeStamp(_timeout*fastos::TimeStamp::MS);
+}
+
+// Stores the timeout in milliseconds; negative values are clamped to 0.
+void
+FS4Packet_QUERYX::setTimeout(const fastos::TimeStamp & timeout)
+{
+    _timeout = std::max(0l, timeout.ms());
+}
+
+// Returns the stored millisecond timeout as a TimeStamp.
+fastos::TimeStamp
+FS4Packet_QUERYX::getTimeout() const
+{
+    return fastos::TimeStamp(_timeout*fastos::TimeStamp::MS);
+}
+
+// Deserializes a query packet of len bytes from src.
+// Returns true on success. Throws on unsupported feature bits, on truncated
+// input, on a property block that fails to decode, and on trailing bytes.
+bool
+FS4Packet_QUERYX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    if (_pcode == search::fs4transport::PCODE_QUERYX) {
+        _features = readUInt32(*src, len, "features");
+    }
+
+    if (((_features & ~search::fs4transport::FNET_QF_SUPPORTED_MASK) != 0)) {
+        throwUnsupportedFeatures(_features, search::fs4transport::FNET_QF_SUPPORTED_MASK);
+    }
+    // The encoded size of a compressed integer is only known after reading it,
+    // so the budget is verified afterwards. Going through VERIFY_LEN keeps a
+    // short packet from wrapping the unsigned `len` around, which would
+    // silently disable every later bounds check.
+    _offset = src->readCompressedPositiveInteger();
+    VERIFY_LEN(FNET_DataBuffer::getCompressedPositiveLength(_offset), "offset");
+    _maxhits = src->readCompressedPositiveInteger();
+    VERIFY_LEN(FNET_DataBuffer::getCompressedPositiveLength(_maxhits), "maxhits");
+    VERIFY_LEN(2 * sizeof(uint32_t), "offset, maxhits, timeout and qflags");
+    _timeout = src->ReadInt32();
+    _qflags = src->ReadInt32();
+
+    if ((_features & search::fs4transport::QF_RANKP) != 0) {
+        uint32_t rankingLen = src->readCompressedPositiveInteger();
+        VERIFY_LEN(FNET_DataBuffer::getCompressedPositiveLength(rankingLen), "ranking length");
+        VERIFY_LEN(rankingLen, "ranking blob");
+        setRanking(stringref(src->GetData(), rankingLen));
+        src->DataToDead(rankingLen);
+    }
+
+    if ((_features & search::fs4transport::QF_PROPERTIES) != 0) {
+        uint32_t cnt = readUInt32(*src, len, "#properties");
+        _propsVector.resize(cnt);
+        for (uint32_t i = 0; i < cnt; ++i) {
+            if (!_propsVector[i].decode(*src, len)) {
+                throwPropertieDecodeError(i);
+            }
+        }
+    }
+
+    if ((_features & search::fs4transport::QF_SORTSPEC) != 0) {
+        uint32_t sortSpecLen = readUInt32(*src, len, "sortspec length");
+
+        VERIFY_LEN(sortSpecLen, "sortspec string");
+        setSortSpec(stringref(src->GetData(), sortSpecLen));
+        src->DataToDead(sortSpecLen);
+    }
+
+    if ((_features & search::fs4transport::QF_AGGRSPEC) != 0) {
+        uint32_t aggrSpecLen = readUInt32(*src, len, "aggrspec length");
+
+        VERIFY_LEN(aggrSpecLen, "aggrspec string");
+        setAggrSpec(stringref(src->GetData(), aggrSpecLen));
+        src->DataToDead(aggrSpecLen);
+    }
+
+    if ((_features & search::fs4transport::QF_GROUPSPEC) != 0) {
+        uint32_t groupSpecLen = readUInt32(*src, len, "groupspec length");
+
+        VERIFY_LEN(groupSpecLen, "groupspec string");
+        setGroupSpec(stringref(src->GetData(), groupSpecLen));
+        src->DataToDead(groupSpecLen);
+    }
+
+    if ((_features & search::fs4transport::QF_SESSIONID) != 0) {
+        uint32_t sessionIdLen = readUInt32(*src, len, "sessionid length");
+        VERIFY_LEN(sessionIdLen, "sessionid string");
+        setSessionId(stringref(src->GetData(), sessionIdLen));
+        src->DataToDead(sessionIdLen);
+    }
+
+    if ((_features & search::fs4transport::QF_LOCATION) != 0) {
+        uint32_t locationLen = readUInt32(*src, len, "location length");
+
+        VERIFY_LEN(locationLen, "location string");
+        setLocation(stringref(src->GetData(), locationLen));
+        src->DataToDead(locationLen);
+    }
+
+    // The value is read and discarded; Encode() never writes this field.
+    if ((_features & search::fs4transport::QF_WARMUP) != 0) {
+        (void) readUInt32(*src, len, "warmup");
+    }
+
+    if ((_features & search::fs4transport::QF_PARSEDQUERY) != 0) {
+        _numStackItems = readUInt32(*src, len, "# querystack items");
+
+        uint32_t stackDumpLen = readUInt32(*src, len, "stackdump length");
+        VERIFY_LEN(stackDumpLen, "stackdump");
+        setStackDump(stringref(src->GetData(), stackDumpLen));
+        src->DataToDead(stackDumpLen);
+    }
+    // Any bytes left over mean the packet did not match the declared layout.
+    if (len != 0) {
+        throwNotEnoughData(*src, len, 0, "eof");
+    }
+
+    SetRealPCODE();
+    return true; // OK
+}
+
+
+// Renders a human-readable dump of the packet, indented by `indent` columns.
+vespalib::string
+FS4Packet_QUERYX::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_QUERYX {\n", indent, "");
+    s += make_string("%*s pcode : %d\n", indent, "", _pcode);
+    s += make_string("%*s features : 0x%x\n", indent, "", _features);
+    s += make_string("%*s offset : %d\n", indent, "", _offset);
+    s += make_string("%*s maxhits : %d\n", indent, "", _maxhits);
+    s += make_string("%*s qflags : %x\n", indent, "", _qflags);
+    s += make_string("%*s ranking : %s\n", indent, "", _ranking.c_str());
+    for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+        s += _propsVector[i].toString(indent + 2);
+    }
+    s += make_string("%*s sortspec : %s\n", indent, "", _sortSpec.c_str());
+    s += make_string("%*s aggrspec : %s\n", indent, "", _aggrSpec.c_str());
+    s += make_string("%*s groupspec : (%d bytes)\n", indent, "", (int)_groupSpec.size());
+    s += make_string("%*s sessionId : (%d bytes)\n", indent, "", (int)_sessionId.size());
+    s += make_string("%*s location : %s\n", indent, "", _location.c_str());
+    s += make_string("%*s timeout : %d\n", indent, "", _timeout);
+    s += make_string("%*s stackitems : %d\n", indent, "", _numStackItems);
+    s += make_string("%*s stack dump :\n", indent, "");
+    if (_stackDump.size() > 0) {
+        const char *pt = _stackDump.c_str();
+        s += make_string("%*s ", indent, "");
+        uint32_t i = 0;
+        while (i < _stackDump.size()) {
+            s += make_string("%x ", (unsigned char) pt[i]);
+            // start a new row after every 16 bytes, except after the last byte
+            if ((++i % 16) == 0 && i < _stackDump.size()) {
+                s += make_string("\n%*s ", indent, "");
+            }
+        }
+        // The loop never ends a row itself, so the last row is always closed
+        // here, including when the dump size is a multiple of 16.
+        s += make_string("\n");
+    }
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+
+// Allocates storage for cnt document ids; does nothing when cnt is 0.
+// NOTE(review): an array left from an earlier call is overwritten without
+// being released; confirm callers allocate at most once per packet.
+void
+FS4Packet_GETDOCSUMSX::AllocateDocIDs(uint32_t cnt)
+{
+    if (cnt == 0)
+        return;
+
+    _docid = new FS4_docid[cnt];
+    _docidCnt = cnt;
+}
+
+
+// Constructs a docsum request for the given packet code. For the legacy
+// codes the feature mask is implied by the code itself.
+FS4Packet_GETDOCSUMSX::FS4Packet_GETDOCSUMSX(uint32_t pcode)
+    : FS4Packet(),
+      _pcode(pcode),
+      _timeout(0),
+      _features(0),
+      _ranking(),
+      _qflags(0),
+      _resultClassName(),
+      _propsVector(),
+      _stackItems(0),
+      _stackDump(),
+      _location(),
+      _flags(0u),
+      _docid(NULL),
+      _docidCnt(0)
+{
+    UpdateCompatFeatures();
+}
+
+
+FS4Packet_GETDOCSUMSX::~FS4Packet_GETDOCSUMSX()
+{
+    delete [] _docid;
+}
+
+
+// Selects a legacy packet code when the feature mask matches one exactly,
+// otherwise the extensible PCODE_GETDOCSUMSX.
+void
+FS4Packet_GETDOCSUMSX::UpdateCompatPCODE()
+{
+    if (_features == search::fs4transport::GDF_GETDOCSUMS_MASK)
+        _pcode = search::fs4transport::PCODE_GETDOCSUMS;
+    else if (_features == search::fs4transport::GDF_MLD_GETDOCSUMS_MASK)
+        _pcode = search::fs4transport::PCODE_MLD_GETDOCSUMS;
+    else
+        _pcode = search::fs4transport::PCODE_GETDOCSUMSX;
+}
+
+
+// Derives the feature mask from a legacy packet code; other codes leave
+// _features untouched.
+void
+FS4Packet_GETDOCSUMSX::UpdateCompatFeatures()
+{
+    if (_pcode == search::fs4transport::PCODE_GETDOCSUMS)
+        _features = search::fs4transport::GDF_GETDOCSUMS_MASK;
+    else if (_pcode == search::fs4transport::PCODE_MLD_GETDOCSUMS)
+        _features = search::fs4transport::GDF_MLD_GETDOCSUMS_MASK;
+}
+
+
+// Returns the number of bytes Encode() will write for the current contents.
+uint32_t
+FS4Packet_GETDOCSUMSX::GetLength()
+{
+    // unused word + timeout, followed by one global id per document
+    uint32_t plen = 2 * sizeof(uint32_t)
+                    + _docidCnt * (sizeof(document::GlobalId));
+
+    if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX)
+        plen += sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::GDF_MLD) != 0)
+        plen += 2 * _docidCnt * sizeof(uint32_t);
+
+    if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0)
+        plen += 2 * sizeof(uint32_t) + _stackDump.size();
+
+    if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0)
+        plen += sizeof(uint32_t) + _resultClassName.size();
+
+    if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+        plen += sizeof(uint32_t);
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            plen += _propsVector[i].getLength();
+        }
+    }
+
+    if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+        plen += FNET_DataBuffer::getCompressedPositiveLength(_ranking.size());
+        plen += _ranking.size();
+        plen += sizeof(uint32_t);
+    }
+
+    if ((_features & search::fs4transport::GDF_LOCATION) != 0)
+        plen += sizeof(uint32_t)
+                + _location.size();
+
+    if ((_features & search::fs4transport::GDF_FLAGS) != 0)
+        plen += sizeof(uint32_t);
+
+    return plen;
+}
+
+
+// Serializes the packet into dst. Field order here must match Decode().
+void
+FS4Packet_GETDOCSUMSX::Encode(FNET_DataBuffer *dst)
+{
+    if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX)
+        dst->WriteInt32Fast(_features);
+
+    dst->WriteInt32Fast(0); // unused word, skipped again by Decode()
+    dst->WriteInt32Fast(_timeout);
+
+    if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+        dst->writeCompressedPositive(_ranking.size());
+        dst->WriteBytesFast(_ranking.c_str(), _ranking.size());
+        dst->WriteInt32Fast(_qflags);
+    }
+
+    if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0) {
+        writeLenString(dst, _resultClassName);
+    }
+
+    if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+        dst->WriteInt32Fast(_propsVector.size());
+        for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+            _propsVector[i].encode(*dst);
+        }
+    }
+
+    if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0) {
+        dst->WriteInt32Fast(_stackItems);
+        writeLenString(dst, _stackDump);
+    }
+
+    if ((_features & search::fs4transport::GDF_LOCATION) != 0) {
+        writeLenString(dst, _location);
+    }
+
+    if ((_features & search::fs4transport::GDF_FLAGS) != 0) {
+        dst->WriteInt32Fast(_flags);
+    }
+
+    for (uint32_t i = 0; i < _docidCnt; i++) {
+        dst->WriteBytesFast(_docid[i]._gid.get(), document::GlobalId::LENGTH);
+
+        if ((_features & search::fs4transport::GDF_MLD) != 0) {
+            dst->WriteInt32Fast(_docid[i]._partid);
+            dst->WriteInt32Fast(0); // unused word, skipped again by Decode()
+        }
+    }
+}
+
+
+// Deserializes a docsum request of len bytes from src.
+// Returns true on success. Throws on unsupported feature bits, on truncated
+// input, on a property block that fails to decode, and on trailing bytes.
+// The document count is not transmitted; it is derived from the bytes that
+// remain after the fixed and optional fields.
+bool
+FS4Packet_GETDOCSUMSX::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    uint32_t docidSize = sizeof(document::GlobalId);
+
+    if (_pcode == search::fs4transport::PCODE_GETDOCSUMSX) {
+        _features = readUInt32(*src, len, "features");
+    }
+
+    if ((_features & ~search::fs4transport::FNET_GDF_SUPPORTED_MASK) != 0) {
+        throwUnsupportedFeatures(_features, search::fs4transport::FNET_GDF_SUPPORTED_MASK);
+    }
+
+    VERIFY_LEN(2*sizeof(uint32_t), "unused and timeout");
+    src->ReadInt32(); // unused
+    _timeout = src->ReadInt32();
+
+    if ((_features & search::fs4transport::GDF_RANKP_QFLAGS) != 0) {
+        uint32_t rankingLen = src->readCompressedPositiveInteger();
+        // Checked instead of blindly subtracted, so a short packet cannot
+        // wrap the unsigned `len` around and defeat the checks below.
+        VERIFY_LEN(FNET_DataBuffer::getCompressedPositiveLength(rankingLen), "ranking length");
+
+        VERIFY_LEN(rankingLen, "ranking blob");
+        setRanking(vespalib::stringref(src->GetData(), rankingLen));
+        src->DataToDead(rankingLen);
+
+        _qflags = readUInt32(*src, len, "qflags");
+    }
+
+    if ((_features & search::fs4transport::GDF_RESCLASSNAME) != 0) {
+        uint32_t resultClassNameLen = readUInt32(*src, len, "result class name length");
+
+        VERIFY_LEN(resultClassNameLen, "result class");
+        setResultClassName(stringref(src->GetData(), resultClassNameLen));
+        src->DataToDead(resultClassNameLen);
+    }
+
+    if ((_features & search::fs4transport::GDF_PROPERTIES) != 0) {
+        uint32_t cnt = readUInt32(*src, len, "#properties");
+        _propsVector.resize(cnt);
+        for (uint32_t i = 0; i < cnt; ++i) {
+            if (!_propsVector[i].decode(*src, len)) {
+                throwPropertieDecodeError(i);
+            }
+        }
+    }
+
+    if ((_features & search::fs4transport::GDF_QUERYSTACK) != 0) {
+        _stackItems = readUInt32(*src, len, "num stack items");
+        uint32_t stackDumpLen = readUInt32(*src, len, "stackdump length");
+        VERIFY_LEN(stackDumpLen, "stackdump");
+        setStackDump(stringref(src->GetData(), stackDumpLen));
+        src->DataToDead(stackDumpLen);
+    }
+
+    if ((_features & search::fs4transport::GDF_LOCATION) != 0) {
+        uint32_t locationLen = readUInt32(*src, len, "location length");
+        VERIFY_LEN(locationLen, "location string");
+        setLocation(stringref(src->GetData(), locationLen));
+        src->DataToDead(locationLen);
+    }
+
+    if ((_features & search::fs4transport::GDF_FLAGS) != 0) {
+        _flags = readUInt32(*src, len, "flags");
+    }
+
+    if ((_features & search::fs4transport::GDF_MLD) != 0)
+        docidSize += 2 * sizeof(uint32_t);
+
+    // Whatever is left holds the document ids; a remainder that is not a
+    // whole entry is rejected by the len != 0 check below.
+    _docidCnt = len / docidSize;
+    AllocateDocIDs(_docidCnt);
+
+    unsigned char rawGid[document::GlobalId::LENGTH];
+    for (uint32_t i = 0; i < _docidCnt; i++) {
+        src->ReadBytes(rawGid, document::GlobalId::LENGTH);
+        _docid[i]._gid.set(rawGid);
+
+        if ((_features & search::fs4transport::GDF_MLD) != 0) {
+            _docid[i]._partid = src->ReadInt32();
+            src->ReadInt32(); // unused
+        } else {
+            _docid[i]._partid = 0; // partid not available
+        }
+    }
+    len -= _docidCnt * docidSize;
+
+    if (len != 0) {
+        throwNotEnoughData(*src, len, 0, "eof");
+    }
+
+    SetRealPCODE();
+    return true; // OK
+}
+
+
+// Renders a human-readable dump of the packet, indented by `indent` columns.
+vespalib::string
+FS4Packet_GETDOCSUMSX::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_GETDOCSUMSX {\n", indent, "");
+    s += make_string("%*s features : %d\n", indent, "", _features);
+    s += make_string("%*s ranking : %s\n", indent, "", _ranking.c_str());
+    s += make_string("%*s qflags : %x\n", indent, "", _qflags);
+    s += make_string("%*s resClassName: %s\n", indent, "", _resultClassName.c_str());
+    for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+        s += _propsVector[i].toString(indent + 2);
+    }
+    s += make_string("%*s stackItems : %d\n", indent, "", _stackItems);
+    s += make_string("%*s stackDumpLen : %d\n", indent, "", (int)_stackDump.size());
+    s += make_string("%*s stackDump :\n", indent, "");
+
+    uint32_t i = 0;
+    if (_stackDump.size() > 0) {
+        const char *pt = _stackDump.c_str();
+        s += make_string("%*s ", indent, "");
+        while (i < _stackDump.size()) {
+            s += make_string("%x ", (unsigned char) pt[i]);
+            // start a new row after every 16 bytes, except after the last
+            // byte, so no dangling indent is left behind
+            if ((++i % 16) == 0 && i < _stackDump.size())
+                s += make_string("\n%*s ", indent, "");
+        }
+        // The loop never ends a row itself, so the last row is closed here.
+        s += make_string("\n");
+    }
+    for (i = 0; i < _docidCnt; i++) {
+        s += make_string("%*s gid=%s, partid=%d\n", indent, "",
+                         _docid[i]._gid.toString().c_str(), _docid[i]._partid);
+    }
+    s += make_string("%*s location : %s\n", indent, "", _location.c_str());
+    s += make_string("%*s timeout : %d\n", indent, "", _timeout);
+    s += make_string("%*s flags : %d\n", indent, "", _flags);
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+//============================================================
+
+// Returns the encoded size: a property-block count plus every block.
+uint32_t
+FS4Packet_TRACEREPLY::GetLength()
+{
+    uint32_t plen = sizeof(uint32_t);
+    for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+        plen += _propsVector[i].getLength();
+    }
+    return plen;
+}
+
+// Writes the property-block count followed by each block.
+void
+FS4Packet_TRACEREPLY::Encode(FNET_DataBuffer *dst)
+{
+    dst->WriteInt32Fast(_propsVector.size());
+    for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+        _propsVector[i].encode(*dst);
+    }
+}
+
+// Reads the property blocks. Throws when the count or a block cannot be
+// decoded; returns false (after discarding the excess) on trailing bytes.
+bool
+FS4Packet_TRACEREPLY::Decode(FNET_DataBuffer *src, uint32_t len)
+{
+    uint32_t cnt = readUInt32(*src, len, "#properties");
+    _propsVector.resize(cnt);
+    for (uint32_t i = 0; i < cnt; ++i) {
+        if (!_propsVector[i].decode(*src, len)) {
+            throwPropertieDecodeError(i);
+        }
+    }
+    if (len != 0) goto error;
+    return true; // OK
+ error:
+    src->DataToDead(len);
+    return false; // FAIL
+}
+
+// Renders a human-readable dump of the packet, indented by `indent` columns.
+vespalib::string
+FS4Packet_TRACEREPLY::toString(uint32_t indent) const
+{
+    vespalib::string s;
+    s += make_string("%*sFS4Packet_TRACEREPLY {\n", indent, "");
+    for (uint32_t i = 0; i < _propsVector.size(); ++i) {
+        s += _propsVector[i].toString(indent + 2);
+    }
+    s += make_string("%*s}\n", indent, "");
+    return s;
+}
+
+
+//============================================================
+
+// Creates an empty packet object for the given packet code.
+// Legacy codes map onto the corresponding extensible (X) packet class,
+// constructed with the legacy code. Returns NULL for an unknown code.
+FNET_Packet*
+FS4PacketFactory::CreateFS4Packet(uint32_t pcode)
+{
+    switch(pcode) {
+    case search::fs4transport::PCODE_EOL:
+        return new FS4Packet_EOL;
+    case search::fs4transport::PCODE_QUERYRESULT:
+        return new FS4Packet_QUERYRESULTX(search::fs4transport::
+                                          PCODE_QUERYRESULT);
+    case search::fs4transport::PCODE_ERROR:
+        return new FS4Packet_ERROR;
+    case search::fs4transport::PCODE_GETDOCSUMS:
+        return new FS4Packet_GETDOCSUMSX(search::fs4transport::
+                                         PCODE_GETDOCSUMS);
+    case search::fs4transport::PCODE_DOCSUM:
+        return new FS4Packet_DOCSUM;
+    case search::fs4transport::PCODE_MONITORQUERY:
+        return new FS4Packet_MONITORQUERYX(search::fs4transport::
+                                           PCODE_MONITORQUERY);
+    case search::fs4transport::PCODE_MONITORRESULT:
+        return new FS4Packet_MONITORRESULTX(search::fs4transport::
+                                            PCODE_MONITORRESULT);
+    case search::fs4transport::PCODE_MLD_QUERYRESULT:
+        return new FS4Packet_QUERYRESULTX(search::fs4transport::
+                                          PCODE_MLD_QUERYRESULT);
+    case search::fs4transport::PCODE_MLD_GETDOCSUMS:
+        return new FS4Packet_GETDOCSUMSX(search::fs4transport::
+                                         PCODE_MLD_GETDOCSUMS);
+    case search::fs4transport::PCODE_MLD_MONITORRESULT:
+        return new FS4Packet_MONITORRESULTX(search::fs4transport::
+                                            PCODE_MLD_MONITORRESULT);
+    case search::fs4transport::PCODE_CLEARCACHES:
+        return new FS4Packet_CLEARCACHES;
+    case search::fs4transport::PCODE_PARSEDQUERY2:
+        return new FS4Packet_QUERYX(search::fs4transport::PCODE_PARSEDQUERY2);
+    case search::fs4transport::PCODE_QUEUELEN:
+        return new FS4Packet_QUEUELEN;
+    case search::fs4transport::PCODE_QUERYRESULTX:
+        return new FS4Packet_QUERYRESULTX;
+    case search::fs4transport::PCODE_QUERYX:
+        return new FS4Packet_QUERYX;
+    case search::fs4transport::PCODE_GETDOCSUMSX:
+        return new FS4Packet_GETDOCSUMSX;
+    case search::fs4transport::PCODE_MONITORQUERYX:
+        return new FS4Packet_MONITORQUERYX;
+    case search::fs4transport::PCODE_MONITORRESULTX:
+        return new FS4Packet_MONITORRESULTX;
+    case search::fs4transport::PCODE_TRACEREPLY:
+        return new FS4Packet_TRACEREPLY;
+    default:
+        return NULL;
+    }
+}
+
+}
+}
diff 
--git a/searchlib/src/vespa/searchlib/common/packets.h b/searchlib/src/vespa/searchlib/common/packets.h new file mode 100644 index 00000000000..f3ea8e5b225 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/packets.h @@ -0,0 +1,593 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search +{ + +namespace fs4transport +{ +using vespalib::string; + +enum fnet_feature_masks { + FNET_QRF_SUPPORTED_MASK = (QRF_MLD | + QRF_SORTDATA | + QRF_AGGRDATA | + QRF_COVERAGE | + QRF_GROUPDATA | + QRF_PROPERTIES), + + FNET_QF_SUPPORTED_MASK = (QF_PARSEDQUERY | + QF_RANKP | + QF_SORTSPEC | + QF_AGGRSPEC | + QF_LOCATION | + QF_PROPERTIES | + QF_GROUPSPEC | + QF_SESSIONID), + + FNET_GDF_SUPPORTED_MASK = (GDF_MLD | + GDF_QUERYSTACK | + GDF_RANKP_QFLAGS | + GDF_LOCATION | + GDF_RESCLASSNAME | + GDF_PROPERTIES | + GDF_FLAGS), + + FNET_MQF_SUPPORTED_MASK = (MQF_QFLAGS), + + FNET_MRF_SUPPORTED_MASK = (MRF_MLD | MRF_RFLAGS | MRF_ACTIVEDOCS) +}; + +enum pcode_mask { + PCODE_MASK = 0x00ffffff +}; + +//========================================================================== + +class PacketArray +{ +private: + PacketArray(const PacketArray &); + PacketArray& operator=(const PacketArray &); + + FNET_Packet **_extArray; + FNET_Packet **_array; + uint32_t _size; + uint32_t _used; + +public: + PacketArray(FNET_Packet **arr = NULL, + uint32_t size = 0) + : _extArray(arr), + _array(arr), + _size(size), + _used(0) {} + ~PacketArray() + { + if (_array != _extArray) + delete [] _array; + } + void Add(FNET_Packet *packet) + { + if (_used == _size) { + _size *= 2; + if (_size < 16) + _size = 16; + FNET_Packet **newArray = new FNET_Packet*[_size]; + for (uint32_t i = 0; i < _used; i++) + newArray[i] = _array[i]; + if (_array != _extArray) 
+ delete [] _array; + _array = newArray; + } + _array[_used++] = packet; + } + FNET_Packet **Array() const { return _array; } + uint32_t Length() const { return _used; } +}; + +//========================================================================== + +class FS4PacketFactory +{ +public: + typedef FNET_Packet *(* CreatePacket_t)(uint32_t pcode); + + static FNET_Packet *CreateFS4Packet(uint32_t pcode); +}; + +//========================================================================== + +class FS4PersistentPacketStreamer : public FNET_IPacketStreamer { + FS4PersistentPacketStreamer(const FS4PersistentPacketStreamer &); + FS4PersistentPacketStreamer& operator=(const FS4PersistentPacketStreamer &); + + unsigned int _compressionLimit; + unsigned int _compressionLevel; + document::CompressionConfig::Type _compressionType; +protected: + bool _conservative; // Set to true if out of sync should mark the + // stream as broken. + FS4PacketFactory::CreatePacket_t _createPacket; + + bool HasChannelID(uint32_t pcode); + bool ValidPCode(uint32_t pcode) const { + return ((pcode & PCODE_MASK) >= PCODE_EOL) + && ((pcode & PCODE_MASK) < PCODE_LastCode); + } + +public: + static FS4PersistentPacketStreamer Instance; + + FS4PersistentPacketStreamer(FS4PacketFactory::CreatePacket_t cp); + + bool GetPacketInfo(FNET_DataBuffer *src, uint32_t *plen, + uint32_t *pcode, uint32_t *chid, bool *broken); + FNET_Packet *Decode(FNET_DataBuffer *src, uint32_t plen, + uint32_t pcode, FNET_Context context); + void Encode(FNET_Packet *packet, uint32_t chid, FNET_DataBuffer *dst); + + void SetConservativeMode(bool cons) { _conservative = cons; } + void SetCompressionLimit(unsigned int limit) { _compressionLimit = limit; } + void SetCompressionLevel(unsigned int level) { _compressionLevel = level; } + void SetCompressionType(document::CompressionConfig::Type compressionType) { _compressionType = compressionType; } + document::CompressionConfig::Type getCompressionType() const { return 
_compressionType; } + uint32_t getCompressionLimit() const { return _compressionLimit; } + uint32_t getCompressionLevel() const { return _compressionLevel; } +}; + +//========================================================================== + +class FS4Properties // named key/value list; each key and value is an (offset, length) pair resolved against _backing (see getKey/getKeyLen) +{ +private: + typedef std::pair StringRef; + typedef std::pair Entry; + typedef std::vector KeyValueVector; + + KeyValueVector _entries; + vespalib::string _name; + vespalib::string _backing; + const char * c_str(size_t sz) const { return _backing.c_str() + sz; } + void set(StringRef & e, const vespalib::stringref & s); +public: + FS4Properties(FS4Properties &&); + FS4Properties &operator=(FS4Properties &&); + + FS4Properties(); + ~FS4Properties(); + void allocEntries(uint32_t cnt); + void setName(const char *name, uint32_t nameSize) { _name.assign(name, nameSize); } + void setName(const vespalib::stringref &val) { + setName(val.data(), val.size()); + } + void setKey(uint32_t entry, const char *key, uint32_t keySize); + void setKey(uint32_t entry, const vespalib::stringref &val) { + setKey(entry, val.data(), val.size()); + } + void setValue(uint32_t entry, const char *value, uint32_t valueSize); + void setValue(uint32_t entry, const vespalib::stringref &val) { + setValue(entry, val.data(), val.size()); + } + uint32_t size() const { return _entries.size(); } + const char *getName() const { return _name.c_str(); } + uint32_t getNameLen() const { return _name.size(); } + const char *getKey(uint32_t entry) const { return c_str(_entries[entry].first.first); } + uint32_t getKeyLen(uint32_t entry) const { return _entries[entry].first.second; } + const char *getValue(uint32_t entry) const { return c_str(_entries[entry].second.first); } + uint32_t getValueLen(uint32_t entry) const { return _entries[entry].second.second; } + + // sub-packet methods below + uint32_t getLength(); + void encode(FNET_DataBuffer &dst); + bool decode(FNET_DataBuffer &src, uint32_t &len); + vespalib::string toString(uint32_t indent = 0) 
const; +}; + +//========================================================================== + +typedef std::vector PropsVector; + +//========================================================================== + +class FS4Packet : public FNET_Packet // base class of the FS4 packets below; copy ctor/assignment are declared private and toString() is pure virtual +{ +private: + FS4Packet(const FS4Packet &); + FS4Packet& operator=(const FS4Packet &); + +public: + FS4Packet(); + ~FS4Packet(); + vespalib::string Print(uint32_t indent) override; + void Free() override; + virtual vespalib::string toString(uint32_t indent) const = 0; +protected: + uint32_t readUInt32(FNET_DataBuffer & buf, uint32_t & len, const char *text) __attribute__((noinline)); + void throwNotEnoughData(FNET_DataBuffer & buf, uint32_t left, uint32_t needed, const char * text) __attribute__((noinline)); + void throwUnsupportedFeatures(uint32_t features, uint32_t set) __attribute__((noinline)); + void throwPropertieDecodeError(size_t i) __attribute__((noinline)); // NOTE(review): "Propertie" looks like a misspelling of "Property"; a rename would have to cover the definition and all callers +}; + +//========================================================================== + +class FS4Packet_EOL : public FS4Packet +{ +public: + FS4Packet_EOL(); + ~FS4Packet_EOL(); + uint32_t GetPCODE() override { return PCODE_EOL; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +class FS4Packet_PreSerialized : public FS4Packet +{ +public: + FS4Packet_PreSerialized(FNET_Packet & packet); + ~FS4Packet_PreSerialized(); + uint32_t GetPCODE() override; + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +private: + uint32_t _pcode; + document::CompressionConfig::Type _compressionType; + FNET_DataBuffer _data; +}; + +class FS4Packet_Shared : public FS4Packet +{ +public: + FS4Packet_Shared(FNET_Packet::SP packet); + ~FS4Packet_Shared(); + uint32_t GetPCODE() 
override; + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *, uint32_t ) override; + vespalib::string toString(uint32_t indent) const override; +private: + FNET_Packet::SP _packet; +}; + +//========================================================================== + +class FS4Packet_ERROR : public FS4Packet // PCODE_ERROR packet exposing public _errorCode and _message fields +{ +private: + FS4Packet_ERROR(const FS4Packet_ERROR &); + FS4Packet_ERROR& operator=(const FS4Packet_ERROR &); + +public: + uint32_t _errorCode; + string _message; + + void setErrorMessage(const vespalib::stringref &msg) { _message = msg; } + + FS4Packet_ERROR(); + ~FS4Packet_ERROR(); + uint32_t GetPCODE() override { return PCODE_ERROR; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_DOCSUM : public FS4Packet // PCODE_DOCSUM packet: a GlobalId plus a buffer; GetLength() is sizeof(_gid) + _buf.size() +{ +public: + typedef vespalib::MallocPtr Buf; +private: + FS4Packet_DOCSUM(const FS4Packet_DOCSUM &); + FS4Packet_DOCSUM& operator=(const FS4Packet_DOCSUM &); + + document::GlobalId _gid; + Buf _buf; +public: + FS4Packet_DOCSUM(); + ~FS4Packet_DOCSUM(); + const Buf & getBuf() const { return _buf; } + void swapBuf(Buf & other) { _buf.swap(other); } + void setGid(const document::GlobalId & gid) { _gid = gid; } + const document::GlobalId & getGid() const { return _gid; } + bool empty() const { return _buf.empty(); } + void SetBuf(const char *buf, uint32_t len); + uint32_t GetPCODE() override { return PCODE_DOCSUM; } + uint32_t GetLength() override { return sizeof(_gid) + _buf.size(); } + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class 
FS4Packet_MONITORQUERYX : public FS4Packet +{ + FS4Packet_MONITORQUERYX(const FS4Packet_MONITORQUERYX &); + FS4Packet_MONITORQUERYX& operator=(const FS4Packet_MONITORQUERYX &); + + uint32_t _pcode; +public: + uint32_t _features; // see monitorquery_features + uint32_t _qflags; // if MQF_QFLAGS + + FS4Packet_MONITORQUERYX(uint32_t pcode = PCODE_MONITORQUERYX); + ~FS4Packet_MONITORQUERYX(); + void UpdateCompatPCODE(); + void UpdateCompatFeatures(); + void SetRealPCODE(void) { _pcode = PCODE_MONITORQUERYX; } + uint32_t GetPCODE() override { return _pcode; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_MONITORRESULTX : public FS4Packet // GetPCODE() returns _pcode, which SetRealPCODE() sets to PCODE_MONITORRESULTX +{ +private: + FS4Packet_MONITORRESULTX(const FS4Packet_MONITORRESULTX &); + FS4Packet_MONITORRESULTX& operator=(const FS4Packet_MONITORRESULTX &); + + uint32_t _pcode; +public: + uint32_t _features; // see monitor (NOTE(review): comment looks truncated; presumably monitorresult_features, by analogy with the other packets - confirm) + uint32_t _partid; + uint32_t _timestamp; + + uint32_t _totalNodes; // if MRF_MLD + uint32_t _activeNodes; // if MRF_MLD + uint32_t _totalParts; // if MRF_MLD + uint32_t _activeParts; // if MRF_MLD + + uint32_t _rflags; // if MRF_RFLAGS + uint64_t _activeDocs; // if MRF_ACTIVEDOCS + + FS4Packet_MONITORRESULTX(uint32_t pcode = PCODE_MONITORRESULTX); + ~FS4Packet_MONITORRESULTX(); + void UpdateCompatPCODE(); + void UpdateCompatFeatures(); + void SetRealPCODE(void) { _pcode = PCODE_MONITORRESULTX; } + uint32_t GetPCODE() override { return _pcode; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_CLEARCACHES : public FS4Packet 
+{ +public: + FS4Packet_CLEARCACHES(); + ~FS4Packet_CLEARCACHES(); + uint32_t GetPCODE() override { return PCODE_CLEARCACHES; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_QUEUELEN : public FS4Packet // PCODE_QUEUELEN packet with two public uint32_t fields: _queueLen and _dispatchers +{ +public: + uint32_t _queueLen; + uint32_t _dispatchers; + + FS4Packet_QUEUELEN(); + ~FS4Packet_QUEUELEN(); + uint32_t GetPCODE() override { return PCODE_QUEUELEN; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_QUERYRESULTX : public FS4Packet // query result packet; optional members are tagged below with the QRF_* feature flag they depend on +{ +private: + FS4Packet_QUERYRESULTX(const FS4Packet_QUERYRESULTX &); + FS4Packet_QUERYRESULTX& operator=(const FS4Packet_QUERYRESULTX &); + + uint32_t _pcode; + uint32_t _distributionKey; + +public: + uint32_t _features; // see queryresult_features + uint32_t _offset; + uint32_t _numDocs; + uint64_t _totNumDocs; + search::HitRank _maxRank; + uint32_t *_sortIndex; // if QRF_SORTDATA + char *_sortData; // if QRF_SORTDATA + uint32_t _aggrDataLen; // if QRF_AGGRDATA + char *_aggrData; // if QRF_AGGRDATA + uint32_t _groupDataLen; // if QRF_GROUPDATA + char *_groupData; // if QRF_GROUPDATA + uint64_t _coverageDocs; // if QRF_COVERAGE + uint64_t _activeDocs; // if QRF_COVERAGE + class FS4_hit { + public: + FS4_hit() : _gid(), _metric(0), _partid(0), _distributionKey(0) { } + uint32_t getDistributionKey() const { return _distributionKey; } + void setDistributionKey(uint32_t key) { _distributionKey = key; } + const document::GlobalId & HT_GetGlobalID() const { return _gid; } + search::HitRank HT_GetMetric() const { return 
_metric; } + uint32_t HT_GetPartID() const { return _partid; } + + void HT_SetGlobalID(const document::GlobalId & val) { _gid = val; } + void HT_SetMetric(search::HitRank val) { _metric = val; } + void HT_SetPartID(uint32_t val) { _partid = val; } + document::GlobalId _gid; + search::HitRank _metric; + uint32_t _partid; // if QRF_MLD + private: + uint32_t _distributionKey; // if QRF_MLD + } *_hits; // pointer to FS4_hit entries; presumably allocated by AllocateHits(cnt) - confirm + PropsVector _propsVector; // if QRF_PROPERTIES + + void AllocateSortIndex(uint32_t cnt); + void AllocateSortData(uint32_t len); + void SetSortDataRef(uint32_t cnt, uint32_t *sortIndex, const char *sortData); + void AllocateAggrData(uint32_t len); + void SetAggrDataRef(const char *aggrData, uint32_t len); + void AllocateGroupData(uint32_t len); + void SetGroupDataRef(const char *groupData, uint32_t len); + void AllocateHits(uint32_t cnt); + + FS4Packet_QUERYRESULTX(uint32_t pcode = PCODE_QUERYRESULTX); + ~FS4Packet_QUERYRESULTX(); + void UpdateCompatPCODE(); + void UpdateCompatFeatures(); + void SetRealPCODE() { _pcode = PCODE_QUERYRESULTX; } + uint32_t GetPCODE() override { return _pcode; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override ; + vespalib::string toString(uint32_t indent) const override ; + uint32_t getDistributionKey() const { return _distributionKey; } + void setDistributionKey(uint32_t key) { _distributionKey = key; } +}; + +//========================================================================== + +class FS4Packet_QUERYX : public FS4Packet // query packet; optional members are tagged below with the QF_* feature flag they depend on +{ +private: + FS4Packet_QUERYX(const FS4Packet_QUERYX &); + FS4Packet_QUERYX& operator=(const FS4Packet_QUERYX &); + + uint32_t _pcode; + uint32_t _timeout; // presumably the value behind setTimeout()/getTimeout(); unit not visible here - confirm + +public: + uint32_t _features; // see query_features + uint32_t _offset; + uint32_t _maxhits; + uint32_t _qflags; + string _ranking; // if QF_RANKP + PropsVector _propsVector; // if QF_PROPERTIES + string _sortSpec; // if QF_SORTSPEC + string _aggrSpec; // if 
QF_AGGRSPEC + string _groupSpec; // if QF_GROUPSPEC + string _sessionId; // if QF_SESSIONID + string _location; // if QF_LOCATION + + uint32_t _numStackItems; // if QF_PARSEDQUERY + string _stackDump; // if QF_PARSEDQUERY + + void setRanking(const vespalib::stringref &ranking) { _ranking = ranking; } + void setSortSpec(const vespalib::stringref &spec) { _sortSpec = spec; } + void setAggrSpec(const vespalib::stringref &spec) { _aggrSpec = spec; } + void setGroupSpec(const vespalib::stringref &spec) { _groupSpec = spec; } + void setSessionId(const vespalib::stringref &sid) { _sessionId = sid; } + void setLocation(const vespalib::stringref &loc) { _location = loc; } + void setStackDump(const vespalib::stringref &buf) { _stackDump = buf; } + void setTimeout(const fastos::TimeStamp & timeout); + fastos::TimeStamp getTimeout() const; + + explicit FS4Packet_QUERYX(uint32_t pcode = PCODE_QUERYX); + ~FS4Packet_QUERYX(); + void UpdateCompatPCODE(); + void UpdateCompatFeatures(); + void SetRealPCODE() { _pcode = PCODE_QUERYX; } + uint32_t GetPCODE() override { return _pcode; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_GETDOCSUMSX : public FS4Packet // docsum request packet; optional members are tagged below with the GDF_* feature flag they depend on +{ +private: + FS4Packet_GETDOCSUMSX(const FS4Packet_GETDOCSUMSX &); + FS4Packet_GETDOCSUMSX& operator=(const FS4Packet_GETDOCSUMSX &); + + uint32_t _pcode; + uint32_t _timeout; +public: + uint32_t _features; // see getdocsums_features + string _ranking; // if GDF_RANKP_QFLAGS + uint32_t _qflags; // if GDF_RANKP_QFLAGS + string _resultClassName; // if GDF_RESCLASSNAME + PropsVector _propsVector; // if GDF_PROPERTIES + uint32_t _stackItems; // if GDF_QUERYSTACK + string _stackDump; // if GDF_QUERYSTACK + string _location; // if GDF_LOCATION + uint32_t _flags; // if GDF_FLAGS + 
class FS4_docid { + public: + FS4_docid() : _gid(), _partid(0) { } + document::GlobalId _gid; + uint32_t _partid; // if GDF_MLD + } *_docid; // pointer to FS4_docid entries; presumably allocated by AllocateDocIDs(cnt) - confirm + + uint32_t _docidCnt; + + void AllocateDocIDs(uint32_t cnt); + + void setResultClassName(const vespalib::stringref &name) { _resultClassName = name; } + void setStackDump(const vespalib::stringref &buf) { _stackDump = buf; } + void setRanking(const vespalib::stringref &ranking) { _ranking = ranking; } + void setLocation(const vespalib::stringref &loc) { _location = loc; } + void setTimeout(const fastos::TimeStamp & timeout); + fastos::TimeStamp getTimeout() const; + + FS4Packet_GETDOCSUMSX(uint32_t pcode = PCODE_GETDOCSUMSX); + ~FS4Packet_GETDOCSUMSX(); + void UpdateCompatPCODE(); + void UpdateCompatFeatures(); + void SetRealPCODE() { _pcode = PCODE_GETDOCSUMSX; } + uint32_t GetPCODE() override { return _pcode; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; +}; + +//========================================================================== + +class FS4Packet_TRACEREPLY : public FS4Packet // PCODE_TRACEREPLY packet whose only data member is a PropsVector +{ +public: + FS4Packet_TRACEREPLY() {} + ~FS4Packet_TRACEREPLY() {} + uint32_t GetPCODE() override { return PCODE_TRACEREPLY; } + uint32_t GetLength() override; + void Encode(FNET_DataBuffer *dst) override; + bool Decode(FNET_DataBuffer *src, uint32_t len) override; + vespalib::string toString(uint32_t indent) const override; + + PropsVector _propsVector; +}; + +//========================================================================== + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.cpp b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp new file mode 100644 index 00000000000..b3472abe89a --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "partialbitvector.h" + +///////////////////////////////// +namespace search +{ + +PartialBitVector::PartialBitVector(Index start, Index end) : + BitVector(), + _alloc(numActiveBytes(start, end)) +{ + init(_alloc.get(), start, end); + clear(); +} + +PartialBitVector::~PartialBitVector() +{ +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h new file mode 100644 index 00000000000..94facc9512a --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +namespace search { + +/** + * search::PartialBitVector is a bitvector that only represents 1 part + * of the full space. All operations concerning the whole vector will only + * be conducted on this smaller area. + */ +class PartialBitVector : public BitVector +{ +public: + typedef vespalib::AutoAlloc<0x800000, 0x1000> Alloc; + + /** + * Class constructor specifying startindex and endindex. + * Allocated area is zeroed. + * + * @param start is the beginning. + * @param end is the end. + * + */ + PartialBitVector(Index start, Index end); + + virtual ~PartialBitVector(void); + +private: + Alloc _alloc; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/range.h b/searchlib/src/vespa/searchlib/common/range.h new file mode 100644 index 00000000000..3fd53b43a97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/range.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +namespace search +{ + +template +class Range { +public: + Range() : + _lower(std::numeric_limits::max()), + _upper(std::numeric_limits::min()) { } + Range(T v) : _lower(v), _upper(v) { } + Range(T low, T high) : _lower(low), _upper(high) { } + T lower() const { return _lower; } + T upper() const { return _upper; } + bool valid() const { return _lower <= _upper; } + bool isPoint() const { return _lower == _upper; } +private: + T _lower; + T _upper; +}; + +typedef Range Int64Range; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/rankedhit.h b/searchlib/src/vespa/searchlib/common/rankedhit.h new file mode 100644 index 00000000000..8776f997d38 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/rankedhit.h @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include "hitrank.h" + +namespace search +{ + +struct RankedHit { + RankedHit() : _docId(0), _rankValue(0) { } + RankedHit(unsigned int docId, HitRank rank=0.0) : _docId(docId), _rankValue(rank) { } + unsigned int getDocId() const { return _docId & 0x7fffffff; } + bool hasMore() const { return _docId & 0x80000000; } + HitRank getRank() const { return _rankValue; } +//:private + unsigned int _docId; + HitRank _rankValue; +}; + +class RankedHitIterator { +public: + RankedHitIterator(const RankedHit * h, size_t sz) : _h(h), _sz(sz), _pos(0) { } + bool hasNext() const { return _pos < _sz; } + uint32_t next() { return _h[_pos++].getDocId(); } +private: + const RankedHit *_h; + const size_t _sz; + size_t _pos; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/rcuvector.h b/searchlib/src/vespa/searchlib/common/rcuvector.h new file mode 100644 index 00000000000..9c5954848c4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/rcuvector.h @@ -0,0 +1,354 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace attribute { + +template +class RcuVectorHeld : public vespalib::GenerationHeldBase +{ + std::unique_ptr _data; + +public: + RcuVectorHeld(size_t size, std::unique_ptr data) + : vespalib::GenerationHeldBase(size), + _data(std::move(data)) + { + } + + virtual + ~RcuVectorHeld(void) + { + } +}; + + +/** + * Vector class for elements of type T using the read-copy-update + * mechanism to ensure that reader threads will have a consistent view + * of the vector while the update thread is inserting new elements. + * The update thread is also responsible for updating the current + * generation of the vector, and initiating removing of old underlying + * data vectors. 
+ **/ +template +class RcuVectorBase +{ + static_assert(std::is_trivially_destructible::value, + "Value type must be trivially destructible"); + +protected: + typedef vespalib::Array Array; + typedef vespalib::GenerationHandler::generation_t generation_t; + typedef vespalib::GenerationHolder GenerationHolder; + Array _data; + size_t _growPercent; + size_t _growDelta; + GenerationHolder &_genHolder; + + size_t + calcSize(size_t baseSize) const + { + size_t delta = (baseSize * _growPercent / 100) + _growDelta; + return baseSize + std::max(delta, static_cast(1)); + } + size_t + calcSize() const + { + return calcSize(_data.capacity()); + } + void expand(size_t newCapacity); + void expandAndInsert(const T & v); + +public: + RcuVectorBase(GenerationHolder &genHolder); + + /** + * Construct a new vector with the given initial capacity and grow + * parameters. + * + * New capacity is calculated based on old capacity and grow parameters: + * nc = oc + (oc * growPercent / 100) + growDelta. + **/ + RcuVectorBase(size_t initialCapacity, size_t growPercent, size_t growDelta, + GenerationHolder &genHolder); + + RcuVectorBase(GrowStrategy growStrategy, GenerationHolder &genHolder) + : RcuVectorBase(growStrategy.getDocsInitialCapacity(), + growStrategy.getDocsGrowPercent(), + growStrategy.getDocsGrowDelta(), + genHolder) {} + + /** + * Return whether all capacity has been used. If true the next + * call to push_back() will cause an expand of the underlying + * data. + **/ + bool isFull() const { return _data.size() == _data.capacity(); } + + /** + * Return the combined memory usage for this instance. + **/ + MemoryUsage getMemoryUsage() const; + + // vector interface + // no swap method, use reset() to forget old capacity and holds + // NOTE: Unsafe resize/reserve may invalidate data references held by readers! 
+ void unsafe_resize(size_t n) { _data.resize(n); } + void unsafe_reserve(size_t n) { _data.reserve(n); } + void ensure_size(size_t n, T fill = T()) { + if (n > capacity()) { + expand(calcSize(n)); + } + while (size() < n) { + _data.push_back(fill); + } + } + void push_back(const T & v) { + if (_data.size() < _data.capacity()) { + _data.push_back(v); + } else { + expandAndInsert(v); + } + } + + bool + empty(void) const + { + return _data.empty(); + } + + size_t size() const { return _data.size(); } + size_t capacity() const { return _data.capacity(); } + void clear() { _data.clear(); } + T & operator[](size_t i) { return _data[i]; } + const T & operator[](size_t i) const { return _data[i]; } + + void + reset(void) + { + // Assumes no readers at this moment + Array().swap(_data); + _data.reserve(16); + } + + void + shrink(size_t newSize) __attribute__((noinline)); + + template + void + fillMapped(GenerationHolder &genHolder, + Reader &reader, + uint64_t numValues, + const T *map, + size_t mapSize, + Saver &saver, + uint32_t numDocs); +}; + +template +void +RcuVectorBase::expand(size_t newCapacity) { + std::unique_ptr tmpData(new Array()); + tmpData->reserve(newCapacity); + tmpData->resize(_data.size()); + memcpy(tmpData->begin(), _data.begin(), _data.size() * sizeof(T)); + tmpData->swap(_data); // atomic switch of underlying data + size_t holdSize = tmpData->size() * sizeof(T); + vespalib::GenerationHeldBase::UP hold(new RcuVectorHeld(holdSize, std::move(tmpData))); + _genHolder.hold(std::move(hold)); +} + +template +void +RcuVectorBase::expandAndInsert(const T & v) +{ + expand(calcSize()); + assert(_data.size() < _data.capacity()); + _data.push_back(v); +} + + +template +void +RcuVectorBase::shrink(size_t newSize) +{ + // TODO: Extend Array class to support more optimial shrink when + // backing store is memory mapped. 
+ assert(newSize <= _data.size()); + std::unique_ptr tmpData(new Array()); + tmpData->reserve(newSize); + tmpData->resize(newSize); + for (uint32_t i = 0; i < newSize; ++i) { + (*tmpData)[i] = _data[i]; + } + // Users of RCU vector must ensure that no readers use old size + // after swap. Attribute vectors uses _committedDocIdLimit for this. + tmpData->swap(_data); // atomic switch of underlying data + // Use capacity() instead of size() ? + size_t holdSize = tmpData->size() * sizeof(T); + vespalib::GenerationHeldBase::UP hold(new RcuVectorHeld(holdSize, std::move(tmpData))); + _genHolder.hold(std::move(hold)); +} + + +template +RcuVectorBase::RcuVectorBase(GenerationHolder &genHolder) + : _data(), + _growPercent(100), + _growDelta(0), + _genHolder(genHolder) +{ + _data.reserve(16); +} + +template +RcuVectorBase::RcuVectorBase(size_t initialCapacity, + size_t growPercent, + size_t growDelta, + GenerationHolder &genHolder) + : _data(), + _growPercent(growPercent), + _growDelta(growDelta), + _genHolder(genHolder) +{ + _data.reserve(initialCapacity); +} + +template +MemoryUsage +RcuVectorBase::getMemoryUsage() const +{ + MemoryUsage retval; + retval.incAllocatedBytes(_data.capacity() * sizeof(T)); + retval.incUsedBytes(_data.size() * sizeof(T)); + return retval; +} + + +template +template +void +RcuVectorBase::fillMapped(GenerationHolder &genHolder, + Reader &reader, + uint64_t numValues, + const T *map, + size_t mapSize, + Saver &saver, + uint32_t numDocs) +{ + assert(numDocs == numValues); + (void) numValues; + genHolder.clearHoldLists(); + reset(); + unsafe_reserve(numDocs); + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t e = reader.getNextEnum(); + assert(e < mapSize); + (void) mapSize; + push_back(map[e]); + saver.save(e, doc, 0, 1); + } +} + + +template +class RcuVector : public RcuVectorBase +{ +private: + typedef typename RcuVectorBase::generation_t generation_t; + typedef typename RcuVectorBase::GenerationHolder GenerationHolder; + using 
RcuVectorBase::_data; + generation_t _generation; + GenerationHolder _genHolderStore; + + void + expandAndInsert(const T & v) + { + RcuVectorBase::expandAndInsert(v); + _genHolderStore.transferHoldLists(_generation); + } + +public: + RcuVector() + : RcuVectorBase(_genHolderStore), + _generation(0), + _genHolderStore() + { + } + + /** + * Construct a new vector with the given initial capacity and grow + * parameters. + * + * New capacity is calculated based on old capacity and grow parameters: + * nc = oc + (oc * growPercent / 100) + growDelta. + **/ + RcuVector(size_t initialCapacity, size_t growPercent, size_t growDelta) + : RcuVectorBase(initialCapacity, growPercent, growDelta, + _genHolderStore), + _generation(0), + _genHolderStore() + { + } + + RcuVector(GrowStrategy growStrategy) + : RcuVectorBase(growStrategy, _genHolderStore), _generation(0), _genHolderStore() + { + } + + ~RcuVector() + { + _genHolderStore.clearHoldLists(); + } + + generation_t + getGeneration() const + { + return _generation; + } + + void + setGeneration(generation_t generation) + { + _generation = generation; + } + + /** + * Remove all old data vectors where generation < firstUsed. + **/ + void + removeOldGenerations(generation_t firstUsed) + { + _genHolderStore.trimHoldLists(firstUsed); + } + + void + push_back(const T & v) + { + if (_data.size() < _data.capacity()) { + _data.push_back(v); + } else { + expandAndInsert(v); + } + } + + MemoryUsage + getMemoryUsage() const + { + MemoryUsage retval(RcuVectorBase::getMemoryUsage()); + retval.incAllocatedBytesOnHold(_genHolderStore.getHeldBytes()); + return retval; + } +}; + + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/reserved.h b/searchlib/src/vespa/searchlib/common/reserved.h new file mode 100644 index 00000000000..d97cb2216df --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/reserved.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +// These are used by FAST Web Search for host name anchoring. + +// NB! Should be changed to uppercase once the functionality is implemented!! + +static const char *ANCHOR_START_OF_HOST = "StArThOsT"; +static const char *ANCHOR_END_OF_HOST = "EnDhOsT"; + +// These are used in the query parser when parsing fields with parsemode +// 'boundaries'. Not used otherwise. Lowercased for performance reasons. + +#define ANCHOR_LEFT_BOUNDARY "fastpbfast" +#define ANCHOR_RIGHT_BOUNDARY "fastpbfast" + diff --git a/searchlib/src/vespa/searchlib/common/resultset.cpp b/searchlib/src/vespa/searchlib/common/resultset.cpp new file mode 100644 index 00000000000..ac69680b427 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/resultset.cpp @@ -0,0 +1,149 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP(""); + +#include +#include + +namespace search +{ + + +ResultSet::ResultSet(void) + : _elemsUsedInRankedHitsArray(0u), + _rankedHitsArrayAllocElements(0u), + _bitOverflow(), + _rankedHitsArray() +{ +} + + +ResultSet::ResultSet(const ResultSet &other) + : _elemsUsedInRankedHitsArray(0), + _rankedHitsArrayAllocElements(0), + _bitOverflow(), + _rankedHitsArray() +{ + allocArray(other._elemsUsedInRankedHitsArray); + _elemsUsedInRankedHitsArray = other._elemsUsedInRankedHitsArray; + if (_elemsUsedInRankedHitsArray > 0) + memcpy(_rankedHitsArray.get(), + other._rankedHitsArray.get(), + _elemsUsedInRankedHitsArray * sizeof(RankedHit)); + + if (other._bitOverflow) { + _bitOverflow = BitVector::create(*other._bitOverflow); + } +} + + +ResultSet::~ResultSet(void) +{ +} + + +void +ResultSet::allocArray(unsigned int arrayAllocated) +{ + if (arrayAllocated > 0) { + 
ArrayAlloc n(arrayAllocated * sizeof(RankedHit)); + _rankedHitsArray.swap(n); + } else { + ArrayAlloc n; + _rankedHitsArray.swap(n); + } + _rankedHitsArrayAllocElements = arrayAllocated; + _elemsUsedInRankedHitsArray = 0; +} + + +void +ResultSet::setArrayUsed(unsigned int arrayUsed) +{ + assert(arrayUsed <= _rankedHitsArrayAllocElements); + _elemsUsedInRankedHitsArray = arrayUsed; +} + + +void +ResultSet::setBitOverflow(BitVector::UP newBitOverflow) +{ + _bitOverflow = std::move(newBitOverflow); +} + + +////////////////////////////////////////////////////////////////////// +// Find number of hits +////////////////////////////////////////////////////////////////////// +unsigned int +ResultSet::getNumHits(void) const +{ + return (_bitOverflow) ? _bitOverflow->countTrueBits() : _elemsUsedInRankedHitsArray; +} + + +void +ResultSet::mergeWithBitOverflow(void) +{ + if ( ! _bitOverflow) { + return; + } + + const BitVector *bitVector = _bitOverflow.get(); + + const RankedHit *oldA = getArray(); + const RankedHit *oldAEnd = oldA + _elemsUsedInRankedHitsArray; + uint32_t bidx = bitVector->getFirstTrueBit(); + + uint32_t actualHits = getNumHits(); + ArrayAlloc newHitsAlloc(actualHits*sizeof(RankedHit)); + RankedHit *newHitsArray = static_cast(newHitsAlloc.get()); + + RankedHit * tgtA = newHitsArray; + RankedHit * tgtAEnd = newHitsArray + actualHits; + + if (oldAEnd > oldA) { // we have array hits + uint32_t firstArrayHit = oldA->_docId; + uint32_t lastArrayHit = (oldAEnd - 1)->_docId; + + // bitvector hits before array hits + while (bidx < firstArrayHit) { + tgtA->_docId = bidx; + tgtA->_rankValue = 0; + tgtA++; + bidx = bitVector->getNextTrueBit(bidx + 1); + } + + // merge bitvector and array hits + while (bidx <= lastArrayHit) { + tgtA->_docId = bidx; + if (bidx == oldA->_docId) { + tgtA->_rankValue = oldA->_rankValue; + oldA++; + } else { + tgtA->_rankValue = 0; + } + tgtA++; + bidx = bitVector->getNextTrueBit(bidx + 1); + } + } + assert(oldA == oldAEnd); + + // bitvector 
hits after array hits + while (tgtA < tgtAEnd) { + tgtA->_docId = bidx; + tgtA->_rankValue = 0; + tgtA++; + bidx = bitVector->getNextTrueBit(bidx + 1); + } + _rankedHitsArrayAllocElements = actualHits; + _elemsUsedInRankedHitsArray = actualHits; + _rankedHitsArray.swap(newHitsAlloc); + setBitOverflow(NULL); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/resultset.h b/searchlib/src/vespa/searchlib/common/resultset.h new file mode 100644 index 00000000000..4489654d0a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/resultset.h @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include + +namespace search +{ + +class ResultSet +{ +private: + // Everything above 8m we return to OS. + typedef vespalib::AutoAlloc<0x800000> ArrayAlloc; + ResultSet& operator=(const ResultSet &); + + unsigned int _elemsUsedInRankedHitsArray; + unsigned int _rankedHitsArrayAllocElements; + BitVector::UP _bitOverflow; + ArrayAlloc _rankedHitsArray; + +public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + ResultSet(void); + ResultSet(const ResultSet &); // Used only for testing ..... 
+ virtual ~ResultSet(void); + + void allocArray(unsigned int arrayAllocated); + + void setArrayUsed(unsigned int arrayUsed); + void setBitOverflow(BitVector::UP newBitOverflow); + const RankedHit * getArray(void) const { return static_cast(_rankedHitsArray.get()); } + RankedHit * getArray(void) { return static_cast(_rankedHitsArray.get()); } + unsigned int getArrayUsed(void) const { return _elemsUsedInRankedHitsArray; } + unsigned int getArrayAllocated(void) const { return _rankedHitsArrayAllocElements; } + + const BitVector * getBitOverflow(void) const { return _bitOverflow.get(); } + BitVector * getBitOverflow(void) { return _bitOverflow.get(); } + unsigned int getNumHits(void) const; + void mergeWithBitOverflow(void); + + /* isEmpty() is allowed to return false even if bitmap has no hits */ + bool isEmpty(void) const { return (_bitOverflow == NULL && _elemsUsedInRankedHitsArray == 0); } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h b/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h new file mode 100644 index 00000000000..d6c6f29abaf --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/scheduletaskcallback.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "idestructorcallback.h" + +namespace search +{ + +/** + * Class that schedules a task when instance is destroyed. Typically a + * shared pointer to an instance is passed around to multiple worker + * threads that performs portions of a larger task before dropping the + * shared pointer, triggering the callback when all worker threads + * have completed. 
+ */ +class ScheduleTaskCallback : public IDestructorCallback +{ + vespalib::Executor &_executor; + vespalib::Executor::Task::UP _task; +public: + ScheduleTaskCallback(vespalib::Executor &executor, + vespalib::Executor::Task::UP task) + : _executor(executor), + _task(std::move(task)) + { + } + virtual ~ScheduleTaskCallback() { + _executor.execute(std::move(_task)); + } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp new file mode 100644 index 00000000000..2fe4a23e3ae --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.cpp @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".common.sequencedtaskexecutor"); + +#include "sequencedtaskexecutor.h" +#include + +using vespalib::ThreadStackExecutor; + +namespace search +{ + +namespace +{ + +constexpr uint32_t stackSize = 128 * 1024; + +} + + +SequencedTaskExecutor::SequencedTaskExecutor(uint32_t threads) + : _executors() +{ + for (uint32_t id = 0; id < threads; ++id) { + auto executor = std::make_unique(1, stackSize); + _executors.push_back(std::move(executor)); + } +} + +SequencedTaskExecutor::~SequencedTaskExecutor() +{ + sync(); +} + + +void +SequencedTaskExecutor::executeTask(uint64_t id, + vespalib::Executor::Task::UP task) +{ + auto itr = _ids.find(id); + if (itr == _ids.end()) { + auto insarg = std::make_pair(id, _ids.size() % _executors.size()); + auto insres = _ids.insert(insarg); + assert(insres.second); + itr = insres.first; + } + size_t executorId = itr->second; + vespalib::ThreadStackExecutorBase &executor(*_executors[executorId]); + auto rejectedTask = executor.execute(std::move(task)); + assert(!rejectedTask); +} + + +void +SequencedTaskExecutor::sync() +{ + for (auto &executor : _executors) { + executor->sync(); + } +} + + +} // namespace search 
diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h new file mode 100644 index 00000000000..c3b4a778cf2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutor.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "isequencedtaskexecutor.h" +#include + +namespace vespalib +{ + +class ThreadStackExecutorBase; + +} + +namespace search +{ + +/** + * Class to run multiple tasks in parallel, but tasks with the same + * id have to be run in sequence. + */ +class SequencedTaskExecutor : public ISequencedTaskExecutor +{ + std::vector> _executors; + vespalib::hash_map _ids; +public: + SequencedTaskExecutor(uint32_t threads); + + ~SequencedTaskExecutor(); + + virtual void executeTask(uint64_t id, + vespalib::Executor::Task::UP task) override; + + virtual void sync() override; +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h new file mode 100644 index 00000000000..ffc6ba7f55b --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sequencedtaskexecutorobserver.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "isequencedtaskexecutor.h" +#include + +namespace search +{ + +/** + * Observer class to observe a class that runs multiple tasks in parallel, + * where tasks with the same id have to be run in sequence.
+ */ +class SequencedTaskExecutorObserver : public ISequencedTaskExecutor +{ + ISequencedTaskExecutor &_executor; + std::atomic _executeCnt; + std::atomic _syncCnt; +public: + SequencedTaskExecutorObserver(ISequencedTaskExecutor &executor) + : _executor(executor), + _executeCnt(0u), + _syncCnt(0u) + { + } + + virtual ~SequencedTaskExecutorObserver() { } + + virtual void executeTask(uint64_t id, + vespalib::Executor::Task::UP task) override { + ++_executeCnt; + _executor.executeTask(id, std::move(task)); + } + + virtual void sync() override { + ++_syncCnt; + _executor.sync(); + } + + uint32_t getExecuteCnt() const { return _executeCnt; } + uint32_t getSyncCnt() const { return _syncCnt; } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/serialnum.h b/searchlib/src/vespa/searchlib/common/serialnum.h new file mode 100644 index 00000000000..f71f10719d2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/serialnum.h @@ -0,0 +1,13 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +// This is a unique identification number. +typedef uint64_t SerialNum; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp new file mode 100644 index 00000000000..d9b575678d9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include "serialnumfileheadercontext.h" +#include + + +namespace search +{ + +namespace common +{ + + +SerialNumFileHeaderContext::SerialNumFileHeaderContext( + const FileHeaderContext &parentFileHeaderContext, + SerialNum serialNum) + : FileHeaderContext(), + _parentFileHeaderContext(parentFileHeaderContext), + _serialNum(serialNum) +{ +} + + +void +SerialNumFileHeaderContext::addTags(vespalib::GenericHeader &header, + const vespalib::string &name) const +{ + _parentFileHeaderContext.addTags(header, name); + typedef vespalib::GenericHeader::Tag Tag; + if (_serialNum != 0u) + header.putTag(Tag("serialNum", _serialNum)); +} + +} // namespace common + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h new file mode 100644 index 00000000000..24969193347 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/serialnumfileheadercontext.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "fileheadercontext.h" +#include "serialnum.h" + +namespace search +{ + +namespace common +{ + +class SerialNumFileHeaderContext : public FileHeaderContext +{ + const FileHeaderContext &_parentFileHeaderContext; + SerialNum _serialNum; + +public: + SerialNumFileHeaderContext(const FileHeaderContext & + parentFileHeaderContext, + SerialNum serialNum); + + virtual void + addTags(vespalib::GenericHeader &header, + const vespalib::string &name) const; +}; + +} // namespace common + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/sort.cpp b/searchlib/src/vespa/searchlib/common/sort.cpp new file mode 100644 index 00000000000..9d43e98457d --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sort.cpp @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +namespace search { + +bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]) +{ + // Accumulate cnt positions + bool sorted = (cnt[0]==n); + ptr[0] = 0; + for(unsigned int i(1); i<256; i++) { + ptr[i] = ptr[i-1] + cnt[i-1]; + sorted |= (cnt[i]==n); + } + memcpy(last, ptr, 256*sizeof(unsigned int)); + last[256] = last[255] + cnt[255]; + return sorted; +} + +} diff --git a/searchlib/src/vespa/searchlib/common/sort.h b/searchlib/src/vespa/searchlib/common/sort.h new file mode 100644 index 00000000000..231865321c0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sort.h @@ -0,0 +1,537 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + + +namespace search +{ + +bool radix_prepare(unsigned int n, unsigned int last[257], unsigned int ptr[256], unsigned int cnt[256]); + +template +void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) __attribute__ ((noinline)); + +template +void radix_sort_core(const unsigned int * last, T * a, unsigned int n, uint32_t * radixScratch, unsigned int shiftWidth) +{ + T temp, swap; + // Go through all permutation cycles until all + // elements are moved or found to be already in place + unsigned int ptr[256]; + unsigned int i, j, k; + memcpy(ptr, last, sizeof(ptr)); + i = 0; + unsigned int remain = n; + + while (remain > 0) { + // Find first uncompleted class + while (ptr[i] == last[i+1]) { + i++; + } + + // Grab first element to move + j = ptr[i]; + uint32_t swapK = radixScratch[j]; + k = (swapK >> shiftWidth) & 0xFF; + + // Swap into correct class until cycle completed + if (i != k) { + swap = a[j]; + do { + unsigned int t(ptr[k]); + temp = a[t]; + uint32_t tempK(radixScratch[t]); + radixScratch[t] = swapK; + a[t] = swap; + ptr[k]++; + swapK = tempK; + swap = temp; 
+ k = (tempK >> shiftWidth) & 0xFF; + remain--; + } while (i!=k); + // Place last element in cycle + a[j] = swap; + radixScratch[j] = swapK; + } + ptr[k]++; + remain--; + } +} + +template +unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) __attribute__ ((noinline)); + +template +unsigned int radix_fetch(T *a, unsigned int n, uint32_t * radixScratch, GR R) +{ + unsigned int i = 0; + uint32_t usedBits = 0; + if (n > 3) { + for(; i < n - 3; i += 4) { + radixScratch[i + 0] = R(a[i + 0]); + radixScratch[i + 1] = R(a[i + 1]); + radixScratch[i + 2] = R(a[i + 2]); + radixScratch[i + 3] = R(a[i + 3]); + usedBits |= radixScratch[i + 0]; + usedBits |= radixScratch[i + 1]; + usedBits |= radixScratch[i + 2]; + usedBits |= radixScratch[i + 3]; + } + } + for(; i < n; i++) { + radixScratch[i] = R(a[i]); + usedBits |= radixScratch[i]; + } + if (usedBits != 0) { + int msb = vespalib::Optimized::msbIdx(usedBits); + return (msb+8) & ~0x7; + } + return 0; +} + +template +class AlwaysEof +{ +public: + bool operator () (const T &) const { return true; } + static bool alwaysEofOnCheck() { return true; } +}; + +template +bool radix_eof(const T *a, unsigned int n, ER E) __attribute__ ((noinline)); + +template +bool radix_eof(const T *a, unsigned int n, ER E) +{ + unsigned int i = 0; + bool eof(true); + if (n > 3) { + for(; eof && (i < n - 3); i += 4) { + eof = E(a[i + 0]) && + E(a[i + 1]) && + E(a[i + 2]) && + E(a[i + 3]); + } + } + for(; eof && (i < n); i++) { + eof = E(a[i]); + } + return eof; +} + +/** + * radix sort implementation. + * + * @param stackDepth recursion level reached; since radix_sort uses + * lots of stack we try another algorithm if this + * becomes too high. 
+ * @param a Pointer to the start of the array to sort + * @param n number of data elements to sort + * @param radixScratch scratch area for up to 32 bits of sorting data + * @param radixBits how many bits of sorting data radixScratch contains + * @param insertSortLevel when to fall back to simple insertion sort + **/ +template +void radix_sort(GR R, GE E, GRE EE, int stackDepth, + T * a, unsigned int n, + uint32_t *radixScratch, + int radixBits, + unsigned insertSortLevel=10, + unsigned int topn=std::numeric_limits::max()) +{ + if (((stackDepth > 20) && (radixBits == 0)) || (n < insertSortLevel)) { + // switch to simpler sort if few elements, or if recursion is deep and no radix bits are left + if (n > 1) { + std::sort(a, a+n, E); + } + return; + } + + unsigned int last[257]; + unsigned int cnt[256]; + int shiftWidth = radixBits - 8; + for (bool allInOneBucket(true); allInOneBucket;) { + while ( radixBits == 0 ) { + // no data left in scratch buffer; fill up with up to 32 new bits + radixBits = radix_fetch(a, n, radixScratch, R); + if (radixBits == 0) { + if (EE.alwaysEofOnCheck() || radix_eof(a, n, EE)) { + // everything has reached end-of-string terminating zero, + // so we are done sorting. 
+ return; + } + } + } + + shiftWidth = radixBits - 8; + memset(cnt, 0, sizeof(cnt)); + unsigned int i = 0; + if (n > 3) { + for(; i < n - 3; i += 4) { + cnt[(radixScratch[i + 0] >> shiftWidth) & 0xFF]++; + cnt[(radixScratch[i + 1] >> shiftWidth) & 0xFF]++; + cnt[(radixScratch[i + 2] >> shiftWidth) & 0xFF]++; + cnt[(radixScratch[i + 3] >> shiftWidth) & 0xFF]++; + } + } + for(; i < n; i++) { + cnt[(radixScratch[i] >> shiftWidth) & 0xFF]++; + } + + // Accumulate cnt positions + allInOneBucket = false; + last[0] = 0; + for(i = 1; (i < 257) && !allInOneBucket; i++) { + last[i] = last[i-1] + cnt[i-1]; + allInOneBucket = (cnt[i-1] == n); + } + + radixBits -= 8; + } + + radix_sort_core(last, a, n, radixScratch, shiftWidth); + + // Sort on next 8 bits of key + for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) { + const unsigned l(last[i]); + const unsigned c(cnt[i]); + if (c) { + if (c > insertSortLevel) { + radix_sort(R, E, EE, stackDepth + 1, &a[l], c, &radixScratch[l], radixBits, insertSortLevel, topn-sum); + } else { + std::sort(&a[l], &a[l]+c, E); + } + sum += c; + } + } +} + + +template +class ShiftBasedRadixSorterBase +{ +protected: + static void radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__((noinline)); + static void radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) __attribute__((noinline)); +}; + +template +void ShiftBasedRadixSorterBase::radix_fetch(GR R, unsigned int cnt[256], const T * a, unsigned int n) +{ + memset(cnt, 0, 256*sizeof(unsigned int)); + unsigned int p(0); + if (n > 3) { + for(; p < n - 3; p += 4) { + cnt[(R(a[p]) >> SHIFT) & 0xFF]++; + cnt[(R(a[p + 1]) >> SHIFT) & 0xFF]++; + cnt[(R(a[p + 2]) >> SHIFT) & 0xFF]++; + cnt[(R(a[p + 3]) >> SHIFT) & 0xFF]++; + } + } + for(; p < n; p++) { + cnt[(R(a[p]) >> SHIFT) & 0xFF]++; + } +} + + +template +void ShiftBasedRadixSorterBase::radix_sort_core(GR R, unsigned int ptr[256], unsigned int last[257], T * a, unsigned int n) 
+{ + // Go through all permutation cycles until all + // elements are moved or found to be already in place + unsigned int i(0), remain(n); + unsigned int j, k; + T temp, swap; + + while(remain>0) { + // Find first uncompleted class + while(ptr[i]==last[i+1]) { + i++; + } + + // Grab first element to move + j = ptr[i]; + k = (R(a[j]) >> SHIFT) & 0xFF; + + // Swap into correct class until cycle completed + if (i!=k) { + swap = a[j]; + do { + temp = a[ptr[k]]; + a[ptr[k]++] = swap; + k = (R(swap=temp) >> SHIFT) & 0xFF; + remain--; + } while (i!=k); + // Place last element in cycle + a[j] = swap; + } + ptr[k]++; + remain--; + } +} + +/** + * @param T the type of the object being sorted + * @param GR the functor used to fetch the number used for radix sorting. It must ensure the same sorting as GE. + * @param GE the functor used for testing if one object is ordered ahead of another. + * @param SHIFT is the number of significant bits in the radix - 8. Must be a multiple of 8. + * @param continueAfterRadixEnds indicates if the radix only represents a prefix of the objects. If it is true we + * will continue using std::sort to order objects that have equal radix representation. 
+ */ +template +class ShiftBasedRadixSorter : private ShiftBasedRadixSorterBase +{ +public: + static size_t radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits::max()); + static size_t radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn); +private: + typedef ShiftBasedRadixSorterBase Base; +}; + +template +size_t ShiftBasedRadixSorter::radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +{ + unsigned int last[257], ptr[256], cnt[256]; + unsigned int sum(n); + + Base::radix_fetch(R, cnt, a, n); + + bool sorted = radix_prepare(n, last, ptr, cnt); + + if (!sorted) { + Base::radix_sort_core(R, ptr, last, a, n); + } else { + return ShiftBasedRadixSorter::radix_sort_internal(R, E, a, n, insertSortLevel, topn); + } + + if (SHIFT>0 || continueAfterRadixEnds) { + // Sort on next key + sum = 0; + for(unsigned i(0); (i<256) && (sum < topn); i++) { + const unsigned int c(cnt[i]); + const unsigned int l(last[i]); + if (c) { + if (c>insertSortLevel) { + sum += ShiftBasedRadixSorter::radix_sort_internal(R, E, &a[l], c, insertSortLevel, topn-sum); + } else { + std::sort(a+l, a+l+c, E); + sum += c; + } + } + } + } + return sum; +} + + +template +size_t ShiftBasedRadixSorter::radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +{ + if (n > insertSortLevel) { + return radix_sort_internal(R, E, a, n, insertSortLevel, topn); + } else if (n > 1) { + std::sort(a, a + n, E); + } + return n; +} + +template +class ShiftBasedRadixSorter { +public: + static size_t radix_sort_internal(B, C, A *, unsigned int, unsigned int, unsigned int) { + return 0; + } +}; + +template +class ShiftBasedRadixSorter { +public: + static size_t radix_sort_internal(B, C E, A * v, unsigned int sz, unsigned int, unsigned int) { + std::sort(v, v + sz, E); + return sz; + } +}; + +template +class 
NumericRadixSorter +{ +public: + typedef vespalib::convertForSort C; + class RadixSortable { + public: + typename C::UIntType operator () (typename C::InputType v) const { return C::convert(v); } + }; + void operator() (T * start, size_t sz, unsigned topn = std::numeric_limits::max()) const { + if (sz > 16) { + ShiftBasedRadixSorter::radix_sort_internal(RadixSortable(), typename C::Compare(), start, sz, 16, topn); + } else { + std::sort(start, start + sz, typename C::Compare()); + } + } +}; + +template +void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) __attribute__ ((noinline)); + +template +void radix_fetch2(GR R, unsigned int cnt[256], const T * a, unsigned int n) +{ + memset(cnt, 0, 256*sizeof(unsigned int)); + unsigned int p(0); + if (n > 3) { + for(; p < n - 3; p += 4) { + cnt[R(a[p + 0], IDX)]++; + cnt[R(a[p + 1], IDX)]++; + cnt[R(a[p + 2], IDX)]++; + cnt[R(a[p + 3], IDX)]++; + } + } + for(; p < n; p++) { + cnt[R(a[p], IDX)]++; + } +} + +template +void radix_sort_internal(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel, unsigned int topn) +{ + unsigned int last[257], ptr[256], cnt[256]; + + radix_fetch2(R, cnt, a, n); + + bool sorted = radix_prepare(n, last, ptr, cnt); + + if (!sorted) { + // Go through all permutation cycles until all + // elements are moved or found to be already in place + unsigned int i(0), remain(n); + unsigned int j, k; + T temp, swap; + + while(remain>0) { + // Find first uncompleted class + while(ptr[i]==last[i+1]) { + i++; + } + + // Grab first element to move + j = ptr[i]; + k = R(a[j], LEN-POS); + + // Swap into correct class until cycle completed + if (i!=k) { + swap = a[j]; + do { + temp = a[ptr[k]]; + a[ptr[k]++] = swap; + k = R(swap=temp, LEN-POS); + remain--; + } while (i!=k); + // Place last element in cycle + a[j] = swap; + } + ptr[k]++; + remain--; + } + } else { + radix_sort_internal(R, E, a, n, insertSortLevel, topn); + return; + } + + if (LEN>0) { + // Sort on next key + 
for(unsigned i(0), sum(0); (i<256) && (sum < topn); i++) { + const unsigned int c(cnt[i]); + const unsigned int l(last[i]); + if (c) { + if (c>insertSortLevel) { + radix_sort_internal(R, E, &a[l], c, insertSortLevel, topn-sum); + } else { + std::sort(a+l, a+l+c, E); + } + sum += c; + } + } + } +} + + +template +void radix_sort(GR R, GE E, T * a, unsigned int n, unsigned int insertSortLevel=10, unsigned int topn=std::numeric_limits::max()) +{ + if (n > insertSortLevel) { + radix_sort_internal(R, E, a, n, insertSortLevel, + topn); + } else if (n > 1) { + std::sort(a, a + n, E); + } +} + + +template +void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) __attribute__ ((noinline)); + +template +void radix_stable_core(GR R, unsigned int ptr[256], const T * a, T * b, unsigned int n) +{ + unsigned int k; + for (unsigned int i(0); i < n; i++) { + k = (R(a[i]) >> SHIFT) & 0xFF; + b[ptr[k]] = a[i]; + ptr[k]++; + } +} + +template +T * radix_stable_sort_internal(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10) +{ + unsigned int last[257], ptr[256], cnt[256]; + + radix_fetch(R, cnt, a, n); + + bool sorted = radix_prepare(n, last, ptr, cnt); + + if (!sorted) { + radix_stable_core(R, ptr, a, b, n); + } else { + return radix_stable_sort_internal(R, E, a, b, n, insertSortLevel); + } + + if (SHIFT>0) { + // Sort on next key + for(unsigned i(0); i<256 ; i++) { + const unsigned int c(cnt[i]); + const unsigned int l(last[i]); + if (c>insertSortLevel) { + const T * r = radix_stable_sort_internal(R, E, &b[l], &a[l], c, insertSortLevel); + if (r != &b[l]) { + memcpy(&b[l], &a[l], c*sizeof(*r)); + } + } else { + if (c>1) { + std::stable_sort(b+l, b+l+c, E); + } + } + } + } + return b; +} + +template +T* radix_stable_sort(GR R, GE E, T * a, T * b, unsigned int n, unsigned int insertSortLevel=10) +{ + if (n > insertSortLevel) { + return radix_stable_sort_internal(R, E, a, b, n, + insertSortLevel); + } else if (n > 1) { + 
std::stable_sort(a, a + n, E); + } + return a; +} + +} + diff --git a/searchlib/src/vespa/searchlib/common/sortdata.cpp b/searchlib/src/vespa/searchlib/common/sortdata.cpp new file mode 100644 index 00000000000..a0923611b4e --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortdata.cpp @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include + +namespace search { +namespace common { + +uint32_t +SortData::GetSize(uint32_t hitcnt, + const uint32_t *sortIndex) +{ + if (hitcnt == 0) + return 0; + return ((hitcnt + 1) * sizeof(uint32_t) + + (sortIndex[hitcnt] - sortIndex[0])); +} + + +bool +SortData::Equals(uint32_t hitcnt, + const uint32_t *sortIndex_1, + const char *sortData_1, + const uint32_t *sortIndex_2, + const char *sortData_2) +{ + if (hitcnt == 0) + return true; + uint32_t diff = sortIndex_2[0] - sortIndex_1[0]; + for (uint32_t i = 1; i <= hitcnt; i++) { + if (diff != (sortIndex_2[i] - sortIndex_1[i])) + return false; + } + assert((sortIndex_1[hitcnt] - sortIndex_1[0]) == + (sortIndex_2[hitcnt] - sortIndex_2[0])); + return (memcmp(sortData_1 + sortIndex_1[0], + sortData_2 + sortIndex_2[0], + sortIndex_1[hitcnt] - sortIndex_1[0]) == 0); +} + + +void +SortData::Copy(uint32_t hitcnt, + uint32_t *sortIndex_dst, + char *sortData_dst, + const uint32_t *sortIndex_src, + const char *sortData_src) +{ + if (hitcnt == 0) + return; + uint32_t diff = sortIndex_dst[0] - sortIndex_src[0]; + for (uint32_t i = 1; i <= hitcnt; i++) { + sortIndex_dst[i] = sortIndex_src[i] + diff; + } + assert((sortIndex_dst[hitcnt] - sortIndex_dst[0]) == + (sortIndex_src[hitcnt] - sortIndex_src[0])); + memcpy(sortData_dst + sortIndex_dst[0], + sortData_src + sortIndex_src[0], + sortIndex_dst[hitcnt] - sortIndex_dst[0]); +} + +} +} diff --git 
a/searchlib/src/vespa/searchlib/common/sortdata.h b/searchlib/src/vespa/searchlib/common/sortdata.h new file mode 100644 index 00000000000..186e534ad5b --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortdata.h @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +namespace search { +namespace common { + +class SortData +{ +public: + struct Ref + { + const char *_buf; + uint32_t _len; + }; + + static uint32_t GetSize(uint32_t hitcnt, + const uint32_t *sortIndex); + + static bool Equals(uint32_t hitcnt, + const uint32_t *sortIndex_1, + const char *sortData_1, + const uint32_t *sortIndex_2, + const char *sortData_2); + + // NB: first element of sortIndex_dst must be set + static void Copy(uint32_t hitcnt, + uint32_t *sortIndex_dst, + char *sortData_dst, + const uint32_t *sortIndex_src, + const char *sortData_src); +}; + + +class SortDataIterator +{ +private: + const uint32_t *_ofs; + const uint32_t *_ofs_end; + const char *_data; + const char *_buf; + uint32_t _len; + +public: + SortDataIterator() + : _ofs(NULL), _ofs_end(NULL), _data(NULL), + _buf(NULL), _len(0) {} + + void Next() + { + if (_ofs >= _ofs_end) { + _buf = NULL; + _len = 0; + return; + } + uint32_t tmp = *_ofs++; + _buf = _data + tmp; + // NB: *_ofs_end is a valid index entry + _len = *_ofs - tmp; + } + + void Init(uint32_t cnt, + const uint32_t *idx, + const char *data) + { + _ofs = idx; + _ofs_end = idx + cnt; + _data = data; + _buf = NULL; + _len = 0; + Next(); + } + + uint32_t GetLen() const { return _len; } + const char *GetBuf() const { return _buf; } + bool Before(SortDataIterator *other, bool beforeOnMatch = false) + { + uint32_t tlen = GetLen(); + uint32_t olen = other->GetLen(); + uint32_t mlen = (tlen <= olen) ? 
tlen : olen; + + if (mlen == 0) + return (tlen != 0 || beforeOnMatch); + + int res = memcmp(GetBuf(), other->GetBuf(), mlen); + + if (res != 0) + return (res < 0); + return (tlen < olen || (tlen == olen && beforeOnMatch)); + } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp new file mode 100644 index 00000000000..c58f15a8372 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -0,0 +1,507 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "sortresults.h" +#include +#include +#include +#include +#include +LOG_SETUP(".search.attribute.sortresults"); + +using search::RankedHit; +using search::common::SortSpec; +using search::common::SortInfo; +using search::attribute::IAttributeContext; +using search::attribute::IAttributeVector; + +namespace { + +template +class RadixHelper +{ +public: + typedef vespalib::convertForSort C; + inline typename C::UIntType + operator()(typename C::InputType v) const + { + return C::convert(v); + } +}; + +} // namespace + + +inline void +FastS_insertion_sort(RankedHit a[], uint32_t n) +{ + uint32_t i, j; + RankedHit swap; + typedef RadixHelper RT; + RT R; + + for (i=1; i R(a[j-1]._rankValue)) + { + a[j] = a[j-1]; + if (!(--j)) break;; + } + a[j] = swap; + } +} + + +template +void +FastS_radixsort(RankedHit a[], uint32_t n, uint32_t ntop) +{ + uint32_t last[256], ptr[256], cnt[256]; + uint32_t sorted, remain; + uint32_t i, j, k; + RankedHit temp, swap; + typedef RadixHelper RT; + RT R; + + memset(cnt, 0, 256*sizeof(uint32_t)); + // Count occurrences [NB: will fail with n < 3] + for(i = 0; i < n - 3; i += 4) { + FastOS_Prefetch::NT(((char *)(&a[i])) + PREFETCH); + cnt[(R(a[i]._rankValue) >> SHIFT) & 0xFF]++; + cnt[(R(a[i + 1]._rankValue) >> 
SHIFT) & 0xFF]++; + cnt[(R(a[i + 2]._rankValue) >> SHIFT) & 0xFF]++; + cnt[(R(a[i + 3]._rankValue) >> SHIFT) & 0xFF]++; + } + for(; i < n; i++) + cnt[(R(a[i]._rankValue) >> SHIFT) & 0xFF]++; + + // Accumulate cnt positions + sorted = (cnt[0]==n); + ptr[0] = n-cnt[0]; + last[0] = n; + for(i=1; i<256; i++) + { + ptr[i] = (last[i]=ptr[i-1]) - cnt[i]; + sorted |= (cnt[i]==n); + } + + if (!sorted) + { + // Go through all permutation cycles until all + // elements are moved or found to be already in place + i = 255; + remain = n; + + while(remain>0) + { + // Find first uncompleted class + while(ptr[i]==last[i]) + { + i--; + } + + // Stop if top candidates in place + if (last[i]-cnt[i]>=ntop) break; + + // Grab first element to move + j = ptr[i]; + swap = a[j]; + k = (R(swap._rankValue) >> SHIFT) & 0xFF; + + // Swap into correct class until cycle completed + if (i!=k) + { + do + { + temp = a[ptr[k]]; + a[ptr[k]++] = swap; + k = (R((swap = temp)._rankValue) >> SHIFT) & 0xFF; + remain--; + } while (i!=k); + // Place last element in cycle + a[j] = swap; + } + ptr[k]++; + remain--; + } + } else { + FastS_radixsort(a, n, ntop); + return; + } + + if (SHIFT>0) + { + // Sort on next key + for(i=0; i<256 ; i++) + if ((last[i]-cnt[i])INSERT_SORT_LEVEL) { + if (last[i](&a[last[i]-cnt[i]], cnt[i], + cnt[i]); + } else { + FastS_radixsort(&a[last[i]-cnt[i]], cnt[i], + cnt[i]+ntop-last[i]); + } + } else if (cnt[i]>1) { + FastS_insertion_sort(&a[last[i]-cnt[i]], cnt[i]); + } + } + } +} +template<> +void +FastS_radixsort<-8>(RankedHit *, uint32_t, uint32_t) {} + +void +FastS_SortResults(RankedHit a[], uint32_t n, uint32_t ntop) +{ + if (n > INSERT_SORT_LEVEL) { + FastS_radixsort(a, n, ntop); + } else { + FastS_insertion_sort(a, n); + } +} + +//----------------------------------------------------------------------------- + +FastS_DefaultResultSorter FastS_DefaultResultSorter::__instance; + +//----------------------------------------------------------------------------- + 
+FastS_DocIdResultSorter FastS_DocIdResultSorter::__instance; + +//----------------------------------------------------------------------------- + +bool +FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo) +{ + if (sInfo._field.empty()) + return false; + + uint32_t type = ASC_VECTOR; + const IAttributeVector * vector(NULL); + + if ((sInfo._field.size() == 6) && (sInfo._field == "[rank]")) { + type = (sInfo._ascending) ? ASC_RANK : DESC_RANK; + } else if ((sInfo._field.size() == 7) && (sInfo._field == "[docid]")) { + type = (sInfo._ascending) ? ASC_DOCID : DESC_DOCID; + } else { + type = (sInfo._ascending) ? ASC_VECTOR : DESC_VECTOR; + vector = vecMan.getAttribute(sInfo._field); + if ( !vector || vector->hasMultiValue()) { + const char * err = "OK"; + if ( !vector ) { + err = "not valid"; + } else if ( vector->hasMultiValue()) { + err = "multivalued"; + } + LOG(warning, "Attribute vector '%s' is %s. Skipped in sorting", sInfo._field.c_str(), err); + return false; + } + } + + LOG(spam, "SortSpec: adding vector (%s)'%s'", + (sInfo._ascending) ? 
"+" : "-", sInfo._field.c_str()); + + _vectors.push_back(VectorRef(type, vector, sInfo._converter.get())); + + return true; +} + +uint8_t * +FastS_SortSpec::realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData) +{ + // realloc + variableWidth *= 2; + available += variableWidth * n; + dataSize += variableWidth * n; + uint32_t byteUsed = mySortData - &_binarySortData[0]; + _binarySortData.resize(dataSize); + return &_binarySortData[0] + byteUsed; +} + +void +FastS_SortSpec::initSortData(const RankedHit *hits, uint32_t n) +{ + freeSortData(); + size_t fixedWidth = 0; + size_t variableWidth = 0; + for (auto iter = _vectors.begin(); iter != _vectors.end(); ++iter) { + if (iter->_type >= ASC_DOCID) { // doc id + fixedWidth += 4; + }else if (iter->_type >= ASC_RANK) { // rank value + fixedWidth += sizeof(search::HitRank); + } else { + size_t numBytes = iter->_vector->getFixedWidth(); + if (numBytes == 0) { // string + variableWidth += 11; + } else if (!iter->_vector->hasMultiValue()) { + fixedWidth += numBytes; + } + } + } + uint32_t dataSize = (fixedWidth + variableWidth) * n; + uint32_t available = dataSize; + _binarySortData.resize(dataSize); + uint8_t *mySortData = &_binarySortData[0]; + + _sortDataArray.resize(n); + + for (uint32_t i(0), idx(0); (i < n) && !_doom.doom(); ++i) { + uint32_t len = 0; + for (auto iter = _vectors.begin(); iter != _vectors.end(); ++iter) { + int written(0); + if (available < std::max(sizeof(hits->_docId), sizeof(hits->_rankValue))) { + mySortData = realloc(n, variableWidth, available, dataSize, mySortData); + } + do { + switch (iter->_type) { + case ASC_DOCID: + vespalib::serializeForSort >(hits[i].getDocId(), mySortData); + written = sizeof(hits->_docId); + break; + case DESC_DOCID: + vespalib::serializeForSort >(hits[i].getDocId(), mySortData); + written = sizeof(hits->_docId); + break; + case ASC_RANK: + vespalib::serializeForSort >(hits[i]._rankValue, mySortData); + written = 
sizeof(hits->_rankValue); + break; + case DESC_RANK: + vespalib::serializeForSort >(hits[i]._rankValue, mySortData); + written = sizeof(hits->_rankValue); + break; + case ASC_VECTOR: + written = iter->_vector->serializeForAscendingSort(hits[i].getDocId(), mySortData, available, iter->_converter); + break; + case DESC_VECTOR: + written = iter->_vector->serializeForDescendingSort(hits[i].getDocId(), mySortData, available, iter->_converter); + break; + } + if (written == -1) { + mySortData = realloc(n, variableWidth, available, dataSize, mySortData); + } + } while(written == -1); + available -= written; + mySortData += written; + len += written; + } + SortData & sd = _sortDataArray[i]; + sd._docId = hits[i]._docId; + sd._rankValue = hits[i]._rankValue; + sd._idx = idx; + sd._len = len; + sd._pos = 0; + idx += len; + } +} + + +FastS_SortSpec::FastS_SortSpec(const vespalib::Doom & doom, int method) : + _doom(doom), + _method(method), + _sortSpec(), + _vectors() +{ +} + + +FastS_SortSpec::~FastS_SortSpec() +{ + freeSortData(); +} + + +bool +FastS_SortSpec::Init(const vespalib::string & sortStr, IAttributeContext & vecMan) +{ + LOG(spam, "sortStr = %s", sortStr.c_str()); + bool retval(true); + try { + _sortSpec = SortSpec(sortStr); + for (SortSpec::const_iterator it(_sortSpec.begin()), mt(_sortSpec.end()); retval && (it < mt); it++) { + retval = Add(vecMan, *it); + } + } catch (const std::exception & e) { + LOG(warning, "Failed parsing sortspec: %s", sortStr.c_str()); + return retval; + } + + return retval; +} + + +uint32_t +FastS_SortSpec::getSortDataSize(uint32_t offset, uint32_t n) +{ + uint32_t size = 0; + for (uint32_t i = offset; i < (offset + n); ++i) { + size += _sortDataArray[i]._len; + } + return size; +} + +void +FastS_SortSpec::copySortData(uint32_t offset, uint32_t n, + uint32_t *idx, char *buf) +{ + const uint8_t * sortData = &_binarySortData[0]; + uint32_t totalLen = 0; + for (uint32_t i = offset; i < (offset + n); ++i, ++idx) { + const uint8_t * src = 
sortData + _sortDataArray[i]._idx; + uint32_t len = _sortDataArray[i]._len; + memcpy(buf, src, len); + buf += len; + *idx = totalLen; + totalLen += len; + } + *idx = totalLen; // end of data index entry +} + +void +FastS_SortSpec::freeSortData() +{ + { + BinarySortData tmp; + _binarySortData.swap(tmp); + } + { + SortDataArray tmp; + _sortDataArray.swap(tmp); + } +} + +bool +FastS_SortSpec::hasSortData() const +{ + return ! _binarySortData.empty() && ! _sortDataArray.empty(); +} + +void +FastS_SortSpec::initWithoutSorting(const RankedHit * hits, uint32_t hitCnt) +{ + initSortData(hits, hitCnt); +} + +inline int +FastS_SortSpec::Compare(const FastS_SortSpec *self, const SortData &a, + const SortData &b) +{ + const uint8_t * ref = &(self->_binarySortData[0]); + uint32_t len = a._len < b._len ? a._len : b._len; + int retval = memcmp(ref + a._idx, + ref + b._idx, len); + if (retval < 0) { + return -1; + } else if (retval > 0) { + return 1; + } + return 0; +} + +template +inline T * +FastS_median3(T *a, T *b, T *c, Compare *compobj) +{ + return Compare::Compare(compobj, *a, *b) < 0 ? + (Compare::Compare(compobj, *b, *c) < 0 ? b : Compare::Compare(compobj, + *a, *c) < 0 ? c : a) : + (Compare::Compare(compobj, *b, *c) > 0 ? b : Compare::Compare(compobj, + *a, *c) > 0 ? c : a); +} + + +template +void +FastS_insertion_sort(T a[], uint32_t n, Compare *compobj) +{ + uint32_t i, j; + T swap; + + for (i=1; i +{ +public: + StdSortDataCompare(const uint8_t * s) : _sortSpec(s) { } + bool operator() (const FastS_SortSpec::SortData & x, const FastS_SortSpec::SortData & y) const { + return cmp(x, y) < 0; + } + int cmp(const FastS_SortSpec::SortData & a, const FastS_SortSpec::SortData & b) const { + uint32_t len = std::min(a._len, b._len); + int retval = memcmp(_sortSpec + a._idx, _sortSpec + b._idx, len); + return retval ? 
retval : a._len - b._len; + } +private: + const uint8_t * _sortSpec; +}; + +class SortDataRadix +{ +public: + SortDataRadix(const uint8_t * s) : _data(s) { } + uint32_t operator () (FastS_SortSpec::SortData & a) const { + uint32_t r(0); + uint32_t left(a._len - a._pos); + switch (left) { + default: + case 4: + r |= _data[a._idx + a._pos + 3] << 0; + case 3: + r |= _data[a._idx + a._pos + 2] << 8; + case 2: + r |= _data[a._idx + a._pos + 1] << 16; + case 1: + r |= _data[a._idx + a._pos + 0] << 24; + case 0: + ; + } + a._pos += std::min(4u, left); + return r; + } +private: + const uint8_t * _data; +}; + +class SortDataEof +{ +public: + bool operator () (const FastS_SortSpec::SortData & a) const { return a._pos >= a._len; } + static bool alwaysEofOnCheck() { return false; } +}; + + +void +FastS_SortSpec::sortResults(RankedHit a[], uint32_t n, uint32_t topn) +{ + initSortData(a, n); + SortData * sortData = &_sortDataArray[0]; + if (_method == 0) { + search::qsort<7, 40, SortData, FastS_SortSpec>(sortData, n, this); + } else if (_method == 1) { + std::sort(sortData, sortData + n, StdSortDataCompare(&_binarySortData[0])); + } else { + vespalib::Array radixScratchPad(n); + search::radix_sort(SortDataRadix(&_binarySortData[0]), StdSortDataCompare(&_binarySortData[0]), SortDataEof(), 1, sortData, n, &radixScratchPad[0], 0, 96, topn); + } + for (uint32_t i(0), m(_sortDataArray.size()); i < m; ++i) { + a[i]._rankValue = _sortDataArray[i]._rankValue; + a[i]._docId = _sortDataArray[i]._docId; + } +} diff --git a/searchlib/src/vespa/searchlib/common/sortresults.h b/searchlib/src/vespa/searchlib/common/sortresults.h new file mode 100644 index 00000000000..8da643411a0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortresults.h @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define PREFETCH 64 +#define INSERT_SORT_LEVEL 80 + +/** + * Sort the given array of results. + * + * @param a the array of hits + * @param n the number of hits + * @param ntop the number of hits needed in correct order + **/ +void FastS_SortResults(search::RankedHit a[], + unsigned int n, unsigned int ntop); + +//----------------------------------------------------------------------------- + +struct FastS_IResultSorter { + /** + * Destructor. No cleanup needed for base class. + */ + virtual ~FastS_IResultSorter(void) {} + + /** + * @return should bitvector hits also be sorted? + **/ + virtual bool completeSort() const = 0; + + /** + * Sort the given array of results. + * + * @param a the array of hits + * @param n the number of hits + * @param ntop the number of hits needed in correct order + **/ + virtual void sortResults(search::RankedHit a[], uint32_t n, + uint32_t ntop) = 0; +}; + +//----------------------------------------------------------------------------- + +class FastS_DefaultResultSorter : public FastS_IResultSorter +{ +private: + static FastS_DefaultResultSorter __instance; + +public: + static FastS_DefaultResultSorter *instance() { return &__instance; } + virtual bool completeSort() const { return false; } + virtual void sortResults(search::RankedHit a[], uint32_t n, + uint32_t ntop) + { + return FastS_SortResults(a, n, ntop); + } +}; + +//----------------------------------------------------------------------------- + +class FastS_DocIdResultSorter : public FastS_IResultSorter +{ +private: + static FastS_DocIdResultSorter __instance; + +public: + static FastS_DocIdResultSorter *Instance() { return &__instance; } + virtual bool completeSort() const { return true; } + virtual void sortResults(search::RankedHit[], uint32_t, uint32_t) { + // already sorted on 
docid + } +}; + +//----------------------------------------------------------------------------- + +class FastS_SortSpec : public FastS_IResultSorter, public vespalib::noncopyable +{ +private: + friend class MultilevelSortTest; +public: + enum { + ASC_VECTOR = 0, + DESC_VECTOR = 1, + ASC_RANK = 2, + DESC_RANK = 3, + ASC_DOCID = 4, + DESC_DOCID = 5 + }; + + struct VectorRef + { + VectorRef(uint32_t type, const search::attribute::IAttributeVector * vector, const search::common::BlobConverter *converter) + : _type(type), + _vector(vector), + _converter(converter) + { + } + uint32_t _type; + const search::attribute::IAttributeVector *_vector; + const search::common::BlobConverter *_converter; + }; + + struct SortData : public search::RankedHit + { + uint32_t _idx; + uint32_t _len; + uint32_t _pos; + }; + +private: + typedef std::vector VectorRefList; + typedef vespalib::AutoAlloc<0x800000> Alloc; + typedef vespalib::Array BinarySortData; + typedef vespalib::Array SortDataArray; + vespalib::Doom _doom; + int _method; + search::common::SortSpec _sortSpec; + VectorRefList _vectors; + BinarySortData _binarySortData; + SortDataArray _sortDataArray; + + bool Add(search::attribute::IAttributeContext & vecMan, const search::common::SortInfo & sInfo); + void initSortData(const search::RankedHit *a, uint32_t n); + uint8_t * realloc(uint32_t n, size_t & variableWidth, uint32_t & available, uint32_t & dataSize, uint8_t *mySortData); + +public: + FastS_SortSpec(const vespalib::Doom & doom, int method=2); + virtual ~FastS_SortSpec(); + + std::pair getSortRef(size_t i) const { + return std::pair((const char*)(&_binarySortData[0] + _sortDataArray[i]._idx), + _sortDataArray[i]._len); + } + bool Init(const vespalib::string & sortSpec, search::attribute::IAttributeContext & vecMan); + virtual bool completeSort() const { return true; } + virtual void sortResults(search::RankedHit a[], uint32_t n, uint32_t topn); + uint32_t getSortDataSize(uint32_t offset, uint32_t n); + void 
copySortData(uint32_t offset, uint32_t n, uint32_t *idx, char *buf); + void freeSortData(); + bool hasSortData() const; + void initWithoutSorting(const search::RankedHit * hits, + uint32_t hitCnt); + static int Compare(const FastS_SortSpec *self, const SortData &a, const SortData &b); +}; + +//----------------------------------------------------------------------------- + diff --git a/searchlib/src/vespa/searchlib/common/sortspec.cpp b/searchlib/src/vespa/searchlib/common/sortspec.cpp new file mode 100644 index 00000000000..b522d76ebaa --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortspec.cpp @@ -0,0 +1,180 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +LOG_SETUP(".search.common.sortspec"); + +namespace search { +namespace common { + +using vespalib::ConstBufferRef; +using vespalib::make_string; + +ConstBufferRef PassThroughConverter::onConvert(const ConstBufferRef & src) const +{ + return src; +} + +LowercaseConverter::LowercaseConverter() : + _buffer() +{ +} + +ConstBufferRef LowercaseConverter::onConvert(const ConstBufferRef & src) const +{ + _buffer.clear(); + vespalib::stringref input((const char *)src.data(), src.size()); + vespalib::Utf8Reader r(input); + vespalib::Utf8Writer w(_buffer); + while (r.hasMore()) { + ucs4_t c = r.getChar(0xFFFD); + c = Fast_NormalizeWordFolder::ToFold(c); + w.putChar(c); + } + return ConstBufferRef(_buffer.begin(), _buffer.size()); +} + +namespace { + vespalib::Lock _GlobalDirtyICUThreadSafeLock; +} + +UcaConverter::UcaConverter(const vespalib::string & locale, const vespalib::string & strength) : + _buffer(), + _u16Buffer(128), + _collator() +{ + UErrorCode status = U_ZERO_ERROR; + Collator *coll(NULL); + { + vespalib::LockGuard guard(_GlobalDirtyICUThreadSafeLock); + coll = Collator::createInstance(icu::Locale(locale.c_str()), status); + } + 
if(U_SUCCESS(status)) { + _collator.reset(coll); + if (strength.empty()) { + _collator->setStrength(Collator::PRIMARY); + } else if (strength == "PRIMARY") { + _collator->setStrength(Collator::PRIMARY); + } else if (strength == "SECONDARY") { + _collator->setStrength(Collator::SECONDARY); + } else if (strength == "TERTIARY") { + _collator->setStrength(Collator::TERTIARY); + } else if (strength == "QUATERNARY") { + _collator->setStrength(Collator::QUATERNARY); + } else if (strength == "IDENTICAL") { + _collator->setStrength(Collator::IDENTICAL); + } else { + throw std::runtime_error("Illegal uca collation strength : " + strength); + } + } else { + delete coll; + throw std::runtime_error("Failed Collator::createInstance(Locale(locale.c_str()), status) with locale : " + locale); + } +} + +int UcaConverter::utf8ToUtf16(const ConstBufferRef & src) const +{ + UErrorCode status = U_ZERO_ERROR; + int32_t u16Wanted(0); + u_strFromUTF8(&_u16Buffer[0], _u16Buffer.size(), &u16Wanted, static_cast(src.data()), -1, &status); + if (U_SUCCESS(status)) { + } else if (status == U_INVALID_CHAR_FOUND) { + LOG(warning, "ICU was not able to convert the %ld alleged utf8 characters'%s' to utf16", src.size(), src.c_str()); + } else if (status == U_BUFFER_OVERFLOW_ERROR) { + //Ignore as this is handled on the outside. 
+ } else { + LOG(warning, "ICU made a undefined complaint(%d) about the %ld alleged utf8 characters'%s' to utf16", status, src.size(), src.c_str()); + } + return u16Wanted; +} + +ConstBufferRef UcaConverter::onConvert(const ConstBufferRef & src) const +{ + int32_t u16Wanted(utf8ToUtf16(src)); + if (u16Wanted > (int)_u16Buffer.size()) { + _u16Buffer.resize(u16Wanted); + u16Wanted = utf8ToUtf16(src); + } + int wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz()); + _buffer.check(); + if (wanted > _buffer.siz()) { + _buffer.reserve(wanted); + wanted = _collator->getSortKey(&_u16Buffer[0], u16Wanted, _buffer.ptr(), _buffer.siz()); + _buffer.check(); + } + return ConstBufferRef(_buffer.ptr(), wanted); +} + +SortSpec::SortSpec(const vespalib::string & spec) : + _spec(spec) +{ + for (const char *pt(spec.c_str()), *mt(spec.c_str() + spec.size()); pt < mt;) { + for (; pt < mt && *pt != '+' && *pt != '-'; pt++); + if (pt != mt) { + bool ascending = (*pt++ == '+'); + const char *vectorName = pt; + for (;pt < mt && *pt != ' '; pt++); + vespalib::string funcSpec(vectorName, pt - vectorName); + const char * func = funcSpec.c_str(); + const char *p = func; + const char *e = func+funcSpec.size(); + for(; (p < e) && (*p != '('); p++); + if (*p == '(') { + if (strncmp(func, "uca", std::min(3l, p-func)) == 0) { + p++; + const char * attrName = p; + for(; (p < e) && (*p != ','); p++); + if (*p == ',') { + vespalib::string attr(attrName, p-attrName); + p++; + const char *localeName = p; + for(; (p < e) && (*p != ')') && (*p != ','); p++); + if (*p == ',') { + vespalib::string locale(localeName, p-localeName); + p++; + const char *strengthName = p; + for(; (p < e) && (*p != ')'); p++); + if (*p == ')') { + vespalib::string strength(strengthName, p - strengthName); + push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, strength)))); + } else { + throw std::runtime_error(make_string("Missing ')' at %s attr=%s locale=%s 
strength=%s", p, attr.c_str(), localeName, strengthName)); + } + } else if (*p == ')') { + vespalib::string locale(localeName, p-localeName); + push_back(SortInfo(attr, ascending, BlobConverter::SP(new UcaConverter(locale, "")))); + } else { + throw std::runtime_error(make_string("Missing ')' or ',' at %s attr=%s locale=%s", p, attr.c_str(), localeName)); + } + } else { + throw std::runtime_error(make_string("Missing ',' at %s", p)); + } + } else if (strncmp(func, "lowercase", std::min(9l, p-func)) == 0) { + p++; + const char * attrName = p; + for(; (p < e) && (*p != ')'); p++); + if (*p == ')') { + vespalib::string attr(attrName, p-attrName); + push_back(SortInfo(attr, ascending, BlobConverter::SP(new LowercaseConverter()))); + } else { + throw std::runtime_error("Missing ')'"); + } + } else { + throw std::runtime_error("Unknown func " + vespalib::string(func, p-func)); + } + } else { + push_back(SortInfo(funcSpec, ascending, BlobConverter::SP(NULL))); + } + } + } +} + +} +} diff --git a/searchlib/src/vespa/searchlib/common/sortspec.h b/searchlib/src/vespa/searchlib/common/sortspec.h new file mode 100644 index 00000000000..bfa6a064105 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/sortspec.h @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace common { + +struct SortInfo { + SortInfo(const vespalib::string & field, bool ascending, const BlobConverter::SP & converter) : _field(field), _ascending(ascending), _converter(converter) { } + vespalib::string _field; + bool _ascending; + BlobConverter::SP _converter; +}; + +class SortSpec : public std::vector +{ +public: + SortSpec() : _spec() { } + SortSpec(const vespalib::string & spec); + const vespalib::string & getSpec() const { return _spec; } +private: + vespalib::string _spec; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/common/transport.h b/searchlib/src/vespa/searchlib/common/transport.h new file mode 100644 index 00000000000..9b4f2ecb5c2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/transport.h @@ -0,0 +1,401 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + + +#pragma once + + +#include + +namespace search +{ + +namespace fs4transport +{ + +/** + * Instead of using a 32-bit number to send the 'usehardware' flag, we + * now use this 32-bit number to send 32 flags. The currently defined flags + * are as follows: + *

    + *
  • QFLAG_ALLOW_ERRORPACKET: Allow an error packet to be sent as + * response to this query packet.
  • + *
  • QFLAG_REPORT_QUEUELEN: Send an extra queue length packet before + * query result packets.
  • + *
  • QFLAG_ESTIMATE: Indicates that the query is performed to get + * an estimate of the total number of hits
  • + *
  • QFLAG_DUMP_FEATURES: Dump detailed ranking information. Note that + * this flag will only be considered when sent in a + * GETDOCSUMSX packet. It is put here to avoid having + * 2 separate query related flag spaces
  • + *
  • QFLAG_DROP_SORTDATA: Don't return any sort data even if sortspec + * is used.
  • + *
  • QFLAG_NO_RESULTCACHE: Do not use any result cache. Perform query no matter what.
  • + *
+ **/ +enum queryflags { + QFLAG_ALLOW_ERRORPACKET = 0x00000004, + QFLAG_REPORT_QUEUELEN = 0x00000008, + QFLAG_ESTIMATE = 0x00000080, + QFLAG_DROP_SORTDATA = 0x00004000, + QFLAG_REPORT_COVERAGE = 0x00008000, + QFLAG_NO_RESULTCACHE = 0x00010000, + QFLAG_DUMP_FEATURES = 0x00040000, + + QFLAG_CACHE_MASK = (// which flags affect the cache + QFLAG_ESTIMATE | + QFLAG_DROP_SORTDATA | + QFLAG_REPORT_COVERAGE) +}; + + +/** + * The new PCODE_QUERYRESULTX packet contains a 32-bit field called + * 'featureflags'. Each bit in that field denotes a separate feature + * that may be present in the query result packet or not. The comment + * describing the packet format indicates what data fields depend on + * what features. Note that after removing the query id and the + * feature flags from a PCODE_QUERYRESULTX packet it is binary + * compatible with the PCODE_QUERYRESULT, PCODE_MLD_QUERYRESULT and + * PCODE_MLD_QUERYRESULT2 packets given the correct set of + * features. The features present in the 'old' query result packets + * are defined in this enum along with the Query Result Features + * themselves. The value called QRF_SUPPORTED_MASK denotes which + * features are supported by the current version. If a packet with + * unknown features is received on the network it is discarded (as it + * would be if it had an illegal PCODE). + **/ +enum queryresult_features { + QRF_MLD = 0x00000001, + QRF_SORTDATA = 0x00000010, + QRF_AGGRDATA = 0x00000020, + QRF_COVERAGE = 0x00000040, + QRF_GROUPDATA = 0x00000200, + QRF_PROPERTIES = 0x00000400, + + QRF_QUERYRESULT_MASK = 0, + QRF_MLD_QUERYRESULT_MASK = QRF_MLD +}; + + +/** + * The new PCODE_QUERYX packet contains a 32-bit field called + * 'featureflags'. Each bit in that field denotes a separate feature + * that may be present in the query packet or not. The comment + * describing the packet format indicates what data fields depend on + * what features. 
Note that after removing the query id and the + * feature flags from a PCODE_QUERYX packet it is binary compatible + * with the PCODE_PARSEDQUERY2 packets + * given the correct set of features. The features present in the + * 'old' query packets are defined in this enum along with the Query + * Features themselves. The values called + * QF_SUPPORTED_[FSEARCH/FDISPATCH]_MASK denote which features are + * supported by the current version. If a packet with unknown features + * is received on the network it is discarded (as it would be if it + * had an illegal PCODE). + **/ +enum query_features { + QF_PARSEDQUERY = 0x00000002, + QF_RANKP = 0x00000004, + QF_SORTSPEC = 0x00000080, + QF_AGGRSPEC = 0x00000100, + QF_LOCATION = 0x00000800, + QF_PROPERTIES = 0x00100000, + QF_WARMUP = 0x00200000, // Deprecated, do not use! + QF_GROUPSPEC = 0x00400000, + QF_SESSIONID = 0x00800000, + + QF_PARSEDQUERY2_MASK = (QF_PARSEDQUERY | QF_RANKP) +}; + + +/** + * The new PCODE_GETDOCSUMSX packet contains a 32-bit field called + * 'featureflags'. Each bit in that field denotes a separate feature + * that may be present in the getdocsums packet or not. The comment + * describing the packet format indicates what data fields depend on + * what features. Note that after removing the query id and the + * feature flags from a PCODE_GETDOCSUMSX packet it is binary + * compatible with the PCODE_GETDOCSUMS, PCODE_MLD_GETDOCSUMS and + * PCODE_MLD_GETDOCSUMS2 packets given the correct set of + * features. The features present in the 'old' getdocsums packets are + * defined in this enum along with the GetDocsums Features + * themselves. The values called + * GDF_SUPPORTED_[FSEARCH/FDISPATCH]_MASK denote which features are + * supported by the current version. If a packet with unknown features + * is received on the network it is discarded (as it would be if it + * had an illegal PCODE). 
+ **/ +enum getdocsums_features { + GDF_MLD = 0x00000001, + GDF_QUERYSTACK = 0x00000004, + GDF_RANKP_QFLAGS = 0x00000010, + GDF_LOCATION = 0x00000080, + GDF_RESCLASSNAME = 0x00000800, + GDF_PROPERTIES = 0x00001000, + GDF_FLAGS = 0x00002000, + + GDF_GETDOCSUMS_MASK = 0, + GDF_MLD_GETDOCSUMS_MASK = (GDF_MLD) +}; + + +enum getdocsums_flags +{ + GDFLAG_IGNORE_ROW = 0x00000001, + GDFLAG_ALLOW_SLIME = 0x00000002 +}; + +// docsum class for slime tunneling +const uint32_t SLIME_MAGIC_ID = 0x55555555; + +enum monitorquery_features +{ + MQF_QFLAGS = 0x00000002, + + MQF_MONITORQUERY_MASK = 0 +}; + + +enum monitorquery_flags +{ + // NOT_USED MQFLAG_REPORT_SOFTOFFLINE = 0x00000010, + MQFLAG_REPORT_ACTIVEDOCS = 0x00000020 +}; + + +enum monitorresult_features +{ + MRF_MLD = 0x00000001, + MRF_RFLAGS = 0x00000008, + MRF_ACTIVEDOCS = 0x00000010, + + MRF_MONITORRESULT_MASK = 0, + MRF_MLD_MONITORRESULT_MASK = (MRF_MLD) +}; + + +enum monitorresult_flags +{ + // NOT_USED MRFLAG_SOFTOFFLINE = 0x00000001 +}; + + +/** + * Codes for packets between dispatch nodes and search nodes. + * general packet (i.e. message) format: + * uint32_t packetLength- length in bytes, EXCLUDING this length field + * packetcode pCode - see the enum below; same length as uint32_t + * packetData - variable length + */ +enum packetcode { + PCODE_EOL = 200, /* ..fdispatch <-> ..fsearch. PacketData: + *0 {uint32_t queryId,} - only in new format!*/ + PCODE_QUERY_NOTUSED = 201, + PCODE_QUERYRESULT = 202, /* ..fdispatch <- ..fsearch. PacketData: + *0 {uint32_t queryId,} - only in new format! + *1 uint32_t offset, + *2 uint32_t numDocs, + *3 uint32_t totNumDocs, + *4 search::HitRank maxRank, + *5 time_t docstamp, - sent as Uint32 + *6 struct FastS_connhitresult { + * uint32_t docid; + * search::HitRank metric + * }[] hits */ + PCODE_ERROR = 203, /* ..fdispatch <- ..fsearch/..fdispatch + * {uint32_t queryId,} - only in new format! 
+ * uint32_t error_code [see common/errorcodes.h] + * uint32_t message_len + * char[] message (UTF-8) */ + PCODE_GETDOCSUMS = 204, /* ..fdispatch -> ..fsearch. PacketData: + *0 {uint32_t queryId,} - only in new format! + * time_t docstamp - header + * uint32_t[] docid - body */ + PCODE_DOCSUM = 205, /* ..fdispatch <- ..fsearch. + *0 {uint32_t queryId,} - only in new format! + *1 uint32_t location + *2 char[] + */ + PCODE_MONITORQUERY = 206, /* ..fdispatch -> ..fsearch. No packet data. + */ + PCODE_MONITORRESULT = 207, /* ..fdispatch <- ..fsearch. PacketData: + * int partitionId, + * time_t timeStamp */ + PCODE_MLD_QUERYRESULT = 208,/* ..fdispatch <- ..fdispatch. + * header: {queryId,} offset, numdocs, tnumdocs, + * maxRank, docstamp + * body: (docid, metric, partition, docstamp)* + */ + PCODE_MLD_GETDOCSUMS = 209, /* ..fdispatch -> ..fdispatch. + * header: {queryId,} docstamp + * body: (docid, partition, docstamp)* + */ + PCODE_MLD_MONITORRESULT = 210 ,/* ..fdispatch <- ..fdispatch NB: no queryId! + * lowest partition id, + * timestamp, + * total number of nodes, + * active nodes, + * total number of partitions, + * active partitions + */ + PCODE_CLEARCACHES = 211, /* ..fdispatch -> ..fdispatch. No packet data/ NotUsed + */ + PCODE_QUERY2_NOTUSED = 212, + PCODE_PARSEDQUERY2 = 213, /* ..fdispatch -> ..fsearch. PacketData: + *0 {uint32_t queryId,} - only in new format! + *1 ..query::querytypes searchType, - all/any/exact + *2 uint32_t offset, + *3 uint32_t maxhits, + *4 uint32_t qflags, (including usehardware) + *5 uint32_t rankprofile, - enum + *6 uint32_t numStackItems, + *7 multiple encoded stackitems: + - uint32_t OR|AND|NOT|RANK + uint32_t arity + - uint32_t PHRASE + uint32_t arity + uint32_t indexNameLen + char[] indexName + - uint32_t TERM + uint32_t indexNameLen + char[] indexName + uint32_t termLen + char[] term + */ + PCODE_MLD_QUERYRESULT2_NOTUSED = 214, + PCODE_MLD_GETDOCSUMS2_NOTUSED = 215, + + PCODE_QUEUELEN = 216, /* fdispatch <- fsearch. 
+ * header: queueLen, dispatchers + */ + PCODE_QUERYRESULTX = 217, /* + * {uint32_t queryId,} - only if persistent + * uint32_t featureflags, - see 'queryresult_features' + * uint32_t offset, + * uint32_t numDocs, + * uint32_t totNumDocs, + * search::HitRank maxRank, + * uint32_t docstamp, + * uint32_t[numDocs] sortIndex - if QRF_SORTDATA + * char[sidx[n - 1]] sortData - if QRF_SORTDATA + * uint32_t aggrDataLen - if QRF_AGGRDATA + * char[aggrDataLen] aggrData - if QRF_AGGRDATA + * uint32_t groupDataLen - if QRF_GROUPDATA + * char[groupDataLen] groupData - if QRF_GROUPDATA + * uint64_t coverageDocs - if QRF_COVERAGE + * uint32_t coverageNodes - if QRF_COVERAGE + * uint32_t coverageFull - if QRF_COVERAGE + * numDocs * hit { + * uint32_t docid, + * search::HitRank metric, + * uint32_t partid, - if QRF_MLD + * uint32_t docstamp, - if QRF_MLD + * } */ + PCODE_QUERYX = 218, /* + * {uint32_t queryId,} - only if persistent + * uint32_t featureflags, - see 'query_features' + * uint32_t querytype + * uint32_t offset, + * uint32_t maxhits, + * uint32_t qflags, + * uint32_t minhits, - if QF_MINHITS + * uint32_t numProperties - if QF_PROPERTIES + * numProperties * props { - if QF_PROPERTIES + * uint32_t nameLen + * char[nameLen] name + * uint32_t numEntries + * numentries * entry { + * uint32_t keyLen + * char[keyLen] key + * uint32_t valueLen + * char[valueLen] value + * } + * } + * uint32_t sortSpecLen - if QF_SORTSPEC + * char[sortSpecLen] sortSpec - if QF_SORTSPEC + * uint32_t aggrSpecLen - if QF_AGGRSPEC + * char[aggrSpecLen] aggrSpec - if QF_AGGRSPEC + * uint32_t groupSpecLen - if QF_GROUPSPEC + * char[groupSpecLen] groupSpec - if QF_GROUPSPEC + * uint32_t locationLen - if QF_LOCATION + * char[locationLen] location - if QF_LOCATION + * uint32_t numStackItems, - if QF_PARSEDQUERY + * multiple encoded stackitems: - if QF_PARSEDQUERY + - uint32_t OR|AND|NOT|RANK + uint32_t arity + - uint32_t PHRASE + uint32_t arity + uint32_t indexNameLen + char[] indexName + - uint32_t TERM 
+ uint32_t indexNameLen + char[] indexName + uint32_t termLen + char[] term + */ + PCODE_GETDOCSUMSX = 219, /* + * {uint32_t queryId,} - only if persistent + * uint32_t featureflags, - see 'getdocsums_features' + * uint32_t docstamp, + * uint32_t rankprofile, - if GDF_RANKP_QFLAGS + * uint32_t qflags, - if GDF_RANKP_QFLAGS + * uint32_t resClassNameLen - if GDF_RESCLASSNAME + * char [] resClassName - if GDF_RESCLASSNAME + * uint32_t numProperties - if GDF_PROPERTIES + * numProperties * props { - if GDF_PROPERTIES + * uint32_t nameLen + * char[nameLen] name + * uint32_t numEntries + * numentries * entry { + * uint32_t keyLen + * char[keyLen] key + * uint32_t valueLen + * char[valueLen] value + * } + * } + * uint32_t stackItems, - if GDF_STACKDUMP + * uint32_t stackDumpLen, - if GDF_STACKDUMP + * char[stackDumpLen] stackDump, - if GDF_STACKDUMP + * uint32_t locationLen - if GDF_LOCATION + * char[locationLen] location - if GDF_LOCATION + * N * doc { + * uint32_t docid, + * uint32_t partid, - if GDF_MLD + * uint32_t docstamp, - if GDF_MLD + * } + */ + PCODE_MONITORQUERYX = 220, /* + * uint32_t featureFlags; + * - see monitorquery_features + */ + PCODE_MONITORRESULTX = 221, /* + * uint32_t featureFlags; + * - see monitorresult_features + * uint32_t partitionId; + * uint32_t timestamp; + * uint32_t totalNodes; - if MRF_MLD + * uint32_t activeNodes; - if MRF_MLD + * uint32_t totalParts; - if MRF_MLD + * uint32_t activeParts; - if MRF_MLD + */ + PCODE_TRACEREPLY = 222, /* + * numProperties * props { + * uint32_t nameLen + * char[nameLen] name + * uint32_t numEntries + * numentries * entry { + * uint32_t keyLen + * char[keyLen] key + * uint32_t valueLen + * char[valueLen] value + * } + * } + */ + PCODE_LastCode = 223 // Used for consistency checking only, must be last. 
+}; + +} // namespace fs4transport +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp b/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp new file mode 100644 index 00000000000..231407fadca --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/tunefileinfo.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "tunefileinfo.h" + + +namespace search +{ + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/common/tunefileinfo.h b/searchlib/src/vespa/searchlib/common/tunefileinfo.h new file mode 100644 index 00000000000..d2281cd112c --- /dev/null +++ b/searchlib/src/vespa/searchlib/common/tunefileinfo.h @@ -0,0 +1,431 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search +{ + +class TuneFileSeqRead +{ +public: + enum TuneControl + { + NORMAL, + DIRECTIO + }; + +private: + + TuneControl _tuneControl; + +public: + TuneFileSeqRead(void) + : _tuneControl(NORMAL) + { + } + + void + setWantNormal(void) + { + _tuneControl = NORMAL; + } + + void + setWantDirectIO(void) + { + _tuneControl = DIRECTIO; + } + + bool + getWantDirectIO(void) const + { + return _tuneControl == DIRECTIO; + } + + template + void + setFromConfig(const enum Config::Io &config) + { + switch (config) { + case Config::NORMAL: + _tuneControl = NORMAL; + break; + case Config::DIRECTIO: + _tuneControl = DIRECTIO; + break; + default: + _tuneControl = NORMAL; + break; + } + } + + bool + operator==(const TuneFileSeqRead &rhs) const + { + return _tuneControl == rhs._tuneControl; + } + + bool + operator!=(const TuneFileSeqRead &rhs) const + { + return _tuneControl != rhs._tuneControl; + } +}; + + +class TuneFileSeqWrite +{ +public: + enum TuneControl + { + NORMAL, + OSYNC, + DIRECTIO + }; + +private: + + 
TuneControl _tuneControl; + +public: + TuneFileSeqWrite(void) + : _tuneControl(NORMAL) + { + } + + void + setWantNormal(void) + { + _tuneControl = NORMAL; + } + + void + setWantSyncWrites(void) + { + _tuneControl = OSYNC; + } + + void + setWantDirectIO(void) + { + _tuneControl = DIRECTIO; + } + + bool + getWantDirectIO(void) const + { + return _tuneControl == DIRECTIO; + } + + bool + getWantSyncWrites(void) const + { + return _tuneControl == OSYNC; + } + + template + void + setFromConfig(const enum Config::Io &config) + { + switch (config) { + case Config::NORMAL: + _tuneControl = NORMAL; + break; + case Config::OSYNC: + _tuneControl = OSYNC; + break; + case Config::DIRECTIO: + _tuneControl = DIRECTIO; + break; + default: + _tuneControl = NORMAL; + break; + } + } + + bool + operator==(const TuneFileSeqWrite &rhs) const + { + return _tuneControl == rhs._tuneControl; + } + + bool + operator!=(const TuneFileSeqWrite &rhs) const + { + return _tuneControl != rhs._tuneControl; + } +}; + + +class TuneFileRandRead +{ +public: + enum TuneControl { NORMAL, DIRECTIO, MMAP }; +private: + TuneControl _tuneControl; + int _mmapFlags; + int _advise; +public: + TuneFileRandRead(void) + : _tuneControl(NORMAL), + _mmapFlags(0), + _advise(0) + { + } + + void setMemoryMapFlags(int flags) { _mmapFlags = flags; } + void setAdvise(int advise) { _advise = advise; } + void setWantMemoryMap() { _tuneControl = MMAP; } + void setWantDirectIO() { _tuneControl = DIRECTIO; } + void setWantNormal() { _tuneControl = NORMAL; } + bool getWantDirectIO() const { return _tuneControl == DIRECTIO; } + bool getWantMemoryMap() const { return _tuneControl == MMAP; } + int getMemoryMapFlags() const { return _mmapFlags; } + int getAdvise() const { return _advise; } + + template + void + setFromConfig(const enum TuneControlConfig::Io & tuneControlConfig, const MMapConfig & mmapFlags) + { + switch ( tuneControlConfig) { + case TuneControlConfig::NORMAL: _tuneControl = NORMAL; break; + case 
TuneControlConfig::DIRECTIO: _tuneControl = DIRECTIO; break; + case TuneControlConfig::MMAP: _tuneControl = MMAP; break; + default: _tuneControl = NORMAL; break; + } + for (size_t i(0), m(mmapFlags.options.size()); i < m; i++) { + switch (mmapFlags.options[i]) { + case MMapConfig::MLOCK: _mmapFlags |= MAP_LOCKED; break; + case MMapConfig::POPULATE: _mmapFlags |= MAP_POPULATE; break; + case MMapConfig::HUGETLB: _mmapFlags |= MAP_HUGETLB; break; + } + } + switch (mmapFlags.advise) { + case MMapConfig::NORMAL: setAdvise(POSIX_FADV_NORMAL); break; + case MMapConfig::RANDOM: setAdvise(POSIX_FADV_RANDOM); break; + case MMapConfig::SEQUENTIAL: setAdvise(POSIX_FADV_SEQUENTIAL); break; + } + } + + bool + operator==(const TuneFileRandRead &rhs) const { + return (_tuneControl == rhs._tuneControl) && (_mmapFlags == rhs._mmapFlags); + } + + bool + operator!=(const TuneFileRandRead &rhs) const { + // Exact negation of operator==, so the two operators can never disagree. + return !(*this == rhs); + } +}; + + +/** + * Controls file access for indexed fields, word list and dictionary + * during memory dump and fusion. + */ +class TuneFileIndexing +{ +public: + TuneFileSeqRead _read; + TuneFileSeqWrite _write; + + TuneFileIndexing(void) + : _read(), + _write() + { + } + + TuneFileIndexing(const TuneFileSeqRead &r, + const TuneFileSeqWrite &w) + : _read(r), + _write(w) + { + } + + bool + operator==(const TuneFileIndexing &rhs) const + { + return _read == rhs._read && + _write == rhs._write; + } + + bool + operator!=(const TuneFileIndexing &rhs) const + { + return _read != rhs._read || + _write != rhs._write; + } +}; + + +/** + * Controls file access for indexed fields and dictionary during + * search. 
+ */ +class TuneFileSearch +{ +public: + TuneFileRandRead _read; + + TuneFileSearch(void) + : _read() + { + } + + TuneFileSearch(const TuneFileRandRead &r) + : _read(r) + { + } + + bool + operator==(const TuneFileSearch &rhs) const + { + return _read == rhs._read; + } + + bool + operator!=(const TuneFileSearch &rhs) const + { + return _read != rhs._read; + } +}; + + +/** + * Controls file access for indexed fields and dictionary during + * memory dump, fusion and search. + */ +class TuneFileIndexManager +{ +public: + TuneFileIndexing _indexing; + TuneFileSearch _search; + + TuneFileIndexManager(void) + : _indexing(), + _search() + { + } + + bool + operator==(const TuneFileIndexManager &rhs) const + { + return _indexing == rhs._indexing && + _search == rhs._search; + } + + bool + operator!=(const TuneFileIndexManager &rhs) const + { + return _indexing != rhs._indexing || + _search != rhs._search; + } +}; + + +/** + * Controls file access for writing attributes to disk. + */ +class TuneFileAttributes +{ +public: + TuneFileSeqWrite _write; + + TuneFileAttributes(void) + : _write() + { + } + + bool + operator==(const TuneFileAttributes &rhs) const + { + return _write == rhs._write; + } + + bool + operator!=(const TuneFileAttributes &rhs) const + { + return _write != rhs._write; + } +}; + + +/** + * Controls file access for summaries (docstore). + */ +class TuneFileSummary +{ +public: + TuneFileSeqRead _seqRead; + TuneFileSeqWrite _write; + TuneFileRandRead _randRead; + + TuneFileSummary(void) + : _seqRead(), + _write(), + _randRead() + { + } + + bool + operator==(const TuneFileSummary &rhs) const + { + return _seqRead == rhs._seqRead && + _write == rhs._write && + _randRead == rhs._randRead; + } + + bool + operator!=(const TuneFileSummary &rhs) const + { + return _seqRead != rhs._seqRead || + _write != rhs._write || + _randRead != rhs._randRead; + } +}; + + +/** + * Controls file access for document db, i.e. "everything". 
+ */ +class TuneFileDocumentDB +{ +public: + typedef std::shared_ptr SP; + + TuneFileIndexManager _index; + TuneFileAttributes _attr; + TuneFileSummary _summary; + + TuneFileDocumentDB(void) + : _index(), + _attr(), + _summary() + { + } + + bool + operator==(const TuneFileDocumentDB &rhs) const + { + return _index == rhs._index && + _attr == rhs._attr && + _summary == rhs._summary; + } + + bool + operator!=(const TuneFileDocumentDB &rhs) const + { + return _index != rhs._index || + _attr != rhs._attr || + _summary != rhs._summary; + } +}; + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/config/.gitignore b/searchlib/src/vespa/searchlib/config/.gitignore new file mode 100644 index 00000000000..0d614ad8ec7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/config/.gitignore @@ -0,0 +1,5 @@ +*.So +.depend +Makefile +config-*.cpp +config-*.h diff --git a/searchlib/src/vespa/searchlib/config/CMakeLists.txt b/searchlib/src/vespa/searchlib/config/CMakeLists.txt new file mode 100644 index 00000000000..ad1a75f8b84 --- /dev/null +++ b/searchlib/src/vespa/searchlib/config/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_sconfig OBJECT + SOURCES + DEPENDS +) +vespa_generate_config(searchlib_sconfig translogserver.def) +install(FILES translogserver.def DESTINATION var/db/vespa/config_server/serverdb/classes) diff --git a/searchlib/src/vespa/searchlib/config/translogserver.def b/searchlib/src/vespa/searchlib/config/translogserver.def new file mode 100644 index 00000000000..b617e6e2783 --- /dev/null +++ b/searchlib/src/vespa/searchlib/config/translogserver.def @@ -0,0 +1,24 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=searchlib + +## Port number to use for listening. 
+listenport int default=13700 restart + +## Max file size (50M) +filesizemax int default=50000000 restart + +## Server name to identify server. +servername string default="tls" restart + +## Base directory. The default is not used as it is decided by the model. +basedir string default="tmp" restart + +## Use fsync after each commit. +## If not the below interval is used. +usefsync bool default=false restart + +##Number of threads available for visiting/subscription. +maxthreads int default=4 restart + +##Default crc method used +crcmethod enum {ccitt_crc32, xxh64} default=xxh64 diff --git a/searchlib/src/vespa/searchlib/diskindex/.gitignore b/searchlib/src/vespa/searchlib/diskindex/.gitignore new file mode 100644 index 00000000000..0b3af54ee50 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt new file mode 100644 index 00000000000..1cde63458ec --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_searchlib_diskindex OBJECT + SOURCES + bitvectordictionary.cpp + bitvectorfile.cpp + bitvectoridxfile.cpp + bitvectorkeyscope.cpp + checkpointfile.cpp + dictionarywordreader.cpp + diskindex.cpp + disktermblueprint.cpp + docidmapper.cpp + extposocc.cpp + fieldreader.cpp + fieldwriter.cpp + fileheader.cpp + fusion.cpp + indexbuilder.cpp + pagedict4file.cpp + pagedict4randread.cpp + wordnummapper.cpp + zcposocc.cpp + zcposocciterators.cpp + zcposoccrandread.cpp + zcposting.cpp + zcpostingiterators.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/diskindex/OWNERS b/searchlib/src/vespa/searchlib/diskindex/OWNERS new file mode 100644 index 00000000000..64735d11d93 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/OWNERS @@ -0,0 +1 @@ +tegge diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp new file mode 100644 index 00000000000..799b02dd071 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".diskindex.bitvectordictionary"); +#include "bitvectordictionary.h" +#include + +namespace search +{ + +namespace diskindex +{ + + +BitVectorDictionary::BitVectorDictionary() + : _docIdLimit(), + _entries(), + _vectorSize(), + _datFile(), + _datHeaderLen(0u) +{ +} + + +BitVectorDictionary::~BitVectorDictionary() +{ + if (_datFile.get() != NULL) { + _datFile->Close(); + } +} + + +bool +BitVectorDictionary::open(const vespalib::string &pathPrefix, + const TuneFileRandRead &tuneFileRead, + BitVectorKeyScope scope) +{ + vespalib::string booloccIdxName = pathPrefix + "boolocc" + + getBitVectorKeyScopeSuffix(scope); + vespalib::string booloccDatName = pathPrefix + "boolocc.bdat"; + FastOS_File idxFile; + idxFile.OpenReadOnly(booloccIdxName.c_str()); + if (!idxFile.IsOpened()) { + LOG(warning, "Could not open bitvector idx file '%s'", + booloccIdxName.c_str()); + return false; + } + + vespalib::FileHeader idxHeader; + uint32_t idxHeaderLen = idxHeader.readFile(idxFile); + idxFile.SetPosition(idxHeaderLen); + assert(idxHeader.hasTag("frozen")); + assert(idxHeader.hasTag("docIdLimit")); + assert(idxHeader.hasTag("numKeys")); + assert(idxHeader.getTag("frozen").asInteger() != 0); + _docIdLimit = idxHeader.getTag("docIdLimit").asInteger(); + uint32_t numEntries = idxHeader.getTag("numKeys").asInteger(); + + _entries.resize(numEntries); + size_t bufSize = sizeof(WordSingleKey) * numEntries; + assert(idxFile.GetSize() >= static_cast(idxHeaderLen + bufSize)); + if (bufSize > 0) { + idxFile.Read(&_entries[0], bufSize); + } + idxFile.Close(); + + _vectorSize = BitVector::getFileBytes(_docIdLimit); + _datFile.reset(new FastOS_File()); + if (tuneFileRead.getWantMemoryMap()) { + _datFile->enableMemoryMap(tuneFileRead.getMemoryMapFlags()); + } else if (tuneFileRead.getWantDirectIO()) { + _datFile->EnableDirectIO(); + } + _datFile->OpenReadOnly(booloccDatName.c_str()); + if (!_datFile->IsOpened()) { + LOG(warning, "Could not open bitvector dat file 
'%s'", + booloccDatName.c_str()); + return false; + } + vespalib::FileHeader datHeader(64); + _datHeaderLen = datHeader.readFile(*_datFile); + assert(_datFile->GetSize() >= + static_cast(_vectorSize) * numEntries + _datHeaderLen); + return true; +} + + +BitVector::UP +BitVectorDictionary::lookup(uint64_t wordNum) +{ + WordSingleKey key; + key._wordNum = wordNum; + std::vector::const_iterator itr = + std::lower_bound(_entries.begin(), _entries.end(), key); + if (itr == _entries.end() || key < *itr) { + return BitVector::UP(); + } + int64_t pos = &*itr - &_entries[0]; + return BitVector::create(_docIdLimit, *_datFile, + ((int64_t) _vectorSize) * pos + _datHeaderLen, + itr->_numDocs); +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h new file mode 100644 index 00000000000..75b88de1a75 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h @@ -0,0 +1,81 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include "bitvectorkeyscope.h" + +namespace search { + +namespace diskindex { + +/** + * This dictionary provides a sparse mapping from word number -> BitVector. + * The dictionary is constructed based on the boolocc idx file and + * the actual bit vectors are stored in the boolocc dat file. 
+ **/ +class BitVectorDictionary +{ +private: + BitVectorDictionary(const BitVectorDictionary &rhs); + + BitVectorDictionary & + operator=(const BitVectorDictionary &rhs); + + typedef search::index::BitVectorWordSingleKey WordSingleKey; + + uint32_t _docIdLimit; + std::vector _entries; + size_t _vectorSize; + std::unique_ptr _datFile; + uint32_t _datHeaderLen; + +public: + typedef std::shared_ptr SP; + + BitVectorDictionary(); + ~BitVectorDictionary(); + + /** + * Open this dictionary using the following path prefix to where + * the files are located. The boolocc idx file is loaded into + * memory while the dat file is just opened. + * + * @param pathPrefix the path prefix to where the boolocc files + * are located. + * @return true if the files could be opened. + **/ + bool + open(const vespalib::string &pathPrefix, + const TuneFileRandRead &tuneFileRead, + BitVectorKeyScope scope); + + /** + * Lookup the given word number and load and return the associated + * bit vector if found. + * + * @param wordNum the word number to lookup a bit vector for. + * @return the loaded bit vector or NULL if not found. + **/ + BitVector::UP + lookup(uint64_t wordNum); + + uint32_t + getDocIdLimit() const + { + return _docIdLimit; + } + + const std::vector & + getEntries() const + { + return _entries; + } +}; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp new file mode 100644 index 00000000000..0a2c9cbc955 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -0,0 +1,238 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP(".diskindex.bitvectorfile"); +#include +#include "bitvectorfile.h" +#include +#include +#include +#include + +namespace search +{ + +namespace diskindex +{ + +using vespalib::nbostream; +using search::index::BitVectorWordSingleKey; +using search::common::FileHeaderContext; + +namespace { + +void +readHeader(vespalib::FileHeader &h, + const vespalib::string &name) +{ + Fast_BufferedFile file(32768u); + file.OpenReadOnly(name.c_str()); + h.readFile(file); + file.Close(); +} + +const size_t FILE_HEADERSIZE_ALIGNMENT = 4096; + +} + +BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope) + : BitVectorIdxFileWrite(scope), + _datFile(NULL), + _datHeaderLen(0) +{ +} + + +BitVectorFileWrite::~BitVectorFileWrite(void) +{ + // No implicit close() call, but cleanup memory allocations. + delete _datFile; +} + + +void +BitVectorFileWrite::checkPointWrite(nbostream &out) +{ + flush(); + Parent::checkPointWriteCommon(out); + out << _datHeaderLen; + sync(); +} + + +void +BitVectorFileWrite::checkPointRead(nbostream &in) +{ + Parent::checkPointRead(in); + in >> _datHeaderLen; +} + + +void +BitVectorFileWrite::open(const vespalib::string &name, + uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + vespalib::string datname = name + ".bdat"; + + assert(_datFile == NULL); + + Parent::open(name, docIdLimit, tuneFileWrite, fileHeaderContext); + + FastOS_FileInterface *datfile = new FastOS_File; + _datFile = new Fast_BufferedFile(datfile); + if (tuneFileWrite.getWantSyncWrites()) + _datFile->EnableSyncWrites(); + if (tuneFileWrite.getWantDirectIO()) + _datFile->EnableDirectIO(); + // XXX no checking for success: + _datFile->OpenWriteOnly(datname.c_str()); + + if (_datHeaderLen == 0) { + assert(_numKeys == 0); + makeDatHeader(fileHeaderContext); + } + + int64_t pos; + size_t 
bitmapbytes; + + bitmapbytes = BitVector::getFileBytes(_docIdLimit); + + pos = static_cast(_numKeys) * + static_cast(bitmapbytes) + _datHeaderLen; + + int64_t olddatsize = _datFile->GetSize(); + assert(olddatsize >= pos); + (void) olddatsize; + + _datFile->SetSize(pos); + + assert(pos == _datFile->GetPosition()); +} + + +void +BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext) +{ + vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); // 64 byte alignment on bitvector.dat header + typedef vespalib::GenericHeader::Tag Tag; + fileHeaderContext.addTags(h, _datFile->GetFileName()); + h.putTag(Tag("docIdLimit", _docIdLimit)); + h.putTag(Tag("numKeys", _numKeys)); + h.putTag(Tag("frozen", 0)); + h.putTag(Tag("fileBitSize", 0)); + h.putTag(Tag("desc", "Bitvector data file")); + _datFile->SetPosition(0); + _datHeaderLen = h.writeFile(*_datFile); + _datFile->Flush(); +} + + +void +BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize) +{ + vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + typedef vespalib::GenericHeader::Tag Tag; + readHeader(h, _datFile->GetFileName()); + FileHeaderContext::setFreezeTime(h); + h.putTag(Tag("numKeys", _numKeys)); + h.putTag(Tag("frozen", 1)); + h.putTag(Tag("fileBitSize", fileBitSize)); + _datFile->Flush(); + _datFile->Sync(); + assert(h.getSize() == _datHeaderLen); + _datFile->SetPosition(0); + h.writeFile(*_datFile); + _datFile->Flush(); + _datFile->Sync(); +} + + +void +BitVectorFileWrite::addWordSingle(uint64_t wordNum, + const BitVector &bitVector) +{ + assert(bitVector.size() == _docIdLimit); + bitVector.invalidateCachedCount(); + Parent::addWordSingle(wordNum, bitVector.countTrueBits()); + _datFile->WriteBuf(bitVector.getStart(), + bitVector.getFileBytes()); +} + + +void +BitVectorFileWrite::flush(void) +{ + Parent::flush(); + _datFile->Flush(); +} + + +void +BitVectorFileWrite::sync(void) +{ + flush(); + Parent::syncCommon(); + _datFile->Sync(); +} + + +void +BitVectorFileWrite::close(void) +{ + size_t 
bitmapbytes = BitVector::getFileBytes(_docIdLimit); + + if (_datFile != NULL) { + if (_datFile->IsOpened()) { + uint64_t pos = _datFile->GetPosition(); + assert(pos == static_cast(_numKeys) * + static_cast(bitmapbytes) + _datHeaderLen); + (void) bitmapbytes; + _datFile->alignEndForDirectIO(); + updateDatHeader(pos * 8); + _datFile->Close(); + } + delete _datFile; + _datFile = NULL; + } + Parent::close(); +} + + +void +BitVectorCandidate::checkPointWrite(nbostream &out) +{ + uint32_t docIdLimit = _bv->size(); + out << docIdLimit << _numDocs << _bitVectorLimit; + out.saveVector(_array); + if (getCrossedBitVectorLimit()) + out << *_bv; +} + + +void +BitVectorCandidate::checkPointRead(nbostream &in) +{ + uint32_t docIdLimit = _bv->size(); + uint32_t checkDocIdLimit; + uint32_t checkBitVectorLimit; + in >> checkDocIdLimit >> _numDocs >> checkBitVectorLimit; + assert(checkDocIdLimit == docIdLimit); + (void) docIdLimit; + assert(checkBitVectorLimit == _bitVectorLimit); + in.restoreVector(_array); + if (getCrossedBitVectorLimit()) { + in >> *_bv; + } else { + _bv->clear(); + } +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h new file mode 100644 index 00000000000..a33bd8e6c0f --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.h @@ -0,0 +1,204 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS +#pragma once + +#include +#include +#include +#include +#include "bitvectoridxfile.h" + +namespace search +{ + + +namespace diskindex +{ + + +class BitVectorFileWrite : public BitVectorIdxFileWrite +{ +private: + BitVectorFileWrite(const BitVectorFileWrite &) = delete; + BitVectorFileWrite(const BitVectorFileWrite &&) = delete; + BitVectorFileWrite& operator=(const BitVectorFileWrite &) = delete; + BitVectorFileWrite& operator=(const BitVectorFileWrite &&) = delete; + + using Parent = BitVectorIdxFileWrite; + + Fast_BufferedFile *_datFile; +public: + +private: + uint32_t _datHeaderLen; + +public: + BitVectorFileWrite(BitVectorKeyScope scope); + + ~BitVectorFileWrite(void); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + void + checkPointRead(vespalib::nbostream &in); + + void + open(const vespalib::string &name, uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext); + + + void + addWordSingle(uint64_t wordNum, const BitVector &bitVector); + + void + flush(void); + + void + sync(void); + + void + close(void); + + void + makeDatHeader(const common::FileHeaderContext &fileHeaderContext); + + void + updateDatHeader(uint64_t fileBitSize); +}; + + +/* + * Buffer document ids for a candidate bitvector. 
+ */ +class BitVectorCandidate +{ +private: + std::vector _array; + uint64_t _numDocs; + uint32_t _bitVectorLimit; + BitVector::UP _bv; + +public: + BitVectorCandidate(uint32_t docIdLimit, uint32_t bitVectorLimit) + : _array(), + _numDocs(0u), + _bitVectorLimit(bitVectorLimit), + _bv(BitVector::create(docIdLimit)) + { + _array.reserve(_bitVectorLimit); + } + + + BitVectorCandidate(uint32_t docIdLimit) + : _array(), + _numDocs(0u), + _bitVectorLimit(BitVectorFileWrite::getBitVectorLimit(docIdLimit)), + _bv(BitVector::create(docIdLimit)) + { + _array.reserve(_bitVectorLimit); + } + + void + clear(void) + { + if (__builtin_expect(_numDocs > _bitVectorLimit, false)) { + _bv->clear(); + } + _numDocs = 0; + _array.clear(); + } + + void + flush(BitVector &obv) + { + if (__builtin_expect(_numDocs > _bitVectorLimit, false)) { + obv.orWith(*_bv); + } else { + for (uint32_t i : _array) { + obv.setBit(i); + } + } + clear(); + } + + void + add(uint32_t docId) + { + if (_numDocs < _bitVectorLimit) { + _array.push_back(docId); + } else { + if (__builtin_expect(_numDocs == _bitVectorLimit, false)) { + for (uint32_t i : _array) { + _bv->setBit(i); + } + _array.clear(); + } + _bv->setBit(docId); + } + ++_numDocs; + } + + /* + * Get number of documents buffered. This might include duplicates. + */ + uint64_t + getNumDocs(void) const + { + return _numDocs; + } + + bool + empty(void) const + { + return _numDocs == 0; + } + + /* + * Return true if array limit has been exceeded and bitvector has been + * populated. + */ + bool + getCrossedBitVectorLimit(void) const + { + return _numDocs > _bitVectorLimit; + } + + BitVector & + getBitVector(void) + { + return *_bv; + } + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. 
Used when resuming indexing after an interrupt. + */ + void + checkPointRead(vespalib::nbostream &in); +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp new file mode 100644 index 00000000000..82c46d2172f --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -0,0 +1,233 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +LOG_SETUP(".diskindex.bitvectoridxfile"); +#include +#include "bitvectoridxfile.h" +#include +#include +#include +#include + +namespace search +{ + +namespace diskindex +{ + +using vespalib::nbostream; +using search::index::BitVectorWordSingleKey; +using search::common::FileHeaderContext; + +namespace { + +void +readHeader(vespalib::FileHeader &h, + const vespalib::string &name) +{ + Fast_BufferedFile file(32768u); + file.OpenReadOnly(name.c_str()); + h.readFile(file); + file.Close(); +} + +const size_t FILE_HEADERSIZE_ALIGNMENT = 4096; + +} + +BitVectorIdxFileWrite::BitVectorIdxFileWrite(BitVectorKeyScope scope) + : _idxFile(NULL), + _numKeys(0), + _docIdLimit(0), + _idxHeaderLen(0), + _scope(scope) +{ +} + + +BitVectorIdxFileWrite::~BitVectorIdxFileWrite(void) +{ + // No implicit close() call, but cleanup memory allocations. 
+ delete _idxFile; +} + + +uint64_t +BitVectorIdxFileWrite::idxSize(void) const +{ + return _idxHeaderLen + + static_cast(_numKeys) * sizeof(BitVectorWordSingleKey); +} + + +void +BitVectorIdxFileWrite::checkPointWriteCommon(nbostream &out) +{ + out << _scope; + out << _docIdLimit << _numKeys; + out << _idxHeaderLen; +} + + +void +BitVectorIdxFileWrite::checkPointWrite(nbostream &out) +{ + flush(); + checkPointWriteCommon(out); + sync(); +} + + +void +BitVectorIdxFileWrite::checkPointRead(nbostream &in) +{ + BitVectorKeyScope checkScope; + in >> checkScope; + assert(checkScope == _scope); + in >> _docIdLimit >> _numKeys; + in >> _idxHeaderLen; +} + + +void +BitVectorIdxFileWrite::open(const vespalib::string &name, + uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + if (_numKeys != 0) { + assert(docIdLimit == _docIdLimit); + } else { + _docIdLimit = docIdLimit; + } + vespalib::string idxname = name + getBitVectorKeyScopeSuffix(_scope); + + assert(_idxFile == NULL); + FastOS_FileInterface *idxfile = new FastOS_File; + _idxFile = new Fast_BufferedFile(idxfile); + if (tuneFileWrite.getWantSyncWrites()) + _idxFile->EnableSyncWrites(); + if (tuneFileWrite.getWantDirectIO()) + _idxFile->EnableDirectIO(); + + // XXX no checking for success: + _idxFile->OpenWriteOnly(idxname.c_str()); + + if (_idxHeaderLen == 0) { + assert(_numKeys == 0); + makeIdxHeader(fileHeaderContext); + } + + int64_t pos = idxSize(); + + int64_t oldidxsize = _idxFile->GetSize(); + assert(oldidxsize >= pos); + (void) oldidxsize; + + _idxFile->SetSize(pos); + + assert(pos == _idxFile->GetPosition()); +} + + +void +BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext) +{ + vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + typedef vespalib::GenericHeader::Tag Tag; + fileHeaderContext.addTags(h, _idxFile->GetFileName()); + h.putTag(Tag("docIdLimit", _docIdLimit)); + h.putTag(Tag("numKeys", _numKeys)); + 
h.putTag(Tag("frozen", 0)); + if (_scope != BitVectorKeyScope::SHARED_WORDS) { + h.putTag(Tag("fileBitSize", 0)); + } + h.putTag(Tag("desc", "Bitvector dictionary file, single words")); + _idxFile->SetPosition(0); + _idxHeaderLen = h.writeFile(*_idxFile); + _idxFile->Flush(); +} + + +void +BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize) +{ + vespalib::FileHeader h(FILE_HEADERSIZE_ALIGNMENT); + typedef vespalib::GenericHeader::Tag Tag; + readHeader(h, _idxFile->GetFileName()); + FileHeaderContext::setFreezeTime(h); + h.putTag(Tag("numKeys", _numKeys)); + h.putTag(Tag("frozen", 1)); + if (_scope != BitVectorKeyScope::SHARED_WORDS) { + h.putTag(Tag("fileBitSize", fileBitSize)); + } + _idxFile->Flush(); + _idxFile->Sync(); + assert(h.getSize() == _idxHeaderLen); + _idxFile->SetPosition(0); + h.writeFile(*_idxFile); + _idxFile->Flush(); + _idxFile->Sync(); +} + + +void +BitVectorIdxFileWrite::addWordSingle(uint64_t wordNum, uint32_t numDocs) +{ + BitVectorWordSingleKey key; + key._wordNum = wordNum; + key._numDocs = numDocs; + _idxFile->WriteBuf(&key, sizeof(key)); + ++_numKeys; +} + + +void +BitVectorIdxFileWrite::flush(void) +{ + _idxFile->Flush(); + + uint64_t pos = _idxFile->GetPosition(); + assert(pos == idxSize()); + (void) pos; +} + + +void +BitVectorIdxFileWrite::syncCommon() +{ + _idxFile->Sync(); +} + + +void +BitVectorIdxFileWrite::sync(void) +{ + flush(); + syncCommon(); +} + + +void +BitVectorIdxFileWrite::close(void) +{ + if (_idxFile != NULL) { + if (_idxFile->IsOpened()) { + uint64_t pos = _idxFile->GetPosition(); + assert(pos == idxSize()); + _idxFile->alignEndForDirectIO(); + updateIdxHeader(pos * 8); + _idxFile->Close(); + } + delete _idxFile; + _idxFile = NULL; + } +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h new file mode 100644 index 00000000000..269b6e659af --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS +#pragma once + +#include +#include +#include +#include +#include "bitvectorkeyscope.h" + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + + +namespace common +{ + +class FileHeaderContext; + +} + + +namespace diskindex +{ + +class BitVectorIdxFileWrite +{ +private: + BitVectorIdxFileWrite(const BitVectorIdxFileWrite &) = delete; + BitVectorIdxFileWrite(const BitVectorIdxFileWrite &&) = delete; + BitVectorIdxFileWrite& operator=(const BitVectorIdxFileWrite &) = delete; + BitVectorIdxFileWrite& operator=(const BitVectorIdxFileWrite &&) = delete; + + Fast_BufferedFile *_idxFile; + +public: + +protected: + uint32_t _numKeys; // Number of bitvectors and keys + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + uint32_t _idxHeaderLen; + BitVectorKeyScope _scope; + + uint64_t + idxSize(void) const; + + void + checkPointWriteCommon(vespalib::nbostream &out); + + void syncCommon(); + +public: + BitVectorIdxFileWrite(BitVectorKeyScope scope); + + ~BitVectorIdxFileWrite(void); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. 
+ */ + void + checkPointRead(vespalib::nbostream &in); + + void + open(const vespalib::string &name, uint32_t docIdLimit, + const TuneFileSeqWrite &tuneFileWrite, + const search::common::FileHeaderContext &fileHeaderContext); + + + + void + addWordSingle(uint64_t wordNum, uint32_t numDocs); + + void + flush(void); + + void + sync(void); + + void + close(void); + + static uint32_t + getBitVectorLimit(uint32_t docIdLimit) + { + // Must match FastS_BinSizeParams::CalcMaxBinSize() + uint32_t ret = (docIdLimit + 63) / 64; + if (ret < 16) + ret = 16; + if (ret > docIdLimit) + ret = docIdLimit; + return ret; + } + + void + makeIdxHeader(const search::common::FileHeaderContext &fileHeaderContext); + + void + updateIdxHeader(uint64_t fileBitSize); +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp new file mode 100644 index 00000000000..5c783035236 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include "bitvectorkeyscope.h" +#include + + +using search::diskindex::BitVectorKeyScope; + +namespace search +{ + +namespace diskindex +{ + +const char *getBitVectorKeyScopeSuffix(BitVectorKeyScope scope) +{ + switch (scope) { + case BitVectorKeyScope::SHARED_WORDS: + return ".bidx"; + default: + return ".idx"; + } +} + +} + +} + + +namespace { + +uint8_t +getVal(BitVectorKeyScope scope) +{ + switch (scope) { + case BitVectorKeyScope::SHARED_WORDS: + return 0u; + default: + return 1u; + } +} + + +const BitVectorKeyScope scopes[] = { BitVectorKeyScope::SHARED_WORDS, + BitVectorKeyScope::PERFIELD_WORDS }; + +} + + +namespace vespalib +{ + +nbostream & +operator<<(nbostream &stream, const BitVectorKeyScope &scope) +{ + uint8_t val = getVal(scope); + stream << val; + return stream; +} + +nbostream & +operator>>(nbostream &stream, BitVectorKeyScope &scope) +{ + uint8_t val; + stream >> val; + assert(val < sizeof(scopes) / sizeof(scopes[0])); + scope = scopes[val]; + return stream; +} + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h new file mode 100644 index 00000000000..5ae7a223629 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorkeyscope.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + + +namespace vespalib +{ + +class nbostream; + +} + + +namespace search +{ + +namespace diskindex +{ + +enum class BitVectorKeyScope +{ + SHARED_WORDS, + PERFIELD_WORDS +}; + +const char *getBitVectorKeyScopeSuffix(BitVectorKeyScope scope); + +} + +} + +namespace vespalib +{ + +nbostream & +operator<<(nbostream &stream, + const search::diskindex::BitVectorKeyScope &scope); + +nbostream & +operator>>(nbostream &stream, + search::diskindex::BitVectorKeyScope &scope); + +} diff --git a/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp new file mode 100644 index 00000000000..f8005a6ba97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.cpp @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".diskindex.checkpointfile"); +#include "checkpointfile.h" +#include +#include + +using vespalib::getLastErrorString; + +namespace search +{ + +namespace diskindex +{ + +using common::FileHeaderContext; + +CheckPointFile::CheckPointFile(const vespalib::string &name) + : _file(), + _name(name), + _nameNew(name + ".NEW"), + _nameNewNew(name + ".NEW.NEW"), + _writeOpened(false), + _headerLen(0u) +{ +} + + +CheckPointFile::~CheckPointFile(void) +{ + close(); +} + + +void +CheckPointFile::writeOpen(const FileHeaderContext &fileHeaderContext) +{ + FastOS_File::Delete(_nameNewNew.c_str()); + _file.OpenWriteOnly(_nameNewNew.c_str()); + _writeOpened = true; + makeHeader(fileHeaderContext); +} + + +bool +CheckPointFile::readOpen(void) +{ + bool openres; + + openres = _file.OpenReadOnly(_name.c_str()); + if (!openres) { + bool renameres = FastOS_File::Rename(_nameNew.c_str(), + _name.c_str()); + if (!renameres) + return false; + openres = _file.OpenReadOnly(_name.c_str()); + if (!openres) + return false; + } + _headerLen = readHeader(); + return true; +} + + +void 
+CheckPointFile::close(void) +{ + if (_writeOpened) { + _file.Sync(); + } + _file.Close(); + if (_writeOpened) { + updateHeader(); + rename1(); + rename2(); + } + _writeOpened = false; +} + + +void +CheckPointFile::rename1(void) +{ + FastOS_File::Delete(_nameNew.c_str()); + bool renameres = FastOS_File::Rename(_nameNewNew.c_str(), + _nameNew.c_str()); + if (!renameres) { + LOG(error, "FATAL: rename %s -> %s failed: %s", + _nameNewNew.c_str(), _nameNew.c_str(), getLastErrorString().c_str()); + abort(); + } +} + + +void +CheckPointFile::rename2(void) +{ + FastOS_File::Delete(_name.c_str()); + bool renameres = FastOS_File::Rename(_nameNew.c_str(), _name.c_str()); + if (!renameres) { + LOG(error, "FATAL: rename %s -> %s failed: %s", + _nameNew.c_str(), _name.c_str(), getLastErrorString().c_str()); + abort(); + } +} + + +void +CheckPointFile::remove(void) +{ + FastOS_File::Delete(_nameNew.c_str()); + FastOS_File::Delete(_name.c_str()); +} + + + +void +CheckPointFile::write(vespalib::nbostream &buf, + const FileHeaderContext &fileHeaderContext) +{ + writeOpen(fileHeaderContext); + _file.WriteBuf(buf.peek(), buf.size()); + close(); +} + + +bool +CheckPointFile::read(vespalib::nbostream &buf) +{ + if (!readOpen()) + return false; + size_t sz = _file.GetSize() - _headerLen; + + std::vector tmp(sz); + _file.ReadBuf(&tmp[0], sz); + buf.clear(); + buf.write(&tmp[0], sz); + std::vector().swap(tmp); + close(); + return true; +} + + +void +CheckPointFile::makeHeader(const FileHeaderContext &fileHeaderContext) +{ + vespalib::FileHeader header; + + typedef vespalib::GenericHeader::Tag Tag; + fileHeaderContext.addTags(header, _file.GetFileName()); + header.putTag(Tag("frozen", 0)); + header.putTag(Tag("desc", "Check point file")); + header.writeFile(_file); +} + + +void +CheckPointFile::updateHeader(void) +{ + vespalib::FileHeader h; + FastOS_File f; + f.OpenReadWrite(_nameNewNew.c_str()); + h.readFile(f); + FileHeaderContext::setFreezeTime(h); + typedef vespalib::GenericHeader::Tag 
Tag; + h.putTag(Tag("frozen", 1)); + h.rewriteFile(f); + f.Sync(); + f.Close(); +} + + +uint32_t +CheckPointFile::readHeader(void) +{ + vespalib::FileHeader h; + uint32_t headerLen = h.readFile(_file); + _file.SetPosition(headerLen); + assert(h.hasTag("frozen")); + assert(h.getTag("frozen").asInteger() != 0); + return headerLen; +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h new file mode 100644 index 00000000000..f8aed3a6cdd --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/checkpointfile.h @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace diskindex +{ + +class CheckPointFile +{ +public: + FastOS_File _file; + vespalib::string _name; + vespalib::string _nameNew; + vespalib::string _nameNewNew; + bool _writeOpened; + uint32_t _headerLen; + + void + writeOpen(const common::FileHeaderContext &fileHeaderContext); + + bool + readOpen(void); + + void + close(void); + + void + rename1(void); + + void + rename2(void); + + void + remove(void); + + void + makeHeader(const common::FileHeaderContext &fileHeaderContext); + + void + updateHeader(void); + + uint32_t + readHeader(void); +public: + CheckPointFile(const vespalib::string &name); + + ~CheckPointFile(void); + + void + write(vespalib::nbostream &buf, + const common::FileHeaderContext &fileHeaderContext); + + bool + read(vespalib::nbostream &buf); +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp new file mode 100644 index 00000000000..d0e611e3136 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include "dictionarywordreader.h" +#include +#include +#include +LOG_SETUP(".diskindex.dictionarywordreader"); + +namespace search +{ + +namespace diskindex +{ + +using vespalib::getLastErrorString; +using index::SchemaUtil; + +DictionaryWordReader::DictionaryWordReader(void) + : _word(), + _wordNum(noWordNumHigh()), + _old2newwordfile(), + _dictFile() +{ +} + + +DictionaryWordReader::~DictionaryWordReader(void) +{ +} + + +bool +DictionaryWordReader::open(const vespalib::stringref &dictionaryName, + const vespalib::stringref & wordMapName, + const TuneFileSeqRead &tuneFileRead) +{ + _old2newwordfile.reset(new Fast_BufferedFile(new FastOS_File)); + _dictFile.reset(new PageDict4FileSeqRead); + if (!_dictFile->open(dictionaryName, tuneFileRead)) { + LOG(error, "Could not open dictionary %s: %s", + dictionaryName.c_str(), getLastErrorString().c_str()); + return false; + } + _wordNum = noWordNum(); + + // Make a mapping from old to new wordID + if (tuneFileRead.getWantDirectIO()) + _old2newwordfile->EnableDirectIO(); + // no checking possible + _old2newwordfile->WriteOpen(wordMapName.c_str()); + _old2newwordfile->SetSize(0); + + return true; +} + +void +DictionaryWordReader::close(void) +{ + if (!_dictFile->close()) + LOG(error, "Error closing input dictionary"); + _old2newwordfile->Flush(); + _old2newwordfile->Sync(); + _old2newwordfile->Close(); +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h new file mode 100644 index 00000000000..744b73bf370 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.h @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "pagedict4file.h" +#include + +namespace search +{ + +namespace diskindex +{ + + +/* + * Helper class, will be used by fusion later to handle generation of + * word numbering without writing a word list file. + */ +class WordAggregator +{ +private: + vespalib::string _word; + uint64_t _wordNum; + +public: + WordAggregator() + : _word(), + _wordNum(0) + { + } + + void + tryWriteWord(const vespalib::stringref &word) + { + if (word != _word || _wordNum == 0) { + ++_wordNum; + _word = word; + } + } + + uint64_t + getWordNum() const + { + return _wordNum; + } +}; + + +/* + * Class used to merge words in multiple dictionaries for + * new style fusion (using WordAggregator). + */ +class DictionaryWordReader +{ +public: + vespalib::string _word; + uint64_t _wordNum; + index::PostingListCounts _counts; + +private: + // "owners" of file handles. + std::unique_ptr _old2newwordfile; + + using DictionaryFileSeqRead = index::DictionaryFileSeqRead; + std::unique_ptr _dictFile; + + void + allocFiles(void); + + static uint64_t + noWordNumHigh(void) + { + return std::numeric_limits::max(); + } + + static uint64_t + noWordNum(void) + { + return 0u; + } + +public: + DictionaryWordReader(void); + + ~DictionaryWordReader(void); + + bool + isValid(void) const + { + return _wordNum != noWordNumHigh(); + } + + bool + operator<(const DictionaryWordReader &rhs) const + { + if (!isValid()) + return false; + if (!rhs.isValid()) + return true; + return _word < rhs._word; + } + + void + read(void) + { + _dictFile->readWord(_word, _wordNum, _counts); + } + + bool + open(const vespalib::stringref & dictionaryName, + const vespalib::stringref & wordMapName, + const TuneFileSeqRead &tuneFileRead); + + void + close(void); + + void + writeNewWordNum(uint64_t newWordNum) + { + _old2newwordfile->WriteBuf(&newWordNum, sizeof(newWordNum)); + } + + void + write(WordAggregator &writer) + { + 
writer.tryWriteWord(_word); + writeNewWordNum(writer.getWordNum()); + } +}; + + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp new file mode 100644 index 00000000000..8cc12c88463 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -0,0 +1,476 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".diskindex.diskindex"); +#include "diskindex.h" +#include "disktermblueprint.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "pagedict4randread.h" +#include "fileheader.h" +#include "bitvectorkeyscope.h" + +using namespace search::index; +using namespace search::query; +using namespace search::queryeval; + +namespace search +{ + +namespace diskindex +{ + +void swap(DiskIndex::LookupResult & a, DiskIndex::LookupResult & b) +{ + a.swap(b); +} + +DiskIndex::LookupResult::LookupResult() + : indexId(0u), + wordNum(0), + counts(), + bitOffset(0) +{ +} + +DiskIndex::DiskIndex(const vespalib::string &indexDir, size_t cacheSize) + : _indexDir(indexDir), + _cacheSize(cacheSize), + _schema(), + _postingFiles(), + _bitVectorDicts(), + _dicts(), + _tuneFileSearch(), + _cache(*this, cacheSize) +{ +} + +bool +DiskIndex::loadSchema(void) +{ + vespalib::string schemaName = _indexDir + "/schema.txt"; + if (!_schema.loadFromFile(schemaName)) { + LOG(error, + "Could not open schema '%s'", + schemaName.c_str()); + return false; + } + if (!SchemaUtil::validateSchema(_schema)) { + LOG(error, + "Could not validate schema loaded from '%s'", + schemaName.c_str()); + return false; + } + return true; +} + + +bool +DiskIndex::openDictionaries(const TuneFileSearch &tuneFileSearch) +{ + for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) { + vespalib::string dictName = + _indexDir + "/" + 
itr.getName() + "/dictionary"; + auto dict = std::make_unique(); + if (!dict->open(dictName, tuneFileSearch._read)) { + LOG(warning, "Could not open disk dictionary '%s'", + dictName.c_str()); + _dicts.clear(); + return false; + } + _dicts.push_back(std::move(dict)); + } + return true; +} + + +bool +DiskIndex::openField(const vespalib::string &fieldDir, + const TuneFileSearch &tuneFileSearch) +{ + vespalib::string postingName = fieldDir + "posocc.dat.compressed"; + + DiskPostingFile::SP pFile; + BitVectorDictionary::SP bDict; + FileHeader fileHeader; + bool dynamicK = false; + if (fileHeader.taste(postingName, tuneFileSearch._read)) { + if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + fileHeader.getFormats()[0] == + DiskPostingFileDynamicKReal::getIdentifier() && + fileHeader.getFormats()[1] == + DiskPostingFileDynamicKReal::getSubIdentifier()) { + dynamicK = true; + } else if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + fileHeader.getFormats()[0] == + DiskPostingFileReal::getIdentifier() && + fileHeader.getFormats()[1] == + DiskPostingFileReal::getSubIdentifier()) { + dynamicK = false; + } else { + LOG(warning, + "Could not detect format for posocc file read %s", + postingName.c_str()); + } + } + pFile.reset(dynamicK ? 
+ new DiskPostingFileDynamicKReal() : + new DiskPostingFileReal()); + if (!pFile->open(postingName, tuneFileSearch._read)) { + LOG(warning, + "Could not open posting list file '%s'", + postingName.c_str()); + return false; + } + + bDict.reset(new BitVectorDictionary()); + if (!bDict->open(fieldDir, tuneFileSearch._read, + BitVectorKeyScope::PERFIELD_WORDS)) { + LOG(warning, + "Could not open bit vector dictionary in '%s'", + fieldDir.c_str()); + return false; + } + _postingFiles.push_back(pFile); + _bitVectorDicts.push_back(bDict); + return true; +} + + +bool +DiskIndex::setup(const TuneFileSearch &tuneFileSearch) +{ + if (!loadSchema() || !openDictionaries(tuneFileSearch)) + return false; + for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) { + vespalib::string fieldDir = + _indexDir + "/" + itr.getName() + "/"; + if (!openField(fieldDir, tuneFileSearch)) + return false; + } + _tuneFileSearch = tuneFileSearch; + return true; +} + + +bool +DiskIndex::setup(const TuneFileSearch &tuneFileSearch, + const DiskIndex &old) +{ + if (tuneFileSearch != old._tuneFileSearch) + return setup(tuneFileSearch); + if (!loadSchema() || !openDictionaries(tuneFileSearch)) + return false; + const Schema &oldSchema = old._schema; + for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) { + vespalib::string fieldDir = + _indexDir + "/" + itr.getName() + "/"; + SchemaUtil::IndexSettings settings = itr.getIndexSettings(); + if (settings.hasError()) + return false; + bool hasPhraseOcc = settings.hasPhrases(); + SchemaUtil::IndexIterator oItr(oldSchema, itr); + if (!itr.hasMatchingOldFields(oldSchema, hasPhraseOcc) || + !oItr.isValid()) { + if (!openField(fieldDir, tuneFileSearch)) + return false; + } else { + uint32_t oldPacked = oItr.getIndex(); + _postingFiles.push_back(old._postingFiles[oldPacked]); + _bitVectorDicts.push_back(old._bitVectorDicts[oldPacked]); + } + } + _tuneFileSearch = tuneFileSearch; + return true; +} + +DiskIndex::LookupResult::UP 
+DiskIndex::lookup(uint32_t index, const vespalib::stringref & word) +{ + /** Only used for testing */ + IndexList indexes; + indexes.push_back(index); + Key key(indexes, word); + LookupResultVector resultV(indexes.size()); + LookupResult::UP result; + if ( read(key, resultV)) { + result.reset(new LookupResult()); + result->swap(resultV[0]); + } + return result; +} + +namespace { + +bool +containsAll(const DiskIndex::IndexList & indexes, const DiskIndex::LookupResultVector & result) +{ + for (uint32_t index : indexes) { + bool found(false); + for (size_t i(0); !found && (i < result.size()); i++) { + found = index == result[i].indexId; + } + if ( ! found ) { + return false; + } + } + return true; +} + +DiskIndex::IndexList +unite(const DiskIndex::IndexList & indexes, const DiskIndex::LookupResultVector & result) +{ + vespalib::hash_set all; + for (uint32_t index : indexes) { + all.insert(index); + } + for (const DiskIndex::LookupResult & lr : result) { + all.insert(lr.indexId); + } + DiskIndex::IndexList v; + v.reserve(all.size()); + for (uint32_t indexId : all) { + v.push_back(indexId); + } + return v; +} + +} + +DiskIndex::LookupResultVector +DiskIndex::lookup(const std::vector & indexes, const vespalib::stringref & word) +{ + Key key(indexes, word); + LookupResultVector result; + if (_cacheSize > 0) { + result = _cache.read(key); + if (!containsAll(indexes, result)) { + key = Key(unite(indexes, result), word); + _cache.invalidate(key); + result = _cache.read(key); + } + } else { + read(key, result); + } + return result; +} + +bool +DiskIndex::read(const Key & key, LookupResultVector & result) +{ + uint64_t wordNum(0); + const IndexList & indexes(key.getIndexes()); + result.resize(indexes.size()); + for (size_t i(0); i < result.size(); i++) { + LookupResult & lr(result[i]); + lr.indexId = indexes[i]; + PostingListOffsetAndCounts offsetAndCounts; + wordNum = 0; + SchemaUtil::IndexIterator it(_schema, lr.indexId); + uint32_t fieldId = it.getIndex(); + if (fieldId < 
_dicts.size()) { + (void) _dicts[fieldId]->lookup(key.getWord(), wordNum, + offsetAndCounts); + } + lr.wordNum = wordNum; + lr.counts.swap(offsetAndCounts._counts); + lr.bitOffset = offsetAndCounts._offset; + } + return true; +} + +index::PostingListHandle::UP +DiskIndex::readPostingList(const LookupResult &lookupRes) const +{ + PostingListHandle::UP handle(new PostingListHandle()); + handle->_bitOffset = lookupRes.bitOffset; + handle->_bitLength = lookupRes.counts._bitLength; + SchemaUtil::IndexIterator it(_schema, lookupRes.indexId); + handle->_file = _postingFiles[it.getIndex()].get(); + if (handle->_file == NULL) { + return PostingListHandle::UP(); + } + const uint32_t firstSegment = 0; + const uint32_t numSegments = 0; // means all segments + handle->_file->readPostingList(lookupRes.counts, + firstSegment, + numSegments, + *handle); + return handle; +} + + +BitVector::UP +DiskIndex::readBitVector(const LookupResult &lookupRes) const +{ + SchemaUtil::IndexIterator it(_schema, lookupRes.indexId); + BitVectorDictionary * dict = _bitVectorDicts[it.getIndex()].get(); + if (dict == NULL) { + return BitVector::UP(); + } + return dict->lookup(lookupRes.wordNum); +} + + +uint64_t +DiskIndex::getSize() const +{ + search::DirectoryTraverse dirt(_indexDir.c_str()); + return dirt.GetTreeSize(); +} + + +namespace +{ + +DiskIndex::LookupResult _G_nothing; + +class LookupCache +{ +public: + LookupCache(DiskIndex & diskIndex, const std::vector & fieldIds) : + _diskIndex(diskIndex), + _fieldIds(fieldIds), + _cache() + { + } + const DiskIndex::LookupResult & + lookup(const vespalib::string & word, uint32_t fieldId) { + Cache::const_iterator it = _cache.find(word); + if (it == _cache.end()) { + _cache[word] = _diskIndex.lookup(_fieldIds, word); + it = _cache.find(word); + } + for (size_t i(0); i < it->second.size(); i++) { + if (it->second[i].indexId == fieldId) { + return it->second[i]; + } + } + return _G_nothing; + } +private: + typedef vespalib::hash_map Cache; + DiskIndex & 
_diskIndex; + const std::vector & _fieldIds; + Cache _cache; +}; + +class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper +{ +private: + LookupCache &_cache; + DiskIndex &_diskIndex; + const FieldSpec &_field; + const uint32_t _fieldId; + +public: + CreateBlueprintVisitor(LookupCache & cache, DiskIndex &diskIndex, + const IRequestContext & requestContext, + const FieldSpec &field, + uint32_t fieldId) + : CreateBlueprintVisitorHelper(diskIndex, field, requestContext), + _cache(cache), + _diskIndex(diskIndex), + _field(field), + _fieldId(fieldId) + { + } + + template + void + visitTerm(TermNode &n) + { + const vespalib::string termStr = termAsString(n); + const DiskIndex::LookupResult & lookupRes = _cache.lookup(termStr, _fieldId); + if (lookupRes.valid()) { + bool useBitVector = _field.isFilter(); + DiskIndex::LookupResult::UP copy(new DiskIndex::LookupResult(lookupRes)); + setResult(make_UP(new DiskTermBlueprint(_field, _diskIndex, std::move(copy), useBitVector))); + } else { + setResult(make_UP(new EmptyBlueprint(_field))); + } + } + + virtual void visit(NumberTerm &n) { + handleNumberTermAsText(n); + } + + virtual void visit(LocationTerm &n) { visitTerm(n); } + virtual void visit(PrefixTerm &n) { visitTerm(n); } + virtual void visit(RangeTerm &n) { visitTerm(n); } + virtual void visit(StringTerm &n) { visitTerm(n); } + virtual void visit(SubstringTerm &n) { visitTerm(n); } + virtual void visit(SuffixTerm &n) { visitTerm(n); } + virtual void visit(RegExpTerm &n) { visitTerm(n); } + virtual void visit(PredicateQuery &) { } +}; + + +Blueprint::UP +createBlueprintHelper(LookupCache & cache, DiskIndex & diskIndex, const IRequestContext & requestContext, + const FieldSpec &field, uint32_t fieldId, const Node &term) +{ + if (fieldId != Schema::UNKNOWN_FIELD_ID) { + CreateBlueprintVisitor visitor(cache, diskIndex, requestContext, field, fieldId); + const_cast(term).accept(visitor); + return visitor.getResult(); + } + return Blueprint::UP(new 
EmptyBlueprint(field)); +} + +} // namespace + +Blueprint::UP +DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpec &field, const Node &term) +{ + std::vector fieldIds; + fieldIds.push_back(_schema.getIndexFieldId(field.getName())); + LookupCache cache(*this, fieldIds); + return createBlueprintHelper(cache, *this, requestContext, field, fieldIds[0], term); +} + + +/* + * Create a blueprint for a term searched in several fields. + * One child blueprint is built per entry in 'fields' (all children share a + * single LookupCache so the dictionary is consulted once per word) and the + * children are combined under an OrBlueprint. If there is exactly one + * child it is returned directly; an empty field list yields an + * EmptyBlueprint. + */ +Blueprint::UP +DiskIndex::createBlueprint(const IRequestContext & requestContext, const FieldSpecList &fields, const Node &term) +{ + if (fields.empty()) { + return Blueprint::UP(new EmptyBlueprint()); + } + + std::vector fieldIds; + fieldIds.reserve(fields.size()); + for (size_t i(0); i< fields.size(); i++) { + const FieldSpec & field = fields[i]; + uint32_t fieldId = _schema.getIndexFieldId(field.getName()); + if (fieldId != Schema::UNKNOWN_FIELD_ID) { + /* Reuse the id looked up above instead of asking the schema again. */ + fieldIds.push_back(fieldId); + } + } + Blueprint::UP result(new OrBlueprint()); + OrBlueprint & orbp(static_cast(*result)); + LookupCache cache(*this, fieldIds); + for (size_t i(0); i< fields.size(); i++) { + const FieldSpec & field = fields[i]; + orbp.addChild(createBlueprintHelper(cache, *this, requestContext, field, _schema.getIndexFieldId(field.getName()), term)); + } + if (orbp.childCnt() == 1) { + return orbp.removeChild(0); + } else { + return result; + } +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.h b/searchlib/src/vespa/searchlib/diskindex/diskindex.h new file mode 100644 index 00000000000..840f4c32738 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.h @@ -0,0 +1,193 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { + +namespace diskindex { + +/** + * This class represents a disk index with a common dictionary, and + * posting list files and bit vector files for each field. + * Parts of the disk dictionary and all bit vector + * dictionaries are loaded into memory during setup. All other files + * are just opened, ready for later access. + **/ +class DiskIndex : public queryeval::Searchable +{ +public: + /** + * The result after performing a disk dictionary lookup. + **/ + struct LookupResult { + uint32_t indexId; + uint64_t wordNum; + index::PostingListCounts counts; + uint64_t bitOffset; + typedef std::unique_ptr UP; + LookupResult(); + bool valid() const { return counts._numDocs > 0; } + void swap(LookupResult & rhs) { + std::swap(indexId , rhs.indexId); + std::swap(wordNum , rhs.wordNum); + counts.swap(rhs.counts); + std::swap(bitOffset , rhs.bitOffset); + } + }; + typedef std::vector LookupResultVector; + typedef std::vector IndexList; + + class Key { + public: + Key() : _indexes() { } + Key(const IndexList & indexes, vespalib::stringref word) : + _word(word), + _indexes(indexes) + { } + uint32_t hash() const { + return vespalib::hashValue(_word.c_str(), _word.size()); + } + bool operator == (const Key & rhs) const { + return _word == rhs._word; + } + void push_back(uint32_t indexId) { _indexes.push_back(indexId); } + const IndexList & getIndexes() const { return _indexes; } + const vespalib::string & getWord() const { return _word; } + private: + vespalib::string _word; + IndexList _indexes; + }; +private: + typedef index::PostingListFileRandRead DiskPostingFile; + typedef Zc4PosOccRandRead DiskPostingFileReal; + typedef ZcPosOccRandRead DiskPostingFileDynamicKReal; + typedef vespalib::cache, DiskIndex>> Cache; + + vespalib::string _indexDir; + size_t _cacheSize; + index::Schema _schema; + std::vector _postingFiles; + std::vector _bitVectorDicts; + std::vector> _dicts; + 
TuneFileSearch _tuneFileSearch; + Cache _cache; + + bool + loadSchema(void); + + bool + openDictionaries(const TuneFileSearch &tuneFileSearch); + + bool + openField(const vespalib::string &fieldDir, + const TuneFileSearch &tuneFileSearch); + +public: + /** + * Create a view of the disk index located in the given directory + * described by the given schema. + * + * @param indexDir the directory where the disk index is located. + **/ + DiskIndex(const vespalib::string &indexDir, size_t cacheSize=0); + + /** + * Setup this instance by opening and loading relevant index files. + * + * @return true if this instance was successfully setup. + **/ + bool + setup(const TuneFileSearch &tuneFileSearch); + + bool + setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old); + + /** + * Perform a dictionary lookup for the given word in the given + * field. + * + * @param indexId the id of the field to + * perform lookup for. + * @param word the word to lookup. + * @return the lookup result or NULL if the word is not found. + **/ + LookupResult::UP + lookup(uint32_t indexId, const vespalib::stringref & word); + + LookupResultVector + lookup(const std::vector & indexes, const vespalib::stringref & word); + + + /** + * Read the posting list corresponding to the given lookup result. + * + * @param lookupRes the result of the previous dictionary lookup. + * @return a handle for the posting list in memory. + **/ + index::PostingListHandle::UP + readPostingList(const LookupResult &lookupRes) const; + + /** + * Read the bit vector corresponding to the given lookup result. + * + * @param lookupRes the result of the previous dictionary lookup. + * @return the bit vector or NULL if no bit vector exists for the + * word in the lookup result. 
+ **/ + BitVector::UP + readBitVector(const LookupResult &lookupRes) const; + + // Inherit doc from Searchable + virtual queryeval::Blueprint::UP + createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpec &field, + const query::Node &term); + + virtual queryeval::Blueprint::UP + createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpecList &fields, + const query::Node &term); + + /** + * Get the size on disk of this index. + * @return the size of the index. + */ + uint64_t + getSize() const; + + const index::Schema & + getSchema(void) const + { + return _schema; + } + + const vespalib::string & + getIndexDir(void) const + { + return _indexDir; + } + + const TuneFileSearch & + getTuneFileSearch(void) const + { + return _tuneFileSearch; + } + + /** + * Needed for the Cache::BackingStore interface. + */ + bool read(const Key & key, LookupResultVector & result); +}; + +void swap(DiskIndex::LookupResult & a, DiskIndex::LookupResult & b); + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp new file mode 100644 index 00000000000..258eaac51cf --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".diskindex.disktermblueprint"); + +#include "disktermblueprint.h" +#include +#include +#include +#include +#include + +using search::BitVectorIterator; +using search::fef::TermFieldMatchDataArray; +using search::index::Schema; +using search::queryeval::BooleanMatchIteratorWrapper; +using search::queryeval::FieldSpecBase; +using search::queryeval::FieldSpecBaseList; +using search::queryeval::SearchIterator; +using search::queryeval::LeafBlueprint; +using search::queryeval::EquivBlueprint; +using search::queryeval::Blueprint; + +namespace search { +namespace diskindex { + +namespace { + +vespalib::string +getName(uint32_t indexId) +{ + return vespalib::make_string("fieldId(%u)", indexId); +} + +} + +/* + * Create a blueprint for one dictionary lookup result. Nothing is read + * from disk here; the hit estimate is taken from the document count in + * 'lookupRes' and is flagged empty when that count is zero. + */ +DiskTermBlueprint::DiskTermBlueprint(const FieldSpecBase & field, + const search::diskindex::DiskIndex & diskIndex, + search::diskindex::DiskIndex::LookupResult::UP lookupRes, + bool useBitVector) : + SimpleLeafBlueprint(field), + _field(field), + _diskIndex(diskIndex), + _lookupRes(std::move(lookupRes)), + _useBitVector(useBitVector), + _fetchPostingsDone(false), + _hasEquivParent(false), + _postingHandle(), + _bitVector() +{ + setEstimate(HitEstimate(_lookupRes->counts._numDocs, + _lookupRes->counts._numDocs == 0)); +} + + +/* + * Copy constructor. The lookup result is deep-copied. If 'rhs' has + * already fetched its postings, the bit vector is cloned and the posting + * list is read again from the disk index, so that the copy is usable by + * createLeafSearch() without another fetchPostings() call. + */ +DiskTermBlueprint::DiskTermBlueprint(const DiskTermBlueprint & rhs) + : SimpleLeafBlueprint(rhs), + _field(rhs._field), + _diskIndex(rhs._diskIndex), + _lookupRes(new DiskIndex::LookupResult(*rhs._lookupRes)), + _useBitVector(rhs._useBitVector), + _fetchPostingsDone(rhs._fetchPostingsDone), + _hasEquivParent(rhs._hasEquivParent), + _postingHandle(), + _bitVector() +{ + if (_fetchPostingsDone) { + if (rhs._bitVector.get() != NULL) + _bitVector = BitVector::create(*rhs._bitVector); + /* Use the same condition as fetchPostings(): when bit vectors are not forced, createLeafSearch() may still need the posting list even though a bit vector exists. */ + if (!_useBitVector || (_bitVector.get() == NULL)) { + _postingHandle = _diskIndex.readPostingList(*_lookupRes); + } + } +} + +namespace { + +bool +areAnyParentsEquiv(const Blueprint * node) +{ + return (node == NULL) + ? 
false + : (dynamic_cast(node) != NULL) + ? true + : areAnyParentsEquiv(node->getParent()); +} + +} + +void +DiskTermBlueprint::fetchPostings(bool strict) +{ + (void) strict; + _hasEquivParent = areAnyParentsEquiv(getParent()); + _bitVector = _diskIndex.readBitVector(*_lookupRes); + if (!_useBitVector || (_bitVector.get() == NULL)) { + _postingHandle = _diskIndex.readPostingList(*_lookupRes); + } + _fetchPostingsDone = true; +} + +SearchIterator::UP +DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda, bool strict) const +{ + if ((_bitVector.get() != NULL) && (_useBitVector || (tfmda[0]->isNotNeeded() && !_hasEquivParent))) { + LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", + getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs); + return BitVectorIterator::create(_bitVector.get(), tfmda, strict); + } + SearchIterator::UP search(_postingHandle->createIterator(_lookupRes->counts, tfmda, _useBitVector)); + if (_useBitVector) { + LOG(debug, "Return BooleanMatchIteratorWrapper: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", + getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs); + return SearchIterator::UP(new BooleanMatchIteratorWrapper(std::move(search), tfmda)); + } + LOG(debug, "Return posting list iterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")", + getName(_lookupRes->indexId).c_str(), _lookupRes->wordNum, _lookupRes->counts._numDocs); + return search; +} + +} // namespace diskindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h new file mode 100644 index 00000000000..f1790cd0cbd --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "diskindex.h" +#include + +namespace search { +namespace diskindex { + +/** + * Blueprint implementation for term searching in a disk index. + **/ +class DiskTermBlueprint : public search::queryeval::SimpleLeafBlueprint +{ +private: + search::queryeval::FieldSpecBase _field; + const search::diskindex::DiskIndex & _diskIndex; + DiskIndex::LookupResult::UP _lookupRes; + bool _useBitVector; + bool _fetchPostingsDone; + bool _hasEquivParent; + search::index::PostingListHandle::UP _postingHandle; + search::BitVector::UP _bitVector; + +public: + /** + * Create a new blueprint. + * + * @param field the field to search in. + * @param diskIndex the disk index used to read the bit vector or posting list. + * @param lookupRes the result after disk dictionary lookup. + * @param useBitVector whether or not we should use bit vector. + **/ + DiskTermBlueprint(const search::queryeval::FieldSpecBase & field, + const search::diskindex::DiskIndex & diskIndex, + search::diskindex::DiskIndex::LookupResult::UP lookupRes, + bool useBitVector); + + DiskTermBlueprint(const DiskTermBlueprint &); + + // Inherit doc from Blueprint. + // For now, this DiskTermBlueprint instance must have longer lifetime than the created iterator. + virtual search::queryeval::SearchIterator::UP + createLeafSearch(const search::fef::TermFieldMatchDataArray & tfmda, bool strict) const; + + virtual void + fetchPostings(bool strict); +}; + +} // namespace diskindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp b/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp new file mode 100644 index 00000000000..073c4b79031 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".diskindex.docidmapper"); +#include +#include +#include +#include +#include "docidmapper.h" + +#define NO_DOC static_cast(-1) + +namespace search +{ + +namespace diskindex +{ + + +DocIdMapping::DocIdMapping(void) + : _docIdLimit(0u), + _selector(NULL), + _selectorId(0) +{ +} + + +void +DocIdMapping::clear(void) +{ + _docIdLimit = 0; + _selector = NULL; + _selectorId = 0; +} + + +void +DocIdMapping::setup(uint32_t docIdLimit) +{ + _docIdLimit = docIdLimit; + _selector = NULL; + _selectorId = 0; +} + + +void +DocIdMapping::setup(uint32_t docIdLimit, + const SelectorArray *selector, + uint8_t selectorId) +{ + _docIdLimit = docIdLimit; + _selector = selector; + _selectorId = selectorId; +} + + +bool +DocIdMapping::readDocIdLimit(const vespalib::string &mergedDir) +{ + uint32_t docIdLimit = 0; + if (!search::docsummary::DocumentSummary:: + readDocIdLimit(mergedDir, docIdLimit)) + return false; + _docIdLimit = docIdLimit; + return true; +} + + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/docidmapper.h b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h new file mode 100644 index 00000000000..43e1ea44b89 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/docidmapper.h @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include + +namespace search +{ + +class BitVector; + +namespace diskindex +{ + +typedef vespalib::Array SelectorArray; + +class DocIdMapping +{ +public: + uint32_t _docIdLimit; + const SelectorArray *_selector; // External ownership + uint8_t _selectorId; + + DocIdMapping(void); + + void + clear(void); + + void + setup(uint32_t docIdLimit); + + void + setup(uint32_t docIdLimit, + const SelectorArray *selector, + uint8_t selectorId); + + bool + readDocIdLimit(const vespalib::string &dir); +}; + + +class DocIdMapper +{ +public: + const uint8_t *_selector; + uint32_t _docIdLimit; // Limit on legal input values + uint32_t _selectorLimit; // Limit on output + uint8_t _selectorId; + + DocIdMapper(void) + : _selector(NULL), + _docIdLimit(0u), + _selectorLimit(0), + _selectorId(0u) + { + } + + void + setup(const DocIdMapping &mapping) + { + _selector = (mapping._selector != NULL) ? + &((*mapping._selector)[0]) : NULL; + _docIdLimit = mapping._docIdLimit; + _selectorLimit = (mapping._selector != NULL) ? + (*mapping._selector).size() : + 0u; + _selectorId = mapping._selectorId; + } + + static uint32_t + noDocId(void) + { + return static_cast(-1); + } + + uint32_t + mapDocId(uint32_t docId) const + { + assert(docId < _docIdLimit); + if (_selector != NULL && + (docId >= _selectorLimit || _selector[docId] != _selectorId)) { + docId = noDocId(); + } + return docId; + } +}; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp new file mode 100644 index 00000000000..9eaac550192 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include + +#include "extposocc.h" +#include +#include +#include +#include +#include +#include "zcposocc.h" +#include "fileheader.h" + +LOG_SETUP(".diskindex.extposocc"); + +using search::index::PostingListFileSeqRead; +using search::index::PostingListFileSeqWrite; +using search::index::PostingListCountFileSeqRead; +using search::index::PostingListCountFileSeqWrite; +using search::index::DocIdAndFeatures; +using search::index::WordDocElementFeatures; +using search::index::WordDocElementWordPosFeatures; +using search::index::PostingListCounts; +using search::index::PostingListParams; +using search::index::Schema; + +namespace +{ + +vespalib::string PosOccIdCooked = "PosOcc.1.Cooked"; + +} + +namespace search +{ + +namespace diskindex +{ + +void +setupDefaultPosOccParameters(PostingListParams *countParams, + PostingListParams *params, + uint64_t numWordIds, + uint32_t docIdLimit) +{ + params->set("minSkipDocs", 64u); + params->set("minChunkDocs", 262144u); + + countParams->set("numWordIds", numWordIds); + /* + * ZcPosOcc interleaved min: 2 + 1 + 2 + 1 = 6, assuming k == 1 + * for both docid delta and wordpos delta, i.e. average docsize is + * less than 8. 
+ */ + countParams->set("avgBitsPerDoc", static_cast(27)); + countParams->set("minChunkDocs", static_cast(262144)); + countParams->set("docIdLimit", docIdLimit); +} + + +/* + * Create a sequential posting list writer for the file 'name'. + * When FileHeader::taste() succeeds on 'name' and the header matches a + * known format, that format overrides the 'dynamicK' argument; an + * unrecognised header is logged and 'dynamicK' is used as passed in. + * dynamicK == true selects ZcPosOccSeqWrite, otherwise Zc4PosOccSeqWrite. + * 'featureParams' and 'params' are applied to the writer before it is + * returned. The writer is allocated with new and returned as a raw + * pointer. + */ +PostingListFileSeqWrite * +makePosOccWrite(const vespalib::string &name, + PostingListCountFileSeqWrite *const posOccCountWrite, + bool dynamicK, + const PostingListParams &params, + const PostingListParams &featureParams, + const Schema &schema, + uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite) +{ + PostingListFileSeqWrite *posOccWrite = NULL; + + FileHeader fileHeader; + if (fileHeader.taste(name, tuneFileWrite)) { + if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + fileHeader.getFormats()[0] == + ZcPosOccSeqRead::getIdentifier() && + fileHeader.getFormats()[1] == + ZcPosOccSeqRead::getSubIdentifier()) { + dynamicK = true; + } else if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + fileHeader.getFormats()[0] == + Zc4PosOccSeqRead::getIdentifier() && + fileHeader.getFormats()[1] == + Zc4PosOccSeqRead::getSubIdentifier()) { + dynamicK = false; + } else { + LOG(warning, + "Could not detect format for posocc file write %s", + name.c_str()); + } + } + if (dynamicK) + posOccWrite = new ZcPosOccSeqWrite(schema, indexId, posOccCountWrite); + else + posOccWrite = + new Zc4PosOccSeqWrite(schema, indexId, posOccCountWrite); + + posOccWrite->setFeatureParams(featureParams); + posOccWrite->setParams(params); + return posOccWrite; +} + + +PostingListFileSeqRead * +makePosOccRead(const vespalib::string &name, + PostingListCountFileSeqRead *const posOccCountRead, + bool dynamicK, + const PostingListParams &featureParams, + const TuneFileSeqRead &tuneFileRead) +{ + PostingListFileSeqRead *posOccRead = NULL; + + FileHeader fileHeader; + if (fileHeader.taste(name, tuneFileRead)) { + if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + 
fileHeader.getFormats()[0] == + ZcPosOccSeqRead::getIdentifier() && + fileHeader.getFormats()[1] == + ZcPosOccSeqRead::getSubIdentifier()) { + dynamicK = true; + } else if (fileHeader.getVersion() == 1 && + fileHeader.getBigEndian() && + fileHeader.getFormats().size() == 2 && + fileHeader.getFormats()[0] == + Zc4PosOccSeqRead::getIdentifier() && + fileHeader.getFormats()[1] == + Zc4PosOccSeqRead::getSubIdentifier()) { + dynamicK = false; + } else { + LOG(warning, + "Could not detect format for posocc file read %s", + name.c_str()); + } + } + if (dynamicK) + posOccRead = new ZcPosOccSeqRead(posOccCountRead); + else + posOccRead = new Zc4PosOccSeqRead(posOccCountRead); + + posOccRead->setFeatureParams(featureParams); + return posOccRead; +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.h b/searchlib/src/vespa/searchlib/diskindex/extposocc.h new file mode 100644 index 00000000000..1deb788d488 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +namespace search +{ + +class TuneFileSeqRead; +class TuneFileSeqWrite; + +namespace index { + +class PostingListParams; +class PostingListCountFileSeqWrite; +class PostingListCountFileSeqRead; +class PostingListFileSeqWrite; +class PostingListFileSeqRead; +class Schema; + +} + +namespace diskindex +{ + + +void +setupDefaultPosOccParameters(index::PostingListParams *countParams, + index::PostingListParams *params, + uint64_t numWordIds, + uint32_t docIdLimit); + +/* + * Create a sequential posting list writer for the file 'name'. + * 'dynamicK' selects the on-disk format unless an existing file header + * says otherwise. Returns a newly allocated writer as a raw pointer. + */ +index::PostingListFileSeqWrite * +makePosOccWrite(const vespalib::string &name, + index::PostingListCountFileSeqWrite *const posOccCountWrite, + bool dynamicK, + const index::PostingListParams &params, + const index::PostingListParams &featureParams, + const index::Schema &schema, + uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite); + +/* + * Create a sequential posting list reader for the file 'name'. + * 'dynamicK' selects the on-disk format unless the file header says + * otherwise. Returns a newly allocated reader as a raw pointer. + */ +index::PostingListFileSeqRead * +makePosOccRead(const vespalib::string &name, + index::PostingListCountFileSeqRead *const posOccCountRead, + bool dynamicK, + const index::PostingListParams &featureParams, + const TuneFileSeqRead &tuneFileRead); + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp new file mode 100644 index 00000000000..279a73935f5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -0,0 +1,385 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include "fieldreader.h" +#include "zcposocc.h" +#include "extposocc.h" +#include +#include "pagedict4file.h" +LOG_SETUP(".diskindex.fieldreader"); + +/* + * Document id value meaning "no document". read() stores it when the + * input is exhausted and skips entries whose mapped document id equals + * it. + */ +#define NO_DOC static_cast(-1) + +namespace +{ + +vespalib::string PosOccIdCooked = "PosOcc.3.Cooked"; + +} + +using vespalib::getLastErrorString; +using search::index::Schema; +using search::index::SchemaUtil; +using search::bitcompression::PosOccFieldParams; +using search::bitcompression::PosOccFieldsParams; + +namespace search +{ + +namespace diskindex +{ + + +/* + * Start out in the "exhausted" state: both word numbers are + * noWordNumHigh() and no files are attached. + */ +FieldReader::FieldReader(void) + : _wordNum(noWordNumHigh()), + _docIdAndFeatures(), + _dictFile(), + _oldposoccfile(), + _wordNumMapper(), + _docIdMapper(), + _oldWordNum(noWordNumHigh()), + _residue(0u), + _docIdLimit(0u), + _checkPointResume(false), + _word() +{ +} + + +FieldReader::~FieldReader(void) +{ +} + + +/* + * Read the next word and its counts from the dictionary file and hand + * the counts to the posting list file. For a real word the old word + * number is mapped to the new one and _residue is set to the number of + * documents for the word. When the dictionary reports noWordNumHigh() + * that value is copied to _wordNum unmapped. + */ +void +FieldReader::readCounts(void) +{ + PostingListCounts counts; + _dictFile->readWord(_word, _oldWordNum, counts); + _oldposoccfile->readCounts(counts); + if (_oldWordNum != noWordNumHigh()) { + _wordNum = _wordNumMapper.map(_oldWordNum); + assert(_wordNum != noWordNum()); + assert(_wordNum != noWordNumHigh()); + _residue = counts._numDocs; + } else + _wordNum = _oldWordNum; +} + + +/* + * Read one posting entry into _docIdAndFeatures and replace its + * document id with the mapped document id. + */ +void +FieldReader::readDocIdAndFeatures(void) +{ + _oldposoccfile->readDocIdAndFeatures(_docIdAndFeatures); + _docIdAndFeatures._docId = _docIdMapper.mapDocId(_docIdAndFeatures._docId); +} + + +/* + * Advance to the next posting entry whose mapped document id is not + * NO_DOC, reading new words as needed. When no more words exist, + * _wordNum is noWordNumHigh() and the document id is set to NO_DOC. + */ +void +FieldReader::read(void) +{ + for (;;) { + while (_residue == 0) { + readCounts(); + if (_wordNum == noWordNumHigh()) { + assert(_residue == 0); + _docIdAndFeatures._docId = NO_DOC; + return; + } + } + --_residue; + readDocIdAndFeatures(); + if (_docIdAndFeatures._docId != NO_DOC) + return; + } +} + + +/* + * The plain reader passes features through unchanged, so raw features + * are allowed. + */ +bool +FieldReader::allowRawFeatures(void) +{ + return true; +} + + +/* + * Install the word number and document id mappings used while reading. + */ +void +FieldReader::setup(const WordNumMapping &wordNumMapping, + const DocIdMapping &docIdMapping) +{ + _wordNumMapper.setup(wordNumMapping); +
_docIdMapper.setup(docIdMapping); +} + + +bool +FieldReader::earlyOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + vespalib::string name = prefix + "posocc.dat.compressed"; + FastOS_StatInfo statInfo; + bool statres; + + bool dynamicKPosOccFormat = false; // Will autodetect anyway + statres = FastOS_File::Stat(name.c_str(), &statInfo); + if (!statres) { + LOG(error, + "Could not stat compressed posocc file %s: %s", + name.c_str(), getLastErrorString().c_str()); + return false; + } + + _dictFile.reset(new search::diskindex::PageDict4FileSeqRead); + PostingListParams featureParams; + _oldposoccfile.reset(search::diskindex::makePosOccRead(name, + _dictFile.get(), + dynamicKPosOccFormat, + featureParams, + tuneFileRead)); + return true; +} + + +bool +FieldReader::lateOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + vespalib::string cname = prefix + "dictionary"; + vespalib::string name = prefix + "posocc.dat.compressed"; + + if (!_dictFile->open(cname, tuneFileRead)) { + LOG(error, + "Could not open posocc count file %s for read", + cname.c_str()); + } + + // open posocc.dat + if (!_oldposoccfile->open(name, tuneFileRead)) { + LOG(error, + "Could not open posocc file %s for read", + name.c_str()); + } + if (!_checkPointResume) { + _oldWordNum = noWordNum(); + _wordNum = _oldWordNum; + PostingListParams params; + _oldposoccfile->getParams(params); + params.get("docIdLimit", _docIdLimit); + } + return true; +} + + +bool +FieldReader::open(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + if (!earlyOpen(prefix, tuneFileRead)) + return false; + return lateOpen(prefix, tuneFileRead); +} + + +bool +FieldReader::close(void) +{ + bool ret = true; + + if (_oldposoccfile) { + bool closeRes = _oldposoccfile->close(); + if (!closeRes) { + LOG(error, + "Could not close posocc file for read"); + ret = false; + } + _oldposoccfile.reset(); + } + if (_dictFile) { + bool closeRes = 
_dictFile->close(); + if (!closeRes) { + LOG(error, + "Could not close posocc file for read"); + ret = false; + } + _dictFile.reset(); + } + + return ret; +} + + +void +FieldReader::checkPointWrite(vespalib::nbostream &out) +{ + out << _wordNum << _oldWordNum; + out << _residue << _docIdAndFeatures; + out << _docIdLimit; + out << _word; + _oldposoccfile->checkPointWrite(out); + _dictFile->checkPointWrite(out); +} + +void +FieldReader::checkPointRead(vespalib::nbostream &in) +{ + in >> _wordNum >> _oldWordNum; + in >> _residue >> _docIdAndFeatures; + in >> _docIdLimit; + in >> _word; + _oldposoccfile->checkPointRead(in); + _dictFile->checkPointRead(in); + _checkPointResume = true; +} + +void +FieldReader::setFeatureParams(const PostingListParams ¶ms) +{ + _oldposoccfile->setFeatureParams(params); +} + + +void +FieldReader::getFeatureParams(PostingListParams ¶ms) +{ + _oldposoccfile->getFeatureParams(params); +} + + +std::unique_ptr +FieldReader::allocFieldReader(const SchemaUtil::IndexIterator &index, + const Schema &oldSchema) +{ + assert(index.isValid()); + if (index.hasMatchingOldFields(oldSchema, false)) + return std::make_unique(); // The common case + if (!index.hasOldFields(oldSchema, false)) + return std::make_unique(index); // drop data + // field exists in old schema with different collection type setting + return std::make_unique(index); // degraded +} + + +FieldReaderEmpty::FieldReaderEmpty(const IndexIterator &index) + : _index(index) +{ +} + + +bool +FieldReaderEmpty::earlyOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + (void) prefix; + (void) tuneFileRead; + return true; +} + + +bool +FieldReaderEmpty::lateOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + (void) prefix; + (void) tuneFileRead; + return true; +} + + +bool +FieldReaderEmpty::open(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) +{ + (void) prefix; + (void) tuneFileRead; + return true; +} + + +void 
+FieldReaderEmpty::getFeatureParams(PostingListParams &params) +{ + /* No input file to ask: derive the parameters from the schema. */ + PosOccFieldsParams fieldsParams; + fieldsParams.setSchemaParams(_index.getSchema(), _index.getIndex()); + params.clear(); + fieldsParams.getParams(params); +} + + +/* + * Record whether the target field (first field of the schema derived + * parameters) has elements and element weights; read() strips the + * source data down to that. + */ +FieldReaderStripInfo::FieldReaderStripInfo(const IndexIterator &index) + : _hasElements(false), + _hasElementWeights(false) +{ + PosOccFieldsParams fieldsParams; + fieldsParams.setSchemaParams(index.getSchema(), index.getIndex()); + assert(fieldsParams.getNumFields() > 0); + const PosOccFieldParams &fieldParams = fieldsParams.getFieldParams()[0]; + _hasElements = fieldParams._hasElements; + _hasElementWeights = fieldParams._hasElementWeights; +} + + +/* + * This reader rewrites the features it reads, so raw features cannot + * be passed through. + */ +bool +FieldReaderStripInfo::allowRawFeatures(void) +{ + return false; +} + + +/* + * Read the next entry and strip it to fit the target field: + * - target has elements but no weights: every element weight is set + * to 1; + * - target has no elements: entries whose first element id is not 0 + * are dropped, otherwise only the first element is kept (weight 1) + * together with its word positions. + */ +void +FieldReaderStripInfo::read(void) +{ + typedef search::index::WordDocElementFeatures Element; + + for (;;) { + FieldReader::read(); + DocIdAndFeatures &features = _docIdAndFeatures; + if (_wordNum == noWordNumHigh()) + return; + assert(!features.getRaw()); + uint32_t numElements = features._elements.size(); + assert(numElements > 0); + std::vector<Element>::iterator element = + features._elements.begin(); + if (_hasElements) { + if (!_hasElementWeights) { + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++element) { + element->setWeight(1); + } + assert(element == features._elements.end()); + } + } else { + if (element->getElementId() != 0) + continue; // Drop this entry, try to read new entry + element->setWeight(1); + features._wordPositions.resize(element->getNumOccs()); + if (numElements > 1) { + features._elements.resize(1); + } + } + break; + } +} + + +/* + * Report the source parameters with the collection type replaced by + * the one of the target field. + */ +void +FieldReaderStripInfo::getFeatureParams(PostingListParams &params) +{ + FieldReader::getFeatureParams(params); + vespalib::string paramsPrefix = PosOccFieldParams::getParamsPrefix(0); + vespalib::string collStr = paramsPrefix + ".collectionType"; + if (_hasElements) { + if (_hasElementWeights) + params.setStr(collStr, "weightedSet"); + else
+ params.setStr(collStr, "array"); + } else + params.setStr(collStr, "single"); + /* + * NOTE(review): the raw "encoding" entry is removed, presumably so + * that fusion never selects raw feature copying for a reader that + * rewrites features -- confirm against the fusion code. + */ + params.erase("encoding"); +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.h b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h new file mode 100644 index 00000000000..b3cf6446419 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.h @@ -0,0 +1,216 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include "wordnummapper.h" +#include "docidmapper.h" +#include "fieldwriter.h" + +namespace search +{ + +namespace diskindex +{ + +class FieldReaderFieldInfo; + +/* + * FieldReader is used to read a dictionary and posting list file + * together, and get a sequential view of the stored data. + * + * It can use mappings for word numbers and document ids to skip + * documents that are logically removed and use shared word numbers + * with other field readers. + * + * It is used by the fusion code as one of many input objects connected + * to a FieldWriter class that writes the merged output for the field.
+ */ +class FieldReader +{ + FieldReader(const FieldReader &rhs) = delete; + FieldReader(const FieldReader &&rhs) = delete; + FieldReader &operator=(const FieldReader &rhs) = delete; + FieldReader &operator=(const FieldReader &&rhs) = delete; +public: + using DictionaryFileSeqRead = index::DictionaryFileSeqRead; + + typedef index::PostingListFileSeqRead PostingListFileSeqRead; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::Schema Schema; + typedef index::SchemaUtil::IndexIterator IndexIterator; + typedef index::PostingListCounts PostingListCounts; + typedef index::PostingListParams PostingListParams; + + /* Mapped word number of the current entry; noWordNumHigh() when the input is exhausted. */ + uint64_t _wordNum; + /* Current posting entry; its document id has been mapped. */ + DocIdAndFeatures _docIdAndFeatures; +protected: + std::unique_ptr _dictFile; + std::unique_ptr _oldposoccfile; + WordNumMapper _wordNumMapper; + DocIdMapper _docIdMapper; + /* Word number as read from the dictionary, before mapping. */ + uint64_t _oldWordNum; + /* Number of posting entries not yet read for the current word. */ + uint32_t _residue; + uint32_t _docIdLimit; + /* Set by checkPointRead(); makes lateOpen() keep the restored state. */ + bool _checkPointResume; + vespalib::string _word; + + /* Word number used to signal that there are no more words. */ + static uint64_t + noWordNumHigh(void) + { + return std::numeric_limits::max(); + } + + /* Word number used before any word has been read. */ + static uint64_t + noWordNum(void) + { + return 0u; + } + + void + readCounts(void); + + void + readDocIdAndFeatures(void); + +public: + FieldReader(void); + + virtual + ~FieldReader(void); + + virtual void + read(void); + + virtual bool + allowRawFeatures(void); + + /* + * Pass the current entry to writer, starting a new word there when + * the writer is not already at this reader's word number. + */ + void + write(FieldWriter &writer) + { + if (_wordNum != writer.getSparseWordNum()) { + writer.newWord(_wordNum, _word); + } + writer.add(_docIdAndFeatures); + } + + /* True as long as the reader is not exhausted. */ + bool + isValid(void) const + { + return _wordNum != noWordNumHigh(); + } + + /* Order readers by word number, then by document id. */ + bool + operator<(const FieldReader &rhs) const + { + return _wordNum < rhs._wordNum || + (_wordNum == rhs._wordNum && + _docIdAndFeatures._docId < rhs._docIdAndFeatures._docId); + } + + virtual void + setup(const WordNumMapping &wordNumMapping, + const DocIdMapping &docIdMapping); + + virtual bool + earlyOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead); + + virtual bool + lateOpen(const vespalib::string &prefix, + const
TuneFileSeqRead &tuneFileRead); + + virtual bool + open(const vespalib::string &prefix, const TuneFileSeqRead &tuneFileRead); + + virtual bool + close(void); + + /* + * To be called between words, not in the middle of one. + */ + virtual void + checkPointWrite(vespalib::nbostream &out); + + /* + * To be called after earlyOpen() but before afterOpen(). + */ + virtual void + checkPointRead(vespalib::nbostream &in); + + virtual void + setFeatureParams(const PostingListParams ¶ms); + + virtual void + getFeatureParams(PostingListParams ¶ms); + + uint32_t + getDocIdLimit(void) const + { + return _docIdLimit; + } + + static std::unique_ptr + allocFieldReader(const IndexIterator &index, const Schema &oldSchema); +}; + + +/* + * Field reader that pretends that input is empty, e.g. due to field + * not existing in source or being incompatible. + */ +class FieldReaderEmpty : public FieldReader +{ +private: + const IndexIterator _index; + +public: + FieldReaderEmpty(const IndexIterator &index); + + virtual bool + earlyOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) override; + + virtual bool + lateOpen(const vespalib::string &prefix, + const TuneFileSeqRead &tuneFileRead) override; + + virtual bool + open(const vespalib::string &prefix, const TuneFileSeqRead &tuneFileRead) + override; + + virtual void + getFeatureParams(PostingListParams ¶ms) override; +}; + +/* + * Field reader that strips information from source, e.g. remove + * weights or discard nonzero elements, due to collection type change. 
+ */ +class FieldReaderStripInfo : public FieldReader +{ +private: + /* Whether the target field has elements / element weights. */ + bool _hasElements; + bool _hasElementWeights; +public: + FieldReaderStripInfo(const IndexIterator &index); + + virtual bool + allowRawFeatures(void) override; + + virtual void + read(void) override; + + virtual void + getFeatureParams(PostingListParams &params) override; +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp new file mode 100644 index 00000000000..7449a946286 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.cpp @@ -0,0 +1,258 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include "fieldwriter.h" +#include "zcposocc.h" +#include "extposocc.h" +#include +#include "pagedict4file.h" +LOG_SETUP(".diskindex.fieldwriter"); + +namespace search +{ + +namespace diskindex +{ + +using vespalib::nbostream; +using vespalib::getLastErrorString; +using common::FileHeaderContext; + +/* + * Create a writer for a field with the given document id limit and + * number of word ids. No files are created until earlyOpen(). + */ +FieldWriter::FieldWriter(uint32_t docIdLimit, + uint64_t numWordIds) + : _wordNum(noWordNum()), + _prevDocId(0), + _dictFile(), + _posoccfile(), + _bvc(docIdLimit), + _bmapfile(BitVectorKeyScope::PERFIELD_WORDS), + _docIdLimit(docIdLimit), + _numWordIds(numWordIds), + _prefix(), + _compactWordNum(0), + _word() +{ +} + + +/* + * First open phase: remember prefix, set up the parameters for the + * dictionary and the posting list file and create both writer objects. + * minSkipDocs and minChunkDocs override the defaults only when they + * are nonzero. + */ +void +FieldWriter::earlyOpen(const vespalib::string &prefix, + uint32_t minSkipDocs, + uint32_t minChunkDocs, + bool dynamicKPosOccFormat, + const Schema &schema, + const uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite) +{ + _prefix = prefix; + vespalib::string name = prefix + "posocc.dat.compressed"; + + PostingListParams params; + PostingListParams featureParams; + PostingListParams countParams; + + diskindex::setupDefaultPosOccParameters(&countParams, + &params, + _numWordIds, + _docIdLimit); + + if (minSkipDocs != 0) { +
countParams.set("minSkipDocs", minSkipDocs); + params.set("minSkipDocs", minSkipDocs); + } + if (minChunkDocs != 0) { + countParams.set("minChunkDocs", minChunkDocs); + params.set("minChunkDocs", minChunkDocs); + } + + _dictFile.reset(new PageDict4FileSeqWrite); + _dictFile->setParams(countParams); + + _posoccfile.reset(diskindex::makePosOccWrite(name, + _dictFile.get(), + dynamicKPosOccFormat, + params, + featureParams, + schema, + indexId, + tuneFileWrite)); +} + + +/* + * Second open phase: open the dictionary, the posting list file and + * the bit vector file below the prefix given to earlyOpen(). + * Returns false if the dictionary or the posting list file cannot be + * opened; the outcome of opening the bit vector file is not checked + * here. + */ +bool +FieldWriter::lateOpen(const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + vespalib::string cname = _prefix + "dictionary"; + vespalib::string name = _prefix + "posocc.dat.compressed"; + + // Open output dictionary file + if (!_dictFile->open(cname, tuneFileWrite, fileHeaderContext)) { + LOG(error, "Could not open posocc count file %s for write: %s", + cname.c_str(), getLastErrorString().c_str()); + return false; + } + + // Open output posocc.dat file + if (!_posoccfile->open(name, tuneFileWrite, fileHeaderContext)) { + LOG(error, "Could not open posocc file %s for write: %s", + name.c_str(), getLastErrorString().c_str()); + return false; + } + + // Open output boolocc.bdat file + vespalib::string booloccbidxname = _prefix + "boolocc"; + _bmapfile.open(booloccbidxname.c_str(), _docIdLimit, tuneFileWrite, + fileHeaderContext); + + return true; +} + + +/* + * Finish the current word. The word is flushed in the posting list + * file; if it has documents, its dictionary entry is written and, when + * the bit vector candidate crossed its limit, its bit vector as well. + * Without documents the asserts require that no word has been started + * yet. + */ +void +FieldWriter::flush(void) +{ + _posoccfile->flushWord(); + PostingListCounts &counts = _posoccfile->getCounts(); + if (counts._numDocs != 0) { + assert(_compactWordNum != 0); + _dictFile->writeWord(_word, counts); + // Write bitmap entries + if (_bvc.getCrossedBitVectorLimit()) + _bmapfile.addWordSingle(_compactWordNum, _bvc.getBitVector()); + _bvc.clear(); + counts.clear(); + } else { + assert(counts._bitLength == 0); + assert(_bvc.empty()); + assert(_compactWordNum == 0); + } +} + + +/* + * Start a new word after flushing the previous one. Word numbers must + * be nonzero, strictly increasing and at most the number of word ids. + */ +void +FieldWriter::newWord(uint64_t wordNum, const vespalib::stringref &word) +{ + assert(wordNum <= _numWordIds); +
assert(wordNum != noWordNum()); + assert(wordNum > _wordNum); + flush(); + _wordNum = wordNum; + ++_compactWordNum; + _word = word; + _prevDocId = 0; +} + + +void +FieldWriter::newWord(const vespalib::stringref &word) +{ + newWord(_wordNum + 1, word); +} + + +bool +FieldWriter::close(void) +{ + bool ret = true; + flush(); + _wordNum = noWordNum(); + if (_posoccfile) { + bool closeRes = _posoccfile->close(); + if (!closeRes) { + LOG(error, + "Could not close posocc file for write"); + ret = false; + } + _posoccfile.reset(); + } + if (_dictFile) { + bool closeRes = _dictFile->close(); + if (!closeRes) { + LOG(error, + "Could not close posocc count file for write"); + ret = false; + } + _dictFile.reset(); + } + + _bmapfile.close(); + return ret; +} + + +void +FieldWriter::checkPointWrite(nbostream &out) +{ + out << _wordNum << _prevDocId; + out << _docIdLimit << _numWordIds; + out << _compactWordNum << _word; + _posoccfile->checkPointWrite(out); + _dictFile->checkPointWrite(out); + _bvc.checkPointWrite(out); + _bmapfile.checkPointWrite(out); +} + + +void +FieldWriter::checkPointRead(nbostream &in) +{ + in >> _wordNum >> _prevDocId; + uint32_t checkDocIdLimit = 0; + uint64_t checkNumWordIds = 0; + in >> checkDocIdLimit >> checkNumWordIds; + assert(checkDocIdLimit == _docIdLimit); + assert(checkNumWordIds == _numWordIds); + in >> _compactWordNum >> _word; + _posoccfile->checkPointRead(in); + _dictFile->checkPointRead(in); + _bvc.checkPointRead(in); + _bmapfile.checkPointRead(in); +} + + +void +FieldWriter::setFeatureParams(const PostingListParams ¶ms) +{ + _posoccfile->setFeatureParams(params); +} + + +void +FieldWriter::getFeatureParams(PostingListParams ¶ms) +{ + _posoccfile->getFeatureParams(params); +} + + +static const char *termOccNames[] = +{ + "boolocc.bdat", + "boolocc.bidx", + "boolocc.idx", + "posocc.ccnt", + "posocc.cnt", + "posocc.dat.compressed", + "dictionary.pdat", + "dictionary.spdat", + "dictionary.ssdat", + "dictionary.words", + NULL, +}; + + +void 
+FieldWriter::remove(const vespalib::string &prefix) +{ + /* + * Delete prefix + name for every entry of termOccNames. The result + * of each delete is ignored. + */ + for (const char **j = termOccNames; *j != NULL; ++j) { + vespalib::string tmpName = prefix + *j; + FastOS_File::Delete(tmpName.c_str()); + } +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h new file mode 100644 index 00000000000..dfed6036405 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include "bitvectorfile.h" + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + +namespace diskindex +{ + +/* + * FieldWriter is used to write a dictionary and posting list file + * together. + * + * It is used by the fusion code to write the merged output for a field, + * and by the memory index dump code to write a field to disk.
+ */ +class FieldWriter +{ +private: + FieldWriter(const FieldWriter &rhs) = delete; + FieldWriter(const FieldWriter &&rhs) = delete; + FieldWriter &operator=(const FieldWriter &rhs) = delete; + FieldWriter &operator=(const FieldWriter &&rhs) = delete; + + /* Word number of the current word; noWordNum() before the first word. */ + uint64_t _wordNum; + /* Last document id added for the current word; reset to 0 by newWord(). */ + uint32_t _prevDocId; + + static uint64_t + noWordNum(void) + { + return 0u; + } +public: + + using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite; + + typedef index::PostingListFileSeqWrite PostingListFileSeqWrite; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::Schema Schema; + typedef index::PostingListCounts PostingListCounts; + typedef index::PostingListParams PostingListParams; + + std::unique_ptr _dictFile; + std::unique_ptr _posoccfile; +private: + BitVectorCandidate _bvc; + BitVectorFileWrite _bmapfile; + uint32_t _docIdLimit; + uint64_t _numWordIds; + vespalib::string _prefix; + /* Number of words started so far; used as word key in the bit vector file. */ + uint64_t _compactWordNum; + vespalib::string _word; + + void + flush(void); + +public: + FieldWriter(uint32_t docIdLimit, + uint64_t numWordIds); + + void + newWord(uint64_t wordNum, const vespalib::stringref &word); + + void + newWord(const vespalib::stringref &word); + + /* + * Add a posting entry for the current word. Document ids must be + * below the document id limit and strictly increasing within a + * word. + */ + void + add(const DocIdAndFeatures &features) + { + assert(features._docId < _docIdLimit); + assert(features._docId > _prevDocId); + _posoccfile->writeDocIdAndFeatures(features); + _bvc.add(features._docId); + _prevDocId = features._docId; + } + + /* Word number given to the latest newWord() call. */ + uint64_t + getSparseWordNum() const + { + return _wordNum; + } + + void + earlyOpen(const vespalib::string &prefix, + uint32_t minSkipDocs, + uint32_t minChunkDocs, + bool dynamicKPosOccFormat, + const Schema &schema, + uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite); + + bool + lateOpen(const TuneFileSeqWrite &tuneFileWrite, + const search::common::FileHeaderContext &fileHeaderContext); + + bool + close(void); + + /* + * To be called between words, not in the middle of one.
+ */ + void + checkPointWrite(vespalib::nbostream &out); + + /* + * To be called after earlyOpen() but before lateOpen(). + */ + void + checkPointRead(vespalib::nbostream &in); + + void + setFeatureParams(const PostingListParams &params); + + void + getFeatureParams(PostingListParams &params); + + static void + remove(const vespalib::string &prefix); +}; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp b/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp new file mode 100644 index 00000000000..5c67ea5f064 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fileheader.cpp @@ -0,0 +1,165 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include "fileheader.h" +#include +#include +#include + +LOG_SETUP(".diskindex.fileheader"); + +namespace search +{ + +namespace diskindex +{ + +using bitcompression::FeatureDecodeContextBE; + +FileHeader::FileHeader(void) + : _bigEndian(false), + _hostEndian(false), + _completed(false), + _allowNoFileBitSize(false), + _version(0), + _headerLen(0), + _fileBitSize(0), + _formats() +{ +} + + +FileHeader::~FileHeader(void) +{ +} + + +/* + * Reverse the byte order of a 32-bit value. Uses the compiler builtin + * instead of x86 inline assembly so that the file builds on other + * architectures too. + */ +static inline uint32_t +bswap(uint32_t val) +{ + return __builtin_bswap32(val); +} + + +/* + * Read the generic file header of the file called name and extract + * endianness, frozen state, file bit size and format identifiers. + * Returns false if the file cannot be opened, has no valid header or + * lacks a required tag. + */ +bool +FileHeader::taste(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead) +{ + vespalib::FileHeader header; + FastOS_File file; + + if (tuneFileRead.getWantDirectIO()) + file.EnableDirectIO(); + bool res = file.OpenReadOnly(name.c_str()); + if (!res) { + return false; + } + + uint32_t headerLen = 0u; + uint64_t fileSize = file.GetSize(); + try { + headerLen = header.readFile(file); + assert(headerLen >= header.getSize()); + (void) headerLen; + } catch (vespalib::IllegalHeaderException &e) { + /* The two messages below are expected for files without header. */ + if (e.getMessage() != "Failed to read header info."
&& + e.getMessage() != "Failed to verify magic bits.") { + LOG(error, + "FileHeader::tastGeneric(\"%s\") exception: %s", + name.c_str(), + e.getMessage().c_str()); + } + file.Close(); + return false; + } + file.Close(); + + _version = 1; + _headerLen = headerLen; + _bigEndian = htonl(1) == 1; + if (header.hasTag("endian")) { + vespalib::string endian(header.getTag("endian").asString()); + if (endian == "big") { + _bigEndian = true; + } else if (endian == "little") { + _bigEndian = false; + } else { + LOG(error, + "Bad endian: %s", + endian.c_str()); + return false; + } + } + _hostEndian = _bigEndian == (htonl(1) == 1); + if (header.hasTag("frozen")) { + _completed = header.getTag("frozen").asInteger() != 0; + } else { + LOG(error, + "FileHeader::taste(\"%s\"): Missing frozen tag", + name.c_str()); + return false; + } + if (header.hasTag("fileBitSize")) { + _fileBitSize = header.getTag("fileBitSize").asInteger(); + if (_completed && _fileBitSize < 8 * _headerLen) { + LOG(error, + "FileHeader::taste(\"%s\"): " + "fleBitSize(%" PRIu64 ") < 8 * headerLen(%u)", + name.c_str(), + _fileBitSize, _headerLen); + return false; + } + if (_completed && _fileBitSize > 8 * fileSize) { + LOG(error, + "FileHeader::taste(\"%s\"): " + "fleBitSize(%" PRIu64 ") > 8 * fileSize(%" PRIu64 ")", + name.c_str(), + _fileBitSize, fileSize); + abort(); + } + } else if (!_allowNoFileBitSize) { + LOG(error, + "FileHeader::taste(\"%s\"): Missing fileBitSize tag", + name.c_str()); + return false; + } + for (uint32_t i = 0; ;++i) { + vespalib::asciistream as; + as << "format." 
<< i; + vespalib::stringref key(as.str()); + if (!header.hasTag(key)) + break; + _formats.push_back(header.getTag(key).asString()); + } + return true; +} + + +bool +FileHeader::taste(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite) +{ + TuneFileSeqRead tuneFileRead; + if (tuneFileWrite.getWantDirectIO()) + tuneFileRead.setWantDirectIO(); + return taste(name, tuneFileRead); +} + + +bool +FileHeader::taste(const vespalib::string &name, + const TuneFileRandRead &tuneFileSearch) +{ + TuneFileSeqRead tuneFileRead; + if (tuneFileSearch.getWantDirectIO()) + tuneFileRead.setWantDirectIO(); + return taste(name, tuneFileRead); +} + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/fileheader.h b/searchlib/src/vespa/searchlib/diskindex/fileheader.h new file mode 100644 index 00000000000..05db2d22e10 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fileheader.h @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace search +{ + +namespace diskindex +{ + +/* + * Summary of the generic file header of a disk index file, filled in + * by taste(). + */ +class FileHeader +{ +private: + /* File byte order; from the "endian" tag, else host byte order. */ + bool _bigEndian; + /* True when the file byte order equals the host byte order. */ + bool _hostEndian; + /* True when the "frozen" tag is nonzero. */ + bool _completed; + /* When set, taste() accepts files without a "fileBitSize" tag. */ + bool _allowNoFileBitSize; + uint32_t _version; + uint32_t _headerLen; + uint64_t _fileBitSize; + /* Values of the "format.N" tags, in order. */ + std::vector _formats; + +public: + FileHeader(void); + + ~FileHeader(void); + + bool + taste(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead); + + bool + taste(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite); + + bool + taste(const vespalib::string &name, + const TuneFileRandRead &tuneFileSearch); + + bool + getBigEndian(void) const + { + return _bigEndian; + } + + bool + getHostEndian(void) const + { + return _hostEndian; + } + + uint32_t + getVersion(void) const + { + return _version; + } + + uint32_t + getHeaderLen(void) const + { + return _headerLen; + } + + const std::vector & + getFormats(void) const + { + return _formats; + } + + bool + getCompleted() const + { + return _completed; + } + + void + setAllowNoFileBitSize() + { + _allowNoFileBitSize = true; + } +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.cpp b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp new file mode 100644 index 00000000000..eb6e4c9dad5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fusion.cpp @@ -0,0 +1,606 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003 Fast Search & Transfer ASA + +#include +#include +#include "fusion.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "fieldreader.h" +#include "fieldwriter.h" +#include "dictionarywordreader.h" + +LOG_SETUP(".diskindex.fusion"); + +using search::FileKit; +using search::index::PostingListParams; +using search::index::Schema; +using search::index::SchemaUtil; +using search::common::FileHeaderContext; +using search::diskindex::DocIdMapping; +using search::diskindex::WordNumMapping; +using search::PostingPriorityQueue; +using search::docsummary::DocumentSummary; +using vespalib::getLastErrorString; + + +namespace search +{ + +namespace diskindex +{ + + +void +FusionInputIndex::setSchema(const Schema::SP &schema) +{ + _schema = schema; +} + +/* + * Create a fusion object with no schema and no input indexes; the + * output directory defaults to "merged". + */ +Fusion::Fusion(bool dynamicKPosIndexFormat, + const TuneFileIndexing &tuneFileIndexing, + const FileHeaderContext &fileHeaderContext) + : _schema(NULL), + _oldIndexes(), + _docIdLimit(0u), + _numWordIds(0u), + _dynamicKPosIndexFormat(dynamicKPosIndexFormat), + _outDir("merged"), + _tuneFileIndexing(tuneFileIndexing), + _fileHeaderContext(fileHeaderContext) +{ +} + + +Fusion::~Fusion() +{ + ReleaseMappingTables(); +} + + +/* + * Set the schema of the merged index. Only the pointer is stored. + */ +void +Fusion::setSchema(const Schema *schema) +{ + _schema = schema; +} + + +void +Fusion::setOutDir(const vespalib::string &outDir) +{ + _outDir = outDir; +} + + +/* + * Create one input index object per entry in oldIndexList. Each gets + * the entry as its path and <outDir>/tmpindex<N> as its temporary + * path, where N is the position in the list. + */ +void +Fusion::SetOldIndexList(const std::vector &oldIndexList) +{ + _oldIndexes.resize(oldIndexList.size()); + OldIndexIterator oldIndexIt = _oldIndexes.begin(); + uint32_t i = 0; + for (std::vector::const_iterator + it = oldIndexList.begin(), ite = oldIndexList.end(); + it != ite; + ++it, ++oldIndexIt, ++i) { + oldIndexIt->reset(allocOldIndex()); + OldIndex &oi = **oldIndexIt; + oi.setPath(*it); + std::ostringstream tmpindexpath0; + tmpindexpath0 << _outDir; + tmpindexpath0 << "/tmpindex"; + tmpindexpath0 << i; + oi.setTmpPath(tmpindexpath0.str()); + } +} + + +bool
+Fusion::openInputWordReaders(const SchemaUtil::IndexIterator &index, + std::vector< + std::unique_ptr > & + readers, + PostingPriorityQueue &heap) +{ + /* + * For every input index that has the field, open a word reader on + * its dictionary (writing the word mapping to old2new.dat in the + * temporary directory) and read the first word. Readers that are + * still valid are stored in readers and added to heap. + */ + for (auto &i : getOldIndexes()) { + OldIndex &oi = *i; + auto reader(std::make_unique()); + const vespalib::string &tmpindexpath = oi.getTmpPath(); + const vespalib::string &oldindexpath = oi.getPath(); + vespalib::string wordMapName = tmpindexpath + "/old2new.dat"; + vespalib::string fieldDir(oldindexpath + "/" + index.getName()); + vespalib::string dictName(fieldDir + "/dictionary"); + const Schema &oldSchema = oi.getSchema(); + if (!index.hasOldFields(oldSchema, false)) { + continue; // drop data + } + bool res = reader->open(dictName, + wordMapName, + _tuneFileIndexing._read); + if (!res) { + LOG(error, "Could not open dictionary %s to generate %s", + dictName.c_str(), wordMapName.c_str()); + return false; + } + reader->read(); + if (reader->isValid()) { + readers.push_back(std::move(reader)); + heap.initialAdd(readers.back().get()); + } + } + return true; +} + + +/* + * Merge the dictionaries of all input indexes for the field to assign + * new word numbers. The resulting number of words is stored in + * _numWordIds and the word mapping files are read back afterwards. + * Returns false if a dictionary or a mapping file cannot be handled. + */ +bool +Fusion::renumberFieldWordIds(const SchemaUtil::IndexIterator &index) +{ + vespalib::string indexName = index.getName(); + LOG(debug, "Renumber word IDs for field %s", indexName.c_str()); + + std::vector> readers; + PostingPriorityQueue heap; + WordAggregator out; + + if (!openInputWordReaders(index, readers, heap)) + return false; + + + heap.merge(out, 4); + assert(heap.empty()); + _numWordIds = out.getWordNum(); + + // Close files + for (auto &i : readers) { + i->close(); + } + + // Now read mapping files back into an array + // XXX: avoid this, and instead make the array here + if (!ReadMappingFiles(&index)) + return false; + + LOG(debug, "Finished renumbering words IDs for field %s", + indexName.c_str()); + + return true; +} + + +/* + * Merge every index field of the schema, stopping at the first + * failure. + */ +bool +Fusion::mergeFields() +{ + typedef SchemaUtil::IndexIterator IndexIterator; + + const Schema &schema = getSchema(); + for (IndexIterator index(schema); index.isValid(); ++index) { + if
(!mergeField(index.getIndex())) + return false; + } + return true; +} + + +/* + * Merge one field, identified by its index id in the schema, into + * <outDir>/<field name>. Nothing is done when the .mergeocc_done stamp + * already exists. Otherwise word ids are renumbered, postings are + * merged, the stamp is created and the temporary directories are + * cleaned. A failure to merge the postings aborts the process; other + * failures return false. + */ +bool +Fusion::mergeField(uint32_t id) +{ + typedef SchemaUtil::IndexIterator IndexIterator; + typedef SchemaUtil::IndexSettings IndexSettings; + + const Schema &schema = getSchema(); + IndexIterator index(schema, id); + const vespalib::string &indexName = index.getName(); + IndexSettings settings = index.getIndexSettings(); + if (settings.hasError()) + return false; + vespalib::string indexDir = _outDir + "/" + indexName; + + if (FileKit::hasStamp(indexDir + "/.mergeocc_done")) + return true; + + vespalib::mkdir(indexDir.c_str(), false); + + LOG(debug, "mergeField for field %s dir %s", + indexName.c_str(), indexDir.c_str()); + + makeTmpDirs(); + + if (!renumberFieldWordIds(index)) { + LOG(error, "Could not renumber field word ids for field %s dir %s", + indexName.c_str(), indexDir.c_str()); + return false; + } + + // Tokamak + bool res = mergeFieldPostings(index); + if (!res) { + LOG(error, "Could not merge field postings for field %s dir %s", + indexName.c_str(), indexDir.c_str()); + abort(); + } + if (!FileKit::createStamp(indexDir + "/.mergeocc_done")) + return false; + + if (!CleanTmpDirs()) + return false; + + LOG(debug, "Finished mergeField for field %s dir %s", + indexName.c_str(), indexDir.c_str()); + + return true; +} + +/* + * Decide whether features can be copied in raw form from reader to + * writer. Raw copying requires that both report the same nonempty + * "encoding", that all feature parameters are equal and that the + * reader allows raw features. Differing "cookedEncoding" values make + * fusion impossible and give false. Invalid readers are accepted + * without checks. + */ +template +bool +Fusion::selectCookedOrRawFeatures(Reader &reader, Writer &writer) +{ + bool rawFormatOK = true; + bool cookedFormatOK = true; + PostingListParams featureParams; + PostingListParams outFeatureParams; + vespalib::string cookedFormat; + vespalib::string rawFormat; + + if (!reader.isValid()) + return true; + { + writer.getFeatureParams(featureParams); + cookedFormat = featureParams.getStr("cookedEncoding"); + rawFormat = featureParams.getStr("encoding"); + if (rawFormat == "") + rawFormatOK = false; // Typically uncompressed file + outFeatureParams = featureParams; + } + { + reader.getFeatureParams(featureParams); + if (cookedFormat !=
featureParams.getStr("cookedEncoding")) + cookedFormatOK = false; + if (rawFormat != featureParams.getStr("encoding")) + rawFormatOK = false; + if (featureParams != outFeatureParams) + rawFormatOK = false; + if (!reader.allowRawFeatures()) + rawFormatOK = false; // Reader transforms data + } + if (!cookedFormatOK) { + LOG(error, + "Cannot perform fusion, cooked feature formats don't match"); + return false; + } + if (rawFormatOK) { + /* Ask the reader for raw features, then read the parameters back to verify the switch */ + featureParams.clear(); + featureParams.set("cooked", false); + reader.setFeatureParams(featureParams); + reader.getFeatureParams(featureParams); + if (featureParams.isSet("cookedEncoding") || + rawFormat != featureParams.getStr("encoding")) + rawFormatOK = false; + if (!rawFormatOK) { + LOG(error, "Cannot perform fusion, raw format setting failed"); + return false; + } + LOG(debug, "Using raw feature format for fusion of posting files"); + } + return true; +} + + +/** + * Open one field reader per old index that contains the field. + * + * Old indexes for which index.hasOldFields() is false are skipped. Every + * reader is set up with the word number mapping and doc id mapping of its + * old index before it is opened on the field directory of that index. + * + * @param index the field to open readers for + * @param readers receives the opened readers + * @return false if a reader could not be opened, true otherwise + */ +bool +Fusion::openInputFieldReaders(const SchemaUtil::IndexIterator &index, + std::vector > & + readers) +{ + vespalib::string indexName = index.getName(); + for (auto &i : _oldIndexes) { + OldIndex &oi = *i; + const Schema &oldSchema = oi.getSchema(); + if (!index.hasOldFields(oldSchema, false)) { + continue; // drop data + } + auto reader = FieldReader::allocFieldReader(index, oldSchema); + reader->setup(oi.getWordNumMapping(), + oi.getDocIdMapping()); + if (!reader->open(oi.getPath() + "/" + + indexName + "/", + _tuneFileIndexing._read)) + return false; + readers.push_back(std::move(reader)); + } + return true; +} + + +/** + * Open the writer on the field directory below _outDir. + * + * NOTE(review): the meaning of the constants 64 and 262144 passed to + * earlyOpen() is defined by FieldWriter and is not visible here. + * + * A failing lateOpen() is logged and followed by abort(), so the + * "return false" after it is not reached. + * + * @param index the field to write + * @param writer the writer to open + * @return true when the writer was opened + */ +bool +Fusion::openFieldWriter(const SchemaUtil::IndexIterator &index, + FieldWriter &writer) +{ + vespalib::string dir = _outDir + "/" + index.getName(); + + writer.earlyOpen(dir + "/", + 64, + 262144, + _dynamicKPosIndexFormat, + index.getSchema(), + index.getIndex(), + _tuneFileIndexing._write); + // No checkpointing + if (!writer.lateOpen(_tuneFileIndexing._write, + _fileHeaderContext)) { + LOG(error, "Could not open output posocc + dictionary in %s", + 
dir.c_str()); + abort(); + return false; + } + return true; +} + + +/** + * Prepare the readers for merging and add them to the heap. + * + * For each reader the feature format is checked against the writer through + * selectCookedOrRawFeatures(). A valid reader is then read once, and it is + * added to the heap only if it is still valid after that read. + * + * @param readers the opened field readers + * @param writer the output writer + * @param heap receives the readers that are valid after the read + * @return false if selectCookedOrRawFeatures() fails for a reader, true + * otherwise + */ +bool +Fusion::setupMergeHeap(const std::vector > & + readers, + FieldWriter &writer, + PostingPriorityQueue &heap) +{ + for (auto &reader : readers) { + if (!selectCookedOrRawFeatures(*reader, writer)) + return false; + if (reader->isValid()) + reader->read(); + if (reader->isValid()) + heap.initialAdd(reader.get()); + } + return true; +} + + +/** + * Merge the postings of one field from the old indexes into the output. + * + * Opens the input readers and the output writer, fills the heap, merges it + * into the writer and finally closes readers and writer. + * + * @param index the field to merge + * @return false if opening the readers or the writer, the heap setup or + * closing a reader fails. A writer that cannot be closed is logged + * and followed by abort(). + */ +bool +Fusion::mergeFieldPostings(const SchemaUtil::IndexIterator &index) +{ + std::vector> readers; + PostingPriorityQueue heap; + /* OUTPUT */ + FieldWriter fieldWriter(_docIdLimit, _numWordIds); + vespalib::string indexName = index.getName(); + + if (!openInputFieldReaders(index, readers)) + return false; + if (!openFieldWriter(index, fieldWriter)) + return false; + if (!setupMergeHeap(readers, fieldWriter, heap)) + return false; + + heap.merge(fieldWriter, 4); + /* merge() is expected to have drained the heap */ + assert(heap.empty()); + + for (auto &reader : readers) { + if (!reader->close()) + return false; + } + if (!fieldWriter.close()) { + LOG(error, "Could not close output posocc + dictionary in %s/%s", + _outDir.c_str(), indexName.c_str()); + abort(); + } + return true; +} + + +/** + * Reload the word number mapping of every old index. + * + * All mappings are cleared first. An old index whose schema yields no index + * ids for Schema::STRING is marked with noMappingFile(). When index is given, + * old indexes without that field are skipped. For the remaining ones the file + * "old2new.dat" in the tmp directory of the old index is read. + * + * @param index field to restrict the reload to, may be NULL + * @return false if SchemaUtil::getIndexIds() fails, true otherwise + */ +bool +Fusion::ReadMappingFiles(const SchemaUtil::IndexIterator *index) +{ + ReleaseMappingTables(); + + size_t numberOfOldIndexes = _oldIndexes.size(); + for (uint32_t i = 0; i < numberOfOldIndexes; i++) + { + OldIndex &oi = *_oldIndexes[i]; + WordNumMapping &wordNumMapping = oi.getWordNumMapping(); + std::vector oldIndexes; + const Schema &oldSchema = oi.getSchema(); + if (!SchemaUtil::getIndexIds(oldSchema, + Schema::STRING, + oldIndexes)) + return false; + if (oldIndexes.empty()) { + wordNumMapping.noMappingFile(); + continue; + } + if (index && !index->hasOldFields(oldSchema, false)) { + continue; // drop data + } + + // Open word mapping file + vespalib::string old2newname = oi.getTmpPath() + + "/old2new.dat"; + wordNumMapping.readMappingFile(old2newname, _tuneFileIndexing._read); + } + + return true; +} + + +/** + * Clear the word number mapping of every old index. + * + * @return always true + */ +bool 
+Fusion::ReleaseMappingTables() +{ + size_t numberOfOldIndexes = _oldIndexes.size(); + for (uint32_t i = 0; i < numberOfOldIndexes; i++) + { + OldIndex &oi = *_oldIndexes[i]; + oi.getWordNumMapping().clear(); + } + return true; +} + + +void +Fusion::makeTmpDirs() +{ + for (auto &i : getOldIndexes()) { + OldIndex &oi = *i; + // Make tmpindex directories + const vespalib::string &tmpindexpath = oi.getTmpPath(); + vespalib::mkdir(tmpindexpath, false); + } +} + +bool +Fusion::CleanTmpDirs(void) +{ + uint32_t i = 0; + for (;;) { + std::ostringstream tmpindexpath0; + tmpindexpath0 << _outDir; + tmpindexpath0 << "/tmpindex"; + tmpindexpath0 << i; + const vespalib::string &tmpindexpath = tmpindexpath0.str(); + FastOS_StatInfo statInfo; + if (!FastOS_File::Stat(tmpindexpath.c_str(), &statInfo)) { + if (statInfo._error == FastOS_StatInfo::FileNotFound) + break; + LOG(error, "Failed to stat tmpdir %s", tmpindexpath.c_str()); + return false; + } + i++; + } + while (i > 0) { + i--; + // Remove tmpindex directories + std::ostringstream tmpindexpath0; + tmpindexpath0 << _outDir; + tmpindexpath0 << "/tmpindex"; + tmpindexpath0 << i; + const vespalib::string &tmpindexpath = tmpindexpath0.str(); + search::DirectoryTraverse dt(tmpindexpath.c_str()); + if (!dt.RemoveTree()) { + LOG(error, "Failed to clean tmpdir %s", tmpindexpath.c_str()); + return false; + } + } + return true; +} + + +bool +Fusion::checkSchemaCompat(void) +{ + return true; +} + + +bool +Fusion::readSchemaFiles(void) +{ + OldIndexIterator oldIndexIt = _oldIndexes.begin(); + OldIndexIterator oldIndexIte = _oldIndexes.end(); + + for(; oldIndexIt != oldIndexIte; ++oldIndexIt) { + OldIndex &oi = **oldIndexIt; + vespalib::string oldcfname = oi.getPath() + "/schema.txt"; + Schema::SP schema(new Schema); + if (!schema->loadFromFile(oldcfname)) + return false; + if (!SchemaUtil::validateSchema(*_schema)) + return false; + oi.setSchema(schema); + } + + /* TODO: Check compatibility */ + bool res = checkSchemaCompat(); + if 
(!res) + LOG(error, "Index fusion cannot continue due to incompatible indexes"); + return res; +} + + +/** + * Merge the source indexes into a new index in dir. + * + * The output doc id limit starts as the selector size and is trimmed while + * the last remaining selector entry is not less than the number of sources. + * An existing dir must be a directory and is removed before dir is created + * again. The schema and the trimmed doc id limit are written to dir. A + * Fusion object is then set up with the sources, the schemas of the old + * indexes are read, a doc id mapping is set up for every old index and all + * fields are merged. + * + * NOTE(review): the results of vespalib::mkdir() and schema.saveToFile() + * are not used here. + * + * @param schema schema of the output index + * @param dir output directory + * @param sources the old indexes to merge, at most 255 (asserted) + * @param selector one entry per doc id; presumably the number of the source + * selected for that doc id - confirm against the caller + * @param dynamicKPosOccFormat passed on to the Fusion constructor + * @param tuneFileIndexing passed on to the Fusion constructor + * @param fileHeaderContext passed on to the Fusion constructor + * @return false on any failure that is checked, true otherwise + */ +bool +Fusion::merge(const Schema &schema, + const vespalib::string &dir, + const std::vector &sources, + const SelectorArray &selector, + bool dynamicKPosOccFormat, + const TuneFileIndexing &tuneFileIndexing, + const FileHeaderContext &fileHeaderContext) +{ + assert(sources.size() <= 255); + uint32_t docIdLimit = selector.size(); + uint32_t trimmedDocIdLimit = docIdLimit; + + // Limit docIdLimit in output based on selections that cannot be satisfied + uint32_t sourcesSize = sources.size(); + while (trimmedDocIdLimit > 0 && + selector[trimmedDocIdLimit - 1] >= sourcesSize) + --trimmedDocIdLimit; + + FastOS_StatInfo statInfo; + if (!FastOS_File::Stat(dir.c_str(), &statInfo)) { + if (statInfo._error != FastOS_StatInfo::FileNotFound) { + LOG(error, "Could not stat \"%s\"", dir.c_str()); + return false; + } + } else { + if (!statInfo._isDirectory) { + LOG(error, "\"%s\" is not a directory", dir.c_str()); + return false; + } + search::DirectoryTraverse dt(dir.c_str()); + if (!dt.RemoveTree()) { + LOG(error, "Failed to clean directory \"%s\"", dir.c_str()); + return false; + } + } + + vespalib::mkdir(dir, false); + schema.saveToFile(dir + "/schema.txt"); + if (!DocumentSummary::writeDocIdLimit(dir, trimmedDocIdLimit)) { + LOG(error, "Could not write docsum count in dir %s: %s", + dir.c_str(), getLastErrorString().c_str()); + return false; + } + + std::unique_ptr fusion(new Fusion(dynamicKPosOccFormat, + tuneFileIndexing, + fileHeaderContext)); + fusion->setSchema(&schema); + fusion->setOutDir(dir); + fusion->SetOldIndexList(sources); + if (!fusion->readSchemaFiles()) { + LOG(error, "Cannot read schema files for source indexes"); + return false; + } + uint32_t idx = 0; + std::vector > &oldIndexes = + fusion->getOldIndexes(); + + for (OldIndexIterator i = oldIndexes.begin(), ie = oldIndexes.end(); + i != ie; ++i, ++idx) { + OldIndex &oi = **i; + // Make tmpindex 
directories + const vespalib::string &tmpindexpath = oi.getTmpPath(); + vespalib::mkdir(tmpindexpath, false); + DocIdMapping &docIdMapping = oi.getDocIdMapping(); + if (!docIdMapping.readDocIdLimit(oi.getPath())) { + LOG(error, "Cannot determine docIdLimit for old index \"%s\"", + oi.getPath().c_str()); + return false; + } + docIdMapping.setup(docIdMapping._docIdLimit, + &selector, + idx); + } + fusion->setDocIdLimit(trimmedDocIdLimit); + if (!fusion->mergeFields()) + return false; + return true; +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/fusion.h b/searchlib/src/vespa/searchlib/diskindex/fusion.h new file mode 100644 index 00000000000..1cc23c61f10 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/fusion.h @@ -0,0 +1,265 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include "docidmapper.h" +#include "wordnummapper.h" + +#include +#include +#include + +namespace search +{ + +template +class PostingPriorityQueue; + +namespace common +{ + +class TuneFileIndexing; +class FileHeaderContext; + +} + +namespace diskindex +{ + +class FieldReader; +class FieldWriter; +class DictionaryWordReader; + +/** + * State kept for one input index: its path, its tmp path, its word number + * mapping, its doc id mapping and its schema. + */ +class FusionInputIndex +{ +public: + typedef diskindex::WordNumMapping WordNumMapping; + typedef diskindex::DocIdMapping DocIdMapping; +private: + vespalib::string _path; + WordNumMapping _wordNumMapping; + DocIdMapping _docIdMapping; + vespalib::string _tmpPath; + index::Schema::SP _schema; + +public: + /* All members are default constructed; the schema pointer starts out unset */ + FusionInputIndex(void) + : _path(), + _wordNumMapping(), + _docIdMapping(), + _tmpPath(), + _schema() + { + } + + virtual + ~FusionInputIndex(void) + { + } + + void + setPath(const vespalib::string &path) + { + _path = path; + } + + const vespalib::string & + getPath(void) const + { + return _path; + } + + 
void + setTmpPath(const vespalib::string &tmpPath) + { + _tmpPath = tmpPath; + } + + const vespalib::string & + getTmpPath(void) const + { + return _tmpPath; + } + + const WordNumMapping & + getWordNumMapping(void) const + { + return _wordNumMapping; + } + + WordNumMapping & + getWordNumMapping(void) + { + return _wordNumMapping; + } + + const DocIdMapping & + getDocIdMapping(void) const + { + return _docIdMapping; + } + + DocIdMapping & + getDocIdMapping(void) + { + return _docIdMapping; + } + + const index::Schema & + getSchema(void) const + { + assert(_schema.get() != NULL); + return *_schema.get(); + } + + void + setSchema(const index::Schema::SP &schema); +}; + + +class Fusion +{ +public: + typedef search::index::Schema Schema; + typedef search::index::SchemaUtil SchemaUtil; + +private: + Fusion(const Fusion &); + Fusion& operator=(const Fusion &); + +public: + Fusion(bool dynamicKPosIndexFormat, + const TuneFileIndexing &tuneFileIndexing, + const search::common::FileHeaderContext &fileHeaderContext); + + virtual + ~Fusion(void); + + void SetOldIndexList(const std::vector &oldIndexList); + + bool mergeFields(); + bool mergeField(uint32_t id); + bool openInputFieldReaders(const SchemaUtil::IndexIterator &index, + std::vector > & + readers); + bool openFieldWriter(const SchemaUtil::IndexIterator &index, + FieldWriter &writer); + bool setupMergeHeap(const std::vector > & + readers, + FieldWriter &writer, + PostingPriorityQueue &heap); + bool mergeFieldPostings(const SchemaUtil::IndexIterator &index); + bool openInputWordReaders(const SchemaUtil::IndexIterator &index, + std::vector< + std::unique_ptr > & + readers, + PostingPriorityQueue &heap); + bool renumberFieldWordIds(const SchemaUtil::IndexIterator &index); + + void + setSchema(const Schema *schema); + + void + setOutDir(const vespalib::string &outDir); + + void makeTmpDirs(); + + bool CleanTmpDirs(void); + + bool + readSchemaFiles(void); + + bool + checkSchemaCompat(void); + + template + static bool + 
selectCookedOrRawFeatures(Reader &reader, Writer &writer); + +protected: + bool ReadMappingFiles(const SchemaUtil::IndexIterator *index); + bool ReleaseMappingTables(); + + static unsigned int noGen(void) + { + return static_cast(-1); + } + +protected: + + typedef FusionInputIndex OldIndex; + + const Schema *_schema; // External ownership + std::vector > _oldIndexes; + typedef std::vector >::iterator + OldIndexIterator; + + // OUTPUT: + + uint32_t _docIdLimit; + uint64_t _numWordIds; + + // Index format parameters. + bool _dynamicKPosIndexFormat; + + // Index location parameters + + /* + * Output location + */ + vespalib::string _outDir; + + const TuneFileIndexing &_tuneFileIndexing; + const search::common::FileHeaderContext &_fileHeaderContext; + + const Schema & + getSchema(void) const + { + assert(_schema != NULL); + return *_schema; + } +public: + + void + setDocIdLimit(uint32_t docIdLimit) + { + _docIdLimit = docIdLimit; + } + + void + setNumWordIds(uint64_t numWordIds) + { + _numWordIds = numWordIds; + } + + std::vector > & + getOldIndexes(void) + { + return _oldIndexes; + } + + virtual OldIndex * + allocOldIndex(void) + { + return new OldIndex; + } + + /** + * This method is used by new indexing pipeline to merge indexes. + */ + static bool + merge(const Schema &schema, + const vespalib::string &dir, + const std::vector &sources, + const SelectorArray &docIdSelector, + bool dynamicKPosOccFormat, + const TuneFileIndexing &tuneFileIndexing, + const search::common::FileHeaderContext &fileHeaderContext); +}; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp new file mode 100644 index 00000000000..a88ce029814 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp @@ -0,0 +1,720 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include "indexbuilder.h" +#include +#include +#include +#include +#include + +LOG_SETUP(".diskindex.indexbuilder"); + + +namespace search +{ + +namespace diskindex +{ + +namespace +{ + +using index::DocIdAndFeatures; +using index::PostingListCounts; +using index::Schema; +using index::SchemaUtil; +using index::WordDocElementFeatures; +using common::FileHeaderContext; +using vespalib::getLastErrorString; + +static uint32_t +noWordPos(void) +{ + return std::numeric_limits::max(); +} + + +class FileHandle +{ +public: + FieldWriter *_fieldWriter; + DocIdAndFeatures _docIdAndFeatures; + + FileHandle(void); + + ~FileHandle(void); + + void + open(const vespalib::stringref &dir, + const SchemaUtil::IndexIterator &index, + uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext); + + void + close(void); +}; + + +} + +inline IndexBuilder::FieldHandle & +IndexBuilder::getIndexFieldHandle(uint32_t fieldId) +{ + return _fields[fieldId]; +} + + +class IndexBuilder::FieldHandle +{ +public: + FieldHandle(const Schema &schema, + uint32_t fieldId, + IndexBuilder *ib); + + ~FieldHandle(void); + + static uint32_t + noDocRef(void) + { + return std::numeric_limits::max(); + } + + static uint32_t + noElRef(void) + { + return std::numeric_limits::max(); + } + + class FHWordDocFieldFeatures + { + public: + uint32_t _docId; + uint32_t _numElements; + + FHWordDocFieldFeatures(uint32_t docId) + : _docId(docId), + _numElements(0u) + { + } + + uint32_t + getDocId(void) const + { + return _docId; + } + + uint32_t + getNumElements(void) const + { + return _numElements; + } + + void + incNumElements(void) + { + ++_numElements; + } + }; + + class FHWordDocElementFeatures + : public WordDocElementFeatures + { + public: + uint32_t _docRef; + + FHWordDocElementFeatures(uint32_t elementId, + int32_t weight, + uint32_t elementLen, + uint32_t docRef) + : WordDocElementFeatures(elementId), + _docRef(docRef) + { 
+ setWeight(weight); + setElementLen(elementLen); + } + }; + + class FHWordDocElementWordPosFeatures + : public WordDocElementWordPosFeatures + { + public: + uint32_t _elementRef; + + FHWordDocElementWordPosFeatures( + const WordDocElementWordPosFeatures &features, + uint32_t elementRef) + : WordDocElementWordPosFeatures(features), + _elementRef(elementRef) + { + } + }; + + typedef vespalib::Array FHWordDocFieldFeaturesVector; + typedef vespalib::Array FHWordDocElementFeaturesVector; + typedef vespalib::Array FHWordDocElementWordPosFeaturesVector; + + FHWordDocFieldFeaturesVector _wdff; + FHWordDocElementFeaturesVector _wdfef; + FHWordDocElementWordPosFeaturesVector _wdfepf; + + uint32_t _docRef; + uint32_t _elRef; + bool _valid; + const Schema *_schema; // Ptr to allow being std::vector member + uint32_t _fieldId; + IndexBuilder *_ib; // Ptr to allow being std::vector member + + uint32_t _lowestOKElementId; + uint32_t _lowestOKWordPos; + + FileHandle _files; + + void + startWord(const vespalib::stringref &word); + + void + endWord(void); + + void + startDocument(uint32_t docId); + + void + endDocument(void); + + void + startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen); + + void + endElement(void); + + void + addOcc(const WordDocElementWordPosFeatures &features); + + void + setValid(void) + { + _valid = true; + } + + bool + getValid(void) const + { + return _valid; + } + + const Schema::IndexField & + getSchemaField(void); + + const vespalib::string & + getName(void); + + vespalib::string + getDir(void); + + void + open(uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext); + + void + close(void); + + uint32_t + getIndexId(void) const + { + return _fieldId; + } +}; + + +namespace { + +class SingleIterator +{ +public: + typedef IndexBuilder::FieldHandle FH; + FH::FHWordDocFieldFeaturesVector::const_iterator _dFeatures; + FH::FHWordDocFieldFeaturesVector::const_iterator 
_dFeaturesE; + FH::FHWordDocElementFeaturesVector::const_iterator _elFeatures; + FH::FHWordDocElementWordPosFeaturesVector::const_iterator _pFeatures; + uint32_t _docId; + uint32_t _localFieldId; + + SingleIterator(FH &fieldHandle, uint32_t localFieldId); + + void + appendFeatures(DocIdAndFeatures &features); + + bool + isValid(void) const + { + return _dFeatures != _dFeaturesE; + } + + bool + operator<(const SingleIterator &rhs) const + { + if (_docId != rhs._docId) + return _docId < rhs._docId; + return _localFieldId < rhs._localFieldId; + } +}; + + +} + + +FileHandle::FileHandle(void) + : _fieldWriter(NULL), + _docIdAndFeatures() +{ +} + + +FileHandle::~FileHandle(void) +{ + delete _fieldWriter; +} + + +void +FileHandle::open(const vespalib::stringref &dir, + const SchemaUtil::IndexIterator &index, + uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + assert(_fieldWriter == NULL); + + _fieldWriter = new FieldWriter(docIdLimit, numWordIds); + + _fieldWriter->earlyOpen(dir + "/", 64, 262144u, false, + index.getSchema(), index.getIndex(), + tuneFileWrite); + + // No checkpointing + + if (!_fieldWriter->lateOpen(tuneFileWrite, fileHeaderContext)) { + LOG(error, "Could not open term writer %s for write (%s)", + dir.c_str(), getLastErrorString().c_str()); + abort(); + } +} + + +void +FileHandle::close(void) +{ + bool ret = true; + if (_fieldWriter != NULL) { + bool closeRes = _fieldWriter->close(); + delete _fieldWriter; + _fieldWriter = NULL; + if (!closeRes) { + LOG(error, + "Could not close term writer"); + ret = false; + } + } + assert(ret); + (void) ret; +} + + +IndexBuilder::FieldHandle::FieldHandle(const Schema &schema, + uint32_t fieldId, + IndexBuilder *ib) + : _wdff(), + _wdfef(), + _wdfepf(), + _docRef(noDocRef()), + _elRef(noElRef()), + _valid(false), + _schema(&schema), + _fieldId(fieldId), + _ib(ib), + _lowestOKElementId(0u), + _lowestOKWordPos(0u), + _files() +{ +} + + 
+IndexBuilder::FieldHandle::~FieldHandle(void) +{ +} + + +void +IndexBuilder::FieldHandle::startWord(const vespalib::stringref &word) +{ + assert(_valid); + _files._fieldWriter->newWord(word); +} + + +void +IndexBuilder::FieldHandle::endWord() +{ + DocIdAndFeatures &features = _files._docIdAndFeatures; + SingleIterator si(*this, 0u); + for (; si.isValid();) { + features.clear(si._docId); + si.appendFeatures(features); + _files._fieldWriter->add(features); + } + assert(si._elFeatures == _wdfef.end()); + assert(si._pFeatures == _wdfepf.end()); + _wdff.clear(); + _wdfef.clear(); + _wdfepf.clear(); + _docRef = noDocRef(); + _elRef = noElRef(); +} + + +void +IndexBuilder::FieldHandle::startDocument(uint32_t docId) +{ + assert(_docRef == noDocRef()); + assert(_wdff.empty() || _wdff.back().getDocId() < docId); + _wdff.push_back(FHWordDocFieldFeatures(docId)); + _docRef = _wdff.size() - 1; + _lowestOKElementId = 0u; +} + + +void +IndexBuilder::FieldHandle::endDocument(void) +{ + assert(_docRef != noDocRef()); + assert(_elRef == noElRef()); + FHWordDocFieldFeatures &ff = _wdff[_docRef]; + assert(ff.getNumElements() > 0); + (void) ff; + _docRef = noDocRef(); +} + + +void +IndexBuilder::FieldHandle:: +startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) +{ + assert(_docRef != noDocRef()); + assert(_elRef == noElRef()); + assert(elementId >= _lowestOKElementId); + + FHWordDocFieldFeatures &ff = _wdff[_docRef]; + _wdfef.push_back( + FHWordDocElementFeatures(elementId, + weight, + elementLen, + _docRef)); + ff.incNumElements(); + _elRef = _wdfef.size() - 1; + _lowestOKWordPos = 0u; +} + + +void +IndexBuilder::FieldHandle::endElement(void) +{ + assert(_elRef != noElRef()); + FHWordDocElementFeatures &ef = _wdfef[_elRef]; + assert(ef.getNumOccs() > 0); + _elRef = noElRef(); + _lowestOKElementId = ef.getElementId() + 1; +} + + +void +IndexBuilder::FieldHandle:: +addOcc(const WordDocElementWordPosFeatures &features) +{ + assert(_elRef != noElRef()); + 
FHWordDocElementFeatures &ef = _wdfef[_elRef]; + uint32_t wordPos = features.getWordPos(); + assert(wordPos < ef.getElementLen()); + assert(wordPos >= _lowestOKWordPos); + _lowestOKWordPos = wordPos; + _wdfepf.push_back( + FHWordDocElementWordPosFeatures(features, + _elRef)); + ef.incNumOccs(); +} + + +const Schema::IndexField & +IndexBuilder::FieldHandle::getSchemaField(void) +{ + return _schema->getIndexField(_fieldId); +} + + +const vespalib::string & +IndexBuilder::FieldHandle::getName(void) +{ + return getSchemaField().getName(); + +} + + +vespalib::string +IndexBuilder::FieldHandle::getDir(void) +{ + return _ib->appendToPrefix(getName()); +} + + +void +IndexBuilder::FieldHandle::open(uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + _files.open(getDir(), + SchemaUtil::IndexIterator(*_schema, getIndexId()), + docIdLimit, numWordIds, tuneFileWrite, fileHeaderContext); +} + + +void +IndexBuilder::FieldHandle::close(void) +{ + _files.close(); +} + + +SingleIterator::SingleIterator(FH &fieldHandle, uint32_t localFieldId) + : _dFeatures(fieldHandle._wdff.begin()), + _dFeaturesE(fieldHandle._wdff.end()), + _elFeatures(fieldHandle._wdfef.begin()), + _pFeatures(fieldHandle._wdfepf.begin()), + _docId(_dFeatures->getDocId()), + _localFieldId(localFieldId) +{ +} + + +void +SingleIterator::appendFeatures(DocIdAndFeatures &features) +{ + uint32_t elCount = _dFeatures->getNumElements(); + for (uint32_t elId = 0; elId < elCount; ++elId, ++_elFeatures) { + features._elements.push_back(*_elFeatures); + features._elements.back().setNumOccs(0); + uint32_t posCount = _elFeatures->getNumOccs(); + uint32_t lastWordPos = noWordPos(); + for (uint32_t posId = 0; posId < posCount; ++posId, ++_pFeatures) { + uint32_t wordPos = _pFeatures->getWordPos(); + if (wordPos != lastWordPos) { + lastWordPos = wordPos; + features._elements.back().incNumOccs(); + features._wordPositions.push_back(*_pFeatures); + } + 
} + } + ++_dFeatures; + if (_dFeatures != _dFeaturesE) + _docId = _dFeatures->getDocId(); +} + + +IndexBuilder::IndexBuilder(const Schema &schema) + : index::IndexBuilder(schema), + _currentField(NULL), + _curDocId(noDocId()), + _lowestOKDocId(1u), + _curWord(), + _inWord(false), + _lowestOKFieldId(0u), + _fields(), + _prefix(), + _docIdLimit(0u), + _numWordIds(0u), + _schema(schema) +{ + // TODO: Filter for text indexes + for (uint32_t i = 0, ie = schema.getNumIndexFields(); i < ie; ++i) { + const Schema::IndexField &iField = schema.getIndexField(i); + FieldHandle fh(schema, i, this); + // Only know how to handle string index for now. + if (iField.getDataType() == Schema::STRING) + fh.setValid(); + _fields.push_back(fh); + } +} + + +IndexBuilder::~IndexBuilder(void) +{ +} + + +void +IndexBuilder::startWord(const vespalib::stringref &word) +{ + assert(_currentField != nullptr); + assert(!_inWord); + // TODO: Check sort order + _curWord = word; + _inWord = true; + _currentField->startWord(word); +} + + +void +IndexBuilder::endWord(void) +{ + assert(_inWord); + assert(_currentField != NULL); + _currentField->endWord(); + _inWord = false; + _lowestOKDocId = 1u; +} + + +void +IndexBuilder::startDocument(uint32_t docId) +{ + assert(_curDocId == noDocId()); + assert(docId >= _lowestOKDocId); + assert(docId < _docIdLimit); + assert(_currentField != NULL); + _curDocId = docId; + assert(_curDocId != noDocId()); + _currentField->startDocument(docId); +} + + +void +IndexBuilder::endDocument(void) +{ + assert(_curDocId != noDocId()); + assert(_currentField != NULL); + _currentField->endDocument(); + _lowestOKDocId = _curDocId + 1; + _curDocId = noDocId(); +} + + +void +IndexBuilder::startField(uint32_t fieldId) +{ + assert(_curDocId == noDocId()); + assert(_currentField == NULL); + assert(fieldId < _fields.size()); + assert(fieldId >= _lowestOKFieldId); + _currentField = &_fields[fieldId]; + assert(_currentField != NULL); +} + + +void +IndexBuilder::endField(void) +{ + 
assert(_curDocId == noDocId()); + assert(!_inWord); + assert(_currentField != NULL); + _lowestOKFieldId = _currentField->_fieldId + 1; + _currentField = NULL; +} + + +void +IndexBuilder::startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) +{ + assert(_currentField != NULL); + _currentField->startElement(elementId, weight, elementLen); +} + + +void +IndexBuilder::endElement(void) +{ + assert(_currentField != NULL); + _currentField->endElement(); +} + + +void +IndexBuilder::addOcc(const WordDocElementWordPosFeatures &features) +{ + assert(_currentField != NULL); + _currentField->addOcc(features); +} + + +void +IndexBuilder::setPrefix(const vespalib::stringref &prefix) +{ + _prefix = prefix; +} + + +vespalib::string +IndexBuilder::appendToPrefix(const vespalib::stringref &name) +{ + if (_prefix.empty()) + return name; + return _prefix + "/" + name; +} + + +void +IndexBuilder::open(uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileIndexing &tuneFileIndexing, + const FileHeaderContext &fileHeaderContext) +{ + std::vector indexes; + + _docIdLimit = docIdLimit; + _numWordIds = numWordIds; + if (!_prefix.empty()) { + vespalib::mkdir(_prefix, false); + } + // TODO: Filter for text indexes + for (FieldHandle & fh : _fields) { + if (!fh.getValid()) + continue; + vespalib::mkdir(fh.getDir(), false); + fh.open(docIdLimit, numWordIds, tuneFileIndexing._write, + fileHeaderContext); + indexes.push_back(fh.getIndexId()); + } + vespalib::string schemaFile = appendToPrefix("schema.txt"); + if (!_schema.saveToFile(schemaFile)) { + LOG(error, "Cannot save schema to \"%s\"", schemaFile.c_str()); + abort(); + } +} + + +void +IndexBuilder::close(void) +{ + // TODO: Filter for text indexes + for (FieldHandle & fh : _fields) { + if (fh.getValid()) { + fh.close(); + } + } + if (!docsummary::DocumentSummary::writeDocIdLimit(_prefix, _docIdLimit)) { + LOG(error, "Could not write docsum count in dir %s: %s", + _prefix.c_str(), getLastErrorString().c_str()); + 
abort(); + } +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h new file mode 100644 index 00000000000..cf8735154f8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace diskindex +{ + +class BitVectorCandidate; + +class IndexBuilder : public index::IndexBuilder +{ +public: + class FieldHandle; + + typedef index::Schema Schema; +private: + // Text fields + FieldHandle *_currentField; + uint32_t _curDocId; + uint32_t _lowestOKDocId; + vespalib::string _curWord; + bool _inWord; + uint32_t _lowestOKFieldId; + std::vector _fields; // Defined fields. + vespalib::string _prefix; + uint32_t _docIdLimit; + uint64_t _numWordIds; + + const Schema &_schema; // Ptr to allow being std::vector member + + static uint32_t + noDocId(void) + { + return std::numeric_limits::max(); + } + + static uint64_t + noWordNumHigh(void) + { + return std::numeric_limits::max(); + } + +public: + typedef index::WordDocElementWordPosFeatures + WordDocElementWordPosFeatures; + + // schema argument must live until indexbuilder has been deleted. 
+ IndexBuilder(const Schema &schema); + + virtual + ~IndexBuilder(void); + + virtual void + startWord(const vespalib::stringref &word) override; + + virtual void + endWord(void) override; + + virtual void + startDocument(uint32_t docId) override; + + virtual void + endDocument(void) override; + + virtual void + startField(uint32_t fieldId) override; + + virtual void + endField(void) override; + + virtual void + startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) + override; + + virtual void + endElement(void) override; + + virtual void + addOcc(const WordDocElementWordPosFeatures &features) override; + + // TODO: methods for attribute vectors. + + // TODO: methods for document summary. + inline FieldHandle & + getIndexFieldHandle(uint32_t fieldId); + + void + setPrefix(const vespalib::stringref &prefix); + + vespalib::string + appendToPrefix(const vespalib::stringref &name); + + void + open(uint32_t docIdLimit, uint64_t numWordIds, + const TuneFileIndexing &tuneFileIndexing, + const search::common::FileHeaderContext &fileHandleContext); + + void + close(void); +}; + + + +} // namespace diskindex + +} // namespace search + + + diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp new file mode 100644 index 00000000000..495654fa5cc --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.cpp @@ -0,0 +1,738 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".diskindex.pagedict4file"); +#include "pagedict4file.h" +#include +#include +#include +#include + +namespace +{ + +vespalib::string myPId("PageDict4P.1"); +vespalib::string mySPId("PageDict4SP.1"); +vespalib::string mySSId("PageDict4SS.1"); +vespalib::string emptyId; + +} + +using search::common::FileHeaderContext; +using search::index::PostingListParams; +using vespalib::getLastErrorString; + +namespace search +{ + +namespace diskindex +{ + +namespace +{ + +const uint32_t headerAlign = 4096; + +} + +PageDict4FileSeqRead::PageDict4FileSeqRead(void) + : _pReader(NULL), + _ssReader(NULL), + _ssd(), + _ssReadContext(_ssd), + _ssfile(), + _spd(), + _spReadContext(_spd), + _spfile(), + _pd(), + _pReadContext(_pd), + _pfile(), + _ssFileBitSize(0u), + _spFileBitSize(0u), + _pFileBitSize(0u), + _ssHeaderLen(0u), + _spHeaderLen(0u), + _pHeaderLen(0u), + _ssCompleted(false), + _spCompleted(false), + _pCompleted(false), + _wordNum(0u), + _checkPointData() +{ + _ssd.setReadContext(&_ssReadContext); + _spd.setReadContext(&_spReadContext); + _pd.setReadContext(&_pReadContext); +} + + +PageDict4FileSeqRead::~PageDict4FileSeqRead(void) +{ + delete _pReader; + delete _ssReader; +} + + +void +PageDict4FileSeqRead::readSSHeader() +{ + DC &ssd = _ssd; + + vespalib::FileHeader header; + uint32_t headerLen = ssd.readHeader(header, _ssfile.getSize()); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(!header.hasTag("format.1")); + assert(header.hasTag("numWordIds")); + assert(header.hasTag("avgBitsPerDoc")); + assert(header.hasTag("minChunkDocs")); + assert(header.hasTag("docIdLimit")); + assert(header.hasTag("endian")); + _ssCompleted = header.getTag("frozen").asInteger() != 0; + _ssFileBitSize = header.getTag("fileBitSize").asInteger(); + assert(header.getTag("format.0").asString() == mySSId); + ssd._numWordIds = header.getTag("numWordIds").asInteger(); + ssd._avgBitsPerDoc = 
header.getTag("avgBitsPerDoc").asInteger(); + ssd._minChunkDocs = header.getTag("minChunkDocs").asInteger(); + ssd._docIdLimit = header.getTag("docIdLimit").asInteger(); + + assert(header.getTag("endian").asString() == "big"); + ssd.smallAlign(64); + uint32_t minHeaderLen = header.getSize(); + minHeaderLen += (-minHeaderLen & 7); + assert(headerLen >= minHeaderLen); + assert(ssd.getReadOffset() == headerLen * 8); + _ssHeaderLen = headerLen; +} + + +void +PageDict4FileSeqRead::readSPHeader(void) +{ + DC &spd = _spd; + + vespalib::FileHeader header; + uint32_t headerLen = spd.readHeader(header, _spfile.getSize()); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(!header.hasTag("format.1")); + assert(header.hasTag("endian")); + _spCompleted = header.getTag("frozen").asInteger() != 0; + _spFileBitSize = header.getTag("fileBitSize").asInteger(); + assert(header.getTag("format.0").asString() == mySPId); + assert(header.getTag("endian").asString() == "big"); + spd.smallAlign(64); + uint32_t minHeaderLen = header.getSize(); + minHeaderLen += (-minHeaderLen & 7); + assert(headerLen >= minHeaderLen); + assert(spd.getReadOffset() == headerLen * 8); + _spHeaderLen = headerLen; +} + + +void +PageDict4FileSeqRead::readPHeader(void) +{ + DC &pd = _pd; + + vespalib::FileHeader header; + uint32_t headerLen = pd.readHeader(header, _pfile.getSize()); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(!header.hasTag("format.1")); + assert(header.hasTag("endian")); + _pCompleted = header.getTag("frozen").asInteger() != 0; + _pFileBitSize = header.getTag("fileBitSize").asInteger(); + assert(header.getTag("format.0").asString() == myPId); + assert(header.getTag("endian").asString() == "big"); + pd.smallAlign(64); + uint32_t minHeaderLen = header.getSize(); + minHeaderLen += (-minHeaderLen & 7); + assert(headerLen >= minHeaderLen); + 
assert(pd.getReadOffset() == headerLen * 8); + _pHeaderLen = headerLen; +} + + +void +PageDict4FileSeqRead::readWord(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) +{ + // Map to external ids and filter by what's present in the schema. + uint64_t checkWordNum = 0; + _pReader->readCounts(word, checkWordNum, counts); + if (checkWordNum != noWordNumHigh()) { + wordNum = ++_wordNum; + assert(wordNum == checkWordNum); + } else { + wordNum = noWordNumHigh(); + counts.clear(); + } +} + + +bool +PageDict4FileSeqRead::open(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead) +{ + if (tuneFileRead.getWantDirectIO()) { + _ssfile.EnableDirectIO(); + _spfile.EnableDirectIO(); + _pfile.EnableDirectIO(); + } + + vespalib::string pname = name + ".pdat"; + vespalib::string spname = name + ".spdat"; + vespalib::string ssname = name + ".ssdat"; + + if (!_ssfile.OpenReadOnly(ssname.c_str())) { + LOG(error, "could not open %s: %s", + _ssfile.GetFileName(), getLastErrorString().c_str()); + return false; + } + if (!_spfile.OpenReadOnly(spname.c_str())) { + LOG(error, "could not open %s: %s", + _spfile.GetFileName(), getLastErrorString().c_str()); + return false; + } + if (!_pfile.OpenReadOnly(pname.c_str())) { + LOG(error, "could not open %s: %s", + _pfile.GetFileName(), getLastErrorString().c_str()); + return false; + } + + _spReadContext.setFile(&_spfile); + _spReadContext.setFileSize(_spfile.GetSize()); + _spReadContext.allocComprBuf(65536u, 32768u); + _spd.emptyBuffer(0); + + _pReadContext.setFile(&_pfile); + _pReadContext.setFileSize(_pfile.GetSize()); + _pReadContext.allocComprBuf(65536u, 32768u); + _pd.emptyBuffer(0); + + uint64_t fileSize = _ssfile.GetSize(); + _ssReadContext.setFile(&_ssfile); + _ssReadContext.setFileSize(fileSize); + _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) / + sizeof(uint64_t), + 32768u); + _ssd.emptyBuffer(0); + + if (_checkPointData) { + 
_ssReadContext.setPosition(_ssReadContext.getCheckPointOffset()); + if (_ssd._valI >= _ssd._valE) + _ssReadContext.readComprBuffer(); + _spReadContext.setPosition(_spReadContext.getCheckPointOffset()); + if (_spd._valI >= _spd._valE) + _spReadContext.readComprBuffer(); + _pReadContext.setPosition(_pReadContext.getCheckPointOffset()); + if (_pd._valI >= _pd._valE) + _pReadContext.readComprBuffer(); + } else { + _ssReadContext.readComprBuffer(); + assert(_ssReadContext.getBufferEndFilePos() >= fileSize); + readSSHeader(); + _spReadContext.readComprBuffer(); + readSPHeader(); + _pReadContext.readComprBuffer(); + readPHeader(); + } + + + _ssReader = new SSReader(_ssReadContext, + _ssHeaderLen, + _ssFileBitSize, + _spHeaderLen, + _spFileBitSize, + _pHeaderLen, + _pFileBitSize); + + // Instantiate helper class for reading + _pReader = new Reader(*_ssReader, + _spd, + _pd); + + if (_checkPointData) { + _ssReader->checkPointRead(*_checkPointData); + _pReader->checkPointRead(*_checkPointData); + assert(_checkPointData->empty()); + } else { + _ssReader->setup(_ssd); + _pReader->setup(); + _wordNum = 0; + } + + return true; +} + + +bool +PageDict4FileSeqRead::close(void) +{ + delete _pReader; + delete _ssReader; + _pReader = NULL; + _ssReader = NULL; + + _ssReadContext.dropComprBuf(); + _spReadContext.dropComprBuf(); + _pReadContext.dropComprBuf(); + _ssReadContext.setFile(NULL); + _spReadContext.setFile(NULL); + _pReadContext.setFile(NULL); + _ssfile.Close(); + _spfile.Close(); + _pfile.Close(); + return true; +} + + +void +PageDict4FileSeqRead::checkPointWrite(vespalib::nbostream &out) +{ + _ssd.checkPointWrite(out); + _spReadContext.checkPointWrite(out); + _pReadContext.checkPointWrite(out); + vespalib::nbostream data; + _ssReader->checkPointWrite(data); + _pReader->checkPointWrite(data); + std::vector checkPointData(data.size()); + data.read(&checkPointData[0], data.size()); + out << checkPointData; + out << _wordNum; + out << _ssCompleted << _ssFileBitSize << 
_ssHeaderLen; + out << _spCompleted << _spFileBitSize << _spHeaderLen; + out << _pCompleted << _pFileBitSize << _pHeaderLen; +} + +void +PageDict4FileSeqRead::checkPointRead(vespalib::nbostream &in) +{ + _ssd.checkPointRead(in); + _spReadContext.checkPointRead(in); + _pReadContext.checkPointRead(in); + std::vector checkPointData; + in >> checkPointData; + _checkPointData.reset(new vespalib::nbostream(checkPointData.size())); + _checkPointData->write(&checkPointData[0], checkPointData.size()); + in >> _wordNum; + in >> _ssCompleted >> _ssFileBitSize >> _ssHeaderLen; + in >> _spCompleted >> _spFileBitSize >> _spHeaderLen; + in >> _pCompleted >> _pFileBitSize >> _pHeaderLen; +} + +void +PageDict4FileSeqRead::getParams(PostingListParams ¶ms) +{ + params.clear(); + params.set("avgBitsPerDoc", _ssd._avgBitsPerDoc); + params.set("minChunkDocs", _ssd._minChunkDocs); + params.set("docIdLimit", _ssd._docIdLimit); + params.set("numWordIds", _ssd._numWordIds); + params.set("numCounts", _ssd._numWordIds); +} + + +PageDict4FileSeqWrite::PageDict4FileSeqWrite(void) + : _pWriter(NULL), + _spWriter(NULL), + _ssWriter(NULL), + _pe(), + _pWriteContext(_pe), + _pfile(), + _spe(), + _spWriteContext(_spe), + _spfile(), + _sse(), + _ssWriteContext(_sse), + _ssfile(), + _pHeaderLen(0), + _spHeaderLen(0), + _ssHeaderLen(0) +{ + _pe.setWriteContext(&_pWriteContext); + _spe.setWriteContext(&_spWriteContext); + _sse.setWriteContext(&_ssWriteContext); +} + + +PageDict4FileSeqWrite::~PageDict4FileSeqWrite(void) +{ + delete _pWriter; + delete _spWriter; + delete _ssWriter; +} + + +void +PageDict4FileSeqWrite::writeWord(const vespalib::stringref &word, + const PostingListCounts &counts) +{ + _pWriter->addCounts(word, counts); +} + + +bool +PageDict4FileSeqWrite::open(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + assert(_pWriter == NULL); + assert(_spWriter == NULL); + assert(_ssWriter == NULL); + + vespalib::string pname 
= name + ".pdat"; + vespalib::string spname = name + ".spdat"; + vespalib::string ssname = name + ".ssdat"; + + if (tuneFileWrite.getWantSyncWrites()) { + _pfile.EnableSyncWrites(); + _spfile.EnableSyncWrites(); + _ssfile.EnableSyncWrites(); + } + if (tuneFileWrite.getWantDirectIO()) { + _pfile.EnableDirectIO(); + _spfile.EnableDirectIO(); + _ssfile.EnableDirectIO(); + } + bool ok = _pfile.OpenWriteOnly(pname.c_str()); + assert(ok); + (void) ok; + _pWriteContext.setFile(&_pfile); + + ok = _spfile.OpenWriteOnly(spname.c_str()); + assert(ok); + _spWriteContext.setFile(&_spfile); + + ok = _ssfile.OpenWriteOnly(ssname.c_str()); + assert(ok); + _ssWriteContext.setFile(&_ssfile); + + if (!_checkPointData) { + _pWriteContext.allocComprBuf(65536u, 32768u); + _spWriteContext.allocComprBuf(65536u, 32768u); + _ssWriteContext.allocComprBuf(65536u, 32768u); + } + + uint64_t pFileSize = _pfile.GetSize(); + uint64_t spFileSize = _spfile.GetSize(); + uint64_t ssFileSize = _ssfile.GetSize(); + uint64_t pBufferStartFilePos = _pWriteContext.getBufferStartFilePos(); + uint64_t spBufferStartFilePos = _spWriteContext.getBufferStartFilePos(); + uint64_t ssBufferStartFilePos = _ssWriteContext.getBufferStartFilePos(); + assert(pFileSize >= pBufferStartFilePos); + assert(spFileSize >= spBufferStartFilePos); + assert(ssFileSize >= ssBufferStartFilePos); + _pfile.SetSize(pBufferStartFilePos); + _spfile.SetSize(spBufferStartFilePos); + _ssfile.SetSize(ssBufferStartFilePos); + assert(pBufferStartFilePos == static_cast(_pfile.GetPosition())); + assert(spBufferStartFilePos == + static_cast(_spfile.GetPosition())); + assert(ssBufferStartFilePos == + static_cast(_ssfile.GetPosition())); + + if (!_checkPointData) { + _pe.setupWrite(_pWriteContext); + _spe.setupWrite(_spWriteContext); + _sse.setupWrite(_ssWriteContext); + assert(_pe.getWriteOffset() == 0); + assert(_spe.getWriteOffset() == 0); + assert(_sse.getWriteOffset() == 0); + _spe.copyParams(_sse); + _pe.copyParams(_sse); + // Write initial 
file headers + makePHeader(fileHeaderContext); + makeSPHeader(fileHeaderContext); + makeSSHeader(fileHeaderContext); + } + + _ssWriter = new SSWriter(_sse); + _spWriter = new SPWriter(*_ssWriter, _spe); + _pWriter = new PWriter(*_spWriter, _pe); + if (_checkPointData) { + _ssWriter->checkPointRead(*_checkPointData); + _spWriter->checkPointRead(*_checkPointData); + _pWriter->checkPointRead(*_checkPointData); + assert(_checkPointData->empty()); + } else { + _spWriter->setup(); + _pWriter->setup(); + } + + return true; +} + + +bool +PageDict4FileSeqWrite::close(void) +{ + _pWriter->flush(); + uint64_t usedPBits = _pe.getWriteOffset(); + uint64_t usedSPBits = _spe.getWriteOffset(); + uint64_t usedSSBits = _sse.getWriteOffset(); + _pe.flush(); + _pWriteContext.writeComprBuffer(true); + _spe.flush(); + _spWriteContext.writeComprBuffer(true); + _sse.flush(); + _ssWriteContext.writeComprBuffer(true); + + _pWriteContext.dropComprBuf(); + _pfile.Sync(); + _pfile.Close(); + _pWriteContext.setFile(NULL); + _spWriteContext.dropComprBuf(); + _spfile.Sync(); + _spfile.Close(); + _spWriteContext.setFile(NULL); + _ssWriteContext.dropComprBuf(); + _ssfile.Sync(); + _ssfile.Close(); + _ssWriteContext.setFile(NULL); + + // Update file headers + updatePHeader(usedPBits); + updateSPHeader(usedSPBits); + updateSSHeader(usedSSBits); + + delete _pWriter; + delete _spWriter; + delete _ssWriter; + _pWriter = NULL; + _spWriter = NULL; + _ssWriter = NULL; + + return true; +} + + +void +PageDict4FileSeqWrite::writeSSSubHeader(vespalib::GenericHeader &header) +{ + SSEC &e = _sse; + typedef vespalib::GenericHeader::Tag Tag; + header.putTag(Tag("numWordIds", e._numWordIds)); + header.putTag(Tag("avgBitsPerDoc", e._avgBitsPerDoc)); + header.putTag(Tag("minChunkDocs", e._minChunkDocs)); + header.putTag(Tag("docIdLimit", e._docIdLimit)); +} + + +void +PageDict4FileSeqWrite::makePHeader(const FileHeaderContext &fileHeaderContext) +{ + PEC &e = _pe; + ComprFileWriteContext &wc = _pWriteContext; + + // 
subheader only written to SS file. + + typedef vespalib::GenericHeader::Tag Tag; + vespalib::FileHeader header(headerAlign); + + fileHeaderContext.addTags(header, _pfile.GetFileName()); + header.putTag(Tag("frozen", 0)); + header.putTag(Tag("fileBitSize", 0)); + header.putTag(Tag("format.0", myPId)); + header.putTag(Tag("endian", "big")); + header.putTag(Tag("desc", "Dictionary page file")); + e.setupWrite(wc); + e.writeHeader(header); + e.smallAlign(64); + e.flush(); + uint32_t headerLen = header.getSize(); + headerLen += (-headerLen & 7); + assert(e.getWriteOffset() == headerLen * 8); + assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned + if (_pHeaderLen != 0) { + assert(_pHeaderLen == headerLen); + } + _pHeaderLen = headerLen; +} + + +void +PageDict4FileSeqWrite::makeSPHeader(const FileHeaderContext &fileHeaderContext) +{ + SPEC &e = _spe; + ComprFileWriteContext &wc = _spWriteContext; + + // subheader only written to SS file. + + typedef vespalib::GenericHeader::Tag Tag; + vespalib::FileHeader header(headerAlign); + + fileHeaderContext.addTags(header, _spfile.GetFileName()); + header.putTag(Tag("frozen", 0)); + header.putTag(Tag("fileBitSize", 0)); + header.putTag(Tag("format.0", mySPId)); + header.putTag(Tag("endian", "big")); + header.putTag(Tag("desc", "Dictionary sparse page file")); + e.setupWrite(wc); + e.writeHeader(header); + e.smallAlign(64); + e.flush(); + uint32_t headerLen = header.getSize(); + headerLen += (-headerLen & 7); + assert(e.getWriteOffset() == headerLen * 8); + assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned + if (_spHeaderLen != 0) { + assert(_spHeaderLen == headerLen); + } + _spHeaderLen = headerLen; +} + + +void +PageDict4FileSeqWrite::makeSSHeader(const FileHeaderContext &fileHeaderContext) +{ + SSEC &e = _sse; + ComprFileWriteContext &wc = _ssWriteContext; + + typedef vespalib::GenericHeader::Tag Tag; + vespalib::FileHeader header(headerAlign); + + fileHeaderContext.addTags(header, 
_ssfile.GetFileName()); + header.putTag(Tag("frozen", 0)); + header.putTag(Tag("fileBitSize", 0)); + header.putTag(Tag("format.0", mySSId)); + header.putTag(Tag("endian", "big")); + header.putTag(Tag("desc", "Dictionary sparse sparse file")); + writeSSSubHeader(header); + + e.setupWrite(wc); + e.writeHeader(header); + e.smallAlign(64); + e.flush(); + uint32_t headerLen = header.getSize(); + headerLen += (-headerLen & 7); + assert(e.getWriteOffset() == headerLen * 8); + assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned + if (_ssHeaderLen != 0) { + assert(_ssHeaderLen == headerLen); + } + _ssHeaderLen = headerLen; +} + + +void +PageDict4FileSeqWrite::updatePHeader(uint64_t fileBitSize) +{ + vespalib::FileHeader h(headerAlign); + FastOS_File f; + f.OpenReadWrite(_pfile.GetFileName()); + h.readFile(f); + FileHeaderContext::setFreezeTime(h); + typedef vespalib::GenericHeader::Tag Tag; + h.putTag(Tag("frozen", 1)); + h.putTag(Tag("fileBitSize", fileBitSize)); + h.rewriteFile(f); + f.Sync(); + f.Close(); +} + + +void +PageDict4FileSeqWrite::updateSPHeader(uint64_t fileBitSize) +{ + vespalib::FileHeader h(headerAlign); + FastOS_File f; + f.OpenReadWrite(_spfile.GetFileName()); + h.readFile(f); + FileHeaderContext::setFreezeTime(h); + typedef vespalib::GenericHeader::Tag Tag; + h.putTag(Tag("frozen", 1)); + h.putTag(Tag("fileBitSize", fileBitSize)); + h.rewriteFile(f); + f.Sync(); + f.Close(); +} + + +void +PageDict4FileSeqWrite::updateSSHeader(uint64_t fileBitSize) +{ + vespalib::FileHeader h(headerAlign); + FastOS_File f; + f.OpenReadWrite(_ssfile.GetFileName()); + h.readFile(f); + FileHeaderContext::setFreezeTime(h); + typedef vespalib::GenericHeader::Tag Tag; + h.putTag(Tag("frozen", 1)); + h.putTag(Tag("fileBitSize", fileBitSize)); + uint64_t wordNum = _pWriter->getWordNum(); + assert(wordNum <= _sse._numWordIds); + h.putTag(Tag("numWordIds", wordNum)); + h.rewriteFile(f); + f.Sync(); + f.Close(); +} + + +void 
+PageDict4FileSeqWrite::checkPointWrite(vespalib::nbostream &out) +{ + _ssWriteContext.writeComprBuffer(true); + _spWriteContext.writeComprBuffer(true); + _pWriteContext.writeComprBuffer(true); + _ssWriteContext.checkPointWrite(out); + _spWriteContext.checkPointWrite(out); + _pWriteContext.checkPointWrite(out); + vespalib::nbostream data; + _ssWriter->checkPointWrite(data); + _spWriter->checkPointWrite(data); + _pWriter->checkPointWrite(data); + std::vector checkPointData(data.size()); + data.read(&checkPointData[0], data.size()); + out << checkPointData; + out << _ssHeaderLen << _spHeaderLen << _pHeaderLen; + _ssfile.Sync(); + _spfile.Sync(); + _pfile.Sync(); +} + + +void +PageDict4FileSeqWrite::checkPointRead(vespalib::nbostream &in) +{ + _ssWriteContext.checkPointRead(in); + _spWriteContext.checkPointRead(in); + _pWriteContext.checkPointRead(in); + std::vector checkPointData; + in >> checkPointData; + _checkPointData.reset(new vespalib::nbostream(checkPointData.size())); + _checkPointData->write(&checkPointData[0], checkPointData.size()); + in >> _ssHeaderLen >> _spHeaderLen >> _pHeaderLen; +} + + +void +PageDict4FileSeqWrite::setParams(const PostingListParams ¶ms) +{ + params.get("avgBitsPerDoc", _sse._avgBitsPerDoc); + params.get("minChunkDocs", _sse._minChunkDocs); + params.get("docIdLimit", _sse._docIdLimit); + params.get("numWordIds", _sse._numWordIds); + _spe.copyParams(_sse); + _pe.copyParams(_sse); +} + + +void +PageDict4FileSeqWrite::getParams(PostingListParams ¶ms) +{ + params.clear(); + params.set("avgBitsPerDoc", _sse._avgBitsPerDoc); + params.set("minChunkDocs", _sse._minChunkDocs); + params.set("docIdLimit", _sse._docIdLimit); + params.set("numWordIds", _sse._numWordIds); +} + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h new file mode 100644 index 00000000000..bc080fc58c3 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/diskindex/pagedict4file.h
@@ -0,0 +1,239 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace vespalib
+{
+
+class GenericHeader;
+
+}
+
+namespace search
+{
+
+namespace diskindex
+{
+
+/**
+ * Dictionary file containing words and counts for words.
+ *
+ * Sequential reader.  One decode context, read context and file is kept
+ * for each of the three dictionary levels (sparse sparse, sparse page
+ * and page).
+ */
+class PageDict4FileSeqRead : public index::DictionaryFileSeqRead
+{
+    typedef bitcompression::PostingListCountFileDecodeContext DC;
+    typedef bitcompression::PageDict4SSReader SSReader;
+    typedef bitcompression::PageDict4Reader Reader;
+
+    typedef index::PostingListCounts PostingListCounts;
+
+    Reader *_pReader;
+    SSReader *_ssReader;
+
+    DC _ssd;
+    ComprFileReadContext _ssReadContext;
+    FastOS_File _ssfile;
+
+    DC _spd;
+    ComprFileReadContext _spReadContext;
+    FastOS_File _spfile;
+
+    DC _pd;
+    ComprFileReadContext _pReadContext;
+    FastOS_File _pfile;
+
+    uint64_t _ssFileBitSize;
+    uint64_t _spFileBitSize;
+    uint64_t _pFileBitSize;
+    uint32_t _ssHeaderLen;
+    uint32_t _spHeaderLen;
+    uint32_t _pHeaderLen;
+
+    bool _ssCompleted;
+    bool _spCompleted;
+    bool _pCompleted;
+
+    uint64_t _wordNum;
+
+    /* Reader state captured by checkPointRead(), kept as a byte stream. */
+    std::unique_ptr<vespalib::nbostream> _checkPointData;
+
+    void
+    readSSHeader();
+
+    void
+    readSPHeader(void);
+
+    void
+    readPHeader(void);
+
+public:
+
+    PageDict4FileSeqRead(void);
+
+    virtual
+    ~PageDict4FileSeqRead(void);
+
+    /**
+     * Read word and counts.  Only nonzero counts are returned.  If at
+     * end of dictionary then noWordNumHigh() is returned as word number.
+     */
+    virtual void
+    readWord(vespalib::string &word,
+             uint64_t &wordNum,
+             PostingListCounts &counts) override;
+
+    /**
+     * Open dictionary file for sequential read.
+     */
+    virtual bool open(const vespalib::string &name,
+                      const TuneFileSeqRead &tuneFileRead) override;
+
+    /**
+     * Close dictionary file.
+     */
+    virtual bool close(void) override;
+
+    /**
+     * Checkpoint write.  Used at semi-regular intervals during indexing
+     * to allow for continued indexing after an interrupt.  Implies
+     * flush from memory to disk, and possibly also sync to permanent
+     * storage media.
+     */
+    virtual void
+    checkPointWrite(vespalib::nbostream &out) override;
+
+    /**
+     * Checkpoint read.  Used when resuming indexing after an interrupt.
+     */
+    virtual void
+    checkPointRead(vespalib::nbostream &in) override;
+
+    /**
+     * Get current parameters.  The parameter set is cleared before the
+     * values read from the sparse sparse file are stored in it.
+     */
+    virtual void
+    getParams(index::PostingListParams &params) override;
+};
+
+/**
+ * Sequential writer for a dictionary containing words and counts for
+ * words.  Output is split over three files, named by appending ".pdat",
+ * ".spdat" and ".ssdat" to the name given to open().
+ */
+class PageDict4FileSeqWrite : public index::DictionaryFileSeqWrite
+{
+    typedef bitcompression::PostingListCountFileEncodeContext EC;
+    typedef EC SPEC;
+    typedef EC PEC;
+    typedef EC SSEC;
+    typedef bitcompression::PageDict4SSWriter SSWriter;
+    typedef bitcompression::PageDict4SPWriter SPWriter;
+    typedef bitcompression::PageDict4PWriter PWriter;
+
+    typedef index::PostingListCounts PostingListCounts;
+
+    /* Writers are created by open() and deleted by close() / destructor. */
+    PWriter *_pWriter;
+    SPWriter *_spWriter;
+    SSWriter *_ssWriter;
+
+    EC _pe;
+    ComprFileWriteContext _pWriteContext;
+    FastOS_File _pfile;
+
+    EC _spe;
+    ComprFileWriteContext _spWriteContext;
+    FastOS_File _spfile;
+
+    EC _sse;
+    ComprFileWriteContext _ssWriteContext;
+    FastOS_File _ssfile;
+
+    uint32_t _pHeaderLen;  // Length of header for page file (bytes)
+    uint32_t _spHeaderLen; // Length of header for sparse page file (bytes)
+    uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes)
+
+    /* Writer state captured by checkPointRead(), consumed by open(). */
+    std::unique_ptr<vespalib::nbostream> _checkPointData;
+
+    void
+    writeIndexNames(vespalib::GenericHeader &header);
+
+    /* Adds the numWordIds, avgBitsPerDoc, minChunkDocs and docIdLimit tags. */
+    void
+    writeSSSubHeader(vespalib::GenericHeader &header);
+
+    /* The make*Header() functions write the initial, not yet frozen header. */
+    void
+    makePHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+    void
+    makeSPHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+    void
+    makeSSHeader(const search::common::FileHeaderContext &fileHeaderContext);
+
+    /* The update*Header() functions rewrite the header as frozen on close(). */
+    void
+    updatePHeader(uint64_t fileBitSize);
+
+    void
+    updateSPHeader(uint64_t fileBitSize);
+
+    void
+    updateSSHeader(uint64_t fileBitSize);
+
+public:
+    PageDict4FileSeqWrite(void);
+
+    virtual
+    ~PageDict4FileSeqWrite(void);
+
+    /**
+     * Write word and counts.  Only nonzero counts should be supplied.
+     */
+    virtual void
+    writeWord(const vespalib::stringref &word,
+              const PostingListCounts &counts) override;
+
+    /**
+     * Open dictionary file for sequential write.  The index with most
+     * words should be first for optimal compression.
+     */
+    virtual bool
+    open(const vespalib::string &name,
+         const TuneFileSeqWrite &tuneFileWrite,
+         const search::common::FileHeaderContext &fileHeaderContext) override;
+
+    /**
+     * Close dictionary file.
+     */
+    virtual bool
+    close(void) override;
+
+    /**
+     * Checkpoint write.  Used at semi-regular intervals during indexing
+     * to allow for continued indexing after an interrupt.  Implies
+     * flush from memory to disk, and possibly also sync to permanent
+     * storage media.
+     */
+    virtual void
+    checkPointWrite(vespalib::nbostream &out) override;
+
+    /**
+     * Checkpoint read.  Used when resuming indexing after an interrupt.
+     */
+    virtual void
+    checkPointRead(vespalib::nbostream &in) override;
+
+    /**
+     * Set parameters.  Values are stored in the sparse sparse encode
+     * context and then copied to the other two encode contexts.
+     */
+    virtual void
+    setParams(const index::PostingListParams &params) override;
+
+    /**
+     * Get current parameters.
+     */
+    virtual void
+    getParams(index::PostingListParams &params) override;
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
new file mode 100644
index 00000000000..f1261c83a51
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.cpp
@@ -0,0 +1,300 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".diskindex.pagedict4randread");
+#include
+#include "pagedict4randread.h"
+#include
+#include
+
+
+namespace
+{
+
+/* Expected value of the "format.0" header tag for each of the three files. */
+vespalib::string myPId("PageDict4P.1");
+vespalib::string mySPId("PageDict4SP.1");
+vespalib::string mySSId("PageDict4SS.1");
+vespalib::string emptyId;
+vespalib::string emptyStr;
+
+}
+
+using vespalib::getLastErrorString;
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+PageDict4RandRead::PageDict4RandRead(void)
+    : DictionaryFileRandRead(),
+      _ssReader(NULL),
+      _ssd(),
+      _ssReadContext(_ssd),
+      _ssfile(),
+      _spfile(),
+      _pfile(),
+      _ssFileBitSize(0u),
+      _spFileBitSize(0u),
+      _pFileBitSize(0u),
+      _ssHeaderLen(0u),
+      _spHeaderLen(0u),
+      _pHeaderLen(0u)
+{
+    _ssd.setReadContext(&_ssReadContext);
+}
+
+
+PageDict4RandRead::~PageDict4RandRead(void)
+{
+    delete _ssReader;
+}
+
+
+/*
+ * Read the header of the sparse sparse file through the member decode
+ * context.  Stores the file bit size, the dictionary parameters and the
+ * header length.  A header that is not as expected is only caught by
+ * assert().
+ */
+void
+PageDict4RandRead::readSSHeader()
+{
+    DC &ssd = _ssd;
+
+    vespalib::FileHeader header;
+    uint32_t headerLen = ssd.readHeader(header, _ssfile.getSize());
+    assert(header.hasTag("frozen"));
+    assert(header.hasTag("fileBitSize"));
+    assert(header.hasTag("format.0"));
+    assert(!header.hasTag("format.1"));
+    assert(header.hasTag("numWordIds"));
+    assert(header.hasTag("avgBitsPerDoc"));
+    assert(header.hasTag("minChunkDocs"));
+    assert(header.hasTag("docIdLimit"));
+    assert(header.hasTag("endian"));
+    assert(header.getTag("frozen").asInteger() != 0);
+    _ssFileBitSize = header.getTag("fileBitSize").asInteger();
+    assert(header.getTag("format.0").asString() == mySSId);
+    ssd._numWordIds = header.getTag("numWordIds").asInteger();
+    ssd._avgBitsPerDoc = header.getTag("avgBitsPerDoc").asInteger();
+    ssd._minChunkDocs = header.getTag("minChunkDocs").asInteger();
+    ssd._docIdLimit = header.getTag("docIdLimit").asInteger();
+
+    assert(header.getTag("endian").asString() == "big");
+    ssd.smallAlign(64);
+    uint32_t minHeaderLen = header.getSize();
+    minHeaderLen += (-minHeaderLen & 7);    // round up to a multiple of 8
+    assert(headerLen >= minHeaderLen);
+    assert(ssd.getReadOffset() == headerLen * 8);
+    _ssHeaderLen = headerLen;
+}
+
+
+/*
+ * Read the header of the sparse page file through a temporary decode
+ * context.  Stores the file bit size and the header length.
+ */
+void
+PageDict4RandRead::readSPHeader(void)
+{
+    DC d;
+    ComprFileReadContext rc(d);
+
+    d.setReadContext(&rc);
+    rc.setFile(&_spfile);
+    rc.setFileSize(_spfile.GetSize());
+    rc.allocComprBuf(512, 32768u);
+    d.emptyBuffer(0);
+    rc.readComprBuffer();
+
+    vespalib::FileHeader header;
+    uint32_t headerLen = d.readHeader(header, _spfile.getSize());
+    assert(header.hasTag("frozen"));
+    assert(header.hasTag("fileBitSize"));
+    assert(header.hasTag("format.0"));
+    assert(!header.hasTag("format.1"));
+    assert(header.hasTag("endian"));
+    assert(header.getTag("frozen").asInteger() != 0);
+    _spFileBitSize = header.getTag("fileBitSize").asInteger();
+    assert(header.getTag("format.0").asString() == mySPId);
+    assert(header.getTag("endian").asString() == "big");
+    d.smallAlign(64);
+    uint32_t minHeaderLen = header.getSize();
+    minHeaderLen += (-minHeaderLen & 7);    // round up to a multiple of 8
+    assert(headerLen >= minHeaderLen);
+    assert(d.getReadOffset() == headerLen * 8);
+    _spHeaderLen = headerLen;
+}
+
+
+/*
+ * Read the header of the page file through a temporary decode context.
+ * Stores the file bit size and the header length.
+ */
+void
+PageDict4RandRead::readPHeader(void)
+{
+    DC d;
+    ComprFileReadContext rc(d);
+
+    d.setReadContext(&rc);
+    rc.setFile(&_pfile);
+    rc.setFileSize(_pfile.GetSize());
+    rc.allocComprBuf(512, 32768u);
+    d.emptyBuffer(0);
+    rc.readComprBuffer();
+
+    vespalib::FileHeader header;
+    uint32_t headerLen = d.readHeader(header, _pfile.getSize());
+    assert(header.hasTag("frozen"));
+    assert(header.hasTag("fileBitSize"));
+    assert(header.hasTag("format.0"));
+    assert(!header.hasTag("format.1"));
+    assert(header.hasTag("endian"));
+    assert(header.getTag("frozen").asInteger() != 0);
+    _pFileBitSize = header.getTag("fileBitSize").asInteger();
+    assert(header.getTag("format.0").asString() == myPId);
+    assert(header.getTag("endian").asString() == "big");
+    d.smallAlign(64);
+    uint32_t minHeaderLen = header.getSize();
+    minHeaderLen += (-minHeaderLen & 7);    // round up to a multiple of 8
+    assert(headerLen >= minHeaderLen);
+    assert(d.getReadOffset() == headerLen * 8);
+    _pHeaderLen = headerLen;
+}
+
+
+/*
+ * Look up a word.  The sparse sparse reader is consulted first; unless
+ * it reports a miss or an overflow entry, the lookup continues in the
+ * memory mapped sparse page file and then in the memory mapped page file.
+ *
+ * Returns true when the word was found.  On a miss the counts are
+ * cleared, but offset, accumulated document count and wordNum are still
+ * written.
+ */
+bool
+PageDict4RandRead::lookup(const vespalib::stringref &word,
+                          uint64_t &wordNum,
+                          PostingListOffsetAndCounts &offsetAndCounts)
+{
+    SSLookupRes ssRes(_ssReader->lookup(word));
+    if (!ssRes._res) {
+        offsetAndCounts._offset = ssRes._l6StartOffset._fileOffset;
+        offsetAndCounts._accNumDocs = ssRes._l6StartOffset._accNumDocs;
+        wordNum = ssRes._l6WordNum; // XXX ?
+        offsetAndCounts._counts.clear();
+        return false;
+    }
+
+    if (ssRes._overflow) {
+        offsetAndCounts._offset = ssRes._startOffset._fileOffset;
+        offsetAndCounts._accNumDocs = ssRes._startOffset._accNumDocs;
+        wordNum = ssRes._l6WordNum;
+        offsetAndCounts._counts = ssRes._counts;
+        return true;
+    } else {
+        SPLookupRes spRes;
+        size_t pageSize = PageDict4PageParams::getPageByteSize();
+        const char *spData = static_cast<const char *>
+                             (_spfile.MemoryMapPtr(0));
+        spRes.lookup(*_ssReader,
+                     spData + pageSize * ssRes._sparsePageNum,
+                     word,
+                     ssRes._l6Word,
+                     ssRes._lastWord,
+                     ssRes._l6StartOffset,
+                     ssRes._l6WordNum,
+                     ssRes._pageNum);
+
+        PLookupRes pRes;
+        const char *pData = static_cast<const char *>
+                            (_pfile.MemoryMapPtr(0));
+        pRes.lookup(*_ssReader,
+                    pData + pageSize * spRes._pageNum,
+                    word,
+                    spRes._l3Word,
+                    spRes._lastWord,
+                    spRes._l3StartOffset,
+                    spRes._l3WordNum);
+        offsetAndCounts._offset = pRes._startOffset._fileOffset;
+        offsetAndCounts._accNumDocs = pRes._startOffset._accNumDocs;
+        wordNum = pRes._wordNum;
+        if (!pRes._res) {
+            offsetAndCounts._counts.clear();
+            return false;
+        }
+        offsetAndCounts._counts = pRes._counts;
+        return true;
+    }
+}
+
+
+/*
+ * Open the three dictionary files (name + ".ssdat", ".spdat", ".pdat")
+ * read-only, load the whole sparse sparse file into the read buffer,
+ * read all three headers and create the sparse sparse reader.
+ *
+ * Returns false, after logging an error, when a file cannot be opened.
+ */
+bool
+PageDict4RandRead::open(const vespalib::string &name,
+                        const TuneFileRandRead &tuneFileRead)
+{
+    vespalib::string pname = name + ".pdat";
+    vespalib::string spname = name + ".spdat";
+    vespalib::string ssname = name + ".ssdat";
+
+    /*
+     * Memory mapping is forced by "|| true": lookup() reads the sparse
+     * page and page files through MemoryMapPtr().  The direct IO branch
+     * below is therefore never taken.
+     */
+    if (tuneFileRead.getWantMemoryMap() || true) {
+        int mmapFlags(tuneFileRead.getMemoryMapFlags());
+        _ssfile.enableMemoryMap(mmapFlags);
+        _spfile.enableMemoryMap(mmapFlags);
+        _pfile.enableMemoryMap(mmapFlags);
+    } else if (tuneFileRead.getWantDirectIO()) {
+        _ssfile.EnableDirectIO();
+        _spfile.EnableDirectIO();
+        _pfile.EnableDirectIO();
+    }
+
+    if (!_ssfile.OpenReadOnly(ssname.c_str())) {
+        LOG(error, "could not open %s: %s",
+            _ssfile.GetFileName(), getLastErrorString().c_str());
+        return false;
+    }
+    if (!_spfile.OpenReadOnly(spname.c_str())) {
+        LOG(error, "could not open %s: %s",
+            _spfile.GetFileName(), getLastErrorString().c_str());
+        return false;
+    }
+    if (!_pfile.OpenReadOnly(pname.c_str())) {
+        LOG(error, "could not open %s: %s",
+            _pfile.GetFileName(), getLastErrorString().c_str());
+        return false;
+    }
+
+    uint64_t fileSize = _ssfile.GetSize();
+    _ssReadContext.setFile(&_ssfile);
+    _ssReadContext.setFileSize(fileSize);
+    /* Buffer size is given in 64-bit units, rounded up to hold the file. */
+    _ssReadContext.allocComprBuf((fileSize + sizeof(uint64_t) - 1) /
+                                 sizeof(uint64_t),
+                                 32768u);
+    _ssd.emptyBuffer(0);
+    _ssReadContext.readComprBuffer();
+    assert(_ssReadContext.getBufferEndFilePos() >= fileSize);
+
+    readSSHeader();
+    readSPHeader();
+    readPHeader();
+
+    _ssReader = new SSReader(_ssReadContext,
+                             _ssHeaderLen,
+                             _ssFileBitSize,
+                             _spHeaderLen,
+                             _spFileBitSize,
+                             _pHeaderLen,
+                             _pFileBitSize);
+    _ssReader->setup(_ssd);
+
+    return true;
+}
+
+
+/*
+ * Delete the sparse sparse reader, drop its buffer and close all three
+ * files.  Always returns true.
+ */
+bool
+PageDict4RandRead::close(void)
+{
+    delete _ssReader;
+    _ssReader = NULL;
+
+    _ssReadContext.dropComprBuf();
+    _ssReadContext.setFile(NULL);
+    _ssfile.Close();
+    _spfile.Close();
+    _pfile.Close();
+    return true;
+}
+
+
+/*
+ * Number of word ids, as read from the "numWordIds" tag of the sparse
+ * sparse file header.
+ */
+uint64_t
+PageDict4RandRead::getNumWordIds(void) const
+{
+    return _ssd._numWordIds;
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
new file mode 100644
index 00000000000..fba9dfd483b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/pagedict4randread.h
@@ -0,0 +1,85 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace diskindex
+{
+
+/**
+ * Random access reader for a dictionary stored as three files: sparse
+ * sparse (.ssdat), sparse page (.spdat) and page (.pdat).
+ */
+class PageDict4RandRead : public index::DictionaryFileRandRead
+{
+    typedef bitcompression::PostingListCountFileDecodeContext DC;
+    typedef bitcompression::PageDict4SSReader SSReader;
+
+    typedef bitcompression::PageDict4SSLookupRes SSLookupRes;
+    typedef bitcompression::PageDict4SPLookupRes SPLookupRes;
+    typedef bitcompression::PageDict4PLookupRes PLookupRes;
+    typedef bitcompression::PageDict4PageParams PageDict4PageParams;
+
+    typedef index::PostingListCounts PostingListCounts;
+    typedef index::PostingListOffsetAndCounts PostingListOffsetAndCounts;
+
+    /* Created by open(), deleted by close() and by the destructor. */
+    SSReader *_ssReader;
+
+    DC _ssd;
+    ComprFileReadContext _ssReadContext;
+    FastOS_File _ssfile;
+    FastOS_File _spfile;
+    FastOS_File _pfile;
+
+    /* Values taken from the file headers by the read*Header() functions. */
+    uint64_t _ssFileBitSize;
+    uint64_t _spFileBitSize;
+    uint64_t _pFileBitSize;
+    uint32_t _ssHeaderLen;
+    uint32_t _spHeaderLen;
+    uint32_t _pHeaderLen;
+
+    void
+    readSSHeader();
+
+    void
+    readSPHeader(void);
+
+    void
+    readPHeader(void);
+
+public:
+    PageDict4RandRead(void);
+
+    virtual
+    ~PageDict4RandRead(void);
+
+    /**
+     * Look up a word.  Returns true when the word was found.  When it
+     * was not found the counts are cleared, while offset, accumulated
+     * document count and word number are still written.
+     */
+    virtual bool
+    lookup(const vespalib::stringref &word,
+           uint64_t &wordNum,
+           PostingListOffsetAndCounts &offsetAndCounts) override;
+
+    /**
+     * Open dictionary file for random read.
+     */
+    virtual bool open(const vespalib::string &name,
+                      const TuneFileRandRead &tuneFileRead) override;
+
+    /**
+     * Close dictionary file.
+     */
+    virtual bool close(void) override;
+
+    /**
+     * Number of word ids, as read from the sparse sparse file header.
+     */
+    virtual uint64_t
+    getNumWordIds(void) const override;
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
+
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp
new file mode 100644
index 00000000000..3a66fc05685
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp
@@ -0,0 +1,110 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".diskindex.wordnummapper");
+#include
+#include
+#include "wordnummapper.h"
+
+namespace search
+{
+
+namespace diskindex
+{
+
+WordNumMapping::WordNumMapping(void)
+    : _old2newwords(),
+      _oldDictSize(0u)
+{
+}
+
+
+/*
+ * Load a mapping file holding one 64-bit new word number per old word
+ * number.  The file content is placed in slots 1..n of the table, with
+ * noWordNum() in slot 0 and noWordNumHigh() in slot n + 1.
+ */
+void
+WordNumMapping::readMappingFile(const vespalib::string &name,
+                                const TuneFileSeqRead &tuneFileRead)
+{
+    // Open word mapping file
+    Fast_BufferedFile old2newwordfile(new FastOS_File);
+    if (tuneFileRead.getWantDirectIO())
+        old2newwordfile.EnableDirectIO();
+    // XXX no checking for success
+    // NOTE(review): a failed open is not detected here, and the size
+    // below is signed -- confirm what GetSize() yields in that case.
+    old2newwordfile.ReadOpen(name.c_str());
+    int64_t tempfilesize = old2newwordfile.GetSize();
+    uint64_t tempfileentries = static_cast<uint64_t>(tempfilesize /
+                                                     sizeof(uint64_t));
+    Array &map = _old2newwords;
+    map.resize(tempfileentries + 2);
+    _oldDictSize = tempfileentries;
+
+    old2newwordfile.Read(&map[1],
+                         static_cast<size_t>(tempfilesize));
+    old2newwordfile.Close();
+    map[0] = noWordNum();
+    map[tempfileentries + 1] = noWordNumHigh();
+}
+
+
+/*
+ * Set up a table for an empty old dictionary: only the two sentinel
+ * slots are present.
+ */
+void
+WordNumMapping::noMappingFile(void)
+{
+    Array &map = _old2newwords;
+    map.resize(2);
+    map[0] = noWordNum();
+    map[1] = noWordNumHigh();
+    _oldDictSize = 0;
+}
+
+
+/*
+ * Drop the table and reset the old dictionary size.
+ */
+void
+WordNumMapping::clear(void)
+{
+    Array &map = _old2newwords;
+    map.clear();
+    _oldDictSize = 0;
+}
+
+
+/*
+ * Set the old dictionary size without touching the table.
+ */
+void
+WordNumMapping::setup(uint32_t numWordIds)
+{
+    _oldDictSize = numWordIds;
+}
+
+
+/*
+ * Assert that word number 0 maps to 0 and that each later word number
+ * maps to a value above the one mapped for the previous word number.
+ * With allowHoles set, a word number that maps to 0 is skipped.
+ */
+void
+WordNumMapper::sanityCheck(bool allowHoles)
+{
+    uint64_t dictSize = getMaxWordNum();
+    uint64_t mappedWordNum = map(0u);
+    assert(mappedWordNum == 0u);
+    for (uint64_t wordNum = 1; wordNum <= dictSize; ++wordNum) {
+        uint64_t prevMappedWordNum = mappedWordNum;
+        mappedWordNum = map(wordNum);
+        if (mappedWordNum == 0u && allowHoles)
+            continue;   // In case some words are being removed
+        assert(mappedWordNum > prevMappedWordNum);
+        (void) prevMappedWordNum;
+    }
+}
+
+
+/*
+ * Mapped value for the highest old word number.
+ */
+uint64_t
+WordNumMapping::getMaxMappedWordNum(void) const
+{
+    WordNumMapper mapper(*this);
+    return mapper.getMaxMappedWordNum();
+}
+
+
+void
+WordNumMapping::sanityCheck(bool allowHoles)
+{
+    /* Delegates to a temporary mapper attached to this mapping. */
+    WordNumMapper mapper(*this);
+    mapper.sanityCheck(allowHoles);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h
new file mode 100644
index 00000000000..a1a72757f22
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.h
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class WordNumMapper;
+
+/**
+ * Owner of a table mapping old word numbers to new word numbers.
+ * Slot 0 of a loaded table holds noWordNum() and the slot after the
+ * last entry holds noWordNumHigh().
+ */
+class WordNumMapping
+{
+    typedef vespalib::Array<uint64_t> Array;
+
+    static uint64_t
+    noWordNumHigh(void)
+    {
+        return std::numeric_limits<uint64_t>::max();
+    }
+
+    static uint64_t
+    noWordNum(void)
+    {
+        return 0u;
+    }
+
+    Array _old2newwords;
+    uint64_t _oldDictSize;
+public:
+
+    WordNumMapping(void);
+
+    /**
+     * Pointer to the first table slot, or NULL when the table is empty.
+     */
+    const uint64_t *
+    getOld2NewWordNums(void) const
+    {
+        return (_old2newwords.empty())
+            ? NULL
+            : &_old2newwords[0];
+    }
+
+    uint64_t
+    getOldDictSize(void) const
+    {
+        return _oldDictSize;
+    }
+
+    /**
+     * Load the table from a file of 64-bit word numbers.
+     */
+    void
+    readMappingFile(const vespalib::string &name,
+                    const TuneFileSeqRead &tuneFileRead);
+
+    /**
+     * Set up a table holding only the two sentinel slots.
+     */
+    void
+    noMappingFile(void);
+
+    void
+    clear(void);
+
+    /**
+     * Set the old dictionary size without touching the table.
+     */
+    void
+    setup(uint32_t numWordIds);
+
+    uint64_t
+    getMaxMappedWordNum(void) const;
+
+    void
+    sanityCheck(bool allowHoles);
+};
+
+
+/**
+ * Non-owning view of a WordNumMapping.  The table pointer is copied by
+ * setup(), so the mapping has to stay alive and unchanged while the
+ * mapper is in use.
+ */
+class WordNumMapper
+{
+    static uint64_t
+    noWordNumHigh(void)
+    {
+        return std::numeric_limits<uint64_t>::max();
+    }
+
+    static uint64_t
+    noWordNum(void)
+    {
+        return 0u;
+    }
+
+    const uint64_t *_old2newwords;
+    uint64_t _oldDictSize;
+
+public:
+    WordNumMapper(void)
+        : _old2newwords(NULL),
+          _oldDictSize(0)
+    {
+    }
+
+    WordNumMapper(const WordNumMapping &mapping)
+        : _old2newwords(NULL),
+          _oldDictSize(0)
+    {
+        setup(mapping);
+    }
+
+    void
+    setup(const WordNumMapping &mapping)
+    {
+        _old2newwords = mapping.getOld2NewWordNums();
+        _oldDictSize = mapping.getOldDictSize();
+    }
+
+    /**
+     * Map an old word number to a new one.  Without a table the word
+     * number is returned unchanged.  The parameter is 64 bits wide
+     * because callers pass 64-bit word numbers and dictionary sizes;
+     * a 32-bit parameter would silently truncate them.
+     */
+    uint64_t
+    map(uint64_t wordNum) const
+    {
+        return (_old2newwords != NULL)
+            ? _old2newwords[wordNum]
+            : wordNum;
+    }
+
+    uint64_t
+    getMaxWordNum(void) const
+    {
+        return _oldDictSize;
+    }
+
+    uint64_t
+    getMaxMappedWordNum(void) const
+    {
+        return map(_oldDictSize);
+    }
+
+    void
+    sanityCheck(bool allowHoles);
+};
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
new file mode 100644
index 00000000000..440a61fcab2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+#include "zcposocc.h"
+#include
+#include
+#include
+#include
+
+LOG_SETUP(".diskindex.zcposocc");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+using search::bitcompression::PosOccFieldsParams;
+using search::bitcompression::EG2PosOccDecodeContext;
+using search::bitcompression::EGPosOccDecodeContext;
+using search::index::PostingListCountFileSeqRead;
+using search::index::PostingListCountFileSeqWrite;
+
+/*
+ * The reader starts out with the cooked decode context as the active
+ * one.
+ */
+Zc4PosOccSeqRead::Zc4PosOccSeqRead(PostingListCountFileSeqRead *countFile)
+    : Zc4PostingSeqRead(countFile),
+      _fieldsParams(),
+      _cookedDecodeContext(&_fieldsParams),
+      _rawDecodeContext(&_fieldsParams)
+{
+    _decodeContext = &_cookedDecodeContext;
+    _decodeContext->setReadContext(&_readContext);
+    _readContext.setDecodeContext(_decodeContext);
+}
+
+
+/*
+ * Switch between the cooked and the raw decode context according to the
+ * "cooked" parameter.  When the parameter asks for the other context,
+ * the state of the active context is assigned to it before it becomes
+ * the active one.
+ */
+void
+Zc4PosOccSeqRead::
+setFeatureParams(const PostingListParams &params)
+{
+    bool oldCooked = _decodeContext == &_cookedDecodeContext;
+    bool newCooked = oldCooked;
+    params.get("cooked", newCooked);
+    if (oldCooked != newCooked) {
+        if (newCooked) {
+            _cookedDecodeContext = _rawDecodeContext;
+            _decodeContext = &_cookedDecodeContext;
+        } else {
+            _rawDecodeContext = _cookedDecodeContext;
+            _decodeContext = &_rawDecodeContext;
+        }
+        _readContext.setDecodeContext(_decodeContext);
+    }
+}
+
+
+/*
+ * Identifier reported by the decode context used for this format.
+ *
+ * NOTE(review): the returned reference comes from a function-local
+ * decode context; this is only safe if getIdentifier() refers to
+ * storage that outlives that object -- confirm.
+ */
+const vespalib::string &
+Zc4PosOccSeqRead::getSubIdentifier(void)
+{
+    PosOccFieldsParams fieldsParams;
+    EG2PosOccDecodeContext d(&fieldsParams);
+    return d.getIdentifier();
+}
+
+
+/*
+ * Connects the feature encode context with the feature write context
+ * and sets up the field parameters from the schema.
+ */
+Zc4PosOccSeqWrite::Zc4PosOccSeqWrite(const Schema &schema,
+                                     uint32_t indexId,
+                                     PostingListCountFileSeqWrite *countFile)
+    : Zc4PostingSeqWrite(countFile),
+      _fieldsParams(),
+      _realEncodeFeatures(&_fieldsParams)
+{
+    _encodeFeatures = &_realEncodeFeatures;
+    _encodeFeatures->setWriteContext(&_featureWriteContext);
+    _featureWriteContext.setEncodeContext(_encodeFeatures);
+    _fieldsParams.setSchemaParams(schema, indexId);
+}
+
+
+/*
+ * The reader starts out with the cooked decode context as the active
+ * one.
+ */
+ZcPosOccSeqRead::ZcPosOccSeqRead(PostingListCountFileSeqRead *countFile)
+    : ZcPostingSeqRead(countFile),
+      _fieldsParams(),
+      _cookedDecodeContext(&_fieldsParams),
+      _rawDecodeContext(&_fieldsParams)
+{
+    _decodeContext = &_cookedDecodeContext;
+    _decodeContext->setReadContext(&_readContext);
+    _readContext.setDecodeContext(_decodeContext);
+}
+
+
+/*
+ * Switch between the cooked and the raw decode context according to the
+ * "cooked" parameter.  When the parameter asks for the other context,
+ * the state of the active context is assigned to it before it becomes
+ * the active one.
+ */
+void
+ZcPosOccSeqRead::
+setFeatureParams(const PostingListParams &params)
+{
+    bool oldCooked = _decodeContext == &_cookedDecodeContext;
+    bool newCooked = oldCooked;
+    params.get("cooked", newCooked);
+    if (oldCooked != newCooked) {
+        if (newCooked) {
+            _cookedDecodeContext = _rawDecodeContext;
+            _decodeContext = &_cookedDecodeContext;
+        } else {
+            _rawDecodeContext = _cookedDecodeContext;
+            _decodeContext = &_rawDecodeContext;
+        }
+        _readContext.setDecodeContext(_decodeContext);
+    }
+}
+
+
+/*
+ * Identifier reported by the decode context used for this format.
+ *
+ * NOTE(review): the returned reference comes from a function-local
+ * decode context; this is only safe if getIdentifier() refers to
+ * storage that outlives that object -- confirm.
+ */
+const vespalib::string &
+ZcPosOccSeqRead::getSubIdentifier(void)
+{
+    PosOccFieldsParams fieldsParams;
+    EGPosOccDecodeContext d(&fieldsParams);
+    return d.getIdentifier();
+}
+
+
+/*
+ * Connects the feature encode context with the feature write context
+ * and sets up the field parameters from the schema.
+ */
+ZcPosOccSeqWrite::ZcPosOccSeqWrite(const Schema &schema,
+                                   uint32_t indexId,
+                                   PostingListCountFileSeqWrite *countFile)
+    : ZcPostingSeqWrite(countFile),
+      _fieldsParams(),
+      _realEncodeFeatures(&_fieldsParams)
+{
+    _encodeFeatures = &_realEncodeFeatures;
+    _encodeFeatures->setWriteContext(&_featureWriteContext);
+    _featureWriteContext.setEncodeContext(_encodeFeatures);
+    _fieldsParams.setSchemaParams(schema, indexId);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.h b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
new file mode 100644
index 00000000000..cbd6791198d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
@@ -0,0 +1,83 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include "zcposting.h" +#include + +namespace search +{ + +namespace diskindex +{ + +class Zc4PosOccSeqRead : public Zc4PostingSeqRead +{ +private: + bitcompression::PosOccFieldsParams _fieldsParams; + bitcompression::EG2PosOccDecodeContextCooked _cookedDecodeContext; + bitcompression::EG2PosOccDecodeContext _rawDecodeContext; + +public: + Zc4PosOccSeqRead(index::PostingListCountFileSeqRead *countFile); + + virtual void + setFeatureParams(const PostingListParams &params); + + static const vespalib::string & + getSubIdentifier(void); +}; + + +class Zc4PosOccSeqWrite : public Zc4PostingSeqWrite +{ +private: + bitcompression::PosOccFieldsParams _fieldsParams; + bitcompression::EG2PosOccEncodeContext _realEncodeFeatures; + +public: + typedef index::Schema Schema; + + Zc4PosOccSeqWrite(const Schema &schema, + uint32_t indexId, + index::PostingListCountFileSeqWrite *countFile); +}; + + +class ZcPosOccSeqRead : public ZcPostingSeqRead +{ +private: + bitcompression::PosOccFieldsParams _fieldsParams; + bitcompression::EGPosOccDecodeContextCooked _cookedDecodeContext; + bitcompression::EGPosOccDecodeContext _rawDecodeContext; + +public: + ZcPosOccSeqRead(index::PostingListCountFileSeqRead *countFile); + + virtual void + setFeatureParams(const PostingListParams &params); + + static const vespalib::string & + getSubIdentifier(void); +}; + + +class ZcPosOccSeqWrite : public ZcPostingSeqWrite +{ +private: + bitcompression::PosOccFieldsParams _fieldsParams; + bitcompression::EGPosOccEncodeContext _realEncodeFeatures; + +public: + typedef index::Schema Schema; + + ZcPosOccSeqWrite(const Schema &schema, + uint32_t indexId, + index::PostingListCountFileSeqWrite *countFile); +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp new file mode 100644 index 00000000000..fdb498e8e28 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".diskindex.zcposocciterators"); +#include "zcposocciterators.h" + +namespace search +{ + +namespace diskindex +{ + +using search::fef::TermFieldMatchDataArray; +using search::bitcompression::PosOccFieldsParams; +using search::index::PostingListCounts; + +#define DEBUG_ZCFILTEROCC_PRINTF 0 +#define DEBUG_ZCFILTEROCC_ASSERT 0 + +template +Zc4RareWordPosOccIterator:: +Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + const PosOccFieldsParams *fieldsParams, + const TermFieldMatchDataArray &matchData) + : Zc4RareWordPostingIterator(matchData, start, docIdLimit), + _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) +{ + LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); + _decodeContext = &_decodeContextReal; +} + + +template +Zc4PosOccIterator:: +Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + uint32_t minChunkDocs, const PostingListCounts &counts, + const PosOccFieldsParams *fieldsParams, + const TermFieldMatchDataArray &matchData) + : ZcPostingIterator(minChunkDocs, false, counts, matchData, start, docIdLimit), + _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) +{ + LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); + _decodeContext = &_decodeContextReal; +} + + +template +ZcRareWordPosOccIterator:: +ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + const PosOccFieldsParams *fieldsParams, + const TermFieldMatchDataArray &matchData) + : ZcRareWordPostingIterator(matchData, start, docIdLimit), + _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) +{ + 
LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); + _decodeContext = &_decodeContextReal; +} + + +template +ZcPosOccIterator:: +ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + uint32_t minChunkDocs, const PostingListCounts &counts, + const PosOccFieldsParams *fieldsParams, + const TermFieldMatchDataArray &matchData) + : ZcPostingIterator(minChunkDocs, true, counts, matchData, start, docIdLimit), + _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) +{ + LOG_ASSERT(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); + _decodeContext = &_decodeContextReal; +} + + +template class Zc4RareWordPosOccIterator; +template class Zc4RareWordPosOccIterator; + +template class Zc4PosOccIterator; +template class Zc4PosOccIterator; + +template class ZcRareWordPosOccIterator; +template class ZcRareWordPosOccIterator; + +template class ZcPosOccIterator; +template class ZcPosOccIterator; + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h new file mode 100644 index 00000000000..3af84f888ea --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "zcpostingiterators.h" +#include + +namespace search { + +namespace diskindex { + +template +class Zc4RareWordPosOccIterator : public Zc4RareWordPostingIterator +{ +private: + typedef Zc4RareWordPostingIterator ParentClass; + using ParentClass::_decodeContext; + + typedef bitcompression::EG2PosOccDecodeContextCooked DecodeContextReal; + DecodeContextReal _decodeContextReal; +public: + Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + const bitcompression::PosOccFieldsParams *fieldsParams, + const search::fef::TermFieldMatchDataArray &matchData); +}; + + +template +class Zc4PosOccIterator : public ZcPostingIterator +{ +private: + typedef ZcPostingIterator ParentClass; + using ParentClass::_decodeContext; + + typedef bitcompression::EG2PosOccDecodeContextCooked DecodeContext; + DecodeContext _decodeContextReal; +public: + Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, + uint32_t minChunkDocs, const index::PostingListCounts &counts, + const bitcompression::PosOccFieldsParams *fieldsParams, + const search::fef::TermFieldMatchDataArray &matchData); +}; + + +template +class ZcRareWordPosOccIterator : public ZcRareWordPostingIterator +{ +private: + typedef ZcRareWordPostingIterator ParentClass; + using ParentClass::_decodeContext; + + typedef bitcompression::EGPosOccDecodeContextCooked DecodeContextReal; + DecodeContextReal _decodeContextReal; +public: + ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docidLimit, + const bitcompression::PosOccFieldsParams *fieldsParams, + const search::fef::TermFieldMatchDataArray &matchData); +}; + + +template +class ZcPosOccIterator : public ZcPostingIterator +{ +private: + typedef ZcPostingIterator ParentClass; + using ParentClass::_decodeContext; + + typedef bitcompression::EGPosOccDecodeContextCooked DecodeContext; + DecodeContext _decodeContextReal; +public: + ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t 
docidLimit, + uint32_t minChunkDocs, const index::PostingListCounts &counts, + const bitcompression::PosOccFieldsParams *fieldsParams, + const search::fef::TermFieldMatchDataArray &matchData); +}; + + +extern template class Zc4RareWordPosOccIterator; +extern template class Zc4RareWordPosOccIterator; + +extern template class Zc4PosOccIterator; +extern template class Zc4PosOccIterator; + +extern template class ZcRareWordPosOccIterator; +extern template class ZcRareWordPosOccIterator; + +extern template class ZcPosOccIterator; +extern template class ZcPosOccIterator; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp new file mode 100644 index 00000000000..26ed327ec52 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -0,0 +1,381 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".diskindex.zcposoccrandread"); +#include +#include "zcposoccrandread.h" +#include "zcposocciterators.h" +#include +#include + +using search::bitcompression::EG2PosOccEncodeContext; +using search::bitcompression::EGPosOccEncodeContext; +using search::bitcompression::EG2PosOccDecodeContext; +using search::bitcompression::EG2PosOccDecodeContextCooked; +using search::bitcompression::EGPosOccDecodeContext; +using search::bitcompression::EGPosOccDecodeContextCooked; +using search::bitcompression::PosOccFieldsParams; +using search::bitcompression::FeatureDecodeContext; +using search::index::PostingListCounts; +using search::index::PostingListHandle; +using search::ComprFileReadContext; + +namespace +{ + +vespalib::string myId4("Zc.4"); +vespalib::string myId5("Zc.5"); + +} + +namespace search +{ + +namespace diskindex +{ + +using vespalib::getLastErrorString; + +ZcPosOccRandRead::ZcPosOccRandRead(void) + : _file(), + _fileSize(0), + _minChunkDocs(1 << 30), + _minSkipDocs(64), + _docIdLimit(10000000), + _numWords(0), + _fileBitSize(0), + _headerBitSize(0), + _fieldsParams(), + _dynamicK(true) +{ +} + + +ZcPosOccRandRead::~ZcPosOccRandRead(void) +{ + if (_file.IsOpened()) + close(); +} + + +search::queryeval::SearchIterator * +ZcPosOccRandRead:: +createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const +{ + (void) counts; + (void) handle; + (void) matchData; + (void) usebitVector; + + typedef EGPosOccEncodeContext EC; + + assert((handle._bitLength != 0) == (counts._bitLength != 0)); + assert((counts._numDocs != 0) == (counts._bitLength != 0)); + assert(handle._bitOffsetMem <= handle._bitOffset); + + if (handle._bitLength == 0) + return new search::queryeval::EmptySearch; + + const char *cmem = static_cast(handle._mem); + uint64_t memOffset = reinterpret_cast(cmem) & 7; + const uint64_t *mem = reinterpret_cast + (cmem - memOffset) + 
+ (memOffset * 8 + handle._bitOffset - + handle._bitOffsetMem) / 64; + int bitOffset = (memOffset * 8 + handle._bitOffset - + handle._bitOffsetMem) & 63; + + Position start(mem, bitOffset); + + EGPosOccDecodeContext d(mem, bitOffset, &_fieldsParams); + + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + + uint32_t numDocs = static_cast(val64) + 1; + + if (numDocs < _minSkipDocs) { + return new ZcRareWordPosOccIterator(start, handle._bitLength, _docIdLimit, &_fieldsParams, matchData); + } else { + return new ZcPosOccIterator(start, handle._bitLength, _docIdLimit, _minChunkDocs, counts, &_fieldsParams, matchData); + } +} + + +void +ZcPosOccRandRead::readPostingList(const PostingListCounts &counts, + uint32_t firstSegment, + uint32_t numSegments, + PostingListHandle &handle) +{ + // XXX: Ignore segments for now. + (void) firstSegment; + (void) numSegments; + (void) counts; + + handle.drop(); + if (handle._bitLength == 0) + return; + + uint64_t startOffset = (handle._bitOffset + _headerBitSize) >> 3; + // Align start at 64-bit boundary + startOffset -= (startOffset & 7); + + void *mapPtr = _file.MemoryMapPtr(startOffset); + if (mapPtr != NULL) { + handle._mem = mapPtr; + handle._allocMem = NULL; + handle._allocSize = 0; + } else { + uint64_t endOffset = (handle._bitOffset + _headerBitSize + + handle._bitLength + 7) >> 3; + // Align end at 64-bit boundary + endOffset += (-endOffset & 7); + + uint64_t vectorLen = endOffset - startOffset; + size_t padBefore; + size_t padAfter; + size_t padExtraAfter; // Decode prefetch space + _file.DirectIOPadding(startOffset, vectorLen, + padBefore, padAfter); + padExtraAfter = 0; + if (padAfter < 16) + padExtraAfter = 16 - padAfter; + + size_t mallocLen = padBefore + vectorLen + padAfter + padExtraAfter; + void *mallocStart = NULL; + void *alignedBuffer = NULL; + if (mallocLen > 0) { + alignedBuffer = _file.AllocateDirectIOBuffer(mallocLen, + 
mallocStart); + assert(mallocStart != NULL); + assert(endOffset + padAfter + padExtraAfter <= _fileSize); + _file.ReadBuf(alignedBuffer, + padBefore + vectorLen + padAfter, + startOffset - padBefore); + } + // Zero decode prefetch memory to avoid uninitialized reads + if (padExtraAfter > 0) { + memset(reinterpret_cast(alignedBuffer) + + padBefore + vectorLen + padAfter, + '\0', + padExtraAfter); + } + handle._mem = static_cast(alignedBuffer) + padBefore; + handle._allocMem = mallocStart; + handle._allocSize = mallocLen; + } + handle._bitOffsetMem = (startOffset << 3) - _headerBitSize; +} + + +bool +ZcPosOccRandRead:: +open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead) +{ + if (tuneFileRead.getWantMemoryMap()) { + _file.enableMemoryMap(tuneFileRead.getMemoryMapFlags()); + } else if (tuneFileRead.getWantDirectIO()) + _file.EnableDirectIO(); + bool res = _file.OpenReadOnly(name.c_str()); + if (!res) { + LOG(error, "could not open %s: %s", + _file.GetFileName(), getLastErrorString().c_str()); + return false; + } + _fileSize = _file.GetSize(); + + readHeader(); + return true; +} + + +bool +ZcPosOccRandRead::close(void) +{ + _file.Close(); + return true; +} + + +void +ZcPosOccRandRead::readHeader(void) +{ + EGPosOccDecodeContext d(&_fieldsParams); + ComprFileReadContext drc(d); + + drc.setFile(&_file); + drc.setFileSize(_file.GetSize()); + drc.allocComprBuf(512, 32768u); + d.emptyBuffer(0); + drc.readComprBuffer(); + d.setReadContext(&drc); + + vespalib::FileHeader header; + d.readHeader(header, _file.getSize()); + uint32_t headerLen = header.getSize(); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(header.hasTag("format.1")); + assert(!header.hasTag("format.2")); + assert(header.hasTag("numWords")); + assert(header.hasTag("minChunkDocs")); + assert(header.hasTag("docIdLimit")); + assert(header.hasTag("minSkipDocs")); + assert(header.getTag("frozen").asInteger() != 0); + 
_fileBitSize = header.getTag("fileBitSize").asInteger(); + assert(header.getTag("format.0").asString() == myId5); + assert(header.getTag("format.1").asString() == d.getIdentifier()); + _numWords = header.getTag("numWords").asInteger(); + _minChunkDocs = header.getTag("minChunkDocs").asInteger(); + _docIdLimit = header.getTag("docIdLimit").asInteger(); + _minSkipDocs = header.getTag("minSkipDocs").asInteger(); + // Read feature decoding specific subheader + d.readHeader(header, "features."); + // Align on 64-bit unit + d.smallAlign(64); + headerLen += (-headerLen & 7); + assert(d.getReadOffset() == headerLen * 8); + _headerBitSize = d.getReadOffset(); +} + + +const vespalib::string & +ZcPosOccRandRead::getIdentifier(void) +{ + return myId5; +} + + +const vespalib::string & +ZcPosOccRandRead::getSubIdentifier(void) +{ + PosOccFieldsParams fieldsParams; + EGPosOccDecodeContext d(&fieldsParams); + return d.getIdentifier(); +} + + +Zc4PosOccRandRead:: +Zc4PosOccRandRead(void) + : ZcPosOccRandRead() +{ + _dynamicK = false; +} + + +search::queryeval::SearchIterator * +Zc4PosOccRandRead:: +createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const +{ + (void) usebitVector; + typedef EGPosOccEncodeContext EC; + + assert((handle._bitLength != 0) == (counts._bitLength != 0)); + assert((counts._numDocs != 0) == (counts._bitLength != 0)); + assert(handle._bitOffsetMem <= handle._bitOffset); + + if (handle._bitLength == 0) + return new search::queryeval::EmptySearch; + + const char *cmem = static_cast(handle._mem); + uint64_t memOffset = reinterpret_cast(cmem) & 7; + const uint64_t *mem = reinterpret_cast + (cmem - memOffset) + + (memOffset * 8 + handle._bitOffset - + handle._bitOffsetMem) / 64; + int bitOffset = (memOffset * 8 + handle._bitOffset - + handle._bitOffsetMem) & 63; + + Position start(mem, bitOffset); + EG2PosOccDecodeContext d(mem, bitOffset, &_fieldsParams); 
+ + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + + uint32_t numDocs = static_cast(val64) + 1; + + if (numDocs < _minSkipDocs) { + return new Zc4RareWordPosOccIterator(start, handle._bitLength, _docIdLimit, &_fieldsParams, matchData); + } else { + return new Zc4PosOccIterator(start, handle._bitLength, _docIdLimit, _minChunkDocs, counts, &_fieldsParams, matchData); + } +} + + +void +Zc4PosOccRandRead::readHeader(void) +{ + EG2PosOccDecodeContext d(&_fieldsParams); + ComprFileReadContext drc(d); + + drc.setFile(&_file); + drc.setFileSize(_file.GetSize()); + drc.allocComprBuf(512, 32768u); + d.emptyBuffer(0); + drc.readComprBuffer(); + d.setReadContext(&drc); + + vespalib::FileHeader header; + d.readHeader(header, _file.getSize()); + uint32_t headerLen = header.getSize(); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(header.hasTag("format.1")); + assert(!header.hasTag("format.2")); + assert(header.hasTag("numWords")); + assert(header.hasTag("minChunkDocs")); + assert(header.hasTag("docIdLimit")); + assert(header.hasTag("minSkipDocs")); + assert(header.getTag("frozen").asInteger() != 0); + _fileBitSize = header.getTag("fileBitSize").asInteger(); + assert(header.getTag("format.0").asString() == myId4); + assert(header.getTag("format.1").asString() == d.getIdentifier()); + _numWords = header.getTag("numWords").asInteger(); + _minChunkDocs = header.getTag("minChunkDocs").asInteger(); + _docIdLimit = header.getTag("docIdLimit").asInteger(); + _minSkipDocs = header.getTag("minSkipDocs").asInteger(); + // Read feature decoding specific subheader + d.readHeader(header, "features."); + // Align on 64-bit unit + d.smallAlign(64); + headerLen += (-headerLen & 7); + assert(d.getReadOffset() == headerLen * 8); + _headerBitSize = d.getReadOffset(); +} + + +const vespalib::string & 
+Zc4PosOccRandRead::getIdentifier(void) +{ + return myId4; +} + + +const vespalib::string & +Zc4PosOccRandRead::getSubIdentifier(void) +{ + PosOccFieldsParams fieldsParams; + EG2PosOccDecodeContext d(&fieldsParams); + return d.getIdentifier(); +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h new file mode 100644 index 00000000000..c86d9a2cd13 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search +{ + +namespace diskindex +{ + +class ZcPosOccRandRead : public index::PostingListFileRandRead +{ +protected: + FastOS_File _file; + uint64_t _fileSize; + + uint32_t _minChunkDocs; // # of documents needed for chunking + uint32_t _minSkipDocs; // # of documents needed for skipping + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + + uint64_t _numWords; // Number of words in file + uint64_t _fileBitSize; + uint64_t _headerBitSize; + bitcompression::PosOccFieldsParams _fieldsParams; + bool _dynamicK; + + +public: + ZcPosOccRandRead(void); + + virtual + ~ZcPosOccRandRead(void); + + typedef index::PostingListCounts PostingListCounts; + typedef index::PostingListHandle PostingListHandle; + + /** + * Create iterator for single word. Semantic lifetime of counts and + * handle must exceed lifetime of iterator. + */ + virtual search::queryeval::SearchIterator * + createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const; + + /** + * Read (possibly partial) posting list into handle. 
+ */ + virtual void + readPostingList(const PostingListCounts &counts, + uint32_t firstSegment, + uint32_t numSegments, + PostingListHandle &handle); + + /** + * Open posting list file for random read. + */ + virtual bool + open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead); + + /** + * Close posting list file. + */ + virtual bool + close(void); + + virtual void + readHeader(void); + + static const vespalib::string & + getIdentifier(void); + + static const vespalib::string & + getSubIdentifier(void); +}; + +class Zc4PosOccRandRead : public ZcPosOccRandRead +{ +public: + Zc4PosOccRandRead(void); + + /** + * Create iterator for single word. Semantic lifetime of counts and + * handle must exceed lifetime of iterator. + */ + virtual search::queryeval::SearchIterator * + createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const; + + virtual void + readHeader(void); + + static const vespalib::string & + getIdentifier(void); + + static const vespalib::string & + getSubIdentifier(void); +}; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp new file mode 100644 index 00000000000..13b536e8a6b --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp @@ -0,0 +1,1470 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include "zcposting.h" +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".diskindex.zcposting"); + +namespace +{ + +vespalib::string myId5("Zc.5"); +vespalib::string myId4("Zc.4"); +vespalib::string emptyId; + +} + +namespace search +{ + +namespace diskindex +{ + +using index::PostingListCountFileSeqRead; +using index::PostingListCountFileSeqWrite; +using common::FileHeaderContext; +using bitcompression::FeatureDecodeContextBE; +using bitcompression::FeatureEncodeContextBE; +using vespalib::nbostream; +using vespalib::getLastErrorString; + + +void +ZcBuf::clearReserve(size_t reserveSize) +{ + if (reserveSize + zcSlack() > _mallocSize) { + size_t newSize = _mallocSize * 2; + if (newSize < 16) + newSize = 16; + while (newSize < reserveSize + zcSlack()) + newSize *= 2; + uint8_t *newBuf = static_cast(malloc(newSize)); + free(_mallocStart); + _mallocStart = newBuf; + _mallocSize = newSize; + } + _valE = _mallocStart + _mallocSize - zcSlack(); + _valI = _mallocStart; +} + + +void +ZcBuf::expand(void) +{ + size_t newSize = _mallocSize * 2; + size_t oldSize = size(); + if (newSize < 16) + newSize = 16; + + uint8_t *newBuf = static_cast(malloc(newSize)); + + if (oldSize > 0) + memcpy(newBuf, _mallocStart, oldSize); + free(_mallocStart); + _mallocStart = newBuf; + _mallocSize = newSize; + _valI = _mallocStart + oldSize; + _valE = _mallocStart + newSize - zcSlack(); +} + + +Zc4PostingSeqRead:: +Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile) + : PostingListFileSeqRead(), + _decodeContext(), + _docIdK(0), + _prevDocId(0), + _numDocs(0), + _readContext(sizeof(uint64_t)), + _file(), + _hasMore(false), + _dynamicK(false), + _lastDocId(0), + _minChunkDocs(1 << 30), + _minSkipDocs(64), + _docIdLimit(10000000), + _zcDocIds(), + _l1Skip(), + _l2Skip(), + _l3Skip(), + _l4Skip(), + _numWords(0), + _fileBitSize(0), + _chunkNo(0), + _l1SkipDocId(0), + _l1SkipDocIdPos(0), + _l1SkipFeaturesPos(0), + _l2SkipDocId(0), + 
_l2SkipDocIdPos(0), + _l2SkipL1SkipPos(0), + _l2SkipFeaturesPos(0), + _l3SkipDocId(0), + _l3SkipDocIdPos(0), + _l3SkipL1SkipPos(0), + _l3SkipL2SkipPos(0), + _l3SkipFeaturesPos(0), + _l4SkipDocId(0), + _l4SkipDocIdPos(0), + _l4SkipL1SkipPos(0), + _l4SkipL2SkipPos(0), + _l4SkipL3SkipPos(0), + _l4SkipFeaturesPos(0), + _featuresSize(0), + _countFile(countFile), + _headerBitLen(0), + _rangeEndOffset(0), + _readAheadEndOffset(0), + _wordStart(0), + _checkPointPos(0), + _residue(0), + _checkPointChunkNo(0u), + _checkPointResidue(0u), + _checkPointHasMore(false) +{ + if (_countFile != NULL) { + PostingListParams params; + _countFile->getParams(params); + params.get("docIdLimit", _docIdLimit); + params.get("minChunkDocs", _minChunkDocs); + } +} + + +Zc4PostingSeqRead::~Zc4PostingSeqRead(void) +{ +} + + +void +Zc4PostingSeqRead:: +readCommonWordDocIdAndFeatures(DocIdAndFeatures &features) +{ + if (_zcDocIds._valI >= _zcDocIds._valE && _hasMore) + readWordStart(); // Read start of next chunk + // Split docid & features. 
+ assert(_zcDocIds._valI < _zcDocIds._valE); + uint32_t docIdPos = _zcDocIds.pos(); + uint32_t docId = _prevDocId + 1 + _zcDocIds.decode(); + features._docId = docId; + _prevDocId = docId; + assert(docId <= _lastDocId); + if (docId > _l1SkipDocId) { + _l1SkipDocIdPos += _l1Skip.decode() + 1; + assert(docIdPos == _l1SkipDocIdPos); + _l1SkipFeaturesPos += _l1Skip.decode() + 1; + uint64_t featuresPos = _decodeContext->getReadOffset(); + assert(featuresPos == _l1SkipFeaturesPos); + (void) featuresPos; + if (docId > _l2SkipDocId) { + _l2SkipDocIdPos += _l2Skip.decode() + 1; + assert(docIdPos == _l2SkipDocIdPos); + _l2SkipFeaturesPos += _l2Skip.decode() + 1; + assert(featuresPos == _l2SkipFeaturesPos); + _l2SkipL1SkipPos += _l2Skip.decode() + 1; + assert(_l1Skip.pos() == _l2SkipL1SkipPos); + if (docId > _l3SkipDocId) { + _l3SkipDocIdPos += _l3Skip.decode() + 1; + assert(docIdPos == _l3SkipDocIdPos); + _l3SkipFeaturesPos += _l3Skip.decode() + 1; + assert(featuresPos == _l3SkipFeaturesPos); + _l3SkipL1SkipPos += _l3Skip.decode() + 1; + assert(_l1Skip.pos() == _l3SkipL1SkipPos); + _l3SkipL2SkipPos += _l3Skip.decode() + 1; + assert(_l2Skip.pos() == _l3SkipL2SkipPos); + if (docId > _l4SkipDocId) { + _l4SkipDocIdPos += _l4Skip.decode() + 1; + assert(docIdPos == _l4SkipDocIdPos); + (void) docIdPos; + _l4SkipFeaturesPos += _l4Skip.decode() + 1; + assert(featuresPos == _l4SkipFeaturesPos); + _l4SkipL1SkipPos += _l4Skip.decode() + 1; + assert(_l1Skip.pos() == _l4SkipL1SkipPos); + _l4SkipL2SkipPos += _l4Skip.decode() + 1; + assert(_l2Skip.pos() == _l4SkipL2SkipPos); + _l4SkipL3SkipPos += _l4Skip.decode() + 1; + assert(_l3Skip.pos() == _l4SkipL3SkipPos); + _l4SkipDocId += _l4Skip.decode() + 1; + assert(_l4SkipDocId <= _lastDocId); + assert(_l4SkipDocId >= docId); + } + _l3SkipDocId += _l3Skip.decode() + 1; + assert(_l3SkipDocId <= _lastDocId); + assert(_l3SkipDocId <= _l4SkipDocId); + assert(_l3SkipDocId >= docId); + } + _l2SkipDocId += _l2Skip.decode() + 1; + assert(_l2SkipDocId <= 
_lastDocId); + assert(_l2SkipDocId <= _l4SkipDocId); + assert(_l2SkipDocId <= _l3SkipDocId); + assert(_l2SkipDocId >= docId); + } + _l1SkipDocId += _l1Skip.decode() + 1; + assert(_l1SkipDocId <= _lastDocId); + assert(_l1SkipDocId <= _l4SkipDocId); + assert(_l1SkipDocId <= _l3SkipDocId); + assert(_l1SkipDocId <= _l2SkipDocId); + assert(_l1SkipDocId >= docId); + } + if (docId < _lastDocId) { + // Assert more space available when not yet at last docid + assert(_zcDocIds._valI < _zcDocIds._valE); + } else { + // Assert that space has been used when at last docid + assert(_zcDocIds._valI == _zcDocIds._valE); + // Assert that we've read to end of skip info + assert(_l1SkipDocId == _lastDocId); + assert(_l2SkipDocId == _lastDocId); + assert(_l3SkipDocId == _lastDocId); + assert(_l4SkipDocId == _lastDocId); + if (!_hasMore) { + _chunkNo = 0; + } + } + _decodeContext->readFeatures(features); + --_residue; +} + + +void +Zc4PostingSeqRead:: +readDocIdAndFeatures(DocIdAndFeatures &features) +{ + if (_residue == 0 && !_hasMore) { + if (_rangeEndOffset != 0) { + DecodeContext &d = *_decodeContext; + uint64_t curOffset = d.getReadOffset(); + assert(curOffset <= _rangeEndOffset); + if (curOffset < _rangeEndOffset) + readWordStart(); + } + if (_residue == 0) { + // Don't read past end of posting list. 
+ features.clear(static_cast(-1)); + return; + } + } + if (_lastDocId > 0) + return readCommonWordDocIdAndFeatures(features); + // Interleaves docid & features + typedef FeatureEncodeContextBE EC; + DecodeContext &d = *_decodeContext; + uint32_t length; + uint64_t val64; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + + UC64BE_DECODEEXPGOLOMB_SMALL_NS(o, + K_VALUE_ZCPOSTING_DELTA_DOCID, + EC); + uint32_t docId = _prevDocId + 1 + val64; + features._docId = docId; + _prevDocId = docId; + UC64_DECODECONTEXT_STORE(o, d._); + if (__builtin_expect(oCompr >= d._valE, false)) { + _readContext.readComprBuffer(); + } + _decodeContext->readFeatures(features); + --_residue; +} + + +void +Zc4PostingSeqRead::checkPointWrite(nbostream &out) +{ + out << _counts; + out << _wordStart; + uint64_t curPos = _decodeContext->getReadOffset(); + out << curPos; + out << _residue; + out << _chunkNo; + out << _hasMore; +} + + +void +Zc4PostingSeqRead::checkPointRead(nbostream &in) +{ + in >> _counts; + in >> _wordStart; + in >> _checkPointPos; + in >> _checkPointResidue; + in >> _checkPointChunkNo; + in >> _checkPointHasMore; + assert(_checkPointPos >= _wordStart); +} + + +void +Zc4PostingSeqRead::readWordStartWithSkip(void) +{ + typedef FeatureEncodeContextBE EC; + DecodeContext &d = *_decodeContext; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + const uint64_t *valE = d._valE; + + if (_hasMore) + ++_chunkNo; + else + _chunkNo = 0; + assert(_numDocs >= _minSkipDocs || _hasMore); + bool hasMore = false; + if (__builtin_expect(_numDocs >= _minChunkDocs, false)) { + hasMore = static_cast(oVal) < 0; + oVal <<= 1; + length = 1; + UC64BE_READBITS_NS(o, EC); + } + if (_dynamicK) + _docIdK = EC::calcDocIdK((_hasMore || hasMore) ? 
1 : _numDocs, + _docIdLimit); + if (_hasMore || hasMore) { + if (_rangeEndOffset == 0) { + assert(hasMore == (_chunkNo + 1 < _counts._segments.size())); + assert(_numDocs == _counts._segments[_chunkNo]._numDocs); + } + if (hasMore) { + assert(_numDocs >= _minSkipDocs); + assert(_numDocs >= _minChunkDocs); + } + } else { + assert(_numDocs >= _minSkipDocs); + if (_rangeEndOffset == 0) { + assert(_numDocs == _counts._numDocs); + } + } + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, d._); + _readContext.readComprBuffer(); + valE = d._valE; + UC64_DECODECONTEXT_LOAD(o, d._); + } + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_DOCIDSSIZE, + EC); + uint32_t docIdsSize = val64 + 1; + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_L1SKIPSIZE, + EC); + uint32_t l1SkipSize = val64; + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, d._); + _readContext.readComprBuffer(); + valE = d._valE; + UC64_DECODECONTEXT_LOAD(o, d._); + } + uint32_t l2SkipSize = 0; + if (l1SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_L2SKIPSIZE, + EC); + l2SkipSize = val64; + } + uint32_t l3SkipSize = 0; + if (l2SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_L3SKIPSIZE, + EC); + l3SkipSize = val64; + } + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, d._); + _readContext.readComprBuffer(); + valE = d._valE; + UC64_DECODECONTEXT_LOAD(o, d._); + } + uint32_t l4SkipSize = 0; + if (l3SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_L4SKIPSIZE, + EC); + l4SkipSize = val64; + } + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_FEATURESSIZE, + EC); + _featuresSize = val64; + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, d._); + _readContext.readComprBuffer(); + valE = d._valE; + UC64_DECODECONTEXT_LOAD(o, d._); + } + if (_dynamicK) { + UC64BE_DECODEEXPGOLOMB_NS(o, + _docIdK, + EC); + } else { + UC64BE_DECODEEXPGOLOMB_NS(o, + 
K_VALUE_ZCPOSTING_LASTDOCID, + EC); + } + _lastDocId = _docIdLimit - 1 - val64; + if (_hasMore || hasMore) { + if (_rangeEndOffset == 0) { + assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc); + } + } + + if (__builtin_expect(oCompr >= valE, false)) { + UC64_DECODECONTEXT_STORE(o, d._); + _readContext.readComprBuffer(); + valE = d._valE; + UC64_DECODECONTEXT_LOAD(o, d._); + } + uint64_t bytePad = oPreRead & 7; + if (bytePad > 0) { + length = bytePad; + oVal <<= length; + UC64BE_READBITS_NS(o, EC); + } + UC64_DECODECONTEXT_STORE(o, d._); + if (__builtin_expect(oCompr >= valE, false)) { + _readContext.readComprBuffer(); + } + _zcDocIds.clearReserve(docIdsSize); + _l1Skip.clearReserve(l1SkipSize); + _l2Skip.clearReserve(l2SkipSize); + _l3Skip.clearReserve(l3SkipSize); + _l4Skip.clearReserve(l4SkipSize); + _decodeContext->readBytes(_zcDocIds._valI, docIdsSize); + _zcDocIds._valE = _zcDocIds._valI + docIdsSize; + if (l1SkipSize > 0) + _decodeContext->readBytes(_l1Skip._valI, l1SkipSize); + _l1Skip._valE = _l1Skip._valI + l1SkipSize; + if (l2SkipSize > 0) + _decodeContext->readBytes(_l2Skip._valI, l2SkipSize); + _l2Skip._valE = _l2Skip._valI + l2SkipSize; + if (l3SkipSize > 0) + _decodeContext->readBytes(_l3Skip._valI, l3SkipSize); + _l3Skip._valE = _l3Skip._valI + l3SkipSize; + if (l4SkipSize > 0) + _decodeContext->readBytes(_l4Skip._valI, l4SkipSize); + _l4Skip._valE = _l4Skip._valI + l4SkipSize; + + if (l1SkipSize > 0) + _l1SkipDocId = _l1Skip.decode() + 1 + _prevDocId; + else + _l1SkipDocId = _lastDocId; + if (l2SkipSize > 0) + _l2SkipDocId = _l2Skip.decode() + 1 + _prevDocId; + else + _l2SkipDocId = _lastDocId; + if (l3SkipSize > 0) + _l3SkipDocId = _l3Skip.decode() + 1 + _prevDocId; + else + _l3SkipDocId = _lastDocId; + if (l4SkipSize > 0) + _l4SkipDocId = _l4Skip.decode() + 1 + _prevDocId; + else + _l4SkipDocId = _lastDocId; + _l1SkipDocIdPos = 0; + _l1SkipFeaturesPos = _decodeContext->getReadOffset(); + _l2SkipDocIdPos = 0; + _l2SkipL1SkipPos = 0; + 
_l2SkipFeaturesPos = _decodeContext->getReadOffset(); + _l3SkipDocIdPos = 0; + _l3SkipL1SkipPos = 0; + _l3SkipL2SkipPos = 0; + _l3SkipFeaturesPos = _decodeContext->getReadOffset(); + _l4SkipDocIdPos = 0; + _l4SkipL1SkipPos = 0; + _l4SkipL2SkipPos = 0; + _l4SkipL3SkipPos = 0; + _l4SkipFeaturesPos = _decodeContext->getReadOffset(); + _hasMore = hasMore; + // Decode context is now positioned at start of features +} + + +void +Zc4PostingSeqRead::readWordStart(void) +{ + typedef FeatureEncodeContextBE EC; + UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); + uint32_t length; + uint64_t val64; + const uint64_t *valE = _decodeContext->_valE; + + UC64BE_DECODEEXPGOLOMB_NS(o, + K_VALUE_ZCPOSTING_NUMDOCS, + EC); + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + if (oCompr >= valE) + _readContext.readComprBuffer(); + _numDocs = static_cast(val64) + 1; + _residue = _numDocs; + _prevDocId = _hasMore ? _lastDocId : 0u; + if (_rangeEndOffset == 0) { + assert(_numDocs <= _counts._numDocs); + assert(_numDocs == _counts._numDocs || + _numDocs >= _minChunkDocs || + _hasMore); + } + + if (_numDocs >= _minSkipDocs || _hasMore) { + readWordStartWithSkip(); + // Decode context is not positioned at start of features + } else { + if (_dynamicK) + _docIdK = EC::calcDocIdK(_numDocs, _docIdLimit); + _lastDocId = 0u; + // Decode context is not positioned at start of docids & features + } +} + + +void +Zc4PostingSeqRead::readCounts(const PostingListCounts &counts) +{ + assert(!_hasMore); // Previous words must have been read. 
+ + _counts = counts; + + assert((_counts._numDocs == 0) == (_counts._bitLength == 0)); + if (_counts._numDocs > 0) { + _wordStart = _decodeContext->getReadOffset(); + readWordStart(); + } +} + + +bool +Zc4PostingSeqRead::open(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead) +{ + if (tuneFileRead.getWantDirectIO()) + _file.EnableDirectIO(); + bool res = _file.OpenReadOnly(name.c_str()); + if (res) { + _readContext.setFile(&_file); + _readContext.setFileSize(_file.GetSize()); + DecodeContext &d = *_decodeContext; + _readContext.allocComprBuf(65536u, 32768u); + d.emptyBuffer(0); + _readContext.readComprBuffer(); + + readHeader(); + if (d._valI >= d._valE) + _readContext.readComprBuffer(); + if (_checkPointPos != 0) { + if (_checkPointResidue != 0 || _checkPointHasMore) { + // Checkpointed in the middle of a word. Read from + // start at word until at right position. + DocIdAndFeatures features; + _readContext.setPosition(_wordStart); + assert(_decodeContext->getReadOffset() == _wordStart); + _readContext.readComprBuffer(); + readWordStart(); + assert(_chunkNo < _checkPointChunkNo || + (_chunkNo == _checkPointChunkNo && + _residue >= _checkPointResidue)); + while (_chunkNo < _checkPointChunkNo || + _residue > _checkPointResidue) { + readDocIdAndFeatures(features); + } + assert(_chunkNo == _checkPointChunkNo); + assert(_residue == _checkPointResidue); + assert(_hasMore == _checkPointHasMore); + assert(_decodeContext->getReadOffset() == _checkPointPos); + } else { + // Checkpointed between words. 
+ _readContext.setPosition(_checkPointPos); + assert(_decodeContext->getReadOffset() == _checkPointPos); + _readContext.readComprBuffer(); + } + } + } else { + LOG(error, "could not open %s: %s", + _file.GetFileName(), getLastErrorString().c_str()); + } + return res; +} + + +bool +Zc4PostingSeqRead::close(void) +{ + _readContext.dropComprBuf(); + _file.Close(); + _readContext.setFile(NULL); + return true; +} + + +void +Zc4PostingSeqRead::getParams(PostingListParams ¶ms) +{ + if (_countFile != NULL) { + PostingListParams countParams; + _countFile->getParams(countParams); + params = countParams; + uint32_t countDocIdLimit = 0; + uint32_t countMinChunkDocs = 0; + countParams.get("docIdLimit", countDocIdLimit); + countParams.get("minChunkDocs", countMinChunkDocs); + assert(_docIdLimit == countDocIdLimit); + assert(_minChunkDocs == countMinChunkDocs); + } else { + params.clear(); + params.set("docIdLimit", _docIdLimit); + params.set("minChunkDocs", _minChunkDocs); + } + params.set("minSkipDocs", _minSkipDocs); +} + + +void +Zc4PostingSeqRead::getFeatureParams(PostingListParams ¶ms) +{ + _decodeContext->getParams(params); +} + + +void +Zc4PostingSeqRead::readHeader(void) +{ + FeatureDecodeContextBE &d = *_decodeContext; + const vespalib::string &myId = _dynamicK ? 
myId5 : myId4; + + vespalib::FileHeader header; + d.readHeader(header, _file.getSize()); + uint32_t headerLen = header.getSize(); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(header.hasTag("format.1")); + assert(!header.hasTag("format.2")); + assert(header.hasTag("numWords")); + assert(header.hasTag("minChunkDocs")); + assert(header.hasTag("docIdLimit")); + assert(header.hasTag("minSkipDocs")); + assert(header.hasTag("endian")); + bool completed = header.getTag("frozen").asInteger() != 0; + _fileBitSize = header.getTag("fileBitSize").asInteger(); + headerLen += (-headerLen & 7); + assert(completed); + (void) completed; + assert(_fileBitSize >= 8 * headerLen); + assert(header.getTag("format.0").asString() == myId); + (void) myId; + assert(header.getTag("format.1").asString() == d.getIdentifier()); + _numWords = header.getTag("numWords").asInteger(); + _minChunkDocs = header.getTag("minChunkDocs").asInteger(); + _docIdLimit = header.getTag("docIdLimit").asInteger(); + _minSkipDocs = header.getTag("minSkipDocs").asInteger(); + assert(header.getTag("endian").asString() == "big"); + // Read feature decoding specific subheader + d.readHeader(header, "features."); + // Align on 64-bit unit + d.smallAlign(64); + assert(d.getReadOffset() == headerLen * 8); + _headerBitLen = d.getReadOffset(); +} + + +const vespalib::string & +Zc4PostingSeqRead::getIdentifier(void) +{ + return myId4; +} + + +uint64_t +Zc4PostingSeqRead::getCurrentPostingOffset(void) const +{ + FeatureDecodeContextBE &d = *_decodeContext; + return d.getReadOffset() - _headerBitLen; +} + + +void +Zc4PostingSeqRead::setPostingOffset(uint64_t offset, + uint64_t endOffset, + uint64_t readAheadOffset) +{ + assert(_residue == 0); // Only to be called between posting lists + + FeatureDecodeContextBE &d = *_decodeContext; + + _rangeEndOffset = endOffset + _headerBitLen; + _readAheadEndOffset = readAheadOffset + _headerBitLen; + 
_readContext.setStopOffset(_readAheadEndOffset, false); + uint64_t newOffset = offset + _headerBitLen; + if (newOffset != d.getReadOffset()) { + _readContext.setPosition(newOffset); + assert(newOffset == d.getReadOffset()); + _readContext.readComprBuffer(); + } +} + + +Zc4PostingSeqWrite:: +Zc4PostingSeqWrite(PostingListCountFileSeqWrite *countFile) + : PostingListFileSeqWrite(), + _encodeContext(), + _writeContext(_encodeContext), + _file(), + _minChunkDocs(1 << 30), + _minSkipDocs(64), + _docIdLimit(10000000), + _docIds(), + _encodeFeatures(NULL), + _featureOffset(0), + _featureWriteContext(sizeof(uint64_t)), + _writePos(0), + _dynamicK(false), + _zcDocIds(), + _l1Skip(), + _l2Skip(), + _l3Skip(), + _l4Skip(), + _numWords(0), + _fileBitSize(0), + _countFile(countFile) +{ + _encodeContext.setWriteContext(&_writeContext); + + if (_countFile != NULL) { + PostingListParams params; + _countFile->getParams(params); + params.get("docIdLimit", _docIdLimit); + params.get("minChunkDocs", _minChunkDocs); + } + _featureWriteContext.allocComprBuf(64, 1); +} + + +Zc4PostingSeqWrite::~Zc4PostingSeqWrite(void) +{ +} + + +void +Zc4PostingSeqWrite:: +writeDocIdAndFeatures(const DocIdAndFeatures &features) +{ + if (__builtin_expect(_docIds.size() >= _minChunkDocs, false)) + flushChunk(); + _encodeFeatures->writeFeatures(features); + uint64_t writeOffset = _encodeFeatures->getWriteOffset(); + uint64_t featureSize = writeOffset - _featureOffset; + assert(static_cast(featureSize) == featureSize); + _docIds.push_back(std::make_pair(features._docId, + static_cast(featureSize))); + _featureOffset = writeOffset; +} + + +void +Zc4PostingSeqWrite::flushWord(void) +{ + if (__builtin_expect(_docIds.size() >= _minSkipDocs || + !_counts._segments.empty(), false)) { + // Use skip information if enough documents of chunking has happened + flushWordWithSkip(false); + _numWords++; + } else if (_docIds.size() > 0) { + flushWordNoSkip(); + _numWords++; + } + + EncodeContext &e = _encodeContext; + 
uint64_t writePos = e.getWriteOffset(); + + _counts._bitLength = writePos - _writePos; + _writePos = writePos; +} + + +void +Zc4PostingSeqWrite::checkPointWrite(nbostream &out) +{ + _writeContext.writeComprBuffer(true); // Also flush slack + out << _numWords; + _writeContext.checkPointWrite(out); + _featureWriteContext.checkPointWrite(out); + out.saveVector(_docIds) << _writePos << _counts; + _file.Sync(); +} + + +void +Zc4PostingSeqWrite::checkPointRead(nbostream &in) +{ + in >> _numWords; + _writeContext.checkPointRead(in); + _featureWriteContext.checkPointRead(in); + _featureOffset = _encodeFeatures->getWriteOffset(); + in.restoreVector(_docIds) >> _writePos >> _counts; +} + + +uint32_t +Zc4PostingSeqWrite::readHeader(const vespalib::string &name) +{ + EncodeContext &f = *_encodeFeatures; + + FeatureDecodeContextBE d; + ComprFileReadContext drc(d); + FastOS_File file; + const vespalib::string &myId = _dynamicK ? myId5 : myId4; + + d.setReadContext(&drc); + bool res = file.OpenReadOnly(name.c_str()); + if (!res) { + LOG(error, "Could not open %s for reading file header: %s", + name.c_str(), getLastErrorString().c_str()); + abort(); + } + + drc.setFile(&file); + drc.setFileSize(file.GetSize()); + drc.allocComprBuf(512, 32768u); + d.emptyBuffer(0); + drc.readComprBuffer(); + + vespalib::FileHeader header; + d.readHeader(header, file.getSize()); + uint32_t headerLen = header.getSize(); + assert(header.hasTag("frozen")); + assert(header.hasTag("fileBitSize")); + assert(header.hasTag("format.0")); + assert(header.hasTag("format.1")); + assert(!header.hasTag("format.2")); + assert(header.hasTag("numWords")); + assert(header.hasTag("minChunkDocs")); + assert(header.hasTag("docIdLimit")); + assert(header.hasTag("minSkipDocs")); + assert(header.hasTag("endian")); + bool headerCompleted = header.getTag("frozen").asInteger() != 0; + uint64_t headerFileBitSize = header.getTag("fileBitSize").asInteger(); + headerLen += (-headerLen & 7); + assert(!headerCompleted || 
headerFileBitSize >= headerLen * 8); + (void) headerCompleted; + (void) headerFileBitSize; + assert(header.getTag("format.0").asString() == myId); + (void) myId; + assert(header.getTag("format.1").asString() == f.getIdentifier()); + _minChunkDocs = header.getTag("minChunkDocs").asInteger(); + _docIdLimit = header.getTag("docIdLimit").asInteger(); + _minSkipDocs = header.getTag("minSkipDocs").asInteger(); + assert(header.getTag("endian").asString() == "big"); + // Read feature decoding specific subheader using helper decode context + f.readHeader(header, "features."); + // Align on 64-bit unit + d.smallAlign(64); + assert(d.getReadOffset() == headerLen * 8); + file.Close(); + return headerLen; +} + + +void +Zc4PostingSeqWrite::makeHeader(const FileHeaderContext &fileHeaderContext) +{ + EncodeContext &f = *_encodeFeatures; + EncodeContext &e = _encodeContext; + ComprFileWriteContext &wce = _writeContext; + + const vespalib::string &myId = _dynamicK ? myId5 : myId4; + vespalib::FileHeader header; + + typedef vespalib::GenericHeader::Tag Tag; + fileHeaderContext.addTags(header, _file.GetFileName()); + header.putTag(Tag("frozen", 0)); + header.putTag(Tag("fileBitSize", 0)); + header.putTag(Tag("format.0", myId)); + header.putTag(Tag("format.1", f.getIdentifier())); + header.putTag(Tag("numWords", 0)); + header.putTag(Tag("minChunkDocs", _minChunkDocs)); + header.putTag(Tag("docIdLimit", _docIdLimit)); + header.putTag(Tag("minSkipDocs", _minSkipDocs)); + header.putTag(Tag("endian", "big")); + header.putTag(Tag("desc", "Posting list file")); + + f.writeHeader(header, "features."); + e.setupWrite(wce); + e.writeHeader(header); + e.smallAlign(64); + e.flush(); + uint32_t headerLen = header.getSize(); + headerLen += (-headerLen & 7); // Then to uint64_t + assert(e.getWriteOffset() == headerLen * 8); + assert((e.getWriteOffset() & 63) == 0); // Header must be word aligned +} + + +void +Zc4PostingSeqWrite::updateHeader(void) +{ + vespalib::FileHeader h; + FastOS_File f; + 
f.OpenReadWrite(_file.GetFileName()); + h.readFile(f); + FileHeaderContext::setFreezeTime(h); + typedef vespalib::GenericHeader::Tag Tag; + h.putTag(Tag("frozen", 1)); + h.putTag(Tag("fileBitSize", _fileBitSize)); + h.putTag(Tag("numWords", _numWords)); + h.rewriteFile(f); + f.Sync(); + f.Close(); +} + + +bool +Zc4PostingSeqWrite::open(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite, + const FileHeaderContext &fileHeaderContext) +{ + if (tuneFileWrite.getWantSyncWrites()) + _file.EnableSyncWrites(); + if (tuneFileWrite.getWantDirectIO()) + _file.EnableDirectIO(); + bool ok = _file.OpenWriteOnly(name.c_str()); + if (!ok) { + LOG(error, "could not open '%s' for writing: %s", + _file.GetFileName(), getLastErrorString().c_str()); + // XXX may need to do something more here, I don't know what... + return false; + } + uint64_t fileSize = _file.GetSize(); + uint64_t bufferStartFilePos = _writeContext.getBufferStartFilePos(); + assert(fileSize >= bufferStartFilePos); + (void) fileSize; + _file.SetSize(bufferStartFilePos); + assert(bufferStartFilePos == static_cast(_file.GetPosition())); + _writeContext.setFile(&_file); + search::ComprBuffer &cb = _writeContext; + EncodeContext &e = _encodeContext; + _writeContext.allocComprBuf(65536u, 32768u); + if (bufferStartFilePos == 0) { + e.setupWrite(cb); + // Reset accumulated stats + _fileBitSize = 0; + _numWords = 0; + // Start write initial header + makeHeader(fileHeaderContext); + _encodeFeatures->setupWrite(_featureWriteContext); + // end write initial header + _writePos = e.getWriteOffset(); + } else { + assert(bufferStartFilePos >= 8u); + uint32_t headerSize = readHeader(name); // Read existing header + assert(bufferStartFilePos >= headerSize); + (void) headerSize; + e.afterWrite(_writeContext, 0, bufferStartFilePos); + } + + // Ensure that some space is initially available in encoding buffers + _zcDocIds.maybeExpand(); + _l1Skip.maybeExpand(); + _l2Skip.maybeExpand(); + _l3Skip.maybeExpand(); + 
_l4Skip.maybeExpand(); + return true; // Assume success +} + + +bool +Zc4PostingSeqWrite::close(void) +{ + EncodeContext &e = _encodeContext; + + _fileBitSize = e.getWriteOffset(); + // Write some pad bits to avoid decompression readahead going past + // memory mapped file during search and into SIGSEGV territory. + + // First pad to 64 bits alignment. + e.smallAlign(64); + e.writeComprBufferIfNeeded(); + + // Then write 128 more bits. This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + e.padBits(128); + e.alignDirectIO(); + e.flush(); + e.writeComprBuffer(); // Also flushes slack + + _writeContext.dropComprBuf(); + _file.Sync(); + _file.Close(); + _writeContext.setFile(NULL); + updateHeader(); + return true; +} + + + +void +Zc4PostingSeqWrite:: +setParams(const PostingListParams ¶ms) +{ + if (_countFile != NULL) + _countFile->setParams(params); + params.get("docIdLimit", _docIdLimit); + params.get("minChunkDocs", _minChunkDocs); + params.get("minSkipDocs", _minSkipDocs); +} + + +void +Zc4PostingSeqWrite:: +getParams(PostingListParams ¶ms) +{ + if (_countFile != NULL) { + PostingListParams countParams; + _countFile->getParams(countParams); + params = countParams; + uint32_t countDocIdLimit = 0; + uint32_t countMinChunkDocs = 0; + countParams.get("docIdLimit", countDocIdLimit); + countParams.get("minChunkDocs", countMinChunkDocs); + assert(_docIdLimit == countDocIdLimit); + assert(_minChunkDocs == countMinChunkDocs); + } else { + params.clear(); + params.set("docIdLimit", _docIdLimit); + params.set("minChunkDocs", _minChunkDocs); + } + params.set("minSkipDocs", _minSkipDocs); +} + + +void +Zc4PostingSeqWrite:: +setFeatureParams(const PostingListParams ¶ms) +{ + _encodeFeatures->setParams(params); +} + + +void +Zc4PostingSeqWrite:: +getFeatureParams(PostingListParams ¶ms) +{ + _encodeFeatures->getParams(params); +} + + +void +Zc4PostingSeqWrite::flushChunk(void) +{ + /* TODO: Flush chunk and prepare for new (possible short) 
chunk */ + flushWordWithSkip(true); +} + +#define L1SKIPSTRIDE 16 +#define L2SKIPSTRIDE 8 +#define L3SKIPSTRIDE 8 +#define L4SKIPSTRIDE 8 + + +void +Zc4PostingSeqWrite::calcSkipInfo(void) +{ + uint32_t lastDocId = 0u; + uint32_t lastL1SkipDocId = 0u; + uint32_t lastL1SkipDocIdPos = 0; + uint32_t lastL1SkipFeaturePos = 0; + uint32_t lastL2SkipDocId = 0u; + uint32_t lastL2SkipDocIdPos = 0; + uint32_t lastL2SkipFeaturePos = 0; + uint32_t lastL2SkipL1SkipPos = 0; + uint32_t lastL3SkipDocId = 0u; + uint32_t lastL3SkipDocIdPos = 0; + uint32_t lastL3SkipFeaturePos = 0; + uint32_t lastL3SkipL1SkipPos = 0; + uint32_t lastL3SkipL2SkipPos = 0; + uint32_t lastL4SkipDocId = 0u; + uint32_t lastL4SkipDocIdPos = 0; + uint32_t lastL4SkipFeaturePos = 0; + uint32_t lastL4SkipL1SkipPos = 0; + uint32_t lastL4SkipL2SkipPos = 0; + uint32_t lastL4SkipL3SkipPos = 0; + unsigned int l1SkipCnt = 0; + unsigned int l2SkipCnt = 0; + unsigned int l3SkipCnt = 0; + unsigned int l4SkipCnt = 0; + uint64_t featurePos = 0; + + std::vector::const_iterator dit = _docIds.begin(); + std::vector::const_iterator dite = _docIds.end(); + + if (!_counts._segments.empty()) { + lastDocId = _counts._segments.back()._lastDoc; + lastL1SkipDocId = lastDocId; + lastL2SkipDocId = lastDocId; + lastL3SkipDocId = lastDocId; + lastL4SkipDocId = lastDocId; + } + + for (; dit != dite; ++dit) { + if (l1SkipCnt >= L1SKIPSTRIDE) { + // L1 docid delta + uint32_t docIdDelta = lastDocId - lastL1SkipDocId; + assert(static_cast(docIdDelta) > 0); + _l1Skip.encode(docIdDelta - 1); + lastL1SkipDocId = lastDocId; + // L1 docid pos + uint64_t docIdPos = _zcDocIds.size(); + _l1Skip.encode(docIdPos - lastL1SkipDocIdPos - 1); + lastL1SkipDocIdPos = docIdPos; + // L1 features pos + _l1Skip.encode(featurePos - lastL1SkipFeaturePos - 1); + lastL1SkipFeaturePos = featurePos; + l1SkipCnt = 0; + ++l2SkipCnt; + if (l2SkipCnt >= L2SKIPSTRIDE) { + // L2 docid delta + docIdDelta = lastDocId - lastL2SkipDocId; + assert(static_cast(docIdDelta) > 0); + 
_l2Skip.encode(docIdDelta - 1); + lastL2SkipDocId = lastDocId; + // L2 docid pos + docIdPos = _zcDocIds.size(); + _l2Skip.encode(docIdPos - lastL2SkipDocIdPos - 1); + lastL2SkipDocIdPos = docIdPos; + // L2 features pos + _l2Skip.encode(featurePos - lastL2SkipFeaturePos - 1); + lastL2SkipFeaturePos = featurePos; + // L2 L1Skip pos + uint64_t l1SkipPos = _l1Skip.size(); + _l2Skip.encode(l1SkipPos - lastL2SkipL1SkipPos - 1); + lastL2SkipL1SkipPos = l1SkipPos; + l2SkipCnt = 0; + ++l3SkipCnt; + if (l3SkipCnt >= L3SKIPSTRIDE) { + // L3 docid delta + docIdDelta = lastDocId - lastL3SkipDocId; + assert(static_cast(docIdDelta) > 0); + _l3Skip.encode(docIdDelta - 1); + lastL3SkipDocId = lastDocId; + // L3 docid pos + docIdPos = _zcDocIds.size(); + _l3Skip.encode(docIdPos - lastL3SkipDocIdPos - 1); + lastL3SkipDocIdPos = docIdPos; + // L3 features pos + _l3Skip.encode(featurePos - lastL3SkipFeaturePos - 1); + lastL3SkipFeaturePos = featurePos; + // L3 L1Skip pos + l1SkipPos = _l1Skip.size(); + _l3Skip.encode(l1SkipPos - lastL3SkipL1SkipPos - 1); + lastL3SkipL1SkipPos = l1SkipPos; + // L3 L2Skip pos + uint64_t l2SkipPos = _l2Skip.size(); + _l3Skip.encode(l2SkipPos - lastL3SkipL2SkipPos - 1); + lastL3SkipL2SkipPos = l2SkipPos; + l3SkipCnt = 0; + ++l4SkipCnt; + if (l4SkipCnt >= L4SKIPSTRIDE) { + // L4 docid delta + docIdDelta = lastDocId - lastL4SkipDocId; + assert(static_cast(docIdDelta) > 0); + _l4Skip.encode(docIdDelta - 1); + lastL4SkipDocId = lastDocId; + // L4 docid pos + docIdPos = _zcDocIds.size(); + _l4Skip.encode(docIdPos - lastL4SkipDocIdPos - 1); + lastL4SkipDocIdPos = docIdPos; + // L4 features pos + _l4Skip.encode(featurePos - lastL4SkipFeaturePos - 1); + lastL4SkipFeaturePos = featurePos; + // L4 L1Skip pos + l1SkipPos = _l1Skip.size(); + _l4Skip.encode(l1SkipPos - lastL4SkipL1SkipPos - 1); + lastL4SkipL1SkipPos = l1SkipPos; + // L4 L2Skip pos + l2SkipPos = _l2Skip.size(); + _l4Skip.encode(l2SkipPos - lastL4SkipL2SkipPos - 1); + lastL4SkipL2SkipPos = l2SkipPos; + 
// L4 L3Skip pos + uint64_t l3SkipPos = _l3Skip.size(); + _l4Skip.encode(l3SkipPos - lastL4SkipL3SkipPos - 1); + lastL4SkipL3SkipPos = l3SkipPos; + l4SkipCnt = 0; + } + } + } + } + uint32_t docId = dit->first; + featurePos += dit->second; + _zcDocIds.encode(docId - lastDocId - 1); + lastDocId = docId; + ++l1SkipCnt; + } + // Extra partial entries for skip tables to simplify iterator during search + if (_l1Skip.size() > 0) + _l1Skip.encode(lastDocId - lastL1SkipDocId - 1); + if (_l2Skip.size() > 0) + _l2Skip.encode(lastDocId - lastL2SkipDocId - 1); + if (_l3Skip.size() > 0) + _l3Skip.encode(lastDocId - lastL3SkipDocId - 1); + if (_l4Skip.size() > 0) + _l4Skip.encode(lastDocId - lastL4SkipDocId - 1); +} + + +void +Zc4PostingSeqWrite::flushWordWithSkip(bool hasMore) +{ + assert(_docIds.size() >= _minSkipDocs || !_counts._segments.empty()); + + _encodeFeatures->flush(); + EncodeContext &e = _encodeContext; + + uint32_t numDocs = _docIds.size(); + + e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + if (numDocs >= _minChunkDocs) + e.writeBits((hasMore ? 1 : 0), 1); + + // TODO: Calculate docids size, possibly also k parameter + calcSkipInfo(); + + uint32_t docIdsSize = _zcDocIds.size(); + uint32_t l1SkipSize = _l1Skip.size(); + uint32_t l2SkipSize = _l2Skip.size(); + uint32_t l3SkipSize = _l3Skip.size(); + uint32_t l4SkipSize = _l4Skip.size(); + + e.encodeExpGolomb(docIdsSize - 1, K_VALUE_ZCPOSTING_DOCIDSSIZE); + e.encodeExpGolomb(l1SkipSize, K_VALUE_ZCPOSTING_L1SKIPSIZE); + if (l1SkipSize != 0) { + e.encodeExpGolomb(l2SkipSize, K_VALUE_ZCPOSTING_L2SKIPSIZE); + if (l2SkipSize != 0) { + e.encodeExpGolomb(l3SkipSize, K_VALUE_ZCPOSTING_L3SKIPSIZE); + if (l3SkipSize != 0) { + e.encodeExpGolomb(l4SkipSize, K_VALUE_ZCPOSTING_L4SKIPSIZE); + } + } + } + e.encodeExpGolomb(_featureOffset, K_VALUE_ZCPOSTING_FEATURESSIZE); + + // Encode last document id in chunk or word. + if (_dynamicK) { + uint32_t docIdK = e.calcDocIdK((_counts._segments.empty() && + !hasMore) ?
+ numDocs : 1, + _docIdLimit); + e.encodeExpGolomb(_docIdLimit - 1 - _docIds.back().first, + docIdK); + } else { + e.encodeExpGolomb(_docIdLimit - 1 - _docIds.back().first, + K_VALUE_ZCPOSTING_LASTDOCID); + } + + e.smallAlign(8); // Byte align + + uint8_t *docIds = _zcDocIds._mallocStart; + e.writeBits(reinterpret_cast(docIds), + 0, + docIdsSize * 8); + if (l1SkipSize > 0) { + uint8_t *l1Skip = _l1Skip._mallocStart; + e.writeBits(reinterpret_cast(l1Skip), + 0, + l1SkipSize * 8); + } + if (l2SkipSize > 0) { + uint8_t *l2Skip = _l2Skip._mallocStart; + e.writeBits(reinterpret_cast(l2Skip), + 0, + l2SkipSize * 8); + } + if (l3SkipSize > 0) { + uint8_t *l3Skip = _l3Skip._mallocStart; + e.writeBits(reinterpret_cast(l3Skip), + 0, + l3SkipSize * 8); + } + if (l4SkipSize > 0) { + uint8_t *l4Skip = _l4Skip._mallocStart; + e.writeBits(reinterpret_cast(l4Skip), + 0, + l4SkipSize * 8); + } + + // Write features + e.writeBits(static_cast(_featureWriteContext._comprBuf), + 0, + _featureOffset); + + _counts._numDocs += numDocs; + if (hasMore || !_counts._segments.empty()) { + uint64_t writePos = e.getWriteOffset(); + PostingListCounts::Segment seg; + seg._bitLength = writePos - (_writePos + _counts._bitLength); + seg._numDocs = numDocs; + seg._lastDoc = _docIds.back().first; + _counts._segments.push_back(seg); + _counts._bitLength += seg._bitLength; + } + // reset tables in preparation for next word or next chunk + _zcDocIds.clear(); + _l1Skip.clear(); + _l2Skip.clear(); + _l3Skip.clear(); + _l4Skip.clear(); + resetWord(); +} + + +void +Zc4PostingSeqWrite::flushWordNoSkip(void) +{ + // Too few document ids for skip info. 
+ assert(_docIds.size() < _minSkipDocs && _counts._segments.empty()); + + _encodeFeatures->flush(); + EncodeContext &e = _encodeContext; + uint32_t numDocs = _docIds.size(); + + e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + + uint32_t baseDocId = 1; + const uint64_t *features = + static_cast(_featureWriteContext._comprBuf); + uint64_t featureOffset = 0; + + std::vector::const_iterator dit = _docIds.begin(); + std::vector::const_iterator dite = _docIds.end(); + + for (; dit != dite; ++dit) { + uint32_t docId = dit->first; + uint32_t featureSize = dit->second; + e.encodeExpGolomb(docId - baseDocId, K_VALUE_ZCPOSTING_DELTA_DOCID); + baseDocId = docId + 1; + e.writeBits(features + (featureOffset >> 6), + featureOffset & 63, + featureSize); + featureOffset += featureSize; + } + _counts._numDocs += numDocs; + resetWord(); +} + + +void +Zc4PostingSeqWrite::resetWord(void) +{ + _docIds.clear(); + _encodeFeatures->setupWrite(_featureWriteContext); + _featureOffset = 0; +} + + +ZcPostingSeqRead::ZcPostingSeqRead(PostingListCountFileSeqRead *countFile) + : Zc4PostingSeqRead(countFile) +{ + _dynamicK = true; +} + + +void +ZcPostingSeqRead:: +readDocIdAndFeatures(DocIdAndFeatures &features) +{ + if (_residue == 0 && !_hasMore) { + if (_rangeEndOffset != 0) { + DecodeContext &d = *_decodeContext; + uint64_t curOffset = d.getReadOffset(); + assert(curOffset <= _rangeEndOffset); + if (curOffset < _rangeEndOffset) + readWordStart(); + } + if (_residue == 0) { + // Don't read past end of posting list. 
+ features.clear(static_cast(-1)); + return; + } + } + if (_lastDocId > 0) { + readCommonWordDocIdAndFeatures(features); + return; + } + // Interleaves docid & features + typedef FeatureEncodeContextBE EC; + DecodeContext &d = *_decodeContext; + uint32_t length; + uint64_t val64; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + + UC64BE_DECODEEXPGOLOMB_SMALL_NS(o, + _docIdK, + EC); + uint32_t docId = _prevDocId + 1 + val64; + features._docId = docId; + _prevDocId = docId; + UC64_DECODECONTEXT_STORE(o, d._); + if (__builtin_expect(oCompr >= d._valE, false)) { + _readContext.readComprBuffer(); + } + _decodeContext->readFeatures(features); + --_residue; +} + + +const vespalib::string & +ZcPostingSeqRead::getIdentifier(void) +{ + return myId5; +} + + +ZcPostingSeqWrite::ZcPostingSeqWrite(PostingListCountFileSeqWrite *countFile) + : Zc4PostingSeqWrite(countFile) +{ + _dynamicK = true; +} + + +void +ZcPostingSeqWrite::flushWordNoSkip(void) +{ + // Too few document ids for skip info. + assert(_docIds.size() < _minSkipDocs && _counts._segments.empty()); + + _encodeFeatures->flush(); + EncodeContext &e = _encodeContext; + uint32_t numDocs = _docIds.size(); + + e.encodeExpGolomb(numDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + + uint32_t docIdK = e.calcDocIdK(numDocs, _docIdLimit); + + uint32_t baseDocId = 1; + const uint64_t *features = + static_cast(_featureWriteContext._comprBuf); + uint64_t featureOffset = 0; + + std::vector::const_iterator dit = _docIds.begin(); + std::vector::const_iterator dite = _docIds.end(); + + for (; dit != dite; ++dit) { + uint32_t docId = dit->first; + uint32_t featureSize = dit->second; + e.encodeExpGolomb(docId - baseDocId, docIdK); + baseDocId = docId + 1; + e.writeBits(features + (featureOffset >> 6), + featureOffset & 63, + featureSize); + featureOffset += featureSize; + } + _counts._numDocs += numDocs; + resetWord(); +} + + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.h 
b/searchlib/src/vespa/searchlib/diskindex/zcposting.h new file mode 100644 index 00000000000..447216d84cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.h @@ -0,0 +1,495 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search +{ + +namespace index +{ + +class PostingListCountFileSeqRead; + +class PostingListCountFileSeqWrite; + +} + +namespace diskindex +{ + +class ZcBuf +{ +public: + uint8_t *_valI; + uint8_t *_valE; + uint8_t *_mallocStart; + size_t _mallocSize; + + ZcBuf(void) + : _valI(NULL), + _valE(NULL), + _mallocStart(NULL), + _mallocSize(0) + { + } + + ~ZcBuf(void) + { + free(_mallocStart); + } + + + static size_t + zcSlack(void) + { + return 4; + } + + void + clearReserve(size_t reserveSize); + + void + clear(void) + { + _valI = _mallocStart; + } + + size_t + capacity(void) const + { + return _valE - _mallocStart; + } + + size_t + size(void) const + { + return _valI - _mallocStart; + } + + size_t + pos(void) const + { + return _valI - _mallocStart; + } + + void + expand(void); + + void + maybeExpand(void) + { + if (__builtin_expect(_valI >= _valE, false)) + expand(); + } + + void + encode(uint32_t num) + { + for (;;) { + if (num < (1 << 7)) { + *_valI++ = num; + break; + } + *_valI++ = (num & ((1 << 7) - 1)) | (1 << 7); + num >>= 7; + } + maybeExpand(); + } + + uint32_t + decode(void) + { + uint32_t res; + uint8_t *valI = _valI; + if (__builtin_expect(valI[0] < (1 << 7), true)) { + res = valI[0]; + valI += 1; + } else if (__builtin_expect(valI[1] < (1 << 7), true)) { + res = (valI[0] & ((1 << 7) - 1)) + + (valI[1] << 7); + valI += 2; + } else if (__builtin_expect(valI[2] < (1 << 7), true)) { + res = (valI[0] & ((1 << 7) - 1)) + + ((valI[1] & ((1 << 7) - 1)) << 7) + + (valI[2] << 14); + valI += 3; + } else if (__builtin_expect(valI[3] < (1 << 7), true)) { + res = (valI[0] & ((1 << 7) - 1)) + + ((valI[1] & ((1 << 7) 
- 1)) << 7) + + ((valI[2] & ((1 << 7) - 1)) << 14) + + (valI[3] << 21); + valI += 4; + } else { + res = (valI[0] & ((1 << 7) - 1)) + + ((valI[1] & ((1 << 7) - 1)) << 7) + + ((valI[2] & ((1 << 7) - 1)) << 14) + + ((valI[3] & ((1 << 7) - 1)) << 21) + + (valI[4] << 28); + valI += 5; + } + _valI = valI; + return res; + } +}; + +class Zc4PostingSeqRead : public index::PostingListFileSeqRead +{ + Zc4PostingSeqRead(const Zc4PostingSeqRead &); + + Zc4PostingSeqRead & + operator=(const Zc4PostingSeqRead &); + +protected: + typedef bitcompression::FeatureDecodeContextBE DecodeContext; + typedef bitcompression::FeatureEncodeContextBE EncodeContext; + + DecodeContext *_decodeContext; + uint32_t _docIdK; + uint32_t _prevDocId; // Previous document id + uint32_t _numDocs; // Documents in chunk or word + search::ComprFileReadContext _readContext; + FastOS_File _file; + bool _hasMore; + bool _dynamicK; // Calculate EG compression parameters ? + uint32_t _lastDocId; // last document in chunk or word + uint32_t _minChunkDocs; // # of documents needed for chunking + uint32_t _minSkipDocs; // # of documents needed for skipping + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + + ZcBuf _zcDocIds; // Document id deltas + ZcBuf _l1Skip; // L1 skip info + ZcBuf _l2Skip; // L2 skip info + ZcBuf _l3Skip; // L3 skip info + ZcBuf _l4Skip; // L4 skip info + + uint64_t _numWords; // Number of words in file + uint64_t _fileBitSize; + uint32_t _chunkNo; // Chunk number + + // Variables for validating skip information while reading + uint32_t _l1SkipDocId; + uint32_t _l1SkipDocIdPos; + uint64_t _l1SkipFeaturesPos; + uint32_t _l2SkipDocId; + uint32_t _l2SkipDocIdPos; + uint32_t _l2SkipL1SkipPos; + uint64_t _l2SkipFeaturesPos; + uint32_t _l3SkipDocId; + uint32_t _l3SkipDocIdPos; + uint32_t _l3SkipL1SkipPos; + uint32_t _l3SkipL2SkipPos; + uint64_t _l3SkipFeaturesPos; + uint32_t _l4SkipDocId; + uint32_t _l4SkipDocIdPos; + uint32_t _l4SkipL1SkipPos; + uint32_t _l4SkipL2SkipPos; +
uint32_t _l4SkipL3SkipPos; + uint64_t _l4SkipFeaturesPos; + + // Variable for validating chunk information while reading + uint64_t _featuresSize; + index::PostingListCountFileSeqRead *const _countFile; + + uint64_t _headerBitLen; // Size of file header in bits + uint64_t _rangeEndOffset; // End offset for word pair + uint64_t _readAheadEndOffset;// Readahead end offset for word pair + uint64_t _wordStart; // last word header position + uint64_t _checkPointPos; // file position when checkpointing + uint32_t _residue; // Number of unread documents after word header + uint32_t _checkPointChunkNo; // _chunkNo when checkpointing + uint32_t _checkPointResidue;// _residue when checkpointing + bool _checkPointHasMore; // _hasMore when checkpointing +public: + Zc4PostingSeqRead(index::PostingListCountFileSeqRead *countFile); + + virtual + ~Zc4PostingSeqRead(void); + + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListCounts PostingListCounts; + typedef index::PostingListParams PostingListParams; + + /** + * Read document id and features for common word. + */ + virtual void + readCommonWordDocIdAndFeatures(DocIdAndFeatures &features); + + /** + * Read document id and features. + */ + virtual void + readDocIdAndFeatures(DocIdAndFeatures &features); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + virtual void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in); + + /** + * Read counts for a word. + */ + virtual void + readCounts(const PostingListCounts &counts); // Fill in for next word + + /** + * Open posting list file for sequential read. 
+ */ + virtual bool + open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead); + + /** + * Close posting list file. + */ + virtual bool + close(void); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams &params); + + /* + * Get current feature parameters. + */ + virtual void + getFeatureParams(PostingListParams &params); + + void + readWordStartWithSkip(void); + + void + readWordStart(void); + + void + readHeader(void); + + static const vespalib::string & + getIdentifier(void); + + // Methods used when generating posting list for common word pairs. + + /* + * Get current posting offset, measured in bits. First posting list + * starts at 0, i.e. file header is not accounted for here. + * + * @return current posting offset, measured in bits. + */ + virtual uint64_t + getCurrentPostingOffset(void) const; + + /** + * Set current posting offset, measured in bits. First posting + * list starts at 0, i.e. file header is not accounted for here. + * + * @param offset start of posting lists for word pair. + * @param endOffset end of posting lists for word pair. + * @param readAheadOffset end of posting list for either this or a + * later word pair, depending on disk seek cost.
+ */ + virtual void + setPostingOffset(uint64_t offset, + uint64_t endOffset, + uint64_t readAheadOffset); +}; + + +class Zc4PostingSeqWrite : public index::PostingListFileSeqWrite +{ + Zc4PostingSeqWrite(const Zc4PostingSeqWrite &); + + Zc4PostingSeqWrite & + operator=(const Zc4PostingSeqWrite &); + +protected: + typedef bitcompression::FeatureEncodeContextBE EncodeContext; + + EncodeContext _encodeContext; + search::ComprFileWriteContext _writeContext; + FastOS_File _file; + uint32_t _minChunkDocs; // # of documents needed for chunking + uint32_t _minSkipDocs; // # of documents needed for skipping + uint32_t _docIdLimit; // Limit for document ids (docId < docIdLimit) + // Unpacked document ids for word and feature sizes + typedef std::pair DocIdAndFeatureSize; + std::vector _docIds; + + // Buffer up features in memory + EncodeContext *_encodeFeatures; + uint64_t _featureOffset; // Bit offset of next feature + search::ComprFileWriteContext _featureWriteContext; + uint64_t _writePos; // Bit position for start of current word + bool _dynamicK; // Calculate EG compression parameters ? + ZcBuf _zcDocIds; // Document id deltas + ZcBuf _l1Skip; // L1 skip info + ZcBuf _l2Skip; // L2 skip info + ZcBuf _l3Skip; // L3 skip info + ZcBuf _l4Skip; // L4 skip info + + uint64_t _numWords; // Number of words in file + uint64_t _fileBitSize; + index::PostingListCountFileSeqWrite *const _countFile; +public: + Zc4PostingSeqWrite(index::PostingListCountFileSeqWrite *countFile); + + virtual + ~Zc4PostingSeqWrite(void); + + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListCounts PostingListCounts; + typedef index::PostingListParams PostingListParams; + + /** + * Write document id and features. + */ + virtual void + writeDocIdAndFeatures(const DocIdAndFeatures &features); + + /** + * Flush word (during write) after it is complete to buffers, i.e. + * prepare for next word, but not for application crash.
+ */ + virtual void + flushWord(void); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + virtual void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in); + + /** + * Open posting list file for sequential write. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite, + const search::common::FileHeaderContext &fileHeaderContext); + + /** + * Close posting list file. + */ + virtual bool + close(void); + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams &params); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams &params); + + /* + * Set feature parameters. + */ + virtual void + setFeatureParams(const PostingListParams &params); + + /* + * Get current feature parameters. + */ + virtual void + getFeatureParams(PostingListParams &params); + + /** + * Flush chunk to file. + */ + void + flushChunk(void); + + /** + * Encode document id deltas and L1-L4 skip tables for the buffered document ids. + */ + void + calcSkipInfo(void); + + /** + * Flush word with skip info to disk + */ + void + flushWordWithSkip(bool hasMore); + + + /** + * Flush word without skip info to disk. + */ + virtual void + flushWordNoSkip(void); + + /** + * Prepare for next word or next chunk. + */ + void + resetWord(void); + + /** + * Make header using feature encode write context. + */ + void + makeHeader(const search::common::FileHeaderContext &fileHeaderContext); + + void + updateHeader(void); + + /** + * Read header, using temporary feature decode context.
+ */ + uint32_t + readHeader(const vespalib::string &name); +}; + + +class ZcPostingSeqRead : public Zc4PostingSeqRead +{ +public: + ZcPostingSeqRead(index::PostingListCountFileSeqRead *countFile); + + virtual void + readDocIdAndFeatures(DocIdAndFeatures &features); + + static const vespalib::string & + getIdentifier(void); +}; + +class ZcPostingSeqWrite : public Zc4PostingSeqWrite +{ +public: + ZcPostingSeqWrite(index::PostingListCountFileSeqWrite *countFile); + + virtual void + flushWordNoSkip(void); + +}; + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp new file mode 100644 index 00000000000..884bdfa6415 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp @@ -0,0 +1,700 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".diskindex.zcpostingiterators"); +#include "zcpostingiterators.h" +#include +#include + +namespace search +{ + +namespace diskindex +{ + +using search::fef::TermFieldMatchDataArray; +using search::bitcompression::FeatureDecodeContext; +using search::bitcompression::FeatureEncodeContext; +using queryeval::RankedSearchIteratorBase; + +#define DEBUG_ZCPOSTING_PRINTF 0 +#define DEBUG_ZCPOSTING_ASSERT 0 + +ZcIteratorBase::ZcIteratorBase(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit) : + RankedSearchIteratorBase(matchData), + _docIdLimit(docIdLimit), + _start(start) +{ } + +void +ZcIteratorBase::initRange(uint32_t beginid, uint32_t endid) +{ + uint32_t prev = getDocId(); + RankedSearchIteratorBase::initRange(beginid, endid); + if ((beginid <= prev) || (prev == 0)) { + rewind(_start); + readWordStart(getDocIdLimit()); + } + seek(beginid); +} + + +template +Zc4RareWordPostingIterator:: +Zc4RareWordPostingIterator(const TermFieldMatchDataArray 
&matchData, Position start, uint32_t docIdLimit) + : ZcIteratorBase(matchData, start, docIdLimit), + _decodeContext(NULL), + _residue(0), + _prevDocId(0), + _numDocs(0) +{ } + + +template +void +Zc4RareWordPostingIterator::doSeek(uint32_t docId) +{ + typedef FeatureEncodeContext EC; + uint32_t length; + uint64_t val64; + + uint32_t oDocId = getDocId(); + + UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); + if (getUnpacked()) { + clearUnpacked(); + if (__builtin_expect(--_residue == 0, false)) + goto atbreak; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC); + oDocId += 1 + static_cast(val64); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + while (__builtin_expect(oDocId < docId, true)) { + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + _decodeContext->skipFeatures(1); + UC64_DECODECONTEXT_LOAD(o, _decodeContext->_); + if (__builtin_expect(--_residue == 0, false)) + goto atbreak; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC); + oDocId += 1 + static_cast(val64); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + setDocId(oDocId); + return; + atbreak: + setAtEnd(); // Mark end of data + return; +} + + +template +void +Zc4RareWordPostingIterator::doUnpack(uint32_t docId) +{ + if (!_matchData.valid() || getUnpacked()) + return; + assert(docId == getDocId()); + _decodeContext->unpackFeatures(_matchData, docId); + setUnpacked(); +} + +template +void Zc4RareWordPostingIterator::rewind(Position start) +{ + _decodeContext->setPosition(start); +} + +template +void +Zc4RareWordPostingIterator::readWordStart(uint32_t docIdLimit) +{ + (void) docIdLimit; + typedef FeatureEncodeContext EC; + UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); + uint32_t length; + uint64_t val64; + + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + + _numDocs = static_cast(val64) + 1; + UC64_DECODEEXPGOLOMB_NS(o, 
K_VALUE_ZCPOSTING_DELTA_DOCID, EC); + uint32_t docId = static_cast(val64) + 1; + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + + setDocId(docId); + _residue = _numDocs; + clearUnpacked(); +} + + +template +ZcRareWordPostingIterator:: +ZcRareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit) + : Zc4RareWordPostingIterator(matchData, start, docIdLimit), + _docIdK(0) +{ +} + + +template +void +ZcRareWordPostingIterator::doSeek(uint32_t docId) +{ + typedef FeatureEncodeContext EC; + uint32_t length; + uint64_t val64; + + uint32_t oDocId = getDocId(); + + UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); + if (getUnpacked()) { + clearUnpacked(); + if (__builtin_expect(--_residue == 0, false)) + goto atbreak; + UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + oDocId += 1 + static_cast(val64); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + while (__builtin_expect(oDocId < docId, true)) { + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + _decodeContext->skipFeatures(1); + UC64_DECODECONTEXT_LOAD(o, _decodeContext->_); + if (__builtin_expect(--_residue == 0, false)) + goto atbreak; + UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + oDocId += 1 + static_cast(val64); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + setDocId(oDocId); + return; + atbreak: + setAtEnd(); // Mark end of data + return; +} + + +template +void +ZcRareWordPostingIterator::readWordStart(uint32_t docIdLimit) +{ + typedef FeatureEncodeContext EC; + UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); + uint32_t length; + uint64_t val64; + + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + _numDocs = static_cast(val64) + 1; + _docIdK = EC::calcDocIdK(_numDocs, docIdLimit); + UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + uint32_t docId = static_cast(val64) + 1; + UC64_DECODECONTEXT_STORE(o, _decodeContext->_); + + setDocId(docId); + 
_residue = _numDocs; + clearUnpacked(); +} + + +template +ZcPostingIterator:: +ZcPostingIterator(uint32_t minChunkDocs, + bool dynamicK, + const PostingListCounts &counts, + const search::fef::TermFieldMatchDataArray &matchData, + Position start, uint32_t docIdLimit) + : ZcIteratorBase(matchData, start, docIdLimit), + _valI(NULL), + _lastDocId(0), + _l1SkipDocId(0), + _l2SkipDocId(0), + _l3SkipDocId(0), + _l4SkipDocId(0), + _l1SkipDocIdPos(NULL), + _l1SkipValI(NULL), + _l1SkipFeaturePos(0), + _valIBase(NULL), + _l1SkipValIBase(NULL), + _l2SkipDocIdPos(NULL), + _l2SkipValI(NULL), + _l2SkipFeaturePos(0), + _l2SkipL1SkipPos(NULL), + _l2SkipValIBase(NULL), + _l3SkipDocIdPos(NULL), + _l3SkipValI(NULL), + _l3SkipFeaturePos(0), + _l3SkipL1SkipPos(NULL), + _l3SkipL2SkipPos(NULL), + _l3SkipValIBase(NULL), + _l4SkipDocIdPos(NULL), + _l4SkipValI(NULL), + _l4SkipFeaturePos(0), + _l4SkipL1SkipPos(NULL), + _l4SkipL2SkipPos(NULL), + _l4SkipL3SkipPos(NULL), + _decodeContext(NULL), + _minChunkDocs(minChunkDocs), + _docIdK(0), + _hasMore(false), + _dynamicK(dynamicK), + _chunkNo(0), + _numDocs(0), + _featuresSize(0), + _featureSeekPos(0), + _featuresValI(NULL), + _featuresBitOffset(0), + _counts(counts) +{ } + + +template +void +ZcPostingIterator::readWordStart(uint32_t docIdLimit) +{ + typedef FeatureEncodeContext EC; + DecodeContextBase &d = *_decodeContext; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + + uint32_t prevDocId = _hasMore ? _lastDocId : 0u; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + + _numDocs = static_cast(val64) + 1; + bool hasMore = false; + if (__builtin_expect(_numDocs >= _minChunkDocs, false)) { + if (bigEndian) { + hasMore = static_cast(oVal) < 0; + oVal <<= 1; + length = 1; + } else { + hasMore = (oVal & 1) != 0; + oVal >>= 1; + length = 1; + } + UC64_READBITS_NS(o, EC); + } + if (_dynamicK) + _docIdK = EC::calcDocIdK((_hasMore || hasMore) ? 
1 : _numDocs, docIdLimit); + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); + uint32_t docIdsSize = val64 + 1; + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC); + uint32_t l1SkipSize = val64; + uint32_t l2SkipSize = 0; + if (l1SkipSize != 0) { + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); + l2SkipSize = val64; + } + uint32_t l3SkipSize = 0; + if (l2SkipSize != 0) { + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); + l3SkipSize = val64; + } + uint32_t l4SkipSize = 0; + if (l3SkipSize != 0) { + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); + l4SkipSize = val64; + } + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC); + _featuresSize = val64; + if (_dynamicK) { + UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + } else { + UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_LASTDOCID, EC); + } + _lastDocId = docIdLimit - 1 - val64; + if (_hasMore || hasMore) { + if (!_counts._segments.empty()) { + assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc); + } + } + + uint64_t bytePad = oPreRead & 7; + if (bytePad > 0) { + length = bytePad; + UC64_READBITS_NS(o, EC); + } + + UC64_DECODECONTEXT_STORE(o, d._); + assert((d.getBitOffset() & 7) == 0); + const uint8_t *bcompr = d.getByteCompr(); + _valIBase = _valI = bcompr; + _l1SkipDocIdPos = _l2SkipDocIdPos = bcompr; + _l3SkipDocIdPos = _l4SkipDocIdPos = bcompr; + bcompr += docIdsSize; + if (l1SkipSize != 0) { + _l1SkipValIBase = _l1SkipValI = bcompr; + _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr; + bcompr += l1SkipSize; + } else { + _l1SkipValIBase = _l1SkipValI = NULL; + _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL; + } + if (l2SkipSize != 0) { + _l2SkipValIBase = _l2SkipValI = bcompr; + _l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr; + bcompr += l2SkipSize; + } else { + _l2SkipValIBase = _l2SkipValI = NULL; + _l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL; + } + if (l3SkipSize != 0) { + _l3SkipValIBase = 
_l3SkipValI = bcompr; + _l4SkipL3SkipPos = bcompr; + bcompr += l3SkipSize; + } else { + _l3SkipValIBase = _l3SkipValI = NULL; + _l4SkipL3SkipPos = NULL; + } + if (l4SkipSize != 0) { + _l4SkipValI = bcompr; + bcompr += l4SkipSize; + } else { + _l4SkipValI = NULL; + } + d.setByteCompr(bcompr); + _hasMore = hasMore; + // Save information about start of next chunk + _featuresValI = d.getCompr(); + _featuresBitOffset = d.getBitOffset(); + _l1SkipFeaturePos = _l2SkipFeaturePos = 0; + _l3SkipFeaturePos = _l4SkipFeaturePos = 0; + _featureSeekPos = 0; + clearUnpacked(); + // Unpack first docid delta in chunk + uint32_t oDocId = prevDocId; + ZCDECODE(_valI, oDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + setDocId(oDocId); + // Unpack first L1 Skip info docid delta + if (_l1SkipValI != NULL) { + _l1SkipDocId = prevDocId; + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + } else + _l1SkipDocId = _lastDocId; + // Unpack first L2 skip info docid delta + if (_l2SkipValI != NULL) { + _l2SkipDocId = prevDocId; + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); + } else + _l2SkipDocId = _lastDocId; + // Unpack first L3 skip info docid delta + if (_l3SkipValI != NULL) { + _l3SkipDocId = prevDocId; + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +); + } else + _l3SkipDocId = _lastDocId; + // Unpack first L4 skip info docid delta + if (_l4SkipValI != NULL) { + _l4SkipDocId = prevDocId; + ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 +); + } else + _l4SkipDocId = _lastDocId; +} + + +template +void +ZcPostingIterator::doChunkSkipSeek(uint32_t docId) +{ + while (docId > _lastDocId && _hasMore) { + // Skip to start of next chunk + _featureSeekPos = 0; + featureSeek(_featuresSize); + _chunkNo++; + readWordStart(getDocIdLimit()); // Read word start for next chunk + } + if (docId > _lastDocId) { + _l4SkipDocId = _l3SkipDocId = _l2SkipDocId = _l1SkipDocId = search::endDocId; + setAtEnd(); + } +} + + +template +void +ZcPostingIterator::doL4SkipSeek(uint32_t docId) +{ 
+ uint32_t lastL4SkipDocId; + + if (__builtin_expect(docId > _lastDocId, false)) { + doChunkSkipSeek(docId); + if (docId <= _l4SkipDocId) + return; + } + do { + lastL4SkipDocId = _l4SkipDocId; + ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipFeaturePos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 + ); +#if DEBUG_ZCPOSTING_PRINTF + printf("L4Decode docId %d, docIdPos %d," + "l1SkipPos %d, l2SkipPos %d, l3SkipPos %d, nextDocId %d\n", + lastL4SkipDocId, + (int) (_l4SkipDocIdPos - _valIBase), + (int) (_l4SkipL1SkipPos - _l1SkipValIBase), + (int) (_l4SkipL2SkipPos - _l2SkipValIBase), + (int) (_l4SkipL3SkipPos - _l3SkipValIBase), + _l4SkipDocId); +#endif + } while (docId > _l4SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos = + _l4SkipDocIdPos; + _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos = + _l4SkipFeaturePos; + _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId; + _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos; + _l2SkipValI = _l3SkipL2SkipPos = _l4SkipL2SkipPos; + _l3SkipValI = _l4SkipL3SkipPos; + ZCDECODE(_valI, lastL4SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("L4Seek, docId %d docIdPos %d" + " L1SkipPos %d L2SkipPos %d L3SkipPos %d, nextDocId %d\n", + lastL4SkipDocId, + (int) (_l4SkipDocIdPos - _valIBase), + (int) (_l4SkipL1SkipPos - _l1SkipValIBase), + (int) (_l4SkipL2SkipPos - _l2SkipValIBase), + (int) (_l4SkipL3SkipPos - _l3SkipValIBase), + _l4SkipDocId); +#endif + setDocId(lastL4SkipDocId); + _featureSeekPos = _l4SkipFeaturePos; + clearUnpacked(); +} + + +template +void +ZcPostingIterator::doL3SkipSeek(uint32_t docId) +{ + uint32_t lastL3SkipDocId; + + if 
(__builtin_expect(docId > _l4SkipDocId, false)) { + doL4SkipSeek(docId); + if (docId <= _l3SkipDocId) + return; + } + do { + lastL3SkipDocId = _l3SkipDocId; + ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipFeaturePos += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 + ); + ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 + ); + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 + ); +#if DEBUG_ZCPOSTING_PRINTF + printf("L3Decode docId %d, docIdPos %d," + "l1SkipPos %d, l2SkipPos %d, nextDocId %d\n", + lastL3SkipDocId, + (int) (_l3SkipDocIdPos - _valIBase), + (int) (_l3SkipL1SkipPos - _l1SkipValIBase), + (int) (_l3SkipL2SkipPos - _l2SkipValIBase), + _l3SkipDocId); +#endif + } while (docId > _l3SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos; + _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos; + _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId; + _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos; + _l2SkipValI = _l3SkipL2SkipPos; + ZCDECODE(_valI, lastL3SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("L3Seek, docId %d docIdPos %d" + " L1SkipPos %d L2SkipPos %d, nextDocId %d\n", + lastL3SkipDocId, + (int) (_l3SkipDocIdPos - _valIBase), + (int) (_l3SkipL1SkipPos - _l1SkipValIBase), + (int) (_l3SkipL2SkipPos - _l2SkipValIBase), + _l3SkipDocId); +#endif + setDocId(lastL3SkipDocId); + _featureSeekPos = _l3SkipFeaturePos; + clearUnpacked(); +} + + +template +void +ZcPostingIterator::doL2SkipSeek(uint32_t docId) +{ + uint32_t lastL2SkipDocId; + + if (__builtin_expect(docId > _l3SkipDocId, false)) { + doL3SkipSeek(docId); + if (docId <= _l2SkipDocId) + return; + } + do { + lastL2SkipDocId = _l2SkipDocId; + ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipFeaturePos += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 + ); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 + ); +#if DEBUG_ZCPOSTING_PRINTF 
+ printf("L2Decode docId %d, docIdPos %d, l1SkipPos %d, nextDocId %d\n", + lastL2SkipDocId, + (int) (_l2SkipDocIdPos - _valIBase), + (int) (_l2SkipL1SkipPos - _l1SkipValIBase), + _l2SkipDocId); +#endif + } while (docId > _l2SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos; + _l1SkipFeaturePos = _l2SkipFeaturePos; + _l1SkipDocId = lastL2SkipDocId; + _l1SkipValI = _l2SkipL1SkipPos; + ZCDECODE(_valI, lastL2SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n", + lastL2SkipDocId, + (int) (_l2SkipDocIdPos - _valIBase), + (int) (_l2SkipL1SkipPos - _l1SkipValIBase), + _l2SkipDocId); +#endif + setDocId(lastL2SkipDocId); + _featureSeekPos = _l2SkipFeaturePos; + clearUnpacked(); +} + + +template +void +ZcPostingIterator::doL1SkipSeek(uint32_t docId) +{ + uint32_t lastL1SkipDocId; + if (__builtin_expect(docId > _l2SkipDocId, false)) { + doL2SkipSeek(docId); + if (docId <= _l1SkipDocId) + return; + } + do { + lastL1SkipDocId = _l1SkipDocId; + ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipFeaturePos += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("L1Decode docId %d, docIdPos %d, L1SkipPos %d, nextDocId %d\n", + lastL1SkipDocId, + (int) (_l1SkipDocIdPos - _valIBase), + (int) (_l1SkipValI - _l1SkipValIBase), + _l1SkipDocId); +#endif + } while (docId > _l1SkipDocId); + _valI = _l1SkipDocIdPos; + ZCDECODE(_valI, lastL1SkipDocId += 1 +); + setDocId(lastL1SkipDocId); +#if DEBUG_ZCPOSTING_PRINTF + printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n", + lastL1SkipDocId, + (int) (_l1SkipDocIdPos - _valIBase), + _l1SkipDocId); +#endif + _featureSeekPos = _l1SkipFeaturePos; + clearUnpacked(); +} + + +template +void +ZcPostingIterator::doSeek(uint32_t docId) +{ + if (docId > _l1SkipDocId) { + doL1SkipSeek(docId); + } + uint32_t oDocId = getDocId(); +#if DEBUG_ZCPOSTING_ASSERT + assert(oDocId <= 
_l1SkipDocId); + assert(docId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(docId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(docId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); + assert(docId <= _l4SkipDocId); +#endif + const uint8_t *oCompr = _valI; + while (__builtin_expect(oDocId < docId, true)) { +#if DEBUG_ZCPOSTING_ASSERT + assert(oDocId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); +#endif + ZCDECODE(oCompr, oDocId += 1 +); +#if DEBUG_ZCPOSTING_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + incNeedUnpack(); + } + _valI = oCompr; + setDocId(oDocId); + return; +} + + +template +void +ZcPostingIterator::doUnpack(uint32_t docId) +{ + if (!_matchData.valid() || getUnpacked()) + return; + if (_featureSeekPos != 0) { + // Handle deferred feature position seek now. + featureSeek(_featureSeekPos); + _featureSeekPos = 0; + } + assert(docId == getDocId()); + uint32_t needUnpack = getNeedUnpack(); + if (needUnpack > 1) + _decodeContext->skipFeatures(needUnpack - 1); + _decodeContext->unpackFeatures(_matchData, docId); + setUnpacked(); +} + +template +void ZcPostingIterator::rewind(Position start) +{ + _decodeContext->setPosition(start); + _hasMore = false; + _lastDocId = 0; + _chunkNo = 0; +} + + +template class Zc4RareWordPostingIterator; +template class Zc4RareWordPostingIterator; + +template class ZcPostingIterator; +template class ZcPostingIterator; + +template class ZcRareWordPostingIterator; +template class ZcRareWordPostingIterator; + +} // namespace diskindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h new file mode 100644 index 00000000000..f0bf5b99a30 --- /dev/null +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h @@ -0,0 +1,200 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search +{ + +namespace diskindex +{ + +using bitcompression::Position; + +#define ZCDECODE(valI, resop) \ +do { \ + if (__builtin_expect(valI[0] < (1 << 7), true)) { \ + resop valI[0]; \ + valI += 1; \ + } else if (__builtin_expect(valI[1] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + (valI[1] << 7); \ + valI += 2; \ + } else if (__builtin_expect(valI[2] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 7) - 1)) << 7) + \ + (valI[2] << 14); \ + valI += 3; \ + } else if (__builtin_expect(valI[3] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 7) - 1)) << 7) + \ + ((valI[2] & ((1 << 7) - 1)) << 14) + \ + (valI[3] << 21); \ + valI += 4; \ + } else { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 7) - 1)) << 7) + \ + ((valI[2] & ((1 << 7) - 1)) << 14) + \ + ((valI[3] & ((1 << 7) - 1)) << 21) + \ + (valI[4] << 28); \ + valI += 5; \ + } \ +} while (0) + +class ZcIteratorBase : public queryeval::RankedSearchIteratorBase +{ +protected: + ZcIteratorBase(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit); + virtual void readWordStart(uint32_t docIdLimit) = 0; + virtual void rewind(Position start) = 0; + void initRange(uint32_t beginid, uint32_t endid) override; + uint32_t getDocIdLimit() const { return _docIdLimit; } + Trinary is_strict() const override { return Trinary::True; } +private: + uint32_t _docIdLimit; + Position _start; +}; + +template +class Zc4RareWordPostingIterator : public ZcIteratorBase +{ +private: + typedef ZcIteratorBase ParentClass; + +public: + typedef bitcompression::FeatureDecodeContext DecodeContextBase; + typedef index::DocIdAndFeatures DocIdAndFeatures; + DecodeContextBase *_decodeContext; + unsigned int _residue; + uint32_t _prevDocId; // Previous document id + uint32_t 
_numDocs; // Documents in chunk or word + + Zc4RareWordPostingIterator(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit); + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void readWordStart(uint32_t docIdLimit) override; + void rewind(Position start) override; +}; + +template +class ZcRareWordPostingIterator : public Zc4RareWordPostingIterator +{ +private: + typedef Zc4RareWordPostingIterator ParentClass; + using ParentClass::getDocId; + using ParentClass::getUnpacked; + using ParentClass::clearUnpacked; + using ParentClass::_residue; + using ParentClass::setDocId; + using ParentClass::setAtEnd; + using ParentClass::_numDocs; + + uint32_t _docIdK; + +public: + using ParentClass::_decodeContext; + ZcRareWordPostingIterator(const search::fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit); + + void doSeek(uint32_t docId) override; + void readWordStart(uint32_t docIdLimit) override; +}; + + +template +class ZcPostingIterator : public ZcIteratorBase +{ +private: + typedef ZcIteratorBase ParentClass; + using ParentClass::getDocId; + +public: + // Pointer to compressed data + const uint8_t *_valI; + uint32_t _lastDocId; + uint32_t _l1SkipDocId; + uint32_t _l2SkipDocId; + uint32_t _l3SkipDocId; + uint32_t _l4SkipDocId; + const uint8_t *_l1SkipDocIdPos; + const uint8_t *_l1SkipValI; + uint64_t _l1SkipFeaturePos; + const uint8_t *_valIBase; + const uint8_t *_l1SkipValIBase; + const uint8_t *_l2SkipDocIdPos; + const uint8_t *_l2SkipValI; + uint64_t _l2SkipFeaturePos; + const uint8_t *_l2SkipL1SkipPos; + const uint8_t *_l2SkipValIBase; + const uint8_t *_l3SkipDocIdPos; + const uint8_t *_l3SkipValI; + uint64_t _l3SkipFeaturePos; + const uint8_t *_l3SkipL1SkipPos; + const uint8_t *_l3SkipL2SkipPos; + const uint8_t *_l3SkipValIBase; + const uint8_t *_l4SkipDocIdPos; + const uint8_t *_l4SkipValI; + uint64_t _l4SkipFeaturePos; + const uint8_t *_l4SkipL1SkipPos; + const uint8_t 
*_l4SkipL2SkipPos; + const uint8_t *_l4SkipL3SkipPos; + + typedef bitcompression::FeatureDecodeContext DecodeContextBase; + typedef index::DocIdAndFeatures DocIdAndFeatures; + typedef index::PostingListCounts PostingListCounts; + DecodeContextBase *_decodeContext; + uint32_t _minChunkDocs; + uint32_t _docIdK; + bool _hasMore; + bool _dynamicK; + uint32_t _chunkNo; + uint32_t _numDocs; + uint64_t _featuresSize; + uint64_t _featureSeekPos; + // Start of current features block, needed for seeks + const uint64_t *_featuresValI; + int _featuresBitOffset; + // Counts used for assertions + const PostingListCounts &_counts; + + ZcPostingIterator(uint32_t minChunkDocs, + bool dynamicK, + const PostingListCounts &counts, + const search::fef::TermFieldMatchDataArray &matchData, + Position start, uint32_t docIdLimit); + + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void readWordStart(uint32_t docIdLimit) override; + void rewind(Position start) override; + VESPA_DLL_LOCAL void doChunkSkipSeek(uint32_t docId); + VESPA_DLL_LOCAL void doL4SkipSeek(uint32_t docId); + VESPA_DLL_LOCAL void doL3SkipSeek(uint32_t docId); + VESPA_DLL_LOCAL void doL2SkipSeek(uint32_t docId); + VESPA_DLL_LOCAL void doL1SkipSeek(uint32_t docId); + + void featureSeek(uint64_t offset) { + _decodeContext->_valI = _featuresValI + (_featuresBitOffset + offset) / 64; + _decodeContext->setupBits((_featuresBitOffset + offset) & 63); + } +}; + + +extern template class Zc4RareWordPostingIterator; +extern template class Zc4RareWordPostingIterator; + +extern template class ZcPostingIterator; +extern template class ZcPostingIterator; + +extern template class ZcRareWordPostingIterator; +extern template class ZcRareWordPostingIterator; + + +} // namespace diskindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/.gitignore b/searchlib/src/vespa/searchlib/docstore/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/docstore/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt new file mode 100644 index 00000000000..347f3562794 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_docstore OBJECT + SOURCES + bytecomplens.cpp + chunk.cpp + chunkformat.cpp + chunkformats.cpp + data_store_file_chunk_id.cpp + documentstore.cpp + document_store_visitor_progress.cpp + filechunk.cpp + idatastore.cpp + idocumentstore.cpp + logdatastore.cpp + logdocumentstore.cpp + writeablefilechunk.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/docstore/OWNERS b/searchlib/src/vespa/searchlib/docstore/OWNERS new file mode 100644 index 00000000000..3e9fc8ab356 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/OWNERS @@ -0,0 +1,2 @@ +balder +tegge diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp new file mode 100644 index 00000000000..1a2b8f090e1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/bytecomplens.cpp @@ -0,0 +1,260 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".search.docstore"); +#include "bytecomplens.h" + +namespace search { + +static inline uint64_t getBCN(const uint8_t *&buffer) __attribute__((__always_inline__)); + +/** + * get "Byte Compressed Number" from buffer, incrementing pointer. + * Each byte carries 7 payload bits, least significant group first; + * a set high bit (128) means another byte follows. + * This is the inverse of writeLen() below. + * @param buffer read position, advanced past the decoded number + * @return the decoded value + **/ +static inline uint64_t getBCN(const uint8_t *&buffer) +{ + uint8_t b = *buffer++; + uint64_t len = (b & 127); + unsigned shiftLen = 0; + while (b & 128) { + shiftLen += 7; + b = *buffer++; + // widen before shifting: writeLen() encodes a full uint64_t, so + // shiftLen can reach 63 and an int-typed shift would overflow + len |= (static_cast<uint64_t>(b & 127) << shiftLen); + } + return len; +} + +static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) +{ + size_t bytes = 0; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + buf.writeInt8(b); + ++bytes; + } while (len > 0); + return bytes; +} + + +ByteCompressedLengths::ByteCompressedLengths() + : _l0space(), + _l1space(), + _l2space(), + _l3table(), + _entries(0), + _progress(), + _ptrcache(), + _hasInitialOffset(false) +{ + clear(); +} + + +void +ByteCompressedLengths::clear() +{ + _l0space.clear(); + _l1space.clear(); + _l2space.clear(); + _l3table.clear(); + + _entries = 0; + + _progress.lenSum1 = 0; + _progress.lenSum2 = 0; + _progress.l0oSum1 = 0; + _progress.l0oSum2 = 0; + _progress.l1oSum2 = 0; + _progress.last_offset = 0; + + _ptrcache.l0table = NULL; + _ptrcache.l1table = NULL; + _ptrcache.l2table = NULL; + + _hasInitialOffset = false; +} + + +void +ByteCompressedLengths::swap(ByteCompressedLengths& other) +{ + _l0space.swap(other._l0space); + _l1space.swap(other._l1space); + _l2space.swap(other._l2space); + _l3table.swap(other._l3table); + + std::swap(_entries, other._entries); + std::swap(_progress, other._progress); + std::swap(_ptrcache, other._ptrcache); + std::swap(_hasInitialOffset, other._hasInitialOffset); +} + + +// add a new offset to the compressed tables +void +ByteCompressedLengths::addOffset(uint64_t offset) +{ + assert(offset >= _progress.last_offset); + + // delta from last offset: + uint64_t len = offset - _progress.last_offset; 
+ // which entry is this: + uint64_t idx = _entries++; + + if ((idx & 31) == 0) { + // add entry to some skip-table + _progress.lenSum2 += _progress.lenSum1; // accumulate to Level2 + _progress.l0oSum2 += _progress.l0oSum1; // accumulate to Level2 + + uint64_t t1n = idx >> 5; + if ((t1n & 31) == 0) { + // add Level2 or Level3 table entry: + uint64_t t2n = t1n >> 5; + + if ((t2n & 31) == 0) { + // add new Level3 table entry: + L3Entry e; + e.offset = _progress.last_offset; + e.l0toff = _l0space.getDataLen(); + e.l1toff = _l1space.getDataLen(); + e.l2toff = _l2space.getDataLen(); + + _l3table.push_back(e); + } else { + // write to Level2 table, sums since last reset: + writeLen(_l2space, _progress.lenSum2); // sum of Level0 lengths + writeLen(_l2space, _progress.l0oSum2); // sum size of Level0 entries + writeLen(_l2space, _progress.l1oSum2); // sum size of Level1 entries + } + // reset Level2 sums: + _progress.lenSum2 = 0; + _progress.l0oSum2 = 0; + _progress.l1oSum2 = 0; + } else { + // write to Level1 table, sums since last reset: + _progress.l1oSum2 += writeLen(_l1space, _progress.lenSum1); // sum of Level0 lengths + _progress.l1oSum2 += writeLen(_l1space, _progress.l0oSum1); // sum size of Level0 entries + } + // reset Level1 sums: + _progress.lenSum1 = 0; + _progress.l0oSum1 = 0; + } + // always write length (offset delta) to Level0 table: + _progress.l0oSum1 += writeLen(_l0space, len); // accumulate to Level1 + _progress.lenSum1 += len; // accumulate to Level1 + _progress.last_offset = offset; +} + + +void +ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) +{ + // ignore NOP: + if (entries == 0) return; + + // Do we have some offsets already? 
+ if (_hasInitialOffset) { + // yes, add first offset normally + addOffset(offsets[0]); + } else { + // no, special treatment for very first offset + _progress.last_offset = offsets[0]; + _hasInitialOffset = true; + } + for (uint64_t cnt = 1; cnt < entries; ++cnt) { + addOffset(offsets[cnt]); + } + + // Simplify access to actual data: + _ptrcache.l0table = (uint8_t *)_l0space.getData(); + _ptrcache.l1table = (uint8_t *)_l1space.getData(); + _ptrcache.l2table = (uint8_t *)_l2space.getData(); + + // some statistics available when debug logging: + LOG(debug, "compressed %ld offsets", (_entries+1)); + LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t)); + LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", + _l0space.getDataLen(), + _l1space.getDataLen(), + _l2space.getDataLen(), + _l3table.size()); + LOG(debug, "(%ld bytes)", + (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + + _l3table.size()*sizeof(L3Entry))); +} + + +ByteCompressedLengths::~ByteCompressedLengths() +{ +} + +ByteCompressedLengths::OffLen +ByteCompressedLengths::getOffLen(uint64_t idx) const +{ + assert(idx < _entries); + + unsigned skipL0 = idx & 31; + unsigned skipL1 = (idx >> 5) & 31; + unsigned skipL2 = (idx >> 10) & 31; + uint64_t skipL3 = (idx >> 15); + + uint64_t offset = _l3table[skipL3].offset; + uint64_t l0toff = _l3table[skipL3].l0toff; + uint64_t l1toff = _l3table[skipL3].l1toff; + uint64_t l2toff = _l3table[skipL3].l2toff; + + // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + + const uint8_t *l2pos = _ptrcache.l2table + l2toff; + + while (skipL2 > 0) { + --skipL2; + offset += getBCN(l2pos); + l0toff += getBCN(l2pos); + l1toff += getBCN(l2pos); + } + + const uint8_t *l1pos = _ptrcache.l1table + l1toff; + + while (skipL1 > 0) { + --skipL1; + offset += getBCN(l1pos); + l0toff += getBCN(l1pos); + + } + const uint8_t *l0pos = _ptrcache.l0table + l0toff; + + while (skipL0 > 0) { + --skipL0; + offset += getBCN(l0pos); 
+ } + // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + OffLen retval; + retval.offset = offset; + retval.length = getBCN(l0pos); + return retval; +} + + +size_t +ByteCompressedLengths::memoryUsed() const +{ + size_t mem = sizeof(*this); + mem += _l0space.getBufSize(); + mem += _l1space.getBufSize(); + mem += _l2space.getBufSize(); + mem += _l3table.capacity() * sizeof(L3Entry); + return mem; +} + + + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/bytecomplens.h b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h new file mode 100644 index 00000000000..5e4675ae297 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/bytecomplens.h @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +/** + * Class compressing a table of offsets in memory. + * After adding (n) offsets you can access + * (n-1) pairs of (length, offset). + * All offsets must be increasing, but they + * may be added in several chunks. + **/ +class ByteCompressedLengths +{ +public: + /** + * Construct an empty instance + **/ + ByteCompressedLengths(); + + /** + * add the given offset table. + * @param entries number of offsets to store. + * @param offsets pointer to table that contains (entries) offsets. + **/ + void addOffsetTable(uint64_t entries, uint64_t *offsets); + + /** + * free resources + **/ + ~ByteCompressedLengths(); + + struct OffLen + { + uint64_t offset; + uint64_t length; + }; + + /** + * Fetch an offset and length from compressed data. 
+ * Note restriction: idx must be < size() + * + * @param idx The index into the offset table + * @return offset[id] and the delta (offset[id+1] - offset[id]) + **/ + OffLen getOffLen(uint64_t idx) const; + + /** + * The number of (length, offset) pairs stored + * Note that size() == sum(entries) - 1 + **/ + uint64_t size() const { return _entries; } + + /** + * remove all data from this instance + **/ + void clear(); + + /** + * swap all data with another instance + **/ + void swap(ByteCompressedLengths& other); + + /** + * Calculate memory used by this instance + * @return memory usage (in bytes) + **/ + size_t memoryUsed() const; + +private: + struct L3Entry { + uint64_t offset; + uint64_t l0toff; + uint64_t l1toff; + uint64_t l2toff; + }; + vespalib::DataBuffer _l0space; + vespalib::DataBuffer _l1space; + vespalib::DataBuffer _l2space; + + std::vector _l3table; + + uint64_t _entries; + + struct ProgressPoint { + uint64_t lenSum1; + uint64_t lenSum2; + uint64_t l0oSum1; + uint64_t l0oSum2; + uint64_t l1oSum2; + uint64_t last_offset; + } _progress; + + struct CachedPointers { + const uint8_t *l0table; + const uint8_t *l1table; + const uint8_t *l2table; + } _ptrcache; + + bool _hasInitialOffset; + + void addOffset(uint64_t offset); +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/cachestats.h b/searchlib/src/vespa/searchlib/docstore/cachestats.h new file mode 100644 index 00000000000..216b62f199a --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/cachestats.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +namespace search { + +struct CacheStats { + size_t hits; + size_t misses; + size_t elements; + size_t memory_used; + + CacheStats(void) + : hits(0), + misses(0), + elements(0), + memory_used(0) + { + } + + CacheStats(size_t hit, size_t miss, size_t elem, size_t mem) + : hits(hit), + misses(miss), + elements(elem), + memory_used(mem) + { + } + + CacheStats & + operator+=(const CacheStats &rhs) + { + hits += rhs.hits; + misses += rhs.misses; + elements += rhs.elements; + memory_used += rhs.memory_used; + return *this; + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.cpp b/searchlib/src/vespa/searchlib/docstore/chunk.cpp new file mode 100644 index 00000000000..931e3a11353 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/chunk.cpp @@ -0,0 +1,139 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include + +namespace search { + +LidMeta +Chunk::append(uint32_t lid, const void * buffer, size_t len) +{ + vespalib::nbostream & os = getData(); + size_t oldSz(os.size()); + os << lid << static_cast(len); + os.write(buffer, len); + _lids.push_back(Entry(lid, len, oldSz)); + return LidMeta(lid, len); +} + +ssize_t +Chunk::read(uint32_t lid, vespalib::DataBuffer & buffer) const +{ + vespalib::ConstBufferRef buf = getLid(lid); + if (buf.size() != 0) { + buffer.writeBytes(buf.c_str(), buf.size()); + } + return buf.size(); +} + +bool +Chunk::hasRoom(size_t len) const +{ + const size_t HeaderSize(2*sizeof(uint32_t)); + const size_t TrailerSize(sizeof(uint64_t)); + // To avoid read races during compacting These buffers must be preallocated. + // There is always room for at least one element. + // There is also room as long as neither _lids[] nor _dataBuf[] require reallocation. + // Remember to account for Header and Trailer space requirement. 
+ const vespalib::nbostream & os = getData(); + return _lids.empty() + || (((HeaderSize + TrailerSize + os.size() + len) <= os.capacity()) + && ((_lids.size() + 1) <= _lids.capacity())); +} + +void +Chunk::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression) +{ + _lastSerial = lastSerial; + _format->pack(_lastSerial, compressed, compression); +} + +Chunk::Chunk(uint32_t id, const Config & config) : + _id(id), + _nextOffset(0), + _lastSerial(static_cast(-1l)), + _format(new ChunkFormatV2(config.getMaxBytes())) +{ + _lids.reserve(config.getMaxCount()); +} + +Chunk::Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc) : + _id(id), + _nextOffset(0), + _lastSerial(static_cast(-1l)), + _format(ChunkFormat::deserialize(buffer, len, skipcrc)) +{ + vespalib::nbostream &os = getData(); + while (os.size() > sizeof(_lastSerial)) { + uint32_t sz(0); + uint32_t lid(0); + ssize_t oldRp(os.rp()); + os >> lid >> sz; + os.adjustReadPos(sz); + _lids.push_back(Entry(lid, sz, oldRp)); + } + os >> _lastSerial; +} + +vespalib::ConstBufferRef +Chunk::getLid(uint32_t lid) const +{ + vespalib::ConstBufferRef buf; + for (LidList::const_iterator it(_lids.begin()), mt(_lids.end()); it != mt; it++) { + if (it->getLid() == lid) { +#if 1 + uint32_t bLid(0), bLen(0); + vespalib::nbostream is(getData().c_str()+it->getOffset(), it->size()); + is >> bLid >> bLen; + assert(bLid == lid); + assert(bLen == it->netSize()); + assert((bLen + 2*sizeof(uint32_t)) == it->size()); +#endif + buf = vespalib::ConstBufferRef(getData().c_str() + it->getNetOffset(), it->netSize()); + } + } + return buf; +} + +Chunk::LidList +Chunk::getUniqueLids() const +{ + vespalib::hash_map last; + for (const Entry & e : _lids) { + last[e.getLid()] = e; + } + LidList unique; + unique.reserve(last.size()); + for (auto it(last.begin()), mt(last.end()); it != mt; it++) { + unique.push_back(it->second); + } + return unique; +} + +vespalib::nbostream & 
+ChunkMeta::deserialize(vespalib::nbostream & is) +{ + return is >> _offset >> _size >> _lastSerial >> _numEntries; +} + +vespalib::nbostream & +ChunkMeta::serialize(vespalib::nbostream & os) const +{ + return os << _offset << _size << _lastSerial << _numEntries; +} + +vespalib::nbostream & +LidMeta::deserialize(vespalib::nbostream & is) +{ + return is >> _lid >> _size; +} + +vespalib::nbostream & +LidMeta::serialize(vespalib::nbostream & os) const +{ + return os << _lid << _size; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/chunk.h b/searchlib/src/vespa/searchlib/docstore/chunk.h new file mode 100644 index 00000000000..b4c521aa3da --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/chunk.h @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +class ChunkMeta { +public: + ChunkMeta() : + _offset(0), + _lastSerial(0), + _size(0), + _numEntries(0) + { } + ChunkMeta(uint64_t offset, uint32_t size, uint64_t lastSerial, uint32_t numEntries) : + _offset(offset), + _lastSerial(lastSerial), + _size(size), + _numEntries(numEntries) + { } + uint32_t getNumEntries() const { return _numEntries; } + uint32_t getSize() const { return _size; } + uint64_t getLastSerial() const { return _lastSerial; } + uint64_t getOffset() const { return _offset; } + vespalib::nbostream & deserialize(vespalib::nbostream & is); + vespalib::nbostream & serialize(vespalib::nbostream & os) const; + bool operator < (const ChunkMeta & b) const { return _lastSerial < b._lastSerial; } +private: + uint64_t _offset; + uint64_t _lastSerial; + uint32_t _size; + uint32_t _numEntries; +}; + +class LidMeta { +public: + LidMeta() : _lid(0), _size(0) { } + LidMeta(uint32_t lid, uint32_t sz) : _lid(lid), _size(sz) { } + uint32_t getLid() const { return _lid; } + uint32_t size() const { return _size; } + vespalib::nbostream & 
deserialize(vespalib::nbostream & is); + vespalib::nbostream & serialize(vespalib::nbostream & os) const; +private: + uint32_t _lid; + uint32_t _size; +}; + +class Chunk { +public: + typedef std::unique_ptr UP; + class Config { + public: + Config(size_t maxBytes, size_t maxCount) : _maxBytes(maxBytes), _maxCount(maxCount) { } + size_t getMaxBytes() const { return _maxBytes; } + size_t getMaxCount() const { return _maxCount; } + private: + size_t _maxBytes; + size_t _maxCount; + }; + class Entry { + public: + Entry() : _lid(0), _sz(0), _offset(0) { } + Entry(uint32_t lid, uint32_t sz, uint32_t offset) : _lid(lid), _sz(sz), _offset(offset) { } + uint32_t getLid() const { return _lid; } + uint32_t size() const { return _sz + 2*4; } + uint32_t netSize() const { return _sz; } + uint32_t getNetOffset() const { return _offset + 2*4; } + uint32_t getOffset() const { return _offset; } + private: + uint32_t _lid; + uint32_t _sz; + uint32_t _offset; + }; + typedef std::vector LidList; + Chunk(uint32_t id, const Config & config); + Chunk(uint32_t id, const void * buffer, size_t len, bool skipcrc=false); + LidMeta append(uint32_t lid, const void * buffer, size_t len); + ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const; + size_t count() const { return _lids.size(); } + bool empty() const { return count() == 0; } + size_t size() const { return getData().size(); } + const LidList & getLids() const { return _lids; } + LidList getUniqueLids() const; + size_t getMaxPackSize(const document::CompressionConfig & compression) const { return _format->getMaxPackSize(compression); } + void pack(uint64_t lastSerial, vespalib::DataBuffer & buffer, const document::CompressionConfig & compression); + uint64_t getLastSerial() const { return _lastSerial; } + uint32_t getId() const { return _id; } + bool validSerial() const { return getLastSerial() != static_cast(-1l); } + vespalib::ConstBufferRef getLid(uint32_t lid) const; + const vespalib::nbostream & getData() const { return 
_format->getBuffer(); } + bool hasRoom(size_t len) const; +private: + vespalib::nbostream & getData() { return _format->getBuffer(); } + + uint32_t _id; + uint32_t _nextOffset; + uint64_t _lastSerial; + ChunkFormat::UP _format; + LidList _lids; +}; + +typedef std::vector ChunkMetaV; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp new file mode 100644 index 00000000000..1b243e1546e --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +namespace search { + +using vespalib::make_string; +using vespalib::Exception; + +ChunkException::ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location) : + Exception(make_string("Illegal chunk: %s", msg.c_str()), location) +{ +} + +void +ChunkFormat::pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression) +{ + vespalib::nbostream & os = _dataBuf; + os << lastSerial; + const uint8_t version(getVersion()); + compressed.writeInt8(version); + writeHeader(compressed); + const size_t serializedSizePos(compressed.getDataLen()); + if (includeSerializedSize()) { + compressed.writeInt32(0); + } + const size_t oldPos(compressed.getDataLen()); + compressed.writeInt8(compression.type); + compressed.writeInt32(os.size()); + document::CompressionConfig::Type type(document::compress(compression, vespalib::ConstBufferRef(os.c_str(), os.size()), compressed, false)); + if (compression.type != type) { + compressed.getData()[oldPos] = type; + } + if (includeSerializedSize()) { + const uint32_t serializedSize = compressed.getDataLen()+4; + *reinterpret_cast(compressed.getData() + serializedSizePos) = htonl(serializedSize); + } + uint32_t crc = computeCrc(compressed.getData(), 
compressed.getDataLen()); + compressed.writeInt32(crc); +} + +/** + * Estimate the number of bytes pack() needs for the current data buffer + * under the given compression configuration. + * @param compression compression settings that pack() will be called with + * @return size in bytes to reserve for the packed chunk + **/ +size_t +ChunkFormat::getMaxPackSize(const document::CompressionConfig & compression) const +{ + const size_t OVERHEAD(0); + // Fixed framing bytes that pack() writes around the payload: + // version(1), compression type(1), uncompressed length(4), crc(4), + // and 4 more for the serialized size field when the format includes it. + // The conditional must be parenthesized: ?: binds looser than binary plus, + // so without the parentheses the whole sum became the condition and + // MINSIZE was always 4. + const size_t MINSIZE(1 + 1 + 4 + 4 + (includeSerializedSize() ? 4 : 0)); + const size_t formatSpecificSize(getHeaderSize()); + size_t rawSize(MINSIZE + formatSpecificSize + OVERHEAD); + // the extra 8 bytes cover the lastSerial that pack() appends to the data buffer + const size_t payloadSize(_dataBuf.size() + 8); + // This is a little dirty -> need interface. + if (compression.type == document::CompressionConfig::LZ4) { + document::LZ4Compressor lz4; + rawSize += lz4.adjustProcessLen(0, payloadSize); + } else { + rawSize += payloadSize; + } + return rawSize; +} + +void +ChunkFormat::verifyCompression(uint8_t type) +{ + if ((type != document::CompressionConfig::LZ4) && + (type != document::CompressionConfig::NONE)) { + throw ChunkException(make_string("Unknown compressiontype %d", type), VESPA_STRLOC); + } +} + +ChunkFormat::UP +ChunkFormat::deserialize(const void * buffer, size_t len, bool skipcrc) +{ + uint8_t version(0); + vespalib::nbostream raw(buffer, len); + const uint32_t minimumRequiredSpace(sizeof(uint8_t)*2 + sizeof(uint32_t)*2); + if (raw.size() < minimumRequiredSpace) { + throw ChunkException(make_string("Available space (%ld) is less than required (%d)", raw.size(), minimumRequiredSpace), VESPA_STRLOC); + } + raw >> version; + size_t currPos = raw.rp(); + raw.adjustReadPos(raw.size() - sizeof(uint32_t)); + uint32_t crc32(0); + raw >> crc32; + raw.rp(currPos); + ChunkFormat::UP format; + if (version == ChunkFormatV1::VERSION) { + if (skipcrc) { + format.reset(new ChunkFormatV1(raw)); + } else { + format.reset(new ChunkFormatV1(raw, crc32)); + } + } else if (version == ChunkFormatV2::VERSION) { + if (skipcrc) { + format.reset(new ChunkFormatV2(raw)); + } else { + format.reset(new ChunkFormatV2(raw, crc32)); + } + } else { + throw ChunkException(make_string("Unknown version %d", version), VESPA_STRLOC); + 
} + return format; +} + +ChunkFormat::ChunkFormat() : + _dataBuf() +{ +} + +ChunkFormat::~ChunkFormat() +{ +} + +ChunkFormat::ChunkFormat(size_t maxSize) : + _dataBuf(maxSize) +{ +} + +void +ChunkFormat::verifyCrc(const vespalib::nbostream & is, uint32_t expectedCrc) const +{ + uint32_t computedCrc32 = computeCrc(is.peek()-1, is.size() + 1 - sizeof(uint32_t)); + if (expectedCrc != computedCrc32) { + throw ChunkException(make_string("Crc32 mismatch. Expected (%0x), computed (%0x)", expectedCrc, computedCrc32), VESPA_STRLOC); + } +} + +void +ChunkFormat::deserializeBody(vespalib::nbostream & is) +{ + if (includeSerializedSize()) { + uint32_t serializedSize(0); + is >> serializedSize; + const uint32_t alreadyRead(sizeof(uint8_t) + getHeaderSize() + sizeof(uint32_t)); + const uint32_t required(serializedSize - alreadyRead); + if ((is.size() + alreadyRead) < serializedSize) { + throw ChunkException(make_string("Not enough data(%u) available in stream(%zu)", required, is.size()), VESPA_STRLOC); + } + } + uint8_t type(0); + is >> type; + verifyCompression(type); + uint32_t uncompressedLen(0); + is >> uncompressedLen; + // The last 4 bytes of the stream hold the crc; make sure they are present so the length computed below cannot wrap around. + if (is.size() < sizeof(uint32_t)) { + throw ChunkException(make_string("Not enough data(%zu) available in stream to hold the crc", is.size()), VESPA_STRLOC); + } + // This is a dirty trick to fool some odd sanity checking in DataBuffer::swap + vespalib::DataBuffer uncompressed(const_cast(is.peek()), (size_t)0); + vespalib::ConstBufferRef data(is.peek(), is.size() - sizeof(uint32_t)); + document::decompress(document::CompressionConfig::Type(type), uncompressedLen, data, uncompressed, true); + assert(uncompressed.getData() == uncompressed.getDead()); + if (uncompressed.getData() != data.c_str()) { + const size_t sz(uncompressed.getDataLen()); + vespalib::nbostream(uncompressed.stealBuffer(), sz).swap(_dataBuf); + } else { + _dataBuf = vespalib::nbostream(uncompressed.getData(), uncompressed.getDataLen()); + } +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformat.h b/searchlib/src/vespa/searchlib/docstore/chunkformat.h new file mode 100644 index 00000000000..8da755522e0 --- /dev/null 
+++ b/searchlib/src/vespa/searchlib/docstore/chunkformat.h @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { + +class ChunkException : public vespalib::Exception +{ +public: + ChunkException(const vespalib::stringref & msg, const vespalib::stringref & location); +}; + +// This is an interface for implementing a chunk format +class ChunkFormat +{ +public: + virtual ~ChunkFormat(); + typedef std::unique_ptr UP; + vespalib::nbostream & getBuffer() { return _dataBuf; } + const vespalib::nbostream & getBuffer() const { return _dataBuf; } + + /** + * Will serialize your chunk. + * @param lastSerial The last serial number of any entry in the packet. + * @param compressed The buffer where the serialized data shall be placed. + * @param compression What kind of compression shall be employed. + */ + void pack(uint64_t lastSerial, vespalib::DataBuffer & compressed, const document::CompressionConfig & compression); + /** + * Will deserialize and create a representation of the uncompressed data. + * @param buffer Pointer to the serialized data + * @param len Length of serialized data + * @param skipcrc Indicates whether crc verification shall be skipped. + */ + static ChunkFormat::UP deserialize(const void * buffer, size_t len, bool skipcrc); + /** + * Returns the maximum size a packet can have. It allows correct size estimation + * needed for direct io alignment. + * @param compression Compression config to be used. + * @return maximum number of bytes a packet can take in serialized form. + */ + size_t getMaxPackSize(const document::CompressionConfig & compression) const; +protected: + /** + * Constructor used when deserializing + */ + ChunkFormat(); + /** + * Constructor used when creating a new chunk. + * @param maxSize The maximum size the chunk can take before it will need to be closed. 
+ */ + ChunkFormat(size_t maxSize); + /** + * Will deserialize and uncompress the body. + * @param is The potentially compressed stream. + */ + void deserializeBody(vespalib::nbostream & is); + /** + * Will compute and check the crc of the incoming stream. + * Will start 1 byte earlier and stop 4 bytes ahead of end. + * Throws ChunkException if check fails. + */ + void verifyCrc(const vespalib::nbostream & is, uint32_t expected) const; +private: + /** + * Used when serializing to obtain correct version. + * @return version + */ + virtual uint8_t getVersion() const = 0; + /** + * Used to compute maximum size needed for a serialized chunk. + * @return size of header this format will produce. + */ + virtual size_t getHeaderSize() const = 0; + /** + * Does this format require the length of the serialized data to be included. + * Length is inclusive. From and including version to end of crc. + * @return if length is required. + */ + virtual bool includeSerializedSize() const = 0; + /** + * Will compute the crc for verifying the data. + * @param buf Start of buffer + * @param sz Size of buffer + * @return computed crc. + */ + virtual uint32_t computeCrc(const void * buf, size_t sz) const = 0; + /** + * Allows each format to write its special stuff after the version byte. + * Must be reflected in @getHeaderSize + * @param buf Buffer to write into. + */ + virtual void writeHeader(vespalib::DataBuffer & buf) const = 0; + + static void verifyCompression(uint8_t type); + + vespalib::nbostream _dataBuf; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp b/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp new file mode 100644 index 00000000000..bdff46aacbc --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/chunkformats.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include + +namespace search { + +using vespalib::make_string; + +ChunkFormatV1::ChunkFormatV1(vespalib::nbostream & is) : + ChunkFormat() +{ + deserializeBody(is); +} + +ChunkFormatV1::ChunkFormatV1(vespalib::nbostream & is, uint32_t expectedCrc) : + ChunkFormat() +{ + verifyCrc(is, expectedCrc); + deserializeBody(is); +} + +ChunkFormatV1::ChunkFormatV1(size_t maxSize) : + ChunkFormat(maxSize) +{ +} + +uint32_t +ChunkFormatV1::computeCrc(const void * buf, size_t sz) const +{ + return vespalib::crc_32_type::crc(buf, sz); +} + +ChunkFormatV2::ChunkFormatV2(vespalib::nbostream & is) : + ChunkFormat() +{ + verifyMagic(is); + deserializeBody(is); +} + +ChunkFormatV2::ChunkFormatV2(vespalib::nbostream & is, uint32_t expectedCrc) : + ChunkFormat() +{ + verifyCrc(is, expectedCrc); + verifyMagic(is); + deserializeBody(is); +} + + +ChunkFormatV2::ChunkFormatV2(size_t maxSize) : + ChunkFormat(maxSize) +{ +} + +uint32_t +ChunkFormatV2::computeCrc(const void * buf, size_t sz) const +{ + return XXH32(buf, sz, 0); +} + +void +ChunkFormatV2::verifyMagic(vespalib::nbostream & is) const +{ + uint32_t magic; + is >> magic; + if (magic != MAGIC) { + throw ChunkException(make_string("Unknown magic %0x, expected %0x", magic, MAGIC), VESPA_STRLOC); + } +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/chunkformats.h b/searchlib/src/vespa/searchlib/docstore/chunkformats.h new file mode 100644 index 00000000000..15a45ec7e60 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/chunkformats.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { + +class ChunkFormatV1 : public ChunkFormat +{ +public: + enum {VERSION=0}; + ChunkFormatV1(vespalib::nbostream & is); + ChunkFormatV1(vespalib::nbostream & is, uint32_t expectedCrc); + ChunkFormatV1(size_t maxSize); +private: + virtual bool includeSerializedSize() const { return false; } + virtual uint8_t getVersion() const { return VERSION; } + virtual size_t getHeaderSize() const { return 0; } + virtual uint32_t computeCrc(const void * buf, size_t sz) const; + virtual void writeHeader(vespalib::DataBuffer & buf) const { + (void) buf; + } +}; + +class ChunkFormatV2 : public ChunkFormat +{ +public: + enum {VERSION=1, MAGIC=0x5ba32de7}; + ChunkFormatV2(vespalib::nbostream & is); + ChunkFormatV2(vespalib::nbostream & is, uint32_t expectedCrc); + ChunkFormatV2(size_t maxSize); +private: + virtual bool includeSerializedSize() const { return true; } + virtual size_t getHeaderSize() const { + // MAGIC + return 4; + } + virtual uint8_t getVersion() const { return VERSION; } + virtual uint32_t computeCrc(const void * buf, size_t sz) const; + virtual void writeHeader(vespalib::DataBuffer & buf) const { + buf.writeInt32(MAGIC); + } + void verifyMagic(vespalib::nbostream & is) const; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp new file mode 100644 index 00000000000..50177e5af31 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.cpp @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "data_store_file_chunk_id.h" +#include "filechunk.h" + +namespace search +{ + +vespalib::string +DataStoreFileChunkId::createName(const vespalib::string &baseName) const +{ + FileChunk::NameId id(_nameId); + return id.createName(baseName); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h new file mode 100644 index 00000000000..7ed50a9d291 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_id.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { + +/* + * Class representing the relative naming of a underlying file for a + * data store. + */ +class DataStoreFileChunkId +{ + uint64_t _nameId; +public: + DataStoreFileChunkId(uint64_t nameId_in) + : _nameId(nameId_in) + { + } + uint64_t nameId() const { return _nameId; } + vespalib::string createName(const vespalib::string &baseName) const; + bool operator<(const DataStoreFileChunkId &rhs) const { + return _nameId < rhs._nameId; + } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h new file mode 100644 index 00000000000..48a99c0762c --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/data_store_file_chunk_stats.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "data_store_storage_stats.h" +#include "data_store_file_chunk_id.h" + +namespace search { + +/* + * Class representing stats for the underlying file for a data store. 
+ */ +class DataStoreFileChunkStats : public DataStoreStorageStats, + public DataStoreFileChunkId +{ +public: + DataStoreFileChunkStats(uint64_t diskUsage_in, uint64_t diskBloat_in, + double maxBucketSpread_in, + uint64_t lastSerialNum_in, + uint64_t lastFlushedSerialNum_in, + uint64_t nameId_in) + : DataStoreStorageStats(diskUsage_in, diskBloat_in, + maxBucketSpread_in, + lastSerialNum_in, lastFlushedSerialNum_in), + DataStoreFileChunkId(nameId_in) + { + } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h b/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h new file mode 100644 index 00000000000..906a2d76995 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/data_store_storage_stats.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { + +/* + * Class representing brief stats for a data store. 
+ */ +class DataStoreStorageStats +{ + uint64_t _diskUsage; + uint64_t _diskBloat; + double _maxBucketSpread; + uint64_t _lastSerialNum; + uint64_t _lastFlushedSerialNum; +public: + DataStoreStorageStats(uint64_t diskUsage_in, uint64_t diskBloat_in, + double maxBucketSpread_in, + uint64_t lastSerialNum_in, + uint64_t lastFlushedSerialNum_in) + : _diskUsage(diskUsage_in), + _diskBloat(diskBloat_in), + _maxBucketSpread(maxBucketSpread_in), + _lastSerialNum(lastSerialNum_in), + _lastFlushedSerialNum(lastFlushedSerialNum_in) + { + } + uint64_t diskUsage() const { return _diskUsage; } + uint64_t diskBloat() const { return _diskBloat; } + double maxBucketSpread() const { return _maxBucketSpread; } + uint64_t lastSerialNum() const { return _lastSerialNum; } + uint64_t lastFlushedSerialNum() const { return _lastFlushedSerialNum; } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp new file mode 100644 index 00000000000..cca794ccb69 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.cpp @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "document_store_visitor_progress.h" + +namespace search +{ + + +DocumentStoreVisitorProgress::DocumentStoreVisitorProgress() + : search::IDocumentStoreVisitorProgress(), + _progress(0.0) +{ +} + + +void +DocumentStoreVisitorProgress::updateProgress(double progress) +{ + _progress = progress; +} + + +double +DocumentStoreVisitorProgress::getProgress() const +{ + return _progress; +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h new file mode 100644 index 00000000000..125e8cb0f98 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/document_store_visitor_progress.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "idocumentstore.h" + +namespace search +{ + +class DocumentStoreVisitorProgress : public IDocumentStoreVisitorProgress +{ + double _progress; +public: + DocumentStoreVisitorProgress(); + + virtual void + updateProgress(double progress); + + virtual double + getProgress() const; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.cpp b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp new file mode 100644 index 00000000000..024a8a59d47 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.cpp @@ -0,0 +1,392 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".search.docstore"); + +#include "cachestats.h" +#include "documentstore.h" +#include +#include + +namespace search +{ + +namespace { + +class DocumentVisitorAdapter : public IBufferVisitor +{ +public: + DocumentVisitorAdapter(const document::DocumentTypeRepo & repo, IDocumentVisitor & visitor) : + _repo(repo), + _visitor(visitor) + { } + void visit(uint32_t lid, vespalib::ConstBufferRef buf) override; +private: + const document::DocumentTypeRepo & _repo; + IDocumentVisitor & _visitor; +}; + +void +DocumentVisitorAdapter::visit(uint32_t lid, vespalib::ConstBufferRef buf) { + if (buf.size() > 0) { + vespalib::nbostream is(buf.c_str(), buf.size()); + document::Document::UP doc(new document::Document(_repo, is)); + _visitor.visit(lid, std::move(doc)); + } +} + +} + +using vespalib::nbostream; + +void +DocumentStore::Value::set(vespalib::DataBuffer && buf, + ssize_t len, + const document::CompressionConfig & compression) +{ + //Underlying buffer must be identical to allow swap. + vespalib::DataBuffer compressed(buf.getData(), 0u); + document::CompressionConfig::Type type = + document::compress(compression, + vespalib::ConstBufferRef(buf.getData(), len), + compressed, true); + _compressedSize = compressed.getDataLen(); + if (buf.getData() == compressed.getData()) { + // Uncompressed so we can just steal the underlying buffer. 
+ buf.stealBuffer().swap(_buf); + } else { + compressed.stealBuffer().swap(_buf); + } + assert(((type == document::CompressionConfig::NONE) && + (len == ssize_t(_compressedSize))) || + ((type != document::CompressionConfig::NONE) && + (len > ssize_t(_compressedSize)))); + setCompression(type, len); +} + + +document::Document::UP +DocumentStore::Value::deserializeDocument(const document::DocumentTypeRepo & repo) +{ + vespalib::DataBuffer uncompressed((char *)_buf.get(), (size_t)0); + document::decompress(getCompression(), + getUncompressedSize(), + vespalib::ConstBufferRef(*this, size()), + uncompressed, true); + vespalib::nbostream is(uncompressed.getData(), uncompressed.getDataLen()); + return document::Document::UP(new document::Document(repo, is)); +} + + +void DocumentStore::BackingStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const { + DocumentVisitorAdapter adapter(repo, visitor); + _backingStore.read(lids, adapter); +} + +bool +DocumentStore::BackingStore::read(DocumentIdT key, Value & value) const { + bool found(false); + vespalib::DataBuffer buf(4096); + ssize_t len = _backingStore.read(key, buf); + if (len > 0) { + value.set(std::move(buf), len, _compression); + found = true; + } + return found; +} + +DocumentStore::DocumentStore(const Config & config, IDataStore & store) + : IDocumentStore(), + _config(config), + _backingStore(store), + _store(_backingStore, _config.getCompression()), + _cache(new Cache(_store, config.getMaxCacheBytes())), + _uncached_lookups(0) +{ + _cache->reserveElements(config.getInitialCacheEntries()); +} + +DocumentStore::~DocumentStore() +{ +} + +void +DocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const +{ + _store.visit(lids, repo, visitor); +} + +document::Document::UP +DocumentStore::read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const +{ + document::Document::UP retval; + Value value; + 
if (useCache()) { + value = _cache->read(lid); + } else { + vespalib::Atomic::add(&_uncached_lookups, 1UL); + _store.read(lid, value); + } + if ( ! value.empty() ) { + retval = value.deserializeDocument(repo); + } + return retval; +} + +void +DocumentStore::write(uint64_t syncToken, const document::Document& doc, DocumentIdT lid) +{ + nbostream stream(12345); + doc.serialize(stream); + _backingStore.write(syncToken, lid, stream.peek(), stream.size()); + if (useCache()) { + _cache->invalidate(lid); + } +} + +void +DocumentStore::remove(uint64_t syncToken, DocumentIdT lid) +{ + _backingStore.remove(syncToken, lid); + if (useCache()) { + _cache->invalidate(lid); + } +} + +void +DocumentStore::compact(uint64_t syncToken) +{ + (void) syncToken; + // Most implementations does not offer compact. +} + +void +DocumentStore::flush(uint64_t syncToken) +{ + _backingStore.flush(syncToken); +} + +uint64_t +DocumentStore::initFlush(uint64_t syncToken) +{ + return _backingStore.initFlush(syncToken); +} + +uint64_t +DocumentStore::lastSyncToken() const +{ + return _backingStore.lastSyncToken(); +} + +uint64_t +DocumentStore::tentativeLastSyncToken() const +{ + return _backingStore.tentativeLastSyncToken(); +} + +fastos::TimeStamp +DocumentStore::getLastFlushTime() const +{ + return _backingStore.getLastFlushTime(); +} + +template +class DocumentStore::WrapVisitor : public IDataStoreVisitor +{ + Visitor &_visitor; + const document::DocumentTypeRepo &_repo; + const document::CompressionConfig &_compression; + IDocumentStore &_ds; + uint64_t _syncToken; + +public: + virtual void + visit(uint32_t lid, const void *buffer, size_t sz); + + WrapVisitor(Visitor &visitor, + const document::DocumentTypeRepo &repo, + const document::CompressionConfig &compresion, + IDocumentStore &ds, + uint64_t syncToken); + + inline void rewrite(uint32_t lid, const document::Document &doc); + + inline void rewrite(uint32_t lid); + + inline void visitRemove(uint32_t lid); +}; + + +class 
DocumentStore::WrapVisitorProgress : public IDataStoreVisitorProgress +{ + IDocumentStoreVisitorProgress &_visitorProgress; +public: + virtual void + updateProgress(double progress) + { + _visitorProgress.updateProgress(progress); + } + + WrapVisitorProgress(IDocumentStoreVisitorProgress &visitProgress) + : _visitorProgress(visitProgress) + { + } +}; + + +template <> +void +DocumentStore::WrapVisitor:: +rewrite(uint32_t lid, const document::Document &doc) +{ + (void) lid; + (void) doc; +} + +template <> +void +DocumentStore::WrapVisitor:: +rewrite(uint32_t lid) +{ + (void) lid; +} + + +template <> +void +DocumentStore::WrapVisitor:: +visitRemove(uint32_t lid) +{ + _visitor.visit(lid); +} + + +template <> +void +DocumentStore::WrapVisitor:: +rewrite(uint32_t lid, const document::Document &doc) +{ + _ds.write(_syncToken, doc, lid); +} + +template <> +void +DocumentStore::WrapVisitor:: +rewrite(uint32_t lid) +{ + _ds.remove(_syncToken, lid); +} + + +template <> +void +DocumentStore::WrapVisitor:: +visitRemove(uint32_t lid) +{ + (void) lid; +} + + + +template +void +DocumentStore::WrapVisitor::visit(uint32_t lid, + const void *buffer, + size_t sz) +{ + (void) lid; + (void) buffer; + (void) sz; + + Value value; + vespalib::DataBuffer buf(4096); + buf.clear(); + buf.writeBytes(buffer, sz); + ssize_t len = sz; + if (len > 0) { + value.set(std::move(buf), len, _compression); + } + if (! 
value.empty()) { + document::Document::UP doc(value.deserializeDocument(_repo)); + _visitor.visit(lid, *doc); + rewrite(lid, *doc); + } else { + visitRemove(lid); + rewrite(lid); + } +} + + +template +DocumentStore::WrapVisitor:: +WrapVisitor(Visitor &visitor, + const document::DocumentTypeRepo &repo, + const document::CompressionConfig &compression, + IDocumentStore &ds, + uint64_t syncToken) + : _visitor(visitor), + _repo(repo), + _compression(compression), + _ds(ds), + _syncToken(syncToken) +{ +} + + +void +DocumentStore::accept(IDocumentStoreReadVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) +{ + WrapVisitor wrap(visitor, repo, + _store.getCompression(), + *this, + _backingStore. + tentativeLastSyncToken()); + WrapVisitorProgress wrapVisitorProgress(visitorProgress); + _backingStore.accept(wrap, wrapVisitorProgress, false); +} + + +void +DocumentStore::accept(IDocumentStoreRewriteVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) +{ + WrapVisitor wrap(visitor, + repo, + _store.getCompression(), + *this, + _backingStore. 
+ tentativeLastSyncToken()); + WrapVisitorProgress wrapVisitorProgress(visitorProgress); + _backingStore.accept(wrap, wrapVisitorProgress, true); +} + + +double +DocumentStore::getVisitCost() const +{ + return _backingStore.getVisitCost(); +} + +DataStoreStorageStats +DocumentStore::getStorageStats() const +{ + return _backingStore.getStorageStats(); +} + +std::vector +DocumentStore::getFileChunkStats() const +{ + return _backingStore.getFileChunkStats(); +} + +CacheStats DocumentStore::getCacheStats() const { + return CacheStats(_cache->getHit(), _cache->getMiss() + _uncached_lookups, + _cache->size(), _cache->sizeBytes()); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/documentstore.h b/searchlib/src/vespa/searchlib/docstore/documentstore.h new file mode 100644 index 00000000000..947ed58876c --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/documentstore.h @@ -0,0 +1,244 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "idocumentstore.h" +#include "idatastore.h" +#include + +namespace search { + +/** + * Simple document store that contains serialized Document instances. + * updates will be held in memory until flush() is called. + * Uses a Local ID as key. + **/ +class DocumentStore : public IDocumentStore +{ +public: + class Config { + public: + Config() : + _compression(document::CompressionConfig::LZ4, 9, 70), + _maxCacheBytes(1000000000), + _initialCacheEntries(0) + { } + Config(const document::CompressionConfig & compression, size_t maxCacheBytes, size_t initialCacheEntries) : + _compression((maxCacheBytes != 0) ? 
compression : document::CompressionConfig::NONE), + _maxCacheBytes(maxCacheBytes), + _initialCacheEntries(initialCacheEntries) + { } + const document::CompressionConfig & getCompression() const { return _compression; } + size_t getMaxCacheBytes() const { return _maxCacheBytes; } + size_t getInitialCacheEntries() const { return _initialCacheEntries; } + private: + document::CompressionConfig _compression; + size_t _maxCacheBytes; + size_t _initialCacheEntries; + }; + + /** + * Construct a document store on top of an existing data store. + * + * @param config Compression and cache settings for the store. + * @param store The data store used as backing storage; it is held by reference. + * NOTE(review): the previous text described a "simpledocstore.dat" file, a baseDir parameter and a vespalib::IoException; none of these are visible in this constructor - confirm before relying on them. + **/ + DocumentStore(const Config & config, IDataStore & store); + ~DocumentStore(); + + /** + * Make a Document from a stored serialized data blob. + * @param lid The local ID associated with the document. + * @return NULL if there is no document associated with the lid. + **/ + document::Document::UP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const override; + void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const override; + + /** + * Serialize and store a document. + * @param doc The document to store + * @param lid The local ID associated with the document + **/ + void write(uint64_t synkToken, const document::Document& doc, DocumentIdT lid) override; + + /** + * Mark a document as removed. A later read() will return NULL for the given lid. + * @param lid The local ID associated with the document + **/ + void remove(uint64_t syncToken, DocumentIdT lid) override; + + /** + * Flush all in-memory updates to disk. + **/ + void flush(uint64_t syncToken) override; + uint64_t initFlush(uint64_t synctoken) override; + + + /** + * If possible compact the disk. 
+ **/ + void compact(uint64_t syncToken) override; + + /** + * The sync token used for the last successful flush() operation, + * or 0 if no flush() has been performed yet. + * @return Last flushed sync token. + **/ + uint64_t lastSyncToken() const override; + uint64_t tentativeLastSyncToken() const override; + fastos::TimeStamp getLastFlushTime() const override; + + /** + * Get the number of entries (including removed IDs + * or gaps in the local ID sequence) in the document store. + * @return The next local ID expected to be used. + */ + uint64_t nextId() const override { return _backingStore.nextId(); } + + /** + * Calculate memory used by this instance. During flush() actual + * memory usage may be approximately twice the reported amount. + * @return memory usage (in bytes) + **/ + size_t memoryUsed() const override { return _backingStore.memoryUsed(); } + size_t getDiskFootprint() const override { return _backingStore.getDiskFootprint(); } + size_t getDiskBloat() const override { return _backingStore.getDiskBloat(); } + size_t getMaxCompactGain() const override { return _backingStore.getMaxCompactGain(); } + + CacheStats getCacheStats() const override; + + /** + * Calculates memory that is used for meta data by this instance. Calling + * flush() does not free this memory. + * @return memory usage (in bytes) + **/ + size_t memoryMeta() const override { return _backingStore.memoryMeta(); } + + const vespalib::string & getBaseDir() const override { return _backingStore.getBaseDir(); } + + /** + * Visit all documents found in document store. + */ + void + accept(IDocumentStoreReadVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) override; + + /** + * Visit all documents found in document store. 
+ */ + void + accept(IDocumentStoreRewriteVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) override; + + /** + * Return cost of visiting all documents found in document store. + */ + double getVisitCost() const override; + + /* + * Return brief stats for data store. + */ + virtual DataStoreStorageStats getStorageStats() const override; + + /* + * Return detailed stats about underlying files for data store. + */ + virtual std::vector + getFileChunkStats() const override; + +private: + template class WrapVisitor; + class WrapVisitorProgress; + class Value { + public: + typedef std::unique_ptr UP; + Value() : _compressedSize(0), _uncompressedSize(0), _compression(document::CompressionConfig::NONE) { } + + Value(Value && rhs) : + _compressedSize(rhs._compressedSize), + _uncompressedSize(rhs._uncompressedSize), + _compression(rhs._compression), + _buf(std::move(rhs._buf)) + { } + + Value(const Value & rhs) : + _compressedSize(rhs._compressedSize), + _uncompressedSize(rhs._uncompressedSize), + _compression(rhs._compression), + _buf(rhs.size()) + { + memcpy(get(), rhs.get(), size()); + } + Value & operator = (Value && rhs) { + _buf = std::move(rhs._buf); + _compressedSize = rhs._compressedSize; + _uncompressedSize = rhs._uncompressedSize; + _compression = rhs._compression; + return *this; + } + void setCompression(document::CompressionConfig::Type comp, size_t uncompressedSize) { + _compression = comp; + _uncompressedSize = uncompressedSize; + } + document::CompressionConfig::Type getCompression() const { return _compression; } + size_t getUncompressedSize() const { return _uncompressedSize; } + + /** + * Compress buffer into temporary buffer and copy temporary buffer to + * value along with compression config. 
+ */ + void set(vespalib::DataBuffer && buf, ssize_t len, const document::CompressionConfig &compression); + + /** + * Decompress value into temporary buffer and deserialize document from + * the temporary buffer. + */ + document::Document::UP deserializeDocument(const document::DocumentTypeRepo &repo); + + size_t size() const { return _compressedSize; } + bool empty() const { return size() == 0; } + operator const void * () const { return _buf.get(); } + const void * get() const { return _buf.get(); } + void * get() { return _buf.get(); } + + private: + size_t _compressedSize; + size_t _uncompressedSize; + document::CompressionConfig::Type _compression; + vespalib::DefaultAlloc _buf; + }; + class BackingStore { + public: + typedef vespalib::hash_map LidValueMap; + BackingStore(IDataStore & store, const document::CompressionConfig & compression) : + _backingStore(store), + _compression(compression) + { } + bool read(DocumentIdT key, Value & value) const; + void visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const; + void write(DocumentIdT, const Value &) { } + void erase(DocumentIdT ) { } + + const document::CompressionConfig & getCompression(void) const { return _compression; } + private: + IDataStore & _backingStore; + const document::CompressionConfig & _compression; + }; + bool useCache() const { return (_cache->capacityBytes() != 0) && (_cache->capacity() != 0); } + typedef vespalib::cache< vespalib::CacheParam< vespalib::LruParam, + BackingStore, + vespalib::zero, + vespalib::size > > Cache; + + Config _config; + IDataStore & _backingStore; + BackingStore _store; + std::shared_ptr _cache; + mutable volatile uint64_t _uncached_lookups; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.cpp b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp new file mode 100644 index 00000000000..6194c2985b8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.cpp @@ 
-0,0 +1,676 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include +#include +#include "data_store_file_chunk_stats.h" + +LOG_SETUP(".search.filechunk"); + +using vespalib::GenericHeader; +using vespalib::FileHeader; +using vespalib::IoException; +using vespalib::getLastErrorString; +using vespalib::getErrorString; + + +namespace search +{ + +namespace { + +constexpr size_t ALIGNMENT=0x1000; +constexpr size_t ENTRY_BIAS_SIZE=8; + +} + +using vespalib::make_string; + +SummaryException::SummaryException(const vespalib::stringref &msg, + FastOS_FileInterface &file, + const vespalib::stringref &location) + : IoException(make_string("%s : Failing file = '%s'. Reason given by OS = '%s'", + msg.c_str(), file.GetFileName(), file.getLastErrorString().c_str()), + getErrorType(file.GetLastError()), location) +{ +} + +FileChunk::ChunkInfo::ChunkInfo(uint64_t offset, uint32_t size, uint64_t lastSerial) + : _lastSerial(lastSerial), + _offset(offset), + _size(size) +{ + assert(valid()); +} + + +LidInfo::LidInfo(uint32_t fileId, uint32_t chunkId, uint32_t sz) +{ + _value.v.fileId = fileId; + _value.v.chunkId = chunkId; + _value.v.size = sz; + if (fileId >= (1 << 10)) { + throw std::runtime_error( + make_string("LidInfo(fileId=%u, chunkId=%u, size=%u) has invalid fileId larger than %d", + fileId, chunkId, sz, (1 << 10) - 1)); + } + if (chunkId >= (1 << 22)) { + throw std::runtime_error( + make_string("LidInfo(fileId=%u, chunkId=%u, size=%u) has invalid chunkId larger than %d", + fileId, chunkId, sz, (1 << 22) - 1)); + } +} + +DirectIORandRead::DirectIORandRead(const vespalib::string & fileName) + : _file(fileName.c_str()), + _alignment(1), + _granularity(1), + _maxChunkSize(0x100000) +{ + _file.EnableDirectIO(); + if (_file.OpenReadOnly()) { + if (!_file.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) { + LOG(debug, "Direct IO 
setup failed for file %s due to %s", + _file.GetFileName(), _file.getLastErrorString().c_str()); + } + } else { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +DirectIORandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + size_t padBefore(0); + size_t padAfter(0); + bool directio = _file.DirectIOPadding(offset, sz, padBefore, padAfter); + buffer.clear(); + buffer.ensureFree(padBefore + sz + padAfter + _alignment - 1); + if (directio) { + size_t unAligned = (-reinterpret_cast(buffer.getFree()) & (_alignment - 1)); + buffer.moveFreeToData(unAligned); + buffer.moveDataToDead(unAligned); + } + // XXX needs to use pread or file-position-mutex + _file.ReadBuf(buffer.getFree(), padBefore + sz + padAfter, offset - padBefore); + buffer.moveFreeToData(padBefore + sz); + buffer.moveDataToDead(padBefore); + return FSP(); +} + + +int64_t +DirectIORandRead::getSize(void) +{ + return _file.GetSize(); +} + + +MMapRandRead::MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions) + : _file(fileName.c_str()) +{ + _file.enableMemoryMap(mmapFlags); + _file.setFAdviseOptions(fadviseOptions); + if ( ! _file.OpenReadOnly()) { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + + +NormalRandRead::NormalRandRead(const vespalib::string & fileName) + : _file(fileName.c_str()) +{ + if ( ! 
_file.OpenReadOnly()) { + throw SummaryException("Failed opening data file", _file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +MMapRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + const char *ptr = static_cast(_file.MemoryMapPtr(offset)); + vespalib::DataBuffer(ptr, sz).swap(buffer); + return FSP(); +} + +int64_t +MMapRandRead::getSize(void) +{ + return _file.GetSize(); +} + +MMapRandReadDynamic::MMapRandReadDynamic(const vespalib::string &fileName, int mmapFlags, int fadviseOptions) + : _fileName(fileName), + _mmapFlags(mmapFlags), + _fadviseOptions(fadviseOptions) +{ + reopen(); +} + +void +MMapRandReadDynamic::reopen() +{ + std::unique_ptr file(new FastOS_File(_fileName.c_str())); + file->enableMemoryMap(_mmapFlags); + file->setFAdviseOptions(_fadviseOptions); + if (file->OpenReadOnly()) { + _holder.set(file.release()); + _holder.latch(); + } else { + throw SummaryException("Failed opening data file", *file, VESPA_STRLOC); + } +} + +FileRandRead::FSP +MMapRandReadDynamic::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + FSP file(_holder.get()); + const char * data(static_cast(file->MemoryMapPtr(offset))); + if ((data == NULL) || (file->MemoryMapPtr(offset+sz-1) == NULL)) { + // Must check that both start and end of file is mapped in. + // Previous reopen could happend during a partial write of this buffer. + // This should fix bug 4630695. 
+ reopen(); + file = _holder.get(); + data = static_cast(file->MemoryMapPtr(offset)); + } + vespalib::DataBuffer(data, sz).swap(buffer); + return file; +} + +int64_t +MMapRandReadDynamic::getSize(void) +{ + return _holder.get()->GetSize(); +} + +FileRandRead::FSP +NormalRandRead::read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) +{ + buffer.clear(); + buffer.ensureFree(sz); + _file.ReadBuf(buffer.getFree(), sz, offset); + buffer.moveFreeToData(sz); + return FSP(); +} + +int64_t +NormalRandRead::getSize(void) +{ + return _file.GetSize(); +} + +vespalib::string +FileChunk::NameId::createName(const vespalib::string &baseName) const { + vespalib::asciistream os; + os << baseName << '/' << vespalib::setfill('0') << vespalib::setw(19) << getId(); + return os.str(); +} + +vespalib::string +FileChunk::createIdxFileName(const vespalib::string & name) { + return name + ".idx"; +} + +vespalib::string +FileChunk::createDatFileName(const vespalib::string & name) { + return name + ".dat"; +} + +FileChunk::FileChunk(FileId fileId, NameId nameId, const vespalib::string & baseName, + const TuneFileSummary & tune, const IBucketizer * bucketizer, bool skipCrcOnRead) + : _fileId(fileId), + _nameId(nameId), + _name(nameId.createName(baseName)), + _skipCrcOnRead(skipCrcOnRead), + _erasedCount(0), + _erasedBytes(0), + _diskFootprint(0), + _sumNumBuckets(0), + _numUniqueBuckets(0), + _file(), + _bucketizer(bucketizer), + _addedBytes(0), + _tune(tune), + _dataFileName(createDatFileName(_name)), + _idxFileName(createIdxFileName(_name)), + _chunkInfo(), + _dataHeaderLen(0u), + _idxHeaderLen(0u), + _lastPersistedSerialNum(0), + _modificationTime() +{ + FastOS_File dataFile(_dataFileName.c_str()); + if (dataFile.OpenReadOnly()) { + if (!dataFile.Sync()) { + throw SummaryException("Failed syncing dat file", dataFile, VESPA_STRLOC); + } + _diskFootprint += dataFile.GetSize(); + FastOS_File idxFile(_idxFileName.c_str()); + if (idxFile.OpenReadOnly()) { + if (!idxFile.Sync()) { + throw 
SummaryException("Failed syncing idx file", idxFile, VESPA_STRLOC); + } + _diskFootprint += idxFile.GetSize(); + _modificationTime = FileKit::getModificationTime(_idxFileName); + } else { + dataFile.Close(); + throw SummaryException("Failed opening idx file", idxFile, VESPA_STRLOC); + } + } else { + } +} + +FileChunk::~FileChunk() +{ +} + +void +FileChunk::addNumBuckets(size_t numBucketsInChunk) +{ + _sumNumBuckets += numBucketsInChunk; +} + +class TmpChunkMeta : public ChunkMeta, + public std::vector +{ +public: + void fill(vespalib::nbostream & is) { + resize(getNumEntries()); + for (LidMeta & lm : *this) { + lm.deserialize(is); + } + } +}; + +typedef vespalib::Array TmpChunkMetaV; + +namespace { + +void +verifyOrAssert(const TmpChunkMetaV & v) +{ + for (auto prev(v.begin()), it(prev); it != v.end(); ++it) { + assert(prev->getLastSerial() <= it->getLastSerial()); + prev = it; + } +} + +vespalib::string eraseErrorMsg(const vespalib::string & fileName, int error) { + return make_string("Error erasing file '%s'. 
Error is '%s'", + fileName.c_str(), getErrorString(error).c_str()); +} + +} + +void +FileChunk::erase() +{ + _file.reset(); + if (!FastOS_File::Delete(_idxFileName.c_str()) && (errno != ENOENT)) { + throw std::runtime_error(eraseErrorMsg(_idxFileName, errno)); + } + if (!FastOS_File::Delete(_dataFileName.c_str()) && (errno != ENOENT)) { + throw std::runtime_error(eraseErrorMsg(_dataFileName, errno)); + } +} + +size_t +FileChunk::updateLidMap(ISetLid & ds, uint64_t serialNum) +{ + size_t sz(0); + assert(_chunkInfo.empty()); + + FastOS_File idxFile(_idxFileName.c_str()); + idxFile.enableMemoryMap(0); + if (idxFile.OpenReadOnly()) { + if (idxFile.IsMemoryMapped()) { + const int64_t fileSize = idxFile.GetSize(); + if (_idxHeaderLen == 0) { + _idxHeaderLen = readIdxHeader(idxFile); + } + vespalib::nbostream is(static_cast(idxFile.MemoryMapPtr(0)) + _idxHeaderLen, + fileSize - _idxHeaderLen); + TmpChunkMetaV tempVector; + tempVector.reserve(fileSize/(sizeof(ChunkMeta)+sizeof(LidMeta))); + while ( ! is.empty() && is.good()) { + const int64_t lastKnownGoodPos = _idxHeaderLen + is.rp(); + tempVector.push_back(TmpChunkMeta()); + TmpChunkMeta & chunkMeta(tempVector.back()); + try { + chunkMeta.deserialize(is); + chunkMeta.fill(is); + } catch (const vespalib::IllegalStateException & e) { + LOG(warning, "Exception deserializing idx file : %s", e.what()); + LOG(warning, "File '%s' seems to be partially truncated. Will truncate from size=%ld to %ld", + _idxFileName.c_str(), fileSize, lastKnownGoodPos); + FastOS_File toTruncate(_idxFileName.c_str()); + if ( toTruncate.OpenReadWrite()) { + if (toTruncate.SetSize(lastKnownGoodPos)) { + tempVector.resize(tempVector.size() - 1); + } else { + throw SummaryException("SetSize(%ld) failed.", toTruncate, VESPA_STRLOC); + } + } else { + throw SummaryException("Open for truncation failed.", toTruncate, VESPA_STRLOC); + } + } + } + if ( ! 
tempVector.empty()) { + verifyOrAssert(tempVector); + if (tempVector[0].getLastSerial() < serialNum) { + LOG(warning, + "last serial num(%ld) from previous file is " + "bigger than my first(%ld). That is odd." + "Current filename is '%s'", + serialNum, tempVector[0].getLastSerial(), + _idxFileName.c_str()); + serialNum = tempVector[0].getLastSerial(); + } + BucketDensityComputer globalBucketMap(_bucketizer); + // Guard comes from the same bucketizer so the same guard can be used + // for both local and global BucketDensityComputer + vespalib::GenerationHandler::Guard bucketizerGuard = globalBucketMap.getGuard(); + for (const TmpChunkMeta & chunkMeta : tempVector) { + assert(serialNum <= chunkMeta.getLastSerial()); + BucketDensityComputer bucketMap(_bucketizer); + for (size_t i(0), m(chunkMeta.getNumEntries()); i < m; i++) { + const LidMeta & lidMeta(chunkMeta[i]); + bucketMap.recordLid(bucketizerGuard, lidMeta.getLid(), lidMeta.size()); + globalBucketMap.recordLid(bucketizerGuard, lidMeta.getLid(), lidMeta.size()); + ds.setLid(lidMeta.getLid(), LidInfo(getFileId().getId(), _chunkInfo.size(), lidMeta.size())); + _addedBytes += adjustSize(lidMeta.size()); + } + serialNum = chunkMeta.getLastSerial(); + addNumBuckets(bucketMap.getNumBuckets()); + _chunkInfo.push_back(ChunkInfo(chunkMeta.getOffset(), chunkMeta.getSize(), chunkMeta.getLastSerial())); + assert(serialNum >= _lastPersistedSerialNum); + _lastPersistedSerialNum = serialNum; + } + _numUniqueBuckets = globalBucketMap.getNumBuckets(); + } + } else { + assert(idxFile.getSize() == 0); + } + } else { + assert(false); + } + return sz; +} + +void +FileChunk::enableRead() +{ + if (_tune._randRead.getWantDirectIO()) { + LOG(debug, "enableRead(): DirectIORandRead: file='%s'", _dataFileName.c_str()); + _file.reset(new DirectIORandRead(_dataFileName)); + } else if (_tune._randRead.getWantMemoryMap()) { + const int mmapFlags(_tune._randRead.getMemoryMapFlags()); + const int fadviseOptions(_tune._randRead.getAdvise()); + if 
(frozen()) { + LOG(debug, "enableRead(): MMapRandRead: file='%s'", _dataFileName.c_str()); + _file.reset(new MMapRandRead(_dataFileName, mmapFlags, fadviseOptions)); + } else { + LOG(debug, "enableRead(): MMapRandReadDynamic: file='%s'", _dataFileName.c_str()); + _file.reset(new MMapRandReadDynamic(_dataFileName, mmapFlags, fadviseOptions)); + } + } else { + LOG(debug, "enableRead(): NormalRandRead: file='%s'", _dataFileName.c_str()); + _file.reset(new NormalRandRead(_dataFileName)); + } + _dataHeaderLen = readDataHeader(*_file); + if (_dataHeaderLen == 0u) { + throw std::runtime_error(make_string("bad file header: %s", _dataFileName.c_str())); + } +} + +size_t FileChunk::adjustSize(size_t sz) { + return sz + ENTRY_BIAS_SIZE; +} +void +FileChunk::remove(uint32_t lid, uint32_t size) +{ + (void) lid; + _erasedCount++; + _erasedBytes += adjustSize(size); +} + +uint64_t +FileChunk::getLastPersistedSerialNum() const +{ + return _lastPersistedSerialNum; +} + +fastos::TimeStamp +FileChunk::getModificationTime() const +{ + return _modificationTime; +} + +void +FileChunk::appendTo(const IGetLid & db, IWriteData & dest, + uint32_t numChunks, + IFileChunkVisitorProgress *visitorProgress) +{ + assert(frozen() || visitorProgress); + vespalib::GenerationHandler::Guard lidReadGuard(db.getLidReadGuard()); + assert(numChunks <= getNumChunks()); + for (size_t chunkId(0); chunkId < numChunks; chunkId++) { + const ChunkInfo & cInfo(_chunkInfo[chunkId]); + vespalib::DataBuffer whole(0ul, ALIGNMENT); + FileRandRead::FSP keepAlive(_file->read(cInfo.getOffset(), whole, cInfo.getSize())); + Chunk chunk(chunkId, whole.getData(), whole.getDataLen()); + const Chunk::LidList ll(chunk.getUniqueLids()); + for (const Chunk::Entry & e : ll) { + LidInfo lidInfo(getFileId().getId(), chunk.getId(), e.netSize()); + if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) { + vespalib::LockGuard guard(db.getLidGuard(e.getLid())); + if (db.getLid(lidReadGuard, e.getLid()) == lidInfo) { + // I am still in use 
so I need to taken care of. + vespalib::ConstBufferRef data(chunk.getLid(e.getLid())); + dest.write(guard, chunk.getId(), e.getLid(), data.c_str(), data.size()); + } + } + } + if (visitorProgress != NULL) { + visitorProgress->updateProgress(); + } + } + dest.close(); +} + +void +FileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const +{ + if (count == 0) { return; } + uint32_t prevChunk = begin->getChunkId(); + uint32_t start(0); + for (size_t i(0); i < count; i++) { + const LidInfoWithLid & li = *(begin + i); + if (li.getChunkId() != prevChunk) { + ChunkInfo ci = _chunkInfo[prevChunk]; + read(begin + start, i - start, ci, visitor); + prevChunk = li.getChunkId(); + start = i; + } + } + ChunkInfo ci = _chunkInfo[prevChunk]; + read(begin + start, count - start, ci, visitor); +} + +void +FileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, ChunkInfo ci, IBufferVisitor & visitor) const +{ + vespalib::DataBuffer whole(0ul, ALIGNMENT); + FileRandRead::FSP keepAlive = _file->read(ci.getOffset(), whole, ci.getSize()); + Chunk chunk(begin->getChunkId(), whole.getData(), whole.getDataLen(), _skipCrcOnRead); + for (size_t i(0); i < count; i++) { + const LidInfoWithLid & li = *(begin + i); + vespalib::ConstBufferRef buf = chunk.getLid(li.getLid()); + if (buf.size() != 0) { + visitor.visit(li.getLid(), buf); + } + } +} + +ssize_t +FileChunk::read(uint32_t lid, SubChunkId chunkId, + vespalib::DataBuffer & buffer) const +{ + return (chunkId < _chunkInfo.size()) + ? 
read(lid, chunkId, _chunkInfo[chunkId], buffer) + : -1; +} + +ssize_t +FileChunk::read(uint32_t lid, SubChunkId chunkId, const ChunkInfo & chunkInfo, + vespalib::DataBuffer & buffer) const +{ + vespalib::DataBuffer whole(0ul, ALIGNMENT); + FileRandRead::FSP keepAlive(_file->read(chunkInfo.getOffset(), whole, chunkInfo.getSize())); + Chunk chunk(chunkId, whole.getData(), whole.getDataLen(), _skipCrcOnRead); + return chunk.read(lid, buffer); +} + +uint64_t +FileChunk::readDataHeader(FileRandRead &datFile) +{ + uint64_t dataHeaderLen(0); + int64_t fileSize = datFile.getSize(); + uint32_t hl = GenericHeader::getMinSize(); + if (fileSize >= hl) { + vespalib::DataBuffer h(hl, ALIGNMENT); + datFile.read(0, h, hl); + GenericHeader::BufferReader rd(h); + uint32_t headerLen = GenericHeader::readSize(rd); + if (headerLen <= fileSize) { + dataHeaderLen = headerLen; + } + } + return dataHeaderLen; +} + + +uint64_t +FileChunk::readIdxHeader(FastOS_FileInterface &idxFile) +{ + int64_t fileSize = idxFile.GetSize(); + uint32_t hl = GenericHeader::getMinSize(); + uint64_t idxHeaderLen = 0; + if (fileSize >= hl) { + GenericHeader::MMapReader rd(static_cast (idxFile.MemoryMapPtr(0)), hl); + uint32_t headerLen = GenericHeader::readSize(rd); + if (headerLen <= fileSize) { + idxHeaderLen = headerLen; + } + } + if (idxHeaderLen == 0u) { + throw SummaryException("bad file header", idxFile, VESPA_STRLOC); + } + return idxHeaderLen; +} + +void +FileChunk::verify(bool reportOnly) const +{ + (void) reportOnly; + LOG(info, + "Verifying file '%s' with fileid '%u'. " + "erased-count='%u' and erased-bytes='%lu'. 
diskFootprint='%lu'", + _name.c_str(), _fileId.getId(), + _erasedCount, _erasedBytes, _diskFootprint); + size_t lastSerial(0); + size_t chunkId(0); + bool errorInPrev(false); + for (const ChunkInfo & ci : _chunkInfo) { + vespalib::DataBuffer whole(0ul, ALIGNMENT); + FileRandRead::FSP keepAlive(_file->read(ci.getOffset(), whole, ci.getSize())); + try { + Chunk chunk(chunkId++, whole.getData(), whole.getDataLen()); + assert(chunk.getLastSerial() >= lastSerial); + lastSerial = chunk.getLastSerial(); + if (errorInPrev) { + LOG(error, "Last serial number in first good chunk is %ld", chunk.getLastSerial()); + errorInPrev = false; + } + } catch (const std::exception & e) { + LOG(error, + "Errors in chunk number %ld/%ld at file offset %lu and size %u." + " Last known good serial number = %ld\n.Got Exception : %s", + chunkId, _chunkInfo.size(), ci.getOffset(), ci.getSize(), lastSerial, e.what()); + errorInPrev = true; + } + } +} + +uint32_t +FileChunk::getNumChunks() const +{ + return _chunkInfo.size(); +} + +size_t +FileChunk::getMemoryFootprint() const +{ + // The memory footprint does not vary before or after flush + // Once frozen, there is no variable component. 
+ // It is all captured by getMemoryMetaFootprint() + return 0; +} + +size_t +FileChunk::getMemoryMetaFootprint() const +{ + return sizeof(*this) + _chunkInfo.byteSize(); +} + +bool +FileChunk::isIdxFileEmpty(const vespalib::string & name) +{ + vespalib::string fileName(name + ".idx"); + FastOS_File idxFile(fileName.c_str()); + idxFile.enableMemoryMap(0); + if (idxFile.OpenReadOnly()) { + if (idxFile.IsMemoryMapped()) { + int64_t fileSize = idxFile.getSize(); + int64_t idxHeaderLen = FileChunk::readIdxHeader(idxFile); + return fileSize <= idxHeaderLen; + } else if ( idxFile.getSize() == 0u) { + return true; + } else { + throw SummaryException("Failed opening idx file for memorymapping", idxFile, VESPA_STRLOC); + } + } else { + throw SummaryException("Failed opening idx file readonly ", idxFile, VESPA_STRLOC); + } + return false; +} + +void +FileChunk::eraseIdxFile(const vespalib::string & name) +{ + vespalib::string fileName(name + ".idx"); + if ( ! FastOS_File::Delete(fileName.c_str())) { + throw std::runtime_error(make_string("Failed to delete '%s'", fileName.c_str())); + } +} + + +DataStoreFileChunkStats +FileChunk::getStats() const +{ + uint64_t diskFootprint = getDiskFootprint(); + uint64_t diskBloat = getDiskBloat(); + double bucketSpread = getBucketSpread(); + uint64_t serialNum = getLastPersistedSerialNum(); + uint64_t nameId = getNameId().getId(); + return DataStoreFileChunkStats(diskFootprint, diskBloat, bucketSpread, + serialNum, serialNum, nameId); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/filechunk.h b/searchlib/src/vespa/searchlib/docstore/filechunk.h new file mode 100644 index 00000000000..9ef2e733e7b --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/filechunk.h @@ -0,0 +1,338 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + +class IDataStoreVisitorProgress; +class DataStoreFileChunkStats; + +class LidInfo { +public: + LidInfo() : _value() { } + LidInfo(uint64_t rep) { _value.r = rep; } + LidInfo(uint32_t fileId, uint32_t chunkId, uint32_t size); + uint32_t getFileId() const { return _value.v.fileId; } + uint32_t getChunkId() const { return _value.v.chunkId; } + uint32_t size() const { return _value.v.size; } + operator uint64_t () const { return _value.r; } + bool empty() const { return size() == 0; } + bool valid() const { return _value.r != std::numeric_limits::max(); } + + bool operator==(const LidInfo &b) const { + return (getFileId() == b.getFileId()) && + (getChunkId() == b.getChunkId()); + } + bool operator < (const LidInfo &b) const { + return (getFileId() == b.getFileId()) + ? (getChunkId() < b.getChunkId()) + : (getFileId() < b.getFileId()); + } + static uint32_t getMaxFileNum() { return 1 << 10; } + static uint32_t getMaxChunkNum() { return 1 << 22; } +private: + struct Rep { + uint16_t fileId : 10; + uint32_t chunkId : 22; + uint32_t size; + }; + union Value { + Value() : r(std::numeric_limits::max()) { } + Rep v; + uint64_t r; + } _value; +}; + +class LidInfoWithLid : public LidInfo { +public: + LidInfoWithLid(LidInfo lidInfo, uint32_t lid) : LidInfo(lidInfo), _lid(lid) { } + uint32_t getLid() const { return _lid; } +private: + uint32_t _lid; +}; + +typedef std::vector LidInfoWithLidV; + +class ISetLid +{ +public: + virtual ~ISetLid() { } + virtual void setLid(uint32_t lid, const LidInfo & lm) = 0; +}; + +class IGetLid +{ +public: + typedef vespalib::GenerationHandler::Guard Guard; + virtual ~IGetLid() { } + + virtual LidInfo getLid(Guard & guard, uint32_t lid) const = 0; + virtual vespalib::LockGuard getLidGuard(uint32_t lid) const = 0; + virtual Guard getLidReadGuard() const = 0; +}; + +class IWriteData +{ +public: + typedef std::unique_ptr UP; + 
virtual ~IWriteData() { } + + virtual void write(vespalib::LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) = 0; + virtual void close() = 0; +}; + +class IFileChunkVisitorProgress +{ +public: + virtual ~IFileChunkVisitorProgress() { } + virtual void updateProgress() = 0; +}; + +class FileRandRead +{ +public: + typedef std::shared_ptr FSP; + virtual ~FileRandRead() { } + virtual FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) = 0; + virtual int64_t getSize(void) = 0; +}; + +class DirectIORandRead : public FileRandRead +{ +public: + DirectIORandRead(const vespalib::string & fileName); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize(void) override; +private: + FastOS_File _file; + size_t _alignment; + size_t _granularity; + size_t _maxChunkSize; +}; + +class MMapRandRead : public FileRandRead +{ +public: + MMapRandRead(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize(void) override; + const void * getMapping() { return _file.MemoryMapPtr(0); } +private: + FastOS_File _file; +}; + +class MMapRandReadDynamic : public FileRandRead +{ +public: + MMapRandReadDynamic(const vespalib::string & fileName, int mmapFlags, int fadviseOptions); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize(void) override; +private: + void reopen(); + vespalib::string _fileName; + vespalib::PtrHolder _holder; + int _mmapFlags; + int _fadviseOptions; +}; + +class NormalRandRead : public FileRandRead +{ +public: + NormalRandRead(const vespalib::string & fileName); + FSP read(size_t offset, vespalib::DataBuffer & buffer, size_t sz) override; + int64_t getSize(void) override; +private: + FastOS_File _file; +}; + +class BucketDensityComputer +{ +public: + BucketDensityComputer(const IBucketizer * bucketizer) : _bucketizer(bucketizer), _count(0) 
{ } + void recordLid(const vespalib::GenerationHandler::Guard & guard, uint32_t lid, uint32_t dataSize) { + if (_bucketizer && (dataSize > 0)) { + _count++; + _bucketSet[_bucketizer->getBucketOf(guard, lid)]++; + } + } + size_t getNumBuckets() const { return _bucketSet.size(); } + vespalib::GenerationHandler::Guard getGuard() const { + return _bucketizer + ? _bucketizer->getGuard() + : vespalib::GenerationHandler::Guard(); + } +private: + const IBucketizer * _bucketizer; + size_t _count; + vespalib::hash_map _bucketSet; +}; + +class FileChunk +{ +public: + class NameId { + public: + explicit NameId(size_t id) : _id(id) { } + uint64_t getId() const { return _id; } + vespalib::string createName(const vespalib::string &baseName) const; + bool operator == (const NameId & rhs) const { return _id == rhs._id; } + bool operator != (const NameId & rhs) const { return _id != rhs._id; } + bool operator < (const NameId & rhs) const { return _id < rhs._id; } + NameId next() const { return NameId(_id + 1); } + static NameId first() { return NameId(0u); } + static NameId last() { return NameId(std::numeric_limits::max()); } + private: + uint64_t _id; + }; + class FileId { + public: + explicit FileId(uint32_t id) : _id(id) { } + uint32_t getId() const { return _id; } + bool operator != (const FileId & rhs) const { return _id != rhs._id; } + bool operator == (const FileId & rhs) const { return _id == rhs._id; } + bool operator < (const FileId & rhs) const { return _id < rhs._id; } + FileId prev() const { return FileId(_id - 1); } + FileId next() const { return FileId(_id + 1); } + bool isActive() const { return _id < 0; } + static FileId first() { return FileId(0u); } + static FileId active() { return FileId(-1); } + private: + int32_t _id; + }; + typedef vespalib::hash_map LidBufferMap; + typedef std::unique_ptr UP; + typedef uint32_t SubChunkId; + FileChunk(FileId fileId, NameId nameId, const vespalib::string & baseName, const TuneFileSummary & tune, const IBucketizer * 
bucketizer, bool skipCrcOnRead); + virtual ~FileChunk(); + + virtual size_t updateLidMap(ISetLid & lidMap, uint64_t serialNum); + virtual ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const; + virtual void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const; + void remove(uint32_t lid, uint32_t size); + virtual size_t getDiskFootprint() const { return _diskFootprint; } + virtual size_t getMemoryFootprint() const; + virtual size_t getMemoryMetaFootprint() const; + + virtual size_t getDiskHeaderFootprint(void) const { return _dataHeaderLen + _idxHeaderLen; } + size_t getDiskBloat() const { + return (_addedBytes == 0) + ? getDiskFootprint() + : size_t(getDiskFootprint() * double(_erasedBytes)/_addedBytes); + } + double getBucketSpread() const { + return ((_chunkInfo.empty() || (_numUniqueBuckets == 0)) + ? 1.0 + : double(_sumNumBuckets)/_numUniqueBuckets); + } + void addNumBuckets(size_t numBucketsInChunk); + + FileId getFileId() const { return _fileId; } + NameId getNameId() const { return _nameId; } + size_t getBloatCount() const { return _erasedCount; } + uint64_t getLastPersistedSerialNum() const; + virtual fastos::TimeStamp getModificationTime() const; + virtual bool frozen() const { return true; } + const vespalib::string & getName() const { return _name; } + void compact(const IGetLid & iGetLid); + void appendTo(const IGetLid & db, IWriteData & dest, uint32_t numChunks, IFileChunkVisitorProgress *visitorProgress); + /** + * Must be called after chunk has been created to allow correct + * underlying file object to be created. Must be called before + * any read. + */ + void enableRead(); + // This should never be done to something that is used. Backing + // files are removed and everything dies. + void erase(); + /** + * This will spin through the data and verify the content of both + * the '.dat' and the '.idx' files. + * + * @param reportOnly If set inconsistencies will be written to 'stderr'. 
+ */ + void verify(bool reportOnly) const; + + uint32_t getNumChunks() const; + size_t getNumBuckets() const { return _sumNumBuckets; } + size_t getNumUniqueBuckets() const { return _numUniqueBuckets; } + + virtual DataStoreFileChunkStats getStats() const; + + /** + * Read header and return number of bytes it consist of. + */ + static uint64_t readIdxHeader(FastOS_FileInterface &idxFile); + static uint64_t readDataHeader(FileRandRead &idxFile); + static bool isIdxFileEmpty(const vespalib::string & name); + static void eraseIdxFile(const vespalib::string & name); + static vespalib::string createIdxFileName(const vespalib::string & name); + static vespalib::string createDatFileName(const vespalib::string & name); +private: + typedef std::unique_ptr File; + void loadChunkInfo(); + const FileId _fileId; + const NameId _nameId; + const vespalib::string _name; + const bool _skipCrcOnRead; + uint32_t _erasedCount; + size_t _erasedBytes; + size_t _diskFootprint; + size_t _sumNumBuckets; + size_t _numUniqueBuckets; + File _file; +protected: + void setDiskFootprint(size_t sz) { _diskFootprint = sz; } + static size_t adjustSize(size_t sz); + + class ChunkInfo + { + public: + ChunkInfo() : _lastSerial(0), _offset(0), _size(0) { } + ChunkInfo(size_t offset, uint32_t size, uint64_t lastSerial); + size_t getOffset() const { return _offset; } + uint32_t getSize() const { return _size; } + uint64_t getLastSerial() const { return _lastSerial; } + + bool valid() const { return (_offset != 0) || (_size != 0) || (_lastSerial != 0); } + private: + uint64_t _lastSerial; + size_t _offset; + uint32_t _size; + }; + + void setNumUniqueBuckets(size_t numUniqueBuckets) { _numUniqueBuckets = numUniqueBuckets; } + ssize_t read(uint32_t lid, SubChunkId chunkId, const ChunkInfo & chunkInfo, vespalib::DataBuffer & buffer) const; + void read(LidInfoWithLidV::const_iterator begin, size_t count, ChunkInfo ci, IBufferVisitor & visitor) const; + + typedef vespalib::Array ChunkInfoVector; + const 
IBucketizer * _bucketizer; + size_t _addedBytes; + TuneFileSummary _tune; + vespalib::string _dataFileName; + vespalib::string _idxFileName; + ChunkInfoVector _chunkInfo; + uint32_t _dataHeaderLen; + uint32_t _idxHeaderLen; + uint64_t _lastPersistedSerialNum; + fastos::TimeStamp _modificationTime; +}; + +class SummaryException : public vespalib::IoException +{ +public: + SummaryException(const vespalib::stringref &msg, + FastOS_FileInterface & file, + const vespalib::stringref &location); +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/ibucketizer.h b/searchlib/src/vespa/searchlib/docstore/ibucketizer.h new file mode 100644 index 00000000000..63757f71f45 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/ibucketizer.h @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { + +class IBucketizer +{ +public: + typedef std::shared_ptr SP; + virtual ~IBucketizer() { } + virtual uint64_t getBucketOf(const vespalib::GenerationHandler::Guard & guard, uint32_t lid) const = 0; + virtual vespalib::GenerationHandler::Guard getGuard() const = 0; +}; + +class IBufferVisitor { +public: + virtual ~IBufferVisitor() { } + virtual void visit(uint32_t lid, vespalib::ConstBufferRef buffer) = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.cpp b/searchlib/src/vespa/searchlib/docstore/idatastore.cpp new file mode 100644 index 00000000000..e0e788968c1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/idatastore.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "idatastore.h" + +namespace search { + +IDataStore::IDataStore(const vespalib::string& dirName) : + _nextId(0), + _dirName(dirName) +{ +} + +IDataStore::~IDataStore() +{ +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/idatastore.h b/searchlib/src/vespa/searchlib/docstore/idatastore.h new file mode 100644 index 00000000000..0db069d2fa3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/idatastore.h @@ -0,0 +1,187 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include "data_store_file_chunk_stats.h" + +namespace search { + +class IDataStoreVisitor +{ +public: + virtual ~IDataStoreVisitor() { } + virtual void visit(uint32_t lid, const void *buffer, size_t sz) = 0; +}; + +class IDataStoreVisitorProgress +{ +public: + virtual ~IDataStoreVisitorProgress() { } + virtual void updateProgress(double progress) = 0; +}; + +/** + * Simple data storage for byte arrays. + * A small integer key is associated with each byte array; + * a zero-sized array is equivalent to a removed key. + * Changes are held in memory until flush() is called. + * A sync token is associated with each flush(). + **/ +class IDataStore +{ +public: + typedef std::vector LidVector; + /** + * Construct an idata store. + * A data store has a base directory. The rest is up to the implementation. + * + * @param dirName The directory that will contain the data file. + **/ + IDataStore(const vespalib::string & dirName); + + /** + * Allow inhertitance. + **/ + virtual ~IDataStore(); + + /** + * Read data from the data store into a buffer. + * @param lid The local ID associated with the data. + * @param buffer The buffer where the data will be written + * @param len On return is set to the number of bytes written to buffer + * @return true if non-zero-size data was found. 
+ **/ + virtual ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const = 0; + virtual void read(const LidVector & lids, IBufferVisitor & visitor) const = 0; + + /** + * Write data to the data store. + * @param serialNum The official unique reference number for this operation. + * @param lid The local ID associated with the data. + * @param buffer The source where the data will be fetched. + * @param len The number of bytes to fetch from the buffer. + **/ + virtual void write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) = 0; + + /** + * Remove old data for a key. Equivalent to write with len==0. + * @param serialNum The official unique reference number for this operation. + * @param lid The local ID associated with the data. + **/ + virtual void remove(uint64_t serialNum, uint32_t lid) = 0; + + /** + * Flush in-memory data to disk. + **/ + virtual void flush(uint64_t syncToken) = 0; + + /* + * Prepare for flushing in-memory data to disk. + */ + virtual uint64_t initFlush(uint64_t syncToken) = 0; + + /** + * Calculate memory used by this instance. During flush() actual + * memory usage may be approximately twice the reported amount. + * @return memory usage (in bytes) + **/ + virtual size_t memoryUsed() const = 0; + + /** + * Calculates memory that is used for meta data by this instance. Calling + * flush() does not free this memory. + * @return memory usage (in bytes) + **/ + virtual size_t memoryMeta() const = 0; + + /** + * Calculates how much disk is used + * @return disk space used. + */ + virtual size_t getDiskFootprint() const = 0; + + /** + * Calculates how much disk is used by file headers. + * @return disk space used. + */ + virtual size_t getDiskHeaderFootprint() const { return 0u; } + /** + * Calculates how much wasted space there is. + * @return disk bloat. + */ + virtual size_t getDiskBloat() const = 0; + + /** + * Calculates how much diskspace can be compacted during a flush. 
+ * default is to return the bloat limit, but as some targets have some internal limits + * to avoid misuse we let them report a more conservative number here if necessary. + * @return diskspace to be gained. + */ + virtual size_t getMaxCompactGain() const { return getDiskBloat(); } + + + /** + * The sync token used for the last successful flush() operation, + * or 0 if no flush() has been performed yet. + * @return Last flushed sync token. + **/ + virtual uint64_t lastSyncToken() const = 0; + + /* + * The sync token used for last write operation. + */ + virtual uint64_t tentativeLastSyncToken() const = 0; + + /** + * The time of the last flush operation, + * or 0 if no flush has been performed yet. + * @return Time of last flush. + **/ + virtual fastos::TimeStamp getLastFlushTime() const = 0; + + /** + * Visit all data found in data store. + */ + virtual void accept(IDataStoreVisitor &visitor, IDataStoreVisitorProgress &visitorProgress, bool prune) = 0; + + /** + * Return cost of visiting all data found in data store. + */ + virtual double getVisitCost() const = 0; + + /* + * Return brief stats for data store. + */ + virtual DataStoreStorageStats getStorageStats() const = 0; + + /* + * Return detailed stats about underlying files for data store. + */ + virtual std::vector getFileChunkStats() const = 0; + + /** + * Get the number of entries (including removed IDs + * or gaps in the local ID sequence) in the data store. + * @return The next local ID expected to be used + */ + uint64_t nextId() const { return _nextId; } + + /** + * Returns the name of the base directory where the data file is stored. 
+ **/ + const vespalib::string & getBaseDir() const { return _dirName; } + +protected: + void setNextId(uint64_t id) { _nextId = id; } + +private: + uint64_t _nextId; + vespalib::string _dirName; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp b/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp new file mode 100644 index 00000000000..1042e0dcc12 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.cpp @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "idocumentstore.h" + +namespace search { + +IDocumentStore::IDocumentStore(void) +{ +} + +IDocumentStore::~IDocumentStore() +{ +} + +void IDocumentStore::visit(const LidVector & lids, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const { + for (uint32_t lid : lids) { + visitor.visit(lid, read(lid, repo)); + } +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/idocumentstore.h b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h new file mode 100644 index 00000000000..902cb3e8d4b --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/idocumentstore.h @@ -0,0 +1,203 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace search { + +class CacheStats; + +class IDocumentStoreReadVisitor +{ +public: + virtual ~IDocumentStoreReadVisitor() { } + virtual void visit(uint32_t lid, const document::Document &doc) = 0; + virtual void visit(uint32_t lid) = 0; +}; + +class IDocumentStoreRewriteVisitor +{ +public: + virtual ~IDocumentStoreRewriteVisitor() { } + virtual void visit(uint32_t lid, document::Document &doc) = 0; +}; + +class IDocumentStoreVisitorProgress +{ +public: + virtual ~IDocumentStoreVisitorProgress() { } + + virtual void updateProgress(double progress) = 0; +}; + +class IDocumentVisitor +{ +public: + virtual ~IDocumentVisitor() { } + virtual void visit(uint32_t lid, document::Document::UP doc) = 0; +private: +}; + +/** + * Simple document store that contains serialized Document instances. + * Updates will be held in memory until flush() is called. + * Uses a Local ID as key. + **/ +class IDocumentStore +{ +public: + /** + * Convenience typedef for a shared pointer to this class. + **/ + typedef std::shared_ptr SP; + typedef std::vector LidVector; + + /** + * Construct a document store. + * + * @throws vespalib::IoException if the file is corrupt or other IO problems occur. + * @param docMan The document type manager to use when deserializing. + * @param baseDir The path to a directory where the implementation specific files will reside. + **/ + IDocumentStore(void); + virtual ~IDocumentStore(); + + /** + * Make a Document from a stored serialized data blob. + * @param lid The local ID associated with the document. + * @return NULL if there is no document associated with the lid. + **/ + virtual document::Document::UP read(DocumentIdT lid, const document::DocumentTypeRepo &repo) const = 0; + virtual void visit(const LidVector & lidVector, const document::DocumentTypeRepo &repo, IDocumentVisitor & visitor) const; + + /** + * Serialize and store a document. 
+ * @param doc The document to store + * @param lid The local ID associated with the document + **/ + virtual void write(uint64_t syncToken, const document::Document& doc, DocumentIdT lid) = 0; + + /** + * Mark a document as removed. A later read() will return NULL for the given lid. + * @param lid The local ID associated with the document + **/ + virtual void remove(uint64_t syncToken, DocumentIdT lid) = 0; + + /** + * Flush all in-memory updates to disk. + **/ + virtual void flush(uint64_t syncToken) = 0; + + virtual uint64_t initFlush(uint64_t synctoken) = 0; + + /** + * If possible compact the disk. + **/ + virtual void compact(uint64_t syncToken) = 0; + + /** + * The sync token used for the last successful flush() operation, + * or 0 if no flush() has been performed yet. + * @return Last flushed sync token. + **/ + virtual uint64_t lastSyncToken() const = 0; + + /* + * The sync token used for last write operation. + */ + virtual uint64_t tentativeLastSyncToken() const = 0; + + /** + * The time of the last flush operation, + * or 0 if no flush has been performed yet. + * @return Time of last flush. + **/ + virtual fastos::TimeStamp getLastFlushTime() const = 0; + + /** + * Get the number of entries (including removed IDs + * or gaps in the local ID sequence) in the document store. + * @return The next local ID expected to be used. + */ + virtual uint64_t nextId() const = 0; + + /** + * Calculate memory used by this instance. During flush() actual + * memory usage may be approximately twice the reported amount. + * @return memory usage (in bytes) + **/ + virtual size_t memoryUsed() const = 0; + + /** + * Calculates memory that is used for meta data by this instance. Calling + * flush() does not free this memory. + * @return memory usage (in bytes) + **/ + virtual size_t memoryMeta() const = 0; + + /** + * Calculates how much disk is used + * @return disk space used. 
+ */ + virtual size_t getDiskFootprint() const = 0; + /** + * Calculates how much wasted space there is. + * @return disk bloat. + */ + virtual size_t getDiskBloat() const = 0; + + /** + * Calculates how much diskspace can be compacted during a flush. + * default is to return the bloat limit, but as some targets have some internal limits + * to avoid misuse we let them report a more conservative number here if necessary. + * @return diskspace to be gained. + */ + virtual size_t getMaxCompactGain() const { return getDiskBloat(); } + + /** + * Returns statistics about the cache. + */ + virtual CacheStats getCacheStats() const = 0; + + /** + * Returns the base directory from which all structures are stored. + **/ + virtual const vespalib::string & getBaseDir() const = 0; + + /** + * Visit all documents found in document store. + */ + virtual void + accept(IDocumentStoreReadVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) = 0; + + /** + * Visit all documents found in document store. + */ + virtual void + accept(IDocumentStoreRewriteVisitor &visitor, + IDocumentStoreVisitorProgress &visitorProgress, + const document::DocumentTypeRepo &repo) = 0; + + /** + * Return cost of visiting all documents found in document store. + */ + virtual double getVisitCost() const = 0; + + /* + * Return brief stats for data store. + */ + virtual DataStoreStorageStats getStorageStats() const = 0; + + /* + * Return detailed stats about underlying files for data store. + */ + virtual std::vector getFileChunkStats() const = 0; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/liddatastore.h b/searchlib/src/vespa/searchlib/docstore/liddatastore.h new file mode 100644 index 00000000000..1f8472df716 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/liddatastore.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { + +/** + * Factor out stuff common to MultiDataStore and SimpleDatastore + **/ +class LidDataStore : public IDataStore +{ +public: + /** + * Construct an idata store. + * A data store has a base directory. The rest is up to the implementation. + * + * @param dirName The directory that will contain the data file. + **/ + LidDataStore(const vespalib::string & dirName) : IDataStore(dirName), _lastSyncToken(0) { } + + + /** + * The sync token used for the last successful flush() operation, + * or 0 if no flush() has been performed yet. + * @return Last flushed sync token. + **/ + virtual uint64_t lastSyncToken() const { return _lastSyncToken; } + + virtual size_t getDiskBloat() const { return 0; } + + /** + * Flush all in-memory data to disk. + **/ + virtual void flushAll(uint64_t syncToken) { + flush(syncToken); + } + + /** + * Get the number of entries (including removed IDs + * or gaps in the local ID sequence) in the data store. + * @return The next local ID expected to be used + */ +// uint64_t nextId() const { return _nextId; } + + +protected: + void setLastSyncToken(uint64_t last) { _lastSyncToken = last; } +// void setNextId(uint64_t id) { _nextId = id; } + +private: + uint64_t _lastSyncToken; +// uint64_t _nextId; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp new file mode 100644 index 00000000000..080dc71cbf2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.cpp @@ -0,0 +1,1240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "logdatastore.h" +#include +#include +#include +#include +LOG_SETUP(".searchlib.docstore.logdatastore"); +#include +#include +#include + +namespace search +{ + +using vespalib::LockGuard; +using vespalib::getLastErrorString; +using vespalib::getErrorString; +using vespalib::GenerationHandler; +using vespalib::make_string; +using common::FileHeaderContext; +using std::runtime_error; + +LogDataStore::LogDataStore(vespalib::ThreadStackExecutorBase &executor, + const vespalib::string &dirName, + const Config &config, + const GrowStrategy &growStrategy, + const TuneFileSummary &tune, + const FileHeaderContext &fileHeaderContext, + transactionlog::SyncProxy &tlSyncer, + const IBucketizer::SP & bucketizer, + bool readOnly) + : IDataStore(dirName), + _config(config), + _tune(tune), + _fileHeaderContext(fileHeaderContext), + _genHandler(), + _lidInfo(growStrategy.getDocsInitialCapacity(), + growStrategy.getDocsGrowPercent(), + growStrategy.getDocsGrowDelta()), + _fileChunks(), + _holdFileChunks(), + _active(0), + _prevActive(FileId::active()), + _readOnly(readOnly), + _executor(executor), + _initFlushSyncToken(0), + _tlSyncer(tlSyncer), + _bucketizer(bucketizer) +{ + // Reserve space for 1TB summary in order to avoid locking. + _fileChunks.reserve(LidInfo::getMaxFileNum()); + _holdFileChunks.resize(LidInfo::getMaxFileNum()); + + preload(); + updateLidMap(); + updateSerialNum(); +} + +void +LogDataStore::updateSerialNum() +{ + LockGuard guard(_updateLock); + if (getPrevActive(guard) != NULL) { + if (getActive(guard).getSerialNum() < + getPrevActive(guard)->getLastPersistedSerialNum()) { + getActive(guard).setSerialNum(getPrevActive(guard)->getLastPersistedSerialNum()); + } + } +} + +LogDataStore::~LogDataStore() +{ + // Must be called before ending threads as there are sanity checks. 
+ _fileChunks.clear(); + //_executor.shutdown(); + _executor.sync(); + _genHandler.updateFirstUsedGeneration(); + _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration()); +} + +void +LogDataStore::updateLidMap() +{ + uint64_t lastSerialNum(0); + for (FileChunk::UP & fc : _fileChunks) { + fc->updateLidMap(*this, lastSerialNum); + lastSerialNum = fc->getLastPersistedSerialNum(); + } +} + +/** + * Read the data for a set of lids. + * Lids outside the lid map, and lids whose entry is empty or invalid, are skipped. + * The remaining entries are sorted and then passed, together with the visitor, + * to FileChunk::read() in runs that share the same file id. + **/ +void +LogDataStore::read(const LidVector & lids, IBufferVisitor & visitor) const +{ + LidInfoWithLidV orderedLids; + GenerationHandler::Guard guard(_genHandler.takeGuard()); + for (uint32_t lid : lids) { + /* Same bounds check as read(uint32_t, DataBuffer &): never index _lidInfo with a lid beyond its size. */ + if (lid >= _lidInfo.size()) { continue; } + LidInfo li = _lidInfo[lid]; + if (!li.empty() && li.valid()) { + orderedLids.emplace_back(li, lid); + } + } + if (orderedLids.empty()) { return; } + + std::sort(orderedLids.begin(), orderedLids.end()); + uint32_t prevFile = orderedLids[0].getFileId(); + uint32_t start = 0; + for (size_t curr(1); curr < orderedLids.size(); curr++) { + const LidInfoWithLid & li = orderedLids[curr]; + if (prevFile != li.getFileId()) { + const FileChunk & fc(*_fileChunks[prevFile]); + fc.read(orderedLids.begin() + start, curr - start, visitor); + start = curr; + prevFile = li.getFileId(); + } + } + const FileChunk & fc(*_fileChunks[prevFile]); + fc.read(orderedLids.begin() + start, orderedLids.size() - start, visitor); +} + +ssize_t +LogDataStore::read(uint32_t lid, vespalib::DataBuffer& buffer) const +{ + ssize_t sz(0); + if (lid < _lidInfo.size()) { + LidInfo li(0); + { + GenerationHandler::Guard guard(_genHandler.takeGuard()); + li = _lidInfo[lid]; + } + if (!li.empty() && li.valid()) { + const FileChunk & fc(*_fileChunks[li.getFileId()]); + sz = fc.read(lid, li.getChunkId(), buffer); + } + } + return sz; +} + + +void +LogDataStore::write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) +{ + LockGuard guard(_updateLock); + WriteableFileChunk & active = getActive(guard); + write(guard, active, serialNum, lid, buffer, len); +} + +void +LogDataStore::write(LockGuard
guard, FileId destinationFileId, uint32_t lid, const void * buffer, size_t len) +{ + WriteableFileChunk & destination = static_cast(*_fileChunks[destinationFileId.getId()]); + write(guard, destination, destination.getSerialNum(), lid, buffer, len); +} + +void +LogDataStore::write(LockGuard guard, WriteableFileChunk & destination, + uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) +{ + LidInfo lm = destination.append(serialNum, lid, buffer, len); + setLid(lid, lm); + if (destination.getFileId() == getActiveFileId(guard)) { + requireSpace(guard, destination); + } +} + +void +LogDataStore::requireSpace(LockGuard guard, WriteableFileChunk & active) +{ + assert(active.getFileId() == getActiveFileId(guard)); + size_t oldSz(active.getDiskFootprint()); + LOG(spam, "Checking file %s size %ld < %ld", + active.getName().c_str(), oldSz, _config.getMaxFileSize()); + if (oldSz > _config.getMaxFileSize()) { + FileId fileId = allocateFileId(guard); + _fileChunks[fileId.getId()] = createWritableFile(fileId, active.getSerialNum()); + setActive(guard, fileId); + std::unique_ptr activeHolder = holdFileChunk(active.getFileId()); + guard.unlock(); + // Write chunks to old .dat file + // Note: Feed latency spike + active.flush(true, active.getSerialNum()); + // Sync transaction log + _tlSyncer.sync(active.getSerialNum()); + // sync old active .dat file, write pending chunks to old .idx file + // and sync old .idx file to disk. + active.flushPendingChunks(active.getSerialNum()); + active.freeze(); + // TODO: Delay create of new file + LOG(debug, "Closed file %s of size %ld due to maxsize of %ld reached. 
Bloat is %ld", + active.getName().c_str(), active.getDiskFootprint(), + _config.getMaxFileSize(), active.getDiskBloat()); + } +} + +uint64_t +LogDataStore::lastSyncToken() const +{ + LockGuard guard(_updateLock); + uint64_t lastSerial(getActive(guard).getLastPersistedSerialNum()); + if (lastSerial == 0) { + const FileChunk * prev = getPrevActive(guard); + if (prev != NULL) { + lastSerial = prev->getLastPersistedSerialNum(); + } + } + return lastSerial; +} + +uint64_t +LogDataStore::tentativeLastSyncToken() const +{ + LockGuard guard(_updateLock); + return getActive(guard).getSerialNum(); +} + +fastos::TimeStamp +LogDataStore::getLastFlushTime() const +{ + if (lastSyncToken() == 0) { + return fastos::TimeStamp(); + } + LockGuard guard(_updateLock); + fastos::TimeStamp timeStamp(getActive(guard).getModificationTime()); + if (timeStamp == 0) { + const FileChunk * prev = getPrevActive(guard); + if (prev != nullptr) { + timeStamp = prev->getModificationTime(); + } + } + return timeStamp; +} + +void +LogDataStore::remove(uint64_t serialNum, uint32_t lid) +{ + LockGuard guard(_updateLock); + if (lid < _lidInfo.size()) { + LidInfo lm = _lidInfo[lid]; + if (lm.valid()) { + _fileChunks[lm.getFileId()]->remove(lid, lm.size()); + } + lm = getActive(guard).append(serialNum, lid, NULL, 0); + assert( lm.empty() ); + _lidInfo[lid] = lm; + } +} + +namespace { + +vespalib::string bloatMsg(size_t bloat, size_t usage) { + return make_string("Disk bloat is now at %ld of %ld at %2.2f percent", bloat, usage, (bloat*100.0)/usage); +} + +} + +void +LogDataStore::compact(uint64_t syncToken) +{ + uint64_t usage = getDiskFootprint(); + uint64_t bloat = getDiskBloat(); + LOG(debug, "%s", bloatMsg(bloat, usage).c_str()); + if ((_fileChunks.size() > 1) && + ( isBucketSpreadTooLarge(getMaxBucketSpread()) || + isBloatOverLimit(bloat, usage))) + { + LOG(info, "%s. 
Will compact", bloatMsg(bloat, usage).c_str()); + compactWorst(); + usage = getDiskFootprint(); + bloat = getDiskBloat(); + LOG(info, "Done compacting. %s", bloatMsg(bloat, usage).c_str()); + } + + flushActiveAndWait(syncToken); +} + +size_t +LogDataStore::getMaxCompactGain() const +{ + const size_t diskFootPrint = getDiskFootprint(); + const size_t maxConfiguredDiskBloat = diskFootPrint * _config.getMaxDiskBloatFactor(); + double maxSpread = getMaxBucketSpread(); + size_t bloat = getDiskBloat(); + if (bloat < maxConfiguredDiskBloat) { + bloat = 0; + } + size_t spreadAsBloat = diskFootPrint * (1.0 - 1.0/maxSpread); + if ( ! isBucketSpreadTooLarge(maxSpread)) { + spreadAsBloat = 0; + } + return (bloat + spreadAsBloat); +} + +void +LogDataStore::flush(uint64_t syncToken) +{ + WriteableFileChunk * active = NULL; + std::unique_ptr activeHolder; + assert(syncToken == _initFlushSyncToken); + { + LockGuard guard(_updateLock); + // Note: Feed latency spike + getActive(guard).flush(true, syncToken); + active = &getActive(guard); + activeHolder = holdFileChunk(active->getFileId()); + } + active->flushPendingChunks(syncToken); + activeHolder.reset(); + LOG(info, "Flushing. 
%s",bloatMsg(getDiskBloat(), getDiskFootprint()).c_str()); +} + + +uint64_t +LogDataStore::initFlush(uint64_t syncToken) +{ + assert(syncToken >= _initFlushSyncToken); + syncToken = flushActive(syncToken); + _initFlushSyncToken = syncToken; + return syncToken; +} + +class Compacter : public IWriteData +{ +public: + Compacter(LogDataStore & ds) + : _ds(ds) + { + } + void + write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override { + (void) chunkId; + FileChunk::FileId fileId= _ds.getActiveFileId(guard); + _ds.write(guard, fileId, lid, buffer, sz); + } + void close() override { } +private: + LogDataStore & _ds; +}; + +typedef std::unique_ptr BufferUP; +class StoreByBucket +{ +public: + StoreByBucket(); + class IWrite { + public: + virtual ~IWrite() { } + virtual void write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) = 0; + }; + void add(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz); + void drain(IWrite & drain); + size_t getChunkCount() const { return _chunks.size(); } + size_t getBucketCount() const { return _where.size(); } + size_t getLidCount() const { + size_t lidCount(0); + for (const auto & it : _where) { + lidCount += it.second.size(); + } + return lidCount; + } +private: + void closeCurrent(); + void createCurrent(); + struct Index { + Index(uint32_t id, uint32_t chunkId, uint32_t entry) : _id(id), _chunkId(chunkId), _lid(entry) { } + uint32_t _id; + uint32_t _chunkId; + uint32_t _lid; + }; + std::vector _chunks; + Chunk::UP _current; + std::map> _where; +}; + +StoreByBucket::StoreByBucket() : + _chunks(), + _current(), + _where() +{ + createCurrent(); +} + +void +StoreByBucket::add(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) +{ + if ( ! 
_current->hasRoom(sz)) { + closeCurrent(); + createCurrent(); + } + Index idx(_chunks.size(), chunkId, lid); + _current->append(lid, buffer, sz); + _where[bucketId].push_back(idx); +} + +void StoreByBucket::createCurrent() +{ + _current.reset(new Chunk(_chunks.size(), Chunk::Config(0x10000, 1000))); +} + +void +StoreByBucket::closeCurrent() +{ + BufferUP buffer(new vespalib::DataBuffer()); + document::CompressionConfig lz4(document::CompressionConfig::LZ4); + _current->pack(1, *buffer, lz4); + buffer->shrink(buffer->getDataLen()); + _chunks.push_back(std::move(buffer)); + _current.reset(); +} + +void +StoreByBucket::drain(IWrite & drainer) +{ + closeCurrent(); + std::vector chunks; + for (BufferUP & buffer : _chunks) { + chunks.push_back(Chunk::UP(new Chunk(chunks.size(), buffer->getData(), buffer->getDataLen()))); + buffer.reset(); + } + _chunks.clear(); + for (const auto & it : _where) { + for (Index idx : it.second) { + vespalib::ConstBufferRef data(chunks[idx._id]->getLid(idx._lid)); + drainer.write(it.first, idx._chunkId, idx._lid, data.c_str(), data.size()); + } + } +} + + +class BucketCompacter : public IWriteData, public StoreByBucket::IWrite +{ +public: + using FileId = FileChunk::FileId; + BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination); + void write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override ; + void write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override; + void close() override; +private: + FileId getDestinationId(const LockGuard & guard) const { + return (_destinationFileId.isActive()) ? 
_ds.getActiveFileId(guard) : _destinationFileId; + } + FileId _sourceFileId; + FileId _destinationFileId; + LogDataStore & _ds; + const IBucketizer & _bucketizer; + std::vector _tmpStore; + GenerationHandler::Guard _lidGuard; + GenerationHandler::Guard _bucketizerGuard; + vespalib::hash_map _stat; +}; + +BucketCompacter::BucketCompacter(LogDataStore & ds, const IBucketizer & bucketizer, FileId source, FileId destination) : + _sourceFileId(source), + _destinationFileId(destination), + _ds(ds), + _bucketizer(bucketizer), + _tmpStore(256), + _lidGuard(ds.getLidReadGuard()), + _bucketizerGuard(bucketizer.getGuard()), + _stat() +{ +} + +void +BucketCompacter::write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) +{ + guard.unlock(); + uint64_t bucketId = (sz > 0) ? _bucketizer.getBucketOf(_bucketizerGuard, lid) : 0; + uint32_t hash = XXH32(&bucketId, sizeof(bucketId), 0); + _tmpStore[hash%_tmpStore.size()].add(bucketId, chunkId, lid, buffer, sz); +} + +void +BucketCompacter::close() +{ + size_t lidCount1(0); + size_t bucketCount(0); + size_t chunkCount(0); + for (const StoreByBucket & store : _tmpStore) { + lidCount1 += store.getLidCount(); + bucketCount += store.getBucketCount(); + chunkCount += store.getChunkCount(); + } + LOG(info, "Have read %ld lids and placed them in %ld buckets. 
Temporary compressed in %ld chunks.", + lidCount1, bucketCount, chunkCount); + + for (StoreByBucket & store : _tmpStore) { + store.drain(*this); + } + + size_t lidCount(0); + for (const auto & it : _stat) { + lidCount += it.second; + } + LOG(info, "Compacted %ld lids into %ld buckets", lidCount, _stat.size()); +} + +void +BucketCompacter::write(uint64_t bucketId, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) +{ + _stat[bucketId]++; + LockGuard guard(_ds.getLidGuard(lid)); + LidInfo lidInfo(_sourceFileId.getId(), chunkId, sz); + if (_ds.getLid(_lidGuard, lid) == lidInfo) { + FileId fileId = getDestinationId(guard); + _ds.write(guard, fileId, lid, buffer, sz); + } +} + +double +LogDataStore::getMaxBucketSpread() const +{ + double maxSpread(1.0); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + if (_bucketizer && fc->frozen()) { + maxSpread = std::max(maxSpread, fc->getBucketSpread()); + } + } + } + return maxSpread; +} + +std::pair +LogDataStore::findNextToCompact() +{ + typedef std::multimap> CostMap; + CostMap worstBloat; + CostMap worstSpread; + LockGuard guard(_updateLock); + for (size_t i(0); i < _fileChunks.size(); i++) { + const FileChunk::UP & fc(_fileChunks[i]); + if (fc && fc->frozen() && (_currentlyCompacting.find(fc->getNameId()) == _currentlyCompacting.end())) { + uint64_t usage = fc->getDiskFootprint(); + uint64_t bloat = fc->getDiskBloat(); + if (_bucketizer) { + worstSpread.emplace(fc->getBucketSpread(), FileId(i)); + } + if (usage > 0) { + double tmp(double(bloat)/usage); + worstBloat.emplace(tmp, FileId(i)); + } + } + } + if (LOG_WOULD_LOG(debug)) { + for (const auto & it : worstBloat) { + const FileChunk & fc = *_fileChunks[it.second.getId()]; + LOG(debug, "File '%s' has bloat '%2.2f' and bucket-spread '%1.4f numChunks=%d , numBuckets=%ld, numUniqueBuckets=%ld", + fc.getName().c_str(), it.first * 100, fc.getBucketSpread(), fc.getNumChunks(), fc.getNumBuckets(), fc.getNumUniqueBuckets()); + } + } + std::pair 
retval(false, FileId(-1)); + if ( ! worstBloat.empty() && (worstBloat.begin()->first > _config.getMaxDiskBloatFactor())) { + retval.first = true; + retval.second = worstBloat.begin()->second; + } else if ( ! worstSpread.empty() && (worstSpread.begin()->first > _config.getMaxBucketSpread())) { + retval.first = true; + retval.second = worstSpread.begin()->second; + } + if (retval.first) { + _currentlyCompacting.insert(_fileChunks[retval.second.getId()]->getNameId()); + } + return retval; +} + +void +LogDataStore::compactWorst() { + auto worst = findNextToCompact(); + if (worst.first) { + compactFile(worst.second); + } +} + +SerialNum LogDataStore::flushFile(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken) { + (void) guard; + uint64_t lastSerial(file.getSerialNum()); + if (lastSerial > syncToken) { + syncToken = lastSerial; + } + file.flush(false, syncToken); + return syncToken; +} + +void LogDataStore::flushFileAndWait(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken) { + syncToken = flushFile(guard, file, syncToken); + file.waitForDiskToCatchUpToNow(); + _tlSyncer.sync(syncToken); + file.flushPendingChunks(syncToken); +} + +SerialNum LogDataStore::flushActive(SerialNum syncToken) { + LockGuard guard(_updateLock); + WriteableFileChunk &active = getActive(guard); + return flushFile(guard, active, syncToken); +} + +void LogDataStore::flushActiveAndWait(SerialNum syncToken) { + LockGuard guard(_updateLock); + WriteableFileChunk &active = getActive(guard); + return flushFileAndWait(guard, active, syncToken); +} + +bool LogDataStore::shouldCompactToActiveFile(size_t compactedSize) const { + return _config.compact2ActiveFile() + || (_config.getMinFileSizeFactor() * _config.getMaxFileSize() > compactedSize); +} + +void LogDataStore::compactFile(FileId fileId) +{ + FileChunk::UP & fc(_fileChunks[fileId.getId()]); + NameId compactedNameId = fc->getNameId(); + LOG(info, "Compacting file '%s' which has bloat '%2.2f' and bucket-spread '%1.4f", + 
fc->getName().c_str(), 100*fc->getDiskBloat()/double(fc->getDiskFootprint()), fc->getBucketSpread()); + IWriteData::UP compacter; + FileId destinationFileId = FileId::active(); + if (_bucketizer) { + if ( ! shouldCompactToActiveFile(fc->getDiskFootprint() - fc->getDiskBloat())) { + destinationFileId = allocateFileId(); + FileChunk::UP destination = createWritableFile(destinationFileId, fc->getLastPersistedSerialNum(), + fc->getNameId().next()); + _fileChunks[destination->getFileId().getId()] = std::move(destination); + } + + compacter.reset(new BucketCompacter(*this, *_bucketizer, fc->getFileId(), destinationFileId)); + } else { + compacter.reset(new Compacter(*this)); + } + + fc->appendTo(*this, *compacter, fc->getNumChunks(), nullptr); + + if (destinationFileId.isActive()) { + flushActiveAndWait(0); + } else { + LockGuard guard(_updateLock); + WriteableFileChunk & compactTo = dynamic_cast(*_fileChunks[destinationFileId.getId()]); + flushFileAndWait(guard, compactTo, 0); + compactTo.freeze(); + } + + FastOS_Thread::Sleep(10 * 1000); + FileChunk::UP toDie; + for (;;) { + LockGuard guard(_updateLock); + if (_holdFileChunks[fc->getFileId().getId()] == 0u) { + toDie = std::move(fc); + break; + } + guard.unlock(); + /* + * Wait for requireSpace() and flush() methods to leave chunk + * alone. 
+ */ + FastOS_Thread::Sleep(1000); + } + toDie->erase(); + LockGuard guard(_updateLock); + _currentlyCompacting.erase(compactedNameId); +} + +size_t +LogDataStore::memoryUsed() const +{ + size_t sz(memoryMeta()); + { + LockGuard guard(_updateLock); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + sz += fc->getMemoryFootprint(); + } + } + } + return sz; +} + +size_t +LogDataStore::memoryMeta() const +{ + LockGuard guard(_updateLock); + size_t sz(_lidInfo.getMemoryUsage().allocatedBytes()); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + sz += fc->getMemoryMetaFootprint(); + } + } + return sz; +} + +FileChunk::FileId +LogDataStore::allocateFileId() +{ + LockGuard guard(_updateLock); + return allocateFileId(guard); +} +FileChunk::FileId +LogDataStore::allocateFileId(const LockGuard & guard) +{ + (void) guard; + for (size_t i(0); i < _fileChunks.size(); i++) { + if (_fileChunks[i].get() == nullptr) { + return FileId(i); + } + } + // This assert is to verify that we have not gotten ourselves into a mess + // that would require the use of locks to prevent. Just assure that the + // below resize is 'safe'. 
+ assert(_fileChunks.capacity() > _fileChunks.size()); + _fileChunks.resize(_fileChunks.size()+1); + return FileId(_fileChunks.size() - 1); +} + +size_t +LogDataStore::getDiskFootprint() const +{ + LockGuard guard(_updateLock); + size_t sz(0); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + sz += fc->getDiskFootprint(); + } + } + return sz; +} + + +size_t +LogDataStore::getDiskHeaderFootprint(void) const +{ + LockGuard guard(_updateLock); + size_t sz(0); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + sz += fc->getDiskHeaderFootprint(); + } + } + return sz; +} + + +size_t +LogDataStore::getDiskBloat() const +{ + LockGuard guard(_updateLock); + size_t sz(0); + for (FileId i(0); i < FileId(_fileChunks.size()); i = i.next()) { + /// Do not count the holes in the last file as bloat + if (i != _active) { + const FileChunk * chunk = _fileChunks[i.getId()].get(); + if (chunk != NULL) { + sz += chunk->getDiskBloat(); + } + } + } + return sz; +} + +vespalib::string +LogDataStore::createFileName(NameId id) const +{ + return id.createName(getBaseDir()); +} +vespalib::string +LogDataStore::createDatFileName(NameId id) const +{ + return FileChunk::createDatFileName(id.createName(getBaseDir())); +} + +vespalib::string +LogDataStore::createIdxFileName(NameId id) const +{ + return FileChunk::createIdxFileName(id.createName(getBaseDir())); +} + +FileChunk::UP +LogDataStore::createReadOnlyFile(FileId fileId, NameId nameId) +{ + FileChunk::UP file(new FileChunk(fileId, nameId, getBaseDir(), _tune, + _bucketizer.get(), _config.crcOnReadDisabled())); + file->enableRead(); + return file; +} + +FileChunk::UP +LogDataStore::createWritableFile(FileId fileId, SerialNum serialNum, NameId nameId) +{ + for (const auto & fc : _fileChunks) { + if (fc && (fc->getNameId() == nameId)) { + LOG(error, "We already have a file registered with internal fileId=%u, and external nameId=%ld", + fileId.getId(), nameId.getId()); + return FileChunk::UP(); + } + } + FileChunk::UP 
file(new WriteableFileChunk(_executor, fileId, nameId, getBaseDir(), + serialNum, _config.getFileConfig(), _tune, _fileHeaderContext, + _bucketizer.get(), _config.crcOnReadDisabled())); + file->enableRead(); + return file; +} + +FileChunk::UP +LogDataStore::createWritableFile(FileId fileId, SerialNum serialNum) +{ + return createWritableFile(fileId, serialNum, NameId(fastos::ClockSystem::now())); +} + +namespace { + +vespalib::string +lsSingleFile(const vespalib::string & fileName) +{ + vespalib::string s; + FastOS_StatInfo stat; + if ( FastOS_File::Stat(fileName.c_str(), &stat)) { + s += make_string("%s %20ld %12ld", fileName.c_str(), stat._modifiedTimeNS, stat._size); + } else { + s = make_string("%s 'stat' FAILED !!", fileName.c_str()); + } + return s; +} + +} + +vespalib::string LogDataStore::ls(const NameIdSet & partList) +{ + vespalib::string s; + for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) { + s += lsSingleFile(createDatFileName(*it)); + s += "\n"; + s += lsSingleFile(createIdxFileName(*it)); + } + return s; +} + + +static bool +hasNonHeaderData(const vespalib::string &name) +{ + FastOS_File file(name.c_str()); + if (!file.OpenReadOnly()) + return false; + int64_t fSize(file.GetSize()); + uint32_t headerLen = 0; + uint32_t minHeaderLen = vespalib::GenericHeader::getMinSize(); + if (fSize < minHeaderLen) + return false; + try { + vespalib::FileHeader h; + headerLen = h.readFile(file); + } catch (vespalib::IllegalHeaderException &e) { + file.SetPosition(0); + try { + vespalib::FileHeader::FileReader fr(file); + uint32_t header2Len = vespalib::FileHeader::readSize(fr); + if (header2Len <= fSize) { + e.throwSelf(); // header not truncated + } + } catch (vespalib::IllegalHeaderException &e2) { + } + return false; + } + return fSize > headerLen; +} + + +void +LogDataStore::verifyModificationTime(const NameIdSet & partList) +{ + FastOS_StatInfo prevDatStat; + FastOS_StatInfo prevIdxStat; + NameId nameId(*partList.begin()); + 
vespalib::string datName(createDatFileName(nameId)); + vespalib::string idxName(createIdxFileName(nameId)); + if ( ! FastOS_File::Stat(datName.c_str(), &prevDatStat)) { + throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", datName.c_str(), ls(partList).c_str())); + } + if ( ! FastOS_File::Stat(idxName.c_str(), &prevIdxStat)) { + throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", idxName.c_str(), ls(partList).c_str())); + } + for (auto it(++partList.begin()), mt(partList.end()); it != mt; ++it) { + vespalib::string prevDatNam(datName); + vespalib::string prevIdxNam(idxName); + FastOS_StatInfo datStat; + FastOS_StatInfo idxStat; + nameId = *it; + datName = createDatFileName(nameId); + idxName = createIdxFileName(nameId); + if ( ! FastOS_File::Stat(datName.c_str(), &datStat)) { + throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", datName.c_str(), ls(partList).c_str())); + } + if ( ! FastOS_File::Stat(idxName.c_str(), &idxStat)) { + throw runtime_error(make_string("Failed to Stat '%s'\nDirectory =\n%s", idxName.c_str(), ls(partList).c_str())); + } + ns_log::Logger::LogLevel logLevel = _config.compact2ActiveFile() + ? 
ns_log::Logger::warning + : ns_log::Logger::debug; + if ((datStat._modifiedTimeNS < prevDatStat._modifiedTimeNS) && hasNonHeaderData(datName)) { + VLOG(logLevel, "Older file '%s' is newer (%ld) than file '%s' (%ld)\nDirectory =\n%s", + prevDatNam.c_str(), prevDatStat._modifiedTimeNS, + datName.c_str(), datStat._modifiedTimeNS, + ls(partList).c_str()); + } + if ((idxStat._modifiedTimeNS < prevIdxStat._modifiedTimeNS) && hasNonHeaderData(idxName)) { + VLOG(logLevel, "Older file '%s' is newer (%ld) than file '%s' (%ld)\nDirectory =\n%s", + prevIdxNam.c_str(), prevIdxStat._modifiedTimeNS, + idxName.c_str(), idxStat._modifiedTimeNS, + ls(partList).c_str()); + } + prevDatStat = datStat; + prevIdxStat = idxStat; + } +} + +void +LogDataStore::preload() +{ + // scan directory + NameIdSet partList = scanDir(getBaseDir(), ".idx"); + NameIdSet datPartList = scanDir(getBaseDir(), ".dat"); + + partList = eraseEmptyIdxFiles(partList); + eraseDanglingDatFiles(partList, datPartList); + + if (!partList.empty()) { + verifyModificationTime(partList); + partList = scanDir(getBaseDir(), ".idx"); + typedef NameIdSet::const_iterator It; + for (It it(partList.begin()), mt(--partList.end()); it != mt; it++) { + _fileChunks.push_back(createReadOnlyFile(FileId(_fileChunks.size()), *it)); + } + _fileChunks.push_back(isReadOnly() + ? createReadOnlyFile(FileId(_fileChunks.size()), *partList.rbegin()) + : createWritableFile(FileId(_fileChunks.size()), getMinLastPersistedSerialNum(), *partList.rbegin())); + } else { + _fileChunks.push_back(createWritableFile(FileId::first(), 0)); + } + _active = FileId(_fileChunks.size() - 1); + _prevActive = _active.prev(); +} + + +LogDataStore::NameIdSet +LogDataStore::eraseEmptyIdxFiles(const NameIdSet &partList) +{ + NameIdSet nonEmptyIdxPartList; + for (const auto & part : partList) { + vespalib::string name(createFileName(part)); + if (FileChunk::isIdxFileEmpty(name)) { + LOG(warning, "We detected an empty idx file for part '%s'. 
Erasing it.", name.c_str()); + FileChunk::eraseIdxFile(name); + } else { + nonEmptyIdxPartList.insert(part); + } + } + return nonEmptyIdxPartList; +} + +void +LogDataStore::eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList) +{ + typedef NameIdSet::const_iterator IT; + + IT iib(partList.begin()); + IT ii(iib); + IT iie(partList.end()); + IT dib(datPartList.begin()); + IT di(dib); + IT die(datPartList.end()); + IT dirb(die); + NameId endMarker(NameId::last()); + + if (dirb != dib) { + --dirb; + } + for (;;) { + if (ii == iie && di == die) { + break; + } + NameId ibase(ii == iie ? endMarker : *ii); + NameId dbase(di == die ? endMarker : *di); + if (ibase < dbase) { + vespalib::string name(createFileName(ibase)); + const char *s = name.c_str(); + throw runtime_error(make_string( "Missing file '%s.dat', found '%s.idx'", s, s)); + } else if (dbase < ibase) { + vespalib::string name(createDatFileName(dbase)); + const char *s = name.c_str(); + LOG(warning, "Removing dangling file '%s'", s); + if (!FastOS_File::Delete(s)) { + vespalib::string e = getErrorString(errno); + throw runtime_error(make_string("Error erasing dangling file '%s'. 
Error is '%s'", s, e.c_str())); + } + ++di; + } else { + ++ii; + ++di; + } + } +} + +LogDataStore::NameIdSet +LogDataStore::scanDir(const vespalib::string &dir, const vespalib::string &suffix) +{ + NameIdSet baseFiles; + FastOS_DirectoryScan dirScan(dir.c_str()); + while (dirScan.ReadNext()) { + if (dirScan.IsRegular()) { + vespalib::stringref file(dirScan.GetName()); + if (file.size() > suffix.size() && + file.find(suffix.c_str()) == file.size() - suffix.size()) { + vespalib::string base(file.substr(0, file.find(suffix.c_str()))); + char *err(NULL); + errno = 0; + NameId baseId(strtoul(base.c_str(), &err, 10)); + if ((errno == 0) && (err[0] == '\0')) { + vespalib::string tmpFull = createFileName(baseId); + vespalib::string tmp = tmpFull.substr(tmpFull.rfind('/') + 1); + assert(tmp == base); + baseFiles.insert(baseId); + } else { + throw runtime_error(make_string("Error converting '%s' to a unsigned integer number. Error occurred at '%s'. Error is '%s'", + base.c_str(), err, getLastErrorString().c_str())); + } + } else { + LOG(debug, "Skipping '%s' since it does not end with '%s'", file.c_str(), suffix.c_str()); + } + } + } + return baseFiles; +} + +void +LogDataStore::setLid(uint32_t lid, const LidInfo & meta) +{ + if (lid < _lidInfo.size()) { + _genHandler.updateFirstUsedGeneration(); + _lidInfo.removeOldGenerations(_genHandler.getFirstUsedGeneration()); + const LidInfo & prev = _lidInfo[lid]; + if (prev.valid()) { + _fileChunks[prev.getFileId()]->remove(lid, prev.size()); + } + } else { + while (lid >= _lidInfo.size()) { + _lidInfo.push_back(LidInfo()); + } + _lidInfo.setGeneration(_genHandler.getNextGeneration()); + _genHandler.incGeneration(); + setNextId(_lidInfo.size()); + } + _lidInfo[lid] = meta; +} + +void +LogDataStore::verify(bool reportOnly) const +{ + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + fc->verify(reportOnly); + } + } +} + +class LogDataStore::WrapVisitor : public IWriteData +{ + IDataStoreVisitor &_visitor; + +public: + void 
write(LockGuard guard, uint32_t chunkId, uint32_t lid, const void *buffer, size_t sz) override { + (void) chunkId; + guard.unlock(); + _visitor.visit(lid, buffer, sz); + } + + WrapVisitor(IDataStoreVisitor &visitor) : _visitor(visitor) { } + void close() override { } +}; + + +class LogDataStore::WrapVisitorProgress : public IFileChunkVisitorProgress +{ + IDataStoreVisitorProgress &_progress; + const uint32_t _totalChunks; + uint32_t _processedChunks; + +public: + virtual + ~WrapVisitorProgress() + { + } + + virtual void + updateProgress() + { + ++_processedChunks; + if (_totalChunks != 0) { + double progress = std::min(static_cast(_processedChunks) / + static_cast(_totalChunks), + 1.0); + _progress.updateProgress(progress); + } + }; + + WrapVisitorProgress(IDataStoreVisitorProgress &progress, + uint32_t totalChunks) + : _progress(progress), + _totalChunks(totalChunks), + _processedChunks(0u) + { + if (totalChunks == 0) { + progress.updateProgress(1.0); + } + } +}; + + +void +LogDataStore::internalFlushAll() +{ + uint64_t flushToken(initFlush(tentativeLastSyncToken())); + _tlSyncer.sync(flushToken); + flush(flushToken); +} + + +void +LogDataStore::accept(IDataStoreVisitor &visitor, + IDataStoreVisitorProgress &visitorProgress, + bool prune) +{ + WrapVisitor wrap(visitor); + internalFlushAll(); + FileIdxVector fileChunks; + fileChunks.reserve(_fileChunks.size()); + for (auto &fc : _fileChunks) { + if (fc && (fc->getFileId() != _active)) { + fileChunks.push_back(fc->getFileId()); + } + } + FileChunk & lfc = *_fileChunks[_active.getId()]; + + uint32_t totalChunks = 0; + for (auto &fc : fileChunks) { + totalChunks += _fileChunks[fc.getId()]->getNumChunks(); + } + uint32_t lastChunks = lfc.getNumChunks(); + totalChunks += lastChunks; + WrapVisitorProgress wrapProgress(visitorProgress, totalChunks); + for (FileId fcId : fileChunks) { + FileChunk & fc = *_fileChunks[fcId.getId()]; + fc.appendTo(*this, wrap, fc.getNumChunks(), &wrapProgress); + if (prune) { + 
internalFlushAll(); + FileChunk::UP toDie; + { + LockGuard guard(_updateLock); + toDie = std::move(_fileChunks[fcId.getId()]); + } + toDie->erase(); + } + } + lfc.appendTo(*this, wrap, lastChunks, &wrapProgress); + if (prune) { + internalFlushAll(); + } +} + + +double +LogDataStore::getVisitCost() const +{ + uint32_t totalChunks = 0; + for (auto &fc : _fileChunks) { + totalChunks += fc->getNumChunks(); + } + return totalChunks; +} + + +class LogDataStore::FileChunkHolder +{ +private: + LogDataStore &_store; + FileId _fileId; +public: + FileChunkHolder(LogDataStore &store, FileId fileId) : _store(store), _fileId(fileId) { } + ~FileChunkHolder() { _store.unholdFileChunk(_fileId); } +}; + +std::unique_ptr +LogDataStore::holdFileChunk(FileId fileId) +{ + assert(fileId.getId() < _holdFileChunks.size()); + assert(_holdFileChunks[fileId.getId()] < 2000u); + ++_holdFileChunks[fileId.getId()]; + return std::unique_ptr(new FileChunkHolder(*this, fileId)); +} + + +void +LogDataStore::unholdFileChunk(FileId fileId) +{ + LockGuard guard(_updateLock); + assert(fileId.getId() < _holdFileChunks.size()); + assert(_holdFileChunks[fileId.getId()] > 0u); + --_holdFileChunks[fileId.getId()]; + // No signalling, compactWorst() sleeps and retries +} + + +DataStoreStorageStats +LogDataStore::getStorageStats() const +{ + uint64_t diskFootprint = getDiskFootprint(); + uint64_t diskBloat = getDiskBloat(); + double maxBucketSpread = getMaxBucketSpread(); + // Note: Naming consistency issue + SerialNum lastSerialNum = tentativeLastSyncToken(); + SerialNum lastFlushedSerialNum = lastSyncToken(); + return DataStoreStorageStats(diskFootprint, diskBloat, maxBucketSpread, + lastSerialNum, lastFlushedSerialNum); +} + + +std::vector +LogDataStore::getFileChunkStats() const +{ + std::vector result; + { + LockGuard guard(_updateLock); + for (const FileChunk::UP & fc : _fileChunks) { + if (fc) { + result.push_back(fc->getStats()); + } + } + } + std::sort(result.begin(), result.end()); + return 
std::move(result); +} + + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/logdatastore.h b/searchlib/src/vespa/searchlib/docstore/logdatastore.h new file mode 100644 index 00000000000..53e5d5a5b69 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/logdatastore.h @@ -0,0 +1,304 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + + +/** + * Simple data storage for byte arrays. + * A small integer key is associated with each byte array; + * a zero-sized array is equivalent to a removed key. + * Changes are held in memory until flush() is called. + * A sync token is associated with each flush(). + **/ +class LogDataStore : public IDataStore, public ISetLid, public IGetLid +{ +private: + using NameId = FileChunk::NameId; + using FileId = FileChunk::FileId; +public: + typedef vespalib::LockGuard LockGuard; + class Config { + public: + Config() + : _maxFileSize(1000000000ul), + _maxDiskBloatFactor(0.2), + _maxBucketSpread(2.5), + _minFileSizeFactor(0.2), + _numThreads(8), + _skipCrcOnRead(false), + _compactToActiveFile(true) + { } + + Config(size_t maxFileSize, + double maxDiskBloatFactor, + double maxBucketSpread, + double minFileSizeFactor, + size_t numThreads, + bool compactToActiveFile, + const WriteableFileChunk::Config & fileConfig) + : _maxFileSize(maxFileSize), + _maxDiskBloatFactor(maxDiskBloatFactor), + _maxBucketSpread(maxBucketSpread), + _minFileSizeFactor(minFileSizeFactor), + _numThreads(numThreads), + _skipCrcOnRead(false), + _compactToActiveFile(compactToActiveFile), + _fileConfig(fileConfig) + { } + + size_t getMaxFileSize() const { return _maxFileSize; } + double getMaxDiskBloatFactor() const { return _maxDiskBloatFactor; } + double getMaxBucketSpread() const { return 
_maxBucketSpread; } + double getMinFileSizeFactor() const { return _minFileSizeFactor; } + + size_t getNumThreads() const { return _numThreads; } + bool crcOnReadDisabled() const { return _skipCrcOnRead; } + void disableCrcOnRead(bool v) { _skipCrcOnRead = v; } + bool compact2ActiveFile() const { return _compactToActiveFile; } + + const WriteableFileChunk::Config & getFileConfig() const { return _fileConfig; } + private: + size_t _maxFileSize; + double _maxDiskBloatFactor; + double _maxBucketSpread; + double _minFileSizeFactor; + size_t _numThreads; + bool _skipCrcOnRead; + bool _compactToActiveFile; + WriteableFileChunk::Config _fileConfig; + }; +public: + /** + * Construct a log based data store. + * All files are stored in base directory. + * + * @param dirName The directory that will contain the data file. + * @param fileHeaderContext The file header context used to populate + * the generic file header with extra tags. + * The caller must keep it alive for the semantic + * lifetime of the log data store. + * @param tlSyncer Helper to sync transaction log to avoid + * it being behind the document store after a + * crash. + * The caller must keep it alive for the semantic + * lifetime of the log data store. 
+ */ + LogDataStore(vespalib::ThreadStackExecutorBase &executor, + const vespalib::string &dirName, + const Config & config, + const GrowStrategy &growStrategy, + const TuneFileSummary &tune, + const search::common::FileHeaderContext &fileHeaderContext, + transactionlog::SyncProxy &tlSyncer, + const IBucketizer::SP & bucketizer, + bool readOnly = false); + + ~LogDataStore(); + + // Implements IDataStore API + ssize_t read(uint32_t lid, vespalib::DataBuffer & buffer) const override; + void read(const LidVector & lids, IBufferVisitor & visitor) const override; + void write(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len) override; + void remove(uint64_t serialNum, uint32_t lid) override; + void flush(uint64_t syncToken) override; + uint64_t initFlush(uint64_t syncToken) override; + size_t memoryUsed() const override; + size_t memoryMeta() const override; + uint64_t lastSyncToken() const override; + uint64_t tentativeLastSyncToken() const override; + fastos::TimeStamp getLastFlushTime() const override; + size_t getDiskFootprint() const override; + size_t getDiskHeaderFootprint() const override; + size_t getDiskBloat() const override; + size_t getMaxCompactGain() const override; + + /** + * Will compact the docsummary up to a lower limit of 5% bloat. + */ + void compact(uint64_t syncToken); + + const Config & getConfig() const { return _config; } + Config & getConfig() { return _config; } + + void write(LockGuard guard, WriteableFileChunk & destination, uint64_t serialNum, uint32_t lid, const void * buffer, size_t len); + void write(LockGuard guard, FileId destinationFileId, uint32_t lid, const void * buffer, size_t len); + + /** + * This will spinn through the data and verify the content of both + * the '.dat' and the '.idx' files. + * + * @param reportOnly If set inconsitencies will be written to 'stderr'. + */ + void verify(bool reportOnly) const; + + /** + * Visit all data found in data store. 
+ */ + void accept(IDataStoreVisitor &visitor, IDataStoreVisitorProgress &visitorProgress, bool prune) override; + + /** + * Return cost of visiting all data found in data store. + */ + double getVisitCost() const override; + + // Implements IGetLid API + Guard getLidReadGuard() const override { + return _genHandler.takeGuard(); + } + + // Implements IGetLid API + LockGuard getLidGuard(uint32_t lid) const override { + (void) lid; + return LockGuard(_updateLock); + } + + // Implements IGetLid API + LidInfo getLid(Guard & guard, uint32_t lid) const override { + (void) guard; + return _lidInfo[lid]; + } + FileId getActiveFileId(const vespalib::LockGuard & guard) const { + assert(guard.locks(_updateLock)); + return _active; + } + + virtual DataStoreStorageStats getStorageStats() const override; + + virtual std::vector + getFileChunkStats() const override; + +private: + class WrapVisitor; + class WrapVisitorProgress; + class FileChunkHolder; + + void waitForUnblock(); + + // Implements ISetLid API + void setLid(uint32_t lid, const LidInfo & lm) override; + + void compactWorst(); + void compactFile(FileId chunkId); + + typedef std::set NameIdSet; + typedef attribute::RcuVector LidInfoVector; + typedef std::vector FileChunkVector; + + void updateLidMap(); + void preload(); + void verifyModificationTime(const NameIdSet & partList); + + void eraseDanglingDatFiles(const NameIdSet &partList, const NameIdSet &datPartList); + NameIdSet eraseEmptyIdxFiles(const NameIdSet &partList); + void internalFlushAll(void); + + NameIdSet scanDir(const vespalib::string &dir, const vespalib::string &suffix); + FileId allocateFileId(const LockGuard & guard); + FileId allocateFileId(); + vespalib::string ls(const NameIdSet & partList); + + WriteableFileChunk & getActive(const LockGuard & guard) { + assert(guard.locks(_updateLock)); + return static_cast(*_fileChunks[_active.getId()]); + } + + const WriteableFileChunk & getActive(const LockGuard & guard) const { + 
assert(guard.locks(_updateLock)); + return static_cast(*_fileChunks[_active.getId()]); + } + + const FileChunk * getPrevActive(const LockGuard & guard) const { + assert(guard.locks(_updateLock)); + return ( !_prevActive.isActive() ) ? _fileChunks[_prevActive.getId()].get() : NULL; + } + void setActive(const LockGuard & guard, FileId fileId) { + assert(guard.locks(_updateLock)); + _prevActive = _active; + _active = fileId; + } + + bool isBucketSpreadTooLarge(double spread) const { + return (spread >= _config.getMaxBucketSpread()); + } + double getMaxBucketSpread() const; + + FileChunk::UP createReadOnlyFile(FileId fileId, NameId nameId); + FileChunk::UP createWritableFile(FileId fileId, SerialNum serialNum); + FileChunk::UP createWritableFile(FileId fileId, SerialNum serialNum, NameId nameId); + vespalib::string createFileName(NameId id) const; + vespalib::string createDatFileName(NameId id) const; + vespalib::string createIdxFileName(NameId id) const; + + void requireSpace(LockGuard guard, WriteableFileChunk & active); + bool isReadOnly() const { return _readOnly; } + void updateSerialNum(); + + bool isBloatOverLimit() const { + return isBloatOverLimit(getDiskBloat(), getDiskFootprint()); + } + bool isBloatOverLimit(uint64_t bloat, uint64_t usage) const { + return (usage*_config.getMaxDiskBloatFactor() < bloat); + } + + /* + * Protect against compactWorst() dropping file chunk. Caller must hold + * _updateLock. + */ + std::unique_ptr holdFileChunk(FileId fileId); + + /* + * Drop protection against compactWorst() dropping file chunk. + */ + void unholdFileChunk(FileId fileId); + + SerialNum flushFile(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken); + SerialNum flushActive(SerialNum syncToken); + void flushActiveAndWait(SerialNum syncToken); + void flushFileAndWait(LockGuard guard, WriteableFileChunk & file, SerialNum syncToken); + SerialNum getMinLastPersistedSerialNum() const { + return (_fileChunks.empty() ? 
0 : _fileChunks.back()->getLastPersistedSerialNum()); + } + bool shouldCompactToActiveFile(size_t compactedSize) const; + std::pair findNextToCompact(); + + typedef std::vector FileIdxVector; + Config _config; + TuneFileSummary _tune; + const search::common::FileHeaderContext &_fileHeaderContext; + mutable vespalib::GenerationHandler _genHandler; + LidInfoVector _lidInfo; + FileChunkVector _fileChunks; + std::vector _holdFileChunks; + FileId _active; + FileId _prevActive; + vespalib::Lock _updateLock; + bool _readOnly; + vespalib::ThreadStackExecutorBase &_executor; + SerialNum _initFlushSyncToken; + transactionlog::SyncProxy &_tlSyncer; + IBucketizer::SP _bucketizer; + NameIdSet _currentlyCompacting; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp new file mode 100644 index 00000000000..2a15cc118bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.cpp @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "logdocumentstore.h" + +namespace search +{ + +using vespalib::nbostream; +using common::FileHeaderContext; + +LogDocumentStore::LogDocumentStore(vespalib::ThreadStackExecutorBase & executor, + const vespalib::string & baseDir, + const Config & config, + const GrowStrategy & growStrategy, + const TuneFileSummary & tuneFileSummary, + const FileHeaderContext &fileHeaderContext, + transactionlog::SyncProxy &tlSyncer, + const IBucketizer::SP & bucketizer) + : DocumentStore(config, _backingStore), + _backingStore(executor, baseDir, config.getLogConfig(), growStrategy, + tuneFileSummary, fileHeaderContext, tlSyncer, bucketizer) +{ +} + +LogDocumentStore::~LogDocumentStore() +{ +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h new file mode 100644 index 00000000000..3a8227c9f3c --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/logdocumentstore.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "documentstore.h" +#include "logdatastore.h" +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +/** + * Simple document store that contains serialized Document instances. + * updates will be held in memory until flush() is called. + * Uses a Local ID as key. + **/ +class LogDocumentStore : public DocumentStore +{ +public: + class Config : public DocumentStore::Config { + public: + Config(const DocumentStore::Config & base, const LogDataStore::Config & log) : + DocumentStore::Config(base), + _logConfig(log) + { } + const LogDataStore::Config & getLogConfig() const { return _logConfig; } + LogDataStore::Config & getLogConfig() { return _logConfig; } + private: + LogDataStore::Config _logConfig; + }; + /** + * Construct a document store. 
+ * If the "simpledocstore.dat" data file exists, reads meta-data (offsets) into memory. + * + * @throws vespalib::IoException if the file is corrupt or other IO problems occur. + * @param docMan The document type manager to use when deserializing. + * @param baseDir The path to a directory where "simpledocstore.dat" will exist. + * @param fileHeaderContext The file header context used to populate + * the generic file header with extra tags. + * The caller must keep it alive for the semantic + * lifetime of the log data store. + */ + LogDocumentStore(vespalib::ThreadStackExecutorBase & executor, + const vespalib::string & baseDir, + const Config & config, + const GrowStrategy & growStrategy, + const TuneFileSummary &tuneFileSummary, + const common::FileHeaderContext &fileHeaderContext, + transactionlog::SyncProxy &tlSyncer, + const IBucketizer::SP & bucketizer); + ~LogDocumentStore(); + LogDataStore::Config & getLogConfig() { return _backingStore.getConfig(); } + const LogDataStore::Config & getLogConfig() const { return _backingStore.getConfig(); } +private: + void compact(uint64_t syncToken) override { _backingStore.compact(syncToken); } + LogDataStore _backingStore; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp new file mode 100644 index 00000000000..fe5678c61e8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.cpp @@ -0,0 +1,868 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "data_store_file_chunk_stats.h" + +LOG_SETUP(".search.writeablefilechunk"); + +using vespalib::makeTask; +using vespalib::makeClosure; +using vespalib::FileHeader; +using vespalib::make_string; +using vespalib::LockGuard; +using vespalib::MonitorGuard; +using vespalib::nbostream; +using vespalib::IllegalHeaderException; +using vespalib::GenerationHandler; +using search::common::FileHeaderContext; + +namespace search { + +namespace +{ + +const uint64_t Alignment = 4096; +const uint64_t headerAlign = 4096; + +} + +WriteableFileChunk:: +WriteableFileChunk(vespalib::ThreadStackExecutorBase &executor, + FileId fileId, NameId nameId, + const vespalib::string &baseName, + SerialNum initialSerialNum, + const Config &config, + const TuneFileSummary &tune, + const FileHeaderContext &fileHeaderContext, + const IBucketizer * bucketizer, + bool skipCrcOnRead) + : FileChunk(fileId, nameId, baseName, tune, bucketizer, skipCrcOnRead), + _config(config), + _serialNum(initialSerialNum), + _frozen(false), + _lock(), + _writeLock(), + _flushLock(), + _dataFile(_dataFileName.c_str()), + _idxFile(_idxFileName.c_str()), + _chunkMap(), + _pendingChunks(), + _pendingIdx(0), + _pendingDat(0), + _currentDiskFootprint(0), + _nextChunkId(1), + _active(new Chunk(0, Chunk::Config(config.getMaxChunkBytes(), config.getMaxChunkEntries()))), + _alignment(1), + _granularity(1), + _maxChunkSize(0x100000), + _firstChunkIdToBeWritten(0), + _writeTaskIsRunning(false), + _executor(executor), + _bucketMap(bucketizer) +{ + if (tune._write.getWantDirectIO()) { + _dataFile.EnableDirectIO(); + _idxFile.EnableSyncWrites(); + } else if (tune._write.getWantSyncWrites()) { + _dataFile.EnableSyncWrites(); + _idxFile.EnableSyncWrites(); + } + if (_dataFile.OpenReadWrite()) { + readDataHeader(); + if (_dataHeaderLen == 0) { + writeDataHeader(fileHeaderContext); + } + 
_dataFile.SetPosition(_dataFile.GetSize()); + if (tune._write.getWantDirectIO()) { + if (!_dataFile.GetDirectIORestrictions(_alignment, _granularity, _maxChunkSize)) { + LOG(debug, "Direct IO setup failed for file %s due to %s", + _dataFile.GetFileName(), _dataFile.getLastErrorString().c_str()); + } + } + if (_idxFile.OpenReadWrite()) { + readIdxHeader(); + if (_idxHeaderLen == 0) { + _idxHeaderLen = writeIdxHeader(fileHeaderContext, _idxFile); + } + _idxFile.SetPosition(_idxFile.GetSize()); + } else { + _dataFile.Close(); + throw SummaryException("Failed opening idx file", _idxFile, VESPA_STRLOC); + } + } else { + throw SummaryException("Failed opening data file", _dataFile, VESPA_STRLOC); + } + _firstChunkIdToBeWritten = _active->getId(); + updateCurrentDiskFootprint(); +} + +WriteableFileChunk::~WriteableFileChunk() +{ + if (!frozen()) { + if (_active->size() || _active->count()) { + flush(true, _serialNum); + } + freeze(); + } + // This is a wild stab at fixing bug 6348143. + // If it works it indicates something bad with the filesystem. + if (_dataFile.IsOpened()) { + if (! _dataFile.Sync()) { + assert(false); + } + } + if (_idxFile.IsOpened()) { + if (! 
_idxFile.Sync()) { + assert(false); + } + } +} + +size_t +WriteableFileChunk::updateLidMap(ISetLid & ds, uint64_t serialNum) +{ + size_t sz = FileChunk::updateLidMap(ds, serialNum); + _nextChunkId = _chunkInfo.size(); + _active.reset( new Chunk(_nextChunkId++, Chunk::Config(_config.getMaxChunkBytes(), _config.getMaxChunkEntries()))); + _serialNum = getLastPersistedSerialNum(); + _firstChunkIdToBeWritten = _active->getId(); + setDiskFootprint(0); + _chunkInfo.reserve(0x10000); + return sz; +} + +void +WriteableFileChunk::restart(const MonitorGuard & guard, uint32_t nextChunkId) +{ + (void) guard; + _writeTaskIsRunning = true; + _executor.execute(makeTask(makeClosure(this, &WriteableFileChunk::fileWriter, nextChunkId))); +} + +namespace { + +LidInfoWithLidV::const_iterator +find_first(LidInfoWithLidV::const_iterator begin, uint32_t chunkId) { + for ( ; begin->getChunkId() != chunkId; ++begin); + return begin; +} + +LidInfoWithLidV::const_iterator +seek_past(LidInfoWithLidV::const_iterator begin, LidInfoWithLidV::const_iterator end, uint32_t chunkId) { + for ( ; (begin < end) && (begin->getChunkId() == chunkId); begin++); + return begin; +} + +} + +void +WriteableFileChunk::read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const +{ + if (count == 0) { return; } + if (!frozen()) { + vespalib::hash_map chunksOnFile; + { + LockGuard guard(_lock); + for (size_t i(0); i < count; i++) { + const LidInfoWithLid & li = *(begin + i); + uint32_t chunk = li.getChunkId(); + if ((chunk >= _chunkInfo.size()) || !_chunkInfo[chunk].valid()) { + ChunkMap::const_iterator found = _chunkMap.find(chunk); + vespalib::ConstBufferRef buffer; + if (found != _chunkMap.end()) { + buffer = found->second->getLid(li.getLid()); + } else { + assert(chunk == _active->getId()); + buffer = _active->getLid(li.getLid()); + } + visitor.visit(li.getLid(), buffer); + } else { + chunksOnFile[chunk] = _chunkInfo[chunk]; + } + } + } + for (auto & it : chunksOnFile) { + 
LidInfoWithLidV::const_iterator first = find_first(begin, it.first); + LidInfoWithLidV::const_iterator last = seek_past(first, begin + count, it.first); + FileChunk::read(first, last - first, it.second, visitor); + } + } else { + FileChunk::read(begin, count, visitor); + } +} + +ssize_t +WriteableFileChunk::read(uint32_t lid, SubChunkId chunkId, vespalib::DataBuffer & buffer) const +{ + ChunkInfo chunkInfo; + if (!frozen()) { + LockGuard guard(_lock); + if ((chunkId >= _chunkInfo.size()) || !_chunkInfo[chunkId].valid()) { + ChunkMap::const_iterator found = _chunkMap.find(chunkId); + if (found != _chunkMap.end()) { + return found->second->read(lid, buffer); + } else { + assert(chunkId == _active->getId()); + return _active->read(lid, buffer); + } + } + chunkInfo = _chunkInfo[chunkId]; + } else { + chunkInfo = _chunkInfo[chunkId]; + } + return FileChunk::read(lid, chunkId, chunkInfo, buffer); +} + +void +WriteableFileChunk::internalFlush(uint32_t chunkId, uint64_t serialNum) +{ + Chunk * active(NULL); + { + LockGuard guard(_lock); + active = _chunkMap[chunkId].get(); + } + + ProcessedChunk::UP tmp(new ProcessedChunk(chunkId, _alignment)); + if (_alignment > 1) { + tmp->getBuf().ensureFree(active->getMaxPackSize(_config.getCompression()) + _alignment - 1); + } + active->pack(serialNum, tmp->getBuf(), _config.getCompression()); + tmp->setPayLoad(); + if (_alignment > 1) { + const size_t padAfter((_alignment - tmp->getPayLoad() % _alignment) % _alignment); + memset(tmp->getBuf().getFree(), 0, padAfter); + tmp->getBuf().moveFreeToData(padAfter); + } + { + LockGuard innerGuard(_lock); + setDiskFootprint(FileChunk::getDiskFootprint() + tmp->getBuf().getDataLen()); + } + enque(std::move(tmp)); +} + +void +WriteableFileChunk::enque(ProcessedChunk::UP tmp) +{ + LOG(debug, "enqueing %p", tmp.get()); + MonitorGuard guard(_writeMonitor); + _writeQ.push_back(std::move(tmp)); + if (_writeTaskIsRunning == false) { + restart(guard, _firstChunkIdToBeWritten); + } + guard.signal(); +} 
+ +namespace { + +const std::vector Padding(Alignment, '\0'); + +size_t +getAlignedStartPos(FastOS_File & file) +{ + ssize_t startPos(file.GetPosition()); + assert(startPos == file.GetSize()); + if (startPos & (Alignment-1)) { + FastOS_File align(file.GetFileName()); + if (align.OpenWriteOnly()) { + align.SetPosition(startPos); + ssize_t toWrite(Alignment - (startPos & (Alignment-1))); + ssize_t written = align.Write2(&Padding[0], toWrite); + if (written == toWrite) { + align.Sync(); + file.SetPosition(align.GetSize()); + startPos = file.GetPosition(); + } else { + throw SummaryException( + make_string("Failed writing %ld bytes to dat file. Only %ld written", toWrite, written), + align, VESPA_STRLOC); + } + } else { + throw SummaryException("Failed opening dat file for padding for direct io.", align, VESPA_STRLOC); + } + } + assert((startPos & (Alignment-1)) == 0); + return startPos; +} + +} + +WriteableFileChunk::ProcessedChunkQ +WriteableFileChunk::drainQ() +{ + ProcessedChunkQ newChunks; + MonitorGuard guard(_writeMonitor); + newChunks.swap(_writeQ); + if ( ! 
newChunks.empty() ) { + guard.broadcast(); + } + return newChunks; +} + +void +WriteableFileChunk::insertChunks(ProcessedChunkMap & orderedChunks, ProcessedChunkQ & newChunks, const uint32_t nextChunkId) +{ + for (auto &chunk : newChunks) { + if (chunk.get() != 0) { + assert(chunk->getChunkId() >= nextChunkId); + assert(orderedChunks.find(chunk->getChunkId()) == orderedChunks.end()); + orderedChunks[chunk->getChunkId()] = std::move(chunk); + } else { + orderedChunks[std::numeric_limits::max()] = ProcessedChunk::UP(); + } + } +} + +WriteableFileChunk::ProcessedChunkQ +WriteableFileChunk::fetchNextChain(ProcessedChunkMap & orderedChunks, const uint32_t firstChunkId) +{ + ProcessedChunkQ chunks; + while (!orderedChunks.empty() && + ((orderedChunks.begin()->first == (firstChunkId+chunks.size())) || + (orderedChunks.begin()->second.get() == NULL))) + { + chunks.push_back(std::move(orderedChunks.begin()->second)); + orderedChunks.erase(orderedChunks.begin()); + } + return chunks; +} + +ChunkMeta +WriteableFileChunk::computeChunkMeta(const LockGuard & guard, + const GenerationHandler::Guard & bucketizerGuard, + size_t offset, const ProcessedChunk & tmp, const Chunk & active) +{ + (void) guard; + size_t dataLen = tmp.getBuf().getDataLen(); + const ChunkMeta cmeta(offset, tmp.getPayLoad(), active.getLastSerial(), active.count()); + assert((size_t(tmp.getBuf().getData())%_alignment) == 0); + assert((dataLen%_alignment) == 0); + PendingChunk::SP pcsp; + pcsp.reset(new PendingChunk(active.getLastSerial(), offset, dataLen)); + PendingChunk &pc(*pcsp.get()); + nbostream &os(pc.getSerializedIdx()); + cmeta.serialize(os); + BucketDensityComputer bucketMap(_bucketizer); + for (const Chunk::Entry & e : active.getLids()) { + bucketMap.recordLid(bucketizerGuard, e.getLid(), e.netSize()); + _bucketMap.recordLid(bucketizerGuard, e.getLid(), e.netSize()); + LidMeta lm(e.getLid(), e.netSize()); + lm.serialize(os); + } + addNumBuckets(bucketMap.getNumBuckets()); + 
setNumUniqueBuckets(_bucketMap.getNumBuckets()); + + _pendingDat += pc.getDataLen(); + _pendingIdx += pc.getIdxLen(); + _pendingChunks.push_back(pcsp); + return cmeta; +} + +ChunkMetaV +WriteableFileChunk::computeChunkMeta(ProcessedChunkQ & chunks, size_t startPos, size_t & sz, bool & done) +{ + ChunkMetaV cmetaV; + cmetaV.reserve(chunks.size()); + uint64_t lastSerial(_lastPersistedSerialNum); + LockGuard guard(_lock); + + if (!_pendingChunks.empty()) { + const PendingChunk::SP pcsp(_pendingChunks.back()); + const PendingChunk &pc(*pcsp.get()); + assert(pc.getLastSerial() >= lastSerial); + lastSerial = pc.getLastSerial(); + } + + GenerationHandler::Guard bucketizerGuard = _bucketMap.getGuard(); + for (size_t i(0), m(chunks.size()); i < m; i++) { + if (chunks[i].get() != 0) { + const ProcessedChunk & chunk = *chunks[i]; + const ChunkMeta cmeta(computeChunkMeta(guard, bucketizerGuard, startPos + sz, chunk, *_chunkMap[chunk.getChunkId()])); + sz += chunk.getBuf().getDataLen(); + cmetaV.push_back(cmeta); + assert(cmeta.getLastSerial() >= lastSerial); + lastSerial = cmeta.getLastSerial(); + } else { + done = true; + assert((i+1) == chunks.size()); + chunks.resize(i); + assert(i == chunks.size()); + } + } + return cmetaV; +} + +void +WriteableFileChunk::writeData(const ProcessedChunkQ & chunks, size_t sz) +{ + vespalib::DataBuffer buf(0ul, _alignment); + buf.ensureFree(sz); + for (const ProcessedChunk::UP & chunk : chunks) { + buf.writeBytes(chunk->getBuf().getData(), chunk->getBuf().getDataLen()); + } + + LockGuard guard(_writeLock); + ssize_t wlen = _dataFile.Write2(buf.getData(), buf.getDataLen()); + if (wlen != static_cast(buf.getDataLen())) { + throw SummaryException(make_string("Failed writing %ld bytes to dat file. 
Only %ld written", + buf.getDataLen(), wlen), + _idxFile, VESPA_STRLOC); + } + updateCurrentDiskFootprint(); +} + +void +WriteableFileChunk::updateChunkInfo(const ProcessedChunkQ & chunks, const ChunkMetaV & cmetaV, size_t sz) +{ + MonitorGuard guard(_lock); + size_t nettoSz(sz); + for (size_t i(0); i < chunks.size(); i++) { + const ProcessedChunk & chunk = *chunks[i]; + assert(_chunkMap.find(chunk.getChunkId()) == _chunkMap.begin()); + const Chunk & active = *_chunkMap.begin()->second; + if (active.getId() >= _chunkInfo.size()) { + _chunkInfo.resize(active.getId()+1); + } + const ChunkMeta & cmeta(cmetaV[i]); + _chunkInfo[active.getId()] = ChunkInfo(cmeta.getOffset(), chunk.getPayLoad(), cmeta.getLastSerial()); + nettoSz += active.size(); + _chunkMap.erase(_chunkMap.begin()); + } + setDiskFootprint(FileChunk::getDiskFootprint() - nettoSz); + guard.broadcast(); +} + +void +WriteableFileChunk::fileWriter(const uint32_t firstChunkId) +{ + LOG(debug, "Starting the filewriter with chunkid = %d", firstChunkId); + uint32_t nextChunkId(firstChunkId); + bool done(false); + { + ProcessedChunkQ newChunks(drainQ()); + if ( ! 
newChunks.empty()) { + insertChunks(_orderedChunks, newChunks, nextChunkId); + ProcessedChunkQ chunks(fetchNextChain(_orderedChunks, nextChunkId)); + nextChunkId += chunks.size(); + + size_t sz(0); + ChunkMetaV cmetaV(computeChunkMeta(chunks, getAlignedStartPos(_dataFile), sz, done)); + writeData(chunks, sz); + updateChunkInfo(chunks, cmetaV, sz); + LOG(spam, "bucket spread = '%3.2f'", getBucketSpread()); + } + } + LOG(debug, + "Stopping the filewriter with startchunkid = %d and ending chunkid = %d done=%d", + firstChunkId, nextChunkId, done); + if (done) { + MonitorGuard guard(_writeMonitor); + assert(_writeQ.empty()); + assert(_chunkMap.empty()); + for (const ChunkInfo & cm : _chunkInfo) { + assert(cm.valid() && cm.getSize() != 0); + } + _writeTaskIsRunning = false; + guard.broadcast(); + } else { + MonitorGuard guard(_writeMonitor); + if (_writeQ.empty()) { + _firstChunkIdToBeWritten = nextChunkId; + _writeTaskIsRunning = false; + } else { + restart(guard, nextChunkId); + } + } +} + +fastos::TimeStamp +WriteableFileChunk::getModificationTime() const +{ + LockGuard guard(_lock); + return _modificationTime; +} + +void +WriteableFileChunk::freeze() +{ + if (!frozen()) { + waitForAllChunksFlushedToDisk(); + enque(ProcessedChunk::UP()); + _executor.sync(); + { + MonitorGuard guard(_writeMonitor); + while (_writeTaskIsRunning) { + guard.wait(10); + } + assert(_writeQ.empty()); + } + { + MonitorGuard guard(_lock); + setDiskFootprint(getDiskFootprint(guard)); + _frozen = true; + } + _dataFile.Close(); + _idxFile.Close(); + _bucketMap = BucketDensityComputer(_bucketizer); + } +} + +size_t +WriteableFileChunk::getDiskFootprint() const +{ + if (frozen()) { + return FileChunk::getDiskFootprint(); + } else { + // Double checked locking. + MonitorGuard guard(_lock); + return getDiskFootprint(guard); + } +} + +size_t +WriteableFileChunk::getDiskFootprint(const vespalib::MonitorGuard & guard) const +{ + assert(guard.monitors(_lock)); + return frozen() + ? 
FileChunk::getDiskFootprint() + : _currentDiskFootprint + FileChunk::getDiskFootprint(); +} + +size_t +WriteableFileChunk::getMemoryFootprint() const +{ + size_t sz(0); + LockGuard guard(_lock); + for (const auto & it : _chunkMap) { + sz += it.second->size(); + } + sz += _pendingIdx + _pendingDat; + return sz + FileChunk::getMemoryFootprint(); +} + +size_t +WriteableFileChunk::getMemoryMetaFootprint() const +{ + constexpr size_t mySizeWithoutMyParent(sizeof(*this) - sizeof(FileChunk)); + return mySizeWithoutMyParent + FileChunk::getMemoryMetaFootprint(); +} + +int32_t WriteableFileChunk::flushLastIfNonEmpty(bool force) +{ + int32_t chunkId(-1); + MonitorGuard guard(_lock); + for (bool ready(false); !ready;) { + if (_chunkMap.size() > 1000) { + LOG(debug, "Summary write overload at least 1000 outstanding chunks. Suspending."); + guard.wait(); + LOG(debug, "Summary write overload eased off. Commencing."); + } else { + ready = true; + } + } + if ( force || ! _active->empty()) { + chunkId = _active->getId(); + _chunkMap[chunkId] = std::move(_active); + assert(_nextChunkId < LidInfo::getMaxChunkNum()); + _active.reset(new Chunk(_nextChunkId++, + Chunk::Config(_config.getMaxChunkBytes(), + _config.getMaxChunkEntries()))); + } + return chunkId; +} + +void +WriteableFileChunk::flush(bool block, uint64_t syncToken) +{ + int32_t chunkId = flushLastIfNonEmpty(syncToken > _serialNum); + if (chunkId >= 0) { + setSerialNum(syncToken); + _executor.execute(makeTask(makeClosure(this, + &WriteableFileChunk::internalFlush, + static_cast(chunkId), + _serialNum))); + } else { + if (block) { + MonitorGuard guard(_lock); + if (!_chunkMap.empty()) { + chunkId = _chunkMap.rbegin()->first; + } + } + } + if (block) { + _executor.sync(); + waitForChunkFlushedToDisk(chunkId); + } +} + +void +WriteableFileChunk::waitForDiskToCatchUpToNow() const +{ + int32_t chunkId(-1); + { + MonitorGuard guard(_lock); + if (!_chunkMap.empty()) { + chunkId = _chunkMap.rbegin()->first; + } + } + 
waitForChunkFlushedToDisk(chunkId); +} + +void +WriteableFileChunk::waitForChunkFlushedToDisk(uint32_t chunkId) const +{ + MonitorGuard guard(_lock); + while( _chunkMap.find(chunkId) != _chunkMap.end() ) { + guard.wait(); + } +} + +void +WriteableFileChunk::waitForAllChunksFlushedToDisk() const +{ + MonitorGuard guard(_lock); + while( ! _chunkMap.empty() ) { + guard.wait(); + } +} + +LidInfo +WriteableFileChunk::append(uint64_t serialNum, + uint32_t lid, + const void * buffer, + size_t len) +{ + assert( !frozen() ); + if ( ! _active->hasRoom(len)) { + flush(false, _serialNum); + } + assert(serialNum >= _serialNum); + _serialNum = serialNum; + _addedBytes += adjustSize(len); + size_t oldSz(_active->size()); + LidMeta lm = _active->append(lid, buffer, len); + setDiskFootprint(FileChunk::getDiskFootprint() - oldSz + _active->size()); + return LidInfo(getFileId().getId(), _active->getId(), lm.size()); +} + + +void +WriteableFileChunk::readDataHeader(void) +{ + int64_t fSize(_dataFile.GetSize()); + try { + FileHeader h; + _dataHeaderLen = h.readFile(_dataFile); + _dataFile.SetPosition(_dataHeaderLen); + } catch (IllegalHeaderException &e) { + _dataFile.SetPosition(0); + try { + FileHeader::FileReader fr(_dataFile); + uint32_t header2Len = FileHeader::readSize(fr); + if (header2Len <= fSize) + e.throwSelf(); // header not truncated + } catch (IllegalHeaderException &e2) { + } + if (fSize > 0) { + // Truncate file (dropping header) if cannot even read + // header length, or if header has been truncated. 
+ _dataFile.SetPosition(0); + _dataFile.SetSize(0); + assert(_dataFile.GetSize() == 0); + assert(_dataFile.GetPosition() == 0); + LOG(warning, + "Truncated file chunk data %s due to truncated file header", + _dataFile.GetFileName()); + } + } +} + + +void +WriteableFileChunk::readIdxHeader(void) +{ + int64_t fSize(_idxFile.GetSize()); + try { + FileHeader h; + _idxHeaderLen = h.readFile(_idxFile); + _idxFile.SetPosition(_idxHeaderLen); + } catch (IllegalHeaderException &e) { + _idxFile.SetPosition(0); + try { + FileHeader::FileReader fr(_idxFile); + uint32_t header2Len = FileHeader::readSize(fr); + if (header2Len <= fSize) + e.throwSelf(); // header not truncated + } catch (IllegalHeaderException &e2) { + } + if (fSize > 0) { + // Truncate file (dropping header) if cannot even read + // header length, or if header has been truncated. + _idxFile.SetPosition(0); + _idxFile.SetSize(0); + assert(_idxFile.GetSize() == 0); + assert(_idxFile.GetPosition() == 0); + LOG(warning, + "Truncated file chunk index %s due to truncated file header", + _idxFile.GetFileName()); + } + } +} + + +void +WriteableFileChunk::writeDataHeader(const FileHeaderContext &fileHeaderContext) +{ + typedef FileHeader::Tag Tag; + FileHeader h(headerAlign); + assert(_dataFile.IsOpened()); + assert(_dataFile.IsWriteMode()); + assert(_dataFile.GetPosition() == 0); + fileHeaderContext.addTags(h, _dataFile.GetFileName()); + h.putTag(Tag("desc", "Log data store chunk data")); + _dataHeaderLen = h.writeFile(_dataFile); +} + + +uint64_t +WriteableFileChunk::writeIdxHeader(const FileHeaderContext &fileHeaderContext, FastOS_FileInterface & file) +{ + typedef FileHeader::Tag Tag; + FileHeader h; + assert(file.IsOpened()); + assert(file.IsWriteMode()); + assert(file.GetPosition() == 0); + fileHeaderContext.addTags(h, file.GetFileName()); + h.putTag(Tag("desc", "Log data store chunk index")); + return h.writeFile(file); +} + + +bool +WriteableFileChunk::needFlushPendingChunks(uint64_t serialNum, uint64_t 
datFileLen) { + MonitorGuard guard(_lock); + return needFlushPendingChunks(guard, serialNum, datFileLen); +} + +bool +WriteableFileChunk::needFlushPendingChunks(const MonitorGuard & guard, uint64_t serialNum, uint64_t datFileLen) +{ + assert(guard.monitors(_lock)); + if (_pendingChunks.empty()) + return false; + const PendingChunk::SP pcsp(_pendingChunks.front()); + const PendingChunk &pc(*pcsp.get()); + if (pc.getLastSerial() > serialNum) + return false; + bool datWritten = datFileLen >= pc.getDataOffset() + pc.getDataLen(); + if (pc.getLastSerial() < serialNum) { + assert(datWritten); + return true; + } + return datWritten; +} + +void +WriteableFileChunk::updateCurrentDiskFootprint() { + _currentDiskFootprint = _idxFile.getSize() + _dataFile.getSize(); +} + +/* + * Called by writeExecutor thread for now. + */ +void +WriteableFileChunk::flushPendingChunks(uint64_t serialNum) { + LockGuard flushGuard(_flushLock); + if (frozen()) + return; + uint64_t datFileLen = _dataFile.getSize(); + fastos::TimeStamp timeStamp(fastos::ClockSystem::now()); + if (needFlushPendingChunks(serialNum, datFileLen)) { + timeStamp = unconditionallyFlushPendingChunks(flushGuard, serialNum, datFileLen); + } + LockGuard guard(_lock); + _modificationTime = std::max(timeStamp, _modificationTime); +} + +fastos::TimeStamp +WriteableFileChunk::unconditionallyFlushPendingChunks(const vespalib::LockGuard &flushGuard, uint64_t serialNum, uint64_t datFileLen) +{ + assert(flushGuard.locks(_flushLock)); + if ( ! 
_dataFile.Sync()) { + throw SummaryException("Failed fsync of dat file", _dataFile, VESPA_STRLOC); + } + nbostream os; + uint64_t lastSerial = 0; + { + MonitorGuard guard(_lock); + lastSerial = _lastPersistedSerialNum; + for (;;) { + if (!needFlushPendingChunks(guard, serialNum, datFileLen)) + break; + PendingChunk::SP pcsp; + pcsp.swap(_pendingChunks.front()); + _pendingChunks.pop_front(); + const PendingChunk &pc(*pcsp.get()); + assert(_pendingIdx >= pc.getIdxLen()); + assert(_pendingDat >= pc.getDataLen()); + assert(datFileLen >= pc.getDataOffset() + pc.getDataLen()); + assert(lastSerial <= pc.getLastSerial()); + _pendingIdx -= pc.getIdxLen(); + _pendingDat -= pc.getDataLen(); + lastSerial = pc.getLastSerial(); + const nbostream &os2(pc.getSerializedIdx()); + os.write(os2.c_str(), os2.size()); + } + } + fastos::TimeStamp timeStamp(fastos::ClockSystem::now()); + ssize_t wlen = _idxFile.Write2(os.c_str(), os.size()); + updateCurrentDiskFootprint(); + + if (wlen != static_cast(os.size())) { + throw SummaryException("Failed writing idx file", _idxFile, VESPA_STRLOC); + } + if ( ! 
_idxFile.Sync()) { + throw SummaryException("Failed fsync of idx file", _idxFile, VESPA_STRLOC); + } + if (_lastPersistedSerialNum < lastSerial) { + _lastPersistedSerialNum = lastSerial; + } + return timeStamp; +} + +DataStoreFileChunkStats +WriteableFileChunk::getStats() const +{ + DataStoreFileChunkStats stats = FileChunk::getStats(); + uint64_t serialNum = getSerialNum(); + return DataStoreFileChunkStats(stats.diskUsage(), stats.diskBloat(), + stats.maxBucketSpread(), + serialNum, + stats.lastFlushedSerialNum(), + stats.nameId()); +}; + +WriteableFileChunk::PendingChunk::PendingChunk(uint64_t lastSerial, + uint64_t dataOffset, + uint32_t dataLen) + : _idx(), + _lastSerial(lastSerial), + _dataOffset(dataOffset), + _dataLen(dataLen) +{ +} + +WriteableFileChunk::PendingChunk::~PendingChunk(void) +{ +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h new file mode 100644 index 00000000000..97c6ad8d711 --- /dev/null +++ b/searchlib/src/vespa/searchlib/docstore/writeablefilechunk.h @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +class WriteableFileChunk : public FileChunk +{ +public: + class Config + { + public: + Config() + : _compression(document::CompressionConfig::LZ4, 9, 60), + _maxChunkBytes(0x10000), + _maxChunkEntries(256) + { } + + Config(const document::CompressionConfig &compression, + size_t maxChunkBytes, size_t maxChunkEntries) + : _compression(compression), + _maxChunkBytes(maxChunkBytes), + _maxChunkEntries(maxChunkEntries) + { } + + const document::CompressionConfig & getCompression() const { return _compression; } + size_t getMaxChunkBytes() const { return _maxChunkBytes; } + size_t getMaxChunkEntries() const { return _maxChunkEntries; } + private: + document::CompressionConfig _compression; + size_t _maxChunkBytes; + size_t _maxChunkEntries; + }; + +public: + typedef std::unique_ptr UP; + WriteableFileChunk(vespalib::ThreadStackExecutorBase & executor, + FileId fileId, NameId nameId, + const vespalib::string & baseName, + uint64_t initialSerialNum, + const Config & config, + const TuneFileSummary &tune, + const common::FileHeaderContext &fileHeaderContext, + const IBucketizer * bucketizer, + bool crcOnReadDisabled); + ~WriteableFileChunk(); + + ssize_t read(uint32_t lid, SubChunkId chunk, vespalib::DataBuffer & buffer) const override; + void read(LidInfoWithLidV::const_iterator begin, size_t count, IBufferVisitor & visitor) const override; + + LidInfo append(uint64_t serialNum, uint32_t lid, const void * buffer, size_t len); + void flush(bool block, uint64_t syncToken); + uint64_t getSerialNum() const { return _serialNum; } + void setSerialNum(uint64_t serialNum) { _serialNum = std::max(_serialNum, serialNum); } + + virtual fastos::TimeStamp getModificationTime() const override; + void freeze(); + size_t getDiskFootprint() const override; + size_t getMemoryFootprint() const override; + size_t getMemoryMetaFootprint() const 
override; + size_t updateLidMap(ISetLid & lidMap, uint64_t serialNum) override; + void waitForDiskToCatchUpToNow() const; + void flushPendingChunks(uint64_t serialNum); + virtual DataStoreFileChunkStats getStats() const override; + + static uint64_t writeIdxHeader(const common::FileHeaderContext &fileHeaderContext, FastOS_FileInterface & file); +private: + class ProcessedChunk + { + public: + typedef std::unique_ptr UP; + ProcessedChunk(uint32_t chunkId, uint32_t alignment) + : _chunkId(chunkId), + _payLoad(0), + _buf(0ul, alignment) + { } + void setPayLoad() { _payLoad = _buf.getDataLen(); } + uint32_t getPayLoad() const { return _payLoad; } + uint32_t getChunkId() const { return _chunkId; } + const vespalib::DataBuffer & getBuf() const { return _buf; } + vespalib::DataBuffer & getBuf() { return _buf; } + private: + uint32_t _chunkId; + uint32_t _payLoad; + vespalib::DataBuffer _buf; + }; + typedef std::map ProcessedChunkMap; + + typedef std::vector ProcessedChunkQ; + + /* + * Information about serialized chunk written to .dat file but not yet + * synced. 
+ */ + class PendingChunk + { + vespalib::nbostream _idx; // Serialized chunk for .idx file + uint64_t _lastSerial; + uint64_t _dataOffset; + uint32_t _dataLen; + public: + typedef std::shared_ptr SP; + PendingChunk(uint64_t lastSerial, uint64_t dataOffset, uint32_t dataLen); + ~PendingChunk(void); + vespalib::nbostream & getSerializedIdx(void) { return _idx; } + const vespalib::nbostream & getSerializedIdx(void) const { return _idx; } + uint64_t getDataOffset(void) const { return _dataOffset; } + uint32_t getDataLen(void) const { return _dataLen; } + uint32_t getIdxLen(void) const { return _idx.size(); } + uint64_t getLastSerial(void) const { return _lastSerial; } + }; + + bool frozen() const override { return _frozen; } + void waitForChunkFlushedToDisk(uint32_t chunkId) const; + void waitForAllChunksFlushedToDisk() const; + void fileWriter(const uint32_t firstChunkId); + void internalFlush(uint32_t, uint64_t serialNum); + void enque(ProcessedChunk::UP); + int32_t flushLastIfNonEmpty(bool force); + void restart(const vespalib::MonitorGuard & guard, uint32_t nextChunkId); + ProcessedChunkQ drainQ(); + void readDataHeader(void); + void readIdxHeader(void); + void writeDataHeader(const common::FileHeaderContext &fileHeaderContext); + bool needFlushPendingChunks(uint64_t serialNum, uint64_t datFileLen); + bool needFlushPendingChunks(const vespalib::MonitorGuard & guard, uint64_t serialNum, uint64_t datFileLen); + fastos::TimeStamp unconditionallyFlushPendingChunks(const vespalib::LockGuard & flushGuard, uint64_t serialNum, uint64_t datFileLen); + static void insertChunks(ProcessedChunkMap & orderedChunks, ProcessedChunkQ & newChunks, const uint32_t nextChunkId); + static ProcessedChunkQ fetchNextChain(ProcessedChunkMap & orderedChunks, const uint32_t firstChunkId); + size_t computeDataLen(const ProcessedChunk & tmp, const Chunk & active); + ChunkMeta computeChunkMeta(const vespalib::LockGuard & guard, + const vespalib::GenerationHandler::Guard & bucketizerGuard, + 
size_t offset, const ProcessedChunk & tmp, const Chunk & active); + ChunkMetaV computeChunkMeta(ProcessedChunkQ & chunks, size_t startPos, size_t & sz, bool & done); + void writeData(const ProcessedChunkQ & chunks, size_t sz); + void updateChunkInfo(const ProcessedChunkQ & chunks, const ChunkMetaV & cmetaV, size_t sz); + void updateCurrentDiskFootprint(); + size_t getDiskFootprint(const vespalib::MonitorGuard & guard) const; + + Config _config; + SerialNum _serialNum; + bool _frozen; + // Lock order is _writeLock, _flushLock, _lock + vespalib::Monitor _lock; + vespalib::Lock _writeLock; + vespalib::Lock _flushLock; + FastOS_File _dataFile; + FastOS_File _idxFile; + typedef std::map ChunkMap; + ChunkMap _chunkMap; + typedef std::deque PendingChunks; + PendingChunks _pendingChunks; + uint64_t _pendingIdx; + uint64_t _pendingDat; + uint64_t _currentDiskFootprint; + uint32_t _nextChunkId; + Chunk::UP _active; + size_t _alignment; + size_t _granularity; + size_t _maxChunkSize; + uint32_t _firstChunkIdToBeWritten; + bool _writeTaskIsRunning; + vespalib::Monitor _writeMonitor; + ProcessedChunkQ _writeQ; + vespalib::ThreadStackExecutorBase & _executor; + ProcessedChunkMap _orderedChunks; + BucketDensityComputer _bucketMap; +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/.gitignore b/searchlib/src/vespa/searchlib/engine/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/engine/CMakeLists.txt b/searchlib/src/vespa/searchlib/engine/CMakeLists.txt new file mode 100644 index 00000000000..06c1c2db0ec --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_engine OBJECT + SOURCES + docsumapi.cpp + docsumreply.cpp + docsumrequest.cpp + errorcodes.cpp + monitorreply.cpp + monitorrequest.cpp + packetconverter.cpp + propertiesmap.cpp + request.cpp + searchreply.cpp + searchrequest.cpp + source_description.cpp + transport_metrics.cpp + transportserver.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/engine/OWNERS b/searchlib/src/vespa/searchlib/engine/OWNERS new file mode 100644 index 00000000000..12b533ec610 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/OWNERS @@ -0,0 +1 @@ +havardpe diff --git a/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh b/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh new file mode 100755 index 00000000000..1f638bf0cb6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/create-class-cpp.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` +name=`echo $class | tr 'A-Z' 'a-z'` + +cat < +LOG_SETUP(".engine.$name"); +#include +#include "$name.h" + +namespace search { +namespace engine { + +$class::$class() +{ +} + +$class::~$class() +{ +} + +} // namespace engine +} // namespace search +EOF diff --git a/searchlib/src/vespa/searchlib/engine/create-class-h.sh b/searchlib/src/vespa/searchlib/engine/create-class-h.sh new file mode 100644 index 00000000000..703d61207b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/create-class-h.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` + +cat < + +namespace search { +namespace engine { + +DocsumReply::UP +DocsumServer::getDocsums(DocsumRequest::UP request) +{ + (void) request; + assert(false); + return DocsumReply::UP(); +} + +} +} diff --git a/searchlib/src/vespa/searchlib/engine/docsumapi.h b/searchlib/src/vespa/searchlib/engine/docsumapi.h new file mode 100644 index 00000000000..39c5acd07e3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/docsumapi.h @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "docsumrequest.h" +#include "docsumreply.h" + +namespace search { +namespace engine { + +/** + * A docsum client is the object being notified of the completion of + * an asynchronous docsum operation. + **/ +class DocsumClient +{ +public: + /** + * Invoked by the docsum server to indicate the completion of an + * asynchronous docsum operation. + * + * @param reply the docsum reply + **/ + virtual void getDocsumsDone(DocsumReply::UP reply) = 0; + + /** + * Empty, needed for subclassing + **/ + virtual ~DocsumClient() {} +}; + +/** + * A docsum server is an object capable of performing a docsum + * operation. + **/ +class DocsumServer +{ +public: + /** + * Initiate a docsum operation that can be completed either + * synchronously or asynchronously. The return value will indicate + * whether the server selected to perform the operation + * synchronously or asynchronously. If the return value contains + * an object, then the operation completed synchronously and no + * further action will be taken by the server. If the return value + * did not contain an object, the operation will continue + * asynchronously, and the given client will be notified when the + * operation is completed. The server is not allowed to signal an + * asynchronous completion of the operation in the context of this + * method invocation. 
+ * + * @return actual return value if sync, 'null' if async + * @param request object containing request parameters. + * Note that it is decoded lazily -> upon access. + * @param client the client to be notified of async completion + **/ + + virtual DocsumReply::UP getDocsums(DocsumRequest::Source request, DocsumClient &client) = 0; + /** + * As above but synchronous. + * @param request object containing request parameters. + * @return the response. + **/ + virtual DocsumReply::UP getDocsums(DocsumRequest::UP request); + + /** + * Empty, needed for subclassing + **/ + virtual ~DocsumServer() {} +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/docsumreply.cpp b/searchlib/src/vespa/searchlib/engine/docsumreply.cpp new file mode 100644 index 00000000000..3fb21abc959 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/docsumreply.cpp @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".engine.docsumreply"); +#include "docsumreply.h" +#include "tracereply.h" + +namespace search { +namespace engine { + +DocsumReply::DocsumReply() : DocsumReply(vespalib::Slime::UP(nullptr)) { } + +DocsumReply::DocsumReply(vespalib::Slime::UP root) + : docsums(), + _root(std::move(root)) +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/docsumreply.h b/searchlib/src/vespa/searchlib/engine/docsumreply.h new file mode 100644 index 00000000000..4a751069bd8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/docsumreply.h @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include "tracereply.h" + +namespace search { +namespace engine { + +struct DocsumReply +{ + typedef std::unique_ptr UP; + + typedef vespalib::MallocPtr Blob; + + struct Docsum { + uint32_t docid; + document::GlobalId gid; + Blob data; + + Docsum() : docid(0), gid(), data(0) {} + Docsum(document::GlobalId gid_) : docid(0), gid(gid_), data(0) { } + Docsum(document::GlobalId gid_, const char *buf, uint32_t len) : docid(0), gid(gid_), data(len) { + memcpy(data.str(), buf, len); + } + Docsum & setData(const char *buf, uint32_t len) { + data.resize(len); + memcpy(data.str(), buf, len); + return *this; + } + }; + std::vector docsums; + + mutable DocsumRequest::UP request; + vespalib::Slime::UP _root; + + DocsumReply(); + DocsumReply(vespalib::Slime::UP root); +}; + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp b/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp new file mode 100644 index 00000000000..80c4202f013 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.cpp @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.docsumrequest"); +#include "docsumrequest.h" +#include "packetconverter.h" + +namespace search { +namespace engine { + +DocsumRequest::DocsumRequest() + : DocsumRequest(false) +{ +} + +DocsumRequest::DocsumRequest(bool useRootSlime_) + : _flags(0u), + resultClassName(), + useWideHits(false), + _useRootSlime(useRootSlime_), + hits() +{ +} + + +void DocsumRequest::Source::lazyDecode() const +{ + if ((_request.get() == NULL) && (_fs4Packet != NULL)) { + _request.reset(new DocsumRequest()); + PacketConverter::toDocsumRequest(*_fs4Packet, *_request); + _fs4Packet->Free(); + _fs4Packet = NULL; + } +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/docsumrequest.h b/searchlib/src/vespa/searchlib/engine/docsumrequest.h new file mode 100644 index 00000000000..c7b613f203d --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/docsumrequest.h @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include "propertiesmap.h" +#include "request.h" +#include "source_description.h" +#include +#include + +namespace search { +namespace engine { + +class DocsumRequest : public Request +{ +public: + typedef fs4transport::FS4Packet_GETDOCSUMSX FS4Packet_GETDOCSUMSX; + + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + class Source { + private: + mutable DocsumRequest::UP _request; + mutable FS4Packet_GETDOCSUMSX *_fs4Packet; + void lazyDecode() const; + const SourceDescription _desc; + public: + + Source(DocsumRequest * request) : _request(request), _fs4Packet(NULL), _desc(0) {} + Source(DocsumRequest::UP request) : _request(std::move(request)), _fs4Packet(NULL), _desc(0) {} + Source(FS4Packet_GETDOCSUMSX *query, SourceDescription desc) : _request(), _fs4Packet(query), _desc(desc) { } + + Source(Source && rhs) + : _request(std::move(rhs._request)), + _fs4Packet(rhs._fs4Packet), + _desc(std::move(rhs._desc)) + { + rhs._fs4Packet = NULL; + } + + ~Source() { + if (_fs4Packet != NULL) { + _fs4Packet->Free(); + } + } + + const DocsumRequest * operator -> () const { return get(); } + + const DocsumRequest * get() const { + lazyDecode(); + return _request.get(); + } + + Source& operator= (Source && rhs) = delete; + Source & operator= (const Source &) = delete; + Source(const Source &) = delete; + + UP release() { + lazyDecode(); + return std::move(_request); + } + }; + + class Hit + { + public: + Hit() : gid(), docid(0), path(0) {} + Hit(const document::GlobalId & gid_) : gid(gid_), docid(0), path(0) {} + + document::GlobalId gid; + mutable uint32_t docid; // converted in backend + uint32_t path; // wide + }; + +public: + uint32_t _flags; + vespalib::string resultClassName; + bool useWideHits; +private: + const bool _useRootSlime; +public: + std::vector hits; + std::vector sessionId; + + DocsumRequest(); + explicit DocsumRequest(bool useRootSlime_); + + const vespalib::stringref getStackRef() const { + return 
vespalib::stringref(&stackDump[0], stackDump.size()); + } + bool useRootSlime() const { return _useRootSlime; } +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/errorcodes.cpp b/searchlib/src/vespa/searchlib/engine/errorcodes.cpp new file mode 100644 index 00000000000..beeff3f86c2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/errorcodes.cpp @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "errorcodes.h" + +namespace search { +namespace engine { + +const char * +getStringFromErrorCode(ErrorCode ecode) +{ + switch (ecode) { + case ECODE_NO_ERROR: + return "No error has occurred"; + case ECODE_GENERAL_ERROR: + return "General error"; + case ECODE_QUERY_PARSE_ERROR: + return "Error parsing query"; + case ECODE_ALL_PARTITIONS_DOWN: + return "All searchnodes are down. This might indicate that no index is available yet."; + case ECODE_ILLEGAL_DATASET: + return "No such dataset"; + case ECODE_OVERLOADED: + return "System is overloaded"; + case ECODE_NOT_IMPLEMENTED: + return "The requested functionality is not implemented"; + case ECODE_QUERY_NOT_ALLOWED: + return "Query not allowed to run"; + case ECODE_TIMEOUT: + return "Query timed out"; + } + return "Unknown error"; +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/errorcodes.h b/searchlib/src/vespa/searchlib/engine/errorcodes.h new file mode 100644 index 00000000000..bd5fe04b4e7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/errorcodes.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1999-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +namespace search { +namespace engine { + +/** + * Enum defining global error codes. + * Used in error_code field in search::fs4transport::PCODE_ERROR packets. + **/ +enum ErrorCode { + ECODE_NO_ERROR = 0, + ECODE_GENERAL_ERROR = 1, + ECODE_QUERY_PARSE_ERROR = 2, + ECODE_ALL_PARTITIONS_DOWN = 3, + ECODE_ILLEGAL_DATASET = 4, + ECODE_OVERLOADED = 5, + ECODE_NOT_IMPLEMENTED = 6, + ECODE_QUERY_NOT_ALLOWED = 7, + ECODE_TIMEOUT = 8 +}; + +/** + * Normally error codes should be accompanied by an error message + * describing the error. If no such message is present, this method + * may be used to obtain the default description of an error code. + * + * @param error the error code we want info about. + * @return the default error message for the given error code. + **/ +const char* getStringFromErrorCode(ErrorCode error); + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/monitorapi.h b/searchlib/src/vespa/searchlib/engine/monitorapi.h new file mode 100644 index 00000000000..20c0bdb9caa --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/monitorapi.h @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "monitorrequest.h" +#include "monitorreply.h" + +namespace search { +namespace engine { + +/** + * A monitor client is the object being notified of the completion of + * an asynchronous monitor operation. + **/ +class MonitorClient +{ +public: + /** + * Invoked by the monitor server to indicate the completion of an + * asynchronous monitor operation. 
+ * + * @param reply the monitor reply + **/ + virtual void pingDone(MonitorReply::UP reply) = 0; + + /** + * Empty, needed for subclassing + **/ + virtual ~MonitorClient() {} +}; + +/** + * A monitor server is an object capable of performing a monitor + * operation. + **/ +class MonitorServer +{ +public: + /** + * Initiate a monitor operation that can be completed either + * synchronously or asynchronously. The return value will indicate + * whether the server selected to perform the operation + * synchronously or asynchronously. If the return value contains + * an object, then the operation completed synchronously and no + * further action will be taken by the server. If the return value + * did not contain an object, the operation will continue + * asynchronously, and the given client will be notified when the + * operation is completed. The server is not allowed to signal an + * asynchronous completion of the operation in the context of this + * method invocation. + * + * @return actual return value if sync, 'null' if async + * @param request object containing request parameters + * @param client the client to be notified of async completion + **/ + virtual MonitorReply::UP ping(MonitorRequest::UP request, MonitorClient &client) = 0; + + /** + * Empty, needed for subclassing + **/ + virtual ~MonitorServer() {} +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/monitorreply.cpp b/searchlib/src/vespa/searchlib/engine/monitorreply.cpp new file mode 100644 index 00000000000..576f8e9bf39 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/monitorreply.cpp @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.monitorreply"); +#include "monitorreply.h" + +namespace search { +namespace engine { + +MonitorReply::MonitorReply() + : mld(), + activeDocsRequested(false), + partid(), + timestamp(), + totalNodes(), + activeNodes(), + totalParts(), + activeParts(), + activeDocs(0), + flags() +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/monitorreply.h b/searchlib/src/vespa/searchlib/engine/monitorreply.h new file mode 100644 index 00000000000..ff4d64ff941 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/monitorreply.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace engine { + +struct MonitorReply +{ + typedef std::unique_ptr UP; + + bool mld; + bool activeDocsRequested; + uint32_t partid; + uint32_t timestamp; + uint32_t totalNodes; // mld + uint32_t activeNodes; // mld + uint32_t totalParts; // mld + uint32_t activeParts; // mld + uint64_t activeDocs; + uint32_t flags; + + MonitorReply(); +}; + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp b/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp new file mode 100644 index 00000000000..39d97830443 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/monitorrequest.cpp @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.monitorrequest"); +#include "monitorrequest.h" + +namespace search { +namespace engine { + +MonitorRequest::MonitorRequest() + : reportActiveDocs(false), flags(0) +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/monitorrequest.h b/searchlib/src/vespa/searchlib/engine/monitorrequest.h new file mode 100644 index 00000000000..1f99180a754 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/monitorrequest.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + + +namespace search { +namespace engine { + +struct MonitorRequest +{ + typedef std::shared_ptr SP; + typedef std::unique_ptr UP; + + bool reportActiveDocs; + uint32_t flags; + + MonitorRequest(); +}; + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/packetconverter.cpp b/searchlib/src/vespa/searchlib/engine/packetconverter.cpp new file mode 100644 index 00000000000..9282c7cc820 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/packetconverter.cpp @@ -0,0 +1,261 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.packetconverter"); +#include "packetconverter.h" + +namespace { + +bool checkFeature(uint32_t features, uint32_t mask) { + return ((features & mask) != 0); +} + +struct FS4PropertiesBuilder : public search::fef::IPropertiesVisitor { + uint32_t idx; + search::fs4transport::FS4Properties &props; + FS4PropertiesBuilder(search::fs4transport::FS4Properties &p) : idx(0), props(p) {} + virtual void visitProperty(const search::fef::Property::Value &key, + const search::fef::Property &values) + { + for (uint32_t i = 0; i < values.size(); ++i) { + props.setKey(idx, key.data(), key.size()); + props.setValue(idx, values.getAt(i).data(), values.getAt(i).size()); + ++idx; + } + } +}; + +} // namespace + +namespace search { +namespace engine { + +using namespace search::fs4transport; + +void +PacketConverter::fillPacketProperties(const PropertiesMap &source, PropsVector& target) +{ + target.resize(source.size()); + PropertiesMap::ITR itr = source.begin(); + PropertiesMap::ITR end = source.end(); + for (uint32_t i = 0; itr != end; ++itr, ++i) { + const vespalib::string &name = itr->first; + const search::fef::Properties &values = itr->second; + target[i].setName(name.c_str(), name.size()); + target[i].allocEntries(values.numValues()); + FS4PropertiesBuilder builder(target[i]); + values.visitProperties(builder); + LOG_ASSERT(builder.idx == target[i].size()); + LOG_ASSERT(builder.idx == values.numValues()); + } +} + +void +PacketConverter::toSearchRequest(const QUERYX &packet, SearchRequest &request) +{ + request.offset = packet._offset; + request.maxhits = packet._maxhits; + request.setTimeout(packet.getTimeout()); + request.queryFlags = packet._qflags; + request.ranking = packet._ranking; + + for (uint32_t i = 0; i < packet._propsVector.size(); ++i) { + const FS4Properties &src = packet._propsVector[i]; + search::fef::Properties &dst = request.propertiesMap.lookupCreate(src.getName()); + for (uint32_t e = 0; e < src.size(); ++e) { + 
dst.add(vespalib::stringref(src.getKey(e), src.getKeyLen(e)), + vespalib::stringref(src.getValue(e), src.getValueLen(e))); + } + } + request.sortSpec = packet._sortSpec; + request.groupSpec.assign( packet._groupSpec.begin(), packet._groupSpec.end()); + request.sessionId.assign( packet._sessionId.begin(), packet._sessionId.end()); + request.location = packet._location; + request.stackItems = packet._numStackItems; + request.stackDump.assign( packet._stackDump.begin(), packet._stackDump.end()); +} + +void +PacketConverter::fromSearchRequest(const SearchRequest &request, QUERYX &packet) +{ + // not needed yet + (void) packet; + (void) request; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::toSearchReply(const QUERYRESULTX &packet, SearchReply &reply) +{ + // not needed yet + (void) packet; + (void) reply; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::fromSearchReply(const SearchReply &reply, QUERYRESULTX &packet) +{ + packet._offset = reply.offset; + packet._numDocs = reply.hits.size(); + packet._totNumDocs = reply.totalHitCount; + packet._maxRank = reply.maxRank; + packet.setDistributionKey(reply.getDistributionKey()); + if (reply.sortIndex.size() > 0) { + packet._features |= QRF_SORTDATA; + uint32_t idxCnt = reply.sortIndex.size(); + LOG_ASSERT(reply.sortIndex.size() == reply.hits.size()+1); + // allocate for N hits (will make space for N+1 indexes) + packet.AllocateSortIndex(reply.hits.size()); + packet.AllocateSortData(reply.sortData.size()); + for (uint32_t i = 0; i < idxCnt; ++i) { + packet._sortIndex[i] = reply.sortIndex[i]; + } + memcpy(packet._sortData, &(reply.sortData[0]), reply.sortData.size()); + } + if (reply.groupResult.size() > 0) { + packet._features |= QRF_GROUPDATA; + packet.AllocateGroupData(reply.groupResult.size()); + memcpy(packet._groupData, &(reply.groupResult[0]), reply.groupResult.size()); + } + if (reply.useCoverage) { + packet._features |= QRF_COVERAGE; + packet._coverageDocs = reply.coverage.getCovered(); + 
packet._activeDocs = reply.coverage.getActive(); + } + if (reply.useWideHits) { + packet._features |= QRF_MLD; + } + if (reply.propertiesMap.size() > 0) { + fillPacketProperties(reply.propertiesMap, packet._propsVector); + packet._features |= QRF_PROPERTIES; + } + uint32_t hitCnt = reply.hits.size(); + packet.AllocateHits(hitCnt); + for (uint32_t i = 0; i < hitCnt; ++i) { + packet._hits[i]._gid = reply.hits[i].gid; + packet._hits[i]._metric = reply.hits[i].metric; + packet._hits[i]._partid = reply.hits[i].path; + packet._hits[i].setDistributionKey(reply.hits[i].getDistributionKey()); + } +} + +void +PacketConverter::toDocsumRequest(const GETDOCSUMSX &packet, DocsumRequest &request) +{ + request.setTimeout(packet.getTimeout()); + request.ranking = packet._ranking; + request.queryFlags = packet._qflags; + request.resultClassName = packet._resultClassName; + for (uint32_t i = 0; i < packet._propsVector.size(); ++i) { + const FS4Properties &src = packet._propsVector[i]; + search::fef::Properties &dst = request.propertiesMap.lookupCreate(src.getName()); + for (uint32_t e = 0; e < src.size(); ++e) { + dst.add(vespalib::stringref(src.getKey(e), src.getKeyLen(e)), + vespalib::stringref(src.getValue(e), src.getValueLen(e))); + } + } + request.stackItems = packet._stackItems; + request.stackDump.assign(packet._stackDump.begin(), packet._stackDump.end()); + request.location = packet._location; + request._flags = packet._flags; + request.useWideHits = checkFeature(packet._features, GDF_MLD); + uint32_t hitCnt = packet._docidCnt; + request.hits.resize(hitCnt); + for (uint32_t i = 0; i < hitCnt; ++i) { + request.hits[i].gid = packet._docid[i]._gid; + request.hits[i].path = packet._docid[i]._partid; + } + search::fef::Property sessionId = + request.propertiesMap.rankProperties().lookup("sessionId"); + if (sessionId.found()) { + vespalib::string id = sessionId.get(); + request.sessionId.assign(id.begin(), id.end()); + } +} + +void +PacketConverter::fromDocsumRequest(const 
DocsumRequest &request, GETDOCSUMSX &packet) +{ + // not needed yet + (void) packet; + (void) request; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::toDocsumReplyElement(const DOCSUM &packet, DocsumReply::Docsum &docsum) +{ + // not needed yet + (void) packet; + (void) docsum; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::fromDocsumReplyElement(const DocsumReply::Docsum &docsum, DOCSUM &packet) +{ + if (docsum.data.get() != 0) { + packet.SetBuf(docsum.data.c_str(), docsum.data.size()); + } + packet.setGid(docsum.gid); +} + +void +PacketConverter::toMonitorRequest(const MONITORQUERYX &packet, MonitorRequest &request) +{ + request.flags = packet._qflags; + if ((packet._qflags & MQFLAG_REPORT_ACTIVEDOCS) != 0) { + request.reportActiveDocs = true; + } +} + +void +PacketConverter::fromMonitorRequest(const MonitorRequest &request, MONITORQUERYX &packet) +{ + // not needed yet + (void) packet; + (void) request; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::toMonitorReply(const MONITORRESULTX &packet, MonitorReply &reply) +{ + // not needed yet + (void) packet; + (void) reply; + LOG_ABORT("not implemented"); +} + +void +PacketConverter::fromMonitorReply(const MonitorReply &reply, MONITORRESULTX &packet) +{ + if (reply.mld) { + packet._features |= MRF_MLD; + } + if (reply.activeDocsRequested) { + packet._features |= MRF_ACTIVEDOCS; + packet._activeDocs = reply.activeDocs; + } + packet._partid = reply.partid; + packet._timestamp = reply.timestamp; + packet._totalNodes = reply.totalNodes; + packet._activeNodes = reply.activeNodes; + packet._totalParts = reply.totalParts; + packet._activeParts = reply.activeParts; + packet._rflags = reply.flags; + if (packet._rflags != 0) { + packet._features |= MRF_RFLAGS; + } +} + +void +PacketConverter::fromTraceReply(const TraceReply &reply, TRACEREPLY &packet) +{ + fillPacketProperties(reply.propertiesMap, packet._propsVector); +} + +} // namespace engine +} // namespace search diff --git 
a/searchlib/src/vespa/searchlib/engine/packetconverter.h b/searchlib/src/vespa/searchlib/engine/packetconverter.h
new file mode 100644
index 00000000000..f58bf6a9642
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/packetconverter.h
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include "searchrequest.h"
+#include "searchreply.h"
+#include "docsumrequest.h"
+#include "docsumreply.h"
+#include "monitorrequest.h"
+#include "monitorreply.h"
+#include "tracereply.h"
+
+namespace search {
+namespace engine {
+
+
+/**
+ * This class helps convert data back and forth between transport
+ * packets and engine api request/reply objects. All converting
+ * methods expect the const object to be fully filled out and the
+ * non-const object to be newly created and thus empty. Half of the
+ * methods are left unimplemented for now as they would only be needed
+ * if we also were to use the api to wrap remote engines. However, if
+ * such a time comes, we will probably not be using the packet
+ * protocol anymore anyways.
+ *
+ * Methods marked (NOT YET IMPLEMENTED) call LOG_ABORT("not implemented")
+ * in the current implementation and must not be invoked.
+ **/
+class PacketConverter
+{
+private:
+    PacketConverter(); // can not be instantiated
+    PacketConverter(const PacketConverter &);
+    PacketConverter &operator=(const PacketConverter &);
+
+public:
+    typedef search::fs4transport::FS4Packet_QUERYX QUERYX;
+    typedef search::fs4transport::FS4Packet_QUERYRESULTX QUERYRESULTX;
+    typedef search::fs4transport::FS4Packet_QUEUELEN QUEUELEN;
+    typedef search::fs4transport::FS4Packet_ERROR ERROR;
+    typedef search::fs4transport::FS4Packet_GETDOCSUMSX GETDOCSUMSX;
+    typedef search::fs4transport::FS4Packet_DOCSUM DOCSUM;
+    typedef search::fs4transport::FS4Packet_EOL EOL;
+    typedef search::fs4transport::FS4Packet_MONITORQUERYX MONITORQUERYX;
+    typedef search::fs4transport::FS4Packet_MONITORRESULTX MONITORRESULTX;
+    typedef search::fs4transport::FS4Packet_TRACEREPLY TRACEREPLY;
+
+    /**
+     * Utility conversion from a "fef" set of propertymaps to an array of FS4Properties.
+     * The target is resized to hold one entry per named collection in
+     * the source; each entry gets the collection name and all of its
+     * key/value pairs. Nothing is returned.
+     *
+     * @param source named collections of properties to convert
+     * @param target vector of FS4Properties to fill
+     **/
+    static void
+    fillPacketProperties(const PropertiesMap &source, search::fs4transport::PropsVector& target);
+
+    /**
+     * Convert from a QUERYX packet to a SearchRequest object.
+     *
+     * @param packet transport packet
+     * @param request api request object
+     **/
+    static void toSearchRequest(const QUERYX &packet, SearchRequest &request);
+
+    /**
+     * Convert from a SearchRequest object to a QUERYX packet.
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param request api request object
+     * @param packet transport packet
+     **/
+    static void fromSearchRequest(const SearchRequest &request, QUERYX &packet);
+
+    /**
+     * Convert from a QUERYRESULTX packet to a SearchReply object.
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param packet transport packet
+     * @param reply api reply object
+     **/
+    static void toSearchReply(const QUERYRESULTX &packet, SearchReply &reply);
+
+    /**
+     * Convert from a SearchReply object to a QUERYRESULTX
+     * packet. Note that this method only handles the query result
+     * aspect of the reply; errors and queue length reporting still
+     * need to be handled separately by the code using this utility
+     * method.
+     *
+     * @param reply api reply object
+     * @param packet transport packet
+     **/
+    static void fromSearchReply(const SearchReply &reply, QUERYRESULTX &packet);
+
+    /**
+     * Convert from a GETDOCSUMSX packet to a DocsumRequest object.
+     *
+     * @param packet transport packet
+     * @param request api request object
+     **/
+    static void toDocsumRequest(const GETDOCSUMSX &packet, DocsumRequest &request);
+
+    /**
+     * Convert from a DocsumRequest object to a GETDOCSUMSX packet.
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param request api request object
+     * @param packet transport packet
+     **/
+    static void fromDocsumRequest(const DocsumRequest &request, GETDOCSUMSX &packet);
+
+    /**
+     * Convert from a DOCSUM packet to an entry in a DocsumReply object
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param packet transport packet
+     * @param docsum api reply object element
+     **/
+    static void toDocsumReplyElement(const DOCSUM &packet, DocsumReply::Docsum &docsum);
+
+    /**
+     * Convert from an entry in a DocsumReply object to a DOCSUM packet.
+     *
+     * @param docsum api reply object element
+     * @param packet transport packet
+     **/
+    static void fromDocsumReplyElement(const DocsumReply::Docsum &docsum, DOCSUM &packet);
+
+    /**
+     * Convert a MONITORQUERYX packet to a MonitorRequest object.
+     *
+     * @param packet transport packet
+     * @param request api request object
+     **/
+    static void toMonitorRequest(const MONITORQUERYX &packet, MonitorRequest &request);
+
+    /**
+     * Convert from a MonitorRequest object to a MONITORQUERYX packet
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param request api request object
+     * @param packet transport packet
+     **/
+    static void fromMonitorRequest(const MonitorRequest &request, MONITORQUERYX &packet);
+
+    /**
+     * Convert from a MONITORRESULTX packet to a MonitorReply object.
+     *
+     * (NOT YET IMPLEMENTED)
+     *
+     * @param packet transport packet
+     * @param reply api reply object
+     **/
+    static void toMonitorReply(const MONITORRESULTX &packet, MonitorReply &reply);
+
+    /**
+     * Convert from a MonitorReply object to a MONITORRESULTX packet.
+     *
+     * @param reply api reply object
+     * @param packet transport packet
+     **/
+    static void fromMonitorReply(const MonitorReply &reply, MONITORRESULTX &packet);
+
+    /**
+     * Convert from a TraceReply object to a TRACEREPLY packet. Only the
+     * properties map of the reply is transferred.
+     *
+     * @param reply api reply object
+     * @param packet transport packet
+     **/
+    static void fromTraceReply(const TraceReply &reply, TRACEREPLY &packet);
+};
+
+} // namespace engine
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp b/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp
new file mode 100644
index 00000000000..8f90ebdfc75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/propertiesmap.cpp
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".engine.propertiesmap");
+#include "propertiesmap.h"
+
+namespace search {
+namespace engine {
+
+// Shared empty instance handed out by lookup() for unknown names.
+search::fef::Properties PropertiesMap::_emptyProperties;
+
+// Returns the collection stored under 'name', inserting an empty one
+// first if the name is not present yet.
+search::fef::Properties &
+PropertiesMap::lookupCreate(const vespalib::stringref &name)
+{
+    return _propertiesMap[name];
+}
+
+// Read-only lookup: never inserts; unknown names yield the shared
+// empty collection.
+const search::fef::Properties &
+PropertiesMap::lookup(const vespalib::stringref &name) const
+{
+    PropsMap::const_iterator pos = _propertiesMap.find(name);
+    if (pos == _propertiesMap.end()) {
+        return _emptyProperties;
+    }
+    return pos->second;
+}
+
+} // namespace engine
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/engine/propertiesmap.h b/searchlib/src/vespa/searchlib/engine/propertiesmap.h
new file mode 100644
index 00000000000..f6f34512483
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/engine/propertiesmap.h
@@ -0,0 +1,129 @@
+// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace engine { + +/** + * A simple wrapper class used to hold multiple named collections of + * properties. + **/ +class PropertiesMap +{ +private: + typedef search::fef::Properties Props; + typedef vespalib::hash_map PropsMap; + + static Props _emptyProperties; + PropsMap _propertiesMap; + + /** + * Obtain a named collection of properties. This method will + * return an empty collection of properties if the properties did + * not exist. + * + * @param name name of properties + * @return the properties + **/ + const search::fef::Properties &lookup(const vespalib::stringref &name) const; + +public: + typedef PropsMap::const_iterator ITR; + + /** + * Obtain a named collection of properties. This method will + * create the properties if they did not exist yet. + * + * @param name name of properties + * @return the properties + **/ + search::fef::Properties &lookupCreate(const vespalib::stringref &name); + + /** + * Obtain the number of named collection of properties held by + * this object. + * + * @return number of named collections of properties + **/ + uint32_t size() const { return _propertiesMap.size(); } + + /** + * Iterate the map. + * + * @return begin iterator + **/ + ITR begin() const { return _propertiesMap.begin(); } + + /** + * Iterate the map. 
+ * + * @return end iterator + **/ + ITR end() const { return _propertiesMap.end(); } + + /** + * Obtain rank properties (used to tune ranking evaluation) + * + * @return rank properties + **/ + const search::fef::Properties &rankProperties() const { + return lookup(MapNames::RANK); + } + + /** + * Obtain feature overrides (used to hardwire the values of + * features during ranking evaluation) + * + * @return feature overrides + **/ + const search::fef::Properties &featureOverrides() const { + return lookup(MapNames::FEATURE); + } + + /** + * Obtain properties used to define additional highlight terms to + * be used during dynamic summary generation. + * + * @return highlight terms properties + **/ + const search::fef::Properties &highlightTerms() const { + return lookup(MapNames::HIGHLIGHTTERMS); + } + + /** + * Obtain match properties (used to tune match evaluation) + * + * @return match properties + **/ + const search::fef::Properties &matchProperties() const { + return lookup(MapNames::MATCH); + } + + /** + * Obtain cache properties (used to tune cache usage) + * + * @return cache properties + **/ + const search::fef::Properties &cacheProperties() const { + return lookup(MapNames::CACHES); + } + + /** + * Obtain model overrides + * + * @return model properties + **/ + const search::fef::Properties &modelOverrides() const { + return lookup(MapNames::MODEL); + } + +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/request.cpp b/searchlib/src/vespa/searchlib/engine/request.cpp new file mode 100644 index 00000000000..16853281098 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/request.cpp @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "request.h" + +namespace search { +namespace engine { + +Request::Request() : + _startTime(fastos::ClockSystem::now()), + _timeOfDoom(fastos::TimeStamp(fastos::TimeStamp::FUTURE)), + ranking(), + queryFlags(0), + location(), + propertiesMap(), + stackItems(0), + stackDump() +{ +} + +void Request::setTimeout(const fastos::TimeStamp & timeout) +{ + _timeOfDoom = _startTime + timeout; +} + +fastos::TimeStamp Request::getTimeUsed() const +{ + return fastos::TimeStamp(fastos::ClockSystem::now()) - _startTime; +} + +fastos::TimeStamp Request::getTimeLeft() const +{ + return _timeOfDoom - fastos::TimeStamp(fastos::ClockSystem::now()); +} + +Request::~Request() +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/request.h b/searchlib/src/vespa/searchlib/engine/request.h new file mode 100644 index 00000000000..a65f9896f98 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/request.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include "propertiesmap.h" + +namespace search { +namespace engine { + +class Request +{ +public: + Request(); + virtual ~Request(); + void setTimeout(const fastos::TimeStamp & timeout); + fastos::TimeStamp getStartTime() const { return _startTime; } + fastos::TimeStamp getTimeOfDoom() const { return _timeOfDoom; } + fastos::TimeStamp getTimeUsed() const; + fastos::TimeStamp getTimeLeft() const; + bool expired() const { return getTimeLeft() > 0l; } + + const vespalib::stringref getStackRef() const { + return vespalib::stringref(&stackDump[0], stackDump.size()); + } + +private: + const fastos::TimeStamp _startTime; + fastos::TimeStamp _timeOfDoom; +public: + /// Everything here should move up to private section and have accessors + vespalib::string ranking; + uint32_t queryFlags; + vespalib::string location; + PropertiesMap propertiesMap; + uint32_t stackItems; + std::vector stackDump; +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/searchapi.h b/searchlib/src/vespa/searchlib/engine/searchapi.h new file mode 100644 index 00000000000..d44dd07ae89 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/searchapi.h @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchrequest.h" +#include "searchreply.h" + +namespace search { +namespace engine { + +/** + * A search client is the object being notified of the completion of + * an asynchronous search operation. + **/ +class SearchClient +{ +public: + /** + * Invoked by the search server to indicate the completion of an + * asynchronous search operation. + * + * @param reply the search reply + **/ + virtual void searchDone(SearchReply::UP reply) = 0; + + /** + * Empty, needed for subclassing + **/ + virtual ~SearchClient() {} +}; + +/** + * A search server is an object capable of performing a search + * operation. 
+ **/ +class SearchServer +{ +public: + /** + * Initiate a search operation that can be completed either + * synchronously or asynchronously. The return value will indicate + * whether the server selected to perform the operation + * synchronously or asynchronously. If the return value contains + * an object, then the operation completed synchronously and no + * further action will be taken by the server. If the return value + * did not contain an object, the operation will continue + * asynchronously, and the given client will be notified when the + * operation is completed. The server is not allowed to signal an + * asynchronous completion of the operation in the context of this + * method invocation. + * + * @return actual return value if sync, 'null' if async + * @param request object containing request parameters + * @param client the client to be notified of async completion + **/ + virtual SearchReply::UP search(SearchRequest::Source request, SearchClient &client) = 0; + + /** + * Empty, needed for subclassing + **/ + virtual ~SearchServer() {} +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.cpp b/searchlib/src/vespa/searchlib/engine/searchreply.cpp new file mode 100644 index 00000000000..529be053e42 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/searchreply.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.searchreply"); +#include "searchreply.h" +#include + +namespace search { +namespace engine { + +SearchReply::SearchReply() /* a fresh reply is marked valid, carries no error code and holds no request */ + : valid(true), + offset(0), + _distributionKey(0), + totalHitCount(0), + maxRank(0), + sortIndex(), + sortData(), + groupResult(), + useCoverage(false), + coverage(), + useWideHits(false), + hits(), + errorCode(0), + errorMessage(), + useQueueLen(false), + queueLen(0), + request() +{ +} + +SearchReply::SearchReply(const SearchReply &rhs) + : + valid (rhs.valid), + offset (rhs.offset), + _distributionKey (rhs._distributionKey), + totalHitCount(rhs.totalHitCount), + maxRank (rhs.maxRank), + sortIndex (rhs.sortIndex), + sortData (rhs.sortData), + groupResult (rhs.groupResult), + useCoverage (rhs.useCoverage), + coverage (rhs.coverage), + useWideHits (rhs.useWideHits), + hits (rhs.hits), + errorCode (rhs.errorCode), + errorMessage (rhs.errorMessage), + useQueueLen (rhs.useQueueLen), + queueLen (rhs.queueLen), + request() // NB not copied; propertiesMap is not in this list either, so the copy gets a default-constructed one +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/searchreply.h b/searchlib/src/vespa/searchlib/engine/searchreply.h new file mode 100644 index 00000000000..692806114ee --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/searchreply.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { +namespace engine { + +class SearchReply +{ +public: + typedef std::unique_ptr UP; + + class Hit + { + public: + Hit() : gid(), metric(0), path(0), _distributionKey(0) {} + void setDistributionKey(uint32_t key) { _distributionKey = key; } + uint32_t getDistributionKey() const { return _distributionKey; } + document::GlobalId gid; + search::HitRank metric; + uint32_t path; // wide + private: + int32_t _distributionKey; // wide + }; + + class Coverage { + public: + Coverage() : _covered(0), _active(0) {} + Coverage(uint64_t active) : _covered(active), _active(active) {} + Coverage(uint64_t active, uint64_t covered) : _covered(covered), _active(active) {} + uint64_t getCovered() const { return _covered; } + uint64_t getActive() const { return _active; } + Coverage & setCovered(uint64_t v) { _covered = v; return *this; } + Coverage & setActive(uint64_t v) { _active = v; return *this; } + private: + uint64_t _covered; + uint64_t _active; + }; + + // set to false to indicate 'talk to the hand' behavior + bool valid; + + // normal results + uint32_t offset; +private: + uint32_t _distributionKey; +public: + uint64_t totalHitCount; + search::HitRank maxRank; + std::vector sortIndex; + std::vector sortData; + vespalib::Array groupResult; + bool useCoverage; + Coverage coverage; + bool useWideHits; + std::vector hits; + PropertiesMap propertiesMap; + + // in case of error + uint32_t errorCode; + vespalib::string errorMessage; + + // piggyback monitoring + bool useQueueLen; + uint32_t queueLen; + SearchRequest::UP request; + + SearchReply(); + SearchReply(const SearchReply &rhs); // for test only + + void setDistributionKey(uint32_t key) { _distributionKey = key; } + uint32_t getDistributionKey() const { return _distributionKey; } +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/searchrequest.cpp 
b/searchlib/src/vespa/searchlib/engine/searchrequest.cpp new file mode 100644 index 00000000000..06df3c0b764 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/searchrequest.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".engine.searchrequest"); +#include "searchrequest.h" +#include "packetconverter.h" + +namespace search { +namespace engine { + +SearchRequest::SearchRequest() + : Request(), + offset(0), + maxhits(10), + sortSpec(), + groupSpec(), + sessionId() +{ +} + +void SearchRequest::Source::lazyDecode() const +{ + if ((_request.get() == NULL) && (_fs4Packet != NULL)) { /* only decode when no request exists yet and a packet is still held */ + _request.reset(new SearchRequest()); + PacketConverter::toSearchRequest(*_fs4Packet, *_request); + _fs4Packet->Free(); /* the packet is released once converted ... */ + _fs4Packet = NULL; /* ... and cleared, so the conversion runs at most once */ + } +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/searchrequest.h b/searchlib/src/vespa/searchlib/engine/searchrequest.h new file mode 100644 index 00000000000..9214aec02f6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/searchrequest.h @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include "propertiesmap.h" +#include "request.h" +#include "source_description.h" +#include + +namespace search { +namespace engine { + +class SearchRequest : public Request +{ +public: + typedef std::unique_ptr<SearchRequest> UP; + typedef fs4transport::FS4Packet_QUERYX FS4Packet_QUERYX; + + class Source { + private: + mutable std::unique_ptr<SearchRequest> _request; + mutable FS4Packet_QUERYX *_fs4Packet; + void lazyDecode() const; + const SourceDescription _desc; + public: + + Source(SearchRequest * request) : _request(request), _fs4Packet(NULL), _desc(0) + {} + + Source(FS4Packet_QUERYX *query, SourceDescription desc) : _request(), _fs4Packet(query), _desc(desc) + { + } + + Source(Source && rhs) + : _request(std::move(rhs._request)), + _fs4Packet(rhs._fs4Packet), + _desc(std::move(rhs._desc)) + { + rhs._fs4Packet = NULL; + } + + ~Source() { + if (_fs4Packet != NULL) { + _fs4Packet->Free(); + } + } + + const SearchRequest * operator -> () const { return get(); } + + const SearchRequest * get() const { + lazyDecode(); + return _request.get(); + } + + Source& operator= (Source && rhs) = delete; + Source & operator= (const Source &) = delete; + Source(const Source &) = delete; + + UP release() { + lazyDecode(); + return std::move(_request); + } + }; + typedef std::shared_ptr<SearchRequest> SP; + + uint32_t offset; + uint32_t maxhits; + vespalib::string sortSpec; + std::vector groupSpec; + std::vector sessionId; + + SearchRequest(); + + const vespalib::stringref getStackRef() const { /* data() is well defined for an empty vector, unlike &stackDump[0] */ + return vespalib::stringref(stackDump.data(), stackDump.size()); + } +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/source_description.cpp b/searchlib/src/vespa/searchlib/engine/source_description.cpp new file mode 100644 index 00000000000..91feb25b0ff --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/source_description.cpp @@ -0,0 +1,11 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include "source_description.h" + +namespace search { +namespace engine { + +const vespalib::string SourceDescription::protocol("FS4"); + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/source_description.h b/searchlib/src/vespa/searchlib/engine/source_description.h new file mode 100644 index 00000000000..52c65d35dee --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/source_description.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once +#include + +namespace search { +namespace engine { + +struct SourceDescription { + int listenPort; + static const vespalib::string protocol; + SourceDescription(int port) : listenPort(port) {} +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/tracereply.h b/searchlib/src/vespa/searchlib/engine/tracereply.h new file mode 100644 index 00000000000..2d1cdf7ab61 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/tracereply.h @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "propertiesmap.h" + +namespace search { +namespace engine { + +struct TraceReply +{ + PropertiesMap propertiesMap; +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp b/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp new file mode 100644 index 00000000000..c254787244a --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/transport_metrics.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.transportmetrics"); +#include "transport_metrics.h" + +namespace search { +namespace engine { + +TransportMetrics::QueryMetrics::QueryMetrics(metrics::MetricSet *parent) + : metrics::MetricSet("query", "", "Query metrics", parent), + count("count", "logdefault", "Query requests handled", this), + latency("latency", "logdefault", "Query request latency", this) +{ +} + +TransportMetrics::DocsumMetrics::DocsumMetrics(metrics::MetricSet *parent) + : metrics::MetricSet("docsum", "", "Docsum metrics", parent), + count("count", "logdefault", "Docsum requests handled", this), + docs("docs", "logdefault", "Total docsums returned", this), + latency("latency", "logdefault", "Docsum request latency", this) +{ +} + +TransportMetrics::TransportMetrics() + : metrics::MetricSet("transport", "", "Transport server metrics", 0), + updateLock(), + query(this), + docsum(this) +{ +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/transport_metrics.h b/searchlib/src/vespa/searchlib/engine/transport_metrics.h new file mode 100644 index 00000000000..fa62460434c --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/transport_metrics.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include + +namespace search { +namespace engine { + +struct TransportMetrics : metrics::MetricSet +{ + struct QueryMetrics : metrics::MetricSet { + metrics::LongCountMetric count; + metrics::DoubleAverageMetric latency; + + QueryMetrics(metrics::MetricSet *parent); + }; + + struct DocsumMetrics : metrics::MetricSet { + metrics::LongCountMetric count; + metrics::LongCountMetric docs; + metrics::DoubleAverageMetric latency; + + DocsumMetrics(metrics::MetricSet *parent); + }; + + vespalib::Lock updateLock; + QueryMetrics query; + DocsumMetrics docsum; + + TransportMetrics(); +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/engine/transportserver.cpp b/searchlib/src/vespa/searchlib/engine/transportserver.cpp new file mode 100644 index 00000000000..cdde798b579 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/transportserver.cpp @@ -0,0 +1,427 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".engine.transportserver"); +#include +#include +#include "packetconverter.h" +#include "transportserver.h" + +namespace search { +namespace engine { + +//----------------------------------------------------------------------------- + +typedef search::fs4transport::FS4PersistentPacketStreamer PacketStreamer; + +//----------------------------------------------------------------------------- + +void +TransportServer::SearchHandler::start() +{ + SearchReply::UP reply = parent._searchServer.search(std::move(request), *this); + if (reply.get() != 0) { + searchDone(std::move(reply)); + } +} + +void +TransportServer::SearchHandler::searchDone(SearchReply::UP reply) +{ + if (reply.get() != 0) { + const SearchReply &r = *reply; + if (r.valid) { + if (r.useQueueLen) { + PacketConverter::QUEUELEN *p = new PacketConverter::QUEUELEN(); + p->_queueLen = r.queueLen; + p->_dispatchers = clientCnt; + if (shouldLog(DEBUG_SEARCH)) { + logPacket("outgoing packet", p, 0, channel->GetConnection()); + } + channel->GetConnection()->PostPacket(p, FNET_NOID); + } + if (r.errorCode == 0) { + PacketConverter::QUERYRESULTX *p = new PacketConverter::QUERYRESULTX(); + PacketConverter::fromSearchReply(r, *p); + p->UpdateCompatPCODE(); + if (shouldLog(DEBUG_SEARCH)) { + logPacket("outgoing packet", p, channel, 0); + } + channel->Send(p); + } else { + PacketConverter::ERROR *p = new PacketConverter::ERROR(); + p->_errorCode = r.errorCode; + p->setErrorMessage(r.errorMessage); + if (shouldLog(DEBUG_SEARCH)) { + logPacket("outgoing packet", p, channel, 0); + } + channel->Send(p); + } + if (r.request.get() != NULL) { + parent.updateQueryMetrics(r.request->getTimeUsed().sec()); // possible thread issue + } + } else { + PacketConverter::EOL *p = new PacketConverter::EOL(); + if (shouldLog(DEBUG_SEARCH)) { + logPacket("outgoing packet", p, channel, 0); + } + channel->Send(p); + } + } else { + LOG(warning, "got search reply from back-end"); + } + delete this; // we are 
done +} + +TransportServer::SearchHandler::~SearchHandler() +{ + channel->Free(); +} + +//----------------------------------------------------------------------------- + +void +TransportServer::DocsumHandler::start() +{ + DocsumReply::UP reply = parent._docsumServer.getDocsums(std::move(request), *this); + if (reply.get() != 0) { + getDocsumsDone(std::move(reply)); + } +} + +void +TransportServer::DocsumHandler::getDocsumsDone(DocsumReply::UP reply) +{ + if (reply.get() != 0) { + const DocsumReply &r = *reply; + for (uint32_t i = 0; i < r.docsums.size(); ++i) { + PacketConverter::DOCSUM *p = new PacketConverter::DOCSUM(); + PacketConverter::fromDocsumReplyElement(r.docsums[i], *p); + if (shouldLog(DEBUG_DOCSUM)) { + logPacket("outgoing packet", p, channel, 0); + } + channel->Send(p); + } + PacketConverter::EOL *p = new PacketConverter::EOL(); + if (shouldLog(DEBUG_DOCSUM)) { + logPacket("outgoing packet", p, channel, 0); + } + channel->Send(p); + if (r.request.get() != NULL) { + parent.updateDocsumMetrics(r.request->getTimeUsed().sec(), + r.docsums.size()); // possible thread issue + } + } else { + LOG(warning, "got <null> docsum reply from back-end"); + } + delete this; // we are done +} + +TransportServer::DocsumHandler::~DocsumHandler() +{ + channel->Free(); +} + +//----------------------------------------------------------------------------- + +void +TransportServer::MonitorHandler::start() +{ + MonitorReply::UP reply = parent._monitorServer.ping(std::move(request), *this); + if (reply.get() != 0) { + pingDone(std::move(reply)); + } +} + +void +TransportServer::MonitorHandler::pingDone(MonitorReply::UP reply) +{ + if (reply.get() != 0) { + const MonitorReply &r = *reply; + PacketConverter::MONITORRESULTX *p = new PacketConverter::MONITORRESULTX(); + PacketConverter::fromMonitorReply(r, *p); + p->UpdateCompatPCODE(); + if (shouldLog(DEBUG_MONITOR)) { + logPacket("outgoing packet", p, 0, connection); + } + connection->PostPacket(p, FNET_NOID); + } else { + LOG(warning, 
"got <null> monitor reply from back-end"); + } + delete this; // we are done +} + +TransportServer::MonitorHandler::~MonitorHandler() +{ + connection->SubRef(); +} + +//----------------------------------------------------------------------------- + +FNET_IPacketHandler::HP_RetCode +TransportServer::HandlePacket(FNET_Packet *packet, FNET_Context context) +{ + uint32_t pcode = packet->GetPCODE(); + FNET_Channel *channel = context._value.CHANNEL; + HP_RetCode rc = FNET_FREE_CHANNEL; + + if (channel->GetID() == FNET_NOID) { // admin packet + if (packet->IsChannelLostCMD()) { + _clients.erase(channel); + if (shouldLog(DEBUG_CONNECTION)) { + LOG(debug, "connection closed: tag=%u", channel->GetConnection()->GetContext()._value.INT); + } + } else if (pcode == search::fs4transport::PCODE_MONITORQUERYX) { + const PacketConverter::MONITORQUERYX &mqx = static_cast<const PacketConverter::MONITORQUERYX &>(*packet); + if (shouldLog(DEBUG_MONITOR)) { + logPacket("incoming packet", packet, channel, 0); + } + MonitorRequest::UP req(new MonitorRequest()); + PacketConverter::toMonitorRequest(mqx, *req); + channel->GetConnection()->AddRef(); + _pending.push(new MonitorHandler(*this, std::move(req), channel->GetConnection())); + rc = FNET_KEEP_CHANNEL; + } else if (shouldLog(DEBUG_UNHANDLED)) { + logPacket("unhandled packet", packet, channel, 0); + } + } else { // search/docsum request + if (pcode == search::fs4transport::PCODE_QUERYX) { + PacketConverter::QUERYX * qx = static_cast<PacketConverter::QUERYX *>(packet); + if (shouldLog(DEBUG_SEARCH)) { + logPacket("incoming packet", packet, channel, 0); + } + SearchRequest::Source req(qx, _sourceDesc); + packet = NULL; + _pending.push(new SearchHandler(*this, std::move(req), channel, _clients.size())); + rc = FNET_CLOSE_CHANNEL; + } else if (pcode == search::fs4transport::PCODE_GETDOCSUMSX) { + PacketConverter::GETDOCSUMSX * gdx = static_cast<PacketConverter::GETDOCSUMSX *>(packet); + if (shouldLog(DEBUG_DOCSUM)) { + logPacket("incoming packet", packet, channel, 0); + } + DocsumRequest::Source req(gdx, _sourceDesc); + packet = NULL; + 
_pending.push(new DocsumHandler(*this, std::move(req), channel)); + rc = FNET_CLOSE_CHANNEL; + } else if (shouldLog(DEBUG_UNHANDLED)) { + logPacket("unhandled packet", packet, channel, 0); + } + } + if (packet != NULL) { + packet->Free(); + } + return rc; +} + +bool +TransportServer::InitAdminChannel(FNET_Channel *channel) +{ + if (_listener == NULL) { + // handle race where we get an incoming connection and + // disables listening at the 'same time'. Note that sync close + // is only allowed in the InitAdminChannel method + channel->GetConnection()->Close(); // sync close + return false; + } + channel->SetContext(channel); + channel->SetHandler(this); + assert(_clients.count(channel) == 0); + _clients.insert(channel); + channel->GetConnection()->SetContext(FNET_Context(++_connTag)); + if (shouldLog(DEBUG_CONNECTION)) { + LOG(debug, "connection established: tag=%u", _connTag); + } + return true; +} + +bool +TransportServer::InitChannel(FNET_Channel *channel, uint32_t pcode) +{ + channel->SetContext(channel); + channel->SetHandler(this); + if (shouldLog(DEBUG_CHANNEL)) { + LOG(debug, "new channel: id=%u, first pcode=%u", channel->GetID(), pcode); + } + return true; +} + +void +TransportServer::Run(FastOS_ThreadInterface *, void *) +{ + _dispatchTask.ScheduleNow(); + _ready = true; + _transport.Main(); // <- transport event loop + _dispatchTask.Kill(); + _listenTask.Kill(); + discardRequests(); +} + +bool +TransportServer::updateListen() +{ + bool doListen = _doListen; + if (doListen) { + if (_listener == NULL) { // start listening + _listener = _transport.Listen(_listenSpec.c_str(), &PacketStreamer::Instance, this); + if (_listener == NULL) { + LOG(error, "Could not bind fnet transport socket to %s", _listenSpec.c_str()); + _failed = true; + return false; + } + } + } else { + if (_listener != NULL) { // stop listening + _transport.Close(_listener); // async close + _listener->SubRef(); + _listener = NULL; + // also close client connections + std::set::iterator it = 
_clients.begin(); + for (; it != _clients.end(); ++it) { + _transport.Close((*it)->GetConnection()); // async close + } + } + } + return true; +} + +void +TransportServer::dispatchRequests() +{ + while (!_pending.empty()) { + Handler *h = _pending.front(); + _pending.pop(); + h->start(); + } +} + +void +TransportServer::discardRequests() +{ + while (!_pending.empty()) { + Handler *h = _pending.front(); + _pending.pop(); + delete h; + } +} + +void +TransportServer::logPacket(const vespalib::stringref &msg, FNET_Packet *p, FNET_Channel *ch, FNET_Connection *conn) +{ + uint32_t chid = -1; + uint32_t conntag = -1; + vespalib::string str; + if (ch != 0) { + chid = ch->GetID(); + conntag = ch->GetConnection()->GetContext()._value.INT; + } else if (conn != 0) { + conntag = conn->GetContext()._value.INT; + } + search::fs4transport::FS4Packet *fs4p = dynamic_cast<search::fs4transport::FS4Packet *>(p); + if (fs4p != 0) { + str = fs4p->toString(0); + } else { + str = vespalib::make_string("packet { pcode=%u }", p->GetPCODE()); + } + LOG(debug, "%s (chid=%u, conn=%u):\n%s", msg.c_str(), chid, conntag, str.c_str()); +} + +void +TransportServer::updateQueryMetrics(double latency_s) +{ + vespalib::LockGuard guard(_metrics.updateLock); + _metrics.query.count.inc(); + _metrics.query.latency.set(latency_s); +} + +void +TransportServer::updateDocsumMetrics(double latency_s, uint32_t numDocs) +{ + vespalib::LockGuard guard(_metrics.updateLock); + _metrics.docsum.count.inc(); + _metrics.docsum.docs.inc(numDocs); + _metrics.docsum.latency.set(latency_s); +} + +//----------------------------------------------------------------------------- + +bool +TransportServer::shouldLog(uint32_t msgType) { + return (((msgType & _debugMask) != 0) + && ((msgType != DEBUG_MONITOR && LOG_WOULD_LOG(debug)) || + (msgType == DEBUG_MONITOR && LOG_WOULD_LOG(spam)))); +} + +TransportServer::TransportServer(SearchServer &searchServer, + DocsumServer &docsumServer, + MonitorServer &monitorServer, + int port, uint32_t debugMask) + : 
_searchServer(searchServer), + _docsumServer(docsumServer), + _monitorServer(monitorServer), + _transport(), + _ready(false), + _failed(false), + _doListen(true), + _threadPool(256 * 1024), + _sourceDesc(port), + _listenSpec(), + _listener(0), + _clients(), + _pending(), + _dispatchTask(*this), + _listenTask(*this), + _connTag(0), + _debugMask(debugMask), + _metrics() +{ + _listenSpec = vespalib::make_string("tcp/%d", port); +} + +bool +TransportServer::start() +{ + if (!updateListen()) { + return false; + } + if (_threadPool.NewThread(this) == 0) { + LOG(error, "Could not start internal transport thread"); + _failed = true; + return false; + } + return true; +} + +int +TransportServer::getListenPort() +{ + struct Cmd : public FNET_IExecutable { + TransportServer &server; + vespalib::Gate done; + int port; + Cmd(TransportServer &s) : server(s), done(), port(-1) {} + virtual void execute() { + if (server._listener != 0) { + port = server._listener->GetPortNumber(); + } + done.countDown(); + } + }; + Cmd cmd(*this); + if (_transport.execute(&cmd)) { + cmd.done.await(); + } + return cmd.port; +}; + +TransportServer::~TransportServer() +{ + shutDown(); // ensure shutdown + if (_listener != 0) { + _listener->SubRef(); + _listener = 0; + } +} + +} // namespace engine +} // namespace search diff --git a/searchlib/src/vespa/searchlib/engine/transportserver.h b/searchlib/src/vespa/searchlib/engine/transportserver.h new file mode 100644 index 00000000000..a7de33093e2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/engine/transportserver.h @@ -0,0 +1,334 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "transport_metrics.h" +#include "source_description.h" + +namespace search { +namespace engine { + + +/** + * Common transport server implementation interacting with the + * underlying search engine using the common search api. This + * implementation has less optimization tricks compared to the + * previous ones being integrated into specific applications. + **/ +class TransportServer : public FastOS_Runnable, + public FNET_IServerAdapter, + public FNET_IPacketHandler +{ +private: + TransportServer(const TransportServer &); + TransportServer &operator=(const TransportServer &); + + /** + * Task used to update listen status + **/ + struct ListenTask : public FNET_Task + { + TransportServer &parent; + ListenTask(TransportServer &p) : FNET_Task(p._transport.GetScheduler()), parent(p) {} + virtual void PerformTask() { parent.updateListen(); } + }; + + /** + * Task used to dispatch incoming requests in an untangled way + * (aka not in the packet callback). 
+ **/ + struct DispatchTask : public FNET_Task + { + TransportServer &parent; + DispatchTask(TransportServer &p) : FNET_Task(p._transport.GetScheduler()), parent(p) {} + virtual void PerformTask() { + parent.dispatchRequests(); + ScheduleNow(); // run each tick + } + }; + + struct Handler; + + SearchServer &_searchServer; + DocsumServer &_docsumServer; + MonitorServer &_monitorServer; + FNET_Transport _transport; + bool _ready; // flag indicating initial readiness + bool _failed; // flag indicating a critical failure + bool _doListen; // flag telling us to accept requests or not + FastOS_ThreadPool _threadPool; // thread pool owning transport thread + SourceDescription _sourceDesc; // description of where requests are coming from + vespalib::string _listenSpec; // where to listen; FNET connect spec + FNET_Connector *_listener; // object accepting incoming connections + std::set<FNET_Channel *> _clients; // the admin channel of all client connections + std::queue<Handler *> _pending; // queue of incoming requests not yet started + DispatchTask _dispatchTask; // task used to dispatch incoming requests + ListenTask _listenTask; // task used to update listen status + uint32_t _connTag; // sequential number used to tag connections + uint32_t _debugMask; // enable more debug logging with this + TransportMetrics _metrics; // metrics for this transport server + + /** + * Toplevel class used to wrap incoming requests. Actual objects + * are used both to delay starting the request until we are not in + * the packet delivery callback and also as the callback target + * used by the underlying api objects to notify completion of + * individual requests. 
+ **/ + struct Handler + { + TransportServer &parent; + uint32_t _debugMask; + Handler(TransportServer &p) : parent(p), _debugMask(p._debugMask) {} + bool shouldLog(uint32_t msgType) { return parent.shouldLog(msgType); } // possible thread issue + virtual void start() = 0; + virtual ~Handler() {} + private: + Handler(const Handler &rhs); + Handler &operator=(const Handler &rhs); + }; + + /** + * Wrapper for search requests + **/ + struct SearchHandler : public Handler, + public SearchClient + { + SearchRequest::Source request; + FNET_Channel *channel; + uint32_t clientCnt; + + SearchHandler(TransportServer &p, SearchRequest::Source req, FNET_Channel *ch, uint32_t cnt) + : Handler(p), request(std::move(req)), channel(ch), clientCnt(cnt) {} + virtual void start(); + virtual void searchDone(SearchReply::UP reply); + virtual ~SearchHandler(); + }; + + /** + * Wrapper for docsum requests + **/ + struct DocsumHandler : public Handler, + public DocsumClient + { + DocsumRequest::Source request; + FNET_Channel *channel; + + DocsumHandler(TransportServer &p, DocsumRequest::Source req, FNET_Channel *ch) + : Handler(p), request(std::move(req)), channel(ch) {} + virtual void start(); + virtual void getDocsumsDone(DocsumReply::UP reply); + virtual ~DocsumHandler(); + }; + + /** + * Wrapper for monitor requests + **/ + struct MonitorHandler : public Handler, + public MonitorClient + { + MonitorRequest::UP request; + FNET_Connection *connection; + + MonitorHandler(TransportServer &p, MonitorRequest::UP req, FNET_Connection *conn) + : Handler(p), request(std::move(req)), connection(conn) {} + virtual void start(); + virtual void pingDone(MonitorReply::UP reply); + virtual ~MonitorHandler(); + }; + + // handle incoming network packets + virtual HP_RetCode HandlePacket(FNET_Packet *packet, FNET_Context context); + + // set up admin channel for new clients + virtual bool InitAdminChannel(FNET_Channel *channel); + + // set up channel for individual request + virtual bool 
InitChannel(FNET_Channel *channel, uint32_t pcode); + + // entry point for thread running transport thread + virtual void Run(FastOS_ThreadInterface *thisThread, void *arg); + + // update listen status + bool updateListen(); + + // dispatch incoming requests + void dispatchRequests(); + + // discard any pending requests during shutdown + void discardRequests(); + + // convenience method used to log packets + static void logPacket(const vespalib::stringref &msg, FNET_Packet *p, FNET_Channel *ch, FNET_Connection *conn); + + void updateQueryMetrics(double latency_s); + void updateDocsumMetrics(double latency_s, uint32_t numDocs); + +public: + /** + * Convenience typedefs. + */ + typedef std::unique_ptr<TransportServer> UP; + typedef std::shared_ptr<TransportServer> SP; + + /** no debug logging flags set **/ + const static uint32_t DEBUG_NONE = 0x00000000; + + /** log connect disconnect from clients **/ + const static uint32_t DEBUG_CONNECTION = 0x00000001; + + /** log channel open events **/ + const static uint32_t DEBUG_CHANNEL = 0x00000002; + + /** log search related packets **/ + const static uint32_t DEBUG_SEARCH = 0x00000004; + + /** log docsum related packets **/ + const static uint32_t DEBUG_DOCSUM = 0x00000008; + + /** log monitor related packets **/ + const static uint32_t DEBUG_MONITOR = 0x00000010; + + /** log unhandled packets **/ + const static uint32_t DEBUG_UNHANDLED = 0x00000020; + + /** all debug logging flags set **/ + const static uint32_t DEBUG_ALL = 0x0000003f; + + /** + * Check if we should log a debug message + * + * @return true if we should log a message for this event + * @param msgType the event we might want to log + **/ + bool shouldLog(uint32_t msgType); + + /** + * Create a transport server based on the given underlying api + * objects. An appropriate debug mask can be made by or'ing + * together the appropriate DEBUG_ constants defined in this + * class. 
+ * + * @param searchServer search api + * @param docsumServer docsum api + * @param monitorServer monitor api + * @param port listen port. + * @param debugMask mask indicating what information should be logged as debug messages. + **/ + TransportServer(SearchServer &searchServer, + DocsumServer &docsumServer, + MonitorServer &monitorServer, + int port, uint32_t debugMask = DEBUG_NONE); + + /** + * Obtain the metrics used by this transport server. + * + * @return internal metrics + **/ + TransportMetrics &getMetrics() { return _metrics; } + + /** + * Obtain the listen spec used by this transport server + * + * @return listen spec + **/ + const vespalib::string &getListenSpec() const { return _listenSpec; } + + /** + * Start this server. + * + * @return success(true)/failure(false) + **/ + bool start(); + + /** + * Check for initial readiness. + * + * @return true if we are ready. + **/ + bool isReady() const { return _ready; } + + /** + * Check if a critical error has occurred. + * + * @return true if something bad has happened. + **/ + bool isFailed() const { return _failed; } + + /** + * Get a reference to the internal fnet scheduler. + * + * @return fnet scheduler + **/ + FNET_Scheduler &getScheduler() { return *(_transport.GetScheduler()); } + + /** + * Set a flag indicating whether we should accept incoming + * requests or not. Setting the flag to false will make this + * server unavailable to any client application. + * + * @param listen flag indicating if we should listen + **/ + void setListen(bool listen) { + _doListen = listen; + _listenTask.ScheduleNow(); + } + + /** + * Check which port this server is currently listening to. This + * method is useful when using automatically allocated port + * numbers (listening to port 0). + * + * @return current listening port number, -1 if not listening. + **/ + int getListenPort(); + + /** + * Enable or disable Nagle's algorithm. 
+ * + * @param noDelay set to true to disable nagles algorithm + **/ + void setTCPNoDelay(bool noDelay) { _transport.SetTCPNoDelay(noDelay); } + + /** + * Enable or disable the use of a Q for throughput between search thread and network thread. + * + * @param directWrite bypasses Q + **/ + void setDirectWrite(bool directWrite) { _transport.SetDirectWrite(directWrite); } + + /** + * Set a limit on how long a connection may be idle before closing it. + * + * @param millisecs max idle time in milliseconds + **/ + void setIdleTimeout(double millisecs) { _transport.SetIOCTimeOut((uint32_t) millisecs); } + + /** + * Shut down this component. This method will block until the + * transport server has been shut down. After this method returns, + * no new requests will be generated by this component. + **/ + void shutDown() { + _transport.ShutDown(false); + _threadPool.Close(); + } + + /** + * Destructor will perform shutdown if needed. + **/ + virtual ~TransportServer(); +}; + +} // namespace engine +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/expression/.gitignore b/searchlib/src/vespa/searchlib/expression/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/expression/CMakeLists.txt b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt new file mode 100644 index 00000000000..8184765167d --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_expression OBJECT + SOURCES + perdocexpression.cpp + expressiontree.cpp + timestamp.cpp + bucketresultnode.cpp + integerbucketresultnode.cpp + floatbucketresultnode.cpp + stringbucketresultnode.cpp + rawbucketresultnode.cpp + fixedwidthbucketfunctionnode.cpp + rangebucketpredef.cpp + resultvector.cpp + catserializer.cpp + strcatserializer.cpp + documentfieldnode.cpp + attributenode.cpp + zcurve.cpp + ucafunctionnode.cpp + debugwaitfunctionnode.cpp + mathfunctionnode.cpp + numericfunctionnode.cpp + resultnode.cpp + interpolatedlookupfunctionnode.cpp + functionnodes.cpp + resultnodes.cpp + arrayatlookupfunctionnode.cpp + arrayoperationnode.cpp + aggregationrefnode.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/expression/OWNERS b/searchlib/src/vespa/searchlib/expression/OWNERS new file mode 100644 index 00000000000..1037590124e --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/OWNERS @@ -0,0 +1 @@ +balder diff --git a/searchlib/src/vespa/searchlib/expression/addfunctionnode.h b/searchlib/src/vespa/searchlib/expression/addfunctionnode.h new file mode 100644 index 00000000000..c2fc34e99be --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/addfunctionnode.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { +/** Numeric function node for addition: flatten() delegates to ResultNodeVector::flattenSum() and the initial value is an Int64ResultNode holding 0. */ +class AddFunctionNode : public NumericFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(AddFunctionNode); + AddFunctionNode() { } +private: + virtual void onArgument(const ResultNode & arg, ResultNode & result) const; + virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenSum(result); } + virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp new file mode 100644 index 00000000000..52774fa7234 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.cpp @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include + +#include + +namespace search { +namespace expression { + +using namespace vespalib; + +IMPLEMENT_EXPRESSIONNODE(AggregationRefNode, ExpressionNode); +/** Copy constructor: copies only the index; the cached expression pointer is cleared, so the copy must be wired again through locateExpression(). */ +AggregationRefNode::AggregationRefNode(const AggregationRefNode & rhs) : + ExpressionNode(), + _index(rhs._index), + _expressionNode(NULL) +{ +} +/** Assignment: takes over the index and drops any cached expression pointer (self-assignment is a no-op). */ +AggregationRefNode & AggregationRefNode::operator = (const AggregationRefNode & expr) +{ + if (this != &expr) { + _index = expr._index; + _expressionNode = NULL; + } + return *this; +} +/** Executes the referenced expression. @return its result, or false when no expression has been located yet. */ +bool AggregationRefNode::onExecute() const +{ + if (_expressionNode != NULL) { + return _expressionNode->execute(); + } + return false; +} +/** Resolves exprVec[_index] on first use and caches the pointer. @throws std::runtime_error when that slot holds no expression. NOTE(review): _index is not range-checked against exprVec here. */ +void AggregationRefNode::locateExpression(ExpressionNodeArray & exprVec) const +{ + if (_expressionNode == NULL) { + _expressionNode = static_cast(exprVec[_index].get()); + if (_expressionNode == NULL) { + throw std::runtime_error(make_string("Failed locating expression for index '%u'", _index)); /* _index is uint32_t, hence %u */ + } + } +} +/** Serializes the index only; the cached pointer is never written. */ +Serializer & AggregationRefNode::onSerialize(Serializer & os) const +{ + return os <<
_index; +} +/** Deserializes the index only. */ +Deserializer & AggregationRefNode::onDeserialize(Deserializer & is) +{ + return is >> _index; +} +/** Exposes the index to object visitors under the name "index". */ +void +AggregationRefNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "index", _index); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_expressionrefnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h new file mode 100644 index 00000000000..190682cb534 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/aggregationrefnode.h @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace expression { +/** Expression node that refers to another expression by its index in an ExpressionNodeArray. The target is resolved lazily by the nested Configure operation, which calls locateExpression() on every object inheriting AggregationRefNode. */ +class AggregationRefNode : public ExpressionNode +{ +public: + DECLARE_NBO_SERIALIZE; + class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + Configure(ExpressionNodeArray & exprVec) : _exprVec(exprVec) { } + private: + virtual void execute(vespalib::Identifiable &obj) { static_cast(obj).locateExpression(_exprVec); } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AggregationRefNode::classId); } + ExpressionNodeArray & _exprVec; + }; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + DECLARE_EXPRESSIONNODE(AggregationRefNode); + AggregationRefNode() : _index(0), _expressionNode(NULL) { } + AggregationRefNode(uint32_t index) : _index(index), _expressionNode(NULL) { } + AggregationRefNode(const AggregationRefNode & rhs); + AggregationRefNode & operator = (const AggregationRefNode & exprref); + /* getExpression() returns NULL until the reference has been located. getResult() dereferences the cached pointer unchecked, so it requires a located expression. onPrepare() and onExecute() tolerate an unlocated reference. */ + ExpressionNode *getExpression() { return _expressionNode; } + virtual const ResultNode & getResult() const { return _expressionNode->getResult(); } + virtual void onPrepare(bool
preserveAccurateTypes) { if (_expressionNode != NULL) { _expressionNode->prepare(preserveAccurateTypes); } } + virtual bool onExecute() const; + +private: + void locateExpression(ExpressionNodeArray & exprVec) const; + + uint32_t _index; + mutable ExpressionNode *_expressionNode; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/andfunctionnode.h b/searchlib/src/vespa/searchlib/expression/andfunctionnode.h new file mode 100644 index 00000000000..cc8d89c669c --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/andfunctionnode.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { +namespace expression { +/** Bit function node for AND: flatten() delegates to ResultNodeVector::flattenAnd() and the initial value is an Int64ResultNode holding -1 (all bits set). */ +class AndFunctionNode : public BitFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(AndFunctionNode); + AndFunctionNode() { } +private: + virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(-1)); } + virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenAnd(result); } + + virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp new file mode 100644 index 00000000000..b112aa90969 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.cpp @@ -0,0 +1,163 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace expression { + +using vespalib::Serializer; +using vespalib::Deserializer; + +IMPLEMENT_EXPRESSIONNODE(ArrayAtLookup, UnaryFunctionNode); + +ArrayAtLookup::ArrayAtLookup() +{ +} + +ArrayAtLookup::~ArrayAtLookup() +{ +} +/** Creates a lookup on the attribute with the given name; the attribute pointer stays unset until wireAttributes() runs. */ +ArrayAtLookup::ArrayAtLookup(const vespalib::string &attribute, + const ExpressionNode::CP &arg) + : UnaryFunctionNode(arg), + _attributeName(attribute) +{ +} +/** Creates a lookup bound directly to an attribute vector; the name is taken from attr.getName(). */ +ArrayAtLookup::ArrayAtLookup(const search::attribute::IAttributeVector &attr, + const ExpressionNode::CP &indexArg) + : UnaryFunctionNode(indexArg), + _attributeName(attr.getName()), + _attribute(&attr) +{ +} + +/** Copy constructor: copies name, attribute pointer and basic type; the document id of the copy always ends up as 0. */ +ArrayAtLookup::ArrayAtLookup(const ArrayAtLookup &rhs) : + UnaryFunctionNode(rhs), + _attributeName(rhs._attributeName), + _attribute(rhs._attribute), + _docId(rhs._docId), + _basicAttributeType(rhs._basicAttributeType) +{ + // why? + _docId = 0; +} +/** Assignment: same member handling as the copy constructor, with the document id reset to 0. */ +ArrayAtLookup & ArrayAtLookup::operator= (const ArrayAtLookup &rhs) +{ + if (this != &rhs) { + UnaryFunctionNode::operator =(rhs); + _attributeName = rhs._attributeName; + _attribute = rhs._attribute; + // _docId = rhs._docId; + _docId = 0; + _basicAttributeType = rhs._basicAttributeType; + } + return *this; +} +/** Picks the result node type from the attribute: integer, floating point, otherwise string. Dereferences _attribute unchecked, so the attribute must already be set (attribute constructor or wireAttributes()). */ +void ArrayAtLookup::onPrepareResult() +{ + if (_attribute->isIntegerType()) { + _basicAttributeType = BAT_INT; + setResultType(std::unique_ptr(new Int64ResultNode())); + } else if (_attribute->isFloatingPointType()) { + _basicAttributeType = BAT_FLOAT; + setResultType(std::unique_ptr(new FloatResultNode())); + } else { + _basicAttributeType = BAT_STRING; + setResultType(std::unique_ptr(new StringResultNode())); + } +} +/** Evaluates the index argument, clamps it to [0, numValues - 1] and stores the attribute value at that position for the current document. When the document has no values the result is 0 or an empty string. Always returns true. */ +bool ArrayAtLookup::onExecute() const +{ + getArg().execute(); + int64_t idx = getArg().getResult().getInteger(); + // get attribute data + size_t numValues = _attribute->getValueCount(_docId); + if (idx < 0) { + idx = 0; + } + if (idx >= (int64_t)numValues) { + idx = (int64_t)numValues - 1; /* signed arithmetic: yields -1 for an empty value list, rejected by the range checks below */ + } + + if
(_basicAttributeType == BAT_FLOAT) { + std::vector wVector; + wVector.resize(numValues); + _attribute->get(_docId, wVector.data(), numValues); /* data() is valid on an empty vector, &wVector[0] is not */ + std::vector tmp; + tmp.resize(numValues); + for (size_t i = 0; i < numValues; ++i) { + tmp[i] = wVector[i].getValue(); + } + double result = 0; + if (idx >= 0 && idx < (int64_t)numValues) { + result = tmp[idx]; + } + static_cast(updateResult()).set(result); + } else if (_basicAttributeType == BAT_INT) { + std::vector wVector; + wVector.resize(numValues); + _attribute->get(_docId, wVector.data(), numValues); + std::vector tmp; + tmp.resize(numValues); + for (size_t i = 0; i < numValues; ++i) { + tmp[i] = wVector[i].getValue(); + } + int64_t result = 0; + if (idx >= 0 && idx < (int64_t)numValues) { + result = tmp[idx]; + } + static_cast(updateResult()).set(result); + } else { + std::vector wVector; + wVector.resize(numValues); + _attribute->get(_docId, wVector.data(), numValues); + std::vector tmp; + tmp.resize(numValues); + for (size_t i = 0; i < numValues; ++i) { + tmp[i] = wVector[i].getValue(); + } + vespalib::string result; + if (idx >= 0 && idx < (int64_t)numValues) { + result = tmp[idx]; + } + static_cast(updateResult()).set(result); + } + return true; +} +/** Looks up the attribute vector by name in the given context. @throws std::runtime_error when the context has no such attribute. */ +void ArrayAtLookup::wireAttributes(const search::attribute::IAttributeContext & attrCtx) +{ + _attribute = attrCtx.getAttribute(_attributeName); + if (_attribute == NULL) { + throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str())); + } +} +/** Serializes the base node followed by the attribute name. */ +Serializer & ArrayAtLookup::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + os << _attributeName; + return os; +} +/** Deserializes the base node followed by the attribute name; the attribute pointer itself is not restored here. */ +Deserializer & ArrayAtLookup::onDeserialize(Deserializer & is) +{ + UnaryFunctionNode::onDeserialize(is); + is >> _attributeName; + return is; +} + +} // namespace expression +} // namespace search diff --git a/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h
b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h new file mode 100644 index 00000000000..271543126da --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/arrayatlookupfunctionnode.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { +/** Unary function node bound to a named attribute vector and a current document id. It can be constructed from an attribute name (wired later through wireAttributes()) or directly from an attribute vector. */ +class ArrayAtLookup : public UnaryFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(ArrayAtLookup); + DECLARE_NBO_SERIALIZE; + + ArrayAtLookup(); + ~ArrayAtLookup(); + + ArrayAtLookup(const vespalib::string &attribute, + const ExpressionNode::CP & arg); + + ArrayAtLookup(const search::attribute::IAttributeVector &attr, + const ExpressionNode::CP &indexArg); + + ArrayAtLookup(const ArrayAtLookup &rhs); + + ArrayAtLookup & operator= (const ArrayAtLookup &rhs); + /* Selects the document that subsequent executions read from. */ + void setDocId(DocId docId) { _docId = docId; } +private: + virtual bool onExecute() const; + virtual void onPrepareResult(); + virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx); + /* Coarse value category of the attribute; defaults to BAT_STRING below. */ + enum BasicAttributeType { + BAT_INT, BAT_FLOAT, BAT_STRING + }; + /* Defaults: empty name, no attribute, document 0, string category. */ + vespalib::string _attributeName = vespalib::string(); + const search::attribute::IAttributeVector * _attribute = 0; + DocId _docId = 0; + BasicAttributeType _basicAttributeType = BAT_STRING; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp new file mode 100644 index 00000000000..eccc1dfe02f --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.cpp @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include "arrayoperationnode.h" +#include + +namespace search { +namespace expression { + +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ArrayOperationNode, FunctionNode); +/** Default constructor: empty attribute name, no attribute, document id 0. */ +ArrayOperationNode::ArrayOperationNode() + : FunctionNode(), _attributeName(), _attribute(0), _docId(0) +{} +/** Copy constructor: copies the attribute name and pointer and resets the document id to 0. NOTE(review): the FunctionNode base is default-constructed rather than copied from rhs - confirm this is intentional. */ +ArrayOperationNode::ArrayOperationNode(const ArrayOperationNode& rhs) + : FunctionNode(), + _attributeName(rhs._attributeName), + _attribute(rhs._attribute), + _docId(0) +{} + +// for unit testing +ArrayOperationNode::ArrayOperationNode(IAttributeVector &attr) + : FunctionNode(), + _attributeName(attr.getName()), + _attribute(&attr), + _docId(0) +{} +/** Assignment: copies the attribute name and pointer and resets the document id to 0. NOTE(review): there is no self-assignment check and the FunctionNode base is not assigned - confirm this is intentional. */ +ArrayOperationNode& +ArrayOperationNode::operator= (const ArrayOperationNode& rhs) +{ + _attributeName = rhs._attributeName; + _attribute = rhs._attribute; + _docId = 0; + return *this; +} +/** Looks up the attribute vector by name in the given context. @throws std::runtime_error when the context has no such attribute. */ +void +ArrayOperationNode::wireAttributes(const IAttributeContext &attrCtx) +{ + _attribute = attrCtx.getAttribute(_attributeName); + if (_attribute == NULL) { + throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str())); + } +} + +using vespalib::Serializer; +using vespalib::Deserializer; +/** Serializes the base node followed by the attribute name. */ +Serializer & ArrayOperationNode::onSerialize(Serializer & os) const +{ + FunctionNode::onSerialize(os); + os << _attributeName; + return os; +} +/** Deserializes the base node followed by the attribute name; the attribute pointer is not restored here. */ +Deserializer & ArrayOperationNode::onDeserialize(Deserializer & is) +{ + FunctionNode::onDeserialize(is); + is >> _attributeName; + return is; +} + +} // namespace expression +} // namespace search diff --git a/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h new file mode 100644 index 00000000000..6afcbdcccaf --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/arrayoperationnode.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include + +namespace search {
+namespace expression { +/** Abstract function node holding an attribute name, the attribute vector wired for it and a current document id; subclasses reach them through the protected docId() and attribute() accessors. */ +class ArrayOperationNode : public FunctionNode +{ +public: + typedef search::attribute::IAttributeVector IAttributeVector; + typedef search::attribute::IAttributeContext IAttributeContext; + + DECLARE_NBO_SERIALIZE; + DECLARE_ABSTRACT_EXPRESSIONNODE(ArrayOperationNode); + + ArrayOperationNode(); + ArrayOperationNode(const ArrayOperationNode& rhs); + // for unit testing + ArrayOperationNode(IAttributeVector &attr); + + ArrayOperationNode& operator= (const ArrayOperationNode& rhs); + /* Selects the document that docId() reports to subclasses. */ + void setDocId(DocId newDocId) { _docId = newDocId; } + + virtual void wireAttributes(const IAttributeContext &attrCtx); + +protected: + DocId docId() const { return _docId; } + /* Dereferences _attribute unchecked: only valid once an attribute has been set. */ + const IAttributeVector& attribute() const { + return *_attribute; + } + +private: + vespalib::string _attributeName; + const search::attribute::IAttributeVector * _attribute; + DocId _docId; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.cpp b/searchlib/src/vespa/searchlib/expression/attributenode.cpp new file mode 100644 index 00000000000..558177a7972 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/attributenode.cpp @@ -0,0 +1,283 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include + +namespace search { +namespace expression { + +using namespace vespalib; +using search::attribute::IAttributeContext; +using search::attribute::IAttributeVector; +using search::attribute::BasicType; + +IMPLEMENT_EXPRESSIONNODE(AttributeNode, FunctionNode); +IMPLEMENT_RESULTNODE(AttributeResult, ResultNode); + +namespace { +/** AttributeResult variant that keeps a SingleValueEnumAttributeBase view of the attribute (obtained with dynamic_cast) and answers onGetEnum() through its getE(). */ +class EnumAttributeResult : public AttributeResult +{ +public: + DECLARE_RESULTNODE(EnumAttributeResult); + EnumAttributeResult(const attribute::IAttributeVector * attribute, DocId docId) : + AttributeResult(attribute, docId), + _enumAttr(dynamic_cast(attribute)) + { + } +private: + EnumAttributeResult() : + AttributeResult(), + _enumAttr(NULL) + { } + int64_t onGetEnum(size_t index) const override { (void) index; return (static_cast(_enumAttr->getE(getDocId()))); } + const SingleValueEnumAttributeBase * _enumAttr; +}; + +IMPLEMENT_RESULTNODE(EnumAttributeResult, AttributeResult); +/** Creates the scratch result for an attribute: EnumAttributeResult when the dynamic_cast on the attribute succeeds, plain AttributeResult otherwise. The document id starts at 0. */ +AttributeResult::UP createResult(const IAttributeVector * attribute) +{ + return (dynamic_cast(attribute) != NULL) + ?
AttributeResult::UP(new EnumAttributeResult(attribute, 0)) + : AttributeResult::UP(new AttributeResult(attribute, 0)); +} + +} +/** Default constructor: unnamed node with an empty scratch result that has no attribute. */ +AttributeNode::AttributeNode() : + FunctionNode(), + _scratchResult(new AttributeResult()), + _hasMultiValue(false), + _useEnumOptimization(false), + _handler(), + _attributeName() +{ +} +/** Creates a node for the named attribute; the attribute itself is resolved later by wireAttributes(). */ +AttributeNode::AttributeNode(const vespalib::stringref &name) : + FunctionNode(), + _scratchResult(new AttributeResult()), + _hasMultiValue(false), + _useEnumOptimization(false), + _handler(), + _attributeName(name) +{ +} +AttributeNode::AttributeNode(const IAttributeVector & attribute) : + FunctionNode(), + _scratchResult(createResult(&attribute)), + _hasMultiValue(attribute.hasMultiValue()), + _useEnumOptimization(false), + _handler(), + _attributeName(attribute.getName()) +{ +} +/** Copy constructor: clones the scratch result (when the source still has one) with its document id reset to 0; the multi-value handler is not copied. */ +AttributeNode::AttributeNode(const AttributeNode & attribute) : + FunctionNode(attribute), + _scratchResult(attribute._scratchResult ? attribute._scratchResult->clone() : nullptr), /* the source may have been through cleanup() */ + _hasMultiValue(attribute._hasMultiValue), + _useEnumOptimization(attribute._useEnumOptimization), + _handler(), + _attributeName(attribute._attributeName) +{ + if (_scratchResult) { _scratchResult->setDocId(0); } +} +/** Assignment: same member handling as the copy constructor. NOTE(review): _handler is left untouched here - confirm it cannot refer to a result replaced by FunctionNode::operator=. */ +AttributeNode & AttributeNode::operator = (const AttributeNode & attr) +{ + if (this != &attr) { + FunctionNode::operator = (attr); + _attributeName = attr._attributeName; + _hasMultiValue = attr._hasMultiValue; + _useEnumOptimization = attr._useEnumOptimization; + _scratchResult.reset(attr._scratchResult ? attr._scratchResult->clone() : nullptr); + if (_scratchResult) { _scratchResult->setDocId(0); } + } + return *this; +} +/** Selects the result node type (and, for multi-value attributes, the matching handler) from the attribute's basic type. Does nothing when the scratch result has no attribute. @throws std::runtime_error for integer attributes of an unexpected basic type or attributes that are neither integer, floating point nor string. */ +void AttributeNode::onPrepare(bool preserveAccurateTypes) +{ + const IAttributeVector * attribute = _scratchResult->getAttribute(); + if (attribute != NULL) { + BasicType::Type basicType = attribute->getBasicType(); + if (attribute->isIntegerType()) { + if (_hasMultiValue) { + if (preserveAccurateTypes) { + switch (basicType) { + case BasicType::INT8: + setResultType(std::unique_ptr(new Int8ResultNodeVector())); + break; + case BasicType::INT16: +
setResultType(std::unique_ptr(new Int16ResultNodeVector())); + break; + case BasicType::INT32: + setResultType(std::unique_ptr(new Int32ResultNodeVector())); + break; + case BasicType::INT64: + setResultType(std::unique_ptr(new Int64ResultNodeVector())); + break; + default: + throw std::runtime_error("This is no valid integer attribute " + attribute->getName()); + break; + } + } else { + setResultType(std::unique_ptr(new IntegerResultNodeVector())); + } + _handler.reset(new IntegerHandler(updateResult())); + } else { + if (preserveAccurateTypes) { + switch (basicType) { + case BasicType::INT8: + setResultType(std::unique_ptr(new Int8ResultNode())); + break; + case BasicType::INT16: + setResultType(std::unique_ptr(new Int16ResultNode())); + break; + case BasicType::INT32: + setResultType(std::unique_ptr(new Int32ResultNode())); + break; + case BasicType::INT64: + setResultType(std::unique_ptr(new Int64ResultNode())); + break; + default: + throw std::runtime_error("This is no valid integer attribute " + attribute->getName()); + break; + } + } else { + setResultType(std::unique_ptr(new Int64ResultNode())); + } + } + } else if (attribute->isFloatingPointType()) { + if (_hasMultiValue) { + setResultType(std::unique_ptr(new FloatResultNodeVector())); + _handler.reset(new FloatHandler(updateResult())); + } else { + setResultType(std::unique_ptr(new FloatResultNode())); + } + } else if (attribute->isStringType()) { + if (_hasMultiValue) { + if (_useEnumOptimization) { + setResultType(std::unique_ptr(new EnumResultNodeVector())); + _handler.reset(new EnumHandler(updateResult())); + } else { + setResultType(std::unique_ptr(new StringResultNodeVector())); + _handler.reset(new StringHandler(updateResult())); + } + } else { + if (_useEnumOptimization) { + setResultType(std::unique_ptr(new EnumResultNode())); + } else { + setResultType(std::unique_ptr(new StringResultNode())); + } + } + } else { + throw std::runtime_error(make_string("Can not deduce correct resultclass for
attribute vector '%s'", + attribute->getName().c_str())); + } + } +} +/** Copies all values of the current document into the integer result vector. */ +void AttributeNode::IntegerHandler::handle(const AttributeResult & r) +{ + size_t numValues = r.getAttribute()->getValueCount(r.getDocId()); + _vector.resize(numValues); + _wVector.resize(numValues); + r.getAttribute()->get(r.getDocId(), _wVector.data(), _wVector.size()); /* data() is valid on an empty vector, &_wVector[0] is not */ + for(size_t i(0); i < numValues; i++) { + _vector[i] = _wVector[i].getValue(); + } +} +/** Copies all values of the current document into the float result vector. */ +void AttributeNode::FloatHandler::handle(const AttributeResult & r) +{ + size_t numValues = r.getAttribute()->getValueCount(r.getDocId()); + _vector.resize(numValues); + _wVector.resize(numValues); + r.getAttribute()->get(r.getDocId(), _wVector.data(), _wVector.size()); + for(size_t i(0); i < numValues; i++) { + _vector[i] = _wVector[i].getValue(); + } +} +/** Copies all values of the current document into the string result vector. */ +void AttributeNode::StringHandler::handle(const AttributeResult & r) +{ + size_t numValues = r.getAttribute()->getValueCount(r.getDocId()); + _vector.resize(numValues); + _wVector.resize(numValues); + r.getAttribute()->get(r.getDocId(), _wVector.data(), _wVector.size()); + for(size_t i(0); i < numValues; i++) { + _vector[i] = _wVector[i].getValue(); + } +} +/** Copies all values of the current document into the enum result vector. */ +void AttributeNode::EnumHandler::handle(const AttributeResult & r) +{ + size_t numValues = r.getAttribute()->getValueCount(r.getDocId()); + _vector.resize(numValues); + _wVector.resize(numValues); + r.getAttribute()->get(r.getDocId(), _wVector.data(), _wVector.size()); + for(size_t i(0); i < numValues; i++) { + _vector[i] = _wVector[i].getValue(); + } +} +/** Fills the result for the current document: through the handler for multi-value attributes, otherwise by setting the result from the scratch result. Requires onPrepare() to have installed a handler in the multi-value case. Always returns true. */ +bool AttributeNode::onExecute() const +{ + if (_hasMultiValue) { + _handler->handle(*_scratchResult); + } else { + updateResult().set(*_scratchResult); + } + return true; +} +/** Resolves the attribute by name unless the scratch result already carries one; uses getAttributeStableEnum() when the enum optimization is enabled. @throws std::runtime_error when the context has no such attribute. */ +void AttributeNode::wireAttributes(const IAttributeContext & attrCtx) +{ + const IAttributeVector * attribute(_scratchResult ?
_scratchResult->getAttribute() : nullptr); + if (attribute == NULL) { + if (_useEnumOptimization) { + attribute = attrCtx.getAttributeStableEnum(_attributeName); + } else { + attribute = attrCtx.getAttribute(_attributeName); + } + if (attribute == NULL) { + throw std::runtime_error(make_string("Failed locating attribute vector '%s'", _attributeName.c_str())); + } + _hasMultiValue = attribute->hasMultiValue(); + _scratchResult = createResult(attribute); + } +} +/** Drops the scratch result and with it the reference to the attribute vector. */ +void AttributeNode::cleanup() +{ + _scratchResult.reset(); +} +/** Serializes the base node followed by the attribute name. */ +Serializer & AttributeNode::onSerialize(Serializer & os) const +{ + FunctionNode::onSerialize(os); + return os << _attributeName; +} +/** Deserializes the base node followed by the attribute name. */ +Deserializer & AttributeNode::onDeserialize(Deserializer & is) +{ + FunctionNode::onDeserialize(is); + + return is >> _attributeName; +} +/** Exposes the attribute name to object visitors under the name "attributeName". */ +void +AttributeNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "attributeName", _attributeName); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_attributenode() {} diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.h b/searchlib/src/vespa/searchlib/expression/attributenode.h new file mode 100644 index 00000000000..c55acff2808 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/attributenode.h @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace expression { +/** Result node that reads its value from an attribute vector for a given document id instead of storing a value. The getters ignore their index argument, set() is a no-op and hash() returns the document id. The getters dereference _attribute unchecked. */ +class AttributeResult : public ResultNode +{ +public: + typedef std::unique_ptr UP; + DECLARE_RESULTNODE(AttributeResult); + AttributeResult() : _attribute(NULL), _docId(0) { } + AttributeResult(const attribute::IAttributeVector * attribute, DocId docId) : + _attribute(attribute), + _docId(docId) + { } + void setDocId(DocId docId) { _docId = docId; } + const search::attribute::IAttributeVector *getAttribute() const { return _attribute; } + DocId getDocId() const { return _docId; } +private: + virtual int64_t onGetInteger(size_t index) const { (void) index; return _attribute->getInt(_docId); } + virtual double onGetFloat(size_t index) const { (void) index; return _attribute->getFloat(_docId); } + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + const char * t = _attribute->getString(_docId, buf.str(), buf.size()); + return ConstBufferRef(t, strlen(t)); /* NOTE(review): assumes getString() never returns NULL - confirm */ + } + int64_t onGetEnum(size_t index) const override { (void) index; return (static_cast(_attribute->getEnum(_docId))); } + virtual void set(const search::expression::ResultNode&) { } + virtual size_t hash() const { return _docId; } + + const search::attribute::IAttributeVector * _attribute; + DocId _docId; +}; +/** Function node that yields the value(s) of a named attribute for the current document. The nested Configure operation calls wireAttributes() on every ExpressionNode it visits and recurses into its members; CleanupAttributeReferences calls cleanup() on every AttributeNode. */ +class AttributeNode : public FunctionNode +{ + typedef vespalib::BufferRef BufferRef; + typedef vespalib::ConstBufferRef ConstBufferRef; +public: + DECLARE_NBO_SERIALIZE; + class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + Configure(const search::attribute::IAttributeContext & attrCtx) : _attrCtx(attrCtx) { } + private: + virtual void execute(vespalib::Identifiable &obj) { + static_cast(obj).wireAttributes(_attrCtx); + obj.selectMembers(*this, *this); + } + virtual bool check(const vespalib::Identifiable &obj) const { + return obj.inherits(ExpressionNode::classId); + } + const
search::attribute::IAttributeContext & _attrCtx; + }; + + class CleanupAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + private: + virtual void execute(vespalib::Identifiable &obj) { static_cast(obj).cleanup(); } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + DECLARE_EXPRESSIONNODE(AttributeNode); + AttributeNode(); + AttributeNode(const vespalib::stringref &name); + AttributeNode(const search::attribute::IAttributeVector & attribute); + AttributeNode(const AttributeNode & attribute); + AttributeNode & operator = (const AttributeNode & attribute); + void setDocId(DocId docId) const { _scratchResult->setDocId(docId); } /* dereferences _scratchResult unchecked: not usable after cleanup() */ + const search::attribute::IAttributeVector *getAttribute() const { + return _scratchResult ? _scratchResult->getAttribute() : nullptr; + } + const vespalib::string & getAttributeName() const { return _attributeName; } + /* Only stores the flag; it is read by wireAttributes() and onPrepare(). */ + void useEnumOptimization(bool use=true) { _useEnumOptimization = use; } + bool hasMultiValue() const { return _hasMultiValue; } +private: + void cleanup(); + virtual void wireAttributes(const search::attribute::IAttributeContext & attrCtx); + virtual void onPrepare(bool preserveAccurateTypes); + virtual bool onExecute() const; + /* Strategy interface used for multi-value attributes: handle() fills the result vector from the given AttributeResult. */ class Handler + { + public: + virtual ~Handler() { } + virtual void handle(const AttributeResult & r) = 0; + }; + /* Each concrete handler keeps a reference to the vector inside the result node passed to its constructor plus a reusable buffer (_wVector). The C-style cast assumes the result node has the matching vector type. */ class IntegerHandler : public Handler + { + public: + IntegerHandler(ResultNode & result) : + Handler(), + _vector(((IntegerResultNodeVector &)result).getVector()), + _wVector() + { } + virtual void handle(const AttributeResult & r); + private: + IntegerResultNodeVector::Vector & _vector; + mutable std::vector _wVector; + }; + class FloatHandler : public Handler + { + public: + FloatHandler(ResultNode & result) : + Handler(), + _vector(((FloatResultNodeVector &)result).getVector()), + _wVector() + { } + virtual
void handle(const AttributeResult & r); + private: + FloatResultNodeVector::Vector & _vector; + mutable std::vector _wVector; + }; + class StringHandler : public Handler + { + public: + StringHandler(ResultNode & result) : + Handler(), + _vector(((StringResultNodeVector &)result).getVector()), + _wVector() + { } + virtual void handle(const AttributeResult & r); + private: + StringResultNodeVector::Vector & _vector; + mutable std::vector _wVector; + }; + class EnumHandler : public Handler + { + public: + EnumHandler(ResultNode & result) : + Handler(), + _vector(((EnumResultNodeVector &)result).getVector()), + _wVector() + { } + virtual void handle(const AttributeResult & r); + private: + EnumResultNodeVector::Vector &_vector; + mutable std::vector _wVector; + }; + /* _scratchResult carries the attribute pointer and current document id; it is null after cleanup(). _handler is only set by onPrepare() for multi-value attributes. */ + mutable AttributeResult::UP _scratchResult; + mutable bool _hasMultiValue; + mutable bool _useEnumOptimization; + std::unique_ptr _handler; + vespalib::string _attributeName; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h b/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h new file mode 100644 index 00000000000..b3872bf4a92 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/binaryfunctionnode.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header name is missing; the <...> text looks stripped in this copy - restore from upstream
+
+namespace search {
+namespace expression {
+// Abstract function node that can be constructed from two argument expressions.
+class BinaryFunctionNode : public MultiArgFunctionNode
+{
+public:
+    DECLARE_ABSTRACT_EXPRESSIONNODE(BinaryFunctionNode);
+    BinaryFunctionNode() { }
+    BinaryFunctionNode(const ExpressionNode::CP & arg1, const ExpressionNode::CP & arg2) :
+        MultiArgFunctionNode()
+    {
+        appendArg(arg1);  // arg1 is appended first, then arg2
+        appendArg(arg2);
+    }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h
new file mode 100644
index 00000000000..1a2529d1038
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bitfunctionnode.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header names are missing on both include rows (stripped <...> text) - restore from upstream
+#include
+
+namespace search {
+namespace expression {
+// Abstract numeric function node; concrete subclasses must implement the Int64ResultNode overload of onArgument().
+class BitFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_ABSTRACT_EXPRESSIONNODE(BitFunctionNode);
+    BitFunctionNode() { }
+protected:
+    virtual void onPrepareResult();
+private:
+    virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const = 0;  // the only pure virtual declared here
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;  // generic overload, defined out of line
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp
new file mode 100644
index 00000000000..e82cef0563a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bucketresultnode.cpp
@@ -0,0 +1,17 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include  // NOTE(review): header name is missing; the <...> text looks stripped in this copy - restore from upstream
+#include "bucketresultnode.h"
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, BucketResultNode, vespalib::Identifiable);  // NOTE(review): base given as vespalib::Identifiable although the class derives from ResultNode - confirm intended
+// Definitions of the static field descriptors declared in BucketResultNode, named "to" and "from".
+vespalib::FieldBase BucketResultNode::_toField("to");
+vespalib::FieldBase BucketResultNode::_fromField("from");
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_bucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/bucketresultnode.h b/searchlib/src/vespa/searchlib/expression/bucketresultnode.h
new file mode 100644
index 00000000000..36f0cff66f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/bucketresultnode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "resultnode.h"
+
+namespace search {
+namespace expression {
+// Abstract base for bucket result nodes: scalar getters return fixed defaults and set() ignores its argument.
+class BucketResultNode : public ResultNode
+{
+public:
+    DECLARE_ABSTRACT_EXPRESSIONNODE(BucketResultNode);
+    virtual void set(const ResultNode & rhs) { (void) rhs; }  // deliberately a no-op
+protected:
+    static vespalib::FieldBase _fromField;  // constructed with the name "from" in bucketresultnode.cpp
+    static vespalib::FieldBase _toField;    // constructed with the name "to" in bucketresultnode.cpp
+private:
+    virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; }
+    virtual double onGetFloat(size_t index) const { (void) index; return 0; }
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; return buf; }  // hands back the caller's buffer untouched
+    virtual size_t getRawByteSize() const { return onGetRawByteSize(); }  // forwards to the subclass hook below
+    virtual size_t onGetRawByteSize() const = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/catfunctionnode.h b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h
new file mode 100644
index 00000000000..375bf6f84b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include + +namespace search { +namespace expression { + +class CatFunctionNode : public MultiArgFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(CatFunctionNode); + CatFunctionNode() { } + CatFunctionNode(const ExpressionNode & arg) { addArg(arg); } +private: + virtual void onPrepare(bool preserveAccurateTypes); + virtual void onPrepareResult(); + virtual bool onExecute() const; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/catserializer.cpp b/searchlib/src/vespa/searchlib/expression/catserializer.cpp new file mode 100644 index 00000000000..bbeca330f14 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/catserializer.cpp @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "catserializer.h" +#include "rawresultnode.h" +#include "resultvector.h" +#include + +namespace search { +namespace expression { + +using vespalib::IFieldBase; +using vespalib::Serializer; +using vespalib::Deserializer; +using vespalib::string; +using vespalib::stringref; + +CatSerializer & CatSerializer::put(const IFieldBase & field, const stringref & value) +{ + (void) field; + getStream().write(value.c_str(), value.size()); + return *this; +} + +CatSerializer & CatSerializer::nop(const IFieldBase & field, const void * value) +{ + (void) field; + (void) value; + throw vespalib::Exception("search::expression::CatSerializer can not deserialize anything as it looses information on serialize"); + return *this; +} + +CatSerializer & CatSerializer::get(const IFieldBase & field, bool & value) { return nop(field, &value); } +CatSerializer & CatSerializer::get(const IFieldBase & field, uint8_t & value) { return nop(field, &value); } +CatSerializer & CatSerializer::get(const IFieldBase & field, uint16_t & value) { return nop(field, &value); } +CatSerializer & CatSerializer::get(const IFieldBase & field, uint32_t & value) { return nop(field, &value); } 
+CatSerializer & CatSerializer::get(const IFieldBase & field, uint64_t & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, double & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, float & value) { return nop(field, &value); }
+CatSerializer & CatSerializer::get(const IFieldBase & field, string & value) { return nop(field, &value); }
+// Serializes an Identifiable: values inheriting ResultNode go through onSerializeResult(), all others through serializeDirect().
+CatSerializer & CatSerializer::put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value)
+{
+    (void) field;
+    if (value.inherits(ResultNode::classId)) {
+        static_cast(value).onSerializeResult(*this);  // NOTE(review): cast target type is missing in this copy (stripped <...> text)
+    } else {
+        value.serializeDirect(*this);
+    }
+    return *this;
+}
+// Writes the bytes of a raw result node to the stream, as returned by value.get().
+ResultSerializer & CatSerializer::putResult(const vespalib::IFieldBase & field, const RawResultNode & value)
+{
+    (void) field;
+    vespalib::ConstBufferRef raw(value.get());
+    getStream().write(raw.c_str(), raw.size());
+    return *this;
+}
+// Serializes every element of the vector, in index order, through this serializer.
+ResultSerializer & CatSerializer::putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value)
+{
+    (void) field;
+    size_t sz(value.size());
+    for (size_t i(0); i < sz; i++) {
+        value.get(i).serialize(*this);
+    }
+    return *this;
+}
+// Serializes value directly via serializeDirect().
+void CatSerializer::proxyPut(const ResultNode & value)
+{
+    value.serializeDirect(*this);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_catserializer() {}
diff --git a/searchlib/src/vespa/searchlib/expression/catserializer.h b/searchlib/src/vespa/searchlib/expression/catserializer.h
new file mode 100644
index 00000000000..dc25e3b30d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/catserializer.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header names are missing on both include rows (stripped <...> text) - restore from upstream
+#include
+
+
+namespace search {
+namespace expression {
+
+class RawResultNode;
+// Serializer on top of an nbostream; the implementation writes string and raw values without any length prefix.
+class CatSerializer : public vespalib::NBOSerializer, public ResultSerializer
+{
+public:
+    CatSerializer(vespalib::nbostream & stream) : vespalib::NBOSerializer(stream) { }
+    virtual CatSerializer & put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value);
+    virtual CatSerializer & put(const vespalib::IFieldBase & field, const vespalib::stringref & value);
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value);
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value);
+    virtual void proxyPut(const ResultNode & value);
+    // Every get() overload is implemented by forwarding to nop(), which throws.
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, bool & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, uint8_t & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, uint16_t & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, uint32_t & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, uint64_t & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, double & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, float & value);
+    virtual CatSerializer & get(const vespalib::IFieldBase & field, vespalib::string & value);
+
+private:
+    CatSerializer & nop(const vespalib::IFieldBase & field, const void * value) __attribute__((noinline));  // kept out of line on purpose via noinline
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/constantnode.h b/searchlib/src/vespa/searchlib/expression/constantnode.h
new file mode 100644
index 00000000000..b461af01319
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/constantnode.h
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header names are missing on both include rows (stripped <...> text) - restore from upstream
+#include
+
+namespace search {
+namespace expression {
+// Expression node wrapping a fixed result: preparing does nothing and executing always succeeds.
+class ConstantNode : public ExpressionNode
+{
+public:
+    DECLARE_NBO_SERIALIZE;
+    DECLARE_EXPRESSIONNODE(ConstantNode);
+    ConstantNode() : ExpressionNode(), _result() { }
+    ConstantNode(const ResultNode::CP & r) : ExpressionNode(), _result(r) { }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual const ResultNode & getResult() const { return *_result; }  // NOTE(review): dereferences _result unconditionally - presumably invalid on a default-constructed node; confirm
+private:
+    virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; }
+    virtual bool onExecute() const { return true; }
+    ResultNode::CP _result;  // the constant returned by getResult()
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp
new file mode 100644
index 00000000000..73ddc5c2d0a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.cpp
@@ -0,0 +1,78 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include + +namespace search { +namespace expression { + +using vespalib::FieldBase; +using vespalib::Serializer; +using vespalib::Deserializer; + +IMPLEMENT_EXPRESSIONNODE(DebugWaitFunctionNode, UnaryFunctionNode); + +DebugWaitFunctionNode::DebugWaitFunctionNode() + : _waitTime(0.0), + _busyWait(true) +{ } + +DebugWaitFunctionNode::~DebugWaitFunctionNode() +{ +} + +DebugWaitFunctionNode::DebugWaitFunctionNode(const ExpressionNode::CP & arg, double waitTime, bool busyWait) + : UnaryFunctionNode(arg), + _waitTime(waitTime), + _busyWait(busyWait) +{ +} + +bool +DebugWaitFunctionNode::onExecute() const +{ + FastOS_Time time; + time.SetNow(); + double millis = _waitTime * 1000.0; + + while (time.MilliSecsToNow() < millis) { + if (_busyWait) { + for (int i = 0; i < 1000; i++) + ; + } else { + int rem = (int)(millis - time.MilliSecsToNow()); + FastOS_Thread::Sleep(rem); + } + } + getArg().execute(); + updateResult().assign(getArg().getResult()); + return true; +} + +Serializer & +DebugWaitFunctionNode::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + return os << _waitTime << _busyWait; +} + +Deserializer & +DebugWaitFunctionNode::onDeserialize(Deserializer & is) +{ + UnaryFunctionNode::onDeserialize(is); + is >> _waitTime >> _busyWait; + return is; +} + +void +DebugWaitFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + UnaryFunctionNode::visitMembers(visitor); + visit(visitor, "waitTime", _waitTime); + visit(visitor, "busyWait", _busyWait); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_debugwaitfunctionnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h new file mode 100644 index 00000000000..13b171e3135 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/debugwaitfunctionnode.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header names are missing on all four include rows (stripped <...> text) - restore from upstream
+#include
+#include
+#include
+
+// Unary function node that delays execution for a configured time before passing its argument's result through.
+namespace search {
+namespace expression {
+
+class DebugWaitFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(DebugWaitFunctionNode);
+    DECLARE_NBO_SERIALIZE;
+    DebugWaitFunctionNode();
+    ~DebugWaitFunctionNode();
+    DebugWaitFunctionNode(const ExpressionNode::CP & arg, double waitTime, bool busyWait);
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+    virtual bool onExecute() const;
+    double _waitTime;  // seconds; the implementation multiplies by 1000.0 to compare against milliseconds
+    bool _busyWait;    // true: spin in a loop; false: sleep for the remaining time
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h b/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h
new file mode 100644
index 00000000000..8775e71f7a0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/dividefunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header name is missing (stripped <...> text) - restore from upstream
+
+namespace search {
+namespace expression {
+// Concrete numeric function node for division; all three hooks are only declared here and defined elsewhere.
+class DivideFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(DivideFunctionNode);
+    DivideFunctionNode() { }
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const;
+    virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/documentaccessornode.h b/searchlib/src/vespa/searchlib/expression/documentaccessornode.h
new file mode 100644
index 00000000000..971d9af792f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/documentaccessornode.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include  // NOTE(review): header names are missing on all four include rows (stripped <...> text) - restore from upstream
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+// Abstract expression node that is handed a document type and documents through setDocType()/setDoc().
+class DocumentAccessorNode : public ExpressionNode
+{
+public:
+    DECLARE_ABSTRACT_EXPRESSIONNODE(DocumentAccessorNode);
+    class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate  // applies setDocType() to objects accepted by check()
+    {
+    public:
+        Configure(const document::DocumentType & documentType) : _docType(documentType) { }
+    private:
+        virtual void execute(vespalib::Identifiable &obj) { static_cast(obj).setDocType(_docType); }  // NOTE(review): cast target type is missing in this copy (stripped <...> text)
+        virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(DocumentAccessorNode::classId); }
+        const document::DocumentType & _docType;  // held by reference; the document type must outlive this object
+    };
+    // Public entry points; both simply forward to the pure virtual hooks below.
+    void setDoc(const document::Document & doc) { onDoc(doc); }
+    void setDocType(const document::DocumentType & docType) { onDocType(docType); }
+    virtual const vespalib::string & getFieldName() const { return _S_docId; }  // default name; _S_docId is defined as "documentid" in documentfieldnode.cpp
+private:
+    virtual void onDoc(const document::Document & doc) = 0;
+    virtual void onDocType(const document::DocumentType & docType) = 0;
+    static const vespalib::string _S_docId;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp
new file mode 100644
index 00000000000..5c85e110692
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp
@@ -0,0 +1,340 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include +#include +#include +#include + +#include + +LOG_SETUP(".searchlib.documentfieldnode"); + +namespace search { +namespace expression { + +using namespace vespalib; +using namespace document; + +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(DocumentAccessorNode, ExpressionNode); +IMPLEMENT_EXPRESSIONNODE(DocumentFieldNode, DocumentAccessorNode); +IMPLEMENT_EXPRESSIONNODE(GetYMUMChecksumFunctionNode, DocumentAccessorNode); +IMPLEMENT_EXPRESSIONNODE(GetDocIdNamespaceSpecificFunctionNode, DocumentAccessorNode); + +const vespalib::string DocumentAccessorNode::_S_docId("documentid"); + +DocumentFieldNode::DocumentFieldNode(const DocumentFieldNode & rhs) : + DocumentAccessorNode(rhs), + _fieldPath(rhs._fieldPath), + _value(rhs._value), + _fieldName(rhs._fieldName), + _doc(NULL) +{ +} + +DocumentFieldNode & DocumentFieldNode::operator = (const DocumentFieldNode & rhs) +{ + if (this != &rhs) { + DocumentAccessorNode::operator=(rhs); + _fieldPath = rhs._fieldPath; + _value = rhs._value; + _fieldName = rhs._fieldName; + _doc = NULL; + } + return *this; +} + +std::unique_ptr deduceResultNode(const vespalib::stringref & fieldName, const FieldValue & fv, bool preserveAccurateTypes, bool nestedMultiValue) +{ + std::unique_ptr value; + const Identifiable::RuntimeClass & cInfo = fv.getClass(); + if (cInfo.inherits(ByteFieldValue::classId) || cInfo.inherits(IntFieldValue::classId) || cInfo.inherits(LongFieldValue::classId)) { + if (preserveAccurateTypes) { + if (cInfo.inherits(ByteFieldValue::classId)) { + value.reset(nestedMultiValue ? static_cast(new Int8ResultNodeVector()) : static_cast(new Int8ResultNode())); + } else if (cInfo.inherits(IntFieldValue::classId)) { + value.reset(nestedMultiValue ? static_cast(new Int32ResultNodeVector()) : static_cast(new Int32ResultNode())); + } else { + value.reset(nestedMultiValue ? static_cast(new Int64ResultNodeVector()) : static_cast(new Int64ResultNode())); + } + } else { + value.reset(nestedMultiValue ? 
static_cast(new Int64ResultNodeVector()) : static_cast(new Int64ResultNode())); + } + } else if (cInfo.inherits(FloatFieldValue::classId) || cInfo.inherits(DoubleFieldValue::classId)) { + value.reset(nestedMultiValue ? static_cast(new FloatResultNodeVector()) : static_cast(new FloatResultNode())); + } else if (cInfo.inherits(StringFieldValue::classId)) { + value.reset(nestedMultiValue ? static_cast(new StringResultNodeVector()) : static_cast(new StringResultNode())); + } else if (cInfo.inherits(RawFieldValue::classId)) { + value.reset(nestedMultiValue ? static_cast(new RawResultNodeVector()) : static_cast(new RawResultNode())); + } else if (cInfo.inherits(CollectionFieldValue::classId) || cInfo.inherits(MapFieldValue::classId)) { + if (cInfo.inherits(CollectionFieldValue::classId)) { + value = deduceResultNode(fieldName, *static_cast(fv).createNested(), preserveAccurateTypes, nestedMultiValue); + } else if (cInfo.inherits(MapFieldValue::classId)) { + value = deduceResultNode(fieldName, *static_cast(fv).createValue(), preserveAccurateTypes, nestedMultiValue); + } else { + throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'", fieldName.c_str(), cInfo.name())); + } + const Identifiable::RuntimeClass & rInfo = value->getClass(); + if (rInfo.inherits(ResultNodeVector::classId)) { + //Already multivalue, so we are good to go. 
+ } else if (rInfo.inherits(Int8ResultNode::classId)) { + value.reset(new Int8ResultNodeVector()); + } else if (rInfo.inherits(Int16ResultNode::classId)) { + value.reset(new Int16ResultNodeVector()); + } else if (rInfo.inherits(Int32ResultNode::classId)) { + value.reset(new Int32ResultNodeVector()); + } else if (rInfo.inherits(Int64ResultNode::classId)) { + value.reset(new Int64ResultNodeVector()); + } else if (rInfo.inherits(FloatResultNode::classId)) { + value.reset(new FloatResultNodeVector()); + } else if (rInfo.inherits(StringResultNode::classId)) { + value.reset(new StringResultNodeVector()); + } else if (rInfo.inherits(RawResultNode::classId)) { + value.reset(new RawResultNodeVector()); + } else { + throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'. It nests down to %s which is not expected", fieldName.c_str(), cInfo.name(), rInfo.name())); + } + } else { + throw std::runtime_error(make_string("Can not deduce correct resultclass for documentfield '%s' in based on class '%s'", fieldName.c_str(), cInfo.name())); + } + return value; +} + +void DocumentFieldNode::onPrepare(bool preserveAccurateTypes) +{ + LOG(debug, "DocumentFieldNode::onPrepare(this=%p)", this); + + if ( !_fieldPath.empty() ) { + bool nestedMultiValue(false); + for(document::FieldPath::const_iterator it(_fieldPath.begin()), mt(_fieldPath.end()); !nestedMultiValue && (it != mt); it++) { + if (it->getType() == document::FieldPathEntry::STRUCT_FIELD) { + const vespalib::Identifiable::RuntimeClass & cInfo(it->getFieldValueToSet().getClass()); + nestedMultiValue = cInfo.inherits(CollectionFieldValue::classId) || cInfo.inherits(MapFieldValue::classId); + } + } + const document::FieldPathEntry & endOfPath(_fieldPath.back()); + if (endOfPath.getFieldValueToSetPtr() != NULL) { + const FieldValue& fv = endOfPath.getFieldValueToSet(); + _value.reset(deduceResultNode(_fieldName, fv, preserveAccurateTypes, nestedMultiValue).release()); + 
if (_value->inherits(ResultNodeVector::classId)) { + _handler.reset(new MultiHandler(static_cast(*_value))); + } else { + _handler.reset(new SingleHandler(*_value)); + } + } else { + if (endOfPath.getDataType().getClass().inherits(document::StructuredDataType::classId)) { + throw std::runtime_error(make_string("I am not able to access structured field '%s'", _fieldName.c_str())); + } else { + throw std::runtime_error(make_string("I am not able to access field '%s' for reasons I do not know", _fieldName.c_str())); + } + } + } +} + +void DocumentFieldNode::onDocType(const DocumentType & docType) +{ + LOG(debug, "DocumentFieldNode::onDocType(this=%p)", this); + FieldPath::UP path = docType.buildFieldPath(_fieldName); + if (!path.get() || path->empty()) { + throw std::runtime_error(make_string("Field %s could not be loacated in documenttype %s", _fieldName.c_str(), docType.getName().c_str())); + } + _fieldPath = *path; +} + +class FieldValue2ResultNode : public ResultNode +{ +public: + DECLARE_EXPRESSIONNODE(FieldValue2ResultNode); + FieldValue2ResultNode(const FieldValue * fv=NULL) : _fv(fv) { } + virtual int64_t onGetInteger(size_t index) const { (void) index; return _fv ? _fv->getAsLong() : 0; } + virtual double onGetFloat(size_t index) const { (void) index; return _fv ? 
_fv->getAsDouble() : 0; } + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + if (_fv) { + std::pair raw = _fv->getAsRaw(); + return ConstBufferRef(raw.first, raw.second); + } + return buf; + } + virtual void min(const ResultNode & b) { (void) b; } + virtual void max(const ResultNode & b) { (void) b; } + virtual void add(const ResultNode & b) { (void) b; } +private: + virtual void set(const ResultNode&); + virtual size_t hash() const { return 0; } + const FieldValue * _fv; +}; + +char DefaultValue::null = 0; + +void DefaultValue::set(const ResultNode&) +{ + throw std::runtime_error("DefaultValue::set(const ResultNode&) is not possible."); +} + +void FieldValue2ResultNode::set(const ResultNode&) +{ + throw std::runtime_error("FieldValue2ResultNode::set(const ResultNode&) is not possible."); +} + +IMPLEMENT_EXPRESSIONNODE(FieldValue2ResultNode, ResultNode); +IMPLEMENT_EXPRESSIONNODE(DefaultValue, ResultNode); + +void DocumentFieldNode::onDoc(const Document & doc) +{ + _doc = & doc; + _handler->reset(); +} + +bool DocumentFieldNode::onExecute() const +{ + _doc->iterateNested(_fieldPath.begin(), _fieldPath.end(), *_handler); + return true; +} + +DefaultValue DocumentFieldNode::SingleHandler::_defaultValue; + +void +DocumentFieldNode::SingleHandler::onPrimitive(const Content & c) +{ + LOG(spam, "SingleHandler::onPrimitive: field value '%s'", c.getValue().toString().c_str()); + FieldValue2ResultNode converter(&c.getValue()); + _result.set(converter); +} + +void +DocumentFieldNode::MultiHandler::onPrimitive(const Content & c) +{ + LOG(spam, "MultiHandler::onPrimitive: field value '%s'", c.getValue().toString().c_str()); + FieldValue2ResultNode converter(&c.getValue()); + _result.push_back_safe(converter); +} + +void +DocumentFieldNode::Handler::onCollectionStart(const Content & c) +{ + const document::FieldValue & fv = c.getValue(); + LOG(spam, "onCollectionStart: field value '%s'", fv.toString().c_str()); + if 
(fv.inherits(document::ArrayFieldValue::classId)) { + const document::ArrayFieldValue & afv = static_cast(fv); + LOG(spam, "onCollectionStart: Array size = '%zu'", afv.size()); + } else if (fv.inherits(document::WeightedSetFieldValue::classId)) { + const document::WeightedSetFieldValue & wsfv = static_cast(fv); + LOG(spam, "onCollectionStart: WeightedSet size = '%zu'", wsfv.size()); + } +} + +void +DocumentFieldNode::Handler::onStructStart(const Content & c) +{ + LOG(spam, "onStructStart: field value '%s'", c.getValue().toString().c_str()); +} + + +Serializer & DocumentFieldNode::onSerialize(Serializer & os) const +{ + return os << _fieldName << _value; +} + +Deserializer & DocumentFieldNode::onDeserialize(Deserializer & is) +{ + return is >> _fieldName >> _value; +} + +void +DocumentFieldNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "fieldName", _fieldName); + visit(visitor, "value", _value); + visitor.openStruct("fieldPath", "FieldPath"); + _fieldPath.visitMembers(visitor); + visitor.closeStruct(); +} + +class String2ResultNode : public ResultNode +{ +public: + String2ResultNode(const vespalib::string & s) : _s(s) { } + virtual int64_t onGetInteger(size_t index) const { (void) index; return strtoul(_s.c_str(), NULL, 0); } + virtual double onGetFloat(size_t index) const { (void) index; return strtod(_s.c_str(), NULL); } + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { (void) index; (void) buf; return ConstBufferRef(_s.c_str(), _s.size()); } +private: + virtual String2ResultNode * clone() const { return new String2ResultNode(_s); } + virtual void set(const ResultNode&); + virtual size_t hash() const { return 0; } + const vespalib::string & _s; +}; + +void String2ResultNode::set(const ResultNode&) +{ + throw std::runtime_error("String2ResultNode::set(const ResultNode&) is not possible."); +} + +void GetDocIdNamespaceSpecificFunctionNode::onDoc(const Document & doc) +{ + String2ResultNode 
converter(doc.getId().getScheme().getNamespaceSpecific()); + _value->set(converter); +} + +static const FieldBase _G_valueField("value"); + +Serializer & GetDocIdNamespaceSpecificFunctionNode::onSerialize(Serializer & os) const +{ + return os << _value; +} +Deserializer & GetDocIdNamespaceSpecificFunctionNode::onDeserialize(Deserializer & is) +{ + return is >> _value; +} + +void +GetDocIdNamespaceSpecificFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, _G_valueField.getName(), _value); +} + +void GetYMUMChecksumFunctionNode::onDoc(const Document & doc) +{ + const vespalib::string & ymumid = doc.getId().getScheme().getNamespaceSpecific(); + + try { + char decoded[20]; + int len = Base64::decode(ymumid.c_str(), ymumid.size(), decoded, sizeof(decoded)); + + if (len != 20) { + LOG(warning, "Illegal YMUMID '%s' in document id %s. Length(%d) != 20", ymumid.c_str(), doc.getId().toString().c_str(), len); + _checkSum = 0; + } else { + int32_t key[3]; + key[0] = 0; + memcpy(((char*)key) + 1, decoded + 9, sizeof(key) - 1); + _checkSum = (key[0] ^ key[1] ^ key[2]); + } + } catch (const std::exception & e) { + LOG(warning, "Illegal YMUMID '%s' in document id %s. 
Reason : %s", ymumid.c_str(), doc.getId().toString().c_str(), e.what()); + _checkSum = 0; + } +} + +Serializer & GetYMUMChecksumFunctionNode::onSerialize(Serializer & os) const +{ + return _checkSum.serialize(os); +} + +Deserializer & GetYMUMChecksumFunctionNode::onDeserialize(Deserializer & is) +{ + return _checkSum.deserialize(is); +} + +void +GetYMUMChecksumFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "checkSum", _checkSum); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_documentfieldnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/documentfieldnode.h b/searchlib/src/vespa/searchlib/expression/documentfieldnode.h new file mode 100644 index 00000000000..d40a9fd8836 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/documentfieldnode.h @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include  // NOTE(review): header names are missing on all five include rows (stripped <...> text) - restore from upstream
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+// ResultNode that always reads as 0 / an empty string; SingleHandler::reset() assigns it to clear a result.
+class DefaultValue : public ResultNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(DefaultValue);
+    virtual int64_t onGetInteger(size_t index) const { (void) index; return 0; }
+    virtual double onGetFloat(size_t index) const { (void) index; return 0; }
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+        (void) index;
+        (void) buf;
+        return ConstBufferRef(&null, 0);  // zero-length view onto the static null character
+    }
+    virtual void min(const ResultNode & b) { (void) b; }
+    virtual void max(const ResultNode & b) { (void) b; }
+    virtual void add(const ResultNode & b) { (void) b; }
+private:
+    virtual void set(const ResultNode&);  // implemented to throw std::runtime_error
+    virtual size_t hash() const { return 0; }
+    static char null;
+};
+// Expression node that reads one named document field; the current result is exposed as *_value.
+class DocumentFieldNode : public DocumentAccessorNode
+{
+public:
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    DECLARE_EXPRESSIONNODE(DocumentFieldNode);
+    DocumentFieldNode() : _fieldPath(), _value(), _fieldName(), _doc(NULL) { }
+    DocumentFieldNode(const vespalib::stringref &name) : _fieldPath(), _value(), _fieldName(name), _doc(NULL) { }
+    DocumentFieldNode(const DocumentFieldNode & rhs);
+    DocumentFieldNode & operator = (const DocumentFieldNode & rhs);
+    virtual const vespalib::string & getFieldName() const { return _fieldName; }
+private:
+    class Handler : public document::FieldValue::IteratorHandler {  // common base of the two handlers below
+    public:
+        virtual void reset() = 0;  // called from onDoc() for every new document
+    protected:
+        typedef document::FieldValue::IteratorHandler::Content Content;
+    private:
+        virtual void onCollectionStart(const Content & c);
+        virtual void onStructStart(const Content & c);
+    };
+    class SingleHandler : public Handler {  // writes a primitive value into one ResultNode
+    public:
+        SingleHandler(ResultNode & result) : _result(result) {}
+    private:
+        virtual void reset() { _result.set(_defaultValue); }
+        ResultNode & _result;  // held by reference; onPrepare() binds it to *_value
+        static DefaultValue _defaultValue;
+        virtual void onPrimitive(const Content & c);
+    };
+    class MultiHandler : public Handler {  // appends each primitive value to a ResultNodeVector
+    public:
+        MultiHandler(ResultNodeVector & result) : _result(result) {}
+    private:
+        virtual void reset() { _result.clear(); }
+        ResultNodeVector & _result;  // held by reference; onPrepare() binds it to *_value
+        virtual void onPrimitive(const Content & c);
+    };
+
+    virtual const ResultNode & getResult() const { return *_value; }  // NOTE(review): dereferences _value unconditionally - presumably only valid after onPrepare(); confirm
+    virtual void onPrepare(bool preserveAccurateTypes);
+    virtual bool onExecute() const;
+    virtual void onDoc(const document::Document & doc);
+    virtual void onDocType(const document::DocumentType & docType);
+    document::FieldPath _fieldPath;  // assigned in onDocType()
+    mutable ResultNode::CP _value;  // created in onPrepare(); also restored by deserialization
+    mutable std::unique_ptr _handler;  // created in onPrepare(); NOTE(review): element type is missing in this copy (stripped <...> text)
+    vespalib::string _fieldName;
+    const document::Document * _doc;  // not owned; set in onDoc()
+
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/enumresultnode.h b/searchlib/src/vespa/searchlib/expression/enumresultnode.h
new file mode 100644
index 00000000000..b395a1a7a6f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/enumresultnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Integer result node holding an enum value as int64_t.
+ * NOTE(review): the template argument of the IntegerResultNodeT base appears
+ * to be missing in this copy; confirm against the repository.
+ **/
+class EnumResultNode : public IntegerResultNodeT
+{
+private:
+    typedef IntegerResultNodeT Base;
+public:
+    DECLARE_RESULTNODE(EnumResultNode);
+
+    EnumResultNode(int64_t v=0) : Base(v) { }
+    // Copies the enum value (not the integer value) of rhs.
+    virtual void set(const ResultNode & rhs) { setValue(rhs.getEnum()); }
+
+private:
+    virtual int64_t onGetEnum(size_t index) const { (void) index; return getValue(); }
+    // Formats the value as decimal text into buf and returns a reference to it.
+    // The reported length is snprintf's return value clamped to [0, buf.size()].
+    // NOTE(review): on truncation snprintf returns the untruncated length, so the
+    // clamped length becomes buf.size(), which looks like it counts the
+    // terminating NUL as payload - confirm whether callers expect that.
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const {
+        (void) index;
+        int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%" PRId64, getValue()))));
+        return ConstBufferRef(buf.str(), numWritten);
+    }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/expressionnode.h b/searchlib/src/vespa/searchlib/expression/expressionnode.h
new file mode 100644
index 00000000000..d5c388f18e5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/expressionnode.h
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+
+typedef uint32_t DocId;
+
+class ResultNode;
+
+// Declaration helpers for abstract nodes: forward to the identifiable macros
+// with the search::expression namespace (plus one nested namespace for _NS1).
+#define DECLARE_ABSTRACT_EXPRESSIONNODE(Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class)
+#define DECLARE_ABSTRACT_EXPRESSIONNODE_NS1(ns, Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS3(search, expression, ns, Class)
+
+// Declaration helper for concrete nodes: identifiable declaration plus a
+// clone() override returning the concrete type.
+#define DECLARE_EXPRESSIONNODE(Class) \
+    DECLARE_IDENTIFIABLE_NS2(search, expression, Class) \
+    virtual Class * clone() const;
+
+#define DECLARE_EXPRESSIONNODE_NS1(ns, Class) \
+    DECLARE_IDENTIFIABLE_NS3(search, expression, ns, Class) \
+    virtual Class * clone() const;
+
+#define IMPLEMENT_ABSTRACT_EXPRESSIONNODE(Class, base) \
+    IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class, base)
+
+// Implementation helper for concrete nodes: clone() is defined as a copy
+// construction of *this on the heap.
+#define IMPLEMENT_EXPRESSIONNODE(Class, base) \
+    IMPLEMENT_IDENTIFIABLE_NS2(search, expression, Class, base) \
+    Class * Class::clone() const { return new Class(*this); }
+
+/**
+ * Abstract base of all expression nodes. The public execute() and prepare()
+ * entry points forward to the onExecute() and onPrepare() hooks that
+ * subclasses must implement.
+ * NOTE(review): the template arguments of the UP/CP/LP typedefs appear to be
+ * missing in this copy; confirm against the repository.
+ **/
+class ExpressionNode : public vespalib::Identifiable
+{
+public:
+    DECLARE_ABSTRACT_EXPRESSIONNODE(ExpressionNode);
+    typedef std::unique_ptr UP;
+    typedef vespalib::IdentifiablePtr CP;
+    typedef vespalib::IdentifiableLinkedPtr LP;
+    virtual const ResultNode & getResult() const = 0;
+    // Forwards to onExecute() and returns its result.
+    bool execute() const { return onExecute(); }
+    // Forwards to onPrepare() and returns *this so calls can be chained.
+    ExpressionNode & prepare(bool preserveAccurateTypes) { onPrepare(preserveAccurateTypes); return *this; }
+    virtual ExpressionNode * clone() const = 0;
+    void executeIterative(const ResultNode & arg, ResultNode & result) const;
+    virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx);
+protected:
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    virtual void onPrepare(bool preserveAccurateTypes) = 0;
+    virtual bool onExecute() const = 0;
+};
+
+typedef ExpressionNode::CP * ExpressionNodeArray;
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/expressiontree.cpp
b/searchlib/src/vespa/searchlib/expression/expressiontree.cpp new file mode 100644 index 00000000000..1a01c49ac8c --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/expressiontree.cpp @@ -0,0 +1,202 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include + +namespace search { +namespace expression { + +using vespalib::Serializer; +using vespalib::Deserializer; + +IMPLEMENT_EXPRESSIONNODE(ExpressionTree, ExpressionNode); + +void ExpressionTree::Configure::execute(vespalib::Identifiable &obj) +{ + ExpressionTree & e(static_cast(obj)); + if (e.getRoot().get()) { + e.getRoot()->prepare(false); + } + e.prepare(false); +} + +ExpressionTree::ExpressionTree() : + _root(), + _attributeNodes(), + _documentAccessorNodes(), + _relevanceNodes(), + _interpolatedLookupNodes(), + _arrayAtLookupNodes() +{ + prepare(false); +} + +ExpressionTree::ExpressionTree(const ExpressionNode & root) : + _root(root.clone()), + _attributeNodes(), + _documentAccessorNodes(), + _relevanceNodes(), + _interpolatedLookupNodes(), + _arrayAtLookupNodes() +{ + prepare(false); +} + +template +class Gather : public vespalib::ObjectOperation, public vespalib::ObjectPredicate +{ + std::vector &_list; +public: + Gather(std::vector &list) : _list(list) { _list.clear(); } + + void from(ExpressionNode::LP &root) { + root->select(*this, *this); + } +private: + virtual void execute(vespalib::Identifiable &obj) { + _list.push_back(&static_cast(obj)); + } + virtual bool check(const vespalib::Identifiable &obj) const { + return obj.inherits(NODE::classId); + } +}; + +template +Gather +gather(std::vector &list) { + return Gather(list); +} + + +void ExpressionTree::onPrepare(bool preserveAccurateTypes) +{ + (void) preserveAccurateTypes; + if (_root.get() != NULL) { + gather(_attributeNodes).from(_root); + gather(_documentAccessorNodes).from(_root); + gather(_relevanceNodes).from(_root); + 
gather(_interpolatedLookupNodes).from(_root); + gather(_arrayAtLookupNodes).from(_root); + } +} + +ExpressionTree::ExpressionTree(const ExpressionNode::CP & root) : + _root(root->clone()), + _attributeNodes(), + _documentAccessorNodes(), + _relevanceNodes(), + _interpolatedLookupNodes(), + _arrayAtLookupNodes() +{ + prepare(false); +} + +ExpressionTree::ExpressionTree(const ExpressionTree & rhs) : + ExpressionNode(rhs), + _root(rhs._root), + _attributeNodes(), + _documentAccessorNodes(), + _relevanceNodes(), + _interpolatedLookupNodes() +{ + prepare(false); +} + +ExpressionTree & ExpressionTree::operator = (const ExpressionTree & rhs) +{ + if (this != & rhs) { + ExpressionTree eTree(rhs); + swap(eTree); + } + return *this; +} + +void ExpressionTree::swap(ExpressionTree & e) +{ + std::swap(_root, e._root); + _attributeNodes.swap(e._attributeNodes); + _documentAccessorNodes.swap(e._documentAccessorNodes); + _relevanceNodes.swap(e._relevanceNodes); + _interpolatedLookupNodes.swap(e._interpolatedLookupNodes); +} + +ExpressionTree::~ExpressionTree() +{ +} + +bool ExpressionTree::execute(const document::Document & doc, HitRank rank) const +{ + for(DocumentAccessorNodeList::const_iterator it(_documentAccessorNodes.begin()), mt(_documentAccessorNodes.end()); it != mt; it++) { + (*it)->setDoc(doc); + } + for(RelevanceNodeList::const_iterator it(_relevanceNodes.begin()), mt(_relevanceNodes.end()); it != mt; it++) { + (*it)->setRelevance(rank); + } + return _root->execute(); +} + +struct DocIdSetter { + DocId _docId; + void operator() (InterpolatedLookup *node) { + node->setDocId(_docId); + } + void operator() (ArrayAtLookup *node) { + node->setDocId(_docId); + } + void operator() (AttributeNode *node) { + node->setDocId(_docId); + } + DocIdSetter(DocId docId) : _docId(docId) {} +}; + +struct RankSetter { + HitRank _rank; + void operator() (RelevanceNode *node) { + node->setRelevance(_rank); + } + RankSetter(HitRank rank) : _rank(rank) {} +}; + + +bool 
ExpressionTree::execute(DocId docId, HitRank rank) const +{ + DocIdSetter setDocId(docId); + RankSetter setHitRank(rank); + std::for_each(_attributeNodes.cbegin(), _attributeNodes.cend(), setDocId); + std::for_each(_relevanceNodes.cbegin(), _relevanceNodes.cend(), setHitRank); + std::for_each(_interpolatedLookupNodes.cbegin(), _interpolatedLookupNodes.cend(), setDocId); + std::for_each(_arrayAtLookupNodes.cbegin(), _arrayAtLookupNodes.cend(), setDocId); + + return _root->execute(); +} + +void +ExpressionTree::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "root", _root.get()); +} + +void ExpressionTree::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation) +{ + if (_root.get()) { + _root->select(predicate, operation); + } +} + + +Serializer & operator << (Serializer & os, const ExpressionTree & et) +{ + return os << et._root; +} + +Deserializer & operator >> (Deserializer & is, ExpressionTree & et) +{ + is >> et._root; + et.prepare(false); + return is; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_expressiontree() {} diff --git a/searchlib/src/vespa/searchlib/expression/expressiontree.h b/searchlib/src/vespa/searchlib/expression/expressiontree.h new file mode 100644 index 00000000000..af5c26c0efb --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/expressiontree.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+
+// NOTE(review): #include targets and template argument lists (std::vector,
+// LinkedPtr, static_cast, std::numeric_limits) appear to be missing in this
+// copy; confirm against the repository before building.
+
+// Plain holder for an attribute context and a document type pointer.
+struct ConfigureStaticParams {
+    ConfigureStaticParams (const search::attribute::IAttributeContext * attrCtx,
+                           const document::DocumentType * docType)
+        : _attrCtx(attrCtx), _docType(docType) { }
+    const search::attribute::IAttributeContext * _attrCtx;
+    const document::DocumentType * _docType;
+};
+
+/**
+ * Expression node wrapping a root expression together with per-type lists of
+ * raw pointers to nodes found below that root. The lists are filled in
+ * onPrepare() and used by the execute() overloads to push the document,
+ * document id and rank into the nodes before the root is executed.
+ **/
+class ExpressionTree : public ExpressionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ExpressionTree);
+    typedef vespalib::LinkedPtr LP;
+    // Predicate and operation in one: matches ExpressionTree instances.
+    class Configure : public vespalib::ObjectOperation, public vespalib::ObjectPredicate
+    {
+    private:
+        virtual void execute(vespalib::Identifiable &obj);
+        virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(ExpressionTree::classId); }
+    };
+
+    ExpressionTree();
+    ExpressionTree(const ExpressionNode & root);
+    ExpressionTree(const ExpressionNode::CP & root);
+    ExpressionTree(const ExpressionTree & rhs);
+    ~ExpressionTree();
+    ExpressionTree & operator = (const ExpressionTree & rhs);
+    bool execute(DocId docId, HitRank rank) const;
+    bool execute(const document::Document & doc, HitRank rank) const;
+    const ExpressionNode::LP & getRoot() const { return _root; }
+    // Dereferences _root without a null check.
+    virtual const ResultNode & getResult() const { return _root->getResult(); }
+    friend vespalib::Serializer & operator << (vespalib::Serializer & os, const ExpressionTree & et);
+    friend vespalib::Deserializer & operator >> (vespalib::Deserializer & is, ExpressionTree & et);
+    void swap(ExpressionTree &);
+private:
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual void selectMembers(const vespalib::ObjectPredicate &predicate, vespalib::ObjectOperation &operation);
+    // Dereferences _root without a null check.
+    virtual bool onExecute() const { return _root->execute(); }
+    virtual void onPrepare(bool preserveAccurateTypes);
+
+    typedef std::vector AttributeNodeList;
+    typedef std::vector DocumentAccessorNodeList;
+    typedef std::vector RelevanceNodeList;
+    typedef std::vector InterpolatedLookupList;
+    typedef std::vector ArrayAtLookupList;
+
+    ExpressionNode::LP _root;
+    AttributeNodeList _attributeNodes;
+    DocumentAccessorNodeList _documentAccessorNodes;
+    RelevanceNodeList _relevanceNodes;
+    InterpolatedLookupList _interpolatedLookupNodes;
+    ArrayAtLookupList _arrayAtLookupNodes;
+};
+
+
+} // namespace expression
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp
new file mode 100644
index 00000000000..e3eed91fe81
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.cpp
@@ -0,0 +1,134 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "fixedwidthbucketfunctionnode.h"
+#include "integerresultnode.h"
+#include "floatresultnode.h"
+#include "integerbucketresultnode.h"
+#include "floatbucketresultnode.h"
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_EXPRESSIONNODE(FixedWidthBucketFunctionNode, UnaryFunctionNode);
+
+// Sets the result bucket to the width-aligned range around the integer value.
+// With width <= 0 the range degenerates to [n, n].
+void
+FixedWidthBucketFunctionNode::IntegerBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+    IntegerBucketResultNode &bucket = (IntegerBucketResultNode &)result;
+    int64_t n = value.getInteger();
+    int64_t from = n;
+    int64_t to = n;
+    if (width > 0) {
+        if (n >= 0) {
+            // Round down to a multiple of width; clamp the upper limit
+            // instead of letting from + width overflow.
+            from = (n/width) * width;
+            if (from >= (std::numeric_limits::max() - width)) {
+                to = std::numeric_limits::max();
+            } else {
+                to = from + width;
+            }
+        } else {
+            // Integer division truncates toward zero, so the upper limit is
+            // derived from n+1 to get the bucket that ends above n; the lower
+            // limit is clamped instead of letting to - width overflow.
+            to = ((n+1)/width) * width;
+            if (to <= (std::numeric_limits::min() + width)) {
+                from = std::numeric_limits::min();
+            } else {
+                from = to - width;
+            }
+        }
+    }
+    bucket.setRange(from, to);
+}
+
+// Resizes the result vector to match the input and buckets element by element.
+void
+FixedWidthBucketFunctionNode::IntegerVectorBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+    const IntegerResultNodeVector::Vector & v(static_cast(value).getVector());
+    IntegerBucketResultNodeVector::Vector & r(static_cast(result).getVector());
+    r.resize(v.size());
+    for (size_t i(0), m(v.size()); i < m; i++) {
+        IntegerBucketHandler::update(r[i], v[i]);
+    }
+}
+
+// Resizes the result vector to match the input and buckets element by element.
+void
+FixedWidthBucketFunctionNode::FloatVectorBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+    const FloatResultNodeVector::Vector & v(static_cast(value).getVector());
+    FloatBucketResultNodeVector::Vector & r(static_cast(result).getVector());
+    r.resize(v.size());
+    for (size_t i(0), m(v.size()); i < m; i++) {
+        FloatBucketHandler::update(r[i], v[i]);
+    }
+}
+
+// Sets the result bucket to [floor(n/width)*width, (floor(n/width)+1)*width).
+// With width <= 0 the range degenerates to [n, n].
+void
+FixedWidthBucketFunctionNode::FloatBucketHandler::update(ResultNode &result, const ResultNode &value) const
+{
+    FloatBucketResultNode &bucket = (FloatBucketResultNode &)result;
+    double n = value.getFloat();
+    double from = n;
+    double to = n;
+    if (width > 0.0) {
+        double tmp = floor(n/width);
+        from = tmp * width;
+        to = (tmp+1) * width;
+    }
+    bucket.setRange(from, to);
+}
+
+// Picks result type and bucket handler from the class of the argument's
+// result; throws std::runtime_error for any other input class.
+// _width is dereferenced without a null check.
+void
+FixedWidthBucketFunctionNode::onPrepareResult()
+{
+    const ExpressionNode &child = getArg();
+    const ResultNode &input = child.getResult();
+    if (input.getClass().inherits(IntegerResultNode::classId)) {
+        ResultNode::UP res(new IntegerBucketResultNode());
+        setResultType(std::move(res));
+        _bucketHandler.reset(new IntegerBucketHandler(_width->getInteger()));
+    } else if (input.getClass().inherits(FloatResultNode::classId)) {
+        ResultNode::UP res(new FloatBucketResultNode());
+        setResultType(std::move(res));
+        _bucketHandler.reset(new FloatBucketHandler(_width->getFloat()));
+    } else if (input.getClass().inherits(IntegerResultNodeVector::classId)) {
+        ResultNode::UP res(new IntegerBucketResultNodeVector());
+        setResultType(std::move(res));
+        _bucketHandler.reset(new IntegerVectorBucketHandler(_width->getInteger()));
+    } else if (input.getClass().inherits(FloatResultNodeVector::classId)) {
+        ResultNode::UP res(new FloatBucketResultNodeVector());
+        setResultType(std::move(res));
+        _bucketHandler.reset(new FloatVectorBucketHandler(_width->getFloat()));
+    } else {
+        throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", input.getClass().name()));
+    }
+}
+
+// Executes the argument (its return value is ignored) and buckets its result.
+// Always returns true.
+bool
+FixedWidthBucketFunctionNode::onExecute() const
+{
+    getArg().execute();
+    _bucketHandler->update(updateResult(), getArg().getResult());
+    return true;
+}
+
+// Serializes the base class state followed by the width.
+vespalib::Serializer &
+FixedWidthBucketFunctionNode::onSerialize(vespalib::Serializer &os) const
+{
+    UnaryFunctionNode::onSerialize(os);
+    return os << _width;
+}
+
+// Reads the base class state followed by the width.
+vespalib::Deserializer &
+FixedWidthBucketFunctionNode::onDeserialize(vespalib::Deserializer &is)
+{
+    UnaryFunctionNode::onDeserialize(is);
+    return is >> _width;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h
new file mode 100644
index 00000000000..cf7b4561450
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/fixedwidthbucketfunctionnode.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { +namespace expression { + +class FixedWidthBucketFunctionNode : public UnaryFunctionNode +{ +public: + // update result bucket based on numeric value + struct BucketHandler { + typedef vespalib::CloneablePtr CP; + virtual void update(ResultNode &result, const ResultNode &value) const = 0; + virtual BucketHandler *clone() const = 0; + virtual ~BucketHandler() {} + }; + + // update integer result bucket based on integer value + struct IntegerBucketHandler : public BucketHandler { + int64_t width; + IntegerBucketHandler(int64_t w) : width(w) {} + virtual void update(ResultNode &result, const ResultNode &value) const; + virtual IntegerBucketHandler *clone() const { return new IntegerBucketHandler(*this); } + }; + struct IntegerVectorBucketHandler : public IntegerBucketHandler { + IntegerVectorBucketHandler(int64_t w) : IntegerBucketHandler(w) { } + virtual void update(ResultNode &result, const ResultNode &value) const; + virtual IntegerVectorBucketHandler *clone() const { return new IntegerVectorBucketHandler(*this); } + }; + + // update float result bucket based on float value + struct FloatBucketHandler : public BucketHandler { + double width; + FloatBucketHandler(double w) : width(w) {} + virtual void update(ResultNode &result, const ResultNode &value) const; + virtual FloatBucketHandler *clone() const { return new FloatBucketHandler(*this); } + }; + + struct FloatVectorBucketHandler : public FloatBucketHandler { + FloatVectorBucketHandler(double w) : FloatBucketHandler(w) { } + virtual void update(ResultNode &result, const ResultNode &value) const; + virtual FloatVectorBucketHandler *clone() const { return new FloatVectorBucketHandler(*this); } + }; +private: + virtual void onPrepareResult(); + virtual bool onExecute() const; + + NumericResultNode::CP _width; + BucketHandler::CP _bucketHandler; + +public: + DECLARE_EXPRESSIONNODE(FixedWidthBucketFunctionNode); + 
DECLARE_NBO_SERIALIZE; + FixedWidthBucketFunctionNode() : UnaryFunctionNode(), _width(), _bucketHandler() {} + FixedWidthBucketFunctionNode(const ExpressionNode::CP &arg) : UnaryFunctionNode(arg), _width(), _bucketHandler() {} + FixedWidthBucketFunctionNode &setWidth(const NumericResultNode::CP &width) { + _width = width; + return *this; + } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp new file mode 100644 index 00000000000..34bb9f0fec6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.cpp @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "floatbucketresultnode.h" +#include +#include + +namespace search { +namespace expression { + +IMPLEMENT_RESULTNODE(FloatBucketResultNode, BucketResultNode); + +FloatBucketResultNode FloatBucketResultNode::_nullResult; + +size_t +FloatBucketResultNode::hash() const +{ + size_t tmpHash(0); + memcpy(&tmpHash, &_from, sizeof(tmpHash)); + return tmpHash; +} + +int +FloatBucketResultNode::onCmp(const Identifiable &b) const +{ + double f1(_from); + double f2(static_cast(b)._from); + + if (isnan(f1)) { + return isnan(f2) ? 0 : -1; + } else { + if (f1 < f2) { + return -1; + } else if (f1 > f2) { + return 1; + } else { + double t1(_to); + double t2(static_cast(b)._to); + if (isnan(t2)) { + return 1; + } else { + if (t1 < t2) { + return -1; + } else if (t1 > t2) { + return 1; + } + } + } + } + return 0; +} + +int FloatBucketResultNode::contains(const FloatBucketResultNode & b) const +{ + double diff(_from - b._from); + if (diff < 0) { + return (_to < b._to) ? -1 : 0; + } else { + return (_to > b._to) ? 
1 : 0; + } +} + +void +FloatBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, _fromField.getName(), _from); + visit(visitor, _toField.getName(), _to); +} + +vespalib::Serializer & +FloatBucketResultNode::onSerialize(vespalib::Serializer & os) const +{ + return os.put(_fromField, _from).put(_toField, _to); +} + +vespalib::Deserializer & +FloatBucketResultNode::onDeserialize(vespalib::Deserializer & is) +{ + return is.get(_fromField, _from).get(_toField, _to); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_floatbucketresultnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h new file mode 100644 index 00000000000..91a4ea66059 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/floatbucketresultnode.h @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "bucketresultnode.h" + +namespace search { +namespace expression { + +class FloatBucketResultNode : public BucketResultNode +{ +private: + double _from; + double _to; + static FloatBucketResultNode _nullResult; + virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); } + virtual void create(void * buf) const { (void) buf; } + virtual void destroy(void * buf) const { (void) buf; } + virtual void encode(void * buf) const { + double * v(static_cast(buf)); + v[0] = _from; + v[1] = _to; + } + virtual size_t hash(const void * buf) const { return static_cast(buf)[0]; } + virtual void decode(const void * buf) { + const double * v(static_cast(buf)); + _from = v[0]; + _to = v[1]; + } +public: + struct GetValue { + double operator () (const ResultNode & r) { return r.getFloat(); } + }; + + DECLARE_EXPRESSIONNODE(FloatBucketResultNode); + DECLARE_NBO_SERIALIZE; + FloatBucketResultNode() : _from(0.0), _to(0.0) {} + FloatBucketResultNode(double from, double to) : _from(from), _to(to) {} + virtual size_t hash() const; + virtual int onCmp(const Identifiable & b) const; + int contains(const FloatBucketResultNode & b) const; + int contains(double v) const { return (v < _from) ? 1 : (v >= _to) ? -1 : 0; } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + FloatBucketResultNode &setRange(double from, double to) { + _from = from; + _to = to; + return *this; + } + static const FloatBucketResultNode & getNull() { return _nullResult; } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/floatresultnode.h b/searchlib/src/vespa/searchlib/expression/floatresultnode.h new file mode 100644 index 00000000000..4204e4457a9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/floatresultnode.h @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace expression {
+
+// NOTE(review): #include targets and template argument lists (static_cast,
+// convertForSort) appear to be missing in this copy; confirm against the
+// repository before building.
+
+/**
+ * Numeric result node holding a single double value. The raw buffer
+ * operations (encode/decode/swap/cmpMem/hash) treat the buffer as one double.
+ **/
+class FloatResultNode : public NumericResultNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(FloatResultNode);
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    FloatResultNode(double v=0) : _value(v) { }
+    // Hash is the leading sizeof(size_t) bytes of the value's bit pattern.
+    virtual size_t hash() const { size_t tmpHash(0); memcpy(&tmpHash, &_value, sizeof(tmpHash)); return tmpHash; }
+    virtual int onCmp(const Identifiable & b) const;
+    virtual void add(const ResultNode & b);
+    virtual void negate();
+    virtual void multiply(const ResultNode & b);
+    virtual void divide(const ResultNode & b);
+    virtual void modulo(const ResultNode & b);
+    virtual void min(const ResultNode & b);
+    virtual void max(const ResultNode & b);
+    virtual void set(const ResultNode & rhs);
+    double get() const { return _value; }
+    void set(double value) { _value = value; }
+private:
+    // Three-way compare of two encoded doubles. Any comparison involving NaN
+    // falls through both tests and yields 1.
+    virtual int cmpMem(const void * a, const void *b) const {
+        const double & ai(*static_cast(a));
+        const double & bi(*static_cast(b));
+        return ai < bi ? -1 : ai == bi ? 0 : 1;
+    }
+    virtual void create(void * buf) const { (void) buf; }
+    virtual void destroy(void * buf) const { (void) buf; }
+    virtual void decode(const void * buf) { _value = *static_cast(buf); }
+    virtual void encode(void * buf) const { *static_cast(buf) = _value; }
+    virtual void swap(void * buf) { std::swap(*static_cast(buf), _value); }
+    // Same bit pattern hash as hash(), applied to an encoded buffer.
+    virtual size_t hash(const void * buf) const { size_t tmpHash(0); memcpy(&tmpHash, buf, sizeof(tmpHash)); return tmpHash; }
+    // NOTE(review): radixAsc and radixDesc read identically here; presumably
+    // they differ in the (missing) convertForSort template arguments - confirm.
+    virtual uint64_t radixAsc(const void * buf) const { return vespalib::convertForSort::convert(*static_cast(buf)); }
+    virtual uint64_t radixDesc(const void * buf) const { return vespalib::convertForSort::convert(*static_cast(buf)); }
+
+    virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+    bool isNan() const;
+    virtual void setMin();
+    virtual void setMax();
+    virtual int64_t onGetInteger(size_t index) const;
+    virtual double onGetFloat(size_t index) const;
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+    double _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/forcelink.hpp b/searchlib/src/vespa/searchlib/expression/forcelink.hpp
new file mode 100644
index 00000000000..b31ff9aa091
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/forcelink.hpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +void forcelink_file_searchlib_expression_debugwaitfunctionnode(); +void forcelink_file_searchlib_expression_floatbucketresultnode(); +void forcelink_file_searchlib_expression_resultnode(); +void forcelink_file_searchlib_expression_stringbucketresultnode(); +void forcelink_file_searchlib_expression_numericfunctionnode(); +void forcelink_file_searchlib_expression_rangebucketpredef(); +void forcelink_file_searchlib_expression_strcatserializer(); +void forcelink_file_searchlib_expression_zcurve(); +void forcelink_file_searchlib_expression_expressiontree(); +void forcelink_file_searchlib_expression_mathfunctionnode(); +void forcelink_file_searchlib_expression_ucafunctionnode(); +void forcelink_file_searchlib_expression_timestamp(); +void forcelink_file_searchlib_expression_catserializer(); +void forcelink_file_searchlib_expression_documentfieldnode(); +void forcelink_file_searchlib_expression_bucketresultnode(); +void forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode(); +void forcelink_file_searchlib_expression_rawbucketresultnode(); +void forcelink_file_searchlib_expression_attributenode(); +void forcelink_file_searchlib_expression_integerbucketresultnode(); +void forcelink_file_searchlib_expression_perdocexpression(); +void forcelink_file_searchlib_expression_resultvector(); + +void forcelink_searchlib_expression() { + forcelink_file_searchlib_expression_debugwaitfunctionnode(); + forcelink_file_searchlib_expression_floatbucketresultnode(); + forcelink_file_searchlib_expression_resultnode(); + forcelink_file_searchlib_expression_stringbucketresultnode(); + forcelink_file_searchlib_expression_numericfunctionnode(); + forcelink_file_searchlib_expression_rangebucketpredef(); + forcelink_file_searchlib_expression_strcatserializer(); + forcelink_file_searchlib_expression_zcurve(); + forcelink_file_searchlib_expression_expressiontree(); + forcelink_file_searchlib_expression_mathfunctionnode(); + 
forcelink_file_searchlib_expression_ucafunctionnode(); + forcelink_file_searchlib_expression_timestamp(); + forcelink_file_searchlib_expression_catserializer(); + forcelink_file_searchlib_expression_documentfieldnode(); + forcelink_file_searchlib_expression_bucketresultnode(); + forcelink_file_searchlib_expression_fixedwidthbucketfunctionnode(); + forcelink_file_searchlib_expression_rawbucketresultnode(); + forcelink_file_searchlib_expression_attributenode(); + forcelink_file_searchlib_expression_integerbucketresultnode(); + forcelink_file_searchlib_expression_perdocexpression(); + forcelink_file_searchlib_expression_resultvector(); +} + diff --git a/searchlib/src/vespa/searchlib/expression/functionnode.h b/searchlib/src/vespa/searchlib/expression/functionnode.h new file mode 100644 index 00000000000..b2486ce9c88 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/functionnode.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Abstract expression node that owns a mutable result object (_tmpResult)
+ * which subclasses update while executing.
+ **/
+class FunctionNode : public ExpressionNode
+{
+public:
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+    DECLARE_ABSTRACT_EXPRESSIONNODE(FunctionNode);
+    // Both accessors dereference _tmpResult without a null check.
+    virtual const ResultNode & getResult() const { return *_tmpResult; }
+    // Grants write access to the result from const methods (_tmpResult is mutable).
+    ResultNode & updateResult() const { return *_tmpResult; }
+    // Drops the current result object.
+    virtual void reset() { _tmpResult.reset(NULL); }
+
+    // Replaces the result object; returns *this so calls can be chained.
+    // The argument is taken by value.
+    FunctionNode &setResult(const ResultNode::CP res) { _tmpResult = res; return *this; }
+protected:
+    // Takes over ownership of the given result object.
+    void setResultType(ResultNode::UP res) { _tmpResult.reset(res.release()); }
+    virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+private:
+    mutable ResultNode::CP _tmpResult;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/functionnodes.cpp b/searchlib/src/vespa/searchlib/expression/functionnodes.cpp
new file mode 100644
index 00000000000..fff019767d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/functionnodes.cpp
@@ -0,0 +1,624 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +LOG_SETUP(".searchlib.documentexpressions"); + +namespace search { +namespace expression { + +using vespalib::asciistream; +using vespalib::nbostream; +using vespalib::Serializer; +using vespalib::Deserializer; +using vespalib::make_string; +using vespalib::Identifiable; +using vespalib::BufferRef; +using vespalib::ConstBufferRef; + +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ExpressionNode, Identifiable); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(FunctionNode, ExpressionNode); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(MultiArgFunctionNode, FunctionNode); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(UnaryFunctionNode, MultiArgFunctionNode); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(BinaryFunctionNode, MultiArgFunctionNode); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(BitFunctionNode, NumericFunctionNode); +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(UnaryBitFunctionNode, UnaryFunctionNode); + +IMPLEMENT_EXPRESSIONNODE(ConstantNode, ExpressionNode); +IMPLEMENT_EXPRESSIONNODE(AddFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(DivideFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(MultiplyFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ModuloFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(MinFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(MaxFunctionNode, NumericFunctionNode); +IMPLEMENT_EXPRESSIONNODE(XorFunctionNode, BitFunctionNode); +IMPLEMENT_EXPRESSIONNODE(AndFunctionNode, BitFunctionNode); +IMPLEMENT_EXPRESSIONNODE(OrFunctionNode, BitFunctionNode); +IMPLEMENT_EXPRESSIONNODE(CatFunctionNode, MultiArgFunctionNode); 
+/* IMPLEMENT_EXPRESSIONNODE(Class, Base) invocations continue from the previous line. The macro is defined outside this chunk; presumably it emits the class registration of Class with parent Base - confirm. */ IMPLEMENT_EXPRESSIONNODE(StrCatFunctionNode, MultiArgFunctionNode); +IMPLEMENT_EXPRESSIONNODE(NegateFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(SortFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ReverseFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(StrLenFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(NormalizeSubjectFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ToIntFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ToFloatFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(NumElemFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ToStringFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(ToRawFunctionNode, UnaryFunctionNode); +IMPLEMENT_EXPRESSIONNODE(XorBitFunctionNode, UnaryBitFunctionNode); +IMPLEMENT_EXPRESSIONNODE(MD5BitFunctionNode, UnaryBitFunctionNode); + +/* Fallback onArgument(): ignores both parameters and always throws std::runtime_error; the message names the dynamic class via getClass().name(). */ void ExpressionNode::onArgument(const ResultNode & arg, ResultNode & result) const +{ + (void) arg; + (void) result; + throw std::runtime_error(make_string("Class %s does not implement onArgument(const ResultNode & arg, ResultNode & result). 
Probably an indication that it tries to take a multivalued argument, which it can not.", getClass().name())); +} + +/* Applies one argument to the running result by forwarding to the virtual onArgument(). */ void ExpressionNode::executeIterative(const ResultNode & arg, ResultNode & result) const +{ + onArgument(arg, result); +} + +/* Default implementation: does nothing with the attribute context. */ void ExpressionNode::wireAttributes(const search::attribute::IAttributeContext &) +{ +} + + +/* Lookup table from a pair of operand class ids to the class id of the result of combining them. The constructor below fills in all 16 combinations of Integer, Float, String and Raw result nodes: Float wins over everything, Integer (as Int64) wins over String and Raw, String wins over Raw. */ class ArithmeticTypeConversion +{ +public: + ArithmeticTypeConversion() : + _typeConversion() + { + _typeConversion[IntegerResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId; + _typeConversion[IntegerResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId; + _typeConversion[IntegerResultNode::classId][StringResultNode::classId] = Int64ResultNode::classId; + _typeConversion[IntegerResultNode::classId][RawResultNode::classId] = Int64ResultNode::classId; + _typeConversion[FloatResultNode::classId][IntegerResultNode::classId] = FloatResultNode::classId; + _typeConversion[FloatResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId; + _typeConversion[FloatResultNode::classId][StringResultNode::classId] = FloatResultNode::classId; + _typeConversion[FloatResultNode::classId][RawResultNode::classId] = FloatResultNode::classId; + _typeConversion[StringResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId; + _typeConversion[StringResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId; + _typeConversion[StringResultNode::classId][StringResultNode::classId] = StringResultNode::classId; + _typeConversion[StringResultNode::classId][RawResultNode::classId] = StringResultNode::classId; + _typeConversion[RawResultNode::classId][IntegerResultNode::classId] = Int64ResultNode::classId; + _typeConversion[RawResultNode::classId][FloatResultNode::classId] = FloatResultNode::classId; + _typeConversion[RawResultNode::classId][StringResultNode::classId] = StringResultNode::classId; + _typeConversion[RawResultNode::classId][RawResultNode::classId] = 
RawResultNode::classId; + } + /* Creates an empty result node of the type obtained by combining arg1 and arg2; throws std::runtime_error for vectors of non-numeric base type and for more than one vector dimension. */ ResultNode::UP getType(const ResultNode & arg1, const ResultNode & arg2); + /* Creates an empty result node of the base (element) type of arg. */ static ResultNode::UP getType(const ResultNode & arg); +private: + /* Vector nesting depth: 0 for a non-vector, otherwise 1 + the depth of createBaseType(). */ static size_t getDimension(const ResultNode & r) { + if (r.getClass().inherits(ResultNodeVector::classId)) { + return 1 + getDimension(* r.createBaseType()); + } else { + return 0; + } + } + static size_t getBaseType(const ResultNode & r); + static size_t getBaseType2(const ResultNode & r); + /* Looks up the result class id for an (arg1, arg2) class-id pair. Throws std::runtime_error when the pair is not in the table, instead of dereferencing the end() iterator that find() returns for an unknown id. */ size_t getType(size_t arg1, size_t arg2) const { + auto outer = _typeConversion.find(arg1); + if (outer == _typeConversion.end()) { + throw std::runtime_error("No arithmetic type conversion is defined for the first argument type."); + } + auto inner = outer->second.find(arg2); + if (inner == outer->second.end()) { + throw std::runtime_error("No arithmetic type conversion is defined for the second argument type."); + } + return inner->second; + } + std::map > _typeConversion; +}; + +ResultNode::UP ArithmeticTypeConversion::getType(const ResultNode & arg1, const ResultNode & arg2) +{ + size_t baseTypeId = getType(getBaseType2(arg1), getBaseType2(arg2)); + size_t dimension = std::max(getDimension(arg1), getDimension(arg2)); + ResultNode::UP result; + if (dimension == 0) { + return ResultNode::UP(static_cast(Identifiable::classFromId(baseTypeId)->create())); + } else if (dimension == 1) { + if (baseTypeId == Int64ResultNode::classId) { + result.reset(new IntegerResultNodeVector()); + } else if (baseTypeId == FloatResultNode::classId) { + result.reset(new FloatResultNodeVector()); + } else { + throw std::runtime_error("We can not handle anything but numbers."); + } + } else { + throw std::runtime_error("We are not able to handle multidimensional arrays"); + } + return result; +} + +ResultNode::UP ArithmeticTypeConversion::getType(const ResultNode & arg) +{ + size_t baseTypeId = getBaseType(arg); + return ResultNode::UP(static_cast(Identifiable::classFromId(baseTypeId)->create())); +} + +/* Class id of r, or of its element type (recursively) when r is a ResultNodeVector. */ size_t ArithmeticTypeConversion::getBaseType(const ResultNode & r) +{ + if (r.getClass().inherits(ResultNodeVector::classId)) { + return getBaseType(* r.createBaseType()); + } else { + return r.getClass().id(); + } +} + +/* Like getBaseType(), but every class inheriting IntegerResultNode is reported as IntegerResultNode::classId so it matches the keys of the conversion table. */ size_t ArithmeticTypeConversion::getBaseType2(const ResultNode & r) +{ + if (r.getClass().inherits(ResultNodeVector::classId)) { + return 
getBaseType2(* r.createBaseType()); + } else if (r.getClass().inherits(IntegerResultNode::classId)) { + return IntegerResultNode::classId; + } else { + return getBaseType(r); + } +} + +namespace { + ArithmeticTypeConversion _ArithmeticTypeConversion; +} + + +/* Prepares every argument with the given flag, then prepares this node's own result. */ void MultiArgFunctionNode::onPrepare(bool preserveAccurateTypes) +{ + LOG(debug, "MultiArgFunctionNode::onPrepare(this=%p) Actual class = %s", this, getClass().name()); + for(size_t i(0), m(_args.size()); i < m; i++) { + _args[i]->prepare(preserveAccurateTypes); + } + prepareResult(); +} + +/* Chooses the result type: with one argument, the base type of that argument; with several, a clone of the first argument's result successively combined with each further argument through the conversion table. With no arguments nothing is set. NOTE(review): the test below compares the address of a reference with NULL; a reference is never null in well-formed code, so a compiler may treat the condition as always true - confirm what was intended. */ void MultiArgFunctionNode::onPrepareResult() +{ + if (_args.size() == 1) { + setResultType(ArithmeticTypeConversion::getType(_args[0]->getResult())); + } else if (_args.size() > 1) { + setResultType(std::unique_ptr(static_cast(_args[0]->getResult().clone()))); + for(size_t i(1), m(_args.size()); i < m; i++) { + if (&_args[i]->getResult() != NULL) { + setResultType(_ArithmeticTypeConversion.getType(getResult(), _args[i]->getResult())); + } + } + } +} + +/* Executes every argument, then folds them into the result via calculate(). */ bool MultiArgFunctionNode::onExecute() const +{ + for(size_t i(0), m(_args.size()); i < m; i++) { + _args[i]->execute(); + } + return calculate(_args, updateResult()); +} + +/* Seeds result from the first argument and applies the remaining ones with executeIterative(). Reads args[0] unconditionally - assumes at least one argument; TODO confirm callers guarantee this. */ bool MultiArgFunctionNode::onCalculate(const ExpressionNodeVector & args, ResultNode & result) const +{ + result.set(args[0]->getResult()); + for (size_t i(1), m(args.size()); i < m; i++) { + executeIterative(args[i]->getResult(), result); + } + return true; +} + +/* Bit functions always produce an Int64ResultNode. */ void BitFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new Int64ResultNode(0))); +} + +/* strcat always produces a StringResultNode. */ void StrCatFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new StringResultNode())); +} + +/* cat always produces a RawResultNode. */ void CatFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new RawResultNode())); +} + +/* Ignores the caller's flag and always prepares the arguments with preserveAccurateTypes = true. */ void CatFunctionNode::onPrepare(bool preserveAccurateTypes) +{ + (void) preserveAccurateTypes; + MultiArgFunctionNode::onPrepare(true); +} + +void BitFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const +{ + 
onArgument(arg, static_cast(result)); +} + +/* Per-operator onArgument() overrides: each applies arg to the running result with the matching ResultNode operation. */ void AddFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).add(arg); } +void DivideFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).divide(arg); } +void MultiplyFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).multiply(arg); } +void ModuloFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).modulo(arg); } +void MinFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).min(arg); } +void MaxFunctionNode::onArgument(const ResultNode & arg, ResultNode & result) const { static_cast(result).max(arg); } +void AndFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.andOp(arg); } +void OrFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.orOp(arg); } +void XorFunctionNode::onArgument(const ResultNode & arg, Int64ResultNode & result) const { result.xorOp(arg); } + +/* Seed value for max(): the lowest representable value of the argument's element type. Throws std::runtime_error when the first argument is neither a float vector nor an integer vector. The float case uses lowest() because numeric_limits::min() of a floating-point type is the smallest POSITIVE value, which would be a wrong seed whenever all elements are negative. */ ResultNode::CP MaxFunctionNode::getInitialValue() const +{ + ResultNode::CP initial; + const ResultNode & arg(getArg(0).getResult()); + if (arg.inherits(FloatResultNodeVector::classId)) { + initial.reset(new FloatResultNode(std::numeric_limits<double>::lowest())); + } else if (arg.inherits(IntegerResultNodeVector::classId)) { + initial.reset(new Int64ResultNode(std::numeric_limits::min())); + } else { + throw std::runtime_error(vespalib::string("Can not choose an initial value for class ") + arg.getClass().name()); + } + return initial; +} + +/* Seed value for min(): the largest representable value of the argument's element type; throws std::runtime_error for other argument types. */ ResultNode::CP MinFunctionNode::getInitialValue() const +{ + ResultNode::CP initial; + const ResultNode & arg(getArg(0).getResult()); + if (arg.inherits(FloatResultNodeVector::classId)) { + initial.reset(new FloatResultNode(std::numeric_limits::max())); + } else if (arg.inherits(IntegerResultNodeVector::classId)) { + initial.reset(new 
Int64ResultNode(std::numeric_limits::max())); + } else { + throw std::runtime_error(vespalib::string("Can not choose an initial value for class ") + arg.getClass().name()); + } + return initial; +} + +/* flatten() and getInitialValue() are deliberately unsupported for modulo and divide: all four functions below always throw std::runtime_error. */ ResultNode & ModuloFunctionNode::flatten(const ResultNodeVector &, ResultNode &) const +{ + throw std::runtime_error("ModuloFunctionNode::flatten() const not implemented since it shall never be used."); +} + +ResultNode & DivideFunctionNode::flatten(const ResultNodeVector &, ResultNode &) const +{ + throw std::runtime_error("DivideFunctionNode::flatten() const not implemented since it shall never be used."); +} + +ResultNode::CP ModuloFunctionNode::getInitialValue() const +{ + throw std::runtime_error("ModuloFunctionNode::getInitialValue() const not implemented since it shall never be used."); +} + +ResultNode::CP DivideFunctionNode::getInitialValue() const +{ + throw std::runtime_error("DivideFunctionNode::getInitialValue() const not implemented since it shall never be used."); +} + +/* Unary bit functions always produce a RawResultNode. */ void UnaryBitFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new RawResultNode())); +} + +/* Ignores the caller's flag and always prepares with preserveAccurateTypes = true. */ void UnaryBitFunctionNode::onPrepare(bool preserveAccurateTypes) +{ + (void) preserveAccurateTypes; + UnaryFunctionNode::onPrepare(true); +} + +/* Default for unary functions: the result has the same type as the argument's result (a clone of it). */ void UnaryFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(getArg().getResult().clone())); +} + +/* The conversion nodes below share one pattern: onPrepareResult() installs a result node of the target type, and onExecute() evaluates the argument and copies its value into the result with set(). */ void ToStringFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new StringResultNode())); +} + +bool ToStringFunctionNode::onExecute() const +{ + getArg().execute(); + updateResult().set(getArg().getResult()); + return true; +} + +void ToRawFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new RawResultNode())); +} + +bool ToRawFunctionNode::onExecute() const +{ + getArg().execute(); + updateResult().set(getArg().getResult()); + return true; +} + +void ToIntFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new Int64ResultNode())); +} + +bool ToIntFunctionNode::onExecute() const 
+{ + getArg().execute(); + updateResult().set(getArg().getResult()); + return true; +} + +void ToFloatFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new FloatResultNode())); +} + +bool ToFloatFunctionNode::onExecute() const +{ + getArg().execute(); + updateResult().set(getArg().getResult()); + return true; +} + +/* strlen produces an Int64ResultNode. */ void StrLenFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new Int64ResultNode())); +} + +/* Stores the size of the argument's string representation; buf is scratch space handed to getString(). */ bool StrLenFunctionNode::onExecute() const +{ + getArg().execute(); + char buf[32]; + static_cast (updateResult()).set(getArg().getResult().getString(BufferRef(buf, sizeof(buf))).size()); + return true; +} + +void NormalizeSubjectFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new StringResultNode())); +} + +/* Strips one leading reply/forward marker from the argument's string value and stores the remainder. Recognized prefixes are "Re: " / "RE: " and "Fw: " / "FW: " (4 characters) and "Fwd: " / "FWD: " / "FwD: " / "FWd: " (5 characters); the first letter must be upper case. Anything else is stored unchanged. */ bool NormalizeSubjectFunctionNode::onExecute() const +{ + getArg().execute(); + char buf[32]; + ConstBufferRef tmp(getArg().getResult().getString(BufferRef(buf, sizeof(buf)))); + + int pos = 0; + if (tmp.size() >= 4) { + if ((tmp[0] == 'R') && ((tmp[1] | 0x20) == 'e') && (tmp[2] == ':') && (tmp[3] == ' ')) { + pos = 4; + } else if ((tmp[0] == 'F') && ((tmp[1] | 0x20) == 'w')) { + if ((tmp[2] == ':') && (tmp[3] == ' ')) { + pos = 4; + } else if ((tmp.size() >= 5) && ((tmp[2] | 0x20) == 'd') && (tmp[3] == ':') && (tmp[4] == ' ')) { /* the 5-character form needs tmp[4]; the outer check only guarantees 4 characters */ + pos = 5; + } + } + } + static_cast (updateResult()).set(vespalib::stringref(tmp.c_str() + pos, tmp.size() - pos)); + return true; +} + +/* numelem produces an Int64ResultNode, initialised to 1. */ void NumElemFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new Int64ResultNode(1))); +} + +/* Stores the element count when the argument is a vector; for a non-vector argument the result is left untouched. */ bool NumElemFunctionNode::onExecute() const +{ + getArg().execute(); + if (getArg().getResult().inherits(ResultNodeVector::classId)) { + static_cast (updateResult()).set(static_cast(getArg().getResult()).size()); + } + return true; +} + +/* Copies the argument's value into the result and negates it in place. */ bool NegateFunctionNode::onExecute() const +{ + getArg().execute(); + updateResult().assign(getArg().getResult()); + updateResult().negate(); + return true; +} + +bool SortFunctionNode::onExecute() const +{ + 
getArg().execute(); + updateResult().assign(getArg().getResult()); + updateResult().sort(); + return true; +} + +/* Copies the argument's value into the result and reverses it in place. */ bool ReverseFunctionNode::onExecute() const +{ + getArg().execute(); + updateResult().assign(getArg().getResult()); + updateResult().reverse(); + return true; +} + +/* Executes every argument, serializes each result through a StrCatSerializer into one asciistream and stores the accumulated text. */ bool StrCatFunctionNode::onExecute() const +{ + asciistream os; + StrCatSerializer nos(os); + for(size_t i(0), m(getNumArgs()); i < m; i++) { + getArg(i).execute(); + getArg(i).getResult().serialize(nos); + } + static_cast(updateResult()).set(os.str()); + return true; +} + +/* Same as strcat but through a CatSerializer into an nbostream; the accumulated bytes are stored with setBuffer(). */ bool CatFunctionNode::onExecute() const +{ + nbostream os; + CatSerializer nos(os); + for(size_t i(0), m(getNumArgs()); i < m; i++) { + getArg(i).execute(); + getArg(i).getResult().serialize(nos); + } + static_cast(updateResult()).setBuffer(os.c_str(), os.size()); + return true; +} + +/* Sizes the scratch buffer _tmpXor to getNumBytes() zero bytes. */ XorBitFunctionNode::XorBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) : + UnaryBitFunctionNode(arg, numBits), + _tmpXor(getNumBytes(), 0) +{ +} + +/* Clears the reusable stream _tmpOs, executes the argument, serializes its result into _tmpOs and hands the bytes to internalExecute(). */ bool UnaryBitFunctionNode::onExecute() const +{ + _tmpOs.clear(); + getArg().execute(); + CatSerializer os(_tmpOs); + getArg().getResult().serialize(os); + return internalExecute(_tmpOs); +} + +void XorBitFunctionNode::onPrepareResult() +{ + UnaryBitFunctionNode::onPrepareResult(); + _tmpXor.resize(getNumBytes()); +} + +/* XOR-folds the serialized argument into _tmpXor.size() bytes: _tmpXor is zeroed, every complete numBytes-sized chunk of os is XOR-ed into it, the trailing partial chunk is applied, and the buffer is stored with setBuffer(). NOTE(review): numBytes is used as a divisor and &_tmpXor[0] is taken unconditionally, so an empty _tmpXor looks unsafe - confirm getNumBytes() can never be 0. NOTE(review): the trailing partial chunk is written with '=' rather than '^=', which overwrites the bytes accumulated so far - confirm this is intended. */ bool XorBitFunctionNode::internalExecute(const nbostream & os) const +{ + const size_t numBytes(_tmpXor.size()); + memset(&_tmpXor[0], 0, numBytes); + const char * s(os.c_str()); + for (size_t i(0), m(os.size()/numBytes); i < m; i++) { + for (size_t j(0), k(numBytes); j < k; j++) { + _tmpXor[j] ^= s[j + k*i]; + } + } + for (size_t i((os.size()/numBytes)*numBytes); i < os.size(); i++) { + _tmpXor[i%numBytes] = os.c_str()[i]; + } + static_cast(updateResult()).setBuffer(&_tmpXor[0], numBytes); + return true; +} + +/* Stores the MD5 digest of the serialized argument, cut to at most getNumBytes() bytes (the digest itself is 16 bytes). */ bool MD5BitFunctionNode::internalExecute(const nbostream & os) const +{ + const unsigned int MD5_DIGEST_LENGTH = 16; + unsigned char 
md5ScratchPad[MD5_DIGEST_LENGTH]; + fastc_md5sum(os.c_str(), os.size(), md5ScratchPad); + static_cast(updateResult()).setBuffer(md5ScratchPad, std::min(sizeof(md5ScratchPad), getNumBytes())); + return true; +} + +/* A FunctionNode serializes as its stored result only. */ Serializer & FunctionNode::onSerialize(Serializer & os) const +{ + return os << _tmpResult; +} +Deserializer & FunctionNode::onDeserialize(Deserializer & is) +{ + return is >> _tmpResult; +} + +void +ConstantNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "Value", _result); +} + +/* A ConstantNode serializes as its _result member only. */ Serializer & ConstantNode::onSerialize(Serializer & os) const +{ + return os << _result; +} +Deserializer & ConstantNode::onDeserialize(Deserializer & is) +{ + return is >> _result; +} + + + +void +FunctionNode::visitMembers(vespalib::ObjectVisitor & visitor) const +{ + visit(visitor, "tmpResult", _tmpResult); +} + +/* Forwards select() to the stored result when one is set; does nothing otherwise. */ void FunctionNode::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation) +{ + if (_tmpResult.get()) { + _tmpResult->select(predicate, operation); + } +} + +/* Selects on the stored result first, then on every argument in order. */ void MultiArgFunctionNode::selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation) +{ + FunctionNode::selectMembers(predicate, operation); + for(size_t i(0), m(_args.size()); i < m; i++) { + _args[i]->select(predicate, operation); + } +} + +/* Wire order: the FunctionNode part (stored result) followed by the argument list; onDeserialize() reads the same order. */ Serializer & MultiArgFunctionNode::onSerialize(Serializer & os) const +{ + FunctionNode::onSerialize(os); + os << _args; + return os; +} +Deserializer & MultiArgFunctionNode::onDeserialize(Deserializer & is) +{ + FunctionNode::onDeserialize(is); + return is >> _args; +} + +void +MultiArgFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + FunctionNode::visitMembers(visitor); + visit(visitor, "args", _args); +} + +/* Wire order: the UnaryFunctionNode part followed by _numBits; onDeserialize() reads the same order. */ Serializer & UnaryBitFunctionNode::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + return os << _numBits; +} +Deserializer & UnaryBitFunctionNode::onDeserialize(Deserializer & is) +{ + 
UnaryFunctionNode::onDeserialize(is); + return is >> _numBits; +} + +void +/* Visits the UnaryFunctionNode members, then "numBits". */ UnaryBitFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + UnaryFunctionNode::visitMembers(visitor); + visit(visitor, "numBits", _numBits); +} + +} +} diff --git a/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h b/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h new file mode 100644 index 00000000000..52114489f1f --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +/* Document accessor node whose result is the value held in _value. onDocType(), onPrepare() and onExecute() are no-ops here (onExecute() just returns true); onDoc() is declared here and defined elsewhere, presumably filling _value from the document - confirm. The default constructor starts with an empty StringResultNode; the other constructor takes ownership of the supplied result node. */ class GetDocIdNamespaceSpecificFunctionNode : public DocumentAccessorNode +{ +public: + DECLARE_NBO_SERIALIZE; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + DECLARE_EXPRESSIONNODE(GetDocIdNamespaceSpecificFunctionNode); + GetDocIdNamespaceSpecificFunctionNode() : _value(new StringResultNode("")) { } + GetDocIdNamespaceSpecificFunctionNode(ResultNode::UP resultNode) : _value(resultNode.release()) { } +private: + virtual const ResultNode & getResult() const { return *_value; } + virtual void onDocType(const document::DocumentType & docType) { (void) docType; } + virtual void onDoc(const document::Document & doc); + virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; } + virtual bool onExecute() const { return true; } + ResultNode::CP _value; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h b/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h new file mode 100644 index 00000000000..786f115f6a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/getymumchecksumfunctionnode.h @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +/* Document accessor node whose result is the Int64ResultNode member _checkSum. onPrepare(), onDocType() and onExecute() are no-ops here (onExecute() just returns true); onDoc() is declared here and defined elsewhere, presumably computing the checksum from the document - confirm. */ class GetYMUMChecksumFunctionNode : public DocumentAccessorNode +{ +public: + DECLARE_NBO_SERIALIZE; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + DECLARE_EXPRESSIONNODE(GetYMUMChecksumFunctionNode); +private: + virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; } + virtual const ResultNode & getResult() const { return _checkSum; } + virtual void onDocType(const document::DocumentType & docType) { (void) docType; } + virtual void onDoc(const document::Document & doc); + virtual bool onExecute() const { return true; } + Int64ResultNode _checkSum; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp new file mode 100644 index 00000000000..f821026a679 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include "integerbucketresultnode.h" +#include + +namespace search { +namespace expression { + +IMPLEMENT_RESULTNODE(IntegerBucketResultNode, BucketResultNode); + +IntegerBucketResultNode IntegerBucketResultNode::_nullResult; + +/* The hash is the lower bound only. */ size_t +IntegerBucketResultNode::hash() const +{ + return _from; +} + +/* Orders buckets by lower bound, then by upper bound; returns -1, 0 or 1. */ int +IntegerBucketResultNode::onCmp(const Identifiable & b) const +{ + int64_t f1(_from); + int64_t f2(static_cast(b)._from); + if (f1 < f2) { + return -1; + } else if (f1 > f2) { + return 1; + } else { + int64_t t1(_to); + int64_t t2(static_cast(b)._to); + if (t1 < t2) { + return -1; + } else if (t1 > t2) { + return 1; + } + } + return 0; +} + +/* Relates this bucket to b: when this bucket starts before b, returns -1 if it also ends before b ends and 0 otherwise; when it does not start before b, returns 1 if it ends after b ends and 0 otherwise. The lower bounds are compared directly; subtracting them can overflow int64_t for bounds far apart. */ int IntegerBucketResultNode::contains(const IntegerBucketResultNode & b) const +{ + if (_from < b._from) { + return (_to < b._to) ? -1 : 0; + } else { + return (_to > b._to) ? 1 : 0; + } +} + +void +IntegerBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, _fromField.getName(), _from); + visit(visitor, _toField.getName(), _to); +} + +/* Writes the lower bound, then the upper bound, as named fields. */ vespalib::Serializer & +IntegerBucketResultNode::onSerialize(vespalib::Serializer & os) const +{ + return os.put(_fromField, _from).put(_toField, _to); +} + +/* Reads the bounds in the order onSerialize() writes them. */ vespalib::Deserializer & +IntegerBucketResultNode::onDeserialize(vespalib::Deserializer & is) +{ + return is.get(_fromField, _from).get(_toField, _to); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_integerbucketresultnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h new file mode 100644 index 00000000000..66f49887288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "bucketresultnode.h" + +namespace search { +namespace expression { + +/* Bucket result holding an integer range as the pair (_from, _to). contains(int64_t) treats the range as half-open: a value equal to _to is reported as outside. The raw-buffer form used by encode()/decode()/hash(buf) is two consecutive int64_t values, lower bound first. */ class IntegerBucketResultNode : public BucketResultNode +{ +private: + int64_t _from; + int64_t _to; + static IntegerBucketResultNode _nullResult; + + virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); } + virtual void create(void * buf) const { (void) buf; } + virtual void destroy(void * buf) const { (void) buf; } + /* Writes _from and _to into buf, which must have room for two int64_t values. */ virtual void encode(void * buf) const { + int64_t * v(static_cast(buf)); + v[0] = _from; + v[1] = _to; + } + /* Hash of an encoded bucket: its first element, i.e. the lower bound. */ virtual size_t hash(const void * buf) const { return static_cast(buf)[0]; } + /* Reads _from and _to back from a buffer written by encode(). */ virtual void decode(const void * buf) { + const int64_t * v(static_cast(buf)); + _from = v[0]; + _to = v[1]; + } +#if 0 +#endif +public: + DECLARE_EXPRESSIONNODE(IntegerBucketResultNode); + DECLARE_NBO_SERIALIZE; + IntegerBucketResultNode() : _from(0), _to(0) {} + IntegerBucketResultNode(int64_t from, int64_t to) : _from(from), _to(to) {} + virtual size_t hash() const; + virtual int onCmp(const Identifiable & b) const; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + int contains(const IntegerBucketResultNode & b) const; + /* Returns 1 when v lies below the bucket, -1 when v is at or above _to, and 0 when _from <= v < _to. */ int contains(int64_t v) const { return (v < _from) ? 1 : (v >= _to) ? -1 : 0; } + IntegerBucketResultNode &setRange(int64_t from, int64_t to) { + _from = from; + _to = to; + return *this; + } + static const IntegerBucketResultNode & getNull() { return _nullResult; } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/integerresultnode.h b/searchlib/src/vespa/searchlib/expression/integerresultnode.h new file mode 100644 index 00000000000..3c5a571dc27 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/integerresultnode.h @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { +namespace expression { + +class IntegerResultNode : public NumericResultNode +{ +public: + DECLARE_ABSTRACT_RESULTNODE(IntegerResultNode); +}; + +template +class IntegerResultNodeT : public IntegerResultNode +{ +public: + IntegerResultNodeT(int64_t v=0) : _value(v) { } + virtual size_t hash() const { return _value; } + virtual int onCmp(const Identifiable & b) const { + T bv(static_cast(b)._value); + return (_value < bv) ? -1 : (_value > bv) ? 1 : 0; + } + virtual void add(const ResultNode & b) { _value += b.getInteger(); } + virtual void negate() { _value = - _value; } + virtual void multiply(const ResultNode & b) { _value *= b.getInteger(); } + virtual void divide(const ResultNode & b) { + int64_t val = b.getInteger(); + _value = (val == 0) ? 0 : (_value / val); + } + virtual void modulo(const ResultNode & b) { + int64_t val = b.getInteger(); + _value = (val == 0) ? 0 : (_value % val); + } + virtual void min(const ResultNode & b) { int64_t t(b.getInteger()); if (t < _value) { _value = t; } } + virtual void max(const ResultNode & b) { int64_t t(b.getInteger()); if (t > _value) { _value = t; } } + virtual void set(const ResultNode & rhs) { _value = rhs.getInteger(); } + void andOp(const ResultNode & b) { _value &= b.getInteger(); } + void orOp(const ResultNode & b) { _value |= b.getInteger(); } + void xorOp(const ResultNode & b) { _value ^= b.getInteger(); } + int64_t get() const { return _value; } + void set(int64_t value) { _value = value; } + IntegerResultNode & operator ++() { _value++; return *this; } + IntegerResultNode & operator +=(int64_t v) { _value += v; return *this; } +protected: + void setValue(const T &value) { _value = value; } + T getValue() const { return _value; } +private: + virtual int cmpMem(const void * a, const void *b) const { + const T & ai(*static_cast(a)); + const T & bi(*static_cast(b)); + return ai < bi ? -1 : ai == bi ? 
0 : 1; + } + virtual void create(void * buf) const { (void) buf; } + virtual void destroy(void * buf) const { (void) buf; } + /* Raw-buffer helpers: buf is treated as holding a single value of the node's type. */ virtual void decode(const void * buf) { _value = *static_cast(buf); } + virtual void encode(void * buf) const { *static_cast(buf) = _value; } + virtual void swap(void * buf) { std::swap(*static_cast(buf), _value); } + virtual size_t hash(const void * buf) const { return *static_cast(buf); } + virtual uint64_t radixAsc(const void * buf) const { return vespalib::convertForSort::convert(*static_cast(buf)); } + virtual uint64_t radixDesc(const void * buf) const { return vespalib::convertForSort::convert(*static_cast(buf)); } + virtual size_t onGetRawByteSize() const { return sizeof(_value); } + virtual void setMin() { _value = std::numeric_limits::min(); } + virtual void setMax() { _value = std::numeric_limits::max(); } + virtual vespalib::Serializer & onSerialize(vespalib::Serializer & os) const { return os << _value; } + virtual vespalib::Deserializer & onDeserialize(vespalib::Deserializer & is) { return is >> _value; } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "value", _value); } + /* The index argument is ignored: a scalar node has a single value. */ virtual int64_t onGetInteger(size_t index) const { (void) index; return _value; } + virtual double onGetFloat(size_t index) const { (void) index; return _value; } + T _value; +}; + +/* Concrete integer node; the element type argument was lost in this dump (presumably int8_t, matching the constructor - confirm). */ class Int8ResultNode : public IntegerResultNodeT +{ +private: + typedef IntegerResultNodeT Base; +public: + DECLARE_RESULTNODE(Int8ResultNode); + Int8ResultNode(int8_t v=0) : Base(v) { } +private: + /* Formats the value as decimal text into buf and returns a reference to the written bytes. The length is snprintf()'s return value clamped to the range [0, buf.size()]. NOTE(review): on truncation the clamped length equals buf.size(), which would include the terminating NUL - confirm callers always pass a large enough buffer. */ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue())))); + return ConstBufferRef(buf.str(), numWritten); + } +}; + +class Int16ResultNode : public IntegerResultNodeT +{ +private: + typedef IntegerResultNodeT Base; +public: + DECLARE_RESULTNODE(Int16ResultNode); + Int16ResultNode(int16_t v=0) : Base(v) { } 
+private: + /* Formats the value as decimal text into buf; the returned length is snprintf()'s return value clamped to [0, buf.size()]. The Int32 and Int64 variants below follow the same pattern. */ virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue())))); + return ConstBufferRef(buf.str(), numWritten); + } +}; + +/* Concrete integer node constructed from an int32_t. */ class Int32ResultNode : public IntegerResultNodeT +{ +private: + typedef IntegerResultNodeT Base; +public: + DECLARE_RESULTNODE(Int32ResultNode); + Int32ResultNode(int32_t v=0) : Base(v) { } +private: + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%d", getValue())))); + return ConstBufferRef(buf.str(), numWritten); + } +}; + +/* Concrete integer node constructed from an int64_t; formats with the PRId64 conversion instead of "%d". */ class Int64ResultNode : public IntegerResultNodeT +{ +private: + typedef IntegerResultNodeT Base; +public: + DECLARE_RESULTNODE(Int64ResultNode); + Int64ResultNode(int64_t v=0) : Base(v) { } +private: + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { + (void) index; + int numWritten(std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%" PRId64, getValue())))); + return ConstBufferRef(buf.str(), numWritten); + } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp new file mode 100644 index 00000000000..bd42ee61eb5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.cpp @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include + +namespace search { +namespace expression { + +using vespalib::Serializer; +using vespalib::Deserializer; + +IMPLEMENT_EXPRESSIONNODE(InterpolatedLookup, UnaryFunctionNode); + +/* Default constructor: no attribute name, null attribute pointer, doc id 0. */ InterpolatedLookup::InterpolatedLookup() + : _attribute(0), + _docId(0) +{ +} + +InterpolatedLookup::~InterpolatedLookup() +{ +} + +/* Construct from an attribute name only; _attribute stays null until wireAttributes() resolves the name. */ InterpolatedLookup::InterpolatedLookup(const vespalib::string &attribute, + const ExpressionNode::CP &arg) + : UnaryFunctionNode(arg), + _attributeName(attribute), + _attribute(0), + _docId(0) +{ +} + +/* Construct from an already resolved attribute vector; the name is taken from the vector and its address is stored, so the vector must outlive this node. */ InterpolatedLookup::InterpolatedLookup(const search::attribute::IAttributeVector &attr, + const ExpressionNode::CP &lookupArg) + : UnaryFunctionNode(lookupArg), + _attributeName(attr.getName()), + _attribute(&attr), + _docId(0) +{ +} + + +/* Copy constructor: copies the name and attribute pointer; _docId is initialised from rhs and then reset to 0 in the body. */ InterpolatedLookup::InterpolatedLookup(const InterpolatedLookup &rhs) : + UnaryFunctionNode(rhs), + _attributeName(rhs._attributeName), + _attribute(rhs._attribute), + _docId(rhs._docId) +{ + // why? 
+ _docId = 0; +} + +InterpolatedLookup & InterpolatedLookup::operator= (const InterpolatedLookup &rhs) +{ + if (this != &rhs) { + UnaryFunctionNode::operator =(rhs); + _attributeName = rhs._attributeName; + _attribute = rhs._attribute; + // _docId = rhs._docId; + _docId = 0; + } + return *this; +} + +void InterpolatedLookup::onPrepareResult() +{ + setResultType(std::unique_ptr(new FloatResultNode())); +} + +static double +simpleInterpolate(size_t sz, std::vector v, double lookup) +{ + if (sz == 0 || lookup < v[0]) + return 0; + for (size_t i = 1; i < sz; ++i) { + if (lookup < v[i]) { + double total = v[i] - v[i-1]; + double above = lookup - v[i-1]; + double result = i - 1; + result += (above / total); + return result; + } + } + return sz - 1; +} + +bool InterpolatedLookup::onExecute() const +{ + getArg().execute(); + double lookup = getArg().getResult().getFloat(); + // get attribute data + size_t numValues = _attribute->getValueCount(_docId); + std::vector valueVector; + valueVector.resize(numValues); + _attribute->get(_docId, &valueVector[0], numValues); + double result = simpleInterpolate(numValues, valueVector, lookup); + static_cast(updateResult()).set(result); + return true; +} + +void InterpolatedLookup::wireAttributes(const search::attribute::IAttributeContext & attrCtx) +{ + _attribute = attrCtx.getAttribute(_attributeName); + if (_attribute == NULL) { + throw std::runtime_error(vespalib::make_string("Failed locating attribute vector '%s'", _attributeName.c_str())); + } +} + +Serializer & InterpolatedLookup::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + os << _attributeName; + return os; +} + +Deserializer & InterpolatedLookup::onDeserialize(Deserializer & is) +{ + UnaryFunctionNode::onDeserialize(is); + is >> _attributeName; + return is; +} + +} // namespace expression +} // namespace search diff --git a/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h 
b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h new file mode 100644 index 00000000000..d81acb929f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/interpolatedlookupfunctionnode.h @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +class InterpolatedLookup : public UnaryFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(InterpolatedLookup); + DECLARE_NBO_SERIALIZE; + + InterpolatedLookup(); + ~InterpolatedLookup(); + + InterpolatedLookup(const vespalib::string &attribute, + const ExpressionNode::CP & arg); + + InterpolatedLookup(const search::attribute::IAttributeVector &attr, + const ExpressionNode::CP &lookupArg); + + InterpolatedLookup(const InterpolatedLookup &rhs); + + InterpolatedLookup & operator= (const InterpolatedLookup &rhs); + + void setDocId(DocId docId) { _docId = docId; } +private: + virtual bool onExecute() const; + virtual void onPrepareResult(); + virtual void wireAttributes(const search::attribute::IAttributeContext &attrCtx); + vespalib::string _attributeName; + const search::attribute::IAttributeVector * _attribute; + DocId _docId; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp new file mode 100644 index 00000000000..561081129cc --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.cpp @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include + +namespace search { +namespace expression { + +using namespace vespalib; + +IMPLEMENT_EXPRESSIONNODE(MathFunctionNode, MultiArgFunctionNode); + +Serializer & MathFunctionNode::onSerialize(Serializer & os) const +{ + MultiArgFunctionNode::onSerialize(os); + uint8_t code(_function); + return os << code; +} + +Deserializer & MathFunctionNode::onDeserialize(Deserializer & is) +{ + MultiArgFunctionNode::onDeserialize(is); + uint8_t code(0); + is >> code; + _function = (Function)code; + return is; +} + +void MathFunctionNode::onPrepareResult() +{ + setResultType(std::unique_ptr(new FloatResultNode())); +} + +bool MathFunctionNode::onExecute() const +{ + getArg(0).execute(); + double result(0.0); + switch (_function) { + case EXP: result = exp(getArg(0).getResult().getFloat()); break; + case POW: getArg(1).execute(); result = pow(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); break; + case LOG: result = log(getArg(0).getResult().getFloat()); break; + case LOG1P: result = log1p(getArg(0).getResult().getFloat()); break; + case LOG10: result = log10(getArg(0).getResult().getFloat()); break; + case SIN: result = sin(getArg(0).getResult().getFloat()); break; + case ASIN: result = asin(getArg(0).getResult().getFloat()); break; + case COS: result = cos(getArg(0).getResult().getFloat()); break; + case ACOS: result = acos(getArg(0).getResult().getFloat()); break; + case TAN: result = tan(getArg(0).getResult().getFloat()); break; + case ATAN: result = atan(getArg(0).getResult().getFloat()); break; + case SQRT: result = sqrt(getArg(0).getResult().getFloat()); break; + case SINH: result = sinh(getArg(0).getResult().getFloat()); break; + case ASINH: result = asinh(getArg(0).getResult().getFloat()); break; + case COSH: result = cosh(getArg(0).getResult().getFloat()); break; + case ACOSH: result = acosh(getArg(0).getResult().getFloat()); break; + case TANH: result = tanh(getArg(0).getResult().getFloat()); break; + case 
ATANH: result = atanh(getArg(0).getResult().getFloat()); break; + case CBRT: result = cbrt(getArg(0).getResult().getFloat()); break; + case HYPOT: getArg(1).execute(); result = hypot(getArg(0).getResult().getFloat(), getArg(1).getResult().getFloat()); break; + case FLOOR: result = floor(getArg(0).getResult().getFloat()); break; + } + static_cast(updateResult()).set(result); + return true; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_mathfunctionnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h new file mode 100644 index 00000000000..3546a100b05 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/mathfunctionnode.h @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +class MathFunctionNode : public MultiArgFunctionNode +{ +public: + typedef enum {EXP=0, POW=1, LOG=2, LOG1P=3, LOG10=4, SIN=5, ASIN=6, COS=7, ACOS=8, TAN=9, ATAN=10, SQRT=11, SINH=12, + ASINH=13, COSH=14, ACOSH=15, TANH=16, ATANH=17, CBRT=18, HYPOT=19, FLOOR=20 } Function; + DECLARE_EXPRESSIONNODE(MathFunctionNode); + DECLARE_NBO_SERIALIZE; + + MathFunctionNode() { } +private: + virtual bool onExecute() const; + virtual void onPrepareResult(); + Function _function; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h b/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h new file mode 100644 index 00000000000..515788611a3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/maxfunctionnode.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * NumericFunctionNode specialisation for "max".
+ * Flattening a result vector is delegated to ResultNodeVector::flattenMax();
+ * onArgument() and getInitialValue() are defined outside this header.
+ */
+class MaxFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(MaxFunctionNode);
+    MaxFunctionNode() { }
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    // Folds all elements of v into result via flattenMax().
+    virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMax(result); }
+    virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h
new file mode 100644
index 00000000000..038a26f2cac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/md5bitfunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * UnaryBitFunctionNode specialisation; the actual computation lives in
+ * internalExecute(), which is defined outside this header.
+ * NOTE(review): presumably an MD5 digest of the argument truncated to numBits
+ * bits -- confirm against the implementation.
+ */
+class MD5BitFunctionNode : public UnaryBitFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(MD5BitFunctionNode);
+    MD5BitFunctionNode() { }
+    // Forwards both the argument and the bit count to UnaryBitFunctionNode.
+    MD5BitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) : UnaryBitFunctionNode(arg, numBits) { }
+private:
+    virtual bool internalExecute(const vespalib::nbostream & os) const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/minfunctionnode.h b/searchlib/src/vespa/searchlib/expression/minfunctionnode.h
new file mode 100644
index 00000000000..1ce835fbc24
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/minfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * NumericFunctionNode specialisation for "min".
+ * Flattening a result vector is delegated to ResultNodeVector::flattenMin();
+ * onArgument() and getInitialValue() are defined outside this header.
+ */
+class MinFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(MinFunctionNode);
+    MinFunctionNode() { }
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    // Folds all elements of v into result via flattenMin().
+    virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMin(result); }
+    virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h b/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h
new file mode 100644
index 00000000000..c6d46feea73
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/modulofunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * NumericFunctionNode specialisation for "modulo".
+ * All three hooks (onArgument, flatten, getInitialValue) are only declared
+ * here; their definitions are outside this header.
+ */
+class ModuloFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ModuloFunctionNode);
+    ModuloFunctionNode() { }
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const;
+    virtual ResultNode::CP getInitialValue() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h b/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h
new file mode 100644
index 00000000000..349d448d753
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/multiargfunctionnode.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Abstract FunctionNode that owns an ordered list of argument expressions.
+ * Subclasses reach the arguments through getNumArgs()/getArg() and customise
+ * evaluation through the virtual hooks declared below.
+ */
+class MultiArgFunctionNode : public FunctionNode
+{
+public:
+    // NOTE(review): the element type of this vector was lost in extraction;
+    // the members below treat elements as pointer-like (*_args[n]) and push
+    // ExpressionNode::CP values into it.
+    typedef std::vector ExpressionNodeVector;
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor & visitor) const;
+    DECLARE_ABSTRACT_EXPRESSIONNODE(MultiArgFunctionNode);
+    MultiArgFunctionNode() : FunctionNode() { }
+    // Alias for addArg().
+    MultiArgFunctionNode & appendArg(const ExpressionNode::CP & arg) { return addArg(arg); }
+    // Appends arg to the argument list and returns *this so calls can be chained.
+    MultiArgFunctionNode &addArg(const ExpressionNode::CP & arg) {
+        _args.push_back(arg);
+        return *this;
+    }
+    // Drops all arguments, then resets the FunctionNode base.
+    virtual void reset() { _args.clear(); FunctionNode::reset(); }
+    // Direct mutable access to the argument list.
+    ExpressionNodeVector & expressionNodeVector() { return _args; }
+protected:
+    virtual bool onCalculate(const ExpressionNodeVector & args, ResultNode & result) const;
+    virtual bool onExecute() const;
+    virtual void onPrepare(bool preserveAccurateTypes);
+    size_t getNumArgs() const { return _args.size(); }
+    // No bounds check: n must be less than getNumArgs().
+    const ExpressionNode & getArg(size_t n) const { return *_args[n]; }
+    ExpressionNode & getArg(size_t n) { return *_args[n]; }
+private:
+    virtual void selectMembers(const vespalib::ObjectPredicate & predicate, vespalib::ObjectOperation & operation);
+    // Non-virtual forwarders to the corresponding on*() hooks.
+    bool calculate(const ExpressionNodeVector & args, ResultNode & result) const { return onCalculate(args, result); }
+    void prepareResult() { onPrepareResult(); }
+    virtual void onPrepareResult();
+    ExpressionNodeVector _args;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h b/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h
new file mode 100644
index 00000000000..d6386b24ab0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/multiplyfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * NumericFunctionNode specialisation for multiplication.
+ * The initial value is the integer 1 and flattening a result vector is
+ * delegated to ResultNodeVector::flattenMultiply(); onArgument() is defined
+ * outside this header.
+ */
+class MultiplyFunctionNode : public NumericFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(MultiplyFunctionNode);
+    MultiplyFunctionNode() { }
+private:
+    virtual void onArgument(const ResultNode & arg, ResultNode & result) const;
+    // Returns a freshly allocated Int64ResultNode holding 1.
+    virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(1)); }
+    // Folds all elements of v into result via flattenMultiply().
+    virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenMultiply(result); }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h b/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h
new file mode 100644
index 00000000000..564317d4fa4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/negatefunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Unary function node; the computation is in onExecute(), which is defined
+ * outside this header.
+ * NOTE(review): presumably yields the negated value of its argument --
+ * confirm against the implementation.
+ */
+class NegateFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(NegateFunctionNode);
+    NegateFunctionNode() { }
+    // Forwards the argument to UnaryFunctionNode.
+    NegateFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h b/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h
new file mode 100644
index 00000000000..3e64946e7fb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/normalizesubjectfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Unary function node with its own result type (onPrepareResult) and
+ * computation (onExecute); both are defined outside this header.
+ * NOTE(review): presumably normalizes a message subject string -- confirm
+ * against the implementation.
+ */
+class NormalizeSubjectFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(NormalizeSubjectFunctionNode);
+    NormalizeSubjectFunctionNode() { }
+    // Forwards the argument to UnaryFunctionNode.
+    NormalizeSubjectFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/nullresultnode.h b/searchlib/src/vespa/searchlib/expression/nullresultnode.h
new file mode 100644
index 00000000000..d5bf6d727a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/nullresultnode.h
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * SingleResultNode that carries no raw payload: its raw byte size is 0 and
+ * the raw-buffer operations declared inline below do nothing.
+ * The value accessors and arithmetic hooks are defined outside this header.
+ */
+class NullResultNode : public SingleResultNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(NullResultNode);
+    virtual size_t hash() const;
+    virtual int onCmp(const Identifiable & b) const;
+    virtual void set(const ResultNode & rhs);
+    virtual void min(const ResultNode & b);
+    virtual void max(const ResultNode & b);
+    virtual void add(const ResultNode & b);
+private:
+    virtual void setMin();
+    virtual void setMax();
+    virtual int64_t onGetInteger(size_t index) const;
+    virtual double onGetFloat(size_t index) const;
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+    // No bytes are needed to represent this node in a raw buffer.
+    virtual size_t onGetRawByteSize() const { return 0; }
+    // The buffer operations below are intentional no-ops; the casts to void
+    // only silence unused-parameter warnings.
+    virtual void create(void * buf) const { (void) buf; }
+    virtual void destroy(void * buf) const { (void) buf;}
+
+    virtual void decode(const void * buf) { (void) buf; }
+    virtual void encode(void * buf) const { (void) buf; }
+    virtual void swap(void * buf) { (void) buf; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h b/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h
new file mode 100644
index 00000000000..00ca2fcf75a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numelemfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+/**
+ * Unary function node with its own result type (onPrepareResult) and
+ * computation (onExecute); both are defined outside this header.
+ * NOTE(review): presumably yields the number of elements in its argument's
+ * result -- confirm against the implementation.
+ */
+class NumElemFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(NumElemFunctionNode);
+    NumElemFunctionNode() { }
+    // Forwards the argument to UnaryFunctionNode.
+    NumElemFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp
new file mode 100644
index 00000000000..5b3e34c564f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.cpp
@@ -0,0 +1,156 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include + +namespace search { +namespace expression { + +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(NumericFunctionNode, MultiArgFunctionNode); + +NumericFunctionNode::NumericFunctionNode(const NumericFunctionNode & rhs) : + MultiArgFunctionNode(rhs), + _handler() +{ +} + +NumericFunctionNode & NumericFunctionNode::operator = (const NumericFunctionNode & rhs) +{ + if (this != &rhs) { + MultiArgFunctionNode::operator =(rhs); + _handler.reset(); + } + return *this; +} + +void NumericFunctionNode::onPrepare(bool preserveAccurateTypes) +{ + MultiArgFunctionNode::onPrepare(preserveAccurateTypes); + if (getNumArgs() == 1) { + if (getArg(0).getResult().getClass().inherits(IntegerResultNodeVector::classId)) { + _handler.reset(new FlattenIntegerHandler(*this)); + } else if (getArg(0).getResult().getClass().inherits(FloatResultNodeVector::classId)) { + _handler.reset(new FlattenFloatHandler(*this)); + } else if (getArg(0).getResult().getClass().inherits(StringResultNodeVector::classId)) { + _handler.reset(new FlattenStringHandler(*this)); + } else { + throw std::runtime_error(vespalib::string("No FlattenHandler for ") + getArg(0).getResult().getClass().name()); + } + } else { + if (getResult().getClass().inherits(IntegerResultNodeVector::classId)) { + _handler.reset(new VectorIntegerHandler(*this)); + } else if (getResult().getClass().inherits(FloatResultNodeVector::classId)) { + _handler.reset(new VectorFloatHandler(*this)); + } else if (getResult().getClass().inherits(StringResultNodeVector::classId)) { + _handler.reset(new VectorStringHandler(*this)); + } else if (getResult().getClass().inherits(IntegerResultNode::classId)) { + _handler.reset(new ScalarIntegerHandler(*this)); + } else if (getResult().getClass().inherits(FloatResultNode::classId)) { + _handler.reset(new ScalarFloatHandler(*this)); + } else if (getResult().getClass().inherits(StringResultNode::classId)) { + _handler.reset(new ScalarStringHandler(*this)); + } else if 
(getResult().getClass().inherits(RawResultNode::classId)) { + _handler.reset(new ScalarRawHandler(*this)); + } else { + throw std::runtime_error(vespalib::make_string("NumericFunctionNode::onPrepare does not handle results of type %s", getResult().getClass().name())); + } + } +} + +bool NumericFunctionNode::onCalculate(const ExpressionNodeVector & args, ResultNode & result) const +{ + bool retval(true); + (void) result; + _handler->handleFirst(args[0]->getResult()); + for (size_t i(1), m(args.size()); i < m; i++) { + _handler->handle(args[i]->getResult()); + } + return retval; +} + +template +void NumericFunctionNode::VectorHandler::handle(const ResultNode & arg) +{ + typename T::Vector & result = _result.getVector(); + if (arg.getClass().inherits(ResultNodeVector::classId)) { + const ResultNodeVector & av = static_cast (arg); + const size_t argSize(av.size()); + const size_t oldRSize(result.size()); + if (argSize > oldRSize) { + result.resize(argSize); + for (size_t i(oldRSize); i < argSize; i++) { + result[i] = result[i%oldRSize]; + } + } + for (size_t i(0), m(result.size()), isize(argSize); i < m; i++) { + function().executeIterative(av.get(i%isize), result[i]); + } + } else { + for (size_t i(0), m(result.size()); i < m; i++) { + function().executeIterative(arg, result[i]); + } + } +} + +template +void NumericFunctionNode::VectorHandler::handleFirst(const ResultNode & arg) +{ + typename T::Vector & result = _result.getVector(); + if (arg.getClass().inherits(ResultNodeVector::classId)) { + const ResultNodeVector & av = static_cast (arg); + result.resize(av.size()); + for (size_t i(0), m(result.size()); i < m; i++) { + result[i].set(av.get(i)); + } + } else { + result.resize(1); + result[0].set(arg); + } +} + + +void NumericFunctionNode::ScalarIntegerHandler::handle(const ResultNode & arg) +{ + function().executeIterative(arg, _result); +} + +void NumericFunctionNode::ScalarFloatHandler::handle(const ResultNode & arg) +{ + function().executeIterative(arg, 
_result); +} + +void NumericFunctionNode::ScalarStringHandler::handle(const ResultNode & arg) +{ + function().executeIterative(arg, _result); +} + +void NumericFunctionNode::ScalarRawHandler::handle(const ResultNode & arg) +{ + function().executeIterative(arg, _result); +} + +void NumericFunctionNode::FlattenIntegerHandler::handle(const ResultNode & arg) +{ + _result.set(_initial); + function().flatten(static_cast (arg), _result); +} + +void NumericFunctionNode::FlattenFloatHandler::handle(const ResultNode & arg) +{ + _result.set(_initial); + function().flatten(static_cast (arg), _result); +} + +void NumericFunctionNode::FlattenStringHandler::handle(const ResultNode & arg) +{ + _result.set(_initial); + function().flatten(static_cast (arg), _result); +} + +} +} + +// this function was added by ../../forcelink.sh + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_numericfunctionnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h new file mode 100644 index 00000000000..393b89d1049 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/numericfunctionnode.h @@ -0,0 +1,178 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { +namespace expression { + +class NumericFunctionNode : public MultiArgFunctionNode +{ +public: + DECLARE_ABSTRACT_EXPRESSIONNODE(NumericFunctionNode); + NumericFunctionNode() : _handler() { } + NumericFunctionNode(const NumericFunctionNode & rhs); + NumericFunctionNode & operator = (const NumericFunctionNode & rhs); + virtual void reset() { _handler.reset(); MultiArgFunctionNode::reset(); } +protected: + virtual void onPrepare(bool preserveAccurateTypes); + + class Handler + { + public: + Handler(const NumericFunctionNode & func) : _function(func) { } + virtual ~Handler() { } + virtual void handle(const ResultNode & arg) = 0; + virtual void handleFirst(const ResultNode & arg) = 0; + protected: + const NumericFunctionNode & function() const { return _function; } + private: + const NumericFunctionNode & _function; + }; + + template + class VectorHandler : public Handler + { + protected: + VectorHandler(const NumericFunctionNode & func) : + Handler(func), + _result(static_cast(func.updateResult())) + { } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg); + private: + T & _result; + }; + + class VectorIntegerHandler : public VectorHandler + { + private: + typedef VectorHandler BaseHandler; + public: + VectorIntegerHandler(const NumericFunctionNode & func) : BaseHandler(func) { } + }; + class VectorFloatHandler : public VectorHandler + { + private: + typedef VectorHandler BaseHandler; + public: + VectorFloatHandler(const NumericFunctionNode & func) : BaseHandler(func) { } + }; + class VectorStringHandler : public VectorHandler + { + private: + typedef VectorHandler BaseHandler; + public: + VectorStringHandler(const NumericFunctionNode & func) : BaseHandler(func) { } + }; +private: + virtual ResultNode::CP getInitialValue() const = 0; + virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const = 0; + class ScalarIntegerHandler : 
public Handler + { + public: + ScalarIntegerHandler(const NumericFunctionNode & func) : + Handler(func), + _result(static_cast(func.updateResult())) + { } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { _result.set(arg.getInteger()); } + protected: + Int64ResultNode & _result; + }; + class ScalarFloatHandler : public Handler + { + public: + ScalarFloatHandler(const NumericFunctionNode & func) : + Handler(func), + _result(static_cast(func.updateResult())) + { } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { _result.set(arg.getFloat()); } + protected: + FloatResultNode & _result; + }; + class ScalarStringHandler : public Handler + { + public: + ScalarStringHandler(const NumericFunctionNode & func) : + Handler(func), + _result(static_cast(func.updateResult())) + { } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { + char buf[32]; + vespalib::ConstBufferRef b = arg.getString(vespalib::BufferRef(buf, sizeof(buf))); + _result.set(vespalib::stringref(b.c_str(), b.size())); + } + protected: + StringResultNode & _result; + }; + class ScalarRawHandler : public Handler + { + public: + ScalarRawHandler(const NumericFunctionNode & func) : + Handler(func), + _result(static_cast(func.updateResult())) + { } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { + char buf[32]; + vespalib::ConstBufferRef b = arg.getString(vespalib::BufferRef(buf, sizeof(buf))); + _result.setBuffer(b.data(), b.size()); + } + protected: + RawResultNode & _result; + }; + class FlattenIntegerHandler : public ScalarIntegerHandler + { + public: + FlattenIntegerHandler(const NumericFunctionNode & func) : + ScalarIntegerHandler(func), + _initial() + { + _initial.set(*func.getInitialValue()); + } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { handle(arg); } 
+ private: + Int64ResultNode _initial; + }; + class FlattenFloatHandler : public ScalarFloatHandler + { + public: + FlattenFloatHandler(const NumericFunctionNode & func) : + ScalarFloatHandler(func), + _initial() + { + _initial.set(*func.getInitialValue()); + } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { handle(arg); } + private: + FloatResultNode _initial; + }; + class FlattenStringHandler : public ScalarStringHandler + { + public: + FlattenStringHandler(const NumericFunctionNode & func) : + ScalarStringHandler(func), + _initial() + { + _initial.set(*func.getInitialValue()); + } + virtual void handle(const ResultNode & arg); + virtual void handleFirst(const ResultNode & arg) { handle(arg); } + private: + StringResultNode _initial; + }; + + virtual bool onCalculate(const ExpressionNodeVector & args, ResultNode & result) const; + std::unique_ptr _handler; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/numericresultnode.h b/searchlib/src/vespa/searchlib/expression/numericresultnode.h new file mode 100644 index 00000000000..c7b16b58335 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/numericresultnode.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { + +class NumericResultNode : public SingleResultNode +{ +public: + DECLARE_ABSTRACT_EXPRESSIONNODE(NumericResultNode); + typedef vespalib::IdentifiablePtr CP; + typedef std::unique_ptr UP; + virtual NumericResultNode *clone() const = 0; + virtual void multiply(const ResultNode & b) = 0; + virtual void divide(const ResultNode & b) = 0; + virtual void modulo(const ResultNode & b) = 0; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/orfunctionnode.h b/searchlib/src/vespa/searchlib/expression/orfunctionnode.h new file mode 100644 index 00000000000..3b374ffdc54 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/orfunctionnode.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +class OrFunctionNode : public BitFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(OrFunctionNode); + OrFunctionNode() { } +private: + virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); } + virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenOr(result); } + virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp b/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp new file mode 100644 index 00000000000..202ce85f556 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/perdocexpression.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include + +#include +LOG_SETUP(".searchlib.documentexpressions"); + +namespace search { +namespace expression { + +using namespace vespalib; + +IMPLEMENT_EXPRESSIONNODE(RelevanceNode, ExpressionNode); + +void +RelevanceNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "Value", _relevance); +} + +Serializer & RelevanceNode::onSerialize(Serializer & os) const +{ + return _relevance.serialize(os); +} + +Deserializer & RelevanceNode::onDeserialize(Deserializer & is) +{ + return _relevance.deserialize(is); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_perdocexpression() {} diff --git a/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h new file mode 100644 index 00000000000..94b8a7cef5f --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { + +class PositiveInfinityResultNode : public SingleResultNode +{ +public: + DECLARE_EXPRESSIONNODE(PositiveInfinityResultNode); + virtual size_t hash() const; + virtual int onCmp(const Identifiable & b) const; + virtual void set(const ResultNode & rhs); + virtual void min(const ResultNode & b); + virtual void max(const ResultNode & b); + virtual void add(const ResultNode & b); +private: + virtual void setMin(); + virtual void setMax(); + virtual int64_t onGetInteger(size_t index) const; + virtual double onGetFloat(size_t index) const; + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const; + virtual size_t onGetRawByteSize() const { return 0; } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp new file mode 100644 index 00000000000..87b70cb5757 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.cpp @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include "rangebucketpredef.h" +#include "integerresultnode.h" +#include "floatresultnode.h" +#include "integerbucketresultnode.h" +#include "floatbucketresultnode.h" +#include +#include +#include +#include + +namespace search { +namespace expression { + +IMPLEMENT_EXPRESSIONNODE(RangeBucketPreDefFunctionNode, UnaryFunctionNode); + +RangeBucketPreDefFunctionNode::RangeBucketPreDefFunctionNode(const RangeBucketPreDefFunctionNode & rhs) : + UnaryFunctionNode(rhs), + _predef(rhs._predef), + _result(NULL), + _nullResult(rhs._nullResult), + _handler() +{ +} + +RangeBucketPreDefFunctionNode & RangeBucketPreDefFunctionNode::operator = (const RangeBucketPreDefFunctionNode & rhs) +{ + if (this != & rhs) { + UnaryFunctionNode::operator = (rhs); + _predef = rhs._predef; + _result = NULL; + _nullResult = rhs._nullResult; + _handler.reset(); + } + return *this; +} + +void +RangeBucketPreDefFunctionNode::onPrepareResult() +{ + const vespalib::Identifiable::RuntimeClass & cInfo(getArg().getResult().getClass()); + if (cInfo.inherits(ResultNodeVector::classId)) { + if (cInfo.inherits(IntegerResultNodeVector::classId)) { + _nullResult = & IntegerBucketResultNode::getNull(); + } else if (cInfo.inherits(FloatResultNodeVector::classId)) { + _nullResult = & FloatBucketResultNode::getNull(); + } else if (cInfo.inherits(StringResultNodeVector::classId)) { + _nullResult = & StringBucketResultNode::getNull(); + } else if (cInfo.inherits(RawResultNodeVector::classId)) { + _nullResult = & RawBucketResultNode::getNull(); + } else { + throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", cInfo.name())); + } + setResultType(ResultNode::UP(_predef->clone())); + static_cast(updateResult()).clear(); + _handler.reset(new MultiValueHandler(*this)); + _result = & updateResult(); + } else { + if (cInfo.inherits(IntegerResultNode::classId)) { + _nullResult = & IntegerBucketResultNode::getNull(); + } else if (cInfo.inherits(FloatResultNode::classId)) 
{ + _nullResult = & FloatBucketResultNode::getNull(); + } else if (cInfo.inherits(StringResultNode::classId)) { + _nullResult = & StringBucketResultNode::getNull(); + } else if (cInfo.inherits(RawResultNode::classId)) { + _nullResult = & RawBucketResultNode::getNull(); + } else { + throw std::runtime_error(vespalib::make_string("cannot create appropriate bucket for type '%s'", cInfo.name())); + } + _result = _nullResult; + if ( ! _predef->empty()) { + _result = & _predef->get(0); + } + _handler.reset(new SingleValueHandler(*this)); + } +} + +bool +RangeBucketPreDefFunctionNode::onExecute() const +{ + getArg().execute(); + const ResultNode * result = _handler->handle(getArg().getResult()); + _result = result ? result : _nullResult; + return true; +} + +const ResultNode * RangeBucketPreDefFunctionNode::SingleValueHandler::handle(const ResultNode & arg) +{ + return _predef.find(arg); +} + +const ResultNode * RangeBucketPreDefFunctionNode::MultiValueHandler::handle(const ResultNode & arg) +{ + const ResultNodeVector & v = static_cast(arg); + _result.clear(); + for(size_t i(0), m(v.size()); i < m; i++) { + const ResultNode * bucket = _predef.find(v.get(i)); + if (bucket != NULL) { + _result.push_back(*bucket); + } else { + _result.push_back(*_nullResult); + } + } + return &_result; +} + +vespalib::Serializer & +RangeBucketPreDefFunctionNode::onSerialize(vespalib::Serializer &os) const +{ + UnaryFunctionNode::onSerialize(os); + return os << _predef; +} + +vespalib::Deserializer & +RangeBucketPreDefFunctionNode::onDeserialize(vespalib::Deserializer &is) +{ + UnaryFunctionNode::onDeserialize(is); + return is >> _predef; +} + +void +RangeBucketPreDefFunctionNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + UnaryFunctionNode::visitMembers(visitor); + visit(visitor, "predefined", _predef); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_rangebucketpredef() {} diff --git 
a/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h new file mode 100644 index 00000000000..de9c5e69879 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/rangebucketpredef.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "unaryfunctionnode.h" +#include "resultvector.h" +#include "integerresultnode.h" +#include "floatresultnode.h" +#include "stringresultnode.h" + +namespace search { +namespace expression { + +class RangeBucketPreDefFunctionNode : public UnaryFunctionNode +{ +private: + virtual void onPrepareResult(); + virtual bool onExecute() const; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + class Handler { + public: + Handler(const RangeBucketPreDefFunctionNode & rangeNode) : _predef(rangeNode.getBucketList()), _nullResult(rangeNode._nullResult) { } + virtual ~Handler() { } + virtual const ResultNode * handle(const ResultNode & arg) = 0; + protected: + const ResultNodeVector & _predef; + const ResultNode * _nullResult; + }; + class SingleValueHandler : public Handler { + public: + SingleValueHandler(const RangeBucketPreDefFunctionNode & rangeNode) : + Handler(rangeNode) + { } + virtual const ResultNode * handle(const ResultNode & arg); + }; + class MultiValueHandler : public Handler { + public: + MultiValueHandler(const RangeBucketPreDefFunctionNode & rangeNode) : + Handler(rangeNode), + _result(static_cast(rangeNode.updateResult())) + { } + virtual const ResultNode * handle(const ResultNode & arg); + private: + ResultNodeVector & _result; + }; + + + ResultNodeVector::CP _predef; + mutable const ResultNode * _result; + const ResultNode * _nullResult; + std::unique_ptr _handler; + static IntegerBucketResultNode _nullIntegerResult; + static FloatBucketResultNode _nullFloatResult; + static StringBucketResultNode _nullStringResult; + static 
RawBucketResultNode _nullRawResult; + +public: + DECLARE_EXPRESSIONNODE(RangeBucketPreDefFunctionNode); + DECLARE_NBO_SERIALIZE; + RangeBucketPreDefFunctionNode() : UnaryFunctionNode(), _predef(), _result(NULL), _nullResult(NULL) {} + RangeBucketPreDefFunctionNode(const ExpressionNode::CP &arg) : UnaryFunctionNode(arg), _predef(), _result(NULL), _nullResult(NULL) {} + RangeBucketPreDefFunctionNode(const RangeBucketPreDefFunctionNode & rhs); + RangeBucketPreDefFunctionNode & operator = (const RangeBucketPreDefFunctionNode & rhs); + virtual const ResultNode & getResult() const { return *_result; } + const ResultNodeVector & getBucketList() const { return *_predef; } + ResultNodeVector & getBucketList() { return *_predef; } + RangeBucketPreDefFunctionNode & setBucketList(const ResultNodeVector & predef) { + _predef.reset(static_cast(predef.clone())); + return *this; + } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp new file mode 100644 index 00000000000..df0d7384e35 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.cpp @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include "rawbucketresultnode.h" +#include + +namespace search { +namespace expression { + +IMPLEMENT_RESULTNODE(RawBucketResultNode, BucketResultNode); + +RawBucketResultNode RawBucketResultNode::_nullResult; + +size_t +RawBucketResultNode::hash() const +{ +#if 0 + union { + uint8_t cxor[8]; + uint64_t ixor; + } xorResult; + xorResult.ixor = 0; + size_t i(0); + const size_t m(_from.size()); + const char * c = _from.c_str(); + const uint64_t * ic = reinterpret_cast(c); + for (; i+8 < m; i+=8) { + const size_t index(i/8); + xorResult.ixor ^= ic[index]; + } + for (; i < m; i++) { + xorResult.cxor[i%8] ^= c[i]; + } + return xorResult.ixor; +#else + return 0; +#endif +} + +int +RawBucketResultNode::onCmp(const Identifiable & rhs) const +{ + const RawBucketResultNode & b = static_cast(rhs); + int diff(_from->cmp(*b._from)); + return (diff == 0) ? _to->cmp(*b._to) : diff; +} + +int RawBucketResultNode::contains(const RawBucketResultNode & b) const +{ + int fromDiff(_from->cmp(*b._from)); + int toDiff(_to->cmp(*b._to)); + return (fromDiff < 0) ? std::min(0, toDiff) : std::max(0, toDiff); +} + +int RawBucketResultNode::contains(const ConstBufferRef & s) const +{ + RawResultNode v(s.data(), s.size()); + int diff(_from->cmp(v)); + if (diff > 0) { + return 1; + } else { + diff = _to->cmp(v); + return (diff <= 0) ? 
-1 : 0; + } +} + +void +RawBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, _fromField.getName(), _from); + visit(visitor, _toField.getName(), _to); +} + +vespalib::Serializer & +RawBucketResultNode::onSerialize(vespalib::Serializer & os) const +{ + _from.serialize(os); + _to.serialize(os); + return os; +} + +vespalib::Deserializer & +RawBucketResultNode::onDeserialize(vespalib::Deserializer & is) +{ + _from.deserialize(is); + _to.deserialize(is); + return is; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_rawbucketresultnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h new file mode 100644 index 00000000000..abd78e64d97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/rawbucketresultnode.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "bucketresultnode.h" +#include "rawresultnode.h" + +namespace search { +namespace expression { + +class RawBucketResultNode : public BucketResultNode +{ +private: + ResultNode::CP _from; + ResultNode::CP _to; + static RawBucketResultNode _nullResult; + virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); } +public: + struct GetValue { + BufferRef _tmp; + ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); } + }; + + DECLARE_EXPRESSIONNODE(RawBucketResultNode); + DECLARE_NBO_SERIALIZE; + RawBucketResultNode() : _from(new RawResultNode()), _to(new RawResultNode()) {} + RawBucketResultNode(ResultNode::UP from, ResultNode::UP to) : _from(from.release()), _to(to.release()) {} + virtual size_t hash() const; + virtual int onCmp(const Identifiable & b) const; + int contains(const RawBucketResultNode & b) const; + int contains(const ConstBufferRef & v) const; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + static const RawBucketResultNode & getNull() { return _nullResult; } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/rawresultnode.h b/searchlib/src/vespa/searchlib/expression/rawresultnode.h new file mode 100644 index 00000000000..fd6b41b549c --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/rawresultnode.h @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { + +class RawResultNode : public SingleResultNode +{ +public: + DECLARE_EXPRESSIONNODE(RawResultNode); + DECLARE_NBO_SERIALIZE; + DECLARE_RESULTNODE_SERIALIZE; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + RawResultNode() : _value(1) { setBuffer("", 0); } + RawResultNode(const void * buf, size_t sz) { setBuffer(buf, sz); } + virtual int onCmp(const Identifiable & b) const; + virtual size_t hash() const; + virtual void set(const ResultNode & rhs); + void setBuffer(const void * buf, size_t sz); + ConstBufferRef get() const { return ConstBufferRef(&_value[0], _value.size()); } + virtual void min(const ResultNode & b); + virtual void max(const ResultNode & b); + virtual void add(const ResultNode & b); + virtual void negate(); +private: + typedef std::vector V; + virtual int cmpMem(const void * a, const void *b) const { + const V & ai(*static_cast(a)); + const V & bi(*static_cast(b)); + int result = memcmp(&ai[0], &bi[0], std::min(ai.size(), bi.size())); + if (result == 0) { + result = ai.size() < bi.size() ? -1 : ai.size() > bi.size() ? 1 : 0; + } + return result; + } + virtual void decode(const void * buf) { _value = *static_cast(buf); } + virtual void encode(void * buf) const { *static_cast(buf) = _value; } + virtual size_t hash(const void * buf) const; + + virtual size_t onGetRawByteSize() const { return sizeof(_value); } + virtual void setMin(); + virtual void setMax(); + virtual int64_t onGetInteger(size_t index) const; + virtual double onGetFloat(size_t index) const; + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const; + V _value; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/relevancenode.h b/searchlib/src/vespa/searchlib/expression/relevancenode.h new file mode 100644 index 00000000000..6f867329961 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/relevancenode.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { +namespace expression { + +class RelevanceNode : public ExpressionNode +{ +public: + DECLARE_NBO_SERIALIZE; + DECLARE_EXPRESSIONNODE(RelevanceNode); + RelevanceNode() : ExpressionNode(), _relevance() { } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + virtual const ResultNode & getResult() const { return _relevance; } + void setRelevance(double relevance) { _relevance.set(relevance); } +private: + virtual void onPrepare(bool preserveAccurateTypes) { (void) preserveAccurateTypes; } + virtual bool onExecute() const { return true; } + FloatResultNode _relevance; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/resultnode.cpp b/searchlib/src/vespa/searchlib/expression/resultnode.cpp new file mode 100644 index 00000000000..387b05e7add --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/resultnode.cpp @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include + +namespace search { +namespace expression { + +uint64_t ResultNode::radixAsc(const void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::radixAsc(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +uint64_t ResultNode::radixDesc(const void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::radixDesc(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +size_t ResultNode::hash(const void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::hash(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +void ResultNode::decode(const void * buf) +{ + (void) buf; + throw std::runtime_error("ResultNode::decode(const void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +void ResultNode::encode(void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::encode(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +void ResultNode::swap(void * buf) +{ + (void) buf; + throw std::runtime_error("ResultNode::swap(void * buf) must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +void ResultNode::create(void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::create(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +void ResultNode::destroy(void * buf) const +{ + (void) buf; + throw std::runtime_error("ResultNode::destroy(void * buf) const must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +int ResultNode::cmpMem(const void * a, const void *b) const +{ + (void) a; + (void) b; + throw std::runtime_error("ResultNode::cmpMem(const void * a, const void *b) const must be overloaded by'" + vespalib::string(getClass().name()) + "'."); +} + +size_t ResultNode::getRawByteSize() const +{ 
+ throw std::runtime_error("ResultNode::getRawByteSize() const must be overloaded by '" + vespalib::string(getClass().name()) + "'."); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_resultnode() {} diff --git a/searchlib/src/vespa/searchlib/expression/resultnode.h b/searchlib/src/vespa/searchlib/expression/resultnode.h new file mode 100644 index 00000000000..0d83be292c7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/resultnode.h @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include + +namespace search { +namespace expression { + +#define DECLARE_ABSTRACT_RESULTNODE(Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class) +#define DECLARE_ABSTRACT_RESULTNODE_NS1(ns, Class) DECLARE_IDENTIFIABLE_ABSTRACT_NS3(search, expression, ns, Class) + +#define DECLARE_RESULTNODE(Class) \ + DECLARE_IDENTIFIABLE_NS2(search, expression, Class) \ + virtual Class * clone() const; + +#define DECLARE_RESULTNODE_NS1(ns, Class) \ + DECLARE_IDENTIFIABLE_NS3(search, expression, ns, Class) \ + virtual Class * clone() const; + +#define DECLARE_RESULTNODE_SERIALIZE \ + virtual ResultSerializer & onSerializeResult(ResultSerializer & os) const; \ + virtual ResultDeserializer & onDeserializeResult(ResultDeserializer & is); + +#define IMPLEMENT_ABSTRACT_RESULTNODE(Class, base) IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS2(search, expression, Class, base) + +#define IMPLEMENT_RESULTNODE(Class, base) \ + IMPLEMENT_IDENTIFIABLE_NS2(search, expression, Class, base) \ + Class * Class::clone() const { return new Class(*this); } + +class ResultNode : public vespalib::Identifiable +{ +public: + typedef vespalib::BufferRef BufferRef; + typedef vespalib::ConstBufferRef ConstBufferRef; +public: + int64_t getInteger() const { return onGetInteger(0); } + int64_t getEnum() const { return onGetEnum(0); } + double 
getFloat() const { return onGetFloat(0); } + ConstBufferRef getString(BufferRef buf) const { return onGetString(0, buf); } + + int64_t getInteger(size_t index) const { return onGetInteger(index); } + double getFloat(size_t index) const { return onGetFloat(index); } + ConstBufferRef getString(size_t index, BufferRef buf) const { return onGetString(index, buf); } + +private: + virtual int64_t onGetInteger(size_t index) const = 0; + virtual int64_t onGetEnum(size_t index) const { + (void) index; + throw vespalib::Exception("search::expression::ResultNode onGetEnum is not implemented"); + } + virtual double onGetFloat(size_t index) const = 0; + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const = 0; + +public: + DECLARE_ABSTRACT_RESULTNODE(ResultNode); + virtual ~ResultNode() { } + typedef std::unique_ptr UP; + typedef vespalib::IdentifiablePtr CP; + virtual void set(const ResultNode & rhs) = 0; + + /** + * Will initialize a memory area that must be destroyed. After creation it can be encoded or decoded. + * Memory must be fixed size. + * This interface is used to efficiently store data in vectors without the overhead of virtual objects. + * @param buf memory area to initialize + */ + virtual void create(void * buf) const; + /** + * Will initialize itself with the memory area supplied. + * @param buf memory area containing already encoded data. + */ + virtual void decode(const void * buf); + /** + * Will encode itself into the memory area supplied. + * @param buf memory area used as storage. + */ + virtual void encode(void * buf) const; + /** + * Will return a radixsortable value that will sort ascending. + * @param buf memory area used as storage. + */ + virtual uint64_t radixAsc(const void * buf) const; + /** + * Will return a radixsortable value that will sort descending. + * @param buf memory area used as storage. + */ + virtual uint64_t radixDesc(const void * buf) const; + /** + * Will return the typed hash of memory area supplied. 
+ * @param memory area used as storage. + */ + virtual size_t hash(const void * buf) const; + /** + * Will decode itself into the memory area supplied. + * It will also encode itself from the memory area. + * @param buf memory area used as storage. + */ + virtual void swap(void * buf); + /** + * Will destroy any initialized memory. + * @param buf memory area used as storage. + */ + virtual void destroy(void * buf) const; + /** + * Will do a typed compare of the given memory a and b. + * @param a memory area of a + * @param b memory area of b + * @return a negative value if a < b, 0 if a == b, and a positive value if a > b + */ + virtual int cmpMem(const void * a, const void *b) const; + + virtual void negate(); + virtual void sort(); + virtual void reverse(); + virtual size_t hash() const = 0; + virtual ResultNode * clone() const = 0; + ResultNode::UP createBaseType() const { return ResultNode::UP(static_cast(getBaseClass().create())); } + virtual ResultSerializer & onSerializeResult(ResultSerializer & os) const; + virtual ResultDeserializer & onDeserializeResult(ResultDeserializer & is); + virtual size_t getRawByteSize() const; + virtual bool isMultiValue() const { return false; } +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/resultnodes.cpp b/searchlib/src/vespa/searchlib/expression/resultnodes.cpp new file mode 100644 index 00000000000..9a1d3639a8e --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/resultnodes.cpp @@ -0,0 +1,410 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP(".searchlib.documentexpressions"); + +namespace search { +namespace expression { + +using vespalib::nbostream; +using vespalib::Serializer; +using vespalib::Deserializer; +using vespalib::make_string; +using vespalib::Identifiable; +using vespalib::BufferRef; +using vespalib::ConstBufferRef; + +IMPLEMENT_ABSTRACT_RESULTNODE(ResultNode, Identifiable); +IMPLEMENT_ABSTRACT_RESULTNODE(SingleResultNode, ResultNode); +IMPLEMENT_ABSTRACT_RESULTNODE(NumericResultNode, SingleResultNode); +IMPLEMENT_ABSTRACT_RESULTNODE(IntegerResultNode, NumericResultNode); +IMPLEMENT_RESULTNODE(StringResultNode, SingleResultNode); +IMPLEMENT_RESULTNODE(NullResultNode, SingleResultNode); +IMPLEMENT_RESULTNODE(PositiveInfinityResultNode, SingleResultNode); +IMPLEMENT_RESULTNODE(RawResultNode, SingleResultNode); +IMPLEMENT_RESULTNODE(Int8ResultNode, IntegerResultNode); +IMPLEMENT_RESULTNODE(Int16ResultNode, IntegerResultNode); +IMPLEMENT_RESULTNODE(Int32ResultNode, IntegerResultNode); +IMPLEMENT_RESULTNODE(Int64ResultNode, IntegerResultNode); +IMPLEMENT_RESULTNODE(EnumResultNode, IntegerResultNode); +IMPLEMENT_RESULTNODE(FloatResultNode, NumericResultNode); + +void ResultNode::sort() +{ +} + +void ResultNode::reverse() +{ +} + +void ResultNode::negate() +{ + throw std::runtime_error(make_string("Class %s does not implement 'negate'", getClass().name())); +} + +ResultSerializer & ResultNode::onSerializeResult(ResultSerializer & os) const +{ + os.proxyPut(*this); + return os; +} + +ResultDeserializer & ResultNode::onDeserializeResult(ResultDeserializer & is) +{ + is.proxyGet(*this); + return is; +} + +int64_t FloatResultNode::onGetInteger(size_t index) const { (void) index; return static_cast(round(_value)); } +double FloatResultNode::onGetFloat(size_t index) const { (void) index; return _value; } +void FloatResultNode::add(const ResultNode & b) { _value += b.getFloat(); } +void 
FloatResultNode::negate() { _value = - _value; } +void FloatResultNode::multiply(const ResultNode & b) { _value *= b.getFloat(); } +void FloatResultNode::divide(const ResultNode & b) { + double val = b.getFloat(); + _value = (val == 0.0) ? 0.0 : (_value / val); +} +void FloatResultNode::modulo(const ResultNode & b) { _value = ResultNode::getInteger() % b.getInteger(); } +void FloatResultNode::min(const ResultNode & b) { double t(b.getFloat()); if (t < _value) { _value = t; } } +void FloatResultNode::max(const ResultNode & b) { double t(b.getFloat()); if (t > _value) { _value = t; } } +void FloatResultNode::set(const ResultNode & rhs) { _value = rhs.getFloat(); } +Serializer & FloatResultNode::onSerialize(Serializer & os) const { os << _value; return os; } +Deserializer & FloatResultNode::onDeserialize(Deserializer & is) { is >> _value; return is; } + +void +FloatResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "value", _value); +} + +ResultNode::ConstBufferRef FloatResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const +{ + (void) index; + int numWritten = std::min(buf.size(), (size_t)std::max(0, snprintf(buf.str(), buf.size(), "%g", _value))); + return ConstBufferRef(buf.str(), numWritten); +} + +bool FloatResultNode::isNan() const +{ + return isnan(_value); +} + +int FloatResultNode::onCmp(const Identifiable & b) const +{ + const FloatResultNode & rhs(static_cast(b)); + if (isNan()) { + return rhs.isNan() ? 0 : -1; + } else { + if (rhs.isNan()) { + return 1; + } else { + return (_value > rhs._value) ? 1 : (_value < rhs._value) ? 
-1 : 0; + } + } +} + +void StringResultNode::setMin() { _value.clear(); } +void StringResultNode::setMax() { _value.clear(); _value.append(char(-1)); } +void RawResultNode::setMin() { _value.clear(); } +/* Clears first so the result is exactly one element (-1) regardless of prior content, matching StringResultNode::setMax(). */ void RawResultNode::setMax() { _value.clear(); _value.push_back(-1); } +void FloatResultNode::setMin() { _value = -std::numeric_limits::max(); } +void FloatResultNode::setMax() { _value = std::numeric_limits::max(); } + +/* NullResultNode: every mutator is a no-op, numeric views are 0 and the hash is 0. */ void NullResultNode::setMin() { } +void NullResultNode::setMax() { } +void NullResultNode::add(const ResultNode & b) { (void) b; } +void NullResultNode::min(const ResultNode & b) { (void) b; } +void NullResultNode::max(const ResultNode & b) { (void) b; } +int64_t NullResultNode::onGetInteger(size_t index) const { (void) index; return 0; } +double NullResultNode::onGetFloat(size_t index) const { (void) index; return 0.0; } +/* Returns 0 only when b's class id is exactly NullResultNode::classId, otherwise 1. */ int NullResultNode::onCmp(const Identifiable & b) const { return (b.getClass().id() == NullResultNode::classId) ? 0 : 1; } +void NullResultNode::set(const ResultNode & rhs) { (void) rhs; } +size_t NullResultNode::hash() const { return 0; } +/* Returns the caller's buffer unchanged. */ ResultNode::ConstBufferRef NullResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const { (void) index; return buf; } +/* PositiveInfinityResultNode: same no-op mutators and zero-valued views as NullResultNode. */ void PositiveInfinityResultNode::setMin() { } +void PositiveInfinityResultNode::setMax() { } +void PositiveInfinityResultNode::add(const ResultNode & b) { (void) b; } +void PositiveInfinityResultNode::min(const ResultNode & b) { (void) b; } +void PositiveInfinityResultNode::max(const ResultNode & b) { (void) b; } +int64_t PositiveInfinityResultNode::onGetInteger(size_t index) const { (void) index; return 0; } +double PositiveInfinityResultNode::onGetFloat(size_t index) const { (void) index; return 0.0; } +void PositiveInfinityResultNode::set(const ResultNode & rhs) { (void) rhs; } +size_t PositiveInfinityResultNode::hash() const { return 0; } +ResultNode::ConstBufferRef PositiveInfinityResultNode::onGetString(size_t index, ResultNode::BufferRef buf) const { (void) 
index; return buf; } + +/* Equal to any node inheriting PositiveInfinityResultNode, greater (1) than everything else. */ int PositiveInfinityResultNode::onCmp(const Identifiable & b) const +{ + if (b.inherits(PositiveInfinityResultNode::classId)) { + return 0; + } + return 1; +} + +/* Numeric views parse the string with strtoll (base 0) and strtod; the index argument is ignored. */ int64_t StringResultNode::onGetInteger(size_t index) const { (void) index; return strtoll(_value.c_str(), NULL, 0); } +double StringResultNode::onGetFloat(size_t index) const { (void) index; return strtod(_value.c_str(), NULL); } +Serializer & StringResultNode::onSerialize(Serializer & os) const +{ + os << _value; + return os; +} + +/* Orders below positive infinity; otherwise memcmp over the common prefix, with the shorter string first on a tie. */ int StringResultNode::onCmp(const Identifiable & b) const +{ + if (b.inherits(PositiveInfinityResultNode::classId)) { + return -1; + } else { + const StringResultNode & sb(static_cast(b)); + size_t sz(std::min(_value.size(), sb._value.size())); + int result = memcmp(_value.c_str(), sb._value.c_str(), sz); + if (result == 0) { + result = _value.size() < sb._value.size() ? -1 : _value.size() > sb._value.size() ? 1 : 0; + } + return result; + } +} + +Deserializer & StringResultNode::onDeserialize(Deserializer & is) +{ + is >> _value; + return is; +} + + +/* Adds b's string form element-wise onto the common prefix, then appends whatever part of b extends past the current length. */ void RawResultNode::add(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + const uint8_t *raw = static_cast(s.data()); + + size_t i(0); + for (; i < _value.size() && i < s.size(); i++) { + _value[i] += raw[i]; + } + if (i < s.size()) { + for (; i < s.size(); i++) { + _value.push_back(raw[i]); + } + } + +} + +/* Keeps the smaller of the current value and b's string form. The order is memcmp over the common prefix with the shorter buffer first on a tie, so a proper prefix counts as smaller. memcmp is skipped when _value is empty so &_value[0] is never evaluated on an empty vector. */ void RawResultNode::min(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + int c(_value.empty() ? 0 : memcmp(&_value[0], s.data(), std::min(s.size(), _value.size()))); + if ((c > 0) || ((c == 0) && (s.size() < _value.size()))) { + setBuffer(s.data(), s.size()); + } +} + +/* Keeps the larger of the current value and b's string form, using the same order as min(); an empty value is therefore replaced by any non-empty b. */ void RawResultNode::max(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + int c(_value.empty() ? 0 : memcmp(&_value[0], s.data(), std::min(s.size(), _value.size()))); + if ((c < 0) || ((c == 0) && (s.size() > _value.size()))) { + setBuffer(s.data(), s.size()); + } +} + +void RawResultNode::negate() +{ + for (size_t i(0); i < 
_value.size(); i++) { + _value[i] = - _value[i]; + } +} + +/* Adds b's string form character-wise onto the common prefix, then appends whatever part of b extends past the current length. */ void StringResultNode::add(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + vespalib::stringref bs(s.c_str(), s.size()); + size_t i(0); + for (; i < _value.length() && i < bs.length(); i++) { + _value[i] += bs[i]; + } + if (i < bs.length()) { + // XXX: Should have some way of appending with iterators + _value.append(bs.data() + i, (bs.length() - i)); + } +} + +/* Replaces the value with b's string form when the current value compares greater. */ void StringResultNode::min(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + vespalib::stringref bs(s.c_str(), s.size()); + if (_value > bs) { + _value = bs; + } +} + +/* Replaces the value with b's string form when the current value compares smaller. */ void StringResultNode::max(const ResultNode & b) +{ + char buf[32]; + ConstBufferRef s(b.getString(BufferRef(buf, sizeof(buf)))); + vespalib::stringref bs(s.c_str(), s.size()); + if (_value < bs) { + _value = bs; + } +} + +/* Negates every character of the string in place. */ void StringResultNode::negate() +{ + for (size_t i(0); i < _value.length(); i++) { + _value[i] = - _value[i]; + } +} + +void +StringResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "value", _value); +} + +/* Returns a reference to the internal string; the caller's buffer and the index are unused. */ ResultNode::ConstBufferRef StringResultNode::onGetString(size_t index, ResultNode::BufferRef ) const { (void) index; return ConstBufferRef(_value.c_str(), _value.size()); } + +void StringResultNode::set(const ResultNode & rhs) +{ + char buf[32]; + ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf)))); + _value.assign(b.c_str(), b.size()); +} + +StringResultNode & StringResultNode::append(const ResultNode & rhs) +{ + char buf[32]; + ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf)))); + _value.append(b.c_str(), b.size()); + return *this; +} + +namespace { + +/* XOR-folds the sz bytes at s one size_t word at a time, followed by one zero-padded word for the remaining sz % sizeof(size_t) bytes. Each word is fetched with memcpy, so the buffer needs no particular alignment and is never read through a size_t pointer. */ size_t hashBuf(const void *s, size_t sz) +{ + size_t result(0); + const char * bytes = static_cast<const char *>(s); + for(size_t i(0), m(sz/sizeof(size_t)); i < m; i++) { + size_t word(0); memcpy(&word, bytes + i*sizeof(size_t), sizeof(word)); result ^= word; + } + unsigned left(sz%sizeof(size_t)); + if (left) { + size_t 
lastValue(0); + /* copy the trailing 'left' bytes into the zero-initialised word before folding it in */ memcpy(&lastValue, static_cast(s)+sz-left, left); + result ^= lastValue; + } + return result; +} + +} + +size_t StringResultNode::hash() const { return hashBuf(_value.c_str(), _value.size()); } + +/* Hashes a string reached through an untyped pointer; buf is cast and dereferenced without any check. */ size_t StringResultNode::hash(const void * buf) const +{ + const vespalib::string & s = *static_cast(buf); + return hashBuf(s.c_str(), s.size()); +} + +/* Copies at most the first 8 bytes of the raw value into a zeroed 8-byte union and returns it through nbostream::n2h (presumably network-to-host byte order - confirm). NOTE(review): &_value[0] is evaluated even when _value is empty. */ int64_t RawResultNode::onGetInteger(size_t index) const +{ + (void) index; + union { + int64_t _int64; + uint8_t _bytes[8]; + } nbo; + nbo._int64 = 0; + memcpy(nbo._bytes, &_value[0], std::min(sizeof(nbo._bytes), _value.size())); + return nbostream::n2h(nbo._int64); +} + +/* Same scheme as onGetInteger, with the 8 bytes viewed as a double. */ double RawResultNode::onGetFloat(size_t index) const +{ + (void) index; + union { + double _double; + uint8_t _bytes[8]; + } nbo; + nbo._double = 0; + memcpy(nbo._bytes, &_value[0], std::min(sizeof(nbo._bytes), _value.size())); + return nbostream::n2h(nbo._double); +} + +Serializer & RawResultNode::onSerialize(Serializer & os) const +{ + os << _value; + return os; +} + +ResultSerializer & RawResultNode::onSerializeResult(ResultSerializer & os) const +{ + return os.putResult(getClass(), *this); +} + +/* Orders below positive infinity; otherwise memcmp over the common prefix, with the shorter buffer first on a tie. NOTE(review): &_value[0] and &rb._value[0] are evaluated even for empty vectors. */ int RawResultNode::onCmp(const Identifiable & b) const +{ + if (b.inherits(PositiveInfinityResultNode::classId)) { + return -1; + } else { + const RawResultNode & rb( static_cast(b) ); + int result = memcmp(&_value[0], &rb._value[0], std::min(_value.size(), rb._value.size())); + if (result == 0) { + result = _value.size() < rb._value.size() ? -1 : _value.size() > rb._value.size() ? 
1 : 0; + } + return result; + } +} + +/* NOTE(review): &_value[0] is evaluated even when _value is empty; hashBuf itself reads nothing for sz == 0. */ size_t RawResultNode::hash() const { return hashBuf(&_value[0], _value.size()); } + +/* Hashes a vector reached through an untyped pointer; buf is cast and dereferenced without any check. */ size_t RawResultNode::hash(const void * buf) const +{ + const std::vector & s = *static_cast *>(buf); + return hashBuf(&s[0], s.size()); +} + +Deserializer & RawResultNode::onDeserialize(Deserializer & is) +{ + is >> _value; + return is; +} + +ResultDeserializer & RawResultNode::onDeserializeResult(ResultDeserializer & is) +{ + return is.getResult(getClass(), *this); +} + +void +RawResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "value", _value); +} + +/* Copies rhs's string form (obtained via a 32-byte scratch buffer) into this node. */ void RawResultNode::set(const ResultNode & rhs) +{ + char buf[32]; + ConstBufferRef b(rhs.getString(BufferRef(buf, sizeof(buf)))); + setBuffer(b.data(), b.size()); +} +/* Replaces the content with sz bytes from buf. The vector is first grown to sz + 1 so a zero byte can be written just past the payload, then shrunk back to sz; presumably this keeps a terminator in the spare capacity for C-string readers - confirm. */ void RawResultNode::setBuffer(const void *buf, size_t sz) +{ + _value.resize(sz + 1); + memcpy(&_value[0], buf, sz); + _value.back() = 0; + _value.resize(sz); +} + +/* Returns a reference to the internal bytes; the caller's buffer and the index are unused. */ ResultNode::ConstBufferRef RawResultNode::onGetString(size_t index, ResultNode::BufferRef ) const { (void) index; return ConstBufferRef(&_value[0], _value.size()); } + + +} +} diff --git a/searchlib/src/vespa/searchlib/expression/resultvector.cpp b/searchlib/src/vespa/searchlib/expression/resultvector.cpp new file mode 100644 index 00000000000..169b4ea6159 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/resultvector.cpp @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +namespace search { +namespace expression { + +IMPLEMENT_ABSTRACT_EXPRESSIONNODE(ResultNodeVector, ResultNode); +IMPLEMENT_RESULTNODE(Int8ResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(Int16ResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(Int32ResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(Int64ResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(EnumResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(FloatResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(StringResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(RawResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(IntegerBucketResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(FloatBucketResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(StringBucketResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(RawBucketResultNodeVector, ResultNodeVector); +IMPLEMENT_RESULTNODE(GeneralResultNodeVector, ResultNodeVector); + +/* Linear scan: returns the first non-null element for which key.cmp() reports equality, or NULL when there is none. */ const ResultNode * +GeneralResultNodeVector::find(const ResultNode & key) const +{ + for (size_t i(0); i < _v.size(); i++) { + const ResultNode * r = _v[i].get(); + if (r && (key.cmp(*r) == 0)) { + return _v[i].get(); + } + } + return NULL; +} + +/* XOR of the element hashes. Null slots are skipped, the same way find() skips them, so they cannot be dereferenced here. */ size_t +GeneralResultNodeVector::hash() const +{ + size_t h(0); + for (size_t i(0); i < _v.size(); i++) { + if (_v[i].get()) { h ^= _v[i]->hash(); } + } + return h; +} + +ResultSerializer & +ResultNodeVector::onSerializeResult(ResultSerializer & os) const +{ + return os.putResult(getClass(), *this); +} + +ResultDeserializer & +ResultNodeVector::onDeserializeResult(ResultDeserializer & is) +{ + return is.getResult(getClass(), *this); +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_resultvector() {} diff --git a/searchlib/src/vespa/searchlib/expression/resultvector.h b/searchlib/src/vespa/searchlib/expression/resultvector.h new file mode 100644 index 00000000000..c24a68f0da5 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/expression/resultvector.h @@ -0,0 +1,399 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace expression { + +class ResultNodeVector : public ResultNode +{ +public: + DECLARE_ABSTRACT_EXPRESSIONNODE(ResultNodeVector); + DECLARE_RESULTNODE_SERIALIZE; + typedef std::unique_ptr UP; + typedef vespalib::IdentifiablePtr CP; + virtual const ResultNode * find(const ResultNode & key) const = 0; + virtual ResultNodeVector & push_back(const ResultNode & node) = 0; + virtual ResultNodeVector & push_back_safe(const ResultNode & node) = 0; + virtual const ResultNode & get(size_t index) const = 0; + virtual ResultNodeVector & set(size_t index, const ResultNode & node) = 0; + virtual ResultNode & get(size_t index) = 0; + virtual void clear() = 0; + virtual void resize(size_t sz) = 0; + size_t size() const { return onSize(); } + bool empty() const { return size() == 0; } + /** + * flatten* hooks: the defaults in this class return the argument untouched. + * @param r the node passed in; the same reference is returned + */ + virtual ResultNode & flattenMultiply(ResultNode & r) const { return r; } + virtual ResultNode & flattenSum(ResultNode & r) const { return r; } + virtual ResultNode & flattenMax(ResultNode & r) const { return r; } + virtual ResultNode & flattenMin(ResultNode & r) const { return r; } + virtual ResultNode & flattenAnd(ResultNode & r) const { return r; } + virtual ResultNode & flattenOr(ResultNode & r) const { return r; } + virtual ResultNode & flattenXor(ResultNode & r) const { return r; } + virtual void min(const ResultNode & b) { (void) b; } + virtual void max(const ResultNode & b) { (void) b; } + virtual void add(const ResultNode & b) { (void) b; } +private: + virtual size_t onSize() const = 0; + virtual void set(const ResultNode & rhs) { (void) rhs; } + virtual bool isMultiValue() const 
{ return true; } +}; + +/* Comparator policy built on a.cmp(b): less is cmp() < 0, equal is cmp() == 0. */ template +struct cmpT { + struct less : public std::binary_function { + bool operator()(const B & a, const B & b) { return a.cmp(b) < 0; } + }; + struct equal : public std::binary_function { + bool operator()(const B & a, const B & b) { return a.cmp(b) == 0; } + }; +}; + +/* Comparator policy built on a.contains(b), which is treated as a three-way result: negative means less, 0 means equal. */ template +struct contains { + struct less : public std::binary_function { + bool operator()(const B & a, const V & b) { return a.contains(b) < 0; } + }; + struct equal : public std::binary_function { + bool operator()(const B & a, const V & b) { return a.contains(b) == 0; } + }; +}; + +/* ResultNodeVector whose elements live by value in the std::vector member _result (typedef Vector). Element access is unchecked operator[]. */ template +class ResultNodeVectorT : public ResultNodeVector +{ +public: + DECLARE_NBO_SERIALIZE; + typedef std::vector Vector; + const Vector & getVector() const { return _result; } + Vector & getVector() { return _result; } + virtual const ResultNode * find(const ResultNode & key) const; + virtual void sort(); + virtual void reverse(); + virtual ResultNodeVector & push_back(const ResultNode & node); + virtual ResultNodeVector & push_back_safe(const ResultNode & node); + virtual ResultNodeVector & set(size_t index, const ResultNode & node); + virtual const ResultNode & get(size_t index) const { return _result[index]; } + virtual ResultNode & get(size_t index) { return _result[index]; } + virtual void clear() { _result.clear(); } + virtual void resize(size_t sz) { _result.resize(sz); } + virtual void negate(); +private: + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { visit(visitor, "Vector", _result); } + virtual size_t onSize() const { return _result.size(); } + virtual const vespalib::Identifiable::RuntimeClass & getBaseClass() const { return B::_RTClass; } + /* The indexed accessors forward to element 'index' and pass the same index on to the element. */ virtual int64_t onGetInteger(size_t index) const { return _result[index].getInteger(index); } + virtual double onGetFloat(size_t index) const { return _result[index].getFloat(index); } + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { return _result[index].getString(index, buf); } + virtual 
size_t hash() const; + virtual int onCmp(const Identifiable & b) const; + Vector _result; +}; + +/* Overwrites element 'index' via its set(); the index is not range-checked. */ template +ResultNodeVector & ResultNodeVectorT::set(size_t index, const ResultNode & node) +{ + _result[index].set(node); + return *this; +} + +/* Appends node: copied directly when it inherits B, otherwise converted through a temporary B and B::set(). */ template +ResultNodeVector & ResultNodeVectorT::push_back_safe(const ResultNode & node) +{ + if (node.inherits(B::classId)) { + _result.push_back(static_cast(node)); + } else { + B value; + value.set(node); + _result.push_back(value); + } + return *this; +} + +/* Appends node after an unchecked static_cast; see push_back_safe() for the checked variant. */ template +ResultNodeVector & ResultNodeVectorT::push_back(const ResultNode & node) +{ + _result.push_back(static_cast(node)); + return *this; +} + +/* Orders first by element count, then element-wise by cmp(). The sizes are compared three-way rather than subtracted, because a size_t difference squeezed into an int can wrap to the wrong sign or to 0. */ template +int ResultNodeVectorT::onCmp(const Identifiable & rhs) const +{ + const ResultNodeVectorT & b(static_cast(rhs)); + int diff = (_result.size() < b._result.size()) ? -1 : (_result.size() > b._result.size()) ? 1 : 0; + for (size_t i(0), m(_result.size()); (diff == 0) && (i < m); i++) { + diff = _result[i].cmp(b._result[i]); + } + return diff; +} + +/* Sorts ascending by cmp(). */ template +void ResultNodeVectorT::sort() +{ + typedef cmpT LC; + std::sort(_result.begin(), _result.end(), typename LC::less()); +} + +template +void ResultNodeVectorT::reverse() +{ + std::reverse(_result.begin(), _result.end()); +} + +/* XOR of the element hashes. */ template +size_t ResultNodeVectorT::hash() const +{ + size_t h(0); + for(typename Vector::const_iterator it(_result.begin()), mt(_result.end()); it != mt; it++) { + h ^= it->hash(); + } + return h; +} + +/* Negates every element in place. */ template +void ResultNodeVectorT::negate() +{ + for(typename Vector::iterator it(_result.begin()), mt(_result.end()); it != mt; it++) { + it->negate(); + } +} + +/* std::lower_bound lookup using the comparator policy C on the value extracted by G; this relies on _result already being ordered consistently with C::less. Returns the matching element or NULL. */ template +const ResultNode * ResultNodeVectorT::find(const ResultNode & key) const +{ + G getter; + typename Vector::const_iterator found = std::lower_bound(_result.begin(), _result.end(), getter(key), typename C::less() ); + if (found != _result.end()) { + typename C::equal equal; + return equal(*found, getter(key)) ? 
&(*found) : NULL; + } + return NULL; +} + +template +vespalib::Serializer & ResultNodeVectorT::onSerialize(vespalib::Serializer & os) const +{ + return serialize(_result, os); +} + +template +vespalib::Deserializer & ResultNodeVectorT::onDeserialize(vespalib::Deserializer & is) +{ + return deserialize(_result, is); +} + +/* Key extractors: each functor turns a ResultNode into the value handed to a comparator. */ struct GetInteger { + int64_t operator () (const ResultNode & r) { return r.getInteger(); } +}; + +struct GetFloat { + double operator () (const ResultNode & r) { return r.getFloat(); } +}; + +/* _tmp is the buffer handed to getString(); it is default-constructed here - TODO confirm what a default BufferRef refers to. */ struct GetString { + ResultNode::BufferRef _tmp; + ResultNode::ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); } +}; + +/* Adds the flatten* reductions. Every one follows the same pattern: seed a temporary from r, fold each element into it, then write the temporary back with r.set() and return r. */ template +class NumericResultNodeVectorT : public ResultNodeVectorT, std::_Identity > +{ +public: + virtual ResultNode & flattenMultiply(ResultNode & r) const { + B v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.multiply(vec[i]); + } + r.set(v); + return r; + } + /* The bitwise reductions (and/or/xor) accumulate in an Int64ResultNode regardless of the element type B. */ virtual ResultNode & flattenAnd(ResultNode & r) const { + Int64ResultNode v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.andOp(vec[i]); + } + r.set(v); + return r; + } + virtual ResultNode & flattenOr(ResultNode & r) const { + Int64ResultNode v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.orOp(vec[i]); + } + r.set(v); + return r; + } + virtual ResultNode & flattenXor(ResultNode & r) const { + Int64ResultNode v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.xorOp(vec[i]); + } + r.set(v); + return r; + } + /* Sum, max and min accumulate in a temporary of the element type B. */ virtual ResultNode & flattenSum(ResultNode & r) const { + B v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.add(vec[i]); + } + r.set(v); + return r; + } + virtual ResultNode & flattenMax(ResultNode & r) const { + B v; + v.set(r); + 
const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.max(vec[i]); + } + r.set(v); + return r; + } + virtual ResultNode & flattenMin(ResultNode & r) const { + B v; + v.set(r); + const std::vector & vec(this->getVector()); + for(size_t i(0), m(vec.size()); i < m; i++) { + v.min(vec[i]); + } + r.set(v); + return r; + } +}; + +class Int8ResultNodeVector : public NumericResultNodeVectorT +{ +public: + Int8ResultNodeVector() { } + DECLARE_RESULTNODE(Int8ResultNodeVector); +}; + +class Int16ResultNodeVector : public NumericResultNodeVectorT +{ +public: + Int16ResultNodeVector() { } + DECLARE_RESULTNODE(Int16ResultNodeVector); +}; + +class Int32ResultNodeVector : public NumericResultNodeVectorT +{ +public: + Int32ResultNodeVector() { } + DECLARE_RESULTNODE(Int32ResultNodeVector); +}; + +class Int64ResultNodeVector : public NumericResultNodeVectorT +{ +public: + Int64ResultNodeVector() { } + DECLARE_RESULTNODE(Int64ResultNodeVector); +}; + +typedef Int64ResultNodeVector IntegerResultNodeVector; + +class EnumResultNodeVector : public NumericResultNodeVectorT +{ +public: + EnumResultNodeVector() {} + DECLARE_RESULTNODE(EnumResultNodeVector); +}; + +class FloatResultNodeVector : public NumericResultNodeVectorT +{ +public: + FloatResultNodeVector() { } + DECLARE_RESULTNODE(FloatResultNodeVector); +}; + +class StringResultNodeVector : public ResultNodeVectorT, std::_Identity > +{ +public: + StringResultNodeVector() { } + DECLARE_RESULTNODE(StringResultNodeVector); +}; + +class RawResultNodeVector : public ResultNodeVectorT, std::_Identity > +{ +public: + RawResultNodeVector() { } + DECLARE_RESULTNODE(RawResultNodeVector); +}; + +class IntegerBucketResultNodeVector : public ResultNodeVectorT, GetInteger > +{ +public: + IntegerBucketResultNodeVector() { } + DECLARE_RESULTNODE(IntegerBucketResultNodeVector); +}; + +class FloatBucketResultNodeVector : public ResultNodeVectorT, GetFloat > +{ +public: + FloatBucketResultNodeVector() { } + 
DECLARE_RESULTNODE(FloatBucketResultNodeVector); +}; + +class StringBucketResultNodeVector : public ResultNodeVectorT, GetString > +{ +public: + StringBucketResultNodeVector() { } + DECLARE_RESULTNODE(StringBucketResultNodeVector); +}; + +class RawBucketResultNodeVector : public ResultNodeVectorT, GetString > +{ +public: + RawBucketResultNodeVector() { } + DECLARE_RESULTNODE(RawBucketResultNodeVector); +}; + +class GeneralResultNodeVector : public ResultNodeVector +{ +public: + DECLARE_EXPRESSIONNODE(GeneralResultNodeVector); + virtual const ResultNode * find(const ResultNode & key) const; + virtual ResultNodeVector & push_back(const ResultNode & node) { _v.push_back(node); return *this; } + virtual ResultNodeVector & push_back_safe(const ResultNode & node) { _v.push_back(node); return *this; } + virtual const ResultNode & get(size_t index) const { return *_v[index]; }; + virtual ResultNodeVector & set(size_t index, const ResultNode & node) { _v[index] = node; return *this; } + virtual ResultNode & get(size_t index) { return *_v[index]; } + virtual void clear() { _v.clear(); } + virtual void resize(size_t sz) { _v.resize(sz); } +private: + virtual int64_t onGetInteger(size_t index) const { return _v[index]->getInteger(index); } + virtual double onGetFloat(size_t index) const { return _v[index]->getFloat(index); } + virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const { return _v[index]->getString(index, buf); } + virtual size_t hash() const; + virtual size_t onSize() const { return _v.size(); } + std::vector _v; +}; + + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h b/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h new file mode 100644 index 00000000000..2cc788d1424 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/reversefunctionnode.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node; evaluation is implemented in onExecute(), which is
+// defined outside this header.
+// NOTE(review): presumably reverses the order of a multi-value argument - confirm in the .cpp.
+class ReverseFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ReverseFunctionNode);
+    ReverseFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    ReverseFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/serializer.h b/searchlib/src/vespa/searchlib/expression/serializer.h
new file mode 100644
index 00000000000..1c11c7b3951
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/serializer.h
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace expression {
+
+// Forward declarations are enough: the interfaces below only take references.
+class RawResultNode;
+class ResultNodeVector;
+class ResultNode;
+
+// Pure interface for writing result nodes. It has dedicated overloads for
+// raw values and for vectors, plus proxyPut() for any other ResultNode.
+class ResultSerializer
+{
+public:
+    virtual ~ResultSerializer() { }
+    // Both putResult() overloads return the serializer so calls can be chained.
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value) = 0;
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value) = 0;
+    virtual void proxyPut(const ResultNode & value) = 0;
+};
+
+// Pure interface for reading result nodes; mirrors ResultSerializer.
+class ResultDeserializer
+{
+public:
+    virtual ~ResultDeserializer() { }
+    // The getResult() overloads fill `value` and return the deserializer for chaining.
+    virtual ResultDeserializer & getResult(const vespalib::IFieldBase & field, RawResultNode & value) = 0;
+    virtual ResultDeserializer & getResult(const vespalib::IFieldBase & field, ResultNodeVector & value) = 0;
+    // NOTE(review): unlike getResult(), this "get" takes its target by const
+    // reference - confirm how implementations are expected to populate it.
+    virtual void proxyGet(const ResultNode & value) = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/singleresultnode.h b/searchlib/src/vespa/searchlib/expression/singleresultnode.h
new file mode 100644
index 00000000000..c0d50559ca5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/singleresultnode.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Abstract base for result nodes that hold a single value. Subclasses must
+// provide clone(), the min/max/add combinators, setMin()/setMax() and
+// onGetRawByteSize().
+class SingleResultNode : public ResultNode
+{
+public:
+    virtual ~SingleResultNode() { }
+    DECLARE_ABSTRACT_RESULTNODE(SingleResultNode);
+    typedef vespalib::IdentifiablePtr CP;
+    typedef std::unique_ptr UP;
+    virtual SingleResultNode *clone() const = 0;
+
+    // Combine this value with b in place.
+    virtual void min(const ResultNode & b) = 0;
+    virtual void max(const ResultNode & b) = 0;
+    virtual void add(const ResultNode & b) = 0;
+
+    // NOTE(review): presumably set this node to the smallest / largest value
+    // representable by the concrete type - confirm in the subclasses.
+    virtual void setMin() = 0;
+    virtual void setMax() = 0;
+    // Public entry point; forwards to the subclass hook onGetRawByteSize().
+    virtual size_t getRawByteSize() const { return onGetRawByteSize(); }
+    virtual size_t onGetRawByteSize() const = 0;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h b/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h
new file mode 100644
index 00000000000..837563b6ee2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/sortfunctionnode.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node; evaluation is implemented in onExecute(), which is
+// defined outside this header.
+// NOTE(review): presumably sorts a multi-value argument - confirm in the .cpp.
+class SortFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(SortFunctionNode);
+    SortFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    SortFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h b/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h
new file mode 100644
index 00000000000..8ceebd95bb0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Multi-argument expression node. Result preparation and evaluation are
+// implemented in onPrepareResult()/onExecute(), defined outside this header.
+class StrCatFunctionNode : public MultiArgFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(StrCatFunctionNode);
+    StrCatFunctionNode() { }
+    // Convenience constructor that registers a first argument via addArg().
+    StrCatFunctionNode(const ExpressionNode & arg) { addArg(arg); }
+private:
+    virtual void onPrepareResult();
+    virtual bool onExecute() const;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp b/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp
new file mode 100644
index 00000000000..5749c5505b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatserializer.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "strcatserializer.h"
+#include "rawresultnode.h"
+#include "resultvector.h"
+#include
+
+namespace search {
+namespace expression {
+
+using vespalib::IFieldBase;
+using vespalib::Serializer;
+using vespalib::string;
+using vespalib::stringref;
+
+// Writes an arbitrary Identifiable. Objects that inherit ResultNode are routed
+// through onSerializeResult() so the ResultSerializer overloads below are used;
+// everything else is written with serializeDirect(). The field is ignored.
+StrCatSerializer & StrCatSerializer::put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value)
+{
+    (void) field;
+    if (value.inherits(ResultNode::classId)) {
+        static_cast(value).onSerializeResult(*this);
+    } else {
+        value.serializeDirect(*this);
+    }
+    return *this;
+}
+
+// Writes a vector by serializing each element in index order; no element
+// count or separator is written by this function itself. The field is ignored.
+ResultSerializer & StrCatSerializer::putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value)
+{
+    (void) field;
+    size_t sz(value.size());
+    for (size_t i(0); i < sz; i++) {
+        value.get(i).serialize(*this);
+    }
+    return *this;
+}
+
+// Writes a raw value by streaming its bytes as a stringref of buf.size()
+// characters to the underlying stream. The field is ignored.
+ResultSerializer & StrCatSerializer::putResult(const vespalib::IFieldBase & field, const RawResultNode & value)
+{
+    (void) field;
+    vespalib::ConstBufferRef buf(value.get());
+    getStream() << stringref(buf.c_str(), buf.size());
+    return *this;
+}
+
+// Fallback for any other result node: let it serialize itself directly into this serializer.
+void StrCatSerializer::proxyPut(const ResultNode & value)
+{
+    value.serializeDirect(*this);
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_strcatserializer() {}
diff --git a/searchlib/src/vespa/searchlib/expression/strcatserializer.h b/searchlib/src/vespa/searchlib/expression/strcatserializer.h
new file mode 100644
index 00000000000..455e9828b40
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strcatserializer.h
@@ -0,0 +1,25 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+
+
+namespace search {
+namespace expression {
+
+class RawResultNode;
+
+// AsciiSerializer that also implements ResultSerializer, writing result nodes
+// to the asciistream handed to the constructor.
+class StrCatSerializer : public vespalib::AsciiSerializer, public ResultSerializer
+{
+public:
+    // The stream is taken by reference and passed on to AsciiSerializer.
+    StrCatSerializer(vespalib::asciistream & stream) : vespalib::AsciiSerializer(stream) { }
+    virtual StrCatSerializer & put(const vespalib::IFieldBase & field, const vespalib::Identifiable & value);
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const ResultNodeVector & value);
+    virtual ResultSerializer & putResult(const vespalib::IFieldBase & field, const RawResultNode & value);
+    virtual void proxyPut(const ResultNode & value);
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp
new file mode 100644
index 00000000000..2b7b4f096ff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.cpp
@@ -0,0 +1,93 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "stringbucketresultnode.h"
+#include
+
+namespace search {
+namespace expression {
+
+IMPLEMENT_RESULTNODE(StringBucketResultNode, BucketResultNode);
+
+// Shared default-constructed instance handed out by getNull().
+StringBucketResultNode StringBucketResultNode::_nullResult;
+
+// Hash of the bucket. The XOR-folding implementation is compiled out with
+// "#if 0", so the active code always returns 0.
+// NOTE(review): a constant hash puts every bucket in the same hash slot.
+// The disabled code also treats _from as a string (size()/c_str()) although
+// the header declares it as ResultNode::CP, and reads the characters through
+// a uint64_t pointer - it would need rework before being re-enabled.
+size_t
+StringBucketResultNode::hash() const
+{
+#if 0
+    union {
+        uint8_t cxor[8];
+        uint64_t ixor;
+    } xorResult;
+    xorResult.ixor = 0;
+    size_t i(0);
+    const size_t m(_from.size());
+    const char * c = _from.c_str();
+    const uint64_t * ic = reinterpret_cast(c);
+    for (; i+8 < m; i+=8) {
+        const size_t index(i/8);
+        xorResult.ixor ^= ic[index];
+    }
+    for (; i < m; i++) {
+        xorResult.cxor[i%8] ^= c[i];
+    }
+    return xorResult.ixor;
+#else
+    return 0;
+#endif
+}
+
+// Orders buckets by their lower bound first and by their upper bound on ties.
+// rhs is cast to StringBucketResultNode without a type check.
+int
+StringBucketResultNode::onCmp(const Identifiable & rhs) const
+{
+    const StringBucketResultNode & b = static_cast(rhs);
+    int diff(_from->cmp(*b._from));
+    return (diff == 0) ? _to->cmp(*b._to) : diff;
+}
+
+// Compares this bucket's bounds with those of b:
+//  - when _from compares below b._from the result is min(0, toDiff),
+//  - otherwise the result is max(0, toDiff),
+// where toDiff is _to->cmp(*b._to).
+int StringBucketResultNode::contains(const StringBucketResultNode & b) const
+{
+    int fromDiff(_from->cmp(*b._from));
+    int toDiff(_to->cmp(*b._to));
+    return (fromDiff < 0) ? std::min(0, toDiff) : std::max(0, toDiff);
+}
+
+// Classifies the C string s against the bucket:
+//   1 when _from->cmp(s) > 0,
+//  -1 when _to->cmp(s) <= 0,
+//   0 otherwise.
+// NOTE(review): assuming cmp() is a three-way comparison, this makes the
+// bucket the half-open range [from, to) - confirm.
+int StringBucketResultNode::contains(const char * s) const
+{
+    StringResultNode v(s);
+    int diff(_from->cmp(v));
+    if (diff > 0) {
+        return 1;
+    } else {
+        diff = _to->cmp(v);
+        return (diff <= 0) ? -1 : 0;
+    }
+}
+
+// Exposes both bounds to an object visitor under the names of _fromField and _toField.
+void
+StringBucketResultNode::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    visit(visitor, _fromField.getName(), _from);
+    visit(visitor, _toField.getName(), _to);
+}
+
+// Wire format: lower bound followed by upper bound.
+vespalib::Serializer &
+StringBucketResultNode::onSerialize(vespalib::Serializer & os) const
+{
+    _from.serialize(os);
+    _to.serialize(os);
+    return os;
+}
+
+// Reads the bounds in the same order as onSerialize() writes them.
+vespalib::Deserializer &
+StringBucketResultNode::onDeserialize(vespalib::Deserializer & is)
+{
+    _from.deserialize(is);
+    _to.deserialize(is);
+    return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_stringbucketresultnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h
new file mode 100644
index 00000000000..df096b7350c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringbucketresultnode.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "bucketresultnode.h"
+#include "stringresultnode.h"
+
+namespace search {
+namespace expression {
+
+// Bucket whose two bounds are held as ResultNode::CP members (_from, _to).
+// Every constructor and setRange() below populates them with StringResultNode
+// instances, except the UP constructor, which adopts whatever nodes it is given.
+class StringBucketResultNode : public BucketResultNode
+{
+private:
+    ResultNode::CP _from;
+    ResultNode::CP _to;
+    static StringBucketResultNode _nullResult;
+    // Reports the size of the two handle members, not of the strings they refer to.
+    virtual size_t onGetRawByteSize() const { return sizeof(_from) + sizeof(_to); }
+public:
+    // Functor returning the string view of a result node; _tmp is passed to
+    // getString() as its buffer argument.
+    struct GetValue {
+        BufferRef _tmp;
+        ConstBufferRef operator () (const ResultNode & r) { return r.getString(_tmp); }
+    };
+
+    DECLARE_EXPRESSIONNODE(StringBucketResultNode);
+    DECLARE_NBO_SERIALIZE;
+    // Both bounds are default-constructed StringResultNodes.
+    StringBucketResultNode() : _from(new StringResultNode()), _to(new StringResultNode()) {}
+    StringBucketResultNode(const vespalib::stringref & from, const vespalib::stringref & to) : _from(new StringResultNode(from)), _to(new StringResultNode(to)) {}
+    // Takes over ownership of both nodes (release() on each unique pointer).
+    StringBucketResultNode(ResultNode::UP from, ResultNode::UP to) : _from(from.release()), _to(to.release()) {}
+    virtual size_t hash() const;
+    virtual int onCmp(const Identifiable & b) const;
+    int contains(const StringBucketResultNode & b) const;
+    // Forwards to the const char * overload using v.c_str(); v.size() is not consulted.
+    int contains(const ConstBufferRef & v) const { return contains(v.c_str()); }
+    int contains(const char * v) const;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    // Replaces both bounds with new StringResultNodes; returns *this for chaining.
+    StringBucketResultNode &setRange(const vespalib::stringref & from, const vespalib::stringref & to) {
+        _from.reset(new StringResultNode(from));
+        _to.reset(new StringResultNode(to));
+        return *this;
+    }
+    // Returns the shared static _nullResult instance.
+    static const StringBucketResultNode & getNull() { return _nullResult; }
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/stringresultnode.h b/searchlib/src/vespa/searchlib/expression/stringresultnode.h
new file mode 100644
index 00000000000..e2530f79fb9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/stringresultnode.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Single-value result node holding a vespalib::string in _value.
+class StringResultNode : public SingleResultNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(StringResultNode);
+    DECLARE_NBO_SERIALIZE;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    // Doubles as the default constructor (empty string).
+    StringResultNode(const char * v="") : _value(v) { }
+    StringResultNode(const vespalib::stringref & v) : _value(v) { }
+    virtual size_t hash() const;
+    virtual int onCmp(const Identifiable & b) const;
+    virtual void set(const ResultNode & rhs);
+    StringResultNode & append(const ResultNode & rhs);
+    // Empties the string; returns *this for chaining.
+    StringResultNode & clear() { _value.clear(); return *this; }
+    const vespalib::string & get() const { return _value; }
+    void set(const vespalib::stringref & value) { _value = value; }
+    virtual void min(const ResultNode & b);
+    virtual void max(const ResultNode & b);
+    virtual void add(const ResultNode & b);
+    virtual void negate();
+
+private:
+    // The hooks below operate on untyped buffers handed in by the caller and
+    // cast them before use.
+    // NOTE(review): create()/destroy() construct and destruct a
+    // vespalib::string in place, so the buffers are presumably storage for
+    // vespalib::string objects - the cast targets are not visible here.
+    virtual int cmpMem(const void * a, const void *b) const {
+        return static_cast(a)->compare(*static_cast(b));
+    }
+    virtual void create(void * buf) const { new (buf) vespalib::string(); }
+    virtual void destroy(void * buf) const { static_cast(buf)->vespalib::string::~string(); }
+
+    // decode: buffer -> _value; encode: _value -> buffer; swap: exchange the two.
+    virtual void decode(const void * buf) { _value = *static_cast(buf); }
+    virtual void encode(void * buf) const { *static_cast(buf) = _value; }
+    virtual void swap(void * buf) { std::swap(*static_cast(buf), _value); }
+    virtual size_t hash(const void * buf) const;
+
+    // Reports the size of the string object itself, not the length of its contents.
+    virtual size_t onGetRawByteSize() const { return sizeof(_value); }
+    virtual void setMin();
+    virtual void setMax();
+    virtual int64_t onGetInteger(size_t index) const;
+    virtual double onGetFloat(size_t index) const;
+    virtual ConstBufferRef onGetString(size_t index, BufferRef buf) const;
+    vespalib::string _value;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h b/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h
new file mode 100644
index 00000000000..294b69a8172
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/strlenfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node. Result preparation and evaluation are implemented in
+// onPrepareResult()/onExecute(), defined outside this header.
+class StrLenFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(StrLenFunctionNode);
+    StrLenFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    StrLenFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/timestamp.cpp b/searchlib/src/vespa/searchlib/expression/timestamp.cpp
new file mode 100644
index 00000000000..6191af0f546
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/timestamp.cpp
@@ -0,0 +1,108 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+
+namespace search {
+namespace expression {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(TimeStampFunctionNode, UnaryFunctionNode);
+
+// Copies the configuration (time part and GMT flag) but not the handler:
+// _handler starts out empty and is only created by onPrepareResult().
+TimeStampFunctionNode::TimeStampFunctionNode(const TimeStampFunctionNode & rhs) :
+    UnaryFunctionNode(rhs),
+    _timePart(rhs._timePart),
+    _isGmt(rhs._isGmt),
+    _handler()
+{
+}
+
+// Same policy as the copy constructor: the configuration is copied and any
+// existing handler is dropped, so the node must be prepared again before use.
+TimeStampFunctionNode & TimeStampFunctionNode::operator = (const TimeStampFunctionNode & rhs)
+{
+    if (this != &rhs) {
+        UnaryFunctionNode::operator =(rhs);
+        _timePart = rhs._timePart;
+        _isGmt = rhs._isGmt;
+        _handler.reset();
+    }
+    return *this;
+}
+
+// Chooses result type and handler from the argument's result: a vector
+// argument yields an IntegerResultNodeVector and a MultiValueHandler, anything
+// else an Int64ResultNode and a SingleValueHandler. The result type is set
+// before the handler is created.
+void TimeStampFunctionNode::onPrepareResult()
+{
+    if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+        setResultType(std::unique_ptr(new IntegerResultNodeVector));
+        _handler.reset(new MultiValueHandler(*this));
+    } else {
+        setResultType(std::unique_ptr(new Int64ResultNode));
+        _handler.reset(new SingleValueHandler(*this));
+    }
+}
+
+unsigned TimeStampFunctionNode::getTimePart(time_t secSince70, TimePart tp, bool gmt) +{ + tm ts; + if (gmt) { + gmtime_r(&secSince70, &ts); + } else { + localtime_r(&secSince70, &ts); + } + switch (tp) { + case Year: return ts.tm_year + 1900; + case Month: return ts.tm_mon + 1; + case MonthDay:return ts.tm_mday; + case WeekDay: return ts.tm_wday; + case Hour: return ts.tm_hour; + case Minute: return ts.tm_min; + case Second: return ts.tm_sec; + case YearDay: return ts.tm_yday; + case IsDST: return ts.tm_isdst; + } + return 0; +} + +bool TimeStampFunctionNode::onExecute() const +{ + getArg().execute(); + _handler->handle(getArg().getResult()); + return true; +} + +void TimeStampFunctionNode::SingleValueHandler::handle(const ResultNode & arg) +{ + handleOne(arg, _result); +} + +void TimeStampFunctionNode::MultiValueHandler::handle(const ResultNode & arg) +{ + const ResultNodeVector & v(static_cast(arg)); + _result.getVector().resize(v.size()); + for(size_t i(0), m(_result.getVector().size()); i < m; i++) { + handleOne(v.get(i), _result.getVector()[i]); + } +} + +Serializer & TimeStampFunctionNode::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + uint8_t code(getTimePart() | (isGmt() ? 0x80 : 0x00)); + return os << code; +} + +Deserializer & TimeStampFunctionNode::onDeserialize(Deserializer & is) +{ + UnaryFunctionNode::onDeserialize(is); + uint8_t code(0); + is >> code; + _isGmt = code & 0x80; + _timePart = static_cast(code & 0x7f); + return is; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_timestamp() {} diff --git a/searchlib/src/vespa/searchlib/expression/timestamp.h b/searchlib/src/vespa/searchlib/expression/timestamp.h new file mode 100644 index 00000000000..16f92e233a4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/timestamp.h @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+
+// Expression node that extracts one calendar component (see TimePart) from a
+// timestamp argument, either in GMT or in local time.
+class TimeStampFunctionNode : public UnaryFunctionNode
+{
+public:
+    // The numeric values are part of the serialized form: the value is stored
+    // in the low 7 bits of the code byte written by onSerialize().
+    enum TimePart { Year=0, Month=1, MonthDay=2, WeekDay=3, Hour=4, Minute=5, Second=6, YearDay=7, IsDST=8 };
+    DECLARE_EXPRESSIONNODE(TimeStampFunctionNode);
+    DECLARE_NBO_SERIALIZE;
+    // Defaults to extracting the year in GMT.
+    TimeStampFunctionNode() : _timePart(Year), _isGmt(true) { }
+    TimeStampFunctionNode(const ExpressionNode::CP & arg, TimePart timePart, bool gmt=true) : UnaryFunctionNode(arg), _timePart(timePart), _isGmt(gmt) { }
+    TimeStampFunctionNode(const TimeStampFunctionNode & rhs);
+    TimeStampFunctionNode & operator = (const TimeStampFunctionNode & rhs);
+    unsigned int getTime() const { return getResult().getInteger(); } // Not valid until after node has been prepared
+    TimePart getTimePart() const { return _timePart; }
+    TimeStampFunctionNode & setTimePart(TimePart timePart) { _timePart = timePart; return *this; }
+    bool isGmt() const { return _isGmt; }
+    bool isLocal() const { return ! isGmt(); }
+protected:
+/*
+Values produced per time part (see the static getTimePart() in timestamp.cpp,
+which reads them from struct tm):
+unsigned year(timestamp);    tm_year + 1900
+unsigned month(timestamp);   [1-12]  (tm_mon + 1)
+unsigned date(timestamp);    [1-31]
+unsigned weekday(timestamp); [0-6]   (tm_wday is returned unmodified)
+unsigned hour(timestamp);    [0-23]
+unsigned minute(timestamp);  [0-59]
+unsigned second(timestamp);  [0-59]  (tm_sec, may be 60 for a leap second)
+*/
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+private:
+    // Strategy object created by onPrepareResult(). It takes a copy of the
+    // node's time part and GMT flag at construction time.
+    class Handler {
+    public:
+        Handler(const TimeStampFunctionNode & ts) : _timePart(ts.getTimePart()), _isGmt(ts.isGmt()) { }
+        virtual ~Handler() { }
+        virtual void handle(const ResultNode & arg) = 0;
+    protected:
+        // Converts one timestamp (arg.getInteger()) and stores the extracted component in result.
+        void handleOne(const ResultNode & arg, Int64ResultNode & result) const {
+            result.set(TimeStampFunctionNode::getTimePart(arg.getInteger(), _timePart, _isGmt));
+        }
+    private:
+        TimePart _timePart;
+        bool _isGmt;
+    };
+    // Handler for a single-value argument; keeps a reference to the node's result object.
+    class SingleValueHandler : public Handler {
+    public:
+        SingleValueHandler(TimeStampFunctionNode & ts) : Handler(ts), _result(static_cast(ts.updateResult())) { }
+        virtual void handle(const ResultNode & arg);
+    private:
+        Int64ResultNode & _result;
+    };
+    // Handler for a vector argument; keeps a reference to the node's result vector.
+    class MultiValueHandler : public Handler {
+    public:
+        MultiValueHandler(TimeStampFunctionNode & ts) : Handler(ts), _result(static_cast(ts.updateResult())) { }
+        virtual void handle(const ResultNode & arg);
+    private:
+        IntegerResultNodeVector & _result;
+    };
+
+    const ResultNode & getTimeStamp() const { return getArg().getResult(); }
+    // NOTE(review): timestamp.cpp in this patch contains no definition of init() - confirm whether it is still needed.
+    void init();
+    Int64ResultNode & updateIntegerResult() const { return static_cast(updateResult()); }
+    static unsigned getTimePart(time_t time, TimePart, bool gmt);
+    TimePart _timePart;
+    bool _isGmt;
+    // Empty until onPrepareResult() has run; reset by copy assignment.
+    std::unique_ptr _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h
new file mode 100644
index 00000000000..48d93ee282c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tofloatfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node. Result preparation and evaluation are implemented in
+// onPrepareResult()/onExecute(), defined outside this header.
+class ToFloatFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ToFloatFunctionNode);
+    ToFloatFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    ToFloatFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h
new file mode 100644
index 00000000000..420d6707215
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tointfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node. Result preparation and evaluation are implemented in
+// onPrepareResult()/onExecute(), defined outside this header.
+class ToIntFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ToIntFunctionNode);
+    ToIntFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    ToIntFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h b/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h
new file mode 100644
index 00000000000..4c80bbab7d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/torawfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node. Result preparation and evaluation are implemented in
+// onPrepareResult()/onExecute(), defined outside this header.
+class ToRawFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ToRawFunctionNode);
+    ToRawFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    ToRawFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h b/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h
new file mode 100644
index 00000000000..fd6d80d850e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/tostringfunctionnode.h
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace expression {
+
+// Unary expression node. Result preparation and evaluation are implemented in
+// onPrepareResult()/onExecute(), defined outside this header.
+class ToStringFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(ToStringFunctionNode);
+    ToStringFunctionNode() { }
+    // Forwards the single argument to UnaryFunctionNode.
+    ToStringFunctionNode(const ExpressionNode::CP & arg) : UnaryFunctionNode(arg) { }
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp
new file mode 100644
index 00000000000..2cd4df49c5b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+#include
+
+namespace search {
+namespace expression {
+
+using vespalib::FieldBase;
+using vespalib::Serializer;
+using vespalib::Deserializer;
+
+IMPLEMENT_EXPRESSIONNODE(UcaFunctionNode, UnaryFunctionNode);
+
+// Leaves every member default-constructed; no UcaConverter is created here
+// (the other constructor and onDeserialize() are what assign _collator).
+UcaFunctionNode::UcaFunctionNode()
+{
+}
+
+UcaFunctionNode::~UcaFunctionNode()
+{
+}
+
+// Stores locale and strength and immediately builds a matching UcaConverter.
+UcaFunctionNode::UcaFunctionNode(const ExpressionNode::CP & arg, const vespalib::string & locale, const vespalib::string & strength) :
+    UnaryFunctionNode(arg),
+    _locale(locale),
+    _strength(strength),
+    _collator(new common::UcaConverter(locale, strength))
+{
+}
+
+// Copies the configuration and the _collator handle. The handler is not
+// copied; it is created again by onPrepareResult().
+UcaFunctionNode::UcaFunctionNode(const UcaFunctionNode & rhs) :
+    UnaryFunctionNode(rhs),
+    _locale(rhs._locale),
+    _strength(rhs._strength),
+    _collator(rhs._collator),
+    _handler()
+{
+}
+
+// Same policy as the copy constructor; any existing handler is dropped.
+UcaFunctionNode & UcaFunctionNode::operator = (const UcaFunctionNode & rhs)
+{
+    if (this != &rhs) {
+        UnaryFunctionNode::operator =(rhs);
+        _locale = rhs._locale;
+        _strength = rhs._strength;
+        _collator = rhs._collator;
+        _handler.reset();
+    }
+    return *this;
+}
+
+// Chooses result type and handler from the argument's result: a vector
+// argument yields a RawResultNodeVector and a MultiValueHandler, anything else
+// a RawResultNode and a SingleValueHandler.
+void UcaFunctionNode::onPrepareResult()
+{
+    if (getArg().getResult().inherits(ResultNodeVector::classId)) {
+        setResultType(std::unique_ptr(new RawResultNodeVector));
+        _handler.reset(new MultiValueHandler(*this));
+    } else {
+        setResultType(std::unique_ptr(new RawResultNode));
+        _handler.reset(new SingleValueHandler(*this));
+    }
+}
+
+// Binds the handler to the node's converter and wraps the fixed-size
+// _backingBuffer in _buffer.
+// NOTE(review): *uca._collator is dereferenced unconditionally, so the node
+// must already own a converter when the handler is created.
+UcaFunctionNode::Handler::Handler(const UcaFunctionNode & uca) :
+    _converter(*uca._collator),
+    _backingBuffer(),
+    _buffer(_backingBuffer, sizeof(_backingBuffer))
+{
+}
+
+// Takes the string view of arg (using _buffer as the buffer argument), runs it
+// through the converter and stores the converted bytes in result as a raw value.
+void UcaFunctionNode::Handler::handleOne(const ResultNode & arg, RawResultNode & result) const
+{
+    vespalib::ConstBufferRef buf = _converter.convert(arg.getString(_buffer));
+    result.set(RawResultNode(buf.c_str(), buf.size()));
+}
+
+// Evaluates the argument and lets the handler created by onPrepareResult()
+// convert its result. _handler is dereferenced unconditionally.
+bool UcaFunctionNode::onExecute() const
+{
+    getArg().execute();
+    _handler->handle(getArg().getResult());
+    return true;
+}
+
+// Single value: convert the argument straight into the node's result.
+void UcaFunctionNode::SingleValueHandler::handle(const ResultNode & arg)
+{
+    handleOne(arg, _result);
+}
+
+// Multi value: size the result vector like the argument vector, then convert
+// element by element.
+void UcaFunctionNode::MultiValueHandler::handle(const ResultNode & arg)
+{
+    const ResultNodeVector & v(static_cast(arg));
+    _result.getVector().resize(v.size());
+    for(size_t i(0), m(_result.getVector().size()); i < m; i++) {
+        handleOne(v.get(i), _result.getVector()[i]);
+    }
+}
+
+// Wire format after the base class data: locale followed by strength.
+Serializer & UcaFunctionNode::onSerialize(Serializer & os) const
+{
+    UnaryFunctionNode::onSerialize(os);
+    return os << _locale << _strength;
+}
+
+// Reads locale and strength, then rebuilds the converter from them.
+Deserializer & UcaFunctionNode::onDeserialize(Deserializer & is)
+{
+    UnaryFunctionNode::onDeserialize(is);
+    is >> _locale >> _strength;
+    _collator.reset(new common::UcaConverter(_locale, _strength));
+    return is;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_expression_ucafunctionnode() {}
diff --git a/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h
new file mode 100644
index 00000000000..78242d9cbd1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/ucafunctionnode.h
@@ -0,0 +1,60 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace search {
+namespace expression {
+
+// Expression node that runs the string view of its argument through a
+// common::BlobConverter configured by a locale and a strength, producing raw
+// (binary) results.
+class UcaFunctionNode : public UnaryFunctionNode
+{
+public:
+    DECLARE_EXPRESSIONNODE(UcaFunctionNode);
+    DECLARE_NBO_SERIALIZE;
+    UcaFunctionNode();
+    ~UcaFunctionNode();
+    UcaFunctionNode(const ExpressionNode::CP & arg, const vespalib::string & locale, const vespalib::string & strength);
+    UcaFunctionNode(const UcaFunctionNode & rhs);
+    UcaFunctionNode & operator = (const UcaFunctionNode & rhs);
+private:
+    virtual bool onExecute() const;
+    virtual void onPrepareResult();
+    // Strategy object created by onPrepareResult(). It holds a reference to
+    // the node's converter plus a small buffer that is handed to getString()
+    // when reading the argument.
+    class Handler {
+    public:
+        Handler(const UcaFunctionNode & uca);
+        virtual ~Handler() { }
+        virtual void handle(const ResultNode & arg) = 0;
+    protected:
+        void handleOne(const ResultNode & arg, RawResultNode & result) const;
+    private:
+        const common::BlobConverter & _converter;
+        char _backingBuffer[32];
+        vespalib::BufferRef _buffer;
+    };
+    // Handler for a single-value argument; keeps a reference to the node's result object.
+    class SingleValueHandler : public Handler {
+    public:
+        SingleValueHandler(UcaFunctionNode & uca) : Handler(uca), _result(static_cast(uca.updateResult())) { }
+        virtual void handle(const ResultNode & arg);
+    private:
+        RawResultNode & _result;
+    };
+    // Handler for a vector argument; keeps a reference to the node's result vector.
+    class MultiValueHandler : public Handler {
+    public:
+        MultiValueHandler(UcaFunctionNode & uca) : Handler(uca), _result(static_cast(uca.updateResult())) { }
+        virtual void handle(const ResultNode & arg);
+    private:
+        RawResultNodeVector & _result;
+    };
+    vespalib::string _locale;
+    vespalib::string _strength;
+    // Converter handle; copied as-is by the copy constructor and copy assignment.
+    common::BlobConverter::LP _collator;
+    // Empty until onPrepareResult() has run; reset by copy assignment.
+    std::unique_ptr _handler;
+};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h
new file mode 100644
index 00000000000..fa0184dc3e5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/expression/unarybitfunctionnode.h
@@ -0,0 +1,31 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include + +namespace search { +namespace expression { + +class UnaryBitFunctionNode : public UnaryFunctionNode +{ +public: + DECLARE_NBO_SERIALIZE; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + DECLARE_ABSTRACT_EXPRESSIONNODE(UnaryBitFunctionNode); + UnaryBitFunctionNode() : _numBits(0) { } + UnaryBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits) : UnaryFunctionNode(arg), _numBits(numBits) { } +protected: + size_t getNumBits() const { return _numBits; } + size_t getNumBytes() const { return (_numBits+7)/8; } + virtual void onPrepareResult(); +private: + virtual void onPrepare(bool preserveAccurateTypes); + virtual bool internalExecute(const vespalib::nbostream & os) const = 0; + virtual bool onExecute() const; + uint32_t _numBits; + mutable vespalib::nbostream _tmpOs; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h b/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h new file mode 100644 index 00000000000..366e7d9191f --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/unaryfunctionnode.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { + +class UnaryFunctionNode : public MultiArgFunctionNode +{ +public: + DECLARE_ABSTRACT_EXPRESSIONNODE(UnaryFunctionNode); + UnaryFunctionNode() { } + UnaryFunctionNode(const ExpressionNode::CP & arg) : + MultiArgFunctionNode() + { + appendArg(arg); + } +protected: + const ExpressionNode & getArg() const { return MultiArgFunctionNode::getArg(0); } +private: + virtual void onPrepareResult(); +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h b/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h new file mode 100644 index 00000000000..b8d00b6ebdb --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/xorbitfunctionnode.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace expression { + +class XorBitFunctionNode : public UnaryBitFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(XorBitFunctionNode); + XorBitFunctionNode() { } + XorBitFunctionNode(const ExpressionNode::CP & arg, unsigned numBits); +private: + mutable std::vector _tmpXor; + virtual bool internalExecute(const vespalib::nbostream & os) const; + virtual void onPrepareResult(); +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h b/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h new file mode 100644 index 00000000000..117c55f69c8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/xorfunctionnode.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace expression { + +class XorFunctionNode : public BitFunctionNode +{ +public: + DECLARE_EXPRESSIONNODE(XorFunctionNode); + XorFunctionNode() { } +private: + virtual ResultNode::CP getInitialValue() const { return ResultNode::CP(new Int64ResultNode(0)); } + virtual ResultNode & flatten(const ResultNodeVector & v, ResultNode & result) const { return v.flattenXor(result); } + virtual void onArgument(const ResultNode & arg, Int64ResultNode & result) const; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/expression/zcurve.cpp b/searchlib/src/vespa/searchlib/expression/zcurve.cpp new file mode 100644 index 00000000000..dd22ca7c8fe --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/zcurve.cpp @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include + +using vespalib::FieldBase; +using vespalib::Serializer; +using vespalib::Deserializer; + +namespace search { +namespace expression { + +IMPLEMENT_EXPRESSIONNODE(ZCurveFunctionNode, UnaryFunctionNode); + +ZCurveFunctionNode::ZCurveFunctionNode(const ZCurveFunctionNode & rhs) : + UnaryFunctionNode(rhs), + _dim(rhs._dim), + _handler() +{ +} + +ZCurveFunctionNode & ZCurveFunctionNode::operator = (const ZCurveFunctionNode & rhs) +{ + if (this != &rhs) { + UnaryFunctionNode::operator =(rhs); + _dim = rhs._dim; + _handler.reset(); + } + return *this; +} + +void ZCurveFunctionNode::onPrepareResult() +{ + if (getArg().getResult().inherits(ResultNodeVector::classId)) { + setResultType(std::unique_ptr(new IntegerResultNodeVector)); + _handler.reset(new MultiValueHandler(*this)); + } else { + setResultType(std::unique_ptr(new Int64ResultNode)); + _handler.reset(new SingleValueHandler(*this)); + } +} + +int32_t ZCurveFunctionNode::Handler::getXorY(uint64_t z) const +{ + int32_t x, y; + vespalib::geo::ZCurve::decode(z, &x, &y); + return (_dim==X) ? 
x : y; +} + +bool ZCurveFunctionNode::onExecute() const +{ + getArg().execute(); + _handler->handle(getArg().getResult()); + return true; +} + +void ZCurveFunctionNode::SingleValueHandler::handle(const ResultNode & arg) +{ + handleOne(arg, _result); +} + +void ZCurveFunctionNode::MultiValueHandler::handle(const ResultNode & arg) +{ + const ResultNodeVector & v(static_cast(arg)); + _result.getVector().resize(v.size()); + for(size_t i(0), m(_result.getVector().size()); i < m; i++) { + handleOne(v.get(i), _result.getVector()[i]); + } +} + +Serializer & ZCurveFunctionNode::onSerialize(Serializer & os) const +{ + UnaryFunctionNode::onSerialize(os); + uint8_t code(_dim); + return os << code; +} + +Deserializer & ZCurveFunctionNode::onDeserialize(Deserializer & is) +{ + UnaryFunctionNode::onDeserialize(is); + uint8_t code(0); + is >> code; + _dim = static_cast(code); + return is; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_expression_zcurve() {} diff --git a/searchlib/src/vespa/searchlib/expression/zcurve.h b/searchlib/src/vespa/searchlib/expression/zcurve.h new file mode 100644 index 00000000000..88d2a7938a7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/expression/zcurve.h @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { +namespace expression { + +class ZCurveFunctionNode : public UnaryFunctionNode +{ +public: + enum Dimension {X=0, Y=1}; + DECLARE_EXPRESSIONNODE(ZCurveFunctionNode); + DECLARE_NBO_SERIALIZE; + ZCurveFunctionNode() : _dim(X) { } + ZCurveFunctionNode(const ExpressionNode::CP & arg, Dimension dim) : UnaryFunctionNode(arg), _dim(dim) { } + ZCurveFunctionNode(const ZCurveFunctionNode & rhs); + ZCurveFunctionNode & operator = (const ZCurveFunctionNode & rhs); + Dimension getDim() const { return _dim; } +private: + class Handler { + public: + Handler(Dimension dim) : _dim(dim) { } + virtual ~Handler() { } + virtual void handle(const ResultNode & arg) = 0; + protected: + void handleOne(const ResultNode & arg, Int64ResultNode & result) const { + result.set(getXorY(arg.getInteger())); + } + private: + int32_t getXorY(uint64_t z) const; + Dimension _dim; + }; + class SingleValueHandler : public Handler { + public: + SingleValueHandler(ZCurveFunctionNode & ts) : Handler(ts.getDim()), _result(static_cast(ts.updateResult())) { } + virtual void handle(const ResultNode & arg); + private: + Int64ResultNode & _result; + }; + class MultiValueHandler : public Handler { + public: + MultiValueHandler(ZCurveFunctionNode & ts) : Handler(ts.getDim()), _result(static_cast(ts.updateResult())) { } + virtual void handle(const ResultNode & arg); + private: + IntegerResultNodeVector & _result; + }; + + virtual bool onExecute() const; + virtual void onPrepareResult(); + Dimension _dim; + std::unique_ptr _handler; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/features/.gitignore b/searchlib/src/vespa/searchlib/features/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/features/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/CMakeLists.txt new file 
mode 100644 index 00000000000..ec21aa87fae --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/CMakeLists.txt @@ -0,0 +1,64 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_features + SOURCES + agefeature.cpp + array_parser.cpp + attributefeature.cpp + attributematchfeature.cpp + closenessfeature.cpp + debug_attribute_wait.cpp + debug_wait.cpp + distancefeature.cpp + distancetopathfeature.cpp + dotproductfeature.cpp + element_completeness_feature.cpp + element_similarity_feature.cpp + euclidean_distance_feature.cpp + fieldinfofeature.cpp + fieldlengthfeature.cpp + fieldmatchfeature.cpp + fieldtermmatchfeature.cpp + firstphasefeature.cpp + flow_completeness_feature.cpp + foreachfeature.cpp + freshnessfeature.cpp + item_raw_score_feature.cpp + jarowinklerdistancefeature.cpp + matchesfeature.cpp + matchfeature.cpp + native_dot_product_feature.cpp + nativeattributematchfeature.cpp + nativefieldmatchfeature.cpp + nativeproximityfeature.cpp + nativerankfeature.cpp + nowfeature.cpp + proximityfeature.cpp + querycompletenessfeature.cpp + queryfeature.cpp + queryterm.cpp + querytermcountfeature.cpp + randomfeature.cpp + rankingexpressionfeature.cpp + raw_score_feature.cpp + reverseproximityfeature.cpp + setup.cpp + subqueries_feature.cpp + tensor_factory_blueprint.cpp + tensor_from_labels_feature.cpp + tensor_from_tensor_attribute_executor.cpp + tensor_from_weighted_set_feature.cpp + term_field_md_feature.cpp + termdistancecalculator.cpp + termdistancefeature.cpp + termeditdistancefeature.cpp + termfeature.cpp + terminfofeature.cpp + text_similarity_feature.cpp + utils.cpp + valuefeature.cpp + weighted_set_parser.cpp + $ + $ + INSTALL lib64 + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/features/OWNERS b/searchlib/src/vespa/searchlib/features/OWNERS new file mode 100644 index 00000000000..12b533ec610 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/features/OWNERS @@ -0,0 +1 @@ +havardpe diff --git a/searchlib/src/vespa/searchlib/features/agefeature.cpp b/searchlib/src/vespa/searchlib/features/agefeature.cpp new file mode 100644 index 00000000000..0b8c652b5c7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/agefeature.cpp @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.agefeature"); + +#include +#include +#include "agefeature.h" +#include "valuefeature.h" + +using search::attribute::IAttributeVector; + +namespace search { + +typedef fef::FeatureNameBuilder FNB; + +namespace features { + +AgeExecutor::AgeExecutor(const IAttributeVector *attribute) : + search::fef::FeatureExecutor(), + _attribute(attribute), + _buf() +{ + if (_attribute != NULL) { + _buf.allocate(attribute->getMaxValueCount()); + } +} + +void +AgeExecutor::execute(search::fef::MatchData &data) +{ + feature_t age = 10000000000.0; + if (_attribute != NULL) { + _buf.fill(*_attribute, data.getDocId()); + int64_t docTime = _buf[0]; + feature_t currTime = *data.resolveFeature(inputs()[0]); + age = currTime - docTime; + if (age < 0) { + age = 0; + } + } + *data.resolveFeature(outputs()[0]) = age; +} + +void +AgeBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +AgeBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + _attribute = params[0].getValue(); + defineInput("now"); + + describeOutput("out", "The age of the document, in seconds."); + env.hintAttributeAccess(_attribute); + return true; +} + +search::fef::Blueprint::UP +AgeBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new AgeBlueprint()); +} + +search::fef::FeatureExecutor::LP +AgeBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + // Get 
docdate attribute vector + const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attribute); + return search::fef::FeatureExecutor::LP(new AgeExecutor(attribute)); +} + +} +} diff --git a/searchlib/src/vespa/searchlib/features/agefeature.h b/searchlib/src/vespa/searchlib/features/agefeature.h new file mode 100644 index 00000000000..795d04798f8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/agefeature.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the documentage feature outputting the + * difference between document time (stored in an attribute) and current + * system time + **/ +class AgeExecutor : public search::fef::FeatureExecutor { +private: + const search::attribute::IAttributeVector *_attribute; + search::attribute::IntegerContent _buf; + +public: + /** + * Constructs a new executor. + **/ + AgeExecutor(const search::attribute::IAttributeVector *attribute); + virtual void execute(search::fef::MatchData & data); +}; + +/** + * Implements the blueprint for 'documentage' feature. It uses the 'now' feature + * to get current time and reads document time from a specified attribute + */ +class AgeBlueprint : public search::fef::Blueprint { +private: + vespalib::string _attribute; + +public: + AgeBlueprint() : search::fef::Blueprint("age") { } + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/features/array_parser.cpp b/searchlib/src/vespa/searchlib/features/array_parser.cpp new file mode 100644 index 00000000000..0faac986033 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/array_parser.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.array_parser"); + +#include "array_parser.h" + +namespace search { +namespace features { + +void +ArrayParser::logWarning(const vespalib::string &msg) +{ + LOG(warning, "%s", msg.c_str()); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/array_parser.h b/searchlib/src/vespa/searchlib/features/array_parser.h new file mode 100644 index 00000000000..07b9d09b277 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/array_parser.h @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace features { + +/** + * Utility for parsing a string representation of an array with values (numeric or string) + * that is typically passed down with the query. + * + * The format of the array is as follows: + * 1) Dense form: [value0 value1 ... valueN] (where value0 has index 0) + * + * 2) Sparse form: {idxA:valueA,idxB:valueB,...,idxN:valueN}. + * In the sparse form all non-specified indexes get the value 0.0 and + * has values for indexes in the range [0,max index specified]. + * The parsed array is sorted in index order. 
+ */ +class ArrayParser +{ +private: + static void logWarning(const vespalib::string &msg); + +public: + template + class ValueAndIndex { + public: + typedef T ValueType; + ValueAndIndex(T value, uint32_t index) : _value(value), _index(index) { } + T getValue() const { return _value; } + uint32_t getIndex() const { return _index; } + bool operator < (const ValueAndIndex & b) const { return _index < b._index; } + private: + T _value; + uint32_t _index; + }; + + template + static void parse(const vespalib::string &input, OutputType &output); + + template + static void parsePartial(const vespalib::string &input, OutputType &output); +}; + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/array_parser.hpp b/searchlib/src/vespa/searchlib/features/array_parser.hpp new file mode 100644 index 00000000000..cfa161798ab --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/array_parser.hpp @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "array_parser.h" +#include +#include +#include +#include + +namespace search { +namespace features { + +template +void +ArrayParser::parse(const vespalib::string &input, OutputType &output) +{ + typedef std::vector> SparseVector; + SparseVector sparse; + parsePartial(input, sparse); + std::sort(sparse.begin(), sparse.end()); + if ( ! 
sparse.empty() ) { + output.resize(sparse.back().getIndex()+1); + for (const typename SparseVector::value_type &elem : sparse) { + output[elem.getIndex()] = elem.getValue(); + } + } +} + +template +void +ArrayParser::parsePartial(const vespalib::string &input, OutputType &output) +{ + size_t len = input.size(); + if (len >= 2) { + vespalib::stringref s(input.c_str()+1, len - 2); + typedef typename OutputType::value_type ValueAndIndexType; + typename ValueAndIndexType::ValueType value; + if ((input[0] == '{' && input[len - 1] == '}') || + (input[0] == '(' && input[len - 1] == ')') ) { + size_t key; + char colon; + while ( ! s.empty() ) { + vespalib::string::size_type commaPos(s.find(',')); + vespalib::stringref item(s.substr(0, commaPos)); + vespalib::asciistream is(item); + try { + is >> key >> colon >> value; + if ((colon == ':') && is.eof()) { + output.push_back(ValueAndIndexType(value, key)); + } else { + logWarning(vespalib::make_string( + "Could not parse item '%s' in query vector '%s', skipping. " + "Expected ':' between dimension and component.", + vespalib::string(item).c_str(), input.c_str())); + return; + } + } catch (vespalib::IllegalArgumentException & e) { + logWarning(vespalib::make_string( + "Could not parse item '%s' in query vector '%s', skipping. " + "Incorrect type of operands", vespalib::string(item).c_str(), input.c_str())); + return; + } + if (commaPos != vespalib::string::npos) { + s = s.substr(commaPos+1); + } else { + s = vespalib::stringref(); + } + } + } else if (len >= 2 && input[0] == '[' && input[len - 1] == ']') { + vespalib::asciistream is(s); + uint32_t index(0); + while (!is.eof()) { + try { + is >> value; + output.push_back(ValueAndIndexType(value, index++)); + } catch (vespalib::IllegalArgumentException & e) { + logWarning(vespalib::make_string( + "Could not parse item[%zu] = '%s' in query vector '%s', skipping.
" + "Incorrect type of operands", output.size(), is.c_str(), input.c_str())); + return; + } + } + } + } else { + logWarning(vespalib::make_string( + "Could not parse query vector '%s'. Expected surrounding '[' and ']', '(' and ')' or '{' and '}'.", + input.c_str())); + } +} + +template void +ArrayParser::parse(const vespalib::string &input, std::vector &); + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.cpp b/searchlib/src/vespa/searchlib/features/attributefeature.cpp new file mode 100644 index 00000000000..b6eb2421ff5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/attributefeature.cpp @@ -0,0 +1,433 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.attributefeature"); +#include "attributefeature.h" +#include "utils.h" +#include "valuefeature.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::attribute::IAttributeVector; +using search::attribute::CollectionType; +using search::attribute::BasicType; +using search::attribute::ConstCharContent; +using search::attribute::IntegerContent; +using search::attribute::FloatContent; +using search::attribute::TensorAttribute; +using search::attribute::WeightedConstCharContent; +using search::attribute::WeightedIntegerContent; +using search::attribute::WeightedFloatContent; +using search::fef::FeatureExecutor; +using search::features::util::ConstCharPtr; +using vespalib::tensor::TensorType; +using vespalib::eval::ValueType; +using search::fef::FeatureType; + +using namespace search::fef::indexproperties; + +namespace { +template +bool equals(const X & lhs, const Y & rhs) { + return lhs == rhs; +} + +template <> +bool equals(const ConstCharPtr & lhs, const vespalib::stringref & rhs) { + return strcmp(lhs, rhs.c_str()) == 0; +} + +template +bool +isUndefined(const T & value, const
BasicType::Type & type) +{ + switch (type) { + case BasicType::INT8: + return search::attribute::isUndefined(static_cast(value)); + case BasicType::INT16: + return search::attribute::isUndefined(static_cast(value)); + case BasicType::INT32: + return search::attribute::isUndefined(static_cast(value)); + case BasicType::INT64: + return search::attribute::isUndefined(static_cast(value)); + case BasicType::FLOAT: + return search::attribute::isUndefined(static_cast(value)); + case BasicType::DOUBLE: + return search::attribute::isUndefined(static_cast(value)); + default: + return false; + } +} + +template <> +bool +isUndefined(const vespalib::stringref &, const BasicType::Type &) +{ + return false; +} + +template +search::feature_t +considerUndefined(const T & value, const BasicType::Type & type) +{ + if (isUndefined(value, type)) { + return search::attribute::getUndefined(); + } + return search::features::util::getAsFeature(value); +} + +template <> +search::feature_t +considerUndefined(const ConstCharPtr & value, const BasicType::Type &) +{ + return search::features::util::getAsFeature(value); +} + + +} + + +namespace search { +namespace features { + +/** + * Implements the executor for fetching values from a single or array attribute vector + */ +template +class SingleAttributeExecutor : public fef::FeatureExecutor { +private: + const T & _attribute; + +public: + /** + * Constructs an executor. + * + * @param attribute The attribute vector to use. + */ + SingleAttributeExecutor(const T & attribute) : _attribute(attribute) { } + + // Inherit doc from FeatureExecutor. + virtual void execute(search::fef::MatchData & data); +}; + +class CountOnlyAttributeExecutor : public fef::FeatureExecutor { +private: + const attribute::IAttributeVector & _attribute; + +public: + /** + * Constructs an executor. + * + * @param attribute The attribute vector to use. 
+ */ + CountOnlyAttributeExecutor(const attribute::IAttributeVector & attribute) : _attribute(attribute) { } + + // Inherit doc from FeatureExecutor. + virtual void execute(search::fef::MatchData & data); +}; +/** + * Implements the executor for fetching the value at a given index from a single or array attribute vector + */ +template +class AttributeExecutor : public fef::FeatureExecutor { +private: + const attribute::IAttributeVector * _attribute; + attribute::BasicType::Type _attrType; + uint32_t _idx; + T _buffer; // used when fetching values from the attribute + feature_t _defaultCount; + +public: + /** + * Constructs an executor. + * + * @param attribute The attribute vector to use. + * @param idx The index used for an array attribute. + */ + AttributeExecutor(const search::attribute::IAttributeVector * attribute, uint32_t idx); + + // Inherit doc from FeatureExecutor. + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the executor for fetching weights from a weighted set attribute + */ +template +class WeightedSetAttributeExecutor : public fef::FeatureExecutor { +private: + const attribute::IAttributeVector * _attribute; + attribute::BasicType::Type _attrType; + BT _buffer; // used when fetching values and weights from the attribute + T _key; // the key to find a weight for + bool _useKey; + +public: + /** + * Constructs an executor. + * + * @param attribute The attribute vector to use. + * @param key The key to find a corresponding weight for. + * @param useKey Whether we should consider the key. + */ + WeightedSetAttributeExecutor(const search::attribute::IAttributeVector * attribute, T key, bool useKey); + + // Inherit doc from FeatureExecutor.
+ virtual void execute(search::fef::MatchData & data); +}; + +template +void +SingleAttributeExecutor::execute(search::fef::MatchData & match) +{ + typename T::LoadedValueType v = _attribute.getFast(match.getDocId()); + // value + *match.resolveFeature(outputs()[0]) = __builtin_expect(attribute::isUndefined(v), false) + ? attribute::getUndefined() + : util::getAsFeature(v); + *match.resolveFeature(outputs()[1]) = 0.0f; // weight + *match.resolveFeature(outputs()[2]) = 0.0f; // contains + *match.resolveFeature(outputs()[3]) = 1.0f; // count +} + +void +CountOnlyAttributeExecutor::execute(search::fef::MatchData & match) +{ + *match.resolveFeature(outputs()[0]) = 0.0f; // value + *match.resolveFeature(outputs()[1]) = 0.0f; // weight + *match.resolveFeature(outputs()[2]) = 0.0f; // contains + *match.resolveFeature(outputs()[3]) = _attribute.getValueCount(match.getDocId()); // count +} + +template +AttributeExecutor::AttributeExecutor(const IAttributeVector * attribute, uint32_t idx) : + fef::FeatureExecutor(), + _attribute(attribute), + _attrType(attribute->getBasicType()), + _idx(idx), + _buffer(), + _defaultCount((attribute->getCollectionType() == CollectionType::ARRAY) ? 
0 : 1) +{ + _buffer.allocate(_attribute->getMaxValueCount()); +} + +template +void +AttributeExecutor::execute(search::fef::MatchData & match) +{ + feature_t value = 0.0f; + _buffer.fill(*_attribute, match.getDocId()); + if (_idx < _buffer.size()) { + value = considerUndefined(_buffer[_idx], _attrType); + } + *match.resolveFeature(outputs()[0]) = value; // value + *match.resolveFeature(outputs()[1]) = 0.0f; // weight + *match.resolveFeature(outputs()[2]) = 0.0f; // contains + *match.resolveFeature(outputs()[3]) = _defaultCount; // count +} + + +template +WeightedSetAttributeExecutor::WeightedSetAttributeExecutor(const IAttributeVector * attribute, T key, bool useKey) : + fef::FeatureExecutor(), + _attribute(attribute), + _attrType(attribute->getBasicType()), + _buffer(), + _key(key), + _useKey(useKey) +{ +} + +template +void +WeightedSetAttributeExecutor::execute(search::fef::MatchData & match) +{ + feature_t value = 0.0f; + feature_t weight = 0.0f; + feature_t contains = 0.0f; + feature_t count = 0.0f; + if (_useKey) { + _buffer.fill(*_attribute, match.getDocId()); + for (uint32_t i = 0; i < _buffer.size(); ++i) { + if (equals(_buffer[i].getValue(), _key)) { + value = considerUndefined(_key, _attrType); + weight = static_cast(_buffer[i].getWeight()); + contains = 1.0f; + break; + } + } + } else { + count = _attribute->getValueCount(match.getDocId()); + } + *match.resolveFeature(outputs()[0]) = value; // value + *match.resolveFeature(outputs()[1]) = weight; // weight + *match.resolveFeature(outputs()[2]) = contains; // contains + *match.resolveFeature(outputs()[3]) = count; // count +} + + +AttributeBlueprint::AttributeBlueprint() : + search::fef::Blueprint("attribute"), + _attrName(), + _extra(), + _tensorType(TensorType::number()) +{ +} + +void +AttributeBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +bool +AttributeBlueprint::setup(const search::fef::IIndexEnvironment & env, + const 
search::fef::ParameterList & params) +{ + // params[0] = attribute name + // params[1] = index (array attribute) or key (weighted set attribute) + _attrName = params[0].getValue(); + if (params.size() == 2) { + _extra = params[1].getValue(); + } + vespalib::string attrType = type::Attribute::lookup(env.getProperties(), _attrName); + if (!attrType.empty()) { + _tensorType = TensorType::fromSpec(attrType); + } + FeatureType output_type = _tensorType.is_tensor() + ? FeatureType::object(_tensorType.as_value_type()) + : FeatureType::number(); + describeOutput("value", "The value of a single value attribute, " + "the value at the given index of an array attribute, " + "the given key of a weighted set attribute, or " + "the tensor of a tensor attribute", output_type); + if (!_tensorType.is_tensor()) { + describeOutput("weight", "The weight associated with the given key in a weighted set attribute."); + describeOutput("contains", "1 if the given key is present in a weighted set attribute, 0 otherwise."); + describeOutput("count", "Returns the number of elements in this array or weighted set attribute."); + } + env.hintAttributeAccess(_attrName); + return true; +} + +search::fef::Blueprint::UP +AttributeBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new AttributeBlueprint()); +} + +#define CREATE_AND_RETURN_IF_SINGLE_NUMERIC(a, T) \ + if (dynamic_cast *>(a) != NULL) { \ + return FeatureExecutor::LP(new SingleAttributeExecutor>(*static_cast *>(a))); \ + } + +namespace { + +search::fef::FeatureExecutor::LP +createAttributeExecutor(const IAttributeVector *attribute, const vespalib::string &attrName, const vespalib::string &extraParam) +{ + if (attribute == NULL) { + LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning default values.", + attrName.c_str()); + std::vector values(4, 0.0f); + return FeatureExecutor::LP(new ValueExecutor(values)); + } + if (attribute->getCollectionType() == CollectionType::WSET) { + bool
useKey = !extraParam.empty(); + if (useKey) { + if (attribute->isStringType()) { + return FeatureExecutor::LP + (new WeightedSetAttributeExecutor(attribute, extraParam, useKey)); + } else if (attribute->isIntegerType()) { + return FeatureExecutor::LP + (new WeightedSetAttributeExecutor(attribute, util::strToNum(extraParam), useKey)); + } else { // FLOAT + return FeatureExecutor::LP + (new WeightedSetAttributeExecutor(attribute, util::strToNum(extraParam), useKey)); + } + } else { + return FeatureExecutor::LP(new CountOnlyAttributeExecutor(*attribute)); + } + } else { // SINGLE or ARRAY + if ((attribute->getCollectionType() == CollectionType::SINGLE) && (attribute->isIntegerType() || attribute->isFloatingPointType())) { + CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, FloatingPointAttributeTemplate); + CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, FloatingPointAttributeTemplate); + CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, IntegerAttributeTemplate); + CREATE_AND_RETURN_IF_SINGLE_NUMERIC(attribute, IntegerAttributeTemplate); + } + { + uint32_t idx = 0; + if (!extraParam.empty()) { + idx = util::strToNum(extraParam); + } else if (attribute->getCollectionType() == CollectionType::ARRAY) { + return FeatureExecutor::LP(new CountOnlyAttributeExecutor(*attribute)); + } + if (attribute->isStringType()) { + return FeatureExecutor::LP(new AttributeExecutor(attribute, idx)); + } else if (attribute->isIntegerType()) { + return FeatureExecutor::LP(new AttributeExecutor(attribute, idx)); + } else { // FLOAT + return FeatureExecutor::LP(new AttributeExecutor(attribute, idx)); + } + } + } +} + +search::fef::FeatureExecutor::LP +createTensorAttributeExecutor(const IAttributeVector *attribute, const vespalib::string &attrName, + const TensorType &tensorType) +{ + if (attribute == NULL) { + LOG(warning, "The attribute vector '%s' was not found in the attribute manager." 
+ " Returning empty tensor.", attrName.c_str()); + return ConstantTensorExecutor::createEmpty(); + } + if (attribute->getCollectionType() != search::attribute::CollectionType::SINGLE || + attribute->getBasicType() != search::attribute::BasicType::TENSOR) { + LOG(warning, "The attribute vector '%s' is NOT of type tensor." + " Returning empty tensor.", attribute->getName().c_str()); + return ConstantTensorExecutor::createEmpty(); + } + const TensorAttribute *tensorAttribute = dynamic_cast(attribute); + if (tensorAttribute == nullptr) { + LOG(warning, "The attribute vector '%s' could not be converted to a tensor attribute." + " Returning empty tensor.", attribute->getName().c_str()); + return ConstantTensorExecutor::createEmpty(); + } + if (tensorType != tensorAttribute->getConfig().tensorType()) { + LOG(warning, "The tensor attribute '%s' has tensor type '%s'," + " while the feature executor expects type '%s'. Returning empty tensor.", + tensorAttribute->getName().c_str(), + tensorAttribute->getConfig().tensorType().toSpec().c_str(), + tensorType.toSpec().c_str()); + return ConstantTensorExecutor::createEmpty(); + } + return FeatureExecutor::LP(new TensorFromTensorAttributeExecutor(tensorAttribute)); +} + +} + +search::fef::FeatureExecutor::LP +AttributeBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + const IAttributeVector *attribute = env.getAttributeContext().getAttribute(_attrName); + if (_tensorType.is_tensor()) { + return createTensorAttributeExecutor(attribute, _attrName, _tensorType); + } else { + return createAttributeExecutor(attribute, _attrName, _extra); + } +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/attributefeature.h b/searchlib/src/vespa/searchlib/features/attributefeature.h new file mode 100644 index 00000000000..c25b2b558b1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/attributefeature.h @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + + +namespace search { +namespace features { + + +/** + * Implements the blueprint for the attribute executor. + * + * An executor of this outputs number(s) if used with regular attributes + * or a tensor value if used with tensor attributes. + */ +class AttributeBlueprint : public search::fef::Blueprint { +private: + vespalib::string _attrName; // the name of the attribute vector + vespalib::string _extra; // the index or key + vespalib::tensor::TensorType _tensorType; + +public: + /** + * Constructs a blueprint. + */ + AttributeBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions(). + desc().attribute(search::fef::ParameterCollection::ANY). + desc().attribute(search::fef::ParameterCollection::ANY).string(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp b/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp new file mode 100644 index 00000000000..bca39ef4b73 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/attributematchfeature.cpp @@ -0,0 +1,350 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.attributematchfeature"); + +#include +#include +#include +#include +#include "attributematchfeature.h" +#include "utils.h" +#include "valuefeature.h" + +using namespace search::attribute; +using namespace search::fef; +using search::feature_t; + +namespace { +feature_t adjustToOne(feature_t value) { + if (value > 1.0f) { + return 1.0f; + } + return value; +} + +bool hasAttribute(const IQueryEnvironment &env, const ITermData &term_data) +{ + typedef ITermFieldRangeAdapter FRA; + + for (FRA iter(term_data); iter.valid(); iter.next()) { + const FieldInfo *info = env.getIndexEnvironment().getField(iter.get().getFieldId()); + if (info != 0 && info->type() == FieldType::ATTRIBUTE) { + return true; + } + } + return false; +} +} // namespace + +namespace search { +namespace features { + +template +AttributeMatchExecutor::Computer::Computer(const IQueryEnvironment & env, AttributeMatchParams params) : + _params(params), + _buffer(), + _numAttrTerms(0), + _totalTermWeight(0), + _totalTermSignificance(0), + _totalAttrTermWeight(0), + _queryTerms(), + _matches(0), + _matchedTermWeight(0), + _matchedTermSignificance(0), + _totalWeight(0), + _normalizedWeightedWeight(0), + _weightSum(0), + _valueCount(0) +{ + _buffer.allocate(_params.attribute->getMaxValueCount()); + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + QueryTerm qt = QueryTermFactory::create(env, i); + _totalTermWeight += qt.termData()->getWeight().percent(); + _totalTermSignificance += qt.significance(); + + if (hasAttribute(env, *qt.termData())) { + _numAttrTerms++; + _totalAttrTermWeight += qt.termData()->getWeight().percent(); + const ITermFieldData *field = qt.termData()->lookupField(_params.attrInfo->id()); + if (field != 0) { + qt.fieldHandle(field->getHandle()); + _queryTerms.push_back(qt); + } + } + } + LOG(debug, "attributeMatch(%s): weightedSet(%s), numAttributeTerms(%u), totalAttrTermWeight(%u), numTerms(%u), " + "totalTermWeight(%u), 
totalTermSignificance(%f)", + _params.attrInfo->name().c_str(), _params.weightedSet ? "true" : "false", + _numAttrTerms, _totalAttrTermWeight, getNumTerms(), _totalTermWeight, _totalTermSignificance); +} + +template +void +AttributeMatchExecutor::Computer::reset() +{ + _matches = 0; + _matchedTermWeight = 0, + _matchedTermSignificance = 0, + _totalWeight = 0; + _normalizedWeightedWeight = 0; + _weightSum = 0; + _valueCount = 0; +} + +template +void +AttributeMatchExecutor::Computer::run(MatchData & match) +{ + for (size_t i = 0; i < _queryTerms.size(); ++i) { + const ITermData * td = _queryTerms[i].termData(); + feature_t significance = _queryTerms[i].significance(); + const TermFieldMatchData *tfmd = match.resolveTermField(_queryTerms[i].fieldHandle()); + if (tfmd->getDocId() == match.getDocId()) { // hit on this document + _matches++; + _matchedTermWeight += td->getWeight().percent(); + _matchedTermSignificance += significance; + if (_params.weightedSet) { + int32_t weight = tfmd->getWeight(); + _totalWeight += weight; + // attribute weight * query term weight + _normalizedWeightedWeight += weight * static_cast(td->getWeight().percent()); + } + } + } + if (_params.weightedSet) { + _buffer.fill(*_params.attribute, match.getDocId()); + for (uint32_t i = 0; i < _buffer.size(); ++i) { + _weightSum += _buffer[i].getWeight(); + } + } else { + _valueCount = _params.attribute->getValueCount(match.getDocId()); + } + + LOG(debug, "attributeMatch(%s)::Computer::run(): matches(%u), totalWeight(%d), normalizedWeightedWeight(%f), " + "weightSum(%d), valueCount(%u), matchedTermWeight(%u), matchedTermSignificance(%f)", + _params.attrInfo->name().c_str(), _matches, _totalWeight, _normalizedWeightedWeight, + _weightSum, _valueCount, _matchedTermWeight, _matchedTermSignificance); +} + +template +feature_t +AttributeMatchExecutor::Computer::getAverageWeight() const +{ + if (_matches != 0) { + return (_totalWeight / static_cast(_matches)); + } + return 0; +} + +template +feature_t 
+AttributeMatchExecutor::Computer::getQueryCompleteness() const +{ + if (getNumTerms() != 0) { + return (_matches / static_cast(getNumTerms())); + } + return 0; +} + +template +feature_t +AttributeMatchExecutor::Computer::getNormalizedWeight() const +{ + if (_params.weightedSet) { + feature_t normalizedWeight = _totalWeight > 0 ? _totalWeight / ((feature_t)_params.maxWeight * _numAttrTerms) : 0.0f; + return adjustToOne(normalizedWeight); + } + return 0; +} + +template +feature_t +AttributeMatchExecutor::Computer::getNormalizedWeightedWeight() const +{ + if (_params.weightedSet) { + feature_t divider = _totalAttrTermWeight > 0 ? ((feature_t)_params.maxWeight * _totalAttrTermWeight) : _params.maxWeight; + feature_t normalized = _normalizedWeightedWeight > 0 ? _normalizedWeightedWeight / divider : 0.0f; + return adjustToOne(normalized); + } + return 0; +} + +template +feature_t +AttributeMatchExecutor::Computer::getFieldCompleteness() const +{ + if (_params.weightedSet) { + if (_totalWeight <= 0) { + return 0; + } else if (_weightSum <= 0) { + return 1; + } else { + feature_t fieldCompleteness = (_totalWeight / static_cast(_weightSum)); + return adjustToOne(fieldCompleteness); + } + } else { + if (_valueCount > 0) { + feature_t fieldCompleteness = _matches / static_cast(_valueCount); + return adjustToOne(fieldCompleteness); + } else { + return 0; + } + } +} + +template +feature_t +AttributeMatchExecutor::Computer::getCompleteness() const +{ + return (getQueryCompleteness() * ( 1.0f - _params.fieldCompletenessImportance + + (_params.fieldCompletenessImportance * getFieldCompleteness()) )); +} + +template +feature_t +AttributeMatchExecutor::Computer::getWeight() const +{ + if (_totalTermWeight > 0) { + return (feature_t)_matchedTermWeight / _totalTermWeight; + } + return 0; +} + +template +feature_t +AttributeMatchExecutor::Computer::getSignificance() const +{ + if (_totalTermSignificance > 0) { + return (feature_t)_matchedTermSignificance / _totalTermSignificance; + } 
+ return 0; +} + +template +AttributeMatchExecutor::AttributeMatchExecutor(const IQueryEnvironment & env, AttributeMatchParams params) : + FeatureExecutor(), + _cmp(env, params) +{ +} + + +template +void +AttributeMatchExecutor::execute(MatchData & match) +{ + //LOG(debug, "Execute for field '%s':", _params.attrInfo->name().c_str()); + _cmp.reset(); + _cmp.run(match); + + *match.resolveFeature(outputs()[0]) = _cmp.getCompleteness(); + *match.resolveFeature(outputs()[1]) = _cmp.getQueryCompleteness(); + *match.resolveFeature(outputs()[2]) = _cmp.getFieldCompleteness(); + *match.resolveFeature(outputs()[3]) = _cmp.getNormalizedWeight(); + *match.resolveFeature(outputs()[4]) = _cmp.getNormalizedWeightedWeight(); + *match.resolveFeature(outputs()[5]) = _cmp.getWeight(); + *match.resolveFeature(outputs()[6]) = _cmp.getSignificance(); + *match.resolveFeature(outputs()[7]) = _cmp.getImportance(); + *match.resolveFeature(outputs()[8]) = static_cast(_cmp.getMatches()); + *match.resolveFeature(outputs()[9]) = static_cast(_cmp.getTotalWeight()); + *match.resolveFeature(outputs()[10]) = _cmp.getAverageWeight(); +} + + +AttributeMatchBlueprint::AttributeMatchBlueprint() : + Blueprint("attributeMatch"), + _params() +{ + // empty +} + +void +AttributeMatchBlueprint::visitDumpFeatures(const IIndexEnvironment &env, + IDumpFeatureVisitor &visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const FieldInfo * field = env.getField(i); + if (field->type() == FieldType::ATTRIBUTE) { + FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field->name()); + visitor.visitDumpFeature(fnb.buildName()); + visitor.visitDumpFeature(fnb.output("completeness").buildName()); + visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName()); + visitor.visitDumpFeature(fnb.output("fieldCompleteness").buildName()); + visitor.visitDumpFeature(fnb.output("normalizedWeight").buildName()); + visitor.visitDumpFeature(fnb.output("normalizedWeightedWeight").buildName()); 
+ visitor.visitDumpFeature(fnb.output("weight").buildName()); + visitor.visitDumpFeature(fnb.output("significance").buildName()); + visitor.visitDumpFeature(fnb.output("importance").buildName()); + visitor.visitDumpFeature(fnb.output("matches").buildName()); + visitor.visitDumpFeature(fnb.output("totalWeight").buildName()); + visitor.visitDumpFeature(fnb.output("averageWeight").buildName()); + } + } +} + +Blueprint::UP +AttributeMatchBlueprint::createInstance() const +{ + return Blueprint::UP(new AttributeMatchBlueprint()); +} + +bool +AttributeMatchBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + // params[0] = attribute name + _params.attrInfo = params[0].asField(); + _params.maxWeight = util::strToNum(env.getProperties().lookup(getName(), "maxWeight").get("256")); + _params.fieldCompletenessImportance = + util::strToNum(env.getProperties().lookup(getName(), "fieldCompletenessImportance").get("0.05")); + + // normalized + describeOutput("completeness", "The normalized total completeness, where field completeness is more important"); + describeOutput("queryCompleteness", "The query completeness for this attribute: matches/the number of query terms searching this attribute"); + describeOutput("fieldCompleteness", "The normalized ratio of query tokens which was matched in the field"); + describeOutput("normalizedWeight", "A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to the maxWeight configuration value), 0 otherwise"); + describeOutput("normalizedWeightedWeight", "A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to the maxWeight configuration value), and where highly weighted query terms has more impact, 0 otherwise"); + // normalized and relative to the whole query + describeOutput("weight", "The normalized weight of this match relative to the whole query"); + describeOutput("significance", "Returns the 
normalized term significance of the terms of this match relative to the whole query"); + describeOutput("importance", "Returns the average of significance and weight"); + + // not normalized + describeOutput("matches", "The number of query terms which was matched in this attribute"); + describeOutput("totalWeight", "The sum of the weights of the attribute keys matched in a weighted set attribute"); + describeOutput("averageWeight", "totalWeight/matches"); + + env.hintAttributeAccess(_params.attrInfo->name()); + return true; +} + +FeatureExecutor::LP +AttributeMatchBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_params.attrInfo->name()); + if (attribute == NULL) { + LOG(error, "The attribute vector '%s' was not found in the attribute manager.", _params.attrInfo->name().c_str()); + return FeatureExecutor::LP(NULL); + } + + AttributeMatchParams amp = _params; + amp.attribute = attribute; + amp.weightedSet = attribute->getCollectionType() == attribute::CollectionType::WSET; + + if (attribute->isStringType()) { + return FeatureExecutor::LP + (new AttributeMatchExecutor(env, amp)); + } else if (attribute->isIntegerType()) { + return FeatureExecutor::LP + (new AttributeMatchExecutor(env, amp)); + } else { // FLOAT + return FeatureExecutor::LP + (new AttributeMatchExecutor(env, amp)); + } +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/attributematchfeature.h b/searchlib/src/vespa/searchlib/features/attributematchfeature.h new file mode 100644 index 00000000000..391b92ce2b2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/attributematchfeature.h @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include "queryterm.h" + +namespace search { +namespace features { + +struct AttributeMatchParams { + AttributeMatchParams() : + attrInfo(NULL), attribute(NULL), weightedSet(false), maxWeight(256), fieldCompletenessImportance(0.05f) {} + const search::fef::FieldInfo * attrInfo; + const search::attribute::IAttributeVector * attribute; + bool weightedSet; + // config values + int32_t maxWeight; + feature_t fieldCompletenessImportance; +}; + +/** + * Implements the executor for the attribute match feature. + */ +template +class AttributeMatchExecutor : public search::fef::FeatureExecutor { +private: + /** + * This class is used to compute metrics for match in an attribute vector. + */ + class Computer { + private: + // TermData pointer and significance + AttributeMatchParams _params; + mutable T _buffer; // used when fetching weights from a weighted set attribute + + // per query + uint32_t _numAttrTerms; + uint32_t _totalTermWeight; // total weight of all terms + feature_t _totalTermSignificance; // total significance of all terms + uint32_t _totalAttrTermWeight; // weight of all attribute terms + QueryTermVector _queryTerms; // the terms searching this attribute + + // per doc + uint32_t _matches; + uint32_t _matchedTermWeight; // term weight of matched terms + feature_t _matchedTermSignificance; // significance of matched terms + int32_t _totalWeight; + feature_t _normalizedWeightedWeight; + int32_t _weightSum; // sum of the weights for a weighted set attribute + uint32_t _valueCount; // the number of values for a non-weighted set attribute + + public: + Computer(const search::fef::IQueryEnvironment & env, + AttributeMatchParams params); + void run(search::fef::MatchData & data); + void reset(); + uint32_t getNumTerms() const { return _queryTerms.size(); } + uint32_t getMatches() const { return _matches; } + int32_t getTotalWeight() const { return _totalWeight; } + feature_t getAverageWeight() const; + feature_t 
getQueryCompleteness() const; + feature_t getNormalizedWeight() const; + feature_t getNormalizedWeightedWeight() const; + feature_t getFieldCompleteness() const; + feature_t getCompleteness() const; + feature_t getWeight() const; + feature_t getSignificance() const; + feature_t getImportance() const { return (getWeight() + getSignificance()) * 0.5; } + }; + + Computer _cmp; + +public: + /** + * Constructs an executor. + */ + AttributeMatchExecutor(const search::fef::IQueryEnvironment & env, + AttributeMatchParams params); + + // Inherit doc from FeatureExecutor. + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the blueprint for the attribute match executor. + */ +class AttributeMatchBlueprint : public search::fef::Blueprint { +private: + AttributeMatchParams _params; + +public: + /** + * Constructs a blueprint. + */ + AttributeMatchBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().attributeField(search::fef::ParameterCollection::ANY); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. 
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.cpp b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp new file mode 100644 index 00000000000..f6b289bfd16 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.cpp @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.closenessfeature"); +#include +#include "closenessfeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +ClosenessExecutor::ClosenessExecutor(feature_t maxDistance, feature_t scaleDistance) : + FeatureExecutor(), + _maxDistance(maxDistance), + _logCalc(maxDistance, scaleDistance) +{ +} + +void +ClosenessExecutor::execute(MatchData & match) +{ + feature_t distance = *match.resolveFeature(inputs()[0]); + feature_t closeness = std::max(1 - (distance / _maxDistance), (feature_t)0); + *match.resolveFeature(outputs()[0]) = closeness; + *match.resolveFeature(outputs()[1]) = _logCalc.get(distance); +} + + +// Polar Earth radius r = 6356.8 km +// Polar Earth circumference = 2 * pi * r = 39940.952 km +// 1 circumference = 39940.952 km = 360 degrees = 360 * 1000000 microdegrees +// -> 1 km = 9013.30536007 microdegrees + +ClosenessBlueprint::ClosenessBlueprint() : + Blueprint("closeness"), + _maxDistance(9013305.0), // default value (about 1000 km) + _scaleDistance(5.0*9013.305), // default value (about 5 km) + _halfResponse(1) +{ +} + +void +ClosenessBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +bool +ClosenessBlueprint::setup(const IIndexEnvironment & env, + const search::fef::ParameterList & params) +{ + // params[0] = attribute name + Property p =
env.getProperties().lookup(getName(), "maxDistance"); + if (p.found()) { + _maxDistance = util::strToNum(p.get()); + } + p = env.getProperties().lookup(getName(), "halfResponse"); + bool useHalfResponse = false; + if (p.found()) { + _halfResponse = util::strToNum(p.get()); + useHalfResponse = true; + } + // sanity checks: + if (_maxDistance < 1) { + LOG(warning, "Invalid %s.maxDistance = %g, using 1.0", + getName().c_str(), (double)_maxDistance); + _maxDistance = 1.0; + } + if (_halfResponse < 1) { + LOG(warning, "Invalid %s.halfResponse = %g, using 1.0", + getName().c_str(), (double)_halfResponse); + _halfResponse = 1.0; + } + if (_halfResponse >= _maxDistance / 2) { + feature_t newResponse = (_maxDistance / 2) - 1; + LOG(warning, "Invalid %s.halfResponse = %g, using %g ((%s.maxDistance / 2) - 1)", + getName().c_str(), (double)_halfResponse, (double)newResponse, getName().c_str()); + _halfResponse = newResponse; + } + + if (useHalfResponse) { + _scaleDistance = LogarithmCalculator::getScale(_halfResponse, _maxDistance); + } + + + defineInput("distance(" + params[0].getValue() + ")"); + describeOutput("out", "The closeness of the document (linear)"); + describeOutput("logscale", "The closeness of the document (logarithmic shape)"); + + return true; +} + +Blueprint::UP +ClosenessBlueprint::createInstance() const +{ + return Blueprint::UP(new ClosenessBlueprint()); +} + +FeatureExecutor::LP +ClosenessBlueprint::createExecutor(const IQueryEnvironment &) const +{ + return FeatureExecutor::LP(new ClosenessExecutor(_maxDistance, _scaleDistance)); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/closenessfeature.h b/searchlib/src/vespa/searchlib/features/closenessfeature.h new file mode 100644 index 00000000000..c86196f1e29 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/closenessfeature.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include "logarithmcalculator.h" + +namespace search { +namespace features { + +/** + * Implements the executor for the closeness feature. + */ +class ClosenessExecutor : public search::fef::FeatureExecutor { +private: + feature_t _maxDistance; + LogarithmCalculator _logCalc; + +public: + /** + * Constructs an executor. + */ + ClosenessExecutor(feature_t maxDistance, feature_t scaleDistance); + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the blueprint for the closeness executor. + */ +class ClosenessBlueprint : public search::fef::Blueprint { +private: + feature_t _maxDistance; + feature_t _scaleDistance; + feature_t _halfResponse; + +public: + /** + * Constructs a blueprint. + */ + ClosenessBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().string(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h b/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h new file mode 100644 index 00000000000..11b875df96b --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/constant_tensor_executor.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Feature executor that returns a constant tensor. + */ +class ConstantTensorExecutor : public fef::FeatureExecutor +{ +private: + const vespalib::eval::TensorValue::UP _tensor; + +public: + ConstantTensorExecutor(vespalib::eval::TensorValue::UP tensor) + : _tensor(std::move(tensor)) + {} + virtual bool isPure() override { return true; } + virtual void execute(fef::MatchData &data) override { + *data.resolve_object_feature(outputs()[0]) = *_tensor; + } + static fef::FeatureExecutor::LP create(vespalib::tensor::Tensor::UP tensor) { + return FeatureExecutor::LP(new ConstantTensorExecutor + (std::make_unique(std::move(tensor)))); + } + static fef::FeatureExecutor::LP createEmpty() { + // XXX: we should use numbers instead of empty tensors + vespalib::tensor::DefaultTensor::builder builder; + return FeatureExecutor::LP(new ConstantTensorExecutor + (std::make_unique + (builder.build()))); + } +}; + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/create-class-cpp.sh b/searchlib/src/vespa/searchlib/features/create-class-cpp.sh new file mode 100755 index 00000000000..6ec335ffa3d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/create-class-cpp.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` +name=`echo $class | tr 'A-Z' 'a-z'` + +cat < +LOG_SETUP(".$name"); +#include +#include "$name.h" + +namespace search { +namespace features { + +$class::$class() +{ +} + +$class::~$class() +{ +} + +} // namespace features +} // namespace search +EOF diff --git a/searchlib/src/vespa/searchlib/features/create-class-h.sh b/searchlib/src/vespa/searchlib/features/create-class-h.sh new file mode 100644 index 00000000000..b8236f60d46 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/create-class-h.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` + +cat < +#include +LOG_SETUP(".features.debug_wait"); +#include "debug_attribute_wait.h" + +using search::attribute::IAttributeVector; + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +DebugAttributeWaitExecutor::DebugAttributeWaitExecutor(const search::fef::IQueryEnvironment &env, + const IAttributeVector *attribute, + const DebugAttributeWaitParams &params) + : _attribute(attribute), + _buf(), + _params(params) +{ + (void)env; +} + +void +DebugAttributeWaitExecutor::execute(search::fef::MatchData &data) +{ + double waitTime = 0.0; + FastOS_Time time; + time.SetNow(); + + if (_attribute != NULL) { + _buf.fill(*_attribute, data.getDocId()); + waitTime = _buf[0]; + } + double millis = waitTime * 1000.0; + + while (time.MilliSecsToNow() < millis) { + if (_params.busyWait) { + for (int i = 0; i < 1000; i++) + ; + } else { + int rem = (int)(millis - time.MilliSecsToNow()); + FastOS_Thread::Sleep(rem); + } + } + *data.resolveFeature(outputs()[0]) = 1.0e-6 * time.MicroSecsToNow(); +} + +//----------------------------------------------------------------------------- + +DebugAttributeWaitBlueprint::DebugAttributeWaitBlueprint() + :
Blueprint("debugAttributeWait"), + _params() +{ +} + +void +DebugAttributeWaitBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + (void)env; + (void)visitor; +} + +search::fef::Blueprint::UP +DebugAttributeWaitBlueprint::createInstance() const +{ + return Blueprint::UP(new DebugAttributeWaitBlueprint()); +} + +bool +DebugAttributeWaitBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + (void)env; + _attribute = params[0].getValue(); + _params.busyWait = (params[1].asDouble() == 1.0); + + describeOutput("out", "actual time waited"); + env.hintAttributeAccess(_attribute); + return true; +} + +search::fef::FeatureExecutor::LP +DebugAttributeWaitBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + // Get attribute vector + const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attribute); + return search::fef::FeatureExecutor::LP(new DebugAttributeWaitExecutor(env, attribute, _params)); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h new file mode 100644 index 00000000000..d7043ce9f30 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/debug_attribute_wait.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +struct DebugAttributeWaitParams { + bool busyWait; +}; + +//----------------------------------------------------------------------------- + +class DebugAttributeWaitExecutor : public search::fef::FeatureExecutor +{ +private: + const search::attribute::IAttributeVector *_attribute; + search::attribute::FloatContent _buf; + DebugAttributeWaitParams _params; + +public: + DebugAttributeWaitExecutor(const search::fef::IQueryEnvironment &env, + const search::attribute::IAttributeVector * + attribute, + const DebugAttributeWaitParams &params); + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class DebugAttributeWaitBlueprint : public search::fef::Blueprint +{ +private: + vespalib::string _attribute; + DebugAttributeWaitParams _params; + +public: + DebugAttributeWaitBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY).number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + + // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/debug_wait.cpp b/searchlib/src/vespa/searchlib/features/debug_wait.cpp new file mode 100644 index 00000000000..58fb7925a74 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/debug_wait.cpp @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.debug_wait"); +#include "debug_wait.h" + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +DebugWaitExecutor::DebugWaitExecutor(const search::fef::IQueryEnvironment &env, + const DebugWaitParams ¶ms) + : _params(params) +{ + (void)env; +} + +void +DebugWaitExecutor::execute(search::fef::MatchData &data) +{ + FastOS_Time time; + time.SetNow(); + double millis = _params.waitTime * 1000.0; + + while (time.MilliSecsToNow() < millis) { + if (_params.busyWait) { + for (int i = 0; i < 1000; i++) + ; + } else { + int rem = (int)(millis - time.MilliSecsToNow()); + FastOS_Thread::Sleep(rem); + } + } + *data.resolveFeature(outputs()[0]) = 1.0e-6 * time.MicroSecsToNow(); +} + +//----------------------------------------------------------------------------- + +DebugWaitBlueprint::DebugWaitBlueprint() + : Blueprint("debugWait"), + _params() +{ +} + +void +DebugWaitBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + (void)env; + (void)visitor; +} + +search::fef::Blueprint::UP +DebugWaitBlueprint::createInstance() const +{ + return Blueprint::UP(new DebugWaitBlueprint()); +} + +bool +DebugWaitBlueprint::setup(const search::fef::IIndexEnvironment &env, + const 
search::fef::ParameterList ¶ms) +{ + (void)env; + _params.waitTime = params[0].asDouble(); + _params.busyWait = (params[1].asDouble() == 1.0); + + describeOutput("out", "actual time waited"); + return true; +} + +search::fef::FeatureExecutor::LP +DebugWaitBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new DebugWaitExecutor(env, _params)); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/debug_wait.h b/searchlib/src/vespa/searchlib/features/debug_wait.h new file mode 100644 index 00000000000..69c7612381b --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/debug_wait.h @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +struct DebugWaitParams { + double waitTime; + bool busyWait; +}; + +//----------------------------------------------------------------------------- + +class DebugWaitExecutor : public search::fef::FeatureExecutor +{ +private: + DebugWaitParams _params; + +public: + DebugWaitExecutor(const search::fef::IQueryEnvironment &env, + const DebugWaitParams ¶ms); + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class DebugWaitBlueprint : public search::fef::Blueprint +{ +private: + DebugWaitParams _params; + +public: + DebugWaitBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().number().number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.cpp b/searchlib/src/vespa/searchlib/features/distancefeature.cpp new file mode 100644 index 00000000000..2002729b049 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/distancefeature.cpp @@ -0,0 +1,148 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".features.distancefeature");
+#include
+#include
+#include
+#include
+#include
+#include
+#include "distancefeature.h"
+
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+// Returns the 2D z-curve distance for the document when the query location is
+// valid and a position attribute is available, otherwise DEFAULT_DISTANCE.
+feature_t
+DistanceExecutor::calculateDistance(uint32_t docId)
+{
+    if (_location.isValid() && _pos != NULL) {
+        return calculate2DZDistance(docId);
+    }
+    return DEFAULT_DISTANCE;
+}
+
+
+// Decodes every position value of the document and returns the smallest
+// distance to the query location. Expects _pos to be non-NULL.
+feature_t
+DistanceExecutor::calculate2DZDistance(uint32_t docId)
+{
+    _intBuf.fill(*_pos, docId);
+    uint32_t numValues = _intBuf.size();
+    // Stays at this initial maximum when the document has no position values.
+    uint64_t sqabsdist = std::numeric_limits::max();
+    int32_t docx = 0;
+    int32_t docy = 0;
+    for (uint32_t i = 0; i < numValues; ++i) {
+        vespalib::geo::ZCurve::decode(_intBuf[i], &docx, &docy);
+        uint32_t dx;
+        uint32_t dy;
+        // Absolute differences are computed branch-wise so that the unsigned
+        // results never go negative.
+        if (_location.getXPosition() > docx) {
+            dx = _location.getXPosition() - docx;
+        } else {
+            dx = docx - _location.getXPosition();
+        }
+        // A non-zero x aspect scales dx: multiply in 64 bits, keep the upper 32 bits.
+        if (_location.getXAspect() != 0) {
+            dx = ((uint64_t) dx * _location.getXAspect()) >> 32;
+        }
+        if (_location.getYPosition() > docy) {
+            dy = _location.getYPosition() - docy;
+        } else {
+            dy = docy - _location.getYPosition();
+        }
+        uint64_t sqdist = (uint64_t) dx * dx + (uint64_t) dy * dy;
+        if (sqdist < sqabsdist) {
+            sqabsdist = sqdist;
+        }
+    }
+    return static_cast(sqrt(static_cast(sqabsdist)));
+}
+
+// Keeps a reference to the location and sizes the value buffer for the
+// position attribute, when one is given.
+DistanceExecutor::DistanceExecutor(const Location & location,
+                                   const search::attribute::IAttributeVector * pos) :
+    FeatureExecutor(),
+    _location(location),
+    _pos(pos),
+    _intBuf()
+{
+    if (_pos != NULL) {
+        _intBuf.allocate(_pos->getMaxValueCount());
+    }
+}
+
+// Writes the distance for the current document to the first output.
+void
+DistanceExecutor::execute(MatchData & match)
+{
+    *match.resolveFeature(outputs()[0]) = calculateDistance(match.getDocId());
+}
+
+const feature_t DistanceExecutor::DEFAULT_DISTANCE(6400000000.0);
+
+
+DistanceBlueprint::DistanceBlueprint() :
+    Blueprint("distance"),
+    _posAttr()
+{
+}
+
+// Inherit doc from Blueprint. Nothing is reported to the dump feature visitor.
+void
+DistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+                                     IDumpFeatureVisitor &) const
+{
+}
+
+Blueprint::UP +DistanceBlueprint::createInstance() const +{ + return Blueprint::UP(new DistanceBlueprint()); +} + +bool +DistanceBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _posAttr = params[0].getValue(); + describeOutput("out", "The euclidian distance from the query position."); + env.hintAttributeAccess(_posAttr); + env.hintAttributeAccess(document::PositionDataType::getZCurveFieldName(_posAttr)); + return true; +} + +FeatureExecutor::LP +DistanceBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + const search::attribute::IAttributeVector * pos = NULL; + const Location & location = env.getLocation(); + LOG(debug, "DistanceBlueprint::createExecutor location.valid='%s', '%s', alternatively '%s'", + location.isValid() ? "true" : "false", _posAttr.c_str(), document::PositionDataType::getZCurveFieldName(_posAttr).c_str()); + if (location.isValid()) { + pos = env.getAttributeContext().getAttribute(_posAttr); + if (pos == NULL) { + LOG(debug, "Failed to find attribute '%s', resorting too '%s'", + _posAttr.c_str(), document::PositionDataType::getZCurveFieldName(_posAttr).c_str()); + pos = env.getAttributeContext().getAttribute(document::PositionDataType::getZCurveFieldName(_posAttr)); + } + if (pos != NULL) { + if (!pos->isIntegerType()) { + LOG(warning, "The position attribute '%s' is not an integer attribute. Will use default distance.", + pos->getName().c_str()); + pos = NULL; + } else if (pos->getCollectionType() == attribute::CollectionType::WSET) { + LOG(warning, "The position attribute '%s' is a weighted set attribute. Will use default distance.", + pos->getName().c_str()); + pos = NULL; + } + } else { + LOG(warning, "The position attribute '%s' was not found. 
Will use default distance.", _posAttr.c_str()); + } + } + + return FeatureExecutor::LP(new DistanceExecutor(location, pos)); +} + + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/distancefeature.h b/searchlib/src/vespa/searchlib/features/distancefeature.h new file mode 100644 index 00000000000..bf9d4cb54da --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/distancefeature.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the distance feature. + */ +class DistanceExecutor : public search::fef::FeatureExecutor { +private: + const search::fef::Location & _location; + const search::attribute::IAttributeVector * _pos; + search::attribute::IntegerContent _intBuf; + + feature_t calculateDistance(uint32_t docId); + feature_t calculate2DZDistance(uint32_t docId); + +public: + /** + * Constructs an executor for the distance feature. + * + * @param location the location object associated with the query environment. + * @param pos the attribute to use for positions (expects zcurve encoding). + */ + DistanceExecutor(const search::fef::Location & location, + const search::attribute::IAttributeVector * pos); + virtual void execute(search::fef::MatchData & data); + + static const feature_t DEFAULT_DISTANCE; +}; + +/** + * Implements the blueprint for the distance executor. + */ +class DistanceBlueprint : public search::fef::Blueprint { +private: + vespalib::string _posAttr; + +public: + /** + * Constructs a blueprint for the distance executor. + */ + DistanceBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. 
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc().string();
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
new file mode 100644
index 00000000000..05d17c33a79
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.cpp
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP(".features.distancetopathfeature");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "distancetopathfeature.h"
+#include "utils.h"
+
+namespace search {
+namespace features {
+
+const feature_t DistanceToPathExecutor::DEFAULT_DISTANCE(6400000000.0);
+
+// Takes over the contents of 'path' by swapping, which leaves the caller's
+// vector empty, and sizes the value buffer for the position attribute.
+DistanceToPathExecutor::DistanceToPathExecutor(std::vector &path,
+                                               const search::attribute::IAttributeVector *pos) :
+    search::fef::FeatureExecutor(),
+    _intBuf(),
+    _path(),
+    _pos(pos)
+{
+    if (_pos != NULL) {
+        _intBuf.allocate(_pos->getMaxValueCount());
+    }
+    _path.swap(path); // avoid copy
+}
+
+// For every path segment and every document position, finds the closest point
+// on the segment and keeps the overall minimum. Outputs:
+//   [0] the minimum distance,
+//   [1] how far along the path the closest point lies, normalized by the path
+//       length (0 for a zero-length path, 1 when no document position was seen),
+//   [2] the cross product of the closest segment and the vector to the document.
+// Without a path of at least two points, or without a position attribute, the
+// outputs are DEFAULT_DISTANCE, 1 and 0.
+void
+DistanceToPathExecutor::execute(search::fef::MatchData & match)
+{
+    if (_path.size() > 1 && _pos != NULL) {
+        double pos = -1, trip = 0, product = 0;
+        double minSqDist = std::numeric_limits::max();
+        _intBuf.fill(*_pos, match.getDocId());
+
+        // For each line segment, do
+        for (uint32_t seg = 1; seg < _path.size(); ++seg) {
+            const Vector2 &p1 = _path[seg - 1];
+            const Vector2 &p2 = _path[seg];
+            double len2 = (p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y);
+            double len = sqrt(len2);
+
+            // For each document location, do
+            for (uint32_t loc = 0; loc < _intBuf.size(); ++loc) {
+                int32_t x = 0, y = 0;
+                vespalib::geo::ZCurve::decode(_intBuf[loc], &x, &y);
+
+                // u is the projection of the document onto the segment, clamped to [0, 1].
+                double u = 0, dx, dy;
+                if (len < 1e-6) {
+                    dx = p1.x - x; // process as point
+                    dy = p1.y - y;
+                } else {
+                    u = std::min(1.0, std::max(0.0, (((x - p1.x) * (p2.x - p1.x)) + ((y - p1.y) * (p2.y - p1.y))) / len2));
+                    if (u == 0) {
+                        dx = p1.x - x; // intersection before segment
+                        dy = p1.y - y;
+                    } else if (u == 1) {
+                        dx = p2.x - x; // intersection after segment
+                        dy = p2.y - y;
+                    } else {
+                        dx = p1.x + u * (p2.x - p1.x) - x;
+                        dy = p1.y + u * (p2.y - p1.y) - y;
+                    }
+                }
+
+                double sqDist = dx * dx + dy * dy;
+                if (sqDist < minSqDist) {
+                    minSqDist = sqDist;
+                    pos = trip + u * len; // distance traveled up to the closest point
+                    product = (p2.x - p1.x) * dy - (p2.y - p1.y) * dx;
+                }
+            }
+            trip += len; // accumulated length of the segments processed so far
+        }
+
+        *match.resolveFeature(outputs()[0]) = static_cast(sqrt(static_cast(minSqDist)));
+        *match.resolveFeature(outputs()[1]) = static_cast(pos > -1 ? (trip > 0 ? pos / trip : 0) : 1);
+        *match.resolveFeature(outputs()[2]) = static_cast(product);
+    } else {
+        *match.resolveFeature(outputs()[0]) = DEFAULT_DISTANCE;
+        *match.resolveFeature(outputs()[1]) = 1;
+        *match.resolveFeature(outputs()[2]) = 0;
+    }
+}
+
+DistanceToPathBlueprint::DistanceToPathBlueprint() :
+    Blueprint("distanceToPath"),
+    _posAttr()
+{
+    // empty
+}
+
+void
+DistanceToPathBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+                                           search::fef::IDumpFeatureVisitor &) const
+{
+    // empty
+}
+
+search::fef::Blueprint::UP
+DistanceToPathBlueprint::createInstance() const
+{
+    return Blueprint::UP(new DistanceToPathBlueprint());
+}
+
+// params[0] names the position attribute. Declares the three outputs and hints
+// access both for that name and for the derived z-curve field name.
+bool
+DistanceToPathBlueprint::setup(const search::fef::IIndexEnvironment & env,
+                               const search::fef::ParameterList & params)
+{
+    _posAttr = params[0].getValue();
+    describeOutput("distance", "The euclidian distance from the query path.");
+    describeOutput("traveled", "The normalized distance traveled along the path before intersection.");
+    describeOutput("product", "The cross-product of the intersecting line segment and the intersection-to-document vector.");
+    env.hintAttributeAccess(_posAttr);
+    env.hintAttributeAccess(document::PositionDataType::getZCurveFieldName(_posAttr));
+    return true;
+}
+
+search::fef::FeatureExecutor::LP
+DistanceToPathBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const
+{
+    // Retrieve path from query using the name of this and "path" as property.
+ std::vector path; + search::fef::Property pro = env.getProperties().lookup(getName(), "path"); + if (pro.found()) { + vespalib::string str = pro.getAt(0); + uint32_t len = str.size(); + if (str[0] == '(' && len > 1 && str[len - 1] == ')') { + str = str.substr(1, len - 1); // remove braces + std::vector arr; + boost::split(arr, str, boost::is_any_of(",")); + len = arr.size() - 1; + for (uint32_t i = 0; i < len; i += 2) { + double x = util::strToNum(arr[i]); + double y = util::strToNum(arr[i + 1]); + path.push_back(Vector2(x, y)); + } + } + } + + // Lookup the attribute vector that holds document positions. + const search::attribute::IAttributeVector *pos = NULL; + if (path.size() > 1) { + pos = env.getAttributeContext().getAttribute(_posAttr); + if (pos == NULL) { + pos = env.getAttributeContext().getAttribute(document::PositionDataType::getZCurveFieldName(_posAttr)); + } + if (pos != NULL) { + if (!pos->isIntegerType()) { + LOG(warning, "The position attribute '%s' is not an integer attribute. Will use default distance.", + pos->getName().c_str()); + pos = NULL; + } else if (pos->getCollectionType() == attribute::CollectionType::WSET) { + LOG(warning, "The position attribute '%s' is a weighted set attribute. Will use default distance.", + pos->getName().c_str()); + pos = NULL; + } + } else { + LOG(warning, "The position attribute '%s' was not found. Will use default distance.", _posAttr.c_str()); + } + } else { + LOG(warning, "No path given in query. Will use default distance."); + } + + // Create and return a compatible executor. 
+    return search::fef::FeatureExecutor::LP(new DistanceToPathExecutor(path, pos));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/distancetopathfeature.h b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
new file mode 100644
index 00000000000..d82b55aef03
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/distancetopathfeature.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace features {
+
+/**
+ * Define the point type that makes up the end-points in our path.
+ */
+struct Vector2 {
+    Vector2(double _x, double _y) : x(_x), y(_y) { }
+    double x, y;
+};
+
+/**
+ * Implements the executor for the distance to path feature.
+ */
+class DistanceToPathExecutor : public search::fef::FeatureExecutor {
+private:
+    search::attribute::IntegerContent          _intBuf; // Position value buffer.
+    std::vector                                _path;   // Path given by query.
+    const search::attribute::IAttributeVector *_pos;    // Position attribute.
+
+public:
+    /**
+     * Constructs an executor for the distance to path feature.
+     *
+     * @param path The path associated with the query environment. Taken by
+     *             non-const reference because the executor takes over its contents.
+     * @param pos  The attribute to use for positions (expects zcurve encoding).
+     */
+    DistanceToPathExecutor(std::vector &path,
+                           const search::attribute::IAttributeVector *pos);
+    virtual void execute(search::fef::MatchData & data);
+
+    /**
+     * Defines a default distance value to use if a proper one can not be determined.
+     */
+    static const feature_t DEFAULT_DISTANCE;
+};
+
+/**
+ * Implements the blueprint for the distance to path feature.
+ */
+class DistanceToPathBlueprint : public search::fef::Blueprint {
+private:
+    vespalib::string _posAttr; // Name of the position attribute.
+
+public:
+    /**
+     * Constructs a blueprint for the distance to path feature.
+     */
+    DistanceToPathBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+                                   search::fef::IDumpFeatureVisitor &visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc().string();
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
new file mode 100644
index 00000000000..51385a0b816
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.cpp
@@ -0,0 +1,457 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".features.dotproduct");
+#include
+#include
+#include
+#include
+
+#include "dotproductfeature.h"
+#include "array_parser.hpp"
+#include "utils.h"
+#include "valuefeature.h"
+#include "weighted_set_parser.hpp"
+#include
+#include
+
+using namespace search::attribute;
+using namespace search::fef;
+using vespalib::hwaccelrated::IAccelrated;
+
+namespace search {
+namespace features {
+namespace dotproduct {
+namespace wset {
+
+// Copies the query vector, sizes the value buffer for the attribute and
+// builds the vector's dimension lookup map. 'attribute' must not be NULL.
+template
+DotProductExecutor::DotProductExecutor(const IAttributeVector * attribute, const Vector & vector) :
+    FeatureExecutor(),
+    _attribute(attribute),
+    _vector(vector),
+    _buffer()
+{
+    _buffer.allocate(_attribute->getMaxValueCount());
+    _vector.syncMap();
+}
+
+// Sums weight * component over the document values whose key is a dimension
+// of the query vector. The output is 0 when the query vector is empty.
+template
+void
+DotProductExecutor::execute(MatchData & match)
+{
+    feature_t val = 0;
+    if (!_vector.getDimMap().empty()) {
+        _buffer.fill(*_attribute, match.getDocId());
+        for (size_t i = 0; i < _buffer.size(); ++i) {
+            typename Vector::HashMap::const_iterator itr = _vector.getDimMap().find(_buffer[i].getValue());
+            if (itr != _vector.getDimMap().end()) {
+                val += _buffer[i].getWeight() * itr->second;
+            }
+        }
+    }
+    *match.resolveFeature(outputs()[0]) = val;
+}
+
+}
+
+namespace array {
+
+template
+DotProductExecutor::DotProductExecutor(const A * attribute, const V & vector) :
+    FeatureExecutor(),
+    _attribute(attribute),
+    _multiplier(IAccelrated::getAccelrator()),
+    _vector(vector)
+{
+}
+
+// Points 'values' at the attribute's raw value array for the document and
+// returns the number of values.
+template
+size_t
+DotProductExecutor::getAttributeValues(uint32_t docId, const AT * & values)
+{
+    return _attribute->getRawValues(docId, values);
+}
+
+// Dot product over the common prefix of the query vector and the document values.
+template
+void
+DotProductExecutor::execute(MatchData & match)
+{
+    const AT *values(NULL);
+    size_t count = getAttributeValues(match.getDocId(), values);
+    size_t commonRange = std::min(count, _vector.size());
+    *match.resolveFeature(outputs()[0]) = _multiplier->dotProduct(&_vector[0], reinterpret_cast(values), commonRange);
+}
+
+template
+SparseDotProductExecutor::SparseDotProductExecutor(const A * attribute, const V & values, const IV & indexes) :
+    DotProductExecutor(attribute, values),
+    _indexes(indexes),
+    _scratch(std::max(static_cast(attribute->getMaxValueCount()), indexes.size()))
+{
+}
+
+// Gathers the document values at the query indexes into _scratch, stopping at
+// the first index that is outside the document's value range.
+template
+size_t
+SparseDotProductExecutor::getAttributeValues(uint32_t docId, const AT * & values)
+{
+    const AT *allValues(NULL);
+    size_t count = this->_attribute->getRawValues(docId, allValues);
+    values = &_scratch[0];
+    size_t i(0);
+    for (; (i < _indexes.size()) && (_indexes[i] < count); i++) {
+        _scratch[i] = allValues[_indexes[i]];
+    }
+    return i;
+}
+
+template
+DotProductByCopyExecutor::DotProductByCopyExecutor(const A * attribute, const V & values) :
+    DotProductExecutor(attribute, values),
+    _copy(static_cast(attribute->getMaxValueCount()))
+{
+}
+
+// Copies the document values into _copy, growing the buffer and fetching
+// again when the first attempt reports more values than fit.
+template
+size_t
+DotProductByCopyExecutor::getAttributeValues(uint32_t docId, const AT * & values)
+{
+    size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+    if (count > _copy.size()) {
+        _copy.resize(count);
+        count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+    }
+    values = reinterpret_cast(&_copy[0]);
+    return count;
+}
+
+template
+SparseDotProductByCopyExecutor::SparseDotProductByCopyExecutor(const A * attribute, const V & values, const IV & indexes) :
+    SparseDotProductExecutor(attribute, values, indexes),
+    _copy(std::max(static_cast(attribute->getMaxValueCount()), indexes.size()))
+{
+}
+
+// Copies the document values into _copy and then compacts the values at the
+// query indexes to the front of the same buffer.
+template
+size_t
+SparseDotProductByCopyExecutor::getAttributeValues(uint32_t docId, const AT * & values)
+{
+    size_t count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+    if (count > _copy.size()) {
+        _copy.resize(count);
+        count = this->_attribute->getAll(docId, &_copy[0], _copy.size());
+    }
+    size_t i(0);
+    // NOTE(review): the in-place move is only safe when iv[i] >= i, i.e. when the
+    // indexes are unique and ascending; presumably guaranteed by the sort in
+    // parseVectors - confirm.
+    for (const IV & iv(this->_indexes); (i < iv.size()) && (iv[i] < count); i++) {
+        if (i != iv[i]) {
+            _copy[i] = _copy[iv[i]];
+        }
+    }
+    values = reinterpret_cast(&_copy[0]);
+    return i;
+}
+
+}
+
+}
+
+
+DotProductBlueprint::DotProductBlueprint() :
+    Blueprint("dotProduct"),
+    _defaultAttribute(),
+    _queryVector()
+{
+}
+
+// Returns the attribute name to use: the query property
+// "<default attribute>.override.name" when it is set and non-empty,
+// otherwise the attribute given at setup.
+vespalib::string
+DotProductBlueprint::getAttribute(const IQueryEnvironment & env) const
+{
+    Property prop = env.getProperties().lookup(getBaseName(), _defaultAttribute + ".override.name");
+    if (prop.found() && !prop.get().empty()) {
+        return prop.get();
+    }
+    return _defaultAttribute;
+}
+
+void
+DotProductBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const
+{
+}
+
+// params[0] names the attribute, params[1] names the query vector property.
+bool
+DotProductBlueprint::setup(const IIndexEnvironment & env, const ParameterList & params)
+{
+    _defaultAttribute = params[0].getValue();
+    _queryVector = params[1].getValue();
+    describeOutput("scalar", "The result after calculating the dot product of the vector represented by the weighted set "
+                             "and the vector sent down with the query");
+    env.hintAttributeAccess(_defaultAttribute);
+    return true;
+}
+
+Blueprint::UP
+DotProductBlueprint::createInstance() const
+{
+    return Blueprint::UP(new DotProductBlueprint());
+}
+
+namespace {
+
+// Parses the query vector property into 'values' and 'indexes'.
+// When (highest index + 1) / number of entries is below 10 the vector is
+// stored densely: 'values' is addressed by index and 'indexes' stays empty.
+// Otherwise 'values' and 'indexes' are filled as parallel arrays.
+template
+void
+parseVectors(const Property & prop, std::vector & values, std::vector & indexes)
+{
+    typedef std::vector> SparseV;
+    SparseV sparse;
+    ArrayParser::parsePartial(prop.get(), sparse);
+    if ( ! sparse.empty()) {
+        std::sort(sparse.begin(), sparse.end());
+        if ((sparse.back().getIndex()+1)/sparse.size() < 10) {
+            values.resize(sparse.back().getIndex()+1);
+            for(const typename SparseV::value_type & a : sparse) {
+                values[a.getIndex()] = a.getValue();
+            }
+        } else {
+            values.reserve(sparse.size());
+            indexes.reserve(sparse.size());
+            for(const typename SparseV::value_type & a : sparse) {
+                values.push_back(a.getValue());
+                indexes.push_back(a.getIndex());
+            }
+        }
+    }
+}
+
+// Creates an array dot product executor from a query vector property.
+// getRawValues() is probed on document 0; when it throws std::runtime_error
+// the by-copy executor variant is used instead.
+template
+FeatureExecutor::LP
+create(const IAttributeVector * attribute, const Property & prop)
+{
+    std::vector values;
+    std::vector indexes;
+    parseVectors(prop, values, indexes);
+    if (values.empty()) {
+        return FeatureExecutor::LP(new SingleZeroValueExecutor());
+    }
+    const A & iattr = dynamic_cast(*attribute);
+    if (indexes.empty()) {
+        try {
+            const multivalue::Value * tmp;
+            iattr.getRawValues(0, tmp);
+            return FeatureExecutor::LP(new dotproduct::array::DotProductExecutor(&iattr, values));
+        } catch (const std::runtime_error & e) {
+            (void) e;
+            return FeatureExecutor::LP(new dotproduct::array::DotProductByCopyExecutor(&iattr, values));
+        }
+    } else {
+        try {
+            const multivalue::Value * tmp;
+            iattr.getRawValues(0, tmp);
+            return FeatureExecutor::LP(new dotproduct::array::SparseDotProductExecutor(&iattr, values, indexes));
+        } catch (const std::runtime_error & e) {
+            (void) e;
+            return FeatureExecutor::LP(new dotproduct::array::SparseDotProductByCopyExecutor(&iattr, values, indexes));
+        }
+    }
+    // NOTE(review): unreachable, every branch above returns.
+    return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+// A query vector parsed once, for sharing through the object store.
+template
+struct ArrayParam : public fef::Anything
+{
+    ArrayParam(const Property & prop) {
+        parseVectors(prop, values, indexes);
+    }
+    std::vector values;
+    std::vector indexes;
+};
+
+// Same as the property based create(), but uses an already parsed query vector.
+template
+FeatureExecutor::LP
+create(const IAttributeVector * attribute, const ArrayParam & arguments)
+{
+    if (arguments.values.empty()) {
+        return FeatureExecutor::LP(new SingleZeroValueExecutor());
+    }
+    const A & iattr = dynamic_cast(*attribute);
+    if (arguments.indexes.empty()) {
+        try {
+            const multivalue::Value * tmp;
+            iattr.getRawValues(0, tmp);
+            return FeatureExecutor::LP(new dotproduct::array::DotProductExecutor(&iattr, arguments.values));
+        } catch (const std::runtime_error & e) {
+            (void) e;
+            return FeatureExecutor::LP(new dotproduct::array::DotProductByCopyExecutor(&iattr, arguments.values));
+        }
+    } else {
+        try {
+            const multivalue::Value * tmp;
+            iattr.getRawValues(0, tmp);
+            return FeatureExecutor::LP(new dotproduct::array::SparseDotProductExecutor(&iattr, arguments.values, arguments.indexes));
+        } catch (const std::runtime_error & e) {
+            (void) e;
+            return FeatureExecutor::LP(new dotproduct::array::SparseDotProductByCopyExecutor(&iattr, arguments.values, arguments.indexes));
+        }
+    }
+    // NOTE(review): unreachable, every branch above returns.
+    return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+//const char * BINARY = "binary";
+// Last component of the object store key used for the parsed query vector.
+const char * OBJECT = "object";
+
+
+// Creates an executor from a query vector stored as an object. Only array
+// attributes of type INT32, INT64, FLOAT and DOUBLE are handled; everything
+// else gets an executor that outputs zero.
+FeatureExecutor::LP
+createFromObject(const IAttributeVector * attribute, const fef::Anything & object)
+{
+    if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+        switch (attribute->getBasicType()) {
+            case BasicType::INT32:
+                return create>(attribute, dynamic_cast &>(object));
+            case BasicType::INT64:
+                return create>(attribute, dynamic_cast &>(object));
+            case BasicType::FLOAT:
+                return create>(attribute, dynamic_cast &>(object));
+            case BasicType::DOUBLE:
+                return create>(attribute, dynamic_cast &>(object));
+            default:
+                break;
+        }
+    }
+    // TODO: Add support for creating executor for weighted set string / integer attribute
+    // where the query vector is represented as an object instead of a string.
+    LOG(warning, "The attribute vector '%s' is NOT of type array"
+                 ", returning executor with default value.", attribute->getName().c_str());
+    return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+// Creates an executor by parsing the query vector from its string property.
+// Handles weighted set string/integer attributes (with or without enum) and
+// numeric array attributes; everything else gets an executor that outputs zero.
+FeatureExecutor::LP
+createFromString(const IAttributeVector * attribute, const Property & prop)
+{
+    if (attribute->getCollectionType() == attribute::CollectionType::WSET) {
+        if (attribute->isStringType()) {
+            if (attribute->hasEnum()) {
+                dotproduct::wset::EnumVector vector(attribute);
+                WeightedSetParser::parse(prop.get(), vector);
+                return FeatureExecutor::LP
+                    (new dotproduct::wset::DotProductExecutor(attribute, vector));
+            } else {
+                dotproduct::wset::StringVector vector;
+                WeightedSetParser::parse(prop.get(), vector);
+                return FeatureExecutor::LP
+                    (new dotproduct::wset::DotProductExecutor(attribute, vector));
+            }
+        } else if (attribute->isIntegerType()) {
+            if (attribute->hasEnum()) {
+                dotproduct::wset::EnumVector vector(attribute);
+                WeightedSetParser::parse(prop.get(), vector);
+                return FeatureExecutor::LP
+                    (new dotproduct::wset::DotProductExecutor(attribute, vector));
+
+            } else {
+                dotproduct::wset::IntegerVector vector;
+                WeightedSetParser::parse(prop.get(), vector);
+                return FeatureExecutor::LP
+                    (new dotproduct::wset::DotProductExecutor(attribute, vector));
+            }
+        }
+    } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+        switch (attribute->getBasicType()) {
+            case BasicType::INT32:
+                return create>(attribute, prop);
+            case BasicType::INT64:
+                return create>(attribute, prop);
+            case BasicType::FLOAT:
+                return create>(attribute, prop);
+            case BasicType::DOUBLE:
+                return create>(attribute, prop);
+            default:
+                break;
+        }
+    }
+    LOG(warning, "The attribute vector '%s' is not of type weighted set string/integer nor"
+                 " array, returning executor with default value.", attribute->getName().c_str());
+    return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+}
+
+// Parses the query vector once and stores it in the object store under
+// "<base name>.<query vector>.object", for array attributes only.
+void
+DotProductBlueprint::prepareSharedState(const IQueryEnvironment & env, IObjectStore & store) const
+{
+    const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env));
+    if (attribute != NULL) {
+        if ((attribute->getCollectionType() == attribute::CollectionType::WSET) &&
+            attribute->hasEnum() &&
+            (attribute->isStringType() || attribute->isIntegerType()))
+        {
+            attribute = env.getAttributeContext().getAttributeStableEnum(getAttribute(env));
+        }
+        Property prop = env.getProperties().lookup(getBaseName(), _queryVector);
+        if (prop.found() && !prop.get().empty()) {
+            fef::Anything::UP arguments;
+            if (attribute->getCollectionType() == attribute::CollectionType::WSET) {
+                // NOTE(review): the vectors parsed in this branch are discarded and
+                // nothing is stored, so weighted sets are parsed again in
+                // createExecutor() - confirm whether this parse is still needed.
+                if (attribute->isStringType() && attribute->hasEnum()) {
+                    dotproduct::wset::EnumVector vector(attribute);
+                    WeightedSetParser::parse(prop.get(), vector);
+                } else if (attribute->isIntegerType()) {
+                    if (attribute->hasEnum()) {
+                        dotproduct::wset::EnumVector vector(attribute);
+                        WeightedSetParser::parse(prop.get(), vector);
+                    } else {
+                        dotproduct::wset::IntegerVector vector;
+                        WeightedSetParser::parse(prop.get(), vector);
+                    }
+                }
+            } else if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) {
+                switch (attribute->getBasicType()) {
+                    case BasicType::INT32:
+                        arguments.reset(new ArrayParam(prop));
+                        break;
+                    case BasicType::INT64:
+                        arguments.reset(new ArrayParam(prop));
+                        break;
+                    case BasicType::FLOAT:
+                        arguments.reset(new ArrayParam(prop));
+                        break;
+                    case BasicType::DOUBLE:
+                        arguments.reset(new ArrayParam(prop));
+                        break;
+                    default:
+                        break;
+                }
+            }
+            if ( arguments.get()) {
+                store.add(getBaseName() + "." + _queryVector + "." + OBJECT, std::move(arguments));
+            }
+        }
+    }
+}
+
+// Creates the executor. A query vector found in the object store is preferred;
+// otherwise the string property is parsed. An executor that outputs zero is
+// returned when the attribute or the query vector is missing.
+FeatureExecutor::LP
+DotProductBlueprint::createExecutor(const IQueryEnvironment & env) const
+{
+    const IAttributeVector * attribute = env.getAttributeContext().getAttribute(getAttribute(env));
+    if (attribute == NULL) {
+        LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning executor with default value.",
+            getAttribute(env).c_str());
+        return FeatureExecutor::LP(new SingleZeroValueExecutor());
+    }
+    if ((attribute->getCollectionType() == attribute::CollectionType::WSET) &&
+        attribute->hasEnum() &&
+        (attribute->isStringType() || attribute->isIntegerType()))
+    {
+        attribute = env.getAttributeContext().getAttributeStableEnum(getAttribute(env));
+    }
+    const fef::Anything * argument = env.getObjectStore().get(getBaseName() + "." + _queryVector + "." + OBJECT);
+    if (argument != NULL) {
+        return createFromObject(attribute, *argument);
+    } else {
+        Property prop = env.getProperties().lookup(getBaseName(), _queryVector);
+        if (prop.found() && !prop.get().empty()) {
+            return createFromString(attribute, prop);
+        }
+    }
+    return FeatureExecutor::LP(new SingleZeroValueExecutor());
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/dotproductfeature.h b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
new file mode 100644
index 00000000000..6142914ae32
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/dotproductfeature.h
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include +#include +#include +#include "utils.h" + + +namespace search { +namespace features { + +namespace dotproduct { + +struct ConstCharComparator { + bool operator()(const char * lhs, const char * rhs) const { + return strcmp(lhs, rhs) == 0; + } +}; + +template +struct Converter { + Dst convert(const Src & value) const { return value; } +}; + +template <> +struct Converter { + const char * convert(const vespalib::string & value) const { return value.c_str(); } +}; + +namespace wset { + +template > +class VectorBase { +public: + typedef std::pair Element; // + typedef std::vector Vector; + typedef vespalib::hash_map, HashMapComparator> HashMap; +protected: + Vector _vector; + HashMap _dimMap; // dimension -> component +public: + const Vector & getVector() const { return _vector; } + void syncMap() { + Converter conv; + _dimMap.clear(); + _dimMap.resize(_vector.size()*2); + for (size_t i = 0; i < _vector.size(); ++i) { + _dimMap.insert(std::make_pair(conv.convert(_vector[i].first), _vector[i].second)); + } + } + const HashMap & getDimMap() const { return _dimMap; } +}; + +/** + * Represents a vector where the dimensions are integers. + **/ +class IntegerVector : public VectorBase { +public: + void insert(const vespalib::stringref & label, const vespalib::stringref & value) { + _vector.push_back(std::make_pair(util::strToNum(label), util::strToNum(value))); + } +}; + +/** + * Represents a vector where the dimensions are string values. + **/ +class StringVector : public VectorBase { +public: + void insert(const vespalib::stringref & label, const vespalib::stringref & value) { + _vector.push_back(std::make_pair(label, util::strToNum(value))); + } +}; + +/** + * Represents a vector where the dimensions are enum values for strings. 
+ **/ +class EnumVector : public VectorBase { +private: + const search::attribute::IAttributeVector * _attribute; +public: + EnumVector(const search::attribute::IAttributeVector * attribute) : _attribute(attribute) {} + void insert(const vespalib::stringref & label, const vespalib::stringref & value) { + search::attribute::EnumHandle e; + if (_attribute->findEnum(label.c_str(), e)) { + _vector.push_back(std::make_pair(e, util::strToNum(value))); + } + } +}; + + +/** + * Implements the executor for the dotproduct feature. + */ +template +class DotProductExecutor : public fef::FeatureExecutor { +private: + const search::attribute::IAttributeVector * _attribute; + Vector _vector; + Buffer _buffer; + +public: + DotProductExecutor(const search::attribute::IAttributeVector * attribute, const Vector & vector); + virtual void execute(fef::MatchData & data); +}; + +} + +namespace array { + +/** + * Implements the executor for the dotproduct feature. + */ +template +class DotProductExecutor : public fef::FeatureExecutor { +public: + typedef multivalue::Value AT; + typedef std::vector V; +protected: + const A * _attribute; +private: + vespalib::hwaccelrated::IAccelrated::UP _multiplier; + V _vector; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count); +public: + DotProductExecutor(const A * attribute, const V & vector); + virtual void execute(fef::MatchData & data); +}; + +template +class DotProductByCopyExecutor : public DotProductExecutor { +public: + typedef typename DotProductExecutor::V V; + DotProductByCopyExecutor(const A * attribute, const V & vector); +private: + typedef typename DotProductExecutor::AT AT; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count); + std::vector _copy; +}; + +template +class SparseDotProductExecutor : public DotProductExecutor { +public: + typedef std::vector IV; + typedef typename DotProductExecutor::V V; + SparseDotProductExecutor(const A * attribute, const V & vector, const IV & indexes); +private: 
+ typedef typename DotProductExecutor::AT AT; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count); +protected: + IV _indexes; + std::vector _scratch; +}; + +template +class SparseDotProductByCopyExecutor : public SparseDotProductExecutor { +public: + typedef std::vector IV; + typedef typename DotProductExecutor::V V; + SparseDotProductByCopyExecutor(const A * attribute, const V & vector, const IV & indexes); +private: + typedef typename DotProductExecutor::AT AT; + virtual size_t getAttributeValues(uint32_t docid, const AT * & count); + std::vector _copy; +}; + +} + +} + + +/** + * Implements the blueprint for the dotproduct executor. + */ +class DotProductBlueprint : public fef::Blueprint { +private: + vespalib::string _defaultAttribute; + vespalib::string _queryVector; + + vespalib::string getAttribute(const fef::IQueryEnvironment & env) const; + +public: + /** + * Constructs a blueprint. + */ + DotProductBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const fef::IIndexEnvironment & env, + fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual fef::ParameterDescriptions getDescriptions() const { + return fef::ParameterDescriptions().desc().attribute(fef::ParameterCollection::ANY).string(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const fef::IIndexEnvironment & env, + const fef::ParameterList &params); + + virtual void prepareSharedState(const fef::IQueryEnvironment & queryEnv, fef::IObjectStore & objectStore) const; + + // Inherit doc from Blueprint.
+ virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const; + +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp b/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp new file mode 100644 index 00000000000..9b5945432e1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/element_completeness_feature.cpp @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.elementcompleteness"); +#include "element_completeness_feature.h" + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +ElementCompletenessExecutor::ElementCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const ElementCompletenessParams &params) + : _params(params), + _terms(), + _queue(), + _sumTermWeight(0) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const search::fef::ITermData *termData = env.getTerm(i); + if (termData->getWeight().percent() != 0) { // only consider query terms with contribution + typedef search::fef::ITermFieldRangeAdapter FRA; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const search::fef::ITermFieldData &tfd = iter.get(); + if (tfd.getFieldId() == _params.fieldId) { + int termWeight = termData->getWeight().percent(); + _sumTermWeight += termWeight; + _terms.push_back(Term(tfd.getHandle(), termWeight)); + } + } + } + } +} + +void +ElementCompletenessExecutor::execute(search::fef::MatchData &data) +{ + assert(_queue.empty()); + for (size_t i = 0; i < _terms.size(); ++i) { + search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms[i].termHandle); + if (tfmd->getDocId() == data.getDocId()) { + Item item(i, tfmd->begin(), tfmd->end()); + if (item.pos != item.end) { + _queue.push(item); + } + } + } +
State best(0, 0); + while (!_queue.empty()) { + uint32_t elementId = _queue.front().pos->getElementId(); + State state(_queue.front().pos->getElementWeight(), + _queue.front().pos->getElementLen()); + while (!_queue.empty() && _queue.front().pos->getElementId() == elementId) { + state.addMatch(_terms[_queue.front().termIdx].termWeight); + Item &item = _queue.front(); + while (item.pos != item.end && item.pos->getElementId() == elementId) { + ++item.pos; + } + if (item.pos == item.end) { + _queue.pop_front(); + } else { + _queue.adjust(); + } + } + state.calculateScore(_sumTermWeight, _params.fieldCompletenessImportance); + if (state.score > best.score) { + best = state; + } + } + *data.resolveFeature(outputs()[0]) = best.completeness; + *data.resolveFeature(outputs()[1]) = best.fieldCompleteness; + *data.resolveFeature(outputs()[2]) = best.queryCompleteness; + *data.resolveFeature(outputs()[3]) = best.elementWeight; +} + +//----------------------------------------------------------------------------- + +ElementCompletenessBlueprint::ElementCompletenessBlueprint() + : Blueprint("elementCompleteness"), + _output(), + _params() +{ + _output.push_back("completeness"); + _output.push_back("fieldCompleteness"); + _output.push_back("queryCompleteness"); + _output.push_back("elementWeight"); +} + +void +ElementCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo &field = *env.getField(i); + if (field.type() == search::fef::FieldType::INDEX) { + if (!field.isFilter()) { + search::fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field.name()); + for (size_t out = 0; out < _output.size(); ++out) { + visitor.visitDumpFeature(fnb.output(_output[out]).buildName()); + } + } + } + } +} + +search::fef::Blueprint::UP +ElementCompletenessBlueprint::createInstance() const +{ + return 
Blueprint::UP(new ElementCompletenessBlueprint()); +} + +bool +ElementCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + const search::fef::FieldInfo *field = params[0].asField(); + + _params.fieldId = field->id(); + const search::fef::Properties &lst = env.getProperties(); + search::fef::Property obj = lst.lookup(getName(), "fieldCompletenessImportance"); + if (obj.found()) { + _params.fieldCompletenessImportance = atof(obj.get().c_str()); + } + describeOutput(_output[0], "combined completeness for best scored element"); + describeOutput(_output[1], "best scored element completeness"); + describeOutput(_output[2], "query completeness for best scored element"); + describeOutput(_output[3], "element weight of best scored element"); + env.hintFieldAccess(field->id()); + return true; +} + +search::fef::FeatureExecutor::LP +ElementCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new ElementCompletenessExecutor(env, _params)); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/element_completeness_feature.h b/searchlib/src/vespa/searchlib/features/element_completeness_feature.h new file mode 100644 index 00000000000..b092fcd8fa1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/element_completeness_feature.h @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +struct ElementCompletenessParams { + uint32_t fieldId; + feature_t fieldCompletenessImportance; + ElementCompletenessParams() + : fieldId(search::fef::IllegalFieldId), + fieldCompletenessImportance(0.5) {} +}; + +//----------------------------------------------------------------------------- + +class ElementCompletenessExecutor : public search::fef::FeatureExecutor +{ +private: + struct Term { + search::fef::TermFieldHandle termHandle; + int termWeight; + Term(search::fef::TermFieldHandle handle, int weight) + : termHandle(handle), termWeight(weight) {} + }; + + struct Item { + uint32_t termIdx; + search::fef::TermFieldMatchData::PositionsIterator pos; + search::fef::TermFieldMatchData::PositionsIterator end; + Item(uint32_t idx, + search::fef::TermFieldMatchData::PositionsIterator p, + search::fef::TermFieldMatchData::PositionsIterator e) + : termIdx(idx), pos(p), end(e) {} + bool operator<(const Item &other) const { + return (pos->getElementId() < other.pos->getElementId()); + } + }; + + struct State { + int elementWeight; + uint32_t elementLength; + uint32_t matchedTerms; + int sumTermWeight; + double score; + feature_t completeness; + feature_t fieldCompleteness; + feature_t queryCompleteness; + + State(int weight, uint32_t length) + : elementWeight(weight), elementLength(length), + matchedTerms(0), sumTermWeight(0), + score(0.0), + completeness(0.0), fieldCompleteness(0.0), queryCompleteness(0.0) {} + + void addMatch(int termWeight) { + ++matchedTerms; + sumTermWeight += termWeight; + } + + void calculateScore(int totalTermWeight, double factor) { + double matches = std::min(elementLength, matchedTerms); + queryCompleteness = ((double)sumTermWeight / (double)totalTermWeight); + fieldCompleteness = (matches / (double)elementLength); + completeness = (fieldCompleteness * 
factor) + + (queryCompleteness * (1 - factor)); + score = completeness * (double)elementWeight; + } + }; + + ElementCompletenessParams _params; + std::vector _terms; + vespalib::PriorityQueue _queue; + int _sumTermWeight; + + static bool nextElement(Item &item); + +public: + ElementCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const ElementCompletenessParams &params); + virtual bool isPure() { return _terms.empty(); } + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class ElementCompletenessBlueprint : public search::fef::Blueprint +{ +private: + std::vector _output; + ElementCompletenessParams _params; + +public: + ElementCompletenessBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + + // Inherit doc from Blueprint.
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; + + // for testing + const ElementCompletenessParams &getParams() const { return _params; } +}; + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp new file mode 100644 index 00000000000..a0b294d390e --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/element_similarity_feature.cpp @@ -0,0 +1,417 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.elementsimilarity"); +#include "element_similarity_feature.h" +#include +#include + +namespace search { +namespace features { + +namespace { + +//----------------------------------------------------------------------------- + +struct Aggregator { + typedef std::unique_ptr UP; + virtual UP create() const = 0; + virtual void clear() = 0; + virtual void add(double) = 0; + virtual double get() const = 0; + virtual ~Aggregator() {} +}; + +struct MaxAggregator : Aggregator { + size_t count; + double value; + MaxAggregator() : count(0), value(0.0) {} + virtual UP create() const override { return UP(new MaxAggregator()); } + virtual void clear() override { count = 0; value = 0.0; } + virtual void add(double v) override { value = ((++count == 1) || (v > value)) ? 
v : value; } + virtual double get() const override { return value; } +}; + +struct AvgAggregator : Aggregator { + size_t count; + double value; + AvgAggregator() : count(0), value(0.0) {} + virtual UP create() const override { return UP(new AvgAggregator()); } + virtual void clear() override { count = 0; value = 0.0; } + virtual void add(double v) override { ++count; value += v; } + virtual double get() const override { return (count == 0) ? 0.0 : (value/count); } +}; + +struct SumAggregator : Aggregator { + double value; + SumAggregator() : value(0.0) {} + virtual UP create() const override { return UP(new SumAggregator()); } + virtual void clear() override { value = 0.0; } + virtual void add(double v) override { value += v; } + virtual double get() const override { return value; } +}; + +Aggregator::UP create_aggregator(const vespalib::string &name) { + if (name == "max") { + return Aggregator::UP(new MaxAggregator()); + } + if (name == "avg") { + return Aggregator::UP(new AvgAggregator()); + } + if (name == "sum") { + return Aggregator::UP(new SumAggregator()); + } + return Aggregator::UP(nullptr); +} + +//----------------------------------------------------------------------------- + +typedef double (*function_5)(double, double, double, double, double); +typedef std::pair OutputSpec; + +//----------------------------------------------------------------------------- + +struct VectorizedQueryTerms { + struct Term { + fef::TermFieldHandle handle; + int weight; + int index; + Term(fef::TermFieldHandle handle_in, int weight_in, int index_in) + : handle(handle_in), weight(weight_in), index(index_in) {} + }; + + std::vector handles; + std::vector weights; + int total_weight; + + VectorizedQueryTerms(const VectorizedQueryTerms &) = delete; + VectorizedQueryTerms(VectorizedQueryTerms &&rhs) + : handles(std::move(rhs.handles)), weights(std::move(rhs.weights)), total_weight(rhs.total_weight) {} + VectorizedQueryTerms(const fef::IQueryEnvironment &env, uint32_t field_id) + 
: handles(), weights(), total_weight(0) + { + std::vector terms; + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const fef::ITermData *termData = env.getTerm(i); + if (termData->getWeight().percent() != 0) { // only consider query terms with contribution + typedef fef::ITermFieldRangeAdapter FRA; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const fef::ITermFieldData &tfd = iter.get(); + if (tfd.getFieldId() == field_id) { + int term_weight = termData->getWeight().percent(); + total_weight += term_weight; + terms.push_back(Term(tfd.getHandle(), term_weight, + termData->getTermIndex())); + } + } + } + } + std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b){ return (a.index < b.index); }); + handles.reserve(terms.size()); + weights.reserve(terms.size()); + for (size_t i = 0; i < terms.size(); ++i) { + handles.push_back(terms[i].handle); + weights.push_back(terms[i].weight); + } + } +}; + +//----------------------------------------------------------------------------- + +struct State { + uint32_t element_length; + uint32_t matched_terms; + int sum_term_weight; + uint32_t last_pos; + double sum_proximity_score; + uint32_t last_idx; + uint32_t num_in_order; + + double proximity; + double order; + double query_coverage; + double field_coverage; + double element_weight; + + State(uint32_t element_length_in, int32_t element_weight_in, + uint32_t first_pos, int32_t first_weight, uint32_t first_idx) + : element_length(element_length_in), + matched_terms(1), sum_term_weight(first_weight), + last_pos(first_pos), sum_proximity_score(0.0), + last_idx(first_idx), num_in_order(0), + proximity(0.0), order(0.0), + query_coverage(0.0), field_coverage(0.0), + element_weight(element_weight_in) {} + + double proximity_score(uint32_t dist) { + return (dist > 8) ? 
0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); + } + + bool want_match(uint32_t pos) { + return (pos > last_pos); + } + + void addMatch(uint32_t pos, int32_t weight, uint32_t idx) { + sum_proximity_score += proximity_score(pos - last_pos); + num_in_order += (idx > last_idx) ? 1 : 0; + last_pos = pos; + last_idx = idx; + ++matched_terms; + sum_term_weight += weight; + } + + void calculate_scores(size_t num_query_terms, int total_term_weight) { + double matches = std::min(element_length, matched_terms); + if (matches < 2) { + proximity = proximity_score(element_length); + order = (num_query_terms == 1) ? 1.0 : 0.0; + } else { + proximity = sum_proximity_score / (matches - 1); + order = num_in_order / (double) (matches - 1); + } + query_coverage = sum_term_weight / (double) total_term_weight; + field_coverage = matches / (double) element_length; + } +}; + +//----------------------------------------------------------------------------- + +class ElementSimilarityExecutor : public fef::FeatureExecutor +{ +private: + typedef fef::TermFieldMatchData::PositionsIterator ITR; + + struct CmpPosition { + ITR *pos; + CmpPosition(ITR *pos_in) : pos(pos_in) {} + bool operator()(uint16_t a, uint16_t b) { + return (pos[a]->getPosition() == pos[b]->getPosition()) + ? 
(a < b) + : (pos[a]->getPosition() < pos[b]->getPosition()); + } + }; + + struct CmpElement { + ITR *pos; + CmpElement(ITR *pos_in) : pos(pos_in) {} + bool operator()(uint16_t a, uint16_t b) { + return pos[a]->getElementId() < pos[b]->getElementId(); + } + }; + + typedef vespalib::PriorityQueue PositionQueue; + typedef vespalib::PriorityQueue ElementQueue; + + VectorizedQueryTerms _terms; + std::vector _pos; + std::vector _end; + PositionQueue _position_queue; + ElementQueue _element_queue; + std::vector _outputs; + +public: + ElementSimilarityExecutor(VectorizedQueryTerms &&terms, std::vector &&outputs_in) + : _terms(std::move(terms)), + _pos(_terms.handles.size(), nullptr), // one iterator slot per query term handle + _end(_terms.handles.size(), nullptr), + _position_queue(CmpPosition(_pos.data())), // data() rather than &_pos[0]: _pos is empty when there are no term handles + _element_queue(CmpElement(_pos.data())), // both comparators index into _pos by term number + _outputs(std::move(outputs_in)) {} + + virtual bool isPure() { return _terms.handles.empty(); } + + void requeue_term(uint16_t term, uint32_t element) { + while (_pos[term] != _end[term] && + _pos[term]->getElementId() == element) + { + ++_pos[term]; + } + if (_pos[term] != _end[term]) { + _element_queue.push(term); + } + } + + virtual void execute(fef::MatchData &data) { + for (auto &output: _outputs) { + output.second->clear(); + } + for (size_t i = 0; i < _terms.handles.size(); ++i) { + fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms.handles[i]); + if (tfmd->getDocId() == data.getDocId()) { + _pos[i] = tfmd->begin(); + _end[i] = tfmd->end(); + if (_pos[i] != _end[i]) { + _element_queue.push(i); + } + } + } + while (!_element_queue.empty()) { + uint32_t elementId = _pos[_element_queue.front()]->getElementId(); + while (!_element_queue.empty() && _pos[_element_queue.front()]->getElementId() == elementId) { + _position_queue.push(_element_queue.front()); + _element_queue.pop_front(); + } + uint16_t first = _position_queue.front(); + State state(_pos[first]->getElementLen(), + _pos[first]->getElementWeight(), + _pos[first]->getPosition(), +
_terms.weights[first], + first); + requeue_term(_position_queue.front(), elementId); + _position_queue.pop_front(); + while (!_position_queue.empty()) { + uint16_t item = _position_queue.front(); + if (state.want_match(_pos[item]->getPosition())) { + state.addMatch(_pos[item]->getPosition(), + _terms.weights[item], + item); + requeue_term(_position_queue.front(), elementId); + _position_queue.pop_front(); + } else { + ++_pos[item]; + if (_pos[item] == _end[item]) { + _position_queue.pop_front(); + } else { + _position_queue.adjust(); + } + } + } + state.calculate_scores(_terms.handles.size(), _terms.total_weight); + for (auto &output: _outputs) { + output.second->add(output.first(state.proximity, state.order, + state.query_coverage, state.field_coverage, + state.element_weight)); + } + } + for (size_t i = 0; i < _outputs.size(); ++i) { + *data.resolveFeature(outputs()[i]) = _outputs[i].second->get(); + } + } +}; + +//----------------------------------------------------------------------------- + +std::vector > extract_properties(const fef::Properties &props, + const vespalib::string &ns, const vespalib::string &first_name, const vespalib::string &first_default) +{ + struct MyVisitor : fef::IPropertiesVisitor { + const vespalib::string &first_name; + std::vector > &result; + MyVisitor(const vespalib::string &first_name_in, + std::vector > &result_in) + : first_name(first_name_in), result(result_in) {} + virtual void visitProperty(const fef::Property::Value &key, + const fef::Property &values) override + { + if (key != first_name) { + result.emplace_back(key, values.get()); + } + } + }; + std::vector > result; + result.emplace_back(first_name, props.lookup(ns, first_name).get(first_default)); + MyVisitor my_visitor(first_name, result); + props.visitNamespace(ns, my_visitor); + return result; +} + +std::vector > get_outputs(const fef::Properties &props, + const vespalib::string &feature) +{ + return extract_properties(props, feature + ".output", "default", 
"max((0.35*p+0.15*o+0.30*q+0.20*f)*w)"); +} + +} // namespace features:: + +//----------------------------------------------------------------------------- + +struct ElementSimilarityBlueprint::OutputContext { + vespalib::eval::CompileCache::Token::UP compile_token; + Aggregator::UP aggregator_factory; + OutputContext(const vespalib::eval::Function &function, + Aggregator::UP aggregator) + : compile_token(vespalib::eval::CompileCache::compile(function, vespalib::eval::PassParams::SEPARATE)), + aggregator_factory(std::move(aggregator)) {} +}; + +//----------------------------------------------------------------------------- + +ElementSimilarityBlueprint::ElementSimilarityBlueprint() + : Blueprint("elementSimilarity"), _field_id(fef::IllegalHandle), _outputs() {} + +ElementSimilarityBlueprint::~ElementSimilarityBlueprint() {} + +void +ElementSimilarityBlueprint::visitDumpFeatures(const fef::IIndexEnvironment &env, + fef::IDumpFeatureVisitor &visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const fef::FieldInfo &field = *env.getField(i); + if ((field.type() == fef::FieldType::INDEX) && + (field.collection() != fef::CollectionType::SINGLE) && + ( ! 
field.isFilter())) + { + fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field.name()); + auto outputs = get_outputs(env.getProperties(), fnb.buildName()); + visitor.visitDumpFeature(fnb.output("").buildName()); + for (size_t out_idx = 1; out_idx < outputs.size(); ++out_idx) { + visitor.visitDumpFeature(fnb.output(outputs[out_idx].first).buildName()); + } + } + } +} + +bool +ElementSimilarityBlueprint::setup(const fef::IIndexEnvironment &env, + const fef::ParameterList &params) +{ + const fef::FieldInfo *field = params[0].asField(); + _field_id = field->id(); + fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field->name()); + auto outputs = get_outputs(env.getProperties(), fnb.buildName()); + for (const auto &entry: outputs) { + describeOutput(entry.first, entry.second); + vespalib::string aggr_name; + vespalib::string expr; + vespalib::string error; + if (!vespalib::eval::Function::unwrap(entry.second, aggr_name, expr, error)) { + LOG(warning, "'%s': could not extract aggregator and expression for output '%s' from config value '%s' (%s)", + fnb.buildName().c_str(), entry.first.c_str(), entry.second.c_str(), error.c_str()); + return false; + } + Aggregator::UP aggr = create_aggregator(aggr_name); + if (aggr.get() == nullptr) { + LOG(warning, "'%s': unknown aggregator '%s'", fnb.buildName().c_str(), aggr_name.c_str()); + return false; + } + std::vector args({"p","o","q","f","w"}); + vespalib::eval::Function function = vespalib::eval::Function::parse(args, expr); + if (function.has_error()) { + LOG(warning, "'%s': per-element expression parse error: %s", + fnb.buildName().c_str(), function.get_error().c_str()); + return false; + } + _outputs.push_back(OutputContext_UP(new OutputContext(function, std::move(aggr)))); + } + env.hintFieldAccess(field->id()); + return true; +} + +fef::FeatureExecutor::LP +ElementSimilarityBlueprint::createExecutor(const fef::IQueryEnvironment &env) const +{ + std::vector output_specs; + for (const
auto &output: _outputs) { + output_specs.emplace_back(output->compile_token->get().get_function<5>(), + output->aggregator_factory->create()); + } + return fef::FeatureExecutor::LP(new ElementSimilarityExecutor(VectorizedQueryTerms(env, _field_id), std::move(output_specs))); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/element_similarity_feature.h b/searchlib/src/vespa/searchlib/features/element_similarity_feature.h new file mode 100644 index 00000000000..e4424b29b1f --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/element_similarity_feature.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +class ElementSimilarityBlueprint : public search::fef::Blueprint +{ +private: + struct OutputContext; + typedef std::unique_ptr OutputContext_UP; + + uint32_t _field_id; + std::vector _outputs; + +public: + ElementSimilarityBlueprint(); + virtual ~ElementSimilarityBlueprint(); + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + virtual search::fef::Blueprint::UP createInstance() const { + return Blueprint::UP(new ElementSimilarityBlueprint()); + } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY); + } + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms); + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + 
+//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp new file mode 100644 index 00000000000..c77f47e3d08 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.cpp @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +LOG_SETUP(".features.euclidean_distance_feature"); +#include +#include +#include +#include "valuefeature.h" + +#include "euclidean_distance_feature.h" +#include "array_parser.hpp" +#include +#include + +using namespace search::attribute; +using namespace search::fef; + +namespace search { +namespace features { + + +template +EuclideanDistanceExecutor::EuclideanDistanceExecutor(const search::attribute::IAttributeVector &attribute, QueryVectorType vector) : + FeatureExecutor(), + _attribute(attribute), + _vector(std::move(vector)), + _attributeBuffer() +{ +} + +template +feature_t EuclideanDistanceExecutor::euclideanDistance(const BufferType &v1, const QueryVectorType &v2) +{ + feature_t val = 0; + size_t commonRange = std::min(static_cast( v1.size() ), v2.size()); + for (size_t i = 0; i < commonRange; ++i) { + feature_t diff = v1[i] - v2[i]; + val += diff * diff; + } + return std::sqrt(val); +} + + +template +void +EuclideanDistanceExecutor::execute(MatchData &match) +{ + _attributeBuffer.fill(_attribute, match.getDocId()); + *match.resolveFeature(outputs()[0]) = euclideanDistance(_attributeBuffer, _vector); +} + + +EuclideanDistanceBlueprint::EuclideanDistanceBlueprint() : + Blueprint("euclideanDistance"), + _attributeName(), + _queryVector() +{ +} + +void +EuclideanDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const +{ +} + +bool 
+EuclideanDistanceBlueprint::setup(const IIndexEnvironment &env, const ParameterList ¶ms) +{ + _attributeName = params[0].getValue(); + _queryVector = params[1].getValue(); + describeOutput("distance", "The result after calculating the euclidean distance of the vector represented by the array " + "and the vector sent down with the query"); + env.hintAttributeAccess(_attributeName); + return true; +} + +Blueprint::UP +EuclideanDistanceBlueprint::createInstance() const +{ + return Blueprint::UP(new EuclideanDistanceBlueprint()); +} + +namespace { + +template +FeatureExecutor::LP create(const IAttributeVector &attribute, const Property &queryVector) +{ + std::vector v; + ArrayParser::parse(queryVector.get(), v); + return FeatureExecutor::LP(new EuclideanDistanceExecutor(attribute, std::move(v))); +} + +} + +FeatureExecutor::LP +EuclideanDistanceBlueprint::createExecutor(const IQueryEnvironment &env) const +{ + const IAttributeVector * attribute = env.getAttributeContext().getAttribute(_attributeName); + if (attribute == NULL) { + LOG(warning, "The attribute vector '%s' was not found in the attribute manager, returning executor with default value.", + _attributeName.c_str()); + return FeatureExecutor::LP(new SingleZeroValueExecutor()); + } + + Property queryVector = env.getProperties().lookup(getBaseName(), _queryVector); + + if (attribute->getCollectionType() == attribute::CollectionType::ARRAY) { + if (attribute->isIntegerType()) { + return create(*attribute, queryVector); + } else if (attribute->isFloatingPointType()) { + return create(*attribute, queryVector); + } + } + LOG(warning, "The attribute vector '%s' is NOT of type array" + ", returning executor with default value.", attribute->getName().c_str()); + return FeatureExecutor::LP(new SingleZeroValueExecutor()); + +} + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h 
b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h new file mode 100644 index 00000000000..23df79621ee --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/euclidean_distance_feature.h @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + + +namespace search { +namespace features { + + +/** + * Implements the executor for the eucledian distance feature. + */ +template +class EuclideanDistanceExecutor : public fef::FeatureExecutor { + +public: + typedef search::attribute::AttributeContent BufferType; + typedef std::vector QueryVectorType; + +private: + const search::attribute::IAttributeVector &_attribute; + const QueryVectorType _vector; + BufferType _attributeBuffer; + + feature_t euclideanDistance(const BufferType &v1, const QueryVectorType &v2); + +public: + + EuclideanDistanceExecutor(const search::attribute::IAttributeVector &attribute, QueryVectorType vector); + virtual void execute(fef::MatchData &data) override; +}; + + +/** + * Implements the blueprint for the euclidean distance executor. + */ +class EuclideanDistanceBlueprint : public fef::Blueprint { +private: + vespalib::string _attributeName; + vespalib::string _queryVector; + +public: + /** + * Constructs a blueprint. + */ + EuclideanDistanceBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const fef::IIndexEnvironment &env, + fef::IDumpFeatureVisitor &visitor) const override; + + // Inherit doc from Blueprint. + virtual fef::Blueprint::UP createInstance() const override; + + // Inherit doc from Blueprint. + virtual fef::ParameterDescriptions getDescriptions() const override { + return fef::ParameterDescriptions().desc().attribute(fef::ParameterCollection::ANY).string(); + } + + // Inherit doc from Blueprint. 
+ virtual bool setup(const fef::IIndexEnvironment &env, + const fef::ParameterList ¶ms) override; + + // Inherit doc from Blueprint. + virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment &env) const override; + +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp b/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp new file mode 100644 index 00000000000..539dc3b0343 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldinfofeature.cpp @@ -0,0 +1,235 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.fieldinfo"); + +#include +#include +#include +#include +#include +#include +#include +#include "fieldinfofeature.h" +#include "valuefeature.h" +#include "utils.h" + +namespace search { +namespace features { + +IndexFieldInfoExecutor::IndexFieldInfoExecutor(feature_t type, feature_t isFilter, + uint32_t field, uint32_t fieldHandle) + : fef::FeatureExecutor(), + _type(type), + _isFilter(isFilter), + _field(field), + _fieldHandle(fieldHandle) +{ + // empty +} + +void +IndexFieldInfoExecutor::execute(fef::MatchData &data) +{ + *data.resolveFeature(outputs()[0]) = _type; + *data.resolveFeature(outputs()[1]) = _isFilter; + *data.resolveFeature(outputs()[2]) = 1.0f; // searched + fef::TermFieldMatchData *tfmd = data.resolveTermField(_fieldHandle); + if (tfmd->getDocId() == data.getDocId()) { + *data.resolveFeature(outputs()[3]) = 1.0f; // hit + } else { + *data.resolveFeature(outputs()[3]) = 0.0f; // no hit + } + fef::FieldPositionsIterator itr = tfmd->getIterator(); + *data.resolveFeature(outputs()[4]) = itr.getFieldLength(); + if (itr.valid()) { + uint32_t first = itr.getPosition(); + uint32_t last = 0; + uint32_t cnt = 0; + for (; itr.valid(); itr.next()) { + last = itr.getPosition(); + ++cnt; + } + *data.resolveFeature(outputs()[5]) = first; + 
*data.resolveFeature(outputs()[6]) = last; + *data.resolveFeature(outputs()[7]) = cnt; + } else { + *data.resolveFeature(outputs()[5]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // first + *data.resolveFeature(outputs()[6]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // last + *data.resolveFeature(outputs()[7]) = 0.0f; + } +} + +//----------------------------------------------------------------------------- + +AttrFieldInfoExecutor::AttrFieldInfoExecutor(feature_t type, uint32_t fieldHandle) : + FeatureExecutor(), + _type(type), + _fieldHandle(fieldHandle) +{ + // empty +} + +void +AttrFieldInfoExecutor::execute(fef::MatchData &data) +{ + *data.resolveFeature(outputs()[0]) = _type; + *data.resolveFeature(outputs()[1]) = 0.0; // not filter + *data.resolveFeature(outputs()[2]) = 1.0f; // searched + fef::TermFieldMatchData *tfmd = data.resolveTermField(_fieldHandle); + if (tfmd->getDocId() == data.getDocId()) { + *data.resolveFeature(outputs()[3]) = 1.0f; // hit + *data.resolveFeature(outputs()[4]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // len + *data.resolveFeature(outputs()[5]) = 0.0f; // first + *data.resolveFeature(outputs()[6]) = 0.0f; // last + *data.resolveFeature(outputs()[7]) = 1.0f; + } else { + *data.resolveFeature(outputs()[3]) = 0.0f; // no hit + *data.resolveFeature(outputs()[4]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // len + *data.resolveFeature(outputs()[5]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // first + *data.resolveFeature(outputs()[6]) = fef::FieldPositionsIterator::UNKNOWN_LENGTH; // last + *data.resolveFeature(outputs()[7]) = 0.0f; + } +} + +//----------------------------------------------------------------------------- + +FieldInfoBlueprint::FieldInfoBlueprint() : + fef::Blueprint("fieldInfo"), + _overview(false), + _indexcnt(0.0f), + _attrcnt(0.0f), + _type(0.0f), + _isFilter(0.0f), + _fieldId(fef::IllegalFieldId) +{ + // empty +} + +void +FieldInfoBlueprint::visitDumpFeatures(const fef::IIndexEnvironment 
&indexEnv, + fef::IDumpFeatureVisitor &visitor) const +{ + if (!indexEnv.getProperties().lookup(getBaseName(), "enable").get("").empty()) { + fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()); + for (uint32_t i = 0; i < indexEnv.getNumFields(); ++i) { + const fef::FieldInfo *fi = indexEnv.getField(i); + fnb.clearParameters().parameter(fi->name()); + fnb.output("type"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("filter"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("search"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("hit"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("len"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("first"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("last"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("cnt"); + visitor.visitDumpFeature(fnb.buildName()); + } + fnb.clearParameters(); + fnb.output("indexCnt"); + visitor.visitDumpFeature(fnb.buildName()); + fnb.output("attrCnt"); + visitor.visitDumpFeature(fnb.buildName()); + } +} + +bool +FieldInfoBlueprint::setup(const fef::IIndexEnvironment &indexEnv, + const fef::ParameterList ¶ms) +{ + if (params.empty()) { + _overview = true; + for (uint32_t i = 0; i < indexEnv.getNumFields(); ++i) { + if (indexEnv.getField(i)->type() == fef::FieldType::INDEX) { + _indexcnt += 1.0; + } + if (indexEnv.getField(i)->type() == fef::FieldType::ATTRIBUTE) { + _attrcnt += 1.0; + } + } + describeOutput("indexCnt", "total number of fields of type index"); + describeOutput("attrCnt", "total number of fields of type attribute"); + return true; + } + if (params.size() == 1) { + vespalib::string name = params[0].getValue(); + const fef::FieldInfo *fi = indexEnv.getFieldByName(name); + if (fi != 0) { + _fieldId = fi->id(); + if (fi->type() == fef::FieldType::INDEX) { + indexEnv.hintFieldAccess(_fieldId); + _type = 1.0; + } else if (fi->type() == fef::FieldType::ATTRIBUTE) { + _type = 2.0; + } + if 
(fi->isFilter()) { + _isFilter = 1.0; + } else { + _isFilter = 0.0; + } + } + describeOutput("type", "1.0 for INDEX, 2.0 for ATTRIBUTE, 0.0 for unknown (from index env)"); + describeOutput("filter", "1.0 if this is a filter, 0.0 otherwise (from index env)"); + describeOutput("search", "1.0 means first term searched this field, 0.0 means it did not"); + describeOutput("hit", "1.0 means first term got a hit in this field, 0.0 means it did not"); + describeOutput("len", "field length in number of words"); + describeOutput("first", "position of the first hit of the first term in this field"); + describeOutput("last", "position of the last hit of the first term in this field"); + describeOutput("cnt", "number of hits for the first term in this field"); + return true; + } + return false; +} + +fef::FeatureExecutor::LP +FieldInfoBlueprint::createExecutor(const fef::IQueryEnvironment &queryEnv) const +{ + if (_overview) { + std::vector values; + values.push_back(_indexcnt); + values.push_back(_attrcnt); + return fef::FeatureExecutor::LP(new ValueExecutor(values)); + } + uint32_t fieldHandle = util::getTermFieldHandle(queryEnv, 0, _fieldId); + if (fieldHandle == fef::IllegalHandle) { + std::vector values; + values.push_back(_type); + values.push_back(_isFilter); + values.push_back(0.0f); // not searched + values.push_back(0.0f); // no hit + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default field length + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default first pos + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default last pos + values.push_back(0.0f); // number of hits + return fef::FeatureExecutor::LP(new ValueExecutor(values)); + } + if (_type == 1.0) { // index + return fef::FeatureExecutor:: + LP(new IndexFieldInfoExecutor(_type, _isFilter, _fieldId, fieldHandle)); + } else if (_type == 2.0) { // attribute + return fef::FeatureExecutor::LP( + new AttrFieldInfoExecutor(_type, fieldHandle)); + } + 
std::vector values; + values.push_back(_type); + values.push_back(_isFilter); + values.push_back(1.0f); // searched + values.push_back(0.0f); // no hit + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default field length + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default first pos + values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); // default last pos + values.push_back(0.0f); // number of hits + return fef::FeatureExecutor::LP(new ValueExecutor(values)); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/fieldinfofeature.h b/searchlib/src/vespa/searchlib/features/fieldinfofeature.h new file mode 100644 index 00000000000..a7438873f97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldinfofeature.h @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +class IndexFieldInfoExecutor : public search::fef::FeatureExecutor +{ +private: + feature_t _type; // from index env + feature_t _isFilter; // from index env + uint32_t _field; + uint32_t _fieldHandle; + +public: + IndexFieldInfoExecutor(feature_t type, feature_t isFilter, + uint32_t field, uint32_t fieldHandle); + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class AttrFieldInfoExecutor : public search::fef::FeatureExecutor +{ +private: + feature_t _type; // from index env + uint32_t _fieldHandle; + +public: + AttrFieldInfoExecutor(feature_t type, uint32_t fieldHandle); + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class FieldInfoBlueprint : public search::fef::Blueprint +{ +private: + bool _overview; + feature_t 
_indexcnt; + feature_t _attrcnt; + feature_t _type; + feature_t _isFilter; + uint32_t _fieldId; + +public: + FieldInfoBlueprint(); + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &indexEnv, + search::fef::IDumpFeatureVisitor &visitor) const; + virtual search::fef::Blueprint::UP createInstance() const { return search::fef::Blueprint::UP(new FieldInfoBlueprint()); } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions(). + desc(0). + desc(1).string(); + } + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &queryEnv) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp new file mode 100644 index 00000000000..fa356a9e012 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.cpp @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.fieldlength"); + +#include +#include +#include +#include +#include +#include "fieldlengthfeature.h" +#include "valuefeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +FieldLengthExecutor:: +FieldLengthExecutor(const IQueryEnvironment &env, + uint32_t fieldId) + : FeatureExecutor(), + _fieldHandles() +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId); + if (handle != IllegalHandle) { + _fieldHandles.push_back(handle); + } + } +} + +void +FieldLengthExecutor::execute(MatchData &match) +{ + uint32_t val = 0; + bool validVal = false; + for (std::vector::const_iterator + hi = _fieldHandles.begin(), hie = _fieldHandles.end(); + hi != hie; ++hi) + { + TermFieldMatchData &tfmd = *match.resolveTermField(*hi); + if (tfmd.getDocId() == match.getDocId()) { + FieldPositionsIterator it = tfmd.getIterator(); + if (it.valid()) { + if (val < it.getFieldLength()) + val = it.getFieldLength(); + validVal = true; + } + } + } + if (!validVal) { + val = fef::FieldPositionsIterator::UNKNOWN_LENGTH; + } + feature_t value = val; + *match.resolveFeature(outputs()[0]) = value; // field length +} + +FieldLengthBlueprint::FieldLengthBlueprint() + : Blueprint("fieldLength"), + _field(NULL) +{ +} + +void +FieldLengthBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +bool +FieldLengthBlueprint::setup(const IIndexEnvironment &env, + const ParameterList ¶ms) +{ + (void) env; + _field = params[0].asField(); + describeOutput("out", "The length of this field."); + return true; +} + +Blueprint::UP +FieldLengthBlueprint::createInstance() const +{ + return Blueprint::UP(new FieldLengthBlueprint()); +} + +FeatureExecutor::LP +FieldLengthBlueprint::createExecutor(const IQueryEnvironment &env) const +{ + if (_field == 0) { + std::vector values; + 
values.push_back(fef::FieldPositionsIterator::UNKNOWN_LENGTH); + return FeatureExecutor::LP(new ValueExecutor(values)); + } + return FeatureExecutor::LP(new FieldLengthExecutor(env, _field->id())); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h new file mode 100644 index 00000000000..0d55881f0a8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldlengthfeature.h @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for field length. + */ +class FieldLengthExecutor : public search::fef::FeatureExecutor { +private: + std::vector _fieldHandles; + +public: + /** + * Constructs an executor for field length. + * + * @param env The query environment + * @param fieldId The field id + */ + FieldLengthExecutor(const search::fef::IQueryEnvironment &env, + uint32_t fieldId); + virtual void execute(search::fef::MatchData &data); +}; + +/** + * Implements the blueprint for field length. + */ +class FieldLengthBlueprint : public search::fef::Blueprint { +private: + const search::fef::FieldInfo *_field; + +public: + /** + * Constructs a blueprint for field length. + */ + FieldLengthBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore b/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt new file mode 100644 index 00000000000..2bbdf179763 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_fieldmatch OBJECT + SOURCES + computer.cpp + metrics.cpp + params.cpp + segmentstart.cpp + simplemetrics.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp new file mode 100644 index 00000000000..f2e1601ed28 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.cpp @@ -0,0 +1,558 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.fieldmatch.computer"); + +#include +#include +#include +#include +#include +#include +#include "computer.h" + +using namespace search::fef; + +namespace search { +namespace features { +namespace fieldmatch { + + +/* Sets up the per-query state: sums weight and significance over all query terms of the splitter, keeps the terms that search this field, and allocates one segment start per kept term plus one. */ +Computer::Computer(const vespalib::string &propertyNamespace, const PhraseSplitter &splitter, + const FieldInfo &fieldInfo, const Params ¶ms) : + _splitter(splitter), + _fieldId(fieldInfo.id()), + _params(params), + _tracing(false), + _trace(), + _useCachedHits(true), + _queryTerms(), + _queryTermFieldMatch(), + _totalTermWeight(0), + _totalTermSignificance(0.0f), + _match(NULL), + _fieldLength(FieldPositionsIterator::UNKNOWN_LENGTH), + _currentMetrics(this), + _finalMetrics(this), + _simpleMetrics(params), + _segments(), + _alternativeSegmentationsTried(0), + _cachedHits() +{ + // Store term data for all terms searching in this field + for (uint32_t i = 0; i < splitter.getNumTerms(); ++i) { + QueryTerm qt = QueryTermFactory::create(splitter, i, true, true); + _totalTermWeight += qt.termData()->getWeight().percent(); + _totalTermSignificance += qt.significance(); + _simpleMetrics.addQueryTerm(qt.termData()->getWeight().percent()); + const ITermFieldData *field = qt.termData()->lookupField(_fieldId); + if (field != 0) { + qt.fieldHandle(field->getHandle()); + _queryTerms.push_back(qt); + _simpleMetrics.addSearchedTerm(qt.termData()->getWeight().percent()); + _queryTermFieldMatch.push_back(NULL); + _cachedHits.push_back(BitVectorData()); + } + } + + /* The sums computed above are passed to get() as fallback values - presumably they apply only when the query property is not set (TODO confirm against the Property API). */ + _totalTermWeight = atoi(splitter.getProperties().lookup(propertyNamespace, "totalTermWeight"). + get(vespalib::make_string("%d", _totalTermWeight)).c_str()); + _totalTermSignificance = atof(splitter.getProperties().lookup(propertyNamespace, "totalTermSignificance"). 
+ get(vespalib::make_string("%f", _totalTermSignificance)).c_str()); + if (splitter.getProperties().lookup(propertyNamespace, "totalTermWeight").found()) { + _simpleMetrics.setTotalWeightInQuery(_totalTermWeight); + } + + // update current and final metrics after initialization + _currentMetrics = Metrics(this); + _finalMetrics = Metrics(this); + + // num query terms searching in this field + 1 + for (uint32_t i = 0; i < (getNumQueryTerms() + 1); ++i) { + _segments.push_back(SegmentData(SegmentStart::SP(new SegmentStart(this, _currentMetrics)))); + } +} + +/* Prepares this computer for a new document: resets the metrics, invalidates all segment starts and cached hit vectors, then resolves the term field match data of every query term against the given match data. */ +void +Computer::reset(const MatchData & match) +{ + _currentMetrics.reset(); + _finalMetrics.reset(); + _simpleMetrics.resetMatchData(); + for (uint32_t i = 0; i < _segments.size(); ++i) { + if (_segments[i].valid) { + _segments[i].valid = false; + } + } + _alternativeSegmentationsTried = 0; + for (uint32_t i = 0; i < _cachedHits.size(); ++i) { + if (_cachedHits[i].valid) { + _cachedHits[i].valid = false; + } + } + + _match = &match; + _fieldLength = FieldPositionsIterator::UNKNOWN_LENGTH; + + for (uint32_t i = 0; i < _queryTerms.size(); ++i) { + const ITermData *td = _queryTerms[i].termData(); + const TermFieldMatchData *tfmd = _splitter.resolveTermField(_queryTerms[i].fieldHandle()); + if (tfmd->getDocId() != match.getDocId()) { // only term match data if we have a hit + tfmd = NULL; + } else { + FieldPositionsIterator it = tfmd->getIterator(); + uint32_t fieldLength = it.getFieldLength(); + if (it.valid()) { + _simpleMetrics.addMatchWithPosOcc(td->getWeight().percent()); + if (fieldLength == 0 || fieldLength == FieldPositionsIterator::UNKNOWN_LENGTH) { + _simpleMetrics.hasMatchWithInvalidFieldLength(); + } + } else { + _simpleMetrics.addMatch(td->getWeight().percent()); + } + if (_fieldLength == FieldPositionsIterator::UNKNOWN_LENGTH) { + _fieldLength = fieldLength; // save away the first valid field length + } + + if (_useCachedHits && it.valid() && fieldLength != FieldPositionsIterator::UNKNOWN_LENGTH) { + 
// cache the field position iterator in a bit vector for faster lookup in + // findClosestInFieldBySemanticDistance() + _cachedHits[i].bitvector.clear(); + _cachedHits[i].valid = true; + /* The vector is sized by _fieldLength (the first known field length of this document), not by the fieldLength reported for this term. */ + if (_cachedHits[i].bitvector.size() < _fieldLength) { + _cachedHits[i].bitvector.resize(_fieldLength); + } + for (; it.valid(); it.next()) { + uint32_t fieldPos = it.getPosition(); + if (__builtin_expect(fieldPos < _fieldLength, true)) + _cachedHits[i].bitvector.setBit(fieldPos); + else { + handleError(fieldPos, match.getDocId()); + } + } + } + } + _queryTermFieldMatch[i] = tfmd; + } +} + +/* Logs a debug message about a field position at or beyond the field length. A function-local static counter stops the logging after 1000 calls. */ +void +Computer::handleError(uint32_t fieldPos, uint32_t docId) const +{ + static int errcnt; + if (errcnt < 1000) { + errcnt++; + const FieldInfo * finfo = _splitter.getIndexEnvironment().getField(getFieldId()); + LOG(debug, "Bad field position %u >= fieldLength %u for field '%s' document %u. " + "Document was probably refed during query (Ticket 7104969)", + fieldPos, _fieldLength, + finfo != NULL ? finfo->name().c_str() : "unknown field", + docId); + } +} + +/* Explores the segmentations for the document given to reset() and returns the resulting final metrics. */ +const Metrics & +Computer::run() +{ + exploreSegments(); + return _finalMetrics; +} + +/* Tries semantic distances from startSemanticDistance up to the field length and returns the first one whose field index holds a hit for query term i, or -1 when there is none. Uses the cached bit vector when _useCachedHits is set, otherwise walks the position iterator of the term. */ +int +Computer::findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t startSemanticDistance) +{ + if (_useCachedHits) { + if (!_cachedHits[i].valid) { + return -1; // not matched + } + + const BitVector & hits = _cachedHits[i].bitvector; + + for (uint32_t distance = startSemanticDistance; distance < _fieldLength; distance++) { + int j = semanticDistanceToFieldIndex(distance, previousJ); + if (j < 0) { + continue; + } + + if (hits.testBit((uint32_t)j)) { + return distance; + } + } + return -1; + } + + const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i]; + if (termFieldMatch == NULL) { + return -1; // not matched + } + + for (uint32_t distance = startSemanticDistance; distance < _fieldLength; distance++) { + int j = semanticDistanceToFieldIndex(distance, previousJ); + if (j < 0) { + continue; + } + + FieldPositionsIterator it = 
termFieldMatch->getIterator(); + while (it.valid() && it.getPosition() < (uint32_t)j) { + it.next(); + } + if (it.valid() && it.getPosition() == (uint32_t)j) { + return distance; + } + } + return -1; +} + +/* Converts a semantic distance from zeroJ into a field index. A distance of -1 yields -1. */ +int +Computer::semanticDistanceToFieldIndex(int semanticDistance, uint32_t zeroJ) const +{ + if (semanticDistance == -1) { + return -1; + } + int firstSegmentLength = std::min(_params.getProximityLimit(), _fieldLength - zeroJ); + int secondSegmentLength = std::min(_params.getProximityLimit(), zeroJ); + if (semanticDistance < firstSegmentLength) { + return zeroJ + semanticDistance; + } + else if (semanticDistance < firstSegmentLength + secondSegmentLength) { + return zeroJ - semanticDistance - 1 + firstSegmentLength; + } + else if ((uint32_t)semanticDistance < _fieldLength - zeroJ + secondSegmentLength) { + return zeroJ + semanticDistance - secondSegmentLength; + } + else { + return _fieldLength - semanticDistance - 1; + } +} + +/* Converts a field index j into its semantic distance from zeroJ. An index of -1 yields -1. */ +int +Computer::fieldIndexToSemanticDistance(int j, uint32_t zeroJ) const +{ + if (j == -1) { + return -1; + } + uint32_t firstSegmentLength = std::min(_params.getProximityLimit(), _fieldLength - zeroJ); + uint32_t secondSegmentLength = std::min(_params.getProximityLimit(), zeroJ); + if ((uint32_t)j >= zeroJ) { + if ((j - zeroJ) < firstSegmentLength) { + return j - zeroJ; // 0..limit + } + else { + return j - zeroJ + secondSegmentLength; // limit*2..field.length-zeroJ + } + } + else { + if ((zeroJ - j - 1) < secondSegmentLength) { + return zeroJ - j + firstSegmentLength - 1; // limit..limit*2 + } + else { + return (zeroJ - j - 1) + _fieldLength - zeroJ; // field.length-zeroJ.. 
+ } + } +} + +/* Appends str to the trace when tracing is enabled, and returns this computer. */ +Computer & +Computer::trace(const vespalib::string &str) +{ + if (_tracing) { + _trace.push_back(str); + //LOG(info, "%s", str.c_str()); + } + return *this; +} + +/* Concatenates all collected trace entries in order, without adding any separator. */ +vespalib::string +Computer::getTrace() const +{ + vespalib::string ret = ""; + for (std::vector::const_iterator it = _trace.begin(); + it != _trace.end(); ++it) { + ret += *it; + } + return ret; +} + +/* Formats the number of query terms, the field length and the current metrics as a string. */ +vespalib::string +Computer::toString() const +{ + return vespalib::make_string("Computer(%d query terms,%d field terms,%s)", + getNumQueryTerms(), _fieldLength, + _currentMetrics.toString().c_str()); +} + +/* Starts from segment start 0 and repeatedly tries to find a segmentation from the next open segment start, closing a start when nothing is found from it. The metrics of the last valid segment start become the final metrics. */ +void +Computer::exploreSegments() +{ + if (isTracing()) { + trace(vespalib::make_string("Calculating matches for %d query terms, %d field terms.", + getNumQueryTerms(), _fieldLength)); + } + + _segments[0].segment->reset(_currentMetrics); + _segments[0].valid = true; + SegmentStart *segment = _segments[0].segment.get(); + while (segment != NULL) { + if (isTracing()) { + trace(vespalib::make_string("Looking for segment from %s...", + segment->toString().c_str())); + } + + _currentMetrics = segment->getMetrics(); // take a copy of the segment returned from the current segment. 
+ bool found = findAlternativeSegmentFrom(segment); + if (found) { + if (isTracing()) { + vespalib::string segments = "[ "; + const std::vector &lst = _currentMetrics.getSegmentStarts(); + for (uint32_t i = 0; i < lst.size(); ++i) { + segments += vespalib::make_string("%d", lst[i]); + if (i < lst.size() - 1) { + segments += ", "; + } + } + segments += " ]"; + trace(vespalib::make_string("...found segments: %s, score %f.", + segments.c_str(), + _currentMetrics.getSegmentationScore())); + } + } else { + if (isTracing()) { + trace("...no complete and improved segment existed."); + } + segment->setOpen(false); + } + segment = findOpenSegment(segment->getI()); + } + /* _segments[0] was marked valid above, so findLastStartPoint() has at least one candidate and the result is dereferenced without a NULL check. */ + _finalMetrics = findLastStartPoint()->getMetrics(); + setOccurrenceCounts(_finalMetrics); + _finalMetrics.onComplete(); + _finalMetrics.setComplete(true); +} + +/* Starting at the given segment start, matches query terms one by one to the closest field position in semantic order and reports matches, sequences and pairs to _currentMetrics. The segment ends when the next match is proximityLimit or more positions away, or when the query terms are exhausted. Returns false when no segment was started. */ +bool +Computer::findAlternativeSegmentFrom(SegmentStart *segment) { + int semanticDistanceExplored = segment->getSemanticDistanceExplored(); + int previousI = -1; + int previousJ = segment->getPreviousJ(); + bool hasOpenSequence = false; + bool isFirst = true; + for (uint32_t i = segment->getStartI(); i < getNumQueryTerms(); i++) { + int semanticDistance = findClosestInFieldBySemanticDistance(i, previousJ, semanticDistanceExplored); + int j = semanticDistanceToFieldIndex(semanticDistance, previousJ); + + if (j == -1 && semanticDistanceExplored > 0 && isFirst) { + return false; // segment explored before; no more matches found + } + if (hasOpenSequence && (j == -1 || j != previousJ + 1)) { + _currentMetrics.onSequenceEnd(previousJ); + hasOpenSequence = false; + } + if (isFirst) { + if (j != -1) { + segmentStart(i, j, isFirst ? 
-1 : previousJ); /* isFirst is always true in this branch, so -1 is what gets passed */ + segment->exploredTo(j); + isFirst = false; + } + else { + segment->incrementStartI(); // there are no matches for this i + } + } + else { + if ((unsigned int)abs(j - previousJ) >= _params.getProximityLimit()) { + segmentEnd(i - 1, previousJ); + return true; + } + else if (j != -1) { + inSegment(i, j, previousJ, previousI); + } + } + if (j != -1) { + _currentMetrics.onMatch(i); + if (!hasOpenSequence) { + _currentMetrics.onSequenceStart(j); + hasOpenSequence=true; + } + semanticDistanceExplored = 1; // skip the current match when looking for the next + } else { + semanticDistanceExplored = 0; + // we have a match for this term but no position information + if (_queryTermFieldMatch[i] != NULL && !_cachedHits[i].valid) { + _currentMetrics.onMatch(i); + } + } + if (j >= 0) { + previousI = i; + previousJ = j; + } + } + if (hasOpenSequence) { + _currentMetrics.onSequenceEnd(previousJ); + } + if (!isFirst) { + segmentEnd(getNumQueryTerms() - 1, previousJ); + return true; + } + else { + return false; + } +} + +/* Reports the pair (i, j) to the current metrics and classifies it as in-sequence when both i and j directly follow the previous match, otherwise as a gap inside the segment. */ +void +Computer::inSegment(int i, int j, int previousJ, int previousI) +{ + _currentMetrics.onPair(i, j, previousJ); + if (j == previousJ + 1 && i == previousI + 1) { + _currentMetrics.onInSequence(i, j, previousJ); + } + else { + _currentMetrics.onInSegmentGap(i, j, previousJ); + if (isTracing()) { + trace(vespalib::make_string(" in segment gap: %d -> %d", i, j)); + } + } +} + +/* Reports a new segment at (i, j) to the current metrics, plus a pair when previousJ is non-negative. Always returns true. */ +bool +Computer::segmentStart(int i, int j, int previousJ) +{ + _currentMetrics.onNewSegment(i, j, previousJ); + if (previousJ >= 0) { + _currentMetrics.onPair(i, j, previousJ); + } + if (isTracing()) { + trace(vespalib::make_string(" new segment at: %d -> %d", i, j)); + } + return true; +} + +/* Records the end of a segment at (i, j). The segment start for query term i + 1 is reset from the current metrics when it is not yet valid; otherwise offerHistory() is called on it with the current metrics. */ +void +Computer::segmentEnd(int i, int j) +{ + if (isTracing()) { + trace(vespalib::make_string(" segment ended at: %d -> %d", i, j)); + } + SegmentStart *startOfNext = _segments[i + 1].segment.get(); + if (!_segments[i + 1].valid) { + startOfNext->reset(_currentMetrics, j, i + 1); + 
_segments[i + 1].valid = true; + } + else { + startOfNext->offerHistory(j, _currentMetrics); + } +} + +/* Returns the first valid and open segment start at index startI or later. A start that has been explored before is returned only while fewer than getMaxAlternativeSegmentations() alternatives have been tried. Returns NULL when none qualifies. */ +SegmentStart * +Computer::findOpenSegment(uint32_t startI) { + for (uint32_t i = startI; i < _segments.size(); i++) { + SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL; + if (startPoint == NULL || !startPoint->isOpen()) { + continue; + } + if (startPoint->getSemanticDistanceExplored() == 0) { + return startPoint; // first attempt + } + if (_alternativeSegmentationsTried >= _params.getMaxAlternativeSegmentations()) { + continue; + } + _alternativeSegmentationsTried++; + return startPoint; + } + return NULL; +} + +/* Returns the valid segment start with the highest index, or NULL (after logging an error) when there is none. */ +SegmentStart * +Computer::findLastStartPoint() +{ + for (int i = _segments.size(); --i >= 0; ) { + SegmentStart *startPoint = _segments[i].valid ? _segments[i].segment.get() : NULL; + if (startPoint != NULL) { + return startPoint; + } + } + LOG(error, "findLastStartPoint() could not find any segment start. This should never happen!"); + return NULL; +} + +/* Computes the occurrence metrics from the match positions of the query terms and stores them in metrics. A term whose first position equals that of an earlier term is skipped, and at most getMaxOccurrences() positions are counted per term. */ +void +Computer::setOccurrenceCounts(Metrics &metrics) +{ + // Find all unique query terms. + std::vector uniqueTerms; + std::set firstOccs; + for (uint32_t i = 0; i < _queryTermFieldMatch.size(); ++i) { + const TermFieldMatchData *termFieldMatch = _queryTermFieldMatch[i]; + if (termFieldMatch == NULL) { + continue; // not for this match + } + FieldPositionsIterator it = termFieldMatch->getIterator(); + if (it.valid()) { + if (firstOccs.find(it.getPosition()) == firstOccs.end()) { + uniqueTerms.push_back(i); + firstOccs.insert(it.getPosition()); + } + } + } + + // Commence occurrence logic. 
+ std::vector weightedOccurrences; + std::vector significantOccurrences; + + /* NOTE(review): divider is 0 when _fieldLength is 0; the loop below divides by it whenever uniqueTerms is non-empty - confirm that combination cannot occur. */ + uint32_t divider = std::min(_fieldLength, (uint32_t)(_params.getMaxOccurrences() * uniqueTerms.size())); + uint32_t maxOccurence = std::min(_fieldLength, _params.getMaxOccurrences()); + + feature_t occurrence = 0; + feature_t absoluteOccurrence = 0; + feature_t weightedAbsoluteOccurrence = 0; + int totalWeight = 0; + feature_t totalWeightedOccurrences = 0; + feature_t totalSignificantOccurrences = 0; + + for (std::vector::iterator it = uniqueTerms.begin(); + it != uniqueTerms.end(); ++it) + { + const QueryTerm &queryTerm = _queryTerms[*it]; + const ITermData &termData = *queryTerm.termData(); + const TermFieldMatchData &termFieldMatch = *_queryTermFieldMatch[*it]; + + uint32_t termOccurrences = 0; + FieldPositionsIterator pos = termFieldMatch.getIterator(); + while (pos.valid() && termOccurrences < _params.getMaxOccurrences()) { + termOccurrences++; + pos.next(); + } + + occurrence += (feature_t)termOccurrences / divider; + absoluteOccurrence += (feature_t)termOccurrences / (_params.getMaxOccurrences() * uniqueTerms.size()); + + weightedAbsoluteOccurrence += (feature_t)termOccurrences * termData.getWeight().percent() / _params.getMaxOccurrences(); + totalWeight += termData.getWeight().percent(); + + totalWeightedOccurrences += (feature_t)maxOccurence * termData.getWeight().percent() / divider; + weightedOccurrences.push_back((feature_t)termOccurrences * termData.getWeight().percent() / divider); + + totalSignificantOccurrences += (feature_t)maxOccurence * queryTerm.significance() / divider; + significantOccurrences.push_back((feature_t)termOccurrences * queryTerm.significance() / divider); + } + metrics.setOccurrence(occurrence); + metrics.setAbsoluteOccurrence(absoluteOccurrence); + metrics.setWeightedAbsoluteOccurrence(weightedAbsoluteOccurrence / (totalWeight > 0 ? 
totalWeight : 1)); + + feature_t weightedOccurrenceSum = 0; + for (std::vector::iterator it = weightedOccurrences.begin(); + it != weightedOccurrences.end(); ++it) + { + weightedOccurrenceSum += totalWeightedOccurrences > 0.0f ? *it / totalWeightedOccurrences : 0.0f; + } + metrics.setWeightedOccurrence(weightedOccurrenceSum); + + feature_t significantOccurrenceSum = 0; + for (std::vector::iterator it = significantOccurrences.begin(); + it != significantOccurrences.end(); ++it) + { + significantOccurrenceSum += totalSignificantOccurrences > 0.0f ? *it / totalSignificantOccurrences : 0.0f; + } + metrics.setSignificantOccurrence(significantOccurrenceSum); +} + +} // fieldmatch +} // features +} // search diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h new file mode 100644 index 00000000000..558bee1443a --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/computer.h @@ -0,0 +1,382 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "metrics.h" +#include "params.h" +#include "segmentstart.h" +#include "simplemetrics.h" + +namespace search { +namespace features { +namespace fieldmatch { + +/** + *

Calculates a set of metrics capturing information about the degree of agreement between a query and a field + * string. This algorithm attempts to capture the property of text that very close tokens are usually part of the same + * semantic structure, while tokens farther apart are much more loosely related. The algorithm will locate alternative + * such regions containing multiple query tokens (segments), do a more detailed analysis of these segments and choose + * the ones producing the best overall set of match metrics.

+ * + *

Such segments are found by looking at query terms in sequence from left to right and finding matches in the + * field. All alternative segment start points are explored, and the segmentation achieving the best overall string + * match metric score is preferred. The dynamic programming paradigm is used to avoid redoing work on segmentations.

+ * + *

When a segment start point is found, subsequent tokens from the query are searched in the field from this starting + * point in "semantic order". This search order can be defined independently of the algorithm. The current order + * searches proximityLimit tokens ahead first, then the same distance backwards (so if you need to go two steps + * backwards in the field from the segment starting point, the real distance is -2, but the "semantic distance" is + * proximityLimit+2).

+ * + *

The actual metrics are calculated during execution of this algorithm by the {@link Metrics} class, by + * receiving events emitted from the algorithm. Any set of metrics derivable from these events is computable using this + * algorithm.

+ * + *

Terminology: + *

    + *
  • Sequence - A set of adjacent matched tokens in the field.
  • + *
  • Segment - A field area containing matches to a continuous section of the query.
  • + *
  • Gap - A chunk of adjacent tokens inside a segment separating two matched characters.
  • + *
  • Semantic distance - A non-continuous distance between tokens in j.
  • + *
+ * + *

Notation: A position index in the query is denoted i. A position index in the field is denoted + * j.

+ * + *

This class is not multithread safe, but is reusable across queries for a single thread.

+ * + * @author
Jon Bratseth + * @author Simon Thoresen + * @version $Id$ + */ +class Computer { +public: + /** + * Constructs a new computer object. + * + * @param propertyNamespace The namespace used in query properties. + * @param splitter The environment that holds all query information. + * @param fieldInfo The info object of the matched field. + * @param params The parameter object for this computer. + */ + Computer(const vespalib::string &propertyNamespace, const search::fef::PhraseSplitter &splitter, + const search::fef::FieldInfo &fieldInfo, const Params ¶ms); + + /** + * Resets this object according to the given match data object. + * + * @param match The match data object containing match information for this field. + */ + void reset(const search::fef::MatchData & match); + + /** + * Runs this computer using the environment and parameters given to the constructor and the match given to + * the last call to reset(). + * + * @return The final metrics. + */ + const Metrics & run(); + + /** + * Returns the final metrics. + * + * @return The final metrics. + */ + const Metrics & getFinalMetrics() const { + return _finalMetrics; + } + + /** + * Implements the preferred search order for finding a match to a query item - first + * looking close in the right order, then close in the reverse order, then far in the right order + * and lastly far in the reverse order. + * + * @param i The query term index. + * @param previousJ The previous field index. + * @param startSemanticDistance The semantic distance we must be larger than or equal to. + * @return The semantic distance of the next matching j larger than or equal to startSemanticDistance, or -1 if + * there are no such matches + */ + int findClosestInFieldBySemanticDistance(int i, int previousJ, uint32_t startSemanticDistance); + + /** + * Returns the field index (j) from a starting point zeroJ and the distance from zeroJ in the + * semantic distance space. + * + * @param semanticDistance The semantic distance to transform to field index. 
+ * @param zeroJ The starting point. + * @returns The field index, or -1 (undefined) if the semanticDistance is -1. + */ + int semanticDistanceToFieldIndex(int semanticDistance, uint32_t zeroJ) const; + + /** + * Returns the semantic distance from a starting point zeroJ to a field index j. + * + * @param j The field index to transform to semantic distance. + * @param zeroJ The starting point. + * @returns The semantic distance, or -1 (undefined) if j is -1. + */ + int fieldIndexToSemanticDistance(int j, uint32_t zeroJ) const; + + /** + * Returns the query environment of this. This contains information about the query. + * + * @return The query environment. + */ + const search::fef::IQueryEnvironment &getQueryEnvironment() const { + return _splitter; + } + + /** + * Returns the match data of this. This contains information about how the query was matched to the current + * document. + * + * @return The match data. + */ + const search::fef::MatchData &getMatchData() const { + return *_match; + } + + /** + * Returns the id of the searched field. + * + * @return The field id. + */ + uint32_t getFieldId() const { + return _fieldId; + } + + /** + * Returns the number of terms present in the searched field. + * + * @return The field length. + */ + uint32_t getFieldLength() const { + return _fieldLength; + } + + /** + * Returns the parameter object that was used to instantiate this. + * + * @return The parameters. + */ + const Params &getParams() const { + return _params; + } + + /** + * Adds the given string to the trace of this, if tracing is enabled. + * + * @param str The string to trace. + * @return This, to allow chaining. + */ + Computer &trace(const vespalib::string &str); + + /** + * Returns a textual trace of the last execution of this algorithm, if tracing is on. + * + * @return The trace string. + */ + vespalib::string getTrace() const; + + /** + * Set to true to collect a textual trace from the computation, which can be retrieved using {@link #getTrace}. 
+ * + * @param tracing Whether or not to trace. + * @return This, to allow chaining. + */ + Computer &setTracing(bool tracing) { + _tracing = tracing; + return *this; + } + + /** + * Returns whether tracing is on. + * + * @return True if tracing is on. + */ + bool isTracing() const { return _tracing; } + + /** + * Returns the number of terms searching on this field. + * + * @return The number of terms. + */ + uint32_t getNumQueryTerms() const { + return _queryTerms.size(); + } + + /** + * Returns the query term data for a specified term. + * + * @param term The index of the term to return. + * @return The query term data. + */ + const QueryTerm & getQueryTermData(int term) const { + return _queryTerms[term]; + } + + /** + * Returns the term match for a specified term. + * + * @param term The index of the term match to return. + * @return The term match. + */ + const search::fef::TermFieldMatchData *getQueryTermFieldMatch(int term) const { + return _queryTermFieldMatch[term]; + } + + /** + * Returns the total weight of all query terms. + * + * @return The total weight. + */ + uint32_t getTotalTermWeight() const { + return _totalTermWeight; + } + + /** + * Returns the total significance of all query terms. + * + * @return The total significance. + */ + feature_t getTotalTermSignificance() const { + return _totalTermSignificance; + } + + /** + * Returns a string representation of this computer. + * + * @return A string representation. + */ + vespalib::string toString() const; + + /** + * Returns the simple metrics computed while traversing the list of query terms in the constructor. + * + * @return the simple metrics object. + */ + const SimpleMetrics & getSimpleMetrics() const { + return _simpleMetrics; + } + + +private: + /** + * Finds segment candidates and explores them until we have the best segmentation history of the entire query. + */ + void exploreSegments(); + + /** + * Find correspondences from a segment starting point startI. 
+ * + * @param segment The segment starting point. + * @return True if a segment was found, false if none could be found. + */ + bool findAlternativeSegmentFrom(SegmentStart *segment); + + /** + * A match occurred within a segment, report this to the metric as appropriate. + * + * @param i The current query term index. + * @param j The current field term index. + * @param previousJ The previous field term index. + * @param previousI The previous query term index. + */ + void inSegment(int i, int j, int previousJ, int previousI); + + /** + * Returns whether this segment was accepted as a starting point. + * + * @param i The current query term index. + * @param j The current field term index. + * @param previousJ The previous field term index. + * @return Whether this segment was accepted or not. + */ + bool segmentStart(int i, int j, int previousJ); + + /** + * Registers an end of a segment. + * + * @param i The i at which this segment ends. + * @param j The j at which this segment ends. + */ + void segmentEnd(int i, int j); + + /** + * Returns the next open segment to explore, or null if no more segments exist or should be explored. + * + * @param startI The i to start searching from. + * @return The next open segment, or null. + */ + SegmentStart *findOpenSegment(uint32_t startI); + + /** + * Returns the last segment start point in the internal list. + * + * @return The last segment start. + */ + SegmentStart *findLastStartPoint(); + + /** + * Counts all occurrences of terms of the query in the field and set those metrics. + * + * @param metrics The metrics to update. 
+ */ + void setOccurrenceCounts(Metrics &metrics); + + /** + * Reports a field position that lies at or beyond the field length. Declared noinline. + * + * @param fieldPos The offending field position. + * @param docId The id of the document being processed. + */ + void handleError(uint32_t fieldPos, uint32_t docId) const __attribute__((noinline)); + + +private: + typedef std::shared_ptr BitVectorPtr; + typedef std::vector TermFieldMatchDataVector; + + /** A segment start together with a flag telling whether it is in use; the flag is false by default. */ + struct SegmentData { + SegmentData() : segment(), valid(false) {} + SegmentData(const SegmentStart::SP & ss, bool v = false) : segment(ss), valid(v) {} + SegmentStart::SP segment; + bool valid; + }; + + /** A bit vector together with a flag telling whether its content is usable; starts out empty and not valid. */ + struct BitVectorData { + BitVectorData() : bitvector(0), valid(false) {} + search::AllocatedBitVector bitvector; + bool valid; + }; + + // per query + const search::fef::PhraseSplitter & _splitter; + uint32_t _fieldId; + Params _params; + bool _tracing; + std::vector _trace; + bool _useCachedHits; + + QueryTermVector _queryTerms; + TermFieldMatchDataVector _queryTermFieldMatch; + uint32_t _totalTermWeight; + feature_t _totalTermSignificance; + + // per docid + const search::fef::MatchData * _match; + uint32_t _fieldLength; + Metrics _currentMetrics; // The metrics of the currently explored segmentation. + Metrics _finalMetrics; // The final metrics; held by value, so only meaningful after metric computation. + SimpleMetrics _simpleMetrics; // The metrics used to compute simple features. + std::vector _segments; // Known segment starting points. + uint32_t _alternativeSegmentationsTried; + std::vector _cachedHits; +}; + +} // fieldmatch +} // features +} // search + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp new file mode 100644 index 00000000000..89da22f079e --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.cpp @@ -0,0 +1,344 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.fieldmatch.metrics"); + +#include +#include +#include +#include "computer.h" +#include "metrics.h" + +namespace search { +namespace features { +namespace fieldmatch { + +Metrics::Metrics(const Computer *source) : + _source(source), + _complete(false), + _outOfOrder(0), + _segments(0), + _gaps(0), + _gapLength(0), + _longestSequence(1), + _head(-1), + _tail(-1), + _matches(0), + _proximity(0), + _unweightedProximity(0), + _segmentDistance(0), + _pairs(0), + _weight(0), + _significance(0), + _occurrence(0), // default not given + _weightedOccurrence(0), // default not given + _absoluteOccurrence(0), // default not given + _weightedAbsoluteOccurrence(0), // default not given + _significantOccurrence(0), // default not given + _currentSequence(0), + _segmentStarts(), + _queryLength(_source->getNumQueryTerms()) +{ + _segmentStarts.reserve(100); +} + +Metrics::Metrics(const Metrics &rhs) : + _source(rhs._source), + _complete(rhs._complete), + _outOfOrder(rhs._outOfOrder), + _segments(rhs._segments), + _gaps(rhs._gaps), + _gapLength(rhs._gapLength), + _longestSequence(rhs._longestSequence), + _head(rhs._head), + _tail(rhs._tail), + _matches(rhs._matches), + _proximity(rhs._proximity), + _unweightedProximity(rhs._unweightedProximity), + _segmentDistance(rhs._segmentDistance), + _pairs(rhs._pairs), + _weight(rhs._weight), + _significance(rhs._significance), + _occurrence(rhs._occurrence), + _weightedOccurrence(rhs._weightedOccurrence), + _absoluteOccurrence(rhs._absoluteOccurrence), + _weightedAbsoluteOccurrence(rhs._weightedAbsoluteOccurrence), + _significantOccurrence(rhs._significantOccurrence), + _currentSequence(rhs._currentSequence), + _segmentStarts(rhs._segmentStarts), + _queryLength(rhs._queryLength) +{ +} + +Metrics & +Metrics::operator=(const Metrics & rhs) +{ + if (this != &rhs) { + _source = rhs._source; + _complete = rhs._complete; + _outOfOrder = rhs._outOfOrder; + _segments = rhs._segments; + _gaps = rhs._gaps; + 
_gapLength = rhs._gapLength; + _longestSequence = rhs._longestSequence; + _head = rhs._head; + _tail = rhs._tail; + _matches = rhs._matches; + _proximity = rhs._proximity; + _unweightedProximity = rhs._unweightedProximity; + _segmentDistance = rhs._segmentDistance; + _pairs = rhs._pairs; + _weight = rhs._weight; + _significance = rhs._significance; + _occurrence = rhs._occurrence; + _weightedOccurrence = rhs._weightedOccurrence; + _absoluteOccurrence = rhs._absoluteOccurrence; + _weightedAbsoluteOccurrence = rhs._weightedAbsoluteOccurrence; + _significantOccurrence = rhs._significantOccurrence; + _currentSequence = rhs._currentSequence; + _segmentStarts = rhs._segmentStarts; + _queryLength = rhs._queryLength; + } + return *this; +} + +void +Metrics::reset() +{ + _complete = false; + _outOfOrder = 0; + _segments = 0; + _gaps = 0; + _gapLength = 0; + _longestSequence = 1; + _head = -1; + _tail = -1; + _matches = 0; + _proximity = 0; + _unweightedProximity = 0; + _segmentDistance = 0; + _pairs = 0; + _weight = 0; + _significance = 0; + _occurrence = 0; + _weightedOccurrence = 0; + _absoluteOccurrence = 0; + _weightedAbsoluteOccurrence = 0; + _significantOccurrence = 0; + _currentSequence = 0; + _segmentStarts.clear(); + _queryLength = _source->getNumQueryTerms(); +} + +feature_t +Metrics::getQueryCompleteness() const +{ + return _queryLength > 0 ? 
(feature_t)_matches / _queryLength : 0.0f; +} + +feature_t +Metrics::getFieldCompleteness() const +{ + if (_source->getFieldLength() == 0) { + return 0; // default + } + return (feature_t)_matches / _source->getFieldLength(); +} + +feature_t +Metrics::getCompleteness() const +{ + feature_t importance = _source->getParams().getFieldCompletenessImportance(); + return getQueryCompleteness() * (1 - importance) + (importance * getFieldCompleteness()); +} + +feature_t +Metrics::getRelatedness() const +{ + if (_matches == 0) { + return 0; + } + else if (_matches == 1) { + return 1; + } + else { + return 1 - (feature_t)(_segments - 1) / (_matches - 1); + } +} + +feature_t +Metrics::getSegmentProximity() const +{ + if (_source->getFieldLength() == 0) { + return 0; // default + } + return _matches == 0 ? 0.0f : 1 - (feature_t)_segmentDistance / _source->getFieldLength(); +} + +feature_t +Metrics::getProximity() const +{ + feature_t totalConnectedness = 0; + for (uint32_t i = 1; i < _queryLength; i++) { + totalConnectedness += std::max(0.1, _source->getQueryTermData(i).connectedness()); + } + feature_t averageConnectedness = 0.1f; + if (_queryLength > 1) { + averageConnectedness = totalConnectedness / (_queryLength - 1); + } + return getAbsoluteProximity() / averageConnectedness; +} + +feature_t +Metrics::getEarliness() const +{ + if (_matches == 0) { + return 0; // covers (field.length == 0) too + } + else if (_source->getFieldLength() == 1) { + return 1; + } + else { + return 1 - (feature_t)_head / (std::max(6u, _source->getFieldLength()) - 1); + } +} + +feature_t +Metrics::getMatch() const +{ + feature_t proximityCompletenessImportance = _source->getParams().getProximityCompletenessImportance(); + feature_t earlinessImportance = _source->getParams().getEarlinessImportance(); + feature_t relatednessImportance = _source->getParams().getRelatednessImportance(); + feature_t segmentProximityImportance = _source->getParams().getSegmentProximityImportance(); + feature_t 
occurrenceImportance = _source->getParams().getOccurrenceImportance(); + + feature_t scaledRelatedness = 1 - relatednessImportance + relatednessImportance * getRelatedness(); + + return + (proximityCompletenessImportance * scaledRelatedness * getProximity() * getCompleteness()*getCompleteness() + + earlinessImportance * getEarliness() + + segmentProximityImportance * getSegmentProximity() + + occurrenceImportance * getOccurrence()) / + (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance); +} + +feature_t +Metrics::getSegmentationScore() const +{ + feature_t retval = 0.0f; + if (_segments > 0) { + retval = getAbsoluteProximity() / (_segments * _segments); + } + return retval; +} + +void +Metrics::onMatch(uint32_t i) +{ + if (_matches >= _source->getFieldLength()) { + return; + } + _matches++; + _weight += _source->getTotalTermWeight() > 0 ? + (feature_t)_source->getQueryTermData(i).termData()->getWeight().percent() / _source->getTotalTermWeight() : 0.0f; + _significance += _source->getTotalTermSignificance() > 0.0f ? 
+ _source->getQueryTermData(i).significance() / _source->getTotalTermSignificance() : 0.0f; +} + +void +Metrics::onSequenceStart(uint32_t j) +{ + if (_head == -1 || (int)j < _head) { + _head = j; + } + _currentSequence = 1; +} + +void +Metrics::onSequenceEnd(uint32_t j) +{ + int sequenceTail = _source->getFieldLength() - j - 1; + if (_tail == -1 || sequenceTail < _tail) { + _tail = sequenceTail; + } + if (_currentSequence > _longestSequence) { + _longestSequence = _currentSequence; + } + _currentSequence = 0; +} + +void +Metrics::onComplete() +{ + if (_segmentStarts.size() <= 1) { + _segmentDistance = 0; + } + else { + std::sort(_segmentStarts.begin(), _segmentStarts.end()); + for (uint32_t i = 1; i < _segmentStarts.size(); i++) { + _segmentDistance += _segmentStarts[i] - _segmentStarts[i - 1] + 1; + } + } + if (_head == -1) { + _head = 0; + } + if (_tail == -1) { + _tail = 0; + } +} + +void +Metrics::onPair(uint32_t i, uint32_t j, uint32_t previousJ) +{ + int distance = j - previousJ - 1; + if (distance < 0) { + distance++; // discontinuity if two letters are in the same position + } + if (((unsigned int)std::abs(distance)) > _source->getParams().getProximityLimit()) { + return; // no contribution + } + feature_t pairProximity = _source->getParams().getProximityTable()[distance + + _source->getParams().getProximityLimit()]; + _unweightedProximity += pairProximity; + + feature_t connectedness = _source->getQueryTermData(i).connectedness(); + _proximity += pow(pairProximity, connectedness / 0.1) * std::max(0.1, connectedness); + _pairs++; +} + +void +Metrics::onInSequence(uint32_t, uint32_t, uint32_t) +{ + _currentSequence++; +} + +void +Metrics::onInSegmentGap(uint32_t, uint32_t j, uint32_t previousJ) +{ + _gaps++; + if (j > previousJ) { + _gapLength += abs((int)j - (int)previousJ) - 1; // gap length may be 0 if the gap was in the query + } + else { + _outOfOrder++; + _gapLength += abs((int)j - (int)previousJ); + } +} + +void +Metrics::onNewSegment(uint32_t, 
uint32_t j, uint32_t) +{ + _segments++; + _segmentStarts.push_back(j); +} + +vespalib::string +Metrics::toString() const +{ + return vespalib::make_string("Metrics(match %f)", getMatch()); +} + + +} // fieldmatch +} // features +} // search diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h new file mode 100644 index 00000000000..6b826f09e57 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/metrics.h @@ -0,0 +1,563 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { +namespace features { +namespace fieldmatch { + +class Computer; + +/** + * The collection of metrics calculated by the string match metric calculator. + * + * @author Jon Bratseth + * @author Simon Thoresen + * @version $Id$ + */ +class Metrics { +public: + /** + * Convenience typedefs. + */ + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + +public: + /** + * Constructs a new metrics object. + * + * @param source The source of this. + */ + Metrics(const Computer *source); + + /** + * Implements the copy constructor. + * + * @param rhs The metrics to copy. + */ + Metrics(const Metrics &rhs); + + /** + * Implements the assignment operator. + */ + Metrics & operator=(const Metrics & rhs); + + /** + * Resets this object. + */ + void reset(); + + /** + * Are these metrics representing a complete match. + * + * @return Whether or not this represents a complete match. + */ + bool isComplete() const { + return _complete; + } + + /** + * Sets whether or not these metrics represent a complete match. + * + * @param complete Whether or not this represents a complete match. + * @return This, to allow chaining. + */ + Metrics &setComplete(bool complete) { + _complete = complete; + return *this; + } + + /** + * Returns the segment start points. + * + * @return The start point list. 
+ */ + std::vector &getSegmentStarts() { + return _segmentStarts; + } + + /** + * Returns the total number of out of order token sequences within field segments. + * + * @return The number of tokens. + */ + uint32_t getOutOfOrder() const { + return _outOfOrder; + } + + /** + * Returns the number of field text segments which are needed to match the query as completely as possible. + * + * @return The number of segments. + */ + uint32_t getSegments() const { + return _segments; + } + + /** + * Returns the total number of position jumps (backward or forward) within document segments. + * + * @return The number of position jumps. + */ + uint32_t getGaps() const { + return _gaps; + } + + /** + * Returns the summed size of all gaps within segments. + * + * @return The summed size. + */ + uint32_t getGapLength() const { + return _gapLength; + } + + /** + * Returns the size of the longest matched continuous, in-order sequence in the document. + * + * @return The size of the sequence. + */ + uint32_t getLongestSequence() const { + return _longestSequence; + } + + /** + * Returns the number of tokens in the field preceding the start of the first matched segment. + * + * @return The number of tokens. + */ + int getHead() const { + return _head; + } + + /** + * Returns the number of tokens in the field following the end of the last matched segment. + * + * @return The number of tokens. + */ + int getTail() const { + return _tail; + } + + /** + * Returns the number of query terms which was matched in this field. + * + * @return The number of matched terms. + */ + uint32_t getMatches() const { + return _matches; + } + + /** + * Returns the number of in-segment token pairs. + * + * @return The number of token pairs. + */ + uint32_t getPairs() const { + return _pairs; + } + + /** + * Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. 
This + * number is 0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they are + * following in sequence and have a high connectedness, and close to 0 if they are far from each other in the + * segment or out of order. + * + * @return The proximity. + */ + feature_t getAbsoluteProximity() const { + return _pairs < 1 ? 0.1f : _proximity / _pairs; + } + + /** + * Returns the normalized proximity of the matched terms, not taking term connectedness into account. This number + * is close to 1 if all the matched terms are following each other in sequence, and close to 0 if they are far from + * each other or out of order + * + * @return The proximity. + */ + feature_t getUnweightedProximity() const { + return _pairs < 1 ? 1.0f : _unweightedProximity / _pairs; + } + + /** + * Returns the sum of the distance between all segments making up a match to the query, measured as the sum of the + * number of token positions separating the start of each field adjacent segment. + * + * @return The sum distance. + */ + feature_t getSegmentDistance() const { + return _segmentDistance; + } + + /** + *

Returns the normalized weight of this match relative to the whole query: The sum of the weights of all + * matched terms/the sum of the weights of all query terms. If all the query terms were matched, this + * is 1. If no terms were matched, or these matches have weight zero, this is 0.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank + * features for each field multiplied by this number for the same field will produce a normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ * + * @return The normalized weight. + */ + feature_t getWeight() const { + return _weight; + } + + /** + *

Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query: + * The sum of the significance of all matched terms/the sum of the significance of all query terms. If + * all the query terms were matched, this is 1. If no terms were matched, or if the significance of all the matched + * terms is zero (they are present in all (possible) documents), this number is zero.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank + * features for each field multiplied by this number for the same field will produce a normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ * + * @return The normalized significance. + */ + feature_t getSignificance() const { + return _significance; + } + + /** + *

Returns a normalized measure of the number of occurrences of the terms of the query. This number is 1 if there + * are many occurrences of the query terms in absolute terms, or relative to the total content of the field, + * and 0 if there are none.

+ * + *

This is suitable for occurrence in fields containing regular text.

+ * + * @return The normalized number of occurences. + */ + feature_t getOccurrence() const { + return _occurrence; + } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query: + * + * sum over all query terms(min(number of occurences of the term, maxOccurrences)) / (query term count * + * 100) + * + *

This number is 1 if there are many occurrences of the query terms, and 0 if there are none. This number does + * not take the actual length of the field into account, so it is suitable for uses of occurrence to denote + * importance across multiple terms.

+ * + * @return The normalized number of occurences. + */ + feature_t getAbsoluteOccurrence() const { + return _absoluteOccurrence; + } + + /** + *

Returns a normalized measure of the number of occurrences of the terms of the query, weighted by term weight. + * This number is close to 1 if there are many occurrences of highly weighted query terms, in absolute terms, or + * relative to the total content of the field, and 0 if there are none.

+ * + * @return The normalized measure of weighted occurences. + */ + feature_t getWeightedOccurrence() const { + return _weightedOccurrence; + } + + /** + *

Returns a normalized measure of the number of occurrences of the terms of the query, taking weights into + * account so that occurrences of higher weighted query terms have more impact than lower weighted terms.

+ * + *

This number is 1 if there are many occurrences of the highly weighted terms, and 0 if there are none. This + * number does not take the actual length of the field into account, so it is suitable for uses of occurrence to + * denote importance across multiple terms.

+ * + * @return The normalized measure of weighted occurences. + */ + feature_t getWeightedAbsoluteOccurrence() const { + return _weightedAbsoluteOccurrence; + } + + /** + *

Returns a normalized measure of the number of occurrence of the terms of the query in absolute terms, or + * relative to the total content of the field, weighted by term significance. + * + *

This number is 1 if there are many occurrences of the highly significant terms, and 0 if there are none.

+ * + * @return The normalized measure of occurences, weighted by significance. + */ + feature_t getSignificantOccurrence() const { + return _significantOccurrence; + } + + /** + * The ratio of query tokens which was matched in the field: matches/queryLength. + * + * @return The query completeness. + */ + feature_t getQueryCompleteness() const; + + /** + * The ratio of query tokens which was matched in the field: matches/fieldLength. + * + * @return The field completeness. + */ + feature_t getFieldCompleteness() const; + + /** + * Total completeness, where field completeness is more important: queryCompleteness * ( 1 - + * fieldCompletenessImportancy + fieldCompletenessImportancy * fieldCompleteness ) + * + * @return The total completeness. + */ + feature_t getCompleteness() const; + + /** + * Returns how well the order of the terms agreed in segments: 1-outOfOrder/pairs. + * + * @return The orderness of terms. + */ + feature_t getOrderness() const { + return _pairs < 1 ? 1.0f : 1 - (feature_t)_outOfOrder / _pairs; + } + + /** + * Returns the degree to which different terms are related (occurring in the same segment): + * 1-segments/(matches-1). + * + * @return The relatedness of terms. + */ + feature_t getRelatedness() const; + + /** + * Returns longestSequence/matches + * + * @return The longest sequence ratio. + */ + feature_t getLongestSequenceRatio() const { + return _matches == 0 ? 0.0f : (feature_t)_longestSequence / _matches; + } + + /** + * Returns the closeness of the segments in the field: 1-segmentDistance/fieldLength. + * + * @return The segment proximity. + */ + feature_t getSegmentProximity() const; + + /** + * Returns a value which is close to 1 when matched terms are close and close to zero when they are far apart in the + * segment. Relatively more connected terms influence this value more. This is absoluteProximity/average + * connectedness. + * + * @return The matched term proximity. + */ + feature_t getProximity() const; + + /** + *

Returns the average of significance and weight.

+ * + *

As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank + * features for each field multiplied by this number for the same field will produce a normalized number.

+ * + *

Note that this scales with the number of matched query terms in the field. If you want a component which does + * not, divide by matches.

+ * + * @return The importance. + */ + feature_t getImportance() const { + return (getSignificance() + getWeight()) / 2; + } + + /** + * A normalized measure of how early the first segment occurs in this field: + * 1-(head+1)/max(6,field.length). + * + * @return The earliness of the first segment. + */ + feature_t getEarliness() const; + + /** + *

A ready-to-use aggregate match score. Use this if you don't have time to find a better application specific + * aggregate score of the fine grained match metrics.

+ * + *

The current formula is + * + * ( proximityCompletenessImportance * (1-relatednessImportance + relatednessImportance*relatedness) * + * proximity * completeness^2 + earlinessImportance * earliness + segmentProximityImportance * segmentProximity + + * occurrenceImportance * occurrence ) / + * (proximityCompletenessImportance + earlinessImportance + segmentProximityImportance + occurrenceImportance) + * + * but this is subject to change (i.e. improvement) at any time.

+ * + *

Weight and significance are not taken into account because this is meant to capture the quality of the match in + * this field, while those measures relate this match to matches in other fields. This number can be multiplied with + * those values when combining with other field match scores.

+ * + * @return The match score. + */ + feature_t getMatch() const; + + /** + *

The metric used to select the best segments during execution of the string match metric algorithm.

+ * + *

This metric, and any metric it depends on, must be correct each time a segment is completed, not only when + * the metrics are complete, because this metric is used to choose segments during calculation.

+ * + * @return The score of the segmentation. + */ + feature_t getSegmentationScore() const; + + /** + * Called once for every match. + * + * @param i The index of the matched query term. + */ + void onMatch(uint32_t i); + + + /** + * Called once per sequence, when the sequence starts. + * + * @param j Sequence starts at this position. + */ + void onSequenceStart(uint32_t j); + + /** + * Called once per sequence when the sequence ends. + * + * @param j Sequence ends at this position. + */ + void onSequenceEnd(uint32_t j) ; + + /** + * Called once when this value is calculated, before onComplete. + * + * @param occurence The new occurence value. + */ + void setOccurrence(feature_t occurrence) { + _occurrence = occurrence; + } + + /** + * Called once when this value is calculated, before onComplete. + * + * @param weightedOccurence The new occurence weight. + */ + void setWeightedOccurrence(feature_t weightedOccurrence) { + _weightedOccurrence = weightedOccurrence; + } + + /** + * Called once when this value is calculated, before onComplete. + * + * @param absoluteOccurence The new absolute occurence value. + */ + void setAbsoluteOccurrence(feature_t absoluteOccurrence) { + _absoluteOccurrence = absoluteOccurrence; + } + + /** + * Called once when this value is calculated, before onComplete. + * + * @param weightedAbsoluteOccurence The new absolute occurence weight. + */ + void setWeightedAbsoluteOccurrence(feature_t weightedAbsoluteOccurrence) { + _weightedAbsoluteOccurrence = weightedAbsoluteOccurrence; + } + + /** + * Called once when this value is calculated, before onComplete. + * + * @param significantOccurence The new significant occurence value. + */ + void setSignificantOccurrence(feature_t significantOccurrence) { + _significantOccurrence = significantOccurrence; + } + + /** + * Called once when matching is complete. + */ + void onComplete(); + + /** + * Called when any pair is encountered. + * + * @param i The query term matched. 
+ * @param j The field term index. + * @param previousJ The end of the previous segment, or -1 if this is the first segment. + */ + void onPair(uint32_t i, uint32_t j, uint32_t previousJ); + + /** + * Called when an in-sequence pair is encountered. + * + * @param i The query term matched. + * @param j The field term index. + * @param previousJ The end of the previous segment, or -1 if this is the first segment. + */ + void onInSequence(uint32_t i, uint32_t j, uint32_t previousJ); + + /** + * Called when a gap (within a sequence) is encountered. + * + * @param i The query term matched. + * @param j The field term index. + * @param previousJ The end of the previous segment, or -1 if this is the first segment. + */ + void onInSegmentGap(uint32_t i, uint32_t j, uint32_t previousJ); + + /** + * Called when a new segment is started + * + * @param i The query term matched. + * @param j The field term index. + * @param previousJ The end of the previous segment, or -1 if this is the first segment. + * */ + void onNewSegment(uint32_t i, uint32_t j, uint32_t previousJ); + + /** + * Returns a string representation of this. + * + * @return A string representation. 
+ */ + vespalib::string toString() const; + +private: + const Computer *_source; + bool _complete; + + // Metrics + uint32_t _outOfOrder; + uint32_t _segments; + uint32_t _gaps; + uint32_t _gapLength; + uint32_t _longestSequence; + int _head; + int _tail; + uint32_t _matches; + feature_t _proximity; + feature_t _unweightedProximity; + feature_t _segmentDistance; + uint32_t _pairs; + feature_t _weight; + feature_t _significance; + feature_t _occurrence; + feature_t _weightedOccurrence; + feature_t _absoluteOccurrence; + feature_t _weightedAbsoluteOccurrence; + feature_t _significantOccurrence; + + // Temporary variables + uint32_t _currentSequence; + std::vector _segmentStarts; + uint32_t _queryLength; // num terms searching this field +}; + +} // fieldmatch +} // features +} // search + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp new file mode 100644 index 00000000000..34a23bb3642 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/params.cpp @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.fieldmatch.params"); + +#include "params.h" + +namespace search { +namespace features { +namespace fieldmatch { + +Params::Params() : + _proximityLimit(10), + _maxAlternativeSegmentations(1000), + _maxOccurrences(100), + _proximityCompletenessImportance(0.9f), + _relatednessImportance(0.9f), + _earlinessImportance(0.05f), + _segmentProximityImportance(0.05f), + _occurrenceImportance(0.05f), + _fieldCompletenessImportance(0.05f), + _proximityTable() +{ + feature_t table[] = { 0.01f, 0.02f, 0.03f, 0.04f, 0.06f, 0.08f, 0.12f, 0.17f, 0.24f, 0.33f, 1, + 0.71f, 0.50f, 0.35f, 0.25f, 0.18f, 0.13f, 0.09f, 0.06f, 0.04f, 0.03f }; + for (uint32_t i = 0; i < _proximityLimit * 2 + 1; ++i) { + _proximityTable.push_back(table[i]); + } +} + +bool +Params::valid() +{ + if (_proximityTable.size() != (_proximityLimit * 2 + 1)) { + LOG(error, "Proximity table length is invalid. Proximity limit is %d, but table has only %zd elements " + "(must be proximityLimit * 2 + 1).", + _proximityLimit, _proximityTable.size()); + return false; + } + return true; +} + +} +} +} diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/params.h b/searchlib/src/vespa/searchlib/features/fieldmatch/params.h new file mode 100644 index 00000000000..f3ff7558971 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/params.h @@ -0,0 +1,261 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { +namespace features { +namespace fieldmatch { + +/** + * The parameters to a string match metric calculator. + * + * @author Jon Bratseth + * @author Simon Thoresen + * @version $Id$ + */ +class Params { +public: + /** + * Creates a marcg metrics object initialized to the default values. + */ + Params(); + + /** + * Returns whether or not this parameter object contains valid content. 
If it is NOT valid, a descriptive string + * will be logged for reference. + * + * @return Whether or not this object is valid. + */ + bool valid(); + + /** + * Sets the number of tokens within which proximity matters. Default: 10 + * + * @param proximityLimit The number of tokens. + * @param This, to allow chaining. + */ + Params &setProximityLimit(uint32_t proximityLimit) { + _proximityLimit = proximityLimit; + return *this; + } + + /** + * Returns the number of tokens within which proximity matters. Default: 10 + * + * @return The number of tokens. + */ + uint32_t getProximityLimit() const { + return _proximityLimit; + } + + /** + * Sets the proximity table deciding the importance of separations of various distances, The table must have size + * proximityLimit*2+1, where the first half is for reverse direction distances. The table must only contain values + * between 0 and 1, where 1 is "perfect" and 0 is "worst". + * + * @param proximityTable The proximity table. + * @return This, to allow chaining. + */ + Params &setProximityTable(const std::vector &proximityTable) { + _proximityTable = proximityTable; + return *this; + } + + /** + * Returns the current proxmity table. The default table is calculated by 1/2^(n/2) on the right order + * side, and 1/2^(n/2) /3 on the reverse order side where n is the distance between the tokens. + * + * @return The proximity table. + */ + const std::vector &getProximityTable() const { + return _proximityTable; + } + + /** + * Returns the maximal number of alternative segmentations allowed in addition to the first one found. + * Default is 10000. This will prefer to not consider iterations on segments that are far out in the field, and + * which starts late in the query. + * + * @return The max number of alternative iterations. 
+ */ + uint32_t getMaxAlternativeSegmentations() const { + return _maxAlternativeSegmentations; + } + + /** + * Sets the maximal number of alternative segmentations allowed in addition to the first one found. + * + * @param maxAlternativeSegmentations The max number of alternative iterations. + * @return This, to allow chaining. + */ + Params &setMaxAlternativeSegmentations(uint32_t maxAlternativeSegmentations) { + _maxAlternativeSegmentations = maxAlternativeSegmentations; + return *this; + } + + /** + * Returns the number of occurrences each word is normalized against. This should be set as the number above which + * additional occurrences of the term has no real significance. The default is 100. + * + * @return The max number of occurences. + */ + uint32_t getMaxOccurrences() const { + return _maxOccurrences; + } + + /** + * Sets the number occurences each word is normalized against. + * + * @params maxOccurences The max number of occurences. + * @return This, to allow chaining. + */ + Params &setMaxOccurrences(uint32_t maxOccurrences) { + _maxOccurrences = maxOccurrences; + return *this; + } + + /** + * Returns a number between 0 and 1 which determines the importance of field completeness in relation to query + * completeness in the match and completeness metrics. Default is 0.05 + * + * @return The importance of field completeness. + */ + feature_t getFieldCompletenessImportance() const { + return _fieldCompletenessImportance; + } + + /** + * Sets the importance of this field's completeness. + * + * @param fieldCompletenessImportance The importance of field completeness. + * @return This, to allow chaining. 
+ */ + Params &setFieldCompletenessImportance(feature_t fieldCompletenessImportance) { + _fieldCompletenessImportance = fieldCompletenessImportance; + return *this; + } + + /** + * Returns the importance of the match having high proximity and being complete, relative to + * segmentProximityImportance, occurrenceImportance and earlinessImportance in the match + * metric. Default: 0.9 + * + * @return The importance of proximity AND completeness. + */ + feature_t getProximityCompletenessImportance() const { + return _proximityCompletenessImportance; + } + + /** + * Sets the importance of this fiel's proximity AND completeness. + * + * @param proximityCompletenessImportance The importance of proximity AND completeness. + * @return This, to allow chaining. + */ + Params &setProximityCompletenessImportance(feature_t proximityCompletenessImportance) { + _proximityCompletenessImportance = proximityCompletenessImportance; + return *this; + } + + /** + * Returns the importance of the match occuring early in the query, relative to segmentProximityImportance, + * occurrenceImportance and proximityCompletenessImportance in the match metric. Default: 0.05 + * + * @return The importance of earliness. + */ + feature_t getEarlinessImportance() const { + return _earlinessImportance; + } + + /** + * Sets the importance of the match occuring early in the query. + * + * @param earlinessImportance The importance of earliness. + * @return This, to allow chaining. + */ + Params &setEarlinessImportance(feature_t earlinessImportance) { + _earlinessImportance = earlinessImportance; + return *this; + } + + /** + * Returns the importance of multiple segments being close to each other, relative to earlinessImportance, + * occurrenceImportance and proximityCompletenessImportance in the match metric. Default: 0.05 + * + * @return The importance of segment proximity. 
+ */ + feature_t getSegmentProximityImportance() const { + return _segmentProximityImportance; + } + + /** + * Sets the importance of multiple segments being close to each other. + * + * @param segmentProximityImportance The importance of segment proximity. + * @return This, to allow chaining. + */ + Params &setSegmentProximityImportance(feature_t segmentProximityImportance) { + _segmentProximityImportance = segmentProximityImportance; + return *this; + } + + /** + * Returns the importance of having many occurrences of the query terms, relative to earlinessImportance, + * segmentProximityImportance and proximityCompletenessImportance in the match metric. Default: 0.05 + * + * @return The importance of many occurences. + */ + feature_t getOccurrenceImportance() const { + return _occurrenceImportance; + } + + /** + * Sets the importance of having many occurences of the query terms. + * + * @param occurenceImportance The importance of many occurences. + * @return This, to allow chaining. + */ + Params &setOccurrenceImportance(feature_t occurrenceImportance) { + _occurrenceImportance = occurrenceImportance; + return *this; + } + + /** + * Returns the normalized importance of relatedness used in the match metric. Default: 0.9 + * + * @return The importance of relatedness. + */ + feature_t getRelatednessImportance() const { + return _relatednessImportance; + } + + /** + * Sets the normalized importance of relatedness used in the match metric. + * + * @param relatednessImportance The importance of relatedness. + * @return This, to allow chaining. 
+ */ + Params &setRelatednessImportance(feature_t relatednessImportance) { + _relatednessImportance = relatednessImportance; + return *this; + } + +private: + uint32_t _proximityLimit; + uint32_t _maxAlternativeSegmentations; + uint32_t _maxOccurrences; + feature_t _proximityCompletenessImportance; + feature_t _relatednessImportance; + feature_t _earlinessImportance; + feature_t _segmentProximityImportance; + feature_t _occurrenceImportance; + feature_t _fieldCompletenessImportance; + std::vector _proximityTable; +}; + +} // fieldmatch +} // features +} // search + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp new file mode 100644 index 00000000000..93be549bc1f --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.cpp @@ -0,0 +1,103 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.fieldmatch.segmentstart"); + +#include +#include "computer.h" +#include "metrics.h" +#include "segmentstart.h" + +namespace search { +namespace features { +namespace fieldmatch { + +SegmentStart::SegmentStart(Computer *owner, const Metrics & metrics, uint32_t previousJ, uint32_t i, uint32_t j) : + _owner(owner), + _metrics(metrics), + _i(i), + _skipI(0), + _previousJ(previousJ), + _semanticDistanceExplored(0), + _open(true) +{ + if (j < std::numeric_limits::max()) { + exploredTo(j); + } +} + +void +SegmentStart::reset(const Metrics & metrics, uint32_t previousJ, uint32_t i, uint32_t j) +{ + _metrics = metrics; + _i = i; + _skipI = 0; + _previousJ = previousJ; + _semanticDistanceExplored = 0; + _open = true; + if (j < std::numeric_limits::max()) { + exploredTo(j); + } +} + +SegmentStart & +SegmentStart::exploredTo(uint32_t j) +{ + _semanticDistanceExplored = _owner->fieldIndexToSemanticDistance(j, _previousJ) + 1; + return *this; +} + +bool 
+SegmentStart::offerHistory(int previousJ, const Metrics & metrics) +{ + if (metrics.getSegmentationScore() <= _metrics.getSegmentationScore()) { + if (_owner->isTracing()) { + _owner->trace(vespalib::make_string(" Rejected offered history [score %f, ending at %d] at %s.\n", + metrics.getSegmentationScore(), + previousJ, + toString().c_str())); + } + return false; // reject + } + +#if 0 + // Starting over like this achieves higher correctness if the match metric is dependent on relative distance between + // segments but is more expensive + if (_previousJ != previousJ) { + semanticDistanceExplored = 0; + open = true; + } +#endif + + if (_owner->isTracing()) { + _owner->trace(vespalib::make_string(" Accepted offered history [score %f, ending at %d] at %s.\n", + metrics.getSegmentationScore(), + previousJ, + toString().c_str())); + } + _previousJ = previousJ; + _metrics = metrics; // take a copy of the given metrics + return true; // accept +} + +vespalib::string +SegmentStart::toString() { + if (_i == _owner->getNumQueryTerms()) { + return vespalib::make_string("Last segment: Complete match %f, previous j %d (%s).", + _metrics.getMatch(), + _previousJ, + _open ? "open" : "closed"); + } + else { + return vespalib::make_string("Segment at %d: Match %f, previous j %d, explored to %d (%s).", + _i, + _metrics.getMatch(), + _previousJ, + _semanticDistanceExplored, + _open ? "open" : "closed"); + } +} + +} // fieldmatch +} // features +} // search diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h new file mode 100644 index 00000000000..8865b45a448 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/segmentstart.h @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include "metrics.h" + +namespace search { +namespace features { +namespace fieldmatch { + +/** + *

Information on segment start points stored temporarily during string match metric calculation.

+ * + *

Given that we want to start a segment at i, this holds the best known metrics up to i and the end of the previous + * segment. In addition it holds information on how far we have tried to look for alternative segments from this + * starting point (skipI and previousJ).

+ * + * @author Jon Bratseth + * @author Simon Thoresen + * @version $Id$ + */ +class SegmentStart { +public: + /** + * Convenience typedefs. + */ + typedef std::unique_ptr<SegmentStart> UP; + typedef std::shared_ptr<SegmentStart> SP; + +public: + /** + * Creates a segment start point for any i position where the j is not known. + * + * @param owner The computer that owns this. + * @param metrics The best known metric. + * @param previousJ The previous j. + * @param i The start position. + * @param j The end position. + */ + SegmentStart(Computer *owner, const Metrics & metrics, + uint32_t previousJ = 0, uint32_t i = 0, + uint32_t j = std::numeric_limits<uint32_t>::max()); + + /** + * Resets this object. + * + * @param metrics The best known metric. + * @param previousJ The previous j. + * @param i The start position. + * @param j The end position. + */ + void reset(const Metrics & metrics, uint32_t previousJ = 0, uint32_t i = 0, + uint32_t j = std::numeric_limits<uint32_t>::max()); + + /** + * Returns the current best metrics for this starting point. + * + * @return The metrics. + */ + const Metrics & getMetrics() const { + return _metrics; + } + + /** + * Stores that we have explored to a certain j from the current previousJ. + * + * @param j The new position we have explored to. + * @return This, to allow chaining. + */ + SegmentStart &exploredTo(uint32_t j); + + /** + * Offers an alternative history leading up to this point, which is accepted and stored if it is better than the + * current history. + * + * @param previousJ The previous j offered (NOTE(review): declared int here, uint32_t elsewhere in this class - confirm). + * @param metrics The offered metrics. + * @return Whether or not the new history was accepted. + */ + bool offerHistory(int previousJ, const Metrics & metrics); + + /** + * Returns whether there are still unexplored j's for this i. + * + * @return Whether or not there are unexplored j's. + */ + bool isOpen() const { + return _open; + } + + /** + * Sets whether there are still unexplored j's for this i. + * + * @param open Whether or not there are unexplored j's. 
+ * @return This, to allow chaining. + */ + SegmentStart &setOpen(bool open) { + _open = open; + return *this; + } + + /** + * Returns the i for which this is the possible segment starting points. + * + * @return The i value. + */ + uint32_t getI() const { + return _i; + } + + /** + * Returns the j ending the previous segmentation producing those best metrics. + * + * @return The previous j value. + */ + uint32_t getPreviousJ() const { + return _previousJ; + } + + /** + * Returns the semantic distance from the previous j which is explored so far, exclusive + * (meaning, if the value is 0, 0 is not explored yet) + * + * @return The distance explored. + */ + uint32_t getSemanticDistanceExplored() const { + return _semanticDistanceExplored; + } + + /** + * Sets the semantic distance from the previous j which is explored so far, exclusive. + * + * @param distance The distance explored. + * @return This, to allow chaining. + */ + SegmentStart &setSemanticDistanceExplored(uint32_t distance) { + _semanticDistanceExplored = distance; + return *this; + } + + /** + * Returns the position startI we should start at from this start point i. startI==i except when there are i's from + * this starting point which are not found anywhere in the field. In that case, startI==i+the number of terms + * following i which are known not to be present. + * + * @return The start i value. + */ + uint32_t getStartI() const { + return _i + _skipI; + } + + /** + * Increments the startI by one because we have discovered that the term at the current startI is not present in the + * field. + * + * @return This, to allow chaining. + */ + SegmentStart &incrementStartI() { + _skipI++; + return *this; + } + + /** + * Returns a string representation of this. + * + * @return A string representation. + */ + vespalib::string toString(); + +private: + Computer *_owner; + Metrics _metrics; // The best known metrics up to this starting point. 
+ + uint32_t _i; // The i for which this is the possible segment starting points. + uint32_t _skipI; // The number of terms following i known not to be present in the field; added to _i by getStartI(). + uint32_t _previousJ; // The j ending the previous segmentation producing those best metrics. + uint32_t _semanticDistanceExplored; // The semantic distance from the current previousJ which is already explored. + bool _open; // There are possibly more j's to try at this starting point. +}; + +} // fieldmatch +} // features +} // search + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp new file mode 100644 index 00000000000..6a6e1935ad3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.cpp @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.fieldmatch.simplemetrics"); + +#include "simplemetrics.h" +#include + +namespace search { +namespace features { +namespace fieldmatch { + +SimpleMetrics::SimpleMetrics(const Params & params) : + _params(params), + _matches(0), + _matchesWithPosOcc(0), + _matchWithInvalidFieldLength(false), + _numTerms(0), + _matchedWeight(0), + _totalWeightInField(0), + _totalWeightInQuery(0) +{ +} + +vespalib::string SimpleMetrics::toString() const +{ + vespalib::asciistream ss; + ss << "matches(" << _matches << "), matchedWithPosOcc(" << _matchesWithPosOcc << "), "; + ss << "matchWithInvalidFieldLength(" << (_matchWithInvalidFieldLength ? 
"true" : "false") << "), "; + ss << "numTerms(" << _numTerms << "), "; + ss << "matchedWeight(" << _matchedWeight << "), totalWeightInField(" << _totalWeightInField << "), "; + ss << "totalWeightInQuery(" << _totalWeightInQuery << ")"; + return ss.str(); +} + + +} // fieldmatch +} // features +} // search diff --git a/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h new file mode 100644 index 00000000000..b2de310f080 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatch/simplemetrics.h @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include "params.h" + +namespace search { +namespace features { +namespace fieldmatch { + +/** + * The collection of simple metrics calculated when traversing the query terms of the query environment. + **/ +class SimpleMetrics { +private: + const Params & _params; + uint32_t _matches; + uint32_t _matchesWithPosOcc; + bool _matchWithInvalidFieldLength; // 0 or UNKNOWN_LENGTH + uint32_t _numTerms; + uint32_t _matchedWeight; + uint32_t _totalWeightInField; + uint32_t _totalWeightInQuery; + +public: + /** + * Constructs a new object. + **/ + SimpleMetrics(const Params & params); + + /** + * Resets the match data of this object. + **/ + void resetMatchData() { + _matches = 0; + _matchesWithPosOcc = 0; + _matchWithInvalidFieldLength = false; + _matchedWeight = 0; + } + + /** + * Registers a match in the field in question. + * + * @param weight The weight of the term matching. + **/ + void addMatch(uint32_t weight) { + ++_matches; + _matchedWeight += weight; + } + + /** + * Registers a match in the field in question. + * We have position information for this term match. + * + * @param weight The weight of the term matching. 
+ **/ + void addMatchWithPosOcc(uint32_t weight) { + addMatch(weight); + ++_matchesWithPosOcc; + } + + /** + * Registers that a match has invalid field length. + **/ + void hasMatchWithInvalidFieldLength() { + _matchWithInvalidFieldLength = true; + } + + + /** + * Registers a term that is searching in the field in question. + * + * @param weight The weight of the term. + **/ + void addSearchedTerm(uint32_t weight) { + ++_numTerms; + _totalWeightInField += weight; + } + + /** + * Registers a query term with the given weight. + * + * @param weight The weight of the term. + **/ + void addQueryTerm(uint32_t weight) { + _totalWeightInQuery += weight; + } + + /** + * Overrides the total weight for all query terms. + * + * @param weight The total weight. + **/ + void setTotalWeightInQuery(uint32_t weight) { + _totalWeightInQuery = weight; + } + + /** + * Returns the normalized score for this object. + * total weight of matched terms in the field / total weight of searched terms in the field + * + * @return The score. + **/ + feature_t getScore() const { + return _totalWeightInField > 0 ? _matchedWeight / static_cast<feature_t>(_totalWeightInField) : 0; + } + + /** + * Returns the completeness score for this object. + * queryCompleteness * ( 1 - fieldCompletenessImportance ) + * + * @return The completeness. + **/ + feature_t getCompleteness() const { + return getQueryCompleteness() * (1 - _params.getFieldCompletenessImportance()); + } + + /** + * Returns the query completeness score for this object. + * total number of matched terms in the field / total number of searched terms in the field + * + * @return The query completeness. + **/ + feature_t getQueryCompleteness() const { + return _numTerms > 0 ? _matches / static_cast<feature_t>(_numTerms) : 0; + } + + /** + * Returns the weight score for this object. + * total weight of matched terms in the field / total weight of all query terms + * + * @return The weight. + **/ + feature_t getWeight() const { + return _totalWeightInQuery > 0 ? 
_matchedWeight / static_cast<feature_t>(_totalWeightInQuery) : 0; + } + + /** + * Returns the number of matches in the field in question. + * + * @return The number of matches. + **/ + uint32_t getMatches() const { + return _matches; + } + + /** + * Returns the number of matches in the field in question with position information. + * + * @return The number of matches with position information. + **/ + uint32_t getMatchesWithPosOcc() const { + return _matchesWithPosOcc; + } + + /** + * Returns the number of degraded matches (no position information) in the field in question. + * + * @return The number of degraded matches. + **/ + uint32_t getDegradedMatches() const { + return getMatches() - getMatchesWithPosOcc(); + } + + /** + * Returns whether we have a match in the field in question with invalid field length. + * + * @return Whether we have seen an invalid field length. + **/ + bool getMatchWithInvalidFieldLength() const { + return _matchWithInvalidFieldLength; + } + + /** + * Returns a string representation of this object. + * + * @return String representation. + **/ + vespalib::string toString() const; +}; + +} // fieldmatch +} // features +} // search + diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp new file mode 100644 index 00000000000..cec68d0c367 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.cpp @@ -0,0 +1,311 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.fieldmatchfeature"); +#include "fieldmatchfeature.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; + +namespace search { +namespace features { + +FieldMatchExecutor::FieldMatchExecutor(const IQueryEnvironment & queryEnv, + const FieldInfo & field, + const fieldmatch::Params & params) : + FeatureExecutor(), + _splitter(queryEnv, field.id()), + _field(field), + _params(params), + _cmp(vespalib::make_string("fieldMatch(%s)", _field.name().c_str()), + _splitter, field, params) +{ + // empty +} + +void +FieldMatchExecutor::execute(search::fef::MatchData & match) +{ + //LOG(info, "execute for field '%s' and docId(%u)", _field.name().c_str(), match.getDocId()); + + _splitter.update(match); + _cmp.reset(match); + //_cmp.setTracing(true); + + const fieldmatch::SimpleMetrics & simple = _cmp.getSimpleMetrics(); + + // only run the computer if we have at least one match with position information + // and that the matches with position information have valid field lengths + bool runCmp = (simple.getMatches() > 0 && + simple.getMatchesWithPosOcc() > 0 && + !simple.getMatchWithInvalidFieldLength()); + + //LOG(info, "runCmp(%s), simpleMetrics(%s)", runCmp ? "true" : "false", simple.toString().c_str()); + + if (runCmp) { + _cmp.run(); + } + + const fieldmatch::Metrics & result = _cmp.getFinalMetrics(); + + *match.resolveFeature(outputs()[0]) = runCmp ? result.getMatch() : 0; // score + *match.resolveFeature(outputs()[1]) = runCmp ? result.getProximity() : 0; // proximity + *match.resolveFeature(outputs()[2]) = runCmp ? result.getCompleteness() : simple.getCompleteness(); // completeness + *match.resolveFeature(outputs()[3]) = runCmp ? 
result.getQueryCompleteness() : simple.getQueryCompleteness(); // queryCompleteness + *match.resolveFeature(outputs()[4]) = result.getFieldCompleteness(); // fieldCompleteness + *match.resolveFeature(outputs()[5]) = runCmp ? result.getOrderness() : 0; // orderness + *match.resolveFeature(outputs()[6]) = result.getRelatedness(); // relatedness + *match.resolveFeature(outputs()[7]) = result.getEarliness(); // earliness + *match.resolveFeature(outputs()[8]) = result.getLongestSequenceRatio(); // longestSequenceRatio + *match.resolveFeature(outputs()[9]) = result.getSegmentProximity(); // segmentProximity + *match.resolveFeature(outputs()[10]) = runCmp ? result.getUnweightedProximity() : 0; // unweightedProximity + *match.resolveFeature(outputs()[11]) = runCmp ? result.getAbsoluteProximity() : 0; // absoluteProximity + *match.resolveFeature(outputs()[12]) = result.getOccurrence(); // occurrence + *match.resolveFeature(outputs()[13]) = result.getAbsoluteOccurrence(); // absoluteOccurrence + *match.resolveFeature(outputs()[14]) = result.getWeightedOccurrence(); // weightedOccurrence + *match.resolveFeature(outputs()[15]) = result.getWeightedAbsoluteOccurrence(); // weightedAbsoluteOccurrence + *match.resolveFeature(outputs()[16]) = result.getSignificantOccurrence(); // significantOccurrence + + *match.resolveFeature(outputs()[17]) = runCmp ? result.getWeight() : simple.getWeight(); // weight + *match.resolveFeature(outputs()[18]) = result.getSignificance(); // significance + *match.resolveFeature(outputs()[19]) = result.getImportance(); // importance + + *match.resolveFeature(outputs()[20]) = result.getSegments(); // segments + *match.resolveFeature(outputs()[21]) = runCmp ? 
result.getMatches() : simple.getMatches(); // matches + *match.resolveFeature(outputs()[22]) = result.getOutOfOrder(); // outOfOrder + *match.resolveFeature(outputs()[23]) = result.getGaps(); // gaps + *match.resolveFeature(outputs()[24]) = result.getGapLength(); // gapLength + *match.resolveFeature(outputs()[25]) = runCmp ? result.getLongestSequence() : 0; // longestSequence + *match.resolveFeature(outputs()[26]) = runCmp ? result.getHead() : 0; // head + *match.resolveFeature(outputs()[27]) = runCmp ? result.getTail() : 0; // tail + *match.resolveFeature(outputs()[28]) = result.getSegmentDistance(); // segmentDistance + *match.resolveFeature(outputs()[29]) = simple.getDegradedMatches(); // degradedMatches +} + + +FieldMatchBlueprint::FieldMatchBlueprint() : + Blueprint("fieldMatch"), + _field(NULL), + _params() +{ + // empty +} + +void +FieldMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo * field = env.getField(i); + if (field->type() == search::fef::FieldType::INDEX && + field->collection() == search::fef::CollectionType::SINGLE) + { + FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field->name()); + if (field->isFilter()) { + visitor.visitDumpFeature(fnb.buildName()); + visitor.visitDumpFeature(fnb.output("completeness").buildName()); + visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName()); + visitor.visitDumpFeature(fnb.output("weight").buildName()); + visitor.visitDumpFeature(fnb.output("matches").buildName()); + visitor.visitDumpFeature(fnb.output("degradedMatches").buildName()); + } else { + visitor.visitDumpFeature(fnb.buildName()); + visitor.visitDumpFeature(fnb.output("proximity").buildName()); + visitor.visitDumpFeature(fnb.output("completeness").buildName()); + visitor.visitDumpFeature(fnb.output("queryCompleteness").buildName()); + 
visitor.visitDumpFeature(fnb.output("fieldCompleteness").buildName()); + visitor.visitDumpFeature(fnb.output("orderness").buildName()); + visitor.visitDumpFeature(fnb.output("relatedness").buildName()); + visitor.visitDumpFeature(fnb.output("earliness").buildName()); + visitor.visitDumpFeature(fnb.output("longestSequenceRatio").buildName()); + visitor.visitDumpFeature(fnb.output("segmentProximity").buildName()); + visitor.visitDumpFeature(fnb.output("unweightedProximity").buildName()); + visitor.visitDumpFeature(fnb.output("absoluteProximity").buildName()); + visitor.visitDumpFeature(fnb.output("occurrence").buildName()); + visitor.visitDumpFeature(fnb.output("absoluteOccurrence").buildName()); + visitor.visitDumpFeature(fnb.output("weightedOccurrence").buildName()); + visitor.visitDumpFeature(fnb.output("weightedAbsoluteOccurrence").buildName()); + visitor.visitDumpFeature(fnb.output("significantOccurrence").buildName()); + visitor.visitDumpFeature(fnb.output("weight").buildName()); + visitor.visitDumpFeature(fnb.output("significance").buildName()); + visitor.visitDumpFeature(fnb.output("importance").buildName()); + visitor.visitDumpFeature(fnb.output("segments").buildName()); + visitor.visitDumpFeature(fnb.output("matches").buildName()); + visitor.visitDumpFeature(fnb.output("outOfOrder").buildName()); + visitor.visitDumpFeature(fnb.output("gaps").buildName()); + visitor.visitDumpFeature(fnb.output("gapLength").buildName()); + visitor.visitDumpFeature(fnb.output("longestSequence").buildName()); + visitor.visitDumpFeature(fnb.output("head").buildName()); + visitor.visitDumpFeature(fnb.output("tail").buildName()); + visitor.visitDumpFeature(fnb.output("segmentDistance").buildName()); + visitor.visitDumpFeature(fnb.output("degradedMatches").buildName()); + } + } + } +} + +Blueprint::UP +FieldMatchBlueprint::createInstance() const +{ + return Blueprint::UP(new FieldMatchBlueprint()); +} + +bool +FieldMatchBlueprint::setup(const IIndexEnvironment & env, + const 
ParameterList & params) +{ + _field = params[0].asField(); + + const Properties & lst = env.getProperties(); + Property obj; + obj = lst.lookup(getName(), "proximityLimit"); + if (obj.found()) { + _params.setProximityLimit(atoi(obj.get().c_str())); + } + obj = lst.lookup(getName(), "maxAlternativeSegmentations"); + if (obj.found()) { + _params.setMaxAlternativeSegmentations(atoi(obj.get().c_str())); + } + obj = lst.lookup(getName(), "maxOccurrences"); + if (obj.found()) { + _params.setMaxOccurrences(atoi(obj.get().c_str())); + } + obj = lst.lookup(getName(), "proximityCompletenessImportance"); + if (obj.found()) { + _params.setProximityCompletenessImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "relatednessImportance"); + if (obj.found()) { + _params.setRelatednessImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "earlinessImportance"); + if (obj.found()) { + _params.setEarlinessImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "segmentProximityImportance"); + if (obj.found()) { + _params.setSegmentProximityImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "occurrenceImportance"); + if (obj.found()) { + _params.setOccurrenceImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "fieldCompletenessImportance"); + if (obj.found()) { + _params.setFieldCompletenessImportance(atof(obj.get().c_str())); + } + obj = lst.lookup(getName(), "proximityTable"); + if (obj.found()) { + std::vector<feature_t> table; + for (uint32_t i = 0; i < obj.size(); ++i) { + table.push_back(atof(obj.getAt(i).c_str())); + } + _params.setProximityTable(table); + } + if (!_params.valid()) { + return false; + } + + // normalized + describeOutput("score", + "A normalized measure of the degree to which this query and field matched (default, the long name of this is match). 
Use " + "this if you don't want to create your own combination function of more fine grained fieldmatch features."); + describeOutput("proximity", + "Normalized proximity - a value which is close to 1 when matched terms are close inside each segment, and close to zero " + "when they are far apart inside segments. Relatively more connected terms influence this value more. This is " + "absoluteProximity/average connectedness for the query terms for this field."); + describeOutput("completeness", + "The normalized total completeness, where field completeness is more important."); + describeOutput("queryCompleteness", + "The normalized ratio of query tokens matched in the field."); + describeOutput("fieldCompleteness", + "The normalized ratio of query tokens which was matched in the field."); + describeOutput("orderness", + "A normalized metric of how well the order of the terms agrees in the chosen segments."); + describeOutput("relatedness", + "A normalized measure of the degree to which different terms are related (occurring in the same segment)."); + describeOutput("earliness", + "A normalized measure of how early the first segment occurs in this field."); + describeOutput("longestSequenceRatio", + "A normalized metric of the relative size of the longest sequence."); + describeOutput("segmentProximity", + "A normalized metric of the closeness (inverse of spread) of segments in the field."); + describeOutput("unweightedProximity", + "The normalized proximity of the matched terms, not taking term connectedness into account. This number is close to 1 if " + "all the matched terms are following each other in sequence, and close to 0 if they are far from each other or out of " + "order."); + describeOutput("absoluteProximity", + "Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. 
This number is " + "0.1 if all the matched terms are and have default or lower connectedness, close to 1 if they are following in sequence " + "and have a high connectedness, and close to 0 if they are far from each other in the segments or out of order."); + describeOutput("occurrence", + "Returns a normalized measure of the number of occurrence of the terms of the query. This number is 1 if there are many " + " occurrences of the query terms in absolute terms, or relative to the total content of the field, and 0 if there are " + "none."); + describeOutput("absoluteOccurrence", + "Returns a normalized measure of the number of occurrence of the terms of the query."); + describeOutput("weightedOccurrence", + "Returns a normalized measure of the number of occurrence of the terms of the query, weighted by term weight. This number " + "is close to 1 if there are many occurrences of highly weighted query terms, in absolute terms, or relative to the total " + "content of the field, and 0 if there are none."); + describeOutput("weightedAbsoluteOccurrence", + "Returns a normalized measure of the number of occurrence of the terms of the query, taking weights into account so that " + "occurrences of higher weighted query terms has more impact than lower weighted terms."); + describeOutput("significantOccurrence", + "Returns a normalized measure of the number of occurrence of the terms of the query in absolute terms, or relative to the " + "total content of the field, weighted by term significance."); + + // normalized and relative to the whole query + describeOutput("weight", + "The normalized weight of this match relative to the whole query."); + describeOutput("significance", + "Returns the normalized term significance (1-frequency) of the terms of this match relative to the whole query."); + describeOutput("importance", + "Returns the average of significance and weight. 
This has the same properties as those metrics."); + + // not normalized + describeOutput("segments", + "The number of field text segments which are needed to match the query as completely as possible."); + describeOutput("matches", + "The number of query terms which was matched in this field."); + describeOutput("outOfOrder", + "The total number of out of order token sequences within matched field segments."); + describeOutput("gaps", + "The total number of position jumps (backward or forward) within field segments."); + describeOutput("gapLength", + "The summed length of all gaps within segments."); + describeOutput("longestSequence", + "The size of the longest matched continuous, in-order sequence in the field."); + describeOutput("head", + "The number of tokens in the field preceeding the start of the first matched segment."); + describeOutput("tail", + "The number of tokens in the field following the end of the last matched segment."); + describeOutput("segmentDistance", + "The sum of the distance between all segments making up a match to the query, measured as the sum of the number of token " + "positions separating the start of each field adjacent segment."); + describeOutput("degradedMatches", + "The number of degraded query terms (no position information available) which was matched in this field."); + env.hintFieldAccess(_field->id()); + return true; +} + +FeatureExecutor::LP +FieldMatchBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + return FeatureExecutor::LP(new FieldMatchExecutor(env, *_field, _params)); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h new file mode 100644 index 00000000000..5aaa75cda2d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldmatchfeature.h @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for THE field match feature. + */ +class FieldMatchExecutor : public search::fef::FeatureExecutor { +private: + search::fef::PhraseSplitter _splitter; + const search::fef::FieldInfo & _field; + const fieldmatch::Params & _params; + fieldmatch::Computer _cmp; + +public: + /** + * Constructs an executor. + */ + FieldMatchExecutor(const search::fef::IQueryEnvironment & queryEnv, + const search::fef::FieldInfo & field, + const fieldmatch::Params & params); + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the blueprint for THE field match feature. + */ +class FieldMatchBlueprint : public search::fef::Blueprint { +private: + const search::fef::FieldInfo * _field; + fieldmatch::Params _params; + +public: + /** + * Constructs a blueprint. + */ + FieldMatchBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. 
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp new file mode 100644 index 00000000000..7ce785e0c78 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.cpp @@ -0,0 +1,129 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.fieldtermmatchfeature"); + +#include +#include +#include +#include +#include +#include +#include "fieldtermmatchfeature.h" +#include "utils.h" + +namespace search { +namespace features { + +FieldTermMatchExecutor::FieldTermMatchExecutor(const search::fef::IQueryEnvironment &env, + uint32_t fieldId, uint32_t termId) : + search::fef::FeatureExecutor(), + _fieldHandle(util::getTermFieldHandle(env, termId, fieldId)) +{ +} + +void +FieldTermMatchExecutor::execute(search::fef::MatchData &match) +{ + if (_fieldHandle == search::fef::IllegalHandle) { + *match.resolveFeature(outputs()[0]) = 1000000; // firstPosition + *match.resolveFeature(outputs()[1]) = 1000000; // lastPosition + *match.resolveFeature(outputs()[2]) = 0.0f; // occurrences + *match.resolveFeature(outputs()[3]) = 0.0f; // sum weight + *match.resolveFeature(outputs()[4]) = 0.0f; // avg exactness + return; + } + + search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(_fieldHandle); + uint32_t firstPosition = 1000000; + uint32_t lastPosition = 1000000; + uint32_t occurrences = 0; + double sumExactness = 0; + int64_t weight = 0; + if (tfmd.getDocId() == match.getDocId()) { + search::fef::FieldPositionsIterator it = tfmd.getIterator(); + if (it.valid()) { + lastPosition = 0; + while (it.valid()) { + firstPosition = std::min(firstPosition, it.getPosition()); + lastPosition = 
std::max(lastPosition, it.getPosition()); + ++occurrences; + weight += it.getElementWeight(); + sumExactness += it.getMatchExactness(); + it.next(); + } + } else { + lastPosition = 1000000; + occurrences = 1; + } + } + *match.resolveFeature(outputs()[0]) = firstPosition; + *match.resolveFeature(outputs()[1]) = lastPosition; + *match.resolveFeature(outputs()[2]) = occurrences; + *match.resolveFeature(outputs()[3]) = weight; + *match.resolveFeature(outputs()[4]) = (occurrences > 0) ? (sumExactness / occurrences) : 0; +} + +FieldTermMatchBlueprint::FieldTermMatchBlueprint() : + search::fef::Blueprint("fieldTermMatch"), + _fieldId(0), + _termId(0) +{ + // empty +} + +void +FieldTermMatchBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + const search::fef::Properties &props = env.getProperties(); + const vespalib::string &baseName = getBaseName(); + int baseNumTerms = atoi(props.lookup(baseName, "numTerms").get("5").c_str()); + + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo& field = *env.getField(i); + if (field.type() == search::fef::FieldType::INDEX) { + const vespalib::string &fieldName = field.name(); + const search::fef::Property &prop = props.lookup(baseName, "numTerms", fieldName); + int numTerms = prop.found() ? 
atoi(prop.get().c_str()) : baseNumTerms; + for (int term = 0; term < numTerms; ++term) { + search::fef::FeatureNameBuilder fnb; + fnb.baseName(baseName) + .parameter(fieldName) + .parameter(vespalib::make_string("%d", term)); + visitor.visitDumpFeature(fnb.output("firstPosition").buildName()); + visitor.visitDumpFeature(fnb.output("occurrences").buildName()); + visitor.visitDumpFeature(fnb.output("weight").buildName()); + } + } + } +} + +bool +FieldTermMatchBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + _fieldId = params[0].asField()->id(); + _termId = params[1].asInteger(); + describeOutput("firstPosition", "The first occurrence of this term."); + describeOutput("lastPosition", "The last occurrence of this term."); + describeOutput("occurrences", "The number of occurrence of this term."); + describeOutput("weight", "The sum occurence weights of this term."); + describeOutput("exactness", "The average exactness this term."); + env.hintFieldAccess(_fieldId); + return true; +} + +search::fef::Blueprint::UP +FieldTermMatchBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new FieldTermMatchBlueprint()); +} + +search::fef::FeatureExecutor::LP +FieldTermMatchBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new FieldTermMatchExecutor(env, _fieldId, _termId)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h new file mode 100644 index 00000000000..be5ef27ef5e --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/fieldtermmatchfeature.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for term feature. 
+ */ +class FieldTermMatchExecutor : public search::fef::FeatureExecutor { +public: + /** + * Constructs an executor for term feature. + * + * @param env The query environment. + * @param fieldId The field to match to. + * @param termId The term to match. + */ + FieldTermMatchExecutor(const search::fef::IQueryEnvironment &env, + uint32_t fieldId, uint32_t termId); + virtual void execute(search::fef::MatchData &data); + +private: + search::fef::TermFieldHandle _fieldHandle; +}; + +/** + * Implements the blueprint for term feature. + */ +class FieldTermMatchBlueprint : public search::fef::Blueprint { +public: + /** + * Constructs a blueprint for term feature. + */ + FieldTermMatchBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + +private: + uint32_t _fieldId; + uint32_t _termId; +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp b/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp new file mode 100644 index 00000000000..e3c7f7f5332 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/firstphasefeature.cpp @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".features.firstphasefeature");
+#include "firstphasefeature.h"
+
+#include
+#include
+#include
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+void // Copies the value of input feature 0 to output feature 0 unchanged.
+FirstPhaseExecutor::execute(search::fef::MatchData & match)
+{
+    *match.resolveFeature(outputs()[0]) = *match.resolveFeature(inputs()[0]);
+}
+
+
+FirstPhaseBlueprint::FirstPhaseBlueprint() :
+    Blueprint("firstPhase") // base name under which this feature is registered
+{
+    // empty
+}
+
+void // Dumps the feature under its bare base name (no parameters, no explicit output).
+FirstPhaseBlueprint::visitDumpFeatures(const IIndexEnvironment &,
+                                       IDumpFeatureVisitor & visitor) const
+{
+    // havardpe: dumping this is a really bad idea
+    visitor.visitDumpFeature(getBaseName());
+}
+
+Blueprint::UP // Returns a fresh, un-configured blueprint of the same type.
+FirstPhaseBlueprint::createInstance() const
+{
+    return Blueprint::UP(new FirstPhaseBlueprint());
+}
+
+bool // Declares one input (the feature named by the FirstPhase rank property) and one output, "score". Always returns true.
+FirstPhaseBlueprint::setup(const IIndexEnvironment & env,
+                           const ParameterList &)
+{
+    describeOutput("score", "The ranking score for first phase.",
+                   defineInput(indexproperties::rank::FirstPhase::lookup(env.getProperties()), // input name comes from the index properties
+                               AcceptInput::ANY));
+    return true;
+}
+
+FeatureExecutor::LP // The executor is stateless, so the query environment is not consulted.
+FirstPhaseBlueprint::createExecutor(const IQueryEnvironment &) const
+{
+    return FeatureExecutor::LP(new FirstPhaseExecutor());
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/firstphasefeature.h b/searchlib/src/vespa/searchlib/features/firstphasefeature.h
new file mode 100644
index 00000000000..67deea23984
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/firstphasefeature.h
@@ -0,0 +1,53 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+
+namespace search {
+namespace features {
+
+/**
+ * Implements the executor outputting the first phase ranking.
+ */
+class FirstPhaseExecutor : public search::fef::FeatureExecutor {
+public:
+    virtual bool isPure() { return true; } // always reports itself as pure
+    virtual void execute(search::fef::MatchData & data); // forwards input 0 to output 0
+};
+
+
+/**
+ * Implements the blueprint for the first phase feature.
+ */
+class FirstPhaseBlueprint : public search::fef::Blueprint {
+public:
+    /**
+     * Constructs a blueprint.
+     */
+    FirstPhaseBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+                                   search::fef::IDumpFeatureVisitor & visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint. The feature takes no parameters.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc();
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp
new file mode 100644
index 00000000000..b33e367f4f0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.cpp
@@ -0,0 +1,309 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.flowcompleteness"); +#include "flow_completeness_feature.h" +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +FlowCompletenessExecutor::FlowCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const FlowCompletenessParams ¶ms) + : _params(params), + _terms(), + _queue(), + _sumTermWeight(0) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + LOG(spam, "consider term %u", i); + const search::fef::ITermData *termData = env.getTerm(i); + LOG(spam, "term %u weight %u", i, termData->getWeight().percent()); + if (termData->getWeight().percent() != 0) { // only consider query terms with contribution + typedef search::fef::ITermFieldRangeAdapter FRA; + uint32_t j = 0; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const search::fef::ITermFieldData &tfd = iter.get(); + LOG(spam, "term %u field data %u for field id %u (my field id %u)", + i, j++, tfd.getFieldId(), _params.fieldId); + if (tfd.getFieldId() == _params.fieldId) { + int termWeight = termData->getWeight().percent(); + _sumTermWeight += termWeight; + _terms.push_back(Term(tfd.getHandle(), termWeight)); + } + } + } + } + LOG(spam, "added %zu terms", _terms.size()); +} + +typedef std::vector TermIdxList; +typedef std::vector PosList; + +typedef vespalib::hash_map TermIdxMap; + +struct State { + int elementWeight; + uint32_t elementLength; + uint32_t matchedTerms; + int sumTermWeight; + + std::vector positionsForTerm; + uint32_t posLimit; + PosList matchedPosForTerm; + TermIdxMap matchedTermForPos; // maps pos -> term + + double score; + double flow; + feature_t completeness; + feature_t fieldCompleteness; + feature_t queryCompleteness; + + State(int weight, uint32_t length) + : elementWeight(weight), elementLength(length), + matchedTerms(0), sumTermWeight(0), + posLimit(0), + score(0.0), flow(0.0), + completeness(0.0), fieldCompleteness(0.0), 
queryCompleteness(0.0) {} + + void addMatch(int termWeight) { + ++matchedTerms; + sumTermWeight += termWeight; + } + + struct Path { + std::vector path; + bool operator< (const Path& other) const { + return path.size() < other.path.size(); + } + }; + + Path bfs(vespalib::PriorityQueue &queue) + { + TermIdxList seen(matchedTerms, 0); + while (!queue.empty()) { + Path firstP = queue.front(); + queue.pop_front(); + uint32_t startTerm = firstP.path.back(); + seen[startTerm] = 1; + PosList &edges = positionsForTerm[startTerm]; + for (size_t j = 0; j < edges.size(); ++j) { + Path nextP = firstP; + uint32_t pos = edges[j]; + nextP.path.push_back(pos); + TermIdxMap::const_iterator it = matchedTermForPos.find(pos); + if (it == matchedTermForPos.end()) { + return nextP; + } else { + uint32_t nextTerm = it->second; + if (seen[nextTerm] == 0) { + seen[nextTerm] = 1; + nextP.path.push_back(nextTerm); + queue.push(nextP); + } + } + } + } + return Path(); + } + + int findMatches() { + vespalib::PriorityQueue q; + + for (size_t i = 0; i < matchedTerms; ++i) { + if (matchedPosForTerm[i] == IllegalPosId) { + Path p; + p.path.push_back(i); + q.push(p); + } + } + if (q.empty()) { + return 0; + } + Path p = bfs(q); + if (p.path.size() == 0) { + return 0; + } + while (p.path.size() > 1) { + uint32_t pos = p.path.back(); + assert(pos < posLimit); + p.path.pop_back(); + uint32_t tix = p.path.back(); + assert(tix < matchedTerms); + p.path.pop_back(); + matchedTermForPos[pos] = tix; + matchedPosForTerm[tix] = pos; + } + assert(p.path.size() == 0); + return 1; + } + + int findSimpleMatches() { + int found = 0; + for (size_t tix = 0; tix < matchedTerms; ++tix) { + assert(matchedPosForTerm[tix] == IllegalPosId); + assert(positionsForTerm[tix].size() > 0); + uint32_t pos = positionsForTerm[tix][0]; + assert(pos < posLimit); + + TermIdxMap::const_iterator it = matchedTermForPos.find(pos); + if (it == matchedTermForPos.end()) { + ++found; + matchedTermForPos[pos] = tix; + matchedPosForTerm[tix] = 
pos; + } + } + return found; + } + + void calculateScore(uint32_t queryTerms, double factor) { + matchedPosForTerm.resize(matchedTerms, IllegalPosId); + int more = findSimpleMatches(); + flow += more; + while ((more = findMatches()) > 0) { + flow += more; + } + queryCompleteness = (flow / (double)queryTerms); + fieldCompleteness = (flow / (double)elementLength); + completeness = (fieldCompleteness * factor) + + (queryCompleteness * (1 - factor)); + score = completeness * (double)sumTermWeight; + } +}; + + +void +FlowCompletenessExecutor::execute(search::fef::MatchData &data) +{ + assert(_queue.empty()); + for (size_t i = 0; i < _terms.size(); ++i) { + search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_terms[i].termHandle); + Item item(i, tfmd->begin(), tfmd->end()); + LOG(spam, "found tfmd item with %zu positions", (item.end - item.pos)); + if (item.pos != item.end) { + _queue.push(item); + } + } + State best(0, 0); + while (!_queue.empty()) { + Item &start = _queue.front(); + uint32_t elementId = start.elemId; + LOG_ASSERT(start.pos != start.end); + State state(start.pos->getElementWeight(), start.pos->getElementLen()); + + while (!_queue.empty() && _queue.front().elemId == elementId) { + Item &item = _queue.front(); + + // update state + state.positionsForTerm.push_back(PosList()); + while (item.pos != item.end && item.pos->getElementId() == elementId) { + uint32_t pos = item.pos->getPosition(); + state.positionsForTerm.back().push_back(pos); + state.posLimit = std::max(state.posLimit, pos + 1); + ++item.pos; + } + state.addMatch(_terms[item.termIdx].termWeight); + + // adjust item and its place in queue + if (item.pos == item.end) { + _queue.pop_front(); + } else { + item.elemId = item.pos->getElementId(); + _queue.adjust(); + } + } + state.calculateScore(_terms.size(), _params.fieldCompletenessImportance); + if (state.score > best.score) { + best = state; + } + } + *data.resolveFeature(outputs()[0]) = best.completeness; + 
*data.resolveFeature(outputs()[1]) = best.fieldCompleteness; + *data.resolveFeature(outputs()[2]) = best.queryCompleteness; + *data.resolveFeature(outputs()[3]) = best.elementWeight; + *data.resolveFeature(outputs()[4]) = _params.fieldWeight; + *data.resolveFeature(outputs()[5]) = best.flow; + +} + +//----------------------------------------------------------------------------- + +FlowCompletenessBlueprint::FlowCompletenessBlueprint() + : Blueprint("flowCompleteness"), + _output(), + _params() +{ + _output.push_back("completeness"); + _output.push_back("fieldCompleteness"); + _output.push_back("queryCompleteness"); + _output.push_back("elementWeight"); + _output.push_back("weight"); + _output.push_back("flow"); +} + +void +FlowCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ +#ifdef notyet + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo &field = *env.getField(i); + if (field.type() == search::fef::FieldType::INDEX) { + if (!field.isFilter()) { + search::fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field.name()); + for (size_t out = 0; out < _output.size(); ++out) { + visitor.visitDumpFeature(fnb.output(_output[out]).buildName()); + } + } + } + } +#else + (void)env; + (void)visitor; +#endif +} + +search::fef::Blueprint::UP +FlowCompletenessBlueprint::createInstance() const +{ + return Blueprint::UP(new FlowCompletenessBlueprint()); +} + +bool +FlowCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + const search::fef::FieldInfo *field = params[0].asField(); + + _params.fieldId = field->id(); + const search::fef::Properties &lst = env.getProperties(); + search::fef::Property obj = lst.lookup(getName(), "fieldCompletenessImportance"); + if (obj.found()) { + _params.fieldCompletenessImportance = atof(obj.get().c_str()); + } + _params.fieldWeight = 
search::fef::indexproperties::FieldWeight::lookup(lst, field->name()); + + describeOutput(_output[0], "combined completeness for best scored element"); + describeOutput(_output[1], "best scored element completeness"); + describeOutput(_output[2], "query completeness for best scored element"); + describeOutput(_output[3], "element weight of best scored element"); + describeOutput(_output[4], "field weight"); + describeOutput(_output[5], "query terms matching in best element (measured by flow)"); + env.hintFieldAccess(field->id()); + return true; +} + +search::fef::FeatureExecutor::LP +FlowCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new FlowCompletenessExecutor(env, _params)); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h new file mode 100644 index 00000000000..07ff6f55884 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/flow_completeness_feature.h @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +struct FlowCompletenessParams { + uint32_t fieldId; + feature_t fieldWeight; + feature_t fieldCompletenessImportance; + FlowCompletenessParams() + : fieldId(search::fef::IllegalFieldId), + fieldWeight(0), + fieldCompletenessImportance(0.5) {} +}; + +//----------------------------------------------------------------------------- + +const uint32_t IllegalElementId = 0xffffffff; +const uint32_t IllegalTermId = 0xffffffff; +const uint32_t IllegalPosId = 0xffffffff; + +class FlowCompletenessExecutor : public search::fef::FeatureExecutor +{ +private: + struct Term { + search::fef::TermFieldHandle termHandle; + int termWeight; + Term(search::fef::TermFieldHandle handle, int weight) + : termHandle(handle), termWeight(weight) {} + }; + + struct Item { + uint32_t elemId; + uint32_t termIdx; + search::fef::TermFieldMatchData::PositionsIterator pos; + search::fef::TermFieldMatchData::PositionsIterator end; + + Item(uint32_t idx, + search::fef::TermFieldMatchData::PositionsIterator p, + search::fef::TermFieldMatchData::PositionsIterator e) + : elemId(IllegalElementId), termIdx(idx), pos(p), end(e) + { + if (p != e) elemId = p->getElementId(); + } + + bool operator< (const Item &other) const { + return (elemId < other.elemId); + } + }; + + FlowCompletenessParams _params; + std::vector _terms; + vespalib::PriorityQueue _queue; + int _sumTermWeight; + + static bool nextElement(Item &item); + +public: + FlowCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const FlowCompletenessParams ¶ms); + virtual bool isPure() { return _terms.empty(); } + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class FlowCompletenessBlueprint : public search::fef::Blueprint +{ +private: + std::vector 
_output; + FlowCompletenessParams _params; + +public: + FlowCompletenessBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/foreachfeature.cpp b/searchlib/src/vespa/searchlib/features/foreachfeature.cpp new file mode 100644 index 00000000000..4ee92e9177d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/foreachfeature.cpp @@ -0,0 +1,186 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.foreachfeature"); +#include "foreachfeature.h" +#include "utils.h" + +#include +#include +#include +#include +#include + +using namespace search::fef; + +namespace search { +namespace features { + +template +ForeachExecutor::ForeachExecutor(const CO & condition, uint32_t numInputs) : + FeatureExecutor(), + _condition(condition), + _operation(), + _numInputs(numInputs) +{ +} + +template +void +ForeachExecutor::execute(MatchData & match) +{ + _operation.reset(); + for (uint32_t i = 0; i < inputs().size(); ++i) { + feature_t val = *match.resolveFeature(inputs()[i]); + if (_condition.useValue(val)) { + _operation.onValue(val); + } + } + *match.resolveFeature(outputs()[0]) = _operation.getResult(); +} + + +bool +ForeachBlueprint::decideDimension(const vespalib::string & param) +{ + if (param == "terms") { + _dimension = TERMS; + } else if (param == "fields") { + _dimension = FIELDS; + } else if (param == "attributes") { + _dimension = ATTRIBUTES; + } else { + LOG(error, "Expected dimension parameter to be 'terms', 'fields', or 'attributes', but was '%s'", + param.c_str()); + return false; + } + return true; +} + +bool +ForeachBlueprint::decideCondition(const vespalib::string & condition, const vespalib::string & operation) +{ + if (condition == "true") { + return decideOperation(TrueCondition(), operation); + } else if (condition.size() >= 2 && condition[0] == '<') { + return decideOperation(LessThanCondition(util::strToNum(condition.substr(1))), operation); + } else if (condition.size() >= 2 && condition[0] == '>') { + return decideOperation(GreaterThanCondition(util::strToNum(condition.substr(1))), operation); + } else { + LOG(error, "Expected condition parameter to be 'true', 'a', but was '%s'", + condition.c_str()); + return false; + } +} + +template +bool +ForeachBlueprint::decideOperation(CO condition, const vespalib::string & operation) +{ + if (operation == "sum") { + setExecutorCreator(condition); + } else if 
(operation == "product") { + setExecutorCreator(condition); + } else if (operation == "average") { + setExecutorCreator(condition); + } else if (operation == "max") { + setExecutorCreator(condition); + } else if (operation == "min") { + setExecutorCreator(condition); + } else if (operation == "count") { + setExecutorCreator(condition); + } else { + LOG(error, "Expected operation parameter to be 'sum', 'product', 'average', 'max', 'min', or 'count', but was '%s'", + operation.c_str()); + return false; + } + return true; +} + +template +void +ForeachBlueprint::setExecutorCreator(CO condition) +{ + class ExecutorCreator : public ExecutorCreatorBase { + private: + CO _condition; + public: + ExecutorCreator(CO cond) : _condition(cond) {} + virtual search::fef::FeatureExecutor::LP create(uint32_t numInputs) const { + return search::fef::FeatureExecutor::LP(new ForeachExecutor(_condition, numInputs)); + } + }; + _executorCreator.reset(new ExecutorCreator(condition)); +} + +ForeachBlueprint::ForeachBlueprint() : + Blueprint("foreach"), + _dimension(ILLEGAL), + _executorCreator(), + _num_inputs(0) +{ +} + +void +ForeachBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +bool +ForeachBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + if (!decideDimension(params[0].getValue())) { + return false; + } + if (!decideCondition(params[3].getValue(), params[4].getValue())) { + return false; + } + + const vespalib::string & variable = params[1].getValue(); + const vespalib::string & feature = params[2].getValue(); + + if (_dimension == TERMS) { + uint32_t maxTerms = util::strToNum(env.getProperties().lookup(getBaseName(), "maxTerms").get("16")); + for (uint32_t i = 0; i < maxTerms; ++i) { + defineInput(boost::algorithm::replace_all_copy(feature, variable, vespalib::make_vespa_string("%u", i))); + ++_num_inputs; + } + } else { + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const FieldInfo * info = 
env.getField(i); + if (info->type() == FieldType::INDEX && _dimension == FIELDS) { + defineInput(boost::algorithm::replace_all_copy(feature, variable, info->name())); + ++_num_inputs; + } else if (info->type() == FieldType::ATTRIBUTE && _dimension == ATTRIBUTES) { + defineInput(boost::algorithm::replace_all_copy(feature, variable, info->name())); + ++_num_inputs; + } + } + } + + describeOutput("value", "The result after iterating over the input feature values using the specified operation"); + + return true; +} + +Blueprint::UP +ForeachBlueprint::createInstance() const +{ + return Blueprint::UP(new ForeachBlueprint()); +} + +FeatureExecutor::LP +ForeachBlueprint::createExecutor(const IQueryEnvironment &) const +{ + if (_executorCreator.get() != NULL) { + return _executorCreator->create(_num_inputs); + } + return FeatureExecutor::LP(NULL); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/foreachfeature.h b/searchlib/src/vespa/searchlib/features/foreachfeature.h new file mode 100644 index 00000000000..6485b579971 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/foreachfeature.h @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + + +namespace search { +namespace features { + +/** + * Implements the executor for the foreach feature. + * Uses a condition and operation template class to perform the computation. + */ +template +class ForeachExecutor : public search::fef::FeatureExecutor { +private: + CO _condition; + OP _operation; + uint32_t _numInputs; + +public: + ForeachExecutor(const CO & condition, uint32_t numInputs); + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Base class for condition template class. 
+ **/
+class ConditionBase {
+protected:
+    feature_t _param; // threshold the derived conditions compare against
+public:
+    ConditionBase(feature_t param = 0) : _param(param) {}
+};
+
+/**
+ * Implements the true condition.
+ **/
+struct TrueCondition : public ConditionBase {
+    bool useValue(feature_t val) { (void) val; return true; } // accepts every value
+};
+
+/**
+ * Implements the less than condition.
+ **/
+struct LessThanCondition : public ConditionBase {
+    LessThanCondition(feature_t param) : ConditionBase(param) {}
+    bool useValue(feature_t val) { return val < _param; } // strict comparison
+};
+
+/**
+ * Implements the greater than condition.
+ **/
+struct GreaterThanCondition : public ConditionBase {
+    GreaterThanCondition(feature_t param) : ConditionBase(param) {}
+    bool useValue(feature_t val) { return val > _param; } // strict comparison
+};
+
+
+/**
+ * Base class for operation template class.
+ */
+class OperationBase {
+protected:
+    feature_t _result; // running result; derived classes reinitialize it in reset()
+public:
+    OperationBase() : _result(0) {}
+    feature_t getResult() const { return _result; }
+};
+
+/**
+ * Implements sum operation.
+ **/
+struct SumOperation : public OperationBase {
+    void reset() { _result = 0; }
+    void onValue(feature_t val) { _result += val; }
+};
+
+/**
+ * Implements product operation.
+ **/
+struct ProductOperation : public OperationBase {
+    void reset() { _result = 1; } // with no accepted values the result stays 1
+    void onValue(feature_t val) { _result *= val; }
+};
+
+/**
+ * Implements average operation.
+ **/
+class AverageOperation : public OperationBase {
+private:
+    uint32_t _numValues; // number of values seen since the last reset()
+public:
+    AverageOperation() : OperationBase(), _numValues(0) {}
+    void reset() { _result = 0; _numValues = 0; }
+    void onValue(feature_t val) { _result += val; ++_numValues; }
+    feature_t getResult() const { return _numValues != 0 ? _result / _numValues : 0; } // hides the base version; 0 when no values were seen
+};
+
+/**
+ * Implements max operation.
+ **/
+struct MaxOperation : public OperationBase {
+    void reset() { _result = -std::numeric_limits<feature_t>::max(); } // lowest finite value; also the result when no values are accepted
+    void onValue(feature_t val) { _result = std::max(val, _result); }
+};
+
+/**
+ * Implements min operation.
+ **/ +struct MinOperation : public OperationBase { + void reset() { _result = std::numeric_limits::max(); } + void onValue(feature_t val) { _result = std::min(val, _result); } +}; + +/** + * Implements count operation. + **/ +struct CountOperation : public OperationBase { + void reset() { _result = 0; } + void onValue(feature_t val) { (void) val; _result += 1; } +}; + + +/** + * Implements the blueprint for the foreach executor. + */ +class ForeachBlueprint : public search::fef::Blueprint { +private: + enum Dimension { + TERMS, + FIELDS, + ATTRIBUTES, + ILLEGAL + }; + struct ExecutorCreatorBase { + virtual search::fef::FeatureExecutor::LP create(uint32_t numInputs) const = 0; + virtual ~ExecutorCreatorBase() {} + }; + + Dimension _dimension; + std::unique_ptr _executorCreator; + size_t _num_inputs; + + bool decideDimension(const vespalib::string & param); + bool decideCondition(const vespalib::string & condition, const vespalib::string & operation); + template + bool decideOperation(CO condition, const vespalib::string & operation); + template + void setExecutorCreator(CO condition); + +public: + /** + * Constructs a blueprint. + */ + ForeachBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().string().string().feature().string().string(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. 
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp b/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp new file mode 100644 index 00000000000..cc6d1c24c50 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/freshnessfeature.cpp @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.freshnessfeature"); +#include +#include "freshnessfeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +FreshnessExecutor::FreshnessExecutor(feature_t maxAge, feature_t scaleAge) : + FeatureExecutor(), + _maxAge(maxAge), + _logCalc(maxAge, scaleAge) +{ +} + +void +FreshnessExecutor::execute(MatchData & match) +{ + feature_t age = *match.resolveFeature(inputs()[0]); + LOG(debug, "Age: %f Maxage: %f res: %f\n", age, _maxAge, (age / _maxAge)); + feature_t freshness = std::max(1 - (age / _maxAge), (feature_t)0); + *match.resolveFeature(outputs()[0]) = freshness; + *match.resolveFeature(outputs()[1]) = _logCalc.get(age); +} + + +FreshnessBlueprint::FreshnessBlueprint() : + Blueprint("freshness"), + _maxAge(3*30*24*60*60), // default value (90 days) + _halfResponse(7*24*60*60), // makes sure freshness.logscale = 0.5 when age is 7 days + _scaleAge(LogarithmCalculator::getScale(_halfResponse, _maxAge)) +{ +} + +void +FreshnessBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +bool +FreshnessBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + // params[0] = attribute name + Property p = env.getProperties().lookup(getName(), "maxAge"); + if (p.found()) { + _maxAge = util::strToNum(p.get()); + } + p = env.getProperties().lookup(getName(), 
"halfResponse"); + if (p.found()) { + _halfResponse = util::strToNum(p.get()); + } + // sanity checks: + if (_maxAge < 1) { + LOG(warning, "Invalid %s.maxAge = %g, using 1.0", + getName().c_str(), (double)_maxAge); + _maxAge = 1.0; + } + if (_halfResponse < 1) { + LOG(warning, "Invalid %s.halfResponse = %g, using 1.0", + getName().c_str(), (double)_halfResponse); + _halfResponse = 1.0; + } + if (_halfResponse >= _maxAge / 2) { + feature_t newResponse = (_maxAge / 2) - 1; + LOG(warning, "Invalid %s.halfResponse = %g, using %g ((%s.maxAge / 2) - 1)", + getName().c_str(), (double)_halfResponse, (double)newResponse, getName().c_str()); + _halfResponse = newResponse; + } + _scaleAge = LogarithmCalculator::getScale(_halfResponse, _maxAge); + + defineInput("age(" + params[0].getValue() + ")"); + describeOutput("out", "The freshness of the document (linear)"); + describeOutput("logscale", "The freshness of the document (logarithmic shape)"); + + return true; +} + +Blueprint::UP +FreshnessBlueprint::createInstance() const +{ + return Blueprint::UP(new FreshnessBlueprint()); +} + +FeatureExecutor::LP +FreshnessBlueprint::createExecutor(const IQueryEnvironment &) const +{ + return FeatureExecutor::LP(new FreshnessExecutor(_maxAge, _scaleAge)); +} + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/freshnessfeature.h b/searchlib/src/vespa/searchlib/features/freshnessfeature.h new file mode 100644 index 00000000000..979966b48ca --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/freshnessfeature.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "logarithmcalculator.h" + +namespace search { +namespace features { + +/** + * Implements the executor for the freshness feature. 
+ */
+class FreshnessExecutor : public search::fef::FeatureExecutor {
+private:
+    feature_t _maxAge;            // divisor for the linear output
+    LogarithmCalculator _logCalc; // constructed from (maxAge, scaleAge); produces the second output
+
+public:
+    /**
+     * Constructs an executor.
+     */
+    FreshnessExecutor(feature_t maxAge, feature_t scaleAge);
+    virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the freshness executor.
+ */
+class FreshnessBlueprint : public search::fef::Blueprint {
+private:
+    feature_t _maxAge;       // default 90 days in seconds; overridable via the "maxAge" property
+    feature_t _halfResponse; // default 7 days in seconds; overridable via the "halfResponse" property
+    feature_t _scaleAge;     // LogarithmCalculator::getScale(_halfResponse, _maxAge)
+
+public:
+    /**
+     * Constructs a blueprint.
+     */
+    FreshnessBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+                                   search::fef::IDumpFeatureVisitor & visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint. Declares a single parameter: an attribute of any collection type.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY);
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp
new file mode 100644
index 00000000000..1fc8203a58e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.cpp
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.item_raw_score_feature"); +#include "item_raw_score_feature.h" +#include "valuefeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +void +ItemRawScoreExecutor::execute(MatchData &data) +{ + feature_t output = 0.0; + for (uint32_t i = 0; i < _handles.size(); ++i) { + const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]); + if (tfmd->getDocId() == data.getDocId()) { + output += tfmd->getRawScore(); + } + } + *data.resolveFeature(outputs()[0]) = output; +} + +//----------------------------------------------------------------------------- + +void +SimpleItemRawScoreExecutor::execute(MatchData &data) +{ + feature_t output = 0.0; + const TermFieldMatchData *tfmd = data.resolveTermField(_handle); + if (tfmd->getDocId() == data.getDocId()) { + output = tfmd->getRawScore(); + } + *data.resolveFeature(outputs()[0]) = output; +} + +//----------------------------------------------------------------------------- + +bool +ItemRawScoreBlueprint::setup(const IIndexEnvironment &, + const ParameterList &params) +{ + _label = params[0].getValue(); + describeOutput("out", "raw score for the given query item"); + return true; +} + +FeatureExecutor::LP +ItemRawScoreBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const +{ + HandleVector handles = resolve(queryEnv, _label); + if (handles.size() == 1) { + return FeatureExecutor::LP(new SimpleItemRawScoreExecutor(handles[0])); + } else if (handles.size() == 0) { + return FeatureExecutor::LP(new SingleZeroValueExecutor()); + } else { + return FeatureExecutor::LP(new ItemRawScoreExecutor(handles)); + } +} + +ItemRawScoreBlueprint::HandleVector +ItemRawScoreBlueprint::resolve(const search::fef::IQueryEnvironment &env, + const vespalib::string &label) +{ + HandleVector handles; + const ITermData *term = util::getTermByLabel(env, label); + for (uint32_t i = 0; (term != 0) && (i < term->numFields()); ++i) { + TermFieldHandle 
handle = term->field(i).getHandle(); + if (handle != IllegalHandle) { + handles.push_back(handle); + } + } + return handles; +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h new file mode 100644 index 00000000000..10a6c30611d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/item_raw_score_feature.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +class ItemRawScoreExecutor : public search::fef::FeatureExecutor +{ +public: + typedef std::vector HandleVector; +private: + HandleVector _handles; +public: + ItemRawScoreExecutor(HandleVector handles) + : FeatureExecutor(), _handles(handles) {} + virtual void execute(search::fef::MatchData &data); +}; + +class SimpleItemRawScoreExecutor : public search::fef::FeatureExecutor +{ +private: + search::fef::TermFieldHandle _handle; +public: + SimpleItemRawScoreExecutor(search::fef::TermFieldHandle handle) + : FeatureExecutor(), _handle(handle) {} + virtual void execute(search::fef::MatchData &data); +}; + + +//----------------------------------------------------------------------------- + +class ItemRawScoreBlueprint : public search::fef::Blueprint +{ +private: + typedef std::vector HandleVector; + vespalib::string _label; +public: + ItemRawScoreBlueprint() : Blueprint("itemRawScore"), _label() {} + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const {} + virtual search::fef::Blueprint::UP createInstance() const { + return Blueprint::UP(new ItemRawScoreBlueprint()); + } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().string(); + } + virtual bool setup(const 
search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment &env) const; + + static HandleVector resolve(const search::fef::IQueryEnvironment &env, + const vespalib::string &label); +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp new file mode 100644 index 00000000000..b2d80324bec --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.cpp @@ -0,0 +1,184 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.jarowinklerdistance"); + +#include +#include +#include +#include +#include +#include +#include "jarowinklerdistancefeature.h" +#include "utils.h" +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- +// JaroWinklerDistanceConfig +//----------------------------------------------------------------------------- +JaroWinklerDistanceConfig::JaroWinklerDistanceConfig() : + fieldId(search::fef::IllegalHandle), + fieldBegin(0), + fieldEnd(std::numeric_limits::max()), + boostThreshold(0.7f), + prefixSize(4u) +{ + // empty +} + +//----------------------------------------------------------------------------- +// JaroWinklerDistanceExecutor +//----------------------------------------------------------------------------- +JaroWinklerDistanceExecutor::JaroWinklerDistanceExecutor(const search::fef::IQueryEnvironment &env, + const JaroWinklerDistanceConfig &config) : + search::fef::FeatureExecutor(), + _config(config), + _termFieldHandles(), + _lenHandle(search::fef::IllegalHandle) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + 
_termFieldHandles.push_back(util::getTermFieldHandle(env, i, config.fieldId)); + } +} + +void +JaroWinklerDistanceExecutor::execute(search::fef::MatchData &match) +{ + // Build a list of field position iterators, one per query term. + std::vector pos; + for (uint32_t term = 0; term < _termFieldHandles.size(); ++term) { + search::fef::FieldPositionsIterator it; // default-constructed; this is not valid yet + const search::fef::TermFieldHandle &handle = _termFieldHandles[term]; + if (handle != search::fef::IllegalHandle) { + search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(handle); + if (tfmd.getDocId() == match.getDocId()) { + it = tfmd.getIterator(); + } + } + pos.push_back(it); + } + + // Assign the jaroWinkler distance to this executor's output. + *match.resolveFeature(outputs()[0]) = 1 - jaroWinklerProximity(pos, (uint32_t)*match.resolveFeature(_lenHandle)); +} + +namespace { +uint32_t +matches(const std::vector &termPos, + uint32_t fieldLen, uint32_t *numTransposes) +{ + (*numTransposes) = 0u; + uint32_t ret = 0; + uint32_t halfLen = termPos.size() > fieldLen ? (fieldLen / 2 + 1) : (termPos.size() / 2 + 1); + for (uint32_t i = 0; i < termPos.size(); ++i) { + uint32_t min = i > halfLen ? 
i - halfLen : 0u; + uint32_t max = std::min(fieldLen, i + halfLen); + for (search::fef::FieldPositionsIterator it = termPos[i]; it.valid() && it.getPosition() <= max; it.next()) { + uint32_t pos = it.getPosition(); + if (pos >= min && pos <= max) { + if (pos != i) { + (*numTransposes)++; + } + ret++; + break; + } + } + } + (*numTransposes) /= 2; + return ret; +} + +uint32_t +prefixMatch(const std::vector &termPos, uint32_t fieldLen, uint32_t maxLen) +{ + uint32_t len = std::min((uint32_t)termPos.size(), std::min(fieldLen, maxLen)); + for (uint32_t i = 0; i < len; ++i) { + if (!termPos[i].valid() || termPos[i].getPosition() != i) { + return i; + } + } + return len; +} + +feature_t +jaroMeasure(const std::vector &termPos, uint32_t fieldLen) +{ + // Paranoia: avoid dividing by zero below when there are no query terms or the field is empty. + if (termPos.empty() || fieldLen == 0) { + return 0.0f; + } + uint32_t numTransposes = 0; + uint32_t numMatches = matches(termPos, fieldLen, &numTransposes); + if (numMatches == 0u) { + return 0.0f; + } + return (((feature_t)numMatches / termPos.size()) + + ((feature_t)numMatches / fieldLen) + + ((feature_t)numMatches - numTransposes) / numMatches) / 3.0f; +} +} // namespace + +feature_t +JaroWinklerDistanceExecutor::jaroWinklerProximity(const std::vector &termPos, uint32_t fieldLen) +{ + feature_t ret = std::min(1.0, std::max(0.0, jaroMeasure(termPos, fieldLen))); + //LOG(debug, "Jaro measure is %f.", ret); + if (ret > _config.boostThreshold) { + ret += 0.1f * prefixMatch(termPos, fieldLen, _config.prefixSize) * (1 - ret); // less boost close to 1 + //LOG(debug, "Applying Winkler boost."); + } + //LOG(debug, "JaroWinkler measure is %f.", ret); + return ret; +} + +//----------------------------------------------------------------------------- +// JaroWinklerDistanceBlueprint +//----------------------------------------------------------------------------- +JaroWinklerDistanceBlueprint::JaroWinklerDistanceBlueprint() : + 
search::fef::Blueprint("jaroWinklerDistance"), + _config() +{ + // empty +} + +void 
+JaroWinklerDistanceBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +JaroWinklerDistanceBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + _config.fieldId = params[0].asField()->id(); + + vespalib::string boostThreshold = env.getProperties().lookup(getName(), "boostThreshold").getAt(0); + _config.boostThreshold = boostThreshold.empty() ? 0.7f : atof(boostThreshold.c_str()); + + vespalib::string prefixSize = env.getProperties().lookup(getName(), "prefixSize").getAt(0); + _config.prefixSize = prefixSize.empty() ? 4 : atoi(prefixSize.c_str()); + + defineInput(vespalib::make_string("fieldLength(%s)", params[0].getValue().c_str())); + describeOutput("out", "JaroWinklerDistance distance measure."); + env.hintFieldAccess(_config.fieldId); + return true; +} + +search::fef::Blueprint::UP +JaroWinklerDistanceBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new JaroWinklerDistanceBlueprint()); +} + +search::fef::FeatureExecutor::LP +JaroWinklerDistanceBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new JaroWinklerDistanceExecutor(env, _config)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h new file mode 100644 index 00000000000..a287618dd75 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/jarowinklerdistancefeature.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the necessary config to pass from the jaro winkler distance blueprint to the executor. 
+ */ +struct JaroWinklerDistanceConfig { + JaroWinklerDistanceConfig(); + + uint32_t fieldId; // The id of field to process. + uint32_t fieldBegin; // The first field term to evaluate. + uint32_t fieldEnd; // The last field term to evaluate. + feature_t boostThreshold; // The jaro threshold to exceed to apply boost. + uint32_t prefixSize; // The maximum number of leading query terms to use for boost. +}; + +/** + * Implements the executor for the jaro winkler distance calculator. + */ +class JaroWinklerDistanceExecutor : public search::fef::FeatureExecutor { +public: + /** + * Constructs a new executor for the jaro winkler distance calculator. + * @param env The query environment used to look up one term field handle per query term. + * @param config The config for this executor; it is kept by reference. + */ + JaroWinklerDistanceExecutor(const search::fef::IQueryEnvironment &env, + const JaroWinklerDistanceConfig &config); + void inputs_done() override { _lenHandle = inputs()[0]; } + virtual void execute(search::fef::MatchData &data); + +private: + feature_t jaroWinklerProximity(const std::vector &termPos, uint32_t fieldLen); + +private: + const JaroWinklerDistanceConfig &_config; // The config for this executor. + std::vector _termFieldHandles; // The handles of all query terms. + search::fef::FeatureHandle _lenHandle; // Handle to the length input feature. +}; + +/** + * Implements the blueprint for the jaro winkler distance calculator. + */ +class JaroWinklerDistanceBlueprint : public search::fef::Blueprint { +public: + /** + * Constructs a new blueprint for the jaro winkler distance calculator. + */ + JaroWinklerDistanceBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + +private: + JaroWinklerDistanceConfig _config; // The config for this blueprint. +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/logarithmcalculator.h b/searchlib/src/vespa/searchlib/features/logarithmcalculator.h new file mode 100644 index 00000000000..4faad71a289 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/logarithmcalculator.h @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace features { + +/** + * This class is used to calculate a logarithmic-shaped function that goes from 1 to 0. + * The function is: + * logscale(x, m, s) = (x > m ? 0 : (( log(m + s) - log(x + s)) / (log(m + s) - log(s)))), + * where m specifies for which x the function should output 0 (max parameter), + * and s controls the shape of the function (scale parameter). + * + * If you decide a value for x for when the function should output 0.5, + * s can be calculated as -x^2/(2x - m). + **/ +class LogarithmCalculator { +private: + feature_t _m; + feature_t _s; + feature_t _maxLog; + feature_t _minLog; + feature_t _divMult; + +public: + /** + * Creates a calculator for the given values for m (max) and s (scale). + **/ + LogarithmCalculator(feature_t m, feature_t s) : + _m(m), + _s(s), + _maxLog(log(_m + _s)), + _minLog(log(_s)), + _divMult(1.0 / (_maxLog - _minLog)) + { + } + + /** + * Calculate the function for the given x; values above m are treated as m and negative values as 0. 
+ **/ + feature_t get(feature_t x) const { + if (x > _m) x = _m; + if (x < 0) x = 0; + return (_maxLog - log(x + _s)) * _divMult; + } + + /** + * Calculate the scale parameter to use if the function should output 0.5 + * for the given x and max parameter (the divisor m - 2*x is not checked for zero). + */ + static feature_t getScale(feature_t x, feature_t m) { + return (x * x) / (m - 2*x); + } +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/matchesfeature.cpp b/searchlib/src/vespa/searchlib/features/matchesfeature.cpp new file mode 100644 index 00000000000..459fe4487af --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/matchesfeature.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.matchesfeature"); +#include +#include "matchesfeature.h" +#include "utils.h" +#include "valuefeature.h" + +using namespace search::fef; + +namespace search { +namespace features { + +MatchesExecutor::MatchesExecutor(uint32_t fieldId, + const search::fef::IQueryEnvironment &env, + uint32_t begin, uint32_t end) + : FeatureExecutor(), + _handles() +{ + for (uint32_t i = begin; i < end; ++i) { + search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId); + if (handle != search::fef::IllegalHandle) { + _handles.push_back(handle); + } + } +} + +void +MatchesExecutor::execute(MatchData &match) +{ + size_t output = 0; + for (uint32_t i = 0; i < _handles.size(); ++i) { + const TermFieldMatchData *tfmd = match.resolveTermField(_handles[i]); + if (tfmd->getDocId() == match.getDocId()) { + output = 1; + break; + } + } + *match.resolveFeature(outputs()[0]) = static_cast(output); +} + + +MatchesBlueprint::MatchesBlueprint() : + Blueprint("matches"), + _field(NULL), + _termIdx(std::numeric_limits::max()) +{ +} + +void +MatchesBlueprint::visitDumpFeatures(const IIndexEnvironment &, + 
IDumpFeatureVisitor &) const +{ +} + +bool 
+MatchesBlueprint::setup(const IIndexEnvironment &, + const ParameterList & params) +{ + _field = params[0].asField(); + if (params.size() == 2) { + _termIdx = params[1].asInteger(); + } + describeOutput("out", "Returns 1 if the given field is matched by the query, 0 otherwise"); + return true; +} + +Blueprint::UP +MatchesBlueprint::createInstance() const +{ + return Blueprint::UP(new MatchesBlueprint()); +} + +FeatureExecutor::LP +MatchesBlueprint::createExecutor(const IQueryEnvironment & queryEnv) const +{ + if (_field == 0) { + return search::fef::FeatureExecutor::LP(new ValueExecutor(std::vector(1, 0.0))); + } + if (_termIdx != std::numeric_limits::max()) { + return FeatureExecutor::LP(new MatchesExecutor(_field->id(), queryEnv, _termIdx, _termIdx + 1)); + } else { + return FeatureExecutor::LP(new MatchesExecutor(_field->id(), queryEnv, 0, queryEnv.getNumTerms())); + } +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/matchesfeature.h b/searchlib/src/vespa/searchlib/features/matchesfeature.h new file mode 100644 index 00000000000..9f380ad8ac9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/matchesfeature.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the matches feature for index and + * attribute fields. + */ +class MatchesExecutor : public search::fef::FeatureExecutor +{ +private: + std::vector _handles; + +public: + MatchesExecutor(uint32_t fieldId, + const search::fef::IQueryEnvironment &env, + uint32_t begin, uint32_t end); + virtual void execute(search::fef::MatchData & data); +}; + +/** + * Implements the blueprint for the matches executor. 
+ * + * matches(name) + * - returns 1 if there is an index or attribute with this name which matched the query, 0 otherwise + * matches(name,n) + * - returns 1 if there is an index or attribute with this name which matched with the query term at the given position, 0 otherwise + */ +class MatchesBlueprint : public search::fef::Blueprint +{ +private: + const search::fef::FieldInfo *_field; + uint32_t _termIdx; + +public: + /** + * Constructs a blueprint. + */ + MatchesBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions(). + desc().field(). + desc().field().number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.cpp b/searchlib/src/vespa/searchlib/features/matchfeature.cpp new file mode 100644 index 00000000000..e80d56a2edd --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/matchfeature.cpp @@ -0,0 +1,107 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.matchfeature"); +#include "matchfeature.h" + +#include +#include +#include +#include +#include +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +MatchExecutor::MatchExecutor(const MatchParams & params) : + FeatureExecutor(), + _params(params) +{ + // empty +} + +void +MatchExecutor::execute(MatchData & match) +{ + feature_t sum = 0.0f; + feature_t totalWeight = 0.0f; + for (uint32_t i = 0; i < _params.weights.size(); ++i) { + feature_t weight = static_cast(_params.weights[i]); + feature_t matchScore = *match.resolveFeature(inputs()[i]); + if (matchScore > 0.0f) { + totalWeight += weight; + sum += (weight * matchScore); + } + *match.resolveFeature(outputs()[i + 2]) = weight; + } + + *match.resolveFeature(outputs()[0]) = totalWeight > 0.0f ? sum / totalWeight : 0.0f; + *match.resolveFeature(outputs()[1]) = totalWeight; +} + + +MatchBlueprint::MatchBlueprint() : + Blueprint("match"), + _params() +{ + // empty +} + +void +MatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + (void) visitor; +} + +Blueprint::UP +MatchBlueprint::createInstance() const +{ + return Blueprint::UP(new MatchBlueprint()); +} + +bool +MatchBlueprint::setup(const IIndexEnvironment & env, + const ParameterList &) +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const FieldInfo * info = env.getField(i); + if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) { + _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name())); + if (info->type() == FieldType::INDEX) { + if (info->collection() == CollectionType::SINGLE) { + defineInput("fieldMatch(" + info->name() + ")"); + } else { + defineInput("elementCompleteness(" + info->name() + ")"); + } + } else if (info->type() == FieldType::ATTRIBUTE) { + defineInput("attributeMatch(" + info->name() + ")"); + } + } + } + 
describeOutput("score", "Normalized sum over all matched fields"); + describeOutput("totalWeight", "Sum of rank weights for all matched fields"); + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const FieldInfo * info = env.getField(i); + if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) { + describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'"); + } + } + return true; +} + +FeatureExecutor::LP +MatchBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + (void) env; + return FeatureExecutor::LP(new MatchExecutor(_params)); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.h b/searchlib/src/vespa/searchlib/features/matchfeature.h new file mode 100644 index 00000000000..26ecfb85132 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/matchfeature.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace features { + +struct MatchParams { + MatchParams() : weights() {} + std::vector weights; +}; + +/** + * Implements the executor for the match feature: output 0 is the weighted average of the input scores that are above zero, output 1 is the sum of their weights, and the outputs from index 2 on repeat each field weight. + */ +class MatchExecutor : public search::fef::FeatureExecutor { +private: + const MatchParams & _params; + +public: + /** + * Constructs an executor; params is kept by reference. + */ + MatchExecutor(const MatchParams & params); + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the blueprint for the match executor; it defines one input (fieldMatch, elementCompleteness or attributeMatch) per index or attribute field. 
+ */ +class MatchBlueprint : public search::fef::Blueprint { +private: + MatchParams _params; + +public: + /** + * Constructs a blueprint. + */ + MatchBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp new file mode 100644 index 00000000000..d135ecfdc91 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.cpp @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.native_dot_product_feature"); +#include "native_dot_product_feature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +NativeDotProductExecutor::NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId) + : FeatureExecutor(), + _pairs() +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId); + if (handle != search::fef::IllegalHandle) { + _pairs.push_back(std::make_pair(handle, env.getTerm(i)->getWeight())); + } + } +} + +void +NativeDotProductExecutor::execute(MatchData &data) +{ + feature_t output = 0.0; + for (uint32_t i = 0; i < _pairs.size(); ++i) { + const TermFieldMatchData *tfmd = data.resolveTermField(_pairs[i].first); + if (tfmd->getDocId() == data.getDocId()) { /* only term fields whose doc id equals the current one contribute */ + output += (tfmd->getWeight() * (int32_t)_pairs[i].second.percent()); + } + } + *data.resolveFeature(outputs()[0]) = output; +} + +//----------------------------------------------------------------------------- + +bool +NativeDotProductBlueprint::setup(const IIndexEnvironment &, + const ParameterList &params) +{ + _field = params[0].asField(); + describeOutput("out", "dot product between query term weights and match weights for the given field"); + return true; +} + +FeatureExecutor::LP +NativeDotProductBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const +{ + return FeatureExecutor::LP(new NativeDotProductExecutor(queryEnv, _field->id())); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h new file mode 100644 index 00000000000..addff898298 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/native_dot_product_feature.h @@ -0,0 +1,47 @@ +// Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +class NativeDotProductExecutor : public search::fef::FeatureExecutor /* output: sum of match weight * query term weight (percent) over the term fields of one field */ +{ +private: + typedef std::pair Pair; /* (term field handle, query term weight) */ + std::vector _pairs; +public: + NativeDotProductExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId); + virtual void execute(search::fef::MatchData &data); +}; + +//----------------------------------------------------------------------------- + +class NativeDotProductBlueprint : public search::fef::Blueprint /* nativeDotProduct(field): takes a single field parameter */ +{ +private: + const search::fef::FieldInfo *_field; +public: + NativeDotProductBlueprint() : Blueprint("nativeDotProduct"), _field(0) {} + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const {} + virtual search::fef::Blueprint::UP createInstance() const { + return Blueprint::UP(new NativeDotProductBlueprint()); + } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().field(); + } + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment &env) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp new file mode 100644 index 00000000000..a3e68c2907d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.cpp @@ -0,0 +1,150 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include +#include +#include "valuefeature.h" +#include "nativeattributematchfeature.h" +#include "utils.h" +LOG_SETUP(".features.nativeattributematchfeature"); + +using namespace search::fef; + +namespace search { +namespace features { + +feature_t +NativeAttributeMatchExecutor::calculateScore(const CachedTermData &td, const TermFieldMatchData &tfmd) +{ + return (td.weightBoostTable->get(tfmd.getWeight()) * td.scale); +} + +NativeAttributeMatchExecutor::Precomputed +NativeAttributeMatchExecutor::preComputeSetup(const IQueryEnvironment & env, + const NativeAttributeMatchParams & params) +{ + NativeAttributeMatchExecutor::Precomputed precomputed; + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const ITermData *termData = env.getTerm(i); + if (termData->getWeight().percent() != 0) // only consider query terms with contribution + { + typedef search::fef::ITermFieldRangeAdapter FRA; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const ITermFieldData& tfd = iter.get(); + uint32_t fieldId = tfd.getFieldId(); + if (params.considerField(fieldId)) { // only consider fields with contribution + const NativeAttributeMatchParams::Param & param = params.vector[fieldId]; + precomputed.first.push_back(CachedTermData(params, tfd, + param.fieldWeight * termData->getWeight().percent() / param.maxTableSum)); + precomputed.second += (param.fieldWeight * termData->getWeight().percent()); + } + } + } + } + return precomputed; +} + +FeatureExecutor::LP +NativeAttributeMatchExecutor::createExecutor(const IQueryEnvironment & env, + const NativeAttributeMatchParams & params) +{ + Precomputed setup = preComputeSetup(env, params); + if (setup.first.size() == 0) { + return LP(new ValueExecutor(std::vector(1, 0.0))); + } else if (setup.first.size() == 1) { + return LP(new NativeAttributeMatchExecutorSingle(setup)); + } else { + return LP(new NativeAttributeMatchExecutorMulti(setup)); + } +} + +void 
+NativeAttributeMatchExecutorMulti::execute(MatchData & match) +{ + feature_t score = 0; + for (size_t i = 0; i < _queryTermData.size(); ++i) { + const TermFieldMatchData *tfmd = match.resolveTermField(_queryTermData[i].tfh); + if (tfmd->getDocId() == match.getDocId()) { + score += calculateScore(_queryTermData[i], *tfmd); + } + } + *match.resolveFeature(outputs()[0]) = score / _divisor; +} + +void +NativeAttributeMatchExecutorSingle::execute(MatchData & match) +{ + const TermFieldMatchData &tfmd = *match.resolveTermField(_queryTermData.tfh); + *match.resolveFeature(outputs()[0]) = (tfmd.getDocId() == match.getDocId()) + ? calculateScore(_queryTermData, tfmd) + : 0; +} + + +NativeAttributeMatchBlueprint::NativeAttributeMatchBlueprint() : + Blueprint("nativeAttributeMatch"), + _params() +{ +} + +namespace { +const vespalib::string DefaultWeightTable = "linear(1,0)"; +const vespalib::string WeightTableName = "weightTable"; +} + +void +NativeAttributeMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + visitor.visitDumpFeature(getBaseName()); +} + +Blueprint::UP +NativeAttributeMatchBlueprint::createInstance() const +{ + return Blueprint::UP(new NativeAttributeMatchBlueprint()); +} + +bool +NativeAttributeMatchBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _params.resize(env.getNumFields()); + FieldWrapper fields(env, params, FieldType::ATTRIBUTE); + for (uint32_t i = 0; i < fields.getNumFields(); ++i) { + const FieldInfo * info = fields.getField(i); + + uint32_t fieldId = info->id(); + NativeAttributeMatchParams::Param & param = _params.vector[fieldId]; + param.field = true; + const Table * weightBoostTable = util::lookupTable(env, getBaseName(), WeightTableName, info->name(), DefaultWeightTable); + if (weightBoostTable == NULL) { + return false; + } + param.weightBoostTable = SymmetricTable(*weightBoostTable); + param.fieldWeight = 
indexproperties::FieldWeight::lookup(env.getProperties(), info->name()); + if (param.fieldWeight == 0) { + param.field = false; + } + if (NativeRankBlueprint::useTableNormalization(env)) { + _params.setMaxTableSums(fieldId, param.weightBoostTable.max()); + } + } + + describeOutput("score", "The native attribute match score"); + return true; +} + +FeatureExecutor::LP +NativeAttributeMatchBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + return FeatureExecutor::LP(NativeAttributeMatchExecutor::createExecutor(env, _params)); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h new file mode 100644 index 00000000000..411a07d4067 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativeattributematchfeature.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include "nativerankfeature.h" +#include + +namespace search { +namespace features { + +/** + * This struct contains parameters used by the executor. + **/ +struct NativeAttributeMatchParam : public NativeParamBase +{ + NativeAttributeMatchParam() : NativeParamBase() { } + fef::SymmetricTable weightBoostTable; +}; +typedef NativeRankParamsBase NativeAttributeMatchParams; + +/** + * Implements the executor for calculating the native attribute match score; each matching term field contributes weightBoostTable->get(match weight) * scale. 
+ **/ +class NativeAttributeMatchExecutor : public fef::FeatureExecutor { +protected: + struct CachedTermData { + CachedTermData() : scale(0), weightBoostTable(NULL), tfh(search::fef::IllegalHandle) { } + CachedTermData(const NativeAttributeMatchParams & params, const fef::ITermFieldData & tfd, feature_t s) : + scale(s), + weightBoostTable(&params.vector[tfd.getFieldId()].weightBoostTable), + tfh(tfd.getHandle()) + { } + feature_t scale; + const fef::SymmetricTable * weightBoostTable; + fef::TermFieldHandle tfh; + }; + typedef std::vector CachedVector; + typedef std::pair Precomputed; + + static feature_t calculateScore(const CachedTermData &td, const fef::TermFieldMatchData &tfmd); +private: + static Precomputed preComputeSetup(const fef::IQueryEnvironment & env, + const NativeAttributeMatchParams & params); + +public: + static fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env, + const NativeAttributeMatchParams & params); +}; + +class NativeAttributeMatchExecutorMulti : public NativeAttributeMatchExecutor +{ +private: + feature_t _divisor; + std::vector _queryTermData; +public: + NativeAttributeMatchExecutorMulti(const Precomputed & setup) : _divisor(setup.second), _queryTermData(setup.first) { } + // Inherit doc from FeatureExecutor. + virtual void execute(fef::MatchData & data); +}; + +class NativeAttributeMatchExecutorSingle : public NativeAttributeMatchExecutor +{ +private: + CachedTermData _queryTermData; +public: + NativeAttributeMatchExecutorSingle(const Precomputed & setup) : + _queryTermData(setup.first[0]) + { + _queryTermData.scale /= setup.second; /* fold the divisor into the scale once, so execute() does no division */ + } + // Inherit doc from FeatureExecutor. + virtual void execute(fef::MatchData & data); +}; + + +/** + * Implements the blueprint for the native attribute match executor. 
+ **/ +class NativeAttributeMatchBlueprint : public fef::Blueprint { +private: + NativeAttributeMatchParams _params; + +public: + NativeAttributeMatchBlueprint(); + + // Inherit doc from Blueprint. 
+ virtual void visitDumpFeatures(const fef::IIndexEnvironment & env, + fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual fef::ParameterDescriptions getDescriptions() const { + return fef::ParameterDescriptions().desc().attribute(search::fef::ParameterCollection::ANY).repeat(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const fef::IIndexEnvironment & env, + const fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const; + + /** + * Obtains the parameters used by the executor. + **/ + const NativeAttributeMatchParams & getParams() const { return _params; } +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp new file mode 100644 index 00000000000..e19d54e8d09 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.cpp @@ -0,0 +1,179 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.nativefieldmatchfeature"); +#include +#include +#include +#include +#include "nativefieldmatchfeature.h" +#include "valuefeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +const uint32_t NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH(std::numeric_limits::max()); + +feature_t +NativeFieldMatchExecutor::calculateScore(const MyQueryTerm &qt, MatchData &md) +{ + feature_t termScore = 0; + for (size_t i = 0; i < qt.handles().size(); ++i) { + TermFieldHandle tfh = qt.handles()[i]; + TermFieldMatchData *tfmd = md.resolveTermField(tfh); + const NativeFieldMatchParam & param = _params.vector[tfmd->getFieldId()]; + if (tfmd->getDocId() == md.getDocId()) { // do we have a hit + FieldPositionsIterator pos = tfmd->getIterator(); + if (pos.valid()) { + uint32_t fieldLength = getFieldLength(param, pos.getFieldLength()); + termScore += + ((getFirstOccBoost(param, pos.getPosition(), fieldLength) * param.firstOccImportance) + + (getNumOccBoost(param, pos.size(), fieldLength) * (1 - param.firstOccImportance))) * + param.fieldWeight / param.maxTableSum; + } + } + } + termScore *= (qt.significance() * qt.termData()->getWeight().percent()); + return termScore; +} + +NativeFieldMatchExecutor::NativeFieldMatchExecutor(const IQueryEnvironment & env, + const NativeFieldMatchParams & params) : + FeatureExecutor(), + _params(params), + _queryTerms(), + _totalTermWeight(0), + _divisor(0) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + MyQueryTerm qt(QueryTermFactory::create(env, i)); + if (qt.termData()->getWeight().percent() != 0) // only consider query terms with contribution + { + typedef search::fef::ITermFieldRangeAdapter FRA; + uint32_t totalFieldWeight = 0; + for (FRA iter(*qt.termData()); iter.valid(); iter.next()) { + const ITermFieldData& tfd = iter.get(); + uint32_t fieldId = tfd.getFieldId(); + if (_params.considerField(fieldId)) { // only consider fields with contribution 
+ totalFieldWeight += _params.vector[fieldId].fieldWeight; + qt.handles().push_back(tfd.getHandle()); + } + } + if (!qt.handles().empty()) { + _queryTerms.push_back(qt); + _divisor += (qt.significance() * qt.termData()->getWeight().percent() * totalFieldWeight); + } + } + } +} + +void +NativeFieldMatchExecutor::execute(search::fef::MatchData &match) +{ + feature_t score = 0; + for (size_t i = 0; i < _queryTerms.size(); ++i) { + score += calculateScore(_queryTerms[i], match); + } + if (_divisor > 0) { + score /= _divisor; + } + *match.resolveFeature(outputs()[0]) = score; +} + + +NativeFieldMatchBlueprint::NativeFieldMatchBlueprint() : + Blueprint("nativeFieldMatch"), + _params(), + _defaultFirstOcc("expdecay(8000,12.50)"), + _defaultNumOcc("loggrowth(1500,4000,19)") +{ +} + +void +NativeFieldMatchBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + visitor.visitDumpFeature(getBaseName()); +} + +Blueprint::UP +NativeFieldMatchBlueprint::createInstance() const +{ + return Blueprint::UP(new NativeFieldMatchBlueprint()); +} + +bool +NativeFieldMatchBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _params.resize(env.getNumFields()); + FieldWrapper fields(env, params, FieldType::INDEX); + vespalib::string defaultFirstOccImportance = env.getProperties().lookup(getBaseName(), "firstOccurrenceImportance").get("0.5"); + for (uint32_t i = 0; i < fields.getNumFields(); ++i) { + const FieldInfo * info = fields.getField(i); + uint32_t fieldId = info->id(); + NativeFieldMatchParam & param = _params.vector[fieldId]; + param.field = true; + if ((param.firstOccTable = + util::lookupTable(env, getBaseName(), "firstOccurrenceTable", info->name(), _defaultFirstOcc)) == NULL) + { + return false; + } + if ((param.numOccTable = + util::lookupTable(env, getBaseName(), "occurrenceCountTable", info->name(), _defaultNumOcc)) == NULL) + { + return false; + } + param.fieldWeight = 
indexproperties::FieldWeight::lookup(env.getProperties(), info->name()); + if (param.fieldWeight == 0 || + info->isFilter()) + { + param.field = false; + } + Property afl = env.getProperties().lookup(getBaseName(), "averageFieldLength", info->name()); + if (afl.found()) { + param.averageFieldLength = util::strToNum(afl.get()); + } + + param.firstOccImportance = util::strToNum + (env.getProperties().lookup(getBaseName(), "firstOccurrenceImportance", info->name()). + get(defaultFirstOccImportance)); + + if (NativeRankBlueprint::useTableNormalization(env)) { + const Table * fo = param.firstOccTable; + const Table * no = param.numOccTable; + if (fo != NULL && no != NULL) { + double value = (fo->max() * param.firstOccImportance) + + (no->max() * (1 - param.firstOccImportance)); + _params.setMaxTableSums(fieldId, value); + } + } + if (param.field) { + env.hintFieldAccess(fieldId); + } + } + _params.minFieldLength = util::strToNum(env.getProperties().lookup + (getBaseName(), "minFieldLength").get("6")); + + describeOutput("score", "The native field match score"); + return true; +} + +FeatureExecutor::LP +NativeFieldMatchBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + std::unique_ptr native(new NativeFieldMatchExecutor(env, _params)); + if (native->empty()) { + return FeatureExecutor::LP(new ValueExecutor(std::vector(1, 0.0))); + } else { + return FeatureExecutor::LP(native.release()); + } +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h new file mode 100644 index 00000000000..00cb2a9e316 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativefieldmatchfeature.h @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include "nativerankfeature.h" +#include "queryterm.h" + +namespace search { +namespace features { + +/** + * This struct contains the per-field parameters used by the executor: the two boost tables, an optional average field length and the first occurrence importance. + **/ +struct NativeFieldMatchParam : public NativeParamBase +{ + static const uint32_t NOT_DEF_FIELD_LENGTH; + NativeFieldMatchParam() : NativeParamBase(), firstOccTable(NULL), numOccTable(NULL), averageFieldLength(NOT_DEF_FIELD_LENGTH), firstOccImportance(0.5) { } + const search::fef::Table * firstOccTable; + const search::fef::Table * numOccTable; + uint32_t averageFieldLength; + feature_t firstOccImportance; +}; + +class NativeFieldMatchParams : public NativeRankParamsBase +{ +public: + uint32_t minFieldLength; + NativeFieldMatchParams() : minFieldLength(6) { } +}; + +/** + * Implements the executor for calculating the native field match score. + **/ +class NativeFieldMatchExecutor : public search::fef::FeatureExecutor +{ +private: + typedef std::vector HandleVector; + + class MyQueryTerm : public QueryTerm + { + private: + HandleVector _handles; // field match handles + public: + MyQueryTerm(const QueryTerm & qt) : QueryTerm(qt), _handles() {} + HandleVector &handles() { return _handles; } + const HandleVector &handles() const { return _handles; } + }; + const NativeFieldMatchParams & _params; + std::vector _queryTerms; + uint32_t _totalTermWeight; + feature_t _divisor; + + VESPA_DLL_LOCAL feature_t calculateScore(const MyQueryTerm &qt, search::fef::MatchData &md); + + // Returns the average field length of the param when one is set, otherwise the given field length. + uint32_t getFieldLength(const NativeFieldMatchParam & param, uint32_t fieldLength) const { + if (param.averageFieldLength != NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH) { + return param.averageFieldLength; + } + return fieldLength; + } + + // NOTE(review): the divisor is std::max(minFieldLength, fieldLength) - 1, which becomes 0 when that maximum is 1. + // The default minFieldLength of 6 rules this out; a smaller value presumably would not -- TODO confirm. + feature_t getFirstOccBoost(const NativeFieldMatchParam & param, uint32_t position, uint32_t fieldLength) const { + const search::fef::Table * table = param.firstOccTable; + size_t index = (position * (table->size() - 1)) / 
(std::max(_params.minFieldLength, fieldLength) - 1); + return table->get(index); + } + + feature_t getNumOccBoost(const NativeFieldMatchParam & param, uint32_t occs, uint32_t fieldLength) const { + const search::fef::Table * table = param.numOccTable; + size_t index = (occs * (table->size() - 1)) / (std::max(_params.minFieldLength, fieldLength)); + return table->get(index); + } + +public: + NativeFieldMatchExecutor(const search::fef::IQueryEnvironment & env, + const NativeFieldMatchParams & params); + virtual void execute(search::fef::MatchData & data); + + feature_t getFirstOccBoost(uint32_t field, uint32_t position, uint32_t fieldLength) const { + return getFirstOccBoost(_params.vector[field], position, fieldLength); + } + + feature_t getNumOccBoost(uint32_t field, uint32_t occs, uint32_t fieldLength) const { + return getNumOccBoost(_params.vector[field], occs, fieldLength); + } + bool empty() const { return _queryTerms.empty(); } +}; + + +/** + * Implements the blueprint for the native field match executor. + **/ +class NativeFieldMatchBlueprint : public search::fef::Blueprint { +private: + NativeFieldMatchParams _params; + vespalib::string _defaultFirstOcc; + vespalib::string _defaultNumOcc; + +public: + NativeFieldMatchBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().field().repeat(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. 
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; + + /** + * Obtains the parameters used by the executor. + **/ + const NativeFieldMatchParams & getParams() const { return _params; } +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp new file mode 100644 index 00000000000..6d39aea8780 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.cpp @@ -0,0 +1,218 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.nativeproximityfeature"); +#include +#include +#include +#include +#include "nativeproximityfeature.h" +#include "valuefeature.h" +#include "utils.h" +#include + +using namespace search::fef; + +namespace search { +namespace features { + +feature_t +NativeProximityExecutor::calculateScoreForField(const FieldSetup & fs, MatchData & match) +{ + feature_t score = 0; + for (size_t i = 0; i < fs.pairs.size(); ++i) { + score += calculateScoreForPair(fs.pairs[i], fs.fieldId, match); + } + score *= _params.vector[fs.fieldId].fieldWeight; + if (fs.divisor > 0) { + score /= fs.divisor; + } + return score; +} + +feature_t +NativeProximityExecutor::calculateScoreForPair(const TermPair & pair, uint32_t fieldId, MatchData & match) +{ + const NativeProximityParam & param = _params.vector[fieldId]; + TermDistanceCalculator::Result result; + const QueryTerm & a = pair.first; + const QueryTerm & b = pair.second; + TermDistanceCalculator::run(a, b, match, result); + uint32_t forwardIdx = result.forwardDist > 0 ? result.forwardDist - 1 : 0; + uint32_t reverseIdx = result.reverseDist > 0 ? 
result.reverseDist - 1 : 0; + feature_t forwardScore = param.proximityTable->get(forwardIdx) * param.proximityImportance; + feature_t reverseScore = param.revProximityTable->get(reverseIdx) * (1 - param.proximityImportance); + feature_t termPairWeight = pair.connectedness * + (a.significance() * a.termData()->getWeight().percent() + + b.significance() * b.termData()->getWeight().percent()); + feature_t score = (forwardScore + reverseScore) * termPairWeight / param.maxTableSum; + //LOG(debug, "calculateScoreForPair: pair(%u,%u), fieldId(%u), forwardScore(%f), reverseScore(%f), " + //"termPairWeight(%f), maxTableSum(%f), score(%f)", + //a.termData()->getUniqueId(), b.termData()->getUniqueId(), fieldId, forwardScore, reverseScore, + //termPairWeight, param.maxTableSum, score); + return score; +} + + +NativeProximityExecutor::NativeProximityExecutor(const IQueryEnvironment & env, + const NativeProximityParams & params) : + FeatureExecutor(), + _params(params), + _setups(), + _totalFieldWeight(0) +{ + std::map fields; + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + QueryTerm qt = QueryTermFactory::create(env, i); + + typedef search::fef::ITermFieldRangeAdapter FRA; + + for (FRA iter(*qt.termData()); iter.valid(); iter.next()) { + + uint32_t fieldId = iter.get().getFieldId(); + if (_params.considerField(fieldId)) { // only consider fields with contribution + qt.fieldHandle(iter.get().getHandle()); + fields[fieldId].push_back(qt); + } + } + } + for (std::map::const_iterator itr = fields.begin(); itr != fields.end(); ++itr) { + if (itr->second.size() >= 2) { // a pair needs at least two terms in the field + FieldSetup setup(itr->first); + generateTermPairs(env, itr->second, _params.slidingWindow, setup); + if (!setup.pairs.empty()) { // only fields that produced pairs count towards the total field weight + _setups.push_back(setup); + _totalFieldWeight += params.vector[itr->first].fieldWeight; + } + } + } +} + +void +NativeProximityExecutor::execute(search::fef::MatchData & match) +{ + feature_t score = 0; + for (size_t i = 0; i < _setups.size(); ++i) { + score += 
calculateScoreForField(_setups[i], match); + } + if (_totalFieldWeight > 0) { + score /= _totalFieldWeight; + } + *match.resolveFeature(outputs()[0]) = score; +} + +void +NativeProximityExecutor::generateTermPairs(const IQueryEnvironment & env, const QueryTermVector & terms, + uint32_t slidingWindow, FieldSetup & setup) +{ + TermPairVector & pairs = setup.pairs; + for (size_t i = 0; i < terms.size(); ++i) { + for (size_t j = i + 1; (j < i + slidingWindow) && (j < terms.size()); ++j) { + feature_t connectedness = 1; + for (size_t k = j; k > i; --k) { + connectedness = std::min(util::lookupConnectedness(env, terms[k].termData()->getUniqueId(), + terms[k-1].termData()->getUniqueId(), 0.1), + connectedness); + } + connectedness /= (j - i); + if (terms[i].termData()->getWeight().percent() != 0 || + terms[j].termData()->getWeight().percent() != 0) + { // only consider term pairs with contribution + pairs.push_back(TermPair(terms[i], terms[j], connectedness)); + setup.divisor += (terms[i].significance() * terms[i].termData()->getWeight().percent() + + terms[j].significance() * terms[j].termData()->getWeight().percent()) * connectedness; + } + } + } +} + + +NativeProximityBlueprint::NativeProximityBlueprint() : + Blueprint("nativeProximity"), + _params(), + _defaultProximityBoost("expdecay(500,3)"), + _defaultRevProximityBoost("expdecay(400,3)") +{ +} + +void +NativeProximityBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + visitor.visitDumpFeature(getBaseName()); +} + +Blueprint::UP +NativeProximityBlueprint::createInstance() const +{ + return Blueprint::UP(new NativeProximityBlueprint()); +} + +bool +NativeProximityBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _params.resize(env.getNumFields()); + _params.slidingWindow = util::strToNum(env.getProperties().lookup(getBaseName(), "slidingWindowSize").get("4")); + FieldWrapper fields(env, params, FieldType::INDEX); + 
vespalib::string defaultProximityImportance = env.getProperties().lookup(getBaseName(), "proximityImportance").get("0.5"); + for (uint32_t i = 0; i < fields.getNumFields(); ++i) { + const FieldInfo * info = fields.getField(i); + uint32_t fieldId = info->id(); + NativeProximityParam & param = _params.vector[fieldId]; + param.field = true; + if ((param.proximityTable = + util::lookupTable(env, getBaseName(), "proximityTable", info->name(), _defaultProximityBoost)) == NULL) + { + return false; + } + if ((param.revProximityTable = + util::lookupTable(env, getBaseName(), "reverseProximityTable", info->name(), _defaultRevProximityBoost)) == NULL) + { + return false; + } + param.fieldWeight = indexproperties::FieldWeight::lookup(env.getProperties(), info->name()); + if (param.fieldWeight == 0 || + info->isFilter()) + { + param.field = false; + } + param.proximityImportance = util::strToNum + (env.getProperties().lookup(getBaseName(), "proximityImportance", info->name()). + get(defaultProximityImportance)); + + if (NativeRankBlueprint::useTableNormalization(env)) { + const Table * fp = param.proximityTable; + const Table * rp = param.revProximityTable; + if (fp != NULL && rp != NULL) { + double value = (fp->max() * param.proximityImportance) + + (rp->max() * (1 - param.proximityImportance)); + _params.setMaxTableSums(fieldId, value); + } + } + if (param.field) { + env.hintFieldAccess(fieldId); + } + } + + describeOutput("score", "The native proximity score"); + return true; +} + +FeatureExecutor::LP +NativeProximityBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + std::unique_ptr native(new NativeProximityExecutor(env, _params)); + if (native->empty()) { + return FeatureExecutor::LP(new ValueExecutor(std::vector(1, 0.0))); + } else { + return FeatureExecutor::LP(native.release()); + } + +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h 
b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h new file mode 100644 index 00000000000..be79ee7beac --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativeproximityfeature.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "nativerankfeature.h" +#include "queryterm.h" +#include "termdistancecalculator.h" + +namespace search { +namespace features { + +/** + * This struct contains the per-field parameters used by the executor: the forward and reverse proximity tables and the proximity importance. + **/ +struct NativeProximityParam : public NativeParamBase +{ + NativeProximityParam() : NativeParamBase(), proximityTable(NULL), revProximityTable(NULL), proximityImportance(0.5) { } + const search::fef::Table * proximityTable; + const search::fef::Table * revProximityTable; + feature_t proximityImportance; +}; + +class NativeProximityParams : public NativeRankParamsBase +{ +public: + uint32_t slidingWindow; + NativeProximityParams() : slidingWindow(4) { } +}; + +/** + * Implements the executor for calculating the native proximity score. + **/ +class NativeProximityExecutor : public search::fef::FeatureExecutor { +public: + /** + * Represents a pair of query terms together with the connectedness between them. + **/ + struct TermPair { + QueryTerm first; + QueryTerm second; + feature_t connectedness; + TermPair(QueryTerm f, QueryTerm s, feature_t c) : + first(f), second(s), connectedness(c) {} + }; + typedef std::vector TermPairVector; + /** + * Represents the setup needed to calculate the proximity score for a single field. 
+ **/ + struct FieldSetup { + uint32_t fieldId; + TermPairVector pairs; + feature_t divisor; + FieldSetup(uint32_t fid) : fieldId(fid), pairs(), divisor(0) {} + }; + +private: + const NativeProximityParams & _params; + std::vector _setups; + uint32_t _totalFieldWeight; + + feature_t calculateScoreForField(const FieldSetup & fs, search::fef::MatchData & match); + feature_t calculateScoreForPair(const TermPair & pair, uint32_t fieldId, search::fef::MatchData & match); + +public: + NativeProximityExecutor(const search::fef::IQueryEnvironment & env, + const NativeProximityParams & params); + virtual void execute(search::fef::MatchData & data); + + static void generateTermPairs(const search::fef::IQueryEnvironment & env, const QueryTermVector & terms, + uint32_t slidingWindow, FieldSetup & setup); + + bool empty() const { return _setups.empty(); } +}; + + +/** + * Implements the blueprint for the native proximity executor. + **/ +class NativeProximityBlueprint : public search::fef::Blueprint { +private: + NativeProximityParams _params; + vespalib::string _defaultProximityBoost; + vespalib::string _defaultRevProximityBoost; + +public: + NativeProximityBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().field().repeat(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; + + /** + * Obtains the parameters used by the executor. 
+ **/ + const NativeProximityParams & getParams() const { return _params; } +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp b/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp new file mode 100644 index 00000000000..b4d549df9cf --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativerankfeature.cpp @@ -0,0 +1,173 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.nativerankfeature"); +#include +#include +#include "nativerankfeature.h" +#include "valuefeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace { + +vespalib::string +buildFeatureName(const vespalib::string & baseName, const search::features::FieldWrapper & fields) +{ + std::ostringstream oss; + oss << baseName << "("; + for (size_t i = 0; i < fields.getNumFields(); ++i) { + if (i > 0) { + oss << ","; + } + oss << fields.getField(i)->name(); + } + oss << ")"; + return oss.str(); +} + +} + +namespace search { +namespace features { + +FieldWrapper::FieldWrapper(const IIndexEnvironment & env, + const ParameterList & fields, + const FieldType filter) : + _fields() +{ + if (!fields.empty()) { + for (size_t i = 0; i < fields.size(); ++i) { + const search::fef::FieldInfo * info = fields[i].asField(); + if (info->type() == filter) { + _fields.push_back(info); + } + } + } else { + for (size_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo * info = env.getField(i); + LOG_ASSERT(info->id() == i && "The field ids must be the same in FieldInfo as in IIndexEnvironment"); + if (info->type() == filter) { + _fields.push_back(info); + } + } + } +} + + +NativeRankExecutor::NativeRankExecutor(const NativeRankParams & params) : + FeatureExecutor(), + _params(params), + _divisor(0) +{ + _divisor += _params.fieldMatchWeight; + _divisor += _params.attributeMatchWeight; + 
_divisor += _params.proximityWeight; +} + +void +NativeRankExecutor::execute(search::fef::MatchData & match) +{ + *match.resolveFeature(outputs()[0]) = (*match.resolveFeature(inputs()[0]) * _params.fieldMatchWeight + + *match.resolveFeature(inputs()[1]) * _params.proximityWeight + + *match.resolveFeature(inputs()[2]) * _params.attributeMatchWeight) / _divisor; +} + + +NativeRankBlueprint::NativeRankBlueprint() : + Blueprint("nativeRank"), + _params() +{ +} + +void +NativeRankBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + visitor.visitDumpFeature(getBaseName()); +} + +Blueprint::UP +NativeRankBlueprint::createInstance() const +{ + return Blueprint::UP(new NativeRankBlueprint()); +} + +bool +NativeRankBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _params.fieldMatchWeight = util::strToNum + (env.getProperties().lookup(getBaseName(), "fieldMatchWeight").get("100")); + _params.attributeMatchWeight = util::strToNum + (env.getProperties().lookup(getBaseName(), "attributeMatchWeight").get("100")); + vespalib::string defProxWeight = "25"; + if (!useTableNormalization(env)) { + defProxWeight = "100"; // must use another weight to match the default boost tables + } + _params.proximityWeight = util::strToNum + (env.getProperties().lookup(getBaseName(), "proximityWeight").get(defProxWeight)); + + vespalib::string nfm = "nativeFieldMatch"; + vespalib::string np = "nativeProximity"; + vespalib::string nam = "nativeAttributeMatch"; + vespalib::string zero = "value(0)"; + + // handle parameter list + if (!params.empty()) { + FieldWrapper indexFields(env, params, FieldType::INDEX); + FieldWrapper attrFields(env, params, FieldType::ATTRIBUTE); + if (indexFields.getNumFields() > 0) { + nfm = buildFeatureName("nativeFieldMatch", indexFields); + np = buildFeatureName("nativeProximity", indexFields); + } else { + nfm = zero; + np = zero; + } + if (attrFields.getNumFields() > 0) { 
+ nam = buildFeatureName("nativeAttributeMatch", attrFields); + } else { + nam = zero; + } + } + // optimizations when weight == 0 + if (_params.fieldMatchWeight == 0) { + nfm = zero; + } + if (_params.proximityWeight == 0) { + np = zero; + } + if (_params.attributeMatchWeight == 0) { + nam = zero; + } + + defineInput(nfm); + defineInput(np); + defineInput(nam); + describeOutput("score", "The native rank score"); + return true; +} + +FeatureExecutor::LP +NativeRankBlueprint::createExecutor(const IQueryEnvironment &) const +{ + if (_params.proximityWeight + _params.fieldMatchWeight + _params.attributeMatchWeight > 0) { + return FeatureExecutor::LP(new NativeRankExecutor(_params)); + } else { + return FeatureExecutor::LP(new ValueExecutor(std::vector(1, 0.0))); + } +} + +bool +NativeRankBlueprint::useTableNormalization(const search::fef::IIndexEnvironment & env) +{ + Property norm = env.getProperties().lookup("nativeRank", "useTableNormalization"); + return (!(norm.found() && (norm.get() == vespalib::string("false")))); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/nativerankfeature.h b/searchlib/src/vespa/searchlib/features/nativerankfeature.h new file mode 100644 index 00000000000..c6a27d80784 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nativerankfeature.h @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * This struct contains parameters used by the nativeRank executor. + **/ +struct NativeRankParams { + feature_t fieldMatchWeight; + feature_t attributeMatchWeight; + feature_t proximityWeight; + NativeRankParams() : fieldMatchWeight(0), attributeMatchWeight(0), proximityWeight(0) {} +}; + +/** + * The base class for parameter classes used by native rank sub executors. 
+ **/ +struct NativeParamBase { + NativeParamBase() : maxTableSum(1), fieldWeight(100), field(false) { } + double maxTableSum; + uint32_t fieldWeight; + bool field; +}; +template +class NativeRankParamsBase { +public: + typedef P Param; + std::vector

vector; + NativeRankParamsBase() : vector() {} + void resize(size_t numFields) { + vector.resize(numFields); + } + void setMaxTableSums(size_t fieldId, double value) { + vector[fieldId].maxTableSum = value; + if (vector[fieldId].maxTableSum == 0) { // never store a zero sum, fall back to 1 + vector[fieldId].maxTableSum = 1; + } + } + bool considerField(size_t fieldId) const { + assert(fieldId < vector.size()); + return vector[fieldId].field; + } +}; + +/** + * This class wraps an index environment and serves fields of a certain type. + * You can pass a parameter list of fields to consider instead of all found in the index environment. + **/ +class FieldWrapper { +public: + std::vector _fields; + +public: + /** + * Creates a new wrapper. + * + * @param env the environment to wrap. + * @param fields the parameter list of fields to consider. If empty all found in the environment are used. + * @param filter the field type this wrapper should let through. + **/ + FieldWrapper(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & fields, + const search::fef::FieldType filter); + size_t getNumFields() const { return _fields.size(); } + const search::fef::FieldInfo * getField(size_t idx) const { return _fields[idx]; } +}; + +/** + * Implements the executor for calculating the native rank score. + **/ +class NativeRankExecutor : public search::fef::FeatureExecutor { +private: + const NativeRankParams & _params; + feature_t _divisor; + +public: + NativeRankExecutor(const NativeRankParams & params); + virtual void execute(search::fef::MatchData & data); +}; + + +/** + * Implements the blueprint for the native rank executor. + **/ +class NativeRankBlueprint : public search::fef::Blueprint { +private: + NativeRankParams _params; + +public: + NativeRankBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. 
+ virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().field().repeat(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; + + /** + * Obtains the parameters used by the executor. + **/ + const NativeRankParams & getParams() const { return _params; } + + /** + * Returns whether we should use table normalization for the setup using the given environment. + **/ + static bool useTableNormalization(const search::fef::IIndexEnvironment & env); +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/nowfeature.cpp b/searchlib/src/vespa/searchlib/features/nowfeature.cpp new file mode 100644 index 00000000000..28eb844c6b0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nowfeature.cpp @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.nowfeature"); + +#include +#include +#include +#include "nowfeature.h" +#include "valuefeature.h" + +namespace search { +namespace features { + +NowExecutor::NowExecutor(int64_t timestamp) : + search::fef::FeatureExecutor(), + _timestamp(timestamp) +{ + // nothing more to do; the timestamp is captured by the initializer list +} + +void +NowExecutor::execute(search::fef::MatchData &data) { + *data.resolveFeature(outputs()[0]) = _timestamp; +} + +void +NowBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &visitor) const +{ + visitor.visitDumpFeature(getBaseName()); +} + +bool +NowBlueprint::setup(const search::fef::IIndexEnvironment &, + const search::fef::ParameterList &) +{ + describeOutput("out", "The timestamp (seconds since epoch) of query execution."); + return true; +} + +search::fef::Blueprint::UP +NowBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new NowBlueprint()); +} + +search::fef::FeatureExecutor::LP +NowBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + int64_t timestamp; + const fef::Property &prop = env.getProperties().lookup(fef::queryproperties::now::SystemTime::NAME); + if (prop.found()) { + timestamp = atoll(prop.get().c_str()); // NOTE(review): atoll() yields 0 for non-numeric input; confirm that is acceptable + } else { + FastOS_Time now; + now.SetNow(); + timestamp = (int64_t)now.Secs(); + } + return search::fef::FeatureExecutor::LP(new NowExecutor(timestamp)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/nowfeature.h b/searchlib/src/vespa/searchlib/features/nowfeature.h new file mode 100644 index 00000000000..006ebb72446 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/nowfeature.h @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for the 'now' feature. 
This executor returns the current + * system time, or the time specified by the query argument 'vespa.now'. + * The time is written, as seconds since epoch, to the single output 'out'. + * NOTE(review): this comment used to describe additional day/second outputs, + * but NowBlueprint::setup() only describes 'out' -- confirm none are expected. + **/ +class NowExecutor : public search::fef::FeatureExecutor { +private: + // Current time, in seconds since epoch + int64_t _timestamp; + +public: + /** + * Constructs a new executor. + **/ + NowExecutor(int64_t timestamp); + virtual void execute(search::fef::MatchData & data); +}; + +/** + * Implements the blueprint for 'now' feature. + */ +class NowBlueprint : public search::fef::Blueprint { +public: + NowBlueprint() : search::fef::Blueprint("now") { } + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/proximityfeature.cpp b/searchlib/src/vespa/searchlib/features/proximityfeature.cpp new file mode 100644 index 00000000000..5fd590650fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/proximityfeature.cpp @@ -0,0 +1,149 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.proximity"); + +#include +#include +#include +#include +#include "proximityfeature.h" +#include "utils.h" + +namespace search { +namespace features { + +ProximityConfig::ProximityConfig() : + fieldId(search::fef::IllegalHandle), + termA(std::numeric_limits::max()), + termB(std::numeric_limits::max()) +{ + // empty +} + +ProximityExecutor::ProximityExecutor(const search::fef::IQueryEnvironment &env, + const ProximityConfig &config) : + search::fef::FeatureExecutor(), + _config(config), + _termA(util::getTermFieldHandle(env, _config.termA, _config.fieldId)), + _termB(util::getTermFieldHandle(env, _config.termB, _config.fieldId)) +{ +} + +void +ProximityExecutor::execute(search::fef::MatchData &match) +{ + // Proximity is only attempted when both term handles are legal + if (_termA != search::fef::IllegalHandle && + _termB != search::fef::IllegalHandle) + { + search::fef::TermFieldMatchData &matchA = *match.resolveTermField(_termA); + search::fef::TermFieldMatchData &matchB = *match.resolveTermField(_termB); + + if (matchA.getDocId() == match.getDocId() && + matchB.getDocId() == match.getDocId()) + { + if (findBest(match, matchA, matchB)) return; + } + } + // no match + *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out + *match.resolveFeature(outputs()[1]) = util::FEATURE_MAX; // posA + *match.resolveFeature(outputs()[2]) = util::FEATURE_MIN; // posB + return; +} + +bool +ProximityExecutor::findBest(search::fef::MatchData &match, + search::fef::TermFieldMatchData &matchA, + search::fef::TermFieldMatchData &matchB) +{ + // Look for optimal positions for term A and B. 
+ uint32_t optA = 0, optB = 0xFFFFFFFFu; + + search::fef::TermFieldMatchData::PositionsIterator itA, itB, epA, epB; + itA = matchA.begin(); + itB = matchB.begin(); + epA = matchA.end(); + epB = matchB.end(); + + while (itB != epB) { + uint32_t eid = itB->getElementId(); + while (itA != epA && itA->getElementId() < eid) { + ++itA; + } + if (itA != epA && itA->getElementId() == eid) { + // there is a pair somewhere here + while (itA != epA && + itB != epB && + itA->getElementId() == eid && + itB->getElementId() == eid) + { + uint32_t a = itA->getPosition(); + uint32_t b = itB->getPosition(); + if (a < b) { + if (b - a < optB - optA) { + optA = a; + optB = b; + } + ++itA; + } else { + ++itB; + } + } + } else { + ++itB; + } + } + if (optB != 0xFFFFFFFFu) { + // Output proximity score. + *match.resolveFeature(outputs()[0]) = optB - optA; + *match.resolveFeature(outputs()[1]) = optA; + *match.resolveFeature(outputs()[2]) = optB; + return true; + } else { + return false; + } +} + +ProximityBlueprint::ProximityBlueprint() : + search::fef::Blueprint("proximity"), + _config() +{ + // empty +} + +void +ProximityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +ProximityBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + _config.fieldId = params[0].asField()->id(); + _config.termA = params[1].asInteger(); + _config.termB = params[2].asInteger(); + describeOutput("out" , "The proximity of the query terms."); + describeOutput("posA", "The best position of the first query term."); + describeOutput("posB", "The best position of the second query term."); + env.hintFieldAccess(_config.fieldId); + return true; +} + +search::fef::Blueprint::UP +ProximityBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new ProximityBlueprint()); +} + +search::fef::FeatureExecutor::LP +ProximityBlueprint::createExecutor(const 
search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new ProximityExecutor(env, _config)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/proximityfeature.h b/searchlib/src/vespa/searchlib/features/proximityfeature.h new file mode 100644 index 00000000000..08a963a099d --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/proximityfeature.h @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the necessary config for proximity. + */ +struct ProximityConfig { + ProximityConfig(); + + uint32_t fieldId; // The id of field to process. + uint32_t termA; // The id of the first query term in the pair (a, b). + uint32_t termB; // The id of the second query term. +}; + +/** + * Implements the executor for proximity. + */ +class ProximityExecutor : public search::fef::FeatureExecutor { +public: + /** + * Constructs an executor for proximity. + * + * @param env The query environment. + * @param config The proximity config. + */ + ProximityExecutor(const search::fef::IQueryEnvironment &env, + const ProximityConfig &config); + virtual void execute(search::fef::MatchData &data); + +private: + const ProximityConfig &_config; // The proximity config. + search::fef::TermFieldHandle _termA; // Handle to the first query term. + search::fef::TermFieldHandle _termB; // Handle to the second query term. + + bool findBest(search::fef::MatchData &match, + search::fef::TermFieldMatchData &matchA, + search::fef::TermFieldMatchData &matchB); +}; + +/** + * Implements the blueprint for proximity. + */ +class ProximityBlueprint : public search::fef::Blueprint { +public: + /** + * Constructs a proximity blueprint. + */ + ProximityBlueprint(); + + // Inherit doc from Blueprint. 
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number().number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + +private: + ProximityConfig _config; +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp new file mode 100644 index 00000000000..8d944f970c1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.cpp @@ -0,0 +1,112 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".features.querycompleteness"); + +#include +#include +#include +#include "querycompletenessfeature.h" +#include "utils.h" +#include + +namespace search { +namespace features { + +QueryCompletenessConfig::QueryCompletenessConfig() : + fieldId(search::fef::IllegalHandle), + fieldBegin(0), + fieldEnd(std::numeric_limits::max()) +{ + // empty +} + +QueryCompletenessExecutor::QueryCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const QueryCompletenessConfig &config) : + search::fef::FeatureExecutor(), + _config(config), + _fieldHandles() +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, config.fieldId); + if (handle != search::fef::IllegalHandle) { + _fieldHandles.push_back(handle); + } + } +} + +void +QueryCompletenessExecutor::execute(search::fef::MatchData &match) +{ + uint32_t hit = 0, miss = 0; + for (std::vector::iterator it = _fieldHandles.begin(); + it != _fieldHandles.end(); ++it) + { + search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(*it); + if (tfmd.getDocId() == match.getDocId()) { + search::fef::FieldPositionsIterator field = tfmd.getIterator(); + while (field.valid() && field.getPosition() < _config.fieldBegin) { + field.next(); + } + if (field.valid() && field.getPosition() < _config.fieldEnd) { + ++hit; + } else { + ++miss; + } + } else { + ++miss; + } + } + *match.resolveFeature(outputs()[0]) = hit; + *match.resolveFeature(outputs()[1]) = miss; +} + +QueryCompletenessBlueprint::QueryCompletenessBlueprint() : + search::fef::Blueprint("queryCompleteness"), + _config() +{ + // empty +} + +void +QueryCompletenessBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +QueryCompletenessBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + _config.fieldId = 
params[0].asField()->id(); + if (params.size() > 1) { + _config.fieldBegin = params[1].asInteger(); + if (params.size() == 3) { + _config.fieldEnd = params[2].asInteger(); + } + if (_config.fieldBegin >= _config.fieldEnd) { + LOG(error, "Can not calculate query completeness for field '%s' because range is malformed (from %u to %u).", + params[0].getValue().c_str(), _config.fieldBegin, _config.fieldEnd); + return false; + } + } + describeOutput("hit", "The number of query terms matched in field."); + describeOutput("miss", "The number of query terms not matched in field."); + env.hintFieldAccess(_config.fieldId); + return true; +} + +search::fef::Blueprint::UP +QueryCompletenessBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new QueryCompletenessBlueprint()); +} + +search::fef::FeatureExecutor::LP +QueryCompletenessBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new QueryCompletenessExecutor(env, _config)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h new file mode 100644 index 00000000000..7d28c504e79 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/querycompletenessfeature.h @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the necessary config for query completeness. + */ +struct QueryCompletenessConfig { + QueryCompletenessConfig(); + + uint32_t fieldId; // The id of field to process. + uint32_t fieldBegin; // The first field token position to evaluate (inclusive). + uint32_t fieldEnd; // The end of the field token range (exclusive). +}; + +/** + * Implements the executor for query completeness. 
+ */ +class QueryCompletenessExecutor : public search::fef::FeatureExecutor { +public: + /** + * Constructs an executor for query completeness. + * + * @param env The query environment. + * @param config The completeness config. + */ + QueryCompletenessExecutor(const search::fef::IQueryEnvironment &env, + const QueryCompletenessConfig &config); + virtual void execute(search::fef::MatchData &data); + +private: + const QueryCompletenessConfig &_config; + std::vector _fieldHandles; +}; + +/** + * Implements the blueprint for query completeness. + */ +class QueryCompletenessBlueprint : public search::fef::Blueprint { +public: + /** + * Constructs a completeness blueprint. + */ + QueryCompletenessBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions(). + desc().indexField(search::fef::ParameterCollection::ANY). + desc().indexField(search::fef::ParameterCollection::ANY).number(). + desc().indexField(search::fef::ParameterCollection::ANY).number().number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + +private: + QueryCompletenessConfig _config; +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.cpp b/searchlib/src/vespa/searchlib/features/queryfeature.cpp new file mode 100644 index 00000000000..e2dbc2d668c --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/queryfeature.cpp @@ -0,0 +1,161 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.queryfeature"); +#include "queryfeature.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "valuefeature.h" +#include +#include + +using namespace search::fef; +using namespace search::fef::indexproperties; +using vespalib::tensor::DefaultTensor; +using vespalib::tensor::TensorBuilder; +using vespalib::tensor::TensorType; +using vespalib::eval::ValueType; +using search::fef::FeatureType; + +namespace search { +namespace features { + +namespace { + +/** + * Convert a string to a feature value using special quoting + * mechanics; a string that can be converted directly into a feature + * (numeric value) will be converted. If the string cannot be + * converted directly, it will be hashed, after stripping the leading + * "'" if it exists. + * + * @return feature value + * @param str string value to be converted + **/ +feature_t asFeature(const vespalib::string &str) { + char *end; + errno = 0; + double val = strtod(str.c_str(), &end); + if (errno != 0 || *end != '\0') { // not happy + if (str.size() > 0 && str[0] == '\'') { + val = vespalib::hash_code(str.substr(1)); + } else { + val = vespalib::hash_code(str); + } + } + return val; +} + +} // namespace search::features:: + +QueryBlueprint::QueryBlueprint() : + Blueprint("query"), + _key(), + _key2(), + _defaultValue(0), + _tensorType(TensorType::number()) +{ +} + +void +QueryBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +Blueprint::UP +QueryBlueprint::createInstance() const +{ + return Blueprint::UP(new QueryBlueprint()); +} + +bool +QueryBlueprint::setup(const IIndexEnvironment &env, + const ParameterList &params) +{ + _key = params[0].getValue(); + _key2 = "$"; + _key2.append(_key); + + vespalib::string key3; + key3.append("query("); + key3.append(_key); + 
key3.append(")"); + Property p = env.getProperties().lookup(key3); + if (!p.found()) { + p = env.getProperties().lookup(_key2); + } + if (p.found()) { + _defaultValue = asFeature(p.get()); + } + vespalib::string queryFeatureType = type::QueryFeature::lookup(env.getProperties(), _key); + if (!queryFeatureType.empty()) { + _tensorType = TensorType::fromSpec(queryFeatureType); + } + FeatureType output_type = _tensorType.is_tensor() + ? FeatureType::object(_tensorType.as_value_type()) + : FeatureType::number(); + describeOutput("out", "The value looked up in query properties using the given key.", + output_type); + return true; +} + +namespace { + +FeatureExecutor::LP +createTensorExecutor(const search::fef::IQueryEnvironment &env, + const vespalib::string &queryKey, + const TensorType &tensorType) +{ + search::fef::Property prop = env.getProperties().lookup(queryKey); + if (prop.found() && !prop.get().empty()) { + DefaultTensor::builder tensorBuilder; + const vespalib::string &value = prop.get(); + vespalib::nbostream stream(value.data(), value.size()); + vespalib::tensor::TypedBinaryFormat::deserialize(stream, tensorBuilder); + vespalib::tensor::Tensor::UP tensor = tensorBuilder.build(); + if (tensor->getType() != tensorType) { + vespalib::tensor::TensorMapper mapper(tensorType); + vespalib::tensor::Tensor::UP mappedTensor = mapper.map(*tensor); + tensor = std::move(mappedTensor); + } + return ConstantTensorExecutor::create(std::move(tensor)); + } + return ConstantTensorExecutor::createEmpty(); +} + +} + +FeatureExecutor::LP +QueryBlueprint::createExecutor(const IQueryEnvironment &env) const +{ + if (_tensorType.is_tensor()) { + return createTensorExecutor(env, _key, _tensorType); + } else { + std::vector values; + Property p = env.getProperties().lookup(_key); + if (!p.found()) { + p = env.getProperties().lookup(_key2); + } + if (p.found()) { + values.push_back(asFeature(p.get())); + } else { + values.push_back(_defaultValue); + } + return FeatureExecutor::LP(new 
ValueExecutor(values)); + } +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/queryfeature.h b/searchlib/src/vespa/searchlib/features/queryfeature.h new file mode 100644 index 00000000000..fa3194b30c5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/queryfeature.h @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace features { + +/** + * Implements the blueprint for the query feature. + * + * An executor of this outputs the value of a feature passed down with the query. + * This can either be a number or a tensor value. + */ +class QueryBlueprint : public search::fef::Blueprint { +private: + vespalib::string _key; // 'foo' + vespalib::string _key2; // '$foo' + feature_t _defaultValue; + vespalib::tensor::TensorType _tensorType; + +public: + /** + * Constructs a query blueprint. + */ + QueryBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().string(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + + // Inherit doc from Blueprint. 
+ virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/queryterm.cpp b/searchlib/src/vespa/searchlib/features/queryterm.cpp new file mode 100644 index 00000000000..f8c659be359 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/queryterm.cpp @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.queryterm"); +#include "queryterm.h" +#include "utils.h" + +using namespace search::fef; +using search::feature_t; + +namespace search { +namespace features { + +QueryTerm::QueryTerm() : + _termData(NULL), + _handle(IllegalHandle), + _significance(0), + _connectedness(0) +{ +} + +QueryTerm::QueryTerm(const ITermData * td, feature_t sig, feature_t con) : + _termData(td), + _handle(IllegalHandle), + _significance(sig), + _connectedness(con) +{ +} + +QueryTerm +QueryTermFactory::create(const IQueryEnvironment & env, + uint32_t termIdx, + bool lookupSignificance, + bool lookupConnectedness) +{ + const ITermData *termData = env.getTerm(termIdx); + feature_t significance = 0; + if (lookupSignificance) { + feature_t fallback = util::getSignificance(*termData); + significance = util::lookupSignificance(env, termIdx, fallback); + } + feature_t connectedness = 0; + if (lookupConnectedness) { + connectedness = search::features::util::lookupConnectedness(env, termIdx); + } + return QueryTerm(termData, significance, connectedness); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/queryterm.h b/searchlib/src/vespa/searchlib/features/queryterm.h new file mode 100644 index 00000000000..ba92eeb3ec7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/queryterm.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace features { + +/** + * This class represents a query term with the relevant data, plus an + * optional term field handle that can be attached via fieldHandle(). + */ +class QueryTerm { +private: + const fef::ITermData *_termData; + fef::TermFieldHandle _handle; + feature_t _significance; + feature_t _connectedness; +public: + QueryTerm(); + QueryTerm(const fef::ITermData *td, feature_t sig = 0, feature_t con = 0); + const fef::ITermData *termData() const { return _termData; } + feature_t significance() const { return _significance; } + feature_t connectedness() const { return _connectedness; } + fef::TermFieldHandle fieldHandle() const { return _handle; } + void fieldHandle(fef::TermFieldHandle handle) { _handle = handle; } + void fieldHandle(const fef::ITermFieldData *fd) { + if (fd) { + _handle = fd->getHandle(); + } + } +}; + +/** + * Convenience typedef for a vector of QueryTerm objects. + */ +typedef std::vector QueryTermVector; + +/** + * This class is a factory for creating QueryTerm objects. + */ +class QueryTermFactory { +public: + /** + * Creates a new QueryTerm object for the term with the given term index. + * + * @param env the environment used to lookup TermData object, significance, and connectedness. + * @param termIndex the index to use when looking up the TermData object. + * @param lookupSignificance whether we should look up the significance for this term. + * @param lookupConnectedness whether we should look up the connectedness this term has with the previous term. 
+ */ + static QueryTerm create(const fef::IQueryEnvironment & env, + uint32_t termIndex, + bool lookupSignificance = true, + bool lookupConnectedness = false); +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp b/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp new file mode 100644 index 00000000000..7069b4208be --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/querytermcountfeature.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.querytermcountfeature"); + +#include +#include +#include +#include +#include +#include +#include "querytermcountfeature.h" +#include "valuefeature.h" + +using namespace search::fef; + +namespace search { +namespace features { + +QueryTermCountBlueprint::QueryTermCountBlueprint() : + Blueprint("queryTermCount") +{ +} + +void +QueryTermCountBlueprint::visitDumpFeatures(const IIndexEnvironment & env, + IDumpFeatureVisitor & visitor) const +{ + (void) env; + visitor.visitDumpFeature(getBaseName()); +} + +Blueprint::UP +QueryTermCountBlueprint::createInstance() const +{ + return Blueprint::UP(new QueryTermCountBlueprint()); +} + +bool +QueryTermCountBlueprint::setup(const IIndexEnvironment &, + const ParameterList &) +{ + describeOutput("out", "The number of query terms found in the query environment."); + return true; +} + +FeatureExecutor::LP +QueryTermCountBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + std::vector values; + values.push_back(static_cast(env.getNumTerms())); + return FeatureExecutor::LP(new ValueExecutor(values)); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/querytermcountfeature.h b/searchlib/src/vespa/searchlib/features/querytermcountfeature.h new file mode 100644 index 00000000000..54cfee056f0 --- 
/dev/null +++ b/searchlib/src/vespa/searchlib/features/querytermcountfeature.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the blueprint for the query term count feature. + */ +class QueryTermCountBlueprint : public search::fef::Blueprint { +private: +public: + /** + * Constructs a blueprint; the definition passes "queryTermCount" to the Blueprint base class. + */ + QueryTermCountBlueprint(); + + // Inherit doc from Blueprint. + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env, + search::fef::IDumpFeatureVisitor & visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/randomfeature.cpp b/searchlib/src/vespa/searchlib/features/randomfeature.cpp new file mode 100644 index 00000000000..2c470808a62 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/randomfeature.cpp @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".features.randomfeature");
+#include "randomfeature.h"
+#include "utils.h"
+#include
+
+namespace search {
+namespace features {
+
+RandomExecutor::RandomExecutor(uint64_t seed, uint64_t matchSeed) :
+    search::fef::FeatureExecutor(),
+    _rnd(),
+    _matchRnd(),
+    _matchSeed(matchSeed)
+{
+    LOG(debug, "RandomExecutor: seed=%" PRIu64 ", matchSeed=%" PRIu64,
+        seed, matchSeed);
+    _rnd.srand48(seed); // _rnd is seeded once here; _matchRnd is re-seeded on every execute()
+}
+
+void
+RandomExecutor::execute(search::fef::MatchData & match)
+{
+    feature_t rndScore = _rnd.lrand48() / (feature_t)0x80000000u; // divide by 2^31
+    _matchRnd.srand48(_matchSeed + match.getDocId()); // same match seed + same docid => same seed, so the value repeats
+    feature_t matchRndScore = _matchRnd.lrand48() / (feature_t)0x80000000u; // divide by 2^31
+    //LOG(debug, "execute: %f", rndScore);
+    *match.resolveFeature(outputs()[0]) = rndScore;      // output "out"
+    *match.resolveFeature(outputs()[1]) = matchRndScore; // output "match"
+}
+
+
+RandomBlueprint::RandomBlueprint() :
+    search::fef::Blueprint("random"),
+    _seed(0)
+{
+    // empty; a _seed of 0 makes createExecutor() derive a seed at run time
+}
+
+void
+RandomBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+                                   search::fef::IDumpFeatureVisitor &) const
+{
+    // empty
+}
+
+search::fef::Blueprint::UP
+RandomBlueprint::createInstance() const
+{
+    return search::fef::Blueprint::UP(new RandomBlueprint());
+}
+
+bool
+RandomBlueprint::setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList &)
+{
+    search::fef::Property p = env.getProperties().lookup(getName(), "seed"); // optional "<feature name>.seed" property
+    if (p.found()) {
+        _seed = util::strToNum<uint64_t>(p.get());
+    }
+    describeOutput("out"   , "A random value in the interval [0, 1>");
+    describeOutput("match" , "A random value in the interval [0, 1> that is stable for a given match (document and query)");
+    return true;
+}
+
+search::fef::FeatureExecutor::LP
+RandomBlueprint::createExecutor(const search::fef::IQueryEnvironment & env) const
+{
+    uint64_t seed = _seed;
+    if (seed == 0) { // no usable configured seed: mix the current time with a stack address
+        FastOS_Time time;
+        time.SetNow();
+        seed = static_cast<uint64_t>(time.MicroSecs()) ^
+               reinterpret_cast<uint64_t>(&seed); // results in different seeds in different threads
+    }
+    uint64_t matchSeed = util::strToNum<uint64_t>
+        (env.getProperties().lookup(getName(), "match", "seed").get("1024")); // default seed
+
+    return search::fef::FeatureExecutor::LP(new RandomExecutor(seed, matchSeed));
+}
+
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/randomfeature.h b/searchlib/src/vespa/searchlib/features/randomfeature.h
new file mode 100644
index 00000000000..f2932876a10
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/randomfeature.h
@@ -0,0 +1,71 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace features {
+
+
+/**
+ * Implements the executor for the random feature outputting a number in the interval [0, 1>.
+ **/
+class RandomExecutor : public search::fef::FeatureExecutor {
+private:
+    Rand48   _rnd;       // seeded once per query
+    Rand48   _matchRnd;  // seeded once per match
+    uint64_t _matchSeed; // base seed; the docid is added to it for each match
+
+public:
+    /**
+     * Constructs a new executor; seed feeds _rnd, matchSeed is kept for per-match seeding.
+     **/
+    RandomExecutor(uint64_t seed, uint64_t matchSeed);
+    virtual void execute(search::fef::MatchData & data);
+};
+
+
+/**
+ * Implements the blueprint for the random feature.
+ */
+class RandomBlueprint : public search::fef::Blueprint {
+private:
+    uint64_t _seed; // 0 until a "seed" property is found during setup()
+
+public:
+    /**
+     * Constructs a new blueprint.
+     */
+    RandomBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & env,
+                                   search::fef::IDumpFeatureVisitor & visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint. Two signatures: a bare desc() and a desc() with one string entry.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().
+            desc().
+            desc().string(); // in order to name different features
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const;
+};
+
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore b/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore
new file mode 100644
index 00000000000..31d063a8460
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/.gitignore
@@ -0,0 +1,6 @@
+*.So
+.depend
+Makefile
+lex.yy.cpp
+parser.tab.cpp
+parser.tab.h
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
new file mode 100644
index 00000000000..2853a06c49e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_rankingexpression OBJECT
+    SOURCES
+    feature_name_extractor.cpp
+    DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
new file mode 100644
index 00000000000..4dc5124df67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.cpp
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "feature_name_extractor.h"
+
+namespace search {
+namespace features {
+namespace rankingexpression {
+
+namespace {
+
+struct LegalChar { // lookup table over all 256 byte values: isalnum() bytes plus the extras given
+    bool legal[256];
+    LegalChar(std::initializer_list<uint8_t> extra_chars) { // uint8_t elements index legal[] directly
+        for (int c = 0; c < 256; ++c) {
+            legal[c] = isalnum(c);
+        }
+        for (uint8_t c: extra_chars) {
+            legal[c] = true;
+        }
+    }
+    bool is_legal(uint8_t c) const { return legal[c]; } // char arguments are converted to 0..255
+};
+
+static const LegalChar prefix({'_', '$', '@'});      // bytes accepted in the leading name part
+static const LegalChar suffix({'_', '.', '$', '@'}); // bytes accepted after the '.' separator
+
+struct CountParen { // tracks '(' / ')' nesting; parentheses inside "..." (with \ escapes) are ignored
+    size_t depth = 0;
+    bool quoted = false;
+    bool escaped = false;
+    bool done(char c) { // feed one char; returns true when a ')' brings depth back to 0
+        if (quoted) {
+            if (escaped) {
+                escaped = false; // the escaped char itself is skipped
+            } else {
+                if (c == '\\') {
+                    escaped = true;
+                } else if (c == '"') {
+                    quoted = false;
+                }
+            }
+        } else {
+            if (c == '"') {
+                quoted = true;
+            } else if (c == '(') {
+                ++depth;
+            } else if (c == ')') {
+                if (--depth == 0) { // depth is unsigned: the opening '(' must have been fed first
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+};
+
+} // namespace
+
+void
+FeatureNameExtractor::extract_symbol(const char *pos_in, const char *end_in,
+                                     const char *&pos_out, vespalib::string &symbol_out) const
+{
+    while ((pos_in < end_in) && prefix.is_legal(*pos_in)) { // 1: leading name characters
+        symbol_out.push_back(*pos_in++);
+    }
+    if ((pos_in < end_in) && (*pos_in == '(')) { // 2: optional parenthesized part, copied verbatim
+        CountParen paren;
+        while (pos_in < end_in) {
+            symbol_out.push_back(*pos_in);
+            if (paren.done(*pos_in++)) {
+                break; // matching ')' consumed; unbalanced input simply runs to end_in
+            }
+        }
+    }
+    if ((pos_in < end_in) && (*pos_in == '.')) { // 3: optional ".suffix" part
+        symbol_out.push_back(*pos_in++);
+        while ((pos_in < end_in) && suffix.is_legal(*pos_in)) {
+            symbol_out.push_back(*pos_in++);
+        }
+    }
+    pos_out = pos_in; // first position that was not consumed
+}
+
+} // namespace rankingexpression
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
new file mode 100644
index 00000000000..34551cc8503
--- /dev/null
+++
b/searchlib/src/vespa/searchlib/features/rankingexpression/feature_name_extractor.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+
+namespace search {
+namespace features {
+namespace rankingexpression {
+
+/**
+ * Custom symbol extractor used to extract ranking feature names when
+ * parsing ranking expressions. Declaration only; the definition is not in this header.
+ **/
+struct FeatureNameExtractor : public vespalib::eval::SymbolExtractor {
+    virtual void extract_symbol(const char *pos_in, const char *end_in,                    // input: start and end pointers
+                                const char *&pos_out, vespalib::string &symbol_out) const; // output: position and symbol text
+};
+
+} // namespace rankingexpression
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
new file mode 100644
index 00000000000..80724d2d3ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.cpp
@@ -0,0 +1,170 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP(".features.rankingexpression");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "rankingexpressionfeature.h"
+#include "utils.h"
+#include
+#include
+#include
+#include
+
+using vespalib::eval::Function;
+using vespalib::eval::PassParams;
+using vespalib::eval::CompileCache;
+using vespalib::eval::CompiledFunction;
+using vespalib::eval::InterpretedFunction;
+using vespalib::eval::ValueType;
+using vespalib::eval::NodeTypes;
+using vespalib::tensor::DefaultTensorEngine;
+using search::fef::FeatureType;
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+CompiledRankingExpressionExecutor::CompiledRankingExpressionExecutor(const vespalib::eval::CompiledFunction &compiled_function)
+    : _ranking_function(compiled_function.get_function()),
+      _params(compiled_function.num_params(), 0.0) // one zero-initialised slot per function parameter
+{
+}
+
+void
+CompiledRankingExpressionExecutor::execute(search::fef::MatchData &data)
+{
+    for (size_t i = 0; i < _params.size(); ++i) { // copy input feature i into parameter slot i
+        _params[i] = *data.resolveFeature(inputs()[i]);
+    }
+    *data.resolveFeature(outputs()[0]) = _ranking_function(_params.data()); // data() is valid even with zero parameters
+}
+
+//-----------------------------------------------------------------------------
+
+InterpretedRankingExpressionExecutor::InterpretedRankingExpressionExecutor(const vespalib::eval::InterpretedFunction &function)
+    : _context(),
+      _function(function) // stored by reference, not copied
+{
+}
+
+void
+InterpretedRankingExpressionExecutor::execute(search::fef::MatchData &data)
+{
+    _context.clear_params(); // the context is reused between calls, so drop the previous parameters
+    for (size_t i = 0; i < _function.num_params(); ++i) {
+        if (data.feature_is_object(inputs()[i])) { // object inputs and number inputs are resolved differently
+            _context.add_param(*data.resolve_object_feature(inputs()[i]));
+        } else {
+            _context.add_param(*data.resolveFeature(inputs()[i]));
+        }
+    }
+    *data.resolve_object_feature(outputs()[0]) = _function.eval(_context); // the output is written as an object feature
+}
+
+//-----------------------------------------------------------------------------
+
+RankingExpressionBlueprint::RankingExpressionBlueprint()
+    : search::fef::Blueprint("rankingExpression"),
+      _interpreted_function(),
+      _compile_token()
+{
+}
+
+void
+RankingExpressionBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &,
+                                              search::fef::IDumpFeatureVisitor &) const
+{
+    // empty
+}
+
+bool
+RankingExpressionBlueprint::setup(const search::fef::IIndexEnvironment &env,
+                                  const search::fef::ParameterList &params)
+{
+    // Retrieve and concatenate whatever config is available; the property takes precedence over the parameter.
+    vespalib::string script = "";
+    search::fef::Property property = env.getProperties().lookup(getName(), "rankingScript");
+    if (property.size() > 0) {
+        for (uint32_t i = 0; i < property.size(); ++i) {
+            script.append(property.getAt(i));
+        }
+        //LOG(debug, "Script from config: '%s'\n", script.c_str());
+    } else if (params.size() == 1) {
+        script = params[0].getValue();
+        //LOG(debug, "Script from param: '%s'\n", script.c_str());
+    } else {
+        LOG(error, "No expression given.");
+        return false;
+    }
+    Function rank_function = Function::parse(script, rankingexpression::FeatureNameExtractor());
+    if (rank_function.has_error()) {
+        LOG(error, "Failed to parse expression '%s': %s", script.c_str(), rank_function.get_error().c_str());
+        return false;
+    }
+    bool do_compile = true; // cleared as soon as any input or node is not a plain double
+    std::vector<ValueType> input_types;
+    for (size_t i = 0; i < rank_function.num_params(); ++i) { // every function parameter becomes a feature input
+        const FeatureType &input = defineInput(rank_function.param_name(i), AcceptInput::ANY);
+        if (input.is_object()) {
+            do_compile = false;
+            input_types.push_back(input.type());
+        } else {
+            input_types.push_back(ValueType::double_type());
+        }
+    }
+    NodeTypes node_types(rank_function, input_types);
+    if (!node_types.all_types_are_double()) {
+        do_compile = false;
+    }
+    ValueType root_type = node_types.get_type(rank_function.root());
+    if (root_type.is_error()) {
+        LOG(error, "rank expression contains type errors: %s\n", script.c_str());
+        return false;
+    }
+    if (root_type.is_any()) {
+        LOG(warning, "rank expression could produce run-time type errors: %s\n", script.c_str());
+    }
+    // avoid costly compilation when only verifying setup
+    if (env.getFeatureMotivation() != env.FeatureMotivation::VERIFY_SETUP) {
+        if (do_compile) {
+            _compile_token = CompileCache::compile(rank_function, PassParams::ARRAY);
+        } else {
+            _interpreted_function.reset(new InterpretedFunction(DefaultTensorEngine::ref(), rank_function));
+        }
+    }
+    FeatureType output_type = do_compile
+                              ? FeatureType::number()
+                              : FeatureType::object(root_type);
+    describeOutput("out", "The result of running the contained ranking expression.", output_type);
+    return true;
+}
+
+search::fef::Blueprint::UP
+RankingExpressionBlueprint::createInstance() const
+{
+    return search::fef::Blueprint::UP(new RankingExpressionBlueprint());
+}
+
+search::fef::FeatureExecutor::LP
+RankingExpressionBlueprint::createExecutor(const search::fef::IQueryEnvironment &) const
+{
+    if (_interpreted_function) { // set by setup() only when the expression is not compiled
+        return search::fef::FeatureExecutor::LP(new InterpretedRankingExpressionExecutor(*_interpreted_function));
+    }
+    assert(_compile_token.get() != nullptr); // will be nullptr for VERIFY_SETUP feature motivation
+    return search::fef::FeatureExecutor::LP(new CompiledRankingExpressionExecutor(_compile_token->get()));
+}
+
+//-----------------------------------------------------------------------------
+
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
new file mode 100644
index 00000000000..af60c0de456
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/rankingexpressionfeature.h
@@ -0,0 +1,88 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace features {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the executor for compiled ranking expressions
+ **/
+class CompiledRankingExpressionExecutor : public search::fef::FeatureExecutor
+{
+private:
+    typedef double (*arr_function)(const double *); // compiled entry point taking a parameter array
+    arr_function _ranking_function;
+    std::vector<double> _params; // scratch array handed to _ranking_function on each execute()
+
+public:
+    CompiledRankingExpressionExecutor(const vespalib::eval::CompiledFunction &compiled_function);
+    virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the executor for interpreted ranking expressions (with tensor support)
+ **/
+class InterpretedRankingExpressionExecutor : public search::fef::FeatureExecutor
+{
+private:
+    vespalib::eval::InterpretedFunction::Context _context;
+    const vespalib::eval::InterpretedFunction &_function; // reference member: the function must outlive this executor
+
+public:
+    InterpretedRankingExpressionExecutor(const vespalib::eval::InterpretedFunction &function);
+    virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Implements the blueprint for ranking expression.
+ */
+class RankingExpressionBlueprint : public search::fef::Blueprint
+{
+private:
+    vespalib::eval::InterpretedFunction::UP _interpreted_function;
+    vespalib::eval::CompileCache::Token::UP _compile_token;
+
+public:
+    /**
+     * Constructs a ranking expression blueprint.
+     */
+    RankingExpressionBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+                                   search::fef::IDumpFeatureVisitor &visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint. Two signatures: a bare desc() and a desc() with one string entry.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().
+            desc().
+            desc().string();
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // features
+} // search
diff --git a/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp b/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp
new file mode 100644
index 00000000000..c6689a45ecd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/raw_score_feature.cpp
@@ -0,0 +1,57 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".features.raw_score_feature");
+#include "raw_score_feature.h"
+#include "utils.h"
+
+using namespace search::fef;
+
+namespace search {
+namespace features {
+
+RawScoreExecutor::RawScoreExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId)
+    : FeatureExecutor(),
+      _handles()
+{
+    for (uint32_t i = 0; i < env.getNumTerms(); ++i) { // collect one handle per query term that has one for this field
+        search::fef::TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId);
+        if (handle != search::fef::IllegalHandle) {
+            _handles.push_back(handle);
+        }
+    }
+}
+
+void
+RawScoreExecutor::execute(MatchData &data)
+{
+    feature_t output = 0.0;
+    for (uint32_t i = 0; i < _handles.size(); ++i) {
+        const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]);
+        if (tfmd->getDocId() == data.getDocId()) { // only count terms whose match data is for the current document
+            output += tfmd->getRawScore();
+        }
+    }
+    *data.resolveFeature(outputs()[0]) = output;
+}
+
+//-----------------------------------------------------------------------------
+
+bool
+RawScoreBlueprint::setup(const IIndexEnvironment &,
+                         const ParameterList &params)
+{
+    _field = params[0].asField(); // the single parameter is the field, as declared by getDescriptions()
+    describeOutput("out", "accumulated raw score for the given field");
+    return true;
+}
+
+FeatureExecutor::LP
+RawScoreBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const
+{
+    return FeatureExecutor::LP(new RawScoreExecutor(queryEnv, _field->id())); // _field is only set by setup()
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/raw_score_feature.h b/searchlib/src/vespa/searchlib/features/raw_score_feature.h
new file mode 100644
index 00000000000..f357989dda6
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/raw_score_feature.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace features {
+
+class RawScoreExecutor : public search::fef::FeatureExecutor // sums raw scores of the terms matching one field
+{
+private:
+    std::vector<search::fef::TermFieldHandle> _handles; // legal handles only; filled by the constructor
+public:
+    RawScoreExecutor(const search::fef::IQueryEnvironment &env, uint32_t fieldId);
+    virtual void execute(search::fef::MatchData &data);
+};
+
+//-----------------------------------------------------------------------------
+
+class RawScoreBlueprint : public search::fef::Blueprint // blueprint for the "rawScore" feature
+{
+private:
+    const search::fef::FieldInfo *_field; // null until setup() has run
+public:
+    RawScoreBlueprint() : Blueprint("rawScore"), _field(nullptr) {}
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+                                   search::fef::IDumpFeatureVisitor &) const {}
+    virtual search::fef::Blueprint::UP createInstance() const {
+        return Blueprint::UP(new RawScoreBlueprint());
+    }
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc().field(); // exactly one parameter: a field
+    }
+    virtual bool setup(const search::fef::IIndexEnvironment &env,
+                       const search::fef::ParameterList &params);
+    virtual search::fef::FeatureExecutor::LP
+    createExecutor(const search::fef::IQueryEnvironment &env) const;
+};
+
+} // namespace features
+} //
namespace search + diff --git a/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp new file mode 100644 index 00000000000..5a297a8da8a --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.cpp @@ -0,0 +1,136 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".features.reverseproximity"); + +#include +#include +#include +#include +#include +#include "reverseproximityfeature.h" +#include "utils.h" + +namespace search { +namespace features { + +ReverseProximityConfig::ReverseProximityConfig() : + fieldId(search::fef::IllegalHandle), + termA(std::numeric_limits::max()), + termB(std::numeric_limits::max()) +{ + // empty +} + +ReverseProximityExecutor::ReverseProximityExecutor(const search::fef::IQueryEnvironment &env, + const ReverseProximityConfig &config) : + search::fef::FeatureExecutor(), + _config(config), + _termA(util::getTermFieldHandle(env, _config.termA, _config.fieldId)), + _termB(util::getTermFieldHandle(env, _config.termB, _config.fieldId)) +{ +} + +void +ReverseProximityExecutor::execute(search::fef::MatchData &match) +{ + // Cannot calculate proximity in this case + if (_termA == search::fef::IllegalHandle || _termB == search::fef::IllegalHandle) { + *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out + *match.resolveFeature(outputs()[1]) = util::FEATURE_MIN; // posA + *match.resolveFeature(outputs()[2]) = util::FEATURE_MAX; // posB + return; + } + + // Look for an initial pair to use as guess. 
+ uint32_t posA = 0, posB = 0; + search::fef::FieldPositionsIterator itA, itB; + search::fef::TermFieldMatchData &matchA = *match.resolveTermField(_termA); + search::fef::TermFieldMatchData &matchB = *match.resolveTermField(_termB); + if (matchA.getDocId() == match.getDocId() && matchB.getDocId() == match.getDocId()) { + itA = matchA.getIterator(); + itB = matchB.getIterator(); + if (itA.valid() && itB.valid()) { + for(posA = itA.getPosition(), posB = itB.getPosition(); + itA.valid() && itA.getPosition() < posB; itA.next()) + { + // empty + } + } + } + //LOG(debug, "Initial guess; posA is '%u' and posB is '%u'.", posA, posB); + + // _P_A_R_A_N_O_I_A_ + if (!itA.valid() || !itB.valid()) { + //LOG(debug, "Initial guess is invalid."); + *match.resolveFeature(outputs()[0]) = util::FEATURE_MAX; // out + *match.resolveFeature(outputs()[1]) = util::FEATURE_MIN; // posA + *match.resolveFeature(outputs()[2]) = util::FEATURE_MAX; // posB + return; + } + + // Look for optimal positions for term A and B. + uint32_t optA = posA, optB = posB; + while (itA.valid() && itB.valid()) { + uint32_t a = itA.getPosition(), b = itB.getPosition(); + if (b < posA) { + posB = b; + itB.next(); + } + else { + if (posA - posB < optA - optB) { + optA = posA; + optB = posB; + } + posA = a; + itA.next(); + } + } + + // Output proximity score. 
+ *match.resolveFeature(outputs()[0]) = optA - optB; + *match.resolveFeature(outputs()[1]) = optA; + *match.resolveFeature(outputs()[2]) = optB; +} + +ReverseProximityBlueprint::ReverseProximityBlueprint() : + search::fef::Blueprint("reverseProximity"), + _config() +{ + // empty +} + +void +ReverseProximityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +ReverseProximityBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + _config.fieldId = params[0].asField()->id(); + _config.termA = params[1].asInteger(); + _config.termB = params[2].asInteger(); + describeOutput("out" , "The reverse proximity of the query terms."); + describeOutput("posA", "The best position of the first query term."); + describeOutput("posB", "The best position of the second query term."); + env.hintFieldAccess(_config.fieldId); + return true; +} + +search::fef::Blueprint::UP +ReverseProximityBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new ReverseProximityBlueprint()); +} + +search::fef::FeatureExecutor::LP +ReverseProximityBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new ReverseProximityExecutor(env, _config)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h new file mode 100644 index 00000000000..4311af3dc81 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/reverseproximityfeature.h @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the necessary config for reverse proximity. 
+ */
+struct ReverseProximityConfig {
+    ReverseProximityConfig();
+
+    uint32_t fieldId; // The id of field to process.
+    uint32_t termA;   // The id of the first query term in the pair (a, b).
+    uint32_t termB;   // The id of the second query term.
+};
+
+/**
+ * Implements the executor for reverse proximity.
+ */
+class ReverseProximityExecutor : public search::fef::FeatureExecutor {
+public:
+    /**
+     * Constructs an executor for reverse proximity.
+     *
+     * @param env    The query environment.
+     * @param config The reverse proximity config; stored by reference, so it must outlive the executor.
+     */
+    ReverseProximityExecutor(const search::fef::IQueryEnvironment &env,
+                             const ReverseProximityConfig &config);
+    virtual void execute(search::fef::MatchData &data);
+
+private:
+    const ReverseProximityConfig &_config; // The reverse proximity config (a reference, not a copy).
+    search::fef::TermFieldHandle  _termA;  // Handle to the first query term.
+    search::fef::TermFieldHandle  _termB;  // Handle to the second query term.
+};
+
+/**
+ * Implements the blueprint for reverse proximity.
+ */
+class ReverseProximityBlueprint : public search::fef::Blueprint {
+public:
+    /**
+     * Constructs a blueprint for reverse proximity.
+     */
+    ReverseProximityBlueprint();
+
+    // Inherit doc from Blueprint.
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
+                                   search::fef::IDumpFeatureVisitor &visitor) const;
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::Blueprint::UP createInstance() const;
+
+    // Inherit doc from Blueprint. Parameters: an index field followed by two numbers.
+    virtual search::fef::ParameterDescriptions getDescriptions() const {
+        return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::ANY).number().number();
+    }
+
+    // Inherit doc from Blueprint.
+    virtual bool setup(const search::fef::IIndexEnvironment & env,
+                       const search::fef::ParameterList & params);
+
+    // Inherit doc from Blueprint.
+    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;
+
+private:
+    ReverseProximityConfig _config;
+};
+
+}}
+
diff --git a/searchlib/src/vespa/searchlib/features/setup.cpp b/searchlib/src/vespa/searchlib/features/setup.cpp
new file mode 100644
index 00000000000..e05569c0b6d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/setup.cpp
@@ -0,0 +1,115 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "setup.h"
+
+#include "agefeature.h"
+#include "attributefeature.h"
+#include "attributematchfeature.h"
+#include "closenessfeature.h"
+#include "debug_attribute_wait.h"
+#include "debug_wait.h"
+#include "distancefeature.h"
+#include "distancetopathfeature.h"
+#include "dotproductfeature.h"
+#include "element_completeness_feature.h"
+#include "element_similarity_feature.h"
+#include "euclidean_distance_feature.h"
+#include "fieldinfofeature.h"
+#include "fieldlengthfeature.h"
+#include "fieldmatchfeature.h"
+#include "fieldtermmatchfeature.h"
+#include "firstphasefeature.h"
+#include "flow_completeness_feature.h"
+#include "foreachfeature.h"
+#include "freshnessfeature.h"
+#include "item_raw_score_feature.h"
+#include "jarowinklerdistancefeature.h"
+#include "matchesfeature.h"
+#include "matchfeature.h"
+#include "native_dot_product_feature.h"
+#include "nativeattributematchfeature.h"
+#include "nativefieldmatchfeature.h"
+#include "nativeproximityfeature.h"
+#include "nativerankfeature.h"
+#include "nowfeature.h"
+#include "proximityfeature.h"
+#include "querycompletenessfeature.h"
+#include "queryfeature.h"
+#include "querytermcountfeature.h"
+#include "randomfeature.h"
+#include "rankingexpressionfeature.h"
+#include "raw_score_feature.h"
+#include "reverseproximityfeature.h"
+#include "subqueries_feature.h"
+#include "tensor_from_labels_feature.h"
+#include "tensor_from_weighted_set_feature.h"
+#include "term_field_md_feature.h"
+#include "termdistancefeature.h"
+#include "termeditdistancefeature.h"
+#include "termfeature.h"
+#include "terminfofeature.h"
+#include "text_similarity_feature.h"
+#include "valuefeature.h"
+
+using search::fef::Blueprint;
+
+namespace search {
+namespace features {
+
+void setup_search_features(fef::IBlueprintRegistry & registry)
+{
+    // Prod features: one default-constructed prototype per blueprint.
+    registry.addPrototype(Blueprint::SP(new AgeBlueprint()));
+    registry.addPrototype(Blueprint::SP(new AttributeBlueprint()));
+    registry.addPrototype(Blueprint::SP(new AttributeMatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ClosenessBlueprint()));
+    registry.addPrototype(Blueprint::SP(new DistanceBlueprint()));
+    registry.addPrototype(Blueprint::SP(new DistanceToPathBlueprint()));
+    registry.addPrototype(Blueprint::SP(new DebugAttributeWaitBlueprint()));
+    registry.addPrototype(Blueprint::SP(new DebugWaitBlueprint()));
+    registry.addPrototype(Blueprint::SP(new DotProductBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ElementCompletenessBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ElementSimilarityBlueprint()));
+    registry.addPrototype(Blueprint::SP(new EuclideanDistanceBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FieldInfoBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FlowCompletenessBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FieldLengthBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FieldMatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FieldTermMatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FirstPhaseBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ForeachBlueprint()));
+    registry.addPrototype(Blueprint::SP(new FreshnessBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ItemRawScoreBlueprint()));
+    registry.addPrototype(Blueprint::SP(new MatchesBlueprint()));
+    registry.addPrototype(Blueprint::SP(new MatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NativeAttributeMatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NativeDotProductBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NativeFieldMatchBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NativeProximityBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NativeRankBlueprint()));
+    registry.addPrototype(Blueprint::SP(new NowBlueprint()));
+    registry.addPrototype(Blueprint::SP(new QueryBlueprint()));
+    registry.addPrototype(Blueprint::SP(new QueryTermCountBlueprint()));
+    registry.addPrototype(Blueprint::SP(new RandomBlueprint()));
+    registry.addPrototype(Blueprint::SP(new RankingExpressionBlueprint()));
+    registry.addPrototype(Blueprint::SP(new RawScoreBlueprint()));
+    registry.addPrototype(Blueprint::SP(new SubqueriesBlueprint));
+    registry.addPrototype(Blueprint::SP(new TensorFromLabelsBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TensorFromWeightedSetBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TermBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TermDistanceBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TermInfoBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TextSimilarityBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ValueBlueprint()));
+
+    // Beta features.
+    registry.addPrototype(Blueprint::SP(new JaroWinklerDistanceBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ProximityBlueprint()));
+    registry.addPrototype(Blueprint::SP(new QueryCompletenessBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ReverseProximityBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TermEditDistanceBlueprint()));
+    registry.addPrototype(Blueprint::SP(new TermFieldMdBlueprint()));
+}
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/setup.h b/searchlib/src/vespa/searchlib/features/setup.h
new file mode 100644
index 00000000000..34e36ed5746
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/setup.h
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace features {
+
+/**
+ * Adds prototypes for all features in this library to the given registry.
+ *
+ * @param registry The blueprint registry to add prototypes to.
+ **/
+void setup_search_features(fef::IBlueprintRegistry & registry);
+
+} // namespace features
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp b/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp
new file mode 100644
index 00000000000..f6736f2cac9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/subqueries_feature.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.subqueries_feature"); +#include "subqueries_feature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +SubqueriesExecutor::SubqueriesExecutor(const IQueryEnvironment &env, + uint32_t fieldId) + : FeatureExecutor(), + _handles() { + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + TermFieldHandle handle = util::getTermFieldHandle(env, i, fieldId); + if (handle != IllegalHandle) { + _handles.push_back(handle); + } + } +} + +void SubqueriesExecutor::execute(MatchData &data) { + uint32_t lsb = 0; + uint32_t msb = 0; + for (uint32_t i = 0; i < _handles.size(); ++i) { + const TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]); + if (tfmd->getDocId() == data.getDocId()) { + lsb |= static_cast(tfmd->getSubqueries()); + msb |= tfmd->getSubqueries() >> 32; + } + } + *data.resolveFeature(outputs()[0]) = lsb; + *data.resolveFeature(outputs()[1]) = msb; +} + +//----------------------------------------------------------------------------- + +bool SubqueriesBlueprint::setup(const IIndexEnvironment &, + const ParameterList ¶ms) { + _field = params[0].asField(); + describeOutput("lsb", "32 least significant bits of the subquery bitmap" + " for the given field"); + describeOutput("msb", "32 most significant bits of the subquery bitmap" + " for the given field"); + return true; +} + +FeatureExecutor::LP +SubqueriesBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const { + return FeatureExecutor::LP(new SubqueriesExecutor(queryEnv, _field->id())); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/subqueries_feature.h b/searchlib/src/vespa/searchlib/features/subqueries_feature.h new file mode 100644 index 00000000000..2ac727ba4b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/subqueries_feature.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace features { + +class SubqueriesExecutor : public search::fef::FeatureExecutor { + std::vector _handles; +public: + SubqueriesExecutor(const search::fef::IQueryEnvironment &env, + uint32_t fieldId); + virtual void execute(search::fef::MatchData &data); +}; + +//----------------------------------------------------------------------------- + +class SubqueriesBlueprint : public search::fef::Blueprint +{ +private: + const search::fef::FieldInfo *_field; +public: + SubqueriesBlueprint() : Blueprint("subqueries"), _field(nullptr) {} + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const {} + virtual search::fef::Blueprint::UP createInstance() const { + return Blueprint::UP(new SubqueriesBlueprint); + } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().field(); + } + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms); + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment &env) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp new file mode 100644 index 00000000000..addb90426d7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.cpp @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.tensor_factory_blueprint"); + +#include "tensor_factory_blueprint.h" + +#include + +using namespace search::fef; +using vespalib::eval::Function; + +namespace search { +namespace features { + +vespalib::string TensorFactoryBlueprint::ATTRIBUTE_SOURCE = "attribute"; +vespalib::string TensorFactoryBlueprint::QUERY_SOURCE = "query"; + +bool +TensorFactoryBlueprint::extractSource(const vespalib::string &source) +{ + vespalib::string error; + bool unwrapOk = Function::unwrap(source, _sourceType, _sourceParam, error); + if (!unwrapOk) { + LOG(error, "Failed to extract source param: '%s'", error.c_str()); + return false; + } + if (_sourceType != ATTRIBUTE_SOURCE && _sourceType != QUERY_SOURCE) { + LOG(error, "Expected source type '%s' or '%s', but it was '%s'", + ATTRIBUTE_SOURCE.c_str(), QUERY_SOURCE.c_str(), _sourceType.c_str()); + return false; + } + return true; +} + +TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName) + : Blueprint(baseName), + _sourceType(), + _sourceParam(), + _dimension("0") // default dimension is set to the source param if not specified. +{ +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h new file mode 100644 index 00000000000..5d9ec8eafad --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_factory_blueprint.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace features { + +/** + * Factory class for tensor rank features. 
+ */
+class TensorFactoryBlueprint : public search::fef::Blueprint
+{
+protected:
+    static vespalib::string ATTRIBUTE_SOURCE; // source type name "attribute"
+    static vespalib::string QUERY_SOURCE;     // source type name "query"
+
+    vespalib::string _sourceType;  // written by extractSource(); ATTRIBUTE_SOURCE or QUERY_SOURCE on success
+    vespalib::string _sourceParam; // written by extractSource(); the argument wrapped by the source type
+    vespalib::string _dimension;   // name of the single tensor dimension; assigned by a derived setup()
+
+    bool extractSource(const vespalib::string &source); // returns false (and logs) when source cannot be unwrapped or has an unknown type
+    TensorFactoryBlueprint(const vespalib::string &baseName);
+
+public:
+    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &,
+                                   search::fef::IDumpFeatureVisitor &) const override {} // nothing is passed to the visitor
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
new file mode 100644
index 00000000000..220fea0c849
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_attribute_executor.h
@@ -0,0 +1,56 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace features {
+
+/**
+ * Feature executor that extracts the content from an attribute vector
+ * and converts that into a tensor.
+ */ +template +class TensorFromAttributeExecutor : public fef::FeatureExecutor +{ +private: + const search::attribute::IAttributeVector *_attribute; + vespalib::string _dimension; + WeightedBufferType _attrBuffer; + vespalib::eval::TensorValue::UP _tensor; + +public: + TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute, + const vespalib::string &dimension) + : _attribute(attribute), + _dimension(dimension), + _attrBuffer(), + _tensor() + { + _attrBuffer.allocate(_attribute->getMaxValueCount()); + } + virtual void execute(fef::MatchData &data); +}; + +template +void +TensorFromAttributeExecutor::execute(fef::MatchData &data) +{ + _attrBuffer.fill(*_attribute, data.getDocId()); + vespalib::tensor::DefaultTensor::builder builder; + vespalib::tensor::TensorBuilder::Dimension dimensionEnum = builder.define_dimension(_dimension); + for (size_t i = 0; i < _attrBuffer.size(); ++i) { + builder.add_label(dimensionEnum, vespalib::string(_attrBuffer[i].value())); + builder.add_cell(_attrBuffer[i].weight()); + } + _tensor = vespalib::eval::TensorValue::UP(new vespalib::eval::TensorValue(builder.build())); + *data.resolve_object_feature(outputs()[0]) = *_tensor; +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp new file mode 100644 index 00000000000..819ca5c4ff1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.cpp @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.tensor_from_labels_feature"); + +#include "tensor_from_labels_feature.h" +#include "array_parser.hpp" +#include "constant_tensor_executor.h" +#include "tensor_from_attribute_executor.h" +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using search::attribute::IAttributeVector; +using search::attribute::WeightedConstCharContent; +using search::attribute::WeightedStringContent; +using vespalib::tensor::DefaultTensor; +using vespalib::tensor::TensorBuilder; +using vespalib::eval::ValueType; +using search::fef::FeatureType; + +namespace search { +namespace features { + +TensorFromLabelsBlueprint::TensorFromLabelsBlueprint() + : TensorFactoryBlueprint("tensorFromLabels") +{ +} + +bool +TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + (void) env; + // _params[0] = source ('attribute(name)' OR 'query(param)'); + // _params[1] = dimension (optional); + bool validSource = extractSource(params[0].getValue()); + if (params.size() == 2) { + _dimension = params[1].getValue(); + } else { + _dimension = _sourceParam; + } + describeOutput("tensor", + "The tensor created from the given array source (attribute field or query parameter)", + FeatureType::object(ValueType::tensor_type({{_dimension}}))); + return validSource; +} + +namespace { + +FeatureExecutor::LP +createAttributeExecutor(const search::fef::IQueryEnvironment &env, + const vespalib::string &attrName, + const vespalib::string &dimension) +{ + const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); + if (attribute == NULL) { + LOG(warning, "The attribute vector '%s' was not found in the attribute manager." 
+ " Returning empty tensor.", attrName.c_str()); + return ConstantTensorExecutor::createEmpty(); + } + if (attribute->getCollectionType() != search::attribute::CollectionType::ARRAY || + attribute->isFloatingPointType()) { + LOG(warning, "The attribute vector '%s' is NOT of type array of string or integer." + " Returning empty tensor.", attrName.c_str()); + return ConstantTensorExecutor::createEmpty(); + } + // Note that for array attribute vectors the default weight is 1.0 for all values. + // This means we can get the attribute content as weighted content and build + // the tensor the same way as with weighted set attributes in tensorFromWeightedSet. + if (attribute->isIntegerType()) { + // Using WeightedStringContent ensures that the integer values are converted + // to strings while extracting them from the attribute. + return FeatureExecutor::LP + (new TensorFromAttributeExecutor(attribute, dimension)); + } + // When the underlying attribute is of type string we can reference these values + // using WeightedConstCharContent. 
+ return FeatureExecutor::LP + (new TensorFromAttributeExecutor(attribute, dimension)); +} + +FeatureExecutor::LP +createQueryExecutor(const search::fef::IQueryEnvironment &env, + const vespalib::string &queryKey, + const vespalib::string &dimension) +{ + search::fef::Property prop = env.getProperties().lookup(queryKey); + if (prop.found() && !prop.get().empty()) { + std::vector vector; + ArrayParser::parse(prop.get(), vector); + DefaultTensor::builder tensorBuilder; + TensorBuilder::Dimension dimensionEnum = tensorBuilder.define_dimension(dimension); + for (const auto &elem : vector) { + tensorBuilder.add_label(dimensionEnum, elem); + tensorBuilder.add_cell(1.0); + } + return ConstantTensorExecutor::create(tensorBuilder.build()); + } + return ConstantTensorExecutor::createEmpty(); +} + +} + +FeatureExecutor::LP +TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + if (_sourceType == ATTRIBUTE_SOURCE) { + return createAttributeExecutor(env, _sourceParam, _dimension); + } else if (_sourceType == QUERY_SOURCE) { + return createQueryExecutor(env, _sourceParam, _dimension); + } + return ConstantTensorExecutor::createEmpty(); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h new file mode 100644 index 00000000000..3da8d07b063 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_from_labels_feature.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_factory_blueprint.h" + +namespace search { +namespace features { + +/** + * Blueprint for a rank feature that creates a tensor from an array + * where the elements in the array are used as labels in the tensor addresses. + * The tensor cells all get the value 1.0. 
+ * + * The array source can be either an attribute vector or query parameter. + */ +class TensorFromLabelsBlueprint : public TensorFactoryBlueprint +{ +public: + TensorFromLabelsBlueprint(); + virtual search::fef::Blueprint::UP createInstance() const override { + return Blueprint::UP(new TensorFromLabelsBlueprint()); + } + virtual search::fef::ParameterDescriptions getDescriptions() const override { + return search::fef::ParameterDescriptions(). + desc().string(). + desc().string().string(); + } + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) override; + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment &env) const override; +}; + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp new file mode 100644 index 00000000000..2e00b5d4f19 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "tensor_from_tensor_attribute_executor.h" +#include + +namespace search { +namespace features { + +TensorFromTensorAttributeExecutor:: +TensorFromTensorAttributeExecutor(const search::attribute::TensorAttribute * + attribute) + : _attribute(attribute), + _tensor(), + _builder(), + // XXX: we should use numbers instead of empty tensors + _emptyTensor(std::make_unique(_builder.build())) +{ +} + + +void +TensorFromTensorAttributeExecutor::execute(fef::MatchData &data) +{ + auto tensor = _attribute->getTensor(data.getDocId()); + if (!tensor) { + *data.resolve_object_feature(outputs()[0]) = *_emptyTensor; + return; + } + _tensor = std::make_unique(std::move(tensor)); + *data.resolve_object_feature(outputs()[0]) = *_tensor; +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h new file mode 100644 index 00000000000..aa037b4ab59 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_from_tensor_attribute_executor.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace attribute { class TensorAttribute; }
+namespace features {
+
+class TensorFromTensorAttributeExecutor : public fef::FeatureExecutor
+{
+private:
+    const search::attribute::TensorAttribute *_attribute; // tensor attribute read in execute(); not owned here
+    vespalib::eval::TensorValue::UP _tensor;              // holds the value produced by the latest execute()
+    vespalib::tensor::DefaultTensor::builder _builder;    // used by the constructor to build _emptyTensor
+    vespalib::eval::TensorValue::UP _emptyTensor;         // output when the attribute returns no tensor
+
+public:
+    TensorFromTensorAttributeExecutor(const search::attribute::TensorAttribute *
+                                      attribute);
+    virtual void execute(fef::MatchData &data);
+};
+
+} // namespace features
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
new file mode 100644
index 00000000000..5754649109b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.cpp
@@ -0,0 +1,137 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.tensor_from_weighted_set_feature"); + +#include "tensor_from_weighted_set_feature.h" + +#include "constant_tensor_executor.h" +#include "utils.h" +#include "tensor_from_attribute_executor.h" +#include "weighted_set_parser.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace search::fef; +using search::attribute::IAttributeVector; +using search::attribute::WeightedConstCharContent; +using search::attribute::WeightedStringContent; +using vespalib::tensor::DefaultTensor; +using vespalib::tensor::TensorBuilder; +using vespalib::eval::ValueType; +using search::fef::FeatureType; + +namespace search { +namespace features { + +namespace { + +struct WeightedStringVector +{ + std::vector _data; + void insert(const vespalib::stringref &key, const vespalib::stringref &weight) { + _data.emplace_back(key, util::strToNum(weight)); + } +}; + +} + +TensorFromWeightedSetBlueprint::TensorFromWeightedSetBlueprint() + : TensorFactoryBlueprint("tensorFromWeightedSet") +{ +} + +bool +TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + (void) env; + // _params[0] = source ('attribute(name)' OR 'query(param)'); + // _params[1] = dimension (optional); + bool validSource = extractSource(params[0].getValue()); + if (params.size() == 2) { + _dimension = params[1].getValue(); + } else { + _dimension = _sourceParam; + } + describeOutput("tensor", + "The tensor created from the given weighted set source (attribute field or query parameter)", + FeatureType::object(ValueType::tensor_type({{_dimension}}))); + return validSource; +} + +namespace { + +FeatureExecutor::LP +createAttributeExecutor(const search::fef::IQueryEnvironment &env, + const vespalib::string &attrName, + const vespalib::string &dimension) +{ + const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName); + if (attribute == NULL) 
{ + LOG(warning, "The attribute vector '%s' was not found in the attribute manager." + " Returning empty tensor.", attrName.c_str()); + return ConstantTensorExecutor::createEmpty(); + } + if (attribute->getCollectionType() != search::attribute::CollectionType::WSET || + attribute->isFloatingPointType()) { + LOG(warning, "The attribute vector '%s' is NOT of type weighted set of string or integer." + " Returning empty tensor.", attrName.c_str()); + return ConstantTensorExecutor::createEmpty(); + } + if (attribute->isIntegerType()) { + // Using WeightedStringContent ensures that the integer values are converted + // to strings while extracting them from the attribute. + return FeatureExecutor::LP + (new TensorFromAttributeExecutor(attribute, dimension)); + } + // When the underlying attribute is of type string we can reference these values + // using WeightedConstCharContent. + return FeatureExecutor::LP + (new TensorFromAttributeExecutor(attribute, dimension)); +} + +FeatureExecutor::LP +createQueryExecutor(const search::fef::IQueryEnvironment &env, + const vespalib::string &queryKey, + const vespalib::string &dimension) +{ + search::fef::Property prop = env.getProperties().lookup(queryKey); + if (prop.found() && !prop.get().empty()) { + WeightedStringVector vector; + WeightedSetParser::parse(prop.get(), vector); + DefaultTensor::builder tensorBuilder; + TensorBuilder::Dimension dimensionEnum = tensorBuilder.define_dimension(dimension); + for (const auto &elem : vector._data) { + tensorBuilder.add_label(dimensionEnum, elem.value()); + tensorBuilder.add_cell(elem.weight()); + } + return ConstantTensorExecutor::create(tensorBuilder.build()); + } + return ConstantTensorExecutor::createEmpty(); +} + +} + +FeatureExecutor::LP +TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + if (_sourceType == ATTRIBUTE_SOURCE) { + return createAttributeExecutor(env, _sourceParam, _dimension); + } else if (_sourceType == QUERY_SOURCE) { 
+ return createQueryExecutor(env, _sourceParam, _dimension); + } + return ConstantTensorExecutor::createEmpty(); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h new file mode 100644 index 00000000000..f38b811fa36 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/tensor_from_weighted_set_feature.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "tensor_factory_blueprint.h" + +namespace search { +namespace features { + +/** + * Feature blueprint for a rank feature that creates a tensor from a weighted set. + * The weighted set source can be either an attribute vector or query parameter. + */ +class TensorFromWeightedSetBlueprint : public TensorFactoryBlueprint +{ +public: + TensorFromWeightedSetBlueprint(); + virtual search::fef::Blueprint::UP createInstance() const override { + return Blueprint::UP(new TensorFromWeightedSetBlueprint()); + } + virtual search::fef::ParameterDescriptions getDescriptions() const override { + return search::fef::ParameterDescriptions(). + desc().string(). + desc().string().string(); + } + virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) override; + virtual search::fef::FeatureExecutor::LP + createExecutor(const search::fef::IQueryEnvironment &env) const override; +}; + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp b/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp new file mode 100644 index 00000000000..1e242f8b4f3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/term_field_md_feature.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include "term_field_md_feature.h" +#include "utils.h" +LOG_SETUP(".features.term_field_md_feature"); + +using namespace search::fef; + +namespace search { +namespace features { + + +TermFieldMdExecutor::TermFieldMdExecutor(const search::fef::IQueryEnvironment &env, + uint32_t fieldId) + : _terms() +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const search::fef::ITermData *td = env.getTerm(i); + LOG_ASSERT(td != 0); + const search::fef::ITermFieldData *tfd = td->lookupField(fieldId); + if (tfd != 0) { + LOG_ASSERT(tfd->getHandle() != search::fef::IllegalHandle); + _terms.push_back(std::make_pair(tfd->getHandle(), td->getWeight())); + } + } +} + +void +TermFieldMdExecutor::execute(MatchData & match) +{ + uint32_t termsmatched = 0; + uint32_t occs = 0; + feature_t score = 0; + feature_t weight = 0; + feature_t maxTermWeight = 0; + + for (size_t i = 0; i < _terms.size(); ++i) { + const TermFieldMatchData &tfmd = *match.resolveTermField(_terms[i].first); + int32_t termWeight = _terms[i].second.percent(); + + if (tfmd.getDocId() == match.getDocId()) { + ++termsmatched; + score += tfmd.getWeight(); + occs += (tfmd.end() - tfmd.begin()); + if (weight == 0) { + weight = tfmd.getWeight(); + } + if (termWeight > maxTermWeight) { + maxTermWeight = termWeight; + } + } + + } + *match.resolveFeature(outputs()[0]) = score; + *match.resolveFeature(outputs()[1]) = _terms.size(); + *match.resolveFeature(outputs()[2]) = (termsmatched > 0 ? 
1.0 : 0.0); + *match.resolveFeature(outputs()[3]) = termsmatched; + *match.resolveFeature(outputs()[4]) = weight; + *match.resolveFeature(outputs()[5]) = occs; + *match.resolveFeature(outputs()[6]) = maxTermWeight; +} + + +TermFieldMdBlueprint::TermFieldMdBlueprint() : + Blueprint("termFieldMd"), + _field(0) +{ +} + +void +TermFieldMdBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +Blueprint::UP +TermFieldMdBlueprint::createInstance() const +{ + return Blueprint::UP(new TermFieldMdBlueprint()); +} + +bool +TermFieldMdBlueprint::setup(const IIndexEnvironment & env, + const ParameterList & params) +{ + _field = params[0].asField(); + LOG_ASSERT(_field != 0); + + describeOutput("score", "The term field match score"); + describeOutput("terms", "The number of ranked terms searching this field"); + describeOutput("match", "1.0 if some ranked term matched this field, 0.0 otherwise"); + describeOutput("termsmatched", "The number of ranked terms matching this field"); + describeOutput("firstweight", "The first element weight seen"); + describeOutput("occurrences", "The sum of occurrences (positions) in the match data"); + describeOutput("maxTermWeight", "The max term weight among ranked terms matching this field"); + + env.hintFieldAccess(_field->id()); + return true; +} + +FeatureExecutor::LP +TermFieldMdBlueprint::createExecutor(const IQueryEnvironment & env) const +{ + return FeatureExecutor::LP(new TermFieldMdExecutor(env, _field->id())); +} + + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/term_field_md_feature.h b/searchlib/src/vespa/searchlib/features/term_field_md_feature.h new file mode 100644 index 00000000000..b2752b52e80 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/term_field_md_feature.h @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Implements the blueprint for the term field md executor.
 *
 * The blueprint takes a single field parameter and exposes the outputs
 * score, terms, match, termsmatched, firstweight, occurrences and maxTermWeight.
 **/
class TermFieldMdBlueprint : public fef::Blueprint {
    // The field given as the first parameter; assigned in setup().
    const search::fef::FieldInfo * _field;
public:
    /** Creates a blueprint with no field assigned yet. */
    TermFieldMdBlueprint();

    // Inherit doc from Blueprint. The implementation dumps no features.
    virtual void visitDumpFeatures(const fef::IIndexEnvironment & env,
                                   fef::IDumpFeatureVisitor & visitor) const;

    // Inherit doc from Blueprint.
    virtual fef::Blueprint::UP createInstance() const;

    // Inherit doc from Blueprint. Accepts exactly one field parameter.
    virtual fef::ParameterDescriptions getDescriptions() const {
        return fef::ParameterDescriptions().desc().field();
    }

    // Inherit doc from Blueprint. Stores the field, describes the outputs and
    // hints field access to the environment.
    virtual bool setup(const fef::IIndexEnvironment & env,
                       const fef::ParameterList & params);

    // Inherit doc from Blueprint. Creates an executor for the id of the stored field.
    virtual fef::FeatureExecutor::LP createExecutor(const fef::IQueryEnvironment & env) const;
};
/**
 * Finds the smallest distance from an occurrence of term X to a later occurrence
 * of term Y within the same element, and updates the given best values when a
 * smaller distance than the current one is found.
 *
 * @param tmdX      Match data holding the positions of term X.
 * @param tmdY      Match data holding the positions of term Y.
 * @param numTermsX The phrase length of term X; the distance is measured from
 *                  the last word of X.
 * @param bestDist  In/out: the best distance so far; only ever lowered here.
 * @param bestPos   In/out: the position of term X for the best distance.
 */
void
TermDistanceCalculator::findBest(const TermFieldMatchData *tmdX,
                                 const TermFieldMatchData *tmdY,
                                 uint32_t numTermsX,
                                 uint32_t & bestDist,
                                 uint32_t & bestPos)
{
    search::fef::TermFieldMatchData::PositionsIterator itA, itB, epA, epB;
    itA = tmdX->begin();
    epA = tmdX->end();

    itB = tmdY->begin();
    epB = tmdY->end();

    // Offset from the first to the last word of term X.
    // NOTE(review): wraps around if numTermsX is 0 - confirm callers never pass 0.
    uint32_t addA = numTermsX - 1;

    while (itB != epB) {
        uint32_t eid = itB->getElementId();
        // Skip occurrences of X in elements before the current element of Y.
        while (itA != epA && itA->getElementId() < eid) {
            ++itA;
        }
        if (itA != epA && itA->getElementId() == eid) {
            // there is a pair somewhere here
            while (itA != epA &&
                   itB != epB &&
                   itA->getElementId() == eid &&
                   itB->getElementId() == eid)
            {
                uint32_t a = itA->getPosition();
                uint32_t b = itB->getPosition();
                if (a < b) {
                    // Same test as (b - a - addA < bestDist), written without the subtraction.
                    // NOTE(review): if b - a < addA (Y inside the span of phrase X) the
                    // unsigned subtraction below wraps to a very large value.
                    if (b - a < bestDist + addA) {
                        bestDist = b - (a + addA);
                        bestPos = a;
                    }
                    itA++;
                } else {
                    // Y is not after X here; try the next occurrence of Y.
                    itB++;
                }
            }
        } else {
            // No occurrence of X in this element; move on to the next occurrence of Y.
            ++itB;
        }
    }

}
/**
 * This class is used to calculate the minimal forward and reverse term distance
 * between two terms matching in the same field using the position information for both terms.
 *
 * The terms 'a' and 'b' matching the field 'a b x a' will give the following result:
 *   - forwardDist = 1
 *   - forwardTermPos = 0
 *   - reverseDist = 2
 *   - reverseTermPos = 1
 *
 * Note that if we have a phrase 'a b' and term 'c' matching the field 'a b x c' we will get:
 *   - forwardDist = 2 (between b and c)
 *   - forwardTermPos = 0 (pos of first word)
 **/
class TermDistanceCalculator {
public:
    /**
     * Represents an undefined value.
     **/
    static const uint32_t UNDEFINED_VALUE;

    /**
     * Contains the result from running the calculator.
     **/
    struct Result {
        uint32_t forwardDist; // min distance between term X and term Y in the field
        uint32_t forwardTermPos; // the position of term X for that distance
        uint32_t reverseDist; // min distance between term Y and term X in the field
        uint32_t reverseTermPos; // the position of term Y for that distance

        /**
         * Creates a new object with undefined values.
         **/
        Result() { reset(); }

        /**
         * Creates a new object with the given values.
         **/
        Result(uint32_t fd, uint32_t ftp, uint32_t rd, uint32_t rtp) :
            forwardDist(fd), forwardTermPos(ftp), reverseDist(rd), reverseTermPos(rtp) {}

        /**
         * Sets all variables to the undefined value.
         **/
        void reset() {
            forwardDist = UNDEFINED_VALUE;
            forwardTermPos = UNDEFINED_VALUE;
            reverseDist = UNDEFINED_VALUE;
            reverseTermPos = UNDEFINED_VALUE;
        }
    };

private:
    /**
     * Searches for the smallest distance from term X to a later occurrence of
     * term Y and lowers bestDist / bestPos when a smaller distance is found.
     **/
    static void findBest(const search::fef::TermFieldMatchData *tmdX,
                         const search::fef::TermFieldMatchData *tmdY,
                         uint32_t numTermsX,
                         uint32_t & bestDist,
                         uint32_t & bestPos);

public:
    /**
     * Calculates the min forward and reverse distances based on the given
     * match data and the term field handles of the two query terms.
     * The calculated values are stored in the given result object. The result
     * object is left unmodified if either term does not match the current document.
     * NB: Both query terms must have attached term fields with valid term field handles.
     **/
    static void run(const QueryTerm &termX, const QueryTerm &termY,
                    search::fef::MatchData & match, Result & r);
};
+ +#include +#include +LOG_SETUP(".features.nativeproximityfeature"); +#include +#include +#include "termdistancefeature.h" +#include "valuefeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + + +TermDistanceExecutor::TermDistanceExecutor(const IQueryEnvironment & env, + const TermDistanceParams & params) : + FeatureExecutor(), + _params(params), + _termA(env.getTerm(params.termX)), + _termB(env.getTerm(params.termY)) +{ + _termA.fieldHandle(util::getTermFieldData(env, params.termX, params.fieldId)); + _termB.fieldHandle(util::getTermFieldData(env, params.termY, params.fieldId)); +} + +bool TermDistanceExecutor::valid() const +{ + return ((_termA.termData() != 0) && (_termB.termData() != 0) && + (_termA.fieldHandle() != IllegalHandle) && (_termB.fieldHandle() != IllegalHandle)); +} + +void +TermDistanceExecutor::execute(MatchData & match) +{ + TermDistanceCalculator::Result result; + TermDistanceCalculator::run(_termA, _termB, match, result); + *match.resolveFeature(outputs()[0]) = result.forwardDist; + *match.resolveFeature(outputs()[1]) = result.forwardTermPos; + *match.resolveFeature(outputs()[2]) = result.reverseDist; + *match.resolveFeature(outputs()[3]) = result.reverseTermPos; +} + + +TermDistanceBlueprint::TermDistanceBlueprint() : + Blueprint("termDistance"), + _params() +{ +} + +void +TermDistanceBlueprint::visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const +{ +} + +Blueprint::UP +TermDistanceBlueprint::createInstance() const +{ + return Blueprint::UP(new TermDistanceBlueprint()); +} + +bool +TermDistanceBlueprint::setup(const IIndexEnvironment &, + const ParameterList & params) +{ + _params.fieldId = params[0].asField()->id(); + _params.termX = params[1].asInteger(); + _params.termY = params[2].asInteger(); + + describeOutput("forward", "the min distance between term X and term Y in the field"); + describeOutput("forwardTermPosition", "the position of term X for the forward 
/**
 * Parameter bundle handed from the term distance blueprint to its executor.
 * All members start out as 0.
 **/
struct TermDistanceParams {
    uint32_t fieldId = 0; // id of the field to calculate the distance in
    uint32_t termX = 0;   // the first query term (X)
    uint32_t termY = 0;   // the second query term (Y)

    TermDistanceParams() {}
};
/**
 * Implements the executor for calculating min term distance (forward and reverse).
 **/
class TermDistanceExecutor : public search::fef::FeatureExecutor
{
private:
    // Held by reference: the params object passed to the constructor must
    // outlive this executor.
    const TermDistanceParams & _params;
    QueryTerm                  _termA; // query term X
    QueryTerm                  _termB; // query term Y

public:
    /**
     * Looks up the two query terms named by the params and their field handles
     * in the given query environment.
     **/
    TermDistanceExecutor(const search::fef::IQueryEnvironment & env,
                         const TermDistanceParams & params);

    /**
     * Runs the distance calculator and writes forward distance, forward term
     * position, reverse distance and reverse term position to outputs 0-3.
     **/
    virtual void execute(search::fef::MatchData & data);

    /**
     * Returns whether both terms have term data and a legal field handle.
     **/
    bool valid() const;
};
+#include +#include +LOG_SETUP(".features.termeditdistance"); + +#include +#include +#include +#include +#include +#include +#include "termeditdistancefeature.h" +#include "utils.h" + +namespace search { +namespace features { + +//--------------------------------------------------------------------------------------------------------------------- +// TedCell +//--------------------------------------------------------------------------------------------------------------------- +TedCell::TedCell() : + cost(util::FEATURE_MAX), + numDel(0), + numIns(0), + numSub(0) +{ + // empty +} + +TedCell::TedCell(feature_t argCost, uint32_t argNumDel, uint32_t argNumIns, uint32_t argNumSub) : + cost(argCost), + numDel(argNumDel), + numIns(argNumIns), + numSub(argNumSub) +{ + // empty +} + +//--------------------------------------------------------------------------------------------------------------------- +// TermEditDistanceConfig +//--------------------------------------------------------------------------------------------------------------------- +TermEditDistanceConfig::TermEditDistanceConfig() : + fieldId(search::fef::IllegalHandle), + fieldBegin(0), + fieldEnd(std::numeric_limits::max()), + costDel(1), + costIns(1), + costSub(1) +{ + // empty +} + +//--------------------------------------------------------------------------------------------------------------------- +// TermEditDistanceExecutor +//--------------------------------------------------------------------------------------------------------------------- +TermEditDistanceExecutor::TermEditDistanceExecutor(const search::fef::IQueryEnvironment &env, + const TermEditDistanceConfig &config) : + search::fef::FeatureExecutor(), + _config(config), + _fieldHandles(), + _termWeights(), + _lenHandle(search::fef::IllegalHandle), + _prevRow(16), + _thisRow(_prevRow.size()) +{ + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + _fieldHandles.push_back(util::getTermFieldHandle(env, i, config.fieldId)); + 
_termWeights.push_back(1.0f); + + // XXX was intended to use something like this instead of 1.0f: + // const search::fef::TermData& term = *env.getTerm(i); + // term.isMandatory() ? (feature_t)term.getWeight() : 0.0f + } +} + +void +TermEditDistanceExecutor::execute(search::fef::MatchData &match) +{ + // Determine the number of terms in the field. + uint32_t numQueryTerms = _fieldHandles.size(); + uint32_t fieldBegin = _config.fieldBegin; + uint32_t fieldEnd = std::min(_config.fieldEnd, + (uint32_t)*match.resolveFeature(_lenHandle)); + + // _P_A_R_A_N_O_I_A_ + TedCell last; + if (fieldBegin < fieldEnd) { + // Construct the cost table. + uint32_t numFieldTerms = fieldEnd - fieldBegin; + if (_prevRow.size() < numFieldTerms + 1) { + _prevRow.resize(numFieldTerms + 1); + _thisRow.resize(_prevRow.size()); + } + for (uint32_t field = 0; field <= numFieldTerms; ++field) { + _prevRow[field] = TedCell(field * _config.costIns, 0, field, 0); + } + //LOG(debug, "[ F I E L D S ]"); + //logRow(_prevRow, numFieldTerms + 1); + + // Iterate over each query term. + for (uint32_t query = 1; query <= numQueryTerms; ++query) { + search::fef::FieldPositionsIterator it; // this is not vaild + + // Look for a match of this term. + search::fef::TermFieldHandle handle = _fieldHandles[query - 1]; + if (handle != search::fef::IllegalHandle) { + search::fef::TermFieldMatchData &tfmd = *match.resolveTermField(handle); + if (tfmd.getDocId() == match.getDocId()) { + it = tfmd.getIterator(); // this is now valid + while (it.valid() && it.getPosition() < fieldBegin) { + it.next(); // forward to window + } + } + } + + // Predefine the cost of operations on the current term. + feature_t weight = _termWeights[query - 1]; + feature_t costDel = _config.costDel * weight; + feature_t costIns = _config.costIns * weight; + feature_t costSub = _config.costSub * weight; + + // Iterate over each field term. 
+ _thisRow[0] = TedCell(_prevRow[0].cost + costDel, query, 0, 0); + for (uint32_t field = 1; field <= numFieldTerms; ++field) { + // If the iterator is still valid, we _might_ have a match. + if (it.valid()) { + // If the iterator knows an occurance at this field term, this is a match. + if (it.getPosition() == fieldBegin + (field - 1)) { + _thisRow[field] = _prevRow[field - 1]; // no cost + it.next(); + continue; // skip calculations + } + } + + // Determine the least-cost operation. + feature_t del = _prevRow[field ].cost + costDel; // cost per previous query term, ie. ignoring this query term. + feature_t ins = _thisRow[field - 1].cost + costIns; // cost per previous field term, ie. insert this query term. + feature_t sub = _prevRow[field - 1].cost + costSub; // cost to replace field term with query term. + + feature_t min = std::min(del, std::min(ins, sub)); + if (min == del) { + const TedCell &cell = _prevRow[field]; + _thisRow[field] = TedCell(del, cell.numDel + 1, cell.numIns, cell.numSub); + } + else if(min == ins) { + const TedCell &cell = _thisRow[field - 1]; + _thisRow[field] = TedCell(ins, cell.numDel, cell.numIns + 1, cell.numSub); + } + else { + const TedCell &cell = _prevRow[field - 1]; + _thisRow[field] = TedCell(sub, cell.numDel, cell.numIns, cell.numSub + 1); + } + } + _thisRow.swap(_prevRow); + //logRow(_prevRow, numFieldTerms + 1); + } + + // Retrieve the bottom-right value. 
+ last = _prevRow[numFieldTerms]; + } + *match.resolveFeature(outputs()[0]) = last.cost; + *match.resolveFeature(outputs()[1]) = last.numDel; + *match.resolveFeature(outputs()[2]) = last.numIns; + *match.resolveFeature(outputs()[3]) = last.numSub; +} + +void +TermEditDistanceExecutor::logRow(const std::vector &row, size_t numCols) +{ + if (logger.wants(ns_log::Logger::info)) { + vespalib::string str = "[ "; + for (size_t i = 0; i < numCols; ++i) { + str.append(vespalib::make_string("%5.2f", row[i].cost)); + if (i < numCols - 1) { + str.append(" "); + } + } + str.append(" ]"); + LOG(debug, "%s", str.c_str()); + } +} + +//--------------------------------------------------------------------------------------------------------------------- +// TermEditDistanceBlueprint +//--------------------------------------------------------------------------------------------------------------------- +TermEditDistanceBlueprint::TermEditDistanceBlueprint() : + search::fef::Blueprint("termEditDistance"), + _config() +{ + // empty +} + +void +TermEditDistanceBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty +} + +bool +TermEditDistanceBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList ¶ms) +{ + _config.fieldId = params[0].asField()->id(); + + vespalib::string costDel = env.getProperties().lookup(getName(), "costDel").getAt(0); + _config.costDel = costDel.empty() ? 1.0f : atof(costDel.c_str()); + vespalib::string costIns = env.getProperties().lookup(getName(), "costIns").getAt(0); + _config.costIns = costIns.empty() ? 1.0f : atof(costIns.c_str()); + vespalib::string costSub = env.getProperties().lookup(getName(), "costSub").getAt(0); + _config.costSub = costSub.empty() ? 
1.0f : atof(costSub.c_str()); + + defineInput(vespalib::make_string("fieldLength(%s)", params[0].getValue().c_str())); + describeOutput("out", "Term-wise edit distance."); + describeOutput("del", "Number of deletions performed."); + describeOutput("ins", "Number of insertions performed."); + describeOutput("sub", "Number of substitutions performed."); + env.hintFieldAccess(_config.fieldId); + return true; +} + +search::fef::Blueprint::UP +TermEditDistanceBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new TermEditDistanceBlueprint()); +} + +search::fef::FeatureExecutor::LP +TermEditDistanceBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new TermEditDistanceExecutor(env, _config)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h new file mode 100644 index 00000000000..2f897a1c826 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/termeditdistancefeature.h @@ -0,0 +1,153 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements a cell class for the cost table constructed when running the term edit distance calculator. This is + * necessary to keep track of the route actually chosen through the table, since the algorithm itself merely find the + * minimum cost. + */ +class TedCell { +public: + TedCell(); + TedCell(feature_t cost, uint32_t numDel, uint32_t numIns, uint32_t numSub); + + feature_t cost; // The cost at this point. + uint32_t numDel; // The number of deletions to get here. + uint32_t numIns; // The number of insertions to get here. + uint32_t numSub; // The number of substitutions to get here. 
/**
 * Implements the necessary config for the term edit distance calculator. This class exists so that the executor does
 * not need a separate copy of the config parsed by the blueprint, and at the same time avoiding that the executor needs
 * to know about the blueprint.
 */
struct TermEditDistanceConfig {
    /** Creates a config with default costs and an unbounded field window. */
    TermEditDistanceConfig();

    uint32_t  fieldId;    // The id of field to process.
    uint32_t  fieldBegin; // The first field term to evaluate (inclusive).
    uint32_t  fieldEnd;   // One past the last field term to evaluate; the executor
                          // evaluates (fieldEnd - fieldBegin) terms and also caps
                          // this value at the field length.
    feature_t costDel;    // The cost of a delete.
    feature_t costIns;    // The cost of an insert.
    feature_t costSub;    // The cost of a substitution.
};
/**
 * Implements the blueprint for the term edit distance calculator.
 */
class TermEditDistanceBlueprint : public search::fef::Blueprint {
public:
    /**
     * Constructs a new blueprint for the term edit distance calculator.
     */
    TermEditDistanceBlueprint();

    // Inherit doc from Blueprint. The implementation dumps no features.
    virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env,
                                   search::fef::IDumpFeatureVisitor &visitor) const;

    // Inherit doc from Blueprint.
    virtual search::fef::Blueprint::UP createInstance() const;

    // Inherit doc from Blueprint.
    virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const;

    // Inherit doc from Blueprint. Accepts exactly one parameter: a single-value index field.
    virtual search::fef::ParameterDescriptions getDescriptions() const {
        return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE);
    }

    /**
     * Configures the blueprint. The only parameter is the name of the field to calculate the distance for.
     *
     * The cost of each operation is read from the properties of the index environment, looked up under the
     * name of this feature. Each cost defaults to 1.0 when the property is not set:
     *
     * - "costDel": The cost of ignoring a query term, this is typically HIGH.
     * - "costIns": The cost of inserting a field term into the query term, this is typically LOW.
     * - "costSub": The cost of substituting a field term with a query term, this is also typically LOW.
     *
     * The costs are multiplied by the weight of each query term; the executor currently uses a weight of 1.0
     * for every term. The field positions to begin and end iteration at are not configurable here; the
     * defaults of the config are used.
     *
     * The feature depends on the fieldLength feature of the given field and has the outputs "out", "del",
     * "ins" and "sub".
     *
     * @param env    The index environment.
     * @param params A list holding the field parameter mentioned above.
     * @return Whether or not setup was possible.
     */
    virtual bool setup(const search::fef::IIndexEnvironment & env,
                       const search::fef::ParameterList & params);

private:
    TermEditDistanceConfig _config; // The config for this blueprint.
};
+#include +#include +LOG_SETUP(".features.termfeature"); + +#include +#include +#include +#include +#include +#include +#include "termfeature.h" +#include "utils.h" + +using namespace search::fef; + +namespace search { +namespace features { + +TermExecutor::TermExecutor(const search::fef::IQueryEnvironment &env, + uint32_t termId) : + search::fef::FeatureExecutor(), + _termData(env.getTerm(termId)), + _connectedness(util::lookupConnectedness(env, termId)), + _significance(0) +{ + if (_termData != NULL) { + feature_t fallback = util::getSignificance(*_termData); + _significance = util::lookupSignificance(env, termId, fallback); + } +} + +void +TermExecutor::execute(search::fef::MatchData &match) +{ + if (_termData == NULL) { // this query term is not present in the query + *match.resolveFeature(outputs()[0]) = 0.0f; // connectedness + *match.resolveFeature(outputs()[1]) = 0.0f; // significance (1 - frequency) + *match.resolveFeature(outputs()[2]) = 0.0f; // weight + return; + } + *match.resolveFeature(outputs()[0]) = _connectedness; + *match.resolveFeature(outputs()[1]) = _significance; + *match.resolveFeature(outputs()[2]) = (feature_t)_termData->getWeight().percent(); +} + +TermBlueprint::TermBlueprint() : + search::fef::Blueprint("term"), + _termId(0) +{ + // empty +} + +void +TermBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + int numTerms = atoi(env.getProperties().lookup(getBaseName(), "numTerms").get("5").c_str()); + for (int term = 0; term < numTerms; ++term) { + search::fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(vespalib::make_string("%d", term)); + visitor.visitDumpFeature(fnb.output("connectedness").buildName()); + visitor.visitDumpFeature(fnb.output("significance").buildName()); + visitor.visitDumpFeature(fnb.output("weight").buildName()); + } +} + +bool +TermBlueprint::setup(const search::fef::IIndexEnvironment &, + const search::fef::ParameterList 
&params) +{ + _termId = params[0].asInteger(); + describeOutput("connectedness", "The normalized strength with which this term is connected to the next term in the query."); + describeOutput("significance", "1 - the normalized frequency of documents containing this query term."); + describeOutput("weight", "The normalized importance of matching this query term."); + return true; +} + +search::fef::Blueprint::UP +TermBlueprint::createInstance() const +{ + return search::fef::Blueprint::UP(new TermBlueprint()); +} + +search::fef::FeatureExecutor::LP +TermBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new TermExecutor(env, _termId)); +} + +}} diff --git a/searchlib/src/vespa/searchlib/features/termfeature.h b/searchlib/src/vespa/searchlib/features/termfeature.h new file mode 100644 index 00000000000..6394fd10936 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/termfeature.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +/** + * Implements the executor for term feature. + */ +class TermExecutor : public search::fef::FeatureExecutor { +public: + /** + * Constructs an executor for term feature. + * + * @param env The query environment. + * @param termId The id of the query term to evaluate. + */ + TermExecutor(const search::fef::IQueryEnvironment &env, + uint32_t termId); + virtual void execute(search::fef::MatchData &data); + +private: + const search::fef::ITermData *_termData; + feature_t _connectedness; + feature_t _significance; +}; + +/** + * Implements the blueprint for term feature. + */ +class TermBlueprint : public search::fef::Blueprint { +public: + /** + * Constructs a blueprint for term feature. + */ + TermBlueprint(); + + // Inherit doc from Blueprint. 
+ virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + + // Inherit doc from Blueprint. + virtual search::fef::Blueprint::UP createInstance() const; + + // Inherit doc from Blueprint. + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().number(); + } + + // Inherit doc from Blueprint. + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + + // Inherit doc from Blueprint. + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &env) const; + +private: + uint32_t _termId; +}; + +}} + diff --git a/searchlib/src/vespa/searchlib/features/terminfofeature.cpp b/searchlib/src/vespa/searchlib/features/terminfofeature.cpp new file mode 100644 index 00000000000..5ca385b0440 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/terminfofeature.cpp @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.terminfo"); + +#include +#include +#include +#include +#include +#include +#include +#include "terminfofeature.h" +#include "valuefeature.h" + +namespace search { +namespace features { + +TermInfoBlueprint::TermInfoBlueprint() + : search::fef::Blueprint("termInfo"), + _termIdx(0) +{ +} + +void +TermInfoBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ +} + +bool +TermInfoBlueprint::setup(const search::fef::IIndexEnvironment &, + const search::fef::ParameterList & params) +{ + _termIdx = params[0].asInteger(); + describeOutput("queryidx", "The index of the first term with the given " + "term index in the query term ordering. 
-1 if not found."); + return true; +} + +search::fef::FeatureExecutor::LP +TermInfoBlueprint::createExecutor(const search::fef::IQueryEnvironment &queryEnv) const +{ + feature_t queryIdx = -1.0; + if (queryEnv.getNumTerms() > _termIdx) { + queryIdx = _termIdx; + } + std::vector values; + values.push_back(queryIdx); + return search::fef::FeatureExecutor::LP(new ValueExecutor(values)); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/terminfofeature.h b/searchlib/src/vespa/searchlib/features/terminfofeature.h new file mode 100644 index 00000000000..063277c9b1f --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/terminfofeature.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +class TermInfoBlueprint : public search::fef::Blueprint +{ +private: + uint32_t _termIdx; + +public: + TermInfoBlueprint(); + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &indexEnv, + search::fef::IDumpFeatureVisitor &visitor) const; + virtual search::fef::Blueprint::UP createInstance() const { return search::fef::Blueprint::UP(new TermInfoBlueprint()); } + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().number(); + } + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment &queryEnv) const; +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp b/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp new file mode 100644 index 00000000000..794e67560b4 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/features/text_similarity_feature.cpp @@ -0,0 +1,220 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.textsimilarity"); +#include "text_similarity_feature.h" + +namespace search { +namespace features { + +namespace { + +struct Term { + search::fef::TermFieldHandle handle; + int weight; + int index; + Term(search::fef::TermFieldHandle handle_in, int weight_in, int index_in) + : handle(handle_in), weight(weight_in), index(index_in) {} +}; + +struct State { + uint32_t field_length; + uint32_t matched_terms; + int sum_term_weight; + uint32_t last_pos; + double sum_proximity_score; + uint32_t last_idx; + uint32_t num_in_order; + + State(uint32_t length, uint32_t first_pos, int32_t first_weight, uint32_t first_idx) + : field_length(length), + matched_terms(1), sum_term_weight(first_weight), + last_pos(first_pos), sum_proximity_score(0.0), + last_idx(first_idx), num_in_order(0) {} + + double proximity_score(uint32_t dist) { + return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); + } + + bool want_match(uint32_t pos) { + return (pos > last_pos); + } + + void addMatch(uint32_t pos, int32_t weight, uint32_t idx) { + sum_proximity_score += proximity_score(pos - last_pos); + num_in_order += (idx > last_idx) ? 1 : 0; + last_pos = pos; + last_idx = idx; + ++matched_terms; + sum_term_weight += weight; + } + + void calculateScore(size_t num_query_terms, int total_term_weight, + double &score_out, + double &proximity_out, double &order_out, + double &query_coverage_out, double &field_coverage_out) + { + double matches = std::min(field_length, matched_terms); + if (matches < 2) { + proximity_out = proximity_score(field_length); + order_out = (num_query_terms == 1) ? 
1.0 : 0.0; + } else { + proximity_out = sum_proximity_score / (matches - 1); + order_out = num_in_order / (double) (matches - 1); + } + query_coverage_out = sum_term_weight / (double) total_term_weight; + field_coverage_out = matches / (double) field_length; + score_out = (0.35 * proximity_out) + (0.15 * order_out) + + (0.30 * query_coverage_out) + (0.20 * field_coverage_out); + } +}; + +} // namespace search::features:: + +//----------------------------------------------------------------------------- + +TextSimilarityExecutor::TextSimilarityExecutor(const search::fef::IQueryEnvironment &env, + uint32_t field_id) + : _handles(), + _weights(), + _total_term_weight(0), + _queue() +{ + std::vector terms; + for (uint32_t i = 0; i < env.getNumTerms(); ++i) { + const search::fef::ITermData *termData = env.getTerm(i); + if (termData->getWeight().percent() != 0) { // only consider query terms with contribution + typedef search::fef::ITermFieldRangeAdapter FRA; + for (FRA iter(*termData); iter.valid(); iter.next()) { + const search::fef::ITermFieldData &tfd = iter.get(); + if (tfd.getFieldId() == field_id) { + int term_weight = termData->getWeight().percent(); + _total_term_weight += term_weight; + terms.push_back(Term(tfd.getHandle(), term_weight, + termData->getTermIndex())); + } + } + } + } + std::sort(terms.begin(), terms.end(), [](const Term &a, const Term &b){ return (a.index < b.index); }); + _handles.reserve(terms.size()); + _weights.reserve(terms.size()); + for (size_t i = 0; i < terms.size(); ++i) { + _handles.push_back(terms[i].handle); + _weights.push_back(terms[i].weight); + } +} + +void +TextSimilarityExecutor::execute(search::fef::MatchData &data) +{ + for (size_t i = 0; i < _handles.size(); ++i) { + search::fef::TermFieldMatchData *tfmd = data.resolveTermField(_handles[i]); + if (tfmd->getDocId() == data.getDocId()) { + Item item(i, tfmd->begin(), tfmd->end()); + if (item.pos != item.end) { + _queue.push(item); + } + } + } + if (_queue.empty()) { + 
*data.resolveFeature(outputs()[0]) = 0.0; + *data.resolveFeature(outputs()[1]) = 0.0; + *data.resolveFeature(outputs()[2]) = 0.0; + *data.resolveFeature(outputs()[3]) = 0.0; + *data.resolveFeature(outputs()[4]) = 0.0; + return; + } + const Item &first = _queue.front(); + State state(first.pos->getElementLen(), + first.pos->getPosition(), + _weights[first.idx], + first.idx); + _queue.pop_front(); + while (!_queue.empty()) { + Item &item = _queue.front(); + if (state.want_match(item.pos->getPosition())) { + state.addMatch(item.pos->getPosition(), + _weights[item.idx], + item.idx); + _queue.pop_front(); + } else { + ++item.pos; + if (item.pos == item.end) { + _queue.pop_front(); + } else { + _queue.adjust(); + } + } + } + state.calculateScore(_handles.size(), _total_term_weight, + *data.resolveFeature(outputs()[0]), + *data.resolveFeature(outputs()[1]), + *data.resolveFeature(outputs()[2]), + *data.resolveFeature(outputs()[3]), + *data.resolveFeature(outputs()[4])); +} + +//----------------------------------------------------------------------------- + +const vespalib::string TextSimilarityBlueprint::score_output("score"); +const vespalib::string TextSimilarityBlueprint::proximity_output("proximity"); +const vespalib::string TextSimilarityBlueprint::order_output("order"); +const vespalib::string TextSimilarityBlueprint::query_coverage_output("queryCoverage"); +const vespalib::string TextSimilarityBlueprint::field_coverage_output("fieldCoverage"); + +TextSimilarityBlueprint::TextSimilarityBlueprint() + : Blueprint("textSimilarity"), _field_id(fef::IllegalHandle) {} + +void +TextSimilarityBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const +{ + for (uint32_t i = 0; i < env.getNumFields(); ++i) { + const search::fef::FieldInfo &field = *env.getField(i); + if (field.type() == search::fef::FieldType::INDEX) { + if (!field.isFilter() && field.collection() == fef::CollectionType::SINGLE) { + 
search::fef::FeatureNameBuilder fnb; + fnb.baseName(getBaseName()).parameter(field.name()); + visitor.visitDumpFeature(fnb.output(score_output).buildName()); + visitor.visitDumpFeature(fnb.output(proximity_output).buildName()); + visitor.visitDumpFeature(fnb.output(order_output).buildName()); + visitor.visitDumpFeature(fnb.output(query_coverage_output).buildName()); + visitor.visitDumpFeature(fnb.output(field_coverage_output).buildName()); + } + } + } +} + +search::fef::Blueprint::UP +TextSimilarityBlueprint::createInstance() const +{ + return Blueprint::UP(new TextSimilarityBlueprint()); +} + +bool +TextSimilarityBlueprint::setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params) +{ + const search::fef::FieldInfo *field = params[0].asField(); + _field_id = field->id(); + describeOutput(score_output, "default normalized combination of other outputs"); + describeOutput(proximity_output, "normalized match proximity score"); + describeOutput(order_output, "normalized match order score"); + describeOutput(query_coverage_output, "normalized query match coverage"); + describeOutput(field_coverage_output, "normalized field match coverage"); + env.hintFieldAccess(field->id()); + return true; +} + +search::fef::FeatureExecutor::LP +TextSimilarityBlueprint::createExecutor(const search::fef::IQueryEnvironment &env) const +{ + return search::fef::FeatureExecutor::LP(new TextSimilarityExecutor(env, _field_id)); +} + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/text_similarity_feature.h b/searchlib/src/vespa/searchlib/features/text_similarity_feature.h new file mode 100644 index 00000000000..88969b13ac6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/text_similarity_feature.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace features { + +//----------------------------------------------------------------------------- + +class TextSimilarityExecutor : public search::fef::FeatureExecutor +{ +private: + std::vector _handles; + std::vector _weights; + int _total_term_weight; + + struct Item { + uint32_t idx; + search::fef::TermFieldMatchData::PositionsIterator pos; + search::fef::TermFieldMatchData::PositionsIterator end; + Item(uint32_t idx_in, + search::fef::TermFieldMatchData::PositionsIterator pos_in, + search::fef::TermFieldMatchData::PositionsIterator end_in) + : idx(idx_in), pos(pos_in), end(end_in) {} + bool operator<(const Item &other) const { + return (pos->getPosition() == other.pos->getPosition()) + ? (idx < other.idx) + : (pos->getPosition() < other.pos->getPosition()); + } + }; + + vespalib::PriorityQueue _queue; + +public: + TextSimilarityExecutor(const search::fef::IQueryEnvironment &env, uint32_t field_id); + virtual bool isPure() { return _handles.empty(); } + virtual void execute(search::fef::MatchData & data); +}; + +//----------------------------------------------------------------------------- + +class TextSimilarityBlueprint : public search::fef::Blueprint +{ +private: + static const vespalib::string score_output; + static const vespalib::string proximity_output; + static const vespalib::string order_output; + static const vespalib::string query_coverage_output; + static const vespalib::string field_coverage_output; + + uint32_t _field_id; + +public: + TextSimilarityBlueprint(); + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment &env, + search::fef::IDumpFeatureVisitor &visitor) const; + virtual search::fef::Blueprint::UP createInstance() const; + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().indexField(search::fef::ParameterCollection::SINGLE); + } + 
virtual bool setup(const search::fef::IIndexEnvironment &env, + const search::fef::ParameterList &params); + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & env) const; +}; + +//----------------------------------------------------------------------------- + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/utils.cpp b/searchlib/src/vespa/searchlib/features/utils.cpp new file mode 100644 index 00000000000..0f19a2b4e3c --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/utils.cpp @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".features.utils"); +#include "utils.h" +#include +#include +#include +#include + +#include +#include + +using namespace search::fef; + +namespace search { +namespace features { +namespace util { + +feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback) +{ + if (termId == 0) { + return fallback; // no previous term + } + + const ITermData * data = env.getTerm(termId); + const ITermData * prev = env.getTerm(termId - 1); + if (data == NULL || prev == NULL) { + return fallback; // default value + } + return lookupConnectedness(env, data->getUniqueId(), prev->getUniqueId(), fallback); +} + +feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, + uint32_t currUniqueId, uint32_t prevUniqueId, feature_t fallback) +{ + // Connectedness of 0.5 between term with unique id 2 and term with unique id 1 is represented as: + // [vespa.term.2.connexity: "1", vespa.term.2.connexity: "0.5"] + vespalib::asciistream os; + os << "vespa.term." 
<< currUniqueId << ".connexity"; + Property p = env.getProperties().lookup(os.str()); + if (p.size() == 2) { + // we have a defined connectedness with the previous term + if (strToNum(p.getAt(0)) == prevUniqueId) { + return strToNum(p.getAt(1)); + } + } + return fallback; +} + +feature_t lookupSignificance(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback) +{ + const ITermData * data = env.getTerm(termId); + if (data == NULL) { + return fallback; + } + + // Significance of 0.5 for term with unique id 1 is represented as: + // [vespa.term.1.significance: "0.5"] + vespalib::asciistream os; + os << "vespa.term." << data->getUniqueId() << ".significance"; + Property p = env.getProperties().lookup(os.str()); + if (p.found()) { + return strToNum(p.get()); + } + + return fallback; +} + +double getRobertsonSparckJonesWeight(double docCount, double docsInCorpus) +{ + return log((docsInCorpus - docCount + 0.5)/(docCount + 0.5)); +} + +static const double N = 1000000.0; + +feature_t getSignificance(double docFreq) +{ + if (docFreq < (1.0/N)) { + docFreq = 1.0/N; + } + if (docFreq > 1.0) { + docFreq = 1.0; + } + double d = log(docFreq)/log(1.0/N); + return 0.5 + 0.5 * d; +#if 0 + double n = docFreq * N; + n = (n == 0) ? 1 : (n > N ? 
N : n); + double a = getRobertsonSparckJonesWeight(1, N + 1); + double b = getRobertsonSparckJonesWeight(N + 1, N + 1); + double w = getRobertsonSparckJonesWeight(n, N + 1); + return ((w - b)/(a - b)); +#endif +} + +feature_t getSignificance(const search::fef::ITermData &termData) +{ + typedef search::fef::ITermFieldRangeAdapter FRA; + double df = 0; + for (FRA iter(termData); iter.valid(); iter.next()) { + df = std::max(df, iter.get().getDocFreq()); + } + + feature_t signif = getSignificance(df); + LOG(debug, "getSignificance %e %f [ %e %f ] = %e", df, df, df * N, df * N, signif); + return signif; +} + +const search::fef::Table * +lookupTable(const search::fef::IIndexEnvironment & env, const vespalib::string & featureName, + const vespalib::string & table, const vespalib::string & fieldName, const vespalib::string & fallback) +{ + vespalib::string tn1 = env.getProperties().lookup(featureName, table).get(fallback); + vespalib::string tn2 = env.getProperties().lookup(featureName, table, fieldName).get(tn1); + const search::fef::Table * retval = env.getTableManager().getTable(tn2); + if (retval == NULL) { + LOG(warning, "Could not find the %s '%s' to be used for field '%s' in feature '%s'", + table.c_str(), tn2.c_str(), fieldName.c_str(), featureName.c_str()); + } + return retval; +} + +const search::fef::ITermData * +getTermByLabel(const search::fef::IQueryEnvironment &env, const vespalib::string &label) +{ + // Labeling the query item with unique id '5' with the label 'foo' + // is represented as: [vespa.label.foo.id: "5"] + vespalib::asciistream os; + os << "vespa.label." 
<< label << ".id"; + Property p = env.getProperties().lookup(os.str()); + if (!p.found()) { + return 0; + } + uint32_t uid = strToNum(p.get()); + if (uid == 0) { + LOG(warning, "Query label '%s' was attached to invalid unique id: '%s'", + label.c_str(), p.get().c_str()); + return 0; + } + for (uint32_t i(0), m(env.getNumTerms()); i < m; ++i) { + const ITermData *term = env.getTerm(i); + if (term->getUniqueId() == uid) { + return term; + } + } + LOG(warning, "Query label '%s' was attached to non-existing unique id: '%s'", + label.c_str(), p.get().c_str()); + return 0; +} + +} // namespace util +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/utils.h b/searchlib/src/vespa/searchlib/features/utils.h new file mode 100644 index 00000000000..bc830aaa9d3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/utils.h @@ -0,0 +1,234 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace features { +namespace util { + +/** + * Maximum feature value + */ +const feature_t FEATURE_MAX = std::numeric_limits::max(); + +/** + * Minimum feature value + */ +const feature_t FEATURE_MIN = -std::numeric_limits::max(); + +typedef const char * ConstCharPtr; + +/** + * Converts the given string to a numeric value. + * + * @param str The string to convert. + * @return The numeric value. + */ +template +T strToNum(const vespalib::stringref &str) +{ + vespalib::asciistream iss(str); + T retval = 0; + try { + iss >> retval; + } catch (const vespalib::IllegalArgumentException &) { + } + return retval; +} + +template +feature_t getAsFeature(const T &value) __attribute__((__always_inline__)); + +/** + * Converts the given value to a feature value. + * + * @param value The value to convert. + * @return The feature value. 
+ */ +template +inline feature_t getAsFeature(const T &value) +{ + return static_cast(value); +} + +/** + * Specialization for const char *. + * + * @param value The string to convert. + * @return The feature value. + */ +template <> +inline feature_t getAsFeature(const ConstCharPtr & value) { + return static_cast(vespalib::hash_code(value, strlen(value))); +} + +/** + * Specialization for a string value. + * + * @param value The string to convert. + * @return The feature value. + */ +template <> +inline feature_t getAsFeature(const vespalib::string & value) { + return static_cast(vespalib::hash_code(value)); +} + +/** + * Specialization for a string value. + * + * @param value The string to convert. + * @return The feature value. + */ +template <> +inline feature_t getAsFeature(const vespalib::stringref & value) { + return static_cast(vespalib::hash_code(value)); +} + + +/** + * This method inputs a value to cap to the range [capFloor, capCeil] and then normalize this + * value to the unit range [0, 1]. + * + * @param val The value to unit normalize. + * @param capFloor The minimum value of the cap range. + * @param capCeil The maximum value of the cap range. + * @return The unit normalized value. + */ +template +T unitNormalize(const T &val, const T &capFloor, const T &capCeil) +{ + return (std::max(capFloor, std::min(capCeil, val)) - capFloor) / (capCeil - capFloor); +} + +/** + * Returns the normalized strength with which the given term is connected to the previous term in the query. + * Uses the property map of the query environment to lookup this data. + * + * @param env The query environment. + * @param termId The term id. + * @param fallback The value to return if the connectedness was not found in the property map. + * @return The connectedness. 
+ */ +feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback = 0.1f); + +/** + * Returns the normalized strength with which the given current term is connected to the given previous term. + * Uses the property map of the query environment to lookup this data. + * + * @param env The query environment. + * @param currUniqueId Unique id of the current term. + * @param prevUniqueId Unique id of the previous term. + * @param fallback The value to return if the connectedness was not found in the property map. + * @return The connectedness between the current term and previous term. + */ +feature_t lookupConnectedness(const search::fef::IQueryEnvironment & env, + uint32_t currUniqueId, uint32_t prevUniqueId, feature_t fallback = 0.1f); + +/** + * Returns the significance of the given term. + * Uses the property map of the query environment to lookup this data. + * + * @param env The query environment. + * @param termId The term id. + * @param fallback The value to return if the significance was not found in the property map. + * @return The significance. + */ +feature_t lookupSignificance(const search::fef::IQueryEnvironment & env, uint32_t termId, feature_t fallback = 0.0f); + +/** + * Returns the Robertson-Sparck-Jones weight based on the given document count + * (number of documents containing the term) and the number of documents in the corpus. + * This weight is a variant of inverse document frequency. + */ +double getRobertsonSparckJonesWeight(double docCount, double docsInCorpus); + +/** + * Returns the significance based on the given scaled number of documents containing the term. + * + * @param docFreq The scaled number of documents containing the term. + * @return The significance. + */ +feature_t getSignificance(double docFreq); + +/** + * Returns the significance based on max known frequency of the term + * + * @param termData Data for the term + * @return The significance. 
+ */ +feature_t getSignificance(const search::fef::ITermData &termData); + +/** + * Looks up a table using the properties and the table manager in the given index environment. + * The table name is found by looking up the following properties and using the first found: + * 'featureName.table.fieldName', 'featureName.table'. + * The table name 'fallback' is used if no properties are found. + * + * @param env the index environment. + * @param featureName the name of the feature. + * @param table the table to be used by the feature. + * @param fieldName the name of the field we want to look up a table for. + * @param fallback the actual name of the table to use if we do not find any properties. + * @return the table pointer or NULL if not found. + **/ +const search::fef::Table * +lookupTable(const search::fef::IIndexEnvironment & env, const vespalib::string & featureName, + const vespalib::string & table, const vespalib::string & fieldName, const vespalib::string & fallback); + +/** + * Obtain query information for a term/field combination. + * + * @return query information for a term/field combination, or 0 if not found + * @param env query environment + * @param termId the term id + * @param fieldId the field id + **/ +inline const search::fef::ITermFieldData * +getTermFieldData(const search::fef::IQueryEnvironment &env, uint32_t termId, uint32_t fieldId) { + const search::fef::ITermData *td = env.getTerm(termId); + return (td == 0) ? 0 : td->lookupField(fieldId); +} + +/** + * Obtain the match handle for the given term within the given field. + * + * @return match handle, or IllegalHandle if not found + * @param env query environment + * @param termId the term id + * @param fieldId the field id + **/ +inline search::fef::TermFieldHandle +getTermFieldHandle(const search::fef::IQueryEnvironment &env, uint32_t termId, uint32_t fieldId) { + const search::fef::ITermFieldData *tfd = getTermFieldData(env, termId, fieldId); + return (tfd == 0) ? 
search::fef::IllegalHandle : tfd->getHandle(); +} + +/** + * Obtain the term annotated with the given label. This function will + * reverse map label to unique id and then traverse the query + * environment trying to locate the term with the appropriate unique + * id. If no such term can be found, 0 will be returned. + * + * @return term with given label, or 0 if not found + * @param env query environment + * @param label query item label + **/ +const search::fef::ITermData * +getTermByLabel(const search::fef::IQueryEnvironment &env, const vespalib::string &label); + +} // namespace util +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/valuefeature.cpp b/searchlib/src/vespa/searchlib/features/valuefeature.cpp new file mode 100644 index 00000000000..1a1a202744b --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/valuefeature.cpp @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".features.value"); +#include "valuefeature.h" + +#include + +namespace search { +namespace features { + +ValueExecutor::ValueExecutor(const std::vector & values) : + search::fef::FeatureExecutor(), + _values(values) +{ + // empty: the values are copied in the initializer list +} + +void +ValueExecutor::execute(search::fef::MatchData & data) +{ + for (uint32_t i = 0; i < _values.size(); ++i) { + *data.resolveFeature(outputs()[i]) = _values[i]; /* output i receives the i'th stored value */ + } +} + +void +SingleZeroValueExecutor::execute(search::fef::MatchData & data) +{ + *data.resolveFeature(outputs()[0]) = 0.0; /* only the first output is written */ +} + +ValueBlueprint::ValueBlueprint() : + search::fef::Blueprint("value"), + _values() +{ + // empty: starts without values; setup() fills them in +} + +void +ValueBlueprint::visitDumpFeatures(const search::fef::IIndexEnvironment &, + search::fef::IDumpFeatureVisitor &) const +{ + // empty: nothing is reported to the dump feature visitor +} + +bool +ValueBlueprint::setup(const search::fef::IIndexEnvironment &, + const search::fef::ParameterList & params) +{ + for (uint32_t i = 0; i < params.size(); ++i) { + _values.push_back(params[i].asDouble()); /* parameter i becomes the value of output i */ + std::ostringstream name; + name << i; /* outputs are named by parameter position: "0", "1", ... */ + std::ostringstream desc; + desc << "value " << i; + describeOutput(name.str(), desc.str()); + // we have no inputs + } + return true; +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/valuefeature.h b/searchlib/src/vespa/searchlib/features/valuefeature.h new file mode 100644 index 00000000000..c1d462b645c --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/valuefeature.h @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace features { + +class ValueExecutor : public search::fef::FeatureExecutor /* executor that outputs a fixed list of values */ +{ +private: + std::vector _values; /* values written to the outputs by execute() */ + +public: + ValueExecutor(const std::vector & values); + virtual bool isPure() { return true; } + virtual void execute(search::fef::MatchData & data); + const std::vector & getValues() const { return _values; } +}; + +class SingleZeroValueExecutor : public search::fef::FeatureExecutor /* executor that writes 0.0 to its first output */ +{ +public: + SingleZeroValueExecutor() : FeatureExecutor() {} + virtual bool isPure() { return true; } + virtual void execute(search::fef::MatchData & data); +}; + + +class ValueBlueprint : public search::fef::Blueprint /* blueprint registered under the base name "value" */ +{ +private: + std::vector _values; /* filled from the parameters in setup() */ + +public: + ValueBlueprint(); + + virtual void visitDumpFeatures(const search::fef::IIndexEnvironment & indexEnv, + search::fef::IDumpFeatureVisitor & visitor) const; + virtual search::fef::Blueprint::UP createInstance() const { return Blueprint::UP(new ValueBlueprint()); } /* fresh blueprint without any values */ + virtual search::fef::ParameterDescriptions getDescriptions() const { + return search::fef::ParameterDescriptions().desc().number().number().repeat(); /* NOTE(review): presumably a list of numeric parameters - confirm the repeat() semantics */ + } + virtual bool setup(const search::fef::IIndexEnvironment & env, + const search::fef::ParameterList & params); + virtual search::fef::FeatureExecutor::LP createExecutor(const search::fef::IQueryEnvironment & queryEnv) const { + (void) queryEnv; /* unused: the executor only needs the stored values */ + return search::fef::FeatureExecutor::LP(new ValueExecutor(_values)); + } +}; + +} // namespace features +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp b/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp new file mode 100644 index 00000000000..3d2f03d54eb --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".features.weighted_set_parser"); + +#include "weighted_set_parser.h" + +namespace search { +namespace features { + +void +WeightedSetParser::logWarning(const vespalib::string &msg) +{ + LOG(warning, "%s", msg.c_str()); +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.h b/searchlib/src/vespa/searchlib/features/weighted_set_parser.h new file mode 100644 index 00000000000..929b1356a08 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace features { + +/** + * Utility for parsing a string representation of a weighted set + * that is typically passed down with the query. + * + * The format of the weighted set is as follows: + * {key1:weight1,key2:weight2,...,keyN:weightN}. + */ +class WeightedSetParser +{ +private: + static void logWarning(const vespalib::string &msg); + +public: + template + static void parse(const vespalib::string &input, OutputType &output); +}; + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp b/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp new file mode 100644 index 00000000000..cbc67d411d3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/features/weighted_set_parser.hpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "weighted_set_parser.h" +#include + +namespace search { +namespace features { + +template +void +WeightedSetParser::parse(const vespalib::string &input, OutputType &output) +{ + size_t len = input.size(); + // Note that we still handle '(' and ')' for backward compatibility. 
+ if (len >= 2 && ((input[0] == '{' && input[len - 1] == '}') || + (input[0] == '(' && input[len - 1] == ')')) ) { + vespalib::stringref s(input.c_str()+1, len - 2); + while ( ! s.empty() ) { + vespalib::string::size_type commaPos(s.find(',')); + vespalib::stringref item(s.substr(0, commaPos)); + vespalib::string::size_type colonPos(item.find(':')); + if (colonPos != vespalib::string::npos) { + vespalib::string tmpKey(item.substr(0, colonPos)); + vespalib::string::size_type start(tmpKey.find_first_not_of(' ')); + vespalib::stringref key(tmpKey.c_str() + start, colonPos - start); + vespalib::stringref value(item.substr(colonPos+1)); + output.insert(key, value); + } else { + logWarning(vespalib::make_string( + "Could not parse item '%s' in input string '%s', skipping. " + "Expected ':' between key and weight.", item.c_str(), input.c_str())); + } + if (commaPos != vespalib::string::npos) { + s = s.substr(commaPos+1); + } else { + s = vespalib::stringref(); + } + } + } else { + logWarning(vespalib::make_string("Could not parse input string '%s'. " + "Expected surrounding '(' and ')' or '{' and '}'.", input.c_str())); + } +} + +} // namespace features +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/.gitignore b/searchlib/src/vespa/searchlib/fef/.gitignore new file mode 100644 index 00000000000..38092de6898 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/.gitignore @@ -0,0 +1,4 @@ +*.So +.depend +Makefile +html diff --git a/searchlib/src/vespa/searchlib/fef/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt new file mode 100644 index 00000000000..0004779ed91 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_fef OBJECT + SOURCES + blueprint.cpp + blueprintfactory.cpp + blueprintresolver.cpp + collection_type.cpp + feature_type.cpp + featureexecutor.cpp + featurenamebuilder.cpp + featurenameparser.cpp + featureoverrider.cpp + fef.cpp + fieldinfo.cpp + fieldpositionsiterator.cpp + fieldtype.cpp + filetablefactory.cpp + functiontablefactory.cpp + indexproperties.cpp + location.cpp + matchdata.cpp + matchdatalayout.cpp + objectstore.cpp + parameter.cpp + parameterdescriptions.cpp + parametervalidator.cpp + phrasesplitter.cpp + properties.cpp + queryproperties.cpp + rank_program.cpp + ranksetup.cpp + simpletermdata.cpp + simpletermfielddata.cpp + sumexecutor.cpp + symmetrictable.cpp + table.cpp + tablemanager.cpp + termfieldmatchdata.cpp + termfieldmatchdataposition.cpp + termmatchdatamerger.cpp + utils.cpp + verify_feature.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/fef/Doxyfile b/searchlib/src/vespa/searchlib/fef/Doxyfile new file mode 100644 index 00000000000..9c3496fc985 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/Doxyfile @@ -0,0 +1,1162 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Doxyfile 1.3.9.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. 
+ +PROJECT_NAME = "Feature Execution Framework" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of source +# files, where putting all generated files in the same directory would otherwise +# cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, +# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, +# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, +# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, +# Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# This tag can be used to specify the encoding used in the generated output. +# The encoding is not always determined by the language that is chosen, +# but also whether or not the output is meant for Windows or non-Windows users. 
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES +# forces the Windows encoding (this is the default for the Windows binary), +# whereas setting the tag to NO uses a Unix-style encoding (the default for +# all platforms other than Windows). + +USE_WINDOWS_ENCODING = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is used +# as the annotated text. Otherwise, the brief description is used as-is. If left +# blank, the following values are used ("$name" is automatically replaced with the +# name of the entity): "The $name class" "The $name widget" "The $name file" +# "is" "provides" "specifies" "contains" "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited +# members of a class in the documentation of that class as if those members were +# ordinary class members. Constructors, destructors and assignment operators of +# the base classes will not be shown. 
+ +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before file names in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources +# only. 
Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. 
+ +EXTRACT_LOCAL_METHODS = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. 
+ +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. 
+ +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. + +SHOW_DIRECTORIES = YES + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. 
+ +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = . + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. 
If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp +# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories +# that are symbolic links (a Unix filesystem feature) are excluded from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used.
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation.
+ +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. 
+ +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. 
+ +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. 
If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. 
+ +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. 
+ +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. 
+ +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. 
To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = IAM_DOXYGEN + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse the +# parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. 
+ +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. Note that this +# option is superseded by the HAVE_DOT option below. This is only a fallback. It is +# recommended to install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default). + +HAVE_DOT = YES + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. 
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif. +# If left blank png will be used. 
+ +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found on the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes that +# lie farther from the root node will be omitted. Note that setting this option to +# 1 or 2 may greatly reduce the computation time needed for large code bases. Also +# note that a graph may be further truncated if the graph's image dimensions are +# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT). +# If 0 is used for the depth value (the default), the graph is not depth-constrained. + +MAX_DOT_GRAPH_DEPTH = 0 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. 
+ +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/searchlib/src/vespa/searchlib/fef/OWNERS b/searchlib/src/vespa/searchlib/fef/OWNERS new file mode 100644 index 00000000000..12b533ec610 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/OWNERS @@ -0,0 +1 @@ +havardpe diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.cpp b/searchlib/src/vespa/searchlib/fef/blueprint.cpp new file mode 100644 index 00000000000..c9a9b94eb44 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprint.cpp @@ -0,0 +1,76 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.blueprint"); +#include "blueprint.h" +#include "parametervalidator.h" + +namespace search { +namespace fef { + +const FeatureType & +Blueprint::defineInput(const vespalib::stringref &inName, + AcceptInput accept) +{ + assert(_dependency_handler != nullptr); + return _dependency_handler->resolve_input(inName, accept); +} + +void +Blueprint::describeOutput(const vespalib::stringref &outName, + const vespalib::stringref &desc, + const FeatureType &type) +{ + (void) desc; + assert(_dependency_handler != nullptr); + _dependency_handler->define_output(outName, type); +} + +Blueprint::Blueprint(const vespalib::stringref & baseName) + : _baseName(baseName), + _name(), + _dependency_handler(nullptr) +{ +} + +Blueprint::~Blueprint() +{ +} + +ParameterDescriptions +Blueprint::getDescriptions() const +{ + // desc: 0-n parameters + return ParameterDescriptions().desc().string().repeat(); +} + +bool +Blueprint::setup(const IIndexEnvironment &indexEnv, + const StringVector ¶ms) +{ + ParameterDescriptions descs = getDescriptions(); + ParameterValidator validator(indexEnv, params, descs); + ParameterValidator::Result result = validator.validate(); + if (result.valid()) { + return setup(indexEnv, result.getParameters()); + } else { + LOG(error, "The parameter list used for setting up rank feature %s is not valid: %s", + getBaseName().c_str(), result.getError().c_str()); + return false; + } +} + +bool +Blueprint::setup(const IIndexEnvironment &indexEnv, + const ParameterList ¶ms) +{ + (void) indexEnv; (void) params; + LOG(error, "The setup function using a typed parameter list does not have a default implementation. 
" + "Make sure the setup function is implemented in the rank feature %s.", getBaseName().c_str()); + return false; +} + + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/blueprint.h b/searchlib/src/vespa/searchlib/fef/blueprint.h new file mode 100644 index 00000000000..9b7a7c541a2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprint.h @@ -0,0 +1,252 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "featureexecutor.h" +#include "iindexenvironment.h" +#include "iqueryenvironment.h" +#include "idumpfeaturevisitor.h" +#include "parameter.h" +#include "parameterdescriptions.h" +#include "feature_type.h" + +namespace search { +namespace fef { + +/** + * A blueprint is a description of a named feature executor with a + * given set of parameters that also acts as a factory for that + * feature executor. During setup, the blueprint will look at the + * parameters and generate a list of input feature names and also name + * and describe its outputs. A blueprint will be created per rank + * setup and used to create feature executors per query. A single + * instance is used as a prototype to create actual blueprints used by + * the framework. The prototype instance will also get a chance to + * name features that should be dumped when doing a full feature dump + * (feature dumps are used for things like MLR training). It will be + * possible to define additional dump features in the config. + **/ +class Blueprint +{ +public: + /** + * A feature can be either a number (double) or an object + * (vespalib::eval::Value::CREF). This enum is used to describe + * the accepted type for a specific input to a feature executor. + **/ + enum class AcceptInput { NUMBER, OBJECT, ANY }; + + /** + * Interface used to set up feature dependencies recursively. 
This + * is needed to know the exact type of an input feature during + * executor setup. + **/ + struct DependencyHandler { + virtual const FeatureType &resolve_input(const vespalib::string &feature_name, AcceptInput accept_type) = 0; + virtual void define_output(const vespalib::string &output_name, const FeatureType &type) = 0; + virtual ~DependencyHandler() {} + }; + + /** + * Convenience typedef for a unique pointer to this class. + **/ + typedef std::unique_ptr<Blueprint> UP; + + /** + * Convenience typedef for a shared pointer to this class. + **/ + typedef std::shared_ptr<Blueprint> SP; + + typedef vespalib::string string; + typedef std::vector<string> StringVector; + +private: + Blueprint(const Blueprint &); + Blueprint &operator=(const Blueprint &); + + string _baseName; + string _name; + DependencyHandler *_dependency_handler; + +protected: + /** + * Define an input feature for this blueprint. This method should + * be invoked by the @ref setup method. Note that the order in + * which the inputs are defined is extremely important, since this + * must exactly match the input order of the corresponding feature + * executor. Note that inputs must be addressed with full feature + * names, for example 'foo(a,b).out'. + * + * @param inName feature name of input + * @param accept accepted input type + **/ + const FeatureType &defineInput(const vespalib::stringref &inName, + AcceptInput accept = AcceptInput::NUMBER); + + /** + * Describe an output for this blueprint. This method should be + * invoked by the @ref setup method. Note that the order in which + * the outputs are described is extremely important, since this + * must exactly match the output order of the corresponding + * feature executor. Note that the output name is local to this + * blueprint. As an example, the blueprint 'foo(a,b)' having the + * feature 'foo(a,b).out' as output, would describe it simply as + * 'out'. 
+ * + * @param outName output name + * @param desc output description + **/ + void describeOutput(const vespalib::stringref &outName, const vespalib::stringref &desc, + const FeatureType &type = FeatureType::number()); + +public: + /** + * Create an empty blueprint. Blueprints in their initial state + * are used as prototypes to create other instances of the same + * class. The @ref setup method is used to tailor a blueprint + * object for a specific set of parameters. + **/ + Blueprint(const vespalib::stringref & baseName); + + /** + * Obtain the base name of this blueprint. This method will + * typically only be invoked on the prototype object. The given + * name is the base name of all feature executors that will be + * indirectly created with this blueprint. + * + * An example scenario: A blueprint prototype is added with the + * base name 'foo'. If the framework needs to calculate the feature + * 'foo(a,b).out' it will first use the 'foo' prototype to create + * a new instance of the appropriate class. The name of the newly + * created blueprint will be set to 'foo(a,b)' and the setup + * method will be invoked with 'a' and 'b' as parameters. After + * inspecting the output names to find out which output has the + * name 'out', the blueprint can be used to create a feature + * executor that can perform the actual calculation of the + * feature. + * + * @return blueprint base name + **/ + const vespalib::string & getBaseName() const { return _baseName; } + + /** + * This method may indicate which features should be dumped + * during a full feature dump by naming them to the given + * visitor. The index environment is also given, since it may + * affect the choice of which features to dump. Note that any + * feature names can be given, but politeness indicates that only + * those calculated by feature executors created through this + * class should be given. Also note that naming non-existing + * features here will break feature dumping. 
+ * + * @param indexEnv the index environment + * @param visitor the object visiting dump features + **/ + virtual void visitDumpFeatures(const IIndexEnvironment &indexEnv, + IDumpFeatureVisitor &visitor) const = 0; + + /** + * Create another instance of this class. This must be implemented + * by all the leaves in the inheritance hierarchy. (ref prototype + * pattern) + * + * @return a new instance of this class (wrapped in a unique pointer) + **/ + virtual UP createInstance() const = 0; + + /** + * Set the name of this blueprint. This is the full name including + * parameters. If the base name of a feature executor is 'foo' and + * we are going to set up a blueprint for this executor with the + * parameters 'a' and 'b', the name of this blueprint will be + * 'foo(a,b)'. This method will be invoked by the framework right + * before invoking the @ref setup method (and must not be invoked + * by others). + **/ + void setName(const vespalib::stringref &name) { _name = name; } + + /** + * Obtain the name of this blueprint. + * + * @return blueprint name + **/ + const string &getName() const { return _name; } + + /** + * Returns the parameter descriptions for this blueprint. + * The default implementation will return a description accepting all parameter lists. + * + * @return the parameter descriptions. + **/ + virtual ParameterDescriptions getDescriptions() const; + + void attach_dependency_handler(DependencyHandler &dependency_handler) { + _dependency_handler = &dependency_handler; + } + + void detach_dependency_handler() { + _dependency_handler = nullptr; + } + + /** + * Tailor this blueprint for the given set of parameters. The + * implementation of this method should use the @ref defineInput + * and @ref describeOutput methods. + * + * The default implementation of this function will validate + * the parameters based on the parameter descriptions for this + * blueprint, convert them to a parameter list, and call the + * other setup function. 
+ * + * @return false if the parameters does not make sense for this + * blueprint (aka setup failed) + * @param indexEnv the index environment + * @param params the parameters as simple strings + **/ + virtual bool setup(const IIndexEnvironment &indexEnv, + const StringVector ¶ms); + + /** + * Setups this blueprint for the given set of parameters. The + * implementation of this method should use the @ref defineInput + * and @ref describeOutput methods. + * + * @return false if the parameters does not make sense for this + * blueprint (aka setup failed) + * @param indexEnv the index environment. + * @param params the parameters as a list of actual parameters. + **/ + virtual bool setup(const IIndexEnvironment &indexEnv, + const ParameterList ¶ms); + + /** + * Here you can do some preprocessing. State must be stored in the IObjectStore. + * This is called before creating multiple execution threads. + * @param queryEnv The query environment. + */ + virtual void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const { + (void) queryEnv; + (void) objectStore; + } + + /** + * Create a feature executor based on this blueprint. Failure to + * initialize a feature executor for this blueprint may be + * signaled by returning a shared pointer to 0. + * + * @return feature executor wrapped in a shared pointer + * @param queryEnv query environment + **/ + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &queryEnv) const = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~Blueprint(); +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp b/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp new file mode 100644 index 00000000000..af03194abb7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprintfactory.cpp @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.blueprintfactory"); +#include "blueprintfactory.h" + +namespace search { +namespace fef { + +BlueprintFactory::BlueprintFactory() + : _blueprintMap() +{ +} + +void +BlueprintFactory::addPrototype(Blueprint::SP proto) +{ + vespalib::string name = proto->getBaseName(); + if (_blueprintMap.find(name) != _blueprintMap.end()) { + LOG(warning, "Blueprint prototype overwritten: %s", name.c_str()); + } + _blueprintMap[name] = proto; +} + +void +BlueprintFactory::visitDumpFeatures(const IIndexEnvironment &indexEnv, + IDumpFeatureVisitor &visitor) const +{ + BlueprintMap::const_iterator itr = _blueprintMap.begin(); + BlueprintMap::const_iterator end = _blueprintMap.end(); + for (; itr != end; ++itr) { + itr->second->visitDumpFeatures(indexEnv, visitor); + } +} + +Blueprint::SP +BlueprintFactory::createBlueprint(const vespalib::string &name) const +{ + BlueprintMap::const_iterator itr = _blueprintMap.find(name); + if (itr == _blueprintMap.end()) { + return Blueprint::SP(); + } + Blueprint::UP bp = itr->second->createInstance(); + return Blueprint::SP(bp.release()); +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/blueprintfactory.h b/searchlib/src/vespa/searchlib/fef/blueprintfactory.h new file mode 100644 index 00000000000..8d9924d67a8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprintfactory.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "blueprint.h" +#include "iblueprintregistry.h" + +namespace search { +namespace fef { + +/** + * This class implements the blueprint repository interface and acts + * as a blueprint factory for the framework itself. 
+ **/ +class BlueprintFactory : public IBlueprintRegistry +{ +private: + BlueprintFactory(const BlueprintFactory &); + BlueprintFactory &operator=(const BlueprintFactory &); + + typedef std::map<vespalib::string, Blueprint::SP> BlueprintMap; + + BlueprintMap _blueprintMap; + +public: + /** + * Create an empty factory. + **/ + BlueprintFactory(); + + // inherit doc + virtual void addPrototype(Blueprint::SP proto); + + /** + * This method will visit features to be dumped by forwarding the + * visiting request to each of the prototypes registered in this + * factory. + * + * @param indexEnv the index environment + * @param visitor the object visiting dump features + **/ + void visitDumpFeatures(const IIndexEnvironment &indexEnv, + IDumpFeatureVisitor &visitor) const; + + /** + * Create a new blueprint instance by using the appropriate + * prototype contained in this factory. The name given is the + * feature executor base name (the same one used in the @ref + * addPrototype method) + * + * @return fresh and clean blueprint of the appropriate class + * @param name feature executor base name + **/ + Blueprint::SP createBlueprint(const vespalib::string &name) const; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp b/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp new file mode 100644 index 00000000000..505d7c102ce --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprintresolver.cpp @@ -0,0 +1,227 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.blueprintresolver"); +#include "blueprintresolver.h" +#include "blueprintfactory.h" +#include "featurenameparser.h" +#include "featurenamebuilder.h" +#include +#include + +namespace search { +namespace fef { + +namespace { + +using Accept = Blueprint::AcceptInput; + +bool is_compatible(bool is_object, Accept accept_type) { + return ((accept_type == Accept::ANY) || + ((accept_type == Accept::OBJECT) == (is_object))); +} + +const char *type_str(bool is_object) { + return (is_object ? "object" : "number"); +} + +const char *accept_type_str(Accept accept_type) { + switch (accept_type) { + case Accept::NUMBER: return "number"; + case Accept::OBJECT: return "object"; + case Accept::ANY: return "any"; + } + return "(not reached)"; +} + +struct Compiler : public Blueprint::DependencyHandler { + using ExecutorSpec = BlueprintResolver::ExecutorSpec; + using ExecutorSpecList = BlueprintResolver::ExecutorSpecList; + using FeatureRef = BlueprintResolver::FeatureRef; + using FeatureMap = BlueprintResolver::FeatureMap; + + struct Frame { + ExecutorSpec spec; + const FeatureNameParser &parser; + Frame(Blueprint::SP blueprint, const FeatureNameParser &parser_in) + : spec(blueprint), parser(parser_in) {} + }; + using Stack = std::vector<Frame>; + + struct FrameGuard { + Stack &stack; + FrameGuard(Stack &stack_in) : stack(stack_in) {} + ~FrameGuard() { stack.pop_back(); } + }; + + const BlueprintFactory &factory; + const IIndexEnvironment &index_env; + bool compile_error; + Stack resolve_stack; + ExecutorSpecList &spec_list; + FeatureMap &feature_map; + + Compiler(const BlueprintFactory &factory_in, + const IIndexEnvironment &index_env_in, + ExecutorSpecList &spec_list_out, + FeatureMap &feature_map_out) + : factory(factory_in), + index_env(index_env_in), + compile_error(false), + resolve_stack(), + spec_list(spec_list_out), + feature_map(feature_map_out) {} + + Frame &self() { return resolve_stack.back(); } + + FeatureRef failed(const vespalib::string
&feature_name, const vespalib::string &reason) { + if (!compile_error) { + LOG(warning, "invalid rank feature: '%s' (%s)", feature_name.c_str(), reason.c_str()); + for (size_t i = resolve_stack.size(); i > 0; --i) { + const auto &frame = resolve_stack[i - 1]; + if (&frame != &self()) { + LOG(warning, " ... needed by rank feature '%s'", frame.parser.featureName().c_str()); + } + } + compile_error = true; + } + return FeatureRef(); + } + + FeatureRef verify_type(const FeatureNameParser &parser, FeatureRef ref, Accept accept_type) { + const auto &spec = spec_list[ref.executor]; + bool is_object = spec.output_types[ref.output]; + if (!is_compatible(is_object, accept_type)) { + return failed(parser.featureName(), + vespalib::make_string("output '%s' has wrong type: was %s, expected %s", + parser.output().c_str(), type_str(is_object), accept_type_str(accept_type))); + } + return ref; + } + + FeatureRef setup_feature(const FeatureNameParser &parser, Accept accept_type) { + Blueprint::SP blueprint = factory.createBlueprint(parser.baseName()); + if (blueprint.get() == nullptr) { + return failed(parser.featureName(), + vespalib::make_string("unknown basename: '%s'", parser.baseName().c_str())); + } + resolve_stack.emplace_back(blueprint, parser); + FrameGuard frame_guard(resolve_stack); + self().spec.blueprint->setName(parser.executorName()); + self().spec.blueprint->attach_dependency_handler(*this); + if (!self().spec.blueprint->setup(index_env, parser.parameters())) { + return failed(parser.featureName(), "invalid parameters"); + } + if (parser.output().empty() && self().spec.output_types.empty()) { + return failed(parser.featureName(), "has no output value"); + } + const auto &feature = feature_map.find(parser.featureName()); + if (feature == feature_map.end()) { + return failed(parser.featureName(), + vespalib::make_string("unknown output: '%s'", parser.output().c_str())); + } + spec_list.push_back(self().spec); + return verify_type(parser, feature->second, accept_type); 
+ } + + FeatureRef resolve_feature(const vespalib::string &feature_name, Accept accept_type) { + FeatureNameParser parser(feature_name); + if (!parser.valid()) { + return failed(feature_name, "malformed name"); + } + const auto &feature = feature_map.find(parser.featureName()); + if (feature != feature_map.end()) { + return verify_type(parser, feature->second, accept_type); + } + if ((resolve_stack.size() + 1) > BlueprintResolver::MAX_DEP_DEPTH) { + return failed(parser.featureName(), "dependency graph too deep"); + } + for (const Frame &frame: resolve_stack) { + if (frame.parser.executorName() == parser.executorName()) { + return failed(parser.featureName(), "dependency cycle detected"); + } + } + return setup_feature(parser, accept_type); + } + + const FeatureType &resolve_input(const vespalib::string &feature_name, Accept accept_type) override { + assert(self().spec.output_types.empty()); // require: 'resolve inputs' before 'define outputs' + auto ref = resolve_feature(feature_name, accept_type); + if (!ref.valid()) { + return FeatureType::number(); + } + self().spec.inputs.push_back(ref); + return spec_list[ref.executor].output_types[ref.output]; + } + + void define_output(const vespalib::string &output_name, const FeatureType &type) override { + vespalib::string feature_name = self().parser.executorName(); + if (!output_name.empty()) { + feature_name.push_back('.'); + feature_name.append(output_name); + } + FeatureRef output_ref(spec_list.size(), self().spec.output_types.size()); + if (output_ref.output == 0) { + feature_map.emplace(self().parser.executorName(), output_ref); + } + feature_map.emplace(feature_name, output_ref); + self().spec.output_types.push_back(type); + } +}; + +} // namespace search::fef:: + +BlueprintResolver::BlueprintResolver(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv) + : _factory(factory), + _indexEnv(indexEnv), + _seeds(), + _executorSpecs(), + _featureMap(), + _seedMap() +{ +} + +void 
+BlueprintResolver::addSeed(const vespalib::stringref &feature) +{ + _seeds.push_back(feature); +} + +bool +BlueprintResolver::compile() +{ + assert(_executorSpecs.empty()); // only one compilation allowed + Compiler compiler(_factory, _indexEnv, _executorSpecs, _featureMap); + for (const auto &seed: _seeds) { + auto ref = compiler.resolve_feature(seed, Blueprint::AcceptInput::ANY); + if (compiler.compile_error) { + return false; + } + _seedMap.emplace(FeatureNameParser(seed).featureName(), ref); + } + return true; +} + +const BlueprintResolver::ExecutorSpecList & +BlueprintResolver::getExecutorSpecs() const +{ + return _executorSpecs; +} + +const BlueprintResolver::FeatureMap & +BlueprintResolver::getFeatureMap() const +{ + return _featureMap; +} + +const BlueprintResolver::FeatureMap & +BlueprintResolver::getSeedMap() const +{ + return _seedMap; +} + + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/blueprintresolver.h b/searchlib/src/vespa/searchlib/fef/blueprintresolver.h new file mode 100644 index 00000000000..ceab7125ba8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/blueprintresolver.h @@ -0,0 +1,150 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "blueprint.h" +#include "feature_type.h" + +namespace search { +namespace fef { + +class BlueprintFactory; +class IIndexEnvironment; +class FeatureNameParser; + +/** + * This class is used by the framework to resolve blueprint + * dependencies. A blueprint factory is used to create new blueprints + * when needed during dependency resolving. Note that this class is + * not intended for direct use. It is used by the @ref RankSetup + * class. It may also be used for low-level testing. + **/ +class BlueprintResolver +{ +public: + typedef std::shared_ptr<BlueprintResolver> SP; + + /** + * Low-level reference to a single output from a feature + * executor.
'executor' is the offset into the topological + * ordering of all executors. This order is defined by the return + * value from the getExecutorSpecs function. 'output' is the + * offset into the ordered list of outputs from the relevant + * executor. + **/ + struct FeatureRef { + uint32_t executor; + uint32_t output; + static constexpr uint32_t undef = -1; + + FeatureRef() : executor(undef), output(0) {} + FeatureRef(uint32_t executor_in, uint32_t output_in) + : executor(executor_in), output(output_in) {} + bool valid() const { return (executor != undef); } + }; + typedef std::map<vespalib::string, FeatureRef> FeatureMap; + + /** + * Thin blueprint wrapper with additional information about how + * the executor created from the blueprint should be wired with + * other executors. + **/ + struct ExecutorSpec { + Blueprint::SP blueprint; + std::vector<FeatureRef> inputs; + std::vector<FeatureType> output_types; + + ExecutorSpec(Blueprint::SP blueprint_in) + : blueprint(blueprint_in), inputs(), output_types() {} + }; + typedef std::vector<ExecutorSpec> ExecutorSpecList; + + /** + * The maximum dependency depth. This value is defined to protect + * against infinitely deep dependency graphs and exposed for + * testing purposes. It should be set high enough to avoid + * problems for 'sane' developers and low enough to avoid stack + * overflow. + **/ + static const uint32_t MAX_DEP_DEPTH = 64; + +private: + const BlueprintFactory &_factory; + const IIndexEnvironment &_indexEnv; + std::vector<vespalib::string> _seeds; + ExecutorSpecList _executorSpecs; + FeatureMap _featureMap; + FeatureMap _seedMap; + +public: + BlueprintResolver(const BlueprintResolver &) = delete; + BlueprintResolver &operator=(const BlueprintResolver &) = delete; + + /** + * Create a new blueprint resolver within the given index + * environment and backed by the given factory.
+ * + * @param factory blueprint factory + * @param indexEnv index environment + **/ + BlueprintResolver(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv); + + /** + * Add a feature name to the list of seeds. During compilation, + * blueprints for all seeds and dependencies will be instantiated + * and enumerated. + * + * @param feature feature name to use as a seed + **/ + void addSeed(const vespalib::stringref &feature); + + /** + * Create Blueprints for all seeds and dependencies and enumerate + * blueprints in such a way that blueprints only depend on other + * blueprints with lower enum values. Compilation will typically + * fail if a dependency cannot be created or if you have circular + * dependencies. + * + * @return true if ok, false if compilation error + **/ + bool compile(); + + /** + * Obtain a vector indicating the order of instantiation of + * feature executors and also how they should be wired together. + * The enum value of an executor spec may be used directly as an + * index into the returned vector. + * + * @return feature executor assembly directions + **/ + const ExecutorSpecList &getExecutorSpecs() const; + + /** + * Obtain the location of all named features known to this + * resolver. This may be used to dump a list of feature name/value + * pairs after all feature values have been computed. The feature names + * are the keys in the returned map, and the feature locations are + * the values. + * + * @return feature locations + **/ + const FeatureMap &getFeatureMap() const; + + /** + * Obtain the location of all seeds used by this resolver. This + * may be used to dump a list of feature name/value pairs after + * all feature values have been computed. The seeds are the keys + * in the returned map, and the feature locations are the + * values.
+ * + * @return seed locations + **/ + const FeatureMap &getSeedMap() const; +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/collection_type.cpp b/searchlib/src/vespa/searchlib/fef/collection_type.cpp new file mode 100644 index 00000000000..c5b31a4bd0c --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/collection_type.cpp @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "collection_type.h" + +namespace search { +namespace fef { + +CollectionType::CollectionType(uint32_t value) + : _value(value) +{ +} + +const CollectionType CollectionType::SINGLE(1); + +const CollectionType CollectionType::ARRAY(2); + +const CollectionType CollectionType::WEIGHTEDSET(3); + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/collection_type.h b/searchlib/src/vespa/searchlib/fef/collection_type.h new file mode 100644 index 00000000000..55c9a7a143c --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/collection_type.h @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { +namespace fef { + +/** + * Typesafe enum used to indicate the collection type of a field. + **/ +class CollectionType +{ +private: + uint32_t _value; + + CollectionType(uint32_t value); +public: + /** + * Indicating that the field is single-value + **/ + static const CollectionType SINGLE; + + /** + * Indicating that the field is multi-value without element weights + **/ + static const CollectionType ARRAY; + + /** + * Indicating that the field is multi-value with element weights + **/ + static const CollectionType WEIGHTEDSET; + + /** + * Less than operator; needed to be handled as a value by the standard library. 
+ **/ + bool operator<(const CollectionType &rhs) const { return (_value < rhs._value); } + + /** + * Check if two collection types are equal. + **/ + bool operator==(const CollectionType &rhs) const { return (_value == rhs._value); } + + /** + * Check if two collection types are not equal. + **/ + bool operator!=(const CollectionType &rhs) const { return (_value != rhs._value); } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh b/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh new file mode 100755 index 00000000000..e47cc402dca --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/create-class-cpp.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` +name=`echo $class | tr 'A-Z' 'a-z'` + +cat < +LOG_SETUP(".fef.$name"); +#include +#include "$name.h" + +namespace search { +namespace fef { + +$class::$class() +{ +} + +$class::~$class() +{ +} + +} // namespace fef +} // namespace search +EOF diff --git a/searchlib/src/vespa/searchlib/fef/create-class-h.sh b/searchlib/src/vespa/searchlib/fef/create-class-h.sh new file mode 100644 index 00000000000..9a4444c30bc --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/create-class-h.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` + +cat <" +echo "" + +for f in *.h; do + if [ $f != "fef.h" ]; then + echo "#include \"$f\"" + fi +done + +echo "" diff --git a/searchlib/src/vespa/searchlib/fef/create-interface.sh b/searchlib/src/vespa/searchlib/fef/create-interface.sh new file mode 100644 index 00000000000..128f4a16711 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/create-interface.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` + +cat < +#include "feature_type.h" + +namespace search { +namespace fef { + +const FeatureType FeatureType::_number = FeatureType(TYPE_UP()); + +FeatureType::FeatureType(const FeatureType &rhs) + : _type() +{ + if (rhs.is_object()) { + _type = std::make_unique<TYPE>(rhs.type()); + } +} + +FeatureType +FeatureType::object(const TYPE &type_in) +{ + return FeatureType(std::make_unique<TYPE>(type_in)); +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/feature_type.h b/searchlib/src/vespa/searchlib/fef/feature_type.h new file mode 100644 index 00000000000..0251fe7ab25 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/feature_type.h @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace fef { + +/** + * The full type of a feature calculated by the ranking framework. The + * ranking framework wraps a thin layer on top of the types defined in + * the low-level eval library. A feature can either be a simple number + * represented by a double or a polymorph value represented with an + * object. The ranking framework itself will mostly care about the + * representation (number/object) and not the specific type, hence the + * implicit cast to bool. The type function is used to extract the + * underlying type and is only allowed for features that are objects.
+ **/ +class FeatureType { +private: + using TYPE = vespalib::eval::ValueType; + using TYPE_UP = std::unique_ptr<TYPE>; + TYPE_UP _type; + static const FeatureType _number; + FeatureType(TYPE_UP type_in) : _type(std::move(type_in)) {} +public: + FeatureType(const FeatureType &rhs); + bool is_object() const { return (_type.get() != nullptr); } + operator bool() const { return is_object(); } + const TYPE &type() const { + assert(_type); + return *_type; + } + static const FeatureType &number() { return _number; } + static FeatureType object(const TYPE &type_in); +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp new file mode 100644 index 00000000000..b28bec39b19 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.cpp @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "featureexecutor.h" + +namespace search { +namespace fef { + +FeatureExecutor::FeatureExecutor() + : _inputs(), + _outputs() +{ +} + +bool +FeatureExecutor::isPure() +{ + return false; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featureexecutor.h b/searchlib/src/vespa/searchlib/fef/featureexecutor.h new file mode 100644 index 00000000000..75110889ab7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featureexecutor.h @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "handle.h" +#include "matchdata.h" +#include +#include + +namespace search { +namespace fef { + +/** + * A feature executor is a general component that calculates one or + * more feature values. It may take multiple features as input.
A + * feature executor may also use term match data as input, or whatever + * it has access to regarding the index. + **/ +class FeatureExecutor +{ +public: + class SharedInputs { + std::vector<FeatureHandle> _inputs; + public: + SharedInputs() : _inputs() {} + void add(FeatureHandle handle) { _inputs.push_back(handle); } + size_t size() const { return _inputs.size(); } + FeatureHandle operator[](size_t idx) const { return _inputs[idx]; } + }; + + class Inputs { + SharedInputs *_inputs; + uint32_t _offset; + uint32_t _size; + public: + Inputs() : _inputs(nullptr), _offset(0), _size(0) {} + void bind(SharedInputs &inputs) { + _inputs = &inputs; + _offset = _inputs->size(); + _size = 0; + } + void add(FeatureHandle handle) { + assert(_inputs != nullptr); + assert(_inputs->size() == (_offset + _size)); + _inputs->add(handle); + ++_size; + } + bool empty() const { return (_size == 0); } + size_t size() const { return _size; } + FeatureHandle operator[](size_t idx) const { + assert(idx < _size); + return (*_inputs)[_offset + idx]; + } + }; + + class Outputs { + FeatureHandle _begin; + FeatureHandle _end; + public: + Outputs() : _begin(IllegalHandle), _end(IllegalHandle) {} + void add(FeatureHandle handle) { + if (_begin == IllegalHandle) { + _begin = handle; + _end = (_begin + 1); + } else if (handle == _end) { + ++_end; + } else { + assert(handle == _end); + } + } + bool empty() const { return (_end == _begin); } + size_t size() const { return (_end - _begin); } + FeatureHandle operator[](size_t idx) const { + assert(idx < (_end - _begin)); + return (_begin + idx); + } + }; + +private: + FeatureExecutor(const FeatureExecutor &); + FeatureExecutor &operator=(const FeatureExecutor &); + + Inputs _inputs; + Outputs _outputs; + +public: + /** + * Convenience typedef for a shared pointer to this class. + **/ + typedef vespalib::LinkedPtr<FeatureExecutor> LP; + + typedef std::unique_ptr<FeatureExecutor> UP; + + /** + * Create a feature executor that has not yet been bound to either + * inputs or outputs.
+ **/ + FeatureExecutor(); + + /** + * Bind shared external storage to this feature executor. The + * shared storage will be used to store the handle of feature + * inputs. This function must be called before starting to add + * inputs. + * + * @param shared_inputs shared store for input feature handles + **/ + void bind_shared_inputs(SharedInputs &shared_inputs) { _inputs.bind(shared_inputs); } + + /** + * Add an input to this feature executor. All inputs must be added + * before this object is added to the feature execution manager. + * + * @param handle the feature handle of the input to add + **/ + void addInput(FeatureHandle handle) { _inputs.add(handle); } + virtual void inputs_done() {} // needed for feature decorators + + /** + * Access the input features for this executor. Use {@link + * MatchData#resolveFeature} to resolve these handles. + * + * @return const view of input features + **/ + const Inputs &inputs() const { return _inputs; } + + /** + * Assign a feature handle to the next unbound output feature. + * This method will be invoked by the @ref FeatureExecutionManager + * when new feature executors are added. It may also be used for + * testing, but should not be invoked directly from application + * code. Note that this method must be invoked exactly the number + * of times indicated by the @ref getNumOutputs method. + * + * @param handle feature handle to be assigned to the next unbound + * output feature. + **/ + void bindOutput(FeatureHandle handle) { _outputs.add(handle); } + virtual void outputs_done() {} // needed for feature decorators + + /** + * Access the output features for this executor. Use {@link + * MatchData#resolveFeature} to resolve these handles. + * + * @return const view of output features + **/ + const Outputs &outputs() const { return _outputs; } + + /** + * Check if this feature executor is pure. 
A feature executor + * claiming to be pure must satisfy the requirement that its + * output feature values only depend on the values of its input + * features (in other words: if the input features does not change + * in value, neither does the outputs). This method is implemented + * to return false by default, but may be overridden by feature + * executors that are pure. Whether a feature executor is pure or + * not may be used by the framework to optimize feature + * execution. It is always safe to let this method return false, + * but letting pure executors return true may increase + * performance. + * + * @return true if this feature executor is pure + **/ + virtual bool isPure(); + + /** + * Execute this feature executor on the given data. + * + * @param data data storage + **/ + virtual void execute(MatchData &data) = 0; + + /** + * Virtual destructor to allow subclassing. + **/ + virtual ~FeatureExecutor() {} +}; + +} // namespace fef +} // namespace search + + +// LocalWords: param diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp new file mode 100644 index 00000000000..292f5ac5bcc --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "featurenamebuilder.h" +#include "featurenameparser.h" + +namespace { + +// ref: http://en.wikipedia.org/wiki/ASCII +// note: we also consider space to be printable +bool isPrintable(char c) { + return (static_cast<unsigned char>(c) >= 32 && + static_cast<unsigned char>(c) <= 126); +} + +bool isSpace(char c) { + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\f': + return true; + default: + return false; + } +} + +bool isBlank(const vespalib::string &str) { + for (uint32_t i = 0; i < str.size(); ++i) { + if (!isSpace(str[i])) { + return false; + } + } + return true; +} + +void appendQuoted(char c, vespalib::string &str) { + switch (c) { + case '\\': + str.append("\\\\"); + break; + case '"': + str.append("\\\""); + break; + case '\t': + str.append("\\t"); + break; + case '\n': + str.append("\\n"); + break; + case '\r': + str.append("\\r"); + break; + case '\f': + str.append("\\f"); + break; + default: + if (isPrintable(c)) { + str.push_back(c); + } else { + const char *lookup = "0123456789abcdef"; + str.append("\\x"); + str.push_back(lookup[(c >> 4) & 0xf]); + str.push_back(lookup[c & 0xf]); + } + } +} + +vespalib::string quoteString(const vespalib::string &str) +{ + vespalib::string res; + res.push_back('"'); + for (uint32_t i = 0; i < str.size(); ++i) { + appendQuoted(str[i], res); + } + res.push_back('"'); + return res; +} + +} // namespace + +namespace search { +namespace fef { + +FeatureNameBuilder::FeatureNameBuilder() + : _baseName(), + _parameters(), + _output() +{ +} + +FeatureNameBuilder::~FeatureNameBuilder() +{ +} + +FeatureNameBuilder & +FeatureNameBuilder::baseName(const vespalib::string &str) +{ + _baseName = str; + return *this; +} + +FeatureNameBuilder & +FeatureNameBuilder::parameter(const vespalib::string &str, bool exact) +{ + if (str.empty() || (!exact && isBlank(str))) { + _parameters.push_back(""); + } else { + FeatureNameParser parser(str); + if (!parser.valid() || (exact && str != parser.featureName())) { + 
_parameters.push_back(quoteString(str)); + } else { + _parameters.push_back(parser.featureName()); + } + } + return *this; +} + +FeatureNameBuilder & +FeatureNameBuilder::clearParameters() +{ + _parameters.resize(0); + return *this; +} + +FeatureNameBuilder & +FeatureNameBuilder::output(const vespalib::string &str) +{ + _output = str; + return *this; +} + +vespalib::string +FeatureNameBuilder::buildName() const +{ + vespalib::string ret; + if (!_baseName.empty()) { + ret = _baseName; + if (!_parameters.empty()) { + ret += "("; + for (uint32_t i = 0; i < _parameters.size(); ++i) { + if (i > 0) { + ret += ","; + } + ret += _parameters[i]; + } + ret += ")"; + } + if (!_output.empty()) { + ret += "."; + ret += _output; + } + } + return ret; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h new file mode 100644 index 00000000000..3bcc49114bc --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featurenamebuilder.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace fef { + +/** + * An object of this class may be used to build feature names in a + * convenient way. Using this class will ensure things like correct + * quoting of reserved characters used in parameters. + **/ +class FeatureNameBuilder +{ +private: + vespalib::string _baseName; + std::vector<vespalib::string> _parameters; + vespalib::string _output; + +public: + /** + * Create an empty builder. + **/ + FeatureNameBuilder(); + ~FeatureNameBuilder(); + + /** + * Set the base name. + * + * @return this object, for chaining + * @param str base name + **/ + FeatureNameBuilder &baseName(const vespalib::string &str); + + /** + * Add a parameter to the end of the parameter list.
+ * + * @return this object, for chaining + * @param str a parameter + * @param exact if this is true, the parameter will preserve its + * exact string value. If this is false, the framework is allowed + * to normalize the string as if it was a feature name. + **/ + FeatureNameBuilder ¶meter(const vespalib::string &str, bool exact = true); + + /** + * Clear the list of parameters. + * + * @return this object, for chaining + **/ + FeatureNameBuilder &clearParameters(); + + /** + * Set the output name + * + * @return this object, for chaining + * @param str output name + **/ + FeatureNameBuilder &output(const vespalib::string &str); + + /** + * Build a full feature name from the information put into this + * object. + * + * @return feature name + **/ + vespalib::string buildName() const; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp b/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp new file mode 100644 index 00000000000..2d646de5a72 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featurenameparser.cpp @@ -0,0 +1,499 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Translate a single hexadecimal digit (upper or lower case) into its
// numeric value 0..15. Any other character yields -1.
int decodeHex(char c) {
    int value = -1;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        value = 10 + (c - 'a');
    } else if (c >= 'A' && c <= 'F') {
        value = 10 + (c - 'A');
    }
    return value;
}
// Lookup table answering "may this byte appear in an identifier?".
// Accepted bytes are ASCII letters, ASCII digits and the five
// punctuation characters '_', '+', '-', '$' and '@'.
class Ident
{
public:
    Ident() {
        for (size_t idx = 0; idx < 256; ++idx) {
            _valid[idx] = false;
        }
        allowRange('a', 'z');
        allowRange('A', 'Z');
        allowRange('0', '9');
        const char extras[] = "_+-$@";
        for (const char *p = extras; *p != '\0'; ++p) {
            _valid[uint8_t(*p)] = true;
        }
    }
    bool isValid(uint8_t c) { return _valid[c]; }
private:
    // Mark every byte in the inclusive range [first, last] as valid.
    void allowRange(char first, char last) {
        for (size_t idx = uint8_t(first); idx <= uint8_t(last); ++idx) {
            _valid[idx] = true;
        }
    }
    bool _valid[256];
};
vespalib::make_string("IsChar(%c)", _c); } +}; + +template +class IsNot +{ +private: + A _a; + +public: + IsNot(A a) : _a(a) {} + bool operator()(char c) { + return !(_a(c)); + } + vespalib::string getName() const { return vespalib::make_string("IsNot(%s)", _a.getName().c_str()); } +}; + +template +class IsEither +{ +private: + A _a; + B _b; + +public: + IsEither(A a, B b) : _a(a), _b(b) {} + bool operator()(char c) { + return (_a(c) || _b(c)); + } + vespalib::string getName() const { return vespalib::make_string("IsEither(%s,%s)", + _a.getName().c_str(), _b.getName().c_str()); } +}; + +class IsEndQuote +{ +private: + bool _escape; + +public: + IsEndQuote() : _escape(false) {} + bool operator()(char c) { + if (_escape) { + _escape = false; + return false; + } + if (c == '\\') { + _escape = true; + return false; + } + return (c == '"'); + } + vespalib::string getName() const { return "IsEndQuote"; } +}; + +//----------------------------------------------------------------------------- + +class DoIgnore +{ +public: + bool operator()(char) { return true; } + bool done() { return true; } + vespalib::string getName() const { return "doIgnore"; } +}; + +class DoSave +{ +private: + vespalib::string &_dst; + +public: + DoSave(vespalib::string &str) : _dst(str) {} + bool operator()(char c) { + _dst.push_back(c); + return true; + } + bool done() { return !_dst.empty(); } + vespalib::string getName() const { return "doSave"; } +}; + +class DoDequote +{ +private: + bool _escape; // true means we are dequoting something + int _hex; // how many hex numbers left to read + unsigned char _c; // save up hex decoded char here + vespalib::string &_dst; // where to save the dequoted string + +public: + DoDequote(vespalib::string &str) : _escape(false), _hex(0), _c(0), _dst(str) {} + bool operator()(char c) { + if (_escape) { + if (_hex > 0) { + --_hex; + int val = decodeHex(c); + if (val < 0) { + return false; + } + _c |= ((val & 0xf) << (_hex * 4)); + if (_hex == 0) { + if (_c == 0) { 
+ return false; + } + _dst.push_back(_c); + _escape = false; + } + } else { + switch (c) { + case '"': + _dst.push_back('\"'); + _escape = false; + break; + case '\\': + _dst.push_back('\\'); + _escape = false; + break; + case 't': + _dst.push_back('\t'); + _escape = false; + break; + case 'n': + _dst.push_back('\n'); + _escape = false; + break; + case 'r': + _dst.push_back('\r'); + _escape = false; + break; + case 'f': + _dst.push_back('\f'); + _escape = false; + break; + case 'x': + _hex = 2; + _c = 0; + break; + default: + return false; // signal error + } + } + } else { + if (c == '\\') { + _escape = true; + } else { + _dst.push_back(c); // normal case (no dequoting needed) + } + } + return true; + } + bool done() { return !_escape; } + vespalib::string getName() const { return "doDequote"; } +}; + +//----------------------------------------------------------------------------- + +IsSpace isSpace() { return IsSpace(); } + +IsIdent isIdent() { return IsIdent(); } + +IsChar isChar(char c) { return IsChar(c); } + +template +IsNot isNot(A a) { + return IsNot(a); +} + +template +IsEither isEither(A a, B b) { + return IsEither(a, b); +} + +IsEndQuote isEndQuote() { return IsEndQuote(); } + +DoIgnore doIgnore() { return DoIgnore(); } + +DoSave doSave(vespalib::string &str) { return DoSave(str); } + +DoDequote doDequote(vespalib::string &str) { return DoDequote(str); } + +//----------------------------------------------------------------------------- + +// need forward declaration of this for recursive parsing +bool normalizeFeatureName(ParseContext &ctx, vespalib::string &name); + +bool parseParameters(ParseContext &ctx, std::vector ¶meters) +{ + ctx.scan(isSpace(), doIgnore()); + if (!ctx.eatChar('(')) { + return true; // no parameters = ok + } + for (;;) { + vespalib::string param; + ctx.scan(isSpace(), doIgnore()); + switch (ctx.get()) { + case ')': + case ',': + break; // empty param + case '"': // parse param as quoted string + ctx.next(); // eat opening '"' + if 
(!ctx.scan(isNot(isEndQuote()), doDequote(param))) { + return false; + } + if (!ctx.eatChar('"')) { // missing end quote + return ctx.signalError(); + } + break; + default: // parse param as feature name + if (!normalizeFeatureName(ctx, param)) { + return false; + } + break; + } + parameters.push_back(param); + ctx.scan(isSpace(), doIgnore()); + if (ctx.eatChar(')')) { // done + return true; + } else if (!ctx.eatChar(',')) { // illegal param list + return ctx.signalError(); + } + } +} + +bool parseOutput(ParseContext &ctx, vespalib::string &output) +{ + ctx.scan(isSpace(), doIgnore()); + if (!ctx.eatChar('.')) { + return true; // output is optional + } + ctx.scan(isSpace(), doIgnore()); + return ctx.scan(isEither(isIdent(), isChar('.')), doSave(output)); +} + +bool parseFeatureName(ParseContext &ctx, vespalib::string &baseName, + std::vector ¶meters, vespalib::string &output) +{ + return (ctx.scan(isIdent(), doSave(baseName)) && + parseParameters(ctx, parameters) && + parseOutput(ctx, output)); +} + +bool normalizeFeatureName(ParseContext &ctx, vespalib::string &name) { + vespalib::string baseName; + std::vector params; + vespalib::string output; + if (!parseFeatureName(ctx, baseName, params, output)) { + return false; + } + search::fef::FeatureNameBuilder builder; + builder.baseName(baseName); + for (uint32_t i = 0; i < params.size(); ++i) { + builder.parameter(params[i]); + } + builder.output(output); + name = builder.buildName(); + return true; +} + +} // namespace + +namespace search { +namespace fef { + +FeatureNameParser::FeatureNameParser(const string &input) + : _valid(false), + _endPos(0), + _baseName(), + _parameters(), + _output(), + _executorName(), + _featureName() +{ + ParseContext ctx(input); + ctx.scan(isSpace(), doIgnore()); + _valid = parseFeatureName(ctx, _baseName, _parameters, _output); + ctx.scan(isSpace(), doIgnore()); + if (!ctx.eos()) { + _valid = ctx.signalError(); + } + _endPos = ctx.pos(); + if (_valid && ctx.eos()) { + 
FeatureNameBuilder builder; + builder.baseName(_baseName); + for (uint32_t i = 0; i < _parameters.size(); ++i) { + builder.parameter(_parameters[i]); + } + _executorName = builder.buildName(); + builder.output(_output); + _featureName = builder.buildName(); + } else { + _baseName = ""; + { + StringVector tmp; + _parameters.swap(tmp); + } + _output = ""; + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featurenameparser.h b/searchlib/src/vespa/searchlib/fef/featurenameparser.h new file mode 100644 index 00000000000..fea86479d0b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featurenameparser.h @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace fef { + +/** + * Simple parser used to split feature names into components by the + * framework. + **/ +class FeatureNameParser +{ +public: + typedef vespalib::string string; + typedef std::vector StringVector; +private: + bool _valid; + uint32_t _endPos; + string _baseName; + StringVector _parameters; + string _output; + string _executorName; + string _featureName; + +public: + /** + * The constructor parses the given feature name, splitting it + * into components. If the given string is not a valid feature + * name, all components will be empty and the @ref valid method + * will return false. + * + * @param featureName feature name + **/ + FeatureNameParser(const vespalib::string &featureName); + + /** + * Does this object represent a valid feature name? + * + * @return true if valid, false if invalid + **/ + bool valid() const { return _valid; } + + /** + * Obtain the number of bytes from the original feature name that + * was successfully parsed. If the feature name was valid, this + * method will simply return the size of the string given to the + * constructor. 
If a parse error occurred, this method will return + * the index of the offending character in the string given to the + * constructor. + * + * @return number of bytes successfully parsed + **/ + uint32_t parsedBytes() const { return _endPos; } + + /** + * Obtain the base name from the parsed feature name. + * + * @return base name + **/ + const string &baseName() const { return _baseName; } + + /** + * Obtain the parameter list from the parsed feature name. + * + * @return parameter list + **/ + const StringVector ¶meters() const { return _parameters; } + + /** + * Obtain the output name from the parsed feature name. + * + * @return output name + **/ + const string &output() const { return _output; } + + /** + * Obtain a normalized name for the executor making this + * feature. This includes the parameter list. The @ref + * FeatureNameBuilder is used to make this name. + * + * @return normalized executor name with parameters + **/ + const string &executorName() const { return _executorName; } + + /** + * Obtain a normalized full feature name. The @ref + * FeatureNameBuilder is used to make this name. + * + * @return normalized full feature name + **/ + const string &featureName() const { return _featureName; } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp new file mode 100644 index 00000000000..5bb2a2789bf --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.cpp @@ -0,0 +1,54 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "featureoverrider.h" + +namespace search { +namespace fef { + +FeatureOverrider::FeatureOverrider(FeatureExecutor::LP executor, uint32_t outputIdx, feature_t value) + : _executor(executor), + _outputIdx(outputIdx), + _handle(IllegalHandle), + _value(value) +{ +} + +void +FeatureOverrider::inputs_done() +{ + for (uint32_t i = 0; i < inputs().size(); ++i) { + _executor->addInput(inputs()[i]); + } + _executor->inputs_done(); +} + +void +FeatureOverrider::outputs_done() +{ + if (_outputIdx < outputs().size()) { + _handle = outputs()[_outputIdx]; + } + for (uint32_t i = 0; i < outputs().size(); ++i) { + _executor->bindOutput(outputs()[i]); + } + _executor->outputs_done(); +} + +bool +FeatureOverrider::isPure() +{ + return _executor->isPure(); +} + +void +FeatureOverrider::execute(MatchData &data) +{ + _executor->execute(data); + if (_handle != IllegalHandle) { + *data.resolveFeature(_handle) = _value; + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/featureoverrider.h b/searchlib/src/vespa/searchlib/fef/featureoverrider.h new file mode 100644 index 00000000000..432a8ea4736 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/featureoverrider.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "featureexecutor.h" + +namespace search { +namespace fef { + +/** + * A Feature Overrider is a simple decorator class that wraps a single + * Feature Executor instance and overrides one of its output + * features. All method invocations are passed through to the inner + * feature executor. Each time the execute method is invoked, the + * appropriate feature value is overwritten. 
+ **/ +class FeatureOverrider : public FeatureExecutor +{ +private: + FeatureOverrider(const FeatureOverrider &); + FeatureOverrider &operator=(const FeatureOverrider &); + + FeatureExecutor::LP _executor; + uint32_t _outputIdx; + FeatureHandle _handle; + feature_t _value; + +public: + /** + * Create a feature overrider that will override the given output + * with the given feature value. + * + * @param executor the feature executor for which we should override an output + * @param outputIdx which output to override + * @param value what value to override with + **/ + FeatureOverrider(FeatureExecutor::LP executor, uint32_t outputIdx, feature_t value); + void inputs_done() override; + void outputs_done() override; + bool isPure() override; + void execute(MatchData &data) override; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/fef.cpp b/searchlib/src/vespa/searchlib/fef/fef.cpp new file mode 100644 index 00000000000..6a29c098479 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/fef.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "fef.h" + +namespace search { +namespace fef { + +// this file is just to verify the fef.h file + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/fef.h b/searchlib/src/vespa/searchlib/fef/fef.h new file mode 100644 index 00000000000..94ac5daf26e --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/fef.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// NOTE: This file was generated by the 'create-fef-includes.sh' script + + +/** + * @file fef.h + * + * This is a convenience header that will include everything you need + * to use this library. 
+ **/ + +#pragma once + +#include + +#include "blueprint.h" +#include "blueprintfactory.h" +#include "blueprintresolver.h" +#include "collection_type.h" +#include "featureexecutor.h" +#include "featurenamebuilder.h" +#include "featurenameparser.h" +#include "featureoverrider.h" +#include "fieldinfo.h" +#include "fieldpositionsiterator.h" +#include "fieldtype.h" +#include "filetablefactory.h" +#include "functiontablefactory.h" +#include "handle.h" +#include "iblueprintregistry.h" +#include "idumpfeaturevisitor.h" +#include "iindexenvironment.h" +#include "indexproperties.h" +#include "iqueryenvironment.h" +#include "itablefactory.h" +#include "itablemanager.h" +#include "itermdata.h" +#include "itermfielddata.h" +#include "location.h" +#include "matchdata.h" +#include "matchdatalayout.h" +#include "parameter.h" +#include "parameterdescriptions.h" +#include "parametervalidator.h" +#include "phrasesplitter.h" +#include "properties.h" +#include "queryproperties.h" +#include "rank_program.h" +#include "ranksetup.h" +#include "simpletermdata.h" +#include "simpletermfielddata.h" +#include "sumexecutor.h" +#include "symmetrictable.h" +#include "table.h" +#include "tablemanager.h" +#include "termfieldmatchdata.h" +#include "termfieldmatchdataarray.h" +#include "termfieldmatchdataposition.h" +#include "termmatchdatamerger.h" +#include "utils.h" +#include "verify_feature.h" + diff --git a/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp b/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp new file mode 100644 index 00000000000..234352ddb31 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/fieldinfo.cpp @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Create field info from the given type, collection type, name and id.
 * The data type starts out as DataType::DOUBLE and the filter flag
 * starts out cleared.
 **/
FieldInfo::FieldInfo(FieldType type_in, CollectionType collection_in,
                     const string &name_in, uint32_t id_in)
    : _type(type_in),
      _data_type(DataType::DOUBLE),
      _collection(collection_in),
      _name(name_in),
      _id(id_in),
      _isFilter(false),
      // only FieldType::ATTRIBUTE enables attribute lookup up front; other
      // field types need an explicit addAttribute() call
      _hasAttribute(type_in == FieldType::ATTRIBUTE)
{
}
    /**
     * Mark this field as supporting attribute lookup. This is used
     * to verify rank features using attributes during setup. If you
     * call this function to allow rank setup, you are responsible
     * for supplying the needed attributes during query execution.
     **/
    void addAttribute() { _hasAttribute = true; }
/**
 * Iterator used to iterate over all positions of a term inside a
 * specific field.
 **/
class FieldPositionsIterator
{
public:
    /**
     * The iterator type of the underlying data, which have all
     * positions for a term across all fields searched.
     **/
    typedef const TermFieldMatchDataPosition * PositionsIterator;

private:
    uint32_t _length;         // field length in words, or UNKNOWN_LENGTH
    PositionsIterator _begin; // first position of the slice
    PositionsIterator _pos;   // current position
    PositionsIterator _end;   // one past the last position of the slice

public:
    /**
     * The length reported for fields for which we do not know the
     * real length.
     **/
    static const uint32_t UNKNOWN_LENGTH;

    /**
     * Create a new iterator for a field we know nothing about. This
     * will give the field no position data and a length of
     * UNKNOWN_LENGTH.
     **/
    FieldPositionsIterator()
        : _length(UNKNOWN_LENGTH), _begin(0), _pos(0), _end(0) {}

    /**
     * Create a new iterator for a field with the given length, using
     * a slice of the underlying position data. The iterator starts
     * out positioned at the beginning of the slice.
     *
     * @param length the length of the field in words
     * @param begin start of position data slice
     * @param end end of position data slice
     **/
    FieldPositionsIterator(uint32_t length,
                           PositionsIterator begin,
                           PositionsIterator end)
        : _length(length), _begin(begin), _pos(begin), _end(end) {}

    /**
     * Relocate the references held by this object into the actual
     * occurrence data. This method assumes iterators are random
     * access and cheap to copy. This method must be invoked if the
     * underlying occurrence data is moved in memory. An iterator
     * whose begin reference is null (as after default construction)
     * is left untouched.
     *
     * @param oldRef old reference iterator
     * @param newRef new reference iterator
     **/
    void relocate(PositionsIterator oldRef, PositionsIterator newRef) {
        if (_begin != PositionsIterator(0)) {
            // keep each reference at the same offset relative to the new base
            _begin = newRef + (_begin - oldRef);
            _pos = newRef + (_pos - oldRef);
            _end = newRef + (_end - oldRef);
        }
    }

    /**
     * Check if there is valid data available at the current position
     * of this iterator.
     *
     * @return false if no more data is available
     **/
    bool valid() const { return _pos != _end; }

    /**
     * Step this iterator to the next position. This method may only
     * be invoked if the @ref valid method returns true.
     **/
    void next() { ++_pos; }

    /**
     * Try to step this iterator backwards. This method will return
     * false if the iterator is already located at the beginning.
     *
     * @return false if we are unable to step backwards
     **/
    bool prev() {
        if (_pos == _begin) {
            return false;
        }
        --_pos;
        return true;
    }

    /**
     * Obtain the word position within the field for the entry
     * indicated by the current position of this iterator. This method
     * may only be invoked if the @ref valid method returns true.
     *
     * @return word position within the field
     **/
    uint32_t getPosition() const { return _pos->getPosition(); }

    /**
     * Obtain the element id within the field for the entry
     * indicated by the current position of this iterator. This method
     * may only be invoked if the @ref valid method returns true.
     *
     * @return element id within the field
     **/
    uint32_t getElementId() const { return _pos->getElementId(); }

    /**
     * Obtain the element length within the field for the entry
     * indicated by the current position of this iterator. This method
     * may only be invoked if the @ref valid method returns true.
     *
     * @return element length within the field
     **/
    uint32_t getElementLen() const { return _pos->getElementLen(); }

    /**
     * Obtain the element weight within the field for the entry
     * indicated by the current position of this iterator. This method
     * may only be invoked if the @ref valid method returns true.
     *
     * @return element weight within the field
     **/
    int32_t getElementWeight() const { return _pos->getElementWeight(); }

    /**
     * Obtain the match exactness indicated by the current position of
     * this iterator. This method may only be invoked if the @ref valid
     * method returns true.
     *
     * @return exactness measure
     **/
    double getMatchExactness() const { return _pos->getMatchExactness(); }

    /**
     * Obtain the total number of words in the field.
     *
     * @return field length in words.
     **/
    uint32_t getFieldLength() const { return _length; }

    /**
     * Obtain the number of positions in this iterator.
     *
     * @return number of positions
     **/
    uint32_t size() const { return (_end - _begin); }
};
+ **/ + bool operator==(const FieldType &rhs) const { return (_value == rhs._value); } + + /** + * Check if two field types are not equal. + **/ + bool operator!=(const FieldType &rhs) const { return (_value != rhs._value); } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp b/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp new file mode 100644 index 00000000000..c113efe33d9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/filetablefactory.cpp @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.filetablefactory"); +#include "filetablefactory.h" +#include + +namespace search { +namespace fef { + +FileTableFactory::FileTableFactory(const vespalib::string & path) : + _path(path) +{ +} + +Table::SP +FileTableFactory::createTable(const vespalib::string & name) const +{ + vespalib::string completeName(_path); + completeName.append("/"); + completeName.append(name); + std::ifstream file(completeName.c_str(), std::ifstream::in); + if (file.is_open()) { + Table::SP table(new Table()); + for (;;) { + double val = 0; + file >> val; + if (!file.good()) { + break; + } + table->add(val); + } + return table; + } + LOG(warning, "Could not open file '%s' for creating table '%s'", completeName.c_str(), name.c_str()); + return Table::SP(NULL); +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/filetablefactory.h b/searchlib/src/vespa/searchlib/fef/filetablefactory.h new file mode 100644 index 00000000000..74e0a09e6dd --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/filetablefactory.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "itablefactory.h" + +namespace search { +namespace fef { + +/** + * This factory class is used to instantiate tables that are stored in files on disk. + **/ +class FileTableFactory : public ITableFactory +{ +private: + vespalib::string _path; + +public: + /** + * Creates a new factory for table files that are located in the given path. + **/ + FileTableFactory(const vespalib::string & path); + + /** + * Creates a table by reading the file 'path/name' and setting up a Table object. + * The numbers in the file should be separated with ' ' or '\n'. + * Table::SP(NULL) is returned if the file 'path/name' is not found. + **/ + virtual Table::SP createTable(const vespalib::string & name) const; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp new file mode 100644 index 00000000000..a901ecc90ea --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.cpp @@ -0,0 +1,134 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.functiontablefactory"); +#include +#include +#include +#include "functiontablefactory.h" + +namespace { + +void logArgumentWarning(const vespalib::string & name, size_t exp, size_t act) +{ + LOG(warning, "Cannot create table for function '%s'. 
Wrong number of arguments: expected %zu to %zu, but got %zu", + name.c_str(), exp, exp + 1, act); +} + +} + +namespace search { +namespace fef { + +bool +FunctionTableFactory::checkArgs(const std::vector & args, size_t exp, size_t & tableSize) const +{ + if (exp <= args.size() && args.size() <= (exp + 1)) { + if (args.size() == (exp + 1)) { + tableSize = atoi(args.back().c_str()); + } else { + tableSize = _defaultTableSize; + } + return true; + } + return false; +} + +bool +FunctionTableFactory::isSupported(const vespalib::string & type) const +{ + return (isExpDecay(type) || isLogGrowth(type) || isLinear(type)); +} + +Table::SP +FunctionTableFactory::createExpDecay(double w, double t, size_t len) const +{ + Table::SP table(new Table()); + for (size_t x = 0; x < len; ++x) { + table->add(w * exp(-(x / t))); + } + return table; +} + +Table::SP +FunctionTableFactory::createLogGrowth(double w, double t, double s, size_t len) const +{ + Table::SP table(new Table()); + for (size_t x = 0; x < len; ++x) { + table->add(w * (log(1 + (x / s))) + t); + } + return table; +} + +Table::SP +FunctionTableFactory::createLinear(double w, double t, size_t len) const +{ + Table::SP table(new Table()); + for (size_t x = 0; x < len; ++x) { + table->add(w * x + t); + } + return table; +} + +FunctionTableFactory::FunctionTableFactory(size_t defaultTableSize) : + _defaultTableSize(defaultTableSize) +{ +} + +Table::SP +FunctionTableFactory::createTable(const vespalib::string & name) const +{ + ParsedName p; + if (parseFunctionName(name, p)) { + if (isSupported(p.type)) { + size_t tableSize = _defaultTableSize; + if (isExpDecay(p.type)) { + if (checkArgs(p.args, 2, tableSize)) { + return createExpDecay(atof(p.args[0].c_str()), atof(p.args[1].c_str()), tableSize); + } + logArgumentWarning(name, 2, p.args.size()); + } else if (isLogGrowth(p.type)) { + if (checkArgs(p.args, 3, tableSize)) { + return createLogGrowth(atof(p.args[0].c_str()), atof(p.args[1].c_str()), atof(p.args[2].c_str()), 
tableSize); + } + logArgumentWarning(name, 3, p.args.size()); + } else if (isLinear(p.type)) { + if (checkArgs(p.args, 2, tableSize)) { + return createLinear(atof(p.args[0].c_str()), atof(p.args[1].c_str()), tableSize); + } + logArgumentWarning(name, 2, p.args.size()); + } + } else { + LOG(warning, "Cannot create table for function '%s'. Function type '%s' is not supported", + name.c_str(), p.type.c_str()); + } + } else { + LOG(warning, "Cannot create table for function '%s'. Could not be parsed.", name.c_str()); + } + return Table::SP(NULL); +} + +bool +FunctionTableFactory::parseFunctionName(const vespalib::string & name, ParsedName & parsed) +{ + size_t ps = name.find('('); + size_t pe = name.find(')'); + if (ps == vespalib::string::npos || pe == vespalib::string::npos) { + LOG(warning, "Parse error: Did not find '(' and ')' in function name '%s'", name.c_str()); + return false; + } + if (ps >= pe) { + LOG(warning, "Parse error: Found ')' before '(' in function name '%s'", name.c_str()); + return false; + } + parsed.type = name.substr(0, ps); + vespalib::string args = name.substr(ps + 1, pe - ps - 1); + if (!args.empty()) { + boost::split(parsed.args, args, boost::is_any_of(",")); + } + return true; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/functiontablefactory.h b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h new file mode 100644 index 00000000000..10a610f3c04 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/functiontablefactory.h @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "itablefactory.h" + +namespace search { +namespace fef { + +/** + * This factory class is used to instantiate tables based on a function. + * The name of the table specifies the function and arguments to use. 
+ * The following functions are supported: + * - expdecay(w,t) : w * exp(-x/t) + * - loggrowth(w,t,s) : w * log(1 + x/s) + t + * - linear(w,t) : w * x + t + * All functions support an optional last parameter for setting the table size. + **/ +class FunctionTableFactory : public ITableFactory +{ +public: + struct ParsedName { + vespalib::string type; + std::vector args; + ParsedName() : type(), args() {} + }; + +private: + size_t _defaultTableSize; + + bool checkArgs(const std::vector & args, size_t exp, size_t & tableSize) const; + bool isSupported(const vespalib::string & type) const; + bool isExpDecay(const vespalib::string & type) const { return type == "expdecay"; } + bool isLogGrowth(const vespalib::string & type) const { return type == "loggrowth"; } + bool isLinear(const vespalib::string & type) const { return type == "linear"; } + Table::SP createExpDecay(double w, double t, size_t len) const; + Table::SP createLogGrowth(double w, double t, double s, size_t len) const; + Table::SP createLinear(double w, double t, size_t len) const; + +public: + /** + * Creates a new factory able to create tables with the given default size. + **/ + FunctionTableFactory(size_t defaultTableSize); + + /** + * Creates a table where the given name specifies the function and arguments to use. + **/ + virtual Table::SP createTable(const vespalib::string & name) const; + + /** + * Parses the given function name and returns true if success. + **/ + static bool parseFunctionName(const vespalib::string & name, ParsedName & parsed); +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/handle.h b/searchlib/src/vespa/searchlib/fef/handle.h new file mode 100644 index 00000000000..3e608b251b7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/handle.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { +namespace fef { + +typedef uint32_t FeatureHandle; +typedef uint32_t TermFieldHandle; + +const uint32_t IllegalHandle = 0xffffffff; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h b/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h new file mode 100644 index 00000000000..20092719d2f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/iblueprintregistry.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { +namespace fef { + +/** + * This is an interface used during plugin setup to register blueprint + * prototypes. + **/ +class IBlueprintRegistry +{ +public: + /** + * Add a blueprint prototype to the registry. + **/ + virtual void addPrototype(Blueprint::SP proto) = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~IBlueprintRegistry() {} +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h b/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h new file mode 100644 index 00000000000..0014dd677e4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/idumpfeaturevisitor.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +namespace search { +namespace fef { + +/** + * This interface is implemented by objects that want to visit all + * dump features. + **/ +class IDumpFeatureVisitor +{ +public: + /** + * Visit a feature that should be dumped when doing a full feature + * dump. Note that full feature names must be used, for example + * 'foo(a,b).out'. + * + * @param name full feature name + **/ + virtual void visitDumpFeature(const vespalib::string &name) = 0; + + /** + * Virtual destructor to allow safe subclassing. 
+ **/ + virtual ~IDumpFeatureVisitor() {} +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/iindexenvironment.h b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h new file mode 100644 index 00000000000..c2f489abe3b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/iindexenvironment.h @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace fef { + +class Properties; +class FieldInfo; +class ITableManager; + +/** + * Abstract view of index related information available to the + * framework. + **/ +class IIndexEnvironment +{ +public: + typedef vespalib::string string; + /** + * This enum defines the different motivations the framework has + * for configuring a feature blueprint. RANK means the feature is + * needed for ranking calculations in normal operation. DUMP means + * the feature is needed to perform a feature dump. VERIFY_SETUP + * means that we are just trying to figure out if this setup is + * valid; the feature will never actually be executed. + **/ + enum FeatureMotivation { + UNKNOWN = 0, + RANK = 1, + DUMP = 2, + VERIFY_SETUP = 3 + }; + + /** + * Obtain the set of properties associated with this index + * environment. + * + * @return properties + **/ + virtual const Properties &getProperties() const = 0; + + /** + * Obtain the number of fields + * + * @return number of fields + **/ + virtual uint32_t getNumFields() const = 0; + + /** + * Obtain a field by using the field enumeration. The legal range + * for id is [0, getNumFields>. If id is out of bounds, 0 will be + * returned. + * + * @return information about a single field + **/ + virtual const FieldInfo *getField(uint32_t id) const = 0; + + /** + * Obtain a field by using the field name. If the field is not + * found, 0 will be returned. 
+ * + * @return information about a single field + **/ + virtual const FieldInfo *getFieldByName(const string &name) const = 0; + + /** + * Obtain the table manager associated with this index environment. + * + * @return table manager + **/ + virtual const ITableManager &getTableManager() const = 0; + + /** + * Obtain the current motivation behind feature setup. The + * motivation is typically that we want to set up features for + * ranking or dumping. In some cases we are also setting things up + * just to verify that it is possible. + * + * @return current feature motivation + **/ + virtual FeatureMotivation getFeatureMotivation() const = 0; + + /** + * Hint about the nature of the feature blueprints we are about to + * configure. This method provides additional information that may + * be useful when interpreting hints about future field and + * attribute access. + * + * @param motivation the motivation behind the feature blueprints + * the framework is about to configure. + **/ + virtual void hintFeatureMotivation(FeatureMotivation motivation) const = 0; + + /** + * Hint about the future access of a field. This method may be + * used by blueprints during setup to hint the enclosing system + * that a feature executor created by it might try to access the + * field iterator for a specific field during execution. + * + * @param fieldId field id + **/ + virtual void hintFieldAccess(uint32_t fieldId) const = 0; + + /** + * Hint about the future access of an attribute. This method may + * be used by blueprints during setup to hint the enclosing system + * that a feature executor created by it might try to access a + * specific attribute during execution. + * + * @param name attribute name + **/ + virtual void hintAttributeAccess(const string &name) const = 0; + + /** + * Virtual destructor to allow safe subclassing. 
+ **/ + virtual ~IIndexEnvironment() {} +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp new file mode 100644 index 00000000000..14d92e3a909 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp @@ -0,0 +1,373 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "indexproperties.h" +#include "properties.h" +#include + +namespace search { +namespace fef { +namespace indexproperties { + +namespace { + +vespalib::string +lookupString(const Properties &props, const vespalib::string &name, + const vespalib::string &defaultValue) +{ + Property p = props.lookup(name); + if (p.found()) { + return p.get(); + } + return defaultValue; +} + +std::vector +lookupStringVector(const Properties &props, const vespalib::string &name, + const std::vector &defaultValue) +{ + Property p = props.lookup(name); + if (p.found()) { + std::vector retval; + for (uint32_t i = 0; i < p.size(); ++i) { + retval.push_back(p.getAt(i)); + } + return retval; + } + return defaultValue; +} + +double +lookupDouble(const Properties &props, const vespalib::string &name, double defaultValue) +{ + Property p = props.lookup(name); + if (p.found()) { + return strtod(p.get().c_str(), NULL); + } + return defaultValue; +} + +uint32_t +lookupUint32(const Properties &props, const vespalib::string &name, uint32_t defaultValue) +{ + Property p = props.lookup(name); + if (p.found()) { + return atoi(p.get().c_str()); + } + return defaultValue; +} + +bool +lookupBool(const Properties &props, const vespalib::string &name, bool defaultValue) +{ + Property p = props.lookup(name); + if (p.found()) { + return (p.get() == "true"); + } + return defaultValue; +} + +bool +checkIfTrue(const Properties &props, const vespalib::string &name, + const vespalib::string &defaultValue) +{ + return 
(props.lookup(name).get(defaultValue) == "true"); +} + +} + +namespace rank { + +const vespalib::string FirstPhase::NAME("vespa.rank.firstphase"); +const vespalib::string FirstPhase::DEFAULT_VALUE("nativeRank"); + +vespalib::string +FirstPhase::lookup(const Properties &props) +{ + return lookupString(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string SecondPhase::NAME("vespa.rank.secondphase"); +const vespalib::string SecondPhase::DEFAULT_VALUE(""); + +vespalib::string +SecondPhase::lookup(const Properties &props) +{ + return lookupString(props, NAME, DEFAULT_VALUE); +} + +} // namespace rank + +namespace summary { + +const vespalib::string Feature::NAME("vespa.summary.feature"); +const std::vector Feature::DEFAULT_VALUE; + +std::vector +Feature::lookup(const Properties &props) +{ + return lookupStringVector(props, NAME, DEFAULT_VALUE); +} + +} // namespace summary + +namespace dump { + +const vespalib::string Feature::NAME("vespa.dump.feature"); +const std::vector Feature::DEFAULT_VALUE; + +std::vector +Feature::lookup(const Properties &props) +{ + return lookupStringVector(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string IgnoreDefaultFeatures::NAME("vespa.dump.ignoredefaultfeatures"); +const vespalib::string IgnoreDefaultFeatures::DEFAULT_VALUE("false"); + +bool +IgnoreDefaultFeatures::check(const Properties &props) +{ + return checkIfTrue(props, NAME, DEFAULT_VALUE); +} + +} // namespace dump + +namespace matching { + +const vespalib::string TermwiseLimit::NAME("vespa.matching.termwise_limit"); +const double TermwiseLimit::DEFAULT_VALUE(1.0); + +double +TermwiseLimit::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string NumThreadsPerSearch::NAME("vespa.matching.numthreadspersearch"); +const uint32_t NumThreadsPerSearch::DEFAULT_VALUE(std::numeric_limits::max()); + +uint32_t +NumThreadsPerSearch::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + 
+const vespalib::string NumSearchPartitions::NAME("vespa.matching.numsearchpartitions"); +const uint32_t NumSearchPartitions::DEFAULT_VALUE(1); + +uint32_t +NumSearchPartitions::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} +} // namespace matching + +namespace matchphase { + +const vespalib::string DegradationAttribute::NAME("vespa.matchphase.degradation.attribute"); +const vespalib::string DegradationAttribute::DEFAULT_VALUE(""); + +const vespalib::string DegradationAscendingOrder::NAME("vespa.matchphase.degradation.ascendingorder"); +const bool DegradationAscendingOrder::DEFAULT_VALUE(false); + +const vespalib::string DegradationMaxHits::NAME("vespa.matchphase.degradation.maxhits"); +const uint32_t DegradationMaxHits::DEFAULT_VALUE(0); + +const vespalib::string DegradationSamplePercentage::NAME("vespa.matchphase.degradation.samplepercentage"); +const double DegradationSamplePercentage::DEFAULT_VALUE(0.2); + +const vespalib::string DegradationMaxFilterCoverage::NAME("vespa.matchphase.degradation.maxfiltercoverage"); +const double DegradationMaxFilterCoverage::DEFAULT_VALUE(1.0); + +const vespalib::string DegradationPostFilterMultiplier::NAME("vespa.matchphase.degradation.postfiltermultiplier"); +const double DegradationPostFilterMultiplier::DEFAULT_VALUE(1.0); + +const vespalib::string DiversityAttribute::NAME("vespa.matchphase.diversity.attribute"); +const vespalib::string DiversityAttribute::DEFAULT_VALUE(""); + +const vespalib::string DiversityMinGroups::NAME("vespa.matchphase.diversity.mingroups"); +const uint32_t DiversityMinGroups::DEFAULT_VALUE(1); + +const vespalib::string DiversityCutoffFactor::NAME("vespa.matchphase.diversity.cutoff.factor"); +const double DiversityCutoffFactor::DEFAULT_VALUE(10.0); + +const vespalib::string DiversityCutoffStrategy::NAME("vespa.matchphase.diversity.cutoff.strategy"); +const vespalib::string DiversityCutoffStrategy::DEFAULT_VALUE("loose"); + +vespalib::string 
+DegradationAttribute::lookup(const Properties &props) +{ + return lookupString(props, NAME, DEFAULT_VALUE); +} + +bool +DegradationAscendingOrder::lookup(const Properties &props) +{ + return lookupBool(props, NAME, DEFAULT_VALUE); +} + +uint32_t +DegradationMaxHits::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + +double +DegradationSamplePercentage::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +double +DegradationMaxFilterCoverage::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +double +DegradationPostFilterMultiplier::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +vespalib::string +DiversityAttribute::lookup(const Properties &props) +{ + return lookupString(props, NAME, DEFAULT_VALUE); +} + +uint32_t +DiversityMinGroups::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + +double +DiversityCutoffFactor::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +vespalib::string +DiversityCutoffStrategy::lookup(const Properties &props) +{ + return lookupString(props, NAME, DEFAULT_VALUE); +} + + +} + +namespace hitcollector { + +const vespalib::string HeapSize::NAME("vespa.hitcollector.heapsize"); +const uint32_t HeapSize::DEFAULT_VALUE(100); + +uint32_t +HeapSize::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string ArraySize::NAME("vespa.hitcollector.arraysize"); +const uint32_t ArraySize::DEFAULT_VALUE(10000); + +uint32_t +ArraySize::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string EstimatePoint::NAME("vespa.hitcollector.estimatepoint"); +const uint32_t EstimatePoint::DEFAULT_VALUE(0xffffffff); + +uint32_t +EstimatePoint::lookup(const Properties &props) +{ + return lookupUint32(props, 
NAME, DEFAULT_VALUE); +} + +const vespalib::string EstimateLimit::NAME("vespa.hitcollector.estimatelimit"); +const uint32_t EstimateLimit::DEFAULT_VALUE(0xffffffff); + +uint32_t +EstimateLimit::lookup(const Properties &props) +{ + return lookupUint32(props, NAME, DEFAULT_VALUE); +} + +const vespalib::string RankScoreDropLimit::NAME("vespa.hitcollector.rankscoredroplimit"); +const feature_t RankScoreDropLimit::DEFAULT_VALUE(-std::numeric_limits::quiet_NaN()); + +feature_t +RankScoreDropLimit::lookup(const Properties &props) +{ + return lookupDouble(props, NAME, DEFAULT_VALUE); +} + +} // namespace hitcollector + + +const vespalib::string FieldWeight::BASE_NAME("vespa.fieldweight."); +const uint32_t FieldWeight::DEFAULT_VALUE(100); + +uint32_t +FieldWeight::lookup(const Properties &props, const vespalib::string &fieldName) +{ + return lookupUint32(props, BASE_NAME + fieldName, DEFAULT_VALUE); +} + + +const vespalib::string IsFilterField::BASE_NAME("vespa.isfilterfield."); +const vespalib::string IsFilterField::DEFAULT_VALUE("false"); + +void +IsFilterField::set(Properties &props, const vespalib::string &fieldName) +{ + props.add(BASE_NAME + fieldName, "true"); +} + +bool +IsFilterField::check(const Properties &props, const vespalib::string &fieldName) +{ + return checkIfTrue(props, BASE_NAME + fieldName, DEFAULT_VALUE); +} + + +namespace type { + +const vespalib::string Attribute::BASE_NAME("vespa.type.attribute."); +const vespalib::string Attribute::DEFAULT_VALUE(""); + +vespalib::string +Attribute::lookup(const Properties &props, const vespalib::string &attributeName) +{ + return lookupString(props, BASE_NAME + attributeName, DEFAULT_VALUE); +} + +void +Attribute::set(Properties &props, const vespalib::string &attributeName, const vespalib::string &type) +{ + props.add(BASE_NAME + attributeName, type); +} + +const vespalib::string QueryFeature::BASE_NAME("vespa.type.query."); +const vespalib::string QueryFeature::DEFAULT_VALUE(""); + +vespalib::string
+QueryFeature::lookup(const Properties &props, const vespalib::string &queryFeatureName) +{ + return lookupString(props, BASE_NAME + queryFeatureName, DEFAULT_VALUE); +} + +void +QueryFeature::set(Properties &props, const vespalib::string &queryFeatureName, const vespalib::string &type) +{ + props.add(BASE_NAME + queryFeatureName, type); +} + +} // namespace type + +} // namespace indexproperties +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h new file mode 100644 index 00000000000..8dcd08dfc49 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h @@ -0,0 +1,307 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace fef { + +class Properties; + +/** + * This namespace is a placeholder for several structs, each representing + * an index property with name and default value. All property names + * defined here will have the prefix "vespa." and are known by the + * feature execution framework. When accessing an index property from a @ref Properties + * instance one should use the property names defined here to perform the lookup. + * If the property is not present the default value is used. + **/ +namespace indexproperties { + +namespace rank { + + /** + * Property for the feature name used for first phase rank. + **/ + struct FirstPhase { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props); + }; + + /** + * Property for the feature name used for second phase rank. 
+ **/ + struct SecondPhase { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props); + }; + +} // namespace rank + +namespace summary { + + /** + * Property for the set of features to be inserted into the + * summaryfeatures docsum field + **/ + struct Feature { + static const vespalib::string NAME; + static const std::vector DEFAULT_VALUE; + static std::vector lookup(const Properties &props); + }; + +} // namespace summary + +namespace dump { + + /** + * Property for the set of feature names used for dumping. + **/ + struct Feature { + static const vespalib::string NAME; + static const std::vector DEFAULT_VALUE; + static std::vector lookup(const Properties &props); + }; + + /** + * Property that may be used to ignore default rank features when + * dumping. + **/ + struct IgnoreDefaultFeatures { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static bool check(const Properties &props); + }; + +} // namespace dump + +namespace matching { + + /** + * A number in the range [0,1] indicating how much of the corpus + * the query must match for termwise evaluation to be enabled. 1 + * means never allowed. 0 means always allowed. The default value + * is 1 (never). + **/ + struct TermwiseLimit { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + }; + + /** + * Property for the number of threads used per search. + **/ + struct NumThreadsPerSearch { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + /** + * Property for the number of search partitions.
+ **/ + struct NumSearchPartitions { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; +} + +namespace matchphase { + + /** + * Property for the attribute used for graceful degradation during match phase. + **/ + struct DegradationAttribute { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props); + }; + + /** + * Property for the order used for graceful degradation during match phase. + **/ + struct DegradationAscendingOrder { + static const vespalib::string NAME; + static const bool DEFAULT_VALUE; + static bool lookup(const Properties &props); + }; + + /** + * Property for how many hits the user wanted for graceful degradation during match phase. + **/ + struct DegradationMaxHits { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + /** + * Property for how many hits out of wanted hits to collect before considering graceful degradation during match phase. + **/ + struct DegradationSamplePercentage { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + }; + + struct DegradationMaxFilterCoverage { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + }; + + /** + * Property for moving the switchpoint between pre and post filtering. + * Values > 1 favor pre filtering; values < 1 favor post filtering. + **/ + struct DegradationPostFilterMultiplier { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + }; + + /** + * The name of the attribute used to ensure result diversity + * during match phase limiting. If this property is "" (empty + * string; the default) diversity will be disabled.
+ **/ + struct DiversityAttribute { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props); + }; + + /** + * If we were to later group on the diversity attribute, try not + * to end up with fewer groups than this number. If this property + * is 1 (the default) diversity will be disabled. + **/ + struct DiversityMinGroups { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + struct DiversityCutoffFactor { + static const vespalib::string NAME; + static const double DEFAULT_VALUE; + static double lookup(const Properties &props); + }; + struct DiversityCutoffStrategy { + static const vespalib::string NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props); + }; + +} // namespace matchphase + + +namespace hitcollector { + + /** + * Property for the heap size used in the hit collector. + **/ + struct HeapSize { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + /** + * Property for the array size used in the hit collector. + **/ + struct ArraySize { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + /** + * Property for the estimate point used in parallel query evaluation. + * Specifies when to estimate the total number of hits. + **/ + struct EstimatePoint { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + /** + * Property for the estimate limit used in parallel query evaluation. + * Specifies the limit for a hit estimate. If the estimate is above the limit abort ranking. 
+ **/ + struct EstimateLimit { + static const vespalib::string NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props); + }; + + /** + * Property for the rank score drop limit used in parallel query evaluation. + * Drop a hit if the rank score <= drop limit. + **/ + struct RankScoreDropLimit { + static const vespalib::string NAME; + static const feature_t DEFAULT_VALUE; + static feature_t lookup(const Properties &props); + }; + + +} // namespace hitcollector + +/** + * Property for the field weight of a field. + **/ +struct FieldWeight { + static const vespalib::string BASE_NAME; + static const uint32_t DEFAULT_VALUE; + static uint32_t lookup(const Properties &props, const vespalib::string &fieldName); +}; + +/** + * Property for whether a field is a filter field. + **/ +struct IsFilterField { + static const vespalib::string BASE_NAME; + static const vespalib::string DEFAULT_VALUE; + static void set(Properties &props, const vespalib::string &fieldName); + static bool check(const Properties &props, const vespalib::string &fieldName); +}; + +namespace type { + +/** + * Property for the type of an attribute. + * Currently, only tensor types are specified using this. + */ +struct Attribute { + static const vespalib::string BASE_NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props, const vespalib::string &attributeName); + static void set(Properties &props, const vespalib::string &attributeName, const vespalib::string &type); +}; + +/** + * Property for the type of a query feature. + * Currently, only tensor types are specified using this. 
+ */ +struct QueryFeature { + static const vespalib::string BASE_NAME; + static const vespalib::string DEFAULT_VALUE; + static vespalib::string lookup(const Properties &props, const vespalib::string &queryFeatureName); + static void set(Properties &props, const vespalib::string &queryFeatureName, const vespalib::string &type); +}; + +} // namespace type + + +} // namespace indexproperties +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h new file mode 100644 index 00000000000..b84782995d2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "iindexenvironment.h" +#include +#include + +namespace search { +namespace fef { + +class Location; +class Properties; +class ITermData; + +/** + * Abstract view of query related information available to the + * framework. + **/ +class IQueryEnvironment +{ +public: + /** + * Convenience typedef. + **/ + typedef std::shared_ptr SP; + + /** + * Obtain the set of properties associated with this query + * environment. This set of properties is known through the system + * as 'rankProperties', and is tagged with the name 'rank' when + * propagated down through the system. + * + * @return properties + **/ + virtual const Properties &getProperties() const = 0; + + /** + * Obtain the number of ranked terms in the query. The order of the + * terms are not yet strongly defined. + * + * @return number of ranked terms in the query + **/ + virtual uint32_t getNumTerms() const = 0; + + /** + * Obtain information about a single ranked term in the query. If + * idx is out of bounds, 0 will be returned. + * + * TODO: this must return an ordering that corresponds to the connexity of the term data. 
+ * TODO: any other ordering seems inappropriate when we offer connexity as an attribute of + * TODO: the term data. + * + * @return information about a ranked term + * @param idx the term we want information about + **/ + virtual const ITermData *getTerm(uint32_t idx) const = 0; + + /** + * Obtain the location information associated with this query environment. + * + * @return location object. + **/ + virtual const Location & getLocation() const = 0; + + /** + * Returns the attribute context for this query. + * + * @return attribute context + **/ + virtual const search::attribute::IAttributeContext & getAttributeContext() const = 0; + + /** + * Returns a const view of the index environment. + * + * @return index environment + **/ + virtual const IIndexEnvironment & getIndexEnvironment() const = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~IQueryEnvironment() { } + + IObjectStore & getObjectStore() { return _objectStore; } + const IObjectStore & getObjectStore() const { return _objectStore; } +protected: + IQueryEnvironment() { } +private: + ObjectStore _objectStore; +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/itablefactory.h b/searchlib/src/vespa/searchlib/fef/itablefactory.h new file mode 100644 index 00000000000..b2c1146d7d9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/itablefactory.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "table.h" + +namespace search { +namespace fef { + +/** + * This is an interface for a factory used to create tables. + **/ +class ITableFactory +{ +public: + /** + * Convenience typedef for a shared pointer to this class. + **/ + typedef std::shared_ptr SP; + + /** + * Creates a table with the given name. + * Table::SP(NULL) is returned if the table cannot be created. 
+ **/ + virtual Table::SP createTable(const vespalib::string & name) const = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~ITableFactory() {} +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/itablemanager.h b/searchlib/src/vespa/searchlib/fef/itablemanager.h new file mode 100644 index 00000000000..e84c0d50db4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/itablemanager.h @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "table.h" + +namespace search { +namespace fef { + +/** + * This is an interface used to access registered tables. + **/ +class ITableManager +{ +public: + /** + * Returns a const view of the table with the given name or NULL if not found. + **/ + virtual const Table * getTable(const vespalib::string & name) const = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~ITableManager() {} +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/itermdata.h b/searchlib/src/vespa/searchlib/fef/itermdata.h new file mode 100644 index 00000000000..f8e1cf9c0c8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/itermdata.h @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "handle.h" +#include "itermfielddata.h" +#include + +namespace search { +namespace fef { + +/** + * Interface to static match data for a single unit (term/phrase/etc). + **/ +class ITermData +{ +protected: + virtual ~ITermData() {} + +public: + /** + * Returns the term weight. + **/ + virtual query::Weight getWeight() const = 0; + + /** + * Returns the number of terms represented by this term data object. 
+ **/ + virtual uint32_t getPhraseLength() const = 0; + + /** + * Obtain the location of this term in the original user query. + * + * @return term index + **/ + virtual uint32_t getTermIndex() const = 0; + + /** + * Obtain the unique id of this term. 0 means not set. + * + * @return unique id or 0 + **/ + virtual uint32_t getUniqueId() const = 0; + + /** + * Get number of fields searched + **/ + virtual size_t numFields() const = 0; + + /** + * Direct access to data for individual fields + * @param i local index, must have: 0 <= i < numFields() + */ + virtual const ITermFieldData &field(size_t i) const = 0; + + /** + * Obtain information about a specific field that may be searched + * by this term. If the requested field is not searched by this + * term, NULL will be returned. + * + * @param fieldId global field ID + * @return term field data, or NULL if not found + **/ + virtual const ITermFieldData *lookupField(uint32_t fieldId) const = 0; +}; + +/** + * convenience adapter for easy iteration + **/ +class ITermFieldRangeAdapter +{ + const ITermData& _ref; + size_t _idx; + size_t _lim; +public: + explicit ITermFieldRangeAdapter(const ITermData& ref) + : _ref(ref), _idx(0), _lim(ref.numFields()) + {} + + bool valid() const { return (_idx < _lim); } + + const ITermFieldData& get() const { return _ref.field(_idx); } + + void next() { assert(valid()); ++_idx; } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/itermfielddata.h b/searchlib/src/vespa/searchlib/fef/itermfielddata.h new file mode 100644 index 00000000000..f86bba1af4a --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/itermfielddata.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "handle.h" + +namespace search { +namespace fef { + +/** + * Interface to information about a single field that is being + * searched for a term (described by the ITermData interface). The + * field may be either an index field or an attribute field. If more + * information about the field is needed, the field id may be used to + * consult the index environment. + **/ +class ITermFieldData +{ +protected: + virtual ~ITermFieldData() {} + +public: + /** + * Obtain the global field id. + * + * @return field id + **/ + virtual uint32_t getFieldId() const = 0; + + /** + * Obtain the document frequency. This is a value between 0 and 1 + * indicating the ratio of the matching documents to the corpus. + * + * @return document frequency + **/ + virtual double getDocFreq() const = 0; + + /** + * Obtain the match handle for this field. + * + * @return match handle (or IllegalHandle) + **/ + virtual TermFieldHandle getHandle() const = 0; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/location.cpp b/searchlib/src/vespa/searchlib/fef/location.cpp new file mode 100644 index 00000000000..9bed7305a1e --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/location.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "location.h" + +namespace search { +namespace fef { + +Location::Location() : + _attr(), + _xPos(0), + _yPos(0), + _xAspect(0), + _valid(false) +{ +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/location.h b/searchlib/src/vespa/searchlib/fef/location.h new file mode 100644 index 00000000000..cfb66016cd5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/location.h @@ -0,0 +1,111 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include + +namespace search { +namespace fef { + +/** + * This class contains location data that is associated with a query. + **/ +class Location +{ +private: + vespalib::string _attr; + int32_t _xPos; + int32_t _yPos; + uint32_t _xAspect; + bool _valid; + +public: + /** + * Creates an empty object. + **/ + Location(); + + /** + * Sets the name of the attribute to use for x positions. + * + * @param xAttr the attribute name. + * @return this to allow chaining. + **/ + Location & + setAttribute(const vespalib::string & attr) + { + _attr = attr; + return *this; + } + + /** + * Returns the name of the attribute to use for x positions. + * + * @return the attribute name. + **/ + const vespalib::string & getAttribute() const { return _attr; } + + /** + * Sets the x position of this location. + * + * @param xPos the x position. + * @return this to allow chaining. + **/ + Location & setXPosition(int32_t xPos) { _xPos = xPos; return *this; } + + /** + * Returns the x position of this location. + * + * @return the x position. + **/ + int32_t getXPosition() const { return _xPos; } + + /** + * Sets the y position of this location. + * + * @param yPos the y position. + * @return this to allow chaining. + **/ + Location & setYPosition(int32_t yPos) { _yPos = yPos; return *this; } + + /** + * Returns the y position of this location. + * + * @return the y position. + **/ + int32_t getYPosition() const { return _yPos; } + + /** + * Sets the x distance multiplier fraction. + * + * @param xAspect the x aspect. + * @return this to allow chaining. + **/ + Location & setXAspect(uint32_t xAspect) { _xAspect = xAspect; return *this; } + + /** + * Returns the x distance multiplier fraction. + * + * @return the x aspect. + **/ + uint32_t getXAspect() const { return _xAspect; } + + /** + * Sets whether this is a valid location object. + * + * @param valid true if this is valid. + * @return this to allow chaining. 
+ **/ + Location & setValid(bool valid) { _valid = valid; return *this; } + + /** + * Returns whether this is a valid location object. + * + * @param true if this is a valid. + **/ + bool isValid() const { return _valid; } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.cpp b/searchlib/src/vespa/searchlib/fef/matchdata.cpp new file mode 100644 index 00000000000..4dc411dce72 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/matchdata.cpp @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "matchdata.h" +#include + +namespace search { +namespace fef { + +MatchData::MatchData(const Params &cparams) + : _docid(TermFieldMatchData::invalidId()), + _termFields(cparams.numTermFields()), + _features(cparams.numFeatures()), + _feature_is_object(cparams.numFeatures(), false), + _termwise_limit(1.0) +{ +} + +MatchData::UP +MatchData::makeTestInstance(uint32_t numFeatures, uint32_t numHandles, uint32_t fieldIdLimit) +{ + MatchData::UP data(new MatchData(Params().numFeatures(numFeatures).numTermFields(numHandles))); + for (uint32_t i = 0; i < numHandles; ++i) { + data->resolveTermField(i)->setFieldId(i % fieldIdLimit); + } + return data; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.h b/searchlib/src/vespa/searchlib/fef/matchdata.h new file mode 100644 index 00000000000..1f836eddfdc --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/matchdata.h @@ -0,0 +1,181 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "handle.h" +#include "termfieldmatchdata.h" +#include +#include +#include +#include + +namespace search { +namespace fef { + +/** + * An object of this class is used to store all basic data and derived + * features for a single hit. 
+ **/ +class MatchData +{ +private: + union NumberOrObject { + feature_t as_number; + vespalib::eval::Value::CREF as_object; + NumberOrObject() { memset(this, 0, sizeof(NumberOrObject)); } + ~NumberOrObject() {} + }; + uint32_t _docid; + std::vector _termFields; + std::vector _features; + std::vector _feature_is_object; + double _termwise_limit; + +public: + /** + * Wrapper for constructor parameters + **/ + class Params + { + private: + uint32_t _numTermFields; + uint32_t _numFeatures; + + friend class ::search::fef::MatchData; + Params() : _numTermFields(0), _numFeatures(0) {} + public: + uint32_t numTermFields() const { return _numTermFields; } + Params & numTermFields(uint32_t value) { + _numTermFields = value; + return *this; + } + + uint32_t numFeatures() const { return _numFeatures; } + Params & numFeatures(uint32_t value) { + _numFeatures = value; + return *this; + } + }; + /** + * Avoid C++'s most vexing parse problem. + * (reference: http://www.amazon.com/dp/0201749629/) + **/ + static Params params() { return Params(); } + + /** + * Convenience typedef for an auto-pointer to this class. + **/ + typedef std::unique_ptr UP; + + /** + * Create a new object with the given number of term, attribute, and feature + * slots. + * + * @param numTerms number of term slots + * @param numAttributes number of attribute slots + * @param numFeatures number of feature slots + **/ + explicit MatchData(const Params &cparams); + + MatchData(const MatchData &rhs) = delete; + MatchData & operator=(const MatchData &rhs) = delete; + + /** + * A number in the range [0,1] indicating how much of the corpus + * the query must match for termwise evaluation to be enabled. 1 + * means never allowed. 0 means always allowed. The initial value + * is 1 (never). This value is used when creating a search + * (queryeval::Blueprint::createSearch). 
+ **/ + double get_termwise_limit() const { return _termwise_limit; } + void set_termwise_limit(double value) { _termwise_limit = value; } + + /** + * Set the document id for this match object. This method is + * invoked by the parallel query evaluation driver code during + * term data unpacking. + * + * @param docid docid for this match data + **/ + void setDocId(uint32_t docid) { _docid = docid; } + + /** + * Obtain the document id for this match data. This may be used to + * check if we have term match data for the document we are + * processing or not. Also, it will be used when merging hits from + * the heap back into the full result set. + * + * @return document id for this match data + **/ + uint32_t getDocId() const { return _docid; } + + /** + * Obtain the number of term fields allocated in this match data + * structure. + * + * @return number of term fields allocated + **/ + uint32_t getNumTermFields() const { return _termFields.size(); } + + /** + * Obtain the number of features allocated in this match data + * structure. + * + * @return number of features allocated + **/ + uint32_t getNumFeatures() const { return _features.size(); } + + /** + * Resolve a term field handle into a pointer to the actual data. + * + * @return term field match data + * @param handle term field handle + **/ + TermFieldMatchData *resolveTermField(TermFieldHandle handle) { return &_termFields[handle]; } + + /** + * Resolve a term field handle into a pointer to the actual data. + * + * @return term field match data + * @param handle term field handle + **/ + const TermFieldMatchData *resolveTermField(TermFieldHandle handle) const { return &_termFields[handle]; } + + /** + * Resolve a feature handle into a pointer to the actual data. + * This is used to resolve both {@link FeatureExecutor#inputs} + * and {@link FeatureExecutor#outputs}. 
+ * + * @return feature location + * @param handle feature handle + **/ + feature_t *resolveFeature(FeatureHandle handle) { return &_features[handle].as_number; } + + /** + * Resolve a feature handle into a pointer to the actual data. + * This is used to resolve both {@link FeatureExecutor#inputs} + * and {@link FeatureExecutor#outputs}. + * + * @return feature location + * @param handle feature handle + **/ + const feature_t *resolveFeature(FeatureHandle handle) const { return &_features[handle].as_number; } + + void tag_feature_as_object(FeatureHandle handle) { _feature_is_object[handle] = true; } + bool feature_is_object(FeatureHandle handle) const { return _feature_is_object[handle]; } + + vespalib::eval::Value::CREF *resolve_object_feature(FeatureHandle handle) { + assert(_feature_is_object[handle]); + return &_features[handle].as_object; + } + + const vespalib::eval::Value::CREF *resolve_object_feature(FeatureHandle handle) const { + assert(_feature_is_object[handle]); + return &_features[handle].as_object; + } + + static MatchData::UP makeTestInstance(uint32_t numFeatures, uint32_t numHandles, uint32_t fieldIdLimit); +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp b/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp new file mode 100644 index 00000000000..64070006b59 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/matchdatalayout.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "matchdatalayout.h" + +namespace search { +namespace fef { + +MatchDataLayout::MatchDataLayout() + : _numTermFields(0), + _numFeatures(0), + _fieldIds(), + _object_features() +{ +} + +MatchData::UP +MatchDataLayout::createMatchData() const +{ + MatchData::UP md(new MatchData(MatchData::params() + .numTermFields(_numTermFields) + .numFeatures(_numFeatures))); + + assert(_numTermFields == _fieldIds.size()); + for (size_t i = 0; i < _numTermFields; ++i) { + md->resolveTermField(i)->setFieldId(_fieldIds[i]); + } + for (FeatureHandle object_handle: _object_features) { + md->tag_feature_as_object(object_handle); + } + return md; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/matchdatalayout.h b/searchlib/src/vespa/searchlib/fef/matchdatalayout.h new file mode 100644 index 00000000000..5b8240d3caa --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/matchdatalayout.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "handle.h" +#include "matchdata.h" + +namespace search { +namespace fef { + +/** + * This class is used to describe the layout of term match data and + * features within MatchData objects for a single query. + **/ +class MatchDataLayout +{ +private: + uint32_t _numTermFields; + uint32_t _numFeatures; + std::vector _fieldIds; + std::vector _object_features; + +public: + /** + * Create an empty object. + **/ + MatchDataLayout(); + + /** + * Allocate space for a term field match data structure. + * + * @param fieldId the field ID the space will be used for + * @return handle to be used with match data objects + **/ + TermFieldHandle allocTermField(uint32_t fieldId) { + _fieldIds.push_back(fieldId); + return _numTermFields++; + } + + /** + * Allocate space for a feature. 
+ * + * @return handle to be used with match data objects + **/ + FeatureHandle allocFeature(bool is_object = false) { + if (is_object) { + _object_features.push_back(_numFeatures); + } + return _numFeatures++; + } + + /** + * Create a match data object with the layout described by this + * object. Note that this method should only be invoked after all + * terms and features have been allocated. + * + * @return auto-pointer to a match data object + **/ + MatchData::UP createMatchData() const; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.cpp b/searchlib/src/vespa/searchlib/fef/objectstore.cpp new file mode 100644 index 00000000000..9e7aa5d9b81 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/objectstore.cpp @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include + +namespace search { +namespace fef { + +ObjectStore::ObjectStore() : + _objectMap() +{ +} + +ObjectStore::~ObjectStore() +{ + for(auto & it : _objectMap) { + delete it.second; + it.second = NULL; + } +} + +void +ObjectStore::add(const vespalib::string & key, Anything::UP value) +{ + ObjectMap::iterator found = _objectMap.find(key); + if (found != _objectMap.end()) { + delete found->second; + found->second = NULL; + } + _objectMap[key] = value.release(); +} + +const Anything * +ObjectStore::get(const vespalib::string & key) const +{ + ObjectMap::const_iterator found = _objectMap.find(key); + return (found != _objectMap.end()) ? found->second : NULL; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/fef/objectstore.h b/searchlib/src/vespa/searchlib/fef/objectstore.h new file mode 100644 index 00000000000..457371c4ebf --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/objectstore.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include + +namespace search { +namespace fef { + +class Anything +{ +public: + typedef std::unique_ptr UP; + virtual ~Anything() { } +}; + +class IObjectStore +{ +public: + virtual ~IObjectStore() { } + virtual void add(const vespalib::string & key, Anything::UP value) = 0; + virtual const Anything * get(const vespalib::string & key) const = 0; +}; + +class ObjectStore : public IObjectStore +{ +public: + ObjectStore(); + virtual ~ObjectStore(); + virtual void add(const vespalib::string & key, Anything::UP value); + virtual const Anything * get(const vespalib::string & key) const; +private: + typedef vespalib::hash_map ObjectMap; + ObjectMap _objectMap; +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/fef/parameter.cpp b/searchlib/src/vespa/searchlib/fef/parameter.cpp new file mode 100644 index 00000000000..583061ad383 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/parameter.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "parameter.h" + +namespace search { +namespace fef { + +Parameter::Parameter(ParameterType::Enum type, const vespalib::string & value) : + _type(type), + _stringVal(value), + _doubleVal(0), + _intVal(0), + _fieldVal(NULL) +{ +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/parameter.h b/searchlib/src/vespa/searchlib/fef/parameter.h new file mode 100644 index 00000000000..049ea7f76ab --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/parameter.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "fieldinfo.h" +#include "parameterdescriptions.h" + +namespace search { +namespace fef { + +/** + * This class represents a parameter with type and value. + * You can use convenience functions to access the parameter value as different types. 
+ */ +class Parameter { +private: + ParameterType::Enum _type; + vespalib::string _stringVal; + double _doubleVal; + int64_t _intVal; + const search::fef::FieldInfo * _fieldVal; + +public: + Parameter(ParameterType::Enum type, const vespalib::string & value); + Parameter & setDouble(double val) { _doubleVal = val; return *this; } + Parameter & setInteger(int64_t val) { _intVal = val; return *this; } + Parameter & setField(const search::fef::FieldInfo * val) { _fieldVal = val; return *this; } + ParameterType::Enum getType() const { return _type; } + const vespalib::string & getValue() const { return _stringVal; } + double asDouble() const { return _doubleVal; } + int64_t asInteger() const { return _intVal; } + const search::fef::FieldInfo * asField() const { return _fieldVal; } +}; + +typedef std::vector ParameterList; + + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp new file mode 100644 index 00000000000..bcaf75450c8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/parameterdescriptions.cpp @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Represents the type of a parameter.
 */
struct ParameterType {
    enum Enum {
        NONE,
        FIELD,           // for match information in a field
        INDEX_FIELD,     // for match information in an index field
        ATTRIBUTE_FIELD, // for match information in an attribute field
        ATTRIBUTE,       // for accessing an attribute vector
        FEATURE,         // a complete rank feature name
        NUMBER,
        STRING
    };
};

/**
 * Represents the accepted collection types for a field parameter.
 **/
struct ParameterCollection {
    enum Enum {
        NONE,
        SINGLE,      // single value
        ARRAY,       // unweighted multi-value
        WEIGHTEDSET, // weighted multi-value
        ANY          // any collection type
    };
};

/**
 * The description of a single parameter within a single
 * ParameterDescription object.
 **/
struct ParamDescItem {
    ParameterType::Enum type;
    ParameterCollection::Enum collection;
    ParamDescItem(ParameterType::Enum t,
                  ParameterCollection::Enum c)
        : type(t), collection(c) {}
};

/**
 * This class represents a set of parameter descriptions that each indicate what is a valid input parameter list for a Blueprint.
 * During setup of a Blueprint the descriptions can be used to validate the input parameter
 * list for that Blueprint. The parameters are valid if one of the descriptions matches the actual parameter list.
 */
class ParameterDescriptions {
public:
    /**
     * This class represents a single parameter description, consisting of a list of parameter types.
     * This list of types must match the actual parameter list.
     */
    class Description {
    private:
        size_t _tag;
        std::vector<ParamDescItem> _params;
        size_t _repeat;
    public:
        /**
         * Creates a new object with the given tag.
         */
        Description(size_t tag);
        Description & addParameter(const ParamDescItem &param) {
            _params.push_back(param);
            return *this;
        }

        /**
         * Sets the repeat number.
         * This indicates that the last parameter types can occur 0-n times.
         * The repeat should only be set after all parameter types are added.
         */
        Description & setRepeat(size_t repeat) {
            _repeat = repeat;
            return *this;
        }
        size_t getTag() const { return _tag; }
        const std::vector<ParamDescItem> & getParams() const { return _params; }
        /**
         * Returns the parameter type with the given index.
         * If this description has repeat the index can be out of bounds (the correct repeat parameter will be returned).
         * If the index is out of bounds and there is no usable repeat group
         * (no repeat, or a repeat larger than the number of parameters), an item
         * with ParameterType::NONE and ParameterCollection::NONE is returned.
         */
        ParamDescItem getParam(size_t i) const;
        bool hasRepeat() const { return _repeat != 0; }
        size_t getRepeat() const { return _repeat; }
    };
    typedef std::vector<Description> DescriptionVector;

private:
    DescriptionVector _descriptions;
    size_t            _nextTag;

    Description & getCurrent() { return _descriptions.back(); }
    // Parameters may only be added to a started description, and only
    // before its repeat number is set.
    void addParameter(const ParamDescItem &param) {
        assert(!_descriptions.empty());
        assert(!getCurrent().hasRepeat());
        getCurrent().addParameter(param);
    }
    void addParameter(ParameterType::Enum type, ParameterCollection::Enum collection) {
        addParameter(ParamDescItem(type, collection));
    }
    void addParameter(ParameterType::Enum type) {
        addParameter(type, ParameterCollection::ANY);
    }

public:
    /**
     * Creates a new object with no descriptions.
     */
    ParameterDescriptions();
    const DescriptionVector & getDescriptions() const { return _descriptions; }
    /**
     * Starts a new description tagged with the next free tag.
     */
    ParameterDescriptions & desc() {
        _descriptions.push_back(Description(_nextTag++));
        return *this;
    }
    /**
     * Starts a new description with the given tag.
     */
    ParameterDescriptions & desc(size_t tag) {
        _descriptions.push_back(Description(tag));
        _nextTag = tag + 1;
        return *this;
    }
    /**
     * Adds a field parameter to the current description.
     */
    ParameterDescriptions & field() {
        addParameter(ParameterType::FIELD);
        return *this;
    }
    /**
     * Adds an index field parameter to the current description.
     */
    ParameterDescriptions & indexField(ParameterCollection::Enum collection) {
        addParameter(ParameterType::INDEX_FIELD, collection);
        return *this;
    }
    /**
     * Adds an attribute field parameter to the current description.
     */
    ParameterDescriptions & attributeField(ParameterCollection::Enum collection) {
        addParameter(ParameterType::ATTRIBUTE_FIELD, collection);
        return *this;
    }
    /**
     * Adds an attribute parameter to the current description.
     */
    ParameterDescriptions & attribute(ParameterCollection::Enum collection) {
        addParameter(ParameterType::ATTRIBUTE, collection);
        return *this;
    }
    /**
     * Adds a feature parameter to the current description.
     */
    ParameterDescriptions & feature() {
        addParameter(ParameterType::FEATURE);
        return *this;
    }
    /**
     * Adds a number parameter to the current description.
     */
    ParameterDescriptions & number() {
        addParameter(ParameterType::NUMBER);
        return *this;
    }
    /**
     * Adds a string parameter to the current description.
     */
    ParameterDescriptions & string() {
        addParameter(ParameterType::STRING);
        return *this;
    }
    /**
     * Sets the repeat number on the current description.
     */
    ParameterDescriptions & repeat(size_t n = 1) {
        assert(!_descriptions.empty());
        assert(getCurrent().getParams().size() >= n);
        getCurrent().setRepeat(n);
        return *this;
    }
};

// ---- out-of-line definitions (parameterdescriptions.cpp) ----

ParameterDescriptions::Description::Description(size_t tag) :
    _tag(tag),
    _params(),
    _repeat(0)
{
}

ParamDescItem
ParameterDescriptions::Description::getParam(size_t i) const
{
    if (i < _params.size()) {
        return _params[i];
    }
    // Past the declared parameters only the trailing repeat group can
    // answer. Without one the arithmetic below would divide by zero
    // (or wrap around), so report "no parameter" instead.
    if ((_repeat == 0) || (_repeat > _params.size())) {
        return ParamDescItem(ParameterType::NONE, ParameterCollection::NONE);
    }
    const size_t repeatStart = _params.size() - _repeat;
    const size_t offset = (i - _params.size()) % _repeat;
    return _params[repeatStart + offset];
}

ParameterDescriptions::ParameterDescriptions() :
    _descriptions(),
    _nextTag(0)
{
}
+ +#include +#include +#include +#include "fieldinfo.h" +#include "fieldtype.h" +#include "parametervalidator.h" + +using vespalib::make_vespa_string; + +namespace search { +namespace fef { + +namespace { + +bool checkCollectionType(ParameterCollection::Enum accept, CollectionType actual) { + switch (accept) { + case ParameterCollection::NONE: return false; + case ParameterCollection::SINGLE: return (actual == CollectionType::SINGLE); + case ParameterCollection::ARRAY: return (actual == CollectionType::ARRAY); + case ParameterCollection::WEIGHTEDSET: return (actual == CollectionType::WEIGHTEDSET); + case ParameterCollection::ANY: return true; + } + return false; +} + +class ValidateException +{ +public: + ValidateException(const vespalib::string & message) : _message(message) { } + const vespalib::string & getMessage() const { return _message; } +private: + vespalib::string _message; +}; + +} // namespace search::fef:: + +ParameterValidator::Result::Result(size_t tag) : + _params(), + _tag(tag), + _errorStr(), + _valid(true) +{ +} + +void +ParameterValidator::validateField(ParameterType::Enum type, ParameterCollection::Enum collection, + size_t i, Result & result) +{ + const FieldInfo * field = _indexEnv.getFieldByName(_params[i]); + if (field == NULL) { + throw ValidateException(make_vespa_string("Param[%zu]: Field '%s' was not found in the index environment", + i, _params[i].c_str())); + } + if (type == ParameterType::INDEX_FIELD) { + if (field->type() != FieldType::INDEX) { + throw ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to be an index field, but it was not", + i, _params[i].c_str())); + } + } else if (type == ParameterType::ATTRIBUTE_FIELD) { + if (field->type() != FieldType::ATTRIBUTE) { + throw ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to be an attribute field, but it was not", + i, _params[i].c_str())); + } + } else if (type == ParameterType::ATTRIBUTE) { + if (!field->hasAttribute()) { + throw 
ValidateException(make_vespa_string("Param[%zu]: Expected field '%s' to support attribute lookup, but it does not", + i, _params[i].c_str())); + } + } + if (!checkCollectionType(collection, field->collection())) { + throw ValidateException(make_vespa_string("Param[%zu]: field '%s' has inappropriate collection type", + i, _params[i].c_str())); + } + result.addParameter(Parameter(type, _params[i]).setField(field)); +} + +void +ParameterValidator::validateNumber(ParameterType::Enum type, size_t i, Result & result) +{ + try { + double doubleVal = boost::lexical_cast(_params[i]); + int64_t intVal = static_cast(doubleVal); + result.addParameter(Parameter(type, _params[i]).setInteger(intVal).setDouble(doubleVal)); + } catch (const boost::bad_lexical_cast &) { + throw ValidateException(make_vespa_string("Param[%zu]: Could not convert '%s' to a number", i, _params[i].c_str())); + } +} + +ParameterValidator::Result +ParameterValidator::validate(const ParameterDescriptions::Description & desc) +{ + Result result(desc.getTag()); + if (desc.hasRepeat()) { + size_t minParams = desc.getParams().size() - desc.getRepeat(); // the repeat params can occur 0-n times + if (minParams > _params.size() || + ((_params.size() - desc.getParams().size()) % desc.getRepeat() != 0)) + { + throw ValidateException(make_vespa_string("Expected %zd+%zdx parameter(s), but got %zd", + minParams, desc.getRepeat(), _params.size())); + } + } else if (desc.getParams().size() != _params.size()) { + throw ValidateException(make_vespa_string("Expected %zd parameter(s), but got %zd", desc.getParams().size(), _params.size())); + } + for (size_t i = 0; i < _params.size(); ++i) { + ParamDescItem param = desc.getParam(i); + ParameterType::Enum type = param.type; + switch (type) { + case ParameterType::FIELD: + case ParameterType::INDEX_FIELD: + case ParameterType::ATTRIBUTE_FIELD: + case ParameterType::ATTRIBUTE: + validateField(type, param.collection, i, result); + break; + case ParameterType::NUMBER: + 
validateNumber(type, i, result); + break; + case ParameterType::FEATURE: + case ParameterType::STRING: + result.addParameter(Parameter(type, _params[i])); + break; + default: + break; + } + } + return result; +} + +ParameterValidator::ParameterValidator(const IIndexEnvironment & indexEnv, + const StringVector & params, + const ParameterDescriptions & descs) : + _indexEnv(indexEnv), + _params(params), + _descs(descs) +{ +} + +ParameterValidator::Result +ParameterValidator::validate() +{ + Result invalid; + for (size_t i = 0; i < _descs.getDescriptions().size(); ++i) { + try { + return validate(_descs.getDescriptions()[i]); + } catch (const ValidateException & e) { + if (invalid.valid()) { + Result tmp(_descs.getDescriptions()[i].getTag()); + tmp.setError(e.getMessage()); + invalid = tmp; + } + } + } + return invalid; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/parametervalidator.h b/searchlib/src/vespa/searchlib/fef/parametervalidator.h new file mode 100644 index 00000000000..e416ea1ecdf --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/parametervalidator.h @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "iindexenvironment.h" +#include "parameter.h" +#include "parameterdescriptions.h" + +namespace search { +namespace fef { + +/** + * This class is a validator for a string parameter list given an index environment and a set of parameter descriptions. + * The string parameter list is valid if it is matched with one of the parameter descriptions. + * In case of a match the string parameter list is converted into a parameter list with type information. + */ +class ParameterValidator { +public: + typedef vespalib::string string; + typedef std::vector StringVector; + /** + * This class contains the result after running a validation for a given parameter description. 
     * If the result is valid the parameter description matched the string parameter list
     * and the converted parameter list is stored.
     * If the result is not valid the reason for this is found in the error string.
     */
    class Result {
    private:
        ParameterList _params;   // converted parameters; cleared by setError()
        size_t        _tag;      // tag of the description this result belongs to
        string        _errorStr; // set by setError(), empty otherwise
        bool          _valid;    // cleared by setError()

    public:
        /**
         * Creates a result for the parameter description with the given tag.
         */
        Result(size_t tag = 0);
        /** Appends a converted parameter. */
        Result & addParameter(const Parameter & param) { _params.push_back(param); return *this; }
        /**
         * Marks this result as invalid with the given error message
         * and drops any parameters added so far.
         */
        Result & setError(const vespalib::stringref & str) {
            _errorStr = str;
            _params.clear();
            _valid = false;
            return *this;
        }
        const ParameterList & getParameters() const { return _params; }
        size_t getTag() const { return _tag; }
        const string & getError() const { return _errorStr; }
        bool valid() const { return _valid; }
    };
private:
    // All three are held by reference, so the referenced objects must stay
    // alive for as long as the validator is used.
    const IIndexEnvironment     & _indexEnv;
    const StringVector          & _params;
    const ParameterDescriptions & _descs;

    void validateField(ParameterType::Enum type, ParameterCollection::Enum collection,
                       size_t i, Result & result);
    void validateNumber(ParameterType::Enum type, size_t i, Result & result);
    Result validate(const ParameterDescriptions::Description & desc);

public:
    /**
     * Creates a new validator.
     *
     * @param indexEnv the index environment used to lookup fields.
     * @param params the string parameter list to validate.
     * @param descs the parameter descriptions to use during validation.
     */
    ParameterValidator(const IIndexEnvironment & indexEnv,
                       const StringVector & params,
                       const ParameterDescriptions & descs);
    /**
     * Runs the validator and returns the result.
     * The result object for the first parameter description that matches is returned.
     * In case of no match the result object for the first registered parameter description is returned.
+ */ + Result validate(); +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp new file mode 100644 index 00000000000..a18587efeef --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.cpp @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.phrasesplitter"); +#include "phrasesplitter.h" + +namespace search { +namespace fef { + +void +PhraseSplitter::considerTerm(uint32_t termIdx, const ITermData &term, std::vector &phraseTerms, uint32_t fieldId) +{ + typedef search::fef::ITermFieldRangeAdapter FRA; + + for (FRA iter(term); iter.valid(); iter.next()) { + if (iter.get().getFieldId() == fieldId) { + TermFieldHandle h = iter.get().getHandle(); + _maxHandle = std::max(_maxHandle, h); + if (term.getPhraseLength() > 1) { + SimpleTermData prototype; + prototype.setWeight(term.getWeight()); + prototype.setPhraseLength(1); + prototype.setTermIndex(term.getTermIndex()); + prototype.setUniqueId(term.getUniqueId()); + prototype.addField(fieldId); + phraseTerms.push_back(PhraseTerm(term, _terms.size(), h)); + for (uint32_t i = 0; i < term.getPhraseLength(); ++i) { + _terms.push_back(prototype); + _termIdxMap.push_back(TermIdx(_terms.size() - 1, true)); + } + return; + } + } + } + _termIdxMap.push_back(TermIdx(termIdx, false)); +} + +PhraseSplitter::PhraseSplitter(const IQueryEnvironment & queryEnv, + uint32_t fieldId) : + _queryEnv(queryEnv), + _matchData(NULL), + _terms(), + _termMatches(), + _termIdxMap(), + _maxHandle(0), + _skipHandles(0) +{ + TermFieldHandle numHandles = 0; // how many handles existed in underlying data + std::vector phraseTerms; // data about original phrase terms + + for (uint32_t i = 0; i < queryEnv.getNumTerms(); ++i) { + const ITermData *td = queryEnv.getTerm(i); + LOG_ASSERT(td != NULL); + 
considerTerm(i, *td, phraseTerms, fieldId); + numHandles += td->numFields(); + } + + _skipHandles = _maxHandle + 1 + numHandles; + for (uint32_t i = 0; i < _terms.size(); ++i) { + // start at _skipHandles + 0 + _terms[i].field(0).setHandle(_skipHandles + _termMatches.size()); + TermFieldMatchData empty; + empty.setFieldId(fieldId); + _termMatches.push_back(empty); + } + + for (uint32_t i = 0; i < phraseTerms.size(); ++i) { + const PhraseTerm &pterm = phraseTerms[i]; + + for (uint32_t j = 0; j < pterm.term.getPhraseLength(); ++j) { + const ITermData &splitp_td = _terms[pterm.idx + j]; + const ITermFieldData& splitp_tfd = splitp_td.field(0); + HowToCopy meta; + meta.orig_handle = pterm.orig_handle; + meta.split_handle = splitp_tfd.getHandle(); + meta.offsetInPhrase = j; + _copyInfo.push_back(meta); + } + } + +} + +void +PhraseSplitter::copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset) +{ + dst.reset(src.getDocId()); + + for (TermFieldMatchData::PositionsIterator itr = src.begin(), end = src.end(); itr != end; ++itr) { + TermFieldMatchDataPosition pos(*itr); + pos.setPosition(pos.getPosition() + hitOffset); + dst.appendPosition(TermFieldMatchDataPosition(pos)); + } +} + +void +PhraseSplitter::update(const MatchData & matchData) +{ + _matchData = &matchData; + for (uint32_t i = 0; i < _copyInfo.size(); ++i) { + const TermFieldMatchData *src = matchData.resolveTermField(_copyInfo[i].orig_handle); + TermFieldMatchData *dst = resolveSplittedTermField(_copyInfo[i].split_handle); + LOG_ASSERT(src != NULL && dst != NULL); + copyTermFieldMatchData(*dst, *src, _copyInfo[i].offsetInPhrase); + } + +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/phrasesplitter.h b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h new file mode 100644 index 00000000000..5438954f380 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/phrasesplitter.h @@ -0,0 +1,146 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "iqueryenvironment.h" +#include "matchdata.h" +#include "simpletermdata.h" +#include "termfieldmatchdata.h" +#include "fieldinfo.h" + +namespace search { +namespace fef { + +/** + * This class is used to split all phrase terms in a query environment + * into separate terms. New TermData and TermFieldMatchData objects + * are created for each splitted phrase term and managed by this + * class. Unmodified single terms are served from the query + * environment and match data. + * + * The TermFieldMatchData objects managed by this class are updated + * based on the TermFieldMatchData objects associated with the + * original phrase terms. Positions are adjusted with +1 for each term + * after the first one. + * + * Use this class if you want to handle a phrase term the same way as + * single terms. + **/ +class PhraseSplitter : public IQueryEnvironment +{ +private: + struct TermIdx { + uint32_t idx; // index into either query environment or vector of TermData objects + bool splitted; // whether this term has been splitted or not + TermIdx(uint32_t i, bool s) : idx(i), splitted(s) {} + }; + struct PhraseTerm { + const ITermData & term; // for original phrase + uint32_t idx; // index into vector of our TermData objects + TermFieldHandle orig_handle; + PhraseTerm(const ITermData & t, uint32_t i, uint32_t h) : term(t), idx(i), orig_handle(h) {} + }; + struct HowToCopy { + TermFieldHandle orig_handle; + TermFieldHandle split_handle; + uint32_t offsetInPhrase; + }; + + const IQueryEnvironment &_queryEnv; + const MatchData *_matchData; + std::vector _terms; // splitted terms + std::vector _termMatches; // match objects associated with splitted terms + std::vector _copyInfo; + std::vector _termIdxMap; // renumbering of terms + TermFieldHandle _maxHandle; // the largest among original term field handles + TermFieldHandle _skipHandles; // how many handles to skip + + 
void considerTerm(uint32_t termIdx, const ITermData &term, std::vector &phraseTerms, uint32_t fieldId); + void splitPhrase(const ITermData &phrase, std::vector &phraseTerms, uint32_t fieldId); + + TermFieldMatchData *resolveSplittedTermField(TermFieldHandle handle) { + return &_termMatches[handle - _skipHandles]; + } + + const TermFieldMatchData *resolveSplittedTermField(TermFieldHandle handle) const { + return &_termMatches[handle - _skipHandles]; + } + +public: + /** + * Create a phrase splitter based on the given query environment. + * + * @param queryEnv the query environment to wrap. + * @param fieldId the id of the field where we need to split phrases + **/ + PhraseSplitter(const IQueryEnvironment & queryEnv, uint32_t fieldId); + + /** + * Copy the source object to the destination object. + * Use the given hit offset when copying position information. pos (x) -> pos (x + hitOffset). + * + * @param dst the destination object. + * @param src the source object. + * @param hitOffset the offset to use when copying position information. + **/ + static void copyTermFieldMatchData(TermFieldMatchData & dst, const TermFieldMatchData & src, uint32_t hitOffset); + + /** + * Update the underlying TermFieldMatchData objects based on the given MatchData object. + * + * @param matchData the MatchData object containing original TermFieldMatchData objects. + **/ + void update(const MatchData & matchData); + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual uint32_t getNumTerms() const { + return _termIdxMap.size(); + } + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual const ITermData * getTerm(uint32_t idx) const { + if (idx >= _termIdxMap.size()) { + return NULL; + } + const TermIdx & ti = _termIdxMap[idx]; + return ti.splitted ? &_terms[ti.idx] : _queryEnv.getTerm(ti.idx); + } + + /** + * Inherit doc from MatchData.
+ **/ + const TermFieldMatchData * resolveTermField(TermFieldHandle handle) const { + if (_matchData == NULL) { + return NULL; + } + return handle < _skipHandles ? _matchData->resolveTermField(handle) : resolveSplittedTermField(handle); + } + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual const Properties & getProperties() const { return _queryEnv.getProperties(); } + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual const Location & getLocation() const { return _queryEnv.getLocation(); } + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual const attribute::IAttributeContext & getAttributeContext() const { return _queryEnv.getAttributeContext(); } + + /** + * Inherit doc from IQueryEnvironment. + **/ + virtual const IIndexEnvironment & getIndexEnvironment() const { return _queryEnv.getIndexEnvironment(); } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/properties.cpp b/searchlib/src/vespa/searchlib/fef/properties.cpp new file mode 100644 index 00000000000..7351bc4e5bf --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/properties.cpp @@ -0,0 +1,269 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.properties"); +#include "properties.h" + +namespace search { +namespace fef { + +const Property::Value Property::_emptyValue; +const Property::Values Property::_emptyValues; + +Property::Property(const Property::Values &values) + : _values(&values) +{ +} + +Property::Property() + : _values(&_emptyValues) +{ +} + +bool +Property::found() const +{ + return !(*_values).empty(); +} + +const Property::Value & +Property::get() const +{ + if ((*_values).empty()) { + return _emptyValue; + } + return (*_values)[0]; +} + +const Property::Value & +Property::get(const Property::Value &fallBack) const +{ + if ((*_values).empty()) { + return fallBack; + } + return (*_values)[0]; +} + +uint32_t +Property::size() const +{ + return (*_values).size(); +} + +const Property::Value & +Property::getAt(uint32_t idx) const +{ + if (idx < (*_values).size()) { + return (*_values)[idx]; + } + return _emptyValue; +} + +//----------------------------------------------------------------------------- + +uint32_t +Properties::rawHash(const void *buf, uint32_t len) +{ + uint32_t res = 0; + unsigned const char *pt = (unsigned const char *) buf; + unsigned const char *end = pt + len; + while (pt < end) { + res = (res << 7) + (res >> 25) + *pt++; + } + return res; +} + +Properties::Properties() + : _numValues(0), + _data() +{ +} + +Properties::~Properties() +{ + LOG_ASSERT(_numValues >= _data.size()); +} + +Properties & +Properties::add(const vespalib::stringref &key, const vespalib::stringref &value) +{ + if (!key.empty()) { + Value & v = _data[key]; + v.push_back(value); + ++_numValues; + } + return *this; +} + +uint32_t +Properties::count(const vespalib::stringref &key) const +{ + if (!key.empty()) { + Map::const_iterator node = _data.find(key); + if (node != _data.end()) { + return node->second.size(); + } + } + return 0; +} + +Properties & +Properties::remove(const vespalib::stringref &key) +{ + if (!key.empty()) { + Map::iterator node = _data.find(key); + 
if (node != _data.end()) { + _numValues -= node->second.size(); + _data.erase(node); + } + } + return *this; +} + +Properties & +Properties::import(const Properties &src) +{ + Map::const_iterator itr = src._data.begin(); + Map::const_iterator end = src._data.end(); + for (; itr != end; ++itr) { + Map::insert_result res = _data.insert(Map::value_type(itr->first, itr->second)); + if ( ! res.second) { + _numValues -= res.first->second.size(); + res.first->second = itr->second; + } + _numValues += itr->second.size(); + } + return *this; +} + +Properties & +Properties::clear() +{ + if (_data.empty()) { + return *this; + } + { + Map empty; + std::swap(_data, empty); + } + _numValues = 0; + return *this; +} + +bool +Properties::operator==(const Properties &rhs) const +{ + return (_numValues == rhs._numValues && + _data == rhs._data); +} + +uint32_t +Properties::hashCode() const +{ + uint32_t hash = numKeys() + numValues(); + Map::const_iterator itr = _data.begin(); + Map::const_iterator end = _data.end(); + for (; itr != end; ++itr) { + const Key &key = itr->first; + const Value &value = itr->second; + Value::const_iterator v_itr = value.begin(); + Value::const_iterator v_end = value.end(); + hash += rawHash(key.data(), key.size()); + for (; v_itr != v_end; ++v_itr) { + hash += rawHash(v_itr->data(), v_itr->size()); + } + } + return hash; +} + +void +Properties::visitProperties(IPropertiesVisitor &visitor) const +{ + Map::const_iterator itr = _data.begin(); + Map::const_iterator end = _data.end(); + for (; itr != end; ++itr) { + visitor.visitProperty(itr->first, Property(itr->second)); + } +} + +void +Properties::visitNamespace(const vespalib::stringref &ns, + IPropertiesVisitor &visitor) const +{ + vespalib::string tmp; + vespalib::string prefix = ns + "."; + Map::const_iterator itr = _data.begin(); + Map::const_iterator end = _data.end(); + for (; itr != end; ++itr) { + if ((itr->first.find(prefix) == 0) && + (itr->first.size() > prefix.size())) + { + tmp = 
vespalib::stringref(itr->first.data() + prefix.size(), + itr->first.size() - prefix.size()); + visitor.visitProperty(tmp, Property(itr->second)); + } + } +} + +Property +Properties::lookup(const vespalib::stringref &key) const +{ + if (key.empty()) { + return Property(); + } + Map::const_iterator node = _data.find(key); + if (node == _data.end()) { + return Property(); + } + return Property(node->second); +} + +Property Properties::lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &key) const +{ + if (namespace1.empty() || + key.empty()) + { + return Property(); + } + return lookup(namespace1 + "." + key); +} + +Property Properties::lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &namespace2, + const vespalib::stringref &key) const +{ + if (namespace1.empty() || + namespace2.empty() || + key.empty()) + { + return Property(); + } + return lookup(namespace1 + "." + namespace2 + "." + key); +} + +Property Properties::lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &namespace2, + const vespalib::stringref &namespace3, + const vespalib::stringref &key) const +{ + if (namespace1.empty() || + namespace2.empty() || + namespace3.empty() || + key.empty()) + { + return Property(); + } + return lookup(namespace1 + "." + namespace2 + "." + + namespace3 + "." + key); +} + +void Properties::swap(Properties & rhs) +{ + _data.swap(rhs._data); + std::swap(_numValues, rhs._numValues); +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/properties.h b/searchlib/src/vespa/searchlib/fef/properties.h new file mode 100644 index 00000000000..e808b77e2af --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/properties.h @@ -0,0 +1,324 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include + +namespace search { +namespace fef { + +class Properties; + +//----------------------------------------------------------------------------- + +/** + * This object represents the result of a lookup in a @ref Properties + * object. This class is also used for property visitation. It + * contains all values associated with the key used for lookup. The + * values are accessible in the order in which they were originally + * added. This object is only valid until the @ref Properties object + * it was obtained from is changed or deleted. + **/ +class Property +{ +public: + typedef vespalib::string Value; + typedef std::vector Values; +private: + friend class Properties; + + static const Value _emptyValue; + static const Values _emptyValues; + const Values *_values; + + /** + * Create a new property using the given value vector. + * + * @param values the values for this property + **/ + Property(const Values &values); + +public: + /** + * Create a property that represents the result of a lookup that + * did not find anything. This method may be used to allocate an + * object on the stack in the application, and will also be used + * by the @ref Properties class when a lookup gives no results. + **/ + Property(); + + /** + * Check if we found what we were looking for or not. + * + * @return true if the key we looked up had at least one value + **/ + bool found() const; + + /** + * Get the first value assigned to the looked up key. This method + * will return an empty string if no values were found. + * + * @return first value for the looked up key, or "" + **/ + const Value &get() const; + + /** + * Get the first value assigned to the looked up key. This method + * will return the specified fallback string if no values were + * found. 
+ * + * @return first value for the looked up key, or fallBack + * @param fallBack value to return if no values were found + **/ + const Value & get(const Value &fallBack) const; + + /** + * The number of values found for the looked up key. + * + * @return number of values for this property + **/ + uint32_t size() const; + + /** + * Obtain a specific value for the looked up key. + * + * @return the requested value, or "" if idx was out of bounds + * @param idx the index of the value we want to access + **/ + const Value &getAt(uint32_t idx) const; +}; + +//----------------------------------------------------------------------------- + +/** + * This interface is implemented by objects that want to visit all + * properties contained in a Properties object. + **/ +class IPropertiesVisitor +{ +public: + /** + * Visit a single key and all its values. Keys are visited in the + * iteration order of the map inside the Properties object. + * NOTE(review): that map is declared as a vespalib::hash_map, so sorted key order is presumably not guaranteed -- confirm. + * The values are wrapped in a Property object that is equivalent + * to the object that would be returned if the key had been used as + * parameter to the lookup method in the Properties object. + * + * @param key the key + * @param values the values + **/ + virtual void visitProperty(const Property::Value &key, + const Property &values) = 0; + + /** + * Virtual destructor to allow safe subclassing. + **/ + virtual ~IPropertiesVisitor() {} +}; + +//----------------------------------------------------------------------------- + +/** + * A simple wrapper for a set of key/value pairs. Each key may be + * added multiple times, resulting in multiple values for a single + * key. When data is imported from one object to another, the set of + * values for common keys is totally replaced. + **/ +class Properties +{ +private: + typedef vespalib::string Key; + typedef Property::Values Value; + typedef vespalib::hash_map Map; + + uint32_t _numValues; + Map _data; + + /** + * Calculate a hash code from raw data.
+ * + * @return hash code + * @param buf data pointer + * @param len data length + **/ + static uint32_t rawHash(const void *buf, uint32_t len); + +public: + typedef std::unique_ptr UP; + + /** + * Create an empty properties object. + **/ + Properties(); + + /** + * The destructor asserts that key/value counts look sane before + * deleting the internal data. + **/ + ~Properties(); + + /** + * Add a value to a key. If the key is an empty string, the value + * will be ignored. + * + * @return this object, for chaining + * @param key the key + * @param value the value + **/ + Properties &add(const vespalib::stringref &key, const vespalib::stringref &value); + + /** + * Obtain the number of values for a given key. + * + * @return number of values for the given key + * @param key the key + **/ + uint32_t count(const vespalib::stringref &key) const; + + /** + * Remove all values for the given key. + * + * @return this object, for chaining + * @param key the key + **/ + Properties &remove(const vespalib::stringref &key); + + /** + * Import all key/value pairs from src into this object. All + * values stored in this object for keys present in src will be + * removed during this operation. + * + * @return this object, for chaining + * @param src where to import from + **/ + Properties &import(const Properties &src); + + /** + * Remove all key/value pairs from this object, making it + * equivalent with a freshly created object. It is relatively + * cheap to clear an already empty object. + * + * @return this object, for chaining + **/ + Properties &clear(); + + /** + * Obtain the total number of keys stored in this object. + * + * @return number of keys + **/ + uint32_t numKeys() const { return _data.size(); } + + /** + * Obtain the total number of values stored in this object. + * + * @return number of values + **/ + uint32_t numValues() const { return _numValues; } + + /** + * Check if rhs contains the same key/value pairs as this + * object. 
If a key has multiple values, they need to be in the + * same order to match. + * + * @return true if we are equal to rhs + **/ + bool operator==(const Properties &rhs) const; + + /** + * Calculate a hash code for this object + * + * @return hash code for this object + **/ + uint32_t hashCode() const; + + /** + * Visit all key/value pairs + * + * @param visitor the object being notified of all key/value pairs + **/ + void visitProperties(IPropertiesVisitor &visitor) const; + + /** + * Visit all key/value pairs inside a namespace. The namespace + * itself will be stripped from the keys that are visited. + * + * @param ns the namespace to visit + * @param visitor the object being notified of key/value pairs inside the namespace + **/ + void visitNamespace(const vespalib::stringref &ns, + IPropertiesVisitor &visitor) const; + + /** + * Look up a key in this object. An empty key will result in an + * empty property. + * + * @return object encapsulating lookup result + * @param key the key to look up + **/ + Property lookup(const vespalib::stringref &key) const; + + /** + * Look up a key inside a namespace using the proposed namespace + * syntax. When using namespaces, the actual key is generated by + * concatenating all namespaces and the key, inserting a '.' + * between elements. An empty key and/or namespace will result in + * an empty property. + * + * @return object encapsulating lookup result + * @param namespace1 the namespace + * @param key the key to look up + **/ + Property lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &key) const; + + /** + * Look up a key inside a namespace using the proposed namespace + * syntax. When using namespaces, the actual key is generated by + * concatenating all namespaces and the key, inserting a '.' + * between elements. An empty key and/or namespace will result in + * an empty property. 
+ * + * @return object encapsulating lookup result + * @param namespace1 the first namespace + * @param namespace2 the second namespace + * @param key the key to look up + **/ + Property lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &namespace2, + const vespalib::stringref &key) const; + + /** + * Look up a key inside a namespace using the proposed namespace + * syntax. When using namespaces, the actual key is generated by + * concatenating all namespaces and the key, inserting a '.' + * between elements. An empty key and/or namespace will result in + * an empty property. + * + * @return object encapsulating lookup result + * @param namespace1 the first namespace + * @param namespace2 the second namespace + * @param namespace3 the third namespace + * @param key the key to look up + **/ + Property lookup(const vespalib::stringref &namespace1, + const vespalib::stringref &namespace2, + const vespalib::stringref &namespace3, + const vespalib::stringref &key) const; + + void swap(Properties & rhs); +}; + +inline void +swap(Properties & a, Properties & b) +{ + a.swap(b); +} + + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/queryproperties.cpp b/searchlib/src/vespa/searchlib/fef/queryproperties.cpp new file mode 100644 index 00000000000..a5dd5dc9229 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/queryproperties.cpp @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "queryproperties.h" + +namespace search { +namespace fef { +namespace queryproperties { +namespace now { + +const vespalib::string SystemTime::NAME("vespa.now"); + +} // namespace now +} // namespace queryproperties +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/queryproperties.h b/searchlib/src/vespa/searchlib/fef/queryproperties.h new file mode 100644 index 00000000000..661bc460415 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/queryproperties.h @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace fef { + +/** + * This namespace is a placeholder for several structs, each + * representing a query property with name and default value. All + * property names defined here will have the prefix "vespa." and are + * known by the feature execution framework. When accessing a query + * property from a @ref Properties instance one should use the + * property names defined here to perform the lookup. The query + * properties are the set of properties available through the query + * environment. These properties are denoted as rank properties in + * other parts of the system. + **/ +namespace queryproperties { + +namespace now { + /** + * Property indicating the time to be used for time-sensitive + * relevancy computations. This affects the value returned by the + * global feature 'now'. The time is given in seconds since epoch. + **/ + struct SystemTime { + + /** + * Property name. 
+ **/ + static const vespalib::string NAME; + }; + +} // namespace now + +} // namespace queryproperties +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.cpp b/searchlib/src/vespa/searchlib/fef/rank_program.cpp new file mode 100644 index 00000000000..69cd76917cd --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/rank_program.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.rank_program"); +#include "rank_program.h" +#include "featureoverrider.h" +#include + +namespace search { +namespace fef { + +namespace { + +struct Override +{ + BlueprintResolver::FeatureRef ref; + feature_t value; + + Override(const BlueprintResolver::FeatureRef &r, feature_t v) + : ref(r), value(v) {} + + bool operator<(const Override &rhs) const { + return (ref.executor < rhs.ref.executor); + } +}; + +struct OverrideVisitor : public IPropertiesVisitor +{ + const BlueprintResolver::FeatureMap &feature_map; + std::vector &overrides; + + OverrideVisitor(const BlueprintResolver::FeatureMap &feature_map_in, + std::vector &overrides_out) + : feature_map(feature_map_in), overrides(overrides_out) {} + + virtual void visitProperty(const Property::Value & key, + const Property & values) + { + auto pos = feature_map.find(key); + if (pos != feature_map.end()) { + overrides.push_back(Override(pos->second, strtod(values.get().c_str(), nullptr))); + } + } +}; + +std::vector prepare_overrides(const BlueprintResolver::FeatureMap &feature_map, + const Properties &featureOverrides) +{ + std::vector overrides; + overrides.reserve(featureOverrides.numValues()); + OverrideVisitor visitor(feature_map, overrides); + featureOverrides.visitProperties(visitor); + std::sort(overrides.begin(), overrides.end()); + return overrides; +} + +struct UnboxingExecutor : FeatureExecutor { + UnboxingExecutor(SharedInputs &shared_inputs, + 
FeatureHandle old_feature, + FeatureHandle new_feature) + { + bind_shared_inputs(shared_inputs); + addInput(old_feature); + bindOutput(new_feature); + } + bool isPure() override { return true; } + void execute(search::fef::MatchData &md) override { + double number_value = md.resolve_object_feature(inputs()[0])->get().as_double(); + *md.resolveFeature(outputs()[0]) = number_value; + } +}; + +} // namespace search::fef:: + +void +RankProgram::add_unboxing_executors(MatchDataLayout &my_mdl) +{ + const auto &specs = _resolver->getExecutorSpecs(); + for (const auto &seed_entry: _resolver->getSeedMap()) { + auto seed = seed_entry.second; + if (specs[seed.executor].output_types[seed.output]) { + FeatureHandle old_handle = _executors[seed.executor]->outputs()[seed.output]; + FeatureHandle new_handle = my_mdl.allocFeature(false); + _executors.emplace_back(new UnboxingExecutor(_shared_inputs, old_handle, new_handle)); + _unboxed_seeds[seed_entry.first] = std::make_pair(old_handle, new_handle); + } + } +} + +void +RankProgram::compile() +{ + MatchData &md = match_data(); + std::vector is_calculated(md.getNumFeatures(), false); + for (size_t i = 0; i < _executors.size(); ++i) { + FeatureExecutor &executor = *_executors[i]; + bool is_const = executor.isPure(); + const auto &inputs = executor.inputs(); + for (size_t in_idx = 0; is_const && (in_idx < inputs.size()); ++in_idx) { + is_const &= is_calculated[inputs[in_idx]]; + } + if (is_const) { + executor.execute(md); + const auto &outputs = executor.outputs(); + for (size_t out_idx = 0; out_idx < outputs.size(); ++out_idx) { + is_calculated[outputs[out_idx]] = true; + } + } else { + _program.push_back(&executor); + } + } +} + +RankProgram::RankProgram(BlueprintResolver::SP resolver) + : _resolver(resolver), + _shared_inputs(), + _program(), + _executors(), + _unboxed_seeds() +{ +} + +void +RankProgram::setup(const MatchDataLayout &mdl_in, + const IQueryEnvironment &queryEnv, + const Properties &featureOverrides) +{ + 
assert(_executors.empty()); + MatchDataLayout my_mdl(mdl_in); + std::vector overrides = prepare_overrides(_resolver->getFeatureMap(), featureOverrides); + auto override = overrides.begin(); + auto override_end = overrides.end(); + + const auto &specs = _resolver->getExecutorSpecs(); + _executors.reserve(specs.size()); + for (uint32_t i = 0; i < specs.size(); ++i) { + FeatureExecutor::UP executor(specs[i].blueprint->createExecutor(queryEnv).release()); + assert(executor); + executor->bind_shared_inputs(_shared_inputs); + for (; (override < override_end) && (override->ref.executor == i); ++override) { + FeatureExecutor::LP tmp(executor.release()); + executor.reset(new FeatureOverrider(tmp, override->ref.output, override->value)); + executor->bind_shared_inputs(_shared_inputs); + } + for (auto ref: specs[i].inputs) { + executor->addInput(_executors[ref.executor]->outputs()[ref.output]); + } + executor->inputs_done(); + uint32_t out_cnt = specs[i].output_types.size(); + for (uint32_t out_idx = 0; out_idx < out_cnt; ++out_idx) { + executor->bindOutput(my_mdl.allocFeature(specs[i].output_types[out_idx])); + } + executor->outputs_done(); + _executors.push_back(std::move(executor)); + } + add_unboxing_executors(my_mdl); + _match_data = my_mdl.createMatchData(); + compile(); +} + +namespace { + +template +void extract_handles(const BlueprintResolver::FeatureMap &features, + const std::vector &executors, + const Each &each) +{ + each.reserve(features.size()); + for (const auto &entry: features) { + auto ref = entry.second; + FeatureHandle handle = executors[ref.executor]->outputs()[ref.output]; + each.process(entry.first, handle); + } +} + +struct RawHandleCollector { + std::vector &names; + std::vector &handles; + RawHandleCollector(std::vector &names_in, + std::vector &handles_in) + : names(names_in), handles(handles_in) {} + void reserve(size_t size) const { + names.reserve(size); + handles.reserve(size); + } + void process(const vespalib::string &name, FeatureHandle 
handle) const { + names.push_back(name); + handles.push_back(handle); + } +}; + +struct MappedHandleCollector { + typedef std::map > MappedFeatures; + RawHandleCollector collector; + const MappedFeatures &mapped; + MappedHandleCollector(std::vector &names, + std::vector &handles, + const MappedFeatures &mapped_in) + : collector(names, handles), mapped(mapped_in) {} + void reserve(size_t size) const { collector.reserve(size); } + void process(const vespalib::string &name, FeatureHandle handle) const { + auto pos = mapped.find(name); + if (pos == mapped.end()) { + collector.process(name, handle); + } else { + assert(handle == pos->second.first); + collector.process(name, pos->second.second); + } + } +}; + +} + +void +RankProgram::get_seed_handles(std::vector &names_out, + std::vector &handles_out, + bool unbox_seeds) const +{ + if (unbox_seeds && !_unboxed_seeds.empty()) { + extract_handles(_resolver->getSeedMap(), _executors, MappedHandleCollector(names_out, handles_out, _unboxed_seeds)); + } else { + extract_handles(_resolver->getSeedMap(), _executors, RawHandleCollector(names_out, handles_out)); + } +} + +void +RankProgram::get_all_feature_handles(std::vector &names_out, + std::vector &handles_out, + bool unbox_seeds) const +{ + if (unbox_seeds && !_unboxed_seeds.empty()) { + extract_handles(_resolver->getFeatureMap(), _executors, MappedHandleCollector(names_out, handles_out, _unboxed_seeds)); + } else { + extract_handles(_resolver->getFeatureMap(), _executors, RawHandleCollector(names_out, handles_out)); + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h new file mode 100644 index 00000000000..d9ac2e0e68b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/rank_program.h @@ -0,0 +1,135 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "blueprintresolver.h" +#include "featureexecutor.h" +#include "properties.h" +#include "matchdata.h" +#include "matchdatalayout.h" +#include +#include +#include + +namespace search { +namespace fef { + +/** + * A rank program runs multiple feature executors in a predefined + * order to produce a set of feature values. The rank program owns the + * MatchData used to store unpacked term-field match information and + * feature values used during evaluation. + **/ +class RankProgram +{ +private: + RankProgram(const RankProgram &) = delete; + RankProgram &operator=(const RankProgram &) = delete; + + // { first: old_handle, second: new_handle } + typedef std::pair MappedHandle; + + BlueprintResolver::SP _resolver; + FeatureExecutor::SharedInputs _shared_inputs; + std::vector _program; + MatchData::UP _match_data; + std::vector _executors; + std::map _unboxed_seeds; + + /** + * Add unboxing executors for seeds that are object features to + * make sure all output values are numbers. + **/ + void add_unboxing_executors(MatchDataLayout &my_mdl); + + /** + * Prepare the final program and evaluate all constant features. + **/ + void compile(); + +public: + typedef std::unique_ptr UP; + + /** + * Create a new rank program backed by the given resolver. + * + * @param resolver description on how to set up executors + **/ + RankProgram(BlueprintResolver::SP resolver); + + size_t program_size() const { return _program.size(); } + size_t num_executors() const { return _executors.size(); } + + /** + * Set up this rank program by creating the needed feature + * executors and wiring them together. This function will also + * create the MatchData to be used for iterator unpacking and + * feature calculation as well as pre-calculating all constant + * features. 
+ **/ + void setup(const MatchDataLayout &mdl, + const IQueryEnvironment &queryEnv, + const Properties &featureOverrides = Properties()); + + /** + * Expose the MatchData containing all calculated features. This + * is also used when creating search iterators as it is where all + * iterators should unpack their match information. + **/ + MatchData &match_data() { return *_match_data; } + const MatchData &match_data() const { return *_match_data; } + + /** + * Obtain the names and match data storage locations of all seed + * features for this rank program. The obtained information is + * written in parallel into the given vectors such that the i'th + * name corresponds to the i'th storage location. Programs for + * ranking phases will only have a single seed while programs used + * for summary features or scraping will have multiple seeds. + * + * @param names_out where to store feature names + * @param handles_out where to store feature storage locations + * @param unbox_seeds make sure seed values are numbers + **/ + void get_seed_handles(std::vector &names_out, + std::vector &handles_out, + bool unbox_seeds = true) const; + + /** + * Obtain the names and match data storage locations of all + * features for this rank program. The obtained information is + * written in parallel into the given vectors such that the i'th + * name corresponds to the i'th storage location. This method is + * intended for debugging and testing. + * + * @param names_out where to store feature names + * @param handles_out where to store feature storage locations + * @param unbox_seeds make sure seed values are numbers + **/ + void get_all_feature_handles(std::vector &names_out, + std::vector &handles_out, + bool unbox_seeds = true) const; + + /** + * Run this rank program on the current state of the internal + * match data for the given docid. Typically, match data for a + * specific result will be unpacked before calling run.
After run + * is called, the wanted results can be extracted using the + * appropriate feature handles. The given docid will be used to + * tag the internal match data container before execution. Match + * data for individual term/field combinations are only considered + * valid if their docid matches that of the match data container. + * + * @param docid the document we are ranking + **/ + void run(uint32_t docid) { + MatchData &md = match_data(); + md.setDocId(docid); + for (FeatureExecutor *executor: _program) { + executor->execute(md); + } + } +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp new file mode 100644 index 00000000000..a954f70c82b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp @@ -0,0 +1,187 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fef.ranksetup"); +#include "ranksetup.h" +#include "idumpfeaturevisitor.h" +#include "indexproperties.h" +#include "featurenameparser.h" + +namespace { +class VisitorAdapter : public search::fef::IDumpFeatureVisitor +{ + search::fef::BlueprintResolver &_resolver; +public: + VisitorAdapter(search::fef::BlueprintResolver &resolver) + : _resolver(resolver) {} + virtual void visitDumpFeature(const vespalib::string &name) { + _resolver.addSeed(name); + } +}; +} // namespace + +namespace search { +namespace fef { + +RankSetup::RankSetup(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv) + : _factory(factory), + _indexEnv(indexEnv), + _first_phase_resolver(new BlueprintResolver(factory, indexEnv)), + _second_phase_resolver(new BlueprintResolver(factory, indexEnv)), + _summary_resolver(new BlueprintResolver(factory, indexEnv)), + _dumpResolver(new BlueprintResolver(factory, indexEnv)), + _firstPhaseRankFeature(), + _secondPhaseRankFeature(), + _degradationAttribute(), +
_termwise_limit(1.0), /* was never initialized here, so get_termwise_limit() read an indeterminate value unless configure()/set_termwise_limit() ran first; 1.0 is the top of the documented [0,1] range - TODO confirm it matches the TermwiseLimit property default */ + _numThreads(0), + _numSearchPartitions(0), + _heapSize(0), + _arraySize(0), + _estimatePoint(0), + _estimateLimit(0), + _degradationMaxHits(0), + _degradationMaxFilterCoverage(1.0), + _degradationSamplePercentage(0.2), + _degradationPostFilterMultiplier(1.0), + _rankScoreDropLimit(0), + _summaryFeatures(), + _dumpFeatures(), + _ignoreDefaultRankFeatures(false), + _compiled(false), + _compileError(false), + _degradationAscendingOrder(false), + _diversityAttribute(), + _diversityMinGroups(1), + _diversityCutoffFactor(10.0), + _diversityCutoffStrategy("loose") +{ +} + +void +RankSetup::configure() +{ + setFirstPhaseRank(indexproperties::rank::FirstPhase::lookup(_indexEnv.getProperties())); + setSecondPhaseRank(indexproperties::rank::SecondPhase::lookup(_indexEnv.getProperties())); + std::vector summaryFeatures = indexproperties::summary::Feature::lookup(_indexEnv.getProperties()); + for (uint32_t i = 0; i < summaryFeatures.size(); ++i) { + addSummaryFeature(summaryFeatures[i]); + } + setIgnoreDefaultRankFeatures(indexproperties::dump::IgnoreDefaultFeatures::check(_indexEnv.getProperties())); + std::vector dumpFeatures = indexproperties::dump::Feature::lookup(_indexEnv.getProperties()); + for (uint32_t i = 0; i < dumpFeatures.size(); ++i) { + addDumpFeature(dumpFeatures[i]); + } + set_termwise_limit(indexproperties::matching::TermwiseLimit::lookup(_indexEnv.getProperties())); + setNumThreadsPerSearch(indexproperties::matching::NumThreadsPerSearch::lookup(_indexEnv.getProperties())); + setNumSearchPartitions(indexproperties::matching::NumSearchPartitions::lookup(_indexEnv.getProperties())); + setHeapSize(indexproperties::hitcollector::HeapSize::lookup(_indexEnv.getProperties())); + setArraySize(indexproperties::hitcollector::ArraySize::lookup(_indexEnv.getProperties())); + setDegradationAttribute(indexproperties::matchphase::DegradationAttribute::lookup(_indexEnv.getProperties())); +
setDegradationOrderAscending(indexproperties::matchphase::DegradationAscendingOrder::lookup(_indexEnv.getProperties())); + setDegradationMaxHits(indexproperties::matchphase::DegradationMaxHits::lookup(_indexEnv.getProperties())); + setDegradationMaxFilterCoverage(indexproperties::matchphase::DegradationMaxFilterCoverage::lookup(_indexEnv.getProperties())); + setDegradationSamplePercentage(indexproperties::matchphase::DegradationSamplePercentage::lookup(_indexEnv.getProperties())); + setDegradationPostFilterMultiplier(indexproperties::matchphase::DegradationPostFilterMultiplier::lookup(_indexEnv.getProperties())); + setDiversityAttribute(indexproperties::matchphase::DiversityAttribute::lookup(_indexEnv.getProperties())); + setDiversityMinGroups(indexproperties::matchphase::DiversityMinGroups::lookup(_indexEnv.getProperties())); + setDiversityCutoffFactor(indexproperties::matchphase::DiversityCutoffFactor::lookup(_indexEnv.getProperties())); + setDiversityCutoffStrategy(indexproperties::matchphase::DiversityCutoffStrategy::lookup(_indexEnv.getProperties())); + setEstimatePoint(indexproperties::hitcollector::EstimatePoint::lookup(_indexEnv.getProperties())); + setEstimateLimit(indexproperties::hitcollector::EstimateLimit::lookup(_indexEnv.getProperties())); + setRankScoreDropLimit(indexproperties::hitcollector::RankScoreDropLimit::lookup(_indexEnv.getProperties())); +} + +void +RankSetup::setFirstPhaseRank(const vespalib::string &featureName) +{ + LOG_ASSERT(!_compiled); + _firstPhaseRankFeature = featureName; +} + +void +RankSetup::setSecondPhaseRank(const vespalib::string &featureName) +{ + LOG_ASSERT(!_compiled); + _secondPhaseRankFeature = featureName; +} + +void +RankSetup::addSummaryFeature(const vespalib::string &summaryFeature) +{ + LOG_ASSERT(!_compiled); + _summaryFeatures.push_back(summaryFeature); +} + +void +RankSetup::addDumpFeature(const vespalib::string &dumpFeature) +{ + LOG_ASSERT(!_compiled); + _dumpFeatures.push_back(dumpFeature); +} + +bool 
+RankSetup::compile() +{ + LOG_ASSERT(!_compiled); /* compile() may only be run once per RankSetup */ + if (!_firstPhaseRankFeature.empty()) { + FeatureNameParser parser(_firstPhaseRankFeature); + if (parser.valid()) { + _firstPhaseRankFeature = parser.featureName(); /* keep the name as reported by the parser */ + _first_phase_resolver->addSeed(_firstPhaseRankFeature); + } else { + LOG(warning, "invalid feature name for initial rank: '%s'", + _firstPhaseRankFeature.c_str()); + _compileError = true; + } + } + if (!_secondPhaseRankFeature.empty()) { + FeatureNameParser parser(_secondPhaseRankFeature); + if (parser.valid()) { + _secondPhaseRankFeature = parser.featureName(); /* keep the name as reported by the parser */ + _second_phase_resolver->addSeed(_secondPhaseRankFeature); + } else { + LOG(warning, "invalid feature name for final rank: '%s'", + _secondPhaseRankFeature.c_str()); + _compileError = true; + } + } + for (const auto &summaryFeature : _summaryFeatures) { + _summary_resolver->addSeed(summaryFeature); + } + if (!_ignoreDefaultRankFeatures) { + VisitorAdapter adapter(*_dumpResolver); + _factory.visitDumpFeatures(_indexEnv, adapter); + } + for (const auto &dumpFeature : _dumpFeatures) { + _dumpResolver->addSeed(dumpFeature); + } + _indexEnv.hintFeatureMotivation(IIndexEnvironment::RANK); + _compileError |= !_first_phase_resolver->compile(); + _compileError |= !_second_phase_resolver->compile(); + _compileError |= !_summary_resolver->compile(); + _indexEnv.hintFeatureMotivation(IIndexEnvironment::DUMP); + _compileError |= !_dumpResolver->compile(); + _compiled = true; /* set even when compilation failed; failure is reported through _compileError and the return value */ + return !_compileError; +} + +void +RankSetup::prepareSharedState(const IQueryEnvironment &queryEnv, IObjectStore &objectStore) const +{ + LOG_ASSERT(_compiled && !_compileError); + for (const auto &spec : _first_phase_resolver->getExecutorSpecs()) { + spec.blueprint->prepareSharedState(queryEnv, objectStore); + } + for (const auto &spec : _second_phase_resolver->getExecutorSpecs()) { + spec.blueprint->prepareSharedState(queryEnv, objectStore); + } + for (const auto &spec : _summary_resolver->getExecutorSpecs()) {
+ spec.blueprint->prepareSharedState(queryEnv, objectStore); + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h new file mode 100644 index 00000000000..86b381e3af6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h @@ -0,0 +1,393 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "blueprintfactory.h" +#include "iindexenvironment.h" +#include "iqueryenvironment.h" +#include "blueprintresolver.h" +#include "rank_program.h" + +namespace search { +namespace fef { + +/** + * A rank setup contains information about how initial and final rank + * should be calculated. A rank setup is responsible for resolving + * dependencies between named features and also acts as a factory for + * @ref RankContext objects. In addition to keeping track of how to + * calculate rank, a RankSetup also keeps track of how to calculate + * all features that should be dumped when performing a full feature + * dump. 
+ **/ +class RankSetup +{ +private: + const BlueprintFactory &_factory; + const IIndexEnvironment &_indexEnv; + BlueprintResolver::SP _first_phase_resolver; + BlueprintResolver::SP _second_phase_resolver; + BlueprintResolver::SP _summary_resolver; + BlueprintResolver::SP _dumpResolver; + vespalib::string _firstPhaseRankFeature; + vespalib::string _secondPhaseRankFeature; + vespalib::string _degradationAttribute; + double _termwise_limit; + uint32_t _numThreads; + uint32_t _numSearchPartitions; + uint32_t _heapSize; + uint32_t _arraySize; + uint32_t _estimatePoint; + uint32_t _estimateLimit; + uint32_t _degradationMaxHits; + double _degradationMaxFilterCoverage; + double _degradationSamplePercentage; + double _degradationPostFilterMultiplier; + feature_t _rankScoreDropLimit; + std::vector _summaryFeatures; + std::vector _dumpFeatures; + bool _ignoreDefaultRankFeatures; + bool _compiled; + bool _compileError; + bool _degradationAscendingOrder; + vespalib::string _diversityAttribute; + uint32_t _diversityMinGroups; + double _diversityCutoffFactor; + vespalib::string _diversityCutoffStrategy; + + +public: + RankSetup(const RankSetup &) = delete; + RankSetup &operator=(const RankSetup &) = delete; + /** + * Convenience typedef for a shared pointer to this class. + **/ + typedef std::shared_ptr SP; + + /** + * Create a new rank setup within the given index environment and + * backed by the given factory. + * + * @param factory blueprint factory + * @param indexEnv index environment + **/ + RankSetup(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv); + + /** + * Configures this rank setup according to the fef properties + * found in the index environment. + **/ + void configure(); + + /** + * This method is invoked during setup (before invoking the @ref + * compile method) to define what feature to use as first phase + * ranking. 
+ * + * @param featureName full feature name for first phase rank + **/ + void setFirstPhaseRank(const vespalib::string &featureName); + + /** + * Returns the first phase ranking. + * + * @return feature name for first phase rank + **/ + const vespalib::string &getFirstPhaseRank() const { return _firstPhaseRankFeature; } + + /** + * This method is invoked during setup (before invoking the @ref + * compile method) to define what feature to use as second phase ranking. + * + * @param featureName full feature name for second phase rank + **/ + void setSecondPhaseRank(const vespalib::string &featureName); + + /** + * Returns the second phase ranking. + * + * @return feature name for second phase rank + **/ + const vespalib::string &getSecondPhaseRank() const { return _secondPhaseRankFeature; } + + /** + * Set the termwise limit + * + * The termwise limit is a number in the range [0,1] indicating + * how much of the corpus the query must match for termwise + * evaluation to be enabled. + * + * @param value termwise limit + **/ + void set_termwise_limit(double value) { _termwise_limit = value; } + + /** + * Get the termwise limit + * + * The termwise limit is a number in the range [0,1] indicating + * how much of the corpus the query must match for termwise + * evaluation to be enabled. + * + * @return termwise limit + **/ + double get_termwise_limit() const { return _termwise_limit; } + + /** + * Sets the number of threads per search. + * + * @param numThreads the number of threads + **/ + void setNumThreadsPerSearch(uint32_t numThreads) { _numThreads = numThreads; } + + /** + * Returns the number of threads per search. 
+ * + * @return the number of threads + **/ + uint32_t getNumThreadsPerSearch() const { return _numThreads; } + + void setNumSearchPartitions(uint32_t numSearchPartitions) { _numSearchPartitions = numSearchPartitions; } + + uint32_t getNumSearchPartitions() const { return _numSearchPartitions; } + + /** + * Sets the heap size to be used in the hit collector. + * + * @param heapSize the heap size + **/ + void setHeapSize(uint32_t heapSize) { _heapSize = heapSize; } + + /** + * Returns the heap size to be used in the hit collector. + * + * @return the heap size + **/ + uint32_t getHeapSize() const { return _heapSize; } + + /** + * Sets the array size to be used in the hit collector. + * + * @param arraySize the array size + **/ + void setArraySize(uint32_t arraySize) { _arraySize = arraySize; } + + /** + * Returns the array size to be used in the hit collector. + * + * @return the array size + **/ + uint32_t getArraySize() const { return _arraySize; } + + /** whether match phase should do graceful degradation */ + bool hasMatchPhaseDegradation() const { + return (_degradationAttribute.size() > 0); + } + + /** get name of attribute to use for graceful degradation in match phase */ + vespalib::string getDegradationAttribute() const { + return _degradationAttribute; + } + /** check whether attribute should be used in ascending order during graceful degradation in match phase */ + bool isDegradationOrderAscending() const { + return _degradationAscendingOrder; + } + /** get number of hits to collect during graceful degradation in match phase */ + uint32_t getDegradationMaxHits() const { + return _degradationMaxHits; + } + + double getDegradationMaxFilterCoverage() const { return _degradationMaxFilterCoverage; } + /** get number of hits to collect during graceful degradation in match phase */ + double getDegradationSamplePercentage() const { + return _degradationSamplePercentage; + } + + /** get number of hits to collect during graceful degradation in match phase */ + 
double getDegradationPostFilterMultiplier() const { + return _degradationPostFilterMultiplier; + } + + /** get the attribute used to ensure diversity during match phase limiting **/ + vespalib::string getDiversityAttribute() const { + return _diversityAttribute; + } + + /** get the minimal diversity we should try to achieve **/ + uint32_t getDiversityMinGroups() const { + return _diversityMinGroups; + } + /** get the diversity cutoff factor **/ + double getDiversityCutoffFactor() const { + return _diversityCutoffFactor; + } + /** get the diversity cutoff strategy **/ + const vespalib::string & getDiversityCutoffStrategy() const { + return _diversityCutoffStrategy; + } + + /** set name of attribute to use for graceful degradation in match phase */ + void setDegradationAttribute(const vespalib::string &name) { + _degradationAttribute = name; + } + /** set whether attribute should be used in ascending order during graceful degradation in match phase */ + void setDegradationOrderAscending(bool ascending) { + _degradationAscendingOrder = ascending; + } + /** set number of hits to collect during graceful degradation in match phase */ + void setDegradationMaxHits(uint32_t maxHits) { + _degradationMaxHits = maxHits; + } + /** set the max filter coverage setting for graceful degradation in match phase */ + void setDegradationMaxFilterCoverage(double degradationMaxFilterCoverage) { + _degradationMaxFilterCoverage = degradationMaxFilterCoverage; + } + + /** set the sample percentage setting for graceful degradation in match phase */ + void setDegradationSamplePercentage(double samplePercentage) { + _degradationSamplePercentage = samplePercentage; + } + + /** set the post filter multiplier setting for graceful degradation in match phase (the argument is the multiplier, despite its name) */ + void setDegradationPostFilterMultiplier(double samplePercentage) { + _degradationPostFilterMultiplier = samplePercentage; + } + + /** set the attribute used to ensure diversity during match phase limiting **/ + void setDiversityAttribute(const vespalib::string &value) { + _diversityAttribute = value; + } + + /** set the minimal diversity we should try to achieve **/ + void setDiversityMinGroups(uint32_t
value) { + _diversityMinGroups = value; + } + /** set the diversity cutoff factor **/ + void setDiversityCutoffFactor(double value) { + _diversityCutoffFactor = value; + } + /** set the diversity cutoff strategy **/ + void setDiversityCutoffStrategy(const vespalib::string & value) { + _diversityCutoffStrategy = value; + } + + /** + * Sets the estimate point to be used in parallel query evaluation. + * + * @param estimatePoint the estimate point + **/ + void setEstimatePoint(uint32_t estimatePoint) { _estimatePoint = estimatePoint; } + + /** + * Returns the estimate point to be used in parallel query evaluation. + * + * @return the estimate point + **/ + uint32_t getEstimatePoint() const { return _estimatePoint; } + + /** + * Sets the estimate limit to be used in parallel query evaluation. + * + * @param estimateLimit the estimate limit + **/ + void setEstimateLimit(uint32_t estimateLimit) { _estimateLimit = estimateLimit; } + + /** + * Returns the estimate limit to be used in parallel query evaluation. + * + * @return the estimate limit + **/ + uint32_t getEstimateLimit() const { return _estimateLimit; } + + /** + * Sets the rank score drop limit to be used in parallel query evaluation. + * + * @param rankScoreDropLimit the rank score drop limit + **/ + void setRankScoreDropLimit(feature_t rankScoreDropLimit) { _rankScoreDropLimit = rankScoreDropLimit; } + + /** + * Returns the rank score drop limit to be used in parallel query evaluation. + * + * @return the rank score drop limit + **/ + feature_t getRankScoreDropLimit() const { return _rankScoreDropLimit; } + + /** + * This method may be used to indicate that certain features + * should be present in the docsum. + * + * @param summaryFeature full feature name of a summary feature + **/ + void addSummaryFeature(const vespalib::string &summaryFeature); + + /** + * Returns a const view of the summary features added. + * + * @return vector of summary feature names.
+ **/ + const std::vector<vespalib::string> &getSummaryFeatures() const { return _summaryFeatures; } + + /** + * Set the flag indicating whether we should ignore the default + * rank features (the ones specified by the plugins themselves) + * + * @param flag true means ignore default rank features + **/ + void setIgnoreDefaultRankFeatures(bool flag) { _ignoreDefaultRankFeatures = flag; } + + /** + * Get the flag indicating whether we should ignore the default + * rank features (the ones specified by the plugins themselves). + * This is a pure read and is therefore usable on const objects. + * + * @return true means ignore default rank features + **/ + bool getIgnoreDefaultRankFeatures() const { return _ignoreDefaultRankFeatures; } + + /** + * This method may be used to indicate that certain features + * should be dumped during a full feature dump. + * + * @param dumpFeature full feature name of a dump feature + **/ + void addDumpFeature(const vespalib::string &dumpFeature); + + /** + * Returns a const view of the dump features added. + * + * @return vector of dump feature names. + **/ + const std::vector<vespalib::string> &getDumpFeatures() const { return _dumpFeatures; } + + /** + * Create blueprints, resolve dependencies and form a strategy for + * how to create feature executors used to calculate initial and + * final rank for individual queries. This method must be invoked + * after the @ref setInitialRank and @ref setFinalRank methods and + * before creating @ref RankContext objects using the @ref + * createRankContext and @ref createDumpContext methods. + * + * @return true if things went ok, false otherwise (dependency issues) + **/ + bool compile(); + + // These functions create rank programs for different tasks. Note + // that the setup function must be called on rank programs for + // them to be ready to use. Also keep in mind that creating a rank + // program is cheap while setting it up is more expensive.
+ + RankProgram::UP create_first_phase_program() const { return RankProgram::UP(new RankProgram(_first_phase_resolver)); } + RankProgram::UP create_second_phase_program() const { return RankProgram::UP(new RankProgram(_second_phase_resolver)); } + RankProgram::UP create_summary_program() const { return RankProgram::UP(new RankProgram(_summary_resolver)); } + RankProgram::UP create_dump_program() const { return RankProgram::UP(new RankProgram(_dumpResolver)); } + + /** + * Here you can do some preprocessing. State must be stored in the IObjectStore. + * This is called before creating multiple execution threads. + * @param queryEnv The query environment. + * @param objectStore The object store that receives the shared state. + */ + void prepareSharedState(const IQueryEnvironment & queryEnv, IObjectStore & objectStore) const; +}; + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp new file mode 100644 index 00000000000..97ec1f8cca3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/simpletermdata.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.simpletermdata"); +#include "simpletermdata.h" + +namespace search { +namespace fef { + +/** + * Creates an object with zero weight, no terms, term index 0, unset unique id and no fields. + **/ +SimpleTermData::SimpleTermData() + : _weight(0), + _numTerms(0), + _termIndex(0), + _uniqueId(0), + _fields() +{ +} + +/** + * Copies weight, phrase length, term index, unique id and the data of every field from rhs. + * The field vector is sized up front so that the copy does not reallocate while appending. + **/ +SimpleTermData::SimpleTermData(const ITermData &rhs) + : _weight(rhs.getWeight()), + _numTerms(rhs.getPhraseLength()), + _termIndex(rhs.getTermIndex()), + _uniqueId(rhs.getUniqueId()), + _fields() +{ + const size_t numFields = rhs.numFields(); + _fields.reserve(numFields); + for (size_t i = 0; i < numFields; ++i) { + _fields.push_back(SimpleTermFieldData(rhs.field(i))); + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/simpletermdata.h b/searchlib/src/vespa/searchlib/fef/simpletermdata.h new file mode 100644 index 00000000000..ee4cab468e1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/simpletermdata.h @@ -0,0 +1,195 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "handle.h" +#include "itermdata.h" +#include "simpletermfielddata.h" +#include +#include + +namespace search { +namespace fef { + +/** + * Static match data for a single unit (term/phrase/etc). + **/ +class SimpleTermData : public ITermData +{ +private: + query::Weight _weight; + uint32_t _numTerms; + uint32_t _termIndex; + uint32_t _uniqueId; + + std::vector<SimpleTermFieldData> _fields; + +public: + /** + * Creates a new object. + **/ + SimpleTermData(); + + /** + * Side-cast copy constructor. + **/ + SimpleTermData(const ITermData &rhs); + + //----------- ITermData implementation ------------------------------------ + + /** + * Returns the term weight. + **/ + virtual query::Weight getWeight() const { return _weight; } + + /** + * Returns the number of terms represented by this term data object. + **/ + virtual uint32_t getPhraseLength() const { return _numTerms; } + + /** + * Obtain the location of this term in the original user query.
+ * + * @return term index + **/ + virtual uint32_t getTermIndex() const { return _termIndex; } + + /** + * Obtain the unique id of this term. 0 means not set. + * + * @return unique id or 0 + **/ + virtual uint32_t getUniqueId() const { return _uniqueId; } + + /** + * Get number of fields searched + **/ + virtual size_t numFields() const { return _fields.size(); } + + /** + * Direct access to data for individual fields + * @param i local index, must have: 0 <= i < numFields() + * @return data for the field at local index i + */ + virtual const ITermFieldData &field(size_t i) const { + return _fields[i]; + } + + /** + * Obtain information about a specific field that may be searched + * by this term. If the requested field is not searched by this + * term, NULL will be returned. The fields are scanned in the + * order they were added and the first match is returned. + * + * @param fieldId id of the field to look up + * @return term field data, or NULL if not found + **/ + virtual const ITermFieldData *lookupField(uint32_t fieldId) const { + for (size_t fieldIdx(0), m(numFields()); fieldIdx < m; ++fieldIdx) { + const ITermFieldData &tfd = field(fieldIdx); + if (tfd.getFieldId() == fieldId) { + return &tfd; + } + } + return 0; + } + + //----------- Utility functions ------------------------------------------- + + /** + * Sets the term weight. + * + * @param weight term weight + * @return this to allow chaining. + **/ + SimpleTermData &setWeight(query::Weight weight) { + _weight = weight; + return *this; + } + + /** + * Sets the number of terms represented by this term data object. + * + * @param numTerms number of terms + * @return this to allow chaining. + **/ + SimpleTermData &setPhraseLength(uint32_t numTerms) { + _numTerms = numTerms; + return *this; + } + + /** + * Set the location of this term in the original user query. + * + * @return this to allow chaining. + * @param idx term index + **/ + SimpleTermData &setTermIndex(uint32_t idx) { + _termIndex = idx; + return *this; + } + + /** + * Set the unique id of this term. 0 means not set. + * + * @param id unique id or 0 + * @return this to allow chaining. + **/ + SimpleTermData &setUniqueId(uint32_t id) { + _uniqueId = id; + return *this; + } + + /** + * Add a new field to the set that is searched by this term.
+ * + * @return the newly added field; the reference refers to an element of the internal vector and may be invalidated by a later call to addField + * @param fieldId field id of the added field + **/ + SimpleTermFieldData &addField(uint32_t fieldId) { + _fields.push_back(SimpleTermFieldData(fieldId)); + return _fields.back(); + } + + /** + * Direct access to data for individual fields + * @param i local index, must have: 0 <= i < numFields() + * @return mutable data for the field at local index i + */ + SimpleTermFieldData &field(size_t i) { + return _fields[i]; + } + + /** + * Obtain information about a specific field that may be searched + * by this term. If the requested field is not searched by this + * term, NULL will be returned. + * + * @param fieldId id of the field to look up + * @return term field data, or NULL if not found + **/ + SimpleTermFieldData *lookupField(uint32_t fieldId) { + for (size_t fieldIdx(0), m(numFields()); fieldIdx < m; ++fieldIdx) { + SimpleTermFieldData& tfd = field(fieldIdx); + if (tfd.getFieldId() == fieldId) { + return &tfd; + } + } + return 0; + } +}; + + +/** + * Convenience adapter for easy iteration over the fields of a SimpleTermData. + * The number of fields is sampled when the adapter is created, so fields + * added to the term data afterwards are not visited. + **/ +class SimpleTermFieldRangeAdapter +{ + SimpleTermData& _ref; + size_t _idx; + size_t _lim; +public: + explicit SimpleTermFieldRangeAdapter(SimpleTermData& ref) + : _ref(ref), _idx(0), _lim(ref.numFields()) + {} + /** @return true while the current index refers to a field */ + bool valid() const { return (_idx < _lim); } + /** @return the field at the current index; only meaningful while valid() is true */ + SimpleTermFieldData& get() const { return _ref.field(_idx); } + /** step to the next field; asserts that the adapter is still valid */ + void next() { assert(valid()); ++_idx; } +}; + + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp new file mode 100644 index 00000000000..582e5e330d6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.cpp @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.simpletermfielddata"); +#include "simpletermfielddata.h" + +namespace search { +namespace fef { + +SimpleTermFieldData::SimpleTermFieldData(uint32_t fieldId) + : _fieldId(fieldId), + _docFreq(0), + _handle(IllegalHandle) +{ +} + +SimpleTermFieldData::SimpleTermFieldData(const ITermFieldData &rhs) + : _fieldId(rhs.getFieldId()), + _docFreq(rhs.getDocFreq()), + _handle(rhs.getHandle()) +{ +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h new file mode 100644 index 00000000000..f95ca5b3472 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/simpletermfielddata.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "itermfielddata.h" + +namespace search { +namespace fef { + +/** + * Information about a single field that is being searched for a term + * (described by the TermData class). The field may be either an index + * field or an attribute field. If more information about the field is + * needed, the field id may be used to consult the index environment. + **/ +class SimpleTermFieldData : public ITermFieldData +{ +private: + uint32_t _fieldId; + double _docFreq; + TermFieldHandle _handle; + +public: + /** + * Side-cast copy constructor. + **/ + SimpleTermFieldData(const ITermFieldData &rhs); + + /** + * Create a new instance for the given field. + * + * @param fieldId the field being searched + **/ + SimpleTermFieldData(uint32_t fieldId); + + /** + * Obtain the field id. + * + * @return field id + **/ + virtual uint32_t getFieldId() const { return _fieldId; } + + /** + * Obtain the document frequency. + * + * @return document frequency + **/ + virtual double getDocFreq() const { return _docFreq; } + + /** + * Obtain the match handle for this field. 
+ * + * @return match handle + **/ + virtual TermFieldHandle getHandle() const { + return _handle; + } + + /** + * Sets the document frequency. + * + * @return this object (for chaining) + * @param docFreq document frequency + **/ + SimpleTermFieldData &setDocFreq(double docFreq) { + _docFreq = docFreq; + return *this; + } + + /** + * Sets the match handle for this field. + * + * @return this object (for chaining) + * @param handle match handle + **/ + SimpleTermFieldData &setHandle(TermFieldHandle handle) { + _handle = handle; + return *this; + } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp b/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp new file mode 100644 index 00000000000..06df6a6d909 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/sumexecutor.cpp @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "sumexecutor.h" +#include "matchdata.h" + +namespace search { +namespace fef { + +void +SumExecutor::execute(MatchData &data) +{ + feature_t sum = 0.0; + for (uint32_t i = 0; i < inputs().size(); ++i) { + sum += *data.resolveFeature(inputs()[i]); + } + *data.resolveFeature(outputs()[0]) = sum; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/sumexecutor.h b/searchlib/src/vespa/searchlib/fef/sumexecutor.h new file mode 100644 index 00000000000..82f5ea237b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/sumexecutor.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "featureexecutor.h" + +namespace search { +namespace fef { + +class MatchData; + +/** + * Simple executor that calculates the sum of a set of inputs. This + * will be moved to another library as it is not really part of the + * framework. 
+ **/ +class SumExecutor : public FeatureExecutor +{ +public: + virtual void execute(MatchData &data); + + /** + * Create an instance of this class and return it as a shared pointer. + * + * @return shared pointer to new instance + **/ + static FeatureExecutor::LP create() { return FeatureExecutor::LP(new SumExecutor()); } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp b/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp new file mode 100644 index 00000000000..29818a9f416 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/symmetrictable.cpp @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "symmetrictable.h" + +namespace search { +namespace fef { + +/** + * Creates an empty table without backing storage; _table is NULL. + **/ +SymmetricTable::SymmetricTable() : + _backingTable(), + _size(), + _table(NULL), + _max(0) +{ +} + +/** + * Copies the backing storage and re-derives _table so that it points into + * the storage owned by this object. An empty source keeps _table as NULL + * instead of indexing into an empty vector. + **/ +SymmetricTable::SymmetricTable(const SymmetricTable & table) : + _backingTable(table._backingTable), + _size(_backingTable.size()/2), + _table(_backingTable.empty() ? NULL : &_backingTable[_size]), + _max(table.max()) +{ +} + +/** + * Copy-and-swap assignment; self assignment is a no-op. + **/ +SymmetricTable & SymmetricTable::operator=(const SymmetricTable & rhs) +{ + if (&rhs != this) { + SymmetricTable n(rhs); + swap(n); + } + return *this; +} + +/** + * Builds the mirrored table: index i holds table[i] and index -i holds -table[i]. + * An empty input gives a table without backing storage; table.size()*2 - 1 + * is unsigned and would otherwise wrap around, and table[0] would not exist. + **/ +SymmetricTable::SymmetricTable(const Table & table) : + _backingTable(table.size() > 0 ? table.size()*2 - 1 : 0), + _size(_backingTable.size()/2), + _table(_backingTable.empty() ? NULL : &_backingTable[_size]), + _max(table.max()) +{ + if (_table == NULL) { + return; + } + _table[0] = table[0]; + for(int i(1); i <= _size; i++) { + _table[i] = table[i]; + _table[-i] = -table[i]; + } +} + +SymmetricTable::~SymmetricTable() +{ +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/symmetrictable.h b/searchlib/src/vespa/searchlib/fef/symmetrictable.h new file mode 100644 index 00000000000..984879cc540 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/symmetrictable.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace fef { + +/** + * This class represents a rank table with double values. It takes both negative and positive indexes. + * The content of a table is typically a pre-computed function that is used by a feature executor. + * Values in the negative index range are negated values of corresponding positive value. + **/ +class SymmetricTable +{ +private: + std::vector _backingTable; /* storage for the negative part, index 0 and the positive part */ + int _size; /* half the size of _backingTable, rounded down */ + double * _table; /* element of _backingTable that acts as index 0; NULL for a default constructed table */ + double _max; + +public: + typedef std::shared_ptr SP; + + SymmetricTable(); + /** + * Creates a symmetric table based on the real one. + **/ + SymmetricTable(const Table & table); + SymmetricTable(const SymmetricTable & table); + ~SymmetricTable(); + + SymmetricTable & operator =(const SymmetricTable & table); + void swap(SymmetricTable & rhs) { + _backingTable.swap(rhs._backingTable); + std::swap(_size, rhs._size); + std::swap(_table, rhs._table); + std::swap(_max, rhs._max); + } + /** + * Returns the element at the given position. + * No range checking is done. + **/ + double operator[](int i) const { return _table[i]; } + + /** + * Retrieves the element at the given position, or the element at index -_size or _size + * if i is below or above that range. + **/ + double get(int i) const { + return (i<-_size) ? _table[-_size] : ((i>_size) ? _table[_size] : _table[i]); + }; + double max() const { return _max; } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/table.cpp b/searchlib/src/vespa/searchlib/fef/table.cpp new file mode 100644 index 00000000000..c32cd233937 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/table.cpp @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "table.h" +#include + +namespace search { +namespace fef { +/** Creates an empty table and reserves room for 256 elements. */ +Table::Table() : + _table(), + _max(-std::numeric_limits::max()) +{ + _table.reserve(256); +} + +Table::~Table() +{ +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/table.h b/searchlib/src/vespa/searchlib/fef/table.h new file mode 100644 index 00000000000..a2203b83041 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/table.h @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace fef { + +/** + * This class represents a rank table with double values. + * The content of a table is typically a pre-computed function that is used by a feature executor. + **/ +class Table +{ +private: + std::vector _table; + double _max; + +public: + typedef std::shared_ptr SP; + + /** + * Creates a new table with zero elements. + **/ + Table(); + ~Table(); + + /** + * Adds the given element to this table and updates the largest element seen. + * + * @param val the value to append + * @return this object (for chaining) + **/ + Table & add(double val) { + _table.push_back(val); + _max = std::max(val, _max); + return *this; + } + + /** + * Returns the number of elements in this table. + **/ + size_t size() const { return _table.size(); } + + /** + * Returns the element at the given position. + * No range checking is done; i must be less than size(). + **/ + double operator[](size_t i) const { return _table[i]; } + + /** + * Retrieves the element at the given position or the last element if i is outside the range. + * The table must not be empty: size() - 1 is unsigned and wraps around for an empty table. + **/ + double get(size_t i) const { + return _table[std::min(i, size() - 1)]; + }; + + /** + * Returns the largest element in this table.
+ **/ + double max() const { + return _max; + } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/tablemanager.cpp b/searchlib/src/vespa/searchlib/fef/tablemanager.cpp new file mode 100644 index 00000000000..f62f24a3b0f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/tablemanager.cpp @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "tablemanager.h" + +namespace search { +namespace fef { + +TableManager::TableManager() : + _factories(), + _cache(), + _lock() +{ +} +/** + * Looks up the named table while holding _lock. A cached entry (including a cached NULL) + * is returned directly; otherwise the factories are tried in order and the first + * non-NULL table is cached and returned. When no factory can create the table, NULL is + * cached for the name so that the factories are not asked again. + **/ +const Table * +TableManager::getTable(const vespalib::string & name) const +{ + vespalib::LockGuard guard(_lock); + TableCache::const_iterator itr = _cache.find(name); + if (itr != _cache.end()) { + return itr->second.get(); + } + for (size_t i = 0; i < _factories.size(); ++i) { + Table::SP table = _factories[i]->createTable(name); + if (table.get() != NULL) { + _cache.insert(std::make_pair(name, table)); + return table.get(); + } + } + _cache.insert(std::make_pair(name, Table::SP(NULL))); + return NULL; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/tablemanager.h b/searchlib/src/vespa/searchlib/fef/tablemanager.h new file mode 100644 index 00000000000..e69c05b1dce --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/tablemanager.h @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include "itablefactory.h" +#include "itablemanager.h" + +namespace search { +namespace fef { + +/** + * This class manages a set of tables and contains an ordered list of table factories used to create tables, + * and a cache of already created tables. A table is accessed by a unique name.
+ **/ +class TableManager : public ITableManager +{ +private: + TableManager(const TableManager &); + TableManager &operator=(const TableManager &); + + typedef std::map TableCache; + std::vector _factories; + mutable TableCache _cache; + vespalib::Lock _lock; + +public: + TableManager(); + + /** + * Adds a table factory to this manager. + * The table factories are used in the order they were added to create tables. + **/ + void addFactory(ITableFactory::SP factory) { _factories.push_back(factory); } + + /** + * Retrieves the table with the given name using the following strategy: + * 1. Try to find the table in the cache. + * 2. Iterate over the table factories and try to create the table. + * The first table that is successfully created is added to the cache and returned. + * 3. Otherwise remember the miss in the cache and return NULL. + **/ + virtual const Table * getTable(const vespalib::string & name) const; +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp new file mode 100644 index 00000000000..2ba9cf90870 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.cpp @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.termfieldmatchdata"); +#include "termfieldmatchdata.h" +#include "fieldinfo.h" +#include + +namespace search { +namespace fef { +/** + * Creates an empty object: invalid doc id, field id bits set to FIELDID_MASK, + * no flag bits, no positions and zeroed feature storage. + **/ +TermFieldMatchData::TermFieldMatchData() : + _docId(invalidId()), + _fieldId(FIELDID_MASK), + _sz(0) +{ + memset(&_data, 0, sizeof(_data)); +} +/** + * Copies doc id and field id (flag bits included) from rhs. A raw score is copied + * directly; otherwise the positions of rhs are appended one by one. + * NOTE(review): _data._subqueries of rhs is not carried over, since the storage is + * zeroed and only raw score or positions are copied - confirm that this is intended. + **/ +TermFieldMatchData::TermFieldMatchData(const TermFieldMatchData & rhs) : + _docId(rhs._docId), + _fieldId(rhs._fieldId), + _sz(0) +{ + memset(&_data, 0, sizeof(_data)); + if (isRawScore()) { + _data._rawScore = rhs._data._rawScore; + } else { + for (auto it(rhs.begin()), mt(rhs.end()); it != mt; it++) { + appendPosition(*it); + } + } +} +/** Copy-and-swap assignment; self assignment is a no-op. */ +TermFieldMatchData & TermFieldMatchData::operator = (const TermFieldMatchData & rhs) +{ + if (this != & rhs) { + TermFieldMatchData tmp(rhs); + swap(tmp); + } + return *this; +} +/** + * Releases the position storage: nothing for a raw score, delete [] for the heap + * array, and an explicit destructor call for the single in-place position. + **/ +TermFieldMatchData::~TermFieldMatchData() +{ + if (isRawScore()) { + } else if (isMultiPos()) { + delete [] _data._positions._positions; + } else { + getFixed()->~TermFieldMatchDataPosition(); + } +} + +namespace { +/** Swaps the two pointed-to values through a temporary copy. */ +template +void sswap(T * a, T * b) { + T tmp(*a); + *a = *b; + *b = tmp; +} + +} +/** Swaps doc id, field id and size member by member, and the feature union as raw bytes. */ +void +TermFieldMatchData::swap(TermFieldMatchData &rhs) +{ + sswap(&_docId, &rhs._docId); + sswap(&_fieldId, &rhs._fieldId); + sswap(&_sz, &rhs._sz); + char tmp[sizeof(_data)]; + memcpy(tmp, &rhs._data, sizeof(_data)); + memcpy(&rhs._data, &_data, sizeof(_data)); + memcpy(&_data, tmp, sizeof(_data)); +} + +namespace { +/** Upper bound on the number of entries in the heap allocated position array. */ +constexpr size_t MAX_ELEMS = std::numeric_limits::max(); + +} +/** + * Allocates a new position array with room for max(1, 2 * sz) entries, capped at MAX_ELEMS. + * When sz > 0 the existing content is carried over: an existing array is copied and freed, + * while a single in-place position is moved into the array and the multi-position + * flag (0x4000) is set. + **/ +void +TermFieldMatchData::resizePositionVector(size_t sz) +{ + size_t newSize(std::min(MAX_ELEMS, std::max(1ul, sz*2))); + TermFieldMatchDataPosition * n = new TermFieldMatchDataPosition[newSize]; + if (sz > 0) { + if (isMultiPos()) { + for (size_t i(0); i < _data._positions._allocated; i++) { + n[i] = _data._positions._positions[i]; + } + delete [] _data._positions._positions; + } else { + assert(sz == 1); + _fieldId = _fieldId | 0x4000; + n[0] = *getFixed(); +
_data._positions._maxElementLength = getFixed()->getElementLen(); + } + } + _data._positions._allocated = newSize; + _data._positions._positions = n; +} + +/** + * Appends pos to the heap allocated position array, first moving to or growing + * the array when needed. + * + * While the multi-position flag is unset the union still holds the single in-place + * position, so _data._positions must not be read; that case always goes through + * resizePositionVector(), which moves the position into a new array. + * No growth is attempted once the array has MAX_ELEMS entries, since resizing cannot + * make it larger and the position is dropped in that case. + * + * @param pos the position to append + **/ +void +TermFieldMatchData::appendPositionToAllocatedVector(const TermFieldMatchDataPosition &pos) +{ + const bool needsVector = !isMultiPos(); + if (__builtin_expect(needsVector || ((_sz >= _data._positions._allocated) && (_data._positions._allocated < MAX_ELEMS)), false)) { + resizePositionVector(_sz); + } + if (__builtin_expect(pos.getElementLen() > _data._positions._maxElementLength, false)) { + _data._positions._maxElementLength = pos.getElementLen(); + } + if (__builtin_expect(_sz < MAX_ELEMS, true)) { + _data._positions._positions[_sz++] = pos; + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h new file mode 100644 index 00000000000..a3ce0ac4bb6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdata.h @@ -0,0 +1,267 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "termfieldmatchdataposition.h" +#include "fieldpositionsiterator.h" +#include "fieldinfo.h" +#include +#include +#include +#include + +class MatchDataHeapTest; + +namespace search { +namespace fef { + +class TermMatchDataMerger; + +/** + * Match information for a single term within a single field.
+ **/ +class TermFieldMatchData +{ +public: + typedef const TermFieldMatchDataPosition * PositionsIterator; + typedef TermFieldMatchDataPosition * MutablePositionsIterator; + struct Positions { + uint16_t _maxElementLength; /* largest element length among the stored positions */ + uint16_t _allocated; /* number of entries allocated in _positions */ + TermFieldMatchDataPosition *_positions; /* heap array, released with delete [] */ + } __attribute__((packed)); + + union Features { + feature_t _rawScore; /* used when the raw score flag (0x8000) is set */ + unsigned char _position[sizeof(TermFieldMatchDataPosition)]; /* in-place storage for a single position */ + Positions _positions; /* used when the multi-position flag (0x4000) is set */ + uint64_t _subqueries; + } __attribute__((packed)); +private: + bool isRawScore() const { return _fieldId & 0x8000; } + bool isMultiPos() const { return _fieldId & 0x4000; } + bool empty() const { return _sz == 0; } + void clear() { _sz = 0; } + bool allocated() const { return isMultiPos(); } + const TermFieldMatchDataPosition * getFixed() const { return reinterpret_cast(_data._position); } + TermFieldMatchDataPosition * getFixed() { return reinterpret_cast(_data._position); } + const TermFieldMatchDataPosition * getMultiple() const { return _data._positions._positions; } + TermFieldMatchDataPosition * getMultiple() { return _data._positions._positions; } + int32_t getElementWeight() const { return empty() ? 1 : allocated() ? getMultiple()->getElementWeight() : getFixed()->getElementWeight(); } + uint32_t getMaxElementLength() const { return empty() ? 0 : allocated() ? _data._positions._maxElementLength : getFixed()->getElementLen(); } + void appendPositionToAllocatedVector(const TermFieldMatchDataPosition &pos); + void resizePositionVector(size_t sz) __attribute__((noinline)); + + enum { FIELDID_MASK = 0x1fff}; + + uint32_t _docId; + // The low 13 bits (FIELDID_MASK) hold the field id. The 3 upper bits are flags: 0x8000 raw score, 0x4000 multiple positions, 0x2000 not needed. + uint16_t _fieldId; + uint16_t _sz; + Features _data; + + friend class ::MatchDataHeapTest; + +public: + /** + * This gives you access to the underlying positions. + * @return the array of positions. + */ + MutablePositionsIterator getPositions() { return allocated() ?
getMultiple() : getFixed(); } + PositionsIterator begin() const { return allocated() ? getMultiple() : getFixed(); } + PositionsIterator end() const { return allocated() ? getMultiple() + _sz : empty() ? getFixed() : getFixed()+1; } + size_t size() const { return _sz; } + size_t capacity() const { return allocated() ? _data._positions._allocated : 1; } + + /** + * Create empty object. To complete object setup, field id must be + * set. + **/ + TermFieldMatchData(); + + TermFieldMatchData(const TermFieldMatchData & rhs); + + ~TermFieldMatchData(); + TermFieldMatchData & operator = (const TermFieldMatchData & rhs); + + /** + * Swaps the content of this object with the content of the given + * term field match data object. + * + * @param rhs The object to swap with. + **/ + void swap(TermFieldMatchData &rhs); + + /** + * Set which field this object has match information for. + * + * @return this object (for chaining) + * @param fieldId field id + **/ + TermFieldMatchData &setFieldId(uint32_t fieldId) { + if (fieldId == IllegalFieldId) { + fieldId = FIELDID_MASK; + } else { + assert(fieldId < FIELDID_MASK); + } + _fieldId = (_fieldId & ~FIELDID_MASK) | fieldId; + return *this; + } + + /** + * Obtain the field id + * + * @return field id + **/ + uint32_t getFieldId() const { + return __builtin_expect((_fieldId & FIELDID_MASK) != FIELDID_MASK, true) ? (_fieldId & FIELDID_MASK) : IllegalFieldId; + } + + /** + * Reset the content of this match data and prepare it for use + * with the given docid. + * + * @return this object (for chaining) + * @param docId id of the document we are generating match information for + **/ + TermFieldMatchData &reset(uint32_t docId) { + _docId = docId; + _sz = 0; + if (isRawScore()) { + _data._rawScore = 0.0; + } else if (isMultiPos()) { + _data._positions._maxElementLength = 0; + } + return *this; + } + + /** + * Reset only the docid of this match data and prepare it for use + * with the given docid. Assume all other are not touched. 
+ * + * @return this object (for chaining) + * @param docId id of the document we are generating match information for + **/ + TermFieldMatchData &resetOnlyDocId(uint32_t docId) { + _docId = docId; + return *this; + } + + /** + * Indicate a match for a given docid and inject a raw score + * instead of detailed match data. The raw score can be picked up + * in the ranking framework by using the rawScore feature for the + * appropriate field. + * + * @return this object (for chaining) + * @param docId id of the document we have matched + * @param score a raw score for the matched document + **/ + TermFieldMatchData &setRawScore(uint32_t docId, feature_t score) { + resetOnlyDocId(docId); + enableRawScore(); + _data._rawScore = score; + return *this; + } + TermFieldMatchData & enableRawScore() { + _fieldId = _fieldId | 0x8000; + return *this; + } + + /** + * Obtain the raw score for this match data. + * + * @return raw score + **/ + feature_t getRawScore() const { + return __builtin_expect(isRawScore(), true) ? _data._rawScore : 0.0; + } + + void setSubqueries(uint32_t docId, uint64_t subqueries) { + resetOnlyDocId(docId); + _data._subqueries = subqueries; + } + + uint64_t getSubqueries() const { + if (!empty() || isRawScore()) { + return 0; + } + return _data._subqueries; + } + + /** + * Obtain the document id for which the data contained in this object is valid. + * + * @return document id + **/ + uint32_t getDocId() const { + return _docId; + } + + /** + * Obtain the weight of the first occurrence in this field, or 1 + * if no occurrences are present. This function is intended for + * attribute matching calculations. + * + * @return weight + **/ + int32_t getWeight() const { + if (__builtin_expect(_sz == 0, false)) { + return 1; + } + return __builtin_expect(allocated(), false) ? getMultiple()->getElementWeight() : getFixed()->getElementWeight(); + } + + /** + * Add occurrence information to this match data for the current + * document. 
+ * + * @return this object (for chaining) + * @param pos low-level occurrence information + **/ + TermFieldMatchData &appendPosition(const TermFieldMatchDataPosition &pos) { + if (isMultiPos() || (_sz > 0)) { + appendPositionToAllocatedVector(pos); + } else { + _sz = 1; + new (_data._position) TermFieldMatchDataPosition(pos); + } + return *this; + } + + /** + * Obtain an object that gives access to the low-level occurrence + * information stored in this object. + * + * @return field position iterator + **/ + FieldPositionsIterator getIterator() const { + const uint32_t len(getMaxElementLength()); + return FieldPositionsIterator(len != 0 ? len : FieldPositionsIterator::UNKNOWN_LENGTH, begin(), end()); + } + + /** + * This indicates if this instance is actually used for ranking or not. + * @return true if it is not needed. + */ + bool isNotNeeded() const { return _fieldId & 0x2000; } + + /** + * Tag that this instance is not really used for ranking. + */ + void tagAsNotNeeded() { + _fieldId = _fieldId | 0x2000; + } + + /** + * Special docId value indicating that no data has been saved yet. + * This should match (or be above) endId() in search::queryeval::SearchIterator. + * + * @return constant + **/ + static uint32_t invalidId() { return 0xdeadbeefU; } +} __attribute__((packed)); + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h new file mode 100644 index 00000000000..874870f5afa --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataarray.h @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search { +namespace fef { + +class TermFieldMatchData; + +/** + * Array of pointers to TermFieldMatchData instances. + * Use this class to pass an ordered set of references + * into e.g. 
iterators searching in multiple fields at once. + * The array must either be totally empty, or contain + * the appropriate number of valid references. + **/ +class TermFieldMatchDataArray +{ +private: + std::vector _array; + +public: + /** + * Reserve space for a number of elements in order to reduce number of allocations. + * @param size Number of elements to reserve space for. + */ + void reserve(size_t sz) { + _array.reserve(sz); + } + /** + * add a pointer to the array. + * + * @return this object for chaining + * @param value the pointer to be added + **/ + TermFieldMatchDataArray &add(TermFieldMatchData *value) { + assert(value != 0); + _array.push_back(value); + return *this; + } + + /** + * check that the array contains valid references. + * + * @return true if array not empty + **/ + bool valid() const { return !_array.empty(); } + + /** + * size of the array. + * + * @return the size + **/ + size_t size() const { return _array.size(); } + + /** + * get a pointer from the array. + * + * @return the pointer + * @param i index of the pointer + **/ + TermFieldMatchData *operator[] (size_t i) const { + return _array[i]; + } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp new file mode 100644 index 00000000000..f76c785b616 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.termfieldmatchdataposition"); +#include "termfieldmatchdataposition.h" + +namespace search { +namespace fef { + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h new file mode 100644 index 00000000000..b7f82819bfb --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termfieldmatchdataposition.h @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace fef { + +class TermFieldMatchDataPositionKey +{ +private: + uint32_t _elementId; + uint32_t _position; + +public: + TermFieldMatchDataPositionKey() + : _elementId(0u), + _position(0u) + { + } + + TermFieldMatchDataPositionKey(uint32_t elementId, + uint32_t position) + : _elementId(elementId), + _position(position) + { + } + + uint32_t getElementId() const { return _elementId; } + uint32_t getPosition() const { return _position; } + + void setElementId(uint32_t elementId) { _elementId = elementId; } + void setPosition(uint32_t position) { _position = position; } + + bool operator<(const TermFieldMatchDataPositionKey &rhs) const { + if (_elementId != rhs._elementId) { + return _elementId < rhs._elementId; + } + return _position < rhs._position; + } + + bool operator==(const TermFieldMatchDataPositionKey &rhs) const { + return ((_elementId == rhs._elementId) && + (_position == rhs._position)); + } +}; + +class TermFieldMatchDataPosition : public TermFieldMatchDataPositionKey +{ +private: + int32_t _elementWeight; + uint32_t _elementLen; + uint32_t _matchLength; + double _matchExactness; // or possibly _matchWeight + +public: + TermFieldMatchDataPosition() + : TermFieldMatchDataPositionKey(), + _elementWeight(1), + _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH), + _matchLength(1), + _matchExactness(1.0) + { + } + + 
const TermFieldMatchDataPositionKey &key() const { + return *this; + } + + /** + * A comparator for sorting in natural (ascending) order but if + * positions are equal, sort best exactness first. + */ + static bool compareWithExactness(const TermFieldMatchDataPosition &a, + const TermFieldMatchDataPosition &b) + { + if (a < b) return true; + if (b < a) return false; + return a._matchExactness >= b._matchExactness; + } + + TermFieldMatchDataPosition(uint32_t elementId, + uint32_t position, + int32_t elementWeight, + uint32_t elementLen) + : TermFieldMatchDataPositionKey(elementId, position), + _elementWeight(elementWeight), + _elementLen(elementLen), + _matchLength(1), + _matchExactness(1.0) + { + } + + int32_t getElementWeight() const { return _elementWeight; } + uint32_t getElementLen() const { return _elementLen; } + double getMatchExactness() const { return _matchExactness; } + + void setElementWeight(int32_t elementWeight) { + _elementWeight = elementWeight; + } + void setElementLen(uint32_t elementLen) { + _elementLen = elementLen; + } + TermFieldMatchDataPosition& setMatchExactness(double exactness) { + _matchExactness = exactness; + return *this; + } +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp new file mode 100644 index 00000000000..c82d9e1e030 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.cpp @@ -0,0 +1,77 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.termmatchdatamerger"); +#include "termmatchdatamerger.h" + +#include + +namespace search { +namespace fef { + +TermMatchDataMerger::TermMatchDataMerger(const Inputs &allinputs, + const TermFieldMatchDataArray &outputs) + : _inputs(), + _output(outputs), + _scratch() +{ + for (size_t i = 0; i < _output.size(); ++i) { + Inputs inputs_for_i; + uint32_t fieldId = _output[i]->getFieldId(); + + for (size_t j = 0; j < allinputs.size(); ++j) { + if (allinputs[j].matchData->getFieldId() == fieldId) { + inputs_for_i.push_back(allinputs[j]); + } + } + _inputs.push_back(inputs_for_i); + } +} + +void +TermMatchDataMerger::merge(uint32_t docid) +{ + for (size_t i = 0; i < _output.size(); ++i) { + merge(docid, _inputs[i], *(_output[i])); + } +} + +void +TermMatchDataMerger::merge(uint32_t docid, + const Inputs &in, + TermFieldMatchData &out) +{ + _scratch.clear(); + bool wasMatch = false; + for (size_t i = 0; i < in.size(); ++i) { + const TermFieldMatchData *md = in[i].matchData; + if (md->getDocId() == docid) { + for (const TermFieldMatchDataPosition &iter : *md) { + double exactness = in[i].exactness * iter.getMatchExactness(); + _scratch.push_back(iter); + _scratch.back().setMatchExactness(exactness); + } + wasMatch = true; + } + } + if (wasMatch) { + out.reset(docid); + if (_scratch.size() > 0) { + std::sort(_scratch.begin(), _scratch.end(), + TermFieldMatchDataPosition::compareWithExactness); + TermFieldMatchDataPosition prev = _scratch[0]; + for (size_t i = 1; i < _scratch.size(); ++i) { + const TermFieldMatchDataPosition &curr = _scratch[i]; + if (prev.key() < curr.key()) { + out.appendPosition(prev); + prev = curr; + } + } + out.appendPosition(prev); + } + } +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h new file mode 100644 index 00000000000..6c1ae717a43 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/fef/termmatchdatamerger.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "termfieldmatchdataarray.h" +#include "termfieldmatchdata.h" +#include + +namespace search { +namespace fef { + +class TermMatchDataMerger +{ +public: + struct Input { + const TermFieldMatchData *matchData; + double exactness; + + Input() : matchData(NULL), exactness(0.0) {} + Input(const TermFieldMatchData *arg_matchData, double arg_exactness) + : matchData(arg_matchData), exactness(arg_exactness) + {} + }; + typedef std::vector Inputs; +private: + std::vector _inputs; + const TermFieldMatchDataArray _output; + std::vector _scratch; + + TermMatchDataMerger(const TermMatchDataMerger &); + TermMatchDataMerger &operator=(const TermMatchDataMerger &); + + void merge(uint32_t docid, + const Inputs &in, + TermFieldMatchData &out); +public: + + TermMatchDataMerger(const Inputs &allinputs, + const TermFieldMatchDataArray &outputs); + + void merge(uint32_t docid); +}; + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/.gitignore b/searchlib/src/vespa/searchlib/fef/test/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt new file mode 100644 index 00000000000..dd9ea8828c9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_fef_test OBJECT + SOURCES + dummy_dependency_handler.cpp + featuretest.cpp + ftlib.cpp + indexenvironment.cpp + indexenvironmentbuilder.cpp + matchdatabuilder.cpp + queryenvironment.cpp + queryenvironmentbuilder.cpp + rankresult.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp new file mode 100644 index 00000000000..7515a6338e0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.cpp @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + + +#include +#include "dummy_dependency_handler.h" + +namespace search { +namespace fef { +namespace test { + +DummyDependencyHandler::DummyDependencyHandler(Blueprint &blueprint_in) + : blueprint(blueprint_in), + object_type_map(), + accept_type_mismatch(false), + input(), + accept_input(), + output(), + output_type() +{ + blueprint.attach_dependency_handler(*this); +} + +DummyDependencyHandler::~DummyDependencyHandler() +{ + blueprint.detach_dependency_handler(); +} + +void +DummyDependencyHandler::define_object_input(const vespalib::string &name, const vespalib::eval::ValueType &type) +{ + object_type_map.emplace(name, FeatureType::object(type)); +} + +const FeatureType & +DummyDependencyHandler::resolve_input(const vespalib::string &feature_name, Blueprint::AcceptInput accept_type) +{ + input.push_back(feature_name); + accept_input.push_back(accept_type); + auto pos = object_type_map.find(feature_name); + if (pos == object_type_map.end()) { + if (accept_type == Blueprint::AcceptInput::OBJECT) { + accept_type_mismatch = true; + } + return FeatureType::number(); + } + if (accept_type == Blueprint::AcceptInput::NUMBER) { + accept_type_mismatch = true; + } + return pos->second; +} + +void DummyDependencyHandler::define_output(const vespalib::string &output_name, const 
FeatureType &type) +{ + output.push_back(output_name); + output_type.push_back(type); +} + +} // namespace search::fef::test +} // namespace search::fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h new file mode 100644 index 00000000000..fa1a21d42ad --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/dummy_dependency_handler.h @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +/** + * A very simple blueprint dependency resolver that will keep track of + * inputs and outputs for a single blueprint. + **/ +struct DummyDependencyHandler : public Blueprint::DependencyHandler +{ + Blueprint &blueprint; + std::map object_type_map; + bool accept_type_mismatch; + std::vector input; + std::vector accept_input; + std::vector output; + std::vector output_type; + + explicit DummyDependencyHandler(Blueprint &blueprint_in); + ~DummyDependencyHandler(); + void define_object_input(const vespalib::string &name, const vespalib::eval::ValueType &type); + const FeatureType &resolve_input(const vespalib::string &feature_name, Blueprint::AcceptInput accept_type) override; + void define_output(const vespalib::string &output_name, const FeatureType &type) override; +}; + +} // namespace search::fef::test +} // namespace search::fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp new file mode 100644 index 00000000000..dd8dc0699f5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".fef.featuretest"); + +#include +#include "featuretest.h" +#include + +namespace search { +namespace fef { +namespace test { + +FeatureTest::FeatureTest(BlueprintFactory &factory, + const IndexEnvironment &indexEnv, + QueryEnvironment &queryEnv, + MatchDataLayout &layout, + const std::vector &features, + const Properties &overrides) : + _factory(factory), + _indexEnv(indexEnv), + _queryEnv(queryEnv), + _features(features), + _layout(layout), + _overrides(overrides), + _resolver(new BlueprintResolver(factory, indexEnv)), + _rankProgram(new RankProgram(_resolver)), + _doneSetup(false) +{ + // empty +} + +FeatureTest::FeatureTest(BlueprintFactory &factory, + const IndexEnvironment &indexEnv, + QueryEnvironment &queryEnv, + MatchDataLayout &layout, + const vespalib::string &feature, + const Properties &overrides) : + _factory(factory), + _indexEnv(indexEnv), + _queryEnv(queryEnv), + _features(), + _layout(layout), + _overrides(overrides), + _resolver(new BlueprintResolver(factory, indexEnv)), + _rankProgram(new RankProgram(_resolver)), + _doneSetup(false) +{ + _features.push_back(feature); +} + +bool +FeatureTest::setup() +{ + if (_doneSetup) { + LOG(error, "Setup already done."); + return false; + } + + // clear state so that setup can be called multiple times. 
+ clear(); + + for (uint32_t i = 0; i < _features.size(); ++i) { + _resolver->addSeed(_features[i]); + } + + if (!_resolver->compile()) { + LOG(error, "Failed to compile blueprint resolver."); + return false; + } + + _rankProgram->setup(_layout, _queryEnv, _overrides); + _doneSetup = true; + return true; +} + +MatchDataBuilder::UP +FeatureTest::createMatchDataBuilder() +{ + if (_doneSetup) { + return MatchDataBuilder::UP(new MatchDataBuilder(_queryEnv, _rankProgram->match_data())); + } + LOG(warning, "Match data not initialized."); + return MatchDataBuilder::UP(); +} + +bool +FeatureTest::execute(const RankResult &expected, uint32_t docId) +{ + RankResult result; + if (!executeOnly(result, docId)) { + return false; + } + + if (!result.includes(expected)) { + std::stringstream exp, act; + exp << "Expected: " << expected; + act << "Actual : " << result; + + LOG(error, "Expected result not present in actual result after execution:"); + LOG(error, "%s", exp.str().c_str()); + LOG(error, "%s", act.str().c_str()); + + return false; + } + return true; +} + +bool +FeatureTest::execute(feature_t expected, double epsilon, uint32_t docId) +{ + return execute(RankResult().setEpsilon(epsilon).addScore(_features.front(), expected), docId); +} + +bool +FeatureTest::executeOnly(uint32_t docId) +{ + if (!_doneSetup) { + LOG(error, "Setup not done."); + return false; + } + // Note: match data object is reset as part of run + _rankProgram->run(docId); + + return true; +} + +bool +FeatureTest::executeOnly(RankResult & result, uint32_t docId) +{ + if (!executeOnly(docId)) { + return false; + } + + std::map all = Utils::getAllFeatures(*_rankProgram); + for (auto itr = all.begin(); itr != all.end(); ++itr) { + result.addScore(itr->first, itr->second); + } + + return true; +} + +const vespalib::eval::Value::CREF * +FeatureTest::resolveObjectFeature() +{ + return Utils::getObjectFeature(*_rankProgram); +} + +void +FeatureTest::clear() +{ + _resolver = BlueprintResolver::SP(new 
BlueprintResolver(_factory, _indexEnv)); + _rankProgram.reset(new RankProgram(_resolver)); + _doneSetup = false; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/featuretest.h b/searchlib/src/vespa/searchlib/fef/test/featuretest.h new file mode 100644 index 00000000000..ead33f35f88 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/featuretest.h @@ -0,0 +1,137 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +/** + * This class wraps everything necessary to simulate a feature execution environment. + */ +class FeatureTest { +public: + /** + * Constructs a new feature test. + * + * @param factory The blueprint factory that holds all registered features. + * @param indexEnv The index environment to use. + * @param queryEnv The query environment to use. + * @param layout The match data layout to use. + * @param feature The feature strings to run. + * @param overrides The set of feature overrides. + */ + FeatureTest(BlueprintFactory &factory, + const IndexEnvironment &indexEnv, + QueryEnvironment &queryEnv, + MatchDataLayout &layout, + const std::vector &features, + const Properties &overrides); + + /** + * Constructs a new feature test. + * + * @param factory The blueprint factory that holds all registered features. + * @param indexEnv The index environment to use. + * @param queryEnv The query environment to use. + * @param layout The match data layout to use. + * @param feature The feature string to run. + * @param overrides The set of feature overrides. 
+ */ + FeatureTest(BlueprintFactory &factory, + const IndexEnvironment &indexEnv, + QueryEnvironment &queryEnv, + MatchDataLayout &layout, + const vespalib::string &feature, + const Properties &overrides); + /** + * Necessary method to setup the internal feature execution manager. A test will typically assert on the return of + * this method, since no test can run if setup failed. + * + * @return Whether or not setup was ok. + */ + bool setup(); + + /** + * Creates and returns a match data builder object. This will clear whatever content is currently contained in this + * runner. The builder offers a simple API to build a match data object. + * + * @return A builder object. + */ + MatchDataBuilder::UP createMatchDataBuilder(); + + /** + * Executes the content of this runner, comparing the result to the given result set. + * + * @param expected The expected output. + * @param docId The document id to set on the match data object before running executors. + * @return Whether or not the output matched the expected. + */ + bool execute(const RankResult &expected, uint32_t docId = 1); + + /** + * Convenience method to assert the final output of a feature string. + * + * @param expected The expected output. + * @param epsilon The allowed slack for comparing rank results. + * @param docId The document id to set on the match data object before running executors. + * @return Whether or not the output matched the expected. + */ + bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1); + + /** + * Executes the content of this runner only. + * + * @param docId The document id to set on the match data object before running executors. + * @return Whether the executors were executed. + */ + bool executeOnly(uint32_t docId = 1); + + /** + * Executes the content of this runner only and stores the result in the given rank result. + * + * @param result The rank result to store the rank scores. 
+ * @param docId The document id to set on the match data object before running executors. + * @return Whether the executors were executed. + */ + bool executeOnly(RankResult & result, uint32_t docId = 1); + + /** + * Resolve the only object feature that is present in the match data of the underlying + * rank program. + */ + const vespalib::eval::Value::CREF *resolveObjectFeature(); + +private: + BlueprintFactory &_factory; + const IndexEnvironment &_indexEnv; + QueryEnvironment &_queryEnv; + std::vector _features; + MatchDataLayout &_layout; + const Properties &_overrides; + BlueprintResolver::SP _resolver; + RankProgram::UP _rankProgram; + bool _doneSetup; + + void clear(); +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp new file mode 100644 index 00000000000..3fd85d04241 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp @@ -0,0 +1,399 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".ftlib"); + +#include +#include +#include +#include +#include +#include "ftlib.h" +#include "dummy_dependency_handler.h" + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +FtIndexEnvironment::FtIndexEnvironment() : + search::fef::test::IndexEnvironment(), + _builder(*this) +{ + // empty +} + +FtQueryEnvironment::FtQueryEnvironment(search::fef::test::IndexEnvironment &env) + : search::fef::test::QueryEnvironment(&env), + _layout(), + _builder(*this, _layout) +{ + // empty +} + +FtDumpFeatureVisitor::FtDumpFeatureVisitor() : + _features() +{ +} + +FtFeatureTest::FtFeatureTest(search::fef::BlueprintFactory &factory, const vespalib::string &feature) : + _indexEnv(), + _queryEnv(_indexEnv), + _overrides(), + _test(factory, _indexEnv, _queryEnv, _queryEnv.getLayout(), feature, _overrides) +{ + // empty +} + +FtFeatureTest::FtFeatureTest(search::fef::BlueprintFactory &factory, const std::vector &features) + : _indexEnv(), + _queryEnv(_indexEnv), + _overrides(), + _test(factory, _indexEnv, _queryEnv, _queryEnv.getLayout(), features, _overrides) +{ +} + + +//--------------------------------------------------------------------------------------------------------------------- +// FtUtil +//--------------------------------------------------------------------------------------------------------------------- +std::vector +FtUtil::tokenize(const vespalib::string & str, const vespalib::string & separator) +{ + typedef boost::tokenizer > Tokenizer; + typedef boost::char_separator Separator; + + std::vector retval; + if (separator != vespalib::string("")) { + Tokenizer tnz(str, Separator(separator.c_str())); + for (Tokenizer::const_iterator itr = tnz.begin(); itr != tnz.end(); ++itr) { + retval.push_back(*itr); + } + } else { + for (uint32_t i = 0; i < str.size(); ++i) { + retval.push_back(vespalib::string("" + str[i])); + } + } + return retval; +} + + +FtQuery +FtUtil::toQuery(const 
vespalib::string & query, const vespalib::string & separator) +{ + std::vector prepQuery = FtUtil::tokenize(query, separator); + FtQuery retval(prepQuery.size()); + for (uint32_t i = 0; i < prepQuery.size(); ++i) { + std::vector significanceSplit = FtUtil::tokenize(prepQuery[i], vespalib::string("%")); + std::vector weightSplit = FtUtil::tokenize(significanceSplit[0], vespalib::string("!")); + std::vector connexitySplit = FtUtil::tokenize(weightSplit[0], vespalib::string(":")); + if (connexitySplit.size() > 1) { + retval[i].term = connexitySplit[1]; + retval[i].connexity = search::features::util::strToNum(connexitySplit[0]); + } else { + retval[i].term = connexitySplit[0]; + } + if (significanceSplit.size() > 1) { + retval[i].significance = search::features::util::strToNum(significanceSplit[1]); + } + if (weightSplit.size() > 1) { + retval[i].termWeight.setPercent(search::features::util::strToNum(weightSplit[1])); + } + } + return retval; +} + +RankResult +FtUtil::toRankResult(const vespalib::string & baseName, const vespalib::string & result, const vespalib::string & separator) +{ + RankResult retval; + std::vector prepResult = FtUtil::tokenize(result, separator); + for (uint32_t i = 0; i < prepResult.size(); ++i) { + std::vector rs = FtUtil::tokenize(prepResult[i], ":"); + vespalib::string name = rs[0]; + vespalib::string value = rs[1]; + retval.addScore(baseName + "." 
+ name, search::features::util::strToNum(value)); + } + return retval; +} + + +//--------------------------------------------------------------------------------------------------------------------- +// FtTestApp +//--------------------------------------------------------------------------------------------------------------------- +void +FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList ¶ms) +{ + search::fef::test::IndexEnvironment ie; + FT_SETUP_FAIL(prototype, ie, params); +} + +void +FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms) +{ + FT_LOG(prototype, env, params); + search::fef::Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + EXPECT_TRUE(!bp->setup(env, params)); +} + +void +FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList ¶ms, + const StringList &expectedIn, const StringList &expectedOut) +{ + search::fef::test::IndexEnvironment ie; + FT_SETUP_OK(prototype, ie, params, expectedIn, expectedOut); +} + +void +FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms, const StringList &expectedIn, const StringList &expectedOut) +{ + FT_LOG(prototype, env, params); + search::fef::Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + ASSERT_TRUE(bp->setup(env, params)); + FT_EQUAL(expectedIn, deps.input, "In, "); + FT_EQUAL(expectedOut, deps.output, "Out,"); +} + +void +FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName) +{ + StringList empty; + FT_DUMP(factory, baseName, empty); +} + +void +FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env) +{ + StringList empty; + FT_DUMP(factory, baseName, env, empty); +} + +void 
+FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + const StringList &expected) +{ + search::fef::test::IndexEnvironment ie; + FT_DUMP(factory, baseName, ie, expected); +} + +void +FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env, + const StringList &expected) +{ + FtDumpFeatureVisitor dfv; + search::fef::Blueprint::SP bp = factory.createBlueprint(baseName); + if (bp.get() == NULL) { + LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str()); + ASSERT_TRUE(bp.get() != NULL); + } + bp->visitDumpFeatures(env, dfv); + FT_EQUAL(expected, dfv.features(), "Dump"); +} + +void +FtTestApp::FT_EQUAL(const std::vector &expected, const std::vector &actual, + const vespalib::string prefix) +{ + FT_LOG(prefix + " expected", expected); + FT_LOG(prefix + " actual ", actual); + EXPECT_EQUAL(expected.size(), actual.size()); + ASSERT_TRUE(expected.size() == actual.size()); + for (uint32_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i], actual[i]); + ASSERT_TRUE(expected[i] == actual[i]); + } +} + +void +FtTestApp::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms) +{ + LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str()); + std::vector arr; + for (std::vector::const_iterator it = env.getFields().begin(); + it != env.getFields().end(); ++it) { + arr.push_back(it->name()); + } + FT_LOG("Environment ", arr); + FT_LOG("Parameters ", params); +} + +void +FtTestApp::FT_LOG(const vespalib::string &prefix, const std::vector &arr) +{ + vespalib::string str = prefix + " = [ "; + for (uint32_t i = 0; i < arr.size(); ++i) { + str.append("'").append(arr[i]).append("'"); + if (i < arr.size() - 1) { + str.append(", "); + } + } + str.append(" ]"); + LOG(info, "%s", str.c_str()); +} + +void +FtTestApp::FT_SETUP(FtFeatureTest 
&test, const vespalib::string &query, const StringMap &index, + uint32_t docId) +{ + LOG(info, "Setup test for query '%s'.", query.c_str()); + + // Add all query terms. + FtQueryEnvironment &queryEnv = test.getQueryEnv(); + for (uint32_t i = 0; i < query.size(); ++i) { + queryEnv.getBuilder().addAllFields(); + } + ASSERT_TRUE(test.setup()); + + // Add all occurences. + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + for (StringMap::const_iterator it = index.begin(); + it != index.end(); ++it) { + ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size())); + for (uint32_t i = 0; i < it->second.size(); ++i) { + size_t pos = query.find_first_of(it->second[i]); + if (pos != vespalib::string::npos) { + LOG(debug, "Occurence of '%c' added to field '%s' at position %d.", query[pos], it->first.c_str(), i); + ASSERT_TRUE(mdb->addOccurence(it->first, pos, i)); + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestApp::FT_SETUP(FtFeatureTest & test, const std::vector & query, const StringVectorMap & index, + uint32_t docId) +{ + setupQueryEnv(test.getQueryEnv(), query); + ASSERT_TRUE(test.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + + // Add all occurences. 
+ for (StringVectorMap::const_iterator itr = index.begin(); itr != index.end(); ++itr) { + ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size())); + for (uint32_t i = 0; i < itr->second.size(); ++i) { + FtQuery::const_iterator fitr = query.begin(); + for (;;) { + fitr = std::find(fitr, query.end(), FtQueryTerm(itr->second[i])); + if (fitr != query.end()) { + uint32_t termId = fitr - query.begin(); + LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i); + ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i)); + ++fitr; + } else { + break; + } + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestApp::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId) +{ + setupQueryEnv(test.getQueryEnv(), query); + ASSERT_TRUE(test.setup()); + search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder(); + + // Add all occurences. + for (FtIndex::FieldMap::const_iterator itr = index.index.begin(); itr != index.index.end(); ++itr) { + const FtIndex::Field &field = itr->second; + for (size_t e = 0; e < field.size(); ++e) { + const FtIndex::Element &element = field[e]; + ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size())); + for (size_t t = 0; t < element.tokens.size(); ++t) { + const vespalib::string &token = element.tokens[t]; + for (size_t q = 0; q < query.size(); ++q) { + if (query[q].term == token) { + ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e)); + } + } + } + } + } + ASSERT_TRUE(mdb->apply(docId)); +} + +void +FtTestApp::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query) +{ + // Add all query terms. 
+ for (uint32_t i = 0; i < query.size(); ++i) { + queryEnv.getBuilder().addAllFields(); + queryEnv.getTerms()[i].setPhraseLength(1); + queryEnv.getTerms()[i].setUniqueId(i); + queryEnv.getTerms()[i].setWeight(query[i].termWeight); + if (i > 0) { + vespalib::string from = vespalib::make_string("vespa.term.%u.connexity", i); + vespalib::string to = vespalib::make_string("%u", i - 1); + vespalib::string connexity = vespalib::make_string("%f", query[i].connexity); + queryEnv.getProperties().add(from, to); + queryEnv.getProperties().add(from, connexity); + } + vespalib::string term = vespalib::make_string("vespa.term.%u.significance", i); + vespalib::string significance = vespalib::make_string("%f", query[i].significance); + queryEnv.getProperties().add(term, significance); + LOG(debug, "Add term node: '%s'", query[i].term.c_str()); + } +} + +void +FtTestApp::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName, + const vespalib::string & query, const vespalib::string & field, + const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance, + uint32_t docId) +{ + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, indexName); + + if (params != NULL) { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("fieldMatch(" + indexName + ").proximityLimit", vespalib::make_string("%u", params->getProximityLimit())); + p.add("fieldMatch(" + indexName + ").maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations())); + p.add("fieldMatch(" + indexName + ").maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences())); + p.add("fieldMatch(" + indexName + ").proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance())); + p.add("fieldMatch(" + indexName + ").relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance())); + p.add("fieldMatch(" + indexName + 
").earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance())); + p.add("fieldMatch(" + indexName + ").segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance())); + p.add("fieldMatch(" + indexName + ").occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance())); + p.add("fieldMatch(" + indexName + ").fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance())); + for (std::vector::const_iterator it = params->getProximityTable().begin(); + it != params->getProximityTable().end(); ++it) + { + p.add("fieldMatch(" + indexName + ").proximityTable", vespalib::make_string("%f", *it)); + } + } + + if (totalTermWeight > 0) { + ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermWeight", + vespalib::make_string("%u", totalTermWeight)); + } + + if (totalSignificance > 0.0f) { + ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermSignificance", + vespalib::make_string("%f", totalSignificance)); + } + + std::map > index; + index[indexName] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, docId); +} + + +RankResult +FtTestApp::toRankResult(const vespalib::string & baseName, + const vespalib::string & result, + const vespalib::string & separator) +{ + return FtUtil::toRankResult(baseName, result, separator); +} + + + diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.h b/searchlib/src/vespa/searchlib/fef/test/ftlib.h new file mode 100644 index 00000000000..dff9764b03b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.h @@ -0,0 +1,238 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using search::feature_t; + +//--------------------------------------------------------------------------------------------------------------------- +// StringList +//--------------------------------------------------------------------------------------------------------------------- +class StringList : public std::vector { +public: + StringList &add(const vespalib::stringref &str) { push_back(str); return *this; } + StringList &clear() { std::vector::clear(); return *this; } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// StringMap +//--------------------------------------------------------------------------------------------------------------------- +class StringMap : public std::map { +public: + StringMap &add(const vespalib::string &key, const vespalib::string &val) { + iterator it = insert(std::make_pair(key, val)).first; + it->second = val; + return *this; + } + StringMap &clear() { + std::map::clear(); + return *this; + } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// StringSet +//--------------------------------------------------------------------------------------------------------------------- +class StringSet : public std::set { +public: + StringSet & add(const vespalib::string & str) { insert(str); return *this; } + StringSet & clear() { std::set::clear(); return *this; } +}; + + +//--------------------------------------------------------------------------------------------------------------------- +// FtIndexEnvironment +//--------------------------------------------------------------------------------------------------------------------- +class FtIndexEnvironment : public search::fef::test::IndexEnvironment { +public: + FtIndexEnvironment(); + + 
search::fef::test::IndexEnvironmentBuilder &getBuilder() { return _builder; } + +private: + search::fef::test::IndexEnvironmentBuilder _builder; +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtQueryEnvironment +//--------------------------------------------------------------------------------------------------------------------- +class FtQueryEnvironment : public search::fef::test::QueryEnvironment { +public: + FtQueryEnvironment(search::fef::test::IndexEnvironment &indexEnv); + + search::fef::test::QueryEnvironmentBuilder &getBuilder() { return _builder; } + search::fef::MatchDataLayout &getLayout() { return _layout; } + +private: + search::fef::MatchDataLayout _layout; + search::fef::test::QueryEnvironmentBuilder _builder; +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtDumpFeatureVisitor +//--------------------------------------------------------------------------------------------------------------------- +class FtDumpFeatureVisitor : public search::fef::IDumpFeatureVisitor +{ +private: + std::vector _features; + +public: + FtDumpFeatureVisitor(); + virtual void visitDumpFeature(const vespalib::string & name) { _features.push_back(name); } + const std::vector & features() const { return _features; } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtTestRunner +//--------------------------------------------------------------------------------------------------------------------- +class FtFeatureTest { +public: + FtFeatureTest(search::fef::BlueprintFactory &factory, const vespalib::string &feature); + FtFeatureTest(search::fef::BlueprintFactory &factory, const std::vector &features); + + bool setup() { return _test.setup(); } + bool execute(feature_t expected, double epsilon = 0, uint32_t docId = 1) { return 
_test.execute(expected, epsilon, docId); } + bool execute(const search::fef::test::RankResult &expected, uint32_t docId = 1) { return _test.execute(expected, docId); } + bool executeOnly(uint32_t docId = 1) { return _test.executeOnly(docId); } + bool executeOnly(search::fef::test::RankResult &result, uint32_t docId = 1) { return _test.executeOnly(result, docId); } + search::fef::test::MatchDataBuilder::UP createMatchDataBuilder() { return _test.createMatchDataBuilder(); } + const vespalib::eval::Value::CREF *resolveObjectFeature() { return _test.resolveObjectFeature(); } + + FtIndexEnvironment &getIndexEnv() { return _indexEnv; } + FtQueryEnvironment &getQueryEnv() { return _queryEnv; } + search::fef::Properties &getOverrides() { return _overrides; } + +private: + FtIndexEnvironment _indexEnv; + FtQueryEnvironment _queryEnv; + search::fef::Properties _overrides; + search::fef::test::FeatureTest _test; +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtQueryTerm +//--------------------------------------------------------------------------------------------------------------------- +struct FtQueryTerm { + FtQueryTerm(const vespalib::string t, uint32_t tw = 100, feature_t co = 0.1f, feature_t si = 0.1f) : + term(t), termWeight(tw), connexity(co), significance(si) {} + FtQueryTerm() : term(), termWeight(100), connexity(0.1f), significance(0.1f) {} + vespalib::string term; + search::query::Weight termWeight; + feature_t connexity; + feature_t significance; + bool operator<(const FtQueryTerm & rhs) const { + return term < rhs.term; + } + bool operator==(const FtQueryTerm & rhs) const { + return term == rhs.term; + } +}; + +typedef std::vector FtQuery; +typedef std::map > StringVectorMap; + +//--------------------------------------------------------------------------------------------------------------------- +// FtUtil 
+//--------------------------------------------------------------------------------------------------------------------- +class FtUtil { +public: + static std::vector tokenize(const vespalib::string & str, const vespalib::string & separator = " "); + static FtQuery toQuery(const vespalib::string & query, const vespalib::string & separator = " "); + static search::fef::test::RankResult toRankResult(const vespalib::string & baseName, + const vespalib::string & result, + const vespalib::string & separator = " "); +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtIndex +//--------------------------------------------------------------------------------------------------------------------- +struct FtIndex { + struct Element { + typedef std::vector Tokens; + int32_t weight; + Tokens tokens; + Element(int32_t w, const Tokens &t) + : weight(w), tokens(t) {} + }; + typedef std::vector Field; + typedef std::map FieldMap; + FieldMap index; // raw content of all fields + vespalib::string cursor; // last referenced field + FtIndex() : index(), cursor() {} + FtIndex &field(const vespalib::string &name) { + cursor = name; + index[name]; + return *this; + } + FtIndex &element(const vespalib::string &content, int32_t weight = 1) { + assert(!cursor.empty()); + index[cursor].push_back(Element(weight, FtUtil::tokenize(content, " "))); + return *this; + } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// FtTestApp +//--------------------------------------------------------------------------------------------------------------------- +struct FtTestApp : public vespalib::TestApp { + typedef vespalib::string string; + static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList ¶ms); + static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const 
StringList ¶ms); + static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList ¶ms, + const StringList &expectedIn, const StringList &expectedOut); + static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, + const StringList ¶ms, const StringList &expectedIn, const StringList &expectedOut); + + static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName); + static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env); + static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + const StringList &expected); + static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName, + search::fef::test::IndexEnvironment &env, + const StringList &expected); + + static void FT_EQUAL(const std::vector &expected, const std::vector &actual, + const vespalib::string prefix = ""); + + static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList ¶ms); + static void FT_LOG(const vespalib::string &prefix, const std::vector &arr); + + + static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId); + static void FT_SETUP(FtFeatureTest & test, const FtQuery & query, const StringVectorMap & index, uint32_t docId); + + static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId); + + static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query); + static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName, + const vespalib::string & query, const vespalib::string & field, + const search::features::fieldmatch::Params * params, + uint32_t totalTermWeight, feature_t totalSignificance, + uint32_t docId); + + static 
search::fef::test::RankResult toRankResult(const vespalib::string & baseName, + const vespalib::string & result, + const vespalib::string & separator = " "); + + template + static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) { + search::fef::Blueprint::UP bp = prototype.createInstance(); + if (!EXPECT_TRUE(dynamic_cast(bp.get()) != NULL)) return false; + if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false; + return true; + } +}; + diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp new file mode 100644 index 00000000000..fa2e2102311 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.cpp @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".fef.indexenvironment"); + +#include +#include +#include "indexenvironment.h" + +namespace search { +namespace fef { +namespace test { + +IndexEnvironment::IndexEnvironment() : + _properties(), + _fields(), + _attrMan(), + _tableMan() +{ +} + +const FieldInfo * +IndexEnvironment::getField(uint32_t id) const +{ + return id < _fields.size() ? &_fields[id] : NULL; +} + +const FieldInfo * +IndexEnvironment::getFieldByName(const string &name) const +{ + for (std::vector::const_iterator it = _fields.begin(); + it != _fields.end(); ++it) { + if (it->name() == name) { + return &(*it); + } + } + return NULL; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h new file mode 100644 index 00000000000..aeb669be158 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironment.h @@ -0,0 +1,83 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +/** + * Implementation of the IIndexEnvironment interface used for testing. + */ +class IndexEnvironment : public IIndexEnvironment +{ +public: + /** + * Constructs a new index environment. + */ + IndexEnvironment(); + + // Inherit doc from IIndexEnvironment. + virtual const Properties &getProperties() const { return _properties; } + + // Inherit doc from IIndexEnvironment. + virtual uint32_t getNumFields() const { return _fields.size(); } + + // Inherit doc from IIndexEnvironment. + virtual const FieldInfo *getField(uint32_t id) const; + + // Inherit doc from IIndexEnvironment. + virtual const FieldInfo *getFieldByName(const string &name) const; + + // Inherit doc from IIndexEnvironment. + virtual const ITableManager &getTableManager() const { return _tableMan; } + + // Inherit doc from IIndexEnvironment. + virtual FeatureMotivation getFeatureMotivation() const override { return UNKNOWN; } + + // Inherit doc from IIndexEnvironment. + virtual void hintFeatureMotivation(FeatureMotivation) const {} + + // Inherit doc from IIndexEnvironment. + virtual void hintFieldAccess(uint32_t) const {} + + // Inherit doc from IIndexEnvironment. + virtual void hintAttributeAccess(const string &) const {} + + /** Returns a reference to the properties map of this. */ + Properties &getProperties() { return _properties; } + + /** Returns a reference to the list of fields of this. */ + std::vector &getFields() { return _fields; } + + /** Returns a const reference to the list of fields of this. */ + const std::vector &getFields() const { return _fields; } + + /** Returns a reference to the attribute manager of this. */ + AttributeManager &getAttributeManager() { return _attrMan; } + + /** Returns a reference to the table manager of this. 
*/ + TableManager &getTableManager() { return _tableMan; } + +private: + IndexEnvironment(const IndexEnvironment &); // hide + IndexEnvironment & operator=(const IndexEnvironment &); // hide + +private: + Properties _properties; + std::vector _fields; + AttributeManager _attrMan; + TableManager _tableMan; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp new file mode 100644 index 00000000000..4682dbfe00f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.cpp @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "indexenvironmentbuilder.h" + +namespace search { +namespace fef { +namespace test { + +IndexEnvironmentBuilder::IndexEnvironmentBuilder(IndexEnvironment &env) : + _env(env) +{ + // empty +} + +IndexEnvironmentBuilder & +IndexEnvironmentBuilder::addField(const FieldType &type, + const CollectionType &coll, + const vespalib::string &name) +{ + uint32_t idx = _env.getFields().size(); + FieldInfo field(type, coll, name, idx); + _env.getFields().push_back(field); + return *this; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h new file mode 100644 index 00000000000..15640eb7bfe --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/indexenvironmentbuilder.h @@ -0,0 +1,50 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "indexenvironment.h" + +namespace search { +namespace fef { +namespace test { + +/** + * This class is used to setup an IndexEnvironment for testing. 
+ */ +class IndexEnvironmentBuilder { +public: + /** + * Constructs a new index environment builder. + * + * @param env The index environment to build to. + */ + IndexEnvironmentBuilder(IndexEnvironment &env); + + /** + * Add a field to the index environment. This is analogous to adding fields to a document. + * + * @param type The type of field to add. + * @param coll collection type + * @param name The name of the field. + */ + IndexEnvironmentBuilder &addField(const FieldType &type, + const CollectionType &coll, + const vespalib::string &name); + + /** Returns a reference to the index environment of this. */ + IndexEnvironment &getIndexEnv() { return _env; } + + /** Returns a const reference to the index environment of this. */ + const IndexEnvironment &getIndexEnv() const { return _env; } + +private: + IndexEnvironmentBuilder(const IndexEnvironmentBuilder &); // hide + IndexEnvironmentBuilder & operator=(const IndexEnvironmentBuilder &); // hide + +private: + IndexEnvironment &_env; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp new file mode 100644 index 00000000000..2324198d302 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp @@ -0,0 +1,184 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".fef.matchdatabuilder"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include "matchdatabuilder.h" + +namespace search { +namespace fef { +namespace test { + +MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) : + _queryEnv(queryEnv), + _data(data), + _index(), + _match() +{ + // reset all match data objects and set docId to 'endId' (aka -1) + for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) { + _data.resolveTermField(handle)->reset(TermFieldMatchData::invalidId()); + } + _data.setDocId(TermFieldMatchData::invalidId()); +} + +TermFieldMatchData * +MatchDataBuilder::getTermFieldMatchData(uint32_t termId, uint32_t fieldId) +{ + const ITermData *term = _queryEnv.getTerm(termId); + if (term == NULL) { + return NULL; + } + const ITermFieldData *field = term->lookupField(fieldId); + if (field == NULL || field->getHandle() >= _data.getNumTermFields()) { + return NULL; + } + return _data.resolveTermField(field->getHandle()); +} + + +bool +MatchDataBuilder::setFieldLength(const vespalib::string &fieldName, uint32_t length) +{ + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName); + if (info == NULL) { + LOG(error, "Field '%s' does not exist.", fieldName.c_str()); + return false; + } + _index[info->id()].fieldLength = length; + return true; +} + +bool +MatchDataBuilder::addElement(const vespalib::string &fieldName, int32_t weight, uint32_t length) +{ + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName); + if (info == NULL) { + LOG(error, "Field '%s' does not exist.", fieldName.c_str()); + return false; + } + _index[info->id()].elements.push_back(MyElement(weight, length)); + return true; +} + +bool +MatchDataBuilder::addOccurence(const vespalib::string &fieldName, uint32_t termId, uint32_t pos, uint32_t element) +{ + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName); + if (info 
== NULL) { + LOG(error, "Field '%s' does not exist.", fieldName.c_str()); + return false; + } + if (termId >= _queryEnv.getNumTerms()) { + LOG(error, "Term id '%u' is invalid.", termId); + return false; + } + const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); + if (tfd == NULL) { + LOG(error, "Field '%s' is not searched by the given term.", + fieldName.c_str()); + return false; + } + _match[termId][info->id()].insert(Position(pos, element)); + return true; +} + +bool +MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId, int32_t weight) +{ + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldName); + if (info == NULL) { + LOG(error, "Field '%s' does not exist.", fieldName.c_str()); + return false; + } + if (termId >= _queryEnv.getNumTerms()) { + LOG(error, "Term id '%u' is invalid.", termId); + return false; + } + const ITermFieldData *tfd = _queryEnv.getTerm(termId)->lookupField(info->id()); + if (tfd == NULL) { + LOG(error, "Field '%s' is not searched by the given term.", + fieldName.c_str()); + return false; + } + uint32_t eid = _index[info->id()].elements.size(); + _match[termId][info->id()].clear(); + _match[termId][info->id()].insert(Position(0, eid)); + _index[info->id()].elements.push_back(MyElement(weight, 1)); + return true; +} + +bool +MatchDataBuilder::apply(uint32_t docId) +{ + _data.setDocId(docId); + + // For each term, do + for (TermMap::const_iterator term_iter = _match.begin(); + term_iter != _match.end(); ++term_iter) + { + uint32_t termId = term_iter->first; + + for (FieldPositions::const_iterator field_iter = term_iter->second.begin(); + field_iter != term_iter->second.end(); ++field_iter) + { + uint32_t fieldId = field_iter->first; + TermFieldMatchData *match = getTermFieldMatchData(termId, fieldId); + + // Make sure there is a corresponding term field match data object. 
+ if (match == NULL) { + LOG(error, "Term id '%u' is invalid.", termId); + return false; + } + match->reset(docId); + + // find field data + MyField field; + IndexData::const_iterator idxItr = _index.find(fieldId); + if (idxItr != _index.end()) { + field = idxItr->second; + } + + // For log, attempt to lookup field name. + const FieldInfo *info = _queryEnv.getIndexEnv()->getField(fieldId); + vespalib::string name = info != NULL ? info->name() : vespalib::make_string("%d", fieldId).c_str(); + + // For each occurence of that term, in that field, do + for (Positions::const_iterator occ_iter = field_iter->second.begin(); + occ_iter != field_iter->second.end(); occ_iter++) + { + // Append a term match position to the term match data. + Position occ = *occ_iter; + match->appendPosition(TermFieldMatchDataPosition( + occ.eid, + occ.pos, + field.getWeight(occ.eid), + field.getLength(occ.eid))); + LOG(debug, + "Added occurence of term '%u' in field '%s'" + " at position '%u'.", + termId, name.c_str(), occ.pos); + if (occ.pos >= field.getLength(occ.eid)) { + LOG(warning, + "Added occurence of term '%u' in field '%s'" + " at position '%u' >= fieldLen '%u'.", + termId, name.c_str(), occ.pos, field.getLength(occ.eid)); + } + } + } + } + // Return ok. + return true; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h new file mode 100644 index 00000000000..6efc335dd94 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.h @@ -0,0 +1,150 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include "queryenvironment.h" + +namespace search { +namespace fef { +namespace test { + +class MatchDataBuilder { +public: + struct MyElement { + int32_t weight; + uint32_t length; + MyElement(int32_t w, uint32_t l) : weight(w), length(l) {} + }; + struct MyField { + uint32_t fieldLength; + std::vector elements; + MyField() : fieldLength(0), elements() {} + MyElement &getElement(uint32_t eid) { + while (elements.size() <= eid) { + elements.push_back(MyElement(0, 0)); + } + return elements[eid]; + } + int32_t getWeight(uint32_t eid) const { + if (eid < elements.size()) { + return elements[eid].weight; + } + return 1; + } + uint32_t getLength(uint32_t eid) const { + if (eid < elements.size()) { + return elements[eid].length; + } + return fieldLength; + } + }; + struct Position { + uint32_t pos; + uint32_t eid; + Position(uint32_t p, uint32_t e) : pos(p), eid(e) {} + bool operator<(const Position &other) const { + if (eid == other.eid) { + return pos < other.pos; + } + return eid < other.eid; + } + }; + + /** + * Convenience typedefs. + */ + typedef std::unique_ptr UP; + typedef std::map IndexData; // index data per field + typedef std::set Positions; // match information for a single term and field combination + typedef std::map FieldPositions; // position information per field for a single term + typedef std::map TermMap; // maps term id to map of position information per field + +public: + /** + * Constructs a new match data builder. This is what you should use when building match data since there are alot of + * interconnections that must be set up correctly. + * + * @param queryEnv The query environment to build for. + * @param data The match data to build in. + */ + MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data); + + /** + * Returns the term field match data that corresponds to a given + * term id and field id combination. 
This goes by way of the query + * environment to find the handler of the given term id. + * + * @param termId The id of the term whose data to return. + * @param fieldId The id of the field whose data to return. + * @return The corresponding term match data. + */ + TermFieldMatchData *getTermFieldMatchData(uint32_t termId, uint32_t fieldId); + + /** + * Sets the length of a named field. This will fail if the named field does not exist. + * + * @param fieldName The name of the field. + * @param length The length to set. + * @return Whether or not the field length could be set. + */ + bool setFieldLength(const vespalib::string &fieldName, uint32_t length); + + /** + * Adds an element to a named field. This will fail if the named field does not exist. + * + * @param fieldName The name of the field. + * @param weight The weight of the element. + * @param length The length of the element. + * @return Whether or not the element could be added. + */ + bool addElement(const vespalib::string &fieldName, int32_t weight, uint32_t length); + + /** + * Adds an occurence of a term to the named field, at the given + * position. This will fail if the named field does not exist. The + * list of occurences is implemented as a set, so there is no need + * to add these in order. + * + * @param fieldName The name of the field. + * @param termId The id of the term to register an occurence for. + * @param pos The position of the occurence. + * @param element The element containing the occurence. + * @return Whether or not the occurence could be added. + */ + bool addOccurence(const vespalib::string &fieldName, uint32_t termId, uint32_t pos, uint32_t element = 0); + + /** + * Sets the weight for an attribute match. + * + * @param fieldName The name of the field. + * @param termId The id of the term to register an occurence for. + * @param weight The weight of the match. + * @return Whether or not the occurence could be added. 
+ **/ + bool setWeight(const vespalib::string &fieldName, uint32_t termId, int32_t weight); + + /** + * Apply the content of this builder to the underlying match data. + * + * @param docId the document id + * @return Whether or not the content of this could be applied. + */ + bool apply(uint32_t docId); + +private: + MatchDataBuilder(const MatchDataBuilder &); // hide + MatchDataBuilder & operator=(const MatchDataBuilder &); // hide + +private: + QueryEnvironment &_queryEnv; + MatchData &_data; + IndexData _index; + TermMap _match; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore b/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt b/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt new file mode 100644 index 00000000000..00c75637129 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_fef_test_plugin OBJECT + SOURCES + double.cpp + sum.cpp + staticrank.cpp + chain.cpp + cfgvalue.cpp + query.cpp + setup.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp new file mode 100644 index 00000000000..f36c7588b4b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.cfgvalue"); + +#include +#include +#include "cfgvalue.h" + +namespace search { +namespace fef { +namespace test { + +CfgValueBlueprint::CfgValueBlueprint() : + Blueprint("test_cfgvalue"), + _values() +{ + // empty +} + +void +CfgValueBlueprint::visitDumpFeatures(const IIndexEnvironment &indexEnv, IDumpFeatureVisitor &visitor) const +{ + Property p = indexEnv.getProperties().lookup(getBaseName(), "dump"); + for (uint32_t i = 0; i < p.size(); ++i) { + visitor.visitDumpFeature(p.getAt(i)); + } +} + +bool +CfgValueBlueprint::setup(const IIndexEnvironment &indexEnv, const StringVector ¶ms) +{ + (void) params; + Property p = indexEnv.getProperties().lookup(getName(), "value"); + for (uint32_t i = 0; i < p.size(); ++i) { + std::istringstream iss(p.getAt(i)); + feature_t value; + iss >> std::dec >> value; + _values.push_back(value); + + if (iss.fail()) { + return false; + } + + std::ostringstream name; + name << i; + std::ostringstream desc; + desc << "value " << i; + describeOutput(name.str(), desc.str()); + // we have no inputs + } + return true; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h new file mode 100644 index 00000000000..0fc9baac424 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/cfgvalue.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +class CfgValueBlueprint : public Blueprint +{ +private: + std::vector _values; + +public: + CfgValueBlueprint(); + virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const; + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new CfgValueBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params); + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const { + (void) queryEnv; + return FeatureExecutor::LP(new search::features::ValueExecutor(_values)); + } +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp new file mode 100644 index 00000000000..33567dd1a67 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.cpp @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fef.chain"); +#include "chain.h" + +#include + +namespace search { +namespace fef { +namespace test { + +ChainExecutor::ChainExecutor() : + FeatureExecutor() +{ +} + +void +ChainExecutor::execute(MatchData & data) +{ + *data.resolveFeature(outputs()[0]) = *data.resolveFeature(inputs()[0]); +} + + +ChainBlueprint::ChainBlueprint() : + Blueprint("chain") +{ +} + +bool +ChainBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params) +{ + (void) indexEnv; + if (params.size() != 3) { // [type, children, value] + return false; + } + const std::string & type = params[0]; + const std::string & children = params[1]; + const std::string & value = params[2]; + + uint32_t numChildren; + std::istringstream iss(children); + iss >> std::dec >> numChildren; + std::ostringstream oss; + if (numChildren == 0) { + return false; + } + if (numChildren == 1) { + if (type == "basic") { + oss << "value(" << value << ")"; // value = input to value executor + defineInput(oss.str()); + } else if (type == "cycle") { + oss << "chain(" << type << "," << value << "," << value << ")"; // value = where to insert the cycle + defineInput(oss.str()); + } else { + return false; + } + } else { + oss << "chain(" << type << "," << (numChildren - 1) << "," << value << ")"; + defineInput(oss.str()); + } + describeOutput("out", "chain"); + return true; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h new file mode 100644 index 00000000000..ca65012fa0f --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/chain.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +class ChainExecutor : public FeatureExecutor +{ +public: + ChainExecutor(); + virtual void execute(MatchData & data); +}; + + +class ChainBlueprint : public Blueprint +{ +public: + ChainBlueprint(); + virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {} + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new ChainBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params); + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const { + (void) queryEnv; + return FeatureExecutor::LP(new ChainExecutor()); + } +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp new file mode 100644 index 00000000000..724b8597ece --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/double.cpp @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include // NOTE(review): include targets are missing in this copy of the patch (angle-bracket text lost)
+#include
+LOG_SETUP(".fef.double");
+#include "double.h"
+
+#include
+#include
+
+namespace search {
+namespace fef {
+namespace test {
+
+void
+DoubleExecutor::execute(MatchData & data) // writes twice the value of input i to output i, for every i below _cnt
+{
+    assert(inputs().size() == _cnt); // the executor must be wired with exactly _cnt inputs ...
+    assert(outputs().size() == _cnt); // ... and exactly _cnt outputs
+    for (uint32_t i = 0; i < _cnt; ++i) {
+        *data.resolveFeature(outputs()[i]) = *data.resolveFeature(inputs()[i]) * 2;
+    }
+}
+
+
+DoubleBlueprint::DoubleBlueprint() :
+    Blueprint("double"),
+    _cnt(0)
+{
+}
+
+void
+DoubleBlueprint::visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const // intentionally dumps nothing
+{
+    (void) indexEnv;
+    (void) visitor;
+}
+
+bool
+DoubleBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params) // one input and one output per parameter; always returns true
+{
+    (void) indexEnv;
+    for (uint32_t i = 0; i < params.size(); ++i) { // every parameter is used as an input feature name
+        defineInput(params[i]);
+    }
+    for (uint32_t i = 0; i < params.size(); ++i) { // outputs are named by their index
+        vespalib::asciistream name;
+        name << i;
+        vespalib::asciistream desc;
+        desc << "doubled value " << i;
+        describeOutput(name.str(), desc.str());
+    }
+    _cnt = params.size(); // remembered so createExecutor() can pass it to DoubleExecutor
+    return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/double.h b/searchlib/src/vespa/searchlib/fef/test/plugin/double.h
new file mode 100644
index 00000000000..af69a4fbeec
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/double.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include // NOTE(review): include targets are missing in this copy of the patch (angle-bracket text lost)
+#include
+#include
+#include
+
+namespace search {
+namespace fef {
+namespace test {
+
+class DoubleExecutor : public FeatureExecutor // test executor; execute() is defined in the matching .cpp
+{
+private:
+    size_t _cnt; // number of input/output pairs this executor expects
+public:
+    DoubleExecutor(size_t cnt) : _cnt(cnt) {}
+    virtual void execute(MatchData & data);
+};
+
+
+class DoubleBlueprint : public Blueprint // test blueprint that hands its stored count to DoubleExecutor
+{
+private:
+    size_t _cnt; // value passed to each DoubleExecutor created below
+public:
+    DoubleBlueprint();
+    virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const;
+    virtual Blueprint::UP createInstance() const { return Blueprint::UP(new DoubleBlueprint()); } // fresh, un-setup instance
+    virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+    virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const { // query environment is not used
+        (void) queryEnv;
+        return FeatureExecutor::LP(new DoubleExecutor(_cnt));
+    }
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp
new file mode 100644
index 00000000000..4308bd4908d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/query.cpp
@@ -0,0 +1,46 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.query"); + +#include +#include +#include +#include "query.h" + +namespace search { +namespace fef { +namespace test { + +QueryBlueprint::QueryBlueprint() : + Blueprint("test_query"), + _key() +{ + // empty +} + +bool +QueryBlueprint::setup(const IIndexEnvironment &indexEnv, const StringVector ¶ms) +{ + (void) indexEnv; + if (params.size() != 1) { + return false; + } + _key = params[0]; + describeOutput("value", "the parameter looked up in the rank properties and converted to a float"); + return true; +} + +FeatureExecutor::LP +QueryBlueprint::createExecutor(const IQueryEnvironment &queryEnv) const +{ + std::vector values; + std::string val = queryEnv.getProperties().lookup(_key).get("0.0"); + values.push_back(strtod(val.data(), NULL)); + return FeatureExecutor::LP(new search::features::ValueExecutor(values)); +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/query.h b/searchlib/src/vespa/searchlib/fef/test/plugin/query.h new file mode 100644 index 00000000000..95a56ddf59a --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/query.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +class QueryBlueprint : public Blueprint +{ +private: + std::string _key; + +public: + QueryBlueprint(); + virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {}; + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new QueryBlueprint()); } + virtual bool setup(const IIndexEnvironment &indexEnv, const StringVector ¶ms); + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &queryEnv) const; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp new file mode 100644 index 00000000000..94a74947cea --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.cpp @@ -0,0 +1,35 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include // NOTE(review): include targets are missing in this copy of the patch (angle-bracket text lost)
+#include
+LOG_SETUP(".fef.setup");
+
+#include
+#include
+
+#include "cfgvalue.h"
+#include "chain.h"
+#include "double.h"
+#include "query.h"
+#include "setup.h"
+#include "staticrank.h"
+#include "sum.h"
+
+namespace search {
+namespace fef {
+namespace test {
+
+void setup_fef_test_plugin(IBlueprintRegistry & registry) // adds one prototype of each test blueprint to the registry
+{
+    // register blueprints
+    registry.addPrototype(Blueprint::SP(new DoubleBlueprint()));
+    registry.addPrototype(Blueprint::SP(new SumBlueprint()));
+    registry.addPrototype(Blueprint::SP(new StaticRankBlueprint()));
+    registry.addPrototype(Blueprint::SP(new ChainBlueprint()));
+    registry.addPrototype(Blueprint::SP(new CfgValueBlueprint()));
+    registry.addPrototype(Blueprint::SP(new QueryBlueprint()));
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h
new file mode 100644
index 00000000000..0204c12663a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/setup.h
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+
+namespace search {
+namespace fef {
+namespace test {
+
+void setup_fef_test_plugin(IBlueprintRegistry & registry); // registers the test-plugin blueprint prototypes
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp
new file mode 100644
index 00000000000..502115b2b1b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.staticrank"); +#include +#include "staticrank.h" + +namespace search { +namespace fef { +namespace test { + +StaticRankExecutor::StaticRankExecutor(const search::attribute::IAttributeVector * attribute) : + FeatureExecutor(), + _attribute(attribute) +{ +} + +void +StaticRankExecutor::execute(MatchData & data) +{ + uint32_t doc = data.getDocId(); + search::attribute::FloatContent staticRank; + if (_attribute != NULL) { + staticRank.allocate(_attribute->getMaxValueCount()); + staticRank.fill(*_attribute, doc); + } + *data.resolveFeature(outputs()[0]) = static_cast(staticRank[0]); +} + + +StaticRankBlueprint::StaticRankBlueprint() : + Blueprint("staticrank"), + _attributeName() +{ +} + +bool +StaticRankBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params) +{ + (void) indexEnv; + if (params.size() != 1) { + return false; + } + _attributeName = params[0]; + describeOutput("out", "static rank"); + return true; +} + +FeatureExecutor::LP +StaticRankBlueprint::createExecutor(const IQueryEnvironment & queryEnv) const +{ + const search::attribute::IAttributeVector * av = queryEnv.getAttributeContext().getAttribute(_attributeName); + return FeatureExecutor::LP(new StaticRankExecutor(av)); +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h new file mode 100644 index 00000000000..3b6ee1e5b76 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/plugin/staticrank.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include // NOTE(review): include targets are missing in this copy of the patch (angle-bracket text lost)
+#include
+#include
+#include
+
+namespace search {
+namespace fef {
+namespace test {
+
+class StaticRankExecutor : public FeatureExecutor // test executor reading from a single attribute vector
+{
+private:
+    const search::attribute::IAttributeVector * _attribute; // raw pointer handed in by the blueprint; this class never deletes it
+
+public:
+    StaticRankExecutor(const search::attribute::IAttributeVector * attribute);
+    virtual void execute(MatchData & data);
+};
+
+
+class StaticRankBlueprint : public Blueprint // test blueprint; setup() and createExecutor() are defined in the matching .cpp
+{
+private:
+    std::string _attributeName; // name of the attribute to read
+
+public:
+    StaticRankBlueprint();
+    virtual void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const {} // dumps no features
+    virtual Blueprint::UP createInstance() const { return Blueprint::UP(new StaticRankBlueprint()); } // fresh, un-setup instance
+    virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params);
+    virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const;
+};
+
+} // namespace test
+} // namespace fef
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp
new file mode 100644
index 00000000000..e5e8e3dedc0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.cpp
@@ -0,0 +1,74 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include // NOTE(review): include targets are missing in this copy of the patch (angle-bracket text lost)
+#include
+LOG_SETUP(".fef.sum");
+#include "sum.h"
+#include
+
+namespace search {
+namespace fef {
+namespace test {
+
+void
+SumExecutor::execute(MatchData & data) // adds up all input features and stores the total in output 0
+{
+    feature_t sum = 0.0f;
+    for (uint32_t i = 0; i < inputs().size(); ++i) {
+        sum += *data.resolveFeature(inputs()[i]);
+    }
+    *data.resolveFeature(outputs()[0]) = sum;
+}
+
+
+SumBlueprint::SumBlueprint() :
+    Blueprint("mysum")
+{
+}
+
+void
+SumBlueprint::visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const // currently dumps nothing: the '#if 1' branch is the active one
+{
+    (void) indexEnv;
+#if 1
+    (void) visitor;
+#else // NOTE(review): disabled code below builds names with base name "sum", but this blueprint registers as "mysum"
+    // Use the feature name builder to make sure that the naming of features is quoted correctly.
+    typedef FeatureNameBuilder FNB;
+
+    // This blueprint dumps 2 ranking features. This is a very tricky feature in that its dependencies
+    // are given by its parameters, so the definition of features implicitly declares this tree. This
+    // blueprint can actually produce any number of features, but only the following 2 are ever dumped.
+
+    // The first feature this produces is "sum(value(4),value(16))", quoted correctly by the feature name
+    // builder. The feature "value" simply returns the value of its single parameter, so this feature will
+    // always produce the output "20".
+    visitor.visitDumpFeature(FNB().baseName("sum").parameter("value(4)").parameter("value(16)").buildName());
+
+    // The second feature is "sum(double(value(8)),double(value(32)))", again quoted by the feature name
+    // builder. The feature "double" returns twice the value of its single input. This means that this
+    // feature will always produce the output "80" (= 8*2 + 32*2).
+    std::string d1 = FNB().baseName("double").parameter("value(8)").buildName();
+    std::string d2 = FNB().baseName("double").parameter("value(32)").buildName();
+    visitor.visitDumpFeature(FNB().baseName("sum").parameter(d1).parameter(d2).buildName());
+#endif
+}
+
+bool
+SumBlueprint::setup(const IIndexEnvironment & indexEnv, const StringVector & params) // accepts any number of parameters; always returns true
+{
+    (void) indexEnv;
+
+    // This blueprint expects all parameters to be complete feature names, so depend on these.
+    for (uint32_t i = 0; i < params.size(); ++i) {
+        defineInput(params[i]);
+    }
+
+    // Produce only a single output named "out".
+    describeOutput("out", "The sum of the values of all parameter features.");
+    return true;
+}
+
+} // namespace test
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h
new file mode 100644
index 00000000000..d54d31bb5d9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/test/plugin/sum.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +class SumExecutor : public FeatureExecutor +{ +public: + virtual bool isPure() { return true; } + virtual void execute(MatchData & data); +}; + + +class SumBlueprint : public Blueprint +{ +public: + SumBlueprint(); + virtual void visitDumpFeatures(const IIndexEnvironment & indexEnv, IDumpFeatureVisitor & visitor) const; + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new SumBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, const StringVector & params); + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment & queryEnv) const { + (void) queryEnv; + return FeatureExecutor::LP(new SumExecutor()); + } +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp new file mode 100644 index 00000000000..af68e2a5163 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.cpp @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "queryenvironment.h" + +namespace search { +namespace fef { +namespace test { + +QueryEnvironment::QueryEnvironment(IndexEnvironment *env) + : _indexEnv(env), + _terms(), + _properties(), + _location(), + _attrCtx((env == NULL) ? attribute::IAttributeContext::UP() : env->getAttributeManager().createContext()) +{ +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h new file mode 100644 index 00000000000..acb454bbfa7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include "indexenvironment.h" + +namespace search { +namespace fef { +namespace test { + +/** + * Implementation of the IQueryEnvironment interface used for testing. + */ +class QueryEnvironment : public IQueryEnvironment +{ +private: + QueryEnvironment(const QueryEnvironment &); // hide + QueryEnvironment & operator=(const QueryEnvironment &); // hide + + IndexEnvironment *_indexEnv; + std::vector _terms; + Properties _properties; + Location _location; + search::attribute::IAttributeContext::UP _attrCtx; + +public: + /** + * Constructs a new query environment. + * + * @param indexEnv The index environment of this. + */ + QueryEnvironment(IndexEnvironment *indexEnv = NULL); + + // Inherit doc from IQueryEnvironment. + virtual const Properties &getProperties() const { return _properties; } + + // Inherit doc from IQueryEnvironment. + virtual uint32_t getNumTerms() const { return _terms.size(); } + + // Inherit doc from IQueryEnvironment. + virtual const ITermData *getTerm(uint32_t idx) const { return idx < _terms.size() ? &_terms[idx] : NULL; } + + // Inherit doc from IQueryEnvironment. + virtual const Location & getLocation() const { return _location; } + + // Inherit doc from IQueryEnvironment. + virtual const search::attribute::IAttributeContext &getAttributeContext() const { return *_attrCtx; } + + // Inherit doc from IQueryEnvironment. + virtual const IIndexEnvironment &getIndexEnvironment() const { assert(_indexEnv != NULL); return *_indexEnv; } + + /** Returns a reference to the index environment of this. */ + IndexEnvironment *getIndexEnv() { return _indexEnv; } + + /** Returns a const reference to the index environment of this. */ + const IndexEnvironment *getIndexEnv() const { return _indexEnv; } + + /** Sets the index environment of this. 
*/ + QueryEnvironment &setIndexEnv(IndexEnvironment *indexEnv) { + _indexEnv = indexEnv; + _attrCtx = ((indexEnv == NULL) ? search::attribute::IAttributeContext::UP() : + indexEnv->getAttributeManager().createContext()); + return *this; + } + + /** + * Override which attribute manager to use. + * + * @param vecMan the manager we want to use + **/ + void overrideAttributeManager(AttributeManager *vecMan) { + _attrCtx = ((vecMan == NULL) ? search::attribute::IAttributeContext::UP() : vecMan->createContext()); + } + + /** Returns a reference to the list of term data objects. */ + std::vector &getTerms() { return _terms; } + + /** Returns a const reference to the list of term data objects. */ + const std::vector &getTerms() const { return _terms; } + + /** Returns a reference to the properties of this. */ + Properties & getProperties() { return _properties; } + + /** Returns a reference to the location of this. */ + Location & getLocation() { return _location; } +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp new file mode 100644 index 00000000000..8291a2b7ebd --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp @@ -0,0 +1,66 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include "queryenvironmentbuilder.h" + +namespace search { +namespace fef { +namespace test { + +QueryEnvironmentBuilder::QueryEnvironmentBuilder(QueryEnvironment &env, + MatchDataLayout &layout) : + _queryEnv(env), + _layout(layout) +{ + // empty +} + +SimpleTermData & +QueryEnvironmentBuilder::addAllFields() +{ + _queryEnv.getTerms().push_back(SimpleTermData()); + SimpleTermData &td = _queryEnv.getTerms().back(); + td.setWeight(search::query::Weight(100)); + const IIndexEnvironment &idxEnv = *_queryEnv.getIndexEnv(); + for (uint32_t i = 0; i < idxEnv.getNumFields(); ++i) { + const FieldInfo *info = idxEnv.getField(i); + SimpleTermFieldData &tfd = td.addField(info->id()); + tfd.setHandle(_layout.allocTermField(tfd.getFieldId())); + } + return td; +} + +SimpleTermData * +QueryEnvironmentBuilder::addIndexNode(const std::vector &fieldNames) +{ + _queryEnv.getTerms().push_back(SimpleTermData()); + SimpleTermData &td = _queryEnv.getTerms().back(); + td.setWeight(search::query::Weight(100)); + for (uint32_t i = 0; i < fieldNames.size(); ++i) { + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(fieldNames[i]); + if (info == NULL || info->type() != FieldType::INDEX) { + return NULL; + } + SimpleTermFieldData &tfd = td.addField(info->id()); + tfd.setHandle(_layout.allocTermField(tfd.getFieldId())); + } + return &td; +} + +SimpleTermData * +QueryEnvironmentBuilder::addAttributeNode(const vespalib::string &attrName) +{ + const FieldInfo *info = _queryEnv.getIndexEnv()->getFieldByName(attrName); + if (info == NULL || info->type() != FieldType::ATTRIBUTE) { + return NULL; + } + _queryEnv.getTerms().push_back(SimpleTermData()); + SimpleTermData &td = _queryEnv.getTerms().back(); + td.setWeight(search::query::Weight(100)); + SimpleTermFieldData &tfd = td.addField(info->id()); + tfd.setHandle(_layout.allocTermField(tfd.getFieldId())); + return &td; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git 
a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h new file mode 100644 index 00000000000..2842e4d8ca5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include "queryenvironment.h" + +namespace search { +namespace fef { +namespace test { + +class QueryEnvironmentBuilder { +public: + /** + * Constructs a new query environment builder. + * + * @param queryEnv The query environment to build in. + * @param layout The layout of match data to simultaneously update. + */ + QueryEnvironmentBuilder(QueryEnvironment &queryEnv, MatchDataLayout &layout); + + /** + * Add a term node searching all known fields to this query + * environment. This will update both the environment and the + * match data layout. + * + * @return Reference to the corresponding term data. + */ + SimpleTermData &addAllFields(); + + /** + * Add a term node searching in the given fields to this query + * environment. This will update both the environment and the + * match data layout. All fields are required to be of type INDEX. + * + * @return Pointer to the corresponding term data or NULL if one of the fields does not exists. + */ + SimpleTermData *addIndexNode(const std::vector &fieldNames); + + /** + * Add an attribute node searching in the given attribute to this query environment. + * This will update both the environment and the match data layout. + * + * @return Pointer to the corresponding term data or NULL if attribute does not exists. + */ + SimpleTermData *addAttributeNode(const vespalib::string & attrName); + + /** Returns a reference to the query environment of this. */ + QueryEnvironment &getQueryEnv() { return _queryEnv; } + + /** Returns a const reference to the query environment of this. 
*/ + const QueryEnvironment &getQueryEnv() const { return _queryEnv; } + + /** Returns a reference to the match data layout of this. */ + MatchDataLayout &getLayout() { return _layout; } + + /** Returns a const reference to the match data layout of this. */ + const MatchDataLayout &getLayout() const { return _layout; } + +private: + QueryEnvironmentBuilder(const QueryEnvironmentBuilder &); // hide + QueryEnvironmentBuilder & operator=(const QueryEnvironmentBuilder &); // hide + +private: + QueryEnvironment &_queryEnv; + MatchDataLayout &_layout; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp b/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp new file mode 100644 index 00000000000..bfc61348c1e --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/rankresult.cpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".fef.rankresult"); +#include "rankresult.h" +#include +#include + +namespace search { +namespace fef { +namespace test { + +RankResult::RankResult() : + _rankScores(), + _epsilon(0.0) +{ + // empty +} + +RankResult & +RankResult::addScore(const vespalib::string & featureName, feature_t score) +{ + _rankScores[featureName] = score; + return *this; +} + +feature_t +RankResult::getScore(const vespalib::string & featureName) const +{ + RankScores::const_iterator itr = _rankScores.find(featureName); + if (itr != _rankScores.end()) { + return itr->second; + } + return 0.0f; +} + +bool +RankResult::operator==(const RankResult & rhs) const +{ + return includes(rhs) && rhs.includes(*this); +} + +bool +RankResult::includes(const RankResult & rhs) const +{ + double epsilon = std::max(_epsilon, rhs._epsilon); + + RankScores::const_iterator findItr; + for (RankScores::const_iterator itr = rhs._rankScores.begin(); itr != rhs._rankScores.end(); ++itr) { + findItr = _rankScores.find(itr->first); + if (findItr == _rankScores.end()) { + LOG(info, "Did not find expected feature '%s' in this rank result", itr->first.c_str()); + return false; + } + if (itr->second < findItr->second - epsilon || + itr->second > findItr->second + epsilon || + (std::isnan(findItr->second) && + !std::isnan(itr->second))) + { + LOG(info, "Feature '%s' did not have expected score.", itr->first.c_str()); + LOG(info, "Expected: %f ~ %f", itr->second, epsilon); + LOG(info, "Actual : %f", findItr->second); + return false; + } + } + return true; +} + +RankResult & +RankResult::clear() +{ + _rankScores.clear(); + return *this; +} + +std::vector & +RankResult::getKeys(std::vector &ret) +{ + for (RankScores::const_iterator it = _rankScores.begin(); it != _rankScores.end(); ++it) { + ret.push_back(it->first); + } + return ret; +} + +std::vector +RankResult::getKeys() +{ + std::vector ret; + return getKeys(ret); +} + +RankResult & +RankResult::setEpsilon(double epsilon) { + _epsilon = 
epsilon; + return *this; +} + +double +RankResult::getEpsilon() const { + return _epsilon; +} + +std::ostream & operator<<(std::ostream & os, const RankResult & rhs) { + os << "["; + for (RankResult::RankScores::const_iterator itr = rhs._rankScores.begin(); itr != rhs._rankScores.end(); ++itr) { + os << "['" << itr->first << "' = " << itr->second << "]"; + } + return os << "]"; +} + +} // namespace test +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/test/rankresult.h b/searchlib/src/vespa/searchlib/fef/test/rankresult.h new file mode 100644 index 00000000000..90ac332c87b --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/test/rankresult.h @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +namespace search { +namespace fef { +namespace test { + +class RankResult { +public: + /** + * Convenience typedefs. + */ + typedef std::map RankScores; + +public: + /** + * Constructs a new rank result. + */ + RankResult(); + + /** + * Adds a score for the given feature name. + * + * @param featureName The name of the feature. + * @param score The score of that feature. + * @return This, to allow chaining. + */ + RankResult &addScore(const vespalib::string & featureName, feature_t score); + + /** + * Returns the score of a given feature. + * + * @param featureName The name of the feature. + * @return The score of that feature. + */ + feature_t getScore(const vespalib::string & featureName) const; + + /** + * Implements equality operator. + * + * @param rhs The result to compare to. + * @return Whether or not this is equal to the other. + */ + bool operator==(const RankResult & rhs) const; + + /** + * Returns whether or not this rank result contains another. + * + * @param rhs The result to see if this contains. + * @return Whether or not this contains the other. 
+ */ + bool includes(const RankResult & rhs) const; + + /** + * Clears the content of this map. + * + * @return This, to allow chaining. + */ + RankResult &clear(); + + /** + * Fills the given vector with the key strings of this. + * + * @param ret The vector to fill. + * @return Reference to the 'ret' param. + */ + std::vector &getKeys(std::vector &ret); + + /** + * Creates and returns a vector with the key strings of this. + * + * @return List of all key strings. + */ + std::vector getKeys(); + + /** + * Sets the epsilon used when comparing this rank result to another. + * + * @param epsilon The new epsilon. + * @return This, to allow chaining. + */ + RankResult &setEpsilon(double epsilon); + + /** + * Returns the epsilon used when comparing this rank result to another. + * + * @return The epsilon. + */ + double getEpsilon() const; + + /** + * Implements streaming operator. + * + * @param os The stream to write to. + * @param rhs The result to write. + * @return The stream, to allow chaining. + */ + friend std::ostream & operator<<(std::ostream & os, const RankResult & rhs); + +private: + RankScores _rankScores; + double _epsilon; +}; + +} // namespace test +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/fef/utils.cpp b/searchlib/src/vespa/searchlib/fef/utils.cpp new file mode 100644 index 00000000000..7532d0d60fb --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/utils.cpp @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include // NOTE(review): include targets and template arguments are missing in this copy of the patch (angle-bracket text lost)
+#include "utils.h"
+#include
+
+namespace search {
+namespace fef {
+
+namespace {
+
+FeatureHandle
+getSingleFeatureHandle(const RankProgram &rankProgram) // returns the handle of the program's only seed; asserts there is exactly one
+{
+    std::vector featureNames;
+    std::vector featureHandles;
+    rankProgram.get_seed_handles(featureNames, featureHandles, false); // NOTE(review): meaning of the 'false' argument is defined by RankProgram, not visible here
+    assert(featureNames.size() == 1);
+    assert(featureHandles.size() == 1);
+    return featureHandles.front();
+}
+
+}
+
+const feature_t *
+Utils::getScoreFeature(const RankProgram &rankProgram) // pointer into the program's match data for its single seed feature
+{
+    return rankProgram.match_data().resolveFeature(getSingleFeatureHandle(rankProgram));
+}
+
+const vespalib::eval::Value::CREF *
+Utils::getObjectFeature(const RankProgram &rankProgram) // same as getScoreFeature, but resolved as an object feature
+{
+    return rankProgram.match_data().resolve_object_feature(getSingleFeatureHandle(rankProgram));
+}
+
+namespace {
+
+std::map
+resolveFeatures(const MatchData &matchData, // builds a name -> current value map from parallel name/handle vectors
+                const std::vector &featureNames,
+                const std::vector &featureHandles)
+{
+    assert(featureNames.size() == featureHandles.size()); // the vectors are indexed in lockstep below
+    std::map result;
+    for (size_t i = 0; i < featureNames.size(); ++i) {
+        const vespalib::string &name = featureNames[i];
+        feature_t value = *(matchData.resolveFeature(featureHandles[i]));
+        result.insert(std::make_pair(name, value)); // insert() keeps the first value if a name occurs twice
+    }
+    return result;
+}
+
+}
+
+std::map
+Utils::getSeedFeatures(const RankProgram &rankProgram) // values of all seed features, keyed by feature name
+{
+    std::vector featureNames;
+    std::vector featureHandles;
+    rankProgram.get_seed_handles(featureNames, featureHandles);
+    return resolveFeatures(rankProgram.match_data(), featureNames, featureHandles);
+}
+
+std::map
+Utils::getAllFeatures(const RankProgram &rankProgram) // values of all features reported by get_all_feature_handles(), keyed by name
+{
+    std::vector featureNames;
+    std::vector featureHandles;
+    rankProgram.get_all_feature_handles(featureNames, featureHandles);
+    return resolveFeatures(rankProgram.match_data(), featureNames, featureHandles);
+}
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/utils.h b/searchlib/src/vespa/searchlib/fef/utils.h
new file mode 100644
index 00000000000..20ec62e3bfe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/utils.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "rank_program.h"
+#include
+#include
+
+namespace search {
+namespace fef {
+
+struct Utils
+{
+    /**
+     * Extract a single score feature from the given rank program.
+     */
+    static const feature_t *getScoreFeature(const RankProgram &rankProgram);
+
+    /**
+     * Extract a single object feature from the given rank program.
+     */
+    static const vespalib::eval::Value::CREF *getObjectFeature(const RankProgram &rankProgram);
+
+    /**
+     * Extract all seed feature values from the given rank program.
+     **/
+    static std::map getSeedFeatures(const RankProgram &rankProgram);
+
+    /**
+     * Extract all feature values from the given rank program.
+     **/
+    static std::map getAllFeatures(const RankProgram &rankProgram);
+
+};
+
+} // namespace fef
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/fef/verify_feature.cpp b/searchlib/src/vespa/searchlib/fef/verify_feature.cpp
new file mode 100644
index 00000000000..ebfdf1622ba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/fef/verify_feature.cpp
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fef.verify_feature"); +#include "verify_feature.h" +#include "blueprintresolver.h" + +namespace search { +namespace fef { + +bool verifyFeature(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv, + const std::string &featureName, + const std::string &desc) +{ + indexEnv.hintFeatureMotivation(IIndexEnvironment::VERIFY_SETUP); + BlueprintResolver resolver(factory, indexEnv); + resolver.addSeed(featureName); + bool result = resolver.compile(); + if (!result) { + LOG(error, "rank feature verification failed: %s (%s)", + featureName.c_str(), desc.c_str()); + } + return result; +} + +} // namespace fef +} // namespace search diff --git a/searchlib/src/vespa/searchlib/fef/verify_feature.h b/searchlib/src/vespa/searchlib/fef/verify_feature.h new file mode 100644 index 00000000000..b1edd5a16fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/fef/verify_feature.h @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "blueprintfactory.h" +#include "iindexenvironment.h" +#include + +namespace search { +namespace fef { + +/** + * Verify whether a specific feature can be computed. If the feature + * can not be computed, log a reason why, including feature + * dependencies. 
+ * + * @return true if the feature can be computed, false otherwise + * @param factory blueprint factory + * @param indexEnv index environment + * @param featureName name of feature to verify + * @param desc external description of the feature + **/ +bool verifyFeature(const BlueprintFactory &factory, + const IIndexEnvironment &indexEnv, + const std::string &featureName, + const std::string &desc); + +} // namespace fef +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt b/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt new file mode 100644 index 00000000000..3e202895beb --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_grouping OBJECT + SOURCES + collect.cpp + groupandcollectengine.cpp + groupengine.cpp + groupingengine.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/grouping/OWNERS b/searchlib/src/vespa/searchlib/grouping/OWNERS new file mode 100644 index 00000000000..1037590124e --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/OWNERS @@ -0,0 +1 @@ +balder diff --git a/searchlib/src/vespa/searchlib/grouping/collect.cpp b/searchlib/src/vespa/searchlib/grouping/collect.cpp new file mode 100644 index 00000000000..f34b63d4047 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/collect.cpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include + +namespace search { + +using namespace expression; +using namespace aggregation; + +namespace grouping { + +Collect::ResultAccessor::ResultAccessor(const AggregationResult & aggregator, size_t offset) : + _bluePrint(&aggregator), + _aggregator(_bluePrint->clone()), + _offset(offset) +{ +} + +void Collect::ResultAccessor::create(uint8_t * base) +{ + _aggregator->getResult().create(base+_offset); + _bluePrint->getResult().encode(base+_offset); +} + +Collect::Collect(const Group & gp) : + _aggregatorSize(0), + _aggregator(), + _aggrBacking() +{ + _aggregator.reserve(gp.getAggrSize()); + for (size_t i(0); i < gp.getAggrSize(); i++) { + ResultAccessor accessor(const_cast(gp.getAggregationResult(i)), _aggregatorSize); + _aggregator.push_back(accessor); + assert(accessor.getRawByteSize() > 0); + _aggregatorSize += accessor.getRawByteSize(); + } + _sortInfo.resize(gp.getOrderBySize()); + for(size_t i(0); i < _sortInfo.size(); i++) { + const uint32_t index = std::abs(gp.getOrderBy(i)) - 1; + const uint32_t z(gp.getExpr(index)); + _sortInfo[i] = SortInfo(z, gp.getOrderBy(i)); + } +} + +Collect::~Collect() +{ + if (_aggregatorSize > 0) { + assert((_aggrBacking.size() % _aggregatorSize) == 0); + for (size_t i(0), m(_aggrBacking.size()/_aggregatorSize); i < m; i++) { + uint8_t * base(&_aggrBacking[ i * _aggregatorSize]); + for (size_t j(0), k(_aggregator.size()); j < k; j++) { + ResultAccessor & r = _aggregator[j]; + r.destroy(base); + } + } + } +} + +void +Collect::getCollectors(GroupRef ref, Group & g) const +{ + size_t offset(getAggrBase(ref)); + if (offset < _aggrBacking.size()) { + const uint8_t * base(&_aggrBacking[offset]); + for (size_t i(0), m(_aggregator.size()); i < m; i++) { + const ResultAccessor & r = _aggregator[i]; + r.getResult(g.getAggregationResult(i).getResult(), base); + g.getAggregationResult(i).postMerge(); + } + } +} + +void +Collect::collect(GroupRef gr, uint32_t docId, double rank) +{ + uint8_t * 
base(&_aggrBacking[getAggrBase(gr)]); + for (size_t i(0), m(_aggregator.size()); i < m; i++) { + _aggregator[i].aggregate(base, docId, rank); + } +} + +void +Collect::createCollectors(GroupRef gr) +{ + size_t offset(getAggrBase(gr)); + if (offset == _aggrBacking.size()) { + _aggrBacking.resize(getAggrBase(GroupRef(gr.getRef() + 1))); + uint8_t * base(&_aggrBacking[offset]); + for (size_t i(0), m(_aggregator.size()); i < m; i++) { + ResultAccessor & r = _aggregator[i]; + r.create(base); + } + } +} + +void +Collect::preFill(GroupRef gr, const Group & g) +{ + if (gr.valid()) { + size_t offset(getAggrBase(gr)); + uint8_t * base(&_aggrBacking[offset]); + for (size_t i(0), m(_aggregator.size()); i < m; i++) { + ResultAccessor & r = _aggregator[i]; + r.setResult(g.getAggregationResult(i).getResult(), base); + } + } +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_grouping_collect() {} diff --git a/searchlib/src/vespa/searchlib/grouping/collect.h b/searchlib/src/vespa/searchlib/grouping/collect.h new file mode 100644 index 00000000000..f2bdf014826 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/collect.h @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace grouping {
+
+class Collect : public vespalib::noncopyable
+{
+protected:
+    Collect(const aggregation::Group & protoType);
+    ~Collect();
+    void preFill(GroupRef gr, const aggregation::Group & r);
+    void createCollectors(GroupRef gr);
+    void collect(GroupRef group, uint32_t docId, double rank);
+    void getCollectors(GroupRef ref, aggregation::Group & g) const;
+    int cmpAggr(GroupRef a, GroupRef b) const {
+        int diff(0);
+        size_t aOff(getAggrBase(a));
+        size_t bOff(getAggrBase(b));
+        for(std::vector::const_iterator it(_sortInfo.begin()), mt(_sortInfo.end()); (diff == 0) && (it != mt); it++) {
+            diff = _aggregator[it->getIndex()].cmp(&_aggrBacking[aOff], &_aggrBacking[bOff]) * it->getSign();
+        }
+        return diff;
+    }
+    uint64_t radixAggrAsc(GroupRef gr) const {
+        return _aggregator[_sortInfo[0].getIndex()].radixAsc(&_aggrBacking[getAggrBase(gr)]);
+    }
+    uint64_t radixAggrDesc(GroupRef gr) const {
+        return _aggregator[_sortInfo[0].getIndex()].radixDesc(&_aggrBacking[getAggrBase(gr)]);
+    }
+    bool hasSpecifiedOrder() const { return ! _sortInfo.empty(); }
+    bool isPrimarySortKeyAscending() const { return _sortInfo[0].getSign() >= 0; }
+private:
+    // Returns the byteoffset where aggregationresults for this group are stored.
+    size_t getAggrBase(GroupRef gr) const { return _aggregatorSize*gr.getRef(); }
+    // Return the aggregator with the corresponding id for the requested group.
+    const expression::ResultNode & getAggrResult(uint32_t aggrId, GroupRef ref) const {
+        return _aggregator[aggrId].getResult(&_aggrBacking[getAggrBase(ref.getRef())]);
+    }
+
+    /**
+     * A ResultAccessor hides the dirty details for aggregating and accessing results
+     * stored in flat memory elsewhere.
+     * It keeps an offset that every accessor (cmp and the radix getters included) adds to the group's base pointer.
+     * It also keeps a scratch aggregator for doing the calculation. The 'warm' method, aggregate, does
+     * r.swap(m); r.aggregate(); r.swap(m);
+     * The extra incurred cost is dual swap, in exchange for avoiding the memory cost of virtual objects.
+     * TODO: There are solutions planned to avoid the dual swaps. But so far they can be neglected as they do not occupy many cycles.
+     */
+    class ResultAccessor {
+    public:
+        ResultAccessor() : _bluePrint(NULL), _aggregator(NULL), _offset(0) { }
+        ResultAccessor(const aggregation::AggregationResult & aggregator, size_t offset);
+        void setResult(const expression::ResultNode & result, uint8_t * base) {
+            result.encode(base+_offset);
+        }
+        const expression::ResultNode & getResult(expression::ResultNode & result, const uint8_t * base) const {
+            result.decode(base+_offset);
+            return result;
+        }
+        const expression::ResultNode & getResult(const uint8_t * base) const {
+            _aggregator->getResult().decode(base+_offset);
+            return _aggregator->getResult();
+        }
+        size_t getRawByteSize() const { return _aggregator->getResult().getRawByteSize(); }
+        uint64_t radixAsc(const uint8_t * a) const { return _aggregator->getResult().radixAsc(a+_offset); }
+        uint64_t radixDesc(const uint8_t * a) const { return _aggregator->getResult().radixDesc(a+_offset); }
+        int cmp(const uint8_t * a, const uint8_t * b) const {
+            return _aggregator->getResult().cmpMem(a+_offset, b+_offset);
+        }
+        void create(uint8_t * base);
+        void destroy(uint8_t * base) { _aggregator->getResult().destroy(base+_offset); }
+        void aggregate(uint8_t * base, uint32_t docId, double rank) {
+            _aggregator->getResult().swap(base+_offset);
+            _aggregator->aggregate(docId, rank);
+            _aggregator->getResult().swap(base+_offset);
+        }
+    private:
+        const aggregation::AggregationResult * _bluePrint;
+        mutable vespalib::IdentifiablePtr _aggregator;
+        uint32_t _offset;
+    };
+    typedef vespalib::Array AggregatorBacking;
+    typedef vespalib::Array ResultAccessorList;
+    class SortInfo {
+    public:
+        SortInfo() : _index(0), _sign(1) { }
+        SortInfo(uint8_t index, int8_t sign) : _index(index), _sign(sign) { }
+        uint8_t getIndex() const { return _index; }
+        int8_t getSign() const { return _sign; }
+    private:
+        uint8_t _index; // Which index in the aggregators should be used for sorting this level.
+        int8_t _sign; // And which way. positive number -> ascending, negative number descending.
+    };
+    size_t _aggregatorSize; // This is the bytesize required to store the aggregate values per bucket.
+    ResultAccessorList _aggregator; // These are the accessors to use when accessing the results.
+    AggregatorBacking _aggrBacking; // This is the storage for the accessors.
+    std::vector _sortInfo; // Generated cheap sortInfo, to avoid accessing more complicated data.
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/forcelink.hpp b/searchlib/src/vespa/searchlib/grouping/forcelink.hpp
new file mode 100644
index 00000000000..09496d294c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/forcelink.hpp
@@ -0,0 +1,13 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+void forcelink_file_searchlib_grouping_groupandcollectengine();
+void forcelink_file_searchlib_grouping_groupingengine();
+void forcelink_file_searchlib_grouping_groupengine();
+
+void forcelink_searchlib_grouping() {
+    forcelink_file_searchlib_grouping_groupandcollectengine();
+    forcelink_file_searchlib_grouping_groupingengine();
+    forcelink_file_searchlib_grouping_groupengine();
+}
+
diff --git a/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp
new file mode 100644
index 00000000000..6f06960d5c5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.cpp
@@ -0,0 +1,50 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include + +namespace search { + +using namespace expression; +using namespace aggregation; + +namespace grouping { + +GroupAndCollectEngine::GroupAndCollectEngine(const GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen) : + GroupEngine(request, level, nextEngine, frozen) +{ +} + +GroupAndCollectEngine::~GroupAndCollectEngine() +{ +} + +GroupRef +GroupAndCollectEngine::group(Children & children, uint32_t docId, double rank) +{ + GroupRef gr(GroupEngine::group(children, docId, rank)); + if (gr.valid()) { + collect(gr, docId, rank); + } + return gr; +} + +void +GroupAndCollectEngine::group(uint32_t docId, double rank) +{ + GroupEngine::group(docId, rank); + collect(GroupRef(0), docId, rank); +} + +GroupRef +GroupAndCollectEngine::createGroup(const search::expression::ResultNode & v) +{ + GroupRef gr(GroupEngine::createGroup(v)); + createCollectors(gr); + return gr; +} + +} +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_grouping_groupandcollectengine() {} diff --git a/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h new file mode 100644 index 00000000000..4d1aa5a49df --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/groupandcollectengine.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+
+namespace search {
+namespace grouping {
+
+class GroupAndCollectEngine : public GroupEngine
+{
+public:
+    GroupAndCollectEngine(const aggregation::GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen);
+    ~GroupAndCollectEngine();
+private:
+    virtual GroupRef group(Children & children, uint32_t docId, double rank) override;
+    virtual void group(uint32_t docId, double rank) override;
+    virtual GroupRef createGroup(const expression::ResultNode & id) override;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupengine.cpp
new file mode 100644
index 00000000000..48ecf6931ee
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupengine.cpp
@@ -0,0 +1,214 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+#include
+#include
+
+namespace search {
+
+using namespace expression;
+using namespace aggregation;
+
+namespace grouping {
+
+GroupEngine::GroupEngine(const GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen) :
+    Collect(request->getGroupPrototype()),
+    _request(request),
+    _nextEngine(nextEngine),
+    _idByteSize(0),
+    _ids(),
+    _idScratch(),
+    _rank(),
+    _groupBacking(),
+    _level(level),
+    _frozen(frozen)
+{
+    // NOTE(review): request is already dereferenced in the initializer list above,
+    // so the NULL test below cannot guard against a null request.
+    if ((request != NULL) && (level > 0)) {
+        _idScratch.reset(request->getExpression().getResult().clone());
+    } else {
+        _idScratch.reset(new NullResultNode());
+    }
+    _idByteSize = _idScratch->getRawByteSize();
+}
+
+GroupEngine::~GroupEngine()
+{
+    if (_idByteSize) {
+        for (size_t i(0), m(_ids.size()/_idByteSize); i < m; i++) {
+            _idScratch->destroy(&_ids[getIdBase(GroupRef(i))]);
+        }
+    }
+    for (size_t i(0), m(_groupBacking.size()); i < m; i++) {
+        delete _groupBacking[i];
+    }
+}
+
+GroupRef GroupEngine::group(Children & children, uint32_t docId, double rank)
+{
+    const ExpressionTree &selector = _request->getExpression();
+    if (!selector.execute(docId, rank)) {
+        throw std::runtime_error("Does not know how to handle failed select statements");
+    }
+    const ResultNode &selectResult = selector.getResult();
+    Children::iterator found = children.find(selectResult, GroupResult(*this));
+    GroupRef gr;
+    if (found == children.end()) {
+        if (_request->allowMoreGroups(children.size())) {
+            gr = createGroup(selectResult);
+            _rank.push_back(rank);
+            children.insert(gr);
+        } else {
+            return gr;
+        }
+    } else {
+        gr = *found;
+    }
+
+    if (_nextEngine != NULL) {
+        _nextEngine->group(*_groupBacking[gr], docId, rank);
+    }
+
+    return gr;
+}
+
+void GroupEngine::group(uint32_t docId, double rank)
+{
+    if (_nextEngine != NULL) {
+        _nextEngine->group(*_groupBacking[0], docId, rank);
+    }
+}
+
+void GroupEngine::merge(Children &, const GroupEngine &)
+{
+}
+
+void GroupEngine::merge(const GroupEngine & b)
+{
+    if (_nextEngine != NULL) {
+        _nextEngine->merge(*_groupBacking[0], *b._nextEngine);
+    }
+}
+
+GroupRef GroupEngine::createGroup(const search::expression::ResultNode & v)
+{
+    GroupRef gr(_idByteSize ? _ids.size()/_idByteSize : 0);
+    _ids.resize(getIdBase(GroupRef(gr + 1)));
+    uint8_t * base(&_ids[getIdBase(gr)]);
+    v.create(base);
+    v.encode(base);
+    if (_nextEngine != NULL) {
+        _groupBacking.push_back(_nextEngine->createChildren().release());
+    }
+    return gr;
+}
+
+GroupRef
+GroupEngine::createFullGroup(const search::expression::ResultNode & v)
+{
+    GroupRef gr(GroupEngine::createGroup(v));
+    createCollectors(gr);
+    return gr;
+}
+
+namespace {
+class RadixAccess {
+public:
+    RadixAccess(const uint64_t * v) : _radix(v) { }
+    uint64_t operator () (size_t i) const { return _radix[i]; }
+private:
+    const uint64_t * _radix;
+};
+}
+
+/**
+ * Build the result Group for the given ref: id, rank, the best children (at most
+ * the next level's precision, added in id order) and the collected aggregation results.
+ */
+Group::UP GroupEngine::getGroup(GroupRef ref) const
+{
+    Group::UP p(new Group(_request->getGroupPrototype()));
+    Group & g(*p);
+    g.setId(getGroupId(ref));
+    g.setRank(_rank[ref]);
+    if (_nextEngine != NULL) {
+        const Children & ch(*_groupBacking[ref]);
+        std::vector v(ch.size());
+        {
+            size_t i(0);
+            for (Children::const_iterator it(ch.begin()), mt(ch.end()); it != mt; it++) {
+                v[i++] = *it;
+            }
+        }
+        uint64_t maxN(_nextEngine->_request->getPrecision());
+        if (maxN < v.size()) {
+            size_t radixSorted;
+            if (_nextEngine->hasSpecifiedOrder()) {
+                // The radix functor is called with the GroupRef itself, so the cache is
+                // addressed by ref value; it must span the largest ref present in v.
+                uint32_t maxRef(0);
+                for (size_t i(0); i < v.size(); i++) {
+                    const uint32_t r(v[i]);
+                    if (r > maxRef) {
+                        maxRef = r;
+                    }
+                }
+                uint64_t * radixCache = new uint64_t[size_t(maxRef) + 1];
+                const bool ascending(_nextEngine->isPrimarySortKeyAscending());
+                for (size_t i(0); i < v.size(); i++) {
+                    const uint32_t r(v[i]);
+                    radixCache[r] = ascending
+                        ? _nextEngine->radixAggrAsc(GroupRef(r))
+                        : _nextEngine->radixAggrDesc(GroupRef(r));
+                }
+                radixSorted = ShiftBasedRadixSorter::
+                    radix_sort(RadixAccess(radixCache), GroupRankLess(*_nextEngine), &v[0], v.size(), 16, maxN);
+                delete [] radixCache;
+            } else {
+                radixSorted = ShiftBasedRadixSorter::
+                    radix_sort(GroupRankRadix(*_nextEngine), GroupRankLess(*_nextEngine), &v[0], v.size(), 16, maxN);
+            }
+            assert(radixSorted >= maxN);
+            assert(radixSorted <= v.size());
+            v.resize(radixSorted);
+            std::sort(v.begin(), v.end(), GroupRankLess(*_nextEngine));
+            v.resize(maxN);
+        }
+        std::sort(v.begin(), v.end(), GroupIdLess(*_nextEngine));
+        for (size_t i(0); i < v.size(); i++) {
+            g.addChild(_nextEngine->getGroup(v[i]));
+        }
+    }
+    getCollectors(ref, g);
+    return p;
+}
+
+GroupRef
+GroupEngine::preFillEngine(const Group & r, size_t depth)
+{
+    GroupRef gr;
+    if (depth >= _level) {
+        gr = (r.hasId())
+            ? createFullGroup(r.getId())
+            : createFullGroup(NullResultNode());
+        _rank.push_back(r.getRank());
+        if (_nextEngine != NULL) {
+            Children & ch(*_groupBacking[gr]);
+            for (size_t i(0), m(r.getChildrenSize()); i < m; i++) {
+                GroupRef tmp = _nextEngine->preFillEngine(r.getChild(i), depth);
+                if (tmp.valid()) {
+                    ch.insert(tmp);
+                }
+            }
+        }
+        preFill(gr, r);
+    }
+    return gr;
+}
+
+}
+}
+
+// this function was added by ../../forcelink.sh
+void forcelink_file_searchlib_grouping_groupengine() {}
diff --git a/searchlib/src/vespa/searchlib/grouping/groupengine.h b/searchlib/src/vespa/searchlib/grouping/groupengine.h
new file mode 100644
index 00000000000..4ac29d77b3d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/grouping/groupengine.h
@@ -0,0 +1,139 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include + +namespace search { +namespace grouping { + +class GroupEngine : protected Collect +{ +public: + class GroupHash { + public: + GroupHash(const GroupEngine & engine) : _engine(engine) { } + uint32_t operator () (GroupRef a) const { return _engine.hash(a); } + private: + const GroupEngine & _engine; + }; + class GroupEqual { + public: + GroupEqual(const GroupEngine & engine) : _engine(engine) { } + bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpId(a, b) == 0; } + private: + const GroupEngine & _engine; + }; + class GroupIdLess { + public: + GroupIdLess(const GroupEngine & engine) : _engine(engine) { } + bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpId(a, b) < 0; } + private: + const GroupEngine & _engine; + }; + class GroupRankRadix { + public: + GroupRankRadix(const GroupEngine & engine) : _engine(engine) { } + uint64_t operator () (GroupRef a) const { return _engine.rankRadix(a); } + private: + const GroupEngine & _engine; + }; + class GroupRankLess { + public: + GroupRankLess(const GroupEngine & engine) : _engine(engine) { } + bool operator () (GroupRef a, GroupRef b) const { return _engine.cmpRank(a, b) < 0; } + private: + const GroupEngine & _engine; + }; + class GroupResult { + public: + GroupResult(const GroupEngine & engine) : _engine(engine) { } + const expression::ResultNode & operator() (GroupRef v) const { return _engine.getGroupId(v); } + private: + const GroupEngine & _engine; + }; + + typedef vespalib::hash_set Children; + + /** + * @param request The request creating this engine. + * @param level This is my level. 0 is the top level. + * @param nextEngine This is the engine handling the next level. + * @param frozen Tell if this level can create new groups or not. + */ + GroupEngine(const aggregation::GroupingLevel * request, size_t level, GroupEngine * nextEngine, bool frozen); + virtual ~GroupEngine(); + + /** + * @param children The list of children already present. 
+ * @param docId The docid of the hit + * @param rank The rank of the hit + **/ + virtual GroupRef group(Children & children, uint32_t docId, double rank); + virtual void group(uint32_t docId, double rank); + virtual void merge(Children & children, const GroupEngine & b); + virtual void merge(const GroupEngine & b); + + std::unique_ptr createChildren() { return std::unique_ptr(new Children(0, GroupHash(*this), GroupEqual(*this))); } + + virtual aggregation::Group::UP getGroup(GroupRef ref) const; + aggregation::Group::UP getRootGroup() const { return getGroup(GroupRef(0)); } + + GroupRef preFillEngine(const aggregation::Group & r, size_t depth); + +protected: + GroupEngine(const aggregation::GroupingLevel * request, size_t level); + void groupNext(uint32_t docId, double rank); + virtual GroupRef createGroup(const expression::ResultNode & id); +private: + int cmpRank(GroupRef a, GroupRef b) const { + //Here there is room for improvement + //Most critical inner loop. +#if 0 + return cmpAggr(a, b); +#else +#if 1 + int diff(cmpAggr(a, b)); + return diff + ? diff + : ((_rank[a] > _rank[b]) + ? -1 + : ((_rank[a] < _rank[b]) ? 1 : 0)); +#else + return (_rank[a] > _rank[b]) + ? -1 + : ((_rank[a] < _rank[b]) ? 
1 : 0); +#endif +#endif + } + size_t hash(GroupRef a) const { return _idScratch->hash(&_ids[getIdBase(a)]); } + uint64_t rankRadix(GroupRef a) const { return vespalib::convertForSort::convert(_rank[a]); } + int cmpId(GroupRef a, GroupRef b) const { + return _idScratch->cmpMem(&_ids[getIdBase(a)], &_ids[getIdBase(b)]); + } + GroupRef createFullGroup(const expression::ResultNode & id); + const expression::ResultNode & getGroupId(GroupRef ref) const { return getGroupId(ref, *_idScratch); } + const expression::ResultNode & getGroupId(GroupRef ref, expression::ResultNode & r) const { + r.decode(&_ids[getIdBase(ref)]); + return r; + } + size_t getIdBase(GroupRef g) const { return _idByteSize*g; } + + typedef expression::ResultNodeVector::UP IdList; + typedef vespalib::Array GroupBacking; + typedef std::vector RankV; + typedef vespalib::Array IdBacking; + + const aggregation::GroupingLevel * _request; + GroupEngine * _nextEngine; // This is the engine for the next level. + size_t _idByteSize; // Correct fixed size of memory needed for one id. + IdBacking _ids; // These are all the group ids at this level. + expression::ResultNode::UP _idScratch; // Used for typing the ids. + RankV _rank; // This is the rank of the group. TODO handle with ordinary aggregator. + GroupBacking _groupBacking; // These are all the children at this level. Vector> + size_t _level; // This is my level + bool _frozen; // If set no more groups will be created at this level. +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp b/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp new file mode 100644 index 00000000000..ec34af16662 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/groupingengine.cpp @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include + +namespace search { + +using namespace aggregation; +using namespace expression; + +namespace grouping { + +GroupingEngine::GroupingEngine(Grouping & request) : + _request(request), + _levels(), + _rootRequestLevel() +{ + const Grouping::GroupingLevelList & gll(request.getLevels()); + assert(request.getLastLevel() <= gll.size()); + bool collectLastLevel(request.getLastLevel() == gll.size()); + _levels.resize(request.getLastLevel() + ((gll.size()==request.getLastLevel()) ? 0 : 1) + 1); // 1 for inclusive, 1 for artificial root + GroupEngine * nextEngine(NULL); + for (size_t i(_levels.size()); i-- > 1; ) { + const GroupingLevel & l = gll[i-1]; + if (i > request.getFirstLevel()) { + if ((i-1) == request.getLastLevel()) { + if (collectLastLevel) { + _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, false); + } else { + _levels[i] = new GroupEngine(&l, i, nextEngine, false); + } + } else { + _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, false); + } + } else { + // This should be a frozen level + if (i == request.getFirstLevel()) { + _levels[i] = new GroupAndCollectEngine(&l, i, nextEngine, true); + } else { + _levels[i] = new GroupEngine(&l, i, nextEngine, true); + } + } + nextEngine = _levels[i]; + } + + fillRootRequest(request.getRoot()); + if (0 >= request.getFirstLevel()) { + _levels[0] = new GroupAndCollectEngine(&_rootRequestLevel, 0, nextEngine, true); + } else { + _levels[0] = new GroupEngine(&_rootRequestLevel, 0, nextEngine, true); + } + preFillEngines(request.getRoot(), request.getFirstLevel()); +} + +void +GroupingEngine::preFillEngines(const Group & r, size_t levels) +{ + if (_levels.size() > levels) { + _levels[0]->preFillEngine(r, levels); + } +} + +void +GroupingEngine::fillRootRequest(const Group & r) +{ + _rootRequestLevel.setMaxGroups(1).setPresicion(1).freeze(); + for (size_t i(0), m(r.getAggrSize()); i < m; i++) { + _rootRequestLevel.addResult(r.getAggregationResult(i)); + } +} + 
+GroupingEngine::~GroupingEngine() +{ + for (size_t i(0); i < _levels.size(); i++) { + delete _levels[i]; + _levels[i] = 0; + } +} + +void +GroupingEngine::aggregate(const RankedHit * rankedHit, unsigned int len) +{ + _request.preAggregate( ! _request.needResort()); + if ( ! _levels.empty() ) { + len = _request.getMaxN(len); + for (size_t i(0); i < len; i++) { + const RankedHit & r(rankedHit[i]); + _levels[0]->group(r.getDocId(), r.getRank()); + } + } + _request.postAggregate(); +} + +Group::UP +GroupingEngine::createResult() const +{ + return _levels[0]->getRootGroup(); +} + +void GroupingEngine::merge(const GroupingEngine & b) +{ + _levels[0]->merge(*b._levels[0]); +} + +} + +} + +// this function was added by ../../forcelink.sh +void forcelink_file_searchlib_grouping_groupingengine() {} diff --git a/searchlib/src/vespa/searchlib/grouping/groupingengine.h b/searchlib/src/vespa/searchlib/grouping/groupingengine.h new file mode 100644 index 00000000000..00187a0c818 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/groupingengine.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include + +namespace search { +namespace grouping { + +class GroupingEngine : private vespalib::noncopyable +{ +public: + typedef std::vector GroupEngines; +public: + GroupingEngine(aggregation::Grouping & request); + GroupingEngine(vespalib::nbostream & request, bool oldWay); + ~GroupingEngine(); + vespalib::nbostream & serializeOldWay(vespalib::nbostream & request) const; + vespalib::nbostream & serialize(vespalib::nbostream & request) const; + void aggregate(const RankedHit * rankedHit, unsigned int len); + void merge(const GroupingEngine & b); + aggregation::Group::UP createResult() const; + const GroupEngines & getEngines() const { return _levels; } +private: + void fillRootRequest(const aggregation::Group & r); + void preFillEngines(const aggregation::Group & r, size_t levels); + aggregation::Grouping & _request; + GroupEngines _levels; + aggregation::GroupingLevel _rootRequestLevel; +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/grouping/groupref.h b/searchlib/src/vespa/searchlib/grouping/groupref.h new file mode 100644 index 00000000000..bcc56172be6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/groupref.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace grouping { + +class GroupRef +{ +public: + GroupRef() : _ref(-1) { } + GroupRef(uint32_t ref) : _ref(ref) { } + uint32_t getRef() const { return _ref; } + bool valid() const { return _ref != static_cast(-1); } + operator uint32_t () const { return getRef(); } +private: + uint32_t _ref; +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/grouping/hyperloglog.h b/searchlib/src/vespa/searchlib/grouping/hyperloglog.h new file mode 100644 index 00000000000..7ef731f833b --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/hyperloglog.h @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "sketch.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+
+// How many elements are required before we use a normal sketch representation.
+const uint32_t SPARSE_SKETCH_LIMIT = 255;
+
+/**
+ * Decorator to SparseSketch handling the switch to NormalSketch
+ * representation. It holds a reference to HyperLogLog::_sketch, which
+ * is a unique pointer initially pointing to this class. By resetting
+ * that pointer to a new sketch class, this class is deleted. By
+ * having the logic for exchanging the sketch class here, we remove it
+ * along with the sparse representation once the switch is made.
+ */
+template
+class ExchangerSketch : public SparseSketch {
+    typename Sketch::UP &_sketch_ptr;
+
+    virtual int aggregate(HashT hash) override {
+        if (this->getSize() > SPARSE_SKETCH_LIMIT) {
+            NormalSketch *normal_sketch =
+                new NormalSketch;
+            normal_sketch->merge(*this);
+            _sketch_ptr.reset(normal_sketch);  // deletes this
+            return normal_sketch->aggregate(hash);
+        }
+        return SparseSketch::aggregate(hash);
+    }
+public:
+    ExchangerSketch(typename Sketch::UP &sketch_ptr)
+        : _sketch_ptr(sketch_ptr) {}
+};
+
+/**
+ * HyperLogLog is used to estimate the number of unique hashes seen.
+ */
+template
+class HyperLogLog {
+    typename Sketch::UP _sketch;
+
+public:
+    typedef HashT hash_type;
+    enum { bucketBits = BucketBits };
+
+    // Initialize ExchangerSketch with a reference to _sketch.
+    HyperLogLog() : _sketch(new ExchangerSketch(_sketch)) {}
+    HyperLogLog(const HyperLogLog &other)
+        : HyperLogLog() {
+        merge(other);
+    }
+    HyperLogLog &operator=(const HyperLogLog &other) {
+        if (this == &other) { return *this; }  // resetting first would merge an emptied sketch into itself
+        _sketch.reset(new ExchangerSketch(_sketch));
+        merge(other);
+        return *this;
+    }
+
+    // Aggregates a hash value into the sketch.
+    int aggregate(HashT hash) { return _sketch->aggregate(hash); }
+    void merge(const HyperLogLog &other);
+    void serialize(vespalib::Serializer &os) const;
+    void deserialize(vespalib::Deserializer &is);
+
+    const Sketch &getSketch() const { return *_sketch; }
+};
+
+
+template
+void HyperLogLog::
+merge(const HyperLogLog &other) {
+    typedef SparseSketch Sparse;
+    typedef NormalSketch Normal;
+
+    if (_sketch->getClassId() == Sparse::classId) {
+        Sparse &sparse = static_cast(*_sketch);
+        if (other.getSketch().getClassId() == Sparse::classId) {
+            const Sparse &other_sparse =
+                static_cast(other.getSketch());
+            sparse.merge(other_sparse);
+            if (sparse.getSize() > SPARSE_SKETCH_LIMIT) {
+                typename Normal::UP new_sketch(new Normal);
+                new_sketch->merge(sparse);
+                _sketch.reset(new_sketch.release());
+            }
+        } else {  // other is NormalSketch
+            const Normal &other_normal =
+                static_cast(other.getSketch());
+            typename Normal::UP new_sketch(new Normal(other_normal));
+            new_sketch->merge(sparse);
+            _sketch.reset(new_sketch.release());
+        }
+    } else {  // NormalSketch
+        Normal &normal = static_cast(*_sketch);
+        if (other.getSketch().getClassId() == Sparse::classId) {
+            const Sparse &other_sparse =
+                static_cast(other.getSketch());
+            normal.merge(other_sparse);
+        } else {  // other is NormalSketch
+            const Normal &other_normal =
+                static_cast(other.getSketch());
+            normal.merge(other_normal);
+        }
+    }
+}
+
+template
+void HyperLogLog::
+serialize(vespalib::Serializer &os) const {
+    os << _sketch->getClassId();
+    _sketch->serialize(os);
+}
+
+template
+void HyperLogLog::
+deserialize(vespalib::Deserializer &is) {
+    uint32_t type;
+    is >> type;
+    if (type == SparseSketch::classId) {
+        _sketch.reset(new ExchangerSketch(_sketch));
+        _sketch->deserialize(is);
+    } else if (type == NormalSketch::classId) {
+        _sketch.reset(new NormalSketch);
+        _sketch->deserialize(is);
+    }  // NOTE(review): any other class id leaves the current sketch untouched - confirm this is intended
+}
+
+}  // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/grouping/sketch.h
b/searchlib/src/vespa/searchlib/grouping/sketch.h new file mode 100644 index 00000000000..0a475a9e805 --- /dev/null +++ b/searchlib/src/vespa/searchlib/grouping/sketch.h @@ -0,0 +1,260 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { + +template struct NormalSketch; + +/** + * Sketch interface. + */ +template +struct Sketch { + enum { bucketBits = BucketBits }; + typedef HashT hash_type; + typedef Sketch SketchType; + typedef std::unique_ptr UP; + + static const HashT BUCKET_COUNT = HashT(1) << BucketBits; + static const HashT BUCKET_MASK = BUCKET_COUNT - 1; + + virtual ~Sketch() {} + + virtual int aggregate(HashT hash) = 0; + + virtual uint32_t getClassId() const = 0; + virtual void serialize(vespalib::Serializer &os) const = 0; + virtual void deserialize(vespalib::Deserializer &is) = 0; + + virtual bool operator==(const SketchType &other) const = 0; + virtual void print(std::ostream &out) const = 0; +}; +template +std::ostream &operator<<(std::ostream &o, const Sketch &s) { + o << "["; + s.print(o); + return o << " ]"; +} + + +template +uint8_t countPrefixZeros(T t) { + uint8_t count = 0; + const T FIRST_BIT = T(1) << ((sizeof(T) * 8) - 1); + while (count < sizeof(T) * 8 && !((t << count) & FIRST_BIT)) { // bounded by the bit width: t == 0 has no set bit and must not shift past the type width + ++count; + } + return ++count; +} + + +/** + * Sketch containing a set of hashes + */ +template +struct SparseSketch : Sketch { + using typename Sketch::SketchType; + enum { classId = IDENTIFIABLE_CLASSID_NS(search, SparseSketch) }; + + struct IdentityHash { + size_t operator()(HashT hash) const { return hash; } + }; + std::unordered_set hash_set; + + size_t getSize() const { return hash_set.size(); } + + virtual int aggregate(HashT hash) override { + return hash_set.insert(hash).second ? 
1 : 0; + } + + virtual uint32_t getClassId() const override { return classId; } + virtual void serialize(vespalib::Serializer &os) const override; + virtual void deserialize(vespalib::Deserializer &is) override; + + virtual bool operator==(const SketchType &other) const override { + const SparseSketch *other_sparse = + dynamic_cast *>(&other); + if (!other_sparse) { + return false; + } + if (hash_set.size() != other_sparse->hash_set.size()) { + return false; + } + for (auto hash : hash_set) { + if (other_sparse->hash_set.count(hash) == 0) { + return false; + } + } + return true; + } + + virtual void print(std::ostream &out) const override { + out << " (" << hash_set.size() << " elements)"; + for (auto hash : hash_set) { + out << " 0x" << std::hex; + out.width(8); + out.fill('0'); + out << hash; + } + } + + void merge(const SparseSketch &other) { + hash_set.insert(other.hash_set.begin(), other.hash_set.end()); + } +}; + + +/** + * Sketch containing a fixed number of buckets + */ +template +struct NormalSketch : Sketch { + using typename Sketch::SketchType; + using Sketch::BUCKET_COUNT; + using Sketch::BUCKET_MASK; + typedef std::unique_ptr UP; + enum { classId = IDENTIFIABLE_CLASSID_NS(search, NormalSketch) }; + + uint8_t bucket[BUCKET_COUNT]; + + NormalSketch() { memset(&bucket[0], 0, BUCKET_COUNT); } + + virtual int aggregate(HashT hash) override { + uint8_t existing_value = bucket[hash & BUCKET_MASK]; + uint8_t new_value = countPrefixZeros(hash | BUCKET_MASK); + if (new_value > existing_value) { + bucket[hash & BUCKET_MASK] = new_value; + return new_value - existing_value; + } + return 0; + } + + uint32_t compress_buckets_into(char *buffer, uint32_t size) const; + void decompress_buckets_from(char *buffer, uint32_t size); + virtual uint32_t getClassId() const override { return classId; } + virtual void serialize(vespalib::Serializer &os) const override; + virtual void deserialize(vespalib::Deserializer &is) override; + + virtual bool operator==(const SketchType 
&other) const override { + const NormalSketch *other_normal = + dynamic_cast *>(&other); + if (!other_normal) { + return false; + } + for (size_t i = 0; i < BUCKET_COUNT; ++i) { + if (other_normal->bucket[i] != bucket[i]) { + return false; + } + } + return true; + } + + virtual void print(std::ostream &out) const override { + for (size_t i = 0; i < BUCKET_COUNT; ++i) { + out << " " << int(bucket[i]); + } + } + + void merge(const NormalSketch &other) { + std::transform(bucket, bucket + BUCKET_COUNT, other.bucket, bucket, + [](uint8_t a, uint8_t b) { return std::max(a, b); }); + } + + void merge(const SparseSketch &other) { + for (auto hash : other.hash_set) { + aggregate(hash); + } + } +}; + + +template +void SparseSketch:: +serialize(vespalib::Serializer &os) const { + uint32_t size = hash_set.size(); + os << size; + for (HashT hash : hash_set) { + os << hash; + } +} +template +void SparseSketch:: +deserialize(vespalib::Deserializer &is) { + uint32_t size; + is >> size; + for (uint32_t i = 0; i < size; ++i) { + HashT hash; // read the same type serialize() writes, so the stream stays in sync for any HashT + is >> hash; + aggregate(hash); + } +} + +template +uint32_t NormalSketch:: +compress_buckets_into(char *buffer, uint32_t size) const { + document::CompressionConfig config(document::CompressionConfig::LZ4, 9, 9); + vespalib::ConstBufferRef org(&bucket[0], BUCKET_COUNT); + vespalib::DataBuffer compress_buffer(buffer, size); + document::CompressionConfig::Type r = + document::compress(config, org, compress_buffer, false); + assert(compress_buffer.getDead() == buffer); + if (r == document::CompressionConfig::LZ4) { + assert(compress_buffer.getDataLen() < BUCKET_COUNT); + return compress_buffer.getDataLen(); + } else { + assert(BUCKET_COUNT <= size); + memcpy(buffer, bucket, BUCKET_COUNT); + return BUCKET_COUNT; + } +} +template +void NormalSketch:: +decompress_buckets_from(char *buffer, uint32_t size) { + if (size == BUCKET_COUNT) { // not compressed + memcpy(bucket, buffer, BUCKET_COUNT); + } else { + vespalib::ConstBufferRef
compressed(buffer, size); + vespalib::DataBuffer uncompressed(reinterpret_cast(&bucket[0]), + BUCKET_COUNT); + document::decompress(document::CompressionConfig::LZ4, BUCKET_COUNT, + compressed, uncompressed, false); + } +} +template +void NormalSketch:: +serialize(vespalib::Serializer &os) const { + vespalib::DefaultAlloc backing(LZ4_compressBound(BUCKET_COUNT)); + char * compress_array(static_cast(backing.get())); + uint32_t size = + compress_buckets_into(compress_array, backing.size()); + os << BUCKET_COUNT << size; + for (size_t i = 0; i < size; ++i) { + os << static_cast(compress_array[i]); + } +} +template +void NormalSketch:: +deserialize(vespalib::Deserializer &is) { + uint32_t bucket_count, size; + is >> bucket_count >> size; + assert(bucket_count == BUCKET_COUNT && size <= BUCKET_COUNT); // size comes from the stream and bounds the writes into compressed_array below + uint8_t compressed_array[BUCKET_COUNT]; + for (size_t i = 0; i < size; ++i) { + is >> compressed_array[i]; + } + decompress_buckets_from(reinterpret_cast(compressed_array), size); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/.gitignore b/searchlib/src/vespa/searchlib/index/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/index/CMakeLists.txt b/searchlib/src/vespa/searchlib/index/CMakeLists.txt new file mode 100644 index 00000000000..0fa012ab51e --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_searchlib_index OBJECT + SOURCES + dictionaryfile.cpp + docbuilder.cpp + docidandfeatures.cpp + doctypebuilder.cpp + dummyfileheadercontext.cpp + indexbuilder.cpp + olddictionaryfile.cpp + postinglisthandle.cpp + postinglistcounts.cpp + postinglistcountfile.cpp + postinglistfile.cpp + postinglistparams.cpp + schemautil.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/index/OWNERS b/searchlib/src/vespa/searchlib/index/OWNERS new file mode 100644 index 00000000000..64735d11d93 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/OWNERS @@ -0,0 +1 @@ +tegge diff --git a/searchlib/src/vespa/searchlib/index/bitvectorkeys.h b/searchlib/src/vespa/searchlib/index/bitvectorkeys.h new file mode 100644 index 00000000000..2a1e33026af --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/bitvectorkeys.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +namespace search +{ + +namespace index +{ + +class BitVectorWordSingleKey +{ +public: + uint64_t _wordNum; + uint32_t _numDocs; + uint32_t _pad; + + BitVectorWordSingleKey(void) + : _wordNum(0), + _numDocs(0), + _pad(0) + { + } + + bool + operator<(const BitVectorWordSingleKey &rhs) const + { + return _wordNum < rhs._wordNum; + } + + bool + operator==(const BitVectorWordSingleKey &rhs) const + { + return _wordNum == rhs._wordNum; + } +}; + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp b/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp new file mode 100644 index 00000000000..9915e2c56e1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.cpp @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.dictionaryfile"); +#include "dictionaryfile.h" + +namespace search +{ + +namespace index +{ + + +DictionaryFileSeqRead::~DictionaryFileSeqRead(void) +{ +} + + +DictionaryFileSeqWrite::~DictionaryFileSeqWrite(void) +{ +} + + +DictionaryFileRandRead::DictionaryFileRandRead(void) + : _memoryMapped(false) +{ +} + + +DictionaryFileRandRead::~DictionaryFileRandRead(void) +{ +} + + +void +DictionaryFileRandRead::afterOpen(FastOS_FileInterface &file) +{ + _memoryMapped = file.MemoryMapPtr(0) != NULL; +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/dictionaryfile.h b/searchlib/src/vespa/searchlib/index/dictionaryfile.h new file mode 100644 index 00000000000..8a3f101ba99 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/dictionaryfile.h @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "postinglistcounts.h" +#include "postinglisthandle.h" +#include "postinglistcountfile.h" +#include +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace index +{ + +/** + * Interface for dictionary file containing words and counts for words. + */ +class DictionaryFileSeqRead : public PostingListCountFileSeqRead +{ +public: + DictionaryFileSeqRead(void) + { + } + + virtual + ~DictionaryFileSeqRead(void); + + /** + * Read word and counts. Only nonzero counts are returned. If at + * end of dictionary then noWordNumHigh() is returned as word number. + */ + virtual void + readWord(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) = 0; + + /** + * Open dictionary file for sequential read. 
+ */ + virtual bool + open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) = 0; + + static uint64_t + noWordNum(void) + { + return 0u; + } + + static uint64_t + noWordNumHigh(void) + { + return std::numeric_limits::max(); + } +}; + +/** + * Interface for dictionary file containing words and count for words. + */ +class DictionaryFileSeqWrite : public PostingListCountFileSeqWrite +{ +protected: +public: + DictionaryFileSeqWrite(void) + { + } + + virtual + ~DictionaryFileSeqWrite(void); + + /** + * Write word and counts. Only nonzero counts should be supplied. + */ + virtual void + writeWord(const vespalib::stringref &word, + const PostingListCounts &counts) = 0; +}; + + +/** + * Interface for dictionary file containing words and counts. + */ +class DictionaryFileRandRead +{ +protected: + // Can be examined after open + bool _memoryMapped; +public: + DictionaryFileRandRead(void); + + virtual + ~DictionaryFileRandRead(void); + + virtual bool + lookup(const vespalib::stringref &word, + uint64_t &wordNum, + PostingListOffsetAndCounts &offsetAndCounts) = 0; + + /** + * Open dictionary file for random read. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileRandRead &tuneFileRead) = 0; + + /** + * Close dictionary file. + */ + virtual bool + close(void) = 0; + + bool + getMemoryMapped(void) const + { + return _memoryMapped; + } + + virtual uint64_t + getNumWordIds(void) const = 0; +protected: + void + afterOpen(FastOS_FileInterface &file); +}; + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.cpp b/searchlib/src/vespa/searchlib/index/docbuilder.cpp new file mode 100644 index 00000000000..fc362b6a306 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/docbuilder.cpp @@ -0,0 +1,930 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.docbuilder"); +#include "docbuilder.h" +#include "doctypebuilder.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace document; +using namespace search::index; +using vespalib::Utf8Reader; +using vespalib::Utf8Writer; +using vespalib::geo::ZCurve; + +namespace { + +void +insertStr(const Schema::Field & sfield, document::FieldValue * fvalue, const vespalib::string & val) +{ + if (sfield.getDataType() == Schema::STRING || + sfield.getDataType() == Schema::RAW) + { + (dynamic_cast(fvalue))->setValue(val); + } else { + throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); + } +} + +void +insertInt(const Schema::Field & sfield, document::FieldValue * fvalue, int64_t val) +{ + if (sfield.getDataType() == Schema::INT8) { + (dynamic_cast(fvalue))->setValue((uint8_t)val); + } else if (sfield.getDataType() == Schema::INT16) { + (dynamic_cast(fvalue))->setValue((int16_t)val); + } else if (sfield.getDataType() == Schema::INT32) { + (dynamic_cast(fvalue))->setValue((int32_t)val); + } else if (sfield.getDataType() == Schema::INT64) { + (dynamic_cast(fvalue))->setValue(val); + } else { + throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); + } +} + +void +insertFloat(const Schema::Field & sfield, document::FieldValue * fvalue, double val) +{ + if (sfield.getDataType() == Schema::FLOAT) { + (dynamic_cast(fvalue))->setValue((float)val); + } else if (sfield.getDataType() == Schema::DOUBLE) { + (dynamic_cast(fvalue))->setValue(val); + } else { + throw DocBuilder::Error(vespalib::make_string("Field '%s' not compatible", sfield.getName().c_str())); + } +} + +void insertPredicate(const Schema::Field &sfield, + document::FieldValue *fvalue, + std::unique_ptr val) { + if (sfield.getDataType() == Schema::BOOLEANTREE) { + *(dynamic_cast(fvalue)) = + PredicateFieldValue(std::move(val)); + } else 
{ + throw DocBuilder::Error(vespalib::make_string( + "Field '%s' not compatible", + sfield.getName().c_str())); + } +} + +void insertTensor(const Schema::Field &schemaField, + document::FieldValue *fvalue, + std::unique_ptr val) { + if (schemaField.getDataType() == Schema::TENSOR) { + *(dynamic_cast(fvalue)) = std::move(val); + } else { + throw DocBuilder::Error(vespalib::make_string( + "Field '%s' not compatible", + schemaField.getName().c_str())); + } +} + +void +insertPosition(const Schema::Field & sfield, + document::FieldValue * fvalue, int32_t xpos, int32_t ypos) +{ + assert(*fvalue->getDataType() == *DataType::LONG); + assert(sfield.getDataType() == Schema::INT64); + (void) sfield; + int64_t zpos = ZCurve::encode(xpos, ypos); + document::LongFieldValue *zvalue = + dynamic_cast(fvalue); + zvalue->setValue(zpos); +} + + +void +insertRaw(const Schema::Field & sfield, + document::FieldValue *fvalue, const void *buf, size_t len) +{ + assert(*fvalue->getDataType() == *DataType::RAW); + assert(sfield.getDataType() == Schema::RAW); + (void) sfield; + document::RawFieldValue *rfvalue = + dynamic_cast(fvalue); + rfvalue->setValue(static_cast(buf), len); +} + + +template +std::unique_ptr +make_UP(T *p) +{ + return std::unique_ptr(p); +} + +template +std::unique_ptr +makeUP(T *p) +{ + return std::unique_ptr(p); +} + +} // namespace + +namespace docbuilderkludge +{ + +namespace linguistics +{ + +const vespalib::string SPANTREE_NAME("linguistics"); + +enum TokenType { + UNKNOWN = 0, + SPACE = 1, + PUNCTUATION = 2, + SYMBOL = 3, + ALPHABETIC = 4, + NUMERIC = 5, + MARKER = 6 +}; + +} + +} + +using namespace docbuilderkludge; + +namespace +{ + +Annotation::UP +makeTokenType(linguistics::TokenType type) +{ + return makeUP(new Annotation(*AnnotationType::TOKEN_TYPE, + makeUP(new IntFieldValue(type)))); +} + +} + +namespace search { +namespace index { + +VESPA_IMPLEMENT_EXCEPTION(DocBuilderError, vespalib::Exception); + +DocBuilder::FieldHandle::FieldHandle(const 
document::Field & dfield, const Schema::Field & field) : + _sfield(field), + _value(), + _element() +{ + _value = dfield.createValue(); +} + + +DocBuilder::CollectionFieldHandle::CollectionFieldHandle(const document::Field & dfield, const Schema::Field & field) : + FieldHandle(dfield, field), + _elementWeight(1) +{ +} + +void +DocBuilder::CollectionFieldHandle::startElement(int32_t weight) +{ + assert(_element.get() == NULL); + _elementWeight = weight; + const CollectionFieldValue * value = dynamic_cast(_value.get()); + _element = value->createNested(); +} + +void +DocBuilder::CollectionFieldHandle::endElement() +{ + if (_sfield.getCollectionType() == Schema::ARRAY) { + onEndElement(); + ArrayFieldValue * value = dynamic_cast(_value.get()); + value->add(*_element); + } else if (_sfield.getCollectionType() == + Schema::WEIGHTEDSET) { + onEndElement(); + WeightedSetFieldValue * value = dynamic_cast(_value.get()); + value->add(*_element, _elementWeight); + } else { + throw Error(vespalib::make_string("Field '%s' not compatible", _sfield.getName().c_str())); + } + _element.reset(NULL); +} + + +DocBuilder::IndexFieldHandle::IndexFieldHandle(const FixedTypeRepo & repo, const document::Field & dfield, const Schema::Field & sfield) + : CollectionFieldHandle(dfield, sfield), + _str(), + _strSymbols(0u), + _spanList(NULL), + _spanTree(), + _lastSpan(NULL), + _spanStart(0u), + _autoAnnotate(true), + _autoSpace(true), + _skipAutoSpace(true), + _uriField(false), + _subField(), + _repo(repo) +{ + _str.reserve(1023); + + if (_sfield.getCollectionType() == Schema::SINGLE) { + if (*_value->getDataType() == document::UrlDataType::getInstance()) + _uriField = true; + } else { + const CollectionFieldValue * value = dynamic_cast(_value.get()); + if (value->getNestedType() == document::UrlDataType::getInstance()) + _uriField = true; + } + startAnnotate(); +} + + +void +DocBuilder::IndexFieldHandle::append(const vespalib::string &val) +{ + _strSymbols += val.size(); + _str += val; +} + + 
+void +DocBuilder::IndexFieldHandle::addStr(const vespalib::string &val) +{ + assert(_spanTree.get() != NULL); + if (val.empty()) + return; + if (!_skipAutoSpace && _autoSpace) + addSpace(); + _skipAutoSpace = false; + _spanStart = _strSymbols; + append(val); + if (_autoAnnotate) { + addSpan(); + addTermAnnotation(); + if (val[0] >= '0' && val[0] <= '9') { + addNumericTokenAnnotation(); + } else { + addAlphabeticTokenAnnotation(); + } + } +} + + +void +DocBuilder::IndexFieldHandle::addSpace(void) +{ + addNoWordStr(" "); +} + + +void +DocBuilder::IndexFieldHandle::addNoWordStr(const vespalib::string &val) +{ + assert(_spanTree.get() != NULL); + if (val.empty()) + return; + _spanStart = _strSymbols; + append(val); + if (_autoAnnotate) { + addSpan(); + if (val[0] == ' ' || val[0] == '\t') + addSpaceTokenAnnotation(); + else if (val[0] >= '0' && val[0] <= '9') { + addNumericTokenAnnotation(); + } else { + addAlphabeticTokenAnnotation(); + } + + } + _skipAutoSpace = true; +} + + +void +DocBuilder::IndexFieldHandle::addTokenizedString(const vespalib::string &val, + bool urlMode) +{ + Utf8Reader r(val); + vespalib::string sbuf; + Utf8Writer w(sbuf); + uint32_t c = 0u; + bool oldWord = false; + assert(_uriField == urlMode); + assert(_uriField != _subField.empty()); + + while (r.hasMore()) { + c = r.getChar(); + bool newWord = Fast_UnicodeUtil::IsWordChar(c) || + (urlMode && (c == '-' || c == '_')); + if (oldWord != newWord) { + if (!sbuf.empty()) { + if (oldWord) + addStr(sbuf); + else + addNoWordStr(sbuf); + sbuf.clear(); + } + oldWord = newWord; + } + w.putChar(c); + } + if (!sbuf.empty()) { + if (oldWord) + addStr(sbuf); + else + addNoWordStr(sbuf); + } +} + + +void +DocBuilder::IndexFieldHandle::addSpan(size_t start, size_t len) +{ + const SpanNode &span = _spanList->add(makeUP(new Span(start, len))); + _lastSpan = &span; +} + + +void +DocBuilder::IndexFieldHandle::addSpan(void) +{ + size_t endPos = _strSymbols; + assert(endPos > _spanStart); + addSpan(_spanStart, 
endPos - _spanStart); + _spanStart = endPos; +} + + +void +DocBuilder::IndexFieldHandle::addSpaceTokenAnnotation(void) +{ + assert(_spanTree.get() != NULL); + assert(_lastSpan != NULL); + _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::SPACE)); +} + + +void +DocBuilder::IndexFieldHandle::addNumericTokenAnnotation(void) +{ + assert(_spanTree.get() != NULL); + assert(_lastSpan != NULL); + _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::NUMERIC)); +} + + +void +DocBuilder::IndexFieldHandle::addAlphabeticTokenAnnotation(void) +{ + assert(_spanTree.get() != NULL); + assert(_lastSpan != NULL); + _spanTree->annotate(*_lastSpan, makeTokenType(linguistics::ALPHABETIC)); +} + + +void +DocBuilder::IndexFieldHandle::addTermAnnotation(void) +{ + assert(_spanTree.get() != NULL); + assert(_lastSpan != NULL); + _spanTree->annotate(*_lastSpan, *AnnotationType::TERM); +} + + +void +DocBuilder::IndexFieldHandle::addTermAnnotation(const vespalib::string &val) +{ + assert(_spanTree.get() != NULL); + assert(_lastSpan != NULL); + _spanTree->annotate(*_lastSpan, + makeUP(new Annotation(*AnnotationType::TERM, + makeUP(new StringFieldValue(val))))); +} + + +void +DocBuilder::IndexFieldHandle::onEndElement(void) +{ + // Flush data for index field. 
+ assert(_subField.empty()); + if (_uriField) + return; + StringFieldValue * value; + if (_sfield.getCollectionType() != Schema::SINGLE) { + value = dynamic_cast(_element.get()); + } else { + value = dynamic_cast(_value.get()); + } + value->setValue(_str); + // Also drop all spans no annotation for now + if (_spanTree->numAnnotations() > 0u) { + StringFieldValue::SpanTrees trees; + trees.emplace_back(std::move(_spanTree)); + value->setSpanTrees(trees, _repo); + } else { + _spanTree.reset(); + } + _spanList = NULL; + _lastSpan = NULL; + _spanStart = 0u; + _strSymbols = 0u; + _str.clear(); + _skipAutoSpace = true; + startAnnotate(); +} + + +void +DocBuilder::IndexFieldHandle::onEndField(void) +{ + if (_sfield.getCollectionType() == Schema::SINGLE) + onEndElement(); +} + + +void +DocBuilder::IndexFieldHandle::startAnnotate(void) +{ + SpanList::UP span_list(new SpanList); + _spanList = span_list.get(); + _spanTree.reset(new SpanTree(linguistics::SPANTREE_NAME, std::move(span_list))); +} + + +void +DocBuilder::IndexFieldHandle::setAutoAnnotate(bool autoAnnotate) +{ + _autoAnnotate = autoAnnotate; +} + + +void +DocBuilder::IndexFieldHandle::setAutoSpace(bool autoSpace) +{ + _autoSpace = autoSpace; +} + + +void +DocBuilder::IndexFieldHandle::startSubField(const vespalib::string &subField) +{ + assert(_subField.empty()); + assert(_uriField); + _subField = subField; +} + + + +void +DocBuilder::IndexFieldHandle::endSubField(void) +{ + assert(!_subField.empty()); + assert(_uriField); + StructuredFieldValue *sValue; + if (_sfield.getCollectionType() != Schema::SINGLE) { + sValue = dynamic_cast(_element.get()); + } else { + sValue = dynamic_cast(_value.get()); + } + const Field &f = sValue->getField(_subField); + FieldValue::UP fval(f.getDataType().createFieldValue()); + *fval = _str; + StringFieldValue *value = dynamic_cast(fval.get()); + StringFieldValue::SpanTrees trees; + trees.emplace_back(std::move(_spanTree)); + value->setSpanTrees(trees, _repo); + sValue->setValue(f, 
*fval); + _spanList = NULL; + _lastSpan = NULL; + _spanStart = 0u; + _strSymbols = 0u; + _str.clear(); + _skipAutoSpace = true; + startAnnotate(); + _subField.clear(); +} + + + +DocBuilder::AttributeFieldHandle:: +AttributeFieldHandle(const document::Field &dfield, + const Schema::Field &sfield) + : CollectionFieldHandle(dfield, sfield) +{ +} + +void +DocBuilder::AttributeFieldHandle::addStr(const vespalib::string & val) +{ + if (_element.get() != NULL) { + insertStr(_sfield, _element.get(), val); + } else { + insertStr(_sfield, _value.get(), val); + } +} + +void +DocBuilder::AttributeFieldHandle::addInt(int64_t val) +{ + if (_element.get() != NULL) { + insertInt(_sfield, _element.get(), val); + } else { + insertInt(_sfield, _value.get(), val); + } +} + +void +DocBuilder::AttributeFieldHandle::addFloat(double val) +{ + if (_element.get() != NULL) { + insertFloat(_sfield, _element.get(), val); + } else { + insertFloat(_sfield, _value.get(), val); + } +} + +void +DocBuilder::AttributeFieldHandle::addPredicate( + std::unique_ptr val) +{ + if (_element.get() != NULL) { + insertPredicate(_sfield, _element.get(), std::move(val)); + } else { + insertPredicate(_sfield, _value.get(), std::move(val)); + } +} + + +void +DocBuilder::AttributeFieldHandle::addTensor( + std::unique_ptr val) +{ + if (_element.get() != NULL) { + insertTensor(_sfield, _element.get(), std::move(val)); + } else { + insertTensor(_sfield, _value.get(), std::move(val)); + } +} + + +void +DocBuilder::AttributeFieldHandle::addPosition(int32_t xpos, int32_t ypos) +{ + if (_element.get() != NULL) { + insertPosition(_sfield, _element.get(), xpos, ypos); + } else { + insertPosition(_sfield, _value.get(), xpos, ypos); + } +} + + +DocBuilder::SummaryFieldHandle:: +SummaryFieldHandle(const document::Field & dfield, + const Schema::Field & sfield) + : CollectionFieldHandle(dfield, sfield) +{ +} + +void +DocBuilder::SummaryFieldHandle::addStr(const vespalib::string & val) +{ + if (_element.get() != NULL) { + 
insertStr(_sfield, _element.get(), val); + } else { + insertStr(_sfield, _value.get(), val); + } +} + +void +DocBuilder::SummaryFieldHandle::addInt(int64_t val) +{ + if (_element.get() != NULL) { + insertInt(_sfield, _element.get(), val); + } else { + insertInt(_sfield, _value.get(), val); + } +} + +void +DocBuilder::SummaryFieldHandle::addFloat(double val) +{ + if (_element.get() != NULL) { + insertFloat(_sfield, _element.get(), val); + } else { + insertFloat(_sfield, _value.get(), val); + } +} + + +void +DocBuilder::SummaryFieldHandle::addRaw(const void *buf, size_t len) +{ + if (_element.get() != NULL) { + insertRaw(_sfield, _element.get(), buf, len); + } else { + insertRaw(_sfield, _value.get(), buf, len); + } +} + + +DocBuilder::DocumentHandle::DocumentHandle(document::Document &doc, + const vespalib::string & docId) + : _type(&doc.getType()), + _doc(&doc), + _fieldHandle(), + _repo(*_doc->getRepo(), *_type) +{ + (void) docId; +} + + +DocBuilder::DocBuilder(const Schema &schema) + : _schema(schema), + _doctypes_config(DocTypeBuilder(schema).makeConfig()), + _repo(new DocumentTypeRepo(_doctypes_config)), + _docType(*_repo->getDocumentType("searchdocument")), + _doc(), + _handleDoc(), + _currDoc() +{ +} + +DocBuilder & +DocBuilder::startDocument(const vespalib::string & docId) +{ + _doc.reset(new Document(_docType, DocumentId(docId))); + _doc->setRepo(*_repo); + _handleDoc.reset(new DocumentHandle(*_doc, docId)); + return *this; +} + +document::Document::UP +DocBuilder::endDocument() +{ + _handleDoc->endDocument(_doc); + return std::move(_doc); +} + +DocBuilder & +DocBuilder::startIndexField(const vespalib::string & name) +{ + assert(_handleDoc->getFieldHandle().get() == NULL); + uint32_t field_id = _schema.getIndexFieldId(name); + assert(field_id != Schema::UNKNOWN_FIELD_ID); + _handleDoc->startIndexField(_schema.getIndexField(field_id)); + _currDoc = _handleDoc.get(); + return *this; +} + +DocBuilder & +DocBuilder::startAttributeField(const vespalib::string & 
name) +{ + assert(_handleDoc->getFieldHandle().get() == NULL); + uint32_t field_id = _schema.getIndexFieldId(name); + assert(field_id == Schema::UNKNOWN_FIELD_ID); + field_id = _schema.getAttributeFieldId(name); + assert(field_id != Schema::UNKNOWN_FIELD_ID); + _handleDoc->startAttributeField(_schema.getAttributeField(field_id)); + _currDoc = _handleDoc.get(); + return *this; +} + +DocBuilder & +DocBuilder::startSummaryField(const vespalib::string & name) +{ + assert(_handleDoc->getFieldHandle().get() == NULL); + uint32_t field_id = _schema.getIndexFieldId(name); + assert(field_id == Schema::UNKNOWN_FIELD_ID); + field_id = _schema.getAttributeFieldId(name); + assert(field_id == Schema::UNKNOWN_FIELD_ID); + field_id = _schema.getSummaryFieldId(name); + assert(field_id != Schema::UNKNOWN_FIELD_ID); + _handleDoc->startSummaryField(_schema.getSummaryField(field_id)); + _currDoc = _handleDoc.get(); + return *this; +} + +DocBuilder & +DocBuilder::endField() +{ + assert(_currDoc != NULL); + _currDoc->endField(); + _currDoc = NULL; + return *this; +} + +DocBuilder & +DocBuilder::startElement(int32_t weight) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->startElement(weight); + return *this; +} + +DocBuilder & +DocBuilder::endElement() +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->endElement(); + return *this; +} + +DocBuilder & +DocBuilder::addStr(const vespalib::string & str) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addStr(str); + return *this; +} + +DocBuilder & +DocBuilder::addSpace(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addSpace(); + return *this; +} + +DocBuilder & +DocBuilder::addNoWordStr(const vespalib::string & str) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addNoWordStr(str); + return *this; +} + +DocBuilder & +DocBuilder::addTokenizedString(const vespalib::string &str) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addTokenizedString(str, false); + return 
*this; +} + +DocBuilder & +DocBuilder::addUrlTokenizedString(const vespalib::string &str) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addTokenizedString(str, true); + return *this; +} + +DocBuilder & +DocBuilder::addInt(int64_t val) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addInt(val); + return *this; +} + +DocBuilder & +DocBuilder::addFloat(double val) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addFloat(val); + return *this; +} + + +DocBuilder & +DocBuilder::addPredicate(std::unique_ptr val) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addPredicate(std::move(val)); + return *this; +} + + +DocBuilder & +DocBuilder::addTensor(std::unique_ptr val) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addTensor(std::move(val)); + return *this; +} + + +DocBuilder & +DocBuilder::addSpan(size_t start, size_t len) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addSpan(start, len); + return *this; +} + + +DocBuilder & +DocBuilder::addSpan(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addSpan(); + return *this; +} + + +DocBuilder & +DocBuilder::addSpaceTokenAnnotation(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addSpaceTokenAnnotation(); + return *this; +} + + +DocBuilder & +DocBuilder::addNumericTokenAnnotation(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addNumericTokenAnnotation(); + return *this; +} + + +DocBuilder & +DocBuilder::addAlphabeticTokenAnnotation(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addAlphabeticTokenAnnotation(); + return *this; +} + + +DocBuilder& +DocBuilder::addTermAnnotation(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addTermAnnotation(); + return *this; +} + + +DocBuilder & +DocBuilder::addTermAnnotation(const vespalib::string &val) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addTermAnnotation(val); + return *this; +} + + 
+DocBuilder & +DocBuilder::addPosition(int32_t xpos, int32_t ypos) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addPosition(xpos, ypos); + return *this; +} + + +DocBuilder & +DocBuilder::addRaw(const void *buf, size_t len) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->addRaw(buf, len); + return *this; +} + + +DocBuilder & +DocBuilder::startSubField(const vespalib::string &subField) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->startSubField(subField); + return *this; +} + + +DocBuilder & +DocBuilder::endSubField(void) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->endSubField(); + return *this; +} + + +DocBuilder & +DocBuilder::setAutoAnnotate(bool autoAnnotate) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->setAutoAnnotate(autoAnnotate); + return *this; +} + + +DocBuilder & +DocBuilder::setAutoSpace(bool autoSpace) +{ + assert(_currDoc != NULL); + _currDoc->getFieldHandle()->setAutoSpace(autoSpace); + return *this; +} + + +} // namespace search::index +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/docbuilder.h b/searchlib/src/vespa/searchlib/index/docbuilder.h new file mode 100644 index 00000000000..8bb5f08d722 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/docbuilder.h @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "doctypebuilder.h" + +namespace vespalib { namespace tensor { class Tensor; } } +namespace search { +namespace index { + +VESPA_DEFINE_EXCEPTION(DocBuilderError, vespalib::Exception); + +/** + * Builder class used to generate a search document that corresponds + * to an index schema. + **/ +class DocBuilder +{ +public: + typedef DocBuilderError Error; + +private: + /** + * Base class for handling the construction of a field. 
+ **/ + class FieldHandle { + public: + typedef std::shared_ptr SP; + protected: + const Schema::Field & _sfield; + document::FieldValue::UP _value; + document::FieldValue::UP _element; + public: + FieldHandle(const document::Field & dfield, const Schema::Field & field); + virtual ~FieldHandle() {} + virtual void startElement(int32_t weight) { (void) weight; throw Error("Function not supported"); } + virtual void endElement() { throw Error("Function not supported"); } + virtual void addStr(const vespalib::string & val) { (void) val; throw Error("Function not supported"); } + + virtual void + addSpace(void) + { + throw Error("Function not supported"); + } + + virtual void + addNoWordStr(const vespalib::string & val) + { + (void) val; + throw Error("Function not supported"); + } + + virtual void + addTokenizedString(const vespalib::string &val, + bool urlMode) + { + (void) val; + (void) urlMode; + throw Error("Function not supported"); + } + + virtual void + addSpan(size_t start, size_t len) + { + (void) start; + (void) len; + throw Error("Function not supported"); + } + + virtual void + addSpan(void) + { + throw Error("Function not supported"); + } + + virtual void + addSpaceTokenAnnotation(void) + { + throw Error("Function not supported"); + } + + virtual void + addNumericTokenAnnotation(void) + { + throw Error("Function not supported"); + } + + virtual void + addAlphabeticTokenAnnotation(void) + { + throw Error("Function not supported"); + } + + virtual void + addTermAnnotation(void) + { + throw Error("Function not supported"); + } + + virtual void + addTermAnnotation(const vespalib::string &val) + { + (void) val; + throw Error("Function not supported"); + } + + virtual void addInt(int64_t val) { (void) val; throw Error("Function not supported"); } + virtual void addFloat(double val) { (void) val; throw Error("Function not supported"); } + virtual void addPredicate(std::unique_ptr) { + throw Error("Function not supported"); + } + virtual void 
addTensor(std::unique_ptr) { + throw Error("Function not supported"); + } + const document::FieldValue::UP & getValue() const { return _value; } + const Schema::Field & getField() const { return _sfield; } + + virtual void + onEndElement(void) + { + } + + virtual void + onEndField(void) + { + } + + virtual void + setAutoAnnotate(bool autoAnnotate) + { + (void) autoAnnotate; + throw Error("Function not supported"); + } + + virtual void + setAutoSpace(bool autoSpace) + { + (void) autoSpace; + throw Error("Function not supported"); + } + + virtual void + addPosition(int32_t xpos, int32_t ypos) + { + (void) xpos; + (void) ypos; + throw Error("Function not supported"); + } + + virtual void + addRaw(const void *buf, size_t len) + { + (void) buf; + (void) len; + throw Error("Function not supported"); + } + + virtual void + startSubField(const vespalib::string &subField) + { + (void) subField; + throw Error("Function not supported"); + } + + virtual void + endSubField(void) + { + throw Error("Function not supported"); + } + }; + + /** + * Class that can handle multi value fields. + **/ + class CollectionFieldHandle : public FieldHandle { + private: + int32_t _elementWeight; + public: + CollectionFieldHandle(const document::Field & dfield, const Schema::Field & sfield); + virtual void startElement(int32_t weight); + virtual void endElement(); + }; + + /** + * Class for handling the construction of the content of an index field. 
+ **/ + class IndexFieldHandle : public CollectionFieldHandle + { + vespalib::string _str; // adjusted as word comes along + size_t _strSymbols; // symbols in string, assuming UTF8 + document::SpanList *_spanList; // owned by _spanTree + document::SpanTree::UP _spanTree; + const document::SpanNode *_lastSpan; + size_t _spanStart; // start of span + bool _autoAnnotate; // Add annotation when adding strings + bool _autoSpace; // Add space before strings + bool _skipAutoSpace; // one shot skip of adding space + bool _uriField; // URI handling (special struct case) + vespalib::string _subField; + const document::FixedTypeRepo & _repo; + + void + append(const vespalib::string &val); + + public: + IndexFieldHandle(const document::FixedTypeRepo & repo, + const document::Field &dfield, + const Schema::Field &sfield); + + virtual void addStr(const vespalib::string & val); + + virtual void + addSpace(void); + + virtual void + addNoWordStr(const vespalib::string & val); + + virtual void + addTokenizedString(const vespalib::string &val, + bool urlMode); // overrides FieldHandle::addTokenizedString + + virtual void + addSpan(size_t start, size_t len); + + virtual void + addSpan(void); + + virtual void + addSpaceTokenAnnotation(void); + + virtual void + addNumericTokenAnnotation(void); + + virtual void + addAlphabeticTokenAnnotation(void); + + virtual void + addTermAnnotation(void); + + virtual void + addTermAnnotation(const vespalib::string &val); + + virtual void + onEndElement(void); + + virtual void + onEndField(void); + + void + startAnnotate(void); // not part of the FieldHandle interface + + virtual void + setAutoAnnotate(bool autoAnnotate); + + virtual void + setAutoSpace(bool autoSpace); + + virtual void + startSubField(const vespalib::string &subField); + + virtual void + endSubField(void); + }; + + /** + * Class for handling the construction of the content of an attribute field. 
+ **/ + class AttributeFieldHandle : public CollectionFieldHandle + { + public: + AttributeFieldHandle(const document::Field & dfield, const Schema::Field & sfield); + virtual void addStr(const vespalib::string & val); + virtual void addInt(int64_t val); + virtual void addFloat(double val); + virtual void addPredicate(std::unique_ptr val); + virtual void addTensor(std::unique_ptr val) + override; + + virtual void + addPosition(int32_t xpos, int32_t ypos); + }; + + /** + * Class for handling the construction of the content of a summary field. + **/ + class SummaryFieldHandle : public CollectionFieldHandle { + public: + SummaryFieldHandle(const document::Field & dfield, const Schema::Field & sfield); + virtual void addStr(const vespalib::string & val); + virtual void addInt(int64_t val); + virtual void addFloat(double val); + + virtual void + addRaw(const void *buf, size_t len); + }; + + /** + * Class for handling the construction of a document (set of fields). + **/ + class DocumentHandle { + public: + typedef std::shared_ptr SP; + private: + const document::DocumentType * _type; + document::Document *const _doc; + FieldHandle::SP _fieldHandle; + document::FixedTypeRepo _repo; + public: + DocumentHandle(document::Document &doc, const vespalib::string & docId); + const FieldHandle::SP & getFieldHandle() const { return _fieldHandle; } + void startIndexField(const Schema::Field & sfield) { + _fieldHandle.reset(new IndexFieldHandle(_repo, _type->getField(sfield.getName()), sfield)); + } + void startAttributeField(const Schema::Field & sfield) { + _fieldHandle.reset(new AttributeFieldHandle(_type->getField(sfield.getName()), sfield)); + } + void startSummaryField(const Schema::Field & sfield) { + _fieldHandle.reset(new SummaryFieldHandle(_type->getField(sfield.getName()), sfield)); + } + void + endField() + { + _fieldHandle->onEndField(); + _doc->setValue(_type->getField(_fieldHandle->getField().getName()), *_fieldHandle->getValue()); + 
_fieldHandle.reset(static_cast(NULL)); + } + void endDocument(const document::Document::UP & doc) { + (void) doc; + } + }; + + const Schema & _schema; + document::DocumenttypesConfig _doctypes_config; + document::DocumentTypeRepo::SP _repo; + const document::DocumentType &_docType; + document::Document::UP _doc; // the document we are about to generate + + DocumentHandle::SP _handleDoc; // handle for all fields + DocumentHandle * _currDoc; // the current document handle + +public: + DocBuilder(const Schema & schema); + + DocBuilder & startDocument(const vespalib::string & docId); + document::Document::UP endDocument(); + + DocBuilder & startIndexField(const vespalib::string & name); + DocBuilder & startAttributeField(const vespalib::string & name); + DocBuilder & startSummaryField(const vespalib::string & name); + DocBuilder & endField(); + + DocBuilder & startElement(int32_t weight = 1); + DocBuilder & endElement(); + + DocBuilder & addStr(const vespalib::string & val); + DocBuilder & addSpace(void); + DocBuilder & addNoWordStr(const vespalib::string & val); + DocBuilder & addInt(int64_t val); + DocBuilder & addFloat(double val); + DocBuilder & addPredicate(std::unique_ptr val); + DocBuilder & addTensor(std::unique_ptr val); + + DocBuilder & + addTokenizedString(const vespalib::string &val); + + DocBuilder & + addUrlTokenizedString(const vespalib::string &val); + + DocBuilder & + addSpan(size_t start, size_t len); + + DocBuilder & + addSpan(void); + + DocBuilder & + addSpaceTokenAnnotation(void); + + DocBuilder & + addNumericTokenAnnotation(void); + + DocBuilder & + addAlphabeticTokenAnnotation(void); + + DocBuilder& + addTermAnnotation(void); + + DocBuilder & + addTermAnnotation(const vespalib::string &val); + + DocBuilder & + setAutoAnnotate(bool autoAnnotate); + + DocBuilder & + setAutoSpace(bool autoSpace); + + DocBuilder & + addPosition(int32_t xpos, int32_t ypos); + + DocBuilder & + addRaw(const void *buf, size_t len); + + DocBuilder & + startSubField(const 
vespalib::string &subField); + + DocBuilder & + endSubField(void); + + static bool + hasAnnotations(void) + { + return true; + } + + const document::DocumentType &getDocumentType() const { return _docType; } + const document::DocumentTypeRepo::SP &getDocumentTypeRepo() const + { return _repo; } + document::DocumenttypesConfig getDocumenttypesConfig() const + { return _doctypes_config; } +}; + +} // namespace search::index +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp new file mode 100644 index 00000000000..442f25b94f0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.cpp @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".index.docidandfeatures"); +#include "docidandfeatures.h" +#include + +namespace search +{ + +namespace index +{ + +using vespalib::nbostream; + +#if 0 +void +DocIdAndFeatures::append(const DocIdAndFeatures &rhs, uint32_t localFieldId) +{ + assert(!rhs.getRaw()); + assert(rhs._fields.size() == 1); + const WordDocFieldFeatures &field = rhs._fields.front(); + assert(field.getFieldId() == 0); + uint32_t numElements = field.getNumElements(); + std::vector::const_iterator element = + rhs._elements.begin(); + std::vector::const_iterator position = + rhs._wordPositions.begin(); + assert(_fields.empty() || localFieldId > _fields.back().getFieldId()); + _fields.push_back(field); + _fields.back().setFieldId(localFieldId); + for (uint32_t elementDone = 0; elementDone < numElements; + ++elementDone, ++element) { + _elements.push_back(*element); + for (uint32_t posResidue = element->getNumOccs(); posResidue > 0; + --posResidue, ++position) { + _wordPositions.push_back(*position); + } + } +} +#endif + + +nbostream & +operator<<(nbostream &out, const WordDocElementFeatures &features) +{ + out << features._elementId << 
features._numOccs << + features._weight << features._elementLen; + return out; +} + + +nbostream & +operator>>(nbostream &in, WordDocElementFeatures &features) +{ + in >> features._elementId >> features._numOccs >> + features._weight >> features._elementLen; + return in; +} + + +nbostream & +operator<<(nbostream &out, const WordDocElementWordPosFeatures &features) +{ + out << features._wordPos; + return out; +} + + +nbostream & +operator>>(nbostream &in, WordDocElementWordPosFeatures &features) +{ + in >> features._wordPos; + return in; +} + + +nbostream & +operator<<(nbostream &out, const DocIdAndFeatures &features) +{ + out << features._docId; + out.saveVector(features._elements). + saveVector(features._wordPositions); + out.saveVector(features._blob); + out << features._bitOffset << features._bitLength << features._raw; + return out; +} + + +nbostream & +operator>>(nbostream &in, DocIdAndFeatures &features) +{ + in >> features._docId; + in.restoreVector(features._elements). + restoreVector(features._wordPositions); + in.restoreVector(features._blob); + in >> features._bitOffset >> features._bitLength >> features._raw; + return in; +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/docidandfeatures.h b/searchlib/src/vespa/searchlib/index/docidandfeatures.h new file mode 100644 index 00000000000..1bb74e6c3b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/docidandfeatures.h @@ -0,0 +1,338 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + +namespace index +{ + +/* + * The following feature classes are not self contained. To reduce + * memory allocator pressure, the DocIdAndFeatures class contains a + * flattened representation of the features at different levels. + */ + +/* + * (word, doc) features. 
+ * + * Present as member in DocIdAndFeatures. + */ +class WordDocFeatures +{ +public: + // TODO: add support for user features + + WordDocFeatures(void) + { + } + + void + clear(void) + { + } +}; + +/* + * (word, doc, field) features. + * + * Present as vector element in DocIdAndFeatures. + */ +class WordDocFieldFeatures +{ +public: + uint32_t _numElements; // Number of array indexes + // TODO: add support for user features + + WordDocFieldFeatures(void) + : _numElements(0u) + { + } + + uint32_t + getNumElements(void) const + { + return _numElements; + } + + void + setNumElements(uint32_t numElements) + { + _numElements = numElements; + } + + void + incNumElements(void) + { + ++_numElements; + } +}; + +/* + * (word, doc, field, element) features. + * + * Present as vector element in DocIdAndFeatures. + */ +class WordDocElementFeatures +{ +public: + uint32_t _elementId; // Array index + uint32_t _numOccs; // Occurrence count, starts at 0 + int32_t _weight; // Signed weight, defaults to 1 + uint32_t _elementLen; // Defaults to SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH + // TODO: add support for user features + + WordDocElementFeatures(void) + : _elementId(0u), + _numOccs(0u), + _weight(1), + _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH) + { + } + + WordDocElementFeatures(uint32_t elementId) + : _elementId(elementId), + _numOccs(0u), + _weight(1), + _elementLen(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH) + { + } + + WordDocElementFeatures(uint32_t elementId, + int32_t weight, // signed, same type as _weight and setWeight() + uint32_t elementLen) + : _elementId(elementId), + _numOccs(0u), + _weight(weight), + _elementLen(elementLen) + { + } + + uint32_t + getElementId(void) const + { + return _elementId; + } + + uint32_t + getNumOccs(void) const + { + return _numOccs; + } + + int32_t + getWeight(void) const + { + return _weight; + } + + uint32_t + getElementLen(void) const + { + return _elementLen; + } + + void + setElementId(uint32_t elementId) + { + _elementId = elementId; + } + + void + setNumOccs(uint32_t numOccs) + { + _numOccs = numOccs; + } + + void + setWeight(int32_t weight) + { + _weight = weight; + } + + void + 
setElementLen(uint32_t elementLen) + { + _elementLen = elementLen; + } + + void + incNumOccs(void) + { + ++_numOccs; + } +}; + +/* + * (word, doc, field, element, wordpos) features. + * + * Present as vector element in DocIdAndFeatures. + */ +class WordDocElementWordPosFeatures +{ +public: + uint32_t _wordPos; + // TODO: add support for user features + + WordDocElementWordPosFeatures(void) + : _wordPos(0u) + { + } + + WordDocElementWordPosFeatures(uint32_t wordPos) + : _wordPos(wordPos) + { + } + + uint32_t + getWordPos(void) const + { + return _wordPos; + } + + void + setWordPos(uint32_t wordPos) + { + _wordPos = wordPos; + } +}; + +/** + * Class for minimal common representation of features available for a + * (word, doc) pair, used by index fusion to shuffle information from + * input files to the output file without having to know all the details. + */ +class DocIdAndFeatures +{ +public: + uint32_t _docId; // Current Docid + // generic feature data, flattened to avoid excessive allocator usage + WordDocFeatures _wordDocFeatures; + std::vector _elements; + std::vector _wordPositions; +#ifdef notyet + // user blobs (packed) + UserFeatures _userFeatures; + // TODO: Determine how to handle big endian versus little endian user + // features, and whether set of user features is contiguous in file or + // interleaved with predefined features (word position, word weight) +#endif + // raw data (file format specific, packed) + std::vector _blob; // Feature data for (word, docid) pair + uint32_t _bitOffset; // Offset of feature start ([0..63]) + uint32_t _bitLength; // Length of features + bool _raw; // Set via setRaw(), false by default; presumably marks features carried only as packed _blob data - TODO confirm + + DocIdAndFeatures(void) + : _docId(0), + _wordDocFeatures(), + _elements(), + _wordPositions(), + _blob(), + _bitOffset(0u), + _bitLength(0u), + _raw(false) + { + } + + ~DocIdAndFeatures(void) + { + } + + void + clearFeatures(void) + { + _wordDocFeatures.clear(); + _elements.clear(); + _wordPositions.clear(); + _bitOffset = 0u; + _bitLength = 0u; + _blob.clear(); + } + 
+ void + clearFeatures(uint32_t bitOffset) + { + _wordDocFeatures.clear(); + _elements.clear(); + _wordPositions.clear(); + _bitOffset = bitOffset; + _bitLength = 0u; + _blob.clear(); + } + + void + clear(uint32_t docId) + { + _docId = docId; + clearFeatures(); + } + + + void + clear(uint32_t docId, + uint32_t bitOffset) + { + _docId = docId; + clearFeatures(bitOffset); + } + + void + setRaw(bool raw) + { + _raw = raw; + } + + bool + getRaw(void) const + { + return _raw; + } + + /** + * Append features from a single field to a field collection. + * NOTE: the definition in docidandfeatures.cpp is compiled out (#if 0). + * @param rhs features for a single field + * @param localFieldId local field id for the field + */ + void + append(const DocIdAndFeatures &rhs, uint32_t localFieldId); +}; + + +vespalib::nbostream & +operator<<(vespalib::nbostream &out, + const WordDocElementFeatures &features); + +vespalib::nbostream & +operator>>(vespalib::nbostream &in, WordDocElementFeatures &features); + +vespalib::nbostream & +operator<<(vespalib::nbostream &out, + const WordDocElementWordPosFeatures &features); + +vespalib::nbostream & +operator>>(vespalib::nbostream &in, + WordDocElementWordPosFeatures &features); + +vespalib::nbostream & +operator<<(vespalib::nbostream &out, const DocIdAndFeatures &features); + +vespalib::nbostream & +operator>>(vespalib::nbostream &in, DocIdAndFeatures &features); + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp new file mode 100644 index 00000000000..fff8a735bf9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.cpp @@ -0,0 +1,356 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "doctypebuilder.h" +#include +#include +#include +#include +#include + +LOG_SETUP(".index.doctypebuilder"); + +using namespace document; +using namespace search::index; + +namespace search +{ + +namespace index +{ + +namespace +{ + +const DataType *convert(Schema::DataType type) { + switch (type) { + case Schema::UINT1: + case Schema::UINT2: + case Schema::UINT4: + case Schema::INT8: + return DataType::BYTE; + case Schema::INT16: + return DataType::SHORT; + case Schema::INT32: + return DataType::INT; + case Schema::INT64: + return DataType::LONG; + case Schema::FLOAT: + return DataType::FLOAT; + case Schema::DOUBLE: + return DataType::DOUBLE; + case Schema::STRING: + return DataType::STRING; + case Schema::RAW: + return DataType::RAW; + case Schema::BOOLEANTREE: + return DataType::PREDICATE; + case Schema::TENSOR: + return DataType::TENSOR; + default: + break; + } + assert(!"Unknown datatype in schema"); + return 0; +} + +void +insertStructType(document::DocumenttypesConfig::Documenttype & cfg, + const StructDataType & structType) +{ + typedef document::DocumenttypesConfig DTC; + DTC::Documenttype::Datatype::Sstruct cfgStruct; + cfgStruct.name = structType.getName(); + Field::Set fieldSet = structType.getFieldSet(); + for (Field::Set::const_iterator itr = fieldSet.begin(); + itr != fieldSet.end(); ++itr) + { + DTC::Documenttype::Datatype::Sstruct::Field field; + field.name = (*itr)->getName(); + field.datatype = (*itr)->getDataType().getId(); + field.id = (*itr)->getId(Document::getNewestSerializationVersion()); + field.idV6 = (*itr)->getId(6); + cfgStruct.field.push_back(field); + } + cfg.datatype.push_back(DTC::Documenttype::Datatype()); + cfg.datatype.back().sstruct = cfgStruct; + cfg.datatype.back().id = structType.getId(); +} + +} + +DocTypeBuilder::UriField::UriField(void) + : _all(Schema::UNKNOWN_FIELD_ID), + _scheme(Schema::UNKNOWN_FIELD_ID), + _host(Schema::UNKNOWN_FIELD_ID), + _port(Schema::UNKNOWN_FIELD_ID), + 
_path(Schema::UNKNOWN_FIELD_ID), + _query(Schema::UNKNOWN_FIELD_ID), + _fragment(Schema::UNKNOWN_FIELD_ID), + _hostname(Schema::UNKNOWN_FIELD_ID) +{ +} + + +bool +DocTypeBuilder::UriField::valid(const Schema &schema, + uint32_t fieldId, + const Schema::CollectionType &collectionType) +{ + if (fieldId == Schema::UNKNOWN_FIELD_ID) + return false; + const Schema::IndexField &field = schema.getIndexField(fieldId); + if (field.getDataType() != Schema::STRING) + return false; + if (field.getCollectionType() != collectionType) + return false; + return true; +} + + +bool +DocTypeBuilder::UriField::broken(const Schema &schema, + const Schema::CollectionType & + collectionType) const +{ + return !valid(schema, _all, collectionType) && + valid(schema, _scheme, collectionType) && + valid(schema, _host, collectionType) && + valid(schema, _port, collectionType) && + valid(schema, _path, collectionType) && + valid(schema, _query, collectionType) && + valid(schema, _fragment, collectionType); +} + +bool +DocTypeBuilder::UriField::valid(const Schema &schema, + const Schema::CollectionType & + collectionType) const +{ + return valid(schema, _all, collectionType) && + valid(schema, _scheme, collectionType) && + valid(schema, _host, collectionType) && + valid(schema, _port, collectionType) && + valid(schema, _path, collectionType) && + valid(schema, _query, collectionType) && + valid(schema, _fragment, collectionType); +} + + +void +DocTypeBuilder::UriField::setup(const Schema &schema, + const vespalib::string &field) +{ + _all = schema.getIndexFieldId(field); + _scheme = schema.getIndexFieldId(field + ".scheme"); + _host = schema.getIndexFieldId(field + ".host"); + _port = schema.getIndexFieldId(field + ".port"); + _path = schema.getIndexFieldId(field + ".path"); + _query = schema.getIndexFieldId(field + ".query"); + _fragment = schema.getIndexFieldId(field + ".fragment"); + _hostname = schema.getIndexFieldId(field + ".hostname"); +} + + +void 
+DocTypeBuilder::UriField::markUsed(UsedFieldsMap &usedFields, + uint32_t field) +{ + if (field == Schema::UNKNOWN_FIELD_ID) + return; + assert(usedFields.size() > field); + usedFields[field] = true; +} + + +void +DocTypeBuilder::UriField::markUsed(UsedFieldsMap &usedFields) const +{ + markUsed(usedFields, _all); + markUsed(usedFields, _scheme); + markUsed(usedFields, _host); + markUsed(usedFields, _port); + markUsed(usedFields, _path); + markUsed(usedFields, _query); + markUsed(usedFields, _fragment); + markUsed(usedFields, _hostname); +} + + + +DocTypeBuilder::SchemaIndexFields::SchemaIndexFields(void) + : _textFields(), + _uriFields() +{ +} + + +void +DocTypeBuilder::SchemaIndexFields::setup(const Schema &schema) +{ + uint32_t numIndexFields = schema.getNumIndexFields(); + UsedFieldsMap usedFields; + usedFields.resize(numIndexFields); + + // Detect all URI fields (flattened structs). + for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) { + const Schema::IndexField &field = schema.getIndexField(fieldId); + const vespalib::string &name = field.getName(); + size_t dotPos = name.find('.'); + if (dotPos != vespalib::string::npos) { + const vespalib::string suffix = name.substr(dotPos + 1); + if (suffix == "scheme") { + const vespalib::string shortName = name.substr(0, dotPos); + UriField uriField; + uriField.setup(schema, shortName); + if (uriField.valid(schema, field.getCollectionType())) { + _uriFields.push_back(uriField); + uriField.markUsed(usedFields); + } else if (uriField.broken(schema, + field.getCollectionType())) { + // Broken removal of unused URI fields. + uriField.markUsed(usedFields); + } + } + } + } + + // Non-URI fields are currently supposed to be text fields. 
+ for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) { + if (usedFields[fieldId]) + continue; + const Schema::IndexField &field = schema.getIndexField(fieldId); + switch (field.getDataType()) { + case Schema::STRING: + _textFields.push_back(fieldId); + break; + default: + ; + } + } +} + +DocTypeBuilder::DocTypeBuilder(const Schema &schema) + : _schema(schema), + _iFields() +{ + _iFields.setup(schema); +} + +namespace { +using namespace document::config_builder; +TypeOrId makeCollection(TypeOrId datatype, + Schema::CollectionType collection_type) { + switch (collection_type) { + case Schema::ARRAY: + return Array(datatype); + case Schema::WEIGHTEDSET: + // TODO: consider using array of struct to keep order + return Wset(datatype); + default: + return datatype; + } +} + +struct TypeCache { + std::map, TypeOrId> types; // keyed by (datatype id, collection type) + // Returns the entry stored for (datatype.id, c_type), adding it on first use. + TypeOrId getType(TypeOrId datatype, Schema::CollectionType c_type) { + TypeOrId type = makeCollection(datatype, c_type); + std::pair key = std::make_pair(datatype.id, c_type); + // insert() leaves an existing entry untouched and returns its iterator + // either way, so one lookup replaces the find/insert/find sequence. + return types.insert(std::make_pair(key, type)) + .first->second; + } +}; +} // namespace + +document::DocumenttypesConfig DocTypeBuilder::makeConfig() const { + using namespace document::config_builder; + TypeCache type_cache; + + typedef std::set UsedFields; + UsedFields usedFields; + + Struct header_struct("searchdocument.header"); + header_struct.setId(-1505212454); + + int32_t field_id = 0; + for (size_t i = 0; i < _iFields._textFields.size(); ++i) { + const Schema::IndexField &field = + _schema.getIndexField(_iFields._textFields[i]); + + // only handles string fields for now + assert(field.getDataType() == Schema::STRING); + header_struct.addField(field.getName(), type_cache.getType( + DataType::T_STRING, field.getCollectionType())); + header_struct.sstruct.field.back().id = field_id++; + usedFields.insert(field.getName()); + } + + const int32_t uri_type = document::UrlDataType::getInstance().getId(); + 
for (size_t i = 0; i < _iFields._uriFields.size(); ++i) {
+        const Schema::IndexField &field =
+            _schema.getIndexField(_iFields._uriFields[i]._all);
+
+        // only handles string fields for now
+        assert(field.getDataType() == Schema::STRING);
+        header_struct.addField(field.getName(), type_cache.getType(
+                        uri_type, field.getCollectionType()));
+        header_struct.sstruct.field.back().id = field_id++;
+        usedFields.insert(field.getName());
+    }
+
+    for (uint32_t i = 0; i < _schema.getNumAttributeFields(); ++i) {
+        const Schema::AttributeField &field = _schema.getAttributeField(i);
+        UsedFields::const_iterator usf = usedFields.find(field.getName());
+        if (usf != usedFields.end())
+            continue;   // taken as index field
+
+        const DataType *primitiveType = convert(field.getDataType());
+        header_struct.addField(field.getName(), type_cache.getType(
+                        primitiveType->getId(), field.getCollectionType()));
+        header_struct.sstruct.field.back().id = field_id++;
+        usedFields.insert(field.getName());
+    }
+
+    for (uint32_t i = 0; i < _schema.getNumSummaryFields(); ++i) {
+        const Schema::SummaryField &field = _schema.getSummaryField(i);
+        UsedFields::const_iterator usf = usedFields.find(field.getName());
+        if (usf != usedFields.end())
+            continue;   // taken as index field or attribute field
+        const DataType *primitiveType(convert(field.getDataType()));
+        header_struct.addField(field.getName(), type_cache.getType(
+                        primitiveType->getId(), field.getCollectionType()));
+        header_struct.sstruct.field.back().id = field_id++;
+        usedFields.insert(field.getName());
+    }
+
+    DocumenttypesConfigBuilderHelper builder;
+    builder.document(-645763131, "searchdocument",
+                     header_struct, Struct("searchdocument.body"));
+    return builder.config();
+}
+
+/**
+ * Builds a document types config from an existing document type.
+ *
+ * One documenttype entry is filled in with the id and name of docType.
+ * Both its header struct and its body struct are set to the id of
+ * docType.getFieldsType(). That fields struct is then handed to
+ * insertStructType() together with the entry.
+ *
+ * @param docType document type to describe.
+ * @return the populated config.
+ **/
+document::DocumenttypesConfig
+DocTypeBuilder::makeConfig(const DocumentType &docType)
+{
+    typedef document::DocumenttypesConfigBuilder DTC;
+    DTC cfg;
+    { // document type
+        DTC::Documenttype dtype;
+        dtype.id = docType.getId();
+        dtype.name = docType.getName();
+        // TODO(vekterli): remove header/body config
+        // Until then header and body deliberately share one struct id.
+        dtype.headerstruct = docType.getFieldsType().getId();
+        dtype.bodystruct = docType.getFieldsType().getId();
+        cfg.documenttype.push_back(dtype);
+    }
+    // Index 0 is the entry pushed in the scope above.
+    insertStructType(cfg.documenttype[0], docType.getFieldsType());
+    return cfg;
+}
+
+
+} // namespace search::index
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/index/doctypebuilder.h b/searchlib/src/vespa/searchlib/index/doctypebuilder.h
new file mode 100644
index 00000000000..1781d772122
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/index/doctypebuilder.h
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace index
+{
+
+/**
+ * Builder for the indexingdocument document type based on an index schema.
+ **/
+class DocTypeBuilder {
+public:
+    // NOTE(review): the element types of the std::vector typedefs in this
+    // class, and the targets of the #include lines above, are missing in
+    // this copy -- presumably stripped during extraction. Restore them from
+    // the upstream file rather than guessing.
+    typedef std::vector UsedFieldsMap;
+    typedef std::vector FieldIdVector;
+
+    /**
+     * Group of uint32_t members, one per URI part. makeConfig() passes
+     * _all to Schema::getIndexField(), so _all is an index field id; the
+     * remaining members presumably are too -- TODO confirm.
+     **/
+    class UriField
+    {
+    public:
+        uint32_t _all;
+        uint32_t _scheme;
+        uint32_t _host;
+        uint32_t _port;
+        uint32_t _path;
+        uint32_t _query;
+        uint32_t _fragment;
+        uint32_t _hostname;
+
+    private:
+        // Static single-field variants of the public markUsed()/valid()
+        // overloads below; their definitions are not part of this header.
+        static void
+        markUsed(UsedFieldsMap &usedFields,
+                 uint32_t field);
+
+        static bool
+        valid(const Schema &schema,
+              uint32_t fieldId,
+              const Schema::CollectionType &collectionType);
+
+    public:
+        UriField(void);
+
+        bool
+        broken(const Schema &schema,
+               const Schema::CollectionType &collectionType) const;
+
+        bool
+        valid(const Schema &schema,
+              const Schema::CollectionType &collectionType) const;
+
+        void
+        setup(const Schema &schema,
+              const vespalib::string &field);
+
+        void
+        markUsed(UsedFieldsMap &usedFields) const;
+    };
+
+    // Element type missing here as well; makeConfig() reads
+    // _uriFields[i]._all, so presumably a vector of UriField -- confirm.
+    typedef std::vector UriFieldIdVector;
+
+    /**
+     * Index fields of a schema kept as two lists. makeConfig() walks
+     * _uriFields and adds one document field per entry.
+     **/
+    class SchemaIndexFields
+    {
+    public:
+        FieldIdVector _textFields;
+        UriFieldIdVector _uriFields;
+
+        SchemaIndexFields(void);
+
+        void
+        setup(const Schema &schema);
+    };
+
+private: + const Schema &_schema; + SchemaIndexFields _iFields; + +public: + DocTypeBuilder(const Schema & schema); + document::DocumenttypesConfig makeConfig() const; + + static document::DocumenttypesConfig + makeConfig(const document::DocumentType &docType); +}; + +} // namespace search::index +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp new file mode 100644 index 00000000000..2228a19a1e7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.cpp @@ -0,0 +1,70 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".index.dummyfileheadercontext"); +#include "dummyfileheadercontext.h" +#include +#include + +namespace search +{ + +namespace index +{ + +vespalib::string DummyFileHeaderContext::_creator; + +DummyFileHeaderContext::DummyFileHeaderContext(void) + : common::FileHeaderContext(), + _disableFileName(false), + _hostName(), + _pid(getpid()) +{ + _hostName = FastOS_Socket::getHostName(); + assert(!_hostName.empty()); +} + + +DummyFileHeaderContext::~DummyFileHeaderContext(void) +{ +} + + +void +DummyFileHeaderContext::disableFileName(void) +{ + _disableFileName = true; +} + + +void +DummyFileHeaderContext::addTags(vespalib::GenericHeader &header, + const vespalib::string &name) const +{ + typedef vespalib::GenericHeader::Tag Tag; + + FileHeaderTk::addVersionTags(header); + if (!_disableFileName) { + header.putTag(Tag("fileName", name)); + addCreateAndFreezeTime(header); + } + header.putTag(Tag("hostName", _hostName)); + header.putTag(Tag("pid", _pid)); + if (!_creator.empty()) { + header.putTag(Tag("creator", _creator)); + } + header.putTag(Tag("DummyFileHeaderContext", "enabled")); +} + + +void +DummyFileHeaderContext::setCreator(const vespalib::string &creator) +{ + _creator = creator; +} + + +} // namespace index + +} 
// namespace search diff --git a/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h new file mode 100644 index 00000000000..c7270dcf61e --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/dummyfileheadercontext.h @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace vespalib +{ + +class GenericHeader; + +} + +namespace search +{ + +namespace index +{ + +class DummyFileHeaderContext : public common::FileHeaderContext +{ + bool _disableFileName; + vespalib::string _hostName; + pid_t _pid; + + static vespalib::string _creator; +public: + DummyFileHeaderContext(void); + + virtual + ~DummyFileHeaderContext(void); + + void + disableFileName(void); + + virtual void + addTags(vespalib::GenericHeader &header, + const vespalib::string &name) const; + + static void + setCreator(const vespalib::string &creator); +}; + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.cpp b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp new file mode 100644 index 00000000000..aaefb0e4ff8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/indexbuilder.cpp @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.indexbuilder"); +#include "indexbuilder.h" + +namespace search +{ + +namespace index +{ + + +IndexBuilder::IndexBuilder(const Schema &schema) + : _schema(schema) +{ +} + + +IndexBuilder::~IndexBuilder(void) +{ +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/indexbuilder.h b/searchlib/src/vespa/searchlib/index/indexbuilder.h new file mode 100644 index 00000000000..f4688167b81 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/indexbuilder.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search +{ + +namespace index +{ + +class Schema; +class WordDocElementWordPosFeatures; + +class IndexBuilder +{ +protected: + const Schema &_schema; + +public: + IndexBuilder(const Schema &schema); + + virtual + ~IndexBuilder(void); + + virtual void + startWord(const vespalib::stringref & word) = 0; + + virtual void + endWord(void) = 0; + + virtual void + startDocument(uint32_t docId) = 0; + + virtual void + endDocument(void) = 0; + + virtual void + startField(uint32_t fieldId) = 0; + + virtual void + endField(void) = 0; + + virtual void + startElement(uint32_t elementId, int32_t weight, uint32_t elementLen) = 0; + + virtual void + endElement(void) = 0; + + virtual void + addOcc(const WordDocElementWordPosFeatures &features) = 0; + + // TODO: methods for attribute vectors. + + // TODO: methods for document summary. +}; + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp b/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp new file mode 100644 index 00000000000..7e194e3ead4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/olddictionaryfile.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".index.dictionaryfile");
+#include "olddictionaryfile.h"
+
+namespace search
+{
+
+namespace index
+{
+
+
+OldDictionaryIndexMapping::OldDictionaryIndexMapping(void)
+    : _fieldIdToLocalId(),
+      _indexNames(),
+      _indexIds(),
+      _washedIndexIds()
+{
+}
+
+
+OldDictionaryIndexMapping::~OldDictionaryIndexMapping(void)
+{
+}
+
+
+/**
+ * Set up the mapping from a list of index field names.
+ *
+ * Every name is looked up in the schema.  Names unknown to the schema
+ * keep their slot in the index id / name lists (with
+ * Schema::UNKNOWN_FIELD_ID as id) but are left out of the washed id list.
+ *
+ * @param schema     schema used to translate names to field ids.
+ * @param fieldNames index field names, in local id order.
+ */
+void
+OldDictionaryIndexMapping::
+setup(const Schema &schema,
+      const std::vector<vespalib::string> &fieldNames)
+{
+    _indexIds.clear();
+    _washedIndexIds.clear();
+    _indexNames.clear();
+
+    for (const auto &name : fieldNames) {
+        uint32_t fieldId = schema.getIndexFieldId(name);
+        _indexIds.push_back(fieldId);
+        if (fieldId != Schema::UNKNOWN_FIELD_ID)
+            _washedIndexIds.push_back(fieldId);
+        _indexNames.push_back(name);
+    }
+    setupHelper(schema);
+}
+
+
+/**
+ * Set up the mapping from a list of schema index field ids.
+ *
+ * All ids must be valid for the schema (checked with assert only).
+ *
+ * @param schema schema used to translate field ids to names.
+ * @param fields index field ids, in local id order.
+ */
+void
+OldDictionaryIndexMapping::setup(const Schema &schema,
+                                 const std::vector<uint32_t> &fields)
+{
+    _indexIds.clear();
+    _washedIndexIds.clear();
+    _indexNames.clear();
+
+    vespalib::string fname;
+    for (uint32_t fieldId : fields) {
+        assert(fieldId != Schema::UNKNOWN_FIELD_ID);
+        assert(fieldId < schema.getNumIndexFields());
+        fname = schema.getIndexField(fieldId).getName();
+        _indexIds.push_back(fieldId);
+        _washedIndexIds.push_back(fieldId);
+        _indexNames.push_back(fname);
+    }
+    setupHelper(schema);
+}
+
+
+/**
+ * Rebuild the field id -> local id table from _indexIds.  The local id
+ * of a field is its position in _indexIds; unknown fields are skipped.
+ */
+void
+OldDictionaryIndexMapping::setupHelper(const Schema &schema)
+{
+    // Create mapping to local ids
+
+    _fieldIdToLocalId.clear();
+    uint32_t localId = 0;
+    for (std::vector<uint32_t>::const_iterator
+             i = _indexIds.begin(), ie = _indexIds.end();
+         i != ie;
+         ++i, ++localId)
+    {
+        if (*i == Schema::UNKNOWN_FIELD_ID)
+            continue;   // Field on file not in current schema
+        assert(*i < schema.getNumIndexFields());
+        (void) schema;  // only used by the assert above
+        // Grow the table so that *i is a valid index; new slots are unmapped.
+        if (_fieldIdToLocalId.size() <= *i)
+            _fieldIdToLocalId.resize(static_cast<size_t>(*i) + 1, noLocalId());
+
assert(_fieldIdToLocalId[*i] == noLocalId()); + _fieldIdToLocalId[*i] = localId; + } +} + + +OldDictionaryFileSeqRead::~OldDictionaryFileSeqRead(void) +{ +} + + +OldDictionaryFileSeqWrite::~OldDictionaryFileSeqWrite(void) +{ +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/olddictionaryfile.h b/searchlib/src/vespa/searchlib/index/olddictionaryfile.h new file mode 100644 index 00000000000..cd7d2171b61 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/olddictionaryfile.h @@ -0,0 +1,208 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "postinglistcounts.h" +#include "postinglisthandle.h" +#include +#include +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace index +{ + +class OldDictionaryIndexMapping +{ +private: + std::vector _fieldIdToLocalId; + std::vector _indexNames; + std::vector _indexIds; + std::vector _washedIndexIds; + + void + setupHelper(const Schema &schema); + +public: + OldDictionaryIndexMapping(void); + + ~OldDictionaryIndexMapping(void); + + static uint32_t + noLocalId(void) + { + return std::numeric_limits::max(); + } + + uint32_t + getLocalId(uint32_t dfid) const + { + if (dfid < _fieldIdToLocalId.size()) + return _fieldIdToLocalId[dfid]; + else + return noLocalId(); + } + + uint32_t + getExternalId(uint32_t localId) const + { + return _indexIds[localId]; + } + + void + setup(const Schema &schema, + const std::vector &indexNames); + + void + setup(const Schema &schema, + const std::vector &indexes); + + const std::vector & + getIndexIds(void) const + { + return _indexIds; + } + + const std::vector & + getWashedIndexIds(void) const + { + return _washedIndexIds; + } + + const std::vector & + getIndexNames(void) const + { + return _indexNames; + } + + uint32_t + getNumIndexes(void) const + { + return _indexIds.size(); + 
}
+};
+
+
+/**
+ * Interface for dictionary file containing words and counts for words.
+ *
+ * This is "at" schema level.
+ */
+class OldDictionaryFileSeqRead
+{
+public:
+    OldDictionaryFileSeqRead(void)
+    {
+    }
+
+    virtual
+    ~OldDictionaryFileSeqRead(void);
+
+    /**
+     * Read word and counts.  Only nonzero counts are returned.  If at
+     * end of dictionary then noWordNumHigh() is returned as word number.
+     */
+    virtual void
+    readWord(vespalib::string &word,
+             uint64_t &wordNum,
+             std::vector<uint32_t> &indexes,
+             std::vector<PostingListCounts> &counts) = 0;
+
+    /**
+     * Open dictionary file for sequential read.  The supplied schema
+     * decides what existing indexes are visible (i.e. indexes in dictionary
+     * but not in schema are hidden).  A dictionary might have no visible
+     * indexes.
+     */
+    virtual bool
+    open(const vespalib::string &name, const Schema &schema,
+         const TuneFileSeqRead &tuneFileRead) = 0;
+
+    /**
+     * Close dictionary file.
+     */
+    virtual bool
+    close(void) = 0;
+
+    /*
+     * Get visible indexes available in dictionary.
+     */
+    virtual void
+    getIndexes(std::vector<uint32_t> &indexes) = 0;
+
+    /** Word number meaning "no word" (0). */
+    static uint64_t
+    noWordNum(void)
+    {
+        return 0u;
+    }
+
+    /** Word number reported by readWord() at end of dictionary. */
+    static uint64_t
+    noWordNumHigh(void)
+    {
+        return std::numeric_limits<uint64_t>::max();
+    }
+};
+
+/**
+ * Interface for dictionary file containing words and count for words.
+ *
+ * This is "at" schema level.
+ *
+ * The file should contain the set of field names for which the dictionary
+ * is valid, to simplify handling of schema changes.
+ */
+class OldDictionaryFileSeqWrite
+{
+protected:
+public:
+    OldDictionaryFileSeqWrite(void)
+    {
+    }
+
+    virtual
+    ~OldDictionaryFileSeqWrite(void);
+
+    /**
+     * Write word and counts.  Only nonzero counts should be supplied.
+     */
+    virtual void
+    writeWord(const vespalib::stringref &word,
+              const std::vector<uint32_t> &indexes,
+              const std::vector<PostingListCounts> &counts) = 0;
+
+    /**
+     * Open dictionary file for sequential write.  The field with most
+     * words should be first for optimal compression.
+ */ + virtual bool + open(const vespalib::string &name, + uint32_t numWords, + uint32_t chunkSize, + const std::vector &indexes, + const Schema &schema, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext) = 0; + + /** + * Close dictionary file. + */ + virtual bool + close(void) = 0; +}; + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp new file mode 100644 index 00000000000..78c7d28905b --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.cpp @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".index.postinglistcountfile"); +#include "postinglistcountfile.h" + +namespace search +{ + +namespace index +{ + +PostingListCountFileSeqRead::PostingListCountFileSeqRead(void) +{ +} + + +PostingListCountFileSeqRead::~PostingListCountFileSeqRead(void) +{ +} + + +void +PostingListCountFileSeqRead:: +getParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +PostingListCountFileSeqWrite::PostingListCountFileSeqWrite(void) +{ +} + + +PostingListCountFileSeqWrite::~PostingListCountFileSeqWrite(void) +{ +} + + +void +PostingListCountFileSeqWrite:: +setParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +void +PostingListCountFileSeqWrite:: +getParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/postinglistcountfile.h b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h new file mode 100644 index 00000000000..f9b1c66bf9c --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistcountfile.h @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include "postinglistcounts.h" +#include +#include + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace index +{ + +class PostingListCounts; +class PostingListHandle; + +/** + * Interface for count files describing where in a posting list file + * the various words are located. It is merged at index time with a + * text-only dictionary to produce a binary dictionary optimized for + * random access used at search time. + * + * TODO: Might want to allow semi-random access for prefix searches, + * allowing for less data in posting list files being duplicated from + * the count file. + */ +class PostingListCountFileSeqRead +{ +public: + PostingListCountFileSeqRead(void); + + virtual + ~PostingListCountFileSeqRead(void); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; + + /** + * Open posting list count file for sequential read. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead) = 0; + + /** + * Close posting list count file. + */ + virtual bool + close(void) = 0; + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms); +}; + + +class PostingListCountFileSeqWrite +{ +public: + PostingListCountFileSeqWrite(void); + + virtual + ~PostingListCountFileSeqWrite(void); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. 
+ */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; + + /** + * Open posting list count file for sequential write. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext) = 0; + + /** + * Close posting list count file. + */ + virtual bool + close(void) = 0; + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms); +}; + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp b/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp new file mode 100644 index 00000000000..8268b1e5c64 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistcounts.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.postinglistcounts"); +#include +#include "postinglistcounts.h" + +namespace search +{ + +namespace index +{ + +void swap(PostingListCounts & a, PostingListCounts & b) +{ + a.swap(b); +} + +using vespalib::nbostream; + +nbostream & +operator<<(nbostream &out, const PostingListCounts::Segment &segment) +{ + out << segment._bitLength << segment._numDocs << segment._lastDoc; + return out; +} + + +nbostream & +operator>>(nbostream &in, PostingListCounts::Segment &segment) +{ + in >> segment._bitLength >> segment._numDocs >> segment._lastDoc; + return in; +} + + +nbostream & +operator<<(nbostream &out, const PostingListCounts &counts) +{ + out << counts._numDocs << counts._bitLength; + size_t numSegments = counts._segments.size(); + out << numSegments; + for (size_t seg = 0; seg < numSegments; ++seg) { + out << counts._segments[seg]; + } + return out; +} + + +nbostream & +operator>>(nbostream &in, PostingListCounts &counts) +{ + in >> counts._numDocs >> counts._bitLength; + size_t numSegments = 0; + in >> numSegments; + counts._segments.reserve(numSegments); + counts._segments.clear(); + for (size_t seg = 0; seg < numSegments; ++seg) { + PostingListCounts::Segment segment; + in >> segment; + counts._segments.push_back(segment); + } + return in; +} + + +nbostream & +operator<<(nbostream &out, const PostingListOffsetAndCounts &offsetAndCounts) +{ + out << offsetAndCounts._offset; + out << offsetAndCounts._accNumDocs; + out << offsetAndCounts._counts; + return out; +} + + +nbostream & +operator>>(nbostream &in, PostingListOffsetAndCounts &offsetAndCounts) +{ + in >> offsetAndCounts._offset; + in >> offsetAndCounts._accNumDocs; + in >> offsetAndCounts._counts; + return in; +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/postinglistcounts.h b/searchlib/src/vespa/searchlib/index/postinglistcounts.h new file mode 100644 index 00000000000..564955d8c6b --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/index/postinglistcounts.h @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + +namespace index +{ + +/** + * Basic class for holding the result of a dictionary lookup result + * for a word, to optimize query tree node child order and know from + * where in the posting list files to read data. A posting list with + * 64 or fewer documents does not have skip info. + */ +class PostingListCounts +{ +public: + /* + * Nested class for describing a segment of a large posting list. + * Very large posting lists are divided into segments, to limit + * memory consumption (for buffering) and can be viewed as a + * high level skip list stored in the dictionary. If the posting + * list for a word is less than 256 kB then it is not split into + * segments. + */ + class Segment + { + public: + uint64_t _bitLength; // Length of segment + uint32_t _numDocs; // Number of documents in segment + uint32_t _lastDoc; // Last document id in segment + + Segment(void) + : _bitLength(0), + _numDocs(0), + _lastDoc(0) + { + } + + bool + operator==(const Segment &rhs) const + { + return (_bitLength == rhs._bitLength && + _numDocs == rhs._numDocs && + _lastDoc == rhs._lastDoc); + } + + friend vespalib::nbostream & + operator<<(vespalib::nbostream &out, const Segment &segment); + + friend vespalib::nbostream & + operator>>(vespalib::nbostream &in, Segment &segment); + }; + + /** + * Counts might span multiple posting lists (i.e. multiple words + * for prefix search), numDocs is then sum of documents for each posting + * list, which segment info is absent. + */ + uint64_t _numDocs; // Number of documents for word(s) + uint64_t _bitLength; // Length of postings for word(s) + + /** + * Very large posting lists with skip info are split into multiple + * segments. 
If there are more than one segments for a word then the + * last segment has skip info even if it has fewer than 64 documents. + */ + std::vector _segments; + + PostingListCounts(void) + : _numDocs(0), + _bitLength(0), + _segments() + { + } + void swap(PostingListCounts & rhs) { + std::swap(_numDocs, rhs._numDocs); + std::swap(_bitLength, rhs._bitLength); + std::swap(_segments, rhs._segments); + } + + void + clear(void) + { + _bitLength = 0; + _numDocs = 0; + _segments.clear(); + } + + bool + operator==(const PostingListCounts &rhs) const + { + return (_numDocs == rhs._numDocs && + _bitLength == rhs._bitLength && + _segments == rhs._segments); + } + + friend vespalib::nbostream & + operator<<(vespalib::nbostream &out, const PostingListCounts &counts); + + friend vespalib::nbostream & + operator>>(vespalib::nbostream &in, PostingListCounts &counts); +}; + +void swap(PostingListCounts & a, PostingListCounts & b); + + +class PostingListOffsetAndCounts +{ +public: + uint64_t _offset; + uint64_t _accNumDocs; // Used by prefix search for now. + PostingListCounts _counts; + + PostingListOffsetAndCounts(void) + : _offset(0), + _accNumDocs(0u), + _counts() + { + } + + friend vespalib::nbostream & + operator<<(vespalib::nbostream &out, + const PostingListOffsetAndCounts &offsetAndCounts); + + friend vespalib::nbostream & + operator>>(vespalib::nbostream &in, + PostingListOffsetAndCounts &offsetAndCounts); +}; + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp new file mode 100644 index 00000000000..677ca101d88 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.postinglistfile"); +#include "postinglistfile.h" + +namespace search +{ + +namespace index +{ + +PostingListFileSeqRead::PostingListFileSeqRead(void) + : _counts(), + _residueDocs(0) +{ +} + + +PostingListFileSeqRead::~PostingListFileSeqRead(void) +{ +} + + +void +PostingListFileSeqRead:: +getParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +void +PostingListFileSeqRead:: +setFeatureParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +void +PostingListFileSeqRead:: +getFeatureParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +PostingListFileSeqWrite::PostingListFileSeqWrite(void) + : _counts() +{ +} + + +PostingListFileSeqWrite::~PostingListFileSeqWrite(void) +{ +} + + +void +PostingListFileSeqWrite:: +setParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +void +PostingListFileSeqWrite:: +getParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +void +PostingListFileSeqWrite:: +setFeatureParams(const PostingListParams ¶ms) +{ + (void) params; +} + + +void +PostingListFileSeqWrite:: +getFeatureParams(PostingListParams ¶ms) +{ + params.clear(); +} + + +PostingListFileRandRead:: +PostingListFileRandRead(void) + : _memoryMapped(false) +{ +} + + +PostingListFileRandRead::~PostingListFileRandRead(void) +{ +} + + +void +PostingListFileRandRead::afterOpen(FastOS_FileInterface &file) +{ + _memoryMapped = file.MemoryMapPtr(0) != NULL; +} + + +PostingListFileRandReadPassThrough:: +PostingListFileRandReadPassThrough(PostingListFileRandRead *lower, + bool ownLower) + : _lower(lower), + _ownLower(ownLower) +{ +} + + +PostingListFileRandReadPassThrough::~PostingListFileRandReadPassThrough(void) +{ + if (_ownLower) + delete _lower; +} + + +search::queryeval::SearchIterator * +PostingListFileRandReadPassThrough:: +createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const +{ + return 
_lower->createIterator(counts, handle, matchData, usebitVector); +} + + +void +PostingListFileRandReadPassThrough:: +readPostingList(const PostingListCounts &counts, + uint32_t firstSegment, + uint32_t numSegments, + PostingListHandle &handle) +{ + _lower->readPostingList(counts, firstSegment, numSegments, + handle); +} + + +bool +PostingListFileRandReadPassThrough::open(const vespalib::string &name, + const TuneFileRandRead &tuneFileRead) +{ + bool ret = _lower->open(name, tuneFileRead); + _memoryMapped = _lower->getMemoryMapped(); + return ret; +} + + +bool +PostingListFileRandReadPassThrough::close(void) +{ + return _lower->close(); +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h new file mode 100644 index 00000000000..1518948cc6f --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -0,0 +1,344 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace vespalib +{ + +class nbostream; + +} + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace index +{ + + +class DocIdAndFeatures; + + +/** + * Interface for posting list files containing document ids and features + * for words. + */ +class PostingListFileSeqRead +{ +protected: + PostingListCounts _counts; + unsigned int _residueDocs; // Docids left to read for word +public: + PostingListFileSeqRead(void); + + virtual + ~PostingListFileSeqRead(void); + + /** + * Read document id and features. + */ + virtual void + readDocIdAndFeatures(DocIdAndFeatures &features) = 0; + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. 
Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; + + /** + * Read counts for a word. + */ + virtual void + readCounts(const PostingListCounts &counts) = 0; + + /** + * Open posting list file for sequential read. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileSeqRead &tuneFileRead) = 0; + + /** + * Close posting list file. + */ + virtual bool + close(void) = 0; + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms); + + /* + * Set (word, docid) feature parameters. + * + * Typically can only enable or disable cooked features. + */ + virtual void + setFeatureParams(const PostingListParams ¶ms); + + /* + * Get current (word, docid) feature parameters. + */ + virtual void + getFeatureParams(PostingListParams ¶ms); + + // Methods used when generating posting list for common word pairs. + + /* + * Get current posting offset, measured in bits. First posting list + * starts at 0, i.e. file header is not accounted for here. + * + * @return current posting offset, measured in bits. + */ + virtual uint64_t + getCurrentPostingOffset(void) const = 0; + + /** + * Set current posting offset, measured in bits. First posting + * list starts at 0, i.e. file header is not accounted for here. + * + * @param Offset start of posting lists for word pair. + * @param endOffset end of posting lists for word pair. + * @param readAheadOffset end of posting list for either this or a + * later word pair, depending on disk seek cost. + */ + virtual void + setPostingOffset(uint64_t offset, + uint64_t endOffset, + uint64_t readAheadOffset) = 0; + + /** + * Get counts read by last readCounts(). 
+ */ + const PostingListCounts & + getCounts(void) const + { + return _counts; + } + + PostingListCounts & + getCounts(void) + { + return _counts; + } +}; + +/** + * Interface for posting list files containing document ids and features + * for words. + */ +class PostingListFileSeqWrite +{ +protected: + PostingListCounts _counts; +public: + PostingListFileSeqWrite(void); + + virtual + ~PostingListFileSeqWrite(void); + + /** + * Write document id and features. + */ + virtual void + writeDocIdAndFeatures(const DocIdAndFeatures &features) = 0; + + /** + * Flush word (during write) after it is complete to buffers, i.e. + * prepare for next word, but not for application crash. + */ + virtual void + flushWord(void) = 0; + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Implies + * flush from memory to disk, and possibly also sync to permanent + * storage media. + */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; + + /** + * Open posting list file for sequential write. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext) = 0; + + /** + * Close posting list file. + */ + virtual bool + close(void) = 0; + + /* + * Set parameters. + */ + virtual void + setParams(const PostingListParams ¶ms); + + /* + * Get current parameters. + */ + virtual void + getParams(PostingListParams ¶ms); + + /* + * Set (word, docid) feature parameters. + */ + virtual void + setFeatureParams(const PostingListParams ¶ms); + + /* + * Get current (word, docid) feature parameters. 
+ */ + virtual void + getFeatureParams(PostingListParams ¶ms); + + PostingListCounts & + getCounts(void) + { + return _counts; + } +}; + + +/** + * Interface for posting list files containing document ids and features + * for words. + */ +class PostingListFileRandRead +{ +protected: + // Can be examined after open + bool _memoryMapped; +public: + typedef std::shared_ptr SP; + + PostingListFileRandRead(void); + + virtual + ~PostingListFileRandRead(void); + + /** + * Create iterator for single word. Semantic lifetime of counts and + * handle must exceed lifetime of iterator. + * + * XXX: TODO: How to read next set of segments from disk if handle + * didn't cover the whole word, probably need access to higher level + * API above caches. + */ + virtual search::queryeval::SearchIterator * + createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const = 0; + + + /** + * Read (possibly partial) posting list into handle. + */ + virtual void + readPostingList(const PostingListCounts &counts, + uint32_t firstSegment, + uint32_t numSegments, + PostingListHandle &handle) = 0; + + /** + * Open posting list file for random read. + */ + virtual bool + open(const vespalib::string &name, + const TuneFileRandRead &tuneFileRead) = 0; + + /** + * Close posting list file. + */ + virtual bool + close(void) = 0; + + bool + getMemoryMapped(void) const + { + return _memoryMapped; + } + +protected: + void + afterOpen(FastOS_FileInterface &file); +}; + + +/** + * Passthrough class. 
+ */ +class PostingListFileRandReadPassThrough : public PostingListFileRandRead +{ +protected: + PostingListFileRandRead *_lower; + bool _ownLower; + +public: + PostingListFileRandReadPassThrough(PostingListFileRandRead *lower, + bool ownLower); + + virtual + ~PostingListFileRandReadPassThrough(void); + + virtual search::queryeval::SearchIterator * + createIterator(const PostingListCounts &counts, + const PostingListHandle &handle, + const search::fef::TermFieldMatchDataArray &matchData, + bool usebitVector) const; + + virtual void + readPostingList(const PostingListCounts &counts, + uint32_t firstSegment, + uint32_t numSegments, + PostingListHandle &handle); + + virtual bool + open(const vespalib::string &name, const TuneFileRandRead &tuneFileRead); + + virtual bool + close(void); +}; + + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp b/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp new file mode 100644 index 00000000000..97a5a6f3da6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglisthandle.cpp @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".index.postinglisthandle"); +#include "postinglisthandle.h" +#include + +namespace search +{ + +namespace index +{ + +search::queryeval::SearchIterator * +PostingListHandle::createIterator(const PostingListCounts &counts, + const search::fef::TermFieldMatchDataArray &matchData, + bool useBitVector) const +{ + (void) useBitVector; + return _file->createIterator(counts, *this, matchData, useBitVector); +} + + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/postinglisthandle.h b/searchlib/src/vespa/searchlib/index/postinglisthandle.h new file mode 100644 index 00000000000..605904e3912 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglisthandle.h @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { class BitVector; } +namespace search { namespace queryeval { class SearchIterator; } } +namespace search { namespace fef { class TermFieldMatchDataArray; } } + +namespace search { +namespace index { + +class PostingListFileRandRead; + +/** + * Class for owning a posting list in memory after having read it from + * posting list file, or referencing a chunk of memory containing the + * posting list (if the file was memory mapped). 
+ */ +class PostingListHandle +{ +public: + typedef std::unique_ptr UP; + // Key portion + PostingListFileRandRead *_file; // File containing posting list + uint64_t _bitOffset; // posting list start relative to start of file + uint64_t _bitLength; // Length of posting list, in bits + + // Value portion + uint32_t _firstSegment; // First segment for word + uint32_t _numSegments; // Number of segments + uint64_t _bitOffsetMem; // _mem relative to start of file + const void *_mem; // Memory backing posting list after read/mmap + void *_allocMem; // What to free after posting list + size_t _allocSize; // Size of allocated memory + + PostingListHandle(void) + : _file(NULL), + _bitOffset(0), + _bitLength(0), + _firstSegment(0), + _numSegments(0), + _bitOffsetMem(0), + _mem(NULL), + _allocMem(NULL), + _allocSize(0) + { + } + + ~PostingListHandle(void) + { + if (_allocMem != NULL) + free(_allocMem); + } + + /** + * Create iterator for single word. Semantic lifetime of counts and + * handle must exceed lifetime of iterator. + * + * XXX: TODO: How to read next set of segments from disk if handle + * didn't cover the whole word, probably need access to higher level + * API above caches. + */ + search::queryeval::SearchIterator * + createIterator(const PostingListCounts &counts, + const search::fef::TermFieldMatchDataArray &matchData, + bool useBitVector=false) const; + + /** + * Drop value portion of handle. + */ + void + drop(void) + { + _firstSegment = 0; + _numSegments = 0; + _bitOffsetMem = 0; + _mem = NULL; + if (_allocMem != NULL) { + free(_allocMem); + _allocMem = NULL; + } + _allocSize = 0; + } +}; + + +} // namespace search::index +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.cpp b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp new file mode 100644 index 00000000000..9c64587f7ee --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.cpp @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".index.postinglistparams"); +#include "postinglistparams.h" +#include + +namespace +{ + +vespalib::string empty; + +} + +namespace search +{ + +namespace index +{ + +bool +PostingListParams::isSet(const vespalib::string &key) const +{ + Map::const_iterator it; + + it = _map.find(key); + if (it != _map.end()) + return true; + return false; +} + + +void +PostingListParams::setStr(const vespalib::string &key, + const vespalib::string &val) +{ + _map[key] = val; +} + + +const vespalib::string & +PostingListParams::getStr(const vespalib::string &key) const +{ + Map::const_iterator it; + + it = _map.find(key); + if (it != _map.end()) + return it->second; + return empty; +} + + +void +PostingListParams::clear(void) +{ + _map.clear(); +} + + +void +PostingListParams::erase(const vespalib::string &key) +{ + _map.erase(key); +} + + +bool +PostingListParams::operator!=(const PostingListParams &rhs) const +{ + return _map != rhs._map; +} + +template +void +PostingListParams::set(const vespalib::string &key, + const TYPE &val) +{ + std::ostringstream os; + + os << val; + _map[key] = os.str(); +} + + +template +void +PostingListParams::get(const vespalib::string &key, + TYPE &val) const +{ + std::istringstream is; + Map::const_iterator it; + + it = _map.find(key); + if (it != _map.end()) { + is.str(it->second); + is >> val; + } +} + + +template void +PostingListParams::set(const vespalib::string &key, + const bool &val); + +template void +PostingListParams::get(const vespalib::string &key, + bool &val) const; + + +template void +PostingListParams::set(const vespalib::string &key, + const int32_t &val); + +template void +PostingListParams::get(const vespalib::string &key, + int32_t &val) const; + +template void +PostingListParams::set(const vespalib::string &key, + const uint32_t &val); + +template void +PostingListParams::get(const vespalib::string &key, + 
uint32_t &val) const; + + +template void +PostingListParams::set(const vespalib::string &key, + const uint64_t &val); + +template void +PostingListParams::get(const vespalib::string &key, + uint64_t &val) const; + +} // namespace index + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/postinglistparams.h b/searchlib/src/vespa/searchlib/index/postinglistparams.h new file mode 100644 index 00000000000..d8424b81835 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/postinglistparams.h @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search +{ + +namespace index +{ + +class PostingListParams +{ + typedef std::map Map; + Map _map; +public: + template + void + set(const vespalib::string &key, const TYPE &val); + + template + void + get(const vespalib::string &key, TYPE &val) const; + + bool + isSet(const vespalib::string &key) const; + + void + setStr(const vespalib::string &key, const vespalib::string &val); + + const vespalib::string & + getStr(const vespalib::string &key) const; + + void + clear(void); + + void + erase(const vespalib::string &key); + + bool + operator!=(const PostingListParams &rhs) const; +}; + +} // namespace index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/index/schemautil.cpp b/searchlib/src/vespa/searchlib/index/schemautil.cpp new file mode 100644 index 00000000000..6019c7ce4bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/schemautil.cpp @@ -0,0 +1,217 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include "schemautil.h" +#include +LOG_SETUP(".index.schemautil"); + +namespace search +{ + +namespace index +{ + +SchemaUtil::IndexSettings +SchemaUtil::getIndexSettings(const Schema &schema, + const uint32_t index) +{ + IndexSettings ret; + Schema::DataType indexDataType(Schema::STRING); + bool error = false; + bool somePrefixes = false; + bool someNotPrefixes = false; + bool somePhrases = false; + bool someNotPhrases = false; + bool somePositions = false; + bool someNotPositions = false; + + const Schema::IndexField &iField = schema.getIndexField(index); + if (iField.hasPhrases()) + somePhrases = true; + else + someNotPhrases = true; + if (iField.hasPrefix()) + somePrefixes = true; + else + someNotPrefixes = true; + if (iField.hasPositions()) + somePositions = true; + else + someNotPositions = true; + indexDataType = iField.getDataType(); + switch (indexDataType) { + case Schema::STRING: + break; + default: + error = true; + LOG(error, + "Field %s has bad data type", + iField.getName().c_str()); + } + + return IndexSettings(indexDataType, error, + somePrefixes && !someNotPrefixes, + somePhrases && !someNotPhrases, + somePositions && !someNotPositions); +} + + +bool +SchemaUtil::IndexIterator::hasOldFields(const Schema &oldSchema, + bool phrases) const +{ + assert(isValid()); + const Schema::IndexField &newField = + getSchema().getIndexField(getIndex()); + const vespalib::string &fieldName = newField.getName(); + uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName); + if (oldFieldId == Schema::UNKNOWN_FIELD_ID) + return false; + const Schema::IndexField &oldField = + oldSchema.getIndexField(oldFieldId); + if (oldField.getDataType() != newField.getDataType()) + return false; // wrong data type + if (!phrases) + return true; + return oldField.hasPhrases(); +} + + +bool +SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, + bool phrases) const +{ + assert(isValid()); + const Schema::IndexField &newField = + 
getSchema().getIndexField(getIndex()); + const vespalib::string &fieldName = newField.getName(); + uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName); + if (oldFieldId == Schema::UNKNOWN_FIELD_ID) + return false; + if (phrases) { + IndexIterator oldIterator(oldSchema, oldFieldId); + IndexSettings settings = oldIterator.getIndexSettings(); + if (!settings.hasPhrases()) + return false; + } + const Schema::IndexField &oldField = + oldSchema.getIndexField(oldFieldId); + if (oldField.getDataType() != newField.getDataType() || + oldField.getCollectionType() != newField.getCollectionType()) + return false; + return true; +} + + +bool +SchemaUtil::validateIndexField(const Schema::IndexField &field) +{ + bool ok = true; + if (!validateIndexFieldType(field.getDataType())) { + LOG(error, + "Field %s has bad data type", + field.getName().c_str()); + ok = false; + } + if (field.getDataType() != Schema::STRING) { + if (field.hasPrefix()) { + LOG(error, + "Field %s is non-string but has prefix", + field.getName().c_str()); + ok = false; + } + if (field.hasPhrases()) { + LOG(error, + "Field %s is non-string but has phrases", + field.getName().c_str()); + ok = false; + } + if (field.hasPositions()) { + LOG(error, + "Field %s is non-string but has positions", + field.getName().c_str()); + ok = false; + } + } + if (field.hasPhrases() && !field.hasPositions()) { + LOG(error, + "Field %s has phrases but not positions", + field.getName().c_str()); + ok = false; + } + return ok; +} + + +bool +SchemaUtil::addIndexField(Schema &schema, + const Schema::IndexField &field) +{ + bool ok = true; + if (!validateIndexField(field)) + ok = false; + uint32_t fieldId = schema.getIndexFieldId(field.getName()); + if (fieldId != Schema::UNKNOWN_FIELD_ID) { + LOG(error, + "Field %s already exists in schema", + field.getName().c_str()); + ok = false; + } + if (ok) + schema.addIndexField(field); + return ok; +} + + +bool +SchemaUtil::validateSchema(const Schema &schema) +{ + bool ok = true; + for 
(IndexIterator it(schema); it.isValid(); ++it) { + uint32_t fieldId = it.getIndex(); + const Schema::IndexField &field = schema.getIndexField(fieldId); + if (!validateIndexField(field)) + ok = false; + if (schema.getIndexFieldId(field.getName()) != fieldId) { + LOG(error, + "Duplcate field %s", + field.getName().c_str()); + ok = false; + } + } + for (uint32_t fsId = 0; fsId < schema.getNumFieldSets(); ++fsId) { + const Schema::FieldSet &fs = schema.getFieldSet(fsId); + if (schema.getFieldSetId(fs.getName()) != fsId) { + LOG(error, + "Duplicate field set %s", + fs.getName().c_str()); + ok = false; + } + } + return ok; +} + + +bool +SchemaUtil::getIndexIds(const Schema &schema, + DataType dataType, + std::vector &indexes) +{ + typedef SchemaUtil::IndexIterator IndexIterator; + + indexes.clear(); + for (IndexIterator i(schema); i.isValid(); ++i) { + SchemaUtil::IndexSettings settings = i.getIndexSettings(); + if (settings.hasError()) + return false; + if (settings.getDataType() == dataType) + indexes.push_back(i.getIndex()); + } + return true; +} + + +} // namespace search::index +} // namespace search diff --git a/searchlib/src/vespa/searchlib/index/schemautil.h b/searchlib/src/vespa/searchlib/index/schemautil.h new file mode 100644 index 00000000000..1f7c351c43e --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/schemautil.h @@ -0,0 +1,234 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +namespace index +{ + + +class SchemaUtil +{ +public: + typedef Schema::DataType DataType; + + class IndexSettings + { + DataType _dataType; + bool _error; // Schema is bad. 
+ bool _prefix; + bool _phrases; + bool _positions; + + public: + const DataType & + getDataType(void) const + { + return _dataType; + } + + bool + hasError(void) const + { + return _error; + } + + bool + hasPrefix(void) const + { + return _prefix; + } + + bool + hasPhrases(void) const + { + return _phrases; + } + + bool + hasPositions(void) const + { + return _positions; + } + + IndexSettings(void) + : _dataType(Schema::STRING), + _error(false), + _prefix(false), + _phrases(false), + _positions(false) + { + } + + IndexSettings(const IndexSettings &rhs) + : _dataType(rhs._dataType), + _error(rhs._error), + _prefix(rhs._prefix), + _phrases(rhs._phrases), + _positions(rhs._positions) + { + } + + IndexSettings(DataType dataType, + bool error, + bool prefix, + bool phrases, + bool positions) + : _dataType(dataType), + _error(error), + _prefix(prefix), + _phrases(phrases), + _positions(positions) + { + } + + IndexSettings & + operator=(const IndexSettings &rhs) + { + IndexSettings tmp(rhs); + swap(tmp); + return *this; + } + + void + swap(IndexSettings &rhs) + { + std::swap(_dataType, rhs._dataType); + std::swap(_error, rhs._error); + std::swap(_prefix, rhs._prefix); + std::swap(_phrases, rhs._phrases); + std::swap(_positions, rhs._positions); + } + }; + + class IndexIterator + { + const Schema &_schema; + uint32_t _index; + + public: + IndexIterator(const Schema &schema) + : _schema(schema), + _index(0u) + { + } + + IndexIterator(const Schema &schema, uint32_t index) + : _schema(schema), + _index(index) + { + } + + IndexIterator(const Schema &schema, const IndexIterator &rhs) + : _schema(schema), + _index(Schema::UNKNOWN_FIELD_ID) + { + const vespalib::string &name = rhs.getName(); + _index = schema.getIndexFieldId(name); + } + + const Schema & + getSchema(void) const + { + return _schema; + } + + uint32_t + getIndex(void) const + { + return _index; + } + + const vespalib::string & + getName(void) const + { + return _schema.getIndexField(_index).getName(); + } + + 
IndexIterator & + operator++(void) + { + if (_index < _schema.getNumIndexFields()) { + ++_index; + } + return *this; + } + + bool + isValid(void) const + { + return _index < _schema.getNumIndexFields(); + } + + IndexSettings + getIndexSettings(void) const + { + return SchemaUtil::getIndexSettings(_schema, _index); + } + + /** + * Return if old schema has at least one usable input field + * with matching data type. If we want phrases then all input + * fields usable for terms must also be usable for phrases. + * + * @param oldSchema old schema, present in an input index + * @param phrases ask for phrase files + */ + bool + hasOldFields(const Schema &oldSchema, bool phrases) const; + + /** + * Return if fields in old schema matches fields in new + * schema, allowing for slightly faster fusion operations. + * Field collections must have same set of fields which must + * also match between new and old schema. + * + * @param oldSchema old schema, present in an input index + * @param phrases ask for phrase files + */ + bool + hasMatchingOldFields(const Schema &oldSchema, bool phrases) const; + }; + + static IndexSettings + getIndexSettings(const Schema &schema, const uint32_t index); + + + static bool + validateIndexFieldType(DataType dataType) + { + switch (dataType) { + case Schema::STRING: + case Schema::INT32: + return true; + default: + ; + } + return false; + } + + static bool + validateIndexField(const Schema::IndexField &field); + + static bool + addIndexField(Schema &schema, + const Schema::IndexField &field); + + static bool + validateSchema(const Schema &schema); + + static bool + getIndexIds(const Schema &schema, + DataType dataType, + std::vector &indexes); +}; + + +} // namespace search::index + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/.gitignore b/searchlib/src/vespa/searchlib/memoryindex/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/memoryindex/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt new file mode 100644 index 00000000000..b9e5bf5a4ea --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_searchlib_memoryindex OBJECT + SOURCES + compact_document_words_store.cpp + dictionary.cpp + documentinverter.cpp + document_remover.cpp + featurestore.cpp + fieldinverter.cpp + memoryfieldindex.cpp + memoryindex.cpp + ordereddocumentinserter.cpp + postingiterator.cpp + urlfieldinverter.cpp + wordstore.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/memoryindex/OWNERS b/searchlib/src/vespa/searchlib/memoryindex/OWNERS new file mode 100644 index 00000000000..e6340232840 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/OWNERS @@ -0,0 +1,2 @@ +tegge +geirst diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp new file mode 100644 index 00000000000..05f242b0928 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".memoryindex.compact_document_words_store"); +#include "compact_document_words_store.h" + +namespace search { +namespace memoryindex { + +typedef CompactDocumentWordsStore::Builder Builder; + +namespace { + +constexpr size_t MIN_CLUSTERS = 1024u; +/** + * Number of buffer entries needed to serialize the builder: one entry + * for the word count plus one entry per word ref. + */ +size_t +getSerializedSize(const Builder &builder) +{ + size_t size = 1 + builder.words().size(); // numWords, [words] + return size; +} +/** + * Write the word count followed by every word ref of the builder, + * starting at 'begin'. + * + * @return pointer one past the last entry written. + */ +uint32_t * +serialize(const Builder &builder, uint32_t *begin) +{ + uint32_t *buf = begin; + const Builder::WordRefVector &words = builder.words(); + *buf++ = words.size(); + for (auto word : words) { + *buf++ = word.ref(); + } + return buf; +} + +} +/** + * Append a word ref to the builder. + * + * @return the builder itself, so that calls can be chained. + */ +CompactDocumentWordsStore::Builder & +CompactDocumentWordsStore::Builder::insert(btree::EntryRef wordRef) +{ + _words.push_back(wordRef); + return *this; +} +/** + * Load the next word ref from the buffer and count it as consumed. + * The caller must have checked that _remainingWords is non-zero. + */ +inline void +CompactDocumentWordsStore::Iterator::nextWord() +{ + _wordRef = *_buf++; + _remainingWords--; +} +/** + * Create an invalid iterator without a backing buffer. + */ +CompactDocumentWordsStore::Iterator::Iterator() + : _buf(NULL), + _remainingWords(0), + _wordRef(0), + _valid(false) +{ +} +/** + * Create an iterator over a serialized entry: a word count followed by + * that many word refs. The iterator is positioned on the first word + * ref, or is invalid when the count is zero. + */ +CompactDocumentWordsStore::Iterator::Iterator(const uint32_t *buf) + : _buf(buf), + _remainingWords(0), + _wordRef(0), + _valid(true) +{ + _remainingWords = *_buf++; + if (_remainingWords > 0) { + nextWord(); + } else { + _valid = false; + } +} +/** + * Step to the next word ref; the iterator becomes invalid when no word + * refs remain. + */ +CompactDocumentWordsStore::Iterator & +CompactDocumentWordsStore::Iterator::operator++() +{ + if (_remainingWords > 0) { + nextWord(); + } else { + _valid = false; + } + return *this; +} +/** + * Register the single buffer type with the data store and set up its + * active buffers. + */ +CompactDocumentWordsStore::Store::Store() + : _store(), + _type(1, + MIN_CLUSTERS, + RefType::offsetSize()), + _typeId(0) +{ + _store.addType(&_type); + _store.initActiveBuffers(); +} +/** + * Drop the buffers held by the data store. + */ +CompactDocumentWordsStore::Store::~Store() +{ + _store.dropBuffers(); +} +/** + * Serialize the word refs of the builder into the active buffer. + * + * @return reference to the start of the serialized entry. + */ +btree::EntryRef +CompactDocumentWordsStore::Store::insert(const Builder &builder) +{ + size_t serializedSize = getSerializedSize(builder); + _store.ensureBufferCapacity(_typeId, serializedSize); + + uint32_t activeBufferId = 
_store.getActiveBufferId(_typeId); + btree::BufferState &state = _store.getBufferState(activeBufferId); + size_t oldSize = state.size(); + RefType ref(oldSize, activeBufferId); + assert(oldSize == ref.offset()); + + uint32_t *begin = _store.getBufferEntry(activeBufferId, oldSize); + uint32_t *end = serialize(builder, begin); + assert(size_t(end - begin) == serializedSize); + state.pushed_back(serializedSize); + + return ref; +} + +CompactDocumentWordsStore::Iterator +CompactDocumentWordsStore::Store::get(btree::EntryRef ref) const +{ + RefType internalRef(ref); + const uint32_t *buf = _store.getBufferEntry(internalRef.bufferId(), + internalRef.offset()); + return Iterator(buf); +} + +CompactDocumentWordsStore::CompactDocumentWordsStore() + : _docs(), + _wordsStore() +{ +} + +void +CompactDocumentWordsStore::insert(const Builder &builder) +{ + btree::EntryRef ref = _wordsStore.insert(builder); + auto insres = _docs.insert(std::make_pair(builder.docId(), ref)); + if (!insres.second) { + LOG(error, "Failed inserting remove info for docid %u", + builder.docId()); + abort(); + } +} + +void +CompactDocumentWordsStore::remove(uint32_t docId) +{ + _docs.erase(docId); +} + +CompactDocumentWordsStore::Iterator +CompactDocumentWordsStore::get(uint32_t docId) const +{ + auto itr = _docs.find(docId); + if (itr != _docs.end()) { + return _wordsStore.get(itr->second); + } + return Iterator(); +} + +MemoryUsage +CompactDocumentWordsStore::getMemoryUsage() const +{ + MemoryUsage usage; + usage.incAllocatedBytes(_docs.getMemoryConsumption()); + usage.incUsedBytes(_docs.getMemoryUsed()); + usage.merge(_wordsStore.getMemoryUsage()); + return usage; + +} + +} // namespace memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h new file mode 100644 index 00000000000..2841b02bab3 --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/memoryindex/compact_document_words_store.h @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include + +namespace search { +namespace memoryindex { + +/** + * Class used to store the {wordRef, fieldId, docId} tuples that are inserted + * into the memory index dictionary. These tuples are later used when removing + * all remains of a document from the posting lists of the dictionary. + */ +class CompactDocumentWordsStore +{ +public: + + /** + * Builder used to collect all wordRefs for a field. + */ + class Builder + { + public: + typedef std::unique_ptr UP; + typedef vespalib::Array WordRefVector; + + private: + uint32_t _docId; + WordRefVector _words; + + public: + Builder(uint32_t docId_) : _docId(docId_), _words() {} + Builder &insert(btree::EntryRef wordRef); + uint32_t docId() const { return _docId; } + const WordRefVector &words() const { return _words; } + }; + + /** + * Iterator over all {wordRef, fieldId} pairs for a document. + */ + class Iterator + { + private: + const uint32_t *_buf; + uint32_t _remainingWords; + uint32_t _wordRef; + bool _valid; + + inline void nextWord(); + + public: + Iterator(); + Iterator(const uint32_t *buf); + bool valid() const { return _valid; } + Iterator &operator++(); + btree::EntryRef wordRef() const { return _wordRef; } + bool hasBackingBuf() const { return _buf != nullptr; } + }; + + /** + * Store for all {wordRef, fieldId} pairs among all documents. 
+ */ + class Store + { + public: + typedef btree::DataStoreT > DataStoreType; + typedef DataStoreType::RefType RefType; + + private: + DataStoreType _store; + btree::BufferType _type; + const uint32_t _typeId; + + public: + Store(); + ~Store(); + btree::EntryRef insert(const Builder &builder); + Iterator get(btree::EntryRef ref) const; + MemoryUsage getMemoryUsage() const { return _store.getMemoryUsage(); } + }; + + typedef vespalib::hash_map DocumentWordsMap; + +private: + DocumentWordsMap _docs; + Store _wordsStore; + +public: + CompactDocumentWordsStore(); + void insert(const Builder &builder); + void remove(uint32_t docId); + Iterator get(uint32_t docId) const; + MemoryUsage getMemoryUsage() const; +}; + +} // namespace memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp b/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp new file mode 100644 index 00000000000..665d377af99 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/dictionary.cpp @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +#include +#include +#include "dictionary.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "fieldinverter.h" + +LOG_SETUP(".memoryindex.dictionary"); + +namespace search { + +using index::DocIdAndFeatures; +using index::WordDocElementFeatures; +using index::Schema; + +namespace memoryindex { +/** + * Create one field index per index field in the schema; the field id + * is the position of the field index in _fieldIndexes. + */ +Dictionary::Dictionary(const Schema & schema) + : _fieldIndexes(), + _numFields(schema.getNumIndexFields()) +{ + for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) { + auto fieldIndex = std::make_unique(schema, fieldId); + _fieldIndexes.push_back(std::move(fieldIndex)); + } +} + +Dictionary::~Dictionary(void) +{ +} + +/** + * Dump every field index to the index builder, in field id order. + * Each field is bracketed by startField(fieldId) / endField(). + */ +void +Dictionary::dump(search::index::IndexBuilder &indexBuilder) +{ + for (uint32_t fieldId = 0; fieldId < _numFields; ++fieldId) { + indexBuilder.startField(fieldId); + _fieldIndexes[fieldId]->dump(indexBuilder); + indexBuilder.endField(); + } +} +/** + * @return the memory usage of all field indexes merged into one. + */ +MemoryUsage +Dictionary::getMemoryUsage() const +{ + MemoryUsage usage; + for (auto &fieldIndex : _fieldIndexes) { + usage.merge(fieldIndex->getMemoryUsage()); + } + return usage; +} + + +} // namespace search::memoryindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/memoryindex/dictionary.h b/searchlib/src/vespa/searchlib/memoryindex/dictionary.h new file mode 100644 index 00000000000..b4093a05a43 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/dictionary.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "memoryfieldindex.h" + +namespace search { +namespace memoryindex { + +class IDocumentRemoveListener; +class FieldInverter; + +class Dictionary { +public: + using PostingList = MemoryFieldIndex::PostingList; + +private: + typedef vespalib::GenerationHandler GenerationHandler; + + std::vector > _fieldIndexes; + uint32_t _numFields; + +public: + Dictionary(const index::Schema &schema); + ~Dictionary(void); + PostingList::Iterator find(const vespalib::stringref word, + uint32_t fieldId) const + { + return _fieldIndexes[fieldId]->find(word); + } + + PostingList::ConstIterator + findFrozen(const vespalib::stringref word, uint32_t fieldId) const + { + return _fieldIndexes[fieldId]->findFrozen(word); + } + + uint64_t getNumUniqueWords() const { + uint64_t numUniqueWords = 0; + for (auto &fieldIndex : _fieldIndexes) { + numUniqueWords += fieldIndex->getNumUniqueWords(); + } + return numUniqueWords; + } + + void dump(search::index::IndexBuilder & indexBuilder); + + MemoryUsage getMemoryUsage() const; + + MemoryFieldIndex *getFieldIndex(uint32_t fieldId) const { + return _fieldIndexes[fieldId].get(); + } + + const std::vector > & + getFieldIndexes() const { return _fieldIndexes; } + + uint32_t getNumFields() const { return _numFields; } +}; + +} // namespace search::memoryindex + + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp new file mode 100644 index 00000000000..9119b9aa518 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.cpp @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include "document_remover.h" +#include "i_document_remove_listener.h" +#include "wordstore.h" +#include + +LOG_SETUP(".memoryindex.document_remover"); + +namespace search { +namespace memoryindex { + +typedef CompactDocumentWordsStore::Builder Builder; +typedef CompactDocumentWordsStore::Iterator Iterator; + +DocumentRemover::DocumentRemover(const WordStore &wordStore) + : _store(), + _builder(), + _wordFieldDocTuples(), + _wordStore(wordStore) +{ +} +/** + * Report every word stored for the document to the listener, then drop + * the document from the store. + * + * Each stored word ref is resolved to its word through the word store + * and passed to listener.remove() together with docId. Nothing happens + * when the store yields no valid iterator for docId. + */ +void +DocumentRemover::remove(uint32_t docId, IDocumentRemoveListener &listener) +{ + Iterator itr = _store.get(docId); + if (itr.valid()) { + for (; itr.valid(); ++itr) { + vespalib::stringref word = _wordStore.getWord(itr.wordRef()); + listener.remove(word, docId); + } + _store.remove(docId); + } +} +/** + * Buffer a {wordRef, docId} tuple; it reaches the store on flush(). + */ +void +DocumentRemover::insert(btree::EntryRef wordRef, uint32_t docId) +{ + _wordFieldDocTuples.emplace_back(wordRef, docId); +} + +/** + * Move all buffered tuples into the store, one store entry per docId. + * + * The tuples are radix sorted first (using WordFieldDocTuple::Radix). + * The loop then starts a new builder every time the docId changes, so + * it relies on the sort placing tuples with equal docId next to each + * other. The buffer is cleared afterwards. No-op on an empty buffer. + */ +void +DocumentRemover::flush() +{ + if (_wordFieldDocTuples.empty()) { + return; + } + ShiftBasedRadixSorter, 24, true>:: + radix_sort(WordFieldDocTuple::Radix(), std::less(), &_wordFieldDocTuples[0], _wordFieldDocTuples.size(), 16); + Builder::UP builder(new Builder(_wordFieldDocTuples[0]._docId)); + for (const auto &tuple : _wordFieldDocTuples) { + if (builder->docId() != tuple._docId) { + _store.insert(*builder); + builder.reset(new Builder(tuple._docId)); + } + builder->insert(tuple._wordRef); + } + _store.insert(*builder); + _wordFieldDocTuples.clear(); +} + + +} // namespace memoryindex +} // namespace search diff --git a/searchlib/src/vespa/searchlib/memoryindex/document_remover.h b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h new file mode 100644 index 00000000000..d08cf46c68e --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/document_remover.h @@ -0,0 +1,63 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "compact_document_words_store.h" +#include "i_document_insert_listener.h" + +namespace search { +namespace memoryindex { + +class IDocumentRemoveListener; +class WordStore; + +/** + * Class used to remove documents from the memory index dictionary. + */ +class DocumentRemover : public IDocumentInsertListener +{ +private: + struct WordFieldDocTuple + { + btree::EntryRef _wordRef; + uint32_t _docId; + WordFieldDocTuple() : + _wordRef(0), + _docId(0) + { } + WordFieldDocTuple(btree::EntryRef wordRef, uint32_t docId) : + _wordRef(wordRef), + _docId(docId) + { } + bool operator<(const WordFieldDocTuple &rhs) const { + if (_docId != rhs._docId) { + return _docId < rhs._docId; + } + return _wordRef < rhs._wordRef; + } + struct Radix { + uint32_t operator () (const WordFieldDocTuple & wft) const { + return wft._docId; + } + }; + + }; + + CompactDocumentWordsStore _store; + CompactDocumentWordsStore::Builder::UP _builder; + std::vector _wordFieldDocTuples; + const WordStore &_wordStore; + +public: + DocumentRemover(const WordStore &wordStore); + void remove(uint32_t docId, IDocumentRemoveListener &inverter); + CompactDocumentWordsStore &getStore() { return _store; } + const CompactDocumentWordsStore &getStore() const { return _store; } + + // Implements IDocumentInsertListener + void insert(btree::EntryRef wordRef, uint32_t docId) override; + void flush() override; +}; + +} // namespace memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp new file mode 100644 index 00000000000..a32676baccf --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.cpp @@ -0,0 +1,206 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".memoryindex.documentinverter");
+#include "documentinverter.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "fieldinverter.h"
+#include "urlfieldinverter.h"
+#include "dictionary.h"
+#include "ordereddocumentinserter.h"
+#include
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+using document::Field;
+using document::FieldValue;
+using document::Document;
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using document::StringFieldValue;
+using document::IntFieldValue;
+using document::StructFieldValue;
+using document::DataType;
+using document::DocumentType;
+using document::Annotation;
+using document::AnnotationType;
+using document::AlternateSpanList;
+using document::Span;
+using document::SpanList;
+using document::SimpleSpanList;
+using document::SpanNode;
+using document::SpanTree;
+using document::SpanTreeVisitor;
+using index::DocIdAndPosOccFeatures;
+using index::Schema;
+using vespalib::make_string;
+using search::util::URL;
+
+
+// Creates one FieldInverter per index field in the schema, and one
+// UrlFieldInverter per uri field. Each UrlFieldInverter is handed raw
+// pointers to the FieldInverters of its sub fields (all, scheme, host,
+// port, path, query, fragment, hostname); ownership stays in _inverters.
+// NOTE(review): the template arguments of std::make_unique are missing in
+// this copy of the source - confirm against the real file.
+DocumentInverter::DocumentInverter(const Schema &schema,
+                                   ISequencedTaskExecutor &invertThreads,
+                                   ISequencedTaskExecutor &pushThreads)
+    : _schema(schema),
+      _indexedFieldPaths(),
+      _dataType(nullptr),
+      _schemaIndexFields(),
+      _inverters(),
+      _urlInverters(),
+      _invertThreads(invertThreads),
+      _pushThreads(pushThreads)
+{
+    _schemaIndexFields.setup(schema);
+
+    for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields();
+         ++fieldId) {
+        _inverters.push_back(std::make_unique(_schema, fieldId));
+    }
+    for (auto &urlField : _schemaIndexFields._uriFields) {
+        Schema::CollectionType collectionType =
+            _schema.getIndexField(urlField._all).getCollectionType();
+        _urlInverters.push_back(std::make_unique
+                                (collectionType,
+                                 _inverters[urlField._all].get(),
+                                 _inverters[urlField._scheme].get(),
+                                 _inverters[urlField._host].get(),
+                                 _inverters[urlField._port].get(),
+                                 _inverters[urlField._path].get(),
+                                 _inverters[urlField._query].get(),
+                                 _inverters[urlField._fragment].get(),
+                                 _inverters[urlField._hostname].get()));
+    }
+}
+
+
+// Calls sync() on both executors before the members are destroyed.
+// Presumably this waits for scheduled tasks, which hold raw pointers to
+// the inverters owned by this object - confirm against ISequencedTaskExecutor.
+DocumentInverter::~DocumentInverter()
+{
+    _invertThreads.sync();
+    _pushThreads.sync();
+
+}
+
+
+// Looks up the document field with the same name as index field 'fieldId'
+// and stores a copy in _indexedFieldPaths[fieldId]. If the document type
+// has no such field, an error is logged and the slot is set to null.
+void
+DocumentInverter::addFieldPath(const document::DocumentType &docType,
+                               uint32_t fieldId)
+{
+    assert(fieldId < _indexedFieldPaths.size());
+    std::unique_ptr fp;
+    if ( ! docType.hasField(_schema.getIndexField(fieldId).getName())) {
+        LOG(error,
+            "Mismatch between documentdefinition and schema. "
+            "No field named '%s' from schema in document type '%s'",
+            _schema.getIndexField(fieldId).getName().c_str(),
+            docType.getName().c_str());
+    } else {
+        fp.reset(new Field(docType.getField(_schema.getIndexField(fieldId).getName())));
+    }
+    _indexedFieldPaths[fieldId] = std::move(fp);
+}
+
+
+// Rebuilds _indexedFieldPaths for the given document type: one entry per
+// text field and one per uri field (using the uri field's '_all' id).
+// Remembers 'dataType' so invertDocument() can detect a type change.
+void DocumentInverter::buildFieldPath(const document::DocumentType &docType,
+                                      const document::DataType *dataType)
+{
+    _indexedFieldPaths.clear();
+    _indexedFieldPaths.resize(_schema.getNumIndexFields());
+    for (const auto & fi : _schemaIndexFields._textFields) {
+        addFieldPath(docType, fi);
+    }
+    for (const auto & fi : _schemaIndexFields._uriFields) {
+        addFieldPath(docType, fi._all);
+    }
+    _dataType = dataType;
+}
+
+
+// Schedules inversion of every text and uri field of 'doc'.
+// Field paths are (re)built when none are cached or when the data type
+// pointer of the document differs from the cached one. Each field value is
+// fetched here and moved into a task run on _invertThreads, keyed by field
+// id. When the field path is missing, a null value is passed to invertField.
+void
+DocumentInverter::invertDocument(uint32_t docId, const Document &doc)
+{
+    const document::DataType *dataType(doc.getDataType());
+    if (_indexedFieldPaths.empty() || _dataType != dataType) {
+        buildFieldPath(doc.getType(), dataType);
+    }
+    for (uint32_t fieldId : _schemaIndexFields._textFields) {
+        const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
+        FieldValue::UP fv;
+        if (fieldPath != nullptr) {
+            // TODO: better handling of input data (and better input data)
+            // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
+            fv = doc.getValue(*fieldPath);
+        }
+        FieldInverter *inverter = _inverters[fieldId].get();
+        _invertThreads.execute(fieldId,
+                               [inverter, docId, fv(std::move(fv))]()
+                               { inverter->invertField(docId, fv); });
+    }
+    // Uri fields use _urlInverters, indexed by position in _uriFields,
+    // while the task is keyed by the field id of the uri field's '_all' part.
+    uint32_t urlId = 0;
+    for (const auto & fi : _schemaIndexFields._uriFields) {
+        uint32_t fieldId = fi._all;
+        const FieldPath *const fieldPath(_indexedFieldPaths[fieldId].get());
+        FieldValue::UP fv;
+        if (fieldPath != nullptr) {
+            // TODO: better handling of input data (and better input data)
+            // FieldValue::UP fv = doc.getNestedFieldValue(fieldPath.begin(), fieldPath.end());
+            fv = doc.getValue(*fieldPath);
+        }
+        UrlFieldInverter *inverter = _urlInverters[urlId].get();
+        _invertThreads.execute(fieldId,
+                               [inverter, docId, fv(std::move(fv))]()
+                               { inverter->invertField(docId, fv); });
+        ++urlId;
+    }
+}
+
+
+// Schedules removeDocument(docId) on every field inverter, each task
+// keyed by the inverter's position in _inverters.
+void
+DocumentInverter::removeDocument(uint32_t docId)
+{
+    uint32_t fieldId = 0;
+    for (auto &inverter : _inverters) {
+        _invertThreads.execute(fieldId,
+                               [inverter(inverter.get()), docId]()
+                               { inverter->removeDocument(docId); });
+        ++fieldId;
+    }
+}
+
+
+// For every field inverter, schedules a task on _pushThreads that applies
+// pending removes, pushes the inverted documents and commits the field
+// index. dict.getFieldIndexes() is walked in lockstep with _inverters.
+// 'onWriteDone' is captured by value but not used in the task body;
+// presumably the copy keeps the callback alive until the task is gone - confirm.
+void
+DocumentInverter::pushDocuments(Dictionary &dict,
+                                const std::shared_ptr &
+                                onWriteDone)
+{
+    auto indexFieldIterator = dict.getFieldIndexes().begin();
+    uint32_t fieldId = 0;
+    for (auto &inverter : _inverters) {
+        MemoryFieldIndex &fieldIndex(**indexFieldIterator);
+        DocumentRemover &remover(fieldIndex.getDocumentRemover());
+        OrderedDocumentInserter &inserter(fieldIndex.getInserter());
+        _pushThreads.execute(fieldId,
+                             [inverter(inverter.get()), &remover, &inserter,
+                              &fieldIndex, onWriteDone]()
+                             { inverter->applyRemoves(remover);
+                               inverter->pushDocuments(inserter);
+                               fieldIndex.commit(); });
+        ++indexFieldIterator;
+        ++fieldId;
+    }
+}
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h
new file mode 100644
index 00000000000..415271a0990
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/documentinverter.h
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include "i_document_remove_listener.h"
+
+namespace search
+{
+
+class ISequencedTaskExecutor;
+class IDestructorCallback;
+
+namespace memoryindex
+{
+
+class FieldInverter;
+class UrlFieldInverter;
+class Dictionary;
+
+/**
+ * Inverts documents field by field for the memory index.
+ *
+ * Holds one FieldInverter per index field and one UrlFieldInverter per
+ * uri field, and two task executors: one used when inverting or removing
+ * documents and one used when pushing the result to the dictionary.
+ * The class is non-copyable.
+ *
+ * NOTE(review): template arguments appear to have been stripped from this
+ * copy of the source (vector / unique_ptr / shared_ptr element types) -
+ * confirm against the real header.
+ */
+class DocumentInverter
+{
+private:
+    DocumentInverter(const DocumentInverter &) = delete;
+    DocumentInverter &operator=(const DocumentInverter &) = delete;
+
+    const index::Schema &_schema;
+
+    typedef index::DocTypeBuilder DocTypeBuilder;
+    typedef DocTypeBuilder::UriField UriField;
+    typedef DocTypeBuilder::SchemaIndexFields SchemaIndexFields;
+
+    /**
+     * Resolve the document field for the given index field and cache it
+     * in _indexedFieldPaths.
+     */
+    void
+    addFieldPath(const document::DocumentType &docType,
+                 uint32_t fieldId);
+
+    /**
+     * Rebuild all cached field paths for the given document type and
+     * remember the data type they were built for.
+     */
+    void
+    buildFieldPath(const document::DocumentType & docType,
+                   const document::DataType *dataType);
+
+    void
+    invertNormalDocTextField(size_t fieldId,
+                             const document::FieldValue &field);
+
+    void
+    invertNormalDocUriField(const UriField &handle,
+                            const document::FieldValue &field);
+
+    // "FieldPath" is currently an alias for a plain document field.
+    //typedef document::FieldPath FieldPath;
+    typedef document::Field FieldPath;
+    typedef std::vector > IndexedFieldPaths;
+    // Indexed by field id; an entry may be null.
+    IndexedFieldPaths _indexedFieldPaths;
+    // Data type the cached field paths were built for.
+    const document::DataType * _dataType;
+
+    DocTypeBuilder::SchemaIndexFields _schemaIndexFields;
+
+    // One inverter per index field, indexed by field id.
+    std::vector> _inverters;
+    // One inverter per uri field, in the order of the schema's uri fields.
+    std::vector> _urlInverters;
+    ISequencedTaskExecutor &_invertThreads;
+    ISequencedTaskExecutor &_pushThreads;
+
+    /**
+     * Obtain the schema used by this index.
+     *
+     * @return schema used by this index
+     */
+    const index::Schema &
+    getSchema(void) const
+    {
+        return _schema;
+    }
+
+public:
+    /**
+     * Create a new document inverter based on the given schema.
+     *
+     * @param schema the index schema to use
+     * @param invertThreads executor used for invert and remove tasks
+     * @param pushThreads executor used for push tasks
+     */
+    DocumentInverter(const index::Schema &schema,
+                     ISequencedTaskExecutor &invertThreads,
+                     ISequencedTaskExecutor &pushThreads);
+
+    ~DocumentInverter();
+
+    /**
+     * Push inverted documents to memory index structure.
+     *
+     * @param dict dictionary
+     * @param onWriteDone callback object shared with every scheduled
+     *                    push task
+     */
+    void
+    pushDocuments(Dictionary &dict,
+                  const std::shared_ptr &onWriteDone);
+
+    /**
+     * Invert a document.
+     *
+     * @param docId local id for document
+     * @param doc the document
+     *
+     **/
+    void
+    invertDocument(uint32_t docId, const document::Document &doc);
+
+    /**
+     * Remove a document.
+     *
+     * @param docId local id for document
+     */
+    void removeDocument(uint32_t docId);
+
+    /**
+     * Get the inverter for the given field. No bounds check is done.
+     *
+     * @param fieldId index into the list of field inverters
+     * @return non-owning pointer to the field inverter
+     */
+    FieldInverter *getInverter(uint32_t fieldId) const {
+        return _inverters[fieldId].get();
+    }
+
+    const std::vector > &
+    getInverters() const { return _inverters; }
+
+    /** @return number of field inverters */
+    uint32_t getNumFields() const { return _inverters.size(); }
+};
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp
new file mode 100644
index 00000000000..ac009ed7554
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.cpp
@@ -0,0 +1,167 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".memoryindex.featurestore");
+#include "featurestore.h"
+#include
+#include
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+// Passed as the second argument when constructing the buffer type (_type).
+constexpr size_t MIN_CLUSTERS = 1024u;
+
+using index::SchemaUtil;
+
+// Encodes 'features' with the coding parameters of 'packedIndex' and
+// returns the encoder's write offset from before the write.
+// The offset must be a multiple of 64. When it has grown above 2000 the
+// encoder is first set up again on _fctx, after which the offset is 0.
+uint64_t
+FeatureStore::writeFeatures(uint32_t packedIndex,
+                            const DocIdAndFeatures &features)
+{
+    _f._fieldsParams = &_fieldsParams[packedIndex];
+    uint64_t oldOffset = _f.getWriteOffset();
+    assert((oldOffset & 63) == 0);
+    if (oldOffset > 2000) {
+        _f.setupWrite(_fctx);
+        oldOffset = 0;
+        assert(_f.getWriteOffset() == oldOffset);
+    }
+    assert(!features.getRaw());
+    _f.writeFeatures(features);
+    return oldOffset;
+}
+
+
+// Copies 'byteLen' bytes from 'src' to the end of the active buffer and
+// returns a reference to the copy. The copy is followed by zeroed padding
+// (RefType::pad) and by DECODE_SAFETY zeroed bytes. Capacity is reserved
+// for all three parts, but only byteLen + pad is added to the buffer size,
+// so the safety bytes are not counted as used.
+btree::EntryRef
+FeatureStore::addFeatures(const uint8_t *src, uint64_t byteLen)
+{
+    uint32_t pad = RefType::pad(byteLen);
+    _store.ensureBufferCapacity(_typeId, byteLen + pad + DECODE_SAFETY);
+    uint32_t activeBufferId = _store.getActiveBufferId(_typeId);
+    btree::BufferState &state = _store.getBufferState(activeBufferId);
+    size_t oldSize = state.size();
+    RefType ref(oldSize, activeBufferId);
+    uint8_t * dst = _store.getBufferEntry(activeBufferId, oldSize);
+    memcpy(dst, src, byteLen);
+    dst += byteLen;
+    if (pad > 0) {
+        memset(dst, 0, pad);
+        dst += pad;
+    }
+    memset(dst, 0, DECODE_SAFETY);
+    state.pushed_back(byteLen + pad);
+    return ref;
+}
+
+
+// Stores the bits in [beginOffset, endOffset) of the encoder output and
+// returns the reference together with the length in bits.
+// The source address is taken as 'wordLen' 64-bit words before _f._valI;
+// this assumes _valI points just past the last written word - TODO confirm.
+// NOTE(review): the target types of static_cast / reinterpret_cast are
+// missing in this copy of the source.
+std::pair
+FeatureStore::addFeatures(uint64_t beginOffset, uint64_t endOffset)
+{
+    uint64_t bitLen = (endOffset - beginOffset);
+    assert(static_cast(bitLen) > 0);
+    uint64_t wordLen = (bitLen + 63) / 64;
+    uint64_t byteLen = (bitLen + 7) / 8;
+    assert(wordLen > 0);
+    assert(byteLen > 0);
+    const uint8_t *src = reinterpret_cast(_f._valI - wordLen);
+    RefType ref = addFeatures(src, byteLen);
+    return std::make_pair(ref, bitLen);
+}
+
+
+// Copies the features at 'ref' ('bitLen' bits, rounded up to whole bytes)
+// to a new location and accounts the old bytes, including padding, as dead.
+btree::EntryRef
+FeatureStore::moveFeatures(btree::EntryRef ref, uint64_t bitLen)
+{
+    const uint8_t *src = getBits(ref);
+    uint64_t byteLen = (bitLen + 7) / 8;
+    RefType newRef = addFeatures(src, byteLen);
+    // Mark old features as dead
+    _store.incDead(ref, byteLen + RefType::pad(byteLen));
+    return newRef;
+}
+
+
+// Sets up the encoder with its write context and compression buffer,
+// derives coding parameters for every index field in the schema, and
+// registers the buffer type with the data store.
+FeatureStore::FeatureStore(const Schema &schema)
+    : _store(),
+      _f(NULL),
+      _fctx(_f),
+      _d(NULL),
+      _fieldsParams(),
+      _schema(schema),
+      _type(RefType::align(1u), MIN_CLUSTERS,
+            RefType::offsetSize() / RefType::align(1u)),
+      _typeId(0)
+{
+    _f.setWriteContext(&_fctx);
+    _fctx.allocComprBuf(64, 1);
+    _f.afterWrite(_fctx, 0, 0);
+
+    _fieldsParams.resize(_schema.getNumIndexFields());
+    SchemaUtil::IndexIterator it(_schema);
+    for(; it.isValid(); ++it) {
+        _fieldsParams[it.getIndex()].
+            setSchemaParams(_schema, it.getIndex());
+    }
+    _store.addType(&_type);
+    _store.initActiveBuffers();
+}
+
+
+FeatureStore::~FeatureStore(void)
+{
+    _store.dropBuffers();
+}
+
+
+// Encodes 'features', flushes the encoder, and stores the bits produced
+// between the write offsets sampled before and after the encoding.
+std::pair
+FeatureStore::addFeatures(uint32_t packedIndex,
+                          const DocIdAndFeatures &features)
+{
+    uint64_t oldOffset = writeFeatures(packedIndex, features);
+    uint64_t newOffset = _f.getWriteOffset();
+    _f.flush();
+    return addFeatures(oldOffset, newOffset);
+}
+
+
+
+// Decodes the features stored at 'ref' into 'features', using the member
+// decoder _d (which is why the method is not const).
+void
+FeatureStore::getFeatures(uint32_t packedIndex, btree::EntryRef ref,
+                          DocIdAndFeatures &features)
+{
+    setupForField(packedIndex, _d);
+    setupForReadFeatures(ref, _d);
+    _d.readFeatures(features);
+}
+
+
+// Returns the encoded size in bits of the features at 'ref', measured as
+// the decoder's read offset difference across skipFeatures(1).
+size_t
+FeatureStore::bitSize(uint32_t packedIndex, btree::EntryRef ref)
+{
+    setupForField(packedIndex, _d);
+    setupForUnpackFeatures(ref, _d);
+    uint64_t oldOffset = _d.getReadOffset();
+    _d.skipFeatures(1);
+    uint64_t newOffset = _d.getReadOffset();
+    uint64_t bitLen = (newOffset - oldOffset);
+    assert(static_cast(bitLen) > 0);
+    return bitLen;
+}
+
+
+// Public move: determines the bit length by decoding, then moves the bits.
+btree::EntryRef
+FeatureStore::moveFeatures(uint32_t packedIndex,
+                           btree::EntryRef ref)
+{
+    uint64_t bitLen = bitSize(packedIndex, ref);
+    return moveFeatures(ref, bitLen);
+}
+
+
+} // namespace memoryindex
+
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/featurestore.h b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h
new file mode 100644
index
00000000000..676e2d54860
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/featurestore.h
@@ -0,0 +1,274 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace search {
+
+namespace memoryindex {
+
+/**
+ * Store for bit-compressed features (DocIdAndFeatures).
+ *
+ * Features are encoded with an encode context, copied into a data store
+ * and referred to by entry refs. A decode context owned by the store is
+ * used by the non-const read methods; the const setupFor*() methods
+ * operate on a decoder supplied by the caller instead.
+ *
+ * NOTE(review): template arguments appear to have been stripped from this
+ * copy of the source (DataStoreT, std::vector, std::pair, BufferType) -
+ * confirm against the real header.
+ */
+class FeatureStore
+{
+public:
+    typedef btree::DataStoreT > DataStoreType;
+    typedef DataStoreType::RefType RefType;
+    typedef bitcompression::EG2PosOccEncodeContext EncodeContext;
+    typedef bitcompression::EG2PosOccDecodeContextCooked
+    DecodeContextCooked;
+    typedef vespalib::GenerationHandler::generation_t generation_t;
+
+private:
+    typedef index::Schema Schema;
+    typedef index::DocIdAndFeatures DocIdAndFeatures;
+    typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+    // Number of zeroed bytes written after each stored entry.
+    // Presumably this lets the decoder read slightly past the end - confirm.
+    static const uint32_t DECODE_SAFETY = 16;
+
+    DataStoreType _store;
+
+    // Feature Encoder
+    EncodeContext _f;
+    // Buffer for compressed features.
+    ComprFileWriteContext _fctx;
+
+    // Feature Decoder
+    DecodeContextCooked _d;
+
+    // Coding parameters for fields and field collections, derived
+    // from schema.
+    std::vector _fieldsParams;
+
+    const Schema &_schema;
+
+    btree::BufferType _type;
+    const uint32_t _typeId;
+
+    /**
+     * Writes the given features to the underlying encode context.
+     *
+     * @param packedIndex the field or field collection owning features
+     * @param features the features to be encoded
+     * @return the encode offset before writing
+     */
+    uint64_t
+    writeFeatures(uint32_t packedIndex, const DocIdAndFeatures &features);
+
+    /**
+     * Adds the features from the given buffer to the data store.
+     *
+     * @param src buffer with features
+     * @param byteLen the byte length of the buffer
+     * @return the entry ref for the added features
+     */
+    btree::EntryRef
+    addFeatures(const uint8_t * src, uint64_t byteLen);
+
+    /**
+     * Adds the features currently in the underlying encode context to the data store.
+     *
+     * @param beginOffset the begin offset into the encode context
+     * @param endOffset the end offset into the encode context
+     * @return the entry ref and bit length of the features
+     */
+    std::pair
+    addFeatures(uint64_t beginOffset, uint64_t endOffset);
+
+    /**
+     * Moves features to new location, as part of compaction.
+     *
+     * @param ref old reference to stored features
+     * @param bitLen bit length of features to move
+     * @return new reference to stored features
+     */
+    btree::EntryRef moveFeatures(btree::EntryRef ref, uint64_t bitLen);
+
+public:
+
+    /**
+     * Constructor for feature store.
+     *
+     * @param schema The schema describing fields and field
+     *               collections available, used to derive
+     *               coding parameters.
+     */
+    FeatureStore(const Schema &schema);
+
+    ~FeatureStore(void);
+
+    /**
+     * Add features to feature store
+     *
+     * @param packedIndex The field or field collection owning features
+     * @param features The features to be encoded
+     * @return pair with reference to stored features and
+     *         size of encoded features in bits
+     */
+    std::pair
+    addFeatures(uint32_t packedIndex,
+                const DocIdAndFeatures &features);
+
+
+    /**
+     * Get features from feature store. Method signature is not
+     * const since feature decoder is written to during calculation.
+     *
+     * @param packedIndex The field or field collection owning features
+     * @param ref Reference to stored features
+     * @param features The features to be decoded
+     */
+    void
+    getFeatures(uint32_t packedIndex,
+                btree::EntryRef ref,
+                DocIdAndFeatures &features);
+
+
+    /**
+     * Setup the given decoder to be used for the given field or field
+     * collection.
+     *
+     * @param packedIndex The field or field collection owning features
+     * @param decoder The feature decoder
+     */
+    void
+    setupForField(uint32_t packedIndex, DecodeContextCooked &decoder) const
+    {
+        decoder._fieldsParams = &_fieldsParams[packedIndex];
+    }
+
+    /**
+     * Setup the given decoder to later use readFeatures() to decode
+     * the stored features.
+     *
+     * @param ref Reference to stored features
+     * @param decoder The feature decoder
+     */
+    void
+    setupForReadFeatures(btree::EntryRef ref, DecodeContextCooked &decoder) const
+    {
+        const uint8_t * bits = getBits(ref);
+        decoder.setByteCompr(bits);
+        uint32_t bufferId = RefType(ref).bufferId();
+        const btree::BufferState &state = _store.getBufferState(bufferId);
+        // End is given as the byte distance from 'bits' to the end of the
+        // used part of the buffer, rounded up to units of 8 bytes.
+        decoder.setEnd(
+                ((_store.getBufferEntry(bufferId, state.size()) -
+                  bits) + 7) / 8,
+                false);
+    }
+
+    /**
+     * Setup the given decoder to later use unpackFeatures() to decode
+     * the stored features.
+     *
+     * @param ref Reference to stored features
+     * @param decoder The feature decoder
+     */
+    void
+    setupForUnpackFeatures(btree::EntryRef ref, DecodeContextCooked &decoder) const
+    {
+        decoder.setByteCompr(getBits(ref));
+    }
+
+    /**
+     * Calculate size of encoded features. Method signature is not
+     * const since feature decoder is written to during calculation.
+     *
+     * @param packedIndex The field or field collection owning features
+     * @param ref Reference to stored features
+     * @return size of features in bits
+     */
+    size_t
+    bitSize(uint32_t packedIndex, btree::EntryRef ref);
+
+    /**
+     * Get byte address of stored features
+     *
+     * @param ref Reference to stored features
+     * @return byte address of stored features
+     */
+    const uint8_t *
+    getBits(btree::EntryRef ref) const
+    {
+        RefType iRef(ref);
+        return _store.getBufferEntry(iRef.bufferId(), iRef.offset());
+    }
+
+    /**
+     * Move features to new location, as part of compaction.
+     *
+     * @param packedIndex The field or field collection owning features
+     * @param ref Old reference to stored features
+     * @return New reference to stored features
+     */
+    btree::EntryRef
+    moveFeatures(uint32_t packedIndex,
+                 btree::EntryRef ref);
+
+    /**
+     * Return a const view of the fields params used by this feature store.
+     *
+     * @return const view of fields params.
+     */
+    const std::vector &
+    getFieldsParams() const
+    {
+        return _fieldsParams;
+    }
+
+    // Inherit doc from DataStoreBase
+    void
+    trimHoldLists(generation_t usedGen)
+    {
+        _store.trimHoldLists(usedGen);
+    }
+
+    // Inherit doc from DataStoreBase
+    void
+    transferHoldLists(generation_t generation)
+    {
+        _store.transferHoldLists(generation);
+    }
+
+    // Forwards to the underlying data store.
+    void
+    clearHoldLists(void)
+    {
+        _store.clearHoldLists();
+    }
+
+    // Inherit doc from DataStoreBase
+    std::vector
+    startCompact()
+    {
+        return _store.startCompact(_typeId);
+    }
+
+    // Inherit doc from DataStoreBase
+    void
+    finishCompact(const std::vector & toHold)
+    {
+        _store.finishCompact(toHold);
+    }
+
+    // Inherit doc from DataStoreBase
+    MemoryUsage
+    getMemoryUsage() const
+    {
+        return _store.getMemoryUsage();
+    }
+
+    // Inherit doc from DataStoreBase
+    btree::DataStoreBase::MemStats
+    getMemStats() const
+    {
+        return _store.getMemStats();
+    }
+};
+
+
+} // namespace search::memoryindex
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp
new file mode 100644
index 00000000000..a6899d87bba
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.cpp
@@ -0,0 +1,577 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".memoryindex.fieldinverter");
+#include "fieldinverter.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ordereddocumentinserter.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+using document::Field;
+using document::FieldValue;
+using document::Document;
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using document::StringFieldValue;
+using document::IntFieldValue;
+using document::StructFieldValue;
+using document::DataType;
+using document::DocumentType;
+using document::Annotation;
+using document::AnnotationType;
+using document::AlternateSpanList;
+using document::Span;
+using document::SpanList;
+using document::SimpleSpanList;
+using document::SpanNode;
+using document::SpanTree;
+using document::SpanTreeVisitor;
+using index::DocIdAndPosOccFeatures;
+using index::Schema;
+using vespalib::make_string;
+using search::util::URL;
+
+namespace documentinverterkludge
+{
+
+namespace linguistics
+{
+
+// Name of the span tree that is searched for term annotations.
+const vespalib::string SPANTREE_NAME("linguistics");
+
+}
+
+}
+
+using namespace documentinverterkludge;
+
+namespace
+{
+
+/**
+ * Span tree visitor that computes the smallest span covering all Span
+ * leaves it visits: begin_pos is the minimum start seen and end_pos the
+ * maximum end seen. If no Span has been visited, begin_pos and end_pos
+ * keep their initial values (0x7fffffff and -1).
+ *
+ * NOTE(review): the target types of the const_cast expressions are
+ * missing in this copy of the source - confirm against the real file.
+ */
+class SpanFinder : public SpanTreeVisitor
+{
+public:
+    int32_t begin_pos;
+    int32_t end_pos;
+
+    SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {}
+    // Covering span, expressed as (start, length).
+    Span span() { return Span(begin_pos, end_pos - begin_pos); }
+
+    void visit(const Span &node) override {
+        begin_pos = std::min(begin_pos, node.from());
+        end_pos = std::max(end_pos, node.from() + node.length());
+    }
+    // Composite nodes: forward the visitor to every child.
+    void visit(const SpanList &node) override {
+        for (const auto & span_ : node) {
+            const_cast(span_)->accept(*this);
+        }
+    }
+    void visit(const SimpleSpanList &node) override {
+        for (const auto & span_ : node) {
+            const_cast(span_).accept(*this);
+        }
+    }
+    // All alternative subtrees contribute to the covering span.
+    void visit(const AlternateSpanList &node) override {
+        for (size_t i = 0; i < node.getNumSubtrees(); ++i) {
+            visit(node.getSubtree(i));
+        }
+    }
+};
+
+// Returns the smallest span covering all Span leaves below 'span_node'.
+Span
+getSpan(const SpanNode &span_node)
+{
+    SpanFinder finder;
+    // The SpanNode will not be changed.
+    const_cast(span_node).accept(finder);
+    return finder.span();
+}
+
+}
+
+// Extracts the words of one string value.
+// Without a "linguistics" span tree the whole text is saved as a single
+// word. Otherwise all valid TERM annotations with a non-empty span are
+// collected and sorted; annotations sharing the same span are added
+// before stepWordPos() is called once for the group, and only if at least
+// one of them produced a word. An annotation without its own field value
+// uses the text covered by its span as the word.
+// NOTE(review): template arguments and cast target types appear to have
+// been stripped from this copy of the source - confirm against the real file.
+void
+FieldInverter::processAnnotations(const StringFieldValue &value)
+{
+    _terms.clear();
+    StringFieldValue::SpanTrees spanTrees = value.getSpanTrees();
+    const SpanTree *tree = StringFieldValue::findTree(spanTrees, linguistics::SPANTREE_NAME);
+    if (tree == NULL) {
+        /* This is wrong unless field is exact match */
+        const vespalib::string &text = value.getValue();
+        if (text.empty())
+            return;
+        uint32_t wordRef = saveWord(text);
+        if (wordRef != 0u) {
+            add(wordRef);
+            stepWordPos();
+        }
+        return;
+    }
+    const vespalib::string &text = value.getValue();
+    for (const Annotation & annotation : *tree) {
+        const SpanNode *span = annotation.getSpanNode();
+        if ((span != nullptr) && annotation.valid() &&
+            (annotation.getType() == *AnnotationType::TERM))
+        {
+            Span sp = getSpan(*span);
+            if (sp.length() != 0) {
+                _terms.push_back(std::make_pair(sp,
+                                                annotation.getFieldValue()));
+            }
+        }
+    }
+    std::sort(_terms.begin(), _terms.end());
+    SpanTermVector::const_iterator it = _terms.begin();
+    SpanTermVector::const_iterator ite = _terms.end();
+    uint32_t wordRef;
+    bool mustStep = false;
+    for (; it != ite; ) {
+        SpanTermVector::const_iterator it_begin = it;
+        // Inner loop consumes all annotations with the same span.
+        for (; it != ite && it->first == it_begin->first; ++it) {
+            if (it->second) { // it->second is a const FieldValue *.
+                wordRef = saveWord(*it->second);
+            } else {
+                const Span &iSpan = it->first;
+                assert(iSpan.from() >= 0);
+                assert(iSpan.length() > 0);
+                wordRef = saveWord(vespalib::stringref(&text[iSpan.from()],
+                                                       iSpan.length()));
+            }
+            if (wordRef != 0u) {
+                add(wordRef);
+                mustStep = true;
+            }
+        }
+        if (mustStep) {
+            stepWordPos();
+            mustStep = false;
+        }
+    }
+}
+
+
+// Clears all per-batch state. _wordRefs keeps one element because index 0
+// is not a valid word number (word numbers start at 1, see sortWords()).
+void
+FieldInverter::reset()
+{
+    _words.clear();
+    _elems.clear();
+    _positions.clear();
+    _wordRefs.resize(1);
+    _pendingDocs.clear();
+    _abortedDocs.clear();
+    _removeDocs.clear();
+    _oldPosSize = 0u;
+}
+
+// Radix key for sortWords(): the upper 32 bits of the combined value.
+struct WordRefRadix {
+    uint32_t operator () (const uint64_t v) { return v >> 32; }
+};
+
+// Sorts the saved words and assigns word numbers.
+// Step 1: sort the word references (index 1 and up) by word.
+// Step 2: walk the sorted references; each distinct word gets the next
+//         word number (starting at 1) and _wordRefs is compacted so that
+//         _wordRefs[wordNum] refers to the first reference of that word.
+// Step 3: replace the word reference held in every position entry by the
+//         word number.
+void
+FieldInverter::sortWords(void)
+{
+    assert(_wordRefs.size() > 1);
+
+    // Make a dictionary for words.
+    { // Use radix sort based on first four bytes of word, before finalizing with std::sort.
+        vespalib::Array firstFourBytes(_wordRefs.size());
+        for (size_t i(1); i < _wordRefs.size(); i++) {
+            // Upper half: first four bytes of the word, converted with
+            // ntohl(). Lower half: the word reference itself.
+            uint64_t firstFour = ntohl(*reinterpret_cast(getWordFromRef(_wordRefs[i])));
+            firstFourBytes[i] = (firstFour << 32) | _wordRefs[i];
+        }
+        ShiftBasedRadixSorter::
+            radix_sort(WordRefRadix(), CompareWordRef(_words), &firstFourBytes[1], firstFourBytes.size()-1, 16);
+        for (size_t i(1); i < firstFourBytes.size(); i++) {
+            _wordRefs[i] = firstFourBytes[i] & 0xffffffffl;
+        }
+    }
+    // Populate word numbers in word buffer and mapping from
+    // word numbers to word reference.
+    // TODO: shrink word buffer to only contain unique words
+    std::vector::const_iterator w(_wordRefs.begin() + 1);
+    std::vector::const_iterator we(_wordRefs.end());
+    uint32_t wordNum = 1; // First valid word number
+    const char *lastWord = getWordFromRef(*w);
+    updateWordNum(*w, wordNum);
+    for (++w; w != we; ++w) {
+        const char *word = getWordFromRef(*w);
+        int cmpres = strcmp(lastWord, word);
+        assert(cmpres <= 0);
+        if (cmpres < 0) {
+            ++wordNum;
+            _wordRefs[wordNum] = *w;
+            lastWord = word;
+        }
+        updateWordNum(*w, wordNum);
+    }
+    assert(_wordRefs.size() >= wordNum + 1);
+    _wordRefs.resize(wordNum + 1);
+    // Replace initial word reference by word number.
+    for (auto &p : _positions) {
+        p._wordNum = getWordNum(p._wordNum);
+    }
+}
+
+
+// Starts a new element with the given weight.
+void
+FieldInverter::startElement(int32_t weight)
+{
+    _elems.push_back(ElemInfo(weight)); // Fill in length later
+}
+
+
+// Finishes the current element: records the word position counter as its
+// length, resets the counter and advances the element counter.
+void
+FieldInverter::endElement(void)
+{
+    _elems.back().setLen(_wpos);
+    _wpos = 0;
+    ++_elem;
+}
+
+// Appends 'word' to the word buffer and returns its word reference, or 0
+// for an empty word.
+// Each entry is laid out as 4 zeroed bytes, then the word bytes, then at
+// least one padding byte up to a multiple of 4. The returned reference is
+// the offset of the word bytes in units of 4 bytes.
+uint32_t
+FieldInverter::saveWord(const vespalib::stringref word)
+{
+    const size_t wordsSize = _words.size();
+    // assert((wordsSize & 3) == 0); // Check alignment
+    size_t len = word.size();
+    if (len == 0)
+        return 0u;
+
+    const size_t fullyPaddedSize = (wordsSize + 4 + len + 1 + 3) & ~3;
+    _words.reserve(vespalib::roundUp2inN(fullyPaddedSize));
+    _words.resize(fullyPaddedSize);
+
+    char * buf = &_words[0] + wordsSize;
+    memset(buf, 0, 4);
+    memcpy(buf + 4, word.c_str(), len);
+    // Clear the bytes after the end of the word in the 32-bit unit that
+    // holds the word's last (len & 3) bytes; the mask keeps the low bytes.
+    uint32_t *lastWord = reinterpret_cast(buf + 4 + (len & ~0x3));
+    *lastWord &= (0xffffff >> ((3 - (len & 3)) << 3)); //only on little endian machines !!
+
+    uint32_t wordRef = (wordsSize + 4) >> 2;
+    // assert(wordRef != 0);
+    _wordRefs.push_back(wordRef);
+    return wordRef;
+}
+
+
+// Saves the raw content of a string field value as a word.
+uint32_t
+FieldInverter::saveWord(const document::FieldValue &fv)
+{
+    assert(fv.getClass().id() == StringFieldValue::classId);
+    typedef std::pair RawRef;
+    RawRef sRef = fv.getAsRaw();
+    return saveWord(vespalib::stringref(sRef.first, sRef.second));
+}
+
+
+// Records a (word, docId) position entry using the two-argument PosInfo
+// constructor. The word must not be empty.
+void
+FieldInverter::remove(const vespalib::stringref word, uint32_t docId)
+{
+    uint32_t wordRef = saveWord(word);
+    assert(wordRef != 0);
+    _positions.emplace_back(wordRef, docId);
+}
+
+
+// Single value field: one element with weight 1.
+void
+FieldInverter::processNormalDocTextField(const StringFieldValue &field)
+{
+    startElement(1);
+    processAnnotations(field);
+    endElement();
+}
+
+
+// Array field: one element with weight 1 per array entry.
+void
+FieldInverter::processNormalDocArrayTextField(const ArrayFieldValue &field)
+{
+    uint32_t el = 0;
+    uint32_t ele = field.size();
+    for (;el < ele; ++el) {
+        const FieldValue &elfv = field[el];
+        assert(elfv.getClass().id() == StringFieldValue::classId);
+        const StringFieldValue &element =
+            static_cast(elfv);
+        startElement(1);
+        processAnnotations(element);
+        endElement();
+    }
+}
+
+
+// Weighted set field: one element per entry, using the entry's weight.
+void
+FieldInverter::processNormalDocWeightedSetTextField(const WeightedSetFieldValue &field)
+{
+    for (const auto & el : field) {
+        const FieldValue &key = *el.first;
+        const FieldValue &xweight = *el.second;
+        assert(key.getClass().id() == StringFieldValue::classId);
+        assert(xweight.getClass().id() == IntFieldValue::classId);
+        const StringFieldValue &element = static_cast(key);
+        int32_t weight = xweight.getAsInt();
+        startElement(weight);
+        processAnnotations(element);
+        endElement();
+    }
+}
+
+
+// _wordRefs starts with one element, matching the state left by reset().
+FieldInverter::FieldInverter(const Schema &schema, uint32_t fieldId)
+    : _fieldId(fieldId),
+      _elem(0u),
+      _wpos(0u),
+      _docId(0),
+      _oldPosSize(0),
+      _schema(schema),
+      _words(),
+      _elems(),
+      _positions(),
+      _features(),
+      _elementWordRefs(),
+      _wordRefs(1),
+      _terms(),
+      _abortedDocs(),
+      _pendingDocs(),
+      _removeDocs()
+{
+}
+
+
+// Drops a pending document. If it has a non-zero length, its range is
+// remembered in _abortedDocs so trimAbortedDocs() can cut it out later.
+void
+FieldInverter::abortPendingDoc(uint32_t docId)
+{
+    auto itr = _pendingDocs.find(docId);
+    if (itr != _pendingDocs.end()) {
+        if (itr->second.getLen() != 0) {
+            _abortedDocs.push_back(itr->second);
+        }
+        _pendingDocs.erase(itr);
+    }
+}
+
+
+// Copies the position entries in [srcIdx, nextTrimIdx) down to dstIdx and
+// advances dstIdx by the number of entries copied. Does nothing for an
+// empty range.
+void
+FieldInverter::moveNotAbortedDocs(uint32_t &dstIdx,
+                                  uint32_t srcIdx,
+                                  uint32_t nextTrimIdx)
+{
+    assert(nextTrimIdx >= srcIdx);
+    uint32_t size = nextTrimIdx - srcIdx;
+    if (size == 0)
+        return;
+    assert(dstIdx < srcIdx);
+    assert(srcIdx < _positions.size());
+    assert(srcIdx + size <= _positions.size());
+    PosInfo *dst = &_positions[dstIdx];
+    const PosInfo *src = &_positions[srcIdx];
+    const PosInfo *srce = src + size;
+    while (src != srce) {
+        *dst = *src;
+        ++dst;
+        ++src;
+    }
+    dstIdx += size;
+}
+
+
+// Removes the position ranges of all aborted documents by compacting
+// _positions in place. The aborted ranges are sorted first; the entries
+// between consecutive ranges, and after the last one, are moved down.
+void
+FieldInverter::trimAbortedDocs()
+{
+    if (_abortedDocs.empty()) {
+        return;
+    }
+    std::sort(_abortedDocs.begin(), _abortedDocs.end());
+    auto itrEnd = _abortedDocs.end();
+    auto itr = _abortedDocs.begin();
+    uint32_t dstIdx = itr->getStart();
+    uint32_t srcIdx = itr->getStart() + itr->getLen();
+    ++itr;
+    while (itr != itrEnd) {
+        moveNotAbortedDocs(dstIdx, srcIdx, itr->getStart());
+        srcIdx = itr->getStart() + itr->getLen();
+        ++itr;
+    }
+    moveNotAbortedDocs(dstIdx, srcIdx, _positions.size());
+    _positions.resize(dstIdx);
+    _abortedDocs.clear();
+}
+
+
+// Inverts one field value for a document. A null value still goes
+// through startDoc()/endDoc(), without any content in between.
+void
+FieldInverter::invertField(uint32_t docId, const FieldValue::UP &val)
+{
+    startDoc(docId);
+    if (val) {
+        invertNormalDocTextField(*val);
+    }
+    endDoc();
+}
+
+
+// Dispatches on the collection type of this index field and checks that
+// the value has the matching class (and, for collections, string nested
+// type). Throws std::runtime_error on a mismatch. Other collection types
+// are ignored.
+void
+FieldInverter::invertNormalDocTextField(const FieldValue &val)
+{
+    const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass());
+    const Schema::IndexField &field = _schema.getIndexField(_fieldId);
+    switch (field.getCollectionType()) {
+    case Schema::SINGLE:
+        if (cInfo.id() == StringFieldValue::classId) {
+            processNormalDocTextField(static_cast(val));
+        } else {
+            throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", val.getDataType()->getName().c_str()));
+        }
+        break;
+    case Schema::WEIGHTEDSET:
+        if (cInfo.id() == WeightedSetFieldValue::classId) {
+            const WeightedSetFieldValue &wset = static_cast(val);
+            if (wset.getNestedType() == *DataType::STRING) {
+                processNormalDocWeightedSetTextField(wset);
+            } else {
+                throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", wset.getNestedType().getName().c_str()));
+            }
+        } else {
+            throw std::runtime_error(make_string("Expected weighted set, got '%s'", cInfo.name()));
+        }
+        break;
+    case Schema::ARRAY:
+        if (cInfo.id() == ArrayFieldValue::classId) {
+            const ArrayFieldValue &arr = static_cast(val);
+            if (arr.getNestedType() == *DataType::STRING) {
+                processNormalDocArrayTextField(arr);
+            } else {
+                throw std::runtime_error(make_string("Expected DataType::STRING, got '%s'", arr.getNestedType().getName().c_str()));
+            }
+        } else {
+            throw std::runtime_error(make_string("Expected Array, got '%s'", cInfo.name()));
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+
+namespace {
+
+// Radix key for position entries: word number in the upper 32 bits and
+// document id in the lower 32 bits.
+struct FullRadix {
+    uint64_t operator () (const FieldInverter::PosInfo & p) const {
+        return (static_cast(p._wordNum) << 32) |
+            p._docId;
+    }
+};
+
+}
+
+
+// Asks the remover to remove every document queued in _removeDocs, with
+// this inverter as the listener, then empties the queue.
+void
+FieldInverter::applyRemoves(DocumentRemover &remover)
+{
+    for (auto docId : _removeDocs) {
+        remover.remove(docId, *this);
+    }
+    _removeDocs.clear();
+}
+
+
+// Pushes all collected position entries to 'inserter', in sorted order,
+// and resets this inverter afterwards.
+// Entries for the same (word, document) are merged into one feature set
+// that is handed over with inserter.add(); an entry marked as removed
+// results in inserter.remove(). Repeated (element id, word position)
+// pairs within a feature set are skipped.
+void
+FieldInverter::pushDocuments(IOrderedDocumentInserter &inserter)
+{
+    trimAbortedDocs();
+
+    if (_positions.empty()) {
+        reset();
+        return; // All documents with words aborted
+    }
+
+    sortWords();
+
+    // Sort for terms.
+    ShiftBasedRadixSorter, 56, true>::
+        radix_sort(FullRadix(), std::less(), &_positions[0], _positions.size(), 16);
+
+    constexpr uint32_t NO_ELEMENT_ID = std::numeric_limits::max();
+    constexpr uint32_t NO_WORD_POS = std::numeric_limits::max();
+    uint32_t lastWordNum = 0;
+    uint32_t lastElemId = 0;
+    uint32_t lastWordPos = 0;
+    uint32_t numWordIds = _wordRefs.size() - 1;
+    uint32_t lastDocId = 0;
+    vespalib::stringref word;
+    bool emptyFeatures = true;
+
+    inserter.rewind();
+
+    for (auto &i : _positions) {
+        assert(i._wordNum <= numWordIds);
+        (void) numWordIds;
+        if (lastWordNum != i._wordNum || lastDocId != i._docId) {
+            // New (word, document) pair: hand over what was collected
+            // for the previous pair first.
+            if (!emptyFeatures) {
+                inserter.add(lastDocId, _features);
+                emptyFeatures = true;
+            }
+            if (lastWordNum != i._wordNum) {
+                lastWordNum = i._wordNum;
+                word = getWordFromNum(lastWordNum);
+                inserter.setNextWord(word);
+            }
+            lastDocId = i._docId;
+            if (i.removed()) {
+                inserter.remove(lastDocId);
+                continue;
+            }
+        }
+        if (emptyFeatures) {
+            if (!i.removed()) {
+                emptyFeatures = false;
+                _features.clear(lastDocId);
+                lastElemId = NO_ELEMENT_ID;
+                lastWordPos = NO_WORD_POS;
+            } else {
+                continue; // ignore dup remove
+            }
+        } else {
+            // removes must come before non-removes
+            assert(!i.removed());
+        }
+        const ElemInfo &elem = _elems[i._elemRef];
+        if (i._wordPos != lastWordPos || i._elemId != lastElemId) {
+            _features.addNextOcc(i._elemId, i._wordPos,
+                                 elem._weight, elem._len);
+            lastElemId = i._elemId;
+            lastWordPos = i._wordPos;
+        } else {
+            // silently ignore duplicate annotations
+        }
+    }
+
+    if (!emptyFeatures) {
+        inserter.add(lastDocId, _features);
+    }
+    inserter.flush();
+    reset();
+}
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h
new file mode 100644
index 00000000000..1f72c8e62b4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/fieldinverter.h
@@ -0,0 +1,449 @@
+// Copyright 2016
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "i_document_remove_listener.h"
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+class IOrderedDocumentInserter;
+class DocumentRemover;
+
+/*
+ * Inverts the content of one index field for a batch of documents
+ * into a list of word positions, and later pushes that list to an
+ * ordered document inserter.
+ *
+ * NOTE(review): the targets of the bare #include lines above and a few
+ * template arguments below are missing in this copy of the patch;
+ * they are left as found wherever the visible code does not determine
+ * them.
+ */
+class FieldInverter : public IDocumentRemoveListener
+{
+public:
+    /*
+     * One word occurrence, or (when _elemId == _elemRemoved) a marker
+     * that the word is to be removed for the document.
+     */
+    class PosInfo
+    {
+    public:
+        uint32_t _wordNum; // XXX: Initially word reference
+        uint32_t _docId;
+        uint32_t _elemId;
+        uint32_t _wordPos;
+        uint32_t _elemRef; // Offset in _elems
+
+        // Element id value reserved for marking a remove entry.
+        static constexpr uint32_t _elemRemoved =
+            std::numeric_limits<uint32_t>::max();
+
+        PosInfo()
+            : _wordNum(0),
+              _docId(0),
+              _elemId(0),
+              _wordPos(0),
+              _elemRef(0)
+        {
+        }
+
+        PosInfo(uint32_t wordRef,
+                uint32_t docId,
+                uint32_t elemId,
+                uint32_t wordPos, uint32_t elemRef)
+            : _wordNum(wordRef),
+              _docId(docId),
+              _elemId(elemId),
+              _wordPos(wordPos),
+              _elemRef(elemRef)
+        {
+        }
+
+
+        // Construct a remove marker for (wordRef, docId).
+        PosInfo(uint32_t wordRef,
+                uint32_t docId)
+            : _wordNum(wordRef),
+              _docId(docId),
+              _elemId(_elemRemoved),
+              _wordPos(0),
+              _elemRef(0)
+        {
+        }
+
+        bool
+        removed() const
+        {
+            return _elemId == _elemRemoved;
+        }
+
+        /*
+         * Order by word number, then document id.  Within one
+         * (word, document) group remove markers sort first, followed
+         * by element id and word position.
+         */
+        bool
+        operator<(const PosInfo &rhs) const
+        {
+            if (_wordNum != rhs._wordNum)
+                return _wordNum < rhs._wordNum;
+            if (_docId != rhs._docId)
+                return _docId < rhs._docId;
+            if (_elemId != rhs._elemId) {
+                if (removed() != rhs.removed())
+                    return removed() && !rhs.removed();
+                return _elemId < rhs._elemId;
+            }
+            return _wordPos < rhs._wordPos;
+        }
+    };
+
+private:
+    // Neither copyable nor movable.
+    FieldInverter(const FieldInverter &) = delete;
+    FieldInverter(FieldInverter &&) = delete;
+    FieldInverter &operator=(const FieldInverter &) = delete;
+    FieldInverter &operator=(FieldInverter &&) = delete;
+
+    typedef vespalib::Array<char> WordBuffer;
+
+    /*
+     * Weight and length of one field element.
+     */
+    class ElemInfo
+    {
+    public:
+        int32_t _weight;
+        uint32_t _len;
+
+        ElemInfo(int32_t weight)
+            : _weight(weight),
+              _len(0u)
+        {
+        }
+
+        void
+        setLen(uint32_t len)
+        {
+            _len = len;
+        }
+    };
+
+    typedef std::vector<ElemInfo> ElemInfoVec;
+
+    typedef std::vector<PosInfo> PosInfoVec;
+
+    /*
+     * Orders word references by the text of the words they refer to
+     * in the word buffer.
+     */
+    class CompareWordRef
+    {
+        const char *const _wordBuffer;
+
+    public:
+        CompareWordRef(const WordBuffer &wordBuffer)
+            : _wordBuffer(&wordBuffer[0])
+        {
+        }
+
+        // A word reference is an offset into the buffer in units of
+        // 4 bytes.
+        const char *
+        getWord(uint32_t wordRef) const
+        {
+            return &_wordBuffer[static_cast<size_t>(wordRef) << 2];
+        }
+
+        bool
+        operator()(const uint32_t lhs, const uint32_t rhs) const
+        {
+            return strcmp(getWord(lhs), getWord(rhs)) < 0;
+        }
+    };
+
+    /*
+     * Range in _positions vector used to represent a document put.
+     */
+    class PositionRange
+    {
+        uint32_t _start;
+        uint32_t _len;
+
+    public:
+        PositionRange(uint32_t start, uint32_t len)
+            : _start(start),
+              _len(len)
+        {
+        }
+
+        bool
+        operator<(const PositionRange &rhs) const
+        {
+            if (_start != rhs._start) {
+                return _start < rhs._start;
+            }
+            return _len < rhs._len;
+        }
+
+        uint32_t getStart() const { return _start; }
+        uint32_t getLen() const { return _len; }
+    };
+
+    // Current field state.
+    uint32_t                       _fieldId;   // current field id
+    uint32_t                       _elem;      // current element
+    uint32_t                       _wpos;      // current word pos
+    uint32_t                       _docId;
+    uint32_t                       _oldPosSize;
+
+    const index::Schema           &_schema;
+
+    WordBuffer                     _words;
+    ElemInfoVec                    _elems;
+    PosInfoVec                     _positions;
+    index::DocIdAndPosOccFeatures  _features;
+    // NOTE(review): element type missing in this copy of the patch.
+    std::vector                    _elementWordRefs;
+    std::vector<uint32_t>          _wordRefs;
+
+    // NOTE(review): pair member types missing in this copy of the patch.
+    typedef std::pair SpanTerm;
+    typedef std::vector<SpanTerm> SpanTermVector;
+    SpanTermVector                 _terms;
+
+    // info about aborted and pending documents.
+    // NOTE(review): element type of _abortedDocs missing in this copy.
+    std::vector                    _abortedDocs;
+    std::map<uint32_t, PositionRange> _pendingDocs;
+    std::vector<uint32_t>          _removeDocs;
+
+    void
+    invertNormalDocTextField(const document::FieldValue &val);
+
+public:
+    /**
+     * Start a new element
+     *
+     * @param weight element weight
+     */
+    void
+    startElement(int32_t weight);
+
+    /**
+     * End an element.
+     */
+    void
+    endElement(void);
+
+private:
+    /**
+     * Save field value as word in word buffer.
+     *
+     * @param word      word to be saved
+     *
+     * @return          word reference
+     */
+    VESPA_DLL_LOCAL uint32_t
+    saveWord(const vespalib::stringref word);
+
+    /**
+     * Save field value as word in word buffer.
+     *
+     * @param fv        field value containing word to be stored
+     *
+     * @return          word reference
+     */
+    VESPA_DLL_LOCAL uint32_t
+    saveWord(const document::FieldValue &fv);
+
+    /**
+     * Get pointer to saved word from a word reference.
+     *
+     * @param wordRef   word reference
+     *
+     * @return          saved word
+     */
+    const char *
+    getWordFromRef(uint32_t wordRef) const
+    {
+        return &_words[static_cast<size_t>(wordRef) << 2];
+    }
+
+    /**
+     * Get pointer to saved word from a word number
+     *
+     * @param wordNum   word number
+     *
+     * @return          saved word
+     */
+    const char *
+    getWordFromNum(uint32_t wordNum) const
+    {
+        return getWordFromRef(_wordRefs[wordNum]);
+    }
+
+    /**
+     * Get word number from word reference
+     *
+     * The word number is stored in the 4 bytes immediately before
+     * the word itself.
+     *
+     * @param wordRef   word reference
+     *
+     * @return          word number
+     */
+    uint32_t
+    getWordNum(uint32_t wordRef) const
+    {
+        const char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
+        return *reinterpret_cast<const uint32_t *>(p);
+    }
+
+    /**
+     * Update mapping from word reference to word number
+     *
+     * @param wordRef   word reference
+     * @param wordNum   word number
+     */
+    void
+    updateWordNum(uint32_t wordRef, uint32_t wordNum)
+    {
+        char *p = &_words[static_cast<size_t>(wordRef - 1) << 2];
+        *reinterpret_cast<uint32_t *>(p) = wordNum;
+    }
+
+    /**
+     * Add a word reference to posting list.  Don't step word pos.
+     *
+     * The position refers to the most recently added entry in _elems.
+     *
+     * @param wordRef   word reference
+     */
+    void
+    add(uint32_t wordRef) {
+        _positions.emplace_back(wordRef, _docId, _elem,
+                                _wpos, _elems.size() - 1);
+    }
+
+    void
+    stepWordPos(void)
+    {
+        ++_wpos;
+    }
+
+public:
+    VESPA_DLL_LOCAL void
+    processAnnotations(const document::StringFieldValue &value);
+
+private:
+    void
+    processNormalDocTextField(const document::StringFieldValue &field);
+
+    void
+    processNormalDocArrayTextField(const document::ArrayFieldValue &field);
+
+    void
+    processNormalDocWeightedSetTextField(const document::WeightedSetFieldValue &field);
+
+    /**
+     * Obtain the schema used by this index.
+     *
+     * @return schema used by this index
+     */
+    const index::Schema &
+    getSchema(void) const
+    {
+        return _schema;
+    }
+
+    /**
+     * Clear internal memory structures.
+     */
+    void
+    reset(void);
+
+    /**
+     * Calculate word numbers and replace word references with word
+     * numbers in internal memory structures.
+     */
+    void
+    sortWords(void);
+
+    void
+    moveNotAbortedDocs(uint32_t &dstIdx, uint32_t srcIdx, uint32_t nextTrimIdx);
+
+    void
+    trimAbortedDocs();
+
+    /*
+     * Abort a pending document that has already been inverted.
+     *
+     * @param docId            local id for document
+     *
+     */
+    void
+    abortPendingDoc(uint32_t docId);
+
+public:
+    /**
+     * Create a new field inverter based on the given schema.
+     *
+     * @param schema        the index schema to use
+     * @param fieldId       the field to be inverted
+     */
+    FieldInverter(const index::Schema &schema, uint32_t fieldId);
+
+    /*
+     * Apply pending removes.
+     *
+     * @param remover       document remover
+     */
+    void
+    applyRemoves(DocumentRemover &remover);
+
+    /**
+     * Push inverted documents to memory index structure.
+     *
+     * Temporary restriction: Currently only one document at a time is
+     * supported.
+     *
+     * @param inserter  ordered document inserter
+     */
+    void
+    pushDocuments(IOrderedDocumentInserter &inserter);
+
+    /*
+     * Invert a normal text field, based on annotations.
+     *
+     * @param docId     local id for document
+     * @param val       field value to invert
+     */
+    void
+    invertField(uint32_t docId, const document::FieldValue::UP &val);
+
+    /*
+     * Setup remove of word in old version of document.
+     */
+    virtual void
+    remove(const vespalib::stringref word, uint32_t docId) override;
+
+    /*
+     * Abort any pending put of the document and queue it for removal.
+     */
+    void
+    removeDocument(uint32_t docId)
+    {
+        abortPendingDoc(docId);
+        _removeDocs.push_back(docId);
+    }
+
+    /*
+     * Begin inverting a document.  No other document may be open and
+     * docId 0 is not allowed.  An earlier pending version is aborted
+     * and the document is queued for removal of its old content.
+     */
+    void
+    startDoc(uint32_t docId)
+    {
+        assert(_docId == 0);
+        assert(docId != 0);
+        abortPendingDoc(docId);
+        _removeDocs.push_back(docId);
+        _docId = docId;
+        _elem = 0;
+        _wpos = 0;
+    }
+
+    /*
+     * Finish the current document: record which range of _positions
+     * it produced and mark that no document is open.
+     */
+    void
+    endDoc()
+    {
+        uint32_t newPosSize = static_cast<uint32_t>(_positions.size());
+        _pendingDocs.insert({ _docId,
+                              { _oldPosSize, newPosSize - _oldPosSize } });
+        _docId = 0;
+        _oldPosSize = newPosSize;
+    }
+
+    /*
+     * Save a word and, unless saveWord() returned the 0 reference,
+     * add an occurrence at the current position and step the word
+     * position.
+     */
+    void
+    addWord(const vespalib::stringref word)
+    {
+        uint32_t wordRef = saveWord(word);
+        if (wordRef != 0u) {
+            add(wordRef);
+            stepWordPos();
+        }
+    }
+};
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h
new file mode 100644
index 00000000000..7cf84892b17
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_document_insert_listener.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+#include
+
+namespace search {
+namespace memoryindex {
+
+/**
+ * Interface used to track which {wordRef, fieldId} pairs that are
+ * inserted into the memory index dictionary for a document.
+ */
+class IDocumentInsertListener
+{
+public:
+    virtual ~IDocumentInsertListener() {}
+    // Callback carrying a word reference and a local document id.
+    virtual void insert(btree::EntryRef wordRef, uint32_t docId) = 0;
+    // NOTE(review): presumably signals the end of a batch of insert()
+    // calls; confirm against the implementations.
+    virtual void flush() = 0;
+};
+
+
+} // namespace memoryindex
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h b/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h
new file mode 100644
index 00000000000..b8e71f8673b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/i_document_remove_listener.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+/**
+ * Interface used to track which {wordRef, fieldId} pairs that are
+ * removed from the memory index dictionary for a document.
+ *
+ * Note that the callback itself receives the word text and the local
+ * document id, not a word reference.
+ */
+class IDocumentRemoveListener
+{
+public:
+    virtual ~IDocumentRemoveListener() {}
+
+    // Callback carrying a word and the local id of a document.
+    virtual void remove(const vespalib::stringref word,
+                        uint32_t docId) = 0;
+};
+
+
+} // namespace memoryindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h
new file mode 100644
index 00000000000..aef68b62d23
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/iordereddocumentinserter.h
@@ -0,0 +1,52 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+namespace memoryindex
+{
+
+/**
+ * Interface class for ordered document inserter.
+ *
+ * Insert order must be properly sorted, by (word, docId)
+ */
+class IOrderedDocumentInserter
+{
+public:
+    virtual ~IOrderedDocumentInserter() { }
+
+    /**
+     * Set next word to operate on.
+     */
+    virtual void setNextWord(const vespalib::stringref word) = 0;
+
+    /**
+     * Add (word, docId) tuple with given features.
+     */
+    virtual void add(uint32_t docId,
+                     const index::DocIdAndFeatures &features) = 0;
+
+    /**
+     * Remove (word, docId) tuple.
+     */
+    virtual void remove(uint32_t docId) = 0;
+
+    /*
+     * Flush pending changes to postinglist for (_word).
+     *
+     * _dItr is located at correct position.
+     */
+    virtual void flush() = 0;
+
+    /*
+     * Rewind iterator, to start new pass.
+     */
+    virtual void rewind() = 0;
+};
+
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp
new file mode 100644
index 00000000000..88b718e1860
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.cpp
@@ -0,0 +1,342 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+#include
+#include
+#include "memoryfieldindex.h"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ordereddocumentinserter.h"
+
+LOG_SETUP(".memoryindex.memoryfieldindex");
+
+namespace search {
+
+using index::DocIdAndFeatures;
+using index::WordDocElementFeatures;
+using index::Schema;
+
+namespace memoryindex {
+
+MemoryFieldIndex::MemoryFieldIndex(const Schema & schema, uint32_t fieldId)
+    : _wordStore(),
+      _numUniqueWords(0),
+      _generationHandler(),
+      _dict(),
+      _postingListStore(),
+      _featureStore(schema),
+      _fieldId(fieldId),
+      _remover(_wordStore),
+      _inserter(std::make_unique<OrderedDocumentInserter>(*this))
+{
+}
+
+/*
+ * Tear down the index: clear every posting list referenced from the
+ * dictionary, then the dictionary itself, and finally run the
+ * freeze / hold-list / generation steps so that held memory is
+ * released.
+ */
+MemoryFieldIndex::~MemoryFieldIndex(void)
+{
+    _postingListStore.disableFreeLists();
+    _postingListStore.disableElemHoldList();
+    _dict.disableFreeLists();
+    _dict.disableElemHoldList();
+    // XXX: Kludge
+    for (DictionaryTree::Iterator it = _dict.begin();
+         it.valid(); ++it) {
+        btree::EntryRef pidx(it.getData());
+        if (pidx.valid()) {
+            _postingListStore.clear(pidx);
+            // Before updating ref
+            std::atomic_thread_fence(std::memory_order_release);
+            it.writeData(btree::EntryRef().ref());
+        }
+    }
+    _postingListStore.clearBuilder();
+    freeze();   // Flush all pending posting list tree freezes
+    transferHoldLists();
+    _dict.clear();  // Clear dictionary
+    freeze();   // Flush pending freeze for dictionary tree.
+    transferHoldLists();
+    incGeneration();
+    trimHoldLists();
+}
+
+/*
+ * Look up a word in the dictionary and return an iterator over its
+ * posting list, or a default constructed iterator when the word is
+ * not present.
+ */
+MemoryFieldIndex::PostingList::Iterator
+MemoryFieldIndex::find(const vespalib::stringref word) const
+{
+    // The invalid EntryRef in the key makes KeyComp compare against
+    // the supplied word text.
+    DictionaryTree::Iterator itr =
+        _dict.find(WordKey(btree::EntryRef()),
+                   KeyComp(_wordStore, word));
+    if (itr.valid()) {
+        return _postingListStore.begin(itr.getData());
+    }
+    return PostingList::Iterator();
+}
+
+/*
+ * Same lookup as find(), but against the frozen view of the
+ * dictionary and posting list store.
+ */
+MemoryFieldIndex::PostingList::ConstIterator
+MemoryFieldIndex::findFrozen(const vespalib::stringref word) const
+{
+    DictionaryTree::ConstIterator itr =
+        _dict.getFrozenView().find(WordKey(btree::EntryRef()),
+                                   KeyComp(_wordStore, word));
+    if (itr.valid()) {
+        return _postingListStore.beginFrozen(itr.getData());
+    }
+    return PostingList::Iterator();
+}
+
+
+/*
+ * Move the features of every posting list entry to new locations in
+ * the feature store and update the entries to reference the new
+ * locations.
+ */
+void
+MemoryFieldIndex::compactFeatures(void)
+{
+    auto toHold = _featureStore.startCompact();
+    DictionaryTree::Iterator itr(_dict.begin());
+    uint32_t packedIndex = _fieldId;
+    for (; itr.valid(); ++itr) {
+        PostingListStore::RefType pidx(itr.getData());
+        if (!pidx.valid())
+            continue;
+        uint32_t clusterSize = _postingListStore.getClusterSize(pidx);
+        if (clusterSize == 0) {
+            // Cluster size 0: the posting list is stored as a tree.
+            const PostingList *tree =
+                _postingListStore.getTreeEntry(pidx);
+            PostingList::Iterator
+                it(tree->begin(_postingListStore.getAllocator()));
+            for (; it.valid(); ++it) {
+                btree::EntryRef oldFeatures = it.getData();
+
+                // Filter on which buffers to move features from when
+                // performing incremental compaction.
+
+                btree::EntryRef newFeatures =
+                    _featureStore.moveFeatures(packedIndex, oldFeatures);
+
+#if 0
+                LOG(info,
+                    "Moved features from 0x%x to 0x%x\n",
+                    oldFeatures.ref(), newFeatures.ref());
+#endif
+
+                // Features must be written before reference is updated.
+                std::atomic_thread_fence(std::memory_order_release);
+
+                // Ugly, ugly due to const_cast in iterator
+                it.writeData(newFeatures.ref());
+            }
+        } else {
+            // Otherwise it is a short array of clusterSize entries.
+            const PostingListKeyDataType *shortArray =
+                _postingListStore.getKeyDataEntry(pidx, clusterSize);
+            const PostingListKeyDataType *ite = shortArray + clusterSize;
+            for (const PostingListKeyDataType *it = shortArray; it < ite;
+                 ++it) {
+                btree::EntryRef oldFeatures = it->getData();
+
+                // Filter on which buffers to move features from when
+                // performing incremental compaction.
+
+                btree::EntryRef newFeatures =
+                    _featureStore.moveFeatures(packedIndex, oldFeatures);
+
+#if 0
+                LOG(info,
+                    "Moved features from 0x%x to 0x%x\n",
+                    oldFeatures.ref(), newFeatures.ref());
+#endif
+
+                // Features must be written before reference is updated.
+                std::atomic_thread_fence(std::memory_order_release);
+
+                // Ugly, ugly due to const_cast, but new data is
+                // semantically equal to old data
+                const_cast<PostingListKeyDataType *>(it)->
+                    setData(newFeatures.ref());
+            }
+        }
+    }
+    typedef GenerationHandler::generation_t generation_t;
+    _featureStore.finishCompact(toHold);
+    generation_t generation = _generationHandler.getCurrentGeneration();
+    _featureStore.transferHoldLists(generation);
+}
+
+namespace {
+
+/*
+ * Feed the elements and word positions of one decoded (word, document)
+ * entry to the index builder.  The word positions of all elements are
+ * stored back to back in features._wordPositions, so poff tracks where
+ * the current element's positions start.
+ */
+void
+dumpElements(search::index::IndexBuilder &indexBuilder,
+             const DocIdAndFeatures &features)
+{
+    size_t poff = 0;
+    for (const WordDocElementFeatures &fef : features._elements) {
+        indexBuilder.startElement(fef.getElementId(), fef.getWeight(), fef.getElementLen());
+        for (size_t j = 0; j < fef.getNumOccs(); ++j) {
+            indexBuilder.addOcc(features._wordPositions[poff + j]);
+        }
+        poff += fef.getNumOccs();
+        indexBuilder.endElement();
+    }
+}
+
+}
+
+/*
+ * Dump all words with a valid posting list to the index builder, in
+ * dictionary order, with one document entry per posting list entry.
+ */
+void
+MemoryFieldIndex::dump(search::index::IndexBuilder & indexBuilder)
+{
+    vespalib::stringref word;
+    FeatureStore::DecodeContextCooked decoder(NULL);
+    DocIdAndFeatures features;
+    _featureStore.setupForField(_fieldId, decoder);
+    for (DictionaryTree::Iterator itr = _dict.begin(); itr.valid(); ++itr) {
+        const WordKey & wk = itr.getKey();
+        PostingListStore::RefType plist(itr.getData());
+        word = _wordStore.getWord(wk._wordRef);
+        if (!plist.valid())
+            continue;
+        indexBuilder.startWord(word);
+        uint32_t clusterSize = _postingListStore.getClusterSize(plist);
+        if (clusterSize == 0) {
+            // Cluster size 0: the posting list is stored as a tree.
+            const PostingList *tree =
+                _postingListStore.getTreeEntry(plist);
+            PostingList::Iterator pitr = tree->begin(_postingListStore.getAllocator());
+            assert(pitr.valid());
+            for (; pitr.valid(); ++pitr) {
+                uint32_t docId = pitr.getKey();
+                btree::EntryRef featureRef = pitr.getData();
+                indexBuilder.startDocument(docId);
+                _featureStore.setupForReadFeatures(featureRef, decoder);
+                decoder.readFeatures(features);
+                dumpElements(indexBuilder, features);
+                indexBuilder.endDocument();
+            }
+        } else {
+            // Otherwise it is a short array of clusterSize entries.
+            const PostingListKeyDataType *kd =
+                _postingListStore.getKeyDataEntry(plist, clusterSize);
+            const PostingListKeyDataType *kde = kd + clusterSize;
+            for (; kd != kde; ++kd) {
+                uint32_t docId = kd->_key;
+                btree::EntryRef featureRef = kd->getData();
+                indexBuilder.startDocument(docId);
+                _featureStore.setupForReadFeatures(featureRef, decoder);
+                decoder.readFeatures(features);
+                dumpElements(indexBuilder, features);
+                indexBuilder.endDocument();
+            }
+        }
+        indexBuilder.endWord();
+    }
+}
+
+
+/*
+ * Sum of the memory usage reported by the word store, dictionary,
+ * posting list store, feature store and the document remover's store.
+ */
+MemoryUsage
+MemoryFieldIndex::getMemoryUsage() const
+{
+    MemoryUsage usage;
+    usage.merge(_wordStore.getMemoryUsage());
+    usage.merge(_dict.getMemoryUsage());
+    usage.merge(_postingListStore.getMemoryUsage());
+    usage.merge(_featureStore.getMemoryUsage());
+    usage.merge(_remover.getStore().getMemoryUsage());
+    return usage;
+}
+
+
+} // namespace search::memoryindex
+
+namespace btree {
+
+// NOTE(review): the template argument lists of the explicit
+// instantiations below are missing in this copy of the patch.
+template
+class BTreeNodeDataWrap;
+
+template
+class BTreeNodeT;
+
+#if 0
+template
+class BTreeNodeT;
+#endif
+
+template
+class BTreeNodeTT;
+
+template
+class BTreeNodeTT;
+
+template
+class BTreeInternalNode;
+
+template
+class BTreeLeafNode;
+
+template
+class BTreeNodeStore;
+
+template
+class BTreeIterator;
+
+template
+class BTree;
+
+template
+class BTreeRoot;
+
+template
+class BTreeRootBase;
+
+template
+class BTreeNodeAllocator;
+
+
+} // namespace btree
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h
new file mode 100644
index 00000000000..1c16f1746a5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryfieldindex.h
@@ -0,0 +1,283 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "featurestore.h"
+#include "wordstore.h"
+#include "document_remover.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace memoryindex {
+
+class OrderedDocumentInserter;
+/*
+ * Memory index for a single field.
+ */
+class MemoryFieldIndex {
+public:
+    // NOTE(review): the template argument lists of the btree typedefs
+    // in this class are missing in this copy of the patch.
+    typedef btree::BTreeRoot
+    PostingList; // docid -> feature ref
+    typedef btree::BTreeStore,
+                              btree::BTreeDefaultTraits> PostingListStore;
+    typedef PostingListStore::KeyDataType PostingListKeyDataType;
+
+
+    /*
+     * Dictionary key: a reference to a word in the word store.
+     */
+    struct WordKey {
+        btree::EntryRef _wordRef;
+
+        explicit WordKey(btree::EntryRef wordRef)
+            : _wordRef(wordRef)
+        {
+        }
+
+        WordKey(void)
+            : _wordRef()
+        {
+        }
+
+        friend vespalib::asciistream &
+        operator<<(vespalib::asciistream & os, const WordKey & rhs)
+        {
+            os << "wr(" << rhs._wordRef.ref() << ")";
+            return os;
+        }
+    };
+
+    /*
+     * Orders dictionary keys by the text of the words they refer to.
+     * A key with an invalid word reference stands for the word given
+     * to the constructor, which is how lookups by text are done.
+     */
+    class KeyComp {
+    private:
+        const WordStore          &_wordStore;
+        const vespalib::stringref _word;
+
+        // Resolve a key to word text: the stored word when the
+        // reference is valid, otherwise the lookup word.
+        const char *
+        getWord(btree::EntryRef wordRef) const
+        {
+            if (wordRef.valid()) {
+                return _wordStore.getWord(wordRef);
+            }
+            return _word.c_str();
+        }
+
+    public:
+        KeyComp(const WordStore &wordStore, const vespalib::stringref word)
+            : _wordStore(wordStore),
+              _word(word)
+        {
+        }
+
+        // NOTE(review): strcmp needs NUL-terminated input; confirm that
+        // the stringref passed in by callers is terminated.
+        bool
+        operator()(const WordKey & lhs, const WordKey & rhs) const
+        {
+            int cmpres = strcmp(getWord(lhs._wordRef), getWord(rhs._wordRef));
+            return cmpres < 0;
+        }
+    };
+
+    typedef uint32_t PostingListPtr;
+    typedef btree::BTree DictionaryTree;
+private:
+    typedef vespalib::GenerationHandler GenerationHandler;
+
+    WordStore               _wordStore;
+    uint64_t                _numUniqueWords;
+    GenerationHandler       _generationHandler;
+    DictionaryTree          _dict;
+    PostingListStore        _postingListStore;
+    FeatureStore            _featureStore;
+    uint32_t                _fieldId;
+    DocumentRemover         _remover;
+    std::unique_ptr _inserter;
+
+public:
+    // Store a word in the word store and count it as a unique word.
+    btree::EntryRef addWord(const vespalib::stringref word) {
+        _numUniqueWords++;
+        return _wordStore.addWord(word);
+    }
+
+    // Store features for this field and return the reference to them.
+    btree::EntryRef
+    addFeatures(const index::DocIdAndFeatures &features)
+    {
+        return _featureStore.addFeatures(_fieldId, features).first;
+    }
+
+    MemoryFieldIndex(const index::Schema &schema, uint32_t fieldId);
+    ~MemoryFieldIndex(void);
+    PostingList::Iterator find(const vespalib::stringref word) const;
+
+    PostingList::ConstIterator
+    findFrozen(const vespalib::stringref word) const;
+
+    uint64_t getNumUniqueWords() const { return _numUniqueWords; }
+    const FeatureStore & getFeatureStore() const { return _featureStore; }
+    const WordStore &getWordStore() const { return _wordStore; }
+    OrderedDocumentInserter &getInserter() const { return *_inserter; }
+
+private:
+    // Freeze the posting list store and the dictionary allocator.
+    void freeze() {
+        _postingListStore.freeze();
+        _dict.getAllocator().freeze();
+    }
+
+    // Trim the hold lists of all stores up to the first generation
+    // that is still in use.
+    void
+    trimHoldLists()
+    {
+        GenerationHandler::generation_t usedGen =
+            _generationHandler.getFirstUsedGeneration();
+        _postingListStore.trimHoldLists(usedGen);
+        _dict.getAllocator().trimHoldLists(usedGen);
+        _featureStore.trimHoldLists(usedGen);
+    }
+
+    // Tag the hold lists of all stores with the current generation.
+    void
+    transferHoldLists()
+    {
+        GenerationHandler::generation_t generation =
+            _generationHandler.getCurrentGeneration();
+        _postingListStore.transferHoldLists(generation);
+        _dict.getAllocator().transferHoldLists(generation);
+        _featureStore.transferHoldLists(generation);
+    }
+
+    void
+    incGeneration(void)
+    {
+        _generationHandler.incGeneration();
+    }
+
+public:
+    GenerationHandler::Guard takeGenerationGuard() {
+        return _generationHandler.takeGuard();
+    }
+
+    void
+    compactFeatures(void);
+
+    void dump(search::index::IndexBuilder & indexBuilder);
+
+    MemoryUsage getMemoryUsage() const;
+
+    DictionaryTree &
+    getDictionaryTree()
+    {
+        return _dict;
+    }
+
+    PostingListStore &
+    getPostingListStore()
+    {
+        return _postingListStore;
+    }
+
+    DocumentRemover &
+    getDocumentRemover()
+    {
+        return _remover;
+    }
+
+    // Flush the document remover, then freeze, transfer hold lists,
+    // bump the generation and trim hold lists, in that order.
+    void commit()
+    {
+        _remover.flush();
+        freeze();
+        transferHoldLists();
+        incGeneration();
+        trimHoldLists();
+    }
+};
+
+} // namespace search::memoryindex
+
+namespace btree {
+
+extern template
+class BTreeNodeDataWrap;
+
+extern template
+class BTreeNodeT;
+
+#if 0
+extern template
+class BTreeNodeT;
+#endif
+
+extern template
+class BTreeNodeTT;
+
+extern template
+class BTreeNodeTT;
+
+extern template
+class BTreeInternalNode;
+
+extern template
+class BTreeLeafNode;
+
+extern template
+class BTreeNodeStore;
+
+extern template
+class BTreeIterator;
+
+extern template
+class BTree;
+
+extern template
+class BTreeRoot;
+
+extern template
+class BTreeRootBase;
+
+extern template
+class BTreeNodeAllocator;
+
+} // namespace search::btree
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp
new file mode 100644
index 00000000000..90a0957ccab
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.cpp
@@ -0,0 +1,308 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".searchlib.memoryindex.memoryindex");
+
+#include "memoryindex.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+using document::ArrayFieldValue;
+using document::WeightedSetFieldValue;
+using vespalib::LockGuard;
+using vespalib::GenerationHandler;
+
+namespace search {
+
+using fef::TermFieldMatchDataArray;
+using index::IndexBuilder;
+using index::Schema;
+using index::SchemaUtil;
+using query::NumberTerm;
+using query::LocationTerm;
+using query::Node;
+using query::PredicateQuery;
+using query::PrefixTerm;
+using query::RangeTerm;
+using query::RegExpTerm;
+using query::StringTerm;
+using query::SubstringTerm;
+using query::SuffixTerm;
+using queryeval::SearchIterator;
+using queryeval::Searchable;
+using queryeval::CreateBlueprintVisitorHelper;
+using queryeval::Blueprint;
+using queryeval::BooleanMatchIteratorWrapper;
+using queryeval::EmptyBlueprint;
+using queryeval::FieldSpecBase;
+using queryeval::FieldSpecBaseList;
+using queryeval::FieldSpec;
+using queryeval::IRequestContext;
+
+namespace memoryindex {
+
+MemoryIndex::MemoryIndex(const Schema &schema,
+                         ISequencedTaskExecutor &invertThreads,
+                         ISequencedTaskExecutor &pushThreads)
+    : _schema(schema),
+      _invertThreads(invertThreads),
+      _pushThreads(pushThreads),
+      _inverter0(_schema, _invertThreads, _pushThreads),
+      _inverter1(_schema, _invertThreads, _pushThreads),
+      _inverter(&_inverter0),
+      _dictionary(_schema),
+      _frozen(false),
+      _maxDocId(0), // docId 0 is reserved
+      _numDocs(0),
+      _lock(),
+      _hiddenFields(schema.getNumIndexFields(), false),
+      _wipeTimeSchema(),
+      _indexedDocs(0),
+      _staticMemoryFootprint(getMemoryUsage().allocatedBytes())
+{
+}
+
+// Wait for both executors to drain before the members are destroyed.
+MemoryIndex::~MemoryIndex()
+{
+    _invertThreads.sync();
+    _pushThreads.sync();
+}
+
+/*
+ * Invert a document into the current inverter.  Ignored, with a
+ * warning, when the index is frozen.  The document count only grows
+ * when docId was not already present in the index.
+ */
+void
+MemoryIndex::insertDocument(uint32_t docId, const document::Document &doc)
+{
+    if (_frozen) {
+        LOG(warning, "Memory index frozen: ignoring insert of document '%s'(%u): '%s'",
+            doc.getId().toString().c_str(), docId, doc.toString().c_str());
+        return;
+    }
+    updateMaxDocId(docId);
+    _inverter->invertDocument(docId, doc);
+    if (_indexedDocs.insert(docId).second) {
+        incNumDocs();
+    }
+}
+
+/*
+ * Queue removal of a document in the current inverter.  Ignored, with
+ * a warning, when the index is frozen.  The document count only
+ * shrinks when docId was present in the index.
+ */
+void
+MemoryIndex::removeDocument(uint32_t docId)
+{
+    if (_frozen) {
+        LOG(warning, "Memory index frozen: ignoring remove of document (%u)",
+            docId);
+        return;
+    }
+    _inverter->removeDocument(docId);
+    if (_indexedDocs.find(docId) != _indexedDocs.end()) {
+        _indexedDocs.erase(docId);
+        decNumDocs();
+    }
+}
+
+// NOTE(review): the element type of the shared_ptr parameter is
+// missing in this copy of the patch.
+void
+MemoryIndex::commit(const std::shared_ptr &onWriteDone)
+{
+    _invertThreads.sync(); // drain inverting into this inverter
+    _pushThreads.sync(); // drain use of other inverter
+    _inverter->pushDocuments(_dictionary, onWriteDone);
+    flipInverter();
+}
+
+
+// Make the inverter that is not currently selected the current one.
+void
+MemoryIndex::flipInverter()
+{
+    if (_inverter != &_inverter0) {
+        _inverter = &_inverter0;
+    } else {
+        _inverter = &_inverter1;
+    }
+}
+
+void
+MemoryIndex::freeze()
+{
+    _frozen = true;
+}
+
+void
+MemoryIndex::dump(IndexBuilder &indexBuilder)
+{
+    _dictionary.dump(indexBuilder);
+}
+
+namespace {
+
+/*
+ * Leaf blueprint for a term looked up in one memory field index.  It
+ * keeps the generation guard alive for as long as the posting list
+ * iterator taken from the frozen view is held.
+ */
+class MemTermBlueprint : public queryeval::SimpleLeafBlueprint
+{
+private:
+    GenerationHandler::Guard               _genGuard;
+    Dictionary::PostingList::ConstIterator _pitr;
+    const FeatureStore                    &_featureStore;
+    const uint32_t                         _fieldId;
+    const bool                             _useBitVector;
+
+public:
+    MemTermBlueprint(GenerationHandler::Guard &&genGuard,
+                     Dictionary::PostingList::ConstIterator pitr,
+                     const FeatureStore &featureStore,
+                     const FieldSpecBase &field,
+                     uint32_t fieldId,
+                     bool useBitVector)
+        : SimpleLeafBlueprint(field),
+          _genGuard(std::move(genGuard)),
+          _pitr(pitr),
+          _featureStore(featureStore),
+          _fieldId(fieldId),
+          _useBitVector(useBitVector)
+    {
+        // The estimate is the posting list size; it is flagged empty
+        // when the iterator is not valid.
+        HitEstimate estimate(_pitr.size(), !_pitr.valid());
+        setEstimate(estimate);
+    }
+
+    /*
+     * Create a posting iterator over the posting list, wrapped in a
+     * BooleanMatchIteratorWrapper when _useBitVector is set.
+     */
+    virtual SearchIterator::UP
+    createLeafSearch(const TermFieldMatchDataArray &tfmda, bool) const
+    {
+        SearchIterator::UP search(new PostingIterator(_pitr, _featureStore, _fieldId, tfmda));
+        if (_useBitVector) {
+            LOG(debug, "Return BooleanMatchIteratorWrapper: fieldId(%u), docCount(%zu)",
+                _fieldId, _pitr.size());
+            return SearchIterator::UP(new BooleanMatchIteratorWrapper(std::move(search), tfmda));
+        }
+        LOG(debug, "Return PostingIterator: fieldId(%u), docCount(%zu)",
+            _fieldId, _pitr.size());
+        return search;
+    }
+
+};
+
+/**
+ * Determines the correct Blueprint to use.
+ **/
+class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
+{
+private:
+    const FieldSpec &_field;
+    const uint32_t   _fieldId;
+    Dictionary &     _dictionary;
+
+public:
+    CreateBlueprintVisitor(Searchable &searchable,
+                           const IRequestContext & requestContext,
+                           const FieldSpec &field,
+                           uint32_t fieldId,
+                           Dictionary &dictionary)
+        : CreateBlueprintVisitorHelper(searchable, field, requestContext),
+          _field(field),
+          _fieldId(fieldId),
+          _dictionary(dictionary) {}
+
+    /*
+     * Look the term up as text in the frozen view of the field index
+     * and set a MemTermBlueprint as the result.  The generation guard
+     * is taken before the lookup and handed to the blueprint.
+     */
+    template <class TermNode>
+    void visitTerm(TermNode &n) {
+        const vespalib::string termStr = queryeval::termAsString(n);
+        LOG(debug, "searching for '%s' in '%s'",
+            termStr.c_str(), _field.getName().c_str());
+        MemoryFieldIndex *fieldIndex = _dictionary.getFieldIndex(_fieldId);
+        GenerationHandler::Guard genGuard = fieldIndex->takeGenerationGuard();
+        Dictionary::PostingList::ConstIterator pitr
+            = fieldIndex->findFrozen(termStr);
+        bool useBitVector = _field.isFilter();
+        setResult(make_UP(new MemTermBlueprint(std::move(genGuard), pitr,
+                                               fieldIndex->getFeatureStore(),
+                                               _field, _fieldId, useBitVector)));
+    }
+
+    virtual void visit(LocationTerm &n)  { visitTerm(n); }
+    virtual void visit(PrefixTerm &n)    { visitTerm(n); }
+    virtual void visit(RangeTerm &n)     { visitTerm(n); }
+    virtual void visit(StringTerm &n)    { visitTerm(n); }
+    virtual void visit(SubstringTerm &n) { visitTerm(n); }
+    virtual void visit(SuffixTerm &n)    { visitTerm(n); }
+    virtual void visit(RegExpTerm &n)    { visitTerm(n); }
+    virtual void visit(PredicateQuery &) { }
+
+    virtual void visit(NumberTerm &n) {
+        handleNumberTermAsText(n);
+    }
+
+};
+
+} // namespace search::memoryindex::<unnamed>
+
+/*
+ * Create a blueprint for a term in a field.  Unknown or hidden fields
+ * get an EmptyBlueprint; otherwise the term node is visited to build
+ * the blueprint.
+ */
+Blueprint::UP
+MemoryIndex::createBlueprint(const IRequestContext & requestContext,
+                             const FieldSpec &field,
+                             const Node &term)
+{
+    uint32_t fieldId = _schema.getIndexFieldId(field.getName());
+    if (fieldId == Schema::UNKNOWN_FIELD_ID || _hiddenFields[fieldId]) {
+        return Blueprint::UP(new EmptyBlueprint(field));
+    }
+    CreateBlueprintVisitor visitor(*this, requestContext, field, fieldId, _dictionary);
+    const_cast<Node &>(term).accept(visitor);
+    return visitor.getResult();
+}
+
+MemoryUsage
+MemoryIndex::getMemoryUsage() const
+{
+    MemoryUsage usage;
+    usage.merge(_dictionary.getMemoryUsage());
+    return usage;
+}
+
+/*
+ * Narrow the wipe-time schema to its intersection with the given
+ * schema and hide every index field that is no longer part of it.
+ * Nothing changes when the intersection equals the schema it was
+ * computed from.
+ */
+void
+MemoryIndex::wipeHistory(const Schema &schema)
+{
+    LockGuard lock(_lock);
+    // Intersect against the previous wipe-time schema when there is
+    // one, otherwise against the schema the index was created with.
+    const Schema &base = (_wipeTimeSchema.get() != NULL) ? *_wipeTimeSchema : _schema;
+    Schema::UP newSchema = Schema::intersect(base, schema);
+    if (base == *newSchema)
+        return;
+    // base may refer to the old wipe-time schema and is not used below.
+    _wipeTimeSchema.reset(newSchema.release());
+    SchemaUtil::IndexIterator i(_schema);
+    for (; i.isValid(); ++i) {
+        uint32_t packedIndex = i.getIndex();
+        assert(packedIndex < _hiddenFields.size());
+        SchemaUtil::IndexIterator wi(*_wipeTimeSchema, i);
+        _hiddenFields[packedIndex] = !wi.isValid();
+    }
+}
+
+Schema::SP
+MemoryIndex::getWipeTimeSchema() const
+{
+    LockGuard lock(_lock);
+    return _wipeTimeSchema;
+}
+
+} // namespace memoryindex
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h
new file mode 100644
index 00000000000..1a8a993275e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/memoryindex/memoryindex.h
@@ -0,0 +1,184 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "dictionary.h"
+#include "documentinverter.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+
+namespace index { class IndexBuilder; }
+
+class ISequencedTaskExecutor;
+
+namespace memoryindex {
+
+/**
+ * Lock-free implementation of a memory-based index
+ * using the document inverter and dictionary classes from searchlib.
+ **/ +class MemoryIndex : public queryeval::Searchable +{ +private: + index::Schema _schema; + ISequencedTaskExecutor &_invertThreads; + ISequencedTaskExecutor &_pushThreads; + DocumentInverter _inverter0; + DocumentInverter _inverter1; + DocumentInverter *_inverter; + Dictionary _dictionary; + bool _frozen; + uint32_t _maxDocId; + uint32_t _numDocs; + vespalib::Lock _lock; + std::vector _hiddenFields; + index::Schema::SP _wipeTimeSchema; + vespalib::hash_set _indexedDocs; // documents in memory index + const uint64_t _staticMemoryFootprint; + + MemoryIndex(const MemoryIndex &) = delete; + MemoryIndex(MemoryIndex &&) = delete; + MemoryIndex &operator=(const MemoryIndex &) = delete; + MemoryIndex &operator=(MemoryIndex &&) = delete; + + void removeDocumentHelper(uint32_t docId, const document::Document &doc); + void updateMaxDocId(uint32_t docId) { + if (docId > _maxDocId) { + _maxDocId = docId; + } + } + void incNumDocs() { + ++_numDocs; + } + void decNumDocs() { + if (_numDocs > 0) { + --_numDocs; + } + } + + void flipInverter(); + +public: + /** + * Convenience type defs. + */ + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + /** + * Create a new memory index based on the given schema. + * + * @param schema the index schema to use + **/ + MemoryIndex(const index::Schema &schema, + ISequencedTaskExecutor &invertThreads, + ISequencedTaskExecutor &pushThreads); + + /** + * Class destructor. Clean up washlist. + */ + ~MemoryIndex(); + + /** + * Obtain the schema used by this index. + * + * @return schema used by this index + **/ + const index::Schema &getSchema() const { return _schema; } + + /** + * Check if this index is frozen. + * + * @return true if this index is frozen + **/ + bool isFrozen() const { return _frozen; } + + /** + * Insert a document into the index. If the document is already in + * the index, the old version will be removed first. + * + * @param docId local document id. + * @param doc the document to insert. 
+ **/ + void insertDocument(uint32_t docId, const document::Document &doc); + + /** + * Remove a document from the index. + * + * @param docId local document id. + **/ + void removeDocument(uint32_t docId); + + /** + * Commits the inserts and removes since the last commit, making + * them searchable. When commit is completed, onWriteDone goes out + * of scope, scheduling completion callback. + * + * Callers can call pushThreads.sync() to wait for push completion. + **/ + void commit(const std::shared_ptr &onWriteDone); + + /** + * Freeze this index. Further index updates will be + * discarded. Extra information kept to wash the posting lists + * will be discarded. + **/ + void freeze(); + + /** + * Dump the contents of this index into the given index builder. + * + * @param indexBuilder the builder to dump into + **/ + void dump(index::IndexBuilder &indexBuilder); + + // implements Searchable + virtual queryeval::Blueprint::UP + createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpec &field, + const query::Node &term); + + virtual queryeval::Blueprint::UP + createBlueprint(const queryeval::IRequestContext & requestContext, + const queryeval::FieldSpecList &fields, + const query::Node &term) { + return queryeval::Searchable::createBlueprint(requestContext, fields, term); + } + + virtual uint32_t getDocIdLimit() const { + // Used to get docId range. + return _maxDocId + 1; + } + + virtual uint32_t getNumDocs() const { + return _numDocs; + } + + virtual uint64_t getNumWords() const { + return _dictionary.getNumUniqueWords(); + } + + void + wipeHistory(const index::Schema &schema); + + index::Schema::SP getWipeTimeSchema() const; + + /** + * Gets an approximation of how much memory the index uses. + * + * @return approximately how much memory is used by the index. 
+ **/ + MemoryUsage getMemoryUsage() const; + + uint64_t getStaticMemoryFootprint() const { return _staticMemoryFootprint; } +}; + +} // namespace memoryindex +} // namespace search diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp new file mode 100644 index 00000000000..ca7b83a0781 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include +#include +#include +#include +#include "ordereddocumentinserter.h" +#include "i_document_insert_listener.h" + +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".memoryindex.orderedfieldinserter"); + +namespace search +{ + +namespace memoryindex +{ + +namespace +{ + +const vespalib::string emptyWord = ""; + +} + + +OrderedDocumentInserter::OrderedDocumentInserter(MemoryFieldIndex &fieldIndex) + : _word(), + _prevDocId(noDocId), + _prevAdd(false), + _fieldIndex(fieldIndex), + _dItr(_fieldIndex.getDictionaryTree().begin()), + _listener(_fieldIndex.getDocumentRemover()), + _removes(), + _adds() +{ +} + +OrderedDocumentInserter::~OrderedDocumentInserter() +{ + flush(); +} + + +void +OrderedDocumentInserter::flushWord() +{ + if (_removes.empty() && _adds.empty()) { + return; + } + //XXX: Feature store leak, removed features not marked dead + PostingListStore &postingListStore(_fieldIndex.getPostingListStore()); + btree::EntryRef pidx(_dItr.getData()); + postingListStore.apply(pidx, + &_adds[0], + &_adds[0] + _adds.size(), + &_removes[0], + &_removes[0] + _removes.size()); + if (pidx.ref() != _dItr.getData()) { + // Before updating ref + std::atomic_thread_fence(std::memory_order_release); + _dItr.writeData(pidx.ref()); + } + _removes.clear(); + _adds.clear(); +} + + +void +OrderedDocumentInserter::flush() 
+{ + flushWord(); + _listener.flush(); +} + + +void +OrderedDocumentInserter::setNextWord(const vespalib::stringref word) +{ + // TODO: Adjust here if zero length words should be legal. + assert(_word < word); + _word = word; + _prevDocId = noDocId; + _prevAdd = false; + flushWord(); + const WordStore &wordStore(_fieldIndex.getWordStore()); + KeyComp cmp(wordStore, _word); + WordKey key; + if (_dItr.valid() && cmp(_dItr.getKey(), key)) { + _dItr.binarySeek(key, cmp); + } + if (!_dItr.valid() || cmp(key, _dItr.getKey())) { + btree::EntryRef wordRef = _fieldIndex.addWord(_word); + WordKey insertKey(wordRef); + DictionaryTree &dTree(_fieldIndex.getDictionaryTree()); + dTree.insert(_dItr, insertKey, btree::EntryRef().ref()); + } + assert(_dItr.valid()); + assert(_word == wordStore.getWord(_dItr.getKey()._wordRef)); +} + + +void +OrderedDocumentInserter::add(uint32_t docId, + const index::DocIdAndFeatures &features) +{ + assert(docId != noDocId); + assert(_prevDocId == noDocId || _prevDocId < docId || + (_prevDocId == docId && !_prevAdd)); + btree::EntryRef featureRef = _fieldIndex.addFeatures(features); + _adds.push_back(PostingListKeyDataType(docId, featureRef.ref())); + _listener.insert(_dItr.getKey()._wordRef, docId); + _prevDocId = docId; + _prevAdd = true; +} + + +void +OrderedDocumentInserter::remove(uint32_t docId) +{ + assert(docId != noDocId); + assert(_prevDocId == noDocId || _prevDocId < docId); + _removes.push_back(docId); + _prevDocId = docId; + _prevAdd = false; +} + + +void +OrderedDocumentInserter::rewind() +{ + assert(_removes.empty() && _adds.empty()); + _word = ""; + _prevDocId = noDocId; + _prevAdd = false; + _dItr.begin(); +} + + +btree::EntryRef +OrderedDocumentInserter::getWordRef() const +{ + return _dItr.getKey()._wordRef; +} + + +} + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h new file mode 100644 index 00000000000..f8ec07e305e --- 
/dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/ordereddocumentinserter.h @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "iordereddocumentinserter.h" +#include "memoryfieldindex.h" +#include + +namespace search +{ + +namespace memoryindex +{ + +class IDocumentInsertListener; + + +/** + * Class for inserting updates to MemoryFieldIndex in an ordered manner + * (single pass scan of dictionary tree) + * + * Insert order must be properly sorted, by (word, docId) + */ +class OrderedDocumentInserter : public IOrderedDocumentInserter +{ + vespalib::stringref _word; + uint32_t _prevDocId; + bool _prevAdd; + using DictionaryTree = MemoryFieldIndex::DictionaryTree; + using PostingListStore = MemoryFieldIndex::PostingListStore; + using KeyComp = MemoryFieldIndex::KeyComp; + using WordKey = MemoryFieldIndex::WordKey; + using PostingListKeyDataType = MemoryFieldIndex::PostingListKeyDataType; + MemoryFieldIndex &_fieldIndex; + DictionaryTree::Iterator _dItr; + IDocumentInsertListener &_listener; + + // Pending changes to posting list for (_word) + std::vector _removes; + std::vector _adds; + + + static constexpr uint32_t noFieldId = std::numeric_limits::max(); + static constexpr uint32_t noDocId = std::numeric_limits::max(); + + /* + * Flush pending changes to postinglist for (_word). + * + * _dItr is located at correct position. + */ + void flushWord(); + +public: + OrderedDocumentInserter(MemoryFieldIndex &fieldIndex); + virtual ~OrderedDocumentInserter(); + virtual void setNextWord(const vespalib::stringref word) override; + virtual void add(uint32_t docId, + const index::DocIdAndFeatures &features) override; + virtual void remove(uint32_t docId) override; + + /* + * Flush pending changes to postinglist for (_word). Also flush + * insert listener. + * + * _dItr is located at correct position. 
+ */ + virtual void flush() override; + + /* + * Rewind iterator, to start new pass. + */ + virtual void rewind() override; + + // Used by unit test + btree::EntryRef getWordRef() const; +}; + +} + +} diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp new file mode 100644 index 00000000000..f03b476ef6e --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.cpp @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".memoryindex.postingiterator"); +#include "postingiterator.h" + +#include +#include +#include +#include +#include + +namespace search { +namespace memoryindex { + +PostingIterator::PostingIterator(Dictionary::PostingList::ConstIterator itr, + const FeatureStore & featureStore, + uint32_t packedIndex, + const fef::TermFieldMatchDataArray & matchData) : + queryeval::RankedSearchIteratorBase(matchData), + _itr(itr), + _featureStore(featureStore), + _featureDecoder(NULL) +{ + _featureStore.setupForField(packedIndex, _featureDecoder); +} + +void +PostingIterator::initRange(uint32_t begin, uint32_t end) +{ + SearchIterator::initRange(begin, end); + _itr.lower_bound(begin); + if (!_itr.valid() || isAtEnd(_itr.getKey())) { + setAtEnd(); + } else { + setDocId(_itr.getKey()); + } + clearUnpacked(); +} + +void +PostingIterator::doSeek(uint32_t docId) +{ + if (getUnpacked()) { + clearUnpacked(); + } + _itr.linearSeek(docId); + if (!_itr.valid()) { + setAtEnd(); + } else { + setDocId(_itr.getKey()); + } +} + +void +PostingIterator::doUnpack(uint32_t docId) +{ + if (!_matchData.valid() || getUnpacked()) { + return; + } + assert(docId == getDocId()); + assert(_itr.valid()); + assert(docId == _itr.getKey()); + btree::EntryRef featureRef(_itr.getData()); + _featureStore.setupForUnpackFeatures(featureRef, _featureDecoder); + 
_featureDecoder.unpackFeatures(_matchData, docId); + setUnpacked(); +} + + +} // namespace search::memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h new file mode 100644 index 00000000000..8b40984a710 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/postingiterator.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "dictionary.h" +#include + +namespace search { +namespace memoryindex { + +/** + * Search iterator for memory index posting list. + **/ +class PostingIterator : public queryeval::RankedSearchIteratorBase +{ +private: + Dictionary::PostingList::ConstIterator _itr; + const FeatureStore &_featureStore; + FeatureStore::DecodeContextCooked _featureDecoder; + +public: + /** + * Creates a search iterator for the given posting list iterator. + * + * @param itr the posting list iterator to base the search iterator upon. + * @param featureStore reference to store for features. + * @param packedIndex the field or field collection owning features. + * @param matchData the match data to unpack features into. 
+ **/ + PostingIterator(Dictionary::PostingList::ConstIterator itr, + const FeatureStore &featureStore, + uint32_t packedIndex, + const fef::TermFieldMatchDataArray &matchData); + + void doSeek(uint32_t docId) override; + void doUnpack(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + +} // namespace search::memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp new file mode 100644 index 00000000000..d31a69c9ff6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.cpp @@ -0,0 +1,384 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".memoryindex.urlfieldinverter"); +#include "urlfieldinverter.h" +#include "fieldinverter.h" +#include +#include +#include +#include +#include +#include + +namespace search +{ + +namespace memoryindex +{ + +namespace +{ +static vespalib::string HOSTNAME_BEGIN("StArThOsT"); +static vespalib::string HOSTNAME_END("EnDhOsT"); +const vespalib::string SPANTREE_NAME("linguistics"); + +static size_t +lowercaseToken(vespalib::string &dest, const char *src, size_t srcSize) +{ + dest.clear(); + dest.reserve(8 + srcSize); + + vespalib::Utf8Reader r(src, srcSize); + vespalib::Utf8Writer w(dest); + + using vespalib::LowerCase; + + while (r.hasMore()) { + uint32_t i = r.getChar(vespalib::Utf8::BAD); + if (i != vespalib::Utf8::BAD) { + w.putChar(LowerCase::convert(i)); + } + } + return dest.size(); +} + + +} // namespace + + +using document::ArrayFieldValue; +using document::DataType; +using document::FieldValue; +using document::IntFieldValue; +using document::SpanTree; +using document::StringFieldValue; +using document::StructFieldValue; +using document::UrlDataType; +using 
document::WeightedSetFieldValue; +using search::index::Schema; +using search::util::URL; +using vespalib::make_string; + + +void +UrlFieldInverter::startDoc(uint32_t docId) +{ + _all->startDoc(docId); + _scheme->startDoc(docId); + _host->startDoc(docId); + _port->startDoc(docId); + _path->startDoc(docId); + _query->startDoc(docId); + _fragment->startDoc(docId); + _hostname->startDoc(docId); +} + + +void +UrlFieldInverter::endDoc() +{ + _all->endDoc(); + _scheme->endDoc(); + _host->endDoc(); + _port->endDoc(); + _path->endDoc(); + _query->endDoc(); + _fragment->endDoc(); + _hostname->endDoc(); +} + + +void +UrlFieldInverter::startElement(int32_t weight) +{ + _all->startElement(weight); + _scheme->startElement(weight); + _host->startElement(weight); + _port->startElement(weight); + _path->startElement(weight); + _query->startElement(weight); + _fragment->startElement(weight); + _hostname->startElement(weight); +} + + +void +UrlFieldInverter::endElement() +{ + _all->endElement(); + _scheme->endElement(); + _host->endElement(); + _port->endElement(); + _path->endElement(); + _query->endElement(); + _fragment->endElement(); + _hostname->endElement(); +} + + +void +UrlFieldInverter::processUrlSubField(FieldInverter *inverter, + const StructFieldValue &field, + vespalib::stringref subField, + bool addAnchors) +{ + const FieldValue::UP sfv = field.getValue(subField); + if (!sfv) + return; + if (!sfv->inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) { + LOG(error, + "Illegal field type %s for URL subfield %s, expected string", + sfv->getDataType()->getName().c_str(), + subField.c_str()); + return; + } + const StringFieldValue &value = static_cast(*sfv); + if (addAnchors) { + inverter->addWord(HOSTNAME_BEGIN); + } + inverter->processAnnotations(value); + if (addAnchors) { + inverter->addWord(HOSTNAME_END); + } +} + + +void +UrlFieldInverter::processAnnotatedUrlField(const StructFieldValue & field) +{ + processUrlSubField(_all, field, UrlDataType::FIELD_ALL, false); + 
processUrlSubField(_scheme, field, UrlDataType::FIELD_SCHEME, false); + processUrlSubField(_host, field, UrlDataType::FIELD_HOST, false); + processUrlSubField(_port, field, UrlDataType::FIELD_PORT, false); + processUrlSubField(_path, field, UrlDataType::FIELD_PATH, false); + processUrlSubField(_query, field, UrlDataType::FIELD_QUERY, false); + processUrlSubField(_fragment, field, UrlDataType::FIELD_FRAGMENT, false); + processUrlSubField(_hostname, field, UrlDataType::FIELD_HOST, true); +} + + +void +UrlFieldInverter::processUrlField(const FieldValue &url_field) +{ + if (url_field.inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) { + const vespalib::string &url_str = + static_cast(url_field).getValue(); + processUrlOldStyle(url_str); + return; + } + assert(url_field.getClass().id() == StructFieldValue::classId); + const StructFieldValue &field = + static_cast(url_field); + + const FieldValue::UP all_val = field.getValue("all"); + if (all_val.get() == NULL) { + if (_useAnnotations) { + // New style, use annotations + processAnnotatedUrlField(field); + } + return; + } + + if (!all_val->inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) { + LOG(error, + "Illegal field type %s for URL subfield all, expected string", + all_val->getDataType()->getName().c_str()); + return; + } + const StringFieldValue &all_sfv = + static_cast(*all_val); + if (_useAnnotations) { + StringFieldValue::SpanTrees trees = all_sfv.getSpanTrees(); + const SpanTree *tree = StringFieldValue::findTree(trees, SPANTREE_NAME); + if (tree != NULL) { + // New style, use annotations + processAnnotatedUrlField(field); + return; + } + } + + if (_useAnnotations) { + return; + } + + // Old style, tokenize in backend + const vespalib::string &s = all_sfv.getValue(); + processUrlOldStyle(s); +} + +void UrlFieldInverter::processUrlOldStyle(const vespalib::string &s) { + URL url(reinterpret_cast(s.data()), s.size()); + + _hostname->addWord(HOSTNAME_BEGIN); + + vespalib::string lowToken; + const unsigned char *t; + 
URL::URL_CONTEXT url_context; + while ((t = url.GetToken(url_context))) { + const char *token = reinterpret_cast(t); + size_t tokenLen = strlen(token); + tokenLen = lowercaseToken(lowToken, token, tokenLen); + token = lowToken.c_str(); + vespalib::stringref tokenRef(token, tokenLen); + switch (url_context) { + case URL::URL_SCHEME: + _scheme->addWord(tokenRef); + _all->addWord(tokenRef); + break; + case URL::URL_HOST: + case URL::URL_DOMAIN: + case URL::URL_MAINTLD: + _host->addWord(tokenRef); + _hostname->addWord(tokenRef); + _all->addWord(tokenRef); + break; + case URL::URL_PORT: + if (strcmp(token, "80") && strcmp(token, "443")) { + _port->addWord(tokenRef); + _all->addWord(tokenRef); + } + break; + case URL::URL_PATH: + case URL::URL_FILENAME: + case URL::URL_EXTENSION: + case URL::URL_PARAMS: + _path->addWord(tokenRef); + _all->addWord(tokenRef); + break; + case URL::URL_QUERY: + _query->addWord(tokenRef); + _all->addWord(tokenRef); + break; + case URL::URL_FRAGMENT: + _fragment->addWord(tokenRef); + _all->addWord(tokenRef); + break; + case URL::URL_ADDRESS: + _all->addWord(tokenRef); + break; + default: + LOG(warning, "Ignoring unknown Uri token '%s'.", token); + } + } + _hostname->addWord(HOSTNAME_END); +} + + +void +UrlFieldInverter::processArrayUrlField(const ArrayFieldValue &field) +{ + for (uint32_t el(0), ele(field.size());el < ele; ++el) { + const FieldValue &element = field[el]; + startElement(1); + processUrlField(element); + endElement(); + } +} + + +void +UrlFieldInverter::processWeightedSetUrlField(const WeightedSetFieldValue &field) +{ + for (const auto & el : field) { + const FieldValue &key = *el.first; + const FieldValue &xweight = *el.second; + assert(xweight.getClass().id() == IntFieldValue::classId); + int32_t weight = xweight.getAsInt(); + startElement(weight); + processUrlField(key); + endElement(); + } +} + +namespace { +bool isUriType(const DataType &type) { + return type == UrlDataType::getInstance() + || type == *DataType::STRING + || 
type == *DataType::URI; +} +} // namespace + + +void +UrlFieldInverter::invertUrlField(const FieldValue &val) +{ + const vespalib::Identifiable::RuntimeClass & cInfo(val.getClass()); + switch (_collectionType) { + case Schema::SINGLE: + if (isUriType(*val.getDataType())) { + startElement(1); + processUrlField(val); + endElement(); + } else { + throw std::runtime_error(make_string("Expected URI struct, got '%s'", val.getDataType()->getName().c_str())); + } + break; + case Schema::WEIGHTEDSET: + if (cInfo.id() == WeightedSetFieldValue::classId) { + const WeightedSetFieldValue &wset = static_cast(val); + if (isUriType(wset.getNestedType())) { + processWeightedSetUrlField(wset); + } else { + throw std::runtime_error(make_string("Expected wset of URI struct, got '%s'", wset.getNestedType().getName().c_str())); + } + } else { + throw std::runtime_error(make_string("Expected weighted set, got '%s'", cInfo.name())); + } + break; + case Schema::ARRAY: + if (cInfo.id() == ArrayFieldValue::classId) { + const ArrayFieldValue &arr = static_cast(val); + if (isUriType(arr.getNestedType())) { + processArrayUrlField(arr); + } else { + throw std::runtime_error(make_string("Expected array of URI struct, got '%s' (%s)", arr.getNestedType().getName().c_str(), arr.getNestedType().toString(true).c_str())); + } + } else { + throw std::runtime_error(make_string("Expected Array, got '%s'", cInfo.name())); + } + break; + default: + break; + } +} + +void +UrlFieldInverter::invertField(uint32_t docId, const FieldValue::UP &val) +{ + startDoc(docId); + if (val) { + invertUrlField(*val); + } + endDoc(); +} + + +UrlFieldInverter::UrlFieldInverter(index::Schema::CollectionType collectionType, + FieldInverter *all, + FieldInverter *scheme, + FieldInverter *host, + FieldInverter *port, + FieldInverter *path, + FieldInverter *query, + FieldInverter *fragment, + FieldInverter *hostname) + : _all(all), + _scheme(scheme), + _host(host), + _port(port), + _path(path), + _query(query), + 
_fragment(fragment), + _hostname(hostname), + _useAnnotations(false), + _collectionType(collectionType) +{ +} + + +} // namespace memoryindex + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h new file mode 100644 index 00000000000..107000cb775 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/urlfieldinverter.h @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +namespace memoryindex +{ + +class FieldInverter; + +class UrlFieldInverter +{ + FieldInverter *_all; + FieldInverter *_scheme; + FieldInverter *_host; + FieldInverter *_port; + FieldInverter *_path; + FieldInverter *_query; + FieldInverter *_fragment; + FieldInverter *_hostname; + + bool _useAnnotations; + index::Schema::CollectionType _collectionType; + +public: + using UriField = index::DocTypeBuilder::UriField; + +private: + void startDoc(uint32_t docId); + + void endDoc(); + + void startElement(int32_t weight); + + void endElement(); + + void + processUrlSubField(FieldInverter *inverter, + const document::StructFieldValue &field, + vespalib::stringref subField, + bool addAnchors); + + void processAnnotatedUrlField(const document::StructFieldValue &field); + + void processUrlField(const document::FieldValue &url_field); + + void processUrlOldStyle(const vespalib::string &s); + + void processArrayUrlField(const document::ArrayFieldValue &field); + + void processWeightedSetUrlField(const document::WeightedSetFieldValue &field); + + void invertUrlField(const document::FieldValue &field); +public: + UrlFieldInverter(index::Schema::CollectionType collectionType, + FieldInverter *all, + FieldInverter *scheme, + FieldInverter *host, + FieldInverter *port, + FieldInverter *path, + FieldInverter *query, + FieldInverter *fragment, + FieldInverter *hostname); 
+ + void invertField(uint32_t docId, const document::FieldValue::UP &field); + + void setUseAnnotations(bool useAnnotations) { + _useAnnotations = useAnnotations; + } +}; + + +} // namespace memoryindex + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp b/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp new file mode 100644 index 00000000000..a0df99f62be --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/wordstore.cpp @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".memoryindex.wordstore"); +#include "wordstore.h" +#include + +namespace search { +namespace memoryindex { + +constexpr size_t MIN_CLUSTERS = 1024; + +WordStore::WordStore() + : _store(), + _numWords(0), + _type(RefType::align(1), + MIN_CLUSTERS, + RefType::offsetSize() / RefType::align(1)), + _typeId(0) +{ + _store.addType(&_type); + _store.initActiveBuffers(); +} + + +WordStore::~WordStore(void) +{ + _store.dropBuffers(); +} + +btree::EntryRef +WordStore::addWord(const vespalib::stringref word) +{ + _store.ensureBufferCapacity(_typeId, RefType::align(word.size() + 1)); + uint32_t activeBufferId = _store.getActiveBufferId(_typeId); + btree::BufferState &state = _store.getBufferState(activeBufferId); + size_t oldSize = state.size(); + RefType ref(oldSize, activeBufferId); + assert(oldSize == ref.offset()); + char *be = _store.getBufferEntry(activeBufferId, oldSize); + for (size_t i = 0; i < word.size(); ++i) { + *be++ = word[i]; + } + *be++ = 0; + state.pushed_back(word.size() + 1); + size_t pad = RefType::pad(state.size()); + for (size_t i = 0; i < pad; ++i) { + *be++ = 0; + } + state.pushed_back(pad); + ++_numWords; + return ref; +} + + +} // namespace search::memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/memoryindex/wordstore.h b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h new file 
mode 100644 index 00000000000..ad5b5020759 --- /dev/null +++ b/searchlib/src/vespa/searchlib/memoryindex/wordstore.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace memoryindex { + +class WordStore +{ +public: + typedef btree::DataStoreT > DataStoreType; + typedef DataStoreType::RefType RefType; + +private: + DataStoreType _store; + uint32_t _numWords; + btree::BufferType _type; + const uint32_t _typeId; + +public: + WordStore(); + ~WordStore(); + btree::EntryRef addWord(const vespalib::stringref word); + const char * getWord(btree::EntryRef ref) const + { + RefType internalRef(ref); + return _store.getBufferEntry(internalRef.bufferId(), + internalRef.offset()); + } + + MemoryUsage getMemoryUsage() const { + return _store.getMemoryUsage(); + } +}; + +} // namespace search::memoryindex +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/parsequery/.gitignore b/searchlib/src/vespa/searchlib/parsequery/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt new file mode 100644 index 00000000000..3d0ca9697a9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_parsequery OBJECT + SOURCES + parse.cpp + simplequerystack.cpp + stackdumpiterator.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/parsequery/OWNERS b/searchlib/src/vespa/searchlib/parsequery/OWNERS new file mode 100644 index 00000000000..1037590124e --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/OWNERS @@ -0,0 +1 @@ +balder diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.cpp b/searchlib/src/vespa/searchlib/parsequery/parse.cpp new file mode 100644 index 00000000000..e071b5728e9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/parse.cpp @@ -0,0 +1,239 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Creation date: 2000-05-15 + * + * Implementation of ParseItem + * + * Copyright (C) 1997-2003 Fast Search & Transfer ASA + * Copyright (C) 2003 Overture Services Norway AS + * ALL RIGHTS RESERVED + */ + +#include +#include +LOG_SETUP(""); + +#include +#include + +namespace search { + +#define PARSEITEM_DEFAULT_CONSTRUCTOR_LIST \ + _next(NULL), \ + _sibling(NULL), \ + _weight(100), \ + _uniqueId(0), \ + _arg1(0), \ + _arg2(0), \ + _arg3(0), \ + _type(ITEM_UNDEF), \ + _flags(0), \ + _arity(0), \ + _indexName(), \ + _term() + + +ParseItem::ParseItem(ItemType type, int arity) + : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST +{ + assert(type==ITEM_OR || type==ITEM_WEAK_AND || type==ITEM_EQUIV || + type==ITEM_AND || type==ITEM_NOT || type==ITEM_RANK || + type==ITEM_PHRASE || type==ITEM_ANY || type==ITEM_NEAR || type==ITEM_ONEAR); + SetType(type); + _arity = arity; +} + +ParseItem::ParseItem(ItemType type, int arity, const char *idx) + : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST +{ + assert(type == ITEM_PHRASE || type==ITEM_WEIGHTED_SET || type==ITEM_DOT_PRODUCT || type==ITEM_WAND); + SetType(type); + _arity = arity; + SetIndex(idx); +} + +namespace { + +void assert_type(ParseItem::ItemType type) +{ + assert(type == ParseItem::ITEM_TERM || 
+ type == ParseItem::ITEM_NUMTERM || + type == ParseItem::ITEM_PREFIXTERM || + type == ParseItem::ITEM_SUBSTRINGTERM || + type == ParseItem::ITEM_SUFFIXTERM || + type == ParseItem::ITEM_PURE_WEIGHTED_STRING || + type == ParseItem::ITEM_PURE_WEIGHTED_LONG || + type == ParseItem::ITEM_EXACTSTRINGTERM || + type == ParseItem::ITEM_PREDICATE_QUERY); + (void) type; +} + +} + +ParseItem::ParseItem(ItemType type, const vespalib::stringref & idx, const char *term) + : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST +{ + assert_type(type); + SetType(type); + SetIndex(idx.c_str()); + SetTerm(term); +} + +ParseItem::ParseItem(ItemType type, const char *term) + : PARSEITEM_DEFAULT_CONSTRUCTOR_LIST +{ + assert_type(type); + SetType(type); + SetTerm(term); +} + +ParseItem::~ParseItem(void) +{ + delete _next; // the destructor runs recursively, so this frees the rest of the list + delete _sibling; // likewise frees the sublist hanging off this item +} + +void +ParseItem::AppendBuffer(RawBuf *buf) const +{ + // Lengths of the index name and term; they are written as length prefixes below. + uint32_t indexLen = _indexName.size(); + uint32_t termLen = _term.size(); + + // Serialize the item: type byte, optional weight / unique id / flags, then the type-specific fields. 
+ buf->append(_type); + if (Feature_Weight()) { // this item has weight + buf->appendCompressedNumber(_weight.percent()); + } + if (feature_UniqueId()) { + buf->appendCompressedPositiveNumber(_uniqueId); + } + if (feature_Flags()) { + buf->append(_flags); + } + switch (Type()) { + case ITEM_OR: + case ITEM_EQUIV: + case ITEM_AND: + case ITEM_NOT: + case ITEM_RANK: + case ITEM_ANY: + buf->appendCompressedPositiveNumber(_arity); + break; + case ITEM_WEAK_AND: + case ITEM_NEAR: + case ITEM_ONEAR: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(_arg1); + if (Type() == ITEM_WEAK_AND) { + buf->appendCompressedPositiveNumber(indexLen); + if (indexLen != 0) { + buf->append(_indexName.c_str(), indexLen); + } + } + break; + case ITEM_WEIGHTED_SET: + case ITEM_DOT_PRODUCT: + case ITEM_WAND: + case ITEM_PHRASE: + buf->appendCompressedPositiveNumber(_arity); + buf->appendCompressedPositiveNumber(indexLen); + if (indexLen != 0) { + buf->append(_indexName.c_str(), indexLen); + } + if (Type() == ITEM_WAND) { + buf->appendCompressedPositiveNumber(_arg1); // targetNumHits + double nboVal = vespalib::nbostream::n2h(_arg2); + buf->append(&nboVal, sizeof(nboVal)); // scoreThreshold + nboVal = vespalib::nbostream::n2h(_arg3); + buf->append(&nboVal, sizeof(nboVal)); // thresholdBoostFactor + } + break; + case ITEM_TERM: + case ITEM_NUMTERM: + case ITEM_PREFIXTERM: + case ITEM_SUBSTRINGTERM: + case ITEM_EXACTSTRINGTERM: + case ITEM_SUFFIXTERM: + case ITEM_REGEXP: + buf->appendCompressedPositiveNumber(indexLen); + if (indexLen != 0) { + buf->append(_indexName.c_str(), indexLen); + } + buf->appendCompressedPositiveNumber(termLen); + if (termLen != 0) { + buf->append(_term.c_str(), termLen); + } + break; + case ITEM_UNDEF: + default: // every other type is serialized as the header fields only + break; + } +} + +size_t +ParseItem::GetBufferLen(void) const +{ + // Size estimate for what AppendBuffer() writes; every compressed number is counted as sizeof(uint32_t). 
+ uint32_t indexLen = _indexName.size(); + uint32_t termLen = _term.size(); + + uint32_t len = sizeof(uint8_t); // type field + if (Feature_Weight()) { + len += sizeof(uint32_t); + } + if (feature_UniqueId()) { + len += sizeof(uint32_t); + } + if (feature_Flags()) { + len += sizeof(uint8_t); + } + + // Add the size of the type-specific fields, mirroring the switch in AppendBuffer(). + switch (Type()) { + case ITEM_OR: + case ITEM_EQUIV: + case ITEM_AND: + case ITEM_NOT: + case ITEM_RANK: + case ITEM_ANY: + len += sizeof(uint32_t); + break; + case ITEM_NEAR: + case ITEM_ONEAR: + len += sizeof(uint32_t) * 2; + break; + case ITEM_WEAK_AND: + len += sizeof(uint32_t) * 3 + indexLen; + break; + case ITEM_WEIGHTED_SET: + case ITEM_DOT_PRODUCT: + case ITEM_PHRASE: + len += sizeof(uint32_t) * 2 + indexLen; + break; + case ITEM_WAND: + len += sizeof(uint32_t) * 3 + sizeof(double) * 2 + indexLen; // arity, indexLen, targetNumHits + scoreThreshold and thresholdBoostFactor doubles + break; + case ITEM_TERM: + case ITEM_NUMTERM: + case ITEM_PREFIXTERM: + case ITEM_SUBSTRINGTERM: + case ITEM_EXACTSTRINGTERM: + case ITEM_SUFFIXTERM: + case ITEM_REGEXP: + len += sizeof(uint32_t) * 2 + indexLen + termLen; + break; + case ITEM_PURE_WEIGHTED_STRING: + len += sizeof(uint32_t) + termLen; + break; + case ITEM_PURE_WEIGHTED_LONG: + len += sizeof(uint64_t); + break; + case ITEM_UNDEF: + default: + break; + } + return len; +} + +} diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h new file mode 100644 index 00000000000..889cc52f31c --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -0,0 +1,232 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Creation date: 2000-05-15 + * + * Declaration of ParseItem class. + * + * Copyright (C) 1997-2003 Fast Search & Transfer ASA + * Copyright (C) 2003 Overture Services Norway AS + * ALL RIGHTS RESERVED + */ +#pragma once + +#include +#include +#include +#include + +namespace search { + +/** + * An item on the simple query stack. 
+ * + * An object of this class represents a single item + * on the simple query stack. It has a type, which corresponds + * to the different query stack execution operations. It also + * provides an arity, and the string values indexName and term, to + * accomodate the different needs of the operations. + * It also includes a mechanism for making singly linked lists + * with sub-lists. This is used during the parsing, and also + * when constructing the simple query stack. + */ +class ParseItem +{ +private: + ParseItem(const ParseItem &); + ParseItem& operator=(const ParseItem &); +public: + /** Pointer to next item in a linked list. */ + ParseItem *_next; + /** Pointer to first item in a sublist. */ + ParseItem *_sibling; + + /** The type of the item is from this set of values. + It is important that these defines match those in prelude/source/com/yahoo/prelude/query/Item.java */ + enum ItemType { + ITEM_OR = 0, + ITEM_AND = 1, + ITEM_NOT = 2, + ITEM_RANK = 3, + ITEM_TERM = 4, + ITEM_NUMTERM = 5, + ITEM_PHRASE = 6, + ITEM_PAREN = 7, + ITEM_PREFIXTERM = 8, + ITEM_SUBSTRINGTERM = 9, + ITEM_ANY = 10, + ITEM_NEAR = 11, + ITEM_ONEAR = 12, + ITEM_SUFFIXTERM = 13, + ITEM_EQUIV = 14, + ITEM_WEIGHTED_SET = 15, + ITEM_WEAK_AND = 16, + ITEM_EXACTSTRINGTERM = 17, + UNUSED_LEGACY_ITEM_RISE_QUERY = 18, + ITEM_PURE_WEIGHTED_STRING = 19, + ITEM_PURE_WEIGHTED_LONG = 20, + ITEM_DOT_PRODUCT = 21, + ITEM_WAND = 22, + ITEM_PREDICATE_QUERY = 23, + ITEM_REGEXP = 24, + ITEM_WORD_ALTERNATIVES = 25, + ITEM_MAX = 26, // Indicates how long tables must be. + ITEM_UNDEF = 31, + }; + + /** A tag identifying the origin of this query node. + * Note that descendants may origin from elsewhere. + * If changes necessary: + * NB! Append at end of list - corresponding type + * used in Juniper and updates of these two types must be synchronized. 
+ * (juniper/src/query.h) + */ + enum ItemCreator { + CREA_ORIG = 0, // Original user query + CREA_FILTER // Automatically applied filter (no specific type) + }; + + enum ItemFeatures { + IF_MASK = 0xE0, // mask for item features + IF_WEIGHT = 0x20, // item has rank weight + IF_UNIQUEID = 0x40, // item has unique id + IF_FLAGS = 0x80, // item has extra flags + IF_SUPPORTED_MASK = 0xE0 // mask for supported item features + }; + + enum ItemFlags { + IFLAG_NORANK = 0x00000001, // this term should not be ranked (not exposed to rank framework) + IFLAG_SPECIALTOKEN = 0x00000002, + IFLAG_NOPOSITIONDATA = 0x00000004, // we should not use position data when ranking this term + IFLAG_FILTER = 0x00000008 + }; + +private: + query::Weight _weight; + uint32_t _uniqueId; + uint32_t _arg1; + double _arg2; + double _arg3; + uint8_t _type; + uint8_t _flags; + +public: + /** Extra information on each item (creator id) coded in bits 12-19 of _type */ + static inline ItemCreator GetCreator(uint8_t type) { return static_cast((type >> 3) & 0x01); } + /** The old item type now uses only the lower 12 bits in a backward compatible way) */ + static inline ItemType GetType(uint8_t type) { return static_cast(type & 0x1F); } + inline ItemType Type() const { return GetType(_type); } + + static inline bool GetFeature(uint8_t type, uint8_t feature) + { return ((type & feature) != 0); } + + static inline bool GetFeature_Weight(uint8_t type) + { return GetFeature(type, IF_WEIGHT); } + + static inline bool getFeature_UniqueId(uint8_t type) + { return GetFeature(type, IF_UNIQUEID); } + + static inline bool getFeature_Flags(uint8_t type) + { return GetFeature(type, IF_FLAGS); } + + inline bool Feature(uint8_t feature) const + { return GetFeature(_type, feature); } + + inline bool Feature_Weight() const + { return GetFeature_Weight(_type); } + + inline bool feature_UniqueId() const + { return getFeature_UniqueId(_type); } + + inline bool feature_Flags() const + { return getFeature_Flags(_type); } + + 
static inline bool getFlag(uint8_t flags, uint8_t flag) + { return ((flags & flag) != 0); } + + /** The number of operands for the operation. */ + uint32_t _arity; + /** The name of the specified index, or NULL if no index. */ + vespalib::string _indexName; + /** The specified search term. */ + vespalib::string _term; + +/** + * Overloaded constructor for ParseItem. Used primarily for + * the operators, or pharse without indexName. + * + * @param type The type of the ParseItem. + * @param arity The arity of the operation indicated by the ParseItem. + */ + ParseItem(ItemType type, int arity); + +/** + * Overloaded constructor for ParseItem. Used for PHRASEs. + * + * @param type The type of the ParseItem. + * @param arity The arity of the operation indicated by the ParseItem. + * @param idx The name of the index of the ParseItem. + */ + ParseItem(ItemType type, int arity, const char *index); + +/** + * Overloaded constructor for ParseItem. Used for TERMs. + * + * @param type The type of the ParseItem. + * @param idx The name of the index of the ParseItem. + * @param term The actual term string of the ParseItem. + */ + ParseItem(ItemType type, const vespalib::stringref & index, const char *term); + +/** + * Overloaded constructor for ParseItem. Used for TERMs without index. + * + * @param type The type of the ParseItem. + * @param term The actual term string of the ParseItem. + */ + ParseItem(ItemType type, const char *term); + +/** + * Destructor for ParseItem. + */ + ~ParseItem(); + +/** + * Set the value of the _term field. + * @param term The string to set the _term field to. + */ + void SetTerm(const char *term) { _term = term; } + +/** + * Set the value of the _indexName field. + * @param idx The string to set the _indexName field to. + */ + void SetIndex(const char *index) { _indexName = index; } + + /** + * Set the type of the operator. Use this with caution, + * as this changes the semantics of the item. + * + * @param type The new type. 
+ */ + void SetType(ItemType type) { + _type = (_type & ~0x1F) | type; + } + + /** + * Get the unique id for this item. + * + * @return unique id for this item + **/ + uint32_t getUniqueId() const { return _uniqueId; } + + /** + * Encode the item in a binary buffer. + * @param buf Pointer to a buffer containing the encoded contents. + */ + void AppendBuffer(RawBuf *buf) const; + + size_t GetBufferLen(void) const; +}; + +} + diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp new file mode 100644 index 00000000000..146b4aeeff4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.cpp @@ -0,0 +1,354 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Creation date: 2000-05-15 + * Implementation of the simple query stack. + * + * Copyright (C) 1997-2003 Fast Search & Transfer ASA + * Copyright (C) 2003 Overture Services Norway AS + * ALL RIGHTS RESERVED + */ +#include +#include +#include +#include +#include +#include + +using vespalib::make_vespa_string; + +namespace search { + +SimpleQueryStack::SimpleQueryStack(void) + : _numItems(0), + _stack(NULL), + _FP_queryOK(true) +{ +} + +SimpleQueryStack::~SimpleQueryStack(void) +{ + delete _stack; +} + +void +SimpleQueryStack::Push(search::ParseItem *item) +{ + // Check if query OK for FirstPage + _FP_queryOK &= + ( item->Type() != search::ParseItem::ITEM_UNDEF + && item->Type() != search::ParseItem::ITEM_PAREN + ); + + + item->_next = _stack; + _stack = item; + + _numItems++; +} + +search::ParseItem * +SimpleQueryStack::Pop(void) +{ + search::ParseItem *item = _stack; + if (_stack != NULL) { + _numItems--; + _stack = _stack->_next; + item->_next = NULL; + } + return item; +} + +void +SimpleQueryStack::AppendBuffer(search::RawBuf *buf) const +{ + for (search::ParseItem *item = _stack; item != NULL; item = item->_next) { + 
item->AppendBuffer(buf); + } +} + +size_t +SimpleQueryStack::GetBufferLen(void) const +{ + size_t result; + + result = 0; + for (const search::ParseItem *item = _stack; + item != NULL; item = item->_next) { + result += item->GetBufferLen(); + } + + return result; +} + +uint32_t +SimpleQueryStack::GetSize(void) +{ + return _numItems; +} + +bool +SimpleQueryStack::_FP_isAllowed(void) +{ + return _FP_queryOK; +} + +class ItemName { +public: + ItemName() { + memset(_name, 'X', sizeof(_name)); + _name[search::ParseItem::ITEM_OR] = '|'; + _name[search::ParseItem::ITEM_WEAK_AND] = 'w'; + _name[search::ParseItem::ITEM_EQUIV] = 'E'; + _name[search::ParseItem::ITEM_AND] = '&'; + _name[search::ParseItem::ITEM_NOT] = '-'; + _name[search::ParseItem::ITEM_ANY] = '?'; + _name[search::ParseItem::ITEM_RANK] = '%'; + _name[search::ParseItem::ITEM_NEAR] = 'N'; + _name[search::ParseItem::ITEM_ONEAR] = 'O'; + _name[search::ParseItem::ITEM_NUMTERM] = '#'; + _name[search::ParseItem::ITEM_TERM] = 't'; + _name[search::ParseItem::ITEM_PURE_WEIGHTED_STRING] = 'T'; + _name[search::ParseItem::ITEM_PURE_WEIGHTED_LONG] = 'L'; + _name[search::ParseItem::ITEM_PREFIXTERM] = '*'; + _name[search::ParseItem::ITEM_SUBSTRINGTERM] = 's'; + _name[search::ParseItem::ITEM_EXACTSTRINGTERM] = 'e'; + _name[search::ParseItem::ITEM_SUFFIXTERM] = 'S'; + _name[search::ParseItem::ITEM_PHRASE] = '"'; + _name[search::ParseItem::ITEM_WEIGHTED_SET] = 'W'; + _name[search::ParseItem::ITEM_DOT_PRODUCT] = 'D'; + _name[search::ParseItem::ITEM_WAND] = 'A'; + _name[search::ParseItem::ITEM_PREDICATE_QUERY] = 'P'; + _name[search::ParseItem::ITEM_REGEXP] = '^'; + } + char operator[] (search::ParseItem::ItemType i) const { return _name[i]; } + char operator[] (size_t i) const { return _name[i]; } +private: + char _name[search::ParseItem::ITEM_MAX]; +}; + +static ItemName _G_ItemName; + +vespalib::string +SimpleQueryStack::StackbufToString(const vespalib::stringref &theBuf) +{ + vespalib::string result; + + /* + * This is a 
slightly bogus estimate of the size required. It should + * be enough in most cases, but it is possible to break it in rare and + * artificial circumstances. + * + * The simple operators use 8 bytes in the buffer. + * The string representation has 3 overhead chars, leaving 5 chars + * for the printed representation of the arity, i.e. < 10^5. + * + * The phrase operator uses 12 bytes + the length of the index string. + * The string representation has 5 overhead chars, leaving 7 chars + * for the total printed representation of the length of the index. + * If the index is 0, then the arity may use 6 chars, i.e. < 10^6. + * + * The term operator uses 12 bytes + the length of the index and term string. + * The string representation has 6 overhead chars, leaving 6 chars + * for the total printed representation of the index and term lengths. + * If for instance the index is 0, then the term must be shorter + * than 10^5 characters. + */ + + uint8_t rawtype = 0; + uint32_t type = 0, arity = 0, arg1 = 0; + const char *idxRef; + const char *termRef; + uint32_t idxRefLen; + uint32_t termRefLen; + + const char *p = theBuf.begin(); + const char *ep = theBuf.end(); + uint64_t tmp(0); + uint8_t flags(0); + while (p < ep) { + vespalib::string metaStr; + rawtype = *p++; + type = search::ParseItem::GetType(rawtype); + if (search::ParseItem::GetFeature_Weight(rawtype)) { + int64_t tmpLong(0); + p += vespalib::compress::Integer::decompress(tmpLong, p); + metaStr.append("(w:"); + metaStr.append(make_vespa_string("%ld", tmpLong)); + metaStr.append(")"); + } + if (search::ParseItem::getFeature_UniqueId(rawtype)) { + p += vespalib::compress::Integer::decompressPositive(tmp, p); + metaStr.append("(u:"); + metaStr.append(make_vespa_string("%ld", tmp)); + metaStr.append(")"); + } + if (search::ParseItem::getFeature_Flags(rawtype)) { + flags = *p++; + metaStr.append("(f:"); + metaStr.append(make_vespa_string("%d", flags)); + metaStr.append(")"); + } + if (search::ParseItem::GetCreator(flags) 
!= search::ParseItem::CREA_ORIG) { + metaStr.append("(c:"); + metaStr.append(make_vespa_string("%d", search::ParseItem::GetCreator(flags))); + metaStr.append(")"); + } + + metaStr.append('/'); + result.append(metaStr); + + switch (type) { + case search::ParseItem::ITEM_OR: + case search::ParseItem::ITEM_AND: + case search::ParseItem::ITEM_EQUIV: + case search::ParseItem::ITEM_NOT: + case search::ParseItem::ITEM_RANK: + case search::ParseItem::ITEM_ANY: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + arity = tmp; + result.append(make_vespa_string("%c/%d~", _G_ItemName[type], arity)); + break; + case search::ParseItem::ITEM_WEAK_AND: + case search::ParseItem::ITEM_NEAR: + case search::ParseItem::ITEM_ONEAR: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + arity = tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + arg1 = tmp; + if (type == search::ParseItem::ITEM_WEAK_AND) { + p += vespalib::compress::Integer::decompressPositive(tmp, p); + idxRefLen = tmp; + idxRef = p; + p += idxRefLen; + result.append(make_vespa_string("%c/%d/%d/%d:%.*s~", _G_ItemName[type], arity, arg1, idxRefLen, idxRefLen, idxRef)); + } else { + result.append(make_vespa_string("%c/%d/%d~", _G_ItemName[type], arity, arg1)); + } + break; + + case search::ParseItem::ITEM_NUMTERM: + case search::ParseItem::ITEM_TERM: + case search::ParseItem::ITEM_PREFIXTERM: + case search::ParseItem::ITEM_SUBSTRINGTERM: + case search::ParseItem::ITEM_EXACTSTRINGTERM: + case search::ParseItem::ITEM_SUFFIXTERM: + case search::ParseItem::ITEM_REGEXP: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + idxRefLen = tmp; + idxRef = p; + p += idxRefLen; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + termRefLen = tmp; + termRef = p; + p += termRefLen; + result.append(make_vespa_string("%c/%d:%.*s/%d:%.*s~", _G_ItemName[type], + idxRefLen, idxRefLen, idxRef, + termRefLen, termRefLen, termRef)); + break; + case 
search::ParseItem::ITEM_PURE_WEIGHTED_STRING: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + termRefLen = tmp; + termRef = p; + p += termRefLen; + result.append(make_vespa_string("%c/%d:%.*s~", _G_ItemName[type], + termRefLen, termRefLen, termRef)); + break; + + case search::ParseItem::ITEM_PURE_WEIGHTED_LONG: + tmp = vespalib::nbostream::n2h(*reinterpret_cast(p)); + p += sizeof(uint64_t); + result.append(make_vespa_string("%c/%lu", _G_ItemName[type], tmp)); + break; + + case search::ParseItem::ITEM_PHRASE: + case search::ParseItem::ITEM_WEIGHTED_SET: + case search::ParseItem::ITEM_DOT_PRODUCT: + case search::ParseItem::ITEM_WAND: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + arity = tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + idxRefLen = tmp; + idxRef = p; + p += idxRefLen; + if (type == search::ParseItem::ITEM_WAND) { + p += vespalib::compress::Integer::decompressPositive(tmp, p); + uint32_t targetNumHits = tmp; + double scoreThreshold = vespalib::nbostream::n2h(*reinterpret_cast(p)); + p += sizeof(double); + double thresholdBoostFactor = vespalib::nbostream::n2h(*reinterpret_cast(p)); // thresholdBoostFactor + p += sizeof(double); + result.append(make_vespa_string("%c/%d/%d:%.*s(%u,%f,%f)~", _G_ItemName[type], arity, idxRefLen, + idxRefLen, idxRef, targetNumHits, scoreThreshold, thresholdBoostFactor)); + } else { + result.append(make_vespa_string("%c/%d/%d:%.*s~", _G_ItemName[type], arity, idxRefLen, + idxRefLen, idxRef)); + } + break; + + case search::ParseItem::ITEM_PREDICATE_QUERY: + { + idxRefLen = static_cast(ReadCompressedPositiveInt(p)); + idxRef = p; + p += idxRefLen; + size_t feature_count = ReadCompressedPositiveInt(p); + result.append(make_vespa_string( + "%c/%d:%.*s/%zu(", _G_ItemName[type], idxRefLen, idxRefLen, idxRef, feature_count)); + for (size_t i = 0; i < feature_count; ++i) { + vespalib::string key = ReadString(p); + vespalib::string value = ReadString(p); + uint64_t 
sub_queries = ReadUint64(p); + result.append(make_vespa_string("%s:%s:%" PRIx64, key.c_str(), value.c_str(), sub_queries)); + if (i < feature_count - 1) { + result.append(','); + } + } + + size_t range_feature_count = ReadCompressedPositiveInt(p); + result.append(make_vespa_string(")/%zu(", range_feature_count)); + for (size_t i = 0; i < range_feature_count; ++i) { + vespalib::string key = ReadString(p); + uint64_t value = ReadUint64(p); + uint64_t sub_queries = ReadUint64(p); + result.append(make_vespa_string("%s:%" PRIu64 ":%" PRIx64, key.c_str(), value, sub_queries)); + if (i < range_feature_count - 1) { + result.append(','); + } + } + result.append(")~"); + break; + } + + default: + abort(); + } + } + return result; +} + +vespalib::string +SimpleQueryStack::ReadString(const char *&p) +{ + uint64_t tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + vespalib::string s(p, tmp); + p += s.size(); + return s; +} + +uint64_t +SimpleQueryStack::ReadUint64(const char *&p) +{ + uint64_t l = static_cast(vespalib::nbostream::n2h(*(const uint64_t *)p)); + p += sizeof(uint64_t); + return l; +} + +uint64_t +SimpleQueryStack::ReadCompressedPositiveInt(const char *&p) +{ + uint64_t tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + return tmp; +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h new file mode 100644 index 00000000000..fd6bced2704 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/simplequerystack.h @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * A stack of ParseItems.
 *
 * A simple stack implemented as a singly linked list of ParseItems,
 * chained through ParseItem::_next with the top of the stack as the
 * list head. It is able to generate a binary encoding of itself
 * to a search::RawBuf.
 */
class SimpleQueryStack
{

private:
    // Copying is disabled: the copy operations are private.
    SimpleQueryStack(const SimpleQueryStack &);
    SimpleQueryStack& operator=(const SimpleQueryStack &);

    // Decoding helpers used by StackbufToString(). Each one reads a value
    // at p and advances p past it. None of them checks buffer bounds.
    static vespalib::string ReadString(const char *&p);
    static uint64_t ReadUint64(const char *&p);
    static uint64_t ReadCompressedPositiveInt(const char *&p);

    /** The number of items on the stack. */
    uint32_t _numItems;

    /** The top of the stack.
     * Warning: FastQT_ProximityEmul currently assumes this is the head
     * of a singly linked list (linked with _next).
     */
    search::ParseItem *_stack;

    /**
     * Is this query OK for FirstPage? Starts out true and is cleared by
     * Push() as soon as an ITEM_UNDEF or ITEM_PAREN item is pushed.
     */
    bool _FP_queryOK;

public:
    /**
     * Constructor for SimpleQueryStack. Creates an empty stack.
     */
    SimpleQueryStack(void);
    /**
     * Destructor for SimpleQueryStack. Deletes the top item.
     * NOTE(review): whether the remaining linked items are released as
     * well depends on the ParseItem destructor - confirm.
     */
    ~SimpleQueryStack(void);
    /**
     * Push an item on the stack. The item becomes the new top and its
     * _next pointer is set to the previous top.
     * @param item The search::ParseItem to push.
     */
    void Push(search::ParseItem *item);
    /**
     * Pop an item off the stack. The returned item is unlinked
     * (its _next pointer is reset to NULL).
     * @return Pointer to the search::ParseItem popped, or NULL if stack is empty.
     */
    search::ParseItem *Pop(void);
    /**
     * Top node of the stack.
     * @return Pointer to the top search::ParseItem, or NULL if stack is empty.
     */
    search::ParseItem *Top(void) { return _stack; }

    /**
     * Encode the contents of the stack in a binary buffer, one item at
     * a time starting with the top of the stack.
     * @param buf The buffer that receives the encoded contents.
     */
    void AppendBuffer(search::RawBuf *buf) const;

    /**
     * Sum of ParseItem::GetBufferLen() over all items on the stack.
     * @return the accumulated length.
     */
    size_t GetBufferLen(void) const;
    /**
     * Return the number of items on the stack.
     * @return The number of items on the stack.
     */
    uint32_t GetSize(void);
    /**
     * Set the number of items on the stack.
     * This can be used by QTs that change the stack
     * under the hood. Use with care!
     * @param numItems The number of items on the stack.
     */
    void SetSize(uint32_t numItems) { _numItems = numItems; }

    /**
     * Is it possible to run this query on FirstPage?
     * @return true if ok
     */
    bool _FP_isAllowed(void);
    /**
     * Make a string representation of the search::RawBuf representing a querystack.
     * @param theBuf The querystack encoded buffer.
     * @return a fresh string
     */
    static vespalib::string StackbufToString(const vespalib::stringref &theBuf);
};
+ * + * Copyright (C) 1997-2003 Fast Search & Transfer ASA + * Copyright (C) 2003 Overture Services Norway AS + * ALL RIGHTS RESERVED + */ +#include +#include +#include +#include +#include + +using search::query::PredicateQueryTerm; + +namespace search { + +SimpleQueryStackDumpIterator::SimpleQueryStackDumpIterator(const vespalib::stringref &buf) : + _buf(buf.begin()), + _bufEnd(buf.end()), + _bufLen(buf.size()), + _currPos(_buf), + _currEnd(_buf), + _currType(ParseItem::ITEM_UNDEF), + _currCreator(ParseItem::CREA_ORIG), + _currWeight(100), + _currUniqueId(0), + _currFlags(0), + _currArity(0), + _currArg1(0), + _currArg2(0), + _currArg3(0), + _predicate_query_term(), + _currIndexName(NULL), + _currIndexNameLen(0), + _currTerm(NULL), + _currTermLen(0), + _generatedTerm(), + _currNum(-1) +{ +} + +SimpleQueryStackDumpIterator::~SimpleQueryStackDumpIterator() +{ +} + +vespalib::string SimpleQueryStackDumpIterator::readString(const char *&p) { + if (p >= _bufEnd) throw false; + uint64_t tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + vespalib::string s(p, tmp); + p += s.size(); + return s; +} + +uint64_t SimpleQueryStackDumpIterator::readUint64(const char *&p) { + if (p + sizeof(uint64_t) > _bufEnd) throw false; + uint64_t l = vespalib::nbostream::n2h(*(const uint64_t *)p); + p += sizeof(uint64_t); + return l; +} + +uint64_t +SimpleQueryStackDumpIterator::readCompressedPositiveInt(const char *&p) { + if (p >= _bufEnd) throw false; + uint64_t tmp; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + return tmp; +} + +bool +SimpleQueryStackDumpIterator::next() +{ + if (_currEnd >= _bufEnd) + // End of buffer, so no more items available + return false; + + // Set the position to the previous end. 
If just starting, sets pos to _buf + _currPos = _currEnd; + + // Find an item at the current position + const char *p = _currPos; + uint8_t typefield = *p++; + _currType = ParseItem::GetType(typefield); + + uint64_t tmp(0); + if (ParseItem::GetFeature_Weight(typefield)) { + long tmpLong; + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompress(tmpLong, p); + _currWeight.setPercent(tmpLong); + if (p > _bufEnd) return false; + } else { + _currWeight.setPercent(100); + } + if (ParseItem::getFeature_UniqueId(typefield)) { + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currUniqueId = tmp; + } else { + _currUniqueId = 0; + } + if (ParseItem::getFeature_Flags(typefield)) { + if ((p + sizeof(uint32_t)) > _bufEnd) { + return false; + } + _currFlags = (uint8_t)*p++; + } else { + _currFlags = 0; + } + _currCreator = ParseItem::GetCreator(_currFlags); + + switch (_currType) { + case ParseItem::ITEM_OR: + case ParseItem::ITEM_EQUIV: + case ParseItem::ITEM_AND: + case ParseItem::ITEM_NOT: + case ParseItem::ITEM_RANK: + case ParseItem::ITEM_ANY: + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArity = tmp; + if (p > _bufEnd) return false; + _currArg1 = 0; + _currIndexName = NULL; + _currIndexNameLen = 0; + _currTerm = NULL; + _currTermLen = 0; + break; + + case ParseItem::ITEM_NEAR: + case ParseItem::ITEM_ONEAR: + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArity = tmp; + if (p > _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArg1 = tmp; + if (p > _bufEnd) return false; + _currIndexName = NULL; + _currIndexNameLen = 0; + _currTerm = NULL; + _currTermLen = 0; + break; + + case ParseItem::ITEM_WEAK_AND: + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArity = tmp; + if (p > _bufEnd) return 
false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArg1 = tmp; + if (p > _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currIndexNameLen = tmp; + if (p > _bufEnd) return false; + _currIndexName = p; + p += _currIndexNameLen; + if (p > _bufEnd) return false; + _currTerm = NULL; + _currTermLen = 0; + break; + + case ParseItem::ITEM_PURE_WEIGHTED_STRING: + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currTermLen = tmp; + if (p > _bufEnd) return false; + _currTerm = p; + p += _currTermLen; + if (p > _bufEnd) return false; + + _currArg1 = 0; + _currArity = 0; + break; + case ParseItem::ITEM_PURE_WEIGHTED_LONG: + if (p + 8 > _bufEnd) return false; + _generatedTerm.clear(); + _generatedTerm << vespalib::nbostream::n2h(*(const uint64_t *)p); + _currTerm = _generatedTerm.c_str(); + _currTermLen = _generatedTerm.size(); + p += 8; + if (p > _bufEnd) return false; + + _currArg1 = 0; + _currArity = 0; + break; + case ParseItem::ITEM_WORD_ALTERNATIVES: + try { + _currIndexNameLen = readCompressedPositiveInt(p); + _currIndexName = p; + p += _currIndexNameLen; + _currArity = readCompressedPositiveInt(p); + _currTerm = NULL; + _currTermLen = 0; + if (p > _bufEnd) return false; + } catch (...) 
{ + return false; + } + break; + case ParseItem::ITEM_NUMTERM: + case ParseItem::ITEM_TERM: + case ParseItem::ITEM_PREFIXTERM: + case ParseItem::ITEM_SUBSTRINGTERM: + case ParseItem::ITEM_EXACTSTRINGTERM: + case ParseItem::ITEM_SUFFIXTERM: + case ParseItem::ITEM_REGEXP: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currIndexNameLen = tmp; + if (p > _bufEnd) return false; + _currIndexName = p; + p += _currIndexNameLen; + if (p > _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currTermLen = tmp; + if (p > _bufEnd) return false; + _currTerm = p; + p += _currTermLen; + if (p > _bufEnd) return false; + + _currArg1 = 0; + _currArity = 0; + break; + case ParseItem::ITEM_PREDICATE_QUERY: + try { + if (p >= _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currIndexNameLen = tmp; + if (p > _bufEnd) return false; + _currIndexName = p; + p += _currIndexNameLen; + _predicate_query_term.reset(new PredicateQueryTerm); + + size_t count = readCompressedPositiveInt(p); + for (size_t i = 0; i < count; ++i) { + vespalib::string key = readString(p); + vespalib::string value = readString(p); + uint64_t sub_queries = readUint64(p); + _predicate_query_term->addFeature(key, value, sub_queries); + } + count = readCompressedPositiveInt(p); + for (size_t i = 0; i < count; ++i) { + vespalib::string key = readString(p); + uint64_t value = readUint64(p); + uint64_t sub_queries = readUint64(p); + _predicate_query_term->addRangeFeature( + key, value, sub_queries); + } + if (p > _bufEnd) return false; + } catch (...) 
{ + return false; + } + break; + + case ParseItem::ITEM_WEIGHTED_SET: + case ParseItem::ITEM_DOT_PRODUCT: + case ParseItem::ITEM_WAND: + case ParseItem::ITEM_PHRASE: + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currArity = tmp; + if (p > _bufEnd) return false; + p += vespalib::compress::Integer::decompressPositive(tmp, p); + _currIndexNameLen = tmp; + if (p > _bufEnd) return false; + _currIndexName = p; + p += _currIndexNameLen; + if (p > _bufEnd) return false; + if (_currType == ParseItem::ITEM_WAND) { + p += vespalib::compress::Integer::decompressPositive(tmp, p); // targetNumHits + _currArg1 = tmp; + _currArg2 = vespalib::nbostream::n2h(*reinterpret_cast(p)); // scoreThreshold + p += sizeof(double); + _currArg3 = vespalib::nbostream::n2h(*reinterpret_cast(p)); // thresholdBoostFactor + p += sizeof(double); + } else { + _currArg1 = 0; + } + _currTerm = NULL; + _currTermLen = 0; + break; + + default: + // Unknown item, so report that no more are available + return false; + break; + } + _currNum++; + _currEnd = p; + + // We should not have passed the buffer + assert(_currEnd <= _bufEnd); + + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h new file mode 100644 index 00000000000..451ea226d86 --- /dev/null +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h @@ -0,0 +1,165 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/** + * Declaration of the SimpleQueryStack dump iterator + * + * Copyright (C) 1997-2003 Fast Search & Transfer ASA + * Copyright (C) 2003 Overture Services Norway AS + * ALL RIGHTS RESERVED + */ +#pragma once + +#include +#include +#include +#include + +namespace search { +/** + * An iterator to be used on a buffer that is a stack dump + * of a SimpleQueryStack. 
/**
 * An iterator to be used on a buffer that is a stack dump
 * of a SimpleQueryStack. The iterator does not copy the buffer; it keeps
 * pointers into it, and the getters expose the fields of the item most
 * recently decoded by next().
 */
class SimpleQueryStackDumpIterator
{
private:
    // Copying is disabled: the copy operations are private.
    SimpleQueryStackDumpIterator(const SimpleQueryStackDumpIterator &);
    SimpleQueryStackDumpIterator& operator=(const SimpleQueryStackDumpIterator &);

    /** Pointer to the start of the input buffer */
    const char *_buf;
    /** Pointer to just past the input buffer */
    const char *_bufEnd;
    /** Total length of the input buffer */
    size_t _bufLen;

    /** Pointer to the position of the current item in the buffer */
    const char *_currPos;
    /** Pointer to after the current item */
    const char *_currEnd;
    /** The type of the current item */
    ParseItem::ItemType _currType;
    /** The creator of the current item, derived from its flags */
    ParseItem::ItemCreator _currCreator;
    /** Rank weight of current item **/
    query::Weight _currWeight;
    /** unique id of the current item **/
    uint32_t _currUniqueId;

    /** flags of the current item **/
    uint32_t _currFlags;

    /** The arity of the current item */
    uint32_t _currArity;
    /** The first argument of the current item (length of NEAR/ONEAR area for example) */
    uint32_t _currArg1;
    /** The second argument of the current item (score threshold of WAND for example) */
    double _currArg2;
    /** The third argument of the current item (threshold boost factor of WAND for example) */
    double _currArg3;
    /** The predicate query specification */
    query::PredicateQueryTerm::UP _predicate_query_term;
    /** Pointer to the position of the index name in the current item */
    const char *_currIndexName;
    /** The length of the index name in the current item */
    size_t _currIndexNameLen;
    /** Pointer to the position of the term in the current item */
    const char *_currTerm;
    /** The length of the term in the current item */
    size_t _currTermLen;
    /** Backing storage for terms that are generated rather than read from the buffer (ITEM_PURE_WEIGHTED_LONG) */
    vespalib::asciistream _generatedTerm;

    /** The number of the current item */
    int _currNum;

    // Decoding helpers. Each one reads a value at p and advances p past
    // it; they throw when p is too close to the end of the buffer.
    vespalib::string readString(const char *&p);
    uint64_t readUint64(const char *&p);
    uint64_t readCompressedPositiveInt(const char *&p);

public:
    /**
     * Make an iterator on a buffer. To get the first item, next
     * must be called.
     *
     * @param buf A reference to the buffer holding the stackdump
     */
    SimpleQueryStackDumpIterator(const vespalib::stringref &buf);
    ~SimpleQueryStackDumpIterator();

    /**
     * Moves to the next item in the buffer.
     *
     * @return true if there is a new item, false if there are no more items
     * or if there were errors in extracting the next item.
     */
    bool next(void);

    /**
     * Get the number of the current item.
     *
     * @return The ordinal of the current item. -1 if at the start.
     */
    int getNum(void) const { return _currNum; }

    /**
     * Get the type of the current item.
     * @return the type.
     */
    ParseItem::ItemType getType(void) const { return _currType; }
    /**
     * Get the creator of the current item.
     * @return the creator.
     */
    ParseItem::ItemCreator getCreator(void) const { return _currCreator; }

    /**
     * Get the rank weight of the current item.
     *
     * @return rank weight.
     **/
    query::Weight GetWeight() const { return _currWeight; }

    /**
     * Get the unique id of the current item.
     *
     * @return unique id of current item
     **/
    uint32_t getUniqueId() const { return _currUniqueId; }

    /**
     * Get the term index of the current item. No term index is tracked
     * by this iterator.
     *
     * @return always -1 converted to uint32_t, i.e. the largest uint32_t value
     **/
    uint32_t getTermIndex() const { return -1; }

    /**
     * Get the flags of the current item.
     *
     * @return flags of current item
     **/
    uint32_t getFlags() const { return _currFlags; }

    /** Get the arity of the current item. */
    uint32_t getArity(void) const { return _currArity; }

    /** Get the first argument of the current item. */
    uint32_t getArg1(void) const { return _currArg1; }

    /** Get the second argument of the current item. */
    double getArg2() const { return _currArg2; }

    /** Get the third argument of the current item. */
    double getArg3() const { return _currArg3; }

    /**
     * Hand over the predicate query term of the current item. Ownership is
     * moved to the caller, so the iterator no longer holds the term
     * afterwards.
     */
    query::PredicateQueryTerm::UP getPredicateQueryTerm()
    { return std::move(_predicate_query_term); }

    /**
     * Get the index name of the current item.
     * @param buf Receives a pointer to the index name (may be NULL).
     * @param buflen Receives the length of the index name in bytes.
     */
    void getIndexName(const char **buf, size_t *buflen) const { *buf = _currIndexName; *buflen = _currIndexNameLen; }
    /**
     * Get the term of the current item.
     * @param buf Receives a pointer to the term (may be NULL).
     * @param buflen Receives the length of the term in bytes.
     */
    void getTerm(const char **buf, size_t *buflen) const { *buf = _currTerm; *buflen = _currTermLen; }
};
+ +#include +LOG_SETUP(".document_features_store"); +#include + +#include "document_features_store.h" +#include "predicate_index.h" +#include "predicate_range_expander.h" +#include "predicate_tree_annotator.h" +#include +#include + +#include +#include + +using search::btree::BTreeNoLeafData; +using search::btree::EntryRef; +using vespalib::MMapDataBuffer; +using vespalib::stringref; +using std::unordered_map; +using std::vector; + +namespace search { +namespace predicate { + +void +DocumentFeaturesStore::setCurrent(uint32_t docId, FeatureVector *features) { + _currDocId = docId; + _currFeatures = features; +} + +DocumentFeaturesStore::DocumentFeaturesStore(uint32_t arity) + : _docs(), + _ranges(), + _word_store(), + _word_index(), + _currDocId(0), + _currFeatures(), + _numFeatures(0), + _numRanges(0), + _arity(arity) { +} + +namespace { +template +void deserializeWords(MMapDataBuffer &buffer, + memoryindex::WordStore &word_store, + WordIndex &word_index, + vector &word_refs) { + uint32_t word_list_size = buffer.readInt32(); + word_refs.reserve(word_list_size); + vector word; + KeyComp cmp(word_store, ""); + for (uint32_t i = 0; i < word_list_size; ++i) { + uint32_t size = buffer.readInt32(); + word.clear(); + word.resize(size); + buffer.readBytes(&word[0], size); + word_refs.push_back(word_store.addWord(stringref(&word[0], size))); + word_index.insert(word_refs.back(), BTreeNoLeafData(), cmp); + } +} + +template +void deserializeRanges(MMapDataBuffer &buffer, vector &word_refs, + RangeFeaturesMap &ranges, size_t &num_ranges) { + typedef typename RangeFeaturesMap::mapped_type::value_type Range; + uint32_t ranges_size = buffer.readInt32(); + for (uint32_t i = 0; i < ranges_size; ++i) { + uint32_t doc_id = buffer.readInt32(); + uint32_t range_count = buffer.readInt32(); + auto &range_vector = ranges[doc_id]; + range_vector.reserve(range_count); + for (uint32_t j = 0; j < range_count; ++j) { + Range range; + range.label_ref = word_refs[buffer.readInt32()]; + range.from 
= buffer.readInt64(); + range.to = buffer.readInt64(); + range_vector.push_back(range); + } + num_ranges += range_count; + } +} + +template +void deserializeDocs(MMapDataBuffer &buffer, DocumentFeaturesMap &docs, + size_t &num_features) { + uint32_t docs_size = buffer.readInt32(); + for (uint32_t i = 0; i < docs_size; ++i) { + uint32_t doc_id = buffer.readInt32(); + uint32_t feature_count = buffer.readInt32(); + auto &feature_vector = docs[doc_id]; + feature_vector.reserve(feature_count); + for (uint32_t j = 0; j < feature_count; ++j) { + feature_vector.push_back(buffer.readInt64()); + } + num_features += feature_count; + } +} +} // namespace + +DocumentFeaturesStore::DocumentFeaturesStore(MMapDataBuffer &buffer) + : DocumentFeaturesStore(0) { + _arity = buffer.readInt16(); + + vector word_refs; + deserializeWords(buffer, _word_store, _word_index, word_refs); + deserializeRanges(buffer, word_refs, _ranges, _numRanges); + deserializeDocs(buffer, _docs, _numFeatures); +} + +DocumentFeaturesStore::~DocumentFeaturesStore() { + _word_index.disableFreeLists(); + _word_index.disableElemHoldList(); + _word_index.getAllocator().freeze(); + _word_index.clear(); +} + +void DocumentFeaturesStore::insert(uint64_t featureId, uint32_t docId) { + assert(docId != 0); + if (_currDocId != docId) { + auto docsItr = _docs.find(docId); + if (docsItr == _docs.end()) { + docsItr = + _docs.insert(std::make_pair(docId, FeatureVector())).first; + } + setCurrent(docId, &docsItr->second); + } + _currFeatures->push_back(featureId); + ++_numFeatures; +} + +void DocumentFeaturesStore::insert(const PredicateTreeAnnotations &annotations, + uint32_t doc_id) { + assert(doc_id != 0); + if (!annotations.features.empty()) { + auto it = _docs.find(doc_id); + if (it == _docs.end()) { + it = _docs.insert(std::make_pair(doc_id, FeatureVector())).first; + } + size_t size = it->second.size(); + it->second.resize(size + annotations.features.size()); + memcpy(&it->second[size], &annotations.features[0], + 
annotations.features.size() * sizeof(annotations.features[0])); + _numFeatures += annotations.features.size(); + } + if (!annotations.range_features.empty()) { + auto it = _ranges.find(doc_id); + if (it == _ranges.end()) { + it = _ranges.insert(std::make_pair(doc_id, RangeVector())).first; + } + for (const auto &range : annotations.range_features) { + stringref word(range.label.data, range.label.size); + KeyComp cmp(_word_store, word); + auto word_it = _word_index.find(btree::EntryRef(), cmp); + btree::EntryRef ref; + if (word_it.valid()) { + ref = word_it.getKey(); + } else { + ref = _word_store.addWord(word); + _word_index.insert(ref, BTreeNoLeafData(), cmp); + } + it->second.push_back({ref, range.from, range.to}); + } + _numRanges += annotations.range_features.size(); + } +} + +DocumentFeaturesStore::FeatureSet +DocumentFeaturesStore::get(uint32_t docId) const { + FeatureSet features; + auto docsItr = _docs.find(docId); + if (docsItr != _docs.end()) { + features.insert(docsItr->second.begin(), docsItr->second.end()); + } + auto rangeItr = _ranges.find(docId); + if (rangeItr != _ranges.end()) { + for (auto range : rangeItr->second) { + const char *label = _word_store.getWord(range.label_ref); + PredicateRangeExpander::expandRange( + label, range.from, range.to, _arity, + std::inserter(features, features.end())); + } + } + return features; +} + +void DocumentFeaturesStore::remove(uint32_t doc_id) { + auto itr = _docs.find(doc_id); + if (itr != _docs.end()) { + _numFeatures = _numFeatures >= itr->second.size() ? + (_numFeatures - itr->second.size()) : 0; + _docs.erase(itr); + } + auto range_itr = _ranges.find(doc_id); + if (range_itr != _ranges.end()) { + _numRanges = _numRanges >= range_itr->second.size() ? 
+ (_numRanges - range_itr->second.size()) : 0; + _ranges.erase(range_itr); + } + if (_currDocId == doc_id) { + setCurrent(0, NULL); + } +} + +search::MemoryUsage DocumentFeaturesStore::getMemoryUsage() const { + search::MemoryUsage usage; + usage.incAllocatedBytes(_docs.getMemoryConsumption()); + usage.incUsedBytes(_docs.getMemoryUsed()); + usage.incAllocatedBytes(_ranges.getMemoryConsumption()); + usage.incUsedBytes(_ranges.getMemoryUsed()); + // Note: allocated bytes in FeatureVector is slighly larger, but + // this should be good enough. + usage.incAllocatedBytes(_numFeatures * sizeof(uint64_t)); + usage.incUsedBytes(_numFeatures * sizeof(uint64_t)); + usage.incAllocatedBytes(_numRanges * sizeof(Range)); + usage.incUsedBytes(_numRanges * sizeof(Range)); + + usage.merge(_word_store.getMemoryUsage()); + usage.merge(_word_index.getMemoryUsage()); + + return usage; +} + +namespace { +template +void findUsedWords(const RangeFeaturesMap &ranges, + unordered_map &word_map, + vector &word_list) { + for (const auto &range_features_entry : ranges) { + for (const auto &range : range_features_entry.second) { + if (!word_map.count(range.label_ref.ref())) { + word_map[range.label_ref.ref()] = word_list.size(); + word_list.push_back(range.label_ref); + } + } + } +} + +void serializeWords(MMapDataBuffer &buffer, const vector &word_list, + const memoryindex::WordStore &word_store) { + buffer.writeInt32(word_list.size()); + for (const auto &word_ref : word_list) { + const char *word = word_store.getWord(word_ref); + uint32_t len = strlen(word); + buffer.writeInt32(len); + buffer.writeBytes(word, len); + } +} + +template +void serializeRanges(MMapDataBuffer &buffer, RangeFeaturesMap &ranges, + unordered_map &word_map) { + buffer.writeInt32(ranges.size()); + for (const auto &range_features_entry : ranges) { + buffer.writeInt32(range_features_entry.first); // doc id + buffer.writeInt32(range_features_entry.second.size()); + for (const auto &range : range_features_entry.second) { + 
buffer.writeInt32(word_map[range.label_ref.ref()]); + buffer.writeInt64(range.from); + buffer.writeInt64(range.to); + } + } +} + +template +void serializeDocs(MMapDataBuffer &buffer, DocumentFeaturesMap &docs) { + buffer.writeInt32(docs.size()); + for (const auto &doc_features_entry : docs) { + buffer.writeInt32(doc_features_entry.first); // doc id + buffer.writeInt32(doc_features_entry.second.size()); + for (const auto &feature : doc_features_entry.second) { + buffer.writeInt64(feature); + } + } +} +} // namespace + +void DocumentFeaturesStore::serialize(MMapDataBuffer &buffer) const { + vector word_list; + unordered_map word_map; + + findUsedWords(_ranges, word_map, word_list); + + buffer.writeInt16(_arity); + serializeWords(buffer, word_list, _word_store); + serializeRanges(buffer, _ranges, word_map); + serializeDocs(buffer, _docs); +} + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/document_features_store.h b/searchlib/src/vespa/searchlib/predicate/document_features_store.h new file mode 100644 index 00000000000..314e7347f27 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/document_features_store.h @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "predicate_tree_annotator.h" +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace predicate { + +/** + * Class used to track the {featureId, docId} pairs that are inserted + * into the btree memory index dictionary. These pairs are later used + * when removing all remains of a document from the feature posting + * lists of the dictionary. + */ +class DocumentFeaturesStore { + typedef memoryindex::WordStore WordStore; + struct Range { + btree::EntryRef label_ref; + int64_t from; + int64_t to; + }; + // Compares EntryRefs by their corresponding word in a WordStore. 
+ // To find a word without knowing its EntryRef, set the word in + // the constructor and search for an illegal EntryRef. + class KeyComp { + const WordStore &_word_store; + const vespalib::string _word; + + const char *getWord(btree::EntryRef ref) const { + return ref.valid() ? _word_store.getWord(ref) : _word.c_str(); + } + + public: + KeyComp(const WordStore &word_store, const vespalib::stringref &word) + : _word_store(word_store), + _word(word) { + } + + bool operator()(const btree::EntryRef &lhs, + const btree::EntryRef &rhs) const { + return strcmp(getWord(lhs), getWord(rhs)) < 0; + } + }; + typedef vespalib::Array FeatureVector; + typedef vespalib::hash_map DocumentFeaturesMap; + typedef vespalib::Array RangeVector; + typedef vespalib::hash_map RangeFeaturesMap; + typedef btree::BTree WordIndex; + + DocumentFeaturesMap _docs; + RangeFeaturesMap _ranges; + WordStore _word_store; + WordIndex _word_index; + uint32_t _currDocId; + FeatureVector *_currFeatures; + size_t _numFeatures; + size_t _numRanges; + uint32_t _arity; + + void setCurrent(uint32_t docId, FeatureVector *features); + +public: + typedef std::unordered_set FeatureSet; + + DocumentFeaturesStore(uint32_t arity); + DocumentFeaturesStore(vespalib::MMapDataBuffer &buffer); + ~DocumentFeaturesStore(); + + void insert(uint64_t featureId, uint32_t docId); + void insert(const PredicateTreeAnnotations &annotations, uint32_t docId); + FeatureSet get(uint32_t docId) const; + void remove(uint32_t docId); + search::MemoryUsage getMemoryUsage() const; + + void serialize(vespalib::MMapDataBuffer &buffer) const; +}; + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h new file mode 100644 index 00000000000..28e0e9a7fbe --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_bounds_posting_list.h @@ -0,0 +1,96 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "predicate_posting_list.h" +#include "predicate_index.h" + +namespace search { +namespace predicate { + +/** + * PredicatePostingList implementation for range query edge iterators (bounds) + * from PredicateIndex. + */ +template +class PredicateBoundsPostingList : public PredicatePostingList { + const PredicateIntervalStore &_interval_store; + Iterator _iterator; + const IntervalWithBounds *_current_interval; + uint32_t _interval_count; + uint32_t _value_diff; + IntervalWithBounds _single_buf; + +public: + PredicateBoundsPostingList(const PredicateIntervalStore &interval_store, + Iterator it, + uint32_t value_diff); + bool next(uint32_t doc_id) override; + bool nextInterval() override; + VESPA_DLL_LOCAL uint32_t getInterval() const override { + return _current_interval ? _current_interval->interval : 0; + } +}; + +template +PredicateBoundsPostingList::PredicateBoundsPostingList( + const PredicateIntervalStore &interval_store, + Iterator it, uint32_t value_diff) + : _interval_store(interval_store), + _iterator(it), + _current_interval(0), + _interval_count(0), + _value_diff(value_diff) { +} + +namespace { + bool checkBounds(uint32_t bounds, uint32_t diff) { + if (bounds & 0x80000000) { + return diff >= (bounds & 0x3fffffff); + } else if (bounds & 0x40000000) { + return diff < (bounds & 0x3fffffff); + } else { + return (diff >= (bounds >> 16)) && (diff < (bounds & 0xffff)); + } + } +} // namespace + +template +bool PredicateBoundsPostingList::next(uint32_t doc_id) { + if (_iterator.valid() && _iterator.getKey() <= doc_id) { + _iterator.linearSeek(doc_id + 1); + } + for (;; ++_iterator) { + if (!_iterator.valid()) { + return false; + } + _current_interval = _interval_store.get(_iterator.getData(), + _interval_count, &_single_buf); + if (checkBounds(_current_interval->bounds, _value_diff)) { + break; + } + if (nextInterval()) { + break; + } + } + 
setDocId(_iterator.getKey()); + return true; +} + +template +bool PredicateBoundsPostingList::nextInterval() { + uint32_t next_bounds; + do { + if (__builtin_expect(_interval_count == 1, true)) { + return false; + } + ++_current_interval; + --_interval_count; + next_bounds = _current_interval->bounds; + } while (!checkBounds(next_bounds, _value_diff)); + return true; +} + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_hash.h b/searchlib/src/vespa/searchlib/predicate/predicate_hash.h new file mode 100644 index 00000000000..47719a5c80e --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_hash.h @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace predicate { +/** + * Hash function coming from the RISE code base, used in boolean search. + */ +struct PredicateHash { + static uint64_t hash64(vespalib::stringref aKey) { + return hash64(aKey.data(), aKey.size()); + } + + static uint64_t hash64(const void *data, uint32_t origLen) { + int64_t a, b, c; + int offset; // Current offset into the entire key. 
+ + const uint8_t *aKey = static_cast(data); + + // Set up the internal state + int anInitval = 0; + a = b = anInitval; // the previous hash value + c = 0x9e3779b97f4a7c13LL; // the golden ratio; an arbitrary value + offset = 0; + uint32_t len = origLen; + + // handle most of the key + while (len >= 24) { + a += ((0xffLL & aKey[offset+0]) + + ((0xffLL & aKey[offset+1])<<8) + + ((0xffLL & aKey[offset+2])<<16) + + ((0xffLL & aKey[offset+3])<<24) + + ((0xffLL & aKey[offset+4])<<32) + + ((0xffLL & aKey[offset+5])<<40) + + ((0xffLL & aKey[offset+6])<<48) + + ((0xffLL & aKey[offset+7])<<56)); + b += ((0xffLL & aKey[offset+8]) + + ((0xffLL & aKey[offset+9])<<8) + + ((0xffLL & aKey[offset+10])<<16) + + ((0xffLL & aKey[offset+11])<<24) + + ((0xffLL & aKey[offset+12])<<32) + + ((0xffLL & aKey[offset+13])<<40) + + ((0xffLL & aKey[offset+14])<<48) + + ((0xffLL & aKey[offset+15])<<56)); + c += ((0xffLL & aKey[offset+16]) + + ((0xffLL & aKey[offset+17])<<8) + + ((0xffLL & aKey[offset+18])<<16) + + ((0xffLL & aKey[offset+19])<<24) + + ((0xffLL & aKey[offset+20])<<32) + + ((0xffLL & aKey[offset+21])<<40) + + ((0xffLL & aKey[offset+22])<<48) + + ((0xffLL & aKey[offset+23])<<56)); + + // Mix. This arithmetic must match the mix below. + a -= b; a -= c; a ^= (((uint64_t) c)>>43); + b -= c; b -= a; b ^= (a<<9); + c -= a; c -= b; c ^= (((uint64_t) b)>>8); + a -= b; a -= c; a ^= (((uint64_t) c)>>38); + b -= c; b -= a; b ^= (a<<23); + c -= a; c -= b; c ^= (((uint64_t) b)>>5); + a -= b; a -= c; a ^= (((uint64_t) c)>>35); + b -= c; b -= a; b ^= (a<<49); + c -= a; c -= b; c ^= (((uint64_t) b)>>11); + a -= b; a -= c; a ^= (((uint64_t) c)>>12); + b -= c; b -= a; b ^= (a<<18); + c -= a; c -= b; c ^= (((uint64_t) b)>>22); + // End mix. 
+ + offset += 24; len -= 24; + } + + // handle the last 23 bytes + c += origLen; + switch(len) { // all the case statements fall through + case 23: c+=((0xffLL & aKey[offset+22])<<56); + case 22: c+=((0xffLL & aKey[offset+21])<<48); + case 21: c+=((0xffLL & aKey[offset+20])<<40); + case 20: c+=((0xffLL & aKey[offset+19])<<32); + case 19: c+=((0xffLL & aKey[offset+18])<<24); + case 18: c+=((0xffLL & aKey[offset+17])<<16); + case 17: c+=((0xffLL & aKey[offset+16])<<8); + // the first byte of c is reserved for the length + case 16: b+=((0xffLL & aKey[offset+15])<<56); + case 15: b+=((0xffLL & aKey[offset+14])<<48); + case 14: b+=((0xffLL & aKey[offset+13])<<40); + case 13: b+=((0xffLL & aKey[offset+12])<<32); + case 12: b+=((0xffLL & aKey[offset+11])<<24); + case 11: b+=((0xffLL & aKey[offset+10])<<16); + case 10: b+=((0xffLL & aKey[offset+ 9])<<8); + case 9: b+=( 0xffLL & aKey[offset+ 8]); + case 8: a+=((0xffLL & aKey[offset+ 7])<<56); + case 7: a+=((0xffLL & aKey[offset+ 6])<<48); + case 6: a+=((0xffLL & aKey[offset+ 5])<<40); + case 5: a+=((0xffLL & aKey[offset+ 4])<<32); + case 4: a+=((0xffLL & aKey[offset+ 3])<<24); + case 3: a+=((0xffLL & aKey[offset+ 2])<<16); + case 2: a+=((0xffLL & aKey[offset+ 1])<<8); + case 1: a+=( 0xffLL & aKey[offset+ 0]); + // case 0: nothing left to add + } + + // Mix. This arithmetic must match the mix above. + a -= b; a -= c; a ^= (((uint64_t) c)>>43); + b -= c; b -= a; b ^= (a<<9); + c -= a; c -= b; c ^= (((uint64_t) b)>>8); + a -= b; a -= c; a ^= (((uint64_t) c)>>38); + b -= c; b -= a; b ^= (a<<23); + c -= a; c -= b; c ^= (((uint64_t) b)>>5); + a -= b; a -= c; a ^= (((uint64_t) c)>>35); + b -= c; b -= a; b ^= (a<<49); + c -= a; c -= b; c ^= (((uint64_t) b)>>11); + a -= b; a -= c; a ^= (((uint64_t) c)>>12); + b -= c; b -= a; b ^= (a<<18); + c -= a; c -= b; c ^= (((uint64_t) b)>>22); + // End mix. 
+ + return static_cast(c); + } +}; +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp new file mode 100644 index 00000000000..5ca00d1863f --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.cpp @@ -0,0 +1,288 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +LOG_SETUP(".predicate_index"); +#include + +#include "predicate_index.h" + +#include "predicate_tree_annotator.h" +#include +#include "predicate_hash.h" +#include + +using search::btree::EntryRef; +using vespalib::MMapDataBuffer; +using std::vector; + +namespace search { +namespace predicate { + +const vespalib::string PredicateIndex::z_star_attribute_name("z-star"); +const uint64_t PredicateIndex::z_star_hash( + PredicateHash::hash64(PredicateIndex::z_star_attribute_name)); +const vespalib::string PredicateIndex::z_star_compressed_attribute_name("z-star-compressed"); +const uint64_t PredicateIndex::z_star_compressed_hash( + PredicateHash::hash64(PredicateIndex::z_star_compressed_attribute_name)); + +template <> +void PredicateIndex::addPosting( + uint64_t feature, uint32_t doc_id, EntryRef ref) { + _interval_index.addPosting(feature, doc_id, ref); +} +template <> +void PredicateIndex::addPosting( + uint64_t feature, uint32_t doc_id, EntryRef ref) { + _bounds_index.addPosting(feature, doc_id, ref); +} + +template +void PredicateIndex::indexDocumentFeatures( + uint32_t doc_id, const PredicateIndex::FeatureMap &interval_map) { + if (interval_map.empty()) { + return; + } + for (const auto &map_entry : interval_map) { + uint64_t feature = map_entry.first; + const auto &interval_list = map_entry.second; + btree::EntryRef ref = _interval_store.insert(interval_list); + assert(ref.valid()); + addPosting(feature, doc_id, ref); + _cache.set(feature, doc_id, true); + } +} + +namespace { 
+// NOTE(review): bare "template" keywords, base classes and casts without
+// template arguments in this file appear to have lost their <...> part in
+// extraction. They are left as found -- confirm against the original source.
+
+// lookupCachedSet() discards its result when the summed counts of the cached
+// keys, divided by the doc id limit, fall below this ratio.
+constexpr double THRESHOLD_USE_BIT_VECTOR_CACHE = 0.1;
+
+// PostingSerializer that writes intervals from interval store based
+// on the EntryRef that is to be serialized.
+template
+class IntervalSerializer : public PostingSerializer {
+    const PredicateIntervalStore &_store;
+public:
+    IntervalSerializer(const PredicateIntervalStore &store) : _store(store) {}
+    // Writes the interval count followed by each interval of the entry.
+    // NOTE(review): size is a uint32_t but is written with writeInt16() --
+    // confirm that an entry can never hold more intervals than fit in 16 bits.
+    virtual void serialize(const EntryRef &ref,
+                           vespalib::MMapDataBuffer &buffer) const {
+        uint32_t size;
+        IntervalT single_buf;
+        const IntervalT *interval = _store.get(ref, size, &single_buf);
+        buffer.writeInt16(size);
+        for (uint32_t i = 0; i < size; ++i) {
+            interval[i].serialize(buffer);
+        }
+    }
+};
+
+// PostingDeserializer that writes intervals to interval store and
+// returns an EntryRef to be stored in the PredicateIndex.
+template
+class IntervalDeserializer : public PostingDeserializer {
+    PredicateIntervalStore &_store;
+public:
+    IntervalDeserializer(PredicateIntervalStore &store) : _store(store) {}
+    // Reads a 16-bit interval count and that many intervals, stores them in
+    // the interval store and returns the resulting reference.
+    // NOTE(review): the result of readInt16() is widened to size_t; if it is
+    // a signed type, large counts would turn into huge sizes -- confirm.
+    virtual EntryRef deserialize(vespalib::MMapDataBuffer &buffer) {
+        std::vector intervals;
+        size_t size = buffer.readInt16();
+        for (uint32_t i = 0; i < size; ++i) {
+            intervals.push_back(IntervalT::deserialize(buffer));
+        }
+        return _store.insert(intervals);
+    }
+};
+
+} // namespace
+
+// Deserializing constructor. Reads, in the order serialize() writes them:
+// the features store (while _features_store is being initialised), the
+// 16-bit arity, the zero-constraint doc ids, the interval index and the
+// bounds index. The observer is notified of every zero-constraint doc id
+// and is passed on to both index deserializers. Ends with commit().
+PredicateIndex::PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder,
+                               const DocIdLimitProvider &limit_provider,
+                               const SimpleIndexConfig &simple_index_config, MMapDataBuffer &buffer,
+                               SimpleIndexDeserializeObserver<> & observer, uint32_t version)
+    : _arity(0),
+      _generation_handler(generation_handler),
+      _limit_provider(limit_provider),
+      _interval_index(genHolder, limit_provider, simple_index_config),
+      _bounds_index(genHolder, limit_provider, simple_index_config),
+      _interval_store(),
+      _zero_constraint_docs(),
+      _features_store(buffer),
+      _cache(genHolder)
+{
+    _arity = buffer.readInt16();
+    uint32_t zero_constraint_doc_count = buffer.readInt32();
+    typename BTreeSet::Builder builder(_zero_constraint_docs.getAllocator());
+    for (size_t i = 0; i < zero_constraint_doc_count; ++i) {
+        uint32_t raw_id = buffer.readInt32();
+        // In version 0 files the doc id is the stored value shifted right by
+        // 6 bits; later versions store the doc id directly.
+        uint32_t doc_id = version == 0 ? raw_id >> 6 : raw_id;
+        builder.insert(doc_id, btree::BTreeNoLeafData::_instance);
+        observer.notifyInsert(0, doc_id, 0);
+    }
+    _zero_constraint_docs.assign(builder);
+    IntervalDeserializer interval_deserializer(_interval_store);
+    _interval_index.deserialize(buffer, interval_deserializer, observer, version);
+    IntervalDeserializer
+        bounds_deserializer(_interval_store);
+    _bounds_index.deserialize(buffer, bounds_deserializer, observer, version);
+    commit();
+}
+
+// Writes the index to buffer: features store, 16-bit arity, the count and
+// keys of the zero-constraint docs, the interval index, the bounds index.
+void PredicateIndex::serialize(MMapDataBuffer &buffer) const {
+    _features_store.serialize(buffer);
+    buffer.writeInt16(_arity);
+    buffer.writeInt32(_zero_constraint_docs.size());
+    for (auto it = _zero_constraint_docs.begin(); it.valid(); ++it) {
+        buffer.writeInt32(it.getKey());
+    }
+    IntervalSerializer interval_serializer(_interval_store);
+    _interval_index.serialize(buffer, interval_serializer);
+    IntervalSerializer bounds_serializer(_interval_store);
+    _bounds_index.serialize(buffer, bounds_serializer);
+}
+
+// Forwards to promoteOverThresholdVectors() on both posting indexes.
+void PredicateIndex::onDeserializationCompleted() {
+    _interval_index.promoteOverThresholdVectors();
+    _bounds_index.promoteOverThresholdVectors();
+}
+
+// Indexes the interval and bounds maps of annotations for doc_id and records
+// the annotations in the features store (used later by removeDocument()).
+void PredicateIndex::indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations) {
+    indexDocumentFeatures(doc_id, annotations.interval_map);
+    indexDocumentFeatures(doc_id, annotations.bounds_map);
+    _features_store.insert(annotations, doc_id);
+}
+
+// Registers doc_id in the set of zero-constraint documents.
+void PredicateIndex::indexEmptyDocument(uint32_t doc_id)
+{
+    _zero_constraint_docs.insert(doc_id, btree::BTreeNoLeafData::_instance);
+}
+
+namespace {
+// Removes the posting for (feature, doc_id) from index; if one was removed,
+// the interval entry it referred to is removed from interval_store as well.
+void removeFromIndex(
+        uint64_t feature, uint32_t doc_id, SimpleIndex &index, PredicateIntervalStore &interval_store)
+{
+    auto result = index.removeFromPostingList(feature, doc_id);
+    if (result.second) { // Posting was removed
+        auto ref = result.first;
+        assert(ref.valid());
+        interval_store.remove(ref);
+    }
+}
+
+// Adapts a btree posting list iterator to PopulateInterface::Iterator.
+class DocIdIterator : public PopulateInterface::Iterator {
+public:
+    using BTreeIterator = SimpleIndex::BTreeIterator;
+
+    DocIdIterator(BTreeIterator it) : _it(it) { }
+    // Returns the next doc id and advances, or -1 when exhausted.
+    // NOTE(review): the uint32_t key is returned as int32_t, so ids above
+    // INT32_MAX would come out negative -- confirm the doc id range.
+    int32_t getNext() override {
+        if (_it.valid()) {
+            uint32_t docId = _it.getKey();
+            ++_it;
+            return docId;
+        }
+        return -1;
+    }
+private:
+    BTreeIterator _it;
+};
+
+} // namespace
+
+// Removes every trace of doc_id: the zero-constraint entry, the postings of
+// each feature recorded in the features store (from both indexes), the cache
+// entry when any feature existed, and finally the features store entry.
+void PredicateIndex::removeDocument(uint32_t doc_id) {
+    _zero_constraint_docs.remove(doc_id);
+
+    auto features = _features_store.get(doc_id);
+    if (!features.empty()) {
+        for (auto feature : features) {
+            removeFromIndex(feature, doc_id, _interval_index,
+                            _interval_store);
+            removeFromIndex(feature, doc_id, _bounds_index,
+                            _interval_store);
+        }
+        _cache.removeIndex(doc_id);
+    }
+    _features_store.remove(doc_id);
+}
+
+// Commits both posting indexes and freezes the zero-constraint set allocator.
+void PredicateIndex::commit() {
+    _interval_index.commit();
+    _bounds_index.commit();
+    _zero_constraint_docs.getAllocator().freeze();
+}
+
+// Forwards used_generation to trimHoldLists() of both indexes, the interval
+// store and the zero-constraint set allocator.
+void PredicateIndex::trimHoldLists(generation_t used_generation) {
+    _interval_index.trimHoldLists(used_generation);
+    _bounds_index.trimHoldLists(used_generation);
+    _interval_store.trimHoldLists(used_generation);
+    _zero_constraint_docs.getAllocator().trimHoldLists(used_generation);
+}
+
+// Forwards generation to transferHoldLists() of the same four components.
+void PredicateIndex::transferHoldLists(generation_t generation) {
+    _interval_index.transferHoldLists(generation);
+    _bounds_index.transferHoldLists(generation);
+    _interval_store.transferHoldLists(generation);
+    _zero_constraint_docs.getAllocator().transferHoldLists(generation);
+}
+
+// Returns the merged memory usage of both indexes, the zero-constraint set,
+// the interval store and the features store.
+MemoryUsage PredicateIndex::getMemoryUsage() const {
+    // TODO Include bit vector cache memory usage
+    MemoryUsage combined;
+    combined.merge(_interval_index.getMemoryUsage());
+    combined.merge(_bounds_index.getMemoryUsage());
+    combined.merge(_zero_constraint_docs.getMemoryUsage());
+    combined.merge(_interval_store.getMemoryUsage());
+    combined.merge(_features_store.getMemoryUsage());
+    return combined;
+}
+
+// Returns an iterator over the doc ids in the interval index btree posting
+// list for key, or an empty pointer when the key is unknown or has no valid
+// btree posting list. Only _interval_index is consulted.
+PopulateInterface::Iterator::UP
+PredicateIndex::lookup(uint64_t key) const
+{
+    auto dictIterator = _interval_index.lookup(key);
+    if (dictIterator.valid()) {
+        auto it = _interval_index.getBTreePostingList(dictIterator.getData());
+        if (it.valid()) {
+            return PopulateInterface::Iterator::UP(new DocIdIterator(it));
+        }
+    }
+    return PopulateInterface::Iterator::UP();
+}
+
+// Populates the bit vector cache from this index when the cache asks for it.
+void
+PredicateIndex::populateIfNeeded(size_t doc_id_limit)
+{
+    if ( _cache.needPopulation()) {
+        _cache.populate(doc_id_limit, *this);
+    }
+}
+
+// Returns the subset of keys that the bit vector cache holds, or an empty
+// set when the summed counts of those keys relative to the doc id limit are
+// below THRESHOLD_USE_BIT_VECTOR_CACHE.
+// NOTE(review): a doc id limit of 0 makes the ratio inf or NaN, in which
+// case the comparison is false and the set is kept -- confirm that is intended.
+BitVectorCache::KeySet
+PredicateIndex::lookupCachedSet(const BitVectorCache::KeyAndCountSet & keys) const
+{
+    // Don't count documents using bit vector if combined length is less than threshold
+    uint64_t total_length = 0;
+    auto cached_keys = _cache.lookupCachedSet(keys);
+    for (const auto &p : keys) {
+        if (cached_keys.find(p.first) != cached_keys.end()) {
+            total_length += p.second;
+        }
+    }
+    double fill_ratio = total_length / static_cast(_limit_provider.getDocIdLimit());
+    if (fill_ratio < THRESHOLD_USE_BIT_VECTOR_CACHE) {
+        cached_keys.clear();
+    }
+    return cached_keys;
+}
+
+// Forwards to BitVectorCache::computeCountVector().
+void
+PredicateIndex::computeCountVector(BitVectorCache::KeySet & keys, BitVectorCache::CountVector & v) const
+{
+    _cache.computeCountVector(keys, v);
+}
+
+
+// Forwards to BitVectorCache::adjustDocIdLimit().
+void
+PredicateIndex::adjustDocIdLimit(uint32_t docId)
+{
+    _cache.adjustDocIdLimit(docId);
+}
+
+} // namespace predicate
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_index.h b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
new file mode 100644
index 00000000000..d2e5c1f268e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_index.h
@@ -0,0 +1,131 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include "document_features_store.h" +#include "predicate_interval_store.h" +#include "simple_index.h" +#include +#include +#include +#include +#include "predicate_interval.h" + +namespace search { +namespace predicate { +class PredicateTreeAnnotations; + +/** + * PredicateIndex keeps an index of boolean constraints for use with + * the interval algorithm. It is the central component of + * PredicateAttribute, and PredicateBlueprint uses it to obtain + * posting lists for matching. + */ +class PredicateIndex : public PopulateInterface { + typedef SimpleIndex IntervalIndex; + typedef SimpleIndex BoundsIndex; + typedef btree::BTree BTreeSet; + template + using FeatureMap = std::unordered_map>; + using generation_t = vespalib::GenerationHandler::generation_t; + template + using optional = std::experimental::optional; + +public: + using ZeroConstraintDocs = BTreeSet::FrozenView; + typedef std::unique_ptr UP; + typedef vespalib::GenerationHandler GenerationHandler; + typedef vespalib::GenerationHolder GenerationHolder; + using BTreeIterator = SimpleIndex::BTreeIterator; + using VectorIterator = SimpleIndex::VectorIterator; + static const vespalib::string z_star_attribute_name; + static const uint64_t z_star_hash; + static const vespalib::string z_star_compressed_attribute_name; + static const uint64_t z_star_compressed_hash; + +private: + uint32_t _arity; + GenerationHandler &_generation_handler; + const DocIdLimitProvider &_limit_provider; + IntervalIndex _interval_index; + BoundsIndex _bounds_index; + PredicateIntervalStore _interval_store; + BTreeSet _zero_constraint_docs; + + DocumentFeaturesStore _features_store; + mutable BitVectorCache _cache; + + template + void addPosting(uint64_t feature, uint32_t doc_id, + btree::EntryRef ref); + + template + void indexDocumentFeatures(uint32_t doc_id, const FeatureMap &interval_map); + + PopulateInterface::Iterator::UP lookup(uint64_t key) const override; + +public: + PredicateIndex(GenerationHandler 
&generation_handler, GenerationHolder &genHolder, + const DocIdLimitProvider &limit_provider, + const SimpleIndexConfig &simple_index_config, uint32_t arity) + : _arity(arity), + _generation_handler(generation_handler), + _limit_provider(limit_provider), + _interval_index(genHolder, limit_provider, simple_index_config), + _bounds_index(genHolder, limit_provider, simple_index_config), + _interval_store(), + _zero_constraint_docs(), + _features_store(arity), + _cache(genHolder) { + } + // deserializes PredicateIndex from buffer. + // The observer can be used to gain some insight into what has been added to the index.. + PredicateIndex(GenerationHandler &generation_handler, GenerationHolder &genHolder, + const DocIdLimitProvider &limit_provider, + const SimpleIndexConfig &simple_index_config, vespalib::MMapDataBuffer &buffer, + SimpleIndexDeserializeObserver<> & observer, uint32_t version); + + void serialize(vespalib::MMapDataBuffer &buffer) const; + void onDeserializationCompleted(); + + void indexEmptyDocument(uint32_t doc_id); + void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations); + void removeDocument(uint32_t doc_id); + void commit(); + void trimHoldLists(generation_t used_generation); + void transferHoldLists(generation_t generation); + MemoryUsage getMemoryUsage() const; + + int getArity() const { return _arity; } + + const ZeroConstraintDocs getZeroConstraintDocs() const { + return _zero_constraint_docs.getFrozenView(); + } + + const IntervalIndex &getIntervalIndex() const { + return _interval_index; + } + + const BoundsIndex &getBoundsIndex() const { + return _bounds_index; + } + + const PredicateIntervalStore &getIntervalStore() const { + return _interval_store; + } + + void populateIfNeeded(size_t doc_id_limit); + BitVectorCache::KeySet lookupCachedSet(const BitVectorCache::KeyAndCountSet & keys) const; + void computeCountVector(BitVectorCache::KeySet & keys, BitVectorCache::CountVector & v) const; + + /* + * Adjust size of 
structures to have space for docId. + */ + void adjustDocIdLimit(uint32_t docId); +}; + +extern template class SimpleIndex; + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp new file mode 100644 index 00000000000..d6e830f3a15 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval.cpp @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "predicate_interval.h" +#include + +namespace search { +namespace predicate { + +std::ostream &operator<<(std::ostream &out, const Interval &i) { + std::ios_base::fmtflags flags = out.flags(); + out << "0x" << std::hex << i.interval; + out.flags(flags); + return out; +} + +std::ostream &operator<<(std::ostream &out, const IntervalWithBounds &i) { + std::ios_base::fmtflags flags = out.flags(); + out << "0x" << std::hex << i.interval << ", 0x" << i.bounds; + out.flags(flags); + return out; +} + +} // namespace predicate +} diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval.h new file mode 100644 index 00000000000..fede659582a --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace predicate { + +/** + * Stores a simple interval for the boolean constraint interval algorithm. 
+ */ +struct Interval { + uint32_t interval; + + Interval() : interval(0) {} + Interval(uint32_t interval_) : interval(interval_) {} + + void serialize(vespalib::MMapDataBuffer &buffer) const { + buffer.writeInt32(interval); + } + static Interval deserialize(vespalib::MMapDataBuffer &buffer) { + return Interval{buffer.readInt32()}; + } + bool operator==(const Interval &other) const { + return interval == other.interval; + } + bool valid() const { + return interval != 0; + } +}; +std::ostream &operator<<(std::ostream &out, const Interval &i); + +/** + * Stores an interval and bounds information for edge cases of range + * searches in the boolean constraint interval algorithm. + */ +struct IntervalWithBounds { + uint32_t interval; + uint32_t bounds; + + IntervalWithBounds() : interval(0), bounds(0) {} + IntervalWithBounds(uint32_t interval_, uint32_t bounds_) : interval(interval_), bounds(bounds_) {} + + void serialize(vespalib::MMapDataBuffer &buffer) const { + buffer.writeInt32(interval); + buffer.writeInt32(bounds); + } + static IntervalWithBounds deserialize(vespalib::MMapDataBuffer &buffer) { + uint32_t interval = buffer.readInt32(); + uint32_t bounds = buffer.readInt32(); + return IntervalWithBounds{interval, bounds}; + } + bool operator==(const IntervalWithBounds &other) const { + return interval == other.interval && bounds == other.bounds; + } + bool valid() const { + return interval != 0 && bounds != 0; + } +}; +std::ostream &operator<<(std::ostream &out, const IntervalWithBounds &i); + +} // namespace predicate +} diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h new file mode 100644 index 00000000000..918f2e2f1df --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_posting_list.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "predicate_posting_list.h" +#include "predicate_index.h" + +namespace search { +namespace predicate { + +/** + * PredicatePostingList implementation for regular interval iterators + * from PredicateIndex. + */ +template +class PredicateIntervalPostingList : public PredicatePostingList { + const PredicateIntervalStore &_interval_store; + Iterator _iterator; + const Interval *_current_interval; + uint32_t _interval_count; + Interval _single_buf; + +public: + PredicateIntervalPostingList(const PredicateIntervalStore &interval_store, Iterator it); + bool next(uint32_t doc_id) override; + VESPA_DLL_LOCAL bool nextInterval() override { + if (_interval_count == 1) { + return false; + } else { + ++_current_interval; + --_interval_count; + return true; + } + } + VESPA_DLL_LOCAL uint32_t getInterval() const override { + return _current_interval ? _current_interval->interval : 0; + } +}; + +template +PredicateIntervalPostingList::PredicateIntervalPostingList( + const PredicateIntervalStore &interval_store, Iterator it) + : _interval_store(interval_store), + _iterator(it), + _current_interval(nullptr), + _interval_count(0) { +} + +template +bool PredicateIntervalPostingList::next(uint32_t doc_id) { + if (!_iterator.valid()) { + return false; + } + if (__builtin_expect(_iterator.getKey() <= doc_id, true)) { + _iterator.linearSeek(doc_id + 1); + if (!_iterator.valid()) { + return false; + } + } + _current_interval = + _interval_store.get(_iterator.getData(), _interval_count, &_single_buf); + setDocId(_iterator.getKey()); + return true; +} + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp new file mode 100644 index 00000000000..b4204010eec --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +LOG_SETUP(".predicate_interval_store"); +#include + +#include "predicate_interval_store.h" + +#include "predicate_index.h" +#include +#include +#include + +using search::btree::BufferState; +using search::btree::EntryRef; +using std::vector; + +namespace search { +namespace predicate { + +template +PredicateIntervalStore::Entry PredicateIntervalStore::allocNewEntry( + uint32_t type_id, uint32_t size) { + _store.ensureBufferCapacity(type_id, size); + uint32_t active_buffer_id = _store.getActiveBufferId(type_id); + btree::BufferState &state = _store.getBufferState(active_buffer_id); + assert(state._state == btree::BufferState::ACTIVE); + size_t old_size = state.size(); + T *buf = _store.getBufferEntry(active_buffer_id, old_size); + state.pushed_back(size); + return {RefType(old_size, active_buffer_id), buf}; +} + +PredicateIntervalStore::PredicateIntervalStore() + : _store(), + _size1Type(1, 1024u, RefType::offsetSize()), + _store_adapter(_store), + _ref_cache(_store_adapter) { + + // This order determines type ids. + _store.addType(&_size1Type); + + _store.initActiveBuffers(); +} + +PredicateIntervalStore::~PredicateIntervalStore() { + _store.dropBuffers(); +} + +// +// NOTE: The allocated entries are arrays of type uint32_t, but the +// entries are used as arrays of either Interval or IntervalWithBounds +// objects (PODs). These objects are memcpy'ed into the uint32_t +// arrays, and in the get() function they are typecast back to the +// object expected by the caller. Which type an entry has cannot be +// inferred from the EntryRef, but must be known by the caller. +// +// This saves us from having separate buffers for Intervals and +// IntervalWithBounds objects, since the caller knows the correct type +// anyway. 
+// +template +btree::EntryRef PredicateIntervalStore::insert( + const vector &intervals) { + const uint32_t size = entrySize() * intervals.size(); + if (size == 0) { + return btree::EntryRef(); + } + uint32_t *buffer; + btree::EntryRef ref; + if (size == 1 && intervals[0].interval <= RefCacheType::DATA_REF_MASK) { + return btree::EntryRef(intervals[0].interval); + } + uint32_t cached_ref = _ref_cache.find( + reinterpret_cast(&intervals[0]), size); + if (cached_ref) { + return cached_ref; + } + + if (size < RefCacheType::MAX_SIZE) { + auto entry = allocNewEntry(0, size); + buffer = entry.buffer; + ref = entry.ref.ref() | (size << RefCacheType::SIZE_SHIFT); + } else { + auto entry = allocNewEntry(0, size + 1); + buffer = entry.buffer; + ref = entry.ref.ref() | RefCacheType::SIZE_MASK; + *buffer++ = size; + } + memcpy(buffer, &intervals[0], size * sizeof(uint32_t)); + _ref_cache.insert(ref.ref()); + return ref; +} +// Explicit instantiation for relevant types. +template +EntryRef PredicateIntervalStore::insert(const vector &); +template +EntryRef PredicateIntervalStore::insert(const vector &); + +void PredicateIntervalStore::remove(EntryRef ref) { + if (ref.valid()) { + uint32_t buffer_id = RefType(ref).bufferId(); + if (buffer_id == 0) { // single interval optimization. + return; + } + // Don't remove anything. + + // BufferState &state = _store.getBufferState(buffer_id); + // uint32_t type_id = state.getTypeId(); + // uint32_t size = type_id <= MAX_ARRAY_SIZE ? 
type_id : 1; + // _store.holdElem(ref, size); + } +} + +void PredicateIntervalStore::trimHoldLists(generation_t used_generation) { + _store.trimHoldLists(used_generation); +} + +void PredicateIntervalStore::transferHoldLists(generation_t generation) { + _store.transferHoldLists(generation); +} + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h new file mode 100644 index 00000000000..585b9e5bcb9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_interval_store.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "predicate_ref_cache.h" +#include +#include +#include +#include + +namespace search { +namespace predicate { +class Interval; + +/** + * Stores interval entries in a memory-efficient way. + * It works with both Interval and IntervalWithBounds entries. + */ +class PredicateIntervalStore { + class DataStoreAdapter; + typedef PredicateRefCache RefCacheType; + typedef btree::DataStoreT> DataStoreType; + typedef DataStoreType::RefType RefType; + using generation_t = vespalib::GenerationHandler::generation_t; + + DataStoreType _store; + btree::BufferType _size1Type; + + class DataStoreAdapter { + const DataStoreType &_store; + public: + DataStoreAdapter(const DataStoreType &store) : _store(store) {} + const uint32_t *getBuffer(uint32_t ref) const { + RefType entry_ref(ref); + return _store.getBufferEntry( + entry_ref.bufferId(), entry_ref.offset()); + } + }; + DataStoreAdapter _store_adapter; + RefCacheType _ref_cache; + + // Return type for private allocation functions + template + struct Entry { + RefType ref; + T *buffer; + }; + + // Allocates a new entry in a datastore buffer. 
+ template + Entry allocNewEntry(uint32_t type_id, uint32_t size); + // Returns the size of an interval entry in number of uint32_t. + template + static uint32_t entrySize() { return sizeof(IntervalT) / sizeof(uint32_t); } + +public: + PredicateIntervalStore(); + ~PredicateIntervalStore(); + + /** + * Inserts an array of intervals into the store. + * IntervalT is either Interval or IntervalWithBounds. + */ + template + btree::EntryRef insert(const std::vector &intervals); + + /** + * Removes an entry. The entry remains accessible until commit + * is called, and also as long as readers hold the current + * generation. + * + * Remove is currently disabled, as the ref cache is assumed to + * keep the total number of different entries low. + */ + void remove(btree::EntryRef ref); + + void trimHoldLists(generation_t used_generation); + + void transferHoldLists(generation_t generation); + + /** + * Return memory usage (only the data store is included) + */ + MemoryUsage getMemoryUsage() const { + return _store.getMemoryUsage(); + } + + /** + * Retrieves a list of intervals. + * IntervalT is either Interval or IntervalWithBounds. + * single_buf is a pointer to a single IntervalT, used by the + * single interval optimization. 
+ */ + template + const IntervalT *get(btree::EntryRef btree_ref, + uint32_t &size_out, + IntervalT *single_buf) const + { + uint32_t size = btree_ref.ref() >> RefCacheType::SIZE_SHIFT; + RefType data_ref(btree_ref.ref() & RefCacheType::DATA_REF_MASK); + if (__builtin_expect(size == 0, true)) { // single-interval optimization + *single_buf = IntervalT(); + single_buf->interval = data_ref.ref(); + size_out = 1; + return single_buf; + } + const uint32_t *buf = + _store.getBufferEntry(data_ref.bufferId(), + data_ref.offset()); + if (size == RefCacheType::MAX_SIZE) { + size = *buf++; + } + size_out = size / entrySize(); + return reinterpret_cast(buf); + } +}; +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h new file mode 100644 index 00000000000..6ee20c26a79 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_posting_list.h @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include + +/** + * Interface for posting lists used by PredicateSearch. + */ +namespace search { +namespace predicate { + +class PredicatePostingList { + uint32_t _docId; + uint64_t _subquery; + +protected: + PredicatePostingList() + : _docId(0), + _subquery(UINT64_MAX) { + } + + void setDocId(uint32_t docId) { _docId = docId; } + +public: + using UP = std::unique_ptr; + + virtual ~PredicatePostingList() {} + + /* + * Moves to next document after the one supplied. + * Returns false if there were no more doc ids. + */ + virtual bool next(uint32_t docId) = 0; + + /* + * Moves to the next interval within the current doc id. + * Returns false if there were no more intervals for the current doc id. 
+ */ + virtual bool nextInterval() = 0; + + uint32_t getDocId() const { return _docId; } + VESPA_DLL_LOCAL virtual uint32_t getInterval() const = 0; + + // Comes from the query that triggered inclusion of this posting list. + void setSubquery(uint64_t subquery) { _subquery = subquery; } + uint64_t getSubquery() const { return _subquery; } +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp new file mode 100644 index 00000000000..bb7e26f168f --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.cpp @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +LOG_SETUP(".predicate_range_expander"); +#include + +#include "predicate_range_expander.h" + +namespace search { +namespace predicate { + +void PredicateRangeExpander::debugLog(const char *fmt, const char *msg) { + LOG(debug, fmt, msg); +} + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h new file mode 100644 index 00000000000..b7d5f25e78b --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_expander.h @@ -0,0 +1,122 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "predicate_hash.h" +#include +#include + +namespace search { +namespace predicate { + +/** + * Helper class for expanding ranges. This functionality is ported from + * com.yahoo.vespa.indexinglanguage.predicate.ComplexNodeTransformer + * + * It is tested through document_features_store_test.cpp. 
+ */ +class PredicateRangeExpander { + static void debugLog(const char *format_str, const char *msg); + + template <typename InsertIt> + static void addEdgePartition(const char *label, uint64_t value, + bool negative, InsertIt out) { // emits hash of "<label>=[-]<value>" + vespalib::string to_hash = + vespalib::make_string("%s%s%" PRIu64, label, + negative? "=-" : "=", value); + debugLog("Hashing edge partition %s", to_hash.c_str()); + *out++ = PredicateHash::hash64(to_hash); + } + + template <typename InsertIt> + static void addPartitions(const char *label, uint64_t part, + uint64_t part_size, uint32_t first, + uint32_t last, bool negative, InsertIt out) { // one hash per partition index in [first, last) + for (uint32_t i = first; i < last; ++i) { + uint64_t from = (part + i) * part_size; + uint64_t to = from + part_size - 1; + if (negative) { + std::swap(to, from); + } + vespalib::string to_hash = + vespalib::make_string("%s%s%" PRIu64 "-%" PRIu64, label, + negative? "=-" : "=", from, to); + debugLog("Hashing partition %s", to_hash.c_str()); + *out++ = PredicateHash::hash64(to_hash); + } + } + + template <typename InsertIt> + static void makePartitions(const char *label, + uint64_t from, uint64_t to, + uint64_t step_size, int32_t arity, + bool negative, InsertIt out) { // recurses with step_size * arity until both ends share a parent + uint32_t from_remainder = from % arity; + uint32_t to_remainder = to % arity; + uint64_t next_from = from - from_remainder; + uint64_t next_to = to - to_remainder; + if (next_from == next_to) { + addPartitions(label, next_from, step_size, + from_remainder, to_remainder, negative, out); + } else { + if (from_remainder > 0) { + addPartitions(label, next_from, step_size, + from_remainder, arity, negative, out); + from = next_from + arity; + } + addPartitions(label, next_to, step_size, + 0, to_remainder, negative, out); + makePartitions(label, from / arity, to / arity, + step_size * arity, arity, negative, out); + } + } + + template <typename InsertIt> + static void partitionRange(const char *label, uint64_t from, uint64_t to, + uint32_t arity, bool negative, InsertIt out) { + uint32_t from_remainder = from % arity; + // operate on exclusive upper bound. 
+ uint32_t to_remainder = (to + 1) % arity; + uint64_t from_val = from - from_remainder; + uint64_t to_val = to - to_remainder; + if (from_val == to_val + 1) { + addEdgePartition(label, from_val, negative, out); + return; + } else { + if (from_remainder != 0) { + addEdgePartition(label, from_val, negative, out); + from_val += arity; + } + if (to_remainder != 0) { + addEdgePartition(label, to_val + 1, negative, out); + } + } + makePartitions(label, from_val / arity, + (to_val - (arity - 1)) / arity + 1, + arity, arity, negative, out); + } + +public: + // Expands a range and returns the hash values through the insert iterator. + template <typename InsertIt> + static void expandRange(const char *label, int64_t from, int64_t to, + uint32_t arity, InsertIt out) { + if (from < 0) { // magnitudes are negated as uint64_t: -INT64_MIN would overflow int64_t + if (to < 0) { + // Special case for to==-1. -X-0 means the same as -X-1, + // but is more efficient. + partitionRange(label, (to == -1 ? 0 : 0 - uint64_t(to)), 0 - uint64_t(from), arity, + true, out); + } else { + partitionRange(label, 0, 0 - uint64_t(from), arity, true, out); + partitionRange(label, 0, to, arity, false, out); + } + } else { + partitionRange(label, from, to, arity, false, out); + } + } +}; + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h new file mode 100644 index 00000000000..2918c96a0c5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_range_term_expander.h @@ -0,0 +1,99 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace predicate { + +/** + * Helper class for expanding a point in a predicate range query to + * the hashed labels. Used by PredicateBlueprint. 
+ */ +class PredicateRangeTermExpander { + int _arity; + uint16_t _max_positive_levels; + uint16_t _max_negative_levels; + int64_t _lower_bound; + int64_t _upper_bound; + +public: + PredicateRangeTermExpander(int arity, + int64_t lower_bound = LLONG_MIN, + int64_t upper_bound = LLONG_MAX) + : _arity(arity), + _max_positive_levels(1), + _max_negative_levels(1), + _lower_bound(lower_bound), + _upper_bound(upper_bound) { + uint64_t t = _upper_bound; + while ((t /= _arity) > 0) ++_max_positive_levels; + t = 0 - uint64_t(_lower_bound); // unsigned negation: -LLONG_MIN (the default) overflows int64_t + while ((t /= _arity) > 0) ++_max_negative_levels; + } + + template <typename Handler> + void expand(const vespalib::string &key, int64_t value, Handler &handler); +}; + + +/** + * Handler must implement handleRange(string) and handleEdge(string, uint64_t). + */ +template <typename Handler> +void PredicateRangeTermExpander::expand( + const vespalib::string &key, int64_t signed_value, Handler &handler) { + if (signed_value < _lower_bound || signed_value > _upper_bound) { + LOG(warning, "Search outside bounds should have been rejected by " + "ValidatePredicateSearcher."); + return; + } + char buffer[21 * 2 + 3 + key.size()]; // 2 numbers + punctuation + key + int size; + int prefix_size = sprintf(buffer, "%s=", key.c_str()); + bool negative = signed_value < 0; + uint64_t value; + int max_levels; + if (negative) { + value = 0 - uint64_t(signed_value); // magnitude without signed overflow for LLONG_MIN + buffer[prefix_size++] = '-'; + max_levels = _max_negative_levels; + } else { + value = signed_value; + max_levels = _max_positive_levels; + } + + uint64_t edge_interval = (value / _arity) * _arity; // unsigned like value; "%lu" expects an unsigned argument + size = sprintf(buffer + prefix_size, "%lu", edge_interval); + handler.handleEdge(vespalib::stringref(buffer, prefix_size + size), + value - edge_interval); + + uint64_t level_size = _arity; + for (int i = 0; i < max_levels; ++i) { + uint64_t start = (value / level_size) * level_size; + if (negative) { + if (start + level_size - 1 > uint64_t(LLONG_MAX) + 1) { // |LLONG_MIN| computed without negating it + break; + } + size = sprintf(buffer + prefix_size, "%lu-%lu", + start + level_size - 1, start); + } else 
{ + if (start + level_size - 1 > LLONG_MAX) { + break; + } + size = sprintf(buffer + prefix_size, "%lu-%lu", + start, start + level_size - 1); + } + handler.handleRange(vespalib::stringref(buffer, prefix_size + size)); + level_size *= _arity; + if (!level_size) { // overflow + break; + } + } +} + + +} // namespace search::predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h b/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h new file mode 100644 index 00000000000..9aae296311a --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_ref_cache.h @@ -0,0 +1,160 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + + +#pragma once + +#include + +namespace search { +namespace predicate { + +/** + * Holds the data used in a cache lookup operation. + */ +struct CurrentZeroRef { + const uint32_t *buf; + uint32_t size; + void set(const uint32_t *b, uint32_t s) { + buf = b; + size = s; + } +}; + +/** + * Comparator (less) used in std::set. It holds a reference to a data + * store, from which it looks up buffers from the "data_ref"-part of the + * cached references. 
+ */ +template +class RefCacheComparator { + enum { DATA_REF_BITS = 32 - SIZE_BITS, + DATA_REF_MASK = (1 << DATA_REF_BITS) - 1, + MAX_SIZE = (1 << SIZE_BITS) - 1, + SIZE_SHIFT = DATA_REF_BITS }; + const BufferStore &_store; + const CurrentZeroRef &_current_zero_ref; +public: + RefCacheComparator(const BufferStore &store, + const CurrentZeroRef &zero_ref) + : _store(store), + _current_zero_ref(zero_ref){ + } + + void getSizeAndBuf(uint32_t ref, uint32_t &size, + const uint32_t *&buf) const { + if (ref) { + size = ref >> SIZE_SHIFT; + buf = _store.getBuffer(ref & DATA_REF_MASK); + if (size == MAX_SIZE) { + size = *buf++; + } + } else { + size = _current_zero_ref.size; + buf = _current_zero_ref.buf; + } + } + + bool compareWithZeroRef(uint32_t lhs, uint32_t rhs) const { + uint32_t lhs_size; + const uint32_t *lhs_buf; + getSizeAndBuf(lhs, lhs_size, lhs_buf); + uint32_t rhs_size; + const uint32_t *rhs_buf; + getSizeAndBuf(rhs, rhs_size, rhs_buf); + + if (lhs_size != rhs_size) { + return lhs_size < rhs_size; + } + for (uint32_t i = 0; i < lhs_size; ++i) { + if (lhs_buf[i] != rhs_buf[i]) { + return lhs_buf[i] < rhs_buf[i]; + } + } + return false; + } + + bool operator() (uint32_t lhs, uint32_t rhs) const { + if (!lhs || !rhs) { + return compareWithZeroRef(lhs, rhs); + } + uint32_t lhs_size = lhs >> SIZE_SHIFT; + uint32_t rhs_size = rhs >> SIZE_SHIFT; + if (lhs_size != rhs_size) { + return lhs_size < rhs_size; + } + if (lhs == rhs) { + return false; + } + const uint32_t *lhs_buf = _store.getBuffer(lhs & DATA_REF_MASK); + const uint32_t *rhs_buf = _store.getBuffer(rhs & DATA_REF_MASK); + uint32_t size = lhs_size; + if (lhs_size == MAX_SIZE) { + size = lhs_buf[0] + 1; // Compare sizes and data in loop + // below. If actual size differs + // then loop will exit in first + // iteration. 
+ } + for (uint32_t i = 0; i < size; ++i) { + if (lhs_buf[i] != rhs_buf[i]) { + return lhs_buf[i] < rhs_buf[i]; + } + } + return false; + } +}; + +/** + * Holds a set of refs and a reference to a datastore that is used to + * lookup data based on the "data_ref"-part of the ref. Each ref also + * uses the upper bits to hold the size of the data refered to. If the + * size is too large to represent by the allocated bits, the max size + * is used, and the actual size is stored in the first 32-bit value of + * the data buffer. + * + * Note that this class is inherently single threaded, and thus needs + * external synchronization if used from multiple threads. (Both + * insert and find) + */ +template +class PredicateRefCache { + typedef RefCacheComparator ComparatorType; + + mutable CurrentZeroRef _current_zero_ref; + std::set _ref_cache; + +public: + enum { DATA_REF_BITS = 32 - SIZE_BITS, + DATA_REF_MASK = (1 << DATA_REF_BITS) - 1, + MAX_SIZE = (1 << SIZE_BITS) - 1, + SIZE_SHIFT = DATA_REF_BITS, + SIZE_MASK = MAX_SIZE << SIZE_SHIFT}; + + PredicateRefCache(const BufferStore &store) + : _ref_cache(ComparatorType(store, _current_zero_ref)) { + } + + /** + * Inserts a ref into the cache. The ref refers to data already + * inserted in the underlying data store. + */ + uint32_t insert(uint32_t ref) { + assert(ref); + return *_ref_cache.insert(ref).first; + } + + /** + * Checks if a data sequence is already present in the + * cache. Returns the datastore ref, or 0 if not present. 
+ */ + uint32_t find(const uint32_t *buf, uint32_t size) const { + _current_zero_ref.set(buf, size); + auto it = _ref_cache.find(0); + if (it != _ref_cache.end()) { + return *it; + } + return 0; + } +}; + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp new file mode 100644 index 00000000000..21878d5ca9e --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.cpp @@ -0,0 +1,168 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +LOG_SETUP(".predicate_tree_analyzer"); +#include + +#include "predicate_tree_analyzer.h" +#include +#include +#include +#include + +using document::Predicate; +using std::map; +using std::min; +using std::string; +using vespalib::slime::Inspector; +using vespalib::slime::Memory; + +namespace search { +namespace predicate { +namespace { +long getType(const Inspector &in, bool negated) { + long type = in[Predicate::NODE_TYPE].asLong(); + if (negated) { + if (type == Predicate::TYPE_CONJUNCTION) { + return Predicate::TYPE_DISJUNCTION; + } else if (type == Predicate::TYPE_DISJUNCTION) { + return Predicate::TYPE_CONJUNCTION; + } + } + return type; +} + +void createOrIncrease(map &counts, const string &key) { + auto it = counts.find(key); + if (it == counts.end()) { + counts.insert(make_pair(key, 1)); + } else { + ++(it->second); + } +} +} // namespace + +void PredicateTreeAnalyzer::traverseTree(const Inspector &in) { + switch (getType(in, _negated)) { + case Predicate::TYPE_NEGATION: + assert(in[Predicate::CHILDREN].children() == 1); + _negated = !_negated; + traverseTree(in[Predicate::CHILDREN][0]); + _negated = !_negated; + return; + case Predicate::TYPE_CONJUNCTION: { + int crumb_size = _crumbs.size(); + int size = 0; + for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) { + 
_crumbs.setChild(i, 'a'); + traverseTree(in[Predicate::CHILDREN][i]); + size += _size; + _size_map.insert(make_pair(_crumbs.getCrumb(), _size)); + _crumbs.resize(crumb_size); + } + _size = size; + return; + } + case Predicate::TYPE_DISJUNCTION: { + int crumb_size = _crumbs.size(); + int size = 0; + for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) { + _crumbs.setChild(i, 'o'); + traverseTree(in[Predicate::CHILDREN][i]); + size += _size; + _crumbs.resize(crumb_size); + } + _size = size; + return; + } + case Predicate::TYPE_FEATURE_SET: + if (_negated) { + _size = 2; + _has_not = true; + } else { + _size = 1; + Memory label_mem = in[Predicate::KEY].asString(); + string label(label_mem.data, label_mem.size); + label.push_back('='); + const size_t prefix_size = label.size(); + for (size_t i = 0; i < in[Predicate::SET].children(); ++i) { + Memory value = in[Predicate::SET][i].asString(); + label.resize(prefix_size); + label.append(value.data, value.size); + createOrIncrease(_key_counts, label); + } + } + return; + case Predicate::TYPE_FEATURE_RANGE: { + if (_negated) { + _size = 2; + _has_not = true; + } else { + _size = 1; + string key = in[Predicate::KEY].asString().make_string(); + createOrIncrease(_key_counts, key); + } + } + } // switch +} + +float PredicateTreeAnalyzer::findMinFeature(const Inspector &in) { + float min_feature = 0.0f; + switch (getType(in, _negated)) { + case Predicate::TYPE_CONJUNCTION: // sum of children + for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) { + min_feature += findMinFeature(in[Predicate::CHILDREN][i]); + } + return min_feature; + case Predicate::TYPE_DISJUNCTION: // min of children + min_feature = findMinFeature(in[Predicate::CHILDREN][0]); + for (size_t i = 1; i < in[Predicate::CHILDREN].children(); ++i) { + min_feature = min(min_feature, + findMinFeature(in[Predicate::CHILDREN][i])); + } + return min_feature; + case Predicate::TYPE_NEGATION: // == child + assert(in[Predicate::CHILDREN].children() == 
1); + _negated = !_negated; + min_feature = findMinFeature(in[Predicate::CHILDREN][0]); + _negated = !_negated; + return min_feature; + case Predicate::TYPE_FEATURE_SET: { + if (_negated) { + return 0.0f; + } + Memory label_mem = in[Predicate::KEY].asString(); + string label(label_mem.data, label_mem.size); + label.push_back('='); + const size_t prefix_size = label.size(); + min_feature = 1.0f; + for (size_t i = 0; i < in[Predicate::SET].children(); ++i) { + Memory value = in[Predicate::SET][i].asString(); + label.resize(prefix_size); + label.append(value.data, value.size); + auto it = _key_counts.find(label); + assert(it != _key_counts.end()); + min_feature = min(min_feature, 1.0f / it->second); + } + return min_feature; + } + case Predicate::TYPE_FEATURE_RANGE: { + if (_negated) { + return 0.0f; + } + string key = in[Predicate::KEY].asString().make_string(); + auto it = _key_counts.find(key); + assert(it != _key_counts.end()); + return 1.0f / it->second; + } + } // switch + return 0.0f; +} + +PredicateTreeAnalyzer::PredicateTreeAnalyzer(const Inspector &in) : _has_not(false), _negated(false) { + traverseTree(in); + _min_feature = static_cast(ceilf(findMinFeature(in)) + (_has_not? 1.0 : 0.0)); +} + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h new file mode 100644 index 00000000000..35e91db718c --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_analyzer.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "tree_crumbs.h" +#include +#include +#include + +namespace search { +namespace predicate { + +/** + * Analyzes a predicate tree, in the form of a slime object, to find + * the value for min_feature (the minimum number of features required + * to find a match), and a map of sizes that is used when assigning + * intervals. + */ +class PredicateTreeAnalyzer { + std::map _key_counts; + std::map _size_map; + int _min_feature; + bool _has_not; + + bool _negated; + TreeCrumbs _crumbs; + int _size; + + // Fills _key_counts, _size_map, and _has_not. + void traverseTree(const vespalib::slime::Inspector &in); + float findMinFeature(const vespalib::slime::Inspector &in); + +public: + PredicateTreeAnalyzer(const vespalib::slime::Inspector &in); + + int getMinFeature() const { return _min_feature; } + int getSize() const { return _size; } + const std::map &getSizeMap() const { return _size_map; } +}; + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp new file mode 100644 index 00000000000..c97f1f73848 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.cpp @@ -0,0 +1,256 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+LOG_SETUP(".predicate_tree_annotator");
+#include
+
+#include "predicate_tree_annotator.h"
+
+#include "predicate_index.h"
+#include "predicate_range_expander.h"
+#include "predicate_tree_analyzer.h"
+#include "tree_crumbs.h"
+#include
+#include "predicate_hash.h"
+#include
+#include
+#include
+
+using document::Predicate;
+using std::map;
+using std::string;
+using vespalib::slime::Inspector;
+using vespalib::slime::Memory;
+
+namespace search {
+namespace predicate {
+
+using predicate::MIN_INTERVAL;
+using predicate::MAX_INTERVAL;
+
+namespace {
+
+class PredicateTreeAnnotatorImpl {  // Walks a predicate tree and records interval markers per feature hash in _result.
+    uint32_t _begin;                    // Interval begin for the node currently visited.
+    uint32_t _end;                      // Interval end for the node currently visited.
+    uint32_t _left_weight;              // Incremented once per visited leaf, and once more for a negated leaf.
+    PredicateTreeAnnotations &_result;  // Output; not owned.
+    uint64_t _zStar_hash;               // Key of the z-star entry in _result.interval_map.
+    bool _negated;                      // Toggled around the child of a negation node.
+    bool _final_range_used;             // Set by getCEnd() the first time it returns _interval_range - 1.
+    const std::map &_size_map;          // Looked up by crumb for the size of a conjunction child.
+    TreeCrumbs _crumbs;                 // Crumb of the node currently visited.
+    int64_t _lower_bound;               // Used as range start when a range node has no RANGE_MIN.
+    int64_t _upper_bound;               // Used as range end when a range node has no RANGE_MAX.
+    uint16_t _interval_range;           // Initial value of _end, as passed to the constructor.
+
+
+    uint32_t makeMarker(uint32_t begin, uint32_t end) {  // begin shifted 16 bits left, OR-ed with end.
+        return (begin << 16) | end;
+    }
+    uint32_t getCEnd() {  // End value used by negated leaves; has a side effect on _final_range_used.
+        if (!_final_range_used && _end == _interval_range) {
+            _final_range_used = true;
+            return _interval_range - 1;
+        }
+        return _left_weight + 1;
+    }
+    void addZstarIntervalIfNegated(uint32_t cEnd);
+
+public:
+    PredicateTreeAnnotatorImpl(const std::map &size_map,
+                               PredicateTreeAnnotations &result,
+                               int64_t lower, int64_t upper, uint16_t interval_range);
+
+    void assignIntervalMarkers(const vespalib::slime::Inspector &in);
+};
+
+void PredicateTreeAnnotatorImpl::addZstarIntervalIfNegated(uint32_t cEnd) {  // No-op unless _negated is set.
+    if (_negated) {
+        auto it = _result.interval_map.find(_zStar_hash);
+        if (it == _result.interval_map.end()) {  // First negated leaf: create the z-star entry and list its hash as a feature.
+            it = _result.interval_map.insert(make_pair(
+                            _zStar_hash, std::vector())).first;
+            _result.features.push_back(_zStar_hash);
+        }
+        auto &intervals = it->second;
+        intervals.push_back(Interval{ makeMarker(cEnd, _begin - 1) });  // Note the order: cEnd goes in the upper half.
+        if (_end - cEnd != 1) {  // A second marker is added unless cEnd is directly below _end.
+            intervals.push_back(Interval{ makeMarker(0, _end) });
+        }
+        _left_weight += 1;  // In addition to the increment the caller does for every leaf.
+    }
+}
+
+PredicateTreeAnnotatorImpl::PredicateTreeAnnotatorImpl( + const map &size_map, + PredicateTreeAnnotations &result, + int64_t lower_bound, int64_t upper_bound, uint16_t interval_range) + : _begin(MIN_INTERVAL), + _end(interval_range), + _left_weight(0), + _result(result), + _zStar_hash(PredicateIndex::z_star_compressed_hash), + _negated(false), + _final_range_used(false), + _size_map(size_map), + _crumbs(), + _lower_bound(lower_bound), + _upper_bound(upper_bound), + _interval_range(interval_range) { +} + +long getType(const Inspector &in, bool negated) { + long type = in[Predicate::NODE_TYPE].asLong(); + if (negated) { + if (type == Predicate::TYPE_CONJUNCTION) { + return Predicate::TYPE_DISJUNCTION; + } else if (type == Predicate::TYPE_DISJUNCTION) { + return Predicate::TYPE_CONJUNCTION; + } + } + return type; +} + +void PredicateTreeAnnotatorImpl::assignIntervalMarkers(const Inspector &in) { + switch (getType(in, _negated)) { + case Predicate::TYPE_CONJUNCTION: { + int crumb_size = _crumbs.size(); + uint32_t curr = _begin; + size_t child_count = in[Predicate::CHILDREN].children(); + uint32_t begin = _begin; + uint32_t end = _end; + for (size_t i = 0; i < child_count; ++i) { + _crumbs.setChild(i, 'a'); + if (i == child_count - 1) { // Last child (may also be the only?) 
+ _begin = curr; + _end = end; + assignIntervalMarkers(in[Predicate::CHILDREN][i]); + // No need to update/touch curr + } else if (i == 0) { // First child + auto it = _size_map.find(_crumbs.getCrumb()); + assert (it != _size_map.end()); + uint32_t child_size = it->second; + uint32_t next = _left_weight + child_size + 1; + _begin = curr; + _end = next - 1; + assignIntervalMarkers(in[Predicate::CHILDREN][i]); + curr = next; + } else { // Middle children + auto it = _size_map.find(_crumbs.getCrumb()); + assert (it != _size_map.end()); + uint32_t child_size = it->second; + uint32_t next = curr + child_size; + _begin = curr; + _end = next - 1; + assignIntervalMarkers(in[Predicate::CHILDREN][i]); + curr = next; + } + _crumbs.resize(crumb_size); + } + _begin = begin; + break; + } + case Predicate::TYPE_DISJUNCTION: { + // All OR children will have the same {begin, end} values, and + // the values will be same as that of the parent OR node + int crumb_size = _crumbs.size(); + for (size_t i = 0; i < in[Predicate::CHILDREN].children(); ++i) { + _crumbs.setChild(i, 'o'); + assignIntervalMarkers(in[Predicate::CHILDREN][i]); + _crumbs.resize(crumb_size); + } + break; + } + case Predicate::TYPE_FEATURE_SET: { + uint32_t cEnd = _negated? getCEnd() : 0; + Memory label_mem = in[Predicate::KEY].asString(); + string label(label_mem.data, label_mem.size); + label.push_back('='); + const size_t prefix_size = label.size(); + for (size_t i = 0; i < in[Predicate::SET].children(); ++i) { + Memory value = in[Predicate::SET][i].asString(); + label.resize(prefix_size); + label.append(value.data, value.size); + uint64_t hash = PredicateHash::hash64(label); + if (_result.interval_map.find(hash) + == _result.interval_map.end()) { + _result.features.push_back(hash); + } + _result.interval_map[hash].push_back( + { makeMarker(_begin, _negated? 
cEnd : _end) }); + } + addZstarIntervalIfNegated(cEnd); + _left_weight += 1; + break; + } + case Predicate::TYPE_FEATURE_RANGE: { + uint32_t cEnd = _negated? getCEnd() : 0; + for (size_t i = 0; i < in[Predicate::HASHED_PARTITIONS].children(); + ++i) { + uint64_t hash = in[Predicate::HASHED_PARTITIONS][i].asLong(); + _result.interval_map[hash].push_back( + { makeMarker(_begin, _negated? cEnd : _end) }); + } + const Inspector& in_hashed_edges = + in[Predicate::HASHED_EDGE_PARTITIONS]; + for (size_t i = 0; i < in_hashed_edges.children(); ++i){ + const Inspector& child = in_hashed_edges[i]; + uint64_t hash = child[Predicate::HASH].asLong(); + uint32_t payload = child[Predicate::PAYLOAD].asLong(); + _result.bounds_map[hash].push_back( + { makeMarker(_begin, _negated? cEnd : _end), payload }); + } + uint32_t hash_count = in[Predicate::HASHED_PARTITIONS].children() + + in_hashed_edges.children(); + if (hash_count < 3) { // three features takes more space than + // one stored range. + for (size_t i = 0; i < in[Predicate::HASHED_PARTITIONS].children(); + ++i) { + _result.features.push_back(in[Predicate::HASHED_PARTITIONS][i] + .asLong()); + } + for (size_t i = 0; i < in_hashed_edges.children(); ++i) { + _result.features.push_back(in_hashed_edges[i].asLong()); + } + } else { + bool has_min = in[Predicate::RANGE_MIN].valid(); + bool has_max = in[Predicate::RANGE_MAX].valid(); + _result.range_features.push_back( + {in[Predicate::KEY].asString(), + has_min? in[Predicate::RANGE_MIN].asLong() : _lower_bound, + has_max? 
in[Predicate::RANGE_MAX].asLong() : _upper_bound + }); + } + addZstarIntervalIfNegated(cEnd); + _left_weight += 1; + break; + } + case Predicate::TYPE_NEGATION: + _negated = !_negated; + assignIntervalMarkers(in[Predicate::CHILDREN][0]); + _negated = !_negated; + break; + } // switch +} +} // namespace + +void PredicateTreeAnnotator::annotate(const Inspector &in, + PredicateTreeAnnotations &result, + int64_t lower, int64_t upper) { + PredicateTreeAnalyzer analyzer(in); + uint32_t min_feature = static_cast(analyzer.getMinFeature()); + // Size is as interval range (tree size is lower bound for interval range) + int size = analyzer.getSize(); + assert(size <= UINT16_MAX && size > 0); + uint16_t interval_range = static_cast(size); + + PredicateTreeAnnotatorImpl + annotator(analyzer.getSizeMap(), result, lower, upper, interval_range); + annotator.assignIntervalMarkers(in); + result.min_feature = min_feature; + result.interval_range = interval_range; +} + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h new file mode 100644 index 00000000000..d2146aa66a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/predicate_tree_annotator.h @@ -0,0 +1,51 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include
+#include
+#include
+#include "predicate_interval.h"
+
+namespace vespalib {
+namespace slime { class Inspector; }
+}  // namespace vespalib;
+
+namespace search {
+namespace predicate {
+
+struct RangeFeature {  // A range feature kept as key plus bounds rather than as individual hashes.
+    vespalib::slime::Memory label;  // Feature key; NOTE(review): looks like a non-owning view into the slime data - confirm lifetime with callers.
+    int64_t from;                   // Inclusive/exclusive semantics are not visible here - TODO confirm.
+    int64_t to;
+};
+
+constexpr uint32_t MIN_INTERVAL = 0x0001;
+constexpr uint32_t MAX_INTERVAL = 0xffff;
+
+struct PredicateTreeAnnotations {  // Result holder filled in by PredicateTreeAnnotator::annotate().
+    PredicateTreeAnnotations(uint32_t mf=0, uint16_t ir=MAX_INTERVAL)
+        : min_feature(mf), interval_range(ir) {}
+    uint32_t min_feature;
+    uint16_t interval_range;
+    std::unordered_map> interval_map;  // Keyed by feature hash.
+    std::unordered_map> bounds_map;    // Keyed by edge-partition hash.
+
+    std::vector features;        // Feature hashes.
+    std::vector range_features;
+};
+
+/**
+ * Annotates a predicate document, represented by a slime object, with
+ * intervals used for matching with the interval algorithm.
+ */
+struct PredicateTreeAnnotator {
+    static void annotate(const vespalib::slime::Inspector &in,
+                         PredicateTreeAnnotations &result,
+                         int64_t lower_bound=LLONG_MIN,
+                         int64_t upper_bound=LLONG_MAX);
+};
+
+}  // namespace predicate
+}  // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp
new file mode 100644
index 00000000000..ca08d65b82f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.cpp
@@ -0,0 +1,27 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+LOG_SETUP(".predicate_zero_constraint_posting_list");
+#include
+
+#include "predicate_zero_constraint_posting_list.h"
+
+namespace search {
+namespace predicate {
+
+PredicateZeroConstraintPostingList::PredicateZeroConstraintPostingList(Iterator it)
+    : _iterator(it) {}
+
+bool PredicateZeroConstraintPostingList::next(uint32_t doc_id) {  // Moves to the first entry with key > doc_id; false when exhausted.
+    if (_iterator.valid() && _iterator.getKey() <= doc_id) {  // Only seek when the current entry is not already past doc_id.
+        _iterator.linearSeek(doc_id + 1);
+    }
+    if (!_iterator.valid()) {
+        return false;
+    }
+    setDocId(_iterator.getKey());
+    return true;
+}
+
+}  // namespace search::predicate
+}  // namespace search
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h
new file mode 100644
index 00000000000..428901823c2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for zero constraint documents
+ * from PredicateIndex.
+ */
+class PredicateZeroConstraintPostingList : public PredicatePostingList {
+    using Iterator = PredicateIndex::ZeroConstraintDocs::Iterator;
+    Iterator _iterator;
+
+public:
+    PredicateZeroConstraintPostingList(Iterator it);
+    bool next(uint32_t doc_id) override;
+    bool nextInterval() override { return false; }  // There is never more than the one fixed interval below.
+    VESPA_DLL_LOCAL uint32_t getInterval() const override { return 0x00010001; }  // Same constant for every document.
+};
+
+}  // namespace predicate
+}  // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h
new file mode 100644
index 00000000000..5aaf02f9ded
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h
@@ -0,0 +1,89 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "predicate_posting_list.h"
+#include "predicate_index.h"
+
+namespace search {
+namespace predicate {
+
+/**
+ * PredicatePostingList implementation for zstar iterators from
+ * PredicateIndex.
+ */
+template
+class PredicateZstarCompressedPostingList : public PredicatePostingList {
+    const PredicateIntervalStore &_interval_store;
+    Iterator _iterator;
+    const Interval *_current_interval;  // Stored entry that was handed out last.
+    uint32_t _interval_count;           // Stored entries left, counting *_current_interval.
+    uint32_t _interval;                 // Value returned by getInterval().
+    uint32_t _prev_interval;            // Last stored entry handed out as-is; 0 once its follow-up interval has been produced.
+
+    void setInterval(uint32_t interval) { _interval = interval; }
+public:
+    PredicateZstarCompressedPostingList(const PredicateIntervalStore &store, Iterator it);
+    bool next(uint32_t doc_id) override;
+    bool nextInterval() override;
+    VESPA_DLL_LOCAL uint32_t getInterval() const override { return _interval; }
+};
+
+template
+PredicateZstarCompressedPostingList::PredicateZstarCompressedPostingList(
+        const PredicateIntervalStore &interval_store, Iterator it)
+    : _interval_store(interval_store),
+      _iterator(it),
+      _current_interval(0),
+      _interval_count(0),
+      _interval(0),
+      _prev_interval(0) {
+}
+
+template
+bool PredicateZstarCompressedPostingList::next(uint32_t doc_id) {  // Moves to the first entry with key > doc_id and loads its first interval.
+    if (_iterator.valid() && _iterator.getKey() <= doc_id) {
+        _iterator.linearSeek(doc_id + 1);
+    }
+    if (!_iterator.valid()) {
+        return false;
+    }
+    Interval single_buf;  // NOTE(review): a local; if get() can return &single_buf, _current_interval dangles after return - nextInterval() only reads index 1 when _interval_count > 1. Confirm against PredicateIntervalStore::get.
+    _current_interval =
+        _interval_store.get(_iterator.getData(), _interval_count, &single_buf);
+    setDocId(_iterator.getKey());
+    setInterval(_current_interval[0].interval);
+    _prev_interval = getInterval();
+    return true;
+}
+
+template
+bool PredicateZstarCompressedPostingList::nextInterval() {  // Alternates between stored entries and a follow-up interval derived from the previous entry.
+    uint32_t next_interval = UINT32_MAX;  // UINT32_MAX means "no further stored entry".
+    if (_interval_count > 1) {
+        next_interval = _current_interval[1].interval;
+    }
+    if (_prev_interval) {
+        if ((next_interval & 0xffff0000) == 0) {  // Next stored entry has an empty upper half: combine it with the previous entry and consume it.
+            setInterval(_prev_interval >> 16 | next_interval << 16);
+            ++_current_interval;
+            --_interval_count;
+        } else {  // Otherwise derive the follow-up interval from the previous entry alone.
+            uint32_t value = _prev_interval >> 16;
+            setInterval((value + 1) << 16 | value);
+        }
+        _prev_interval = 0;
+        return true;
+    } else if (next_interval != UINT32_MAX) {  // Hand out the next stored entry as-is.
+        ++_current_interval;
+        --_interval_count;
+        setInterval(next_interval);
+        _prev_interval =
next_interval; + return true; + } + return false; +} + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.cpp b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp new file mode 100644 index 00000000000..829423bdc86 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.cpp @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include "simple_index.hpp" + +LOG_SETUP(".searchlib.simple_index"); + +namespace search { +namespace predicate { +namespace simpleindex { + +bool log_enabled() { + return LOG_WOULD_LOG(debug); +} + +void log_debug(vespalib::string &str) { + LOG(debug, str.c_str()); +} + +} // namespace simpleindex + +template class SimpleIndex; + +} // namespace predicate +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.h b/searchlib/src/vespa/searchlib/predicate/simple_index.h new file mode 100644 index 00000000000..be6fc098682 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/simple_index.h @@ -0,0 +1,261 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace predicate {
+
+
+template
+struct SimpleIndexDeserializeObserver {  // Callback interface passed to SimpleIndex::deserialize().
+    virtual ~SimpleIndexDeserializeObserver() {}
+    virtual void notifyInsert(Key key, DocId docId, uint32_t k) = 0;
+};
+
+template
+struct PostingSerializer {  // Writes one Posting to a buffer; used by SimpleIndex::serialize().
+    virtual ~PostingSerializer() {}
+    virtual void serialize(const Posting &posting,
+                           vespalib::MMapDataBuffer &buffer) const = 0;
+};
+
+template
+struct PostingDeserializer {  // Reads one Posting from a buffer; used by SimpleIndex::deserialize().
+    virtual ~PostingDeserializer() {}
+    virtual Posting deserialize(vespalib::MMapDataBuffer &buffer) = 0;
+};
+
+struct DocIdLimitProvider {  // Source of the doc id limits consulted by SimpleIndex.
+    virtual uint32_t getDocIdLimit() const = 0;
+    virtual uint32_t getCommittedDocIdLimit() const = 0;
+    virtual ~DocIdLimitProvider() {}
+};
+
+struct SimpleIndexConfig {  // Tuning knobs for SimpleIndex; every field defaults to the matching DEFAULT_* constant.
+    static constexpr double DEFAULT_UPPER_DOCID_FREQ_THRESHOLD = 0.40;
+    static constexpr double DEFAULT_LOWER_DOCID_FREQ_THRESHOLD =
+        0.8 * DEFAULT_UPPER_DOCID_FREQ_THRESHOLD;
+    static constexpr size_t DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD = 10000;
+    static constexpr size_t DEFAULT_LOWER_VECTOR_SIZE_THRESHOLD =
+        static_cast(0.8 * DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD);
+    static constexpr size_t DEFAULT_VECTOR_PRUNE_FREQUENCY = 20000;
+    static constexpr double DEFAULT_FOREACH_VECTOR_THRESHOLD = 0.25;
+
+    // Create vector posting list if doc frequency is above
+    double upper_docid_freq_threshold = DEFAULT_UPPER_DOCID_FREQ_THRESHOLD;
+    // Remove vector posting list if doc frequency is below
+    double lower_docid_freq_threshold = DEFAULT_LOWER_DOCID_FREQ_THRESHOLD;
+    // Threshold to create vector posting list
+    size_t upper_vector_size_threshold = DEFAULT_UPPER_VECTOR_SIZE_THRESHOLD;
+    // Threshold to remove vector posting list
+    size_t lower_vector_size_threshold = DEFAULT_LOWER_VECTOR_SIZE_THRESHOLD;
+    // How often to prune vector when add is called
+    size_t vector_prune_frequency = DEFAULT_VECTOR_PRUNE_FREQUENCY;
+    // Use vector posting list in foreach_frozen if doc frequency is above
+    double foreach_vector_threshold = DEFAULT_FOREACH_VECTOR_THRESHOLD;
+    // Grow strategy for the posting vectors
+    GrowStrategy grow_strategy = GrowStrategy();
+
+    SimpleIndexConfig() {}
+    SimpleIndexConfig(double upper_docid_freq_threshold_,
+                      double lower_docid_freq_threshold_,
+                      size_t upper_vector_size_threshold_,
+                      size_t lower_vector_size_threshold_,
+                      size_t vector_prune_frequency_,
+                      double foreach_vector_threshold_,
+                      GrowStrategy grow_strategy_)
+        : upper_docid_freq_threshold(upper_docid_freq_threshold_),
+          lower_docid_freq_threshold(lower_docid_freq_threshold_),
+          upper_vector_size_threshold(upper_vector_size_threshold_),
+          lower_vector_size_threshold(lower_vector_size_threshold_),
+          vector_prune_frequency(vector_prune_frequency_),
+          foreach_vector_threshold(foreach_vector_threshold_),
+          grow_strategy(grow_strategy_) {}
+    SimpleIndexConfig(double upper_docid_freq_threshold_, GrowStrategy grow_strategy_)  // Lower frequency threshold becomes 80% of the upper one; the remaining fields keep their defaults.
+        : upper_docid_freq_threshold(upper_docid_freq_threshold_),
+          lower_docid_freq_threshold(upper_docid_freq_threshold_ * 0.80),
+          grow_strategy(grow_strategy_) {}
+};
+
+template
+class PostingVectorIterator {  // Forward iterator over the valid() slots of a posting vector, indexed by doc id.
+    using PostingVector = attribute::RcuVectorBase;
+
+    const Posting * const _vector;  // Address of the vector's element 0 at construction time.
+    const size_t _size;             // Exclusive upper bound for doc ids visited.
+    size_t _pos;                    // Current doc id; equals _size when exhausted.
+    Posting _data;                  // Copy of the posting at _pos.
+
+public:
+    // Handle both move and copy construction
+    PostingVectorIterator(PostingVectorIterator&&) = default;
+    PostingVectorIterator& operator=(PostingVectorIterator&&) = default;
+    PostingVectorIterator(const PostingVectorIterator&) = default;
+    PostingVectorIterator& operator=(const PostingVectorIterator&) = default;
+
+    explicit PostingVectorIterator(const PostingVector & vector, size_t size) :
+        _vector(&vector[0]), _size(size) {
+        assert(_size <= vector.size());
+        linearSeek(1);  // Positions on the first valid slot at index >= 1; slot 0 is never visited.
+    }
+
+    bool valid() const { return _pos < _size; }
+    DocId getKey() const { return _pos; }
+    Posting getData() const { return _data; }
+    void linearSeek(DocId doc_id) {  // Positions on the first valid slot at index >= doc_id, or at _size if there is none.
+        while (doc_id < _size) {
+            const Posting &p = _vector[doc_id];
+            if (p.valid()) {
+                _pos = doc_id;
+                _data = p;
+                return;
+            }
+            ++doc_id;
+        }
+        _pos = _size;
+    }
+    PostingVectorIterator & operator++() {
+        linearSeek(_pos + 1);
+        return *this;
+    }
+};
+
+/**
+ * SimpleIndex holds a dictionary of Keys and posting lists of DocIds
+ * with Posting information.
+ *
+ * Serialization / deserialization assumes that Key fits in 64 bits
+ * and DocId fits in 32 bits.
+ */
+template
+class SimpleIndex {
+public:
+    using Dictionary = btree::BTree;
+    using DictionaryIterator = typename Dictionary::ConstIterator;
+    using BTreeStore = btree::BTreeStore<
+        DocId, Posting, btree::NoAggregated, std::less, btree::BTreeDefaultTraits>;
+    using BTreeIterator = typename BTreeStore::ConstIterator;
+    using PostingVector = attribute::RcuVectorBase;
+    using VectorStore = btree::BTree, btree::NoAggregated>;
+    using VectorIterator = PostingVectorIterator;
+
+private:
+    using GenerationHolder = vespalib::GenerationHolder;
+    using generation_t = vespalib::GenerationHandler::generation_t;
+    template
+    using optional = std::experimental::optional;
+
+    Dictionary _dictionary;
+    BTreeStore _btree_posting_lists;
+    VectorStore _vector_posting_lists;      // Vector-form posting lists, looked up by Key.
+    GenerationHolder &_generation_holder;
+    uint32_t _insert_remove_counter = 0;
+    const SimpleIndexConfig _config;        // Copied at construction.
+    const DocIdLimitProvider &_limit_provider;
+
+    void insertIntoPosting(btree::EntryRef &ref, Key key, DocId doc_id, const Posting &posting);
+    void insertIntoVectorPosting(btree::EntryRef ref, Key key, DocId doc_id, const Posting &posting);
+    void removeFromVectorPostingList(btree::EntryRef ref, Key key, DocId doc_id);
+    void pruneBelowThresholdVectors();
+    void createVectorIfOverThreshold(btree::EntryRef ref, Key key);
+    bool removeVectorIfBelowThreshold(btree::EntryRef ref, typename VectorStore::Iterator &it);
+
+    void logVector(const char *action, Key key, size_t document_count,
+                   double ratio, size_t vector_length) const;
+    double getDocumentRatio(size_t document_count, uint32_t doc_id_limit) const;
+    size_t getDocumentCount(btree::EntryRef ref) const;
+    bool shouldCreateVectorPosting(size_t size, double ratio) const;
+    bool shouldRemoveVectorPosting(size_t size, double ratio) const;
+    size_t getVectorPostingSize(const PostingVector &vector) const {  // Vector size capped at the committed doc id limit.
+        return std::min(vector.size(),
+                        static_cast(_limit_provider.getCommittedDocIdLimit()));
+    }
+
+public:
+    SimpleIndex(GenerationHolder &generation_holder, const DocIdLimitProvider &provider) :
+        SimpleIndex(generation_holder, provider, SimpleIndexConfig()) {}
+    SimpleIndex(GenerationHolder &generation_holder,
+                const DocIdLimitProvider &provider, const SimpleIndexConfig &config)
+        : _generation_holder(generation_holder), _config(config), _limit_provider(provider) {}
+    ~SimpleIndex();
+
+    void serialize(vespalib::MMapDataBuffer &buffer,
+                   const PostingSerializer &serializer) const;
+    void deserialize(vespalib::MMapDataBuffer &buffer,
+                     PostingDeserializer &deserializer,
+                     SimpleIndexDeserializeObserver &observer, uint32_t version);
+
+    void addPosting(Key key, DocId doc_id, const Posting &posting);
+    std::pair removeFromPostingList(Key key, DocId doc_id);
+    // Call promoteOverThresholdVectors() after deserializing a SimpleIndex
+    // (and after doc id limits values are determined) to promote posting lists to vectors.
+    void promoteOverThresholdVectors();
+    void commit();
+    void trimHoldLists(generation_t used_generation);
+    void transferHoldLists(generation_t generation);
+    MemoryUsage getMemoryUsage() const;
+    template
+    void foreach_frozen_key(btree::EntryRef ref, Key key, FunctionType func) const;
+
+    DictionaryIterator lookup(Key key) const {  // Looks the key up in the frozen view of the dictionary.
+        return _dictionary.getFrozenView().find(key);
+    }
+
+    size_t getPostingListSize(btree::EntryRef ref) const {
+        return _btree_posting_lists.frozenSize(ref);
+    }
+
+    BTreeIterator getBTreePostingList(btree::EntryRef ref) const {
+        return _btree_posting_lists.beginFrozen(ref);
+    }
+
+    optional getVectorPostingList(Key key) const {  // Empty optional when the key has no vector posting list.
+        auto it = _vector_posting_lists.getFrozenView().find(key);
+        if (it.valid()) {
+            auto &vector = *it.getData();
+            size_t size = getVectorPostingSize(vector);
+            return optional(VectorIterator(vector, size));
+        }
+        return optional();
+
+    }
+};
+
+template
+template
+void SimpleIndex::foreach_frozen_key(
+        btree::EntryRef ref, Key key, FunctionType func) const {  // Calls func(doc_id) for each document in the posting list of key.
+    auto it = _vector_posting_lists.getFrozenView().find(key);
+    double ratio = getDocumentRatio(getDocumentCount(ref), _limit_provider.getDocIdLimit());
+    if (it.valid() && ratio > _config.foreach_vector_threshold) {  // Scan the vector only when one exists and the ratio is above the threshold.
+        auto &vector = *it.getData();
+        size_t size = getVectorPostingSize(vector);
+        for (DocId doc_id = 1; doc_id < size; ++doc_id) {  // Starts at 1; slot 0 is never visited.
+            if (vector[doc_id].valid()) {
+                func(doc_id);
+            }
+        }
+    } else {
+        _btree_posting_lists.foreach_frozen_key(ref, func);
+    }
+}
+
+}  // namespace predicate
+}  // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/predicate/simple_index.hpp b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
new file mode 100644
index 00000000000..10ba3e79a02
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/predicate/simple_index.hpp
@@ -0,0 +1,315 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include "simple_index.h" + +namespace search { +namespace predicate { + +namespace simpleindex { + bool log_enabled(); + void log_debug(vespalib::string &str); +} + +template +void SimpleIndex::insertIntoPosting( + btree::EntryRef &ref, Key key, DocId doc_id, const Posting &posting) { + bool ok = _btree_posting_lists.insert(ref, doc_id, posting); + if (!ok) { + _btree_posting_lists.remove(ref, doc_id); + ok = _btree_posting_lists.insert(ref, doc_id, posting); + } + assert(ok); + insertIntoVectorPosting(ref, key, doc_id, posting); + pruneBelowThresholdVectors(); +} + +template +void SimpleIndex::insertIntoVectorPosting( + btree::EntryRef ref, Key key, DocId doc_id, const Posting &posting) { + assert(doc_id < _limit_provider.getDocIdLimit()); + auto it = _vector_posting_lists.find(key); + if (it.valid()) { + auto &vector = *it.getData(); + vector.ensure_size(doc_id + 1); + vector[doc_id] = posting; + } else { + createVectorIfOverThreshold(ref, key); + } +}; + +template +SimpleIndex::~SimpleIndex() { + _btree_posting_lists.disableFreeLists(); + _btree_posting_lists.disableElemHoldList(); + + for (auto it = _dictionary.begin(); it.valid(); ++it) { + btree::EntryRef ref(it.getData()); + if (ref.valid()) { + _btree_posting_lists.clear(ref); + } + } + + _vector_posting_lists.disableFreeLists(); + _vector_posting_lists.disableElemHoldList(); + _vector_posting_lists.clear(); + _vector_posting_lists.getAllocator().freeze(); + _vector_posting_lists.getAllocator().clearHoldLists(); + + _dictionary.disableFreeLists(); + _dictionary.disableElemHoldList(); + _dictionary.clear(); + _dictionary.getAllocator().freeze(); + _dictionary.getAllocator().clearHoldLists(); + + _btree_posting_lists.clearBuilder(); + _btree_posting_lists.freeze(); + _btree_posting_lists.clearHoldLists(); +} + +template +void SimpleIndex::serialize( + vespalib::MMapDataBuffer &buffer, + const PostingSerializer &serializer) const { + assert(sizeof(Key) <= sizeof(uint64_t)); + 
assert(sizeof(DocId) <= sizeof(uint32_t)); + buffer.writeInt32(_dictionary.size()); + for (auto it = _dictionary.begin(); it.valid(); ++it) { + btree::EntryRef ref = it.getData(); + buffer.writeInt32(_btree_posting_lists.size(ref)); // 0 if !valid() + auto posting_it = _btree_posting_lists.begin(ref); + if (!posting_it.valid()) + continue; + buffer.writeInt64(it.getKey()); // Key + for (; posting_it.valid(); ++posting_it) { + buffer.writeInt32(posting_it.getKey()); // DocId + serializer.serialize(posting_it.getData(), buffer); + } + } +} + +template +void SimpleIndex::deserialize( + vespalib::MMapDataBuffer &buffer, + PostingDeserializer &deserializer, + SimpleIndexDeserializeObserver &observer, uint32_t version) { + typename Dictionary::Builder builder(_dictionary.getAllocator()); + uint32_t size = buffer.readInt32(); + std::vector> postings; + for (size_t i = 0; i < size; ++i) { + uint32_t posting_size = buffer.readInt32(); + if (!posting_size) + continue; + postings.clear(); + Key key = buffer.readInt64(); + for (size_t j = 0; j < posting_size; ++j) { + DocId doc_id; + if (version == 0) { + DocId raw_id = buffer.readInt32(); + doc_id = raw_id >> 6; + uint8_t k = static_cast(raw_id & 0x3f); + uint8_t min_feature = k == 0 ? 
k : k + 1; + observer.notifyInsert(key, doc_id, min_feature); + } else { + doc_id = buffer.readInt32(); + // min-feature is stored in separate data structure for version > 0 + observer.notifyInsert(key, doc_id, 0); + } + postings.emplace_back(doc_id, deserializer.deserialize(buffer)); + } + btree::EntryRef ref; + _btree_posting_lists.apply(ref, &postings[0], &postings[postings.size()], + 0, 0); + builder.insert(key, ref); + } + _dictionary.assign(builder); + commit(); +} + +template +void SimpleIndex::addPosting(Key key, DocId doc_id, + const Posting &posting) { + auto iter = _dictionary.find(key); + btree::EntryRef ref; + if (iter.valid()) { + ref = iter.getData(); + insertIntoPosting(ref, key, doc_id, posting); + if (ref != iter.getData()) { + std::atomic_thread_fence(std::memory_order_release); + iter.writeData(ref); + } + } else { + insertIntoPosting(ref, key, doc_id, posting); + _dictionary.insert(key, ref); + } +} + +template +std::pair +SimpleIndex::removeFromPostingList(Key key, DocId doc_id) { + auto dict_it = _dictionary.find(key); + if (!dict_it.valid()) { + return std::make_pair(Posting(), false); + } + auto ref = dict_it.getData(); + assert(ref.valid()); + auto posting_it = _btree_posting_lists.begin(ref); + assert(posting_it.valid()); + + if (posting_it.getKey() < doc_id) { + posting_it.binarySeek(doc_id); + } + if (!posting_it.valid() || posting_it.getKey() != doc_id) { + return std::make_pair(Posting(), false); + } + + Posting posting = posting_it.getData(); + btree::EntryRef original_ref(ref); + _btree_posting_lists.remove(ref, doc_id); + removeFromVectorPostingList(ref, key, doc_id); + if (!ref.valid()) { // last posting was removed + _dictionary.remove(key); + } else if (ref != original_ref) { // ref changed. update dictionary. 
+ std::atomic_thread_fence(std::memory_order_release); + dict_it.writeData(ref); + } + return std::make_pair(posting, true); +} + +template +void SimpleIndex::removeFromVectorPostingList( + btree::EntryRef ref, Key key, DocId doc_id) { + auto it = _vector_posting_lists.find(key); + if (it.valid()) { + if (!removeVectorIfBelowThreshold(ref, it)) { + (*it.getData())[doc_id] = Posting(); + } + } +}; + +template +void SimpleIndex::pruneBelowThresholdVectors() { + // Check if it is time to prune any vector postings + if (++_insert_remove_counter % _config.vector_prune_frequency > 0) return; + + for (auto posting_it = _vector_posting_lists.begin(); posting_it.valid();) { + Key key = posting_it.getKey(); + auto dict_it = _dictionary.find(key); + assert(dict_it.valid()); + if (!removeVectorIfBelowThreshold(dict_it.getData(), posting_it)) { + ++posting_it; + } + } +}; + +template +void SimpleIndex::promoteOverThresholdVectors() { + for (auto it = _dictionary.begin(); it.valid(); ++it) { + Key key = it.getKey(); + if (!_vector_posting_lists.find(key).valid()) { + createVectorIfOverThreshold(it.getData(), key); + } + } +} + +template +void SimpleIndex::logVector( + const char *action, Key key, size_t document_count, double ratio, size_t vector_length) const { + if (!simpleindex::log_enabled()) return; + auto msg = vespalib::make_string( + "%s vector for key '%016" PRIx64 "' with length %zu. 
Contains %zu documents " + "(doc id limit %" PRIu32", committed doc id limit %" PRIu32 ", ratio %f, " + "vector count %zu)", + action, key, vector_length, document_count, _limit_provider.getDocIdLimit(), + _limit_provider.getCommittedDocIdLimit(), ratio, _vector_posting_lists.size()); + simpleindex::log_debug(msg); +} + +template +void SimpleIndex::createVectorIfOverThreshold(btree::EntryRef ref, Key key) { + uint32_t doc_id_limit = _limit_provider.getDocIdLimit(); + size_t size = getDocumentCount(ref); + double ratio = getDocumentRatio(size, doc_id_limit); + if (shouldCreateVectorPosting(size, ratio)) { + auto vector = new attribute::RcuVectorBase(_config.grow_strategy, _generation_holder); + vector->unsafe_resize(doc_id_limit); + _btree_posting_lists.foreach_unfrozen( + ref, [&](DocId d, const Posting &p) { (*vector)[d] = p; }); + _vector_posting_lists.insert(key, std::shared_ptr(vector)); + logVector("Created", key, size, ratio, vector->size()); + } +} + +template +bool SimpleIndex::removeVectorIfBelowThreshold( + btree::EntryRef ref, typename VectorStore::Iterator &it) { + size_t size = getDocumentCount(ref); + double ratio = getDocumentRatio(size, _limit_provider.getDocIdLimit()); + if (shouldRemoveVectorPosting(size, ratio)) { + Key key = it.getKey(); + size_t vector_length = it.getData()->size(); + _vector_posting_lists.remove(it); + logVector("Removed", key, size, ratio, vector_length); + return true; + } + return false; +} + +template +double SimpleIndex::getDocumentRatio(size_t document_count, + uint32_t doc_id_limit) const { + assert(doc_id_limit > 1); + return document_count / static_cast(doc_id_limit - 1); +}; + +template +size_t SimpleIndex::getDocumentCount(btree::EntryRef ref) const { + return _btree_posting_lists.size(ref); +}; + +template +bool SimpleIndex::shouldRemoveVectorPosting(size_t size, double ratio) const { + return size < _config.lower_vector_size_threshold || ratio < _config.lower_docid_freq_threshold; +}; + +template +bool 
SimpleIndex::shouldCreateVectorPosting(size_t size, double ratio) const { + return size >= _config.upper_vector_size_threshold && ratio >= _config.upper_docid_freq_threshold; +}; + +template +void SimpleIndex::commit() { + _dictionary.getAllocator().freeze(); + _btree_posting_lists.freeze(); + _vector_posting_lists.getAllocator().freeze(); +} + +template +void SimpleIndex::trimHoldLists(generation_t used_generation) { + _btree_posting_lists.trimHoldLists(used_generation); + _dictionary.getAllocator().trimHoldLists(used_generation); + _vector_posting_lists.getAllocator().trimHoldLists(used_generation); + +} + +template +void SimpleIndex::transferHoldLists(generation_t generation) { + _dictionary.getAllocator().transferHoldLists(generation); + _btree_posting_lists.transferHoldLists(generation); + _vector_posting_lists.getAllocator().transferHoldLists(generation); +} + +template +MemoryUsage SimpleIndex::getMemoryUsage() const { + MemoryUsage combined; + combined.merge(_dictionary.getMemoryUsage()); + combined.merge(_btree_posting_lists.getMemoryUsage()); + combined.merge(_vector_posting_lists.getMemoryUsage()); + for (auto it = _vector_posting_lists.begin(); it.valid(); ++it) { + combined.merge(it.getData()->getMemoryUsage()); + } + return combined; +}; + +} // namespace predicate +} // namespace search diff --git a/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h b/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h new file mode 100644 index 00000000000..ee0cd638bb1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/predicate/tree_crumbs.h @@ -0,0 +1,44 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search { +namespace predicate { + +/** + * Builds a path from the root of a tree, to be able to describe a + * given position in the tree. 
/**
 * Builds a path from the root of a tree, to be able to describe a
 * given position in the tree.
 *
 * The path is kept as a flat character buffer: every setChild() call
 * appends a delimiter followed by the child number in decimal.
 */
class TreeCrumbs {
    std::vector<char> _buffer;

public:
    /**
     * Appends delimiter and the decimal representation of number.
     *
     * @param number    child index to append (0 is written as "0").
     * @param delimiter character written before the number.
     */
    void setChild(size_t number, char delimiter = ':') {
        _buffer.push_back(delimiter);
        // Three decimal digits per byte always suffice (one byte <= "255"),
        // so the scratch buffer can hold any size_t value.
        char digits[3 * sizeof(number)];
        size_t count = 0;
        do {
            digits[count++] = static_cast<char>('0' + number % 10);
            number /= 10;
        } while (number > 0);
        // Digits were produced least-significant first; emit them reversed.
        while (count > 0) {
            _buffer.push_back(digits[--count]);
        }
    }

    /** Truncates (or extends) the path to i characters. */
    void resize(size_t i) { _buffer.resize(i); }

    /** Number of characters currently in the path. */
    size_t size() const { return _buffer.size(); }

    /** Returns a copy of the path; safe to call on an empty path. */
    std::string getCrumb() const {
        return std::string(_buffer.begin(), _buffer.end());
    }
};
See LICENSE in the project root. +#include +#include + +namespace search { + +Object::~Object(void) +{ +} + +vespalib::string Object::toString() const +{ + return vespalib::string(""); +} + +} diff --git a/searchlib/src/vespa/searchlib/query/base.h b/searchlib/src/vespa/searchlib/query/base.h new file mode 100644 index 00000000000..ba066b5410d --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/base.h @@ -0,0 +1,141 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search +{ + +/// Type of general unsigned 8 bit data. +typedef unsigned char byte; +/// A simple container for the raw querystack. +typedef vespalib::stringref QueryPacketT; +/// The type of the local documentId. +typedef unsigned DocumentIdT; +/// This is the type of the CollectionId used in the StorageAPI. +typedef uint64_t CollectionIdT; +/// The type to identify a query. +typedef unsigned QueryIdT; +/// The rank type. +typedef unsigned RankT; +/// How time type. Used to represent seconds since 1970. +typedef unsigned TimeT; +/// Type to identify performance counters. +typedef uint64_t CounterT; +/// Type to identify performance values. +typedef int ValueT; +/// This is a 16 byte vector used in SSE2 integer operations. +typedef char v16qi __attribute__ ((__vector_size__(16))); +/// This is a 2 element uint64_t vector used in SSE2 integer operations. +typedef long long v2di __attribute__ ((__vector_size__(16))); +/// A type to represent a list of strings. +typedef std::vector StringListT; +/// A type to represent a vector of 32 bit signed integers. +typedef std::vector Int32ListT; +/// A type to represent a list of document ids. +typedef std::vector DocumentIdList; + +/// A debug macro the does "a" when l & the mask is true. The mask is set per file. 
/**
   This is a template that can hold any objects of any descendants of T.
   It takes a copy (via duplicate()) of every object it is given, so the
   container can be copied, assigned and destroyed without sharing state.
   It gives you the -> and * operators so you can use it as a pointer to T.

   Assignment duplicates the source before releasing the currently held
   object. That makes assigning a container from its own pointee safe and
   keeps the old value if duplicate() throws.
*/
template <typename T>
class ObjectContainer
{
 public:
    ObjectContainer() : _p(NULL) { }
    ObjectContainer(const T & org) : _p(clone(&org)) { }
    ObjectContainer(const T * org) : _p(clone(org)) { }
    ObjectContainer(const ObjectContainer & org) : _p(clone(org._p)) { }
    /// Holds a copy of *org, or becomes empty when org is NULL.
    ObjectContainer & operator = (const T * org) { replace(clone(org)); return *this; }
    /// Holds a copy of org.
    ObjectContainer & operator = (const T & org) { replace(clone(&org)); return *this; }
    /// Holds a copy of the object held by org (or nothing if org is empty).
    ObjectContainer & operator = (const ObjectContainer & org) {
        if (this != & org) {
            replace(clone(org._p));
        }
        return *this;
    }
    virtual ~ObjectContainer() { cleanUp(); }
    /// True when an object is held.
    bool valid()                  const { return (_p != NULL); }
    T *operator->()                     { return _p; }
    T &operator*()                      { return *_p; }
    const T *operator->()         const { return _p; }
    const T &operator*()          const { return *_p; }
    operator T & ()               const { return *_p; }
    operator T * ()               const { return _p; }

 private:
    /// Duplicates *src, or returns NULL for a NULL source.
    static T * clone(const T * src) {
        return src ? static_cast<T *>(src->duplicate()) : NULL;
    }
    /// Installs fresh and only then deletes the previously held object.
    void replace(T * fresh) { T * old = _p; _p = fresh; delete old; }
    void cleanUp() { delete _p; _p = NULL; }
    T * _p;
};
/**
   This is a template similar to ObjectContainer that frees you from the
   trouble of having to write your own copy/assignment operators when you
   use pointers as pure references. The container never owns the object it
   points to: copying copies the pointer and destruction only resets it.
   Can be used as an ordinary pointer since -> and * are overloaded.
*/
template <typename T>
class PointerContainer
{
 public:
    PointerContainer() : _p(NULL) { }
    /// Refers to org; stores its address, matching operator=(T &).
    PointerContainer(T & org) : _p(&org) { }
    PointerContainer(T * org) : _p(org) { }
    PointerContainer(const PointerContainer & org) : _p(org._p) { }
    PointerContainer & operator = (T * org) { _p = org; return *this; }
    PointerContainer & operator = (T & org) { _p = &org; return *this; }
    PointerContainer & operator = (const PointerContainer & org) { if (this != & org) { _p = org._p;} return *this; }
    /// Does not delete the referenced object.
    virtual ~PointerContainer() { _p = 0; }
    /// True when a non-NULL pointer is held.
    bool valid()                  const { return (_p != NULL); }
    T *operator->()               const { return _p; }
    T &operator*()                const { return *_p; }
    operator T & ()               const { return *_p; }
    operator T * ()               const { return _p; }
 private:
    T * _p;
};
/**
 * A single term occurrence. The position packs the context id into the
 * upper 8 bits and the word position into the lower 24 bits.
 * Hits order by the packed position, i.e. by context first and word
 * position second.
 */
class Hit
{
 public:
    Hit(uint32_t pos_, uint32_t context_, int32_t weight_) :
        _position(pos_ | (context_<<24)),
        _weight(weight_)
    { }
    int32_t weight() const { return _weight; }
    /// Packed position (context in the top 8 bits).
    uint32_t pos()     const { return _position; }
    /// Word position without the context bits.
    uint32_t wordpos() const { return _position & 0xffffff; }
    uint32_t context() const { return _position >> 24; }
    bool operator < (const Hit & b) const { return cmp(b) < 0; }
 private:
    /// Three-way compare of the packed positions: -1, 0 or 1.
    /// Compares instead of subtracting: the unsigned difference does not
    /// fit in an int once the positions are 2^31 or more apart.
    int cmp(const Hit & b) const {
        if (_position < b._position) {
            return -1;
        }
        return (_position > b._position) ? 1 : 0;
    }
    uint32_t _position;
    int32_t  _weight;
};
+QueryConnector::QueryConnector(const char * opName) : + QueryNode(), + _opName(opName), + _index() +{ +} + +const HitList & QueryConnector::evaluateHits(HitList & hl) const +{ + if (evaluate()) { + hl.push_back(Hit(1, 0, 1)); + } + return hl; +} + +void QueryConnector::reset() +{ + for(iterator it=begin(), mt=end(); it != mt; it++) { + QueryNode & qn = **it; + qn.reset(); + } +} + +void QueryConnector::getLeafs(QueryTermList & tl) +{ + for(iterator it=begin(), mt=end(); it != mt; it++) { + QueryNode & qn = **it; + qn.getLeafs(tl); + } +} + +void QueryConnector::getLeafs(ConstQueryTermList & tl) const +{ + for(const_iterator it=begin(), mt=end(); it != mt; it++) { + const QueryNode & qn = **it; + qn.getLeafs(tl); + } +} + +void QueryConnector::getPhrases(QueryNodeRefList & tl) +{ + for(iterator it=begin(), mt=end(); it != mt; it++) { + QueryNode & qn = **it; + qn.getPhrases(tl); + } +} + +void QueryConnector::getPhrases(ConstQueryNodeRefList & tl) const +{ + for(const_iterator it=begin(), mt=end(); it != mt; it++) { + const QueryNode & qn = **it; + qn.getPhrases(tl); + } +} + +size_t QueryConnector::depth() const +{ + size_t d(0); + for(const_iterator it=begin(), mt=end(); (it!=mt); it++) { + const QueryNode & qn = **it; + size_t t = qn.depth(); + if (t > d) + d = t; + } + return d+1; +} + +size_t QueryConnector::width() const +{ + size_t w(0); + for(const_iterator it=begin(), mt=end(); (it!=mt); it++) { + const QueryNode & qn = **it; + w += qn.width(); + } + + return w; +} + +QueryConnector * +QueryConnector::create(ParseItem::ItemType type) +{ + switch (type) { + case search::ParseItem::ITEM_AND: return new AndQueryNode(); + case search::ParseItem::ITEM_OR: return new OrQueryNode(); + case search::ParseItem::ITEM_WEAK_AND: return new OrQueryNode(); + case search::ParseItem::ITEM_EQUIV: return new EquivQueryNode(); + case search::ParseItem::ITEM_WEIGHTED_SET: return new EquivQueryNode(); + case search::ParseItem::ITEM_DOT_PRODUCT: return new OrQueryNode(); + case 
search::ParseItem::ITEM_WAND: return new OrQueryNode(); + case search::ParseItem::ITEM_NOT: return new AndNotQueryNode(); + case search::ParseItem::ITEM_PHRASE: return new PhraseQueryNode(); + case search::ParseItem::ITEM_NEAR: return new NearQueryNode(); + case search::ParseItem::ITEM_ONEAR: return new ONearQueryNode(); + default: + return NULL; + } +} + +bool TrueNode::evaluate() const +{ + return true; +} + +bool AndQueryNode::evaluate() const +{ + bool ok(true); + for (const_iterator it=begin(), mt=end(); ok && (it!=mt); it++) { + const QueryNode & qn = **it; + ok = ok && qn.evaluate(); + } + return ok; +} + +bool AndNotQueryNode::evaluate() const +{ + bool ok(empty() ? true : front()->evaluate()); + if (!empty()) { + for (const_iterator it=begin()+1, mt=end(); ok && (it!=mt); it++) { + const QueryNode & qn = **it; + ok = ok && ! qn.evaluate(); + } + } + return ok; +} + +bool OrQueryNode::evaluate() const +{ + bool ok(false); + for (const_iterator it=begin(), mt=end(); !ok && (it!=mt); it++) { + const QueryNode & qn = **it; + ok = qn.evaluate(); + } + return ok; +} + + +bool EquivQueryNode::evaluate() const +{ + return OrQueryNode::evaluate(); +} + + +bool PhraseQueryNode::evaluate() const +{ + bool ok(false); + HitList hl; + ok = ! 
evaluateHits(hl).empty(); + return ok; +} + +void PhraseQueryNode::getPhrases(QueryNodeRefList & tl) { tl.push_back(this); } +void PhraseQueryNode::getPhrases(ConstQueryNodeRefList & tl) const { tl.push_back(this); } + +const HitList & PhraseQueryNode::evaluateHits(HitList & hl) const +{ + hl.clear(); + _fieldInfo.clear(); + bool andResult(AndQueryNode::evaluate()); + if (andResult) { + HitList tmpHL; + unsigned int fullPhraseLen = size(); + unsigned int currPhraseLen = 0; + std::vector indexVector(fullPhraseLen, 0); + const QueryTerm * curr = static_cast (&(*(*this)[currPhraseLen])); + bool exhausted( curr->evaluateHits(tmpHL).empty()); + for (; !exhausted; ) { + const QueryTerm & next = static_cast(*(*this)[currPhraseLen+1]); + unsigned int & currIndex = indexVector[currPhraseLen]; + unsigned int & nextIndex = indexVector[currPhraseLen+1]; + const HitList & nextHL = next.evaluateHits(tmpHL); + + size_t firstPosition = curr->evaluateHits(tmpHL)[currIndex].pos(); + int diff(0); + size_t nextIndexMax = nextHL.size(); + while ((nextIndex < nextIndexMax) && ((diff = nextHL[nextIndex].pos()-firstPosition) < 1)) + nextIndex++; + if (diff == 1) { + currPhraseLen++; + bool ok = ((currPhraseLen+1)==fullPhraseLen); + if (ok) { + Hit h = nextHL[indexVector[currPhraseLen]]; + hl.push_back(h); + const QueryTerm::FieldInfo & fi = next.getFieldInfo(h.context()); + updateFieldInfo(h.context(), hl.size() - 1, fi.getFieldLength()); + currPhraseLen = 0; + indexVector[0]++; + } + } else { + currPhraseLen = 0; + indexVector[currPhraseLen]++; + } + curr = static_cast(&*(*this)[currPhraseLen]); + exhausted = (nextIndex >= nextIndexMax) || (indexVector[currPhraseLen] >= curr->evaluateHits(tmpHL).size()); + } + } + return hl; +} + +void +PhraseQueryNode::updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const +{ + if (fid >= _fieldInfo.size()) { + _fieldInfo.resize(fid + 1); + // only set hit offset and field length the first time + QueryTerm::FieldInfo & fi = 
_fieldInfo[fid]; + fi.setHitOffset(offset); + fi.setFieldLength(fieldLength); + } + QueryTerm::FieldInfo & fi = _fieldInfo[fid]; + fi.setHitCount(fi.getHitCount() + 1); +} + +bool NotQueryNode::evaluate() const +{ + bool ok(false); + for (const_iterator it=begin(), mt=end(); it!=mt; it++) { + const QueryNode & qn = **it; + ok |= ! qn.evaluate(); + } + return ok; +} + +bool NearQueryNode::evaluate() const +{ + bool ok(AndQueryNode::evaluate()); + return ok; +} + +void NearQueryNode::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + AndQueryNode::visitMembers(visitor); + visit(visitor, "distance", _distance); +} + + +bool ONearQueryNode::evaluate() const +{ + bool ok(NearQueryNode::evaluate()); + return ok; +} + +Query::Query() : + Identifiable(), + _root() +{ +} + +Query::Query(const QueryNodeResultBase & org, const QueryPacketT & queryRep) : + Identifiable(), + _root() +{ + build(org, queryRep); +} + +void Query::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "root", _root); +} + +bool Query::evaluate() const +{ + bool ok = valid() ? _root->evaluate() : false; + return ok; +} + +bool Query::build(const QueryNodeResultBase & org, const QueryPacketT & queryRep) +{ + search::SimpleQueryStackDumpIterator stack(queryRep); + if (stack.next()) { + _root.reset(QueryNode::Build(NULL, org, stack, true).release()); + } + return valid(); +} + +void Query::getLeafs(QueryTermList & tl) +{ + if (valid()) { + _root->getLeafs(tl); + } +} + +void Query::getLeafs(ConstQueryTermList & tl) const +{ + if (valid()) { + _root->getLeafs(tl); + } +} + +void Query::getPhrases(QueryNodeRefList & tl) +{ + if (valid()) { + _root->getPhrases(tl); + } +} + +void Query::getPhrases(ConstQueryNodeRefList & tl) const +{ + if (valid()) { + _root->getPhrases(tl); + } +} + +void Query::reset() +{ + if (valid()) { + _root->reset(); + } +} + +size_t Query::depth() const +{ + return valid() ? _root->depth() : 0; +} + +size_t Query::width() const +{ + return valid() ? 
_root->width() : 0; +} + +} diff --git a/searchlib/src/vespa/searchlib/query/query.h b/searchlib/src/vespa/searchlib/query/query.h new file mode 100644 index 00000000000..7e9f00eb436 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/query.h @@ -0,0 +1,212 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search +{ + +/** + Base class for all N-ary query operators. + Implements the width, depth, print, and collect all leafs operators(terms). +*/ +class QueryConnector : public QueryNode, public QueryNodeList +{ +public: + DECLARE_IDENTIFIABLE_ABSTRACT_NS(search, QueryConnector); + QueryConnector(const char * opName); + virtual const HitList & evaluateHits(HitList & hl) const; + /// Will clear the results from the querytree. + virtual void reset(); + /// Will get all leafnodes. + virtual void getLeafs(QueryTermList & tl); + virtual void getLeafs(ConstQueryTermList & tl) const; + /// Gives you all phrases of this tree. + virtual void getPhrases(QueryNodeRefList & tl); + virtual void getPhrases(ConstQueryNodeRefList & tl) const; + virtual size_t depth() const; + virtual size_t width() const; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + virtual void setIndex(const vespalib::string & index) { _index = index; } + virtual const vespalib::string & getIndex() const { return _index; } + static QueryConnector * create(ParseItem::ItemType type); + virtual bool isFlattenable(ParseItem::ItemType type) const { (void) type; return false; } +private: + vespalib::string _opName; + vespalib::string _index; +}; + +/** + True operator. Matches everything. +*/ +class TrueNode : public QueryConnector +{ +public: + DECLARE_IDENTIFIABLE_NS(search, TrueNode); + TrueNode() : QueryConnector("AND") { } + virtual bool evaluate() const; +}; + +/** + N-ary Or operator that simply ANDs all the nodes together. 
+*/ +class AndQueryNode : public QueryConnector +{ +public: + DECLARE_IDENTIFIABLE_NS(search, AndQueryNode); + AndQueryNode() : QueryConnector("AND") { } + AndQueryNode(const char * opName) : QueryConnector(opName) { } + virtual bool evaluate() const; + virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_AND; } +}; + +/** + N-ary special AndNot operator. n[0] & !n[1] & !n[2] .. & !n[j]. +*/ +class AndNotQueryNode : public QueryConnector +{ +public: + DECLARE_IDENTIFIABLE_NS(search, AndNotQueryNode); + AndNotQueryNode() : QueryConnector("ANDNOT") { } + virtual bool evaluate() const; + virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; } +}; + +/** + N-ary Or operator that simply ORs all the nodes together. +*/ +class OrQueryNode : public QueryConnector +{ +public: + DECLARE_IDENTIFIABLE_NS(search, OrQueryNode); + OrQueryNode() : QueryConnector("OR") { } + OrQueryNode(const char * opName) : QueryConnector(opName) { } + virtual bool evaluate() const; + virtual bool isFlattenable(ParseItem::ItemType type) const { + return (type == ParseItem::ITEM_OR) || + (type == ParseItem::ITEM_DOT_PRODUCT) || + (type == ParseItem::ITEM_WAND) || + (type == ParseItem::ITEM_WEAK_AND); + } +}; + +/** + N-ary "EQUIV" operator that merges terms from nodes below. +*/ +class EquivQueryNode : public OrQueryNode +{ +public: + DECLARE_IDENTIFIABLE_NS(search, EquivQueryNode); + EquivQueryNode() : OrQueryNode("EQUIV") { } + virtual bool evaluate() const; + virtual bool isFlattenable(ParseItem::ItemType type) const { + return (type == ParseItem::ITEM_EQUIV) || + (type == ParseItem::ITEM_WEIGHTED_SET); + } +}; + +/** + N-ary phrase operator. All terms must be satisfied and have the correct order + with distance to next term equal to 1. 
+*/ +class PhraseQueryNode : public AndQueryNode +{ +public: + DECLARE_IDENTIFIABLE_NS(search, PhraseQueryNode); + PhraseQueryNode() : AndQueryNode("PHRASE"), _fieldInfo(32) { } + virtual bool evaluate() const; + virtual const HitList & evaluateHits(HitList & hl) const; + virtual void getPhrases(QueryNodeRefList & tl); + virtual void getPhrases(ConstQueryNodeRefList & tl) const; + const QueryTerm::FieldInfo & getFieldInfo(size_t fid) const { return _fieldInfo[fid]; } + size_t getFieldInfoSize() const { return _fieldInfo.size(); } + virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; } +private: + mutable std::vector _fieldInfo; + void updateFieldInfo(size_t fid, size_t offset, size_t fieldLength) const; +#if WE_EVER_NEED_TO_CACHE_THIS_WE_MIGHT_WANT_SOME_CODE_HERE + HitList _cachedHitList; + bool _evaluated; +#endif +}; + +/** + Unary Not operator. Just inverts the nodes result. +*/ +class NotQueryNode : public QueryConnector +{ +public: + DECLARE_IDENTIFIABLE_NS(search, NotQueryNode); + NotQueryNode() : QueryConnector("NOT") { } + virtual bool evaluate() const; +}; + +/** + N-ary Near operator. All terms must be within the given distance. +*/ +class NearQueryNode : public AndQueryNode +{ +public: + DECLARE_IDENTIFIABLE_NS(search, NearQueryNode); + NearQueryNode() : AndQueryNode("NEAR"), _distance(0) { } + NearQueryNode(const char * opName) : AndQueryNode(opName), _distance(0) { } + virtual bool evaluate() const; + void distance(size_t dist) { _distance = dist; } + size_t distance() const { return _distance; } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + virtual bool isFlattenable(ParseItem::ItemType type) const { return type == ParseItem::ITEM_NOT; } +private: + size_t _distance; +}; + +/** + N-ary Ordered near operator. The terms must be in order and the distance between + the first and last must not exceed the given distance. 
+*/
+class ONearQueryNode : public NearQueryNode
+{
+public:
+    DECLARE_IDENTIFIABLE_NS(search, ONearQueryNode);
+    ONearQueryNode() : NearQueryNode("ONEAR") { }
+    virtual ~ONearQueryNode() { }
+    virtual bool evaluate() const;
+};
+
+/**
+   Query packages the query tree. The usage pattern is like this.
+   Construct the tree with the correct tree description.
+   Get the leaf nodes and populate them with the term occurrences.
+   Then evaluate the query. This is repeated for each document or chunk that
+   you want to process. The tree can also be printed. And you can read the
+   width and depth properties.
+*/
+class Query : public vespalib::Identifiable
+{
+public:
+    DECLARE_IDENTIFIABLE_NS(search, Query);
+    Query();
+    Query(const QueryNodeResultBase & org, const QueryPacketT & queryRep);
+    virtual ~Query() { }
+    /// Will build the query tree
+    bool build(const QueryNodeResultBase & org, const QueryPacketT & queryRep);
+    /// Will clear the results from the querytree.
+    void reset();
+    /// Will get all leafnodes.
+    void getLeafs(QueryTermList & tl);
+    void getLeafs(ConstQueryTermList & tl) const;
+    /// Gives you all phrases of this tree.
+    void getPhrases(QueryNodeRefList & tl);
+    void getPhrases(ConstQueryNodeRefList & tl) const;
+    bool evaluate() const;
+    size_t depth() const;
+    size_t width() const;
+    /// True when the query holds a root node.
+    bool valid() const { return _root.get() != NULL; }
+    const QueryNode::LP & getRoot() const { return _root; }
+    QueryNode::LP & getRoot() { return _root; }
+private:
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    QueryNode::LP _root;
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/querynode.cpp b/searchlib/src/vespa/searchlib/query/querynode.cpp
new file mode 100644
index 00000000000..fc96a352b7f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynode.cpp
@@ -0,0 +1,199 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+#include
+LOG_SETUP(".vsm.querynode");
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_ABSTRACT_NS(search, QueryNode, vespalib::Identifiable);
+
+/**
+ * If qn is a QueryConnector, appends every node in currentNodeList to it as
+ * children and then leaves qn as the only entry of currentNodeList.
+ * For any other kind of node nothing is changed.
+ */
+void TestClose(QueryNode::LP & qn, QueryNodeList & currentNodeList)
+{
+    // qn is a reference, so its address can never be null; the former
+    // '&qn != NULL' guard was always true and has been removed.
+    QueryConnector * qc = dynamic_cast (qn.get());
+    if (qc) {
+        for (size_t i=0; i < currentNodeList.size(); i++) {
+            qc->push_back(currentNodeList[i]);
+        }
+        currentNodeList.clear();
+        currentNodeList.push_back(qn);
+    }
+}
+
+/**
+ * Adds qn either as a child of the last node in currentNodeList or as a new
+ * entry at the end of the list. It becomes a child only when the last node is
+ * a QueryConnector that passes one of the two type checks below; the second
+ * check additionally requires count to equal the connector's current size.
+ */
+void NewNode(QueryNode::LP & qn, QueryNodeList & currentNodeList, size_t count=0)
+{
+    if ( ! currentNodeList.empty() ) {
+        QueryConnector *qc = dynamic_cast (&*currentNodeList.back());
+        if (qc != 0 &&
+            ((dynamic_cast(qc) != NULL) ||
+             (dynamic_cast(qc) != NULL && (count == qc->size()))))
+        {
+            qc->push_back(qn);
+        } else {
+            currentNodeList.push_back(qn);
+        }
+    } else {
+        currentNodeList.push_back(qn);
+    }
+}
+
+namespace {
+    vespalib::stringref DEFAULT("default");
+}
+
+#define CASE(c, q) case c: { qn.reset(new q()); } break;
+/**
+ * Builds the node for the item queryRep currently points at, consuming the
+ * items of its subtree from queryRep along the way.
+ *
+ * @param parent       node whose index is used for pure weighted terms that carry no index name.
+ * @param org          passed on to every QueryTerm that is created; also decides whether float terms are rewritten.
+ * @param queryRep     iterator over the query stack dump.
+ * @param allowRewrite when false, float terms are never rewritten.
+ * @return the new node, or an empty pointer when no node was created for the item.
+ */
+QueryNode::UP QueryNode::Build(const QueryNode * parent, const QueryNodeResultBase & org, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite)
+{
+    unsigned int arity = queryRep.getArity();
+    search::ParseItem::ItemType type = queryRep.getType();
+    UP qn;
+    switch (type) {
+    case search::ParseItem::ITEM_AND:
+    case search::ParseItem::ITEM_OR:
+    case search::ParseItem::ITEM_WEAK_AND:
+    case search::ParseItem::ITEM_EQUIV:
+    case search::ParseItem::ITEM_WEIGHTED_SET:
+    case search::ParseItem::ITEM_DOT_PRODUCT:
+    case search::ParseItem::ITEM_WAND:
+    case search::ParseItem::ITEM_NOT:
+    case search::ParseItem::ITEM_PHRASE:
+    case search::ParseItem::ITEM_NEAR:
+    case search::ParseItem::ITEM_ONEAR:
+    {
+        qn.reset(QueryConnector::create(type));
+        if (qn.get()) {
+            QueryConnector * qc = dynamic_cast (qn.get());
+            NearQueryNode * nqn = dynamic_cast (qc);
+            if (nqn) {
+                // The distance of a NEAR/ONEAR item is carried in the first argument.
+                nqn->distance(queryRep.getArg1());
+            }
+            if
((type == search::ParseItem::ITEM_WEAK_AND) ||
+                (type == search::ParseItem::ITEM_WEIGHTED_SET) ||
+                (type == search::ParseItem::ITEM_DOT_PRODUCT) ||
+                (type == search::ParseItem::ITEM_WAND))
+            {
+                // These connectors carry an index name of their own; store it on the node.
+                const char * index;
+                size_t indexLen(0);
+                queryRep.getIndexName(&index, &indexLen);
+                qn->setIndex(vespalib::string(index, indexLen));
+            }
+            for (size_t i=0; i < arity; i++) {
+                queryRep.next();
+                if (qc->isFlattenable(queryRep.getType())) {
+                    // A flattenable child gets no node of its own: the loop bound is
+                    // extended by its arity so that its children end up directly under qc.
+                    arity += queryRep.getArity();
+                } else {
+                    // Rewriting stays enabled for the child only if qn fails both
+                    // dynamic_cast checks below.
+                    LP child(Build(qc,
+                                   org,
+                                   queryRep,
+                                   allowRewrite && ((dynamic_cast (qn.get()) == NULL) && (dynamic_cast (qn.get()) == NULL))).release());
+                    qc->push_back(child);
+                }
+            }
+        }
+    }
+    break;
+    case search::ParseItem::ITEM_NUMTERM:
+    case search::ParseItem::ITEM_TERM:
+    case search::ParseItem::ITEM_PREFIXTERM:
+    case search::ParseItem::ITEM_REGEXP:
+    case search::ParseItem::ITEM_SUBSTRINGTERM:
+    case search::ParseItem::ITEM_EXACTSTRINGTERM:
+    case search::ParseItem::ITEM_SUFFIXTERM:
+    case search::ParseItem::ITEM_PURE_WEIGHTED_STRING:
+    case search::ParseItem::ITEM_PURE_WEIGHTED_LONG:
+    {
+        const char * index;
+        size_t indexLen(0);
+        queryRep.getIndexName(&index, &indexLen);
+        if (indexLen == 0) {
+            // No index on the item itself: pure weighted terms take the parent's
+            // index, every other term type falls back to "default".
+            if ((type == search::ParseItem::ITEM_PURE_WEIGHTED_STRING) || (type == search::ParseItem::ITEM_PURE_WEIGHTED_LONG)) {
+                // NOTE(review): parent is dereferenced without a null check; confirm callers never pass NULL for these item types.
+                const vespalib::string & ref = parent->getIndex();
+                index = ref.c_str();
+                indexLen = ref.size();
+            } else {
+                index = "default";
+                indexLen = strlen(index);
+            }
+        }
+        const char * term;
+        size_t termLen(0);
+        queryRep.getTerm(&term, &termLen);
+        // Map the parse item type to the matching mode; anything not listed stays WORD.
+        QueryTerm::SearchTerm sTerm(QueryTerm::WORD);
+        switch (type) {
+        case search::ParseItem::ITEM_REGEXP:
+            sTerm = QueryTerm::REGEXP;
+            break;
+        case search::ParseItem::ITEM_PREFIXTERM:
+            sTerm = QueryTerm::PREFIXTERM;
+            break;
+        case search::ParseItem::ITEM_SUBSTRINGTERM:
+            sTerm = QueryTerm::SUBSTRINGTERM;
+            break;
+        case search::ParseItem::ITEM_EXACTSTRINGTERM:
+            sTerm = QueryTerm::EXACTSTRINGTERM;
+            break;
+        case
search::ParseItem::ITEM_SUFFIXTERM:
+            sTerm = QueryTerm::SUFFIXTERM;
+            break;
+        default:
+            break;
+        }
+        QueryTerm::string ssTerm(term, termLen);
+        QueryTerm::string ssIndex(index, indexLen);
+        if (ssIndex == "sddocname") {
+            // This is suboptimal as the term should be checked too.
+            // But it will do for now as only correct sddocname queries are sent down.
+            qn.reset(new TrueNode());
+        } else {
+            std::unique_ptr qt(new QueryTerm(org, ssTerm, ssIndex, sTerm));
+            qt->setWeight(queryRep.GetWeight());
+            qt->setUniqueId(queryRep.getUniqueId());
+            // The term is used as is unless all of these hold: it is float encoded but not a
+            // base 10 integer, org asks for float rewriting, rewriting is allowed, and it contains a '.'.
+            if ( qt->encoding().isBase10Integer() || ! qt->encoding().isFloat() || ! org.getRewriteFloatTerms() || !allowRewrite || (ssTerm.find('.') == vespalib::string::npos)) {
+                qn.reset(qt.release());
+            } else {
+                // Rewrite into EQUIV(term, PHRASE(part before first '.', part after first '.')).
+                std::unique_ptr phrase(new PhraseQueryNode());
+
+                phrase->push_back(LP(new QueryTerm(org, ssTerm.substr(0, ssTerm.find('.')), ssIndex, QueryTerm::WORD)));
+                phrase->push_back(LP(new QueryTerm(org, ssTerm.substr(ssTerm.find('.') + 1), ssIndex, QueryTerm::WORD)));
+                std::unique_ptr orqn(new EquivQueryNode());
+                orqn->push_back(LP(qt.release()));
+                orqn->push_back(LP(phrase.release()));
+                qn.reset(orqn.release());
+            }
+        }
+    }
+    break;
+    case search::ParseItem::ITEM_RANK:
+    {
+        if (arity >= 1) {
+            // Only the first child of RANK is built (with rewriting disabled).
+            queryRep.next();
+            qn = Build(parent, org, queryRep, false);
+            // The remaining children are skipped; each skipped item adds its own
+            // arity to the counter so that its whole subtree is consumed too.
+            for (uint32_t skipCount = arity-1; (skipCount > 0) && queryRep.next(); skipCount--) {
+                skipCount += queryRep.getArity();
+            }
+        }
+    }
+    break;
+    default:
+    {
+        // Unsupported item: consume its whole subtree, logging a warning per skipped item.
+        for (uint32_t skipCount = arity; (skipCount > 0) && queryRep.next(); skipCount--) {
+            skipCount += queryRep.getArity();
+            LOG(warning, "Does not understand anything,....
skipping %d", type);
+        }
+    }
+    break;
+    }
+    return qn;
+}
+#undef CASE
+
+/// Default implementation: returns the caller supplied list untouched.
+const HitList & QueryNode::evaluateHits(HitList & hl) const
+{
+    return hl;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/querynode.h b/searchlib/src/vespa/searchlib/query/querynode.h
new file mode 100644
index 00000000000..f05c34be89b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynode.h
@@ -0,0 +1,66 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include "posocc.h"
+
+namespace search
+{
+
+class QueryTerm;
+class QueryNode;
+/// Typedef a simple list that contains references to QueryNodes.
+typedef std::vector QueryNodeRefList;
+/// Typedef a simple list that contains const references to QueryNodes.
+typedef std::vector ConstQueryNodeRefList;
+/// Typedef a simple list that contains references to QueryTerms.
+typedef std::vector QueryTermList;
+/// Typedef a simple list that contains const references to QueryTerms.
+typedef std::vector ConstQueryTermList;
+
+/**
+   This is the base of any node in the query tree. Both leaf nodes (terms)
+   and operator nodes (AND, NOT, OR, PHRASE, NEAR, ONEAR, etc).
+*/
+class QueryNode : public vespalib::Identifiable
+{
+ public:
+    DECLARE_IDENTIFIABLE_ABSTRACT_NS(search, QueryNode);
+    typedef vespalib::LinkedPtr LP;
+    typedef std::unique_ptr UP;
+
+    virtual ~QueryNode() { }
+    /// This evaluates if the subtree starting here evaluates to true.
+    virtual bool evaluate() const = 0;
+    /// This returns the hit list for this subtree. It is only meaningful in a
+    /// phrase search or any other search that requires position info.
+    virtual const HitList & evaluateHits(HitList & hl) const;
+    /// Clears all the hitlists so the query tree can be reused.
+    virtual void reset() = 0;
+    /// Gives you all leafs of this tree.
+    virtual void getLeafs(QueryTermList & tl) = 0;
+    /// Gives you all leafs of this tree.
Indicating that they are all const.
+    virtual void getLeafs(ConstQueryTermList & tl) const = 0;
+    /// Gives you all phrases of this tree.
+    virtual void getPhrases(QueryNodeRefList & tl) = 0;
+    /// Gives you all phrases of this tree. Indicating that they are all const.
+    virtual void getPhrases(ConstQueryNodeRefList & tl) const = 0;
+    /// Sets the name of the index this node applies to.
+    virtual void setIndex(const vespalib::string & index) = 0;
+    /// Returns the name of the index this node applies to.
+    virtual const vespalib::string & getIndex() const = 0;
+
+    /// Return the depth of this tree. This default returns 1.
+    virtual size_t depth() const { return 1; }
+    /// Return the width of this tree. This default returns 1.
+    virtual size_t width() const { return 1; }
+    /// Builds the node (and its subtree) for the item queryRep currently points at.
+    static UP Build(const QueryNode * parent, const QueryNodeResultBase & org, search::SimpleQueryStackDumpIterator & queryRep, bool allowRewrite);
+};
+
+/// A list containing the QueryNode objects. With copy/assignment.
+typedef std::vector QueryNodeList;
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp b/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp
new file mode 100644
index 00000000000..48d056cce80
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynoderesultbase.cpp
@@ -0,0 +1,8 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "querynoderesultbase.h"
+
+namespace search {
+
+IMPLEMENT_DUPLICATE(EmptyQueryNodeResult);
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/querynoderesultbase.h
new file mode 100644
index 00000000000..80030e9eb31
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/querynoderesultbase.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search
+{
+
+/**
+  This is the base of any item that can be attached to the leafs in a querytree.
+  The intention is to put stuff here that are search specific.
E.g. to differentiate
+  between streamed and indexed variants.
+*/
+class QueryNodeResultBase : public Object
+{
+ public:
+    virtual bool evaluate() const = 0;
+    virtual void reset() = 0;
+    /// Whether QueryNode::Build may rewrite float terms; off unless overridden.
+    virtual bool getRewriteFloatTerms() const { return false; }
+};
+
+/// A result item without state: it always evaluates to true and reset() does nothing.
+class EmptyQueryNodeResult : public QueryNodeResultBase
+{
+ public:
+    DUPLICATE(EmptyQueryNodeResult);
+    virtual ~EmptyQueryNodeResult() { }
+    virtual bool evaluate() const { return true; }
+    virtual void reset() { }
+ private:
+};
+
+
+typedef ObjectContainer QueryNodeResultBaseContainer;
+}
+
diff --git a/searchlib/src/vespa/searchlib/query/queryterm.cpp b/searchlib/src/vespa/searchlib/query/queryterm.cpp
new file mode 100644
index 00000000000..e6ab5591872
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/queryterm.cpp
@@ -0,0 +1,469 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace {
+
+/// Lookup table giving a small bit mask of encoding properties for every byte value.
+class CharInfo {
+public:
+    CharInfo();
+    /// Returns the bit mask registered for byte c.
+    uint8_t get(uint8_t c) const { return _charInfo[c]; }
+private:
+    uint8_t _charInfo[256];
+};
+
+CharInfo::CharInfo()
+{
+    // XXX: Should refactor to reduce number of magic constants.
+    memset(_charInfo, 0x01, 128); // All 7 bits are ascii7bit
+    memset(_charInfo+128, 0x00, 128); // The rest are not.
+    // The masks use the bits of QueryTerm::EncodingBitMap:
+    // 0x01 = Ascii7Bit, 0x02 = Base10Integer, 0x04 = Float.
+    // 0x07: characters allowed in both integer and float terms (digits, sign and range syntax).
+    memset(_charInfo + '0', 0x07, 10);
+    _charInfo[uint8_t('-')] = 0x07;
+    _charInfo[uint8_t('<')] = 0x07;
+    _charInfo[uint8_t('>')] = 0x07;
+    _charInfo[uint8_t(';')] = 0x07;
+    _charInfo[uint8_t('[')] = 0x07;
+    _charInfo[uint8_t(']')] = 0x07;
+
+    // 0x05: characters allowed in float terms only.
+    _charInfo[uint8_t('.')] = 0x05;
+    _charInfo[uint8_t('+')] = 0x05;
+    _charInfo[uint8_t('e')] = 0x05;
+    _charInfo[uint8_t('E')] = 0x05;
+}
+
+static CharInfo _G_charTable;
+
+
+/// True when value lies within the limits of the target numeric type.
+template
+bool isValidInteger(int64_t value)
+{
+    return value >= std::numeric_limits::min() && value <= std::numeric_limits::max();
+}
+
+}
+
+namespace search
+{
+
+IMPLEMENT_IDENTIFIABLE_NS(search, QueryTerm, QueryNode);
+
+/// Creates an empty term; the UCS4 buffer holds only the terminating 0.
+QueryTermBase::QueryTermBase() :
+    QueryTermSimple(),
+    _cachedTermLen(0),
+    _termUCS4()
+{
+    _termUCS4.push_back(0);
+}
+
+/// Decodes the UTF-8 term into a 0 terminated UCS4 buffer and caches its length
+/// in characters (the terminator is not counted).
+QueryTermBase::QueryTermBase(const string & termS, SearchTerm type) :
+    QueryTermSimple(termS, type),
+    _cachedTermLen(0),
+    _termUCS4()
+{
+    _termUCS4.reserve(termS.size() + 1);
+    vespalib::Utf8Reader r(termS);
+    while (r.hasMore()) {
+        ucs4_t u = r.getChar();
+        _termUCS4.push_back(u);
+    }
+    _termUCS4.push_back(0);
+    _cachedTermLen = _termUCS4.size() - 1;
+}
+
+QueryTerm::QueryTerm() :
+    QueryTermBase(),
+    _index(),
+    _encoding(),
+    _result(),
+    _hitList(),
+    _weight(100),
+    _uniqueId(0),
+    _fieldInfo(32)
+{
+}
+
+void
+QueryTermSimple::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+    visit(visitor, "term", _term);
+    visit(visitor, "type", _type);
+}
+
+/// Returns the term as a floating point range. When the term cannot be parsed
+/// the result is marked invalid and adjusted, with low set above high.
+template
+QueryTermSimple::RangeResult
+QueryTermSimple::getFloatRange() const
+{
+    double lowRaw, highRaw;
+    bool valid = getAsDoubleTerm(lowRaw, highRaw);
+    RangeResult res;
+    res.valid = valid;
+    if (!valid) {
+        res.low = std::numeric_limits::max();
+        res.high = - std::numeric_limits::max();
+        res.adjusted = true;
+    } else {
+        res.low = lowRaw;
+        res.high = highRaw;
+    }
+    return res;
+}
+
+namespace {
+
+/// True when d lies strictly between the two limits it is compared against.
+bool isRepresentableByInt64(double d) {
+    return (d > double(std::numeric_limits::min()))
+           && (d < double(std::numeric_limits::max()));
+}
+
+}
+
+/**
+ * Resolves the term to an integer range. Integer parsing is tried first; if
+ * that fails the term is parsed as a double range which is then narrowed to
+ * whole numbers (ceil for the low end, floor for the high end).
+ *
+ * @return false when the term parses neither as integer nor as double.
+ */
+bool
+QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const
+{
+    // getAsIntegerTerm() presets low/high to the full range before parsing.
+    bool valid = getAsIntegerTerm(low, high);
+    if ( ! valid ) {
+        double l(0), h(0);
+        valid = getAsDoubleTerm(l, h);
+        if (valid) {
+            if ((l == h) && isRepresentableByInt64(l)) {
+                // A single value: use the nearest whole number for both ends.
+                low = high = round(l);
+            } else {
+                // An end that is not inside the limits is either clamped to the
+                // opposite limit or left at its preset value.
+                if (l > double(std::numeric_limits::min())) {
+                    if (l < double(std::numeric_limits::max())) {
+                        low = ceil(l);
+                    } else {
+                        low = std::numeric_limits::max();
+                    }
+                }
+                if (h < double(std::numeric_limits::max())) {
+                    if (h > double(std::numeric_limits::min())) {
+                        high = floor(h);
+                    } else {
+                        high = std::numeric_limits::min();
+                    }
+                }
+            }
+        }
+    }
+    return valid;
+}
+
+/**
+ * Returns the term as a range of the target integer type. Ends that do not fit
+ * the target type are clamped to its limits and the result is flagged as
+ * adjusted. An unparsable term gives an invalid, adjusted result with low set
+ * above high.
+ */
+template
+QueryTermSimple::RangeResult
+QueryTermSimple::getIntegerRange() const
+{
+    int64_t lowRaw, highRaw;
+    bool valid = getRangeInternal(lowRaw, highRaw);
+    RangeResult res;
+    res.valid = valid;
+    if (valid) {
+        bool validLow = isValidInteger(lowRaw);
+        if (validLow) {
+            res.low = lowRaw;
+        } else {
+            res.low = (lowRaw < static_cast(std::numeric_limits::min()) ?
+                       std::numeric_limits::min() : std::numeric_limits::max());
+            res.adjusted = true;
+        }
+        bool validHigh = isValidInteger(highRaw);
+        if (validHigh) {
+            res.high = highRaw;
+        } else {
+            res.high = (highRaw > static_cast(std::numeric_limits::max()) ?
+                        std::numeric_limits::max() : std::numeric_limits::min());
+            res.adjusted = true;
+        }
+    } else {
+        res.low = std::numeric_limits::max();
+        res.high = std::numeric_limits::min();
+        res.adjusted = true;
+    }
+    return res;
+}
+
+// Explicit specializations of getRange(): the first two forward to
+// getFloatRange(), the remaining four to getIntegerRange().
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getFloatRange();
+}
+
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getFloatRange();
+}
+
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getIntegerRange();
+}
+
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getIntegerRange();
+}
+
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getIntegerRange();
+}
+
+template <>
+QueryTermSimple::RangeResult
+QueryTermSimple::getRange() const
+{
+    return getIntegerRange();
+}
+
+void
+QueryTermBase::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+    QueryTermSimple::visitMembers(visitor);
+    visit(visitor, "termlength", _cachedTermLen);
+}
+
+void
+QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const
+{
+    QueryTermBase::visitMembers(visitor);
+    visit(visitor, "encoding.isBase10Integer", _encoding.isBase10Integer());
+    visit(visitor, "encoding.isFloat", _encoding.isFloat());
+    visit(visitor, "encoding.isAscii7Bit", _encoding.isAscii7Bit());
+    visit(visitor, "index", _index);
+    visit(visitor, "weight", _weight.percent());
+    visit(visitor, "uniqueid", _uniqueId);
+}
+
+
+/**
+ * Creates a term for the given index. The encoding flags are computed as the
+ * bitwise AND of the character table entries of every byte in termS, so a flag
+ * survives only if all bytes have it. An empty term keeps the initial 0x01.
+ */
+QueryTerm::QueryTerm(const QueryNodeResultBase & org, const string & termS, const string & indexS, SearchTerm type) :
+    QueryTermBase(termS, type),
+    _index(indexS),
+    _encoding(0x01),
+    _result(org),
+    _hitList(),
+    _weight(100),
+    _uniqueId(0),
+    _fieldInfo(32)
+{
+    if (!termS.empty()) {
+        uint8_t enc(0xff);
+        for (size_t i(0), m(termS.size()); i < m; i++) {
+            enc &= _G_charTable.get(termS[i]);
+        }
+        _encoding = enc;
+    }
+}
+
+void
QueryTerm::getPhrases(QueryNodeRefList & tl) { (void) tl; } // a term contributes no phrases
+void QueryTerm::getPhrases(ConstQueryNodeRefList & tl) const { (void) tl; } // a term contributes no phrases
+void QueryTerm::getLeafs(QueryTermList & tl) { tl.push_back(this); } // a term is its own only leaf
+void QueryTerm::getLeafs(ConstQueryTermList & tl) const { tl.push_back(this); }
+// True when the term has at least one hit and the attached result item evaluates to true.
+bool QueryTerm::evaluate() const { return !_hitList.empty() && _result->evaluate(); }
+// Drops all hits and resets the attached result item.
+void QueryTerm::reset() { _hitList.clear(); _result->reset(); }
+// Returns the term's own hit list; the argument is not used.
+const HitList & QueryTerm::evaluateHits(HitList & UNUSED_PARAM(hl)) const { return _hitList; }
+
+/// Grows the field info vector so that fieldNo is a valid position. Never shrinks it.
+void QueryTerm::resizeFieldId(size_t fieldNo)
+{
+    if (fieldNo >= _fieldInfo.size()) {
+        _fieldInfo.resize(fieldNo + 1);
+    }
+}
+
+/// Appends one hit, constructed from the given arguments, to the hit list.
+void QueryTerm::add(unsigned pos, unsigned context, int32_t weight_)
+{
+    _hitList.emplace_back(pos, context, weight_);
+}
+
+/// Decoder used by getAsNumericTerm() for integer terms: parses with strtoll and
+/// steps to the neighbouring integer without passing the given limit.
+template
+struct IntDecoder {
+    static int64_t fromstr(const char * v, char ** end) { return strtoll(v, end, B); }
+    static int64_t nearestDownwd(int64_t n, int64_t min) { return (n > min ? n - 1 : n); }
+    static int64_t nearestUpward(int64_t n, int64_t max) { return (n < max ?
n + 1 : n); }
+};
+
+/// Decoder used by getAsNumericTerm() for floating point terms.
+struct DoubleDecoder {
+    static double fromstr(const char * v, char ** end) { return strtod(v, end); }
+    // NOTE(review): nextafterf is the single precision variant, so these step by one
+    // float increment rather than one double increment; confirm this is intended.
+    static double nearestDownwd(double n, double min) { return nextafterf(n, min); }
+    static double nearestUpward(double n, double max) { return nextafterf(n, max); }
+};
+
+/// Parses the term as a base 10 integer value or range. lower/upper are preset to
+/// the full range and only narrowed when parsing succeeds.
+bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const
+{
+    lower = std::numeric_limits::min();
+    upper = std::numeric_limits::max();
+    return getAsNumericTerm(lower, upper, IntDecoder<10>());
+}
+
+/// Parses the term as a floating point value or range. lower/upper are preset to
+/// the full range and only narrowed when parsing succeeds.
+bool QueryTermSimple::getAsDoubleTerm(double & lower, double & upper) const
+{
+    lower = - std::numeric_limits::max();
+    upper = std::numeric_limits::max();
+    return getAsNumericTerm(lower, upper, DoubleDecoder());
+}
+
+QueryTermSimple::QueryTermSimple() :
+    _type(WORD),
+    _term(),
+    _diversityAttribute(),
+    _rangeLimit(0),
+    _maxPerGroup(0),
+    _diversityCutoffGroups(std::numeric_limits::max()),
+    _diversityCutoffStrict(false),
+    _valid(true)
+{
+}
+
+namespace {
+
+/// True when s has at least 3 characters, starts with '<' or '[' and ends with '>' or ']'.
+bool isFullRange(const vespalib::stringref & s) {
+    const size_t sz(s.size());
+    return (sz >= 3u) &&
+           (s[0] == '<' || s[0] == '[') &&
+           (s[sz-1] == '>' || s[sz-1] == ']');
+}
+
+}
+
+/**
+ * Stores the term and, when it has the shape of a full range, splits the text
+ * between the brackets on ';' to pick up the optional extra settings:
+ * part 2 is the range limit, part 3 the diversity attribute, part 4 the max
+ * number per group, part 5 the diversity cutoff group count and part 6 the
+ * literal "strict".
+ */
+QueryTermSimple::QueryTermSimple(const string & term_, SearchTerm type) :
+    _type(type),
+    _term(term_),
+    _diversityAttribute(),
+    _rangeLimit(0),
+    _maxPerGroup(0),
+    _diversityCutoffGroups(std::numeric_limits::max()),
+    _diversityCutoffStrict(false),
+    _valid(true)
+{
+    if (isFullRange(_term)) {
+        // Strip the opening and closing bracket.
+        stringref rest(_term.c_str() + 1, _term.size() - 2);
+        std::vector parts;
+        parts.reserve(5);
+        while (!
rest.empty() ) {
+            size_t pos(rest.find(';'));
+            if (pos != vespalib::string::npos) {
+                parts.push_back(rest.substr(0, pos));
+                rest = rest.substr(pos + 1);
+                if (rest.empty()) {
+                    // A trailing ';' yields a final empty part.
+                    parts.push_back(rest);
+                }
+            } else {
+                parts.push_back(rest);
+                rest = stringref();
+            }
+        }
+        // A full range needs at least the two bound parts.
+        _valid = parts.size() >= 2;
+        if (parts.size() >= 3) {
+            _rangeLimit = strtol(parts[2].c_str(), NULL, 0);
+            if (parts.size() > 3) {
+                // Diversity settings need both the attribute and the max per group.
+                _valid = parts.size() >= 5;
+                if (_valid) {
+                    _diversityAttribute = parts[3];
+                    _maxPerGroup = strtoul(parts[4].c_str(), NULL, 0);
+                    if ((_maxPerGroup > 0) && (parts.size() > 5)) {
+                        char *err = nullptr;
+                        size_t cutoffGroups = strtoul(parts[5].c_str(), &err, 0);
+                        // Only accept the cutoff when the whole part was consumed as a number.
+                        if ((err == nullptr) || (size_t(err - parts[5].c_str()) == parts[5].size())) {
+                            _diversityCutoffGroups = cutoffGroups;
+                        }
+                        if (parts.size() > 6) {
+                            _diversityCutoffStrict = (parts[6] == "strict");
+                            // Anything beyond the seventh part makes the term invalid.
+                            _valid = (parts.size() == 7);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Parses the term as a single value or a range, using decoder d for number
+ * conversion and for stepping to the neighbouring value.
+ *
+ * Shapes handled below: "v" gives low == high == v; "<v" gives an exclusive
+ * upper bound; ">v" gives an exclusive lower bound; "[" or "<" followed by
+ * "low;high" and closed by "]" or ">" gives a range where square brackets are
+ * inclusive and angle brackets exclusive. A missing bound keeps the value
+ * passed in.
+ *
+ * @param lower in: default lower bound, out: parsed lower bound (written only on success).
+ * @param upper in: default upper bound, out: parsed upper bound (written only on success).
+ * @return true when the term is empty or was parsed successfully.
+ */
+template
+bool
+QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const
+{
+    bool valid(empty());
+    size_t sz(_term.size());
+    if (sz) {
+        char *err(NULL);
+        T low(lower);
+        T high(upper);
+        const char * q = _term.c_str();
+        const char first(q[0]);
+        const char last(q[sz-1]);
+        // Skip a leading range/comparison character before parsing the first number.
+        q += ((first == '<') || (first == '>') || (first == '[')) ? 1 : 0;
+        T ll = d.fromstr(q, &err);
+        valid = isValid() && ((*err == 0) || (*err == ';'));
+        if (valid) {
+            if (first == '<' && (*err == 0)) {
+                high = d.nearestDownwd(ll, lower);
+            } else if (first == '>' && (*err == 0)) {
+                low = d.nearestUpward(ll, upper);
+            } else if ((first == '[') || (first == '<')) {
+                if (q != err) {
+                    // A lower bound was given; '<' makes it exclusive.
+                    low = (first == '[') ? ll : d.nearestUpward(ll, upper);
+                }
+                q = err + 1;
+                T hh = d.fromstr(q, &err);
+                bool hasUpperLimit(q != err);
+                if (*err == ';') {
+                    // More parts follow the upper bound; jump to the last character of the term.
+                    err = const_cast(_term.end() - 1);
+                }
+                valid = (*err == last) && ((last == ']') || (last == '>'));
+                if (hasUpperLimit) {
+                    high = (last == ']') ?
hh : d.nearestDownwd(hh, lower); + } + } else { + low = high = ll; + } + } + if (valid) { + lower = low; + upper = high; + } + } + return valid; +} + +vespalib::string +QueryTermSimple::getClassName() const +{ + vespalib::string name(typeid(*this).name()); + int status = 0; + size_t size = 0; + char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status); + vespalib::string result(unmangled); + free(unmangled); + return result; +} + +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::QueryTermSimple *obj) +{ + if (obj != 0) { + self.openStruct(name, obj->getClassName()); + obj->visitMembers(self); + self.closeStruct(); + } else { + self.visitNull(name); + } +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::QueryTermSimple &obj) +{ + visit(self, name, &obj); +} diff --git a/searchlib/src/vespa/searchlib/query/queryterm.h b/searchlib/src/vespa/searchlib/query/queryterm.h new file mode 100644 index 00000000000..5d8d971c9f8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/queryterm.h @@ -0,0 +1,190 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include + +#include "weight.h" + +namespace search +{ + +/// An stl based ucs4 string identical to a char string. +typedef vespalib::Array UCS4StringT; + +class QueryTermSimple +{ +public: + typedef std::unique_ptr UP; + typedef vespalib::string string; + typedef vespalib::stringref stringref; + enum SearchTerm { + WORD, + PREFIXTERM, + SUBSTRINGTERM, + EXACTSTRINGTERM, + SUFFIXTERM, + REGEXP + }; + + template + struct RangeResult { + N low; + N high; + bool valid; // Whether parsing of the range was successful + bool adjusted; // Whether the low and high was adjusted according to min and max limits of the given type. 
+        RangeResult() : low(), high(), valid(true), adjusted(false) {}
+        /// True when the range covers a single value.
+        bool isEqual() const { return low == high; }
+    };
+
+    QueryTermSimple();
+    QueryTermSimple(const string & term_, SearchTerm type);
+    virtual ~QueryTermSimple() { }
+    /**
+     * Extracts the content of this query term as a range with low and high values.
+     */
+    template
+    RangeResult getRange() const;
+    int getRangeLimit() const { return _rangeLimit; }
+    size_t getMaxPerGroup() const { return _maxPerGroup; }
+    size_t getDiversityCutoffGroups() const { return _diversityCutoffGroups; }
+    bool getDiversityCutoffStrict() const { return _diversityCutoffStrict; }
+    vespalib::stringref getDiversityAttribute() const { return _diversityAttribute; }
+    /// Parses the term as an integer value or range; returns false if that is not possible.
+    bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const;
+    /// Parses the term as a floating point value or range; returns false if that is not possible.
+    bool getAsDoubleTerm(double & lower, double & upper) const;
+    const char * getTerm() const { return _term.c_str(); }
+    bool isPrefix() const { return (_type == PREFIXTERM); }
+    bool isSubstring() const { return (_type == SUBSTRINGTERM); }
+    bool isExactstring() const { return (_type == EXACTSTRINGTERM); }
+    bool isSuffix() const { return (_type == SUFFIXTERM); }
+    bool isWord() const { return (_type == WORD); }
+    bool isRegex() const { return (_type == REGEXP); }
+    bool empty() const { return _term.empty(); }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    vespalib::string getClassName() const;
+    /// False when the constructor found a malformed range specification.
+    bool isValid() const { return _valid; }
+private:
+    bool getRangeInternal(int64_t & low, int64_t & high) const;
+    template
+    RangeResult getIntegerRange() const;
+    template
+    RangeResult getFloatRange() const;
+    SearchTerm _type;
+    string _term;
+    stringref _diversityAttribute;
+    int _rangeLimit;
+    uint32_t _maxPerGroup;
+    uint32_t _diversityCutoffGroups;
+    bool _diversityCutoffStrict;
+    bool _valid;
+    template
+    bool getAsNumericTerm(T & lower, T & upper, D d) const;
+};
+
+class QueryTermBase : public QueryTermSimple
+{
+public:
+    typedef std::unique_ptr UP;
+    QueryTermBase();
+    
QueryTermBase(const string & term_, SearchTerm type);
+    size_t getTermLen() const { return _cachedTermLen; }
+    // NOTE(review): the length returned with the char pointer is _cachedTermLen, which the
+    // constructor sets to the number of UCS4 characters; confirm callers do not expect a byte count.
+    size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; }
+    size_t term(const ucs4_t * & t) const { t = _termUCS4.begin(); return _cachedTermLen; }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+    size_t _cachedTermLen;
+    UCS4StringT _termUCS4;
+};
+
+/**
+   This is a leaf in the Query tree. All terms are leafs.
+   A QueryTerm has the index for where to find the term. The term is a string,
+   both char(utf8) and ucs4. There are flags indicating encoding. And there are
+   flags indicating if it should be considered a prefix.
+*/
+class QueryTerm : public QueryTermBase, public QueryNode
+{
+public:
+    typedef std::unique_ptr UP;
+    /// Small set of flags describing which kinds of characters the term consists of.
+    class EncodingBitMap
+    {
+    public:
+        EncodingBitMap(unsigned bm=0) : _enc(bm) { }
+        bool isFloat() const { return _enc & Float; }
+        bool isBase10Integer() const { return _enc & Base10Integer; }
+        bool isAscii7Bit() const { return _enc & Ascii7Bit; }
+        void setBase10Integer(bool v) { if (v) _enc |= Base10Integer; else _enc &= ~Base10Integer; }
+        void setAscii7Bit(bool v) { if (v) _enc |= Ascii7Bit; else _enc &= ~Ascii7Bit; }
+        void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; }
+    private:
+        enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 };
+        unsigned _enc;
+    };
+    /// Per field bookkeeping: hit list offset, hit count and field length.
+    /// Values are stored as 32 bit even though the accessors use size_t.
+    class FieldInfo {
+    public:
+        FieldInfo() : _hitListOffset(0), _hitCount(0), _fieldLength(0) { }
+        FieldInfo(uint32_t hitListOffset, uint32_t hitCount, uint32_t fieldLength) :
+            _hitListOffset(hitListOffset), _hitCount(hitCount), _fieldLength(fieldLength) { }
+        size_t getHitOffset() const { return _hitListOffset; }
+        size_t getHitCount() const { return _hitCount; }
+        size_t getFieldLength() const { return _fieldLength; }
+        void setHitOffset(size_t v) { _hitListOffset = v; }
+        void setHitCount(size_t v) { _hitCount = v; }
+        void setFieldLength(size_t v) { _fieldLength = v; }
+    private:
+        uint32_t
_hitListOffset;
+        uint32_t _hitCount;
+        uint32_t _fieldLength;
+    };
+    DECLARE_IDENTIFIABLE_NS(search, QueryTerm);
+    QueryTerm();
+    QueryTerm(const QueryNodeResultBase & org, const string & term, const string & index, SearchTerm type);
+    virtual ~QueryTerm() { }
+    virtual bool evaluate() const;
+    virtual const HitList & evaluateHits(HitList & hl) const;
+    virtual void reset();
+    virtual void getLeafs(QueryTermList & tl);
+    virtual void getLeafs(ConstQueryTermList & tl) const;
+    /// Gives you all phrases of this tree.
+    virtual void getPhrases(QueryNodeRefList & tl);
+    /// Gives you all phrases of this tree. Indicating that they are all const.
+    virtual void getPhrases(ConstQueryNodeRefList & tl) const;
+
+    /// Appends one hit to the hit list.
+    void add(unsigned pos, unsigned context, int32_t weight);
+    EncodingBitMap encoding() const { return _encoding; }
+    size_t termLen() const { return getTermLen(); }
+    const string & index() const { return _index; }
+    void setWeight(query::Weight v) { _weight = v; }
+    void setUniqueId(uint32_t u) { _uniqueId = u; }
+    query::Weight weight() const { return _weight; }
+    uint32_t uniqueId() const { return _uniqueId; }
+    /// Grows the field info vector so that fieldId is a valid position.
+    void resizeFieldId(size_t fieldId);
+    /// Unchecked access; fid must be less than getFieldInfoSize().
+    const FieldInfo & getFieldInfo(size_t fid) const { return _fieldInfo[fid]; }
+    FieldInfo & getFieldInfo(size_t fid) { return _fieldInfo[fid]; }
+    size_t getFieldInfoSize() const { return _fieldInfo.size(); }
+    const QueryNodeResultBase & getQueryItem() const { return *_result; }
+    QueryNodeResultBase & getQueryItem() { return *_result; }
+    const HitList & getHitList() const { return _hitList; }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual void setIndex(const string & index_) { _index = index_; }
+    virtual const string & getIndex() const { return _index; }
+protected:
+    string _index;
+    EncodingBitMap _encoding;
+    QueryNodeResultBaseContainer _result;
+    HitList _hitList;
+private:
+    query::Weight _weight;
+    uint32_t _uniqueId;
+    std::vector _fieldInfo;
+};
+
+}
+
+void
visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::QueryTermSimple &obj); +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::QueryTermSimple *obj); + diff --git a/searchlib/src/vespa/searchlib/query/tree/.gitignore b/searchlib/src/vespa/searchlib/query/tree/.gitignore new file mode 100644 index 00000000000..583460ae288 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/.gitignore @@ -0,0 +1,3 @@ +*.So +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt new file mode 100644 index 00000000000..3f7f5bdb3af --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_tree OBJECT + SOURCES + intermediate.cpp + intermediatenodes.cpp + querybuilder.cpp + stackdumpcreator.cpp + term.cpp + location.cpp + range.cpp + termnodes.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/query/tree/OWNERS b/searchlib/src/vespa/searchlib/query/tree/OWNERS new file mode 100644 index 00000000000..12b533ec610 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/OWNERS @@ -0,0 +1 @@ +havardpe diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h new file mode 100644 index 00000000000..ca58b6c1dce --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/customtypetermvisitor.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace query {
+
+/**
+ * Visitor base for code that only wants to handle term-like nodes.
+ *
+ * The intermediate node types listed in the private section are handled
+ * here by visiting each of their children in order. All other pure virtual
+ * visit() overloads of CustomTypeVisitor are left for the subclass.
+ */
+template <class NodeTypes>
+class CustomTypeTermVisitor : public CustomTypeVisitor<NodeTypes>
+{
+protected:
+    /// Lets every child of n accept this visitor, in child order.
+    void visitChildren(Intermediate &n) {
+        // The size is re-read on every iteration on purpose: it keeps the
+        // loop well defined even if a visit changes the child list.
+        for (size_t i = 0; i < n.getChildren().size(); ++i) {
+            n.getChildren()[i]->accept(*this);
+        }
+    }
+
+private:
+    virtual void visit(typename NodeTypes::And &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::AndNot &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::Equiv &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::Near &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::ONear &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::Or &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::Rank &n) { visitChildren(n); }
+    virtual void visit(typename NodeTypes::WeakAnd &n) { visitChildren(n); }
+
+    // phrases and weighted set terms are conceptual leaf nodes and
+    // should be handled that way.
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
new file mode 100644
index 00000000000..ce1cff082b7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "queryvisitor.h"
+
+namespace search {
+namespace query {
+
+/**
+ * By typedefing a (complete) set of subclasses to the query nodes in
+ * a traits class, you can get the CustomTypeVisitor to visit those
+ * types instead of their base classes.
+ *
+ * The traits class must define the following types:
+ * And, AndNot, Equiv, NumberTerm, LocationTerm, Near, ONear, Or,
+ * Phrase, PrefixTerm, RangeTerm, Rank, StringTerm, SubstringTerm,
+ * SuffixTerm, WeakAnd, WeightedSetTerm, DotProduct, WandTerm,
+ * PredicateQuery, RegExpTerm
+ *
+ * See customtypevisitor_test.cpp for an example.
+ *
+ * Please note that your CustomTypeVisitor subclass should NOT
+ * implement any of the regular QueryVisitor member functions, as this
+ * would interfere with the routing.
+ */
+template <class NodeTypes>
+class CustomTypeVisitor : public QueryVisitor {
+public:
+    virtual ~CustomTypeVisitor() {}
+
+    // One pure virtual overload per custom node type; subclasses implement these.
+    virtual void visit(typename NodeTypes::And &) = 0;
+    virtual void visit(typename NodeTypes::AndNot &) = 0;
+    virtual void visit(typename NodeTypes::Equiv &) = 0;
+    virtual void visit(typename NodeTypes::NumberTerm &) = 0;
+    virtual void visit(typename NodeTypes::LocationTerm &) = 0;
+    virtual void visit(typename NodeTypes::Near &) = 0;
+    virtual void visit(typename NodeTypes::ONear &) = 0;
+    virtual void visit(typename NodeTypes::Or &) = 0;
+    virtual void visit(typename NodeTypes::Phrase &) = 0;
+    virtual void visit(typename NodeTypes::PrefixTerm &) = 0;
+    virtual void visit(typename NodeTypes::RangeTerm &) = 0;
+    virtual void visit(typename NodeTypes::Rank &) = 0;
+    virtual void visit(typename NodeTypes::StringTerm &) = 0;
+    virtual void visit(typename NodeTypes::SubstringTerm &) = 0;
+    virtual void visit(typename NodeTypes::SuffixTerm &) = 0;
+    virtual void visit(typename NodeTypes::WeakAnd &) = 0;
+    virtual void visit(typename NodeTypes::WeightedSetTerm &) = 0;
+    virtual void visit(typename NodeTypes::DotProduct &) = 0;
+    virtual void visit(typename NodeTypes::WandTerm &) = 0;
+    virtual void visit(typename NodeTypes::PredicateQuery &) = 0;
+    virtual void visit(typename NodeTypes::RegExpTerm &) = 0;
+
+private:
+    // Route QueryVisitor requests to the correct custom type.
+
+    typedef typename NodeTypes::And TAnd;
+    typedef typename NodeTypes::AndNot TAndNot;
+    typedef typename NodeTypes::Equiv TEquiv;
+    typedef typename NodeTypes::NumberTerm TNumberTerm;
+    typedef typename NodeTypes::LocationTerm TLocTrm;
+    typedef typename NodeTypes::Near TNear;
+    typedef typename NodeTypes::ONear TONear;
+    typedef typename NodeTypes::Or TOr;
+    typedef typename NodeTypes::Phrase TPhrase;
+    typedef typename NodeTypes::PrefixTerm TPrefixTerm;
+    typedef typename NodeTypes::RangeTerm TRangeTerm;
+    typedef typename NodeTypes::Rank TRank;
+    typedef typename NodeTypes::StringTerm TStringTerm;
+    typedef typename NodeTypes::SubstringTerm TSubstrTr;
+    typedef typename NodeTypes::SuffixTerm TSuffixTerm;
+    typedef typename NodeTypes::WeakAnd TWeakAnd;
+    typedef typename NodeTypes::WeightedSetTerm TWeightedSetTerm;
+    typedef typename NodeTypes::DotProduct TDotProduct;
+    typedef typename NodeTypes::WandTerm TWandTerm;
+    typedef typename NodeTypes::PredicateQuery TPredicateQuery;
+    typedef typename NodeTypes::RegExpTerm TRegExpTerm;
+
+    // Each base-type overload downcasts to the matching custom type and
+    // forwards to the public overload above. The static_cast is unchecked,
+    // so every node in the visited tree must really be of the traits type.
+    virtual void visit(And &n) { visit(static_cast<TAnd &>(n)); }
+    virtual void visit(AndNot &n) { visit(static_cast<TAndNot &>(n)); }
+    virtual void visit(Equiv &n) { visit(static_cast<TEquiv &>(n)); }
+    virtual void visit(NumberTerm &n) { visit(static_cast<TNumberTerm &>(n)); }
+    virtual void visit(LocationTerm &n) { visit(static_cast<TLocTrm &>(n)); }
+    virtual void visit(Near &n) { visit(static_cast<TNear &>(n)); }
+    virtual void visit(ONear &n) { visit(static_cast<TONear &>(n)); }
+    virtual void visit(Or &n) { visit(static_cast<TOr &>(n)); }
+    virtual void visit(Phrase &n) { visit(static_cast<TPhrase &>(n)); }
+    virtual void visit(PrefixTerm &n) { visit(static_cast<TPrefixTerm &>(n)); }
+    virtual void visit(RangeTerm &n) { visit(static_cast<TRangeTerm &>(n)); }
+    virtual void visit(Rank &n) { visit(static_cast<TRank &>(n)); }
+    virtual void visit(StringTerm &n) { visit(static_cast<TStringTerm &>(n)); }
+    virtual void visit(SubstringTerm &n) { visit(static_cast<TSubstrTr &>(n)); }
+    virtual void visit(SuffixTerm &n) { visit(static_cast<TSuffixTerm &>(n)); }
+    virtual void visit(WeakAnd &n) { visit(static_cast<TWeakAnd &>(n)); }
+    virtual void visit(WeightedSetTerm &n)
+    { visit(static_cast<TWeightedSetTerm &>(n)); }
+    virtual void visit(DotProduct &n) { visit(static_cast<TDotProduct &>(n)); }
+    virtual void visit(WandTerm &n) { visit(static_cast<TWandTerm &>(n)); }
+    virtual void visit(PredicateQuery &n)
+    { visit(static_cast<TPredicateQuery &>(n)); }
+    virtual void visit(RegExpTerm &n) { visit(static_cast<TRegExpTerm &>(n)); }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp
new file mode 100644
index 00000000000..2f4d6b35be7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediate.cpp
@@ -0,0 +1,22 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+#include
+
+namespace search {
+namespace query {
+
+// Deletes every child that was handed over through append().
+Intermediate::~Intermediate() {
+    for (size_t i = 0; i < _children.size(); ++i) {
+        delete _children[i];
+    }
+}
+
+// Takes ownership of child and stores it as the last child of this node.
+Intermediate &Intermediate::append(Node::UP child)
+{
+    // Store the raw pointer first and give up ownership only afterwards:
+    // if push_back throws, the unique_ptr still owns (and frees) the child.
+    _children.push_back(child.get());
+    child.release();
+    return *this;
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediate.h b/searchlib/src/vespa/searchlib/query/tree/intermediate.h
new file mode 100644
index 00000000000..4ee6d30445e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediate.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace query {
+
+/**
+ * Base class of all query nodes that have child nodes.
+ *
+ * Children are held as raw pointers that this node owns: append() takes
+ * them over from a Node::UP and the destructor deletes them. Copying is
+ * disabled for that reason.
+ */
+class Intermediate : public Node
+{
+    std::vector<Node *> _children;
+ public:
+    typedef std::unique_ptr<Intermediate> UP;
+
+    Intermediate(const Intermediate & rhs) = delete;
+    Intermediate & operator = (const Intermediate & rhs) = delete;
+
+    Intermediate() = default;
+    // Pure virtual to keep the class abstract; it is still defined in
+    // intermediate.cpp because derived destructors call it.
+    virtual ~Intermediate() = 0;
+
+    /// The children in the order they were appended. Ownership stays here.
+    const std::vector<Node *> &getChildren() const { return _children; }
+    /// Pre-allocates room for sz children. Returns *this for chaining.
+    Intermediate &reserve(size_t sz) { _children.reserve(sz); return *this; }
+    /// Takes ownership of child and adds it last. Returns *this for chaining.
+    Intermediate &append(Node::UP child);
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp
new file mode 100644
index 00000000000..ba1485107b1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".intermediatenodes");
+
+#include "intermediatenodes.h"
+
+namespace search {
+namespace query {
+
+// Out-of-line definitions of the destructors that intermediatenodes.h
+// declares pure virtual. A pure virtual destructor still needs a body.
+
+And::~And() {}
+
+AndNot::~AndNot() {}
+
+Or::~Or() {}
+
+WeakAnd::~WeakAnd() {}
+
+Equiv::~Equiv() {}
+
+Rank::~Rank() {}
+
+Near::~Near() {}
+
+ONear::~ONear() {}
+
+Phrase::~Phrase() {}
+
+WeightedSetTerm::~WeightedSetTerm() {}
+
+DotProduct::~DotProduct() {}
+
+WandTerm::~WandTerm() {}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h
new file mode 100644
index 00000000000..29b0e8f8af7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/intermediatenodes.h
@@ -0,0 +1,143 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "intermediate.h"
+#include "querynodemixin.h"
+#include "term.h"
+#include
+#include
+
+namespace search {
+namespace query {
+
+// NOTE(review): in this copy of the patch every "QueryNodeMixin" base below
+// appears without its template argument list; restore the arguments from
+// querynodemixin.h before compiling.
+//
+// All classes in this file declare a pure virtual destructor, which makes
+// them abstract. The destructors are defined in intermediatenodes.cpp.
+
+/// Operator node without any parameters of its own.
+class And : public QueryNodeMixin {
+public:
+    virtual ~And() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node without any parameters of its own.
+class AndNot : public QueryNodeMixin {
+public:
+    virtual ~AndNot() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node without any parameters of its own.
+class Or : public QueryNodeMixin {
+public:
+    virtual ~Or() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node carrying a minimum hit count and a view name.
+class WeakAnd : public QueryNodeMixin {
+    uint32_t _minHits;
+    vespalib::string _view;
+public:
+    virtual ~WeakAnd() = 0;
+
+    WeakAnd(uint32_t minHits, const vespalib::string & view) : _minHits(minHits), _view(view) {}
+
+    uint32_t getMinHits() const { return _minHits; }
+    const vespalib::string & getView() const { return _view; }
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node carrying an id, a weight and a term index.
+class Equiv : public QueryNodeMixin {
+private:
+    int32_t _id;
+    Weight _weight;
+    int32_t _term_index;  // -1 until setTermIndex() is called
+public:
+    virtual ~Equiv() = 0;
+
+    Equiv(int32_t id, Weight weight)
+        : _id(id), _weight(weight), _term_index(-1)
+    {}
+    void setTermIndex(int32_t term_index) { _term_index = term_index; }
+
+    Weight getWeight() const { return _weight; }
+    int32_t getId() const { return _id; }
+    int32_t getTermIndex() const { return _term_index; }
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node without any parameters of its own.
+class Rank : public QueryNodeMixin {
+public:
+    virtual ~Rank() = 0;
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node carrying a distance parameter.
+class Near : public QueryNodeMixin
+{
+    // Stored with the same type as the constructor argument and the getter,
+    // so a value above the 32-bit range is no longer silently truncated.
+    size_t _distance;
+
+ public:
+    Near(size_t distance) : _distance(distance) {}
+    virtual ~Near() = 0;
+
+    size_t getDistance() const { return _distance; }
+};
+
+//-----------------------------------------------------------------------------
+
+/// Operator node carrying a distance parameter.
+class ONear : public QueryNodeMixin
+{
+    // Stored with the same type as the constructor argument and the getter,
+    // so a value above the 32-bit range is no longer silently truncated.
+    size_t _distance;
+
+ public:
+    ONear(size_t distance) : _distance(distance) {}
+    virtual ~ONear() = 0;
+
+    size_t getDistance() const { return _distance; }
+};
+
+//-----------------------------------------------------------------------------
+
+// NOTE(review): in this copy of the patch every "QueryNodeMixin" base below
+// appears without its template argument list; restore the arguments from
+// querynodemixin.h before compiling.
+
+/// Node with children that is also a Term (view, id, weight).
+class Phrase : public QueryNodeMixin, public Term {
+public:
+    Phrase(const vespalib::string &view, int32_t id, Weight weight)
+        : Term(view, id, weight) {}
+    virtual ~Phrase() = 0;
+};
+
+/// Node with children that is also a Term (view, id, weight).
+class WeightedSetTerm : public QueryNodeMixin, public Term {
+public:
+    WeightedSetTerm(const vespalib::string &view, int32_t id, Weight weight)
+        : Term(view, id, weight) {}
+    virtual ~WeightedSetTerm() = 0;
+};
+
+/// Node with children that is also a Term (view, id, weight).
+class DotProduct : public QueryNodeMixin, public Term {
+public:
+    DotProduct(const vespalib::string &view, int32_t id, Weight weight)
+        : Term(view, id, weight) {}
+    virtual ~DotProduct() = 0;
+};
+
+/// Node with children that is also a Term and carries three extra parameters.
+class WandTerm : public QueryNodeMixin, public Term {
+private:
+    uint32_t _targetNumHits;
+    int64_t _scoreThreshold;
+    double _thresholdBoostFactor;
+public:
+    WandTerm(const vespalib::string &view, int32_t id, Weight weight,
+             uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor)
+        : Term(view, id, weight),
+          _targetNumHits(targetNumHits),
+          _scoreThreshold(scoreThreshold),
+          _thresholdBoostFactor(thresholdBoostFactor) {}
+    virtual ~WandTerm() = 0;
+    uint32_t getTargetNumHits() const { return _targetNumHits; }
+    int64_t getScoreThreshold() const { return _scoreThreshold; }
+    double getThresholdBoostFactor() const { return _thresholdBoostFactor; }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/location.cpp b/searchlib/src/vespa/searchlib/query/tree/location.cpp
new file mode 100644
index 00000000000..9e8a5d59147
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/location.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "location.h"
+#include "point.h"
+#include "rectangle.h"
+
+using vespalib::asciistream;
+
+namespace search {
+namespace query {
+
+namespace {
+
+// Appends the parenthesised point part: "(2,x,y,max_dist,0,1,0,x_aspect)".
+void appendPoint(asciistream &loc, const Point &point,
+                 uint32_t max_dist, uint32_t x_aspect)
+{
+    loc << "(2" // dimensionality
+        << "," << point.x
+        << "," << point.y
+        << "," << max_dist
+        << "," << "0" // table id.
+        << "," << "1" // rank multiplier.
+        << "," << "0" // rank only on distance.
+        << "," << x_aspect // x aspect.
+        << ")";
+}
+
+// Appends the bracketed rectangle part: "[2,left,top,right,bottom]".
+void appendRectangle(asciistream &loc, const Rectangle &rect)
+{
+    loc << "[2," << rect.left
+        << "," << rect.top
+        << "," << rect.right
+        << "," << rect.bottom
+        << "]";
+}
+
+} // namespace
+
+// Location string consisting of the point part only.
+Location::Location(const Point &point, uint32_t max_dist, uint32_t x_aspect) {
+    asciistream loc;
+    appendPoint(loc, point, max_dist, x_aspect);
+    _location_string = loc.str();
+}
+
+// Location string consisting of the point part followed by the rectangle part.
+Location::Location(const Rectangle &rect,
+                   const Point &point, uint32_t max_dist, uint32_t x_aspect)
+{
+    asciistream loc;
+    appendPoint(loc, point, max_dist, x_aspect);
+    appendRectangle(loc, rect);
+    _location_string = loc.str();
+}
+
+// Location string consisting of the rectangle part only.
+Location::Location(const Rectangle &rect) {
+    asciistream loc;
+    appendRectangle(loc, rect);
+    _location_string = loc.str();
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/location.h b/searchlib/src/vespa/searchlib/query/tree/location.h
new file mode 100644
index 00000000000..8941fcf0b0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/location.h
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace query {
+class Point;
+class Rectangle;
+
+/**
+ * Value class wrapping a location string.
+ *
+ * The string is either given directly or built by one of the
+ * Point/Rectangle constructors (defined in location.cpp). Two locations
+ * are equal when their strings are equal.
+ */
+class Location {
+    vespalib::string _location_string;
+
+public:
+    /// Creates a location with an empty location string.
+    Location() : _location_string() {}
+    Location(const Point &p, uint32_t dist, uint32_t x_asp);
+    Location(const Rectangle &rect);
+    Location(const Rectangle &rect,
+             const Point &p, uint32_t dist, uint32_t x_asp);
+    /// Uses s as the location string, without any validation.
+    Location(const vespalib::string &s) : _location_string(s) {}
+
+    bool operator==(const Location &other) const {
+        return _location_string == other._location_string;
+    }
+    const vespalib::string &getLocationString() const
+    { return _location_string; }
+};
+
+/// Writes the location string of loc to out.
+inline vespalib::asciistream &operator<<(vespalib::asciistream &out, const Location &loc) {
+    return out << loc.getLocationString();
+}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/node.h b/searchlib/src/vespa/searchlib/query/tree/node.h
new file mode 100644
index 00000000000..5af76c4e7ef
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/node.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace query {
+
+class QueryVisitor;
+
+/**
+  This is the base of any node in the query tree. Both leaf nodes (terms)
+  and operator nodes (AND, NOT, OR, PHRASE, NEAR, ONEAR, etc).
+*/
+class Node {
+ public:
+    /// Owning pointer to a node.
+    typedef std::unique_ptr<Node> UP;
+
+    virtual ~Node() {}
+
+    /// Visitor entry point; implemented by the concrete node types.
+    virtual void accept(QueryVisitor &visitor) = 0;
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/point.h b/searchlib/src/vespa/searchlib/query/tree/point.h
new file mode 100644
index 00000000000..8490ae1ec20
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/point.h
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+
+namespace search {
+namespace query {
+
+/// Plain pair of 64-bit signed coordinates. Default-constructed as (0, 0).
+struct Point {
+    int64_t x;
+    int64_t y;
+    Point() : x(0), y(0) {}
+    Point(int64_t x_in, int64_t y_in) : x(x_in), y(y_in) {}
+};
+
+/// Two points are equal when both coordinates are equal.
+inline bool operator==(const Point &p1, const Point &p2) {
+    return p1.x == p2.x && p1.y == p2.y;
+}
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
new file mode 100644
index 00000000000..6868b039307
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/predicate_query_term.h
@@ -0,0 +1,76 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace search {
+namespace query {
+
+/**
+ * Represents a predicate query, with features and range features.
+ *
+ * A feature is a (key, string value) pair and a range feature is a
+ * (key, uint64_t value) pair. Each pair also carries a 64-bit sub query
+ * bitmap, which defaults to all bits set.
+ */
+class PredicateQueryTerm {
+    static const uint64_t ALL_SUB_QUERIES = 0xffffffffffffffffULL;
+
+    /// One key/value pair together with its sub query bitmap.
+    template <typename ValueType>
+    class Entry {
+        vespalib::string _key;
+        ValueType _value;
+        uint64_t _sub_query_bitmap;
+
+    public:
+        Entry(const vespalib::string &key, const ValueType &value,
+              uint64_t sub_query_bitmap = ALL_SUB_QUERIES)
+            : _key(key), _value(value), _sub_query_bitmap(sub_query_bitmap) {}
+
+        vespalib::string getKey() const { return _key; }
+        ValueType getValue() const { return _value; }
+        uint64_t getSubQueryBitmap() const { return _sub_query_bitmap; }
+        bool operator==(const Entry &other) const {
+            return _key == other._key
+                && _value == other._value
+                && _sub_query_bitmap == other._sub_query_bitmap;
+        }
+    };
+
+    std::vector<Entry<vespalib::string>> _features;
+    std::vector<Entry<uint64_t>> _range_features;
+
+public:
+    typedef std::unique_ptr<PredicateQueryTerm> UP;
+
+    /// Creates a term without any features.
+    PredicateQueryTerm() : _features(), _range_features() {}
+
+    /// Creates a term holding copies of the given feature lists.
+    PredicateQueryTerm(const std::vector<Entry<vespalib::string>> &features,
+                       const std::vector<Entry<uint64_t>> &range_features)
+        : _features(features),
+          _range_features(range_features) {
+    }
+
+    /// Appends a string-valued feature.
+    void addFeature(const vespalib::string &key, const vespalib::string &value,
+                    uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
+        _features.emplace_back(key, value, sub_query_bitmask);
+    }
+
+    /// Appends an integer-valued range feature.
+    void addRangeFeature(const vespalib::string &key, uint64_t value,
+                         uint64_t sub_query_bitmask = ALL_SUB_QUERIES) {
+        _range_features.emplace_back(key, value, sub_query_bitmask);
+    }
+
+    const std::vector<Entry<vespalib::string>> &getFeatures() const
+    { return _features; }
+    const std::vector<Entry<uint64_t>> &getRangeFeatures() const
+    { return _range_features; }
+
+    /// Equal when both feature lists are equal, element by element and in order.
+    bool operator==(const PredicateQueryTerm &other) const {
+        return _features == other._features
+            && _range_features == other._range_features;
+    }
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp b/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp
new file mode 100644
index 00000000000..ae8c2012049
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.cpp
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".querybuilder");
+
+#include "querybuilder.h"
+
+#include "intermediate.h"
+
+using vespalib::string;
+using namespace search::query;
+
+// Records msg as the builder error. Only the first reported error is kept.
+void QueryBuilderBase::reportError(const vespalib::string &msg) {
+    if (!hasError()) {
+        _error_msg = msg;
+    }
+}
+
+QueryBuilderBase::QueryBuilderBase()
+    : _root(),
+      _nodes(),
+      _error_msg() {
+}
+
+QueryBuilderBase::~QueryBuilderBase() {
+    reset();
+}
+
+// Takes ownership of n. The node becomes the root if no intermediate node
+// is open, otherwise the next child of the innermost open intermediate
+// node. An intermediate node that receives its last child is completed in
+// turn. If the builder already has an error the node is just deleted.
+void QueryBuilderBase::addCompleteNode(Node *n)
+{
+    Node::UP node(n);
+
+    if (hasError()) {
+        return;
+    }
+    if (_nodes.empty()) {
+        if (!_root.get()) {
+            _root = std::move(node);
+            return;
+        }
+        // A root already exists and nothing is open to attach this node to.
+        reportError("QueryBuilder got invalid node structure.");
+        return;
+    }
+
+    assert(_nodes.top().remaining_child_count > 0);
+    _nodes.top().node->append(std::move(node));
+    if (--_nodes.top().remaining_child_count == 0) {
+        Node *completed(_nodes.top().node);
+        _nodes.pop();
+        addCompleteNode(completed);
+    }
+}
+
+// Takes ownership of n and opens it as the innermost intermediate node,
+// expecting child_count children. The weight override of the enclosing
+// open node (if any) is inherited. A node with zero children is completed
+// immediately. On any error the node is deleted.
+void QueryBuilderBase::addIntermediateNode(Intermediate *n, int child_count)
+{
+    Intermediate::UP node(n);
+    if (hasError()) {
+        return;
+    }
+    if (_root.get()) {
+        reportError("QueryBuilder got invalid node structure.");
+        return;
+    }
+    if (child_count < 0) {
+        // A negative count would turn into a huge reserve() request and
+        // the node could never be completed.
+        reportError("QueryBuilder got negative child count.");
+        return;
+    }
+    node->reserve(child_count);
+    WeightOverride weight_override;
+    if (!_nodes.empty()) {
+        weight_override = _nodes.top().weight_override;
+    }
+    // Give up ownership only after the push succeeded, so the node is
+    // not leaked if the push throws.
+    _nodes.push(NodeInfo(node.get(), child_count));
+    node.release();
+    _nodes.top().weight_override = weight_override;
+    if (child_count == 0) {
+        Node *completed(_nodes.top().node);
+        _nodes.pop();
+        addCompleteNode(completed);
+    }
+}
+
+void QueryBuilderBase::setWeightOverride(const Weight &weight) {
+    assert(!_nodes.empty());
+    _nodes.top().weight_override = WeightOverride(weight);
+}
+
+// Hands out the finished tree. Returns a null Node::UP, with error() set,
+// if no root exists, if intermediate nodes are still open, or if an error
+// was reported earlier.
+Node::UP QueryBuilderBase::build() {
+    if (!_root.get()) {
+        reportError("Trying to build incomplete query tree.");
+    }
+    if (!_nodes.empty()) {
+        reportError("QueryBuilder got invalid node structure.");
+    }
+    if (hasError()) {
+        return Node::UP();
+    }
+    return std::move(_root);
+}
+
+// Deletes all open intermediate nodes and the root, and clears the error.
+void QueryBuilderBase::reset() {
+    while (!_nodes.empty()) {
+        delete _nodes.top().node;
+        _nodes.pop();
+    }
+    _root.reset();
+    _error_msg = "";
+}
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
new file mode 100644
index 00000000000..b5cbdb07a13
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
@@ -0,0 +1,358 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/*
+ * The QueryBuilder builds a query tree. The exact type of the nodes
+ * in the tree is defined by a traits class, which defines the actual
+ * subclasses of the query nodes to use. Simple subclasses are defined
+ * in "simplequery.h"
+ *
+ * To create a QueryBuilder that uses the simple query nodes, create
+ * the builder like this (NodeTypes being the traits class from
+ * "simplequery.h"):
+ *
+ * QueryBuilder<NodeTypes> builder;
+ *
+ * Query trees are built using prefix traversal, e.g:
+ * builder.addOr(2); // Two children
+ * builder.addStringTerm(term, view, id, weight);
+ * builder.addStringTerm(term, view, id, weight);
+ * Node::UP node = builder.build();
+ */
+
+#pragma once
+
+#include "predicate_query_term.h"
+#include
+#include
+#include
+#include "node.h"
+
+namespace search {
+namespace query {
+
+class Intermediate;
+class Location;
+class Range;
+
+/**
+ * Type-independent part of the query builder: keeps the root, the stack
+ * of intermediate nodes that still wait for children, and the first
+ * error that was reported.
+ */
+class QueryBuilderBase
+{
+    /// Optional weight that replaces the weight of nodes added below a node.
+    class WeightOverride {
+        bool _active;
+        Weight _weight;
+    public:
+        WeightOverride() : _active(false), _weight(0) {}
+        WeightOverride(Weight weight) : _active(true), _weight(weight) {}
+        void adjustWeight(Weight &weight) const { if (_active) weight = _weight; }
+    };
+    /// An open intermediate node and the number of children it still needs.
+    struct NodeInfo {
+        Intermediate *node;
+        int remaining_child_count;
+        WeightOverride weight_override;
+        NodeInfo(Intermediate *n, int c) : node(n), remaining_child_count(c) {}
+    };
+    Node::UP _root;
+    std::stack<NodeInfo> _nodes;
+    vespalib::string _error_msg;
+
+    void reportError(const vespalib::string &msg);
+
+protected:
+    QueryBuilderBase();
+    ~QueryBuilderBase();
+
+    // Takes ownership of node.
+    void addCompleteNode(Node *node);
+    // Takes ownership of node. A negative child_count is reported as an error.
+    void addIntermediateNode(Intermediate *node, int child_count);
+    // Activates a weight override for the current intermediate node.
+    void setWeightOverride(const Weight &weight);
+    // Resets weight if a weight override is active.
+    void adjustWeight(Weight &weight) const {
+        if (!_nodes.empty()) {
+            _nodes.top().weight_override.adjustWeight(weight);
+        }
+    }
+
+public:
+    /**
+     * Builds the query tree. Returns a null Node::UP if something went wrong.
+     */
+    Node::UP build();
+
+    /**
+     * Checks if an error has occurred.
+     */
+    bool hasError() const { return !_error_msg.empty(); }
+
+    /**
+     * If build failed, the reason is stored here.
+     */
+    vespalib::string error() const { return _error_msg; }
+
+    /**
+     * After an error, reset() must be called before attempting to
+     * build a new query tree with the same builder.
+     */
+    void reset();
+};
+
+
+// These template functions create nodes based on a traits class.
+// You may specialize these functions for your own traits class to have full
+// control of the query node instantiation.
+ +// Intermediate nodes +template +typename NodeTypes::And *createAnd() { return new typename NodeTypes::And; } + +template +typename NodeTypes::AndNot * +createAndNot() { return new typename NodeTypes::AndNot; } + +template +typename NodeTypes::Or *createOr() { return new typename NodeTypes::Or; } + +template +typename NodeTypes::WeakAnd *createWeakAnd(uint32_t minHits, const vespalib::stringref & view) { + return new typename NodeTypes::WeakAnd(minHits, view); +} +template +typename NodeTypes::Equiv *createEquiv(int32_t id, Weight weight) { + return new typename NodeTypes::Equiv(id, weight); +} +template +typename NodeTypes::Phrase *createPhrase( + const vespalib::stringref &view, int32_t id, Weight weight) { + return new typename NodeTypes::Phrase(view, id, weight); +} +template +typename NodeTypes::WeightedSetTerm *createWeightedSetTerm( + const vespalib::stringref &view, int32_t id, Weight weight) { + return new typename NodeTypes::WeightedSetTerm(view, id, weight); +} +template +typename NodeTypes::DotProduct *createDotProduct( + const vespalib::stringref &view, int32_t id, Weight weight) { + return new typename NodeTypes::DotProduct(view, id, weight); +} +template +typename NodeTypes::WandTerm *createWandTerm( + const vespalib::stringref &view, int32_t id, Weight weight, + uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) { + return new typename NodeTypes::WandTerm(view, id, weight, + targetNumHits, scoreThreshold, thresholdBoostFactor); +} +template +typename NodeTypes::Rank *createRank() { return new typename NodeTypes::Rank; } + +template +typename NodeTypes::Near *createNear(size_t distance) { + return new typename NodeTypes::Near(distance); +} +template +typename NodeTypes::ONear *createONear(size_t distance) { + return new typename NodeTypes::ONear(distance); +} + +// Term nodes +template +typename NodeTypes::NumberTerm *createNumberTerm( + const vespalib::stringref &term, const vespalib::stringref &view, int32_t id, Weight 
weight) +{ + return new typename NodeTypes::NumberTerm(term, view, id, weight); +} +template +typename NodeTypes::PrefixTerm *createPrefixTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::PrefixTerm(term, view, id, weight); +} +template +typename NodeTypes::RangeTerm *createRangeTerm( + const Range &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::RangeTerm(term, view, id, weight); +} +template +typename NodeTypes::StringTerm *createStringTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::StringTerm(term, view, id, weight); +} +template +typename NodeTypes::SubstringTerm *createSubstringTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::SubstringTerm(term, view, id, weight); +} +template +typename NodeTypes::SuffixTerm *createSuffixTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::SuffixTerm(term, view, id, weight); +} + +template +typename NodeTypes::LocationTerm *createLocationTerm( + const Location &loc, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::LocationTerm(loc, view, id, weight); +} + +template +typename NodeTypes::PredicateQuery *createPredicateQuery( + PredicateQueryTerm::UP term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::PredicateQuery( + std::move(term), view, id, weight); +} + +template +typename NodeTypes::RegExpTerm *createRegExpTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) +{ + return new typename NodeTypes::RegExpTerm(term, view, id, weight); +} + +template +class QueryBuilder : 
public QueryBuilderBase { + template + T &addIntermediate(T *node, int child_count) { + addIntermediateNode(node, child_count); + return *node; + } + + template + T &addTerm(T *node) { + addCompleteNode(node); + return *node; + } + +public: + typename NodeTypes::And &addAnd(int child_count) { + return addIntermediate(createAnd(), child_count); + } + typename NodeTypes::AndNot &addAndNot(int child_count) { + return addIntermediate(createAndNot(), child_count); + } + typename NodeTypes::Near &addNear(int child_count, size_t distance) { + return addIntermediate(createNear(distance), child_count); + } + typename NodeTypes::ONear &addONear(int child_count, size_t distance) { + return addIntermediate(createONear(distance), child_count); + } + typename NodeTypes::Or &addOr(int child_count) { + return addIntermediate(createOr(), child_count); + } + typename NodeTypes::WeakAnd &addWeakAnd(int child_count, uint32_t minHits, const vespalib::stringref & view) { + return addIntermediate(createWeakAnd(minHits, view), child_count); + } + typename NodeTypes::Equiv &addEquiv(int child_count, int32_t id, Weight weight) { + return addIntermediate(createEquiv(id, weight), child_count); + } + typename NodeTypes::Phrase &addPhrase( + int child_count, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + typename NodeTypes::Phrase &node = addIntermediate( + createPhrase(view, id, weight), child_count); + setWeightOverride(weight); + return node; + } + typename NodeTypes::WeightedSetTerm &addWeightedSetTerm( + int child_count, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + typename NodeTypes::WeightedSetTerm &node = addIntermediate( + createWeightedSetTerm(view, id, weight), child_count); + return node; + } + typename NodeTypes::DotProduct &addDotProduct( + int child_count, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + typename NodeTypes::DotProduct &node = 
addIntermediate( + createDotProduct(view, id, weight), child_count); + return node; + } + typename NodeTypes::WandTerm &addWandTerm( + int child_count, const vespalib::stringref &view, + int32_t id, Weight weight, uint32_t targetNumHits, + int64_t scoreThreshold, double thresholdBoostFactor) { + adjustWeight(weight); + typename NodeTypes::WandTerm &node = addIntermediate( + createWandTerm(view, id, weight, + targetNumHits, scoreThreshold, thresholdBoostFactor), + child_count); + return node; + } + typename NodeTypes::Rank &addRank(int child_count) { + return addIntermediate(createRank(), child_count); + } + + typename NodeTypes::NumberTerm &addNumberTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createNumberTerm(term, view, id, weight)); + } + typename NodeTypes::PrefixTerm &addPrefixTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createPrefixTerm(term, view, id, weight)); + } + typename NodeTypes::RangeTerm &addRangeTerm( + const Range &range, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createRangeTerm(range, view, id, weight)); + } + typename NodeTypes::StringTerm &addStringTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createStringTerm(term, view, id, weight)); + } + typename NodeTypes::SubstringTerm &addSubstringTerm( + const vespalib::stringref &t, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createSubstringTerm(t, view, id, weight)); + } + typename NodeTypes::SuffixTerm &addSuffixTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createSuffixTerm(term, 
view, id, weight)); + } + typename NodeTypes::LocationTerm &addLocationTerm( + const Location &loc, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createLocationTerm(loc, view, id, weight)); + } + typename NodeTypes::PredicateQuery &addPredicateQuery( + PredicateQueryTerm::UP term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createPredicateQuery( + std::move(term), view, id, weight)); + } + typename NodeTypes::RegExpTerm &addRegExpTerm( + const vespalib::stringref &term, const vespalib::stringref &view, + int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createRegExpTerm(term, view, id, weight)); + } +}; + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h new file mode 100644 index 00000000000..7fbcb45d742 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/querynodemixin.h @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace query { + +template +struct QueryNodeMixin : Base { + typedef QueryNodeMixin QueryNodeMixinType; + + virtual ~QueryNodeMixin() = 0; + virtual void accept(QueryVisitor &visitor) { + visitor.visit(static_cast(*this)); + } + +protected: + using Base::Base; +}; + +template +QueryNodeMixin::~QueryNodeMixin() {} + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h new file mode 100644 index 00000000000..38f666cb155 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h @@ -0,0 +1,171 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#pragma once + +#include "intermediatenodes.h" +#include "querybuilder.h" +#include "queryvisitor.h" +#include "termnodes.h" + +namespace search { +namespace query { + +/** + * Creates a new query tree based on an existing one. The traits class + * specifies what concrete types the query tree classes should have. + */ +template +class QueryReplicator : private QueryVisitor { + QueryBuilder _builder; + +public: + Node::UP replicate(const Node &node) { + // The visitor doesn't deal with const nodes. However, we are + // not changing the node, so we can safely remove the const. + const_cast(node).accept(*this); + return _builder.build(); + } + +private: + void visitNodes(const std::vector &nodes) { + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i]->accept(*this); + } + } + + virtual void visit(And &node) { + _builder.addAnd(node.getChildren().size()); + visitNodes(node.getChildren()); + } + + virtual void visit(AndNot &node) { + _builder.addAndNot(node.getChildren().size()); + visitNodes(node.getChildren()); + } + + virtual void visit(WeakAnd &node) { + _builder.addWeakAnd(node.getChildren().size(), node.getMinHits(), node.getView()); + visitNodes(node.getChildren()); + } + + virtual void visit(Equiv &node) { + _builder.addEquiv(node.getChildren().size(), node.getId(), node.getWeight()) + .setTermIndex(node.getTermIndex()); + visitNodes(node.getChildren()); + } + + virtual void visit(Near &node) { + _builder.addNear(node.getChildren().size(), node.getDistance()); + visitNodes(node.getChildren()); + } + + virtual void visit(ONear &node) { + _builder.addONear(node.getChildren().size(), node.getDistance()); + visitNodes(node.getChildren()); + } + + virtual void visit(Or &node) { + _builder.addOr(node.getChildren().size()); + visitNodes(node.getChildren()); + } + + virtual void visit(Phrase &node) { + replicate(node, _builder.addPhrase(node.getChildren().size(), + node.getView(), + node.getId(), node.getWeight())); + 
visitNodes(node.getChildren()); + } + + virtual void visit(WeightedSetTerm &node) { + replicate(node, _builder.addWeightedSetTerm(node.getChildren().size(), + node.getView(), + node.getId(), node.getWeight())); + visitNodes(node.getChildren()); + } + + virtual void visit(DotProduct &node) { + replicate(node, _builder.addDotProduct(node.getChildren().size(), + node.getView(), + node.getId(), node.getWeight())); + visitNodes(node.getChildren()); + } + + virtual void visit(WandTerm &node) { + replicate(node, _builder.addWandTerm(node.getChildren().size(), + node.getView(), + node.getId(), node.getWeight(), + node.getTargetNumHits(), + node.getScoreThreshold(), + node.getThresholdBoostFactor())); + visitNodes(node.getChildren()); + } + + virtual void visit(Rank &node) { + _builder.addRank(node.getChildren().size()); + visitNodes(node.getChildren()); + } + + void replicate(const Term &original, Term &replica) { + replica.setTermIndex(original.getTermIndex()); + replica.setRanked(original.isRanked()); + } + + virtual void visit(NumberTerm &node) { + replicate(node, _builder.addNumberTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(LocationTerm &node) { + replicate(node,_builder.addLocationTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(PrefixTerm &node) { + replicate(node, _builder.addPrefixTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(RangeTerm &node) { + replicate(node, _builder.addRangeTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(StringTerm &node) { + replicate(node, _builder.addStringTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(SubstringTerm &node) { + replicate(node, _builder.addSubstringTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void 
visit(SuffixTerm &node) { + replicate(node, _builder.addSuffixTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } + + virtual void visit(PredicateQuery &node) { + replicate(node, _builder.addPredicateQuery( + PredicateQueryTerm::UP(new PredicateQueryTerm( + *node.getTerm())), + node.getView(), node.getId(), node.getWeight())); + } + + virtual void visit(RegExpTerm &node) { + replicate(node, _builder.addRegExpTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } +}; + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h b/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h new file mode 100644 index 00000000000..f7e997e82f4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/querytreecreator.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "queryreplicator.h" +#include "stackdumpquerycreator.h" + +namespace search { +namespace query { + +/** + * Holds functions for creating query trees, either from a stack dump + * or from another query tree. The traits specify the concrete + * subclasses to be used when building the tree. + */ +template +struct QueryTreeCreator { + static Node::UP replicate(const Node &node) { + return QueryReplicator().replicate(node); + } + + static Node::UP create(search::SimpleQueryStackDumpIterator &iterator) { + return StackDumpQueryCreator().create(iterator); + } + +private: + QueryTreeCreator(); +}; + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h new file mode 100644 index 00000000000..dc24dc9e8f6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root.
+
+#pragma once
+
+namespace search {
+namespace query {
+
+// Forward declarations of every node kind the visitor can be handed.
+// Only references to these types appear below, so no node header is
+// needed here.
+class And;
+class AndNot;
+class Equiv;
+class NumberTerm;
+class LocationTerm;
+class Near;
+class ONear;
+class Or;
+class Phrase;
+class PrefixTerm;
+class RangeTerm;
+class Rank;
+class StringTerm;
+class SubstringTerm;
+class SuffixTerm;
+class WeakAnd;
+class WeightedSetTerm;
+class DotProduct;
+class WandTerm;
+class PredicateQuery;
+class RegExpTerm;
+
+/**
+ * Visitor interface for query trees: one pure virtual visit() overload
+ * per node kind, each taking the node by non-const reference. A concrete
+ * visitor must implement all of them.
+ */
+struct QueryVisitor {
+    virtual ~QueryVisitor() {}
+
+    virtual void visit(And &) = 0;
+    virtual void visit(AndNot &) = 0;
+    virtual void visit(Equiv &) = 0;
+    virtual void visit(NumberTerm &) = 0;
+    virtual void visit(LocationTerm &) = 0;
+    virtual void visit(Near &) = 0;
+    virtual void visit(ONear &) = 0;
+    virtual void visit(Or &) = 0;
+    virtual void visit(Phrase &) = 0;
+    virtual void visit(PrefixTerm &) = 0;
+    virtual void visit(RangeTerm &) = 0;
+    virtual void visit(Rank &) = 0;
+    virtual void visit(StringTerm &) = 0;
+    virtual void visit(SubstringTerm &) = 0;
+    virtual void visit(SuffixTerm &) = 0;
+    virtual void visit(WeakAnd &) = 0;
+    virtual void visit(WeightedSetTerm &) = 0;
+    virtual void visit(DotProduct &) = 0;
+    virtual void visit(WandTerm &) = 0;
+    virtual void visit(PredicateQuery &) = 0;
+    virtual void visit(RegExpTerm &) = 0;
+};
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/range.cpp b/searchlib/src/vespa/searchlib/query/tree/range.cpp
new file mode 100644
index 00000000000..0b516d3f73c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/range.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+// NOTE(review): the targets of the bare #include lines in this file were
+// lost in extraction and cannot be determined from here; restore them
+// from upstream before building.
+#include
+#include "range.h"
+#include
+#include
+
+namespace search {
+namespace query {
+
+// Builds the textual range "[f;t]" from the two bounds.
+Range::Range(int64_t f, int64_t t)
+{
+    vespalib::asciistream ost;
+    ost << "[" << f << ";" << t << "]";
+    _range = ost.str();
+}
+
+// Writes the stored range string to the stream unchanged.
+vespalib::asciistream &operator<<(vespalib::asciistream &out, const Range &range)
+{
+    return out << range.getRangeString();
+}
+
+} // namespace query
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/query/tree/range.h b/searchlib/src/vespa/searchlib/query/tree/range.h
new file mode 100644
index 00000000000..39a0776ca7d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/range.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+// NOTE(review): the targets of the two bare #include lines below were
+// lost in extraction; restore them from upstream before building.
+#include
+#include
+
+namespace search {
+namespace query {
+
+/**
+ * A range kept purely as its string form. The class stores the string
+ * as given and does not parse or validate it.
+ */
+class Range {
+    vespalib::string _range;
+
+public:
+    Range() : _range() {}
+    // Formats the bounds as "[f;t]"; defined in range.cpp.
+    Range(int64_t f, int64_t t);
+    // Stores an already formatted range string as is.
+    Range(const vespalib::string &range) : _range(range) {}
+
+    const vespalib::string & getRangeString() const { return _range; }
+};
+
+// Two ranges are equal when their string forms are equal.
+inline bool operator==(const Range &r1, const Range &r2) {
+    return r1.getRangeString() == r2.getRangeString();
+}
+
+vespalib::asciistream &operator<<(vespalib::asciistream &out, const Range &range);
+
+} // namespace query
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/query/tree/rectangle.h b/searchlib/src/vespa/searchlib/query/tree/rectangle.h
new file mode 100644
index 00000000000..faf2ca4b4d0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/tree/rectangle.h
@@ -0,0 +1,26 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +namespace search { +namespace query { + +struct Rectangle { + int64_t left; + int64_t top; + int64_t right; + int64_t bottom; + + Rectangle() : left(0), top(0), right(0), bottom(0) {} + Rectangle(int64_t l, int64_t t, int64_t r, int64_t b) + : left(l), top(t), right(r), bottom(b) {} +}; + +inline bool operator==(const Rectangle &r1, const Rectangle &r2) { + return r1.left == r2.left && r1.right == r2.right + && r1.top == r2.top && r1.bottom == r2.bottom; +} + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h new file mode 100644 index 00000000000..e0f66d70f28 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h @@ -0,0 +1,132 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* + * This file defines a set of subclasses to the query nodes, and a + * traits class to make them easy to use with the query builder. These + * subclasses don't add any extra information to the abstract nodes. 
+ */ + +#pragma once + +#include +#include "intermediatenodes.h" +#include "termnodes.h" + +namespace search { +namespace query { + +struct SimpleAnd : And {}; +struct SimpleAndNot : AndNot {}; +struct SimpleNear : Near { SimpleNear(size_t dist) : Near(dist) {} }; +struct SimpleONear : ONear { SimpleONear(size_t dist) : ONear(dist) {} }; +struct SimpleOr : Or {}; +struct SimpleWeakAnd : WeakAnd { + SimpleWeakAnd(uint32_t minHits, const vespalib::stringref & view) : + WeakAnd(minHits, view) + {} +}; +struct SimpleEquiv : Equiv { + SimpleEquiv(int32_t id, Weight weight) + : Equiv(id, weight) {} +}; +struct SimplePhrase : Phrase { + SimplePhrase(const vespalib::stringref &view, int32_t id, Weight weight) + : Phrase(view, id, weight) {} +}; +struct SimpleWeightedSetTerm : WeightedSetTerm { + SimpleWeightedSetTerm(const vespalib::stringref &view, int32_t id, Weight weight) + : WeightedSetTerm(view, id, weight) {} +}; +struct SimpleDotProduct : DotProduct { + SimpleDotProduct(const vespalib::stringref &view, int32_t id, Weight weight) + : DotProduct(view, id, weight) {} +}; +struct SimpleWandTerm : WandTerm { + SimpleWandTerm(const vespalib::stringref &view, int32_t id, Weight weight, + uint32_t targetNumHits, int64_t scoreThreshold, double thresholdBoostFactor) + : WandTerm(view, id, weight, targetNumHits, scoreThreshold, thresholdBoostFactor) {} +}; +struct SimpleRank : Rank {}; +struct SimpleNumberTerm : NumberTerm { + SimpleNumberTerm(Type term, const vespalib::stringref &view, + int32_t id, Weight weight) + : NumberTerm(term, view, id, weight) { + } +}; +struct SimpleLocationTerm : LocationTerm { + SimpleLocationTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : LocationTerm(term, view, id, weight) { + } +}; +struct SimplePrefixTerm : PrefixTerm { + SimplePrefixTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : PrefixTerm(term, view, id, weight) { + } +}; +struct SimpleRangeTerm : RangeTerm 
{ + SimpleRangeTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : RangeTerm(term, view, id, weight) { + } +}; +struct SimpleStringTerm : StringTerm { + SimpleStringTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : StringTerm(term, view, id, weight) { + } +}; +struct SimpleSubstringTerm : SubstringTerm { + SimpleSubstringTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : SubstringTerm(term, view, id, weight) { + } +}; +struct SimpleSuffixTerm : SuffixTerm { + SimpleSuffixTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : SuffixTerm(term, view, id, weight) { + } +}; +struct SimplePredicateQuery : PredicateQuery { + SimplePredicateQuery(PredicateQueryTerm::UP term, + const vespalib::stringref &view, + int32_t id, Weight weight) + : PredicateQuery(std::move(term), view, id, weight) { + } +}; +struct SimpleRegExpTerm : RegExpTerm { + SimpleRegExpTerm(const Type &term, const vespalib::stringref &view, + int32_t id, Weight weight) + : RegExpTerm(term, view, id, weight) { + } +}; + + +struct SimpleQueryNodeTypes { + typedef SimpleAnd And; + typedef SimpleAndNot AndNot; + typedef SimpleEquiv Equiv; + typedef SimpleNumberTerm NumberTerm; + typedef SimpleLocationTerm LocationTerm; + typedef SimpleNear Near; + typedef SimpleONear ONear; + typedef SimpleOr Or; + typedef SimplePhrase Phrase; + typedef SimplePrefixTerm PrefixTerm; + typedef SimpleRangeTerm RangeTerm; + typedef SimpleRank Rank; + typedef SimpleStringTerm StringTerm; + typedef SimpleSubstringTerm SubstringTerm; + typedef SimpleSuffixTerm SuffixTerm; + typedef SimpleWeakAnd WeakAnd; + typedef SimpleWeightedSetTerm WeightedSetTerm; + typedef SimpleDotProduct DotProduct; + typedef SimpleWandTerm WandTerm; + typedef SimplePredicateQuery PredicateQuery; + typedef SimpleRegExpTerm RegExpTerm; +}; + +} // namespace query +} // namespace search + diff --git 
a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp new file mode 100644 index 00000000000..b5d43176f36 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -0,0 +1,301 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".stackdumpcreator"); + +#include "stackdumpcreator.h" + +#include "intermediatenodes.h" +#include "queryvisitor.h" +#include "termnodes.h" +#include +#include +#include +#include + +using vespalib::string; +using std::vector; +using search::ParseItem; +using search::RawBuf; +using namespace search::query; + +namespace { +class QueryNodeConverter : public QueryVisitor { + RawBuf _buf; + + void visitNodes(const vector &nodes) { + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i]->accept(*this); + } + } + + void appendString(const string &s) { + _buf.preAlloc(sizeof(uint32_t) + s.size()); + _buf.appendCompressedPositiveNumber(s.size()); + _buf.append(s.data(), s.size()); + } + + void appendCompressedPositiveNumber(uint64_t n) { + _buf.appendCompressedPositiveNumber(n); + } + + void appendCompressedNumber(int64_t n) { + _buf.appendCompressedNumber(n); + } + + void appendInt(uint32_t i) { + _buf.preAlloc(sizeof(uint32_t)); + _buf.PutToInet(i); + } + + void appendLong(uint64_t l) { + _buf.preAlloc(sizeof(uint64_t)); + _buf.Put64ToInet(l); + } + + void appendByte(uint8_t i) { + _buf.preAlloc(sizeof(uint8_t)); + _buf.append(&i, sizeof(uint8_t)); + } + + void appendDouble(double i) { + _buf.preAlloc(sizeof(double)); + double nboVal = vespalib::nbostream::n2h(i); + _buf.append(&nboVal, sizeof(double)); + } + void append(const vespalib::string &s) { appendString(s); } + void append(uint64_t l) { appendLong(l); } + + template + void appendPredicateQueryTermVector(const V& v); + + void createIntermediate(const Intermediate &node, size_t type) { + 
appendByte(type); + appendCompressedPositiveNumber(node.getChildren().size()); + visitNodes(node.getChildren()); + } + + void createIntermediate(const Intermediate &node, size_t type, + size_t distance) { + appendByte(type); + appendCompressedPositiveNumber(node.getChildren().size()); + appendCompressedPositiveNumber(distance); + visitNodes(node.getChildren()); + } + + void createIntermediate(const Intermediate &node, size_t type, + size_t distance, + const vespalib::string & view) { + appendByte(type); + appendCompressedPositiveNumber(node.getChildren().size()); + appendCompressedPositiveNumber(distance); + appendString(view); + visitNodes(node.getChildren()); + } + + virtual void visit(And &node) { + createIntermediate(node, ParseItem::ITEM_AND); + } + + virtual void visit(AndNot &node) { + createIntermediate(node, ParseItem::ITEM_NOT); + } + + virtual void visit(Near &node) { + createIntermediate(node, ParseItem::ITEM_NEAR, node.getDistance()); + } + + virtual void visit(ONear &node) { + createIntermediate(node, ParseItem::ITEM_ONEAR, node.getDistance()); + } + + virtual void visit(Or &node) { + createIntermediate(node, ParseItem::ITEM_OR); + } + + virtual void visit(WeakAnd &node) { + createIntermediate(node, ParseItem::ITEM_WEAK_AND, node.getMinHits(), node.getView()); + } + + virtual void visit(Equiv &node) { + createIntermediate(node, ParseItem::ITEM_EQUIV); + } + + virtual void visit(Phrase &node) { + uint8_t typefield = (ParseItem::ITEM_PHRASE | ParseItem::IF_WEIGHT); + uint8_t flags = 0; + if (!node.isRanked()) { + flags |= ParseItem::IFLAG_NORANK; + } + if (!node.usePositionData()) { + flags |= ParseItem::IFLAG_NOPOSITIONDATA; + } + if (flags != 0) { + typefield |= ParseItem::IF_FLAGS; + } + appendByte(typefield); + appendCompressedNumber(node.getWeight().percent()); + if (typefield & ParseItem::IF_FLAGS) { + appendByte(flags); + } + appendCompressedPositiveNumber(node.getChildren().size()); + appendString(node.getView()); + 
visitNodes(node.getChildren()); + } + + template + void createWeightedSet(NODE &node, uint8_t typefield) { + uint8_t flags = 0; + if (!node.isRanked()) { + flags |= ParseItem::IFLAG_NORANK; + } + // usePositionData should not have any effect + // but is propagated anyway + if (!node.usePositionData()) { + flags |= ParseItem::IFLAG_NOPOSITIONDATA; + } + if (flags != 0) { + typefield |= ParseItem::IF_FLAGS; + } + appendByte(typefield); + appendCompressedNumber(node.getWeight().percent()); + if (typefield & ParseItem::IF_FLAGS) { + appendByte(flags); + } + appendCompressedPositiveNumber(node.getChildren().size()); + appendString(node.getView()); + } + + virtual void visit(WeightedSetTerm &node) { + createWeightedSet(node, ParseItem::ITEM_WEIGHTED_SET | ParseItem::IF_WEIGHT); + visitNodes(node.getChildren()); + } + + virtual void visit(DotProduct &node) { + createWeightedSet(node, ParseItem::ITEM_DOT_PRODUCT | ParseItem::IF_WEIGHT); + visitNodes(node.getChildren()); + } + + virtual void visit(WandTerm &node) { + createWeightedSet(node, ParseItem::ITEM_WAND | ParseItem::IF_WEIGHT); + appendCompressedPositiveNumber(node.getTargetNumHits()); + appendDouble(node.getScoreThreshold()); + appendDouble(node.getThresholdBoostFactor()); + visitNodes(node.getChildren()); + } + + virtual void visit(Rank &node) { + createIntermediate(node, ParseItem::ITEM_RANK); + } + + template void appendTerm(const TermBase &node); + + template + void createTerm(const Term &node, size_t type) { + uint8_t typefield = type | + ParseItem::IF_WEIGHT | + ParseItem::IF_UNIQUEID; + uint8_t flags = 0; + if (!node.isRanked()) { + flags |= ParseItem::IFLAG_NORANK; + } + if (!node.usePositionData()) { + flags |= ParseItem::IFLAG_NOPOSITIONDATA; + } + if (flags != 0) { + typefield |= ParseItem::IF_FLAGS; + } + appendByte(typefield); + appendCompressedNumber(node.getWeight().percent()); + appendCompressedPositiveNumber(node.getId()); + if (typefield & ParseItem::IF_FLAGS) { + appendByte(flags); + } + 
appendString(node.getView()); + appendTerm(node); + } + + virtual void visit(NumberTerm &node) { + createTerm(node, ParseItem::ITEM_NUMTERM); + } + + virtual void visit(LocationTerm &node) { + createTerm(node, ParseItem::ITEM_NUMTERM); + } + + virtual void visit(PrefixTerm &node) { + createTerm(node, ParseItem::ITEM_PREFIXTERM); + } + + virtual void visit(RangeTerm &node) { + createTerm(node, ParseItem::ITEM_NUMTERM); + } + + virtual void visit(StringTerm &node) { + createTerm(node, ParseItem::ITEM_TERM); + } + + virtual void visit(SubstringTerm &node) { + createTerm(node, ParseItem::ITEM_SUBSTRINGTERM); + } + + virtual void visit(SuffixTerm &node) { + createTerm(node, ParseItem::ITEM_SUFFIXTERM); + } + + virtual void visit(PredicateQuery &node) { + createTerm(node, ParseItem::ITEM_PREDICATE_QUERY); + } + + virtual void visit(RegExpTerm &node) { + createTerm(node, ParseItem::ITEM_REGEXP); + } + +public: + QueryNodeConverter() + : _buf(4096) + { + } + + string getStackDump() { + return string(_buf.GetDrainPos(), + _buf.GetDrainPos() + _buf.GetUsedLen()); + } +}; + +template +void QueryNodeConverter::appendTerm(const TermBase &node) { + vespalib::asciistream ost; + ost << node.getTerm(); + appendString(ost.str()); +} +template <> +void QueryNodeConverter::appendTerm(const TermBase &node) { + appendString(node.getTerm()); +} +template <> +void QueryNodeConverter::appendTerm( + const TermBase &node) { + const PredicateQueryTerm &term = *node.getTerm(); + appendPredicateQueryTermVector(term.getFeatures()); + appendPredicateQueryTermVector(term.getRangeFeatures()); +} +template +void QueryNodeConverter::appendPredicateQueryTermVector(const V& v) { + appendCompressedNumber(v.size()); + for (const auto &entry : v) { + append(entry.getKey()); + append(entry.getValue()); + append(entry.getSubQueryBitmap()); + } +} +} // namespace + +string StackDumpCreator::create(const Node &node) { + QueryNodeConverter converter; + const_cast(node).accept(converter); + return 
converter.getStackDump(); +} + +using namespace search::query; diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h new file mode 100644 index 00000000000..80bcd60df5d --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace query { + +class Node; + +struct StackDumpCreator { + // Creates a stack dump from a query tree. + static vespalib::string create(const Node &node); +}; + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h new file mode 100644 index 00000000000..c3b10aae05d --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "node.h" +#include "querybuilder.h" +#include "term.h" +#include +#include +#include +#include + +namespace search { +namespace query { + +/** + * Creates a query tree from a stack dump. + */ +template +class StackDumpQueryCreator { +private: + /** + * If changing this class note: + * Note that this method must return a reference into the existing querystack. + * This is necessary to use the non-copying stringref noted in the create method. 
+ */ + static vespalib::stringref readString( + SimpleQueryStackDumpIterator &queryStack, + void (SimpleQueryStackDumpIterator::*f)(const char **, + size_t *) const) + { + const char *p; + size_t len; + (queryStack.*f)(&p, &len); + return vespalib::stringref(p, len); + } + +public: + static Node::UP create(search::SimpleQueryStackDumpIterator &queryStack) + { + QueryBuilder builder; + + // Make sure that the life time of what pureTermView refers to exceeds that of pureTermView. + // Especially make sure that do not create any stack local objects like vespalib::string + // with smaller scope, that you refer with pureTermView. + vespalib::stringref pureTermView; + while (queryStack.next()) { + uint32_t arity = queryStack.getArity(); + uint32_t arg1 = queryStack.getArg1(); + double arg2 = queryStack.getArg2(); + double arg3 = queryStack.getArg3(); + ParseItem::ItemType type = queryStack.getType(); + Node::UP node; + Term *t = 0; + if (type == ParseItem::ITEM_AND) { + builder.addAnd(arity); + } else if (type == ParseItem::ITEM_RANK) { + builder.addRank(arity); + } else if (type == ParseItem::ITEM_OR) { + builder.addOr(arity); + } else if (type == ParseItem::ITEM_WORD_ALTERNATIVES) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + builder.addEquiv(arity, id, weight); + pureTermView = view; + } else if (type == ParseItem::ITEM_WEAK_AND) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + builder.addWeakAnd(arity, arg1, view); + pureTermView = view; + } else if (type == ParseItem::ITEM_EQUIV) { + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + builder.addEquiv(arity, id, weight); + } else if (type == ParseItem::ITEM_NEAR) { + builder.addNear(arity, arg1); + } else if (type == ParseItem::ITEM_ONEAR) { + builder.addONear(arity, arg1); + } else if (type 
== ParseItem::ITEM_PHRASE) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + t = &builder.addPhrase(arity, view, id, weight); + pureTermView = view; + } else if (type == ParseItem::ITEM_WEIGHTED_SET) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + t = &builder.addWeightedSetTerm(arity, view, id, weight); + pureTermView = vespalib::stringref(); + } else if (type == ParseItem::ITEM_DOT_PRODUCT) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + t = &builder.addDotProduct(arity, view, id, weight); + pureTermView = vespalib::stringref(); + } else if (type == ParseItem::ITEM_WAND) { + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + t = &builder.addWandTerm( + arity, view, id, weight, arg1, arg2, arg3); + pureTermView = vespalib::stringref(); + } else if (type == ParseItem::ITEM_NOT) { + builder.addAndNot(arity); + } else { + vespalib::stringref term = readString(queryStack, + &SimpleQueryStackDumpIterator::getTerm); + vespalib::stringref view = readString(queryStack, + &SimpleQueryStackDumpIterator::getIndexName); + int32_t id = queryStack.getUniqueId(); + Weight weight = queryStack.GetWeight(); + + if (type == ParseItem::ITEM_TERM) { + t = &builder.addStringTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_PURE_WEIGHTED_STRING) { + t = &builder.addStringTerm(term, pureTermView, id, weight); + } else if (type == ParseItem::ITEM_PURE_WEIGHTED_LONG) { + t = &builder.addNumberTerm(term, pureTermView, id, weight); + } 
else if (type == ParseItem::ITEM_PREFIXTERM) { + t = &builder.addPrefixTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_SUBSTRINGTERM) { + t = &builder.addSubstringTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_EXACTSTRINGTERM) { + t = &builder.addStringTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_SUFFIXTERM) { + t = &builder.addSuffixTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_NUMTERM) { + if (term[0] == '[' || term[0] == '<' || term[0] == '>') { + Range range(term); + t = &builder.addRangeTerm(range, view, id, weight); + } else if (term[0] == '(') { + Location loc(term); + t = &builder.addLocationTerm(loc, view, id, weight); + } else { + t = &builder.addNumberTerm(term, view, id, weight); + } + } else if (type == ParseItem::ITEM_PREDICATE_QUERY) { + t = &builder.addPredicateQuery( + queryStack.getPredicateQueryTerm(), + view, id, weight); + } else if (type == ParseItem::ITEM_REGEXP) { + t = &builder.addRegExpTerm(term, view, id, weight); + } else { + LOG(error, "Unable to create query tree from stack dump. " + "node type = %d.", type); + } + } + if (t) { + t->setTermIndex(queryStack.getTermIndex()); + if (queryStack.getFlags() & ParseItem::IFLAG_NORANK) { + t->setRanked(false); + } + if (queryStack.getFlags() & ParseItem::IFLAG_NOPOSITIONDATA) { + t->setPositionData(false); + } + } + } + if (builder.hasError()) { + LOG(error, "Unable to create query tree from stack dump. %s", + builder.error().c_str()); + } + return builder.build(); + } +}; + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h new file mode 100644 index 00000000000..fc11856f564 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h @@ -0,0 +1,59 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
/**
 * Use this class to visit all term nodes by deriving from this class
 * and implementing a single template member function:
 * template void visitTerm(TermType &n);
 *
 * This class uses the curiously recurring template pattern to know
 * its own derived class that has the visitTerm template member
 * function.
 *
 * Every visit() overload below simply forwards its node to visitTerm()
 * on the derived class via myVisit().
 *
 * NOTE(review): the template parameter lists (after each 'template'
 * keyword, on the base class and inside static_cast) are not present in
 * the reviewed copy of this file - confirm against the real header.
 */
template
class TemplateTermVisitor : public CustomTypeTermVisitor {
    // Single forwarding point: downcast to the derived visitor and hand the
    // node to its visitTerm() member template.
    template
    void myVisit(TermNode &n) {
        static_cast(*this).template visitTerm(n);
    }

    // Plain (childless) term nodes.
    virtual void visit(typename NodeTypes::NumberTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::LocationTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::PrefixTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::RangeTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::StringTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::SubstringTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::SuffixTerm &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::PredicateQuery &n) { myVisit(n); }
    virtual void visit(typename NodeTypes::RegExpTerm &n) { myVisit(n); }

    // Phrases are terms with children. This visitor will not visit
    // the phrase's children, unless this member function is
    // overridden to do so.
    virtual void visit(typename NodeTypes::Phrase &n) { myVisit(n); }

    // WeightedSetTerms are terms with children. This visitor will not visit
    // the weighted set's children, unless this member function is
    // overridden to do so.
    virtual void visit(typename NodeTypes::WeightedSetTerm &n) { myVisit(n); }

    // DotProducts have children. This visitor will not visit the dot
    // product's children, unless this member function is overridden
    // to do so.
    virtual void visit(typename NodeTypes::DotProduct &n) { myVisit(n); }

    // WandTerms have children. This visitor will not visit the wand
    // term's children, unless this member function is overridden
    // to do so.
    virtual void visit(typename NodeTypes::WandTerm &n) { myVisit(n); }
};
+ */ +class Term +{ + vespalib::string _view; + int32_t _id; + Weight _weight; + int32_t _term_index; + bool _ranked; + bool _position_data; + +public: + virtual ~Term() = 0; + + void setTermIndex(int32_t term_index) { _term_index = term_index; } + void setRanked(bool ranked) { _ranked = ranked; } + void setPositionData(bool position_data) { _position_data = position_data; } + + void setStateFrom(const Term& other) { + setTermIndex(other.getTermIndex()); + setRanked(other.isRanked()); + setPositionData(other.usePositionData()); + // too late to copy this state: + assert(_view == other.getView()); + assert(_id == other.getId()); + assert(_weight == other.getWeight()); + } + + const vespalib::string & getView() const { return _view; } + Weight getWeight() const { return _weight; } + int32_t getId() const { return _id; } + int32_t getTermIndex() const { return _term_index; } + bool isRanked() const { return _ranked; } + bool usePositionData() const { return _position_data; } + +protected: + Term(const vespalib::stringref &view, int32_t id, Weight weight); +}; + +/** + * Generic functionality for most of Term's derived classes. + */ +template +class TermBase : public Node, public Term { + T _term; + +public: + typedef T Type; + + virtual ~TermBase() = 0; + const T &getTerm() const { return _term; } + +protected: + TermBase(T term, const vespalib::stringref &view, int32_t id, Weight weight) + : Term(view, id, weight), + _term(std::move(term)) { + } +}; + +template +TermBase::~TermBase() {} + +} // namespace query +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp new file mode 100644 index 00000000000..3da7aa59b7b --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Definitions of the term node destructors. termnodes.h declares each of
// them pure virtual ("= 0"); a pure virtual destructor still needs a body
// because derived-class destructors call it, so the (empty) bodies live here.

NumberTerm::~NumberTerm() {}

PrefixTerm::~PrefixTerm() {}

RangeTerm::~RangeTerm() {}

StringTerm::~StringTerm() {}

SubstringTerm::~SubstringTerm() {}

SuffixTerm::~SuffixTerm() {}

LocationTerm::~LocationTerm() {}

RegExpTerm::~RegExpTerm() {}
/**
 * Term node carrying a predicate query. Unlike the other term nodes in
 * this header it takes its term as a PredicateQueryTerm::UP and moves it
 * into the base class, and it does not declare a pure virtual destructor.
 *
 * NOTE(review): the template arguments of QueryNodeMixin are not present
 * in the reviewed copy of this file - confirm against the real header.
 */
class PredicateQuery : public QueryNodeMixin >
{
public:
    PredicateQuery(PredicateQueryTerm::UP term, const vespalib::stringref &view,
                   int32_t id, Weight weight)
        : QueryNodeMixinType(std::move(term), view, id, weight)
    {}
};

//-----------------------------------------------------------------------------

/**
 * Term node for a regular expression term. The constructor forwards term,
 * view, id and weight unchanged to the QueryNodeMixinType base.
 *
 * NOTE(review): the template arguments of QueryNodeMixin are not present
 * in the reviewed copy of this file - confirm against the real header.
 */
class RegExpTerm : public QueryNodeMixin
{
public:
    RegExpTerm(const Type &term, const vespalib::stringref &view,
               int32_t id, Weight weight)
        : QueryNodeMixinType(term, view, id, weight)
    {}
    // Pure virtual; the body is defined out of line in termnodes.cpp.
    virtual ~RegExpTerm() = 0;
};
namespace search {
namespace query {

/**
 * The weight attached to a query item (term, phrase, equiv, ...).
 * Stored and normally used as an integer percentage.
 */
class Weight
{
public:
    /**
     * @param value initial weight in percent; use 100 unless a specific
     *              value has been set.
     **/
    explicit Weight(int32_t value)
        : _weight(value)
    {
    }

    /**
     * Replaces the weight.
     * @param value new weight in percent.
     **/
    void setPercent(int32_t value)
    {
        _weight = value;
    }

    /**
     * @return the weight in percent.
     **/
    int32_t percent() const
    {
        return _weight;
    }

    /**
     * @return the weight as a multiplier, where 100 percent maps to 1.0.
     **/
    double multiplier() const
    {
        return 0.01 * percent();
    }

    /** Two weights are equal when their percent values are equal. */
    bool operator== (const Weight& other) const
    {
        return percent() == other.percent();
    }

private:
    int32_t _weight;
};

} // namespace query
} // namespace search

/** Adds two weights by adding their percent values. */
inline search::query::Weight operator+(const search::query::Weight& a, const search::query::Weight& b)
{
    const int32_t sum = a.percent() + b.percent();
    return search::query::Weight(sum);
}
+vespa_add_library(searchlib_queryeval + SOURCES + andnotsearch.cpp + andsearch.cpp + blueprint.cpp + booleanmatchiteratorwrapper.cpp + create_blueprint_visitor_helper.cpp + document_weight_search_iterator.cpp + dot_product_blueprint.cpp + dot_product_search.cpp + emptysearch.cpp + equiv_blueprint.cpp + equivsearch.cpp + fake_requestcontext.cpp + fake_result.cpp + fake_search.cpp + fake_searchable.cpp + field_spec.cpp + get_weight_from_node.cpp + hitcollector.cpp + intermediate_blueprints.cpp + isourceselector.cpp + iterator_pack.cpp + iterators.cpp + leaf_blueprints.cpp + monitoring_dump_iterator.cpp + monitoring_search_iterator.cpp + multibitvectoriterator.cpp + multisearch.cpp + nearsearch.cpp + orsearch.cpp + predicate_blueprint.cpp + predicate_search.cpp + ranksearch.cpp + searchable.cpp + searchiterator.cpp + simple_phrase_blueprint.cpp + simple_phrase_search.cpp + simpleresult.cpp + simplesearch.cpp + sourceblendersearch.cpp + split_float.cpp + termasstring.cpp + termwise_blueprint_helper.cpp + termwise_search.cpp + truesearch.cpp + unpackinfo.cpp + weighted_set_term_blueprint.cpp + weighted_set_term_search.cpp + $ + INSTALL lib64 + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/queryeval/OWNERS b/searchlib/src/vespa/searchlib/queryeval/OWNERS new file mode 100644 index 00000000000..12b533ec610 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/OWNERS @@ -0,0 +1 @@ +havardpe diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp new file mode 100644 index 00000000000..58a33c26d9f --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp @@ -0,0 +1,163 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "andnotsearch.h" + +namespace search { +namespace queryeval { + +void +AndNotSearch::doSeek(uint32_t docid) +{ + const Children & children(getChildren()); + if (!children[0]->seek(docid)) { + return; // not match in positive subtree + } + for (uint32_t i = 1; i < children.size(); ++i) { + if (children[i]->seek(docid)) { + return; // match in negative subtree + } + } + setDocId(docid); // we have a match +} + +void +AndNotSearch::doUnpack(uint32_t docid) +{ + getChildren()[0]->doUnpack(docid); +} + +SearchIterator::UP +AndNotSearchStrictBase::andWith(UP filter, uint32_t estimate) +{ + return getChildren()[0]->andWith(std::move(filter), estimate); +} + +namespace { +class AndNotSearchStrict : public AndNotSearchStrictBase +{ +private: + template + void internalSeek(uint32_t docid); +protected: + void doSeek(uint32_t docid) override { + internalSeek(docid); + } +public: + /** + * Create a new strict AndNot Search with the given children. + * A strict AndNot can assume that the first child below is also strict. + * No such assumptions can be made about the * other children. 
/**
 * Strict seek: positions this iterator on docid if it is a hit, otherwise
 * on the next document accepted by the positive child (children[0]) that
 * none of the negative children (children[1..]) match, or on the positive
 * child's end position when there is no such document.
 *
 * When doSeekOnlyOnPositiveChild is set, the positive child is advanced
 * with doSeek() and its resulting docid is compared against docid;
 * otherwise seek() is used and its return value decides.
 *
 * NOTE(review): the template parameter list declaring
 * doSeekOnlyOnPositiveChild is not present in the reviewed copy of this
 * file - confirm against the real source.
 */
template
void
AndNotSearchStrict::internalSeek(uint32_t docid)
{
    const Children & children(getChildren());
    bool hit;
    if (doSeekOnlyOnPositiveChild) {
        children[0]->doSeek(docid);
        hit = (children[0]->getDocId() == docid);
    } else {
        hit = children[0]->seek(docid);
    }
    // A match in any negative child disqualifies docid.
    for (uint32_t i = 1; hit && i < children.size(); ++i) {
        if (children[i]->seek(docid)) {
            hit = false;
        }
    }
    if (hit) {
        setDocId(docid);
        return;
    }
    // docid itself is not a hit: walk forward from wherever the positive
    // child currently stands.
    uint32_t nextId = children[0]->getDocId();
    while (!isAtEnd(nextId)) {
        bool foundHit = true;
        for (uint32_t i = 1; i < children.size(); ++i) {
            if (children[i]->seek(nextId)) {
                // Rejected by a negative child; try the following docid.
                foundHit = false;
                ++nextId;
                break;
            }
        }
        if (foundHit) {
            break;
        } else {
            // Let the positive child pick its next candidate at or after
            // the incremented id.
            children[0]->doSeek(nextId);
            nextId = children[0]->getDocId();
        }
    }
    setDocId(nextId);
}
OptimizedAndNotForBlackListing::isBlackListIterator(children[1])) { + return new OptimizedAndNotForBlackListing(children); + } else { + return new AndNotSearchStrict(children); + } + } else { + return new AndNotSearch(children); + } +} + +BitVector::UP +AndNotSearch::get_hits(uint32_t begin_id) { + const Children &children = getChildren(); + BitVector::UP result = children.front()->get_hits(begin_id); + if (children.size() > 1) { + BitVector::UP not_result = children[1]->get_hits(begin_id); + for (size_t i = 2; i < children.size(); ++i) { + children[i]->or_hits_into(*not_result, begin_id); + } + const BitVector &rhs = *not_result; + result->andNotWith(rhs); + } + return result; +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h new file mode 100644 index 00000000000..4c6eae0693c --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "multisearch.h" +#include +#include + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the AndNot search operation. + **/ +class AndNotSearch : public MultiSearch +{ +protected: + void doSeek(uint32_t docid) override; + void doUnpack(uint32_t docid) override; + Trinary is_strict() const override { return Trinary::False; } + + /** + * Create a new AndNot Search with the given children. + *A AndNot has no strictness assumptions about its children. + * + * @param children the search objects we are andnot'ing + **/ + AndNotSearch(const Children & children) : MultiSearch(children) { } + +public: + // Caller takes ownership of the returned SearchIterator. 
/**
 * Common base for the strict AndNot variants: reports itself as strict
 * and overrides andWith(), which is defined in andnotsearch.cpp.
 */
class AndNotSearchStrictBase : public AndNotSearch
{
protected:
    // Forwards the children unchanged to AndNotSearch.
    AndNotSearchStrictBase(const Children & children) : AndNotSearch(children) { }
private:
    // Overrides the Trinary::False answer given by AndNotSearch.
    Trinary is_strict() const override { return Trinary::True; }
    UP andWith(UP filter, uint32_t estimate) override;
};
blackList()->seekFast(curr)) { + return curr; + } + curr++; + } else { + return search::endDocId; + } + } + } + virtual void doSeek(uint32_t docid); + virtual void doUnpack(uint32_t docid); +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp new file mode 100644 index 00000000000..9217c90ad59 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/andsearch.cpp @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "andsearch.h" +#include "andsearchstrict.h" + +namespace search { +namespace queryeval { + +BitVector::UP +AndSearch::get_hits(uint32_t begin_id) { + const Children &children = getChildren(); + BitVector::UP result = children.front()->get_hits(begin_id); + for (size_t i = 1; i < children.size(); ++i) { + children[i]->and_hits_into(*result, begin_id); + } + return result; +} + +SearchIterator::UP AndSearch::andWith(UP filter, uint32_t estimate_) +{ + return offerFilterToChildren(std::move(filter), estimate_); +} + +SearchIterator::UP AndSearch::offerFilterToChildren(UP filter, uint32_t estimate_) +{ + const Children & children(getChildren()); + for (uint32_t i(0); filter && (i < children.size()); ++i) { + filter = children[i]->andWith(std::move(filter), estimate_); + } + return filter; +} + +void AndSearch::doUnpack(uint32_t docid) +{ + const Children & children(getChildren()); + for (uint32_t i(0); i < children.size(); ++i) { + children[i]->doUnpack(docid); + } +} + +AndSearch::AndSearch(const Children & children) : + MultiSearch(children), + _estimate(std::numeric_limits::max()) +{ +} + +namespace { + +class FullUnpack +{ +public: + void unpack(uint32_t docid, const MultiSearch & search) { + const MultiSearch::Children & children(search.getChildren()); + for (uint32_t i(0); i < children.size(); ++i) { + 
children[i]->doUnpack(docid); + } + } + bool needUnpack(size_t index) const { + (void) index; + return true; + } + void onRemove(size_t index) { (void) index; } + void onInsert(size_t index) { (void) index; } +}; + +class SelectiveUnpack +{ +public: + SelectiveUnpack(const UnpackInfo & unpackInfo) : + _unpackInfo(unpackInfo) + { } + void unpack(uint32_t docid, const MultiSearch & search) { + auto &children = search.getChildren(); + _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);}, + children.size()); + } + bool needUnpack(size_t index) const { + return _unpackInfo.needUnpack(index); + } + void onRemove(size_t index) { + _unpackInfo.remove(index); + } + void onInsert(size_t index) { + _unpackInfo.insert(index); + } +private: + UnpackInfo _unpackInfo; +}; + +} + +AndSearch * +AndSearch::create(const MultiSearch::Children &children, bool strict) +{ + UnpackInfo unpackInfo; + unpackInfo.forceAll(); + return create(children, strict, unpackInfo); +} + +AndSearch * +AndSearch::create(const MultiSearch::Children &children, bool strict, const UnpackInfo & unpackInfo) { + if (strict) { + if (unpackInfo.unpackAll()) { + return new AndSearchStrict(children, FullUnpack()); + } else if(unpackInfo.empty()) { + return new AndSearchStrict(children, NoUnpack()); + } else { + return new AndSearchStrict(children, SelectiveUnpack(unpackInfo)); + } + } else { + if (unpackInfo.unpackAll()) { + return new AndSearchNoStrict(children, FullUnpack()); + } else if (unpackInfo.empty()) { + return new AndSearchNoStrict(children, NoUnpack()); + } else { + return new AndSearchNoStrict(children, SelectiveUnpack(unpackInfo)); + } + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearch.h b/searchlib/src/vespa/searchlib/queryeval/andsearch.h new file mode 100644 index 00000000000..d15e08213f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/andsearch.h @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. 
/**
 * A simple implementation of the And search operation.
 *
 * Concrete instances are obtained through create(); the constructor is
 * protected.
 **/
class AndSearch : public MultiSearch
{
public:
    // Caller takes ownership of the returned SearchIterator.
    // The three-argument form lets the caller say which children need
    // unpacking; the two-argument form forces unpacking of all children.
    static AndSearch *create(const Children &children, bool strict, const UnpackInfo & unpackInfo);
    static AndSearch *create(const Children &children, bool strict);

    // Returns the first child's hits ANDed with every other child's hits.
    BitVector::UP get_hits(uint32_t begin_id) override;

    // Setter returns *this so calls can be chained.
    AndSearch & estimate(uint32_t est) { _estimate = est; return *this; }
    uint32_t estimate() const { return _estimate; }
protected:
    AndSearch(const Children & children);
    // Unpacks every child.
    void doUnpack(uint32_t docid) override;
    // Default implementation forwards to offerFilterToChildren().
    UP andWith(UP filter, uint32_t estimate) override;
    // Offers the filter to each child in turn until one takes it; returns
    // the filter if nobody did.
    UP offerFilterToChildren(UP filter, uint32_t estimate);
private:
    bool isAnd() const override { return true; }
    uint32_t _estimate;
};
+ **/ + AndSearchNoStrict(const Children & children, const Unpack & unpacker) : + AndSearch(children), + _unpacker(unpacker) + { } + +protected: + void doSeek(uint32_t docid) override { + const Children & children(getChildren()); + for (uint32_t i = 0; i < children.size(); ++i) { + if (!children[i]->seek(docid)) { + return; + } + } + setDocId(docid); + } + Trinary is_strict() const override { return Trinary::False; } + + virtual void doUnpack(uint32_t docid) { + _unpacker.unpack(docid, *this); + } + virtual void onRemove(size_t index) { + _unpacker.onRemove(index); + } + virtual void onInsert(size_t index) { + _unpacker.onInsert(index); + } + virtual bool needUnpack(size_t index) const { + return _unpacker.needUnpack(index); + } + +private: + Unpack _unpacker; +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h b/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h new file mode 100644 index 00000000000..7f275e9d585 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/andsearchstrict.h @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "andsearchnostrict.h" + +namespace search { +namespace queryeval { + +/** + * A simple strict implementation of the And search operation. 
/**
 * Moves this strict AND iterator forward to the next document that all
 * children agree on, or to the end.
 *
 * @param failedChildIndex index of the child that did not land on the
 *        docid being sought; 0 means the first child is already positioned
 *        and only the remaining children need checking.
 *
 * NOTE(review): both template parameter lists (the class's and the one
 * declaring doSeekOnly) are not present in the reviewed copy of this file
 * - confirm against the real header.
 */
template
template
void
AndSearchStrict::advance(uint32_t failedChildIndex)
{
    const MultiSearch::Children & children(this->getChildren());
    SearchIterator & firstChild(*children[0]);
    bool foundHit(false);
    if (failedChildIndex != 0) {
        // A later child failed: move the first child past its current
        // position, and at least up to where the failing child now stands.
        if (doSeekOnly) {
            if (__builtin_expect(children[failedChildIndex]->isAtEnd(), false)) {
                // The failing child is exhausted, so the AND is too.
                this->setAtEnd();
                return;
            }
            firstChild.doSeek(std::max(firstChild.getDocId() + 1, children[failedChildIndex]->getDocId()));
        } else {
            firstChild.seek(std::max(firstChild.getDocId() + 1, children[failedChildIndex]->getDocId()));
        }
    }
    // The first child's position is the current candidate.
    uint32_t nextId(firstChild.getDocId());
    while (!foundHit && !this->isAtEnd(nextId)) {
        foundHit = true;
        for (uint32_t i(1); foundHit && (i < children.size()); ++i) {
            SearchIterator & child(*children[i]);
            if (!(foundHit = child.seek(nextId))) {
                if (__builtin_expect(!child.isAtEnd(), true)) {
                    // Candidate rejected: pick a new one from the first
                    // child, no earlier than the rejecting child's docid.
                    firstChild.doSeek(std::max(nextId+1, child.getDocId()));
                    nextId = firstChild.getDocId();
                } else {
                    this->setAtEnd();
                    return;
                }
            }
        }
    }
    this->setDocId(nextId);
}
docid) { + advance(i); + return; + } + } + this->setDocId(docid); +} + +template +SearchIterator::UP +AndSearchStrict::andWith(SearchIterator::UP filter, uint32_t estimate_) +{ + filter = this->getChildren()[0]->andWith(std::move(filter), estimate_); + if (filter) { + if ((estimate_ < this->estimate()) && (filter->is_strict() == Trinary::True)) { + this->insert(0, std::move(filter)); + } else { + filter = this->offerFilterToChildren(std::move(filter), estimate_); + if (filter) { + this->insert(1, std::move(filter)); + } + } + } + return filter; // Should always be empty, returning it incase logic changes. +} + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h b/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h new file mode 100644 index 00000000000..70b0ad40bae --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/begin_and_end_id.h @@ -0,0 +1,10 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +namespace search { + +static constexpr uint32_t beginDocId = 0u; +static constexpr uint32_t endDocId = 0x7fffffffu; // max signed value + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp new file mode 100644 index 00000000000..78bf883acde --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp @@ -0,0 +1,562 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".queryeval.blueprint"); +#include "blueprint.h" +#include +#include +#include +#include "leaf_blueprints.h" +#include "intermediate_blueprints.h" +#include "equiv_blueprint.h" + +#include +#include +#include + +// NB: might need to hide this from non-gcc compilers... 
+#include + +namespace search { +namespace queryeval { + +//----------------------------------------------------------------------------- + +void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) { + // replace with replacement + if (replacement.get() != nullptr) { + Blueprint *tmp = replacement.release(); + tmp->setParent(self->getParent()); + tmp->setSourceId(self->getSourceId()); + self->setParent(0); + replacement.reset(self); + self = tmp; + } + // replace with empty blueprint if empty + if (self->getState().estimate().empty) { + Blueprint::UP discard(self); + self = new EmptyBlueprint(discard->getState().fields()); + self->setParent(discard->getParent()); + self->setSourceId(discard->getSourceId()); + } +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +Blueprint::max(const std::vector &data) +{ + HitEstimate est; + for (size_t i = 0; i < data.size(); ++i) { + if (est.empty || est.estHits < data[i].estHits) { + est = data[i]; + } + } + return est; +} + +Blueprint::HitEstimate +Blueprint::min(const std::vector &data) +{ + HitEstimate est; + for (size_t i = 0; i < data.size(); ++i) { + if (i == 0 || data[i].empty || data[i].estHits < est.estHits) { + est = data[i]; + } + } + return est; +} + +void +Blueprint::notifyChange() +{ + if (_parent != 0) { + _parent->notifyChange(); + } +} + +Blueprint::Blueprint() + : _parent(0), + _sourceId(0xffffffff), + _docid_limit(0) +{ +} + +Blueprint::Blueprint(const Blueprint &x) + : _parent(0), + _sourceId(x.getSourceId()), + _docid_limit(x.get_docid_limit()) +{ +} + +Blueprint::~Blueprint() +{ +} + +Blueprint::UP +Blueprint::optimize(Blueprint::UP bp) { + Blueprint *root = bp.release(); + root->optimize(root); + return Blueprint::UP(root); +} + +void +Blueprint::optimize_self() +{ +} + +Blueprint::UP +Blueprint::get_replacement() +{ + return Blueprint::UP(); +} + +const Blueprint & +Blueprint::root() const +{ + const Blueprint *bp = this; + while 
(bp->_parent != nullptr) { + bp = bp->_parent; + } + return *bp; +} + +vespalib::string +Blueprint::asString() const +{ + vespalib::ObjectDumper dumper; + visit(dumper, "", this); + return dumper.toString(); +} + +vespalib::string +Blueprint::getClassName() const +{ + vespalib::string name(typeid(*this).name()); + int status = 0; + size_t size = 0; + // NB: might need to hide this from non-gcc compilers... + char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status); + vespalib::string result(unmangled); + free(unmangled); + return result; +} + +void +Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + const State &state = getState(); + visitor.visitBool("isTermLike", state.isTermLike()); + if (state.isTermLike()) { + visitor.openStruct("fields", "FieldList"); + for (size_t i = 0; i < state.numFields(); ++i) { + const FieldSpecBase &spec = state.field(i); + visitor.openStruct(vespalib::make_string("[%zu]", i), "Field"); + // visitor.visitString("name", spec.getName()); + visitor.visitInt("fieldId", spec.getFieldId()); + visitor.visitInt("handle", spec.getHandle()); + visitor.visitBool("isFilter", spec.isFilter()); + visitor.closeStruct(); + } + visitor.closeStruct(); + } + visitor.openStruct("estimate", "HitEstimate"); + visitor.visitBool("empty", state.estimate().empty); + visitor.visitInt("estHits", state.estimate().estHits); + visitor.visitInt("tree_size", state.tree_size()); + visitor.visitInt("allow_termwise_eval", state.allow_termwise_eval()); + visitor.closeStruct(); + visitor.visitInt("sourceId", _sourceId); + visitor.visitInt("docid_limit", _docid_limit); +} + +namespace blueprint { + +//----------------------------------------------------------------------------- + +void +StateCache::notifyChange() +{ + Blueprint::notifyChange(); + _stale = true; +} + +const Blueprint::State & +StateCache::getState() const +{ + if (_stale) { + calculateState().swap(_state); + _stale = false; + } + return _state; +} + +} // namespace blueprint 
+ +//----------------------------------------------------------------------------- + +IntermediateBlueprint::~IntermediateBlueprint() +{ + while (!_children.empty()) { + delete _children.back(); + _children.pop_back(); + } +} + +void +IntermediateBlueprint::setDocIdLimit(uint32_t limit) +{ + Blueprint::setDocIdLimit(limit); + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->setDocIdLimit(limit); + } +} + +Blueprint::HitEstimate +IntermediateBlueprint::calculateEstimate() const +{ + std::vector estimates; + estimates.reserve(_children.size()); + for (size_t i = 0; i < _children.size(); ++i) { + estimates.push_back(_children[i]->getState().estimate()); + } + return combine(estimates); +} + +uint32_t +IntermediateBlueprint::calculate_tree_size() const +{ + uint32_t nodes = 1; + for (size_t i = 0; i < _children.size(); ++i) { + nodes += _children[i]->getState().tree_size(); + } + return nodes; +} + +bool +IntermediateBlueprint::infer_allow_termwise_eval() const +{ + if (!supports_termwise_children()) { + return false; + } + for (size_t i = 0; i < _children.size(); ++i) { + if (!_children[i]->getState().allow_termwise_eval()) { + return false; + } + } + return true; +}; + +size_t +IntermediateBlueprint::count_termwise_nodes(const UnpackInfo &unpack) const +{ + size_t termwise_nodes = 0; + for (size_t i = 0; i < _children.size(); ++i) { + const State &state = _children[i]->getState(); + if (state.allow_termwise_eval() && !unpack.needUnpack(i)) { + termwise_nodes += state.tree_size(); + } + } + return termwise_nodes; +} + +IntermediateBlueprint::IndexList +IntermediateBlueprint::find(const IPredicate & pred) const +{ + IndexList list; + for (size_t i = 0; i < _children.size(); ++i) { + if (pred.check(*_children[i])) { + list.push_back(i); + } + } + return list; +} + +FieldSpecBaseList +IntermediateBlueprint::mixChildrenFields() const +{ + typedef std::map Map; + typedef Map::value_type MapVal; + typedef Map::iterator MapPos; + typedef std::pair MapRes; + + 
Map fieldMap; + FieldSpecBaseList fieldList; + for (size_t i = 0; i < _children.size(); ++i) { + const State &childState = _children[i]->getState(); + if (!childState.isTermLike()) { + return fieldList; // empty: non-term-like child + } + for (size_t j = 0; j < childState.numFields(); ++j) { + const FieldSpecBase &f = childState.field(j); + MapRes res = fieldMap.insert(MapVal(f.getFieldId(), &f)); + if (!res.second) { + const FieldSpecBase &other = *(res.first->second); + if (other.getHandle() != f.getHandle()) { + return fieldList; // empty: conflicting children + } + } + } + } + for (MapPos pos = fieldMap.begin(); pos != fieldMap.end(); ++pos) { + fieldList.add(*(pos->second)); + } + return fieldList; +} + +Blueprint::State +IntermediateBlueprint::calculateState() const +{ + State state(exposeFields()); + state.estimate(calculateEstimate()); + state.allow_termwise_eval(infer_allow_termwise_eval()); + state.tree_size(calculate_tree_size()); + return state; +} + +bool +IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const +{ + if (root().hit_ratio() <= match_limit) { + return false; // global hit density too low + } + if (getState().allow_termwise_eval() && unpack.empty() && + has_parent() && getParent()->supports_termwise_children()) + { + return false; // higher up will be better + } + return (count_termwise_nodes(unpack) > 1); +} + +void +IntermediateBlueprint::optimize(Blueprint* &self) +{ + assert(self == this); + if (should_optimize_children()) { + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->optimize(_children[i]); + } + } + optimize_self(); + sort(_children); + maybe_eliminate_self(self, get_replacement()); +} + +SearchIterator::UP +IntermediateBlueprint::createSearch(fef::MatchData &md, bool strict) const +{ + MultiSearch::Children subSearches; + subSearches.reserve(_children.size()); + for (size_t i = 0; i < _children.size(); ++i) { + bool strictChild = (strict && inheritStrict(i)); + 
SearchIterator::UP search = _children[i]->createSearch(md, strictChild); + subSearches.push_back(search.release()); + } + return createIntermediateSearch(subSearches, strict, md); +} + +IntermediateBlueprint::IntermediateBlueprint() + : _children() +{ +} + +IntermediateBlueprint:: +IntermediateBlueprint(const IntermediateBlueprint &x) + : StateCache(x), + _children() +{ + // children are not copied +} + +const Blueprint & +IntermediateBlueprint::getChild(size_t n) const +{ + assert(n < _children.size()); + return *_children[n]; +} + +Blueprint & +IntermediateBlueprint::getChild(size_t n) +{ + assert(n < _children.size()); + return *_children[n]; +} + +IntermediateBlueprint & +IntermediateBlueprint::addChild(Blueprint::UP child) +{ + _children.push_back(child.get()); + child.release()->setParent(this); + notifyChange(); + return *this; +} + +Blueprint::UP +IntermediateBlueprint::removeChild(size_t n) +{ + assert(n < _children.size()); + Blueprint::UP ret(_children[n]); + _children.erase(_children.begin() + n); + ret->setParent(0); + notifyChange(); + return ret; +} + +IntermediateBlueprint & +IntermediateBlueprint::insertChild(size_t n, Blueprint::UP child) +{ + assert(n <= _children.size()); + _children.insert(_children.begin() + n, child.get()); + child.release()->setParent(this); + notifyChange(); + return *this; +} + +void +IntermediateBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + StateCache::visitMembers(visitor); + visit(visitor, "children", _children); +} + +void +IntermediateBlueprint::fetchPostings(bool strict) +{ + for (size_t i = 0; i < _children.size(); ++i) { + bool strictChild = (strict && inheritStrict(i)); + _children[i]->fetchPostings(strictChild); + } +} + +namespace { + +bool +areAnyParentsEquiv(const Blueprint * node) +{ + return (node == NULL) + ? false + : (dynamic_cast(node) != NULL) + ? 
true + : areAnyParentsEquiv(node->getParent()); +} + +bool +canBlueprintSkipUnpack(const Blueprint & bp, const fef::MatchData & md) +{ + return (bp.getState().numFields() != 0) + || (( dynamic_cast(&bp) != nullptr) + && static_cast(bp).calculateUnpackInfo(md).empty()); +} + +} + +UnpackInfo +IntermediateBlueprint::calculateUnpackInfo(const fef::MatchData & md) const +{ + UnpackInfo unpackInfo; + bool allNeedUnpack(true); + if ( ! areAnyParentsEquiv(getParent()) ) { + for (size_t i = 0; i < childCnt(); ++i) { + if (isPositive(i)) { + const Blueprint & child = getChild(i); + const State &cs = child.getState(); + bool canSkipUnpack(canBlueprintSkipUnpack(child, md)); + LOG(debug, "Child[%ld] has %ld fields. canSkipUnpack='%s'.", i, cs.numFields(), canSkipUnpack ? "true" : "false"); + for (size_t j = 0; canSkipUnpack && (j < cs.numFields()); ++j) { + if ( ! cs.field(j).resolve(md)->isNotNeeded()) { + LOG(debug, "Child[%ld].field(%ld).fieldId=%d need unpack.", i, j, cs.field(j).getFieldId()); + canSkipUnpack = false; + } + } + if ( canSkipUnpack) { + allNeedUnpack = false; + } else { + unpackInfo.add(i); + } + } else { + allNeedUnpack = false; + } + } + } + if (allNeedUnpack) { + unpackInfo.forceAll(); + } + LOG(spam, "UnpackInfo for %s \n is \n %s", asString().c_str(), unpackInfo.toString().c_str()); + return unpackInfo; +} + + +//----------------------------------------------------------------------------- + +void +LeafBlueprint::fetchPostings(bool strict) +{ + (void) strict; +} + +SearchIterator::UP +LeafBlueprint::createSearch(fef::MatchData &md, bool strict) const +{ + const State &state = getState(); + fef::TermFieldMatchDataArray tfmda; + tfmda.reserve(state.numFields()); + for (size_t i = 0; i < state.numFields(); ++i) { + tfmda.add(state.field(i).resolve(md)); + } + return createLeafSearch(tfmda, strict); +} + +void +LeafBlueprint::optimize(Blueprint* &self) +{ + assert(self == this); + optimize_self(); + maybe_eliminate_self(self, get_replacement()); +} + 
+void +LeafBlueprint::setEstimate(HitEstimate est) +{ + _state.estimate(est); + notifyChange(); +} + +void +LeafBlueprint::set_allow_termwise_eval(bool value) +{ + _state.allow_termwise_eval(value); + notifyChange(); +} + +void +LeafBlueprint::set_tree_size(uint32_t value) +{ + _state.tree_size(value); + notifyChange(); +} + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search + +//----------------------------------------------------------------------------- + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::Blueprint *obj) +{ + if (obj != 0) { + self.openStruct(name, obj->getClassName()); + obj->visitMembers(self); + self.closeStruct(); + } else { + self.visitNull(name); + } +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::Blueprint &obj) +{ + visit(self, name, &obj); +} diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h new file mode 100644 index 00000000000..82a7aa642a5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -0,0 +1,314 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "field_spec.h" + +namespace vespalib { class ObjectVisitor; }; + +namespace search { +namespace queryeval { + + +/** + * A Blueprint is an intermediate representation of a search. More + * concretely, it is a tree of search iterator factories annotated + * with meta-data about the fields to be searched, how match + * information is to be exposed to the ranking framework and estimates + * for the number of results that will be produced. 
Intermediate + * operations are implemented by extending the blueprint::Intermediate + * template class. Leaf operations are implemented by extending the + * blueprint::Leaf template class. + **/ +class Blueprint +{ +public: + typedef std::unique_ptr UP; + + struct HitEstimate { + uint32_t estHits; + bool empty; + + HitEstimate() : estHits(0), empty(true) {} + HitEstimate(uint32_t estHits_, bool empty_) + : estHits(estHits_), empty(empty_) {} + + bool operator < (const HitEstimate &other) const { + if (empty == other.empty) { + return (estHits < other.estHits); + } else { + return empty; + } + } + }; + + class State + { + private: + FieldSpecBaseList _fields; + HitEstimate _estimate; + uint32_t _tree_size; + bool _allow_termwise_eval; + + public: + State(const FieldSpecBaseList &fields_in) + : _fields(fields_in), + _estimate(), + _tree_size(1), + _allow_termwise_eval(true) + { + } + void swap(State & rhs) { + _fields.swap(rhs._fields); + std::swap(_estimate, rhs._estimate); + std::swap(_tree_size, rhs._tree_size); + std::swap(_allow_termwise_eval, rhs._allow_termwise_eval); + } + + bool isTermLike() const { return !_fields.empty(); } + const FieldSpecBaseList &fields() const { return _fields; } + + size_t numFields() const { return _fields.size(); } + const FieldSpecBase &field(size_t idx) const { return _fields[idx]; } + const FieldSpecBase *lookupField(uint32_t fieldId) const { + for (size_t i = 0; i < _fields.size(); ++i) { + if (_fields[i].getFieldId() == fieldId) { + return &_fields[i]; + } + } + return nullptr; + } + + void estimate(HitEstimate est) { _estimate = est; } + HitEstimate estimate() const { return _estimate; } + double hit_ratio(uint32_t docid_limit) const { + uint32_t total_hits = _estimate.estHits; + uint32_t total_docs = std::max(total_hits, docid_limit); + return double(total_hits) / double(total_docs); + } + void tree_size(uint32_t value) { _tree_size = value; } + uint32_t tree_size() const { return _tree_size; } + void 
allow_termwise_eval(bool value) { _allow_termwise_eval = value; } + bool allow_termwise_eval() const { return _allow_termwise_eval; } + }; + + // utility that just takes maximum estimate + static HitEstimate max(const std::vector &data); + + // utility that just takes minium estimate + static HitEstimate min(const std::vector &data); + + // utility to get the greater estimate to sort first + struct GreaterEstimate { + bool operator () (Blueprint * const &a, Blueprint * const &b) const { + return (b->getState().estimate() < a->getState().estimate()); + } + }; + + // utility to get the lesser estimate to sort first + struct LessEstimate { + bool operator () (Blueprint * const &a, const Blueprint * const &b) const { + return (a->getState().estimate() < b->getState().estimate()); + } + }; + +private: + Blueprint *_parent; + uint32_t _sourceId; + uint32_t _docid_limit; + + Blueprint &operator=(const Blueprint &); // disable + +public: + class IPredicate { + public: + virtual ~IPredicate() {} + virtual bool check(const Blueprint & bp) const = 0; + }; + + Blueprint(); + Blueprint(const Blueprint &x); + virtual ~Blueprint(); + + void setParent(Blueprint *parent) { _parent = parent; } + Blueprint *getParent() const { return _parent; } + bool has_parent() const { return (_parent != nullptr); } + + Blueprint &setSourceId(uint32_t sourceId) { _sourceId = sourceId; return *this; } + uint32_t getSourceId() const { return _sourceId; } + + virtual void setDocIdLimit(uint32_t limit) { _docid_limit = limit; } + uint32_t get_docid_limit() const { return _docid_limit; } + + virtual void notifyChange(); + + static Blueprint::UP optimize(Blueprint::UP bp); + virtual void optimize(Blueprint* &self) = 0; + virtual void optimize_self(); + virtual Blueprint::UP get_replacement(); + virtual bool should_optimize_children() const { return true; } + + virtual bool supports_termwise_children() const { return false; } + + virtual const State &getState() const = 0; + const Blueprint &root() const; 
+ + double hit_ratio() const { return getState().hit_ratio(_docid_limit); } + + virtual void fetchPostings(bool strict) = 0; + + virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const = 0; + + // for debug dumping + vespalib::string asString() const; + virtual vespalib::string getClassName() const; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; +}; + +namespace blueprint { + +//----------------------------------------------------------------------------- + +class StateCache : public Blueprint +{ +private: + mutable bool _stale; + mutable State _state; + +protected: + virtual void notifyChange(); + virtual State calculateState() const = 0; + +public: + StateCache() : _stale(true), _state(FieldSpecBaseList()) {} + StateCache(const StateCache &x) + : Blueprint(x), _stale(true), _state(FieldSpecBaseList()) {} + const State &getState() const override final; +}; + +} // namespace blueprint + +//----------------------------------------------------------------------------- + +class IntermediateBlueprint : public blueprint::StateCache +{ +public: + typedef std::vector Children; +private: + Children _children; + HitEstimate calculateEstimate() const; + uint32_t calculate_tree_size() const; + bool infer_allow_termwise_eval() const; + + size_t count_termwise_nodes(const UnpackInfo &unpack) const; + +protected: + // returns an empty collection if children have empty or + // conflicting collections of field specs. 
+ FieldSpecBaseList mixChildrenFields() const; + + State calculateState() const override final; + + virtual bool isPositive(size_t index) const { (void) index; return true; } + + bool should_do_termwise_eval(const UnpackInfo &unpack, double match_limit) const; + +public: + typedef std::vector IndexList; + IntermediateBlueprint(); + IntermediateBlueprint(const IntermediateBlueprint &x); + virtual ~IntermediateBlueprint(); + + void setDocIdLimit(uint32_t limit) override final; + + virtual void optimize(Blueprint* &self) override final; + + IndexList find(const IPredicate & check) const; + size_t childCnt() const { return _children.size(); } + const Blueprint &getChild(size_t n) const; + Blueprint &getChild(size_t n); + IntermediateBlueprint & insertChild(size_t n, Blueprint::UP child); + IntermediateBlueprint &addChild(Blueprint::UP child); + Blueprint::UP removeChild(size_t n); + virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const; + + virtual HitEstimate + combine(const std::vector &data) const = 0; + virtual FieldSpecBaseList exposeFields() const = 0; + virtual void sort(std::vector &children) const = 0; + virtual bool inheritStrict(size_t i) const = 0; + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, fef::MatchData &md) const = 0; + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + virtual void fetchPostings(bool strict); + UnpackInfo calculateUnpackInfo(const fef::MatchData & md) const; +}; + + +class LeafBlueprint : public Blueprint +{ +private: + State _state; + +protected: + virtual void optimize(Blueprint* &self) override final; + + void setEstimate(HitEstimate est); + + void set_allow_termwise_eval(bool value); + + void set_tree_size(uint32_t value); + + LeafBlueprint(const FieldSpecBaseList &fields, bool allow_termwise_eval) : _state(fields) { + _state.allow_termwise_eval(allow_termwise_eval); + } + +public: + const State &getState() const 
override final { return _state; } + + void setDocIdLimit(uint32_t limit) override final { Blueprint::setDocIdLimit(limit); } + + virtual void fetchPostings(bool strict); + + virtual SearchIterator::UP createSearch(fef::MatchData &md, bool strict) const; + + virtual SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, + bool strict) const = 0; +}; + +// for leaf nodes representing a single term +struct SimpleLeafBlueprint : LeafBlueprint { + SimpleLeafBlueprint(const FieldSpecBase &field) : LeafBlueprint(FieldSpecBaseList().add(field), true) {} + SimpleLeafBlueprint(const FieldSpecBaseList &fields) : LeafBlueprint(fields, true) {} +}; + +// for leaf nodes representing more complex structures like wand/phrase +struct ComplexLeafBlueprint : LeafBlueprint { + ComplexLeafBlueprint(const FieldSpecBase &field) : LeafBlueprint(FieldSpecBaseList().add(field), false) {} + ComplexLeafBlueprint(const FieldSpecBaseList &fields) : LeafBlueprint(fields, false) {} +}; + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::Blueprint &obj); +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::Blueprint *obj); + diff --git a/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp new file mode 100644 index 00000000000..1510716a84b --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.cpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".booleanmatchiteratorwrapper"); +#include "booleanmatchiteratorwrapper.h" +#include +#include +#include + +namespace search { +namespace queryeval { + +void +BooleanMatchIteratorWrapper::doSeek(uint32_t docid) +{ + _search->seek(docid); // use outer seek for most robustness + setDocId(_search->getDocId()); // propagate current iterator docid +} + +void +BooleanMatchIteratorWrapper::doUnpack(uint32_t docid) +{ + if (_tfmdp != 0) { // handle not having a match data (unranked, or multiple fields) + _tfmdp->reset(docid); // unpack ensures that docid is a hit + } +} + +BooleanMatchIteratorWrapper::BooleanMatchIteratorWrapper( + SearchIterator::UP search, + const fef::TermFieldMatchDataArray &matchData) + : _search(std::move(search)), + _tfmdp(0) +{ + if (matchData.size() == 1) { + _tfmdp = matchData[0]; + } +} + +void +BooleanMatchIteratorWrapper::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "search", _search); + // _match not visited +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h new file mode 100644 index 00000000000..6d4ca1abbb7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" +#include + +namespace search { +namespace queryeval { + +/** + * A term iterator wrapper used to hide detailed match + * information. Wrapping a term iterator with an instance of this + * class will ensure that the unpack method will only disclose whether + * we found a match or not. This is done by intercepting calls to the + * doUnpack method. The doSeek method will be forwarded to ensure we + * match the same set of documents. 
+ **/ +class BooleanMatchIteratorWrapper : public SearchIterator +{ +private: + SearchIterator::UP _search; + fef::TermFieldMatchData *_tfmdp; + + BooleanMatchIteratorWrapper(const BooleanMatchIteratorWrapper &); + BooleanMatchIteratorWrapper &operator=(const BooleanMatchIteratorWrapper &); + +protected: + void doSeek(uint32_t docid) override; + void doUnpack(uint32_t docid) override; + Trinary is_strict() const override { return _search->is_strict(); } + void initRange(uint32_t beginid, uint32_t endid) override { + _search->initRange(beginid, endid); + SearchIterator::initRange(_search->getDocId()+1, _search->getEndId()); + } + void resetRange() override { + _search->resetRange(); + SearchIterator::resetRange(); + } + +public: + /** + * Create a wrapper for the given search using the given term + * match data. This object will take ownership of the given search + * and delete it in the destructor. The given search must be a + * term iterator that is using the given term match data to store + * its matching details during unpack. The given term match data + * is expected to be stored inside a match data object and as such + * be managed outside of this object. The iterator will fill in + * match/non-match information only, and only if the given array + * holds exactly one reference. + * + * @param search internal search, must be a term iterator + * @param match term match data used by the internal iterator + **/ + BooleanMatchIteratorWrapper(SearchIterator::UP search, + const fef::TermFieldMatchDataArray &matchData); + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh b/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh new file mode 100755 index 00000000000..728ab7a11ce --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/create-class-cpp.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` +name=`echo $class | tr 'A-Z' 'a-z'` + +cat < +LOG_SETUP(".$name"); +#include +#include "$name.h" + +namespace search { +namespace queryeval { + +$class::$class() +{ +} + +$class::~$class() +{ +} + +} // namespace queryeval +} // namespace search +EOF diff --git a/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh b/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh new file mode 100644 index 00000000000..1548ff86daa --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/create-class-h.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +class=$1 +guard=`echo $class | tr 'a-z' 'A-Z'` + +cat < +#include "create_blueprint_visitor_helper.h" +#include + +namespace search { +namespace queryeval { + +Blueprint::UP +CreateBlueprintVisitorHelper::getResult() +{ + return _result + ? std::move(_result) + : Blueprint::UP(new EmptyBlueprint(_field)); +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h new file mode 100644 index 00000000000..a79f1f104be --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "dot_product_blueprint.h" +#include "get_weight_from_node.h" +#include "wand/parallel_weak_and_blueprint.h" +#include "searchable.h" +#include "simple_phrase_blueprint.h" +#include "split_float.h" +#include "termasstring.h" +#include "weighted_set_term_blueprint.h" +#include +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +class CreateBlueprintVisitorHelper : public search::query::QueryVisitor +{ +private: + const IRequestContext & _requestContext; + Searchable & _searchable; + FieldSpec _field; + Blueprint::UP _result; + +protected: + const IRequestContext & getRequestContext() const { return _requestContext; } + +public: + CreateBlueprintVisitorHelper(Searchable &searchable, const FieldSpec &field, const IRequestContext & requestContext) : + _requestContext(requestContext), + _searchable(searchable), + _field(field), + _result() + {} + + template + std::unique_ptr make_UP(T *p) { return std::unique_ptr(p); } + + template + void setResult(std::unique_ptr result) { _result = std::move(result); } + + Blueprint::UP getResult(); + + const FieldSpec &getField() const { return _field; } + + void visitPhrase(search::query::Phrase &n) { + SimplePhraseBlueprint *phrase = new SimplePhraseBlueprint(_field, _requestContext); + Blueprint::UP result(phrase); + for (size_t i = 0; i < n.getChildren().size(); ++i) { + FieldSpecList fields; + fields.add(phrase->getNextChildField(_field)); + phrase->addTerm(_searchable.createBlueprint(_requestContext, fields, *n.getChildren()[i])); + } + setResult(std::move(result)); + } + + template + void createWeightedSet(WS *bp, NODE &n) { + Blueprint::UP result(bp); + FieldSpecList fields; + for (size_t i = 0; i < n.getChildren().size(); ++i) { + fields.clear(); + fields.add(bp->getNextChildField(_field)); + const search::query::Node &node = *n.getChildren()[i]; + uint32_t weight = getWeightFromNode(node).percent(); + bp->addTerm(_searchable.createBlueprint(_requestContext, 
fields, node), weight); + } + setResult(std::move(result)); + } + void visitWeightedSetTerm(search::query::WeightedSetTerm &n) { + WeightedSetTermBlueprint *bp = new WeightedSetTermBlueprint(_field); + createWeightedSet(bp, n); + } + void visitDotProduct(search::query::DotProduct &n) { + DotProductBlueprint *bp = new DotProductBlueprint(_field); + createWeightedSet(bp, n); + } + void visitWandTerm(search::query::WandTerm &n) { + ParallelWeakAndBlueprint *bp = new ParallelWeakAndBlueprint(_field, + n.getTargetNumHits(), + n.getScoreThreshold(), + n.getThresholdBoostFactor()); + createWeightedSet(bp, n); + } + + void handleNumberTermAsText(search::query::NumberTerm &n) + { + vespalib::string termStr = termAsString(n); + queryeval::SplitFloat splitter(termStr); + if (splitter.parts() > 1) { + query::SimplePhrase phraseNode(n.getView(), n.getId(), n.getWeight()); + phraseNode.setStateFrom(n); + for (size_t i = 0; i < splitter.parts(); ++i) { + query::Node::UP nn; + nn.reset(new query::SimpleStringTerm(splitter.getPart(i), "", 0, query::Weight(0))); + phraseNode.append(std::move(nn)); + } + visitPhrase(phraseNode); + } else { + if (splitter.parts() == 1) { + termStr = splitter.getPart(0); + } + query::SimpleStringTerm stringNode(termStr, n.getView(), n.getId(), n.getWeight()); + stringNode.setStateFrom(n); + visit(stringNode); + } + } + + void illegalVisit() {} + + virtual void visit(search::query::And &) { illegalVisit(); } + virtual void visit(search::query::AndNot &) { illegalVisit(); } + virtual void visit(search::query::Equiv &) { illegalVisit(); } + virtual void visit(search::query::Near &) { illegalVisit(); } + virtual void visit(search::query::ONear &) { illegalVisit(); } + virtual void visit(search::query::Or &) { illegalVisit(); } + virtual void visit(search::query::Rank &) { illegalVisit(); } + virtual void visit(search::query::WeakAnd &) { illegalVisit(); } + + virtual void visit(search::query::Phrase &n) { + visitPhrase(n); + } + virtual void 
visit(search::query::WeightedSetTerm &n) { visitWeightedSetTerm(n); } + virtual void visit(search::query::DotProduct &n) { visitDotProduct(n); } + virtual void visit(search::query::WandTerm &n) { visitWandTerm(n); } + + virtual void visit(search::query::NumberTerm &n) = 0; + virtual void visit(search::query::LocationTerm &n) = 0; + virtual void visit(search::query::PrefixTerm &n) = 0; + virtual void visit(search::query::RangeTerm &n) = 0; + virtual void visit(search::query::StringTerm &n) = 0; + virtual void visit(search::query::SubstringTerm &n) = 0; + virtual void visit(search::query::SuffixTerm &n) = 0; + virtual void visit(search::query::RegExpTerm &n) = 0; +}; + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp new file mode 100644 index 00000000000..9f876e788bf --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp @@ -0,0 +1,4 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "document_weight_search_iterator.h" diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h new file mode 100644 index 00000000000..1bda4ba3dda --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "searchiterator.h" +#include +#include + +namespace search { +namespace queryeval { + +class DocumentWeightSearchIterator : public SearchIterator +{ +private: + fef::TermFieldMatchData &_tfmd; + fef::TermFieldMatchDataPosition * _matchPosition; + DocumentWeightIterator _iterator; + queryeval::MinMaxPostingInfo _postingInfo; + +public: + DocumentWeightSearchIterator(fef::TermFieldMatchData &tfmd, + const IDocumentWeightAttribute &attr, + IDocumentWeightAttribute::LookupResult dict_entry) + : _tfmd(tfmd), + _matchPosition(NULL), + _iterator(attr.create(dict_entry.posting_idx)), + _postingInfo(queryeval::MinMaxPostingInfo(dict_entry.min_weight, dict_entry.max_weight)) + { + search::fef::TermFieldMatchDataPosition pos; + _tfmd.appendPosition(pos); + _matchPosition = _tfmd.getPositions(); + } + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + _iterator.lower_bound(begin); + updateDocId(); + } + void updateDocId() { + if (_iterator.valid()) { + setDocId(_iterator.getKey()); + } else { + setAtEnd(); + } + } + + void doSeek(uint32_t docId) override { + _iterator.linearSeek(docId); + updateDocId(); + } + + void doUnpack(uint32_t docId) override { + _tfmd.resetOnlyDocId(docId); + _matchPosition->setElementWeight(_iterator.getData()); + } + + const queryeval::PostingInfo *getPostingInfo() const override { return &_postingInfo; } + Trinary is_strict() const override { return Trinary::True; } +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp new file mode 100644 index 00000000000..62efcab7c4c --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".queryeval.dot_product_blueprint"); + +#include "dot_product_blueprint.h" +#include "dot_product_search.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +DotProductBlueprint::DotProductBlueprint(const FieldSpec &field) + : ComplexLeafBlueprint(field), + _estimate(), + _layout(), + _weights(), + _terms() +{ +} + +DotProductBlueprint::~DotProductBlueprint() +{ + while (!_terms.empty()) { + delete _terms.back(); + _terms.pop_back(); + } +} + +FieldSpec +DotProductBlueprint::getNextChildField(const FieldSpec &outer) +{ + return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false); +} + +void +DotProductBlueprint::addTerm(Blueprint::UP term, int32_t weight) +{ + HitEstimate childEst = term->getState().estimate(); + if (! childEst.empty) { + if (_estimate.empty) { + _estimate = childEst; + } else { + _estimate.estHits += childEst.estHits; + } + setEstimate(_estimate); + } + _weights.push_back(weight); + _terms.push_back(term.get()); + term.release(); +} + +SearchIterator::UP +DotProductBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool) const +{ + assert(tfmda.size() == 1); + fef::MatchData::UP md = _layout.createMatchData(); + std::vector childMatch; + std::vector children(_terms.size()); + for (size_t i = 0; i < _terms.size(); ++i) { + const State &childState = _terms[i]->getState(); + assert(childState.numFields() == 1); + childMatch.push_back(childState.field(0).resolve(*md)); + children[i] = _terms[i]->createSearch(*md, true).release(); + } + return DotProductSearch::create(children, *tfmda[0], childMatch, _weights, std::move(md)); +} + +void +DotProductBlueprint::fetchPostings(bool strict) +{ + (void) strict; + for (size_t i = 0; i < _terms.size(); ++i) { + _terms[i]->fetchPostings(true); + } +} + +void +DotProductBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + 
LeafBlueprint::visitMembers(visitor); + visit(visitor, "_weights", _weights); + visit(visitor, "_terms", _terms); +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h new file mode 100644 index 00000000000..1e19264e9cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_blueprint.h @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchable.h" +#include +#include +#include + +namespace search { +namespace fef { class TermFieldMatchData; } + +namespace queryeval { + +class DotProductBlueprint : public ComplexLeafBlueprint +{ + HitEstimate _estimate; + fef::MatchDataLayout _layout; + std::vector _weights; + std::vector _terms; + + DotProductBlueprint(const DotProductBlueprint &); // disabled + DotProductBlueprint &operator=(const DotProductBlueprint &); // disabled + +public: + DotProductBlueprint(const FieldSpec &field); + virtual ~DotProductBlueprint(); + + // used by create visitor + FieldSpec getNextChildField(const FieldSpec &outer); + + // used by create visitor + void addTerm(Blueprint::UP term, int32_t weight); + + virtual SearchIterator::UP + createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const; + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + virtual void + fetchPostings(bool strict); +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp new file mode 100644 index 00000000000..91fcf8a9502 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.cpp @@ -0,0 +1,154 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include +#include +LOG_SETUP(".queryeval.dot_product_search"); + +#include "dot_product_search.h" +#include "iterator_pack.h" +#include +#include +#include +#include + +using search::fef::TermFieldMatchData; +using vespalib::ObjectVisitor; + +namespace search { +namespace queryeval { + + +template +class DotProductSearchImpl : public DotProductSearch +{ +private: + typedef uint32_t ref_t; + + struct CmpDocId { + const uint32_t *termPos; + CmpDocId(const uint32_t *tp) : termPos(tp) {} + bool operator()(const ref_t &a, const ref_t &b) const { + return (termPos[a] < termPos[b]); + } + }; + + fef::TermFieldMatchData &_tmd; + std::vector _weights; + std::vector _termPos; + CmpDocId _cmpDocId; + std::vector _data_space; + ref_t *_data_begin; + ref_t *_data_stash; + ref_t *_data_end; + IteratorPack _children; + + void seek_child(ref_t child, uint32_t docId) { + _termPos[child] = _children.seek(child, docId); + } + +public: + DotProductSearchImpl(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + IteratorPack &&iteratorPack) + : _tmd(tmd), + _weights(weights), + _termPos(weights.size()), + _cmpDocId(&_termPos[0]), + _data_space(), + _data_begin(nullptr), + _data_stash(nullptr), + _data_end(nullptr), + _children(std::move(iteratorPack)) + { + HEAP::require_left_heap(); + assert(_weights.size() > 0); + assert(_weights.size() == _children.size()); + _data_space.reserve(_weights.size()); + for (size_t i = 0; i < weights.size(); ++i) { + _data_space.push_back(i); + } + _data_begin = &_data_space[0]; + _data_end = _data_begin + _data_space.size(); + } + + void doSeek(uint32_t docId) override { + while (_data_stash < _data_end) { + seek_child(*_data_stash, docId); + HEAP::push(_data_begin, ++_data_stash, _cmpDocId); + } + while (_termPos[HEAP::front(_data_begin, _data_stash)] < docId) { + seek_child(HEAP::front(_data_begin, _data_stash), docId); + HEAP::adjust(_data_begin, _data_stash, _cmpDocId); + } + 
setDocId(_termPos[HEAP::front(_data_begin, _data_stash)]); + } + + void doUnpack(uint32_t docId) override { + feature_t score = 0.0; + while ((_data_begin < _data_stash) && + _termPos[HEAP::front(_data_begin, _data_stash)] == docId) + { + HEAP::pop(_data_begin, _data_stash--, _cmpDocId); + const ref_t child = *_data_stash; + double tmp = _weights[child]; + tmp *= _children.get_weight(child, docId); + score += tmp; + }; + _tmd.setRawScore(docId, score); + } + + void initRange(uint32_t begin, uint32_t end) override { + DotProductSearch::initRange(begin, end); + _children.initRange(begin, end); + for (size_t i = 0; i < _children.size(); ++i) { + _termPos[i] = _children.get_docid(i); + } + _data_stash = _data_begin; + while (_data_stash < _data_end) { + HEAP::push(_data_begin, ++_data_stash, _cmpDocId); + } + } + Trinary is_strict() const override { return Trinary::True; } + + void visitMembers(vespalib::ObjectVisitor &) const override {} +}; + +//----------------------------------------------------------------------------- + + +SearchIterator::UP +DotProductSearch::create(const std::vector &children, + search::fef::TermFieldMatchData &tmd, + const std::vector &childMatch, + const std::vector &weights, + fef::MatchData::UP md) +{ + typedef DotProductSearchImpl ArrayHeapImpl; + typedef DotProductSearchImpl HeapImpl; + + if (childMatch.size() < 128) { + return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md)))); + } + return SearchIterator::UP(new HeapImpl(tmd, weights, SearchIteratorPack(children, childMatch, std::move(md)))); +} + +//----------------------------------------------------------------------------- + +SearchIterator::UP +DotProductSearch::create(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + std::vector &&iterators) +{ + typedef DotProductSearchImpl ArrayHeapImpl; + typedef DotProductSearchImpl HeapImpl; + + if (iterators.size() < 128) { + return SearchIterator::UP(new 
ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + } + return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); +} + +//----------------------------------------------------------------------------- + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h new file mode 100644 index 00000000000..d5503f7baa5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multisearch.h" +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +class TermFieldMatchData; +} // namespace fef + +namespace queryeval { + +/** + * Search iterator for a sparse dot product, based on a set of child + * search iterators. + * + * This class is a base class for a set of different instantiations of + * DotProductSearchImpl, defined in the .cpp-file. + */ +class DotProductSearch : public SearchIterator +{ +protected: + DotProductSearch() {} + +public: + static SearchIterator::UP create(const std::vector &children, + search::fef::TermFieldMatchData &tmd, + const std::vector &childMatch, + const std::vector &weights, + fef::MatchData::UP md); + + static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + std::vector &&iterators); +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp new file mode 100644 index 00000000000..b500e977848 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.cpp @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "emptysearch.h" + +namespace search { +namespace queryeval { + +void +EmptySearch::doSeek(uint32_t) +{ +} + +void +EmptySearch::doUnpack(uint32_t) +{ +} + +EmptySearch::EmptySearch() + : SearchIterator() +{ +} + +EmptySearch::~EmptySearch() +{ +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/emptysearch.h b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h new file mode 100644 index 00000000000..45b60e08468 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/emptysearch.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" + +namespace search { +namespace queryeval { + +class EmptySearch : public SearchIterator +{ +protected: + void doSeek(uint32_t) override; + void doUnpack(uint32_t) override; + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + setAtEnd(); + } + +public: + EmptySearch(); + ~EmptySearch(); +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp new file mode 100644 index 00000000000..e61fd77918c --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.cpp @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "equiv_blueprint.h" +#include "equivsearch.h" + +namespace search { +namespace queryeval { + +EquivBlueprint::EquivBlueprint(const FieldSpecBaseList &fields, + fef::MatchDataLayout subtree_mdl) + : ComplexLeafBlueprint(fields), + _fields(fields), + _estimate(), + _layout(subtree_mdl), + _terms(), + _exactness() +{ +} + +EquivBlueprint::~EquivBlueprint() +{ +} + +SearchIterator::UP +EquivBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &outputs, + bool strict) const +{ + fef::MatchData::UP md = _layout.createMatchData(); + MultiSearch::Children children(_terms.size()); + search::fef::TermMatchDataMerger::Inputs childMatch; + for (size_t i = 0; i < _terms.size(); ++i) { + const State &childState = _terms[i]->getState(); + for (size_t j = 0; j < childState.numFields(); ++j) { + childMatch.emplace_back(childState.field(j).resolve(*md), _exactness[i]); + } + children[i] = _terms[i]->createSearch(*md, strict).release(); + } + return SearchIterator::UP(EquivSearch::create(children, std::move(md), childMatch, outputs, strict)); +} + +void +EquivBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + LeafBlueprint::visitMembers(visitor); + visit(visitor, "terms", _terms); +} + +void +EquivBlueprint::fetchPostings(bool strict) +{ + for (size_t i = 0; i < _terms.size(); ++i) { + _terms[i]->fetchPostings(strict); + } +} + +EquivBlueprint& +EquivBlueprint::addTerm(Blueprint::UP term, double exactness) +{ + const State &childState = term->getState(); + + HitEstimate childEst = childState.estimate(); + if (_terms.empty() || _estimate < childEst ) { + _estimate = childEst; + } + setEstimate(_estimate); + _terms.push_back(std::move(term)); + _exactness.push_back(exactness); + return *this; +} + + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h new file mode 100644 index 00000000000..818257df7a1 
--- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/equiv_blueprint.h @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "blueprint.h" +#include + +namespace search { +namespace queryeval { + +class EquivBlueprint : public ComplexLeafBlueprint +{ +private: + FieldSpecBaseList _fields; + HitEstimate _estimate; + fef::MatchDataLayout _layout; + std::vector _terms; + std::vector _exactness; + +public: + EquivBlueprint(const FieldSpecBaseList &fields, fef::MatchDataLayout subtree_mdl); + virtual ~EquivBlueprint(); + + // used by create visitor + EquivBlueprint& addTerm(Blueprint::UP term, double exactness); + + virtual SearchIterator::UP + createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const; + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + virtual void fetchPostings(bool strict); +}; + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp new file mode 100644 index 00000000000..841466d3e67 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/equivsearch.cpp @@ -0,0 +1,72 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "equivsearch.h" +#include +#include + +namespace search { +namespace queryeval { + +template +class EquivImpl : public OrLikeSearch +{ +private: + fef::MatchData::UP _inputMatchData; + fef::TermMatchDataMerger _merger; + bool _valid; + +protected: + virtual void doUnpack(uint32_t docid); + +public: + /** + * Create a new Equiv Search with the given children. 
+ * + * @param children the search objects that should be equivalent + **/ + EquivImpl(const MultiSearch::Children &children, + fef::MatchData::UP inputMatchData, + const search::fef::TermMatchDataMerger::Inputs &inputs, + const fef::TermFieldMatchDataArray &outputs); +}; + +template +EquivImpl::EquivImpl(const MultiSearch::Children &children, + fef::MatchData::UP inputMatchData, + const search::fef::TermMatchDataMerger::Inputs &inputs, + const search::fef::TermFieldMatchDataArray &outputs) + + : OrLikeSearch(children, NoUnpack()), + _inputMatchData(std::move(inputMatchData)), + _merger(inputs, outputs), + _valid(outputs.valid()) +{ +} + +template +void +EquivImpl::doUnpack(uint32_t docid) +{ + if (_valid) { + MultiSearch::doUnpack(docid); + _merger.merge(docid); + } +} + +SearchIterator * +EquivSearch::create(const Children &children, + fef::MatchData::UP inputMatchData, + const search::fef::TermMatchDataMerger::Inputs &inputs, + const search::fef::TermFieldMatchDataArray &outputs, + bool strict) +{ + if (strict) { + return new EquivImpl(children, std::move(inputMatchData), inputs, outputs); + } else { + return new EquivImpl(children, std::move(inputMatchData), inputs, outputs); + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/equivsearch.h b/searchlib/src/vespa/searchlib/queryeval/equivsearch.h new file mode 100644 index 00000000000..b96117a946a --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/equivsearch.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "orlikesearch.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the Equiv search operation. + **/ +class EquivSearch : public SearchIterator +{ +public: + typedef MultiSearch::Children Children; + + // Caller takes ownership of the returned SearchIterator. 
+ static SearchIterator *create(const Children &children, + fef::MatchData::UP inputMD, + const search::fef::TermMatchDataMerger::Inputs &inputs, + const search::fef::TermFieldMatchDataArray &outputs, + bool strict); +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp new file mode 100644 index 00000000000..7dc2fd3869b --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include + +namespace search { +namespace queryeval { + +FakeRequestContext::FakeRequestContext(attribute::IAttributeContext * context, fastos::TimeStamp doom_in) : + _clock(), + _doom(_clock, doom_in), + _attributeContext(context) +{ } + +} +} diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h new file mode 100644 index 00000000000..9807d2310f3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.h @@ -0,0 +1,34 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include + +namespace search { +namespace queryeval { + +class FakeRequestContext : public IRequestContext +{ +public: + FakeRequestContext(attribute::IAttributeContext * context = nullptr, fastos::TimeStamp doom=std::numeric_limits::max()); + const vespalib::Doom & getDoom() const override { return _doom; } + const AttributeVector * getAttribute(const vespalib::string & name) const override { + return _attributeContext + ? 
dynamic_cast(_attributeContext->getAttribute(name)) + : nullptr; + } + const AttributeVector * getAttributeStableEnum(const vespalib::string & name) const override { + return _attributeContext + ? dynamic_cast(_attributeContext->getAttribute(name)) + : nullptr; + } +private: + vespalib::Clock _clock; + const vespalib::Doom _doom; + attribute::IAttributeContext * _attributeContext; +}; + +} +} diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp new file mode 100644 index 00000000000..6c152376803 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.cpp @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fakeresult"); +#include "fake_result.h" + +namespace search { +namespace queryeval { + +std::ostream &operator << (std::ostream &out, const FakeResult &result) { + const std::vector &doc = result.inspect(); + if (doc.size() == 0) { + out << std::endl << "empty" << std::endl; + } else { + out << std::endl; + for (size_t d = 0; d < doc.size(); ++d) { + out << "{ DOC id: " << doc[d].docId << " }" << std::endl; + + const std::vector &elem = doc[d].elements; + for (size_t e = 0; e < elem.size(); ++e) { + out << " ( ELEM id: " << elem[e].id + << " weight: " << elem[e].weight + << " len: " << elem[e].length + << " )" << std::endl; + + const std::vector &pos = elem[e].positions; + for (size_t p = 0; p < pos.size(); ++p) { + out << " [ OCC pos: " << pos[p] << " ]" << std::endl; + } + } + out << " ( RAW score: " << doc[d].rawScore << " )" << std::endl; + } + } + return out; +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_result.h b/searchlib/src/vespa/searchlib/queryeval/fake_result.h new file mode 100644 index 00000000000..d47cbcf8763 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_result.h 
@@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "posting_info.h" +#include +#include +#include + +namespace search { +namespace queryeval { + +class FakeResult +{ +public: + struct Element { + uint32_t id; + int32_t weight; + uint32_t length; + std::vector positions; + Element(uint32_t id_) : id(id_), weight(1), + length(SEARCHLIB_FEF_UNKNOWN_FIELD_LENGTH), + positions() {} + bool operator==(const Element &rhs) const { + return (id == rhs.id && + weight == rhs.weight && + length == rhs.length && + positions == rhs.positions); + } + }; + + struct Document { + uint32_t docId; + std::vector elements; + feature_t rawScore; + Document(uint32_t id) : docId(id), elements(), rawScore(0) {} + bool operator==(const Document &rhs) const { + return (docId == rhs.docId && + elements == rhs.elements && + rawScore == rhs.rawScore); + } + }; + +private: + std::vector _documents; + MinMaxPostingInfo::SP _minMaxPostingInfo; + +public: + FakeResult() : _documents(), _minMaxPostingInfo() {} + + FakeResult &doc(uint32_t docId) { + _documents.push_back(Document(docId)); + return *this; + } + + FakeResult &elem(uint32_t id) { + _documents.back().elements.push_back(Element(id)); + return *this; + } + + FakeResult &score(feature_t s) { + _documents.back().rawScore = s; + return *this; + } + + FakeResult &len(uint32_t length) { + if (_documents.back().elements.empty()) { + elem(0); + } + _documents.back().elements.back().length = length; + return *this; + } + + FakeResult &weight(uint32_t w) { + if (_documents.back().elements.empty()) { + elem(0); + } + _documents.back().elements.back().weight = w; + return *this; + } + + FakeResult &pos(uint32_t p) { + if (_documents.back().elements.empty()) { + elem(0); + } + _documents.back().elements.back().positions.push_back(p); + return *this; + } + + FakeResult &minMax(int32_t minWeight, int32_t maxWeight) { + _minMaxPostingInfo.reset(new 
MinMaxPostingInfo(minWeight, maxWeight)); + return *this; + } + + bool operator==(const FakeResult &rhs) const { + return _documents == rhs._documents; + } + + const std::vector &inspect() const { return _documents; } + + const PostingInfo *postingInfo() const { return _minMaxPostingInfo.get(); } +}; + +std::ostream &operator << (std::ostream &out, const FakeResult &result); + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp new file mode 100644 index 00000000000..85946e1a758 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fakesearch"); +#include +#include "fake_search.h" +#include +#include + +namespace search { +namespace queryeval { + +void +FakeSearch::doSeek(uint32_t docid) +{ + while (valid() && docid > currId()) { + next(); + } + if (valid()) { + setDocId(currId()); + } else { + setAtEnd(); + } +} + +void +FakeSearch::doUnpack(uint32_t docid) +{ + typedef fef::TermFieldMatchDataPosition PosCtx; + typedef FakeResult::Document Doc; + typedef FakeResult::Element Elem; + + assert(valid()); + const Doc &doc = _result.inspect()[_offset]; + assert(doc.docId == docid); + _tfmda[0]->reset(docid); + for (uint32_t i = 0; i < doc.elements.size(); ++i) { + const Elem &elem =doc.elements[i]; + for (uint32_t j = 0; j < elem.positions.size(); ++j) { + _tfmda[0]->appendPosition(PosCtx(elem.id, elem.positions[j], + elem.weight, elem.length)); + } + } +} + +void +FakeSearch::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "tag", _tag); + visit(visitor, "field", _field); + visit(visitor, "term", _term); +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_search.h 
b/searchlib/src/vespa/searchlib/queryeval/fake_search.h new file mode 100644 index 00000000000..b360cdd76e7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_search.h @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" +#include "fake_result.h" +#include + +namespace search { +namespace queryeval { + +class FakeSearch : public SearchIterator +{ +private: + vespalib::string _tag; + vespalib::string _field; + vespalib::string _term; + FakeResult _result; + uint32_t _offset; + fef::TermFieldMatchDataArray _tfmda; + + bool valid() const { return _offset < _result.inspect().size(); } + uint32_t currId() const { return _result.inspect()[_offset].docId; } + void next() { ++_offset; } + +public: + FakeSearch(const vespalib::string &tag, + const vespalib::string &field, + const vespalib::string &term, + const FakeResult &res, + const fef::TermFieldMatchDataArray &tfmda) + : _tag(tag), _field(field), _term(term), + _result(res), _offset(0), _tfmda(tfmda) + { + assert(_tfmda.size() == 1); + } + virtual void doSeek(uint32_t docid); + virtual void doUnpack(uint32_t docid); + virtual const PostingInfo *getPostingInfo() const { return _result.postingInfo(); } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp new file mode 100644 index 00000000000..c9f088f9039 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".searchlib.queryeval.fake_searchable"); + +#include "fake_searchable.h" +#include "leaf_blueprints.h" +#include "termasstring.h" + +#include "create_blueprint_visitor_helper.h" +#include +#include +#include + +using search::query::NumberTerm; +using search::query::LocationTerm; +using search::query::Node; +using search::query::PredicateQuery; +using search::query::PrefixTerm; +using search::query::RangeTerm; +using search::query::RegExpTerm; +using search::query::StringTerm; +using search::query::SubstringTerm; +using search::query::SuffixTerm; + +namespace search { +namespace queryeval { + +FakeSearchable::FakeSearchable() + : _tag(""), + _map() +{ +} + +FakeSearchable & +FakeSearchable::addResult(const vespalib::string &field, + const vespalib::string &term, + const FakeResult &result) +{ + _map[Key(field, term)] = result; + return *this; +} + +namespace { + +/** + * Determines the correct LookupResult to use. + **/ +template +class LookupVisitor : public CreateBlueprintVisitorHelper +{ + const Map &_map; + const vespalib::string _tag; + +public: + LookupVisitor(Searchable &searchable, + const IRequestContext & requestContext, + const Map &map, const vespalib::string &tag, + const FieldSpec &field) + : CreateBlueprintVisitorHelper(searchable, field, requestContext), + _map(map), _tag(tag) {} + + template + void visitTerm(TermNode &n) { + const vespalib::string term_string = termAsString(n); + + FakeResult result; + typename Map::const_iterator pos = + _map.find(typename Map::key_type(getField().getName(), term_string)); + if (pos != _map.end()) { + result = pos->second; + } + FakeBlueprint *fake = new FakeBlueprint(getField(), result); + Blueprint::UP b(fake); + fake->tag(_tag).term(term_string); + setResult(std::move(b)); + } + + virtual void visit(NumberTerm &n) { visitTerm(n); } + virtual void visit(LocationTerm &n) { visitTerm(n); } + virtual void visit(PrefixTerm &n) { visitTerm(n); } + virtual void visit(RangeTerm &n) { 
visitTerm(n); } + virtual void visit(StringTerm &n) { visitTerm(n); } + virtual void visit(SubstringTerm &n) { visitTerm(n); } + virtual void visit(SuffixTerm &n) { visitTerm(n); } + virtual void visit(PredicateQuery &n) { visitTerm(n); } + virtual void visit(RegExpTerm &n) { visitTerm(n); } +}; + +} // namespace search::queryeval:: + +Blueprint::UP +FakeSearchable::createBlueprint(const IRequestContext & requestContext, + const FieldSpec &field, + const search::query::Node &term) +{ + LookupVisitor visitor(*this, requestContext, _map, _tag, field); + const_cast(term).accept(visitor); + return visitor.getResult(); +} + +FakeSearchable::~FakeSearchable() +{ +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h new file mode 100644 index 00000000000..26a8258713d --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.h @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchable.h" +#include "fake_result.h" + +#include +#include + +namespace search { +namespace queryeval { + +/** + * A fake Searchable implementation. + **/ +class FakeSearchable : public Searchable +{ +private: + typedef std::pair Key; + typedef FakeResult Value; + typedef std::map Map; + + vespalib::string _tag; + Map _map; + +public: + /** + * Create an initially empty fake searchable. + **/ + FakeSearchable(); + + /** + * Tag this searchable with a string value that will be visible + * when dumping search iterators created from it. + * + * @return this object for chaining + * @param t tag + **/ + FakeSearchable &tag(const vespalib::string &t) { + _tag = t; + return *this; + } + + /** + * Add a fake result to be returned for lookup on the given field + * and term combination. 
+ * + * @return this object for chaining + * @param field field name + * @param term search term in string form + * @param result the fake result + **/ + FakeSearchable &addResult(const vespalib::string &field, + const vespalib::string &term, + const FakeResult &result); + + // inherited from Searchable + using Searchable::createBlueprint; + virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext, + const FieldSpec &field, + const search::query::Node &term); + virtual ~FakeSearchable(); +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp new file mode 100644 index 00000000000..60cdefab2c1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fieldspec"); +#include "field_spec.h" + +namespace search { +namespace queryeval { + +FieldSpecBase::FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_) : + _fieldId(fieldId | (isFilter_ ? 0x1000000u : 0)), + _handle(handle) +{ + assert(fieldId < 0x1000000); // Can be represented by 24 bits +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/field_spec.h b/searchlib/src/vespa/searchlib/queryeval/field_spec.h new file mode 100644 index 00000000000..b652b7b676a --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/field_spec.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include +#include + +namespace search { + +namespace queryeval { + + +/** + * Base description of a single field to be searched. 
+ **/ +class FieldSpecBase +{ +public: + FieldSpecBase(uint32_t fieldId, fef::TermFieldHandle handle, bool isFilter_ = false); + + // resolve where to put match information for this term/field combination + search::fef::TermFieldMatchData *resolve(search::fef::MatchData &md) const { + return md.resolveTermField(getHandle()); + } + const search::fef::TermFieldMatchData *resolve(const search::fef::MatchData &md) const { + return md.resolveTermField(getHandle()); + } + uint32_t getFieldId() const { return _fieldId & 0xffffff; } + fef::TermFieldHandle getHandle() const { return _handle; } + /// a filter produces less detailed match data + bool isFilter() const { return _fieldId & 0x1000000; } +private: + uint32_t _fieldId; // field id in ranking framework + fef::TermFieldHandle _handle; // handle used when exposing match data to ranking framework +}; + +/** + * Description of a single field to be searched. + **/ +class FieldSpec : public FieldSpecBase +{ +public: + FieldSpec(const vespalib::string & name, uint32_t fieldId, + fef::TermFieldHandle handle, bool isFilter_ = false) + : FieldSpecBase(fieldId, handle, isFilter_), + _name(name) + {} + + // resolve where to put match information for this term/field combination + search::fef::TermFieldMatchData *resolve(search::fef::MatchData &md) const { + return md.resolveTermField(getHandle()); + } + const vespalib::string & getName() const { return _name; } +private: + vespalib::string _name; // field name +}; + +/** + * List of fields to be searched. 
+ **/ +class FieldSpecBaseList +{ +private: + std::vector _list; + +public: + FieldSpecBaseList &add(const FieldSpecBase &spec) { + _list.push_back(spec); + return *this; + } + bool empty() const { + return _list.empty(); + } + size_t size() const { + return _list.size(); + } + const FieldSpecBase &operator[](size_t i) const { + return _list[i]; + } + void clear() { _list.clear(); } + + void swap(FieldSpecBaseList & rhs) { + _list.swap(rhs._list); + } +}; + +/** + * List of fields to be searched. + **/ +class FieldSpecList +{ +private: + std::vector _list; + +public: + FieldSpecList &add(const FieldSpec &spec) { + _list.push_back(spec); + return *this; + } + bool empty() const { + return _list.empty(); + } + size_t size() const { + return _list.size(); + } + const FieldSpec &operator[](size_t i) const { + return _list[i]; + } + void clear() { _list.clear(); } + void swap(FieldSpecList & rhs) { + _list.swap(rhs._list); + } +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp new file mode 100644 index 00000000000..544240daeff --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.cpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".get_weight_from_node"); + +#include "get_weight_from_node.h" +#include +#include +#include +#include +#include + +using search::query::Node; +using search::query::SimpleQueryNodeTypes; +using search::query::TemplateTermVisitor; +using search::query::Weight; + +namespace search { +namespace queryeval { +namespace { + +struct WeightExtractor : public TemplateTermVisitor { + Weight weight; + + WeightExtractor() : weight(0) {} + + template void visitTerm(TermType &n) { + weight = n.getWeight(); + } + + // Treat Equiv nodes as terms. 
+ virtual void visit(search::query::Equiv &n) { visitTerm(n); } +}; + +} // namespace search::queryeval:: + +Weight +getWeightFromNode(const Node &node) +{ + WeightExtractor extractor; + const_cast(node).accept(extractor); + return extractor.weight; +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h new file mode 100644 index 00000000000..6634fc474fb --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/get_weight_from_node.h @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace query { class Node; } +namespace queryeval { + +search::query::Weight getWeightFromNode(const search::query::Node &node); + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp new file mode 100644 index 00000000000..95d98e48727 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.cpp @@ -0,0 +1,313 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "hitcollector.h" +#include "scores.h" +#include +#include +#include + +namespace search { +namespace queryeval { + +void +HitCollector::sortHitsByScore(size_t topn) +{ + topn = std::min(topn, _hits.size()); + if (topn > _scoreOrder.size()) { + _scoreOrder.clear(); + _scoreOrder.reserve(_hits.size()); + for (size_t i(0); i < _hits.size(); i++) { + _scoreOrder.push_back(i); + } + ShiftBasedRadixSorter:: + radix_sort(IndirectScoreRadix(&_hits[0]), IndirectScoreComparator(&_hits[0]), &_scoreOrder[0], _scoreOrder.size(), 16, topn); + _scoreOrder.resize(topn); + } +} + +void +HitCollector::sortHitsByDocId() +{ + if (_hitsSortOrder != SortOrder::DOC_ID) { + ShiftBasedRadixSorter:: + radix_sort(DocIdRadix(), DocIdComparator(), &_hits[0], _hits.size(), 16); + _hitsSortOrder = SortOrder::DOC_ID; + _scoreOrder.clear(); + } +} + +HitCollector::HitCollector(uint32_t numDocs, + uint32_t maxHitsSize, + uint32_t maxReRankHitsSize) + : _numDocs(numDocs), + _maxHitsSize(maxHitsSize), + _maxReRankHitsSize(maxReRankHitsSize), + _maxDocIdVectorSize((numDocs + 31) / 32), + _hits(), + _hitsSortOrder(SortOrder::DOC_ID), + _docIdVector(), + _bitVector(), + _reRankedHits(), + _scale(1.0), + _adjust(0), + _hasReRanked(false), + _needReScore(false) +{ + if (_maxHitsSize > 0) { + _collector.reset(new RankedHitCollector(*this)); + } else { + _collector.reset(new DocIdCollector(*this)); + } + _hits.reserve(maxHitsSize); +} + +HitCollector::~HitCollector() +{ +} + +void +HitCollector::RankedHitCollector::collect(uint32_t docId, feature_t score) +{ + HitCollector & hc = this->_hc; + if (hc._hits.size() < hc._maxHitsSize) { + if (__builtin_expect(((hc._hits.size() > 0) && + (docId < hc._hits.back().first) && + (hc._hitsSortOrder == SortOrder::DOC_ID)), false)) + { + hc._hitsSortOrder = SortOrder::NONE; + } + hc._hits.push_back(std::make_pair(docId, score)); + } else { + collectAndChangeCollector(docId, score); + } +} + +void 
+HitCollector::RankedHitCollector::collectAndChangeCollector(uint32_t docId, feature_t score) +{ + HitCollector & hc = this->_hc; + Collector::UP newCollector; + if (hc._maxDocIdVectorSize > hc._maxHitsSize) { + // start using docid vector + hc._docIdVector.reserve(hc._maxDocIdVectorSize); + uint32_t iSize = hc._hits.size(); + for (uint32_t i = 0; i < iSize; ++i) { + hc._docIdVector.push_back(hc._hits[i].first); + } + hc._docIdVector.push_back(docId); + newCollector.reset(new DocIdCollector(hc)); + } else { + // start using bit vector + hc._bitVector = BitVector::create(hc._numDocs); + hc._bitVector->invalidateCachedCount(); + uint32_t iSize = hc._hits.size(); + for (uint32_t i = 0; i < iSize; ++i) { + hc._bitVector->setBit(hc._hits[i].first); + } + hc._bitVector->setBit(docId); + newCollector.reset(new BitVectorCollector(hc)); + } + // treat hit vector as a heap + std::make_heap(hc._hits.begin(), hc._hits.end(), ScoreComparator()); + hc._hitsSortOrder = SortOrder::HEAP; + this->considerForHitVector(docId, score); + hc._collector = std::move(newCollector); +} + +template +void +HitCollector::DocIdCollector::collect(uint32_t docId, feature_t score) +{ + if (CollectRankedHit) { + this->considerForHitVector(docId, score); + } + HitCollector & hc = this->_hc; + if (hc._docIdVector.size() < hc._maxDocIdVectorSize) { + hc._docIdVector.push_back(docId); + } else { + collectAndChangeCollector(docId); + } +} + +template +void +HitCollector::DocIdCollector::collectAndChangeCollector(uint32_t docId) +{ + HitCollector & hc = this->_hc; + // start using bit vector instead of docid array. 
+ hc._bitVector = BitVector::create(hc._numDocs); + hc._bitVector->invalidateCachedCount(); + uint32_t iSize = static_cast(hc._docIdVector.size()); + for (uint32_t i = 0; i < iSize; ++i) { + hc._bitVector->setBit(hc._docIdVector[i]); + } + std::vector emptyVector; + emptyVector.swap(hc._docIdVector); + hc._bitVector->setBit(docId); + hc._collector.reset(new BitVectorCollector(hc)); // note - self-destruct. +} + +std::vector +HitCollector::getSortedHeapScores() +{ + std::vector scores; + size_t scoresToReturn = std::min(_hits.size(), static_cast(_maxReRankHitsSize)); + scores.reserve(scoresToReturn); + sortHitsByScore(scoresToReturn); + for (size_t i = 0; i < scoresToReturn; ++i) { + scores.push_back(_hits[_scoreOrder[i]].second); + } + return scores; +} + +size_t +HitCollector::reRank(DocumentScorer &scorer) +{ + return reRank(scorer, _maxReRankHitsSize); +} + +size_t +HitCollector::reRank(DocumentScorer &scorer, size_t count) +{ + size_t hitsToReRank = std::min(_hits.size(), count); + if (_hasReRanked || hitsToReRank == 0) { + return 0; + } + sortHitsByScore(hitsToReRank); + _reRankedHits.reserve(_reRankedHits.size() + hitsToReRank); + for (size_t i(0); i < hitsToReRank; i++) { + _reRankedHits.push_back(_hits[_scoreOrder[i]]); + } + + Scores &initScores = _ranges.first; + Scores &finalScores = _ranges.second; + initScores = Scores(_reRankedHits.back().second, + _reRankedHits.front().second); + finalScores = Scores(std::numeric_limits::max(), + -std::numeric_limits::max()); + + std::sort(_reRankedHits.begin(), _reRankedHits.end()); // sort on docId + for (auto &hit : _reRankedHits) { + hit.second = scorer.score(hit.first); + finalScores.low = std::min(finalScores.low, hit.second); + finalScores.high = std::max(finalScores.high, hit.second); + } + _hasReRanked = true; + return hitsToReRank; +} + +std::pair +HitCollector::getRanges() const +{ + return _ranges; +} + +void +HitCollector::setRanges(const std::pair &ranges) +{ + _ranges = ranges; +} + +namespace { + 
+void +mergeHitsIntoResultSet(const std::vector &hits, ResultSet &result) +{ + RankedHit *rhIter = result.getArray(); + RankedHit *rhEnd = rhIter + result.getArrayUsed(); + for (const auto &hit : hits) { + while (rhIter != rhEnd && rhIter->_docId != hit.first) { + // just set the iterators right + ++rhIter; + } + assert(rhIter != rhEnd); // the hits should be a subset of the hits in ranked hit array. + rhIter->_rankValue = hit.second; + } +} + +} + +std::unique_ptr +HitCollector::getResultSet() +{ + Scores &initHeapScores = _ranges.first; + Scores &finalHeapScores = _ranges.second; + if (initHeapScores.low > finalHeapScores.low) { + // scale and adjust the score according to the range + // of the initial and final heap score values to avoid that + // a score from the first phase is larger than finalHeapScores.low + feature_t initRange = initHeapScores.high - initHeapScores.low; + if (initRange < 1.0) initRange = 1.0f; + feature_t finalRange = finalHeapScores.high - finalHeapScores.low; + if (finalRange < 1.0) finalRange = 1.0f; + _scale = finalRange / initRange; + _adjust = initHeapScores.low * _scale - finalHeapScores.low; + _needReScore = true; + } + + // destroys the heap property or score sort order + sortHitsByDocId(); + + std::unique_ptr rs(new ResultSet()); + if ( ! 
_collector->isDocIdCollector() ) { + unsigned int iSize = _hits.size(); + rs->allocArray(iSize); + RankedHit * rh = rs->getArray(); + if (_needReScore) { + for (uint32_t i = 0; i < iSize; ++i) { + rh[i]._docId = _hits[i].first; + rh[i]._rankValue = getReScore(_hits[i].second); + } + } else { + for (uint32_t i = 0; i < iSize; ++i) { + rh[i]._docId = _hits[i].first; + rh[i]._rankValue = _hits[i].second; + } + } + rs->setArrayUsed(iSize); + } else { + unsigned int iSize = _hits.size(); + unsigned int jSize = _docIdVector.size(); + rs->allocArray(jSize); + RankedHit * rh = rs->getArray(); + uint32_t i = 0; + if (_needReScore) { + for (uint32_t j = 0; j < jSize; ++j) { + uint32_t docId = _docIdVector[j]; + rh[j]._docId = docId; + if (i < iSize && docId == _hits[i].first) { + rh[j]._rankValue = getReScore(_hits[i].second); + ++i; + } else { + rh[j]._rankValue = 0; + } + } + } else { + for (uint32_t j = 0; j < jSize; ++j) { + uint32_t docId = _docIdVector[j]; + rh[j]._docId = docId; + if (i < iSize && docId == _hits[i].first) { + rh[j]._rankValue = _hits[i].second; + ++i; + } else { + rh[j]._rankValue = 0; + } + } + } + rs->setArrayUsed(jSize); + } + + if (_hasReRanked) { + mergeHitsIntoResultSet(_reRankedHits, *rs.get()); + } + + if (_bitVector != NULL) { + rs->setBitOverflow(std::move(_bitVector)); + } + + return rs; +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/hitcollector.h b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h new file mode 100644 index 00000000000..78f71bdf81a --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/hitcollector.h @@ -0,0 +1,214 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "scores.h" +#include +#include +#include +#include +#include +#include + +namespace search { + +namespace queryeval { + +/** + * This class is used to store all hits found during parallel query evaluation. + **/ +class HitCollector { +public: + typedef std::pair Hit; + + /** + * Interface used to calculate the second phase score for the documents being re-ranked. + */ + struct DocumentScorer { + virtual ~DocumentScorer() {} + virtual feature_t score(uint32_t docId) = 0; + }; + +private: + enum class SortOrder { NONE, DOC_ID, HEAP }; + + const uint32_t _numDocs; + const uint32_t _maxHitsSize; + const uint32_t _maxReRankHitsSize; + const uint32_t _maxDocIdVectorSize; + + std::vector _hits; // used as a heap when _hits.size == _maxHitsSize + std::vector _scoreOrder; // Holds an indirection to the N best hits + SortOrder _hitsSortOrder; + std::vector _docIdVector; + BitVector::UP _bitVector; + std::vector _reRankedHits; + + std::pair _ranges; + feature_t _scale; + feature_t _adjust; + + bool _hasReRanked; + bool _needReScore; + + struct ScoreComparator { + bool operator() (const Hit & lhs, const Hit & rhs) const { + if (lhs.second == rhs.second) { + return (lhs.first < rhs.first); + } + return (lhs.second >= rhs.second); // comparator for min-heap + } + }; + + struct IndirectScoreComparator { + IndirectScoreComparator(const Hit * hits) : _hits(hits) { } + bool operator() (uint32_t lhs, uint32_t rhs) const { + if (_hits[lhs].second == _hits[rhs].second) { + return (_hits[lhs].first < _hits[rhs].first); + } + return (_hits[lhs].second >= _hits[rhs].second); // operator for min-heap + } + const Hit * _hits; + }; + + struct IndirectScoreRadix { + IndirectScoreRadix(const Hit * hits) : _hits(hits) { } + uint64_t operator () (uint32_t v) { + return vespalib::convertForSort::convert(_hits[v].second); + } + const Hit * _hits; + }; + struct DocIdRadix { + uint32_t operator () (const Hit & v) { return v.first; } + }; + struct DocIdComparator { + bool 
operator() (const Hit & lhs, const Hit & rhs) const { + return (lhs.first < rhs.first); + } + }; + + class Collector { + public: + typedef std::unique_ptr UP; + virtual ~Collector() {} + virtual void collect(uint32_t docId, feature_t score) = 0; + virtual bool isRankedHitCollector() const { return false; } + virtual bool isDocIdCollector() const { return false; } + }; + + Collector::UP _collector; + + class CollectorBase : public Collector { + public: + CollectorBase(HitCollector &hc) : _hc(hc) { } + void considerForHitVector(uint32_t docId, feature_t score) { + if (__builtin_expect((score > _hc._hits[0].second), false)) { + replaceHitInVector(docId, score); + } + } + protected: + void replaceHitInVector(uint32_t docId, feature_t score) { + // replace lowest scored hit in hit vector + std::pop_heap(_hc._hits.begin(), _hc._hits.end(), ScoreComparator()); + _hc._hits.back().first = docId; + _hc._hits.back().second = score; + std::push_heap(_hc._hits.begin(), _hc._hits.end(), ScoreComparator()); + } + HitCollector &_hc; + }; + + class RankedHitCollector : public CollectorBase { + public: + RankedHitCollector(HitCollector &hc) : CollectorBase(hc) { } + virtual void collect(uint32_t docId, feature_t score); + void collectAndChangeCollector(uint32_t docId, feature_t score) __attribute__((noinline)); + virtual bool isRankedHitCollector() const { return true; } + }; + + template + class DocIdCollector : public CollectorBase { + public: + DocIdCollector(HitCollector &hc) : CollectorBase(hc) { } + virtual void collect(uint32_t docId, feature_t score); + void collectAndChangeCollector(uint32_t docId) __attribute__((noinline)); + virtual bool isDocIdCollector() const { return true; } + }; + + template + class BitVectorCollector : public CollectorBase { + public: + BitVectorCollector(HitCollector &hc) : CollectorBase(hc) { } + virtual void collect(uint32_t docId, feature_t score) { + this->_hc._bitVector->setBit(docId); + if (CollectRankedHit) { + 
this->considerForHitVector(docId, score); + } + } + }; + + HitRank getReScore(feature_t score) const { + return ((score * _scale) - _adjust); + } + VESPA_DLL_LOCAL void sortHitsByScore(size_t topn); + VESPA_DLL_LOCAL void sortHitsByDocId(); + +public: + /** + * Creates a hit collector used to store hits for doc ids in the + * range [0, numDocs>. Doc id and rank score are stored for the n + * (=maxHitsSize) best hits. The best m (=maxReRankHitsSize) hits are + * candidates for re-ranking. Note that n >= m. + * + * @param numDocs + * @param maxHitsSize + * @param maxReRankHitsSize + **/ + HitCollector(uint32_t numDocs, uint32_t maxHitsSize, uint32_t maxReRankHitsSize); + ~HitCollector(); + + /** + * Adds the given hit to this collector. Stores doc id and rank + * score if the given hit is among the n (=maxHitsSize) best hits. + * Stores only doc id if it is not among the n best hits. + * + * @param docId the doc id for the hit + * @param score the first phase rank score for the hit + **/ + void addHit(uint32_t docId, feature_t score) { + _collector->collect(docId, score); + } + + /** + * Returns a sorted vector of scores for the hits that are stored + * in the heap. These are the candidates for re-ranking. + */ + std::vector getSortedHeapScores(); + + /** + * Re-ranks the m (=maxHeapSize) best hits by invoking the score() + * method on the given document scorer. The best m hits are sorted on doc id + * so that score() is called in doc id order. + **/ + size_t reRank(DocumentScorer &scorer); + size_t reRank(DocumentScorer &scorer, size_t count); + + std::pair getRanges() const; + void setRanges(const std::pair &ranges); + + /** + * Returns a result set based on the content of this collector. + * Invoking this method will destroy the heap property of the + * ranked hits and the match data heap. 
+ * + * @param auto pointer to the result set + **/ + std::unique_ptr getResultSet(); + +private: + HitCollector(const HitCollector &); // Not implemented + HitCollector &operator=(const HitCollector &); // Not implemented +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp new file mode 100644 index 00000000000..4f9bf665796 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -0,0 +1,584 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".queryeval.intermediate_blueprints"); +#include "intermediate_blueprints.h" +#include "andnotsearch.h" +#include "andsearch.h" +#include "orsearch.h" +#include "nearsearch.h" +#include "ranksearch.h" +#include "sourceblendersearch.h" +#include "equivsearch.h" +#include "termwise_blueprint_helper.h" +#include "termwise_search.h" +#include + +namespace search { +namespace queryeval { + +//----------------------------------------------------------------------------- + +namespace { + +template +size_t lookup_create_source(std::vector > &sources, uint32_t child_source) { + for (size_t i = 0; i < sources.size(); ++i) { + if (sources[i]->getSourceId() == child_source) { + return i; + } + } + sources.push_back(std::unique_ptr(new CombineType())); + sources.back()->setSourceId(child_source); + return (sources.size() - 1); +} + +template +void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) { + std::vector source_blenders; + SourceBlenderBlueprint *reference = nullptr; + for (size_t i = begin_idx; i < self.childCnt(); ++i) { + SourceBlenderBlueprint *child = dynamic_cast(&self.getChild(i)); + if (child != nullptr) { + if (reference == nullptr || reference->isCompatibleWith(*child)) { + source_blenders.push_back(i); + reference 
= child; + } + } + } + if (source_blenders.size() > 1) { // maybe 2 + Blueprint::UP blender_up; + std::vector > sources; + while (!source_blenders.empty()) { + blender_up = self.removeChild(source_blenders.back()); + source_blenders.pop_back(); + SourceBlenderBlueprint *blender = dynamic_cast(blender_up.get()); + assert(blender != nullptr); + while (blender->childCnt() > 0) { + Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1); + size_t source_idx = lookup_create_source(sources, child_up->getSourceId()); + sources[source_idx]->addChild(std::move(child_up)); + } + } + SourceBlenderBlueprint *top = dynamic_cast(blender_up.get()); + assert(top != nullptr); + while (!sources.empty()) { + top->addChild(std::move(sources.back())); + sources.pop_back(); + } + blender_up = Blueprint::optimize(std::move(blender_up)); + self.addChild(std::move(blender_up)); + } +} + +} // namespace search::queryeval:: + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +AndNotBlueprint::combine(const std::vector &data) const +{ + if (data.empty()) { + return HitEstimate(); + } + return data[0]; +} + +FieldSpecBaseList +AndNotBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +AndNotBlueprint::optimize_self() +{ + AndNotBlueprint *child = dynamic_cast(&getChild(0)); + if (child != nullptr) { + while (child->childCnt() > 1) { + addChild(child->removeChild(1)); + } + insertChild(1, child->removeChild(0)); + removeChild(0); + } + for (size_t i = 1; i < childCnt(); ++i) { + if (getChild(i).getState().estimate().empty) { + removeChild(i--); + } + } + if (dynamic_cast(getParent()) == nullptr) { + optimize_source_blenders(*this, 1); + } +} + +Blueprint::UP +AndNotBlueprint::get_replacement() +{ + if (childCnt() == 1) { + return removeChild(0); + } + return Blueprint::UP(); +} + +void +AndNotBlueprint::sort(std::vector &children) const +{ + if (children.size() > 2) { + std::sort(children.begin() + 
1, children.end(), GreaterEstimate()); + } +} + +bool +AndNotBlueprint::inheritStrict(size_t i) const +{ + return (i == 0); +} + +SearchIterator::UP +AndNotBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData &md) const +{ + UnpackInfo unpackInfo(calculateUnpackInfo(md)); + if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); + auto termwise_search = (helper.first_termwise == 0) + ? SearchIterator::UP(AndNotSearch::create(helper.termwise, termwise_strict)) + : SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict)); + helper.insert_termwise(std::move(termwise_search), termwise_strict); + if (helper.children.size() == 1) { + return SearchIterator::UP(helper.children.front()); + } + return SearchIterator::UP(AndNotSearch::create(helper.children, strict)); + } + return SearchIterator::UP(AndNotSearch::create(subSearches, strict)); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +AndBlueprint::combine(const std::vector &data) const +{ + return min(data); +} + +FieldSpecBaseList +AndBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +AndBlueprint::optimize_self() +{ + for (size_t i = 0; i < childCnt(); ++i) { + AndBlueprint *child = dynamic_cast(&getChild(i)); + if (child != nullptr) { + while (child->childCnt() > 0) { + addChild(child->removeChild(0)); + } + removeChild(i--); + } + } + if (dynamic_cast(getParent()) == nullptr) { + optimize_source_blenders(*this, 0); + } +} + +Blueprint::UP +AndBlueprint::get_replacement() +{ + if (childCnt() == 1) { + return removeChild(0); + } + return Blueprint::UP(); +} + +void +AndBlueprint::sort(std::vector &children) const +{ + std::sort(children.begin(), children.end(), LessEstimate()); +} + +bool 
+AndBlueprint::inheritStrict(size_t i) const +{ + return (i == 0); +} + +SearchIterator::UP +AndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData & md) const +{ + UnpackInfo unpackInfo(calculateUnpackInfo(md)); + AndSearch * search = 0; + if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); + auto termwise_search = SearchIterator::UP(AndSearch::create(helper.termwise, termwise_strict)); + helper.insert_termwise(std::move(termwise_search), termwise_strict); + if (helper.children.size() == 1) { + return SearchIterator::UP(helper.children.front()); + } else { + search = AndSearch::create(helper.children, strict, helper.termwise_unpack); + } + } else { + search = AndSearch::create(subSearches, strict, unpackInfo); + } + search->estimate(getState().estimate().estHits); + return SearchIterator::UP(search); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +OrBlueprint::combine(const std::vector &data) const +{ + return max(data); +} + +FieldSpecBaseList +OrBlueprint::exposeFields() const +{ + return mixChildrenFields(); +} + +void +OrBlueprint::optimize_self() +{ + for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) { + OrBlueprint *child = dynamic_cast(&getChild(i)); + if (child != nullptr) { + while (child->childCnt() > 0) { + addChild(child->removeChild(0)); + } + removeChild(i--); + } else if (getChild(i).getState().estimate().empty) { + removeChild(i--); + } + } + if (dynamic_cast(getParent()) == nullptr) { + optimize_source_blenders(*this, 0); + } +} + +Blueprint::UP +OrBlueprint::get_replacement() +{ + if (childCnt() == 1) { + return removeChild(0); + } + return Blueprint::UP(); +} + +void +OrBlueprint::sort(std::vector &children) const +{ + std::sort(children.begin(), 
children.end(), GreaterEstimate()); +} + +bool +OrBlueprint::inheritStrict(size_t) const +{ + return true; +} + +SearchIterator::UP +OrBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData & md) const +{ + UnpackInfo unpackInfo(calculateUnpackInfo(md)); + if (should_do_termwise_eval(unpackInfo, md.get_termwise_limit())) { + TermwiseBlueprintHelper helper(*this, subSearches, unpackInfo); + bool termwise_strict = (strict && inheritStrict(helper.first_termwise)); + auto termwise_search = SearchIterator::UP(OrSearch::create(helper.termwise, termwise_strict)); + helper.insert_termwise(std::move(termwise_search), termwise_strict); + if (helper.children.size() == 1) { + return SearchIterator::UP(helper.children.front()); + } + return SearchIterator::UP(OrSearch::create(helper.children, strict, helper.termwise_unpack)); + } + return SearchIterator::UP(OrSearch::create(subSearches, strict, unpackInfo)); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +WeakAndBlueprint::combine(const std::vector &data) const +{ + HitEstimate childEst = max(data); + HitEstimate myEst(_n, false); + if (childEst < myEst) { + return childEst; + } + return myEst; +} + +FieldSpecBaseList +WeakAndBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +WeakAndBlueprint::sort(std::vector &) const +{ + // order needs to stay the same as _weights +} + +bool +WeakAndBlueprint::inheritStrict(size_t) const +{ + return true; +} + +SearchIterator::UP +WeakAndBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData &) const +{ + WeakAndSearch::Terms terms; + assert(subSearches.size() == childCnt()); + assert(_weights.size() == childCnt()); + for (size_t i = 0; i < subSearches.size(); ++i) { + terms.push_back(wand::Term(subSearches[i], + _weights[i], + getChild(i).getState().estimate().estHits)); + } + 
return SearchIterator::UP(WeakAndSearch::create(terms, _n, strict)); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +NearBlueprint::combine(const std::vector &data) const +{ + return min(data); +} + +FieldSpecBaseList +NearBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +NearBlueprint::sort(std::vector &children) const +{ + std::sort(children.begin(), children.end(), LessEstimate()); +} + +bool +NearBlueprint::inheritStrict(size_t i) const +{ + return (i == 0); +} + +SearchIterator::UP +NearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData &md) const +{ + search::fef::TermFieldMatchDataArray tfmda; + for (size_t i = 0; i < childCnt(); ++i) { + const State &cs = getChild(i).getState(); + for (size_t j = 0; j < cs.numFields(); ++j) { + tfmda.add(cs.field(j).resolve(md)); + } + } + return SearchIterator::UP(new NearSearch(subSearches, tfmda, _window, strict)); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +ONearBlueprint::combine(const std::vector &data) const +{ + return min(data); +} + +FieldSpecBaseList +ONearBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +ONearBlueprint::sort(std::vector &children) const +{ + // ordered near cannot sort children here + (void)children; +} + +bool +ONearBlueprint::inheritStrict(size_t i) const +{ + return (i == 0); +} + +SearchIterator::UP +ONearBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData &md) const +{ + search::fef::TermFieldMatchDataArray tfmda; + for (size_t i = 0; i < childCnt(); ++i) { + const State &cs = getChild(i).getState(); + for (size_t j = 0; j < cs.numFields(); ++j) { + tfmda.add(cs.field(j).resolve(md)); + } + } + // could sort subSearches here + // but then strictness inheritance would also need to 
be fixed + return SearchIterator::UP(new ONearSearch(subSearches, tfmda, _window, strict)); +} + +//----------------------------------------------------------------------------- + +Blueprint::HitEstimate +RankBlueprint::combine(const std::vector &data) const +{ + if (data.empty()) { + return HitEstimate(); + } + return data[0]; +} + +FieldSpecBaseList +RankBlueprint::exposeFields() const +{ + return FieldSpecBaseList(); +} + +void +RankBlueprint::optimize_self() +{ + for (size_t i = 1; i < childCnt(); ++i) { + if (getChild(i).getState().estimate().empty) { + removeChild(i--); + } + } + optimize_source_blenders(*this, 1); +} + +Blueprint::UP +RankBlueprint::get_replacement() +{ + if (childCnt() == 1) { + return removeChild(0); + } + return Blueprint::UP(); +} + +void +RankBlueprint::sort(std::vector &children) const +{ + (void)children; +} + +bool +RankBlueprint::inheritStrict(size_t i) const +{ + return (i == 0); +} + +SearchIterator::UP +RankBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData & md) const +{ + UnpackInfo unpackInfo(calculateUnpackInfo(md)); + if (unpackInfo.unpackAll()) { + return SearchIterator::UP(RankSearch::create(subSearches, strict)); + } else { + MultiSearch::Children requireUnpack; + requireUnpack.reserve(subSearches.size()); + requireUnpack.push_back(subSearches[0]); + for (size_t i(1); i < subSearches.size(); i++) { + if (unpackInfo.needUnpack(i)) { + requireUnpack.push_back(subSearches[i]); + } else { + delete subSearches[i]; + } + } + if (requireUnpack.size() == 1) { + return SearchIterator::UP(requireUnpack[0]); + } else { + return SearchIterator::UP(RankSearch::create(requireUnpack, strict)); + } + } +} + +//----------------------------------------------------------------------------- + +SourceBlenderBlueprint::SourceBlenderBlueprint(const ISourceSelector &selector) + : _selector(selector) +{ +} + +Blueprint::HitEstimate +SourceBlenderBlueprint::combine(const std::vector 
&data) const +{ + return max(data); +} + +FieldSpecBaseList +SourceBlenderBlueprint::exposeFields() const +{ + return mixChildrenFields(); +} + +void +SourceBlenderBlueprint::sort(std::vector &) const +{ +} + +bool +SourceBlenderBlueprint::inheritStrict(size_t) const +{ + return true; +} + +class FindSource : public Blueprint::IPredicate +{ +public: + FindSource(uint32_t sourceId) : _sourceId(sourceId) { } + virtual bool check(const Blueprint & bp) const { return bp.getSourceId() == _sourceId; } +private: + uint32_t _sourceId; +}; + +ssize_t +SourceBlenderBlueprint::findSource(uint32_t sourceId) const +{ + ssize_t index(-1); + FindSource fs(sourceId); + IndexList list = find(fs); + if ( ! list.empty()) { + index = list.front(); + } + return index; +} + +SearchIterator::UP +SourceBlenderBlueprint::createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, search::fef::MatchData &) const +{ + SourceBlenderSearch::Children children; + assert(subSearches.size() == childCnt()); + for (size_t i = 0; i < subSearches.size(); ++i) { + children.push_back(SourceBlenderSearch::Child(subSearches[i], + getChild(i).getSourceId())); + assert(children.back().sourceId != 0xffffffff); + } + return SearchIterator::UP(SourceBlenderSearch::create(_selector.createIterator(), + children, strict)); +} + +bool +SourceBlenderBlueprint::isCompatibleWith(const SourceBlenderBlueprint &other) const +{ + return (&_selector == &other._selector); +} + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h new file mode 100644 index 00000000000..b36538b55d4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -0,0 +1,181 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include "isourceselector.h"
+#include "searchable.h"
+#include
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint where only child 0 is positive (see isPositive()).
+ *
+ * combine() returns the estimate of child 0 (a default HitEstimate when
+ * there are no children), exposeFields() returns an empty list and only
+ * child 0 inherits strictness. sort() leaves child 0 in place and orders
+ * the remaining children with GreaterEstimate. optimize_self() folds an
+ * AndNotBlueprint found as child 0 into this node and removes children at
+ * index >= 1 whose estimate is empty; get_replacement() hands out the
+ * only child when exactly one child is left.
+ **/
+class AndNotBlueprint : public IntermediateBlueprint
+{
+public:
+    bool supports_termwise_children() const override { return true; }
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void optimize_self() override;
+    virtual Blueprint::UP get_replacement() override;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+private:
+    // only the first child counts as positive
+    virtual bool isPositive(size_t index) const { return index == 0; }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint whose estimate is min() over the child estimates.
+ *
+ * exposeFields() returns an empty list, children are sorted with
+ * LessEstimate and only child 0 inherits strictness. optimize_self()
+ * moves the children of nested AndBlueprint children up into this node;
+ * get_replacement() hands out the only child when exactly one is left.
+ * createIntermediateSearch() passes this blueprint's estimated hit count
+ * to the AndSearch it creates.
+ **/
+class AndBlueprint : public IntermediateBlueprint
+{
+public:
+    bool supports_termwise_children() const override { return true; }
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void optimize_self() override;
+    virtual Blueprint::UP get_replacement() override;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint whose estimate is max() over the child estimates.
+ *
+ * exposeFields() returns mixChildrenFields(), children are sorted with
+ * GreaterEstimate and every child inherits strictness. While more than
+ * one child remains, optimize_self() moves the children of nested
+ * OrBlueprint children up into this node and removes children whose
+ * estimate is empty; get_replacement() hands out the only child when
+ * exactly one is left.
+ **/
+class OrBlueprint : public IntermediateBlueprint
+{
+public:
+    bool supports_termwise_children() const override { return true; }
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void optimize_self() override;
+    virtual Blueprint::UP get_replacement() override;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint that creates a WeakAndSearch over weighted terms.
+ *
+ * combine() returns the smaller of max() over the child estimates and
+ * HitEstimate(_n, false). exposeFields() returns an empty list and every
+ * child inherits strictness. sort() is a no-op because the child order
+ * must stay aligned with _weights.
+ **/
+class WeakAndBlueprint : public IntermediateBlueprint
+{
+private:
+    uint32_t _n;            // handed to WeakAndSearch::create and used to cap the estimate
+    std::vector _weights;   // one entry per child, in child order
+
+public:
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+
+    WeakAndBlueprint(uint32_t n) : _n(n) {}
+    // Adds a child together with its weight; createIntermediateSearch()
+    // asserts that _weights has one entry per child.
+    void addTerm(Blueprint::UP bp, uint32_t weight) {
+        addChild(std::move(bp));
+        _weights.push_back(weight);
+    }
+    uint32_t getN() const { return _n; }
+    const std::vector &getWeights() const { return _weights; }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint that creates a NearSearch with the given window.
+ *
+ * combine() returns min() over the child estimates, exposeFields()
+ * returns an empty list, children are sorted with LessEstimate and only
+ * child 0 inherits strictness. createIntermediateSearch() resolves every
+ * field of every child against the match data and hands the collected
+ * entries to the NearSearch.
+ **/
+class NearBlueprint : public IntermediateBlueprint
+{
+private:
+    uint32_t _window;   // passed unchanged to NearSearch
+
+public:
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual bool should_optimize_children() const override { return false; }
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+
+    NearBlueprint(uint32_t window) : _window(window) {}
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint that creates an ONearSearch with the given window.
+ *
+ * Same estimate, field exposure and strictness rules as NearBlueprint,
+ * but sort() is a no-op: ordered near cannot reorder its children.
+ **/
+class ONearBlueprint : public IntermediateBlueprint
+{
+private:
+    uint32_t _window;   // passed unchanged to ONearSearch
+
+public:
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual bool should_optimize_children() const override { return false; }
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+
+    ONearBlueprint(uint32_t window) : _window(window) {}
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint whose estimate is that of child 0.
+ *
+ * exposeFields() returns an empty list, sort() is a no-op and only
+ * child 0 inherits strictness. optimize_self() removes children at
+ * index >= 1 whose estimate is empty; get_replacement() hands out the
+ * only child when exactly one is left. Unless everything must be
+ * unpacked, createIntermediateSearch() keeps sub-search 0 plus the
+ * sub-searches that need unpacking and deletes the others.
+ **/
+class RankBlueprint : public IntermediateBlueprint
+{
+public:
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void optimize_self() override;
+    virtual Blueprint::UP get_replacement() override;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Intermediate blueprint that creates a SourceBlenderSearch.
+ *
+ * combine() returns max() over the child estimates, exposeFields()
+ * returns mixChildrenFields(), sort() is a no-op and every child
+ * inherits strictness. createIntermediateSearch() pairs each sub-search
+ * with the source id of the matching child and asserts that the id is
+ * not 0xffffffff.
+ **/
+class SourceBlenderBlueprint : public IntermediateBlueprint
+{
+private:
+    const ISourceSelector &_selector;   // held by reference; used to create the source iterator
+
+public:
+    SourceBlenderBlueprint(const ISourceSelector &selector);
+    virtual HitEstimate combine(const std::vector &data) const;
+    virtual FieldSpecBaseList exposeFields() const;
+    virtual void sort(std::vector &children) const;
+    virtual bool inheritStrict(size_t i) const;
+    /**
+     * Will return the index matching the given sourceId.
+     * @param sourceId The sourceid to find.
+     * @return The index to the child representing the sourceId. -1 if not found.
+     */
+    ssize_t findSource(uint32_t sourceId) const;
+    virtual SearchIterator::UP
+    createIntermediateSearch(const MultiSearch::Children &subSearches,
+                             bool strict, search::fef::MatchData &md) const;
+
+    /**
+     * Check if this blueprint has the same source selector as the other.
+     * The comparison is on the address of the selector object.
+     */
+    bool isCompatibleWith(const SourceBlenderBlueprint &other) const;
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h
new file mode 100644
index 00000000000..0ee13ccbde5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/irequestcontext.h
@@ -0,0 +1,33 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Provides a context that follows the life of a query.
+ */
+class IRequestContext
+{
+public:
+    virtual ~IRequestContext() { }
+    /**
+     * Provides the time of doom for the query.
+     * @return time of doom.
+     */
+    virtual const vespalib::Doom & getDoom() const = 0;
+
+    /**
+     * Provides access to attribute vectors by name.
+     * @return AttributeVector or nullptr if it does not exist.
+     */
+    virtual const AttributeVector * getAttribute(const vespalib::string & name) const = 0;
+    // NOTE(review): how the "stable enum" variant differs from getAttribute()
+    // is not visible in this header - confirm against the implementations.
+    virtual const AttributeVector * getAttributeStableEnum(const vespalib::string & name) const = 0;
+};
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
new file mode 100644
index 00000000000..7821be450c0
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.cpp
@@ -0,0 +1,16 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+
+namespace search {
+namespace queryeval {
+
+// Starts with base id 0 and the given default source; the default source
+// must be below SOURCE_LIMIT (checked with assert only).
+ISourceSelector::ISourceSelector(Source defaultSource) :
+    _baseId(0),
+    _defaultSource(defaultSource)
+{
+    assert(defaultSource < SOURCE_LIMIT);
+}
+
+} // namespace queryeval
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/isourceselector.h b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
new file mode 100644
index 00000000000..4d3ce3ee302
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/isourceselector.h
@@ -0,0 +1,103 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+// Identifier of a source; stored as a single byte.
+typedef uint8_t Source;
+
+/**
+ * Component used to select between sources during result blending.
+ **/
+class ISourceSelector
+{
+protected:
+    // Per-document storage of source ids, read through Iterator.
+    typedef SingleValueNumericAttribute > SourceStore;
+public:
+    typedef std::unique_ptr UP;
+    typedef std::shared_ptr SP;
+    // The default source given to the constructor must be below this value.
+    static const Source SOURCE_LIMIT = 254u;
+
+    /**
+     * Read-only interface to the data held by the parent source
+     * selector.
+     **/
+    class Iterator {
+    public:
+        // Keeps a reference to the store; the store is not copied.
+        Iterator(const SourceStore & source)
+            : _source(source)
+        {
+        }
+        typedef std::unique_ptr UP;
+
+        /**
+         * Obtain the source to be used for the given document. This
+         * function should always be called with increasing document
+         * ids.
+         *
+         * @return source id
+         * @param docId document id
+         **/
+        queryeval::Source getSource(uint32_t docId) const {
+            return _source.getFast(docId);
+        }
+
+        /**
+         * empty; defined for safe subclassing.
+         **/
+        virtual ~Iterator() {}
+
+        // Returns the committed doc id limit of the underlying store.
+        uint32_t
+        getDocIdLimit(void) const
+        {
+            return _source.getCommittedDocIdLimit();
+        }
+    private:
+        const SourceStore & _source;
+    };
+
+protected:
+    ISourceSelector(Source defaultSource);
+public:
+    void setBaseId(uint32_t baseId) { _baseId = baseId; }
+    uint32_t getBaseId() const { return _baseId; }
+    Source getDefaultSource() const { return _defaultSource; }
+    /**
+     * Set the source to be used for a given document.
+     *
+     * @param docId local document id
+     * @param source source for this document
+     **/
+    virtual void setSource(uint32_t docId, Source source) = 0;
+
+    /**
+     * Gets the limit for docId numbers known to this selector.
+     *
+     * @return one above highest known doc id
+     **/
+    virtual uint32_t getDocIdLimit() const = 0;
+
+    /**
+     * Create a new iterator over the data held by this source
+     * selector.
+     *
+     * @return source selection iterator
+     **/
+    virtual Iterator::UP createIterator() const = 0;
+
+    /**
+     * empty; defined for safe subclassing.
+     **/
+    virtual ~ISourceSelector() {}
+private:
+    uint32_t _baseId;        // 0 until setBaseId() is called
+    Source   _defaultSource; // fixed at construction
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
new file mode 100644
index 00000000000..b73af2a721e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.cpp
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "iterator_pack.h"
+
+namespace search {
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
new file mode 100644
index 00000000000..b64af50a827
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterator_pack.h
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include "searchiterator.h"
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Holds a set of child search iterators that are addressed by index
+ * ('ref'), optionally together with one match data entry per child and
+ * the match data object those entries belong to.
+ *
+ * The class declares move construction and move assignment only.
+ **/
+class SearchIteratorPack
+{
+private:
+    // NOTE(review): element types were lost in this view; _children is
+    // filled with emplace_back(child), so it presumably holds owning
+    // pointers - confirm against the original header.
+    std::vector _children;
+    std::vector _childMatch;    // empty, or one entry per child
+    fef::MatchData::UP _md;
+
+public:
+    SearchIteratorPack() : _children(), _childMatch(), _md() {}
+    SearchIteratorPack(SearchIteratorPack &&rhs)
+        : _children(std::move(rhs._children)),
+          _childMatch(std::move(rhs._childMatch)),
+          _md(std::move(rhs._md)) {}
+
+    SearchIteratorPack &operator=(SearchIteratorPack &&rhs) {
+        _children = std::move(rhs._children);
+        _childMatch = std::move(rhs._childMatch);
+        _md = std::move(rhs._md);
+        return *this;
+    }
+
+    /**
+     * Builds a pack from the given children and match data.
+     *
+     * Asserts that there is either one childMatch entry per child, or no
+     * childMatch entries and no match data at all.
+     **/
+    SearchIteratorPack(const std::vector &children,
+                       const std::vector &childMatch,
+                       fef::MatchData::UP md)
+        : _children(),
+          _childMatch(childMatch),
+          _md(std::move(md))
+    {
+        _children.reserve(children.size());
+        for (auto child: children) {
+            _children.emplace_back(child);
+        }
+        assert((_children.size() == _childMatch.size()) ||
+               (_childMatch.empty() && (_md.get() == nullptr)));
+    }
+
+    // Builds a pack without match data; get_weight() must not be used on
+    // such a pack since _childMatch stays empty.
+    explicit SearchIteratorPack(const std::vector &children)
+        : SearchIteratorPack(children,
+                             std::vector(),
+                             fef::MatchData::UP()) {}
+
+    // Current doc id of child 'ref'.
+    uint32_t get_docid(uint32_t ref) const {
+        return _children[ref]->getDocId();
+    }
+
+    // Seeks child 'ref' to 'docid' and returns the doc id it ends up on.
+    uint32_t seek(uint32_t ref, uint32_t docid) {
+        _children[ref]->seek(docid);
+        return _children[ref]->getDocId();
+    }
+
+    // Unpacks child 'ref' for 'docid' and returns the weight found in its
+    // match data entry. No bounds checking is done on 'ref'.
+    int32_t get_weight(uint32_t ref, uint32_t docid) {
+        _children[ref]->doUnpack(docid);
+        return _childMatch[ref]->getWeight();
+    }
+
+    // Unpacks child 'ref' for 'docid'.
+    void unpack(uint32_t ref, uint32_t docid) {
+        _children[ref]->doUnpack(docid);
+    }
+
+    // Number of children in the pack.
+    size_t size() const {
+        return _children.size();
+    }
+    // Forwards the range to every child.
+    void initRange(uint32_t begin, uint32_t end) {
+        for (auto & child: _children) {
+            child->initRange(begin, end);
+        }
+    }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterators.cpp
b/searchlib/src/vespa/searchlib/queryeval/iterators.cpp
new file mode 100644
index 00000000000..d62a1b67069
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterators.cpp
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include
+#include
+LOG_SETUP(".iterators");
+
+#include "iterators.h"
+
+namespace search {
+
+namespace queryeval {
+
+// Copies the given match data array and starts out with _needUnpack set
+// to 1, i.e. not yet unpacked.
+RankedSearchIteratorBase::
+RankedSearchIteratorBase(const fef::TermFieldMatchDataArray &matchData)
+    : SearchIterator(),
+      _matchData(matchData),
+      _needUnpack(1)
+{ }
+
+} // namespace queryeval
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/iterators.h b/searchlib/src/vespa/searchlib/queryeval/iterators.h
new file mode 100644
index 00000000000..565b7aff455
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/iterators.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include "searchiterator.h"
+#include
+
+namespace search
+{
+
+namespace queryeval
+{
+
+class DocIdAndFeatures;
+
+/**
+ * Search iterator base that carries a match data array and a counter
+ * telling whether the current hit still needs to be unpacked.
+ *
+ * The counter is 0 when the hit has been unpacked; any other value means
+ * unpacking is still pending.
+ **/
+class RankedSearchIteratorBase : public SearchIterator
+{
+public:
+    // Match data array supplied at construction (publicly accessible).
+    fef::TermFieldMatchDataArray _matchData;
+private:
+    uint32_t _needUnpack;
+protected:
+    bool getUnpacked() const { return _needUnpack == 0; }
+    void setUnpacked() { _needUnpack = 0; }
+    void clearUnpacked() { _needUnpack = 1; }
+    uint32_t getNeedUnpack() const { return _needUnpack; }
+    void incNeedUnpack() { ++_needUnpack; }
+
+public:
+    RankedSearchIteratorBase(const fef::TermFieldMatchDataArray &matchData);
+};
+
+} // namespace queryeval
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
new file mode 100644
index 00000000000..4ddef401dd5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp
@@ -0,0 +1,91 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".queryeval.leaf_blueprints");
+#include "leaf_blueprints.h"
+#include "emptysearch.h"
+#include "simplesearch.h"
+#include "fake_search.h"
+
+namespace search {
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+// Always creates an EmptySearch; both the match data array and the
+// strict flag are ignored.
+SearchIterator::UP
+EmptyBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
+                                 bool) const
+{
+    return SearchIterator::UP(new EmptySearch());
+}
+
+// Empty blueprint for a single field.
+EmptyBlueprint::EmptyBlueprint(const FieldSpecBase &field)
+    : SimpleLeafBlueprint(field)
+{
+}
+
+// Empty blueprint for a list of fields.
+EmptyBlueprint::EmptyBlueprint(const FieldSpecBaseList &fields)
+    : SimpleLeafBlueprint(fields)
+{
+}
+
+// Empty blueprint without any fields.
+EmptyBlueprint::EmptyBlueprint()
+    : SimpleLeafBlueprint(FieldSpecBaseList())
+{
+}
+
+//-----------------------------------------------------------------------------
+
+// Creates a SimpleSearch over the stored result and tags it with _tag;
+// both the match data array and the strict flag are ignored.
+SearchIterator::UP
+SimpleBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &,
+                                  bool) const
+{
+    SimpleSearch *ss = new SimpleSearch(_result);
+    SearchIterator::UP search(ss);  // takes ownership before the tag is set
+    ss->tag(_tag);
+    return search;
+}
+
+// Stores a copy of the result and uses its hit count as the estimate;
+// the estimate is flagged as empty when there are no hits.
+SimpleBlueprint::SimpleBlueprint(const SimpleResult &result)
+    : SimpleLeafBlueprint(FieldSpecBaseList()),
+      _tag(),
+      _result(result)
+{
+    setEstimate(HitEstimate(result.getHitCount(),
+                            (result.getHitCount() == 0)));
+}
+
+// Sets the tag given to created searches; returns *this to allow chaining.
+SimpleBlueprint &
+SimpleBlueprint::tag(const vespalib::string &t)
+{
+    _tag = t;
+    return *this;
+}
+
+//-----------------------------------------------------------------------------
+
+// Creates a FakeSearch from the stored tag, field name, term and result
+// together with the given match data array; the strict flag is ignored.
+SearchIterator::UP
+FakeBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+                                bool) const
+{
+    return SearchIterator::UP(new FakeSearch(_tag, _field.getName(), _term,
+                                             _result, tfmda));
+}
+
+// Stores copies of the field and the result; tag and term start out as
+// empty strings. The estimate is derived from result.inspect(): its size
+// is the hit count and the estimate is flagged as empty when it is empty.
+FakeBlueprint::FakeBlueprint(const FieldSpec &field,
+                             const FakeResult &result)
+    : SimpleLeafBlueprint(field),
+      _tag(""),
+      _term(""),
+      _field(field),
+      _result(result)
+{
+    setEstimate(HitEstimate(result.inspect().size(),
+                            result.inspect().empty()));
+}
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
new file mode 100644
index 00000000000..eca464c846d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "blueprint.h"
+#include "simpleresult.h"
+#include "fake_result.h"
+#include "searchable.h"
+
+namespace search {
+
+namespace queryeval {
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint that always creates an EmptySearch. It can be set up
+ * for a list of fields, for a single field or for no fields at all.
+ **/
+class EmptyBlueprint : public SimpleLeafBlueprint
+{
+protected:
+    virtual SearchIterator::UP
+    createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+                     bool strict) const;
+
+public:
+    EmptyBlueprint(const FieldSpecBaseList &fields);
+    EmptyBlueprint(const FieldSpecBase &field);
+    EmptyBlueprint();
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint without fields that creates a tagged SimpleSearch over
+ * a SimpleResult given at construction. The estimate is the hit count
+ * of that result.
+ **/
+class SimpleBlueprint : public SimpleLeafBlueprint
+{
+private:
+    vespalib::string _tag;      // tag applied to every created search
+    SimpleResult     _result;   // copy of the result given at construction
+
+protected:
+    virtual SearchIterator::UP
+    createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda,
+                     bool strict) const;
+
+public:
+    SimpleBlueprint(const SimpleResult &result);
+    // Setter returns *this so that calls can be chained.
+    SimpleBlueprint &tag(const vespalib::string &tag);
+    const vespalib::string &tag() const { return _tag; }
+};
+
+//-----------------------------------------------------------------------------
+
+/**
+ * Leaf blueprint for a single field that creates a FakeSearch from a
+ * FakeResult given at construction, together with a tag and a term that
+ * can be set afterwards (both start out as empty strings).
+ **/
+class FakeBlueprint : public SimpleLeafBlueprint
+{
+private:
+    vespalib::string _tag;
+    vespalib::string _term;
+    FieldSpec        _field;
+    FakeResult       _result;
+
+protected:
+    virtual SearchIterator::UP
+    createLeafSearch(const fef::TermFieldMatchDataArray &tfmda,
+                     bool strict) const;
+
+public:
+    FakeBlueprint(const FieldSpec &field,
+                  const FakeResult &result);
+
+    // Sets the tag handed to created searches; returns *this for chaining.
+    FakeBlueprint &tag(const vespalib::string &t) {
+        _tag = t;
+        return *this;
+    }
+
+    // Sets the term handed to created searches; returns *this for chaining.
+    FakeBlueprint &term(const vespalib::string &t) {
+        _term = t;
+        return *this;
+    }
+};
+
+//-----------------------------------------------------------------------------
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp
new file mode 100644
index 00000000000..198634c56a9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP(".queryeval.monitoring_dump_iterator");
+#include "monitoring_dump_iterator.h"
+
+namespace search {
+namespace queryeval {
+
+// Takes over the given monitoring iterator.
+MonitoringDumpIterator::MonitoringDumpIterator(MonitoringSearchIterator::UP iterator)
+    : _search(std::move(iterator))
+{
+}
+
+// Visits the wrapped iterator with a Dumper and logs the resulting text
+// at info level.
+MonitoringDumpIterator::~MonitoringDumpIterator()
+{
+    // NOTE(review): the five numbers are presumably formatting settings
+    // for the dump; their meaning is not visible here - confirm against
+    // MonitoringSearchIterator::Dumper.
+    MonitoringSearchIterator::Dumper dumper(4, 25, 7, 10, 6);
+    visit(dumper, "", *_search);
+    LOG(info, "Search stats: %s", dumper.toString().c_str());
+}
+
+// Seeks the wrapped iterator and adopts the doc id it ends up on.
+void
+MonitoringDumpIterator::doSeek(uint32_t docId)
+{
+    _search->seek(docId);
+    setDocId(_search->getDocId());
+}
+
+// Forwards unpacking to the wrapped iterator.
+void
+MonitoringDumpIterator::doUnpack(uint32_t docId)
+{
+    _search->unpack(docId);
+}
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h
new file mode 100644
index 00000000000..6a6ab1f63a4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_dump_iterator.h
@@ -0,0 +1,38 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "monitoring_search_iterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Search iterator that wraps a monitoring search iterator, forwards all
+ * operations to it, and dumps its search stats to the log upon destruction.
+ */
+class MonitoringDumpIterator : public SearchIterator
+{
+private:
+    MonitoringSearchIterator::UP _search; // owned; every operation below is forwarded to it
+
+public:
+    MonitoringDumpIterator(MonitoringSearchIterator::UP iterator); // takes ownership of iterator
+    ~MonitoringDumpIterator();                                     // logs the collected stats
+
+    // Overrides SearchIterator
+    void doSeek(uint32_t docId) override;
+    void doUnpack(uint32_t docId) override;
+    Trinary is_strict() const override { return _search->is_strict(); }
+    void initRange(uint32_t beginid, uint32_t endid) override {
+        _search->initRange(beginid, endid); // the wrapped iterator is initialized first ...
+        SearchIterator::initRange(_search->getDocId()+1, _search->getEndId()); // ... then this one is set up from its doc id + 1 and end id
+    }
+    void resetRange() override {
+        _search->resetRange();
+        SearchIterator::resetRange();
+    }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp
new file mode 100644
index 00000000000..be282a29cdf
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.cpp
@@ -0,0 +1,239 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include "monitoring_search_iterator.h" +#include +#include + +LOG_SETUP(".queryeval.monitoring_search_iterator"); + +using vespalib::make_string; + +namespace search { +namespace queryeval { + +MonitoringSearchIterator::Stats::Stats() + : _numSeeks(0), + _numUnpacks(0), + _numDocIdSteps(0), + _numHitSkips(0) +{ +} + +void +MonitoringSearchIterator::Dumper::addIndent() +{ + int n = _currIndent; + if (n < 0) { + n = 0; + } + _str.append(vespalib::string(n, ' ')); +} + +void +MonitoringSearchIterator::Dumper::addText(const vespalib::string &value) +{ + addIndent(); + _str.append(value.c_str()); + uint32_t extraSpaces = value.size() < _textFormatWidth ? _textFormatWidth - value.size() : 0; + _str.append(make_string(":%s ", vespalib::string(extraSpaces, ' ').c_str())); +} + +void +MonitoringSearchIterator::Dumper::addInt(int64_t value, const vespalib::string &desc) +{ + _str.append(make_string("%*" PRId64 " %s", + _intFormatWidth, value, desc.c_str())); +} + +void +MonitoringSearchIterator::Dumper::addFloat(double value, const vespalib::string &desc) +{ + _str.append(make_string("%*.*f %s", + _floatFormatWidth, _floatFormatPrecision, value, desc.c_str())); +} + +void +MonitoringSearchIterator::Dumper::openScope() +{ + _currIndent += _indent; +} + +void +MonitoringSearchIterator::Dumper::closeScope() +{ + _currIndent -= _indent; +} + +MonitoringSearchIterator::Dumper::Dumper(int indent, + uint32_t textFormatWidth, + uint32_t intFormatWidth, + uint32_t floatFormatWidth, + uint32_t floatFormatPrecision) + : _indent(indent), + _textFormatWidth(textFormatWidth), + _intFormatWidth(intFormatWidth), + _floatFormatWidth(floatFormatWidth), + _floatFormatPrecision(floatFormatPrecision), + _str(), + _currIndent(0), + _stack() +{ +} + +void +MonitoringSearchIterator::Dumper::openStruct(const vespalib::string &name, const vespalib::string &type) +{ + if (type == "search::queryeval::MonitoringSearchIterator") { + _stack.push(ITERATOR); + } else if (type == 
"MonitoringSearchIterator::Stats") { + _stack.push(STATS); + } else if (name == "children") { + _stack.push(CHILDREN); + openScope(); + } else { + _stack.push(UNKNOWN); + } +} + +void +MonitoringSearchIterator::Dumper::closeStruct() +{ + StructType top = _stack.top(); + _stack.pop(); + if (top == CHILDREN) { + closeScope(); + } +} + +void +MonitoringSearchIterator::Dumper::visitBool(const vespalib::string &name, bool value) +{ + (void) name; + (void) value; +} + +void +MonitoringSearchIterator::Dumper::visitInt(const vespalib::string &name, int64_t value) +{ + if (_stack.top() == STATS) { + if (name == "numSeeks") { + addInt(value, "seeks, "); + } else if (name == "numUnpacks") { + addInt(value, "unpacks, "); + } + } +} + +void +MonitoringSearchIterator::Dumper::visitFloat(const vespalib::string &name, double value) +{ + if (_stack.top() == STATS) { + if (name == "avgDocIdSteps") { + addFloat(value, "steps/seek, "); + } else if (name == "avgHitSkips") { + addFloat(value, "skips/seek, "); + } else if (name == "numSeeksPerUnpack") { + addFloat(value, "seeks/unpack\n"); + } + } +} + +void +MonitoringSearchIterator::Dumper::visitString(const vespalib::string &name, const vespalib::string &value) +{ + if (_stack.top() == ITERATOR) { + if (name == "iteratorName") { + addText(value); + } + } +} + +void +MonitoringSearchIterator::Dumper::visitNull(const vespalib::string &name) +{ + (void) name; +} + +void +MonitoringSearchIterator::Dumper::visitNotImplemented() +{ +} + + +uint32_t +MonitoringSearchIterator::countHitSkips(uint32_t docId) +{ + uint32_t tmpDocId = _search->getDocId(); + uint32_t numHitSkips = 0; + for (; ;) { + _search->seek(tmpDocId + 1); + tmpDocId = _search->getDocId(); + if (tmpDocId >= docId) { + break; + } + ++numHitSkips; + } + return numHitSkips; +} + +MonitoringSearchIterator::MonitoringSearchIterator(const vespalib::string &name, + SearchIterator::UP search, + bool collectHitSkipStats) + : _name(name), + _search(std::move(search)), + 
_collectHitSkipStats(collectHitSkipStats), + _stats() +{ +} + +void +MonitoringSearchIterator::doSeek(uint32_t docId) +{ + _stats.seek(); + _stats.step(docId - getDocId()); + if (_collectHitSkipStats) { + _stats.skip(countHitSkips(docId)); + } else { + _search->seek(docId); + } + LOG(debug, "%s:doSeek(%d) = %d e=%d", _name.c_str(), docId, _search->getDocId(), _search->getEndId()); + setDocId(_search->getDocId()); +} + +void +MonitoringSearchIterator::doUnpack(uint32_t docId) +{ + LOG(debug, "%s:doUnpack(%d)", _name.c_str(), docId); + _stats.unpack(); + _search->unpack(docId); +} + +const PostingInfo * +MonitoringSearchIterator::getPostingInfo() const +{ + return _search->getPostingInfo(); +} + +void +MonitoringSearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visitor.visitString("iteratorName", _name); + visitor.visitString("iteratorType", _search->getClassName()); + { + visitor.openStruct("stats", "MonitoringSearchIterator::Stats"); + visitor.visitInt("numSeeks", _stats.getNumSeeks()); + visitor.visitInt("numDocIdSteps", _stats.getNumDocIdSteps()); + visitor.visitFloat("avgDocIdSteps", _stats.getAvgDocIdSteps()); + visitor.visitInt("numHitSkips", _stats.getNumHitSkips()); + visitor.visitFloat("avgHitSkips", _stats.getAvgHitSkips()); + visitor.visitInt("numUnpacks", _stats.getNumUnpacks()); + visitor.visitFloat("numSeeksPerUnpack", _stats.getNumSeeksPerUnpack()); + visitor.closeStruct(); + } + _search->visitMembers(visitor); +} + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h new file mode 100644 index 00000000000..b837dd06978 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/monitoring_search_iterator.h @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include "searchiterator.h"
+#include // NOTE(review): include target is missing in this copy of the patch (same for the next line)
+#include
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Search iterator that monitors an underlying search iterator
+ * and at the end provides statistics on the following:
+ * - number of seeks
+ * - number of unpacks
+ * - average docid step size
+ * - average hit skip size
+ * - number of seeks per unpack
+ */
+class MonitoringSearchIterator : public SearchIterator
+{
+public:
+    class Stats // plain counters plus derived ratios
+    {
+    private:
+        uint32_t _numSeeks;
+        uint32_t _numUnpacks;
+        uint64_t _numDocIdSteps; // sum of the doc id distances passed to step()
+        uint64_t _numHitSkips;   // sum of the hit counts passed to skip()
+        double divide(double dividend, double divisor) const {
+            return divisor > 0.0 ? dividend / divisor : 0.0; // yields 0.0 instead of dividing by zero
+        }
+    public:
+        Stats();
+        void seek() { ++_numSeeks; }
+        void step(uint32_t docIdDiff) { _numDocIdSteps += docIdDiff; }
+        void skip(uint32_t hitDiff) { _numHitSkips += hitDiff; }
+        void unpack() { ++_numUnpacks; }
+        uint32_t getNumSeeks() const { return _numSeeks; }
+        uint32_t getNumUnpacks() const { return _numUnpacks; }
+        double getNumSeeksPerUnpack() const { return divide(getNumSeeks(), getNumUnpacks()); }
+        uint64_t getNumDocIdSteps() const { return _numDocIdSteps; }
+        double getAvgDocIdSteps() const { return divide(getNumDocIdSteps(), getNumSeeks()); }
+        uint64_t getNumHitSkips() const { return _numHitSkips; }
+        double getAvgHitSkips() const { return divide(getNumHitSkips(), getNumSeeks()); }
+    };
+    // Object visitor that renders the iterator name and selected stats of visited iterators as text.
+    class Dumper : public vespalib::ObjectVisitor
+    {
+    private:
+        enum StructType { // classification of the structs currently open, see openStruct()
+            ITERATOR,
+            STATS,
+            CHILDREN,
+            UNKNOWN
+        };
+
+        int _indent;                    // indent step added per "children" level
+        uint32_t _textFormatWidth;      // minimum width of the iterator name column
+        uint32_t _intFormatWidth;       // field width for integer values
+        uint32_t _floatFormatWidth;     // field width for floating point values
+        uint32_t _floatFormatPrecision; // number of decimals for floating point values
+        vespalib::string _str;          // accumulated output
+        int _currIndent;                // current indent in spaces
+        std::stack _stack; // NOTE(review): element type is missing in this copy; the .cpp pushes StructType values
+        uint32_t _numberWidth; // NOTE(review): not set by the Dumper constructor and not read by any Dumper method in monitoring_search_iterator.cpp
+
+        void addIndent();
+        void addText(const vespalib::string &value);
+        void addInt(int64_t value, const vespalib::string &desc);
+        void addFloat(double value, const vespalib::string &desc);
+        void openScope();
+        void closeScope();
+
+    public:
+        Dumper(int indent = 4,
+               uint32_t textFormatWidth = 1,
+               uint32_t intFormatWidth = 1,
+               uint32_t floatFormatWidth = 1,
+               uint32_t floatFormatPrecision = 2);
+
+        vespalib::string toString() const { return _str; } // returns a copy of the text produced so far
+
+        // Overrides ObjectVisitor
+        virtual void openStruct(const vespalib::string &name, const vespalib::string &type);
+        virtual void closeStruct();
+        virtual void visitBool(const vespalib::string &name, bool value);
+        virtual void visitInt(const vespalib::string &name, int64_t value);
+        virtual void visitFloat(const vespalib::string &name, double value);
+        virtual void visitString(const vespalib::string &name, const vespalib::string &value);
+        virtual void visitNull(const vespalib::string &name);
+        virtual void visitNotImplemented();
+    };
+
+    typedef std::unique_ptr UP; // NOTE(review): template argument is missing in this copy of the patch
+
+private:
+    const vespalib::string _name;        // reported as "iteratorName" and used in debug logging
+    const SearchIterator::UP _search;    // the monitored iterator (owned)
+    const bool _collectHitSkipStats;     // when true, doSeek() steps hit by hit to count skipped hits
+    Stats _stats;
+
+    uint32_t countHitSkips(uint32_t docId);
+
+public:
+    MonitoringSearchIterator(const vespalib::string &name,
+                             SearchIterator::UP search,
+                             bool collectHitSkipStats);
+
+    // Overrides SearchIterator
+    void doSeek(uint32_t docId) override;
+    void doUnpack(uint32_t docId) override;
+    void initRange(uint32_t beginid, uint32_t endid) override {
+        _search->initRange(beginid, endid); // the monitored iterator is initialized first ...
+        SearchIterator::initRange(_search->getDocId()+1, _search->getEndId()); // ... then this one is set up from its doc id + 1 and end id
+    }
+    void resetRange() override {
+        _search->resetRange();
+        SearchIterator::resetRange();
+    }
+    Trinary is_strict() const override { return _search->is_strict(); }
+    virtual const PostingInfo *getPostingInfo() const;
+    void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+
+    const SearchIterator &getIterator() const { return *_search; } // the monitored iterator
+    const Stats &getStats() const { return _stats; }
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
new file
mode 100644
index 00000000000..0765d8d6850
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -0,0 +1,258 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include // NOTE(review): include targets and template argument lists are missing throughout this copy of the patch
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+namespace {
+// Non-strict iterator that combines the words of several bit vectors with Update (And / Or below).
+template
+class MultiBitVectorIterator : public MultiBitVectorIteratorBase
+{
+public:
+    MultiBitVectorIterator(const Children & children) : MultiBitVectorIteratorBase(children) { }
+protected:
+    void updateLastValue(uint32_t docId);
+    void strictSeek(uint32_t docId);
+private:
+    void doSeek(uint32_t docId) override;
+    bool isStrict() const override { return false; }
+    bool acceptExtraFilter() const override { return Update::isAnd(); } // only AND combinations take extra filters
+    Update _update;
+};
+// Strict variant: doSeek() moves on to the next set bit instead of only testing docId.
+template
+class MultiBitVectorIteratorStrict : public MultiBitVectorIterator
+{
+public:
+    MultiBitVectorIteratorStrict(const MultiSearch::Children & children) : MultiBitVectorIterator(children) { }
+private:
+    void doSeek(uint32_t docId) override { this->strictSeek(docId); }
+    bool isStrict() const override { return true; }
+};
+// Recomputes the cached combined word when docId lies at or beyond the range it covers; sets at-end past _numDocs.
+template
+void MultiBitVectorIterator::updateLastValue(uint32_t docId)
+{
+    if (docId >= _lastMaxDocIdLimit) {
+        if (__builtin_expect(docId < _numDocs, true)) {
+            const uint32_t index(wordNum(docId));
+            _lastValue = _bvs[0][index];
+            for(uint32_t i(1); i < _bvs.size(); i++) {
+                _lastValue = _update(_lastValue, _bvs[i][index]);
+            }
+            _lastMaxDocIdLimit = (index + 1) * WordLen; // first doc id that is not covered by the cached word
+        } else {
+            setAtEnd();
+        }
+    }
+}
+// Non-strict seek: reports a hit only if the bit for exactly docId is set in the combined word.
+template
+void
+MultiBitVectorIterator::doSeek(uint32_t docId)
+{
+    updateLastValue(docId);
+    if (__builtin_expect( ! isAtEnd(), true)) {
+        if (_lastValue & mask(docId)) {
+            setDocId(docId);
+        }
+    }
+}
+// Strict seek: scans word by word until a combined word is non-zero or the end is reached.
+template
+void
+MultiBitVectorIterator::strictSeek(uint32_t docId)
+{
+    for (updateLastValue(docId), _lastValue=_lastValue & checkTab(docId); // presumably checkTab() clears the bits below docId - confirm in BitWord
+         (_lastValue == 0) && __builtin_expect(! isAtEnd(), true);
+         updateLastValue(_lastMaxDocIdLimit)); // empty loop body: each step loads the next combined word
+    if (__builtin_expect(!isAtEnd(), true)) {
+        docId = _lastMaxDocIdLimit - WordLen + vespalib::Optimized::lsbIdx(_lastValue); // first doc id of the word + lsbIdx() of the word
+        if (__builtin_expect(docId >= _numDocs, false)) {
+            setAtEnd();
+        } else {
+            setDocId(docId);
+        }
+    }
+}
+// Word combiner: bitwise AND.
+struct And {
+    typedef BitWord::Word Word;
+    Word operator () (const Word a, const Word b) {
+        return a & b;
+    }
+    static bool isAnd() { return true; }
+};
+// Word combiner: bitwise OR.
+struct Or {
+    typedef BitWord::Word Word;
+    Word operator () (const Word a, const Word b) {
+        return a | b;
+    }
+    static bool isAnd() { return false; }
+};
+// NOTE(review): template arguments are missing below; presumably And / Or to match the alias names - confirm.
+typedef MultiBitVectorIterator AndBVIterator;
+typedef MultiBitVectorIteratorStrict AndBVIteratorStrict;
+typedef MultiBitVectorIterator OrBVIterator;
+typedef MultiBitVectorIteratorStrict OrBVIteratorStrict;
+// True when at least two of the children report isBitVector().
+bool hasAtLeast2Bitvectors(const MultiSearch::Children & children)
+{
+    size_t count(0);
+    for (auto it(children.begin()); it != children.end(); it++) {
+        if ((*it)->isBitVector()) {
+            count++;
+        }
+    }
+    return count >= 2;
+}
+// Index of the first child that may be stolen: child 0 of an AndNot is never taken.
+size_t firstStealable(const MultiSearch & s)
+{
+    return s.isAndNot() ? 1 : 0;
+}
+// Only And / Or / AndNot searches with at least two children and at least two bit vector children qualify.
+bool canOptimize(const MultiSearch & s) {
+    return (s.getChildren().size() >= 2)
+           && (s.isAnd() || s.isOr() || s.isAndNot())
+           && hasAtLeast2Bitvectors(s.getChildren());
+}
+
+}
+// Caches the bit data pointer of every child; _numDocs becomes the smallest doc id limit among the children.
+MultiBitVectorIteratorBase::MultiBitVectorIteratorBase(const Children & children) :
+    MultiSearch(children),
+    _numDocs(std::numeric_limits::max()),
+    _lastValue(0),
+    _lastMaxDocIdLimit(0),
+    _bvs(children.size())
+{
+    for (size_t i(0); i < children.size(); i++) {
+        const BitVectorIterator * bv = static_cast(children[i]); // unchecked cast: every child is treated as a BitVectorIterator
+        _bvs[i] = reinterpret_cast(bv->getBitValues());
+        _numDocs = std::min(_numDocs, bv->getDocIdLimit());
+    }
+}
+
+MultiBitVectorIteratorBase::~MultiBitVectorIteratorBase()
+{
+}
+// Absorbs a bit vector filter as an extra child when the subclass accepts it; otherwise hands the filter back.
+SearchIterator::UP
+MultiBitVectorIteratorBase::andWith(UP filter, uint32_t estimate)
+{
+    (void) estimate;
+    if (filter->isBitVector() && acceptExtraFilter()) {
+        const BitVectorIterator & bv = static_cast(*filter);
+        _bvs.push_back(reinterpret_cast(bv.getBitValues())); // NOTE(review): _numDocs is not lowered to bv.getDocIdLimit() here, unlike in the constructor - confirm limits are equal
+        insert(getChildren().size(), std::move(filter)); // appended as the last child; filter is moved-from afterwards
+        _lastMaxDocIdLimit = 0; // force reload
+    }
+    return filter; // the moved-from pointer when absorbed, the untouched filter otherwise
+}
+// Unpacks every child via MultiSearch when _unpackInfo says so, otherwise only the children selected by _unpackInfo.
+void
+MultiBitVectorIteratorBase::doUnpack(uint32_t docid)
+{
+    if (_unpackInfo.unpackAll()) {
+        MultiSearch::doUnpack(docid);
+    } else {
+        auto &children = getChildren();
+        _unpackInfo.each([&children,docid](size_t i){children[i]->doUnpack(docid);},
+                         children.size());
+    }
+}
+// Entry point: optimizes each child of a source blender, or the multi search itself; other iterators are returned as is.
+SearchIterator::UP
+MultiBitVectorIteratorBase::optimize(SearchIterator::UP parentIt)
+{
+    if (parentIt->isSourceBlender()) {
+        SourceBlenderSearch & parent(static_cast(*parentIt));
+        for (size_t i(0); i < parent.getNumChildren(); i++) {
+            parent.setChild(i, optimize(parent.steal(i)));
+        }
+    } else if (parentIt->isMultiSearch()) {
+        parentIt = optimizeMultiSearch(std::move(parentIt));
+    }
+    return parentIt;
+}
+// Moves the bit vector children of parent into one combined iterator, then optimizes the remaining children recursively.
+SearchIterator::UP
+MultiBitVectorIteratorBase::optimizeMultiSearch(SearchIterator::UP parentIt)
+{
+    MultiSearch & parent(static_cast(*parentIt));
+    if (canOptimize(parent)) {
+        MultiSearch::Children stolen;
+        std::vector _unpackIndex; // positions within 'stolen' of the children that need unpacking
+        bool strict(false);
+        size_t insertPosition(0);
+        for (size_t it(firstStealable(parent)); it != parent.getChildren().size(); ) {
+            if (parent.getChildren()[it]->isBitVector()) {
+                if (stolen.empty()) {
+                    insertPosition = it; // the combined iterator goes where the first stolen child was
+                }
+                if (parent.needUnpack(it)) {
+                    _unpackIndex.push_back(stolen.size());
+                }
+                SearchIterator::UP bit = parent.remove(it); // 'it' is not advanced: remove() erases the child at this index
+                if ( ! strict && static_cast(*bit).isStrict()) {
+                    strict = true; // the combined iterator is strict if any stolen child is strict
+                }
+                stolen.push_back(bit.release());
+            } else {
+                it++;
+            }
+        }
+        SearchIterator::UP next;
+        if (parent.isAnd()) {
+            if (strict) {
+                next.reset(new AndBVIteratorStrict(stolen));
+            } else {
+                next.reset(new AndBVIterator(stolen));
+            }
+        } else if (parent.isOr()) {
+            if (strict) {
+                next.reset(new OrBVIteratorStrict(stolen));
+            } else {
+                next.reset(new OrBVIterator(stolen));
+            }
+        } else if (parent.isAndNot()) { // the stolen children of an AndNot exclude child 0 and are combined with OR
+            if (strict) {
+                next.reset(new OrBVIteratorStrict(stolen));
+            } else {
+                next.reset(new OrBVIterator(stolen));
+            }
+        }
+        MultiBitVectorIteratorBase & nextM(static_cast(*next));
+        for (size_t index : _unpackIndex) {
+            nextM.addUnpackIndex(index);
+        }
+        if (parent.getChildren().empty()) {
+            return next; // every child was stolen: the combined iterator replaces the parent
+        } else {
+            parent.insert(insertPosition, std::move(next));
+        }
+    }
+    MultiSearch::Children & toOptimize(const_cast(parent.getChildren())); // constness is cast away to replace the children in place
+    for (size_t i(0); i < toOptimize.size(); i++) {
+        toOptimize[i] = optimize(MultiSearch::UP(toOptimize[i])).release(); // ownership round trip: raw -> UP -> optimize() -> raw
+    }
+
+    return parentIt;
+}
+
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
new file mode 100644
index 00000000000..75762bf3e52
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -0,0 +1,39 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include // NOTE(review): include target is missing in this copy of the patch (same for the next line)
+#include
+
+namespace search {
+namespace queryeval {
+// Base class for iterators that evaluate several bit vector children word by word.
+class MultiBitVectorIteratorBase : public MultiSearch, protected BitWord
+{
+public:
+    ~MultiBitVectorIteratorBase();
+    virtual bool isStrict() const = 0;
+    void addUnpackIndex(size_t index) { _unpackInfo.add(index); } // registers a child index with _unpackInfo
+    /**
+     * Will steal and optimize bitvectoriterators if it can.
+     * Might return itself or a new structure.
+     */
+    static SearchIterator::UP optimize(SearchIterator::UP parent);
+protected:
+    MultiBitVectorIteratorBase(const Children & children);
+
+    uint32_t _numDocs; // smallest doc id limit among the children given to the constructor
+    Word _lastValue; // Last value computed
+    uint32_t _lastMaxDocIdLimit; // next documentid requiring recomputation.
+    std::vector _bvs; // bit data of each child; NOTE(review): element type is missing in this copy of the patch
+private:
+    virtual bool acceptExtraFilter() const = 0; // whether andWith() may absorb a bit vector filter
+    UP andWith(UP filter, uint32_t estimate) override;
+    void doUnpack(uint32_t docid) override;
+    UnpackInfo _unpackInfo;
+    static SearchIterator::UP optimizeMultiSearch(SearchIterator::UP parent);
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp
new file mode 100644
index 00000000000..60a2d373e75
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include // NOTE(review): include target is missing in this copy of the patch (same for the other bare #include line)
+#include "multisearch.h"
+#include
+
+namespace search {
+namespace queryeval {
+// Inserts search at position index (index == size() appends), takes ownership, then calls onInsert().
+void
+MultiSearch::insert(size_t index, SearchIterator::UP search)
+{
+    assert(index <= _children.size());
+    _children.insert(_children.begin()+index, search.release());
+    onInsert(index);
+}
+// Removes the child at index, calls onRemove() and hands ownership of the child to the caller.
+SearchIterator::UP
+MultiSearch::remove(size_t index)
+{
+    assert(index < _children.size());
+    SearchIterator::UP search(_children[index]);
+    _children.erase(_children.begin() + index);
+    onRemove(index);
+    return search;
+}
+// Unpacks docid from every child positioned on it; lagging children are first moved forward with doSeek().
+void
+MultiSearch::doUnpack(uint32_t docid)
+{
+    size_t sz(_children.size());
+    for (size_t i = 0; i < sz; ) {
+        if (__builtin_expect(_children[i]->getDocId() < docid, false)) {
+            _children[i]->doSeek(docid);
+            if (_children[i]->isAtEnd()) {
+                sz = deactivate(i); // child is deleted; slot i now holds the former last child, so i is not advanced
+                continue;
+            }
+        }
+        if (__builtin_expect(_children[i]->getDocId() == docid, false)) {
+            _children[i]->doUnpack(docid);
+        }
+        i++;
+    }
+}
+// Deletes child idx and moves the last child into its slot (child order changes); returns the new child count.
+size_t
+MultiSearch::deactivate(size_t idx)
+{
+    assert(idx < _children.size());
+    delete _children[idx];
+    _children[idx] = _children.back();
+    _children.resize(_children.size() - 1);
+    return _children.size();
+}
+// Copies the child pointers; the children are deleted by this object's destructor.
+MultiSearch::MultiSearch(const Children & children)
+    : _children(children)
+{
+}
+// Deletes all remaining children.
+MultiSearch::~MultiSearch()
+{
+    for (SearchIterator * child : _children) {
+        delete child;
+    }
+}
+// Initializes this iterator's range first, then gives every child the same range.
+void
+MultiSearch::initRange(uint32_t beginid, uint32_t endid)
+{
+    SearchIterator::initRange(beginid, endid);
+    for (SearchIterator * child : _children) {
+        child->initRange(beginid, endid);
+    }
+}
+// Resets this iterator's range first, then the range of every child.
+void
+MultiSearch::resetRange()
+{
+    SearchIterator::resetRange();
+    for (SearchIterator * child : _children) {
+        child->resetRange();
+    }
+}
+// Exposes the children to the visitor under the name "children".
+void
+MultiSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    visit(visitor, "children", _children);
+}
+
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/multisearch.h b/searchlib/src/vespa/searchlib/queryeval/multisearch.h
new file mode 100644
index
00000000000..3de15040062
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/multisearch.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include // NOTE(review): include target is missing in this copy of the patch
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * A virtual intermediate class that serves as the basis for combining searches
+ * such as and, or, and any others that take a list of children.
+ **/
+class MultiSearch : public SearchIterator
+{
+public:
+    /**
+     * Defines how to represent the children iterators. NOTE(review): this comment says vespalib::Array
+     * generates faster and more compact code than std::vector, yet the typedef below names std::vector.
+     */
+    typedef std::vector Children;
+    /**
+     * Create a new Multi Search with the given children.
+     *
+     * @param children the search objects we are and'ing
+     *        this object takes ownership of the children.
+     **/
+    MultiSearch(const Children & children);
+    virtual ~MultiSearch();
+    const Children & getChildren() const { return _children; }
+    virtual bool isAnd() const { return false; }
+    virtual bool isAndNot() const { return false; }
+    virtual bool isOr() const { return false; }
+    void insert(size_t index, SearchIterator::UP search); // takes ownership of search; index may equal the child count
+    SearchIterator::UP remove(size_t index);              // gives ownership of the removed child to the caller
+    virtual bool needUnpack(size_t index) const { (void) index; return true; } // default: every child needs unpacking
+    void initRange(uint32_t beginId, uint32_t endId) override;
+    void resetRange() override;
+protected:
+    void doUnpack(uint32_t docid) override;
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+private:
+    /**
+     * Call backs for when children are removed / inserted after the Iterator has been constructed.
+     * This is to support code that makes assumptions that iterators do not move around or disappear.
+     * These are invoked after the child has been removed / inserted.
+     */
+    virtual void onRemove(size_t index) { (void) index; }
+    virtual void onInsert(size_t index) { (void) index; }
+
+    virtual bool isMultiSearch() const { return true; }
+    size_t deactivate(size_t index); // deletes a child that reached its end; used by doUnpack()
+    Children _children;
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp
new file mode 100644
index 00000000000..b33ab946acc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp
@@ -0,0 +1,313 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include // NOTE(review): include targets and template argument lists are missing throughout this copy of the patch
+#include
+LOG_SETUP(".nearsearch");
+
+#include "nearsearch.h"
+#include
+#include
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+namespace {
+
+using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchDataPositionKey;
+// Creates one matcher of type T per distinct field id found in 'in' and appends it to 'matchers'.
+template
+void setup_fields(uint32_t window, std::vector &matchers, const TermFieldMatchDataArray &in) {
+    std::set fields; // distinct field ids, visited in the set's iteration order
+    for (size_t i = 0; i < in.size(); ++i) {
+        fields.insert(in[i]->getFieldId());
+    }
+    std::set::const_iterator pos = fields.begin();
+    std::set::const_iterator end = fields.end();
+    for (; pos != end; ++pos) {
+        matchers.push_back(T(window, *pos, in));
+    }
+}
+
+} // namespace search::queryeval::
+// Stores the window size and strictness; from 'data' only its size is kept here.
+NearSearchBase::NearSearchBase(const Children & terms,
+                               const TermFieldMatchDataArray &data,
+                               uint32_t window,
+                               bool strict)
+    : AndSearch(terms),
+      _data_size(data.size()),
+      _window(window),
+      _strict(strict)
+{
+}
+// Adds data_size, window and strict to the members exposed by AndSearch.
+void
+NearSearchBase::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    AndSearch::visitMembers(visitor);
+    visit(visitor, "data_size", _data_size);
+    visit(visitor, "window", _window);
+    visit(visitor, "strict", _strict);
+}
+// Starting from term 0's current doc id, finds the next doc where all terms occur and match() holds; docId is only logged.
+void
+NearSearchBase::seekNext(uint32_t docId)
+{
+    LOG(debug, "seekNext(%d)", docId);
+    const Children & terms(getChildren());
+    SearchIterator &firstTerm = *terms[0]; // term 0 drives the candidate doc ids
+    uint32_t nextId = firstTerm.getDocId();
+    while ( ! isAtEnd(nextId)) {
+        LOG(debug, "Looking for match in document %d.", nextId);
+        bool foundHit = true;
+        for (uint32_t i = 1, len = terms.size(); i < len; ++i) {
+            SearchIterator &term = *terms[i];
+            if (!term.seek(nextId)) {
+                LOG(debug, "Term %d does not occur in document %d.", i, nextId);
+                foundHit = false;
+                if (term.getDocId() > nextId) {
+                    nextId = term.getDocId(); // the term already moved past the candidate: continue from there
+                    LOG(debug, "Next document in which term %d occurs is %d.", i, nextId);
+                } else {
+                    ++nextId;
+                    LOG(debug, "Bumping target document to %d.", nextId);
+                }
+                break;
+            }
+            LOG(debug, "Term %d occurs in document %d.", i, nextId);
+        }
+        if (foundHit) {
+            LOG(debug, "All terms occur in document %d, check for match.", nextId);
+            if (match(nextId)) {
+                LOG(debug, "Document %d matches.", nextId);
+                break;
+            }
+            ++nextId;
+        }
+        if ( ! isAtEnd(nextId)) {
+            LOG(debug, "Seeking next document that contains term 0, starting at %d.", nextId);
+            firstTerm.seek(nextId);
+            nextId = firstTerm.getDocId();
+            LOG(debug, "Next document that contains term 0 is %d.", nextId);
+        }
+    }
+    if (isAtEnd(nextId)) {
+        LOG(debug, "Reached end of document list.");
+        setAtEnd();
+    } else {
+        setDocId(nextId);
+    }
+}
+// Hit if every term occurs in docId and match() holds; otherwise a strict search continues with seekNext().
+void
+NearSearchBase::doSeek(uint32_t docId)
+{
+    LOG(debug, "doSeek(%d)", docId);
+    const Children & terms(getChildren());
+    bool foundHit = true;
+    for (uint32_t i = 0, len = terms.size(); i < len; ++i) {
+        SearchIterator *term = terms[i];
+        if (!term->seek(docId)) {
+            LOG(debug, "Term %d does not occur in document %d.", i, docId);
+            foundHit = false;
+            break;
+        }
+    }
+    if (foundHit && match(docId)) {
+        LOG(debug, "Document %d matches.", docId);
+        setDocId(docId);
+    } else if (_strict) {
+        LOG(debug, "Document %d does not match, seeking next.", docId);
+        seekNext(docId);
+    }
+}
+// Builds one NearSearch matcher per field present in 'data'.
+NearSearch::NearSearch(const Children & terms,
+                       const TermFieldMatchDataArray &data,
+                       uint32_t window,
+                       bool strict)
+    : NearSearchBase(terms, data, window, strict),
+      _matchers()
+{
+    setup_fields(window, _matchers, data);
+}
+
+namespace {
+// Current and end position iterator of one term; ordered by the position key at curPos.
+struct PosIter {
+    search::fef::TermFieldMatchData::PositionsIterator curPos;
+    search::fef::TermFieldMatchData::PositionsIterator endPos;
+
+    bool operator< (const PosIter &other) const {
+        // assumes none is at end
+        TermFieldMatchDataPositionKey mykey = *curPos;
+        TermFieldMatchDataPositionKey otherkey = *other.curPos;
+        return mykey < otherkey;
+    }
+};
+// Priority queue over the current position of every term, plus the largest current position seen (_maxOcc).
+struct Iterators
+{
+    vespalib::PriorityQueue _queue;
+    TermFieldMatchDataPositionKey _maxOcc;
+    // Raises _maxOcc to occ when occ is larger, or unconditionally while the queue holds a single entry.
+    void update(TermFieldMatchDataPositionKey occ)
+    {
+        if (_queue.size() == 1 || _maxOcc < occ) { _maxOcc = occ; }
+    }
+    // Adds a term; the term must have at least one position (asserted).
+    void add(const search::fef::TermFieldMatchData *term)
+    {
+        PosIter iter;
+        iter.curPos = term->begin();
+        iter.endPos = term->end();
+        LOG_ASSERT(iter.curPos != iter.endPos);
+        _queue.push(iter);
+        update(*iter.curPos);
+    }
+    // True once front position + window reaches _maxOcc; false as soon as the front term runs out of positions.
+    bool match(uint32_t window) {
+        for (;;) {
+            PosIter &front = _queue.front(); // presumably the entry with the smallest position - confirm PriorityQueue ordering
+            TermFieldMatchDataPositionKey lastAllowed = *front.curPos;
+            lastAllowed.setPosition(front.curPos->getPosition() + window);
+
+            if (!(lastAllowed < _maxOcc)) {
+                return true;
+            }
+            do { // advance the front term until its window reaches _maxOcc
+                ++front.curPos;
+                if (front.curPos == front.endPos) {
+                    return false;
+                }
+                lastAllowed = *front.curPos;
+                lastAllowed.setPosition(front.curPos->getPosition() + window);
+            } while (lastAllowed < _maxOcc);
+
+            update(*front.curPos);
+            _queue.adjust(); // the front entry changed, so the queue is re-adjusted
+        }
+    }
+};
+
+} // namespace
+// Matches when all inputs of this matcher have positions for docId that fit within one window, in any order.
+bool
+NearSearch::Matcher::match(uint32_t docId)
+{
+    Iterators pos;
+    for (uint32_t i = 0, len = inputs().size(); i < len; ++i) {
+        const search::fef::TermFieldMatchData *term = inputs()[i];
+        if (term->getDocId() != docId || term->begin() == term->end()) {
+            LOG(debug, "No occurrences found for term %d.", i);
+            return false;
+        }
+        LOG(debug, "Got positions iterator for term %d.", i);
+        pos.add(term);
+    }
+
+    // Look for matching window.
+    return pos.match(window());
+}
+// Unpacks docId, then reports a match if any per-field matcher matches.
+bool
+NearSearch::match(uint32_t docId)
+{
+    // Retrieve position iterators for each term.
+    doUnpack(docId);
+    for (size_t i = 0; i < _matchers.size(); ++i) {
+        if (_matchers[i].match(docId)) {
+            return true;
+        }
+    }
+    return false;
+}
+// Builds one ONearSearch matcher per field present in 'data'.
+ONearSearch::ONearSearch(const Children & terms,
+                         const TermFieldMatchDataArray &data,
+                         uint32_t window,
+                         bool strict)
+    : NearSearchBase(terms, data, window, strict),
+      _matchers()
+{
+    setup_fields(window, _matchers, data);
+}
+// Ordered variant: each term must occur after the previous one, all within the window starting at the first term.
+bool
+ONearSearch::Matcher::match(uint32_t docId)
+{
+    uint32_t numTerms = inputs().size();
+    PositionsIteratorList pos;
+    for (uint32_t i = 0; i < numTerms; ++i) {
+        const search::fef::TermFieldMatchData *term = inputs()[i];
+        if (term->getDocId() != docId || term->begin() == term->end()) {
+            LOG(debug, "No occurrences found for term %d.", i);
+            return false;
+        }
+        LOG(debug, "Got positions iterator for term %d.", i);
+        pos.push_back(term->begin());
+    }
+    if (numTerms < 2) return true; // 1 term is always near itself
+
+    int32_t remain = window();
+
+    TermFieldMatchDataPositionKey prevTermPos;
+    TermFieldMatchDataPositionKey curTermPos;
+    TermFieldMatchDataPositionKey lastAllowed;
+
+    // Look for match for every occurrence of the first term.
+    for ( ; pos[0] != inputs()[0]->end(); ++pos[0]) {
+        TermFieldMatchDataPositionKey firstTermPos = *pos[0];
+        lastAllowed = firstTermPos;
+        lastAllowed.setPosition(firstTermPos.getPosition() + remain);
+        if (lastAllowed < curTermPos) {
+            // if we already know that we must seek onwards:
+            continue;
+        }
+        prevTermPos = firstTermPos;
+        LOG(spam, "Looking for match in window [%d, %d].",
+            firstTermPos.getPosition(), lastAllowed.getPosition());
+        for (uint32_t i = 1; i < numTerms; ++i) {
+            LOG(spam, "Forwarding iterator for term %d beyond %d.", i, prevTermPos.getPosition());
+            while (pos[i] != inputs()[i]->end() && !(prevTermPos < *pos[i])) {
+                ++pos[i]; // skip occurrences that are not after the previous term
+            }
+            if (pos[i] == inputs()[i]->end()) {
+                LOG(debug, "Reached end of occurrences for term %d without matching ONEAR.", i);
+                return false;
+            }
+            curTermPos = *pos[i];
+            if (lastAllowed < curTermPos) {
+                // outside window
+                break;
+            }
+            LOG(spam, "Current position for term %d is %d.", i, curTermPos.getPosition());
+            if (i + 1 == numTerms) {
+                LOG(debug, "ONEAR match found for document %d.", docId);
+                // OK for all terms
+                return true;
+            }
+            prevTermPos = curTermPos;
+        }
+    }
+    LOG(debug, "No ONEAR match found for document %d.", docId);
+    return false;
+}
+// Unpacks docId, then reports a match if any per-field matcher matches.
+bool
+ONearSearch::match(uint32_t docId)
+{
+    // Retrieve position iterators for each term.
+    doUnpack(docId);
+    for (size_t i = 0; i < _matchers.size(); ++i) {
+        if (_matchers[i].match(docId)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+} // queryeval
+} // search
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearsearch.h b/searchlib/src/vespa/searchlib/queryeval/nearsearch.h
new file mode 100644
index 00000000000..48a3af91a4d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nearsearch.h
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include +#include "andsearch.h" + +namespace search { +namespace queryeval { + +/** + * The near search base implements the common logic of the near and o-near search. + */ +class NearSearchBase : public AndSearch +{ +protected: + uint32_t _data_size; + uint32_t _window; + bool _strict; + + typedef search::fef::TermFieldMatchDataArray TermFieldMatchDataArray; + + class MatcherBase + { + private: + uint32_t _window; + TermFieldMatchDataArray _inputs; + protected: + uint32_t window() const { return _window; } + const TermFieldMatchDataArray &inputs() const { return _inputs; } + public: + MatcherBase(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in) + : _window(win), + _inputs() + { + for (size_t i = 0; i < in.size(); ++i) { + if (in[i]->getFieldId() == fieldId) { + _inputs.add(in[i]); + } + } + } + }; + + /** + * Typedef the list of positions iterators because it takes far too much space to write out :-) + */ + typedef std::vector PositionsIteratorList; + + /** + * Returns whether or not given document matches. This should only be called when all child terms are all + * at the same document. + * + * @param docId The document for which we are checking. + * @return True if the document matches. + */ + virtual bool match(uint32_t docId) = 0; + + /** + * Performs seek() on all child terms until a match is found. This method calls setDocId() to signal the + * document found. + * + * @param docId The document id from which to start seeking. + */ + void seekNext(uint32_t docId); + +public: + /** + * Constructs a new search for the given term match data. + * + * @param terms The iterators for all child terms. + * @param data The term match data objects for all child terms. + * @param window The size of the window in which all terms must occur. + * @param strict Whether or not to skip to next matching document if seek fails. 
+ */ + NearSearchBase(const Children & terms, + const TermFieldMatchDataArray &data, + uint32_t window, + bool strict); + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + // Inherit doc from SearchIterator. + virtual void doSeek(uint32_t docId); +}; + +/** + * The near search matches only when all of its child terms occur within some given window size. + */ +class NearSearch : public NearSearchBase +{ +private: + struct Matcher : public NearSearchBase::MatcherBase + { + Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in) + : MatcherBase(win, fieldId, in) {} + bool match(uint32_t docId); + }; + + std::vector _matchers; + + // Inherit doc from NearSearchBase. + virtual bool match(uint32_t docId); + +public: + /** + * Constructs a new search for the given term match data. + * + * @param terms The iterators for all child terms. + * @param data The term match data objects for all child terms. + * @param window The size of the window in which all terms must occur. + * @param strict Whether or not to skip to next matching document if seek fails. + */ + NearSearch(const Children & terms, + const TermFieldMatchDataArray &data, + uint32_t window, + bool strict = true); +}; + +/** + * The o-near search matches only when all of its child terms occur within some given window size, in the + * same order as they appear as children of this. + */ +class ONearSearch : public NearSearchBase +{ +private: + struct Matcher : public NearSearchBase::MatcherBase + { + Matcher(uint32_t win, uint32_t fieldId, const TermFieldMatchDataArray &in) + : MatcherBase(win, fieldId, in) {} + bool match(uint32_t docId); + }; + + std::vector _matchers; + + // Inherit doc from NearSearchBase. + virtual bool match(uint32_t docId); + +public: + /** + * Constructs a new search for the given term match data. + * + * @param terms The iterators for all child terms. + * @param data The term match data objects for all child terms. 
+ * @param window The size of the window in which all terms must occur. + * @param strict Whether or not to skip to next matching document if seek fails. + */ + ONearSearch(const Children & terms, + const TermFieldMatchDataArray &data, + uint32_t window, + bool strict = true); + +}; + +} // queryeval +} // search + diff --git a/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h new file mode 100644 index 00000000000..356519ea5e8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/orlikesearch.h @@ -0,0 +1,73 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "orsearch.h" + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the Or search operation. + **/ +template +class OrLikeSearch : public OrSearch +{ +protected: + void doSeek(uint32_t docid) override { + const Children & children(getChildren()); + for (uint32_t i = 0; i < children.size(); ++i) { + if (children[i]->seek(docid)) { + setDocId(docid); + return; + } + } + if (strict) { + uint32_t minNextId = children[0]->getDocId(); + for (uint32_t i = 1; i < children.size(); ++i) { + if (children[i]->getDocId() < minNextId) { + minNextId = children[i]->getDocId(); + } + } + setDocId(minNextId); + } + } + Trinary is_strict() const override { return strict ? Trinary::True : Trinary::False; } + void visitMembers(vespalib::ObjectVisitor &visitor) const override { + MultiSearch::visitMembers(visitor); + visit(visitor, "strict", strict); + } + +public: + /** + * Create a new Or Search with the given children. A strict Or + * can assume that all children below are also strict. A + * non-strict Or has no strictness assumptions about its children. 
+ * + * @param children the search objects we are or'ing + **/ + OrLikeSearch(const Children &children, const Unpack & unpacker) : + OrSearch(children), + _unpacker(unpacker) + { } +private: + virtual void onRemove(size_t index) { + _unpacker.onRemove(index); + } + virtual void onInsert(size_t index) { + _unpacker.onInsert(index); + } + virtual void doUnpack(uint32_t docid) { + _unpacker.unpack(docid, *this); + } + virtual bool needUnpack(size_t index) const { + return _unpacker.needUnpack(index); + } + Unpack _unpacker; +}; + + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp new file mode 100644 index 00000000000..9342727100e --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.cpp @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "orsearch.h" +#include "orlikesearch.h" + +namespace search { +namespace queryeval { + +namespace { + +class FullUnpack +{ +public: + void unpack(uint32_t docid, MultiSearch & search) { + const MultiSearch::Children & children(search.getChildren()); + size_t sz(children.size()); + for (size_t i(0); i < sz; ) { + if (__builtin_expect(children[i]->getDocId() < docid, false)) { + children[i]->doSeek(docid); + if (children[i]->getDocId() == search::endDocId) { + sz = deactivate(search, i); + continue; + } + } + if (__builtin_expect(children[i]->getDocId() == docid, false)) { + children[i]->doUnpack(docid); + } + i++; + } + } + void onRemove(size_t index) { (void) index; } + void onInsert(size_t index) { (void) index; } + bool needUnpack(size_t index) const { (void) index; return true; } +private: + static size_t deactivate(MultiSearch &children, size_t idx); +}; + +size_t +FullUnpack::deactivate(MultiSearch & search, size_t idx) +{ + search.remove(idx); + return search.getChildren().size(); +} + 
+class SelectiveUnpack +{ +public: + SelectiveUnpack(const UnpackInfo & unpackInfo) : + _unpackInfo(unpackInfo) + { } + void unpack(uint32_t docid, const MultiSearch & search) { + auto &children = search.getChildren(); + _unpackInfo.each([&children,docid](size_t i) { + SearchIterator &child = *children[i]; + if (__builtin_expect(child.getDocId() < docid, false)) { + child.doSeek(docid); + } + if (__builtin_expect(child.getDocId() == docid, false)) { + child.doUnpack(docid); + } + }, children.size()); + } + void onRemove(size_t index) { + _unpackInfo.remove(index); + } + void onInsert(size_t index) { + _unpackInfo.insert(index); + } + bool needUnpack(size_t index) const { + return _unpackInfo.needUnpack(index); + } +private: + UnpackInfo _unpackInfo; +}; + +} + +BitVector::UP +OrSearch::get_hits(uint32_t begin_id) { + const Children &children = getChildren(); + BitVector::UP result = children.front()->get_hits(begin_id); + for (size_t i = 1; i < children.size(); ++i) { + children[i]->or_hits_into(*result, begin_id); + } + return result; +} + +SearchIterator * +OrSearch::create(const MultiSearch::Children &children, bool strict) { + UnpackInfo unpackInfo; + unpackInfo.forceAll(); + return create(children, strict, unpackInfo); +} + +SearchIterator * +OrSearch::create(const MultiSearch::Children &children, bool strict, const UnpackInfo & unpackInfo) { + (void) unpackInfo; + if (strict) { + if (unpackInfo.unpackAll()) { + return new OrLikeSearch(children, FullUnpack()); + } else if(unpackInfo.empty()) { + return new OrLikeSearch(children, NoUnpack()); + } else { + return new OrLikeSearch(children, SelectiveUnpack(unpackInfo)); + } + } else { + if (unpackInfo.unpackAll()) { + return new OrLikeSearch(children, FullUnpack()); + } else if(unpackInfo.empty()) { + return new OrLikeSearch(children, NoUnpack()); + } else { + return new OrLikeSearch(children, SelectiveUnpack(unpackInfo)); + } + } +} + +} // namespace queryeval +} // namespace search diff --git 
a/searchlib/src/vespa/searchlib/queryeval/orsearch.h b/searchlib/src/vespa/searchlib/queryeval/orsearch.h new file mode 100644 index 00000000000..353276da932 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/orsearch.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multisearch.h" +#include "unpackinfo.h" + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the Or search operation. + **/ +class OrSearch : public MultiSearch +{ +public: + typedef MultiSearch::Children Children; + + // Caller takes ownership of the returned SearchIterator. + static SearchIterator *create(const Children &children, bool strict); + static SearchIterator *create(const Children &children, bool strict, const UnpackInfo & unpackInfo); + + BitVector::UP get_hits(uint32_t begin_id) override; + +protected: + OrSearch(const Children & children) : MultiSearch(children) { } +private: + virtual bool isOr() const { return true; } +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/posting_info.h b/searchlib/src/vespa/searchlib/queryeval/posting_info.h new file mode 100644 index 00000000000..087e0fb62d7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/posting_info.h @@ -0,0 +1,45 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + + +namespace search { +namespace queryeval { + +/** + * Interface for getting global information stored in underlying posting list + * used by a search iterator. + * + * Subclasses of this interface will expose different information that can be + * used during evaluation. + */ +struct PostingInfo { + virtual ~PostingInfo() {} +}; + + +/** + * Class for getting the min and max weights of a posting list. 
+ * + * Such posting lists store a weight with each doc id and maintain the min and + * max weights among the whole posting list. + */ +class MinMaxPostingInfo : public PostingInfo { +private: + int32_t _minWeight; + int32_t _maxWeight; + +public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + MinMaxPostingInfo(int32_t minWeight, int32_t maxWeight) + : PostingInfo(), + _minWeight(minWeight), + _maxWeight(maxWeight) + {} + int32_t getMinWeight() const { return _minWeight; } + int32_t getMaxWeight() const { return _maxWeight; } +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp new file mode 100644 index 00000000000..70903788992 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.cpp @@ -0,0 +1,345 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include + +#include "predicate_blueprint.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP(".predicate_blueprint"); +#include + +using search::query::PredicateQuery; +using search::query::PredicateQueryTerm; +using std::make_pair; +using std::pair; +using std::vector; +using vespalib::string; +using namespace search::predicate; + +namespace search { +namespace queryeval { + +namespace { +typedef PredicateBlueprint::IntervalEntry IntervalEntry; +typedef PredicateBlueprint::BoundsEntry BoundsEntry; + +template +void pushValueDictionaryEntry(const Entry &entry, + const SimpleIndex &interval_index, + vector &interval_entries) { + const std::string &hash_str = entry.getKey() + "=" + entry.getValue(); + uint64_t feature = PredicateHash::hash64(hash_str); + auto iterator = interval_index.lookup(feature); + if (iterator.valid()) { + size_t sz = interval_index.getPostingListSize(iterator.getData()); + LOG(debug, "postinglist(%s) = (%d).size = %ld", hash_str.c_str(), iterator.getData().ref(), sz); + interval_entries.push_back({iterator.getData(), entry.getSubQueryBitmap(), sz, feature}); + } +} + +struct MyRangeHandler { + const SimpleIndex &interval_index; + const SimpleIndex &bounds_index; + vector &interval_entries; + vector &bounds_entries; + uint64_t subquery_bitmap; + + void handleRange(const string &label) { + uint64_t feature = PredicateHash::hash64(label); + auto iterator = interval_index.lookup(feature); + if (iterator.valid()) { + size_t sz = interval_index.getPostingListSize(iterator.getData()); + interval_entries.push_back({iterator.getData(), subquery_bitmap, sz, feature}); + } + } + void handleEdge(const string &label, uint32_t value) { + uint64_t feature = PredicateHash::hash64(label); + auto iterator = bounds_index.lookup(feature); + if (iterator.valid()) { + size_t sz = bounds_index.getPostingListSize(iterator.getData()); + bounds_entries.push_back({iterator.getData(), value, subquery_bitmap, sz, 
feature}); + } + } +}; + +template +void pushRangeDictionaryEntries( + const Entry &entry, + const PredicateIndex &index, + vector &interval_entries, + vector &bounds_entries) { + PredicateRangeTermExpander expander(index.getArity()); + MyRangeHandler handler{index.getIntervalIndex(), index.getBoundsIndex(), interval_entries, + bounds_entries, entry.getSubQueryBitmap()}; + expander.expand(entry.getKey(), entry.getValue(), handler); +} + +void pushZStarPostingList(const SimpleIndex &interval_index, + vector &interval_entries) { + uint64_t feature = PredicateIndex::z_star_hash; + auto iterator = interval_index.lookup(feature); + if (iterator.valid()) { + size_t sz = interval_index.getPostingListSize(iterator.getData()); + interval_entries.push_back({iterator.getData(), UINT64_MAX, sz, feature}); + } +} + +} // namespace + +void PredicateBlueprint::addPostingToK(uint64_t feature) +{ + const auto &interval_index = _index.getIntervalIndex(); + auto tmp = interval_index.lookup(feature); + if (__builtin_expect(tmp.valid() && (_cachedFeatures.find(feature) == _cachedFeatures.end()), true)) { + uint8_t *kVBase = &_kV[0]; + size_t kVSize = _kV.size(); + interval_index.foreach_frozen_key( + tmp.getData(), + feature, + [=](uint32_t doc_id) + { + if (__builtin_expect(doc_id < kVSize, true)) { + ++kVBase[doc_id]; + } + }); + } +} + +void PredicateBlueprint::addBoundsPostingToK(uint64_t feature) +{ + const auto &bounds_index = _index.getBoundsIndex(); + auto tmp = bounds_index.lookup(feature); + if (__builtin_expect(tmp.valid(), true)) { + uint8_t *kVBase = &_kV[0]; + size_t kVSize = _kV.size(); + bounds_index.foreach_frozen_key( + tmp.getData(), + feature, + [=](uint32_t doc_id) + { + if (__builtin_expect(doc_id < kVSize, true)) { + ++kVBase[doc_id]; + } + }); + } +} + +void PredicateBlueprint::addZeroConstraintToK() +{ + uint8_t *kVBase = &_kV[0]; + size_t kVSize = _kV.size(); + _index.getZeroConstraintDocs().foreach_key( + [=](uint32_t doc_id) + { + if (__builtin_expect(doc_id 
< kVSize, true)) { + ++kVBase[doc_id]; + } + }); +} + +PredicateBlueprint::PredicateBlueprint(const FieldSpecBase &field, + const PredicateAttribute & attribute, + const PredicateQuery &query) + : ComplexLeafBlueprint(field), + _attribute(attribute), + _index(predicate_attribute().getIndex()), + _kVBacking(), + _kV(nullptr, 0), + _cachedFeatures(), + _interval_dict_entries(), + _bounds_dict_entries(), + _zstar_dict_entry(), + _interval_btree_iterators(), + _interval_vector_iterators(), + _bounds_btree_iterators(), + _bounds_vector_iterators(), + _zstar_btree_iterator(), + _zstar_vector_iterator() +{ + const auto &interval_index = _index.getIntervalIndex(); + const auto zero_constraints_docs = _index.getZeroConstraintDocs(); + const PredicateQueryTerm &term = *query.getTerm(); + for (const auto &entry : term.getFeatures()) { + pushValueDictionaryEntry(entry, interval_index, _interval_dict_entries); + } + for (const auto &entry : term.getRangeFeatures()) { + pushRangeDictionaryEntries(entry, _index, _interval_dict_entries, + _bounds_dict_entries); + } + pushZStarPostingList(interval_index, _interval_dict_entries); + + BitVectorCache::KeyAndCountSet keys; + keys.reserve(_interval_dict_entries.size()); + for (const auto & e : _interval_dict_entries) { + keys.push_back({e.feature, e.size}); + } + _cachedFeatures = _index.lookupCachedSet(keys); + + auto it = interval_index.lookup(PredicateIndex::z_star_compressed_hash); + if (it.valid()) { + _zstar_dict_entry = it.getData(); + } + + std::sort(_interval_dict_entries.begin(), _interval_dict_entries.end(), + [&] (const auto & a, const auto & b) { + return a.size > b.size; + }); + + std::sort(_bounds_dict_entries.begin(), _bounds_dict_entries.end(), + [&] (const auto & a, const auto & b) { + return a.size > b.size; + }); + + + if (zero_constraints_docs.size() == 0 && + _interval_dict_entries.empty() && _bounds_dict_entries.empty() && + !_zstar_dict_entry.valid()) { + setEstimate(HitEstimate(0, true)); + } else { + 
setEstimate(HitEstimate(static_cast(zero_constraints_docs.size()), false)); + } +} + +namespace { + + template + void lookupPostingLists(const std::vector &dict_entries, + std::vector &vector_iterators, + std::vector &btree_iterators, + const SimpleIndex &index) + { + for (const auto &entry : dict_entries) { + auto vector_iterator = index.getVectorPostingList(entry.feature); + if (vector_iterator) { + vector_iterators.push_back(VectorIteratorEntry{*vector_iterator, entry}); + } else { + auto btree_iterator = index.getBTreePostingList(entry.entry_ref); + btree_iterators.push_back(BTreeIteratorEntry{btree_iterator, entry}); + } + } + + }; + +} + +void PredicateBlueprint::fetchPostings(bool) { + const auto &interval_index = _index.getIntervalIndex(); + const auto &bounds_index = _index.getBoundsIndex(); + lookupPostingLists(_interval_dict_entries, _interval_vector_iterators, + _interval_btree_iterators, interval_index); + lookupPostingLists(_bounds_dict_entries, _bounds_vector_iterators, + _bounds_btree_iterators, bounds_index); + + // Lookup zstar interval iterator + if (_zstar_dict_entry.valid()) { + auto vector_iterator = interval_index.getVectorPostingList( + PredicateIndex::z_star_compressed_hash); + if (vector_iterator) { + _zstar_vector_iterator.emplace(std::move(*vector_iterator)); + } else { + _zstar_btree_iterator.emplace(interval_index.getBTreePostingList(_zstar_dict_entry)); + } + } + + PredicateAttribute::MinFeatureHandle mfh = predicate_attribute().getMinFeatureVector(); + vespalib::DefaultAlloc kv(mfh.second); + _kVBacking.swap(kv); + _kV = BitVectorCache::CountVector(static_cast(_kVBacking.get()), mfh.second); + _index.computeCountVector(_cachedFeatures, _kV); + for (const auto & entry : _bounds_dict_entries) { + addBoundsPostingToK(entry.feature); + } + for (const auto & entry : _interval_dict_entries) { + addPostingToK(entry.feature); + } + addPostingToK(PredicateIndex::z_star_compressed_hash); + addZeroConstraintToK(); +} + +SearchIterator::UP 
+PredicateBlueprint::createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const { + const auto &attribute = predicate_attribute(); + PredicateAttribute::MinFeatureHandle mfh = attribute.getMinFeatureVector(); + auto interval_range_vector = attribute.getIntervalRangeVector(); + auto max_interval_range = attribute.getMaxIntervalRange(); + return SearchIterator::UP(new PredicateSearch(mfh.first, interval_range_vector, max_interval_range, _kV, + createPostingLists(), tfmda)); +} + +namespace { + + template + void createPredicatePostingLists(const std::vector &iterator_entries, + std::vector &posting_lists, + PostingListFactory posting_list_factory) + { + for (const auto &entry : iterator_entries) { + if (entry.iterator.valid()) { + auto posting_list = posting_list_factory(entry); + posting_list->setSubquery(entry.entry.subquery); + posting_lists.emplace_back(PredicatePostingList::UP(posting_list)); + } + } + } + +} + +std::vector PredicateBlueprint::createPostingLists() const { + size_t total_size = _interval_btree_iterators.size() + _interval_vector_iterators.size() + + _bounds_btree_iterators.size() + _bounds_vector_iterators.size() + 2; + std::vector posting_lists; + posting_lists.reserve(total_size); + const auto &interval_store = _index.getIntervalStore(); + + createPredicatePostingLists( + _interval_vector_iterators, posting_lists, + [&] (const IntervalIteratorEntry &entry) { + return new PredicateIntervalPostingList(interval_store, entry.iterator); + }); + + createPredicatePostingLists( + _interval_btree_iterators, posting_lists, + [&] (const IntervalIteratorEntry &entry) { + return new PredicateIntervalPostingList(interval_store, entry.iterator); + }); + + createPredicatePostingLists( + _bounds_vector_iterators, posting_lists, + [&] (const BoundsIteratorEntry &entry) { + return new PredicateBoundsPostingList(interval_store, entry.iterator, + entry.entry.value_diff); + }); + + createPredicatePostingLists( + _bounds_btree_iterators, posting_lists, + 
[&] (const BoundsIteratorEntry &entry) { + return new PredicateBoundsPostingList(interval_store, entry.iterator, + entry.entry.value_diff); + }); + + if (_zstar_vector_iterator && _zstar_vector_iterator->valid()) { + auto posting_list = PredicatePostingList::UP( + new PredicateZstarCompressedPostingList(interval_store, *_zstar_vector_iterator)); + posting_lists.emplace_back(std::move(posting_list)); + } else if (_zstar_btree_iterator && _zstar_btree_iterator->valid()) { + auto posting_list = PredicatePostingList::UP( + new PredicateZstarCompressedPostingList(interval_store, *_zstar_btree_iterator)); + posting_lists.emplace_back(std::move(posting_list)); + } + auto iterator = _index.getZeroConstraintDocs().begin(); + if (iterator.valid()) { + auto posting_list = PredicatePostingList::UP(new PredicateZeroConstraintPostingList(iterator)); + posting_lists.emplace_back(std::move(posting_list)); + } + return posting_lists; +} +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h new file mode 100644 index 00000000000..aeab9d4175f --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_blueprint.h @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "blueprint.h" +#include "predicate_search.h" +#include +#include +#include +#include +#include + +namespace search { +namespace query { class PredicateQuery; } + +namespace queryeval { +/** + * Blueprint for building predicate searches. It builds search + * iterators based on PredicateSearch. 
+ */ +class PredicateBlueprint : public ComplexLeafBlueprint { +public: + struct IntervalEntry { + btree::EntryRef entry_ref; + uint64_t subquery; + size_t size; + uint64_t feature; + }; + struct BoundsEntry { + btree::EntryRef entry_ref; + uint32_t value_diff; + uint64_t subquery; + size_t size; + uint64_t feature; + }; + template + struct IntervalIteratorEntry { + I iterator; + const IntervalEntry &entry; + }; + template + struct BoundsIteratorEntry { + I iterator; + const BoundsEntry &entry; + }; + + PredicateBlueprint(const FieldSpecBase &field, + const PredicateAttribute & attribute, + const query::PredicateQuery &query); + + void fetchPostings(bool strict) override; + + SearchIterator::UP + createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, + bool strict) const override; +private: + using BTreeIterator = predicate::SimpleIndex::BTreeIterator; + using VectorIterator = predicate::SimpleIndex::VectorIterator; + template + using optional = std::experimental::optional; + + const PredicateAttribute & predicate_attribute() const { + return _attribute; + } + PredicateAttribute & predicate_attribute() { + return const_cast(_attribute); + } + void addBoundsPostingToK(uint64_t feature); + void addPostingToK(uint64_t feature); + void addZeroConstraintToK(); + std::vector createPostingLists() const; + + const PredicateAttribute & _attribute; + const predicate::PredicateIndex &_index; + vespalib::DefaultAlloc _kVBacking; + BitVectorCache::CountVector _kV; + BitVectorCache::KeySet _cachedFeatures; + + std::vector _interval_dict_entries; + std::vector _bounds_dict_entries; + btree::EntryRef _zstar_dict_entry; + + std::vector> _interval_btree_iterators; + std::vector> _interval_vector_iterators; + std::vector> _bounds_btree_iterators; + std::vector> _bounds_vector_iterators; + // The zstar iterator is either a vector or a btree iterator. 
+ optional _zstar_btree_iterator; + optional _zstar_vector_iterator; +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp b/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp new file mode 100644 index 00000000000..91815f5f9ca --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_search.cpp @@ -0,0 +1,310 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +LOG_SETUP(".predicate_search"); +#include + +#include "predicate_search.h" +#include +#include +#include + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using std::vector; +using namespace search::predicate; + +namespace search { + +using predicate::MIN_INTERVAL; +using predicate::MAX_INTERVAL; + +namespace queryeval { + +namespace { + +class SkipMinFeatureSSE2 : public SkipMinFeature +{ +public: + SkipMinFeatureSSE2(const uint8_t * min_feature, const uint8_t * kv, size_t sz); +private: + typedef char v16u8 __attribute__((vector_size(16))); + VESPA_DLL_LOCAL uint32_t next() override; + uint32_t cmp32(size_t j) { + v16u8 r0 = _kv[j*2] >= _min_feature[j*2]; + v16u8 r1 = _kv[j*2+1] >= _min_feature[j*2+1]; + return __builtin_ia32_pmovmskb128(r0) | (__builtin_ia32_pmovmskb128(r1) << 16); + } + VESPA_DLL_LOCAL void advance(); + const v16u8 * _min_feature; + const v16u8 * _kv; + uint32_t _sz; + uint32_t _chunk; + uint32_t _last32; +}; + +SkipMinFeatureSSE2::SkipMinFeatureSSE2(const uint8_t * min_feature, const uint8_t * kv, size_t sz) : + _min_feature(reinterpret_cast(min_feature)), + _kv(reinterpret_cast(kv)), + _sz(sz), + _chunk(0), + _last32(0) +{ + advance(); + if (_chunk == 1) { + _last32 &= ~0x1; + } +} + +void +SkipMinFeatureSSE2::advance() +{ + for (;(_last32 == 0) && (_chunk < (_sz>>5)); _last32 = cmp32(_chunk++)); + if (_last32 == 0) { + const uint8_t * min_feature = 
reinterpret_cast(_min_feature); + const uint8_t * kv = reinterpret_cast(_kv); + for (size_t i(_chunk << 5); i < _sz; i++) { + if (kv[i] >= min_feature[i]) { + _last32 |= 1 << (i - (_chunk << 5)); + } + } + _chunk++; + } +} + +uint32_t +SkipMinFeatureSSE2::next() +{ + if (__builtin_expect(_last32 == 0, true)) { + advance(); + } + if (_last32) { + uint32_t n = vespalib::Optimized::lsbIdx(_last32); + _last32 &= ~(1 << n); + n += ((_chunk - 1) << 5); + return n < _sz ? n : -1; + } else { + return -1; + } +} + +} + +SkipMinFeature::UP +SkipMinFeature::create(const uint8_t * min_feature, const uint8_t * kv, size_t sz) +{ + return UP(new SkipMinFeatureSSE2(min_feature, kv, sz)); +} + +PredicateSearch::PredicateSearch(const uint8_t * minFeatureVector, + const IntervalRange * interval_range_vector, + IntervalRange max_interval_range, + CondensedBitVector::CountVector kV, + vector posting_lists, + const fef::TermFieldMatchDataArray &tfmda) + : _skip(SkipMinFeature::create(minFeatureVector, &kV[0], kV.size())), + _posting_lists(std::move(posting_lists)), + _sorted_indexes(_posting_lists.size()), + _sorted_indexes_merge_buffer(_posting_lists.size()), + _doc_ids(_posting_lists.size()), + _intervals(_posting_lists.size()), + _subqueries(_posting_lists.size()), + _subquery_markers(new uint64_t[max_interval_range+1]), + _visited(new bool[max_interval_range+1]), + _termFieldMatchData(tfmda.valid()? 
tfmda[0] : nullptr), + _min_feature_vector(minFeatureVector), + _interval_range_vector(interval_range_vector), + _max_interval_range(max_interval_range) +{ + + for (size_t i = 0; i < _posting_lists.size(); ++i) { + _sorted_indexes[i] = i; + _doc_ids[i] = _posting_lists[i]->getDocId(); + _subqueries[i] = _posting_lists[i]->getSubquery(); + } +} + +PredicateSearch::~PredicateSearch() +{ + delete [] _visited; + delete [] _subquery_markers; +} + +bool PredicateSearch::advanceOneTo(uint32_t doc_id, size_t index) { + size_t i = _sorted_indexes[index]; + if (__builtin_expect(_posting_lists[i]->next(doc_id - 1), true)) { + _doc_ids[i] = _posting_lists[i]->getDocId(); + return true; + } + _doc_ids[i] = UINT32_MAX; // will be last after sorting. + return false; +} + +namespace { +template +void sort_indexes(uint16_t *indexes, size_t size, CompareType *values) { + std::sort(indexes, indexes + size, + [&] (uint16_t a, uint16_t b) { return values[a] < values[b]; }); +} +} // namespace + +void PredicateSearch::advanceAllTo(uint32_t doc_id) { + size_t i = 0; + size_t completed_count = 0; + for (; i < _sorted_indexes.size() && _doc_ids[_sorted_indexes[i]] < doc_id; ++i) { + if (!advanceOneTo(doc_id, i)) { + ++completed_count; + } + } + if (__builtin_expect((i > 0) && ! _sorted_indexes.empty(), true)) { + sort_indexes(&_sorted_indexes[0], i, &_doc_ids[0]); + std::merge( + _sorted_indexes.begin(), _sorted_indexes.begin() + i, + _sorted_indexes.begin() + i, _sorted_indexes.end(), + _sorted_indexes_merge_buffer.begin(), + [&] (uint16_t a, uint16_t b) { + return _doc_ids[a] < _doc_ids[b]; + }); + _sorted_indexes.swap(_sorted_indexes_merge_buffer); + // After sorting and merging the completed indexes are at the end. 
+ _sorted_indexes.resize(_sorted_indexes.size() - completed_count); + _sorted_indexes_merge_buffer.resize(_sorted_indexes.size()); + } +} + + +namespace { +bool isNotInterval(uint32_t begin, uint32_t end) { + return begin > end; +} + +void markSubquery(uint32_t begin, uint32_t end, uint64_t subquery, uint64_t *subquery_markers, bool * visited) { + if (visited[begin]) { + visited[end] = true; + subquery_markers[end] |= subquery; + } +} + +// Returns the semantic interval end - or UINT32_MAX if no interval cover is possible +uint32_t addInterval(uint32_t interval, uint64_t subquery, + uint64_t *subquery_markers, bool * visited, uint32_t highest_end_seen) { + uint32_t begin = interval >> 16; + uint32_t end = interval & 0xffff; + + if (isNotInterval(begin, end)) { + // Note: End and begin values are swapped for zStar intervals + if (highest_end_seen < end) return UINT32_MAX; + markSubquery(end, begin, ~(subquery_markers[end]), subquery_markers, visited); + return begin; + } else { + if (highest_end_seen < begin - 1) return UINT32_MAX; + markSubquery(begin - 1, end, subquery_markers[begin - 1] & subquery, subquery_markers, visited); + return end; + } +} +void restoreSortedOrder(size_t first, size_t last, + vector &indexes, + const vector &intervals) __attribute__((noinline)); + +// One step of insertion sort: First element is moved to correct position. 
+void restoreSortedOrder(size_t first, size_t last, + vector &indexes, + const vector &intervals) { + uint32_t interval_to_move = intervals[indexes[first]]; + uint16_t index_to_move = indexes[first]; + while (++first < last && interval_to_move > intervals[indexes[first]]) { + indexes[first - 1] = indexes[first]; + } + indexes[first - 1] = index_to_move; +} + +} // namespace + +bool PredicateSearch::evaluateHit(uint32_t doc_id, uint32_t k) { + size_t candidates = sortIntervals(doc_id, k); + + size_t interval_end = _interval_range_vector[doc_id]; + memset(_subquery_markers, 0, sizeof(uint64_t) * (interval_end + 1)); + memset(_visited, false, sizeof(bool) * (interval_end + 1)); + _subquery_markers[0] = UINT64_MAX; + _visited[0] = true; + + uint32_t highest_end_seen = 1; + for (size_t i = 0; i < candidates; ) { + size_t index = _sorted_indexes[i]; + uint32_t last_end_seen = addInterval( + _intervals[index], _subqueries[index], _subquery_markers, _visited, highest_end_seen); + if (last_end_seen == UINT32_MAX) { + return false; + } + highest_end_seen = std::max(last_end_seen, highest_end_seen); + if (_posting_lists[index]->nextInterval()) { + _intervals[index] = _posting_lists[index]->getInterval(); + restoreSortedOrder(i, candidates, _sorted_indexes, _intervals); + } else { + ++i; + } + } + return _subquery_markers[interval_end] != 0; +} + +size_t PredicateSearch::sortIntervals(uint32_t doc_id, uint32_t k) { + size_t candidates = k + 1; + for (size_t i = candidates; i < _sorted_indexes.size(); ++i) { + if (_doc_ids[_sorted_indexes[i]] == doc_id) { + ++candidates; + } else { + break; + } + } + for (size_t i = 0; i < candidates; i++) { + _intervals[_sorted_indexes[i]] = _posting_lists[_sorted_indexes[i]]->getInterval(); + } + sort_indexes(&_sorted_indexes[0], candidates, &_intervals[0]); + return candidates; +} + +void PredicateSearch::skipMinFeature(uint32_t doc_id_in) +{ + uint32_t doc_id; + for (doc_id = _skip->next(); doc_id < doc_id_in; doc_id = _skip->next()); + + 
if (__builtin_expect( ! isAtEnd(doc_id), true)) { + advanceAllTo(doc_id); + } else { + setAtEnd(); + } +} + +void PredicateSearch::doSeek(uint32_t doc_id) { + skipMinFeature(doc_id); + while (!_sorted_indexes.empty() && ! isAtEnd()) { + uint32_t doc_id_0 = _doc_ids[_sorted_indexes[0]]; + uint8_t min_feature = _min_feature_vector[doc_id_0]; + uint8_t k = static_cast(min_feature == 0 ? 0 : min_feature - 1); + if (k < _sorted_indexes.size()) { + uint32_t doc_id_k = _doc_ids[_sorted_indexes[k]]; + if (doc_id_0 == doc_id_k) { + if (evaluateHit(doc_id_0, k)) { + setDocId(doc_id_0); + return; + } + } + } + skipMinFeature(doc_id_0 + 1); + } + setAtEnd(); +} + +void PredicateSearch::doUnpack(uint32_t doc_id) { + if (doc_id == getDocId()) { + if (_termFieldMatchData) { + auto end = _interval_range_vector[doc_id]; + _termFieldMatchData + ->setSubqueries(doc_id, _subquery_markers[end]); + } + } +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/predicate_search.h b/searchlib/src/vespa/searchlib/queryeval/predicate_search.h new file mode 100644 index 00000000000..f1461eb76b0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/predicate_search.h @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" +#include +#include +#include +#include + +namespace search { +namespace fef { +class TermFieldMatchData; +class TermFieldMatchDataArray; +} // namespace fef +namespace queryeval { + + + +class SkipMinFeature +{ +public: + typedef std::unique_ptr UP; + virtual ~SkipMinFeature() { } + VESPA_DLL_LOCAL virtual uint32_t next() = 0; + static SkipMinFeature::UP create(const uint8_t * min_feature, const uint8_t * kv, size_t sz); +}; + +/** + * Search iterator implementing the interval algorithm for boolean + * search. It operates on PredicatePostingLists, as defined above. 
+ */ +using IntervalRange = uint16_t; + +class PredicateSearch : public SearchIterator { + SkipMinFeature::UP _skip; + std::vector _posting_lists; + std::vector _sorted_indexes; + std::vector _sorted_indexes_merge_buffer; + std::vector _doc_ids; + std::vector _intervals; + std::vector _subqueries; + uint64_t *_subquery_markers; + bool * _visited; + fef::TermFieldMatchData *_termFieldMatchData; + const uint8_t * _min_feature_vector; + const IntervalRange * _interval_range_vector; + const IntervalRange _max_interval_range; + + VESPA_DLL_LOCAL bool advanceOneTo(uint32_t doc_id, size_t index); + VESPA_DLL_LOCAL void advanceAllTo(uint32_t doc_id); + VESPA_DLL_LOCAL bool evaluateHit(uint32_t doc_id, uint32_t k); + VESPA_DLL_LOCAL size_t sortIntervals(uint32_t doc_id, uint32_t k); + VESPA_DLL_LOCAL void skipMinFeature(uint32_t doc_id) __attribute__((noinline)); + +public: + PredicateSearch(const uint8_t * minFeature, + const IntervalRange * interval_range_vector, + IntervalRange max_interval_range, + CondensedBitVector::CountVector kV, + std::vector posting_lists, + const fef::TermFieldMatchDataArray &tfmda); + ~PredicateSearch(); + + void doSeek(uint32_t doc_id) override; + void doUnpack(uint32_t doc_id) override; +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp b/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp new file mode 100644 index 00000000000..58d61d83d08 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/ranksearch.cpp @@ -0,0 +1,64 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "ranksearch.h" + +namespace search { +namespace queryeval { + +void +RankSearch::doSeek(uint32_t docid) +{ + SearchIterator & firstChild(**getChildren().begin()); + if (firstChild.seek(docid)) { + setDocId(docid); + } +} + +namespace { +/** + * A simple implementation of the strict Rank search operation. + **/ +class RankSearchStrict : public RankSearch +{ +protected: + void doSeek(uint32_t docid) override; + UP andWith(UP filter, uint32_t estimate) override;; + +public: + /** + * Create a new Rank Search with the given children and + * strictness. A strict Rank can assume that the first child below + * is also strict. No such assumptions can be made about the other + * children. + * + * @param children the search objects we are rank'ing + **/ + RankSearchStrict(const Children & children) : RankSearch(children) { } +}; + +SearchIterator::UP +RankSearchStrict::andWith(UP filter, uint32_t estimate) +{ + return getChildren()[0]->andWith(std::move(filter), estimate); +} + +void +RankSearchStrict::doSeek(uint32_t docid) +{ + SearchIterator & firstChild(**getChildren().begin()); + setDocId(firstChild.seek(docid) ? docid : firstChild.getDocId()); +} +} // namespace + +SearchIterator * +RankSearch::create(const RankSearch::Children &children, bool strict) { + if (strict) { + return new RankSearchStrict(children); + } else { + return new RankSearch(children); + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/ranksearch.h b/searchlib/src/vespa/searchlib/queryeval/ranksearch.h new file mode 100644 index 00000000000..7dfd215d724 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/ranksearch.h @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multisearch.h" + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the Rank search operation. 
+ **/ +class RankSearch : public MultiSearch +{ +protected: + void doSeek(uint32_t docid) override; + + /** + * Create a new Rank Search with the given children. A non-strict Rank has + * no strictness assumptions about its children. + * + * @param children the search objects we are rank'ing + **/ + RankSearch(const Children & children) : MultiSearch(children) { } + +public: + // Caller takes ownership of the returned SearchIterator. + static SearchIterator *create(const Children &children, bool strict); +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/scores.h b/searchlib/src/vespa/searchlib/queryeval/scores.h new file mode 100644 index 00000000000..776e8acb4bd --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/scores.h @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search { +namespace queryeval { + +struct Scores { + feature_t low; + feature_t high; + Scores() : low(1), high(0) {} + Scores(feature_t l, feature_t h) : low(l), high(h) {} + + bool isValid() const { return low <= high; } +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/searchable.cpp new file mode 100644 index 00000000000..be1487eb7fd --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/searchable.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".searchable"); +#include "searchable.h" +#include "leaf_blueprints.h" +#include "intermediate_blueprints.h" + +namespace search { +namespace queryeval { + +Blueprint::UP +Searchable::createBlueprint(const IRequestContext & requestContext, + const FieldSpecList &fields, + const search::query::Node &term) +{ + if (fields.empty()) { + return Blueprint::UP(new EmptyBlueprint()); + } + if (fields.size() == 1) { + return createBlueprint(requestContext, fields[0], term); + } + OrBlueprint *b = new OrBlueprint(); + Blueprint::UP result(b); + for (size_t i = 0; i < fields.size(); ++i) { + b->addChild(createBlueprint(requestContext, fields[i], term)); + } + return result; +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/searchable.h b/searchlib/src/vespa/searchlib/queryeval/searchable.h new file mode 100644 index 00000000000..b386793de8e --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/searchable.h @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "field_spec.h" +#include "blueprint.h" +#include + +namespace search { + +namespace query { class Node; } + +namespace queryeval { + +/** + * Abstract class extended by components to expose content that can be + * searched by a query term. A Searchable component supports searching + * in one or more named fields. The Blueprint created by a Searchable + * is an intermediate query representation that is later used to + * create the actual search iterators used to produce matches. + **/ +class Searchable +{ +protected: + /** + * Create a blueprint searching a single field. 
+ * + * @return blueprint + * @param requestContext that belongs to the query + * @param field the field to search + * @param term the query tree term + **/ + virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext, + const FieldSpec &field, + const search::query::Node &term) = 0; + +public: + typedef std::shared_ptr SP; + + Searchable() {} + + /** + * Create a blueprint searching a set of fields. The default + * implementation of this function will create blueprints for + * individual fields and combine them with an OR blueprint. + * + * @return blueprint + * @param requestContext that belongs to the query + * @param fields the set of fields to search + * @param term the query tree term + **/ + virtual Blueprint::UP createBlueprint(const IRequestContext & requestContext, + const FieldSpecList &fields, + const search::query::Node &term); + virtual ~Searchable() {} +}; + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp new file mode 100644 index 00000000000..2e439acbf14 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.cpp @@ -0,0 +1,129 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".searchbase"); +#include "searchiterator.h" +#include +#include +#include +#include +#include + +// NB: might need to hide this from non-gcc compilers... 
+#include + +namespace search { +namespace queryeval { + +SearchIterator::SearchIterator() : + _docid(0), + _endid(0) +{ +} + +void +SearchIterator::resetRange() +{ + _docid = 0; + _endid = 0; +} + +SearchIterator::~SearchIterator() +{ +} + +void +SearchIterator::initRange(uint32_t beginid, uint32_t endid) +{ + _docid = beginid - 1; + _endid = endid; +} + +BitVector::UP +SearchIterator::get_hits(uint32_t begin_id) +{ + BitVector::UP result(BitVector::create(getEndId())); + uint32_t docid = std::max(begin_id, getDocId()); + while (!isAtEnd(docid)) { + if (seek(docid)) { + result->setBit(docid); + } + docid = std::max(docid + 1, getDocId()); + } + return result; +} + +SearchIterator::UP +SearchIterator::andWith(UP filter, uint32_t estimate) +{ + (void) estimate; + return filter; +} + +void +SearchIterator::or_hits_into(BitVector &result, uint32_t begin_id) +{ + BitVector::UP tmp = get_hits(begin_id); + const BitVector &rhs = *tmp; + result.orWith(rhs); +} + +void +SearchIterator::and_hits_into(BitVector &result, uint32_t begin_id) +{ + BitVector::UP tmp = get_hits(begin_id); + const BitVector &rhs = *tmp; + result.andWith(rhs); +} + +vespalib::string +SearchIterator::asString() const +{ + vespalib::ObjectDumper dumper; + visit(dumper, "", this); + return dumper.toString(); +} + +vespalib::string +SearchIterator::getClassName() const +{ + vespalib::string name(typeid(*this).name()); + int status = 0; + size_t size = 0; + // NB: might need to hide this from non-gcc compilers... 
+ char *unmangled = abi::__cxa_demangle(name.c_str(), 0, &size, &status); + vespalib::string result((unmangled != nullptr) ? unmangled : name.c_str()); // __cxa_demangle returns NULL on failure; fall back to the mangled name + free(unmangled); + return result; +} + +void +SearchIterator::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "docid", _docid); + visit(visitor, "endid", _endid); +} + +} // namespace queryeval +} // namespace search + +//----------------------------------------------------------------------------- + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SearchIterator *obj) +{ + if (obj != 0) { + self.openStruct(name, obj->getClassName()); + obj->visitMembers(self); + self.closeStruct(); + } else { + self.visitNull(name); + } +} + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SearchIterator &obj) +{ + visit(self, name, &obj); +} diff --git a/searchlib/src/vespa/searchlib/queryeval/searchiterator.h b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h new file mode 100644 index 00000000000..0aae661a6df --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/searchiterator.h @@ -0,0 +1,345 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "posting_info.h" +#include "begin_and_end_id.h" +#include +#include +#include + +namespace vespalib { class ObjectVisitor; }; + +namespace search { + +namespace queryeval { + +/** + * This is the abstract superclass of all search objects. Each search + * object act as an iterator over documents that are results for the + * subquery represented by that search object. Search objects will be + * combined into a tree structure to perform query evaluation in + * parallel. The unpack method is used to unpack match information for + * a document. 
The placement and format of this match data is a + * contract between the application and the leaf search objects and is + * of no concern to the interface defined by this class. + **/ +class SearchIterator +{ +private: + SearchIterator(const SearchIterator &); + SearchIterator &operator=(const SearchIterator &); + + /** + * The current document id for this search object. This variable + * will have a value that is either @ref beginId, @ref endId or a + * document id representing a hit for this search object. + **/ + uint32_t _docid; + + /** + * This is the end of the lidspace this iterator shall consider. + */ + uint32_t _endid; + +protected: + /** + * This method is used by the @ref doSeek method to indicate that + * a document is a hit. This method is also used to indicate that + * no more hits are available by using the @ref endId value. + * + * @param id docid for hit + **/ + void setDocId(uint32_t id) { _docid = id; } + + /** + * Will terminate the iterator by setting it past the end. + * Further calls to isAtEnd() will then return true. + */ + void setAtEnd() { _docid = search::endDocId; } + +public: + using Trinary=vespalib::Trinary; + // doSeek and doUnpack are called by templated classes, so making + // them public to avoid complicated friend requests. Note that if + // you call doSeek and doUnpack directly instead of using + // seek/unpack, you are bypassing docid checks and need to know + // what you are doing. + + /** + * This method must be overridden to perform the actual seeking + * for the concrete search class. The task of this method is to + * check whether the given document id is a hit for this search + * object. The current document id is changed with the @ref + * setDocId method. When this method returns, the current document + * id must have been updated as follows: if the candidate document + * id was in fact a hit, this is now the new current document + * id. 
If the candidate document id was not a hit, the method may + * choose to either leave the current document id as is, or + * increase it to indicate the next hit for this search object + * (@ref endId being a valid value). + * + * @param docid hit candidate + **/ + virtual void doSeek(uint32_t docid) = 0; + + /** + * This method must be overridden to perform the actual unpacking + * for the concrete search class. The task of this method is to + * unpack match information for the given docid. This method can + * assume that the given document is also the current position of + * the iterator. This is checked by the @ref unpack method which + * invokes this method. + * + * @param docid what docid to unpack match information for. + **/ + virtual void doUnpack(uint32_t docid) = 0; + + /** + * This sets the range the iterator shall work within. + * As soon as it reaches its limit it can stop. + * Iterators can override this one and do what they need to do. + * It must also rewind if instructed to do so. + * + * @param beginId This is the first valid docId and the lowest that will be given to doSeek. + * @param endId This is the first docid after the valid range. + */ + virtual void initRange(uint32_t beginId, uint32_t endId); + /** + * Will initialize the full range. + **/ + void initFullRange() { initRange(1, search::endDocId); } + + /** + * Temporary method to explicitly rewind the iterator. + */ + virtual void resetRange(); + + /** + * Find all hits in the currently searched range (specified by + * initRange) and return them as a bitvector. This function will + * perform term-at-a-time evaluation and should only be used for + * terms not needed for ranking. Calling this function will + * exhaust this iterator and no more results will be available in + * the currently searched range after this function returns. 
+ * + * @return bitvector with hits for this iterator + * @param begin_id the lowest document id that may be a hit + * (we do not remember beginId from initRange) + **/ + virtual BitVector::UP get_hits(uint32_t begin_id); + + /** + * Find all hits in the currently searched range (specified by + * initRange) and OR them into the given temporary result. This + * function will perform term-at-a-time evaluation and should only + * be used for terms not needed for ranking. Calling this function + * will exhaust this iterator and no more results will be + * available in the currently searched range after this function + * returns. + * + * @param result result to be augmented by adding hits from this + * iterator. + * @param begin_id the lowest document id that may be a hit + * (we might not remember beginId from initRange) + **/ + virtual void or_hits_into(BitVector &result, uint32_t begin_id); + + /** + * Find all hits in the currently searched range (specified by + * initRange) and AND them into the given temporary result. This + * function will perform term-at-a-time evaluation and should only + * be used for terms not needed for ranking. Calling this function + * will exhaust this iterator and no more results will be + * available in the currently searched range after this function + * returns. + * + * @param result result to be narrowed down to the hits from this + * iterator. + * @param begin_id the lowest document id that may be a hit + * (we might not remember beginId from initRange) + **/ + virtual void and_hits_into(BitVector &result, uint32_t begin_id); + +public: + typedef std::unique_ptr UP; + + /** + * The constructor sets the current document id to @ref beginId. + **/ + SearchIterator(); + + + /** + * Special value indicating that this searcher has not yet started + * seeking through documents. This must match beginId() in + * search::fef::TermFieldMatchData class. 
+ * + * @return constant + **/ + static uint32_t beginId() { return beginDocId; } + + /** + * Tell if the iterator has reached the end. + * + * @return true if the iterator has reached its end. + **/ + bool isAtEnd() const { return isAtEnd(_docid); } + bool isAtEnd(uint32_t docid) const { + if (__builtin_expect(docid >= _endid, false)) { + assert (_endid != 0); + return true; + } + return false; + } + + /** + * Obtain the current document id for this search object. The + * value is either @ref beginId, @ref endId or a document id + * representing a hit for this search object. + * + * @return current document id + **/ + uint32_t getDocId() const { return _docid; } + + uint32_t getEndId() const { return _endid; } + + /** + * Check if the given document id is a hit. If it is a hit, the + * current document id of this search object is set to the given + * document id. If it is not a hit, the current document id is + * either unchanged, set to the next hit, or set to @ref endId. + * + * @return true if the given document id is a hit. + * @param docid hit candidate + **/ + bool seek(uint32_t docid) { + if (__builtin_expect(docid > _docid, true)) { + doSeek(docid); + } + return (docid == _docid); + } + + /** + * Seek to the next docid and return it. Start with the one given. + * With protection for going backWards. + * Note that this requires the iterator to be strict. + * + * @return the first matching docid + * @param docid hit candidate + **/ + uint32_t seekFirst(uint32_t docid) { + if (__builtin_expect(docid > _docid, true)) { + doSeek(docid); + } + return _docid; + } + + /** + * Seek to the next docid and return it. Start with the one given. + * Without protection for going backWards. + * Note that this requires the iterator to be strict. + * + * @return the first matching docid + * @param docid hit candidate + **/ + uint32_t seekNext(uint32_t docid) { + doSeek(docid); + return _docid; + } + + /** + * Unpack hit information for the given docid if available. 
This + * method may also change the current docid for this iterator. + * + * @param docid what docid to unpack match information for. + **/ + void unpack(uint32_t docid) { + if (__builtin_expect(seek(docid), true)) { + doUnpack(docid); + } + } + + /** + * Return global posting info associated with this search iterator. + * + * @return global posting info or NULL if no info is available. + **/ + virtual const PostingInfo *getPostingInfo() const { return NULL; } + + /** + * Create a human-readable representation of this object. This + * method will use object visitation internally to capture the + * full structure of this object. + * + * @return structured human-readable representation of this object + **/ + vespalib::string asString() const; + + /** + * Obtain the fully qualified name of the concrete class for this + * object. The default implementation will perform automatic name + * resolving. There is only a need to override this function if + * you want to impersonate another class. + * + * @return fully qualified class name + **/ + virtual vespalib::string getClassName() const; + + /** + * Visit each of the members of this object. This method should be + * overridden by subclasses and should present all appropriate + * internal structure of this object to the given visitor. Note + * that while each level of a class hierarchy may cooperate to + * visit all object members (invoking superclass method within + * method), this method, as implemented in the SearchIterator class + * should not be invoked, since its default implementation is + * there to signal about the method not being overridden. + * + * @param visitor the visitor of this object + **/ + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + /** + * Empty, just defined to make it virtual. 
+ **/ + virtual ~SearchIterator(); + + /** + * @return true if it is a bitvector + */ + virtual bool isBitVector() const { return false; } + /** + * @return true if it is a source blender + */ + virtual bool isSourceBlender() const { return false; } + /** + * @return true if it is a multi search + */ + virtual bool isMultiSearch() const { return false; } + + /** + * This is used for adding an extra filter. If it is accepted it will return an empty UP. + * If not you will get it back in return. Currently it will only be accepted by a + * MultiBitVector with a pure 'and' path down if it is a BitVector, + * or by a strict AND with a pure 'and' path. Be careful if you plan to steal the filter. + * + * @param filter the searchiterator that is an extra filter. + * @param estimate is the number of hits this filter is expected to produce. + * @return the given filter or empty if it has been consumed. + **/ + virtual UP andWith(UP filter, uint32_t estimate); + + virtual Trinary is_strict() const { return Trinary::Undefined; } +}; + +} // namespace queryeval +} // namespace search + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SearchIterator &obj); +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SearchIterator *obj); + diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp new file mode 100644 index 00000000000..8f06823ea37 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.cpp @@ -0,0 +1,105 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".simple_phrase_blueprint"); + +#include "simple_phrase_blueprint.h" +#include "simple_phrase_search.h" +#include +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +SimplePhraseBlueprint::SimplePhraseBlueprint(const FieldSpec &field, const IRequestContext & requestContext) + : ComplexLeafBlueprint(field), + _doom(requestContext.getDoom()), + _field(field), + _estimate(), + _layout(), + _terms() +{ +} + +SimplePhraseBlueprint::~SimplePhraseBlueprint() +{ + while (!_terms.empty()) { + delete _terms.back(); + _terms.pop_back(); + } +} + +FieldSpec +SimplePhraseBlueprint::getNextChildField(const FieldSpec &outer) +{ + return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false); +} + +void +SimplePhraseBlueprint::addTerm(Blueprint::UP term) +{ + const State &childState = term->getState(); + assert(childState.numFields() == 1); + const FieldSpecBase &childField = childState.field(0); + assert(childField.getFieldId() == _field.getFieldId()); + (void) childField; + + HitEstimate childEst = childState.estimate(); + if (_terms.empty() || childEst < _estimate) { + _estimate = childEst; + } + setEstimate(_estimate); + _terms.push_back(term.get()); + term.release(); +} + +SearchIterator::UP +SimplePhraseBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const +{ + assert(tfmda.size() == 1); + fef::MatchData::UP md = _layout.createMatchData(); + search::fef::TermFieldMatchDataArray childMatch; + SimplePhraseSearch::Children children(_terms.size()); + std::multimap order_map; + for (size_t i = 0; i < _terms.size(); ++i) { + const State &childState = _terms[i]->getState(); + assert(childState.numFields() == 1); + childMatch.add(childState.field(0).resolve(*md)); + children[i] = _terms[i]->createSearch(*md, strict).release(); + order_map.insert(std::make_pair(childState.estimate().estHits, i)); + } + std::vector eval_order; + 
for (std::multimap::iterator + it = order_map.begin(); it != order_map.end(); ++it) { + eval_order.push_back(it->second); + } + + SimplePhraseSearch * phrase = new SimplePhraseSearch(children, std::move(md), childMatch, + eval_order, *tfmda[0], strict); + phrase->setDoom(& _doom); + return SearchIterator::UP(phrase); +} + + +void +SimplePhraseBlueprint::fetchPostings(bool strict) +{ + for (size_t i = 0; i < _terms.size(); ++i) { + _terms[i]->fetchPostings(strict); + } +} + +void +SimplePhraseBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + LeafBlueprint::visitMembers(visitor); + visit(visitor, "terms", _terms); +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h new file mode 100644 index 00000000000..fc1fde7e2c6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_blueprint.h @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "searchable.h" +#include +#include +#include +#include + +namespace search { +namespace fef { class TermFieldMatchData; } + +namespace queryeval { + +class SimplePhraseBlueprint : public ComplexLeafBlueprint +{ +private: + const vespalib::Doom _doom; + FieldSpec _field; + HitEstimate _estimate; + fef::MatchDataLayout _layout; + std::vector _terms; + + SimplePhraseBlueprint(const SimplePhraseBlueprint &); // disabled + SimplePhraseBlueprint &operator=(const SimplePhraseBlueprint &); // disabled + +public: + SimplePhraseBlueprint(const FieldSpec &field, const IRequestContext & requestContext); + virtual ~SimplePhraseBlueprint(); + + // used by create visitor + FieldSpec getNextChildField(const FieldSpec &outer); + + // used by create visitor + void addTerm(Blueprint::UP term); + + virtual SearchIterator::UP + createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const; + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + virtual void + fetchPostings(bool strict); +}; + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp new file mode 100644 index 00000000000..9a624724933 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.cpp @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include
+#include
+LOG_SETUP(".simple_phrase_search");
+
+#include "simple_phrase_search.h"
+#include
+#include
+#include
+#include
+
+using search::fef::TermFieldMatchData;
+using std::unique_ptr;
+using std::mem_fun_ref;
+using std::transform;
+using std::vector;
+using vespalib::ObjectVisitor;
+
+namespace search {
+namespace queryeval {
+
+namespace {
+// Helper class
+// Checks whether the (already unpacked) term positions line up as a
+// phrase: within one element, the term with word index i must occur at
+// position (phrase start + i). Terms are checked in _eval_order, and the
+// caller-owned _iterators vector holds one position cursor per term.
+class PhraseMatcher {
+    const fef::TermFieldMatchDataArray &_tmds;
+    const vector &_eval_order;
+    vector &_iterators;
+    uint32_t _element_id;   // element of the current candidate phrase
+    uint32_t _position;     // position the phrase would start at (word index 0)
+
+    // Position cursor of the given term (mutable reference).
+    TermFieldMatchData::PositionsIterator &iterator(uint32_t word_index)
+    { return _iterators[word_index]; }
+
+    TermFieldMatchData::PositionsIterator end(uint32_t word_index)
+    { return _tmds[word_index]->end(); }
+
+    uint32_t elementId(uint32_t word_index)
+    { return iterator(word_index)->getElementId(); }
+
+    uint32_t position(uint32_t word_index)
+    { return iterator(word_index)->getPosition(); }
+
+    // Skips positions of the term that lie in elements before _element_id.
+    void iterateToElement(uint32_t word_index) {
+        while (iterator(word_index) != end(word_index) &&
+               elementId(word_index) < _element_id) {
+            ++iterator(word_index);
+        }
+    }
+
+    // Recursively checks the remaining terms [first, last) of the
+    // evaluation order against the candidate (_element_id, _position).
+    // An empty range means every term has been placed.
+    template
+    bool match(FwdIt first, FwdIt last) {
+        if (first == last) {
+            return true;
+        }
+        uint32_t word_index = *first;
+
+        iterateToElement(word_index);
+        while (iterator(word_index) != end(word_index) &&
+               elementId(word_index) == _element_id) {
+            if (position(word_index) == _position + word_index) {
+                return match(++first, last);
+            } else if (position(word_index) > _position + word_index) {
+                // already past the wanted slot; the cursor is left in place
+                return false;
+            }
+            ++iterator(word_index);
+        }
+        return false;
+    }
+
+    // Uses the current position of the first term in the evaluation order
+    // as anchor, derives the phrase start from it and checks the rest.
+    bool match() {
+        _element_id = elementId(_eval_order[0]);
+        if (position(_eval_order[0]) < _eval_order[0]) {
+            // this position too early in element to allow match of other phrase terms
+            return false;
+        }
+        _position = position(_eval_order[0]) - _eval_order[0];
+        return match(++_eval_order.begin(), _eval_order.end());
+    }
+
+public:
+    // Resets every cursor in 'iterators' to the first position of its term.
+    PhraseMatcher(const fef::TermFieldMatchDataArray &tmds,
+                  const vector &eval_order,
+                  vector &iterators)
+        : _tmds(tmds),
+          _eval_order(eval_order),
+          _iterators(iterators)
+    {
+        for (size_t i = 0; i < _tmds.size(); ++i) {
+            _iterators[i] = _tmds[i]->begin();
+        }
+    }
+
+    // True when at least one phrase occurrence exists. A single-term
+    // phrase matches without looking at positions.
+    bool hasMatch() {
+        if (_tmds.size() == 1) {
+            return true;
+        }
+
+        while (iterator(_eval_order[0]) != end(_eval_order[0])) {
+            if (match()) {
+                return true;
+            }
+            ++iterator(_eval_order[0]);
+        }
+        return false;
+    }
+
+    // Appends one position to 'tmd' per phrase occurrence. For a
+    // multi-term phrase the appended position is the one of word index 0.
+    void fillPositions(TermFieldMatchData &tmd) {
+        if (_tmds.size() == 1) {
+            for (TermFieldMatchData::PositionsIterator
+                     it = _tmds[0]->begin(); it != _tmds[0]->end(); ++it) {
+                tmd.appendPosition(*it);
+            }
+        } else {
+            while (iterator(_eval_order[0]) != end(_eval_order[0])) {
+                if (match()) {
+                    tmd.appendPosition(*iterator(0));
+                }
+                ++iterator(_eval_order[0]);
+            }
+        }
+    }
+};
+
+// Seeks every child to doc_id in evaluation order and stops at the first
+// child that does not match.
+bool allTermsHaveMatch(const SimplePhraseSearch::Children &terms,
+                       const vector &eval_order, uint32_t doc_id) {
+    for (uint32_t i = 0; i < terms.size(); ++i) {
+        if (!terms[eval_order[i]]->seek(doc_id)) {
+            return false;
+        }
+    }
+    return true;
+}
+} // namespace
+
+// Tests a single document: all children must hit doc_id, then the
+// children are unpacked and their positions checked for a phrase. If a
+// doom object is set and reports doom, the iterator is moved to the end
+// instead.
+void SimplePhraseSearch::phraseSeek(uint32_t doc_id) {
+    if (allTermsHaveMatch(getChildren(), _eval_order, doc_id)) {
+        if ((_doom != nullptr) && _doom->doom()) {
+            setAtEnd();
+        } else {
+            AndSearch::doUnpack(doc_id);
+            if (PhraseMatcher(_childMatch, _eval_order, _iterators).hasMatch()) {
+                setDocId(doc_id);
+            }
+        }
+    }
+}
+
+
+SimplePhraseSearch::SimplePhraseSearch(const Children &children,
+                                       fef::MatchData::UP md,
+                                       const fef::TermFieldMatchDataArray &childMatch,
+                                       vector eval_order,
+                                       TermFieldMatchData &tmd, bool strict)
+    : AndSearch(children),
+      _md(std::move(md)),
+      _childMatch(childMatch),
+      _eval_order(eval_order),
+      _tmd(tmd),
+      _doom(nullptr),
+      _strict(strict),
+      _iterators(children.size())
+{
+    assert(!children.empty());
+    assert(children.size() == _childMatch.size());
+    assert(children.size() == _eval_order.size());
+}
+
+// Non-strict: only doc_id itself is tested. Strict: child 0 is used to
+// produce the next candidates until a phrase hit is found or child 0 is
+// exhausted.
+void SimplePhraseSearch::doSeek(uint32_t doc_id) {
+    phraseSeek(doc_id);
+    if (_strict) {
+        uint32_t next_candidate = doc_id;
+        while (getDocId() < doc_id || getDocId() == beginId()) {
+            getChildren()[0]->seek(next_candidate + 1);
+            next_candidate = getChildren()[0]->getDocId();
+            if (isAtEnd(next_candidate)) {
+                setAtEnd();
+                return;
+            }
+            // child must behave as strict.
+            assert(next_candidate > doc_id && next_candidate != beginId());
+
+            phraseSeek(next_candidate);
+        }
+    }
+}
+
+void SimplePhraseSearch::doUnpack(uint32_t doc_id) {
+    // All children have already been unpacked before this call is made.
+
+    _tmd.reset(doc_id);
+    PhraseMatcher(_childMatch, _eval_order, _iterators).fillPositions(_tmd);
+}
+
+void SimplePhraseSearch::visitMembers(ObjectVisitor &visitor) const {
+    AndSearch::visitMembers(visitor);
+    visit(visitor, "strict", _strict);
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h
new file mode 100644
index 00000000000..5d8e7d592fe
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simple_phrase_search.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "andsearch.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace search {
+
+namespace queryeval {
+
+/**
+ * Search iterator for a phrase, based on a set of child search iterators.
+ */
+class SimplePhraseSearch : public AndSearch
+{
+    fef::MatchData::UP _md;
+    fef::TermFieldMatchDataArray _childMatch;
+    std::vector _eval_order;
+    fef::TermFieldMatchData &_tmd;
+    const vespalib::Doom *_doom;   // optional; nullptr until setDoom() is called
+    bool _strict;
+
+    typedef fef::TermFieldMatchData::PositionsIterator It;
+    // Reuse this vector instead of allocating a new one when needed.
+    std::vector _iterators;
+
+    void phraseSeek(uint32_t doc_id);
+
+public:
+    /**
+     * Takes ownership of the contents of children.
+     * If this iterator is strict, the first child also needs to be strict.
+     *
+     * @param children SearchIterator objects for each child.
+     * @param md match data object handed over to (and kept alive by)
+     *           this iterator.
+     * @param childMatch TermFieldMatchData for the children.
+     * @param eval_order determines the order of evaluation for the
+     *                   terms. The term with fewest hits should be
+     *                   evaluated first.
+     * @param tmd match data that receives the phrase positions on unpack.
+     * @param strict whether this iterator is strict.
+     **/
+    SimplePhraseSearch(const Children &children,
+                       fef::MatchData::UP md,
+                       const fef::TermFieldMatchDataArray &childMatch,
+                       std::vector eval_order,
+                       fef::TermFieldMatchData &tmd, bool strict);
+
+    virtual void doSeek(uint32_t doc_id);
+    virtual void doUnpack(uint32_t doc_id);
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    // Sets the doom object polled during seek; returns *this for chaining.
+    SimplePhraseSearch & setDoom(const vespalib::Doom * doom) { _doom = doom; return *this; }
+};
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp b/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp
new file mode 100644
index 00000000000..f114eef8b39
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simpleresult.cpp
@@ -0,0 +1,67 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".simpleresult");
+#include "simpleresult.h"
+
+namespace search {
+namespace queryeval {
+
+SimpleResult &
+SimpleResult::addHit(uint32_t docid)
+{
+    _hits.push_back(docid);
+    return *this;
+}
+
+void
+SimpleResult::clear()
+{
+    // swap with an empty temporary so the old storage is released, not
+    // just marked empty
+    std::vector tmp;
+    tmp.swap(_hits);
+}
+
+void
+SimpleResult::search(SearchIterator &sb)
+{
+    clear();
+    // assume strict toplevel search object located at start
+    sb.initFullRange();
+    // each seek lands on the next hit; continue from the docid after it
+    for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+        sb.unpack(sb.getDocId());
+        _hits.push_back(sb.getDocId());
+    }
+}
+
+void
+SimpleResult::search(SearchIterator &sb, uint32_t docIdLimit)
+{
+    clear();
+    // assume non-strict toplevel search object
+    sb.initFullRange();
+    // probe every docid in [1, docIdLimit) individually
+    for (uint32_t docId = 1; docId < docIdLimit; ++docId) {
+        if (sb.seek(docId)) {
+            assert(docId == sb.getDocId());
+            sb.unpack(docId);
+            _hits.push_back(docId);
+        }
+    }
+}
+
+// Prints one "{docid}" line per hit, or "empty" when there are no hits;
+// the output always starts with a line break.
+std::ostream &
+operator << (std::ostream &out, const SimpleResult &result)
+{
+    if (result.getHitCount() == 0) {
+        out << std::endl << "empty" << std::endl;
+    } else {
+        out << std::endl;
+        for (uint32_t i = 0; i < result.getHitCount(); ++i) {
+            out << "{" << result.getHit(i) << "}" << std::endl;
+        }
+    }
+    return out;
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simpleresult.h b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h
new file mode 100644
index 00000000000..18e01374ec5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simpleresult.h
@@ -0,0 +1,87 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include "searchiterator.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Simple result class containing only document ids. This class will
+ * mostly be used for testing.
+ **/
+class SimpleResult
+{
+private:
+    std::vector _hits;
+
+public:
+    /**
+     * Create an empty result
+     **/
+    SimpleResult() : _hits() {}
+
+    /**
+     * Obtain the number of hits
+     *
+     * @return number of hits
+     **/
+    uint32_t getHitCount() const { return _hits.size(); }
+
+    /**
+     * Get the docid of a specific hit. The index is not range checked.
+     *
+     * @return docid for the i'th hit
+     * @param i which hit to obtain
+     **/
+    uint32_t getHit(uint32_t i) const { return _hits[i]; }
+
+    /**
+     * Add a hit. Hits must be added in sorted order (smallest docid
+     * first).
+     *
+     * @return this object for chaining
+     * @param docid hit to add
+     **/
+    SimpleResult &addHit(uint32_t docid);
+
+    /**
+     * remove all hits
+     **/
+    void clear();
+
+    /**
+     * Fill this result with all the hits returned by the given search
+     * object. Old hits will be removed from this result before doing
+     * the search. Assumes strict toplevel search object located at start
+     *
+     * @param sb search object
+     **/
+    void search(SearchIterator &sb);
+
+    /**
+     * Fill this result with all the hits returned by the given search
+     * object. Old hits will be removed from this result before doing
+     * the search. Assumes non-strict toplevel search object.
+     *
+     * @param sb search object
+     * @param docIdLimit the end of the docId range for this search iterator
+     **/
+    void search(SearchIterator &sb, uint32_t docIdLimit);
+
+    /**
+     * Test if we contain the same hits as rhs.
+     *
+     * @return true if the results are equal
+     * @param rhs other results
+     **/
+    bool operator==(const SimpleResult &rhs) const { return (_hits == rhs._hits); }
+};
+
+std::ostream &operator << (std::ostream &out, const SimpleResult &result);
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
new file mode 100644
index 00000000000..2d5b6b3e8b8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".simplesearch");
+#include "simplesearch.h"
+#include
+
+namespace search {
+namespace queryeval {
+
+// Moves the cursor forward to the first stored hit that is >= docid and
+// reports it; goes to the end when no such hit is left. The cursor never
+// moves backwards.
+void
+SimpleSearch::doSeek(uint32_t docid)
+{
+    while (_index < _result.getHitCount() && _result.getHit(_index) < docid) {
+        ++_index;
+    }
+    if (_index == _result.getHitCount()) {
+        setAtEnd();
+        return;
+    }
+    setDocId(_result.getHit(_index));
+}
+
+// Nothing to unpack; the parameter is intentionally unused.
+void
+SimpleSearch::doUnpack(uint32_t docid)
+{
+    (void) docid;
+}
+
+SimpleSearch::SimpleSearch(const SimpleResult &result)
+    : _tag(""),
+      _result(result),
+      _index(0)
+{
+}
+
+void
+SimpleSearch::visitMembers(vespalib::ObjectVisitor &visitor) const
+{
+    visit(visitor, "tag", _tag);
+}
+
+SimpleSearch::~SimpleSearch()
+{
+}
+
+} // namespace queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/simplesearch.h b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
new file mode 100644
index 00000000000..523d4f9dedc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/simplesearch.h
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include "simpleresult.h"
+
+namespace search {
+namespace queryeval {
+
+/**
+ * Simple search class used to return a predefined set of
+ * results. This class will mostly be used for testing.
+ *
+ * The iterator keeps its own copy of the result it is constructed
+ * from, plus a tag string that is reported through visitMembers().
+ * Copying is disabled (private copy constructor and assignment).
+ **/
+class SimpleSearch : public SearchIterator
+{
+private:
+    vespalib::string _tag;     // label reported by visitMembers()
+    SimpleResult     _result;  // the predefined hits (stored by value)
+    uint32_t         _index;   // index of the next candidate hit in _result
+
+    SimpleSearch(const SimpleSearch &);
+    SimpleSearch &operator=(const SimpleSearch &);
+
+protected:
+    virtual void doSeek(uint32_t docid);
+    virtual void doUnpack(uint32_t docid);
+
+public:
+    SimpleSearch(const SimpleResult &result);
+    // Sets the tag and returns *this so calls can be chained.
+    SimpleSearch &tag(const vespalib::string &t) {
+        _tag = t;
+        return *this;
+    }
+    virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
+    virtual ~SimpleSearch();
+};
+
+} // namespace queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp
new file mode 100644
index 00000000000..1482ec21fed
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.cpp
@@ -0,0 +1,187 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +#include "sourceblendersearch.h" +#include + +namespace search { +namespace queryeval { + +EmptySearch SourceBlenderSearch::_emptySearch; + +class SourceBlenderSearchStrict : public SourceBlenderSearch +{ +public: + SourceBlenderSearchStrict(ISourceSelector::Iterator::UP sourceSelector, const Children &children); +private: + VESPA_DLL_LOCAL void advance() __attribute__((noinline)); + vespalib::Array _nextChildren; + + void doSeek(uint32_t docid) override; + Trinary is_strict() const override { return Trinary::True; } +}; + +SourceBlenderSearchStrict::SourceBlenderSearchStrict( + ISourceSelector::Iterator::UP sourceSelector, + const Children &children) + : SourceBlenderSearch(std::move(sourceSelector), children), + _nextChildren() +{ + _nextChildren.reserve(children.size()); +} + +void +SourceBlenderSearch::doSeek(uint32_t docid) +{ + if (docid >= _docIdLimit) { + setDocId(endDocId); + return; + } + _matchedChild = getSearch(_sourceSelector->getSource(docid)); + if (_matchedChild->seek(docid)) { + setDocId(docid); + } +} + +void +SourceBlenderSearchStrict::doSeek(uint32_t docid) +{ + if (docid >= _docIdLimit) { + setDocId(endDocId); + return; + } + _matchedChild = getSearch(_sourceSelector->getSource(docid)); + if (_matchedChild->seek(docid)) { + setDocId(docid); + } else { + for (auto & child : _children) { + getSearch(child)->seek(docid); + } + advance(); + } +} + +void +SourceBlenderSearchStrict::advance() +{ + for (;;) { + SearchIterator * search = getSearch(_children[0]); + uint32_t minNextId = search->getDocId(); + _nextChildren.clear(); + _nextChildren.push_back_fast(search); + for (uint32_t i = 1; i < _children.size(); ++i) { + search = getSearch(_children[i]); + uint32_t nextId = search->getDocId(); + if (nextId < minNextId) { + minNextId = nextId; + _nextChildren.clear(); + _nextChildren.push_back_fast(search); + } else if (nextId == minNextId) { + _nextChildren.push_back_fast(search); + } + } + if (isAtEnd(minNextId)) { + 
setAtEnd(); + return; + } + if (minNextId >= _docIdLimit) { + setAtEnd(); + return; + } + search = getSearch(_sourceSelector->getSource(minNextId)); + for (uint32_t i = 0; i < _nextChildren.size(); ++i) { + if (_nextChildren[i] == search) { + _matchedChild = search; + setDocId(minNextId); + return; + } + _nextChildren[i]->seek(minNextId + 1); + } + } +} + +void +SourceBlenderSearch::doUnpack(uint32_t docid) +{ + _matchedChild->doUnpack(docid); +} + +SourceBlenderSearch::SourceBlenderSearch( + ISourceSelector::Iterator::UP sourceSelector, + const Children &children) : + _matchedChild(NULL), + _sourceSelector(std::move(sourceSelector)), + _children(), + _docIdLimit(_sourceSelector->getDocIdLimit()) +{ + for (size_t i(0); i < sizeof(_sources)/sizeof(_sources[0]); i++) { + _sources[i] = &_emptySearch; + } + for (auto & child : children) { + Source sid(child.sourceId); + _children.push_back(sid); + _sources[sid] = child.search; + } +} + +void +SourceBlenderSearch::initRange(uint32_t beginid, uint32_t endid) +{ + SearchIterator::initRange(beginid, endid); + for (auto & child : _children) { + getSearch(child)->initRange(beginid, endid); + } +} + +void +SourceBlenderSearch::resetRange() +{ + SearchIterator::resetRange(); + for (auto & child : _children) { + getSearch(child)->resetRange(); + } +} + +void +SourceBlenderSearch::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "children", _children); + for (const auto & child : _children) { + vespalib::asciistream os; + os << "Source " << child; + visit(visitor, os.str(), *getSearch(child)); + } +} + +SourceBlenderSearch::~SourceBlenderSearch() +{ + for (auto & child : _children) { + delete getSearch(child); + } +} + +SourceBlenderSearch * SourceBlenderSearch::create( + ISourceSelector::Iterator::UP sourceSelector, + const Children &children, + bool strict) +{ + if (strict) { + return new SourceBlenderSearchStrict(std::move(sourceSelector), children); + } else { + return new 
SourceBlenderSearch(std::move(sourceSelector), children); + } +} + + +} // namespace queryeval +} // namespace search + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SourceBlenderSearch::Child &obj) +{ + self.openStruct(name, "search::queryeval::SourceBlenderSearch::Child"); + visit(self, "search", obj.search); + visit(self, "sourceId", obj.sourceId); + self.closeStruct(); +} diff --git a/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h new file mode 100644 index 00000000000..81688184fa5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/sourceblendersearch.h @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "searchiterator.h" +#include "emptysearch.h" +#include "isourceselector.h" + +namespace search { +namespace queryeval { + +/** + * A simple implementation of the source blender operation. This class + * is used to blend results from multiple sources. Each source is + * represented with a separate search iterator. A source selector + * iterator is used to select the appropriate source for each + * document. The source blender will make sure to only propagate + * unpack requests to one of the sources below, enabling them to use + * the same target location for detailed match data unpacking. + **/ +class SourceBlenderSearch : public SearchIterator +{ +public: + /** + * Small wrapper used to specify the underlying searches to be + * blended. 
+ **/ + struct Child { + SearchIterator *search; + uint32_t sourceId; + Child() : search(NULL), sourceId(0) { } + Child(SearchIterator *s, uint32_t id) : search(s), sourceId(id) {} + }; + typedef std::vector Children; + +private: + SourceBlenderSearch(const SourceBlenderSearch &); + SourceBlenderSearch &operator=(const SourceBlenderSearch &); + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + virtual bool isSourceBlender() const { return true; } + static EmptySearch _emptySearch; +protected: + typedef std::vector SourceIndex; + SearchIterator * _matchedChild; + ISourceSelector::Iterator::UP _sourceSelector; + SourceIndex _children; + uint32_t _docIdLimit; + SearchIterator * _sources[256]; + + void doSeek(uint32_t docid) override; + void doUnpack(uint32_t docid) override; + Trinary is_strict() const override { return Trinary::False; } + SourceBlenderSearch(ISourceSelector::Iterator::UP sourceSelector, const Children &children); + SearchIterator * getSearch(Source source) const { return _sources[source]; } +public: + /** + * Create a new SourceBlender Search with the given children and + * strictness. A strict blender can assume that all children below + * are also strict. A non-strict blender has no strictness + * assumptions about its children. + * + * @param sourceSelector This is an iterator that provide you with the + * the correct source to use. + * @param children the search objects we are blending + * this object takes ownership of the children. 
+ * @param strict whether this search is strict + * (a strict search will locate its next hit when seeking fails) + **/ + static SourceBlenderSearch * create(ISourceSelector::Iterator::UP sourceSelector, + const Children &children, bool strict); + virtual ~SourceBlenderSearch(); + size_t getNumChildren() const { return _children.size(); } + SearchIterator::UP steal(size_t index) { + SearchIterator::UP retval(_sources[_children[index]]); + _sources[_children[index]] = NULL; + return retval; + } + void setChild(size_t index, SearchIterator::UP child) { + assert(_sources[_children[index]] == NULL); + _sources[_children[index]] = child.release(); + } + void initRange(uint32_t beginId, uint32_t endId) override; + void resetRange() override; +}; + +} // namespace queryeval +} // namespace search + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::SourceBlenderSearch::Child &obj); + diff --git a/searchlib/src/vespa/searchlib/queryeval/split_float.cpp b/searchlib/src/vespa/searchlib/queryeval/split_float.cpp new file mode 100644 index 00000000000..dd6a370ceda --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/split_float.cpp @@ -0,0 +1,29 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// $Id$
+
+#include
+#include "split_float.h"
+#include
+
+namespace search {
+namespace queryeval {
+
+// Splits the input into its maximal runs of alphanumeric characters;
+// every other character only acts as a separator and is dropped.
+SplitFloat::SplitFloat(const vespalib::string &input)
+{
+    bool seenText = false;  // true while inside an alphanumeric run
+    for (size_t i = 0; i < input.size(); ++i) {
+        // isalnum is given an unsigned char value, never a negative char
+        unsigned char c = input[i];
+        if (isalnum(c)) {
+            if (!seenText) {
+                // first character of a new run: open a new part
+                _parts.push_back(vespalib::string());
+            }
+            _parts.back().push_back(c);
+            seenText = true;
+        } else {
+            seenText = false;
+        }
+    }
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/split_float.h b/searchlib/src/vespa/searchlib/queryeval/split_float.h
new file mode 100644
index 00000000000..b8fcf8f5ab3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/split_float.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// $Id$
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace queryeval {
+
+// Holds the alphanumeric parts of a string, in input order. The parts
+// are computed once by the constructor.
+class SplitFloat
+{
+private:
+    std::vector _parts;
+public:
+    SplitFloat(const vespalib::string &input);
+    // Number of parts found.
+    size_t parts() const { return _parts.size(); }
+    // The i'th part; the index is not range checked.
+    const vespalib::string &getPart(size_t i) const { return _parts[i]; }
+};
+
+} // namespace search::queryeval
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
new file mode 100644
index 00000000000..79b04b90e53
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
@@ -0,0 +1,120 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+
+#include "termasstring.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+LOG_SETUP(".termasstring");
+
+using search::query::And;
+using search::query::AndNot;
+using search::query::Equiv;
+using search::query::NumberTerm;
+using search::query::LocationTerm;
+using search::query::Near;
+using search::query::Node;
+using search::query::ONear;
+using search::query::Or;
+using search::query::Phrase;
+using search::query::PredicateQuery;
+using search::query::PrefixTerm;
+using search::query::QueryVisitor;
+using search::query::RangeTerm;
+using search::query::Rank;
+using search::query::RegExpTerm;
+using search::query::StringTerm;
+using search::query::SubstringTerm;
+using search::query::SuffixTerm;
+using search::query::WeakAnd;
+using search::query::WeightedSetTerm;
+using search::query::DotProduct;
+using search::query::WandTerm;
+using vespalib::string;
+
+namespace search {
+namespace queryeval {
+
+// The four overloads below format their argument by streaming it into a
+// vespalib::asciistream and returning the resulting string.
+vespalib::string termAsString(double float_term) {
+    vespalib::asciistream os;
+    return (os << float_term).str();
+}
+
+vespalib::string termAsString(int64_t int_term) {
+    vespalib::asciistream os;
+    return (os << int_term).str();
+}
+
+vespalib::string termAsString(const search::query::Range &term) {
+    vespalib::asciistream os;
+    return (os << term).str();
+}
+
+vespalib::string termAsString(const search::query::Location &term) {
+    vespalib::asciistream os;
+    return (os << term).str();
+}
+
+namespace {
+// Query visitor that records the string form of the visited node when it
+// is one of the term node types handled by visitTerm(); every other node
+// type clears the result and leaves isSet false.
+struct TermAsStringVisitor : public QueryVisitor {
+    string term;
+    bool   isSet;
+
+    TermAsStringVisitor() : term(), isSet(false) {}
+
+    // Converts the node's term through the matching termAsString overload.
+    template
+    void visitTerm(TermNode &n) {
+        term = termAsString(n.getTerm());
+        isSet = true;
+    }
+
+    void illegalVisit() {
+        term.clear();
+        isSet = false;
+    }
+
+    virtual void visit(And &) { illegalVisit(); }
+    virtual void visit(AndNot &) { illegalVisit(); }
+    virtual void visit(Equiv &) { illegalVisit(); }
+    virtual void visit(Near &) { illegalVisit(); }
+    virtual void visit(ONear &) { illegalVisit(); }
+    virtual void visit(Or &) { illegalVisit(); }
+    virtual void visit(Phrase &) { illegalVisit(); }
+    virtual void visit(Rank &) { illegalVisit(); }
+    virtual void visit(WeakAnd &) { illegalVisit(); }
+    virtual void visit(WeightedSetTerm &) { illegalVisit(); }
+    virtual void visit(DotProduct &) { illegalVisit(); }
+    virtual void visit(WandTerm &) { illegalVisit(); }
+
+    virtual void visit(NumberTerm &n) { visitTerm(n); }
+    virtual void visit(LocationTerm &n) { visitTerm(n); }
+    virtual void visit(PrefixTerm &n) { visitTerm(n); }
+    virtual void visit(RangeTerm &n) { visitTerm(n); }
+    virtual void visit(StringTerm &n) { visitTerm(n); }
+    virtual void visit(SubstringTerm &n) { visitTerm(n); }
+    virtual void visit(SuffixTerm &n) { visitTerm(n); }
+    virtual void visit(RegExpTerm &n) { visitTerm(n); }
+
+    virtual void visit(PredicateQuery &) { illegalVisit(); }
+};
+} // namespace
+
+// Returns the string form of a term node. For any node the visitor does
+// not treat as a term, a warning is logged and
+// vespalib::IllegalArgumentException is thrown.
+string termAsString(const Node &term_node) {
+    TermAsStringVisitor visitor;
+    // accept() is called on a non-const node, hence the cast
+    const_cast(term_node).accept(visitor);
+    if (!visitor.isSet) {
+        vespalib::string err(vespalib::make_string("Trying to convert a non-term node ('%s') to a term string.", typeid(term_node).name()));
+        LOG(warning, "%s", err.c_str());
+        throw vespalib::IllegalArgumentException(err, VESPA_STRLOC);
+    }
+    return visitor.term;
+}
+
+} // namespace search::queryeval
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.h b/searchlib/src/vespa/searchlib/queryeval/termasstring.h
new file mode 100644
index 00000000000..5428dfa0ce7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.h
@@ -0,0 +1,30 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#pragma once + +#include +#include +#include + +namespace search { +namespace query { class Node; } + +namespace queryeval { + +inline const vespalib::string &termAsString(const vespalib::string &term) { + return term; +} + +vespalib::string termAsString(double float_term); + +vespalib::string termAsString(int64_t int_term); + +vespalib::string termAsString(const search::query::Range &term); + +vespalib::string termAsString(const search::query::Location &term); + +vespalib::string termAsString(const search::query::Node &term_node); + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp new file mode 100644 index 00000000000..45d975e5e77 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.cpp @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "termwise_blueprint_helper.h" +#include "termwise_search.h" + +namespace search { +namespace queryeval { + +TermwiseBlueprintHelper::TermwiseBlueprintHelper(const IntermediateBlueprint &self, + const MultiSearch::Children &subSearches, + UnpackInfo &unpackInfo) + : children(), + termwise(), + first_termwise(subSearches.size()), + termwise_unpack() +{ + children.reserve(subSearches.size()); + termwise.reserve(subSearches.size()); + for (size_t i = 0; i < subSearches.size(); ++i) { + bool need_unpack = unpackInfo.needUnpack(i); + bool allow_termwise = self.getChild(i).getState().allow_termwise_eval(); + if (need_unpack || !allow_termwise) { + if (need_unpack) { + size_t index = (i < first_termwise) ? 
children.size() : (children.size() + 1); + termwise_unpack.add(index); + } + children.push_back(subSearches[i]); + } else { + first_termwise = std::min(i, first_termwise); + termwise.push_back(subSearches[i]); + } + } +} + +void +TermwiseBlueprintHelper::insert_termwise(SearchIterator::UP search, bool strict) +{ + auto termwise_search = make_termwise(std::move(search), strict); + children.insert(children.begin() + first_termwise, termwise_search.release()); +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h new file mode 100644 index 00000000000..c99fedd4921 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/termwise_blueprint_helper.h @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multisearch.h" +#include "blueprint.h" +#include "unpackinfo.h" +#include "searchiterator.h" + +namespace search { +namespace queryeval { + +/** + * Utility used to keep track of which children can be evaluated + * termwise, which children we need to unpack and how to combine the + * termwise and non-termwise parts with each other. 
+ **/ +struct TermwiseBlueprintHelper { + MultiSearch::Children children; + MultiSearch::Children termwise; + size_t first_termwise; + UnpackInfo termwise_unpack; + + TermwiseBlueprintHelper(const IntermediateBlueprint &self, + const MultiSearch::Children &subSearches, UnpackInfo &unpackInfo); + + void insert_termwise(SearchIterator::UP search, bool strict); +}; + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp b/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp new file mode 100644 index 00000000000..7d25a897026 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/termwise_search.cpp @@ -0,0 +1,62 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "termwise_search.h" +#include + +namespace search { +namespace queryeval { + +template +struct TermwiseSearch : public SearchIterator { + + SearchIterator::UP search; + BitVector::UP result; + + TermwiseSearch(SearchIterator::UP search_in) + : search(std::move(search_in)), result() {} + + Trinary is_strict() const override { return IS_STRICT ? 
Trinary::True : Trinary::False; } + void initRange(uint32_t beginid, uint32_t endid) override { + SearchIterator::initRange(beginid, endid); + search->initRange(beginid, endid); + result = search->get_hits(beginid); + } + void resetRange() override { + SearchIterator::resetRange(); + search->resetRange(); + result.reset(); + } + void doSeek(uint32_t docid) override { + if (__builtin_expect(isAtEnd(docid), false)) { + setAtEnd(); + } else if (IS_STRICT) { + uint32_t nextid = result->getNextTrueBit(docid); + if (__builtin_expect(isAtEnd(nextid), false)) { + setAtEnd(); + } else { + setDocId(nextid); + } + } else if (result->testBit(docid)) { + setDocId(docid); + } + } + void doUnpack(uint32_t) override {} + void visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "search", *search); + visit(visitor, "strict", IS_STRICT); + } +}; + +SearchIterator::UP +make_termwise(SearchIterator::UP search, bool strict) +{ + if (strict) { + return SearchIterator::UP(new TermwiseSearch(std::move(search))); + } else { + return SearchIterator::UP(new TermwiseSearch(std::move(search))); + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/termwise_search.h b/searchlib/src/vespa/searchlib/queryeval/termwise_search.h new file mode 100644 index 00000000000..bf2013731d7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/termwise_search.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "searchiterator.h" + +namespace search { +namespace queryeval { + +/** + * Creates a termwise wrapper for the given search. The wrapper will + * perform termwise evaluation of the underlying search when the + * initRange function is called. All hits for the active range are + * stored in a bitvector fragment in the wrapper. The wrapper will act + * as a normal iterator to be used for parallel query evaluation. 
Note + * that no match data will be available for the hits returned by the + * wrapper. Termwise evaluation should only ever be used for parts of + * the query not used for ranking. + * + * @return wrapper performing termwise evaluation of the original search + * @param search the search we want to perform termwise evaluation of + * @param strict whether the wrapper itself should be a strict iterator + **/ +SearchIterator::UP make_termwise(SearchIterator::UP search, bool strict); + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt new file mode 100644 index 00000000000..3b2bca7d35b --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/CMakeLists.txt @@ -0,0 +1,5 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_queryeval_test INTERFACE + SOURCES + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h b/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h new file mode 100644 index 00000000000..d6404842b39 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/eagerchild.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { +namespace queryeval { +namespace test { + +/** + * Child iterator that has initial docid > 0. 
+ **/ +struct EagerChild : public SearchIterator +{ + EagerChild(uint32_t initial) : SearchIterator() { setDocId(initial); } + virtual void doSeek(uint32_t) { setAtEnd(); } + virtual void doUnpack(uint32_t) {} +}; + +} // namespace test +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h b/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h new file mode 100644 index 00000000000..5fb03c4afa2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/leafspec.h @@ -0,0 +1,61 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "searchhistory.h" +#include "trackedsearch.h" +#include +#include +#include + +namespace search { +namespace queryeval { +namespace test { + +/** + * Defines the hits to be returned by a wand-like subsearch and creates a TrackedSearch. + **/ +struct LeafSpec +{ + std::string name; + int32_t weight; + int32_t maxWeight; + FakeResult result; + SearchIterator *search; + LeafSpec(const std::string &n, int32_t w = 100) + : name(n), + weight(w), + maxWeight(std::numeric_limits<int32_t>::min()), // lowest value, so the first weighted doc() always raises it + result(), + search() + {} + LeafSpec &doc(uint32_t docid) { + result.doc(docid); + return *this; + } + LeafSpec &doc(uint32_t docid, int32_t w) { + result.doc(docid); + result.weight(w); + result.pos(0); + maxWeight = std::max(maxWeight, w); + return *this; + } + LeafSpec &itr(SearchIterator *si) { + search = si; + return *this; + } + SearchIterator *create(SearchHistory &hist, fef::TermFieldMatchData *tfmd) const { + if (search != NULL) { + return new TrackedSearch(name, hist, search); + } else if (tfmd != NULL) { + return new TrackedSearch(name, hist, result, *tfmd, + MinMaxPostingInfo(0, maxWeight)); + } + return new TrackedSearch(name, hist, result, + MinMaxPostingInfo(0, maxWeight)); + } +}; + +} // namespace test +} // namespace queryeval +} // namespace search + diff --git
a/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h b/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h new file mode 100644 index 00000000000..3e528e9a370 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/searchhistory.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include + +namespace search { +namespace queryeval { +namespace test { + +/** + * Seek and unpack history for a search iterator. + **/ +struct SearchHistory { + struct Entry { + std::string target; + std::string op; + uint32_t docid; + Entry(const std::string &t, const std::string &o, uint32_t id) + : target(t), op(o), docid(id) {} + bool operator==(const Entry &rhs) const { + return ((target == rhs.target) && + (op == rhs.op) && + (docid == rhs.docid)); + } + }; + std::vector<Entry> _entries; // recorded operations, in call order + SearchHistory &seek(const std::string &target, uint32_t docid) { + _entries.push_back(Entry(target, "seek", docid)); + return *this; + } + SearchHistory &step(const std::string &target, uint32_t docid) { + _entries.push_back(Entry(target, "setDocId", docid)); + return *this; + } + SearchHistory &unpack(const std::string &target, uint32_t docid) { + _entries.push_back(Entry(target, "unpack", docid)); + return *this; + } + bool operator==(const SearchHistory &rhs) const { + return (_entries == rhs._entries); + } +}; + +inline std::ostream &operator << (std::ostream &out, const SearchHistory &hist) { // inline: defined in a header, so several translation units may include it without duplicate-symbol link errors + out << "History:\n"; + for (size_t i = 0; i < hist._entries.size(); ++i) { + const SearchHistory::Entry &entry = hist._entries[i]; + out << " " << entry.target << "->" << entry.op << "(" << entry.docid << ")" << std::endl; + } + return out; +} + +} // namespace test +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h b/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h new file mode 100644 
index 00000000000..49e3fd6b84a --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/trackedsearch.h @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "searchhistory.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { +namespace test { + +/** + * Proxy and wrapper for FakeSearch to track search history and + * keep match data. + **/ +class TrackedSearch : public SearchIterator +{ +private: + std::string _name; + SearchHistory &_history; + fef::TermFieldMatchData _matchData; + SearchIterator::UP _search; + MinMaxPostingInfo::UP _minMaxPostingInfo; + + static fef::TermFieldMatchDataArray makeArray(fef::TermFieldMatchData &match) { + fef::TermFieldMatchDataArray array; + array.add(&match); + return array; + } + +protected: + virtual void doSeek(uint32_t docid) { + _history.seek(_name, docid); + _search->seek(docid); + setDocId(_search->getDocId()); + _history.step(_name, getDocId()); + } + virtual void doUnpack(uint32_t docid) { + _history.unpack(_name, docid); + _search->unpack(docid); + } + +public: + // wraps a FakeSearch and owns its match data + TrackedSearch(const std::string &name, SearchHistory &hist, + const FakeResult &result, const MinMaxPostingInfo &minMaxPostingInfo) + : _name(name), _history(hist), _matchData(), + _search(new FakeSearch("", "", "", result, makeArray(_matchData))), + _minMaxPostingInfo(new MinMaxPostingInfo(minMaxPostingInfo)) + { setDocId(_search->getDocId()); } + // wraps a FakeSearch with external match data + TrackedSearch(const std::string &name, SearchHistory &hist, + const FakeResult &result, fef::TermFieldMatchData &tfmd, + const MinMaxPostingInfo &minMaxPostingInfo) + : _name(name), _history(hist), _matchData(), + _search(new FakeSearch("", "", "", result, makeArray(tfmd))), + _minMaxPostingInfo(new MinMaxPostingInfo(minMaxPostingInfo)) + { setDocId(_search->getDocId()); } + // 
wraps a generic search (typically wand) + TrackedSearch(const std::string &name, SearchHistory &hist, SearchIterator *search) + : _name(name), _history(hist), _matchData(), _search(search), _minMaxPostingInfo() + { setDocId(_search->getDocId()); } + + virtual const PostingInfo *getPostingInfo() const { + return _minMaxPostingInfo.get(); + } +}; + +} // namespace test +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h b/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h new file mode 100644 index 00000000000..a578d1d908d --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/test/wandspec.h @@ -0,0 +1,53 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "leafspec.h" +#include "trackedsearch.h" +#include +#include +#include +#include +#include + +namespace search { +namespace queryeval { +namespace test { + +/** + * Defines the overall behavior of a wand like search with tracked children. + * This struct also owns the search iterator history. + **/ +class WandSpec +{ +private: + std::vector _leafs; + fef::MatchDataLayout _layout; + std::vector _handles; + SearchHistory _history; + +public: + WandSpec() : _leafs(), _layout(), _handles(), _history() {} + WandSpec &leaf(const LeafSpec &l) { + _leafs.push_back(l); + _handles.push_back(_layout.allocTermField(0)); + return *this; + } + wand::Terms getTerms(fef::MatchData *matchData = NULL) { + wand::Terms terms; + for (size_t i = 0; i < _leafs.size(); ++i) { + fef::TermFieldMatchData *tfmd = (matchData != NULL ? 
matchData->resolveTermField(_handles[i]) : NULL); + terms.push_back(wand::Term(_leafs[i].create(_history, tfmd), + _leafs[i].weight, + _leafs[i].result.inspect().size(), + tfmd)); + } + return terms; + } + SearchHistory &getHistory() { return _history; } + fef::MatchData::UP createMatchData() const { return _layout.createMatchData(); } +}; + +} // namespace test +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp b/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp new file mode 100644 index 00000000000..0c808ce06f2 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/truesearch.cpp @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "truesearch.h" + +namespace search { +namespace queryeval { + +void +TrueSearch::doSeek(uint32_t docid) +{ + setDocId(docid); +} + +void +TrueSearch::doUnpack(uint32_t docid) +{ + _tfmd.resetOnlyDocId(docid); +} + +TrueSearch::TrueSearch(fef::TermFieldMatchData & tfmd) : + SearchIterator(), + _tfmd(tfmd) +{ + _tfmd.resetOnlyDocId(0); +} + +TrueSearch::~TrueSearch() +{ +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/truesearch.h b/searchlib/src/vespa/searchlib/queryeval/truesearch.h new file mode 100644 index 00000000000..3f7b36160f4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/truesearch.h @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include "searchiterator.h" + +namespace search { +namespace queryeval { + +class TrueSearch : public SearchIterator +{ +private: + fef::TermFieldMatchData & _tfmd; + Trinary is_strict() const override { return Trinary::True; } + void doSeek(uint32_t) override; + void doUnpack(uint32_t) override; + +public: + TrueSearch(fef::TermFieldMatchData & tfmd); + ~TrueSearch(); +}; + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp new file mode 100644 index 00000000000..5a9faf42e8e --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.cpp @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include + +namespace search { +namespace queryeval { + +UnpackInfo::UnpackInfo() + : _size(0) +{ + memset(_unpack, 0, sizeof(_unpack)); +} + +UnpackInfo & +UnpackInfo::add(size_t index) +{ + if ((index <= max_index) && (_size < max_size)) { + _unpack[_size++] = index; + std::sort(&_unpack[0], &_unpack[_size]); + } else { + forceAll(); + } + return *this; +} + +UnpackInfo & +UnpackInfo::insert(size_t index, bool unpack) +{ + if (unpackAll()) { + return *this; + } + for (size_t rp = 0; rp < _size; ++rp) { + if (_unpack[rp] >= index) { + if (_unpack[rp] == max_index) { + forceAll(); + return *this; + } + ++_unpack[rp]; + } + } + if (unpack) { + add(index); + } + return *this; +} + +UnpackInfo & +UnpackInfo::remove(size_t index) +{ + if (unpackAll()) { + return *this; + } + size_t wp = 0; + bool found_index = false; + for (size_t rp = 0; rp < _size; ++rp) { + if (_unpack[rp] == index) { + found_index = true; + } else if (_unpack[rp] > index) { + _unpack[wp++] = (_unpack[rp] - 1); + } else { + _unpack[wp++] = _unpack[rp]; + } + } + if (found_index) { + --_size; + } + assert(wp == _size); + return *this; +} + +bool 
+UnpackInfo::needUnpack(size_t index) const +{ + if (unpackAll()) { + return true; + } + for (size_t i = 0; i < _size; ++i) { + if (_unpack[i] == index) { + return true; + } + } + return false; +} + +vespalib::string +UnpackInfo::toString() const +{ + vespalib::asciistream os; + if (unpackAll()) { + os << "full-unpack"; + } else if (empty()) { + os << "no-unpack"; + } else { + os << size_t(_unpack[0]); + for (size_t i = 1; i < _size; ++i) { + os << " " << size_t(_unpack[i]); + } + } + return os.str(); +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h new file mode 100644 index 00000000000..8c83ec355fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/unpackinfo.h @@ -0,0 +1,69 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include +#include "multisearch.h" + +namespace search { +namespace queryeval { + +class UnpackInfo +{ +private: + static constexpr size_t max_size = 31; + static constexpr size_t max_index = 255; + + uint8_t _size; + uint8_t _unpack[max_size]; + +public: + UnpackInfo(); + + // add an index to unpack, will not renumber existing indexes + UnpackInfo &add(size_t index); + + // insert an index that may need unpacking, will renumber existing indexes + UnpackInfo &insert(size_t index, bool unpack = true); + + // remove an index and its unpack data, will renumber existing indexes + UnpackInfo &remove(size_t index); + + UnpackInfo &forceAll() { + _size = (max_size + 1); + return *this; + } + + bool unpackAll() const { return (_size > max_size); } + bool empty() const { return (_size == 0); } + bool needUnpack(size_t index) const; + + template + void each(F &&f, size_t n) const { + if (__builtin_expect(unpackAll(), false)) { + for (size_t i = 0; i < n; ++i) { + f(i); + } + } else { + for (size_t i = 0; i < _size; ++i) { + 
f(_unpack[i]); + } + } + } + + vespalib::string toString() const; +}; + +struct NoUnpack { + void unpack(uint32_t docid, const MultiSearch & search) { + (void) docid; + (void) search; + } + void onRemove(size_t index) { (void) index; } + void onInsert(size_t index) { (void) index; } + bool needUnpack(size_t index) const { (void) index; return false; } +}; + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt new file mode 100644 index 00000000000..a1dbdcfa2f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_queryeval_wand OBJECT + SOURCES + parallel_weak_and_blueprint.cpp + parallel_weak_and_search.cpp + wand_parts.cpp + weak_and_heap.cpp + weak_and_search.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp new file mode 100644 index 00000000000..b57694fc07d --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.cpp @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +LOG_SETUP(".queryeval.parallel_weak_and_blueprint"); + +#include "wand_parts.h" +#include "parallel_weak_and_blueprint.h" +#include "parallel_weak_and_search.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field, + uint32_t scoresToTrack, + score_t scoreThreshold, + double thresholdBoostFactor) + : ComplexLeafBlueprint(field), + _field(field), + _scores(scoresToTrack), + _scoreThreshold(scoreThreshold), + _thresholdBoostFactor(thresholdBoostFactor), + _scoresAdjustFrequency(DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY), + _estimate(), + _layout(), + _weights(), + _terms() +{ +} + +ParallelWeakAndBlueprint::ParallelWeakAndBlueprint(const FieldSpec &field, + uint32_t scoresToTrack, + score_t scoreThreshold, + double thresholdBoostFactor, + uint32_t scoresAdjustFrequency) + : ComplexLeafBlueprint(field), + _field(field), + _scores(scoresToTrack), + _scoreThreshold(scoreThreshold), + _thresholdBoostFactor(thresholdBoostFactor), + _scoresAdjustFrequency(scoresAdjustFrequency), + _estimate(), + _layout(), + _weights(), + _terms() +{ +} + +ParallelWeakAndBlueprint::~ParallelWeakAndBlueprint() +{ + while (!_terms.empty()) { + delete _terms.back(); + _terms.pop_back(); + } +} + +FieldSpec +ParallelWeakAndBlueprint::getNextChildField(const FieldSpec &outer) +{ + return FieldSpec(outer.getName(), outer.getFieldId(), _layout.allocTermField(outer.getFieldId()), false); +} + +void +ParallelWeakAndBlueprint::addTerm(Blueprint::UP term, int32_t weight) +{ + HitEstimate childEst = term->getState().estimate(); + if (!childEst.empty) { + if (_estimate.empty) { + _estimate = childEst; + } else { + _estimate.estHits += childEst.estHits; + } + setEstimate(_estimate); + } + _weights.push_back(weight); + _terms.push_back(term.get()); + term.release(); + set_tree_size(_terms.size() + 1); +} + +SearchIterator::UP 
+ParallelWeakAndBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const +{ + assert(tfmda.size() == 1); + fef::MatchData::UP childrenMatchData = _layout.createMatchData(); + wand::Terms terms; + for (size_t i = 0; i < _terms.size(); ++i) { + const State &childState = _terms[i]->getState(); + assert(childState.numFields() == 1); + terms.push_back(wand::Term(_terms[i]->createSearch(*childrenMatchData, true).release(), + _weights[i], + childState.estimate().estHits, + childState.field(0).resolve(*childrenMatchData))); + } + return SearchIterator::UP + (ParallelWeakAndSearch::create(terms, + ParallelWeakAndSearch::MatchParams(_scores, + _scoreThreshold, + _thresholdBoostFactor, + _scoresAdjustFrequency).setDocIdLimit(get_docid_limit()), + ParallelWeakAndSearch::RankParams(*tfmda[0], + std::move(childrenMatchData)), strict)); +} + +void +ParallelWeakAndBlueprint::fetchPostings(bool) +{ + for (size_t i = 0; i < _terms.size(); ++i) { + _terms[i]->fetchPostings(true); + } +} + +void +ParallelWeakAndBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + LeafBlueprint::visitMembers(visitor); + visit(visitor, "_weights", _weights); + visit(visitor, "_terms", _terms); +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h new file mode 100644 index 00000000000..bae74c046cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include "wand_parts.h" +#include "weak_and_heap.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +const uint32_t DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY = 4; + +/** + * Blueprint for the parallel weak and search operator. + */ +class ParallelWeakAndBlueprint : public ComplexLeafBlueprint +{ +private: + typedef wand::score_t score_t; + + const FieldSpec _field; + mutable SharedWeakAndPriorityQueue _scores; + const wand::score_t _scoreThreshold; + double _thresholdBoostFactor; + const uint32_t _scoresAdjustFrequency; + HitEstimate _estimate; + fef::MatchDataLayout _layout; + std::vector _weights; + std::vector _terms; + + ParallelWeakAndBlueprint(const ParallelWeakAndBlueprint &); + ParallelWeakAndBlueprint &operator=(const ParallelWeakAndBlueprint &); + +public: + ParallelWeakAndBlueprint(const FieldSpec &field, + uint32_t scoresToTrack, + score_t scoreThreshold, + double thresholdBoostFactor); + ParallelWeakAndBlueprint(const FieldSpec &field, + uint32_t scoresToTrack, + score_t scoreThreshold, + double thresholdBoostFactor, + uint32_t scoresAdjustFrequency); + virtual ~ParallelWeakAndBlueprint(); + + const WeakAndHeap &getScores() const { return _scores; } + + score_t getScoreThreshold() const { return _scoreThreshold; } + + double getThresholdBoostFactor() const { return _thresholdBoostFactor; } + + // Used by create visitor + FieldSpec getNextChildField(const FieldSpec &outer); + + // Used by create visitor + void addTerm(Blueprint::UP term, int32_t weight); + + // Override doc from blueprint::Leaf. + virtual SearchIterator::UP + createLeafSearch(const search::fef::TermFieldMatchDataArray &tfmda, + bool strict) const; + + // Override doc from blueprint::Leaf. 
+ virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + + virtual void fetchPostings(bool strict); +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp new file mode 100644 index 00000000000..05a2d7d6822 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp @@ -0,0 +1,263 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".queryeval.parallel_weak_and_search"); +#include "parallel_weak_and_search.h" +#include +#include +#include "wand_parts.h" +#include +#include +#include +#include +#include + +using vespalib::make_string; + +namespace search { +namespace queryeval { + +typedef ParallelWeakAndSearch::MatchParams MatchParams; +typedef ParallelWeakAndSearch::RankParams RankParams; + +namespace wand { + +namespace { bool should_monitor_wand() { return LOG_WOULD_LOG(spam); } } + + +template +class ParallelWeakAndSearchImpl : public ParallelWeakAndSearch +{ +private: + fef::TermFieldMatchData &_tfmd; + VectorizedTerms _terms; + DualHeap _heaps; + Algorithm _algo; + score_t _threshold; + score_t _boostedThreshold; + const MatchParams _matchParams; + std::vector _localScores; + + void updateThreshold(score_t newThreshold) { + if (newThreshold > _threshold) { + _threshold = newThreshold; + _boostedThreshold = (newThreshold * _matchParams.thresholdBoostFactor); + } + } + + void seek_strict(uint32_t docid) { + _algo.set_candidate(_terms, _heaps, docid); + while (_algo.solve_wand_constraint(_terms, _heaps, GreaterThan(_boostedThreshold))) { + if (_algo.check_score(_terms, _heaps, DotProductScorer(), GreaterThan(_threshold))) { + setDocId(_algo.get_candidate()); + return; + } else { + _algo.set_candidate(_terms, _heaps, _algo.get_candidate() + 1); + } + 
} + setAtEnd(); + } + + void seek_unstrict(uint32_t docid) { + if (docid > _algo.get_candidate()) { + _algo.set_candidate(_terms, _heaps, docid); + if (_algo.check_wand_constraint(_terms, _heaps, GreaterThan(_boostedThreshold))) { + if (_algo.check_score(_terms, _heaps, DotProductScorer(), GreaterThan(_threshold))) { + setDocId(_algo.get_candidate()); + } + } + } + } + +public: + ParallelWeakAndSearchImpl(fef::TermFieldMatchData &tfmd, + VectorizedTerms &&terms, + const MatchParams &matchParams) + : _tfmd(tfmd), + _terms(std::move(terms)), + _heaps(DocIdOrder(_terms.docId()), _terms.size()), + _algo(), + _threshold(matchParams.scoreThreshold), + _boostedThreshold(_threshold * matchParams.thresholdBoostFactor), + _matchParams(matchParams), + _localScores() + { + } + virtual size_t get_num_terms() const override { return _terms.size(); } + virtual int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } + virtual score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); } + virtual const MatchParams &getMatchParams() const override { return _matchParams; } + + virtual void doSeek(uint32_t docid) override { + updateThreshold(_matchParams.scores.getMinScore()); + if (IS_STRICT) { + seek_strict(docid); + } else { + seek_unstrict(docid); + } + } + virtual void doUnpack(uint32_t docid) override { // scores the hit, batches the score for the shared heap, and stores it as the raw score + score_t score = _algo.get_full_score(_terms, _heaps, DotProductScorer()); + _localScores.push_back(score); + if (_localScores.size() >= _matchParams.scoresAdjustFrequency) { // >= rather than ==: same for frequencies >= 1, while a frequency of 0 flushes on every hit instead of never flushing and growing _localScores without bound + _matchParams.scores.adjust(&_localScores[0], &_localScores[0] + _localScores.size()); + _localScores.clear(); + } + _tfmd.setRawScore(docid, score); + } + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const override { + _terms.visit_members(visitor); + } + void initRange(uint32_t begin, uint32_t end) override { + ParallelWeakAndSearch::initRange(begin, end); + _algo.init_range(_terms, _heaps, begin, end); + } + Trinary is_strict() const override 
{ return IS_STRICT ? Trinary::True : Trinary::False; } +}; + +namespace { + +wand::Terms +insertMonitoringSearchIterator(const wand::Terms &terms) +{ + wand::Terms retval = terms; + for (size_t i = 0; i < terms.size(); ++i) { + wand::Term &t = retval[i]; + t.search = new MonitoringSearchIterator + (make_string("w%d:e%u:m%" PRId64 "", + t.weight, t.estHits, DotProductScorer::calculateMaxScore(t)), + SearchIterator::UP(t.search), true); + } + return retval; +} + +template +SearchIterator * +createWand(const wand::Terms &terms, + const ParallelWeakAndSearch::MatchParams &matchParams, + ParallelWeakAndSearch::RankParams &&rankParams) +{ + typedef ParallelWeakAndSearchImpl WandType; + if (should_monitor_wand()) { + wand::Terms termsWithMonitoring = insertMonitoringSearchIterator(terms); + MonitoringSearchIterator::UP monitoringIterator = + MonitoringSearchIterator::UP(new MonitoringSearchIterator + (make_string("PWAND(%u,%" PRId64 "),strict=%u", + matchParams.scores.getScoresToTrack(), + matchParams.scoreThreshold, + IS_STRICT), + SearchIterator::UP(new WandType(rankParams.rootMatchData, + VectorizedIteratorTerms(termsWithMonitoring, + DotProductScorer(), + matchParams.docIdLimit, + std::move(rankParams.childrenMatchData)), + matchParams)), + false)); + return new MonitoringDumpIterator(std::move(monitoringIterator)); + } + return new WandType(rankParams.rootMatchData, + VectorizedIteratorTerms(terms, + DotProductScorer(), + matchParams.docIdLimit, + std::move(rankParams.childrenMatchData)), + matchParams); +} + +} // namespace search::queryeval::wand:: + +} // namespace search::queryeval::wand + +SearchIterator * +ParallelWeakAndSearch::createArrayWand(const Terms &terms, + const MatchParams &matchParams, + RankParams &&rankParams, + bool strict) +{ + if (strict) { + return wand::createWand(terms, matchParams, std::move(rankParams)); + } else { + return wand::createWand(terms, matchParams, std::move(rankParams)); + } +} + +SearchIterator * 
+ParallelWeakAndSearch::createHeapWand(const Terms &terms, + const MatchParams &matchParams, + RankParams &&rankParams, + bool strict) +{ + if (strict) { + return wand::createWand(terms, matchParams, std::move(rankParams)); + } else { + return wand::createWand(terms, matchParams, std::move(rankParams)); + } +} + +SearchIterator * +ParallelWeakAndSearch::create(const Terms &terms, + const MatchParams &matchParams, + RankParams &&rankParams, + bool strict) +{ + if (terms.size() < 128) { + return createArrayWand(terms, matchParams, std::move(rankParams), strict); + } else { + return createHeapWand(terms, matchParams, std::move(rankParams), strict); + } +} + +//----------------------------------------------------------------------------- + +namespace { + +template +SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams ¶ms, bool strict) { + return (strict) + ? SearchIterator::UP(new wand::ParallelWeakAndSearchImpl(tfmd, std::move(terms), params)) + : SearchIterator::UP( new wand::ParallelWeakAndSearchImpl(tfmd, std::move(terms), params)); +} + +template +SearchIterator::UP create_helper(search::fef::TermFieldMatchData &tfmd, VectorizedTerms &&terms, const MatchParams ¶ms, bool strict, bool use_array) { + return (use_array) + ? 
create_helper(tfmd, std::move(terms), params, strict) + : create_helper(tfmd, std::move(terms), params, strict); +} + +} // namespace search::queryeval:: + +SearchIterator::UP +ParallelWeakAndSearch::create(search::fef::TermFieldMatchData &tfmd, + const MatchParams &matchParams, + const std::vector &weights, + const std::vector &dict_entries, + const IDocumentWeightAttribute &attr, + bool strict) +{ + assert(weights.size() == dict_entries.size()); + if (!wand::should_monitor_wand()) { + wand::VectorizedAttributeTerms terms(weights, dict_entries, attr, wand::DotProductScorer(), matchParams.docIdLimit); + return create_helper(tfmd, std::move(terms), matchParams, strict, (weights.size() < 128)); + } else { + // reverse-wrap direct iterators into old API to be compatible with monitoring + fef::MatchDataLayout layout; + std::vector handles; + for (size_t i = 0; i < weights.size(); ++i) { + handles.push_back(layout.allocTermField(tfmd.getFieldId())); + } + fef::MatchData::UP childrenMatchData = layout.createMatchData(); + assert(childrenMatchData->getNumTermFields() == dict_entries.size()); + wand::Terms terms; + for (size_t i = 0; i < dict_entries.size(); ++i) { + terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + weights[i], + dict_entries[i].posting_size, + childrenMatchData->resolveTermField(handles[i]))); + } + assert(terms.size() == dict_entries.size()); + return SearchIterator::UP(create(terms, matchParams, RankParams(tfmd, std::move(childrenMatchData)), strict)); + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h new file mode 100644 index 00000000000..68c43844520 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.h @@ -0,0 +1,85 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include "wand_parts.h" +#include "parallel_weak_and_search.h" +#include "weak_and_heap.h" +#include +#include + +namespace search { +namespace queryeval { + +/** + * WAND search iterator that uses a shared heap between match threads. + */ +struct ParallelWeakAndSearch : public SearchIterator +{ + typedef wand::score_t score_t; + typedef wand::docid_t docid_t; + + /** + * Params used to tweak the behavior of the WAND algorithm. + */ + struct MatchParams + { + WeakAndHeap &scores; + score_t scoreThreshold; + double thresholdBoostFactor; + uint32_t scoresAdjustFrequency; + docid_t docIdLimit; + MatchParams(WeakAndHeap &scores_, + score_t scoreThreshold_, + double thresholdBoostFactor_, + uint32_t scoresAdjustFrequency_) + : scores(scores_), + scoreThreshold(scoreThreshold_), + thresholdBoostFactor(thresholdBoostFactor_), + scoresAdjustFrequency(scoresAdjustFrequency_), + docIdLimit(0) + {} + MatchParams &setDocIdLimit(docid_t value) { + docIdLimit = value; + return *this; + } + }; + + /** + * Params used for rank calculation. 
+ */ + struct RankParams + { + fef::TermFieldMatchData &rootMatchData; + fef::MatchData::UP childrenMatchData; + RankParams(fef::TermFieldMatchData &rootMatchData_, + fef::MatchData::UP &&childrenMatchData_) + : rootMatchData(rootMatchData_), + childrenMatchData(std::move(childrenMatchData_)) + {} + }; + + typedef wand::Terms Terms; + + virtual size_t get_num_terms() const = 0; + virtual int32_t get_term_weight(size_t idx) const = 0; + virtual score_t get_max_score(size_t idx) const = 0; + virtual const MatchParams &getMatchParams() const = 0; + + static SearchIterator *createArrayWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict); + static SearchIterator *createHeapWand(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict); + static SearchIterator *create(const Terms &terms, const MatchParams &matchParams, RankParams &&rankParams, bool strict); + + static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, + const MatchParams &matchParams, + const std::vector &weights, + const std::vector &dict_entries, + const IDocumentWeightAttribute &attr, + bool strict); +}; + +} // namespace queryeval +} // namespace search + + + diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp new file mode 100644 index 00000000000..c021557a0f5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.cpp @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "wand_parts.h" +#include + +namespace search { +namespace queryeval { +namespace wand { + +/* Hands the stored terms to the visitor under the name children. */ +void +VectorizedIteratorTerms::visit_members(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "children", _terms); +} + +} // namespace wand +} // namespace queryeval +} // namespace search + +/** + * Dumps a wand::Term to the visitor: opens a struct called name, visits the + * members weight, estHits and search, then closes the struct. No other + * member of the term is visited. + */ +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::wand::Term &obj) +{ + self.openStruct(name, "search::queryeval::wand::Term"); + visit(self, "weight", obj.weight); + visit(self, "estHits", obj.estHits); + visit(self, "search", obj.search); + self.closeStruct(); +} diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h new file mode 100644 index 00000000000..f28caa3f529 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h @@ -0,0 +1,615 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace search { +namespace queryeval { +namespace wand { + +//----------------------------------------------------------------------------- + +struct Term; +typedef std::vector Terms; +typedef int64_t score_t; +typedef uint32_t docid_t; +typedef uint16_t ref_t; /* index of a term inside the vectorized state */ + +typedef IDocumentWeightAttribute Attr; +typedef Attr::LookupResult AttrDictEntry; +typedef std::vector AttrDictEntries; + +//----------------------------------------------------------------------------- + +/** + * Wrapper used to specify underlying terms during setup. + * + * The search and matchData pointers are stored as given; nothing in this + * struct deletes them. The default constructor and the three-argument + * constructor leave matchData as nullptr. + **/ +struct Term { + SearchIterator *search; + int32_t weight; + uint32_t estHits; + fef::TermFieldMatchData *matchData; + score_t maxScore = 0; // <- only used by rise wand test + Term(SearchIterator *s, int32_t w, uint32_t e, fef::TermFieldMatchData *tfmd) + : search(s), weight(w), estHits(e), matchData(tfmd) {} + Term() : Term(nullptr, 0, 0, nullptr){} + Term(SearchIterator *s, int32_t w, uint32_t e) : Term(s, w, e, nullptr) {} +}; + +//----------------------------------------------------------------------------- + +// input manipulation utilities + +namespace { + +/* Functor that returns a copy of its argument. */ +struct Ident { + template T operator()(const T &t) const { return t; } +}; + +/* Order object of a given size that maps every index to itself, narrowed to ref_t. */ +struct NumericOrder { + size_t my_size; + NumericOrder(size_t my_size_in) : my_size(my_size_in) {} + size_t size() const { return my_size; } + ref_t operator[](size_t idx) const { return idx; } +}; + +/* Builds a vector holding f(order[i]) for each i in [0, order.size()). */ +template +auto assemble(const F &f, const Order &order)->std::vector { + std::vector result; + result.reserve(order.size()); + for (size_t i = 0; i < order.size(); ++i) { + result.push_back(f(order[i])); + } + return result; +} + +/* + * Max weight taken from the posting info of search when that is a + * MinMaxPostingInfo, otherwise the std::numeric_limits max() fallback. + */ +int32_t get_max_weight(const SearchIterator &search) { + const MinMaxPostingInfo *minMax = dynamic_cast(search.getPostingInfo()); + return (minMax != nullptr) ? 
minMax->getMaxWeight() : std::numeric_limits::max(); +} + +} // unnamed namespace + +/* Input adapter that reads weight, est hits, max weight and start docid from a Terms list. */ +struct TermInput { + const Terms &terms; + TermInput(const Terms &terms_in) : terms(terms_in) {} + size_t size() const { return terms.size(); } + int32_t get_weight(ref_t ref) const { return terms[ref].weight; } + uint32_t get_est_hits(ref_t ref) const { return terms[ref].estHits; } + int32_t get_max_weight(ref_t ref) const { return ::search::queryeval::wand::get_max_weight(*(terms[ref].search)); } + docid_t get_initial_docid(ref_t ref) const { return terms[ref].search->getDocId(); } +}; + +/* + * Input adapter over a weights vector and a dict entries vector. size() is + * taken from weights; dict_entries is indexed with the same ref. Every term + * starts at SearchIterator::beginId(). + */ +struct AttrInput { + const std::vector &weights; + const std::vector &dict_entries; + AttrInput(const std::vector &weights_in, + const std::vector &dict_entries_in) + : weights(weights_in), dict_entries(dict_entries_in) {} + size_t size() const { return weights.size(); } + int32_t get_weight(ref_t ref) const { return weights[ref]; } + uint32_t get_est_hits(ref_t ref) const { return dict_entries[ref].posting_size; } + int32_t get_max_weight(ref_t ref) const { return dict_entries[ref].max_weight; } + docid_t get_initial_docid(ref_t) const { return SearchIterator::beginId(); } +}; + +/* + * Comparator for sorting refs by decreasing p_not_hit(est hits) * max_score. + * estNumDocs is the largest of 1.0, docIdLimit - 1 and every est hits value, + * so the division in p_not_hit never divides by zero. + */ +template +struct MaxSkipOrder { + double estNumDocs; + const Input &input; + const std::vector &max_score; + MaxSkipOrder(docid_t docIdLimit, const Input &input_in, + const std::vector &max_score_in) + : estNumDocs(1.0), input(input_in), max_score(max_score_in) + { + estNumDocs = std::max(estNumDocs, docIdLimit - 1.0); + for (size_t i = 0; i < input.size(); ++i) { + estNumDocs = std::max(estNumDocs, (double)input.get_est_hits(i)); + } + } + double p_not_hit(double estHits) const { + return ((estNumDocs - estHits) / (estNumDocs)); + } + bool operator()(ref_t a, ref_t b) const { + return ((p_not_hit(input.get_est_hits(a)) * max_score[a]) > (p_not_hit(input.get_est_hits(b)) * max_score[b])); + } +}; + +//----------------------------------------------------------------------------- + +namespace { + +/* Formats [title]{a, b, ...} where each element is f applied to an item of [begin, end). */ +template 
+vespalib::string do_stringify(const vespalib::string &title, ITR begin, ITR end, const F &f) { + vespalib::string result = vespalib::make_string("[%s]{", title.c_str()); + for (ITR pos = begin; pos != end; ++pos) { + if (pos != begin) { + result.append(", "); + } + result.append(f(*pos)); + } + result.append("}"); + return result; +} + +} // unnamed namespace + +//----------------------------------------------------------------------------- + +/* + * Parallel arrays of docid, weight and max score per term plus the iterator + * pack used to seek them. The arrays are filled by init_state(). + */ +template +class VectorizedState +{ +private: + std::vector _docId; + std::vector _weight; + std::vector _maxScore; + IteratorPack _iteratorPack; + +public: + VectorizedState() : _docId(), _weight(), _maxScore(), _iteratorPack() {} + + /* + * Computes a max score per input term with Scorer::calculate_max_score, + * sorts the term refs with MaxSkipOrder and stores initial docid, weight + * and max score in that sorted order. Returns the sorted refs so the + * caller can arrange its own per-term data the same way. + */ + template + std::vector init_state(const Input &input, uint32_t docIdLimit) { + std::vector order; + std::vector max_scores; + order.reserve(input.size()); + max_scores.reserve(input.size()); + for (size_t i = 0; i < input.size(); ++i) { + order.push_back(i); + max_scores.push_back(Scorer::calculate_max_score(input, i)); + } + std::sort(order.begin(), order.end(), MaxSkipOrder(docIdLimit, input, max_scores)); + _docId = assemble([&input](ref_t ref){ return input.get_initial_docid(ref); }, order); + _weight = assemble([&input](ref_t ref){ return input.get_weight(ref); }, order); + _maxScore = assemble([&max_scores](ref_t ref){ return max_scores[ref]; }, order); + return order; + } + + /* Raw array access; data() is used because it is valid even when there are no terms. */ + docid_t *docId() { return _docId.data(); } + const int32_t *weight() const { return _weight.data(); } + const score_t *maxScore() const { return _maxScore.data(); } + + docid_t &docId(ref_t ref) { return _docId[ref]; } + int32_t weight(ref_t ref) const { return _weight[ref]; } + score_t maxScore(ref_t ref) const { return _maxScore[ref]; } + + size_t size() const { return _docId.size(); } + IteratorPack &iteratorPack() { return _iteratorPack; } + + /* Both calls below are forwarded unchanged to the iterator pack. */ + uint32_t seek(uint16_t ref, uint32_t docid) { return _iteratorPack.seek(ref, docid); } + int32_t get_weight(uint16_t ref, uint32_t docid) { return 
_iteratorPack.get_weight(ref, docid); } + + /* Debug dump: one ref:stored docid/iterator pack docid entry per term. */ + vespalib::string stringify_docid() const { + auto range = assemble(Ident(), NumericOrder(_docId.size())); + return do_stringify("state{docid}", range.begin(), range.end(), + [this](ref_t ref) + { + return vespalib::make_string("%u:%u/%u", ref, _docId[ref], _iteratorPack.get_docid(ref)); + }); + } +}; + +//----------------------------------------------------------------------------- + +/* + * Vectorized state built from a Terms list. The constructor sorts the terms + * through init_state(), keeps a copy of them in sorted order and builds a + * SearchIteratorPack from their search and matchData pointers. The Scorer + * argument is unnamed and only its type is used. + */ +class VectorizedIteratorTerms : public VectorizedState +{ +private: + Terms _terms; // TODO: want to get rid of this + +public: + template + VectorizedIteratorTerms(const Terms &t, const Scorer &, uint32_t docIdLimit, + fef::MatchData::UP childrenMatchData) + : _terms() + { + std::vector order = init_state(TermInput(t), docIdLimit); + _terms = assemble([&t](ref_t ref){ return t[ref]; }, order); + iteratorPack() = SearchIteratorPack(assemble([&t](ref_t ref){ return t[ref].search; }, order), + assemble([&t](ref_t ref){ return t[ref].matchData; }, order), + std::move(childrenMatchData)); + } + void unpack(uint16_t ref, uint32_t docid) { iteratorPack().unpack(ref, docid); } + void visit_members(vespalib::ObjectVisitor &visitor) const; + const Terms &input_terms() const { return _terms; } +}; + +//----------------------------------------------------------------------------- + +/* + * Vectorized state built from attribute dictionary entries. For each term, in + * sorted order, attr.create() appends an iterator and the stored docid becomes + * the key of that iterator, or search::endDocId when the iterator is not valid. + */ +struct VectorizedAttributeTerms : VectorizedState { + template + VectorizedAttributeTerms(const std::vector &weights, + const std::vector &dict_entries, + const IDocumentWeightAttribute &attr, + const Scorer &, + docid_t docIdLimit) + { + std::vector order = init_state(AttrInput(weights, dict_entries), docIdLimit); + std::vector iterators; + iterators.reserve(order.size()); + for (size_t i = 0; i < order.size(); ++i) { + attr.create(dict_entries[order[i]].posting_idx, iterators); + docId(i) = (iterators.back().valid()) ? 
iterators.back().getKey() : search::endDocId; + } + iteratorPack() = AttributeIteratorPack(std::move(iterators)); + } + void visit_members(vespalib::ObjectVisitor &) const {} +}; + +//----------------------------------------------------------------------------- + +/** + * Comparator used on vectorized state to sort by increasing document + * id. It reads the docids through the pointer given to the constructor + * and does not copy them. + **/ +struct DocIdOrder { + const docid_t *termPos; + DocIdOrder(docid_t *pos) : termPos(pos) {} + bool at_end(ref_t ref) const { return termPos[ref] == search::endDocId; } + docid_t get_pos(ref_t ref) const { return termPos[ref]; } + bool operator()(ref_t a, ref_t b) const { + return (termPos[a] < termPos[b]); + } +}; + +//----------------------------------------------------------------------------- + +/** + * Three adjacent regions inside one buffer of refs: [_future, _present) is + * the future heap, [_present, _past) the present array and [_past, _trash) + * the past heap. The buffer is reserved once in the constructor and must + * not reallocate afterwards because the raw pointers would dangle; init() + * asserts that it did not move. + **/ +template +class DualHeap +{ +private: + DocIdOrder _futureCmp; + std::vector _space; + ref_t *_future; // start of future heap + ref_t *_present; // start of present array + ref_t *_past; // start of past heap + ref_t *_trash; // end of used data + size_t _size; + +public: + DualHeap(const DocIdOrder &futureCmp, size_t size) + : _futureCmp(futureCmp), _space(), _future(nullptr), _present(nullptr), _past(nullptr), _trash(nullptr), _size(size) + { + FutureHeap::require_left_heap(); + PastHeap::require_right_heap(); + _space.reserve(size); + init(); + } + /* + * Rebuilds the future heap from every ref in [0, _size) that is not at + * end; the present array and the past heap start out empty. + */ + void init() { + _space.clear(); + /* data() is valid on an empty vector; operator[] on an empty vector is undefined. */ + _future = _space.data(); + _present = _future; + for (size_t i = 0; i < _size; ++i) { + if (!_futureCmp.at_end(i)) { + _space.push_back(i); + FutureHeap::push(_future, ++_present, _futureCmp); + } + } + _past = _present; + _trash = _past; + assert(_future == _space.data()); // space has not moved + } + bool has_future() const { return (_future != _present);} + bool has_present() const { return (_present != _past);} + bool has_past() const { return (_past != _trash);} + ref_t future() const { return FutureHeap::front(_future, _present); } + ref_t first_present() const { return *_present; } + ref_t last_present() const { return *(_past - 1); } + 
void swap_presents() { std::swap(*_present, *(_past - 1)); } + /* The four calls below move a region boundary by one slot and let the heap type fix up its range. */ + void push_future() { FutureHeap::push(_future, ++_present, _futureCmp); } + void pop_future() { FutureHeap::pop(_future, _present--, _futureCmp); } + void push_past() { PastHeap::push(--_past, _trash, std::less()); } + void pop_past() { PastHeap::pop(_past++, _trash, std::less()); } + /* Moves the past boundary without calling into PastHeap. */ + void pop_any_past() { _past++; } + /* Drops the last present ref by shifting the whole past region one slot down. */ + void discard_last_present() { + memmove((_past - 1), _past, + (_trash - _past) * sizeof(ref_t)); + --_past; + --_trash; + } + ref_t *present_begin() const { return _present; } + ref_t *present_end() const { return _past; } + /* Debug dump of the three regions as ref@docid lists. */ + vespalib::string stringify() const { + return "Heaps: " + + do_stringify("future", _future, _present, + [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); }) + + " " + do_stringify("present", _present, _past, + [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); }) + + " " + do_stringify("past", _past, _trash, + [this](ref_t ref){ return vespalib::make_string("%u@%u", ref, _futureCmp.get_pos(ref)); }); + } +}; + +//----------------------------------------------------------------------------- + +/* Scale factor for the fixed-point term frequency score. */ +#define TermFrequencyScorer_TERM_SCORE_FACTOR 1000000.0 + +/** + * Scorer used with WeakAndAlgorithm that calculates a pseudo term frequency + * as max score and regular score for a term. 
+ */ +struct TermFrequencyScorer +{ + // weight * idf, scaled to fixedpoint + static score_t calculateMaxScore(double estHits, double weight) { + return (score_t) (TermFrequencyScorer_TERM_SCORE_FACTOR * weight / (1.0 + log(1.0 + (estHits / 1000.0)))); + } + + static score_t calculateMaxScore(const Term &term) { + return calculateMaxScore(term.estHits, term.weight) + 1; + } + + template + static score_t calculate_max_score(const Input &input, ref_t ref) { + return calculateMaxScore(input.get_est_hits(ref), input.get_weight(ref)) + 1; + } +}; + +//----------------------------------------------------------------------------- + +/** + * Scorer used with WeakAndAlgorithm that calculates a real dot product upper + * bound as max score and dot product component score per term. + */ +struct DotProductScorer +{ + static score_t calculateMaxScore(const Term &term) { + int32_t maxWeight = std::numeric_limits::max(); + const PostingInfo *postingInfo = term.search->getPostingInfo(); + if (postingInfo != NULL) { + const MinMaxPostingInfo *minMax = dynamic_cast(postingInfo); + if (minMax != NULL) { + maxWeight = minMax->getMaxWeight(); + } + } + return (score_t)term.weight * maxWeight; + } + + template + static score_t calculate_max_score(const Input &input, ref_t ref) { + return input.get_weight(ref) * (score_t) input.get_max_weight(ref); + } + + static score_t calculateScore(const Term &term, docid_t docId) { + term.search->doUnpack(docId); + return (score_t)term.weight * term.matchData->getWeight(); + } + + template + static score_t calculateScore(VectorizedTerms &terms, ref_t ref, docid_t docId) { + return terms.weight(ref) * (score_t)terms.get_weight(ref, docId); + } +}; + +//----------------------------------------------------------------------------- + +// used with parallel wand where we can safely discard hits based on score +struct GreaterThan { + score_t threshold; + GreaterThan(score_t t) : threshold(t) {} + bool operator()(score_t score) const { return (score > 
threshold); } +}; + +// used with old-style vespa wand to ensure at least AND'ish results +struct GreaterThanEqual { + score_t threshold; + GreaterThanEqual(score_t t) : threshold(t) {} + bool operator()(score_t score) const { return (score >= threshold); } +}; + +//----------------------------------------------------------------------------- + +class Algorithm +{ +private: + docid_t _candidate; + score_t _upperBound; + score_t _maxUpperBound; + score_t _partial_score; + + template + bool step_term(VectorizedTerms &terms, ref_t ref) { + terms.docId(ref) = terms.seek(ref, _candidate); + return (terms.docId(ref) == _candidate); + } + + template + void evict_last_present(VectorizedTerms &terms, Heaps &heaps) { + _maxUpperBound -= terms.maxScore(heaps.last_present()); + if (terms.docId(heaps.last_present()) != search::endDocId) { + heaps.swap_presents(); + heaps.push_future(); + } else { + heaps.discard_last_present(); + } + } + + template + void discard_candidate(Heaps &heaps) { + while (heaps.has_present()) { + heaps.push_past(); + } + _upperBound = 0; + } + + template + void step_optimal_term(VectorizedTerms &terms, Heaps &heaps) { + heaps.pop_past(); + if (step_term(terms, heaps.last_present())) { + _upperBound += terms.maxScore(heaps.last_present()); + } else { + evict_last_present(terms, heaps); + } + } + + template + void step_candidate(VectorizedTerms &terms, Heaps &heaps) { + discard_candidate(heaps); // will reset upper bound + _candidate = terms.docId(heaps.future()); + do { + heaps.pop_future(); + _upperBound += terms.maxScore(heaps.first_present()); + } while (heaps.has_future() && terms.docId(heaps.future()) == _candidate); + _maxUpperBound += _upperBound; + } + + template + bool check_present_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) { + ref_t *end = heaps.present_end(); + for (ref_t *ref = heaps.present_begin(); ref != end; ++ref) { + score_t term_score = 
Scorer::calculateScore(terms, *ref, _candidate); + _partial_score += term_score; + max_score -= (terms.maxScore(*ref) - term_score); + if (!aboveThreshold(max_score)) { + return false; + } + } + return true; + } + + template + bool check_past_score(VectorizedTerms &terms, Heaps &heaps, score_t &max_score, const Scorer &, AboveThreshold &&aboveThreshold) { + while (heaps.has_past() && !aboveThreshold(_partial_score)) { + heaps.pop_past(); + if (step_term(terms, heaps.last_present())) { + score_t term_score = Scorer::calculateScore(terms, heaps.last_present(), _candidate); + _partial_score += term_score; + max_score -= (terms.maxScore(heaps.last_present()) - term_score); + } else { + max_score -= terms.maxScore(heaps.last_present()); + evict_last_present(terms, heaps); + } + if (!aboveThreshold(max_score)) { + return false; + } + } + return true; + } + + void reset() { + _candidate = SearchIterator::beginId(); + _upperBound = 0; + _maxUpperBound = 0; + _partial_score = 0; + } + +public: + Algorithm() : _candidate(SearchIterator::beginId()), _upperBound(0), _maxUpperBound(0), _partial_score(0) {} + + template + void init_range(VectorizedTerms &terms, Heaps &heaps, uint32_t begin_id, uint32_t end_id) { + reset(); + terms.iteratorPack().initRange(begin_id, end_id); + for (size_t i = 0; i < terms.size(); ++i) { + terms.docId(i) = terms.iteratorPack().get_docid(i); + } + heaps.init(); + } + + docid_t get_candidate() const { return _candidate; } + score_t get_upper_bound() const { return _upperBound; } + + template + void set_candidate(VectorizedTerms &terms, Heaps &heaps, docid_t candidate) { + _candidate = candidate; + while (heaps.has_future() && terms.docId(heaps.future()) < candidate) { + heaps.pop_future(); + _maxUpperBound += terms.maxScore(heaps.first_present()); + } + discard_candidate(heaps); // will reset upper bound + while (heaps.has_future() && terms.docId(heaps.future()) == candidate) { + heaps.pop_future(); + _upperBound += 
terms.maxScore(heaps.first_present()); + } + _maxUpperBound += _upperBound; + } + + template + bool solve_wand_constraint(VectorizedTerms &terms, Heaps &heaps, AboveThreshold &&aboveThreshold) { + while (!aboveThreshold(_upperBound)) { + if (aboveThreshold(_maxUpperBound)) { + step_optimal_term(terms, heaps); + } else if (heaps.has_future()) { + step_candidate(terms, heaps); + } else { + return false; + } + } + return true; + } + + template + bool check_wand_constraint(VectorizedTerms &terms, Heaps &heaps, AboveThreshold &&aboveThreshold) { + while (!aboveThreshold(_upperBound)) { + if (aboveThreshold(_maxUpperBound)) { + step_optimal_term(terms, heaps); + } else { + return false; + } + } + return true; + } + + template + bool check_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&scorer, AboveThreshold &&aboveThreshold) { + _partial_score = 0; + score_t max_score = _maxUpperBound; + if (check_present_score(terms, heaps, max_score, scorer, aboveThreshold)) { + if (check_past_score(terms, heaps, max_score, scorer, aboveThreshold)) { + return aboveThreshold(_partial_score); + } + } + return false; + } + + template + score_t get_full_score(VectorizedTerms &terms, Heaps &heaps, Scorer &&) { + score_t score = _partial_score; + while (heaps.has_past()) { + heaps.pop_any_past(); + if (step_term(terms, heaps.last_present())) { + score += Scorer::calculateScore(terms, heaps.last_present(), _candidate); + } else { + evict_last_present(terms, heaps); + } + } + return score; + } + + template + void find_matching_terms(VectorizedTerms &terms, Heaps &heaps) { + while (heaps.has_past()) { + heaps.pop_any_past(); + if (step_term(terms, heaps.last_present())) { + _upperBound += terms.maxScore(heaps.last_present()); + } else { + evict_last_present(terms, heaps); + } + } + } +}; + +//----------------------------------------------------------------------------- + +} // namespace wand +} // namespace queryeval +} // namespace search + 
+//----------------------------------------------------------------------------- + +void visit(vespalib::ObjectVisitor &self, const vespalib::string &name, + const search::queryeval::wand::Term &obj); + +//----------------------------------------------------------------------------- + diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp new file mode 100644 index 00000000000..e8e149da476 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.cpp @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +LOG_SETUP(".queryeval.weak_and_heap"); +#include "weak_and_heap.h" +#include + +namespace search { +namespace queryeval { + +SharedWeakAndPriorityQueue::SharedWeakAndPriorityQueue(uint32_t scoresToTrack) : + WeakAndHeap(scoresToTrack), + _bestScores(), + _lock() +{ + _bestScores.reserve(scoresToTrack); +} + +void +SharedWeakAndPriorityQueue::adjust(score_t *begin, score_t *end) +{ + if (getScoresToTrack() == 0) { + return; + } + vespalib::LockGuard guard(_lock); + for (score_t *itr = begin; itr != end; ++itr) { + score_t score = *itr; + if (!is_full()) { + _bestScores.push(score); + } else if (_bestScores.front() < score) { + _bestScores.push(score); + _bestScores.pop_front(); + } + } + if (is_full()) { + setMinScore(_bestScores.front()); + } +} + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h new file mode 100644 index 00000000000..7208dca2dbe --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_heap.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include "wand_parts.h" +#include +#include + +namespace search { +namespace queryeval { + +/** + * An interface used to insert scores into an underlying heap (or similar data structure) + * that can be shared between multiple search iterators. + * An implementation of this interface must keep the best N scores and + * provide the threshold score (lowest score among the best N). + */ +class WeakAndHeap { +public: + typedef wand::score_t score_t; + WeakAndHeap(uint32_t scoresToTrack) : + _minScore((scoresToTrack == 0) + ? std::numeric_limits::max() + : 0), + _scoresToTrack(scoresToTrack) + { } + virtual ~WeakAndHeap() {} + /** + * Consider the given scores for insertion into the underlying structure. + * The implementation may change the given score array to speed up execution. + */ + virtual void adjust(score_t *begin, score_t *end) = 0; + + /** + * The number of scores this heap is tracking. + **/ + uint32_t getScoresToTrack() const { return _scoresToTrack; } + + score_t getMinScore() const { return _minScore; } +protected: + void setMinScore(score_t minScore) { _minScore = minScore; } +private: + score_t _minScore; + const uint32_t _scoresToTrack; +}; + +/** + * An implementation using an underlying priority queue to keep track of the N + * best hits that can be shared among multiple search iterators. 
+ */ +class SharedWeakAndPriorityQueue : public WeakAndHeap +{ +private: + typedef vespalib::PriorityQueue Scores; + Scores _bestScores; + vespalib::Lock _lock; + + bool is_full() const { return (_bestScores.size() >= getScoresToTrack()); } + +public: + SharedWeakAndPriorityQueue(uint32_t scoresToTrack); + Scores &getScores() { return _bestScores; } + void adjust(score_t *begin, score_t *end) override; +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp new file mode 100644 index 00000000000..988be3f6ba9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.cpp @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "wand_parts.h" +#include "weak_and_search.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { +namespace wand { + +template +class WeakAndSearchLR : public WeakAndSearch +{ +private: + typedef vespalib::PriorityQueue Scores; + + VectorizedIteratorTerms _terms; + DualHeap _heaps; + Algorithm _algo; + score_t _threshold; // current score threshold + Scores _scores; // best n scores + const uint32_t _n; + + void seek_strict(uint32_t docid) { + _algo.set_candidate(_terms, _heaps, docid); + if (_algo.solve_wand_constraint(_terms, _heaps, GreaterThanEqual(_threshold))) { + setDocId(_algo.get_candidate()); + } else { + setAtEnd(); + } + } + + void seek_unstrict(uint32_t docid) { + if (docid > _algo.get_candidate()) { + _algo.set_candidate(_terms, _heaps, docid); + if (_algo.check_wand_constraint(_terms, _heaps, GreaterThanEqual(_threshold))) { + setDocId(_algo.get_candidate()); + } + } + } + +public: + WeakAndSearchLR(const Terms &terms, uint32_t n) + : _terms(terms, + TermFrequencyScorer(), + 0, + fef::MatchData::UP(nullptr)), + 
_heaps(DocIdOrder(_terms.docId()), _terms.size()), + _algo(), + _threshold(1), + _scores(), + _n(n) + { + } + virtual size_t get_num_terms() const override { return _terms.size(); } + virtual int32_t get_term_weight(size_t idx) const override { return _terms.weight(idx); } + virtual score_t get_max_score(size_t idx) const override { return _terms.maxScore(idx); } + const Terms &getTerms() const { return _terms.input_terms(); } + uint32_t getN() const { return _n; } + void doSeek(uint32_t docid) override { + if (IS_STRICT) { + seek_strict(docid); + } else { + seek_unstrict(docid); + } + } + void doUnpack(uint32_t docid) override { + _algo.find_matching_terms(_terms, _heaps); + _scores.push(_algo.get_upper_bound()); + if (_scores.size() > _n) { + _scores.pop_front(); + } + if (_scores.size() == _n) { + _threshold = _scores.front(); + } + ref_t *end = _heaps.present_end(); + for (ref_t *ref = _heaps.present_begin(); ref != end; ++ref) { + _terms.unpack(*ref, docid); + } + } + void initRange(uint32_t begin, uint32_t end) override { + WeakAndSearch::initRange(begin, end); + _algo.init_range(_terms, _heaps, begin, end); + if (_n == 0) { + setAtEnd(); + } + } + Trinary is_strict() const override { return IS_STRICT ? 
Trinary::True : Trinary::False; } +}; + +//----------------------------------------------------------------------------- + +} // namespace search::queryeval::wand + +//----------------------------------------------------------------------------- + +void +WeakAndSearch::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + visit(visitor, "n", getN()); + visit(visitor, "terms", getTerms()); +} + +//----------------------------------------------------------------------------- + +SearchIterator * +WeakAndSearch::createArrayWand(const Terms &terms, uint32_t n, bool strict) +{ + if (strict) { + return new wand::WeakAndSearchLR(terms, n); + } else { + return new wand::WeakAndSearchLR(terms, n); + } +} + +SearchIterator * +WeakAndSearch::createHeapWand(const Terms &terms, uint32_t n, bool strict) +{ + if (strict) { + return new wand::WeakAndSearchLR(terms, n); + } else { + return new wand::WeakAndSearchLR(terms, n); + } +} + +SearchIterator * +WeakAndSearch::create(const Terms &terms, uint32_t n, bool strict) +{ + if (terms.size() < 128) { + return createArrayWand(terms, n, strict); + } else { + return createHeapWand(terms, n, strict); + } +} + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h new file mode 100644 index 00000000000..814e84c2d79 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/wand/weak_and_search.h @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include +#include "wand_parts.h" + +namespace search { +namespace queryeval { + +struct WeakAndSearch : SearchIterator { + typedef wand::Terms Terms; + virtual size_t get_num_terms() const = 0; + virtual int32_t get_term_weight(size_t idx) const = 0; + virtual wand::score_t get_max_score(size_t idx) const = 0; + virtual const Terms &getTerms() const = 0; + virtual uint32_t getN() const = 0; + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; + static SearchIterator *createArrayWand(const Terms &terms, uint32_t n, bool strict); + static SearchIterator *createHeapWand(const Terms &terms, uint32_t n, bool strict); + static SearchIterator *create(const Terms &terms, uint32_t n, bool strict); +}; + +} // namespace queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp new file mode 100644 index 00000000000..d572fd5f48b --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.cpp @@ -0,0 +1,89 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".queryeval.weighted_set_term.blueprint"); + +#include "weighted_set_term_blueprint.h" +#include "weighted_set_term_search.h" +#include +#include +#include +#include + +namespace search { +namespace queryeval { + +WeightedSetTermBlueprint::WeightedSetTermBlueprint(const FieldSpec &field) + : ComplexLeafBlueprint(field), + _estimate(), + _weights(), + _terms() +{ +} + +WeightedSetTermBlueprint::~WeightedSetTermBlueprint() +{ + while (!_terms.empty()) { + delete _terms.back(); + _terms.pop_back(); + } +} + +void +WeightedSetTermBlueprint::addTerm(Blueprint::UP term, int32_t weight) +{ + HitEstimate childEst = term->getState().estimate(); + if (! 
childEst.empty) { + if (_estimate.empty) { + _estimate = childEst; + } else { + _estimate.estHits += childEst.estHits; + } + setEstimate(_estimate); + } + _weights.push_back(weight); + _terms.push_back(term.get()); + term.release(); +} + +SearchIterator::UP +WeightedSetTermBlueprint::createSearch(search::fef::MatchData &md, + bool) const +{ + const State &state = getState(); + assert(state.numFields() == 1); + search::fef::TermFieldMatchData &tfmd = *state.field(0).resolve(md); + + std::vector children(_terms.size()); + for (size_t i = 0; i < _terms.size(); ++i) { + children[i] = _terms[i]->createSearch(md, true).release(); + } + return SearchIterator::UP(WeightedSetTermSearch::create(children, tfmd, _weights)); +} + +void +WeightedSetTermBlueprint::fetchPostings(bool strict) +{ + (void) strict; + for (size_t i = 0; i < _terms.size(); ++i) { + _terms[i]->fetchPostings(true); + } +} + +void +WeightedSetTermBlueprint::visitMembers(vespalib::ObjectVisitor &visitor) const +{ + LeafBlueprint::visitMembers(visitor); + visit(visitor, "_weights", _weights); + visit(visitor, "_terms", _terms); +} + +SearchIterator::UP +WeightedSetTermBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const +{ + abort(); +} + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h new file mode 100644 index 00000000000..8d7916b8f42 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_blueprint.h @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include "searchable.h" +#include +#include +#include + +namespace search { +namespace fef { class TermFieldMatchData; } + +namespace queryeval { + +class WeightedSetTermBlueprint : public ComplexLeafBlueprint +{ + HitEstimate _estimate; + std::vector _weights; + std::vector _terms; + + WeightedSetTermBlueprint(const WeightedSetTermBlueprint &); // disabled + WeightedSetTermBlueprint &operator=(const WeightedSetTermBlueprint &); // disabled + +public: + WeightedSetTermBlueprint(const FieldSpec &field); + ~WeightedSetTermBlueprint(); + + // used by create visitor + // matches signature in dot product blueprint for common blueprint + // building code. Hands out its own field spec to children. NOTE: + // this is only ok since children will never be unpacked. + FieldSpec getNextChildField(const FieldSpec &outer) { return outer; } + + // used by create visitor + void addTerm(Blueprint::UP term, int32_t weight); + + SearchIterator::UP createSearch(search::fef::MatchData &md, bool strict) const override; + void visitMembers(vespalib::ObjectVisitor &visitor) const override; + +private: + SearchIterator::UP createLeafSearch(const search::fef::TermFieldMatchDataArray &, bool) const override; + void fetchPostings(bool strict) override; +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp new file mode 100644 index 00000000000..fe8c3273153 --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp @@ -0,0 +1,161 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".queryeval.weighted_set_term.search"); + +#include "weighted_set_term_search.h" +#include +#include +#include +#include +#include "iterator_pack.h" + +using search::fef::TermFieldMatchData; +using vespalib::ObjectVisitor; + +namespace search { +namespace queryeval { + +template +class WeightedSetTermSearchImpl : public WeightedSetTermSearch +{ +private: + typedef uint32_t ref_t; + + struct CmpDocId { + const uint32_t *termPos; + CmpDocId(const uint32_t *tp) : termPos(tp) {} + bool operator()(const ref_t &a, const ref_t &b) const { + return (termPos[a] < termPos[b]); + } + }; + + struct CmpWeight { + const int32_t *weight; + CmpWeight(const int32_t *w) : weight(w) {} + bool operator()(const ref_t &a, const ref_t &b) const { + return (weight[a] > weight[b]); + } + }; + + fef::TermFieldMatchData &_tmd; + std::vector _weights; + std::vector _termPos; + CmpDocId _cmpDocId; + CmpWeight _cmpWeight; + std::vector _data_space; + ref_t *_data_begin; + ref_t *_data_stash; + ref_t *_data_end; + IteratorPack _children; + + void seek_child(ref_t child, uint32_t docId) { + _termPos[child] = _children.seek(child, docId); + } + +public: + WeightedSetTermSearchImpl(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + IteratorPack &&iteratorPack) + : _tmd(tmd), + _weights(weights), + _termPos(weights.size()), + _cmpDocId(&_termPos[0]), + _cmpWeight(&_weights[0]), + _data_space(), + _data_begin(nullptr), + _data_stash(nullptr), + _data_end(nullptr), + _children(std::move(iteratorPack)) + { + HEAP::require_left_heap(); + assert(_children.size() > 0); + assert(_children.size() == _weights.size()); + _data_space.reserve(_children.size()); + for (size_t i = 0; i < _children.size(); ++i) { + _data_space.push_back(i); + } + _data_begin = &_data_space[0]; + _data_end = _data_begin + _data_space.size(); + } + + void doSeek(uint32_t docId) override { + while (_data_stash < _data_end) { + seek_child(*_data_stash, docId); + 
HEAP::push(_data_begin, ++_data_stash, _cmpDocId); + } + while (_termPos[HEAP::front(_data_begin, _data_stash)] < docId) { + seek_child(HEAP::front(_data_begin, _data_stash), docId); + HEAP::adjust(_data_begin, _data_stash, _cmpDocId); + } + setDocId(_termPos[HEAP::front(_data_begin, _data_stash)]); + } + + void doUnpack(uint32_t docId) override { + _tmd.reset(docId); + while ((_data_begin < _data_stash) && + _termPos[HEAP::front(_data_begin, _data_stash)] == docId) + { + HEAP::pop(_data_begin, _data_stash--, _cmpDocId); + } + std::sort(_data_stash, _data_end, _cmpWeight); + for (ref_t *ptr = _data_stash; ptr < _data_end; ++ptr) { + fef::TermFieldMatchDataPosition pos; + pos.setElementWeight(_weights[*ptr]); + _tmd.appendPosition(pos); + } + } + + void initRange(uint32_t begin, uint32_t end) override { + WeightedSetTermSearch::initRange(begin, end); + _children.initRange(begin, end); + for (size_t i = 0; i < _children.size(); ++i) { + _termPos[i] = _children.get_docid(i); + } + _data_stash = _data_begin; + while (_data_stash < _data_end) { + HEAP::push(_data_begin, ++_data_stash, _cmpDocId); + } + } + Trinary is_strict() const override { return Trinary::True; } + + void visitMembers(vespalib::ObjectVisitor &) const override {} +}; + +//----------------------------------------------------------------------------- + +SearchIterator * +WeightedSetTermSearch::create(const std::vector &children, + TermFieldMatchData &tmd, + const std::vector &weights) +{ + typedef WeightedSetTermSearchImpl ArrayHeapImpl; + typedef WeightedSetTermSearchImpl HeapImpl; + + if (children.size() < 128) { + return new ArrayHeapImpl(tmd, weights, SearchIteratorPack(children)); + } + return new HeapImpl(tmd, weights, SearchIteratorPack(children)); +} + +//----------------------------------------------------------------------------- + +SearchIterator::UP +WeightedSetTermSearch::create(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + std::vector &&iterators) +{ + typedef 
WeightedSetTermSearchImpl ArrayHeapImpl; + typedef WeightedSetTermSearchImpl HeapImpl; + + if (iterators.size() < 128) { + return SearchIterator::UP(new ArrayHeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); + } + return SearchIterator::UP(new HeapImpl(tmd, weights, AttributeIteratorPack(std::move(iterators)))); +} + +//----------------------------------------------------------------------------- + +} // namespace search::queryeval +} // namespace search diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h new file mode 100644 index 00000000000..536d13836ff --- /dev/null +++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multisearch.h" +#include +#include +#include +#include +#include +#include + +namespace search { +namespace fef { +class TermFieldMatchData; +} // namespace fef + +namespace queryeval { + +/** + * Search iterator for a weighted set, based on a set of child search + * iterators. 
+ */ +class WeightedSetTermSearch : public SearchIterator +{ +protected: + WeightedSetTermSearch() {} + +public: + static SearchIterator* create(const std::vector &children, + search::fef::TermFieldMatchData &tmd, + const std::vector &weights); + + static SearchIterator::UP create(search::fef::TermFieldMatchData &tmd, + const std::vector &weights, + std::vector &&iterators); +}; + +} // namespace search::queryeval +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/test/.gitignore b/searchlib/src/vespa/searchlib/test/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt new file mode 100644 index 00000000000..6b23f41a34a --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_test + SOURCES + statefile.cpp + statestring.cpp + initrange.cpp + document_weight_attribute_helper.cpp + $ + $ + DEPENDS + searchlib_searchlib_test_memoryindex +) diff --git a/searchlib/src/vespa/searchlib/test/OWNERS b/searchlib/src/vespa/searchlib/test/OWNERS new file mode 100644 index 00000000000..64735d11d93 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/OWNERS @@ -0,0 +1 @@ +tegge diff --git a/searchlib/src/vespa/searchlib/test/diskindex/.gitignore b/searchlib/src/vespa/searchlib/test/diskindex/.gitignore new file mode 100644 index 00000000000..5dae353d999 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/.gitignore @@ -0,0 +1,2 @@ +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt new file mode 100644 index 00000000000..6f43be53471 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_library(searchlib_searchlib_test_diskindex OBJECT + SOURCES + threelevelcountbuffers.cpp + testdiskindex.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp new file mode 100644 index 00000000000..41cb1cea68a --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.cpp @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include + +namespace search { + +using index::Schema; +using index::DummyFileHeaderContext; +using index::WordDocElementWordPosFeatures; + +namespace diskindex { + +struct Builder +{ + search::diskindex::IndexBuilder _ib; + TuneFileIndexing _tuneFileIndexing; + DummyFileHeaderContext _fileHeaderContext; + + Builder(const std::string &dir, + const Schema &s, + uint32_t docIdLimit, + uint64_t numWordIds, + bool directio) + : _ib(s) + { + if (directio) { + _tuneFileIndexing._read.setWantDirectIO(); + _tuneFileIndexing._write.setWantDirectIO(); + } + _ib.setPrefix(dir); + _ib.open(docIdLimit, numWordIds, _tuneFileIndexing, + _fileHeaderContext); + } + + void + addDoc(uint32_t docId) + { + _ib.startDocument(docId); + _ib.startElement(0, 1, 1); + _ib.addOcc(WordDocElementWordPosFeatures(0)); + _ib.endElement(); + _ib.endDocument(); + } + + void + close() + { + _ib.close(); + } +}; + + +void +TestDiskIndex::buildSchema(void) +{ + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING)); + _schema.addFieldSet(Schema::FieldSet("c2"). + addField("f1"). + addField("f2")); +} + +void +TestDiskIndex::buildIndex(const std::string & dir, bool directio, + bool fieldEmpty, bool docEmpty, bool wordEmpty) +{ + Builder b(dir, _schema, docEmpty ? 1 : 32, wordEmpty ? 
0 : 2, directio); + if (!wordEmpty && !fieldEmpty && !docEmpty) { + // f1 + b._ib.startField(0); + b._ib.startWord("w1"); + b.addDoc(1); + b.addDoc(3); + b._ib.endWord(); + b._ib.endField(); + // f2 + b._ib.startField(1); + b._ib.startWord("w1"); + b.addDoc(2); + b.addDoc(4); + b.addDoc(6); + b._ib.endWord(); + b._ib.startWord("w2"); + for (uint32_t docId = 1; docId < 18; ++docId) { + b.addDoc(docId); + } + b._ib.endWord(); + b._ib.endField(); + } + b.close(); +} + +void +TestDiskIndex::openIndex(const std::string &dir, bool directio, bool readmmap, + bool fieldEmpty, bool docEmpty, bool wordEmpty) +{ + buildIndex(dir, directio, fieldEmpty, docEmpty, wordEmpty); + TuneFileRandRead tuneFileRead; + if (directio) { + tuneFileRead.setWantDirectIO(); + } + if (readmmap) { + tuneFileRead.setWantMemoryMap(); + } + _index.reset(new DiskIndex(dir)); + bool ok(_index->setup(tuneFileRead)); + assert(ok); +} + +TestDiskIndex::TestDiskIndex() : + _schema(), + _index() +{ +} + +} +} diff --git a/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h new file mode 100644 index 00000000000..d340b02c3b3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/diskindex/testdiskindex.h @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+
+namespace search {
+namespace diskindex {
+
+class TestDiskIndex { // Test fixture that builds a small disk index and opens it as a DiskIndex.
+private:
+    void buildIndex(const std::string &dir, bool directio,
+                    bool fieldEmpty, bool docEmpty, bool wordEmpty);
+protected:
+    index::Schema _schema;
+    std::unique_ptr _index; // empty until openIndex() has run
+
+public:
+    TestDiskIndex();
+    DiskIndex & getIndex() { return *_index; } // dereferences _index, so call openIndex() first
+    void buildSchema();
+    void openIndex(const std::string &dir, bool directio, bool readmmap,
+                   bool fieldEmpty, bool docEmpty, bool wordEmpty);
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp
new file mode 100644
index 00000000000..594035af760
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.cpp
@@ -0,0 +1,133 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+#include
+#include
+#include
+#include "threelevelcountbuffers.h"
+
+LOG_SETUP(".threelevelcountbuffers");
+
+namespace search
+{
+
+namespace diskindex
+{
+
+
+ThreeLevelCountWriteBuffers::
+ThreeLevelCountWriteBuffers(EC &sse, EC &spe, EC &pe) // Binds each encoder to its own write context with an allocated buffer.
+    : _sse(sse),
+      _spe(spe),
+      _pe(pe),
+      _wcsse(sse),
+      _wcspe(spe),
+      _wcpe(pe),
+      _ssHeaderLen(0u),
+      _spHeaderLen(0u),
+      _pHeaderLen(0u),
+      _ssFileBitSize(0u),
+      _spFileBitSize(0u),
+      _pFileBitSize(0u)
+{
+    _wcsse.allocComprBuf();
+    _sse.setWriteContext(&_wcsse);
+    _sse.setupWrite(_wcsse);
+    assert(_sse.getWriteOffset() == 0); // every encoder is expected to start at write offset 0
+
+    _wcspe.allocComprBuf();
+    _spe.setWriteContext(&_wcspe);
+    _spe.setupWrite(_wcspe);
+    assert(_spe.getWriteOffset() == 0);
+
+    _wcpe.allocComprBuf();
+    _pe.setWriteContext(&_wcpe);
+    _pe.setupWrite(_wcpe);
+    assert(_pe.getWriteOffset() == 0);
+}
+
+
+void
+ThreeLevelCountWriteBuffers::flush(void) // Records the three file bit sizes, then pads and flushes each encoder.
+{
+    _ssFileBitSize = _sse.getWriteOffset(); // sizes are sampled before the padBits(128) calls below
+    _spFileBitSize = _spe.getWriteOffset();
+    _pFileBitSize = _pe.getWriteOffset();
+    _sse.padBits(128);
+    _sse.flush();
+    _spe.padBits(128);
+    _spe.flush();
+    _pe.padBits(128);
+    _pe.flush();
+}
+
+
+void
+ThreeLevelCountWriteBuffers::startPad(uint32_t ssHeaderLen, // Pads each encoder by its header length and stores the lengths.
+                                      uint32_t spHeaderLen,
+                                      uint32_t pHeaderLen)
+{
+    _sse.padBits(ssHeaderLen * 8); // header lengths are in bytes, padBits() is called with bits
+    _spe.padBits(spHeaderLen * 8);
+    _pe.padBits(pHeaderLen * 8);
+    _ssHeaderLen = ssHeaderLen;
+    _spHeaderLen = spHeaderLen;
+    _pHeaderLen = pHeaderLen;
+}
+
+
+ThreeLevelCountReadBuffers::ThreeLevelCountReadBuffers(DC &ssd, // Sets the decoders up to read back what wb wrote.
+                                                       DC &spd,
+                                                       DC &pd,
+                                                       ThreeLevelCountWriteBuffers &wb)
+    : _ssd(ssd),
+      _spd(spd),
+      _pd(pd),
+      _rcssd(ssd),
+      _rcspd(spd),
+      _rcpd(pd),
+      _ssHeaderLen(wb._ssHeaderLen),
+      _spHeaderLen(wb._spHeaderLen),
+      _pHeaderLen(wb._pHeaderLen),
+      _ssFileBitSize(wb._ssFileBitSize),
+      _spFileBitSize(wb._spFileBitSize),
+      _pFileBitSize(wb._pFileBitSize)
+{
+    ssd.setReadContext(&_rcssd);
+    spd.setReadContext(&_rcspd);
+    pd.setReadContext(&_rcpd);
+    _rcssd.referenceWriteContext(wb._wcsse); // read contexts reference the writer's contexts
+    _rcspd.referenceWriteContext(wb._wcspe);
+    _rcpd.referenceWriteContext(wb._wcpe);
+    ssd.skipBits(_ssHeaderLen * 8); // skip the header lengths copied from wb (bytes * 8)
+    spd.skipBits(_spHeaderLen * 8);
+    pd.skipBits(_pHeaderLen * 8);
+}
+
+
+ThreeLevelCountReadBuffers::ThreeLevelCountReadBuffers(DC &ssd, // Attaches read contexts only; lengths and sizes start at 0.
+                                                       DC &spd,
+                                                       DC &pd)
+    : _ssd(ssd),
+      _spd(spd),
+      _pd(pd),
+      _rcssd(ssd),
+      _rcspd(spd),
+      _rcpd(pd),
+      _ssHeaderLen(0u),
+      _spHeaderLen(0u),
+      _pHeaderLen(0u),
+      _ssFileBitSize(0u),
+      _spFileBitSize(0u),
+      _pFileBitSize(0u)
+{
+    ssd.setReadContext(&_rcssd);
+    spd.setReadContext(&_rcspd);
+    pd.setReadContext(&_rcpd);
+}
+
+
+} // namespace diskindex
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h
new file mode 100644
index 00000000000..cf7f5adc791
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/threelevelcountbuffers.h
@@ -0,0 +1,82 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search
+{
+
+namespace diskindex
+{
+
+class ThreeLevelCountWriteBuffers // Three count-file encoders, each paired with its own write context.
+{
+public:
+    typedef search::bitcompression::PostingListCountFileEncodeContext EC;
+    EC &_sse; // encoders are held by reference and owned by the caller
+    EC &_spe;
+    EC &_pe;
+    ComprFileWriteContext _wcsse; // write context for _sse
+    ComprFileWriteContext _wcspe; // write context for _spe
+    ComprFileWriteContext _wcpe; // write context for _pe
+
+    uint32_t _ssHeaderLen; // Length of header for sparse sparse file (bytes)
+    uint32_t _spHeaderLen; // Length of header for sparse page file (bytes)
+    uint32_t _pHeaderLen; // Length of header for page file (bytes)
+
+    uint64_t _ssFileBitSize; // write offset of _sse as sampled by flush(); 0 until then
+    uint64_t _spFileBitSize; // write offset of _spe as sampled by flush(); 0 until then
+    uint64_t _pFileBitSize; // write offset of _pe as sampled by flush(); 0 until then
+
+    ThreeLevelCountWriteBuffers(EC &sse, EC &spe, EC &pe);
+
+    void
+    flush(void);
+
+    // unit test method. Just pads without writing proper header
+    void
+    startPad(uint32_t ssHeaderLen,
+             uint32_t spHeaderLen,
+             uint32_t pHeaderLen);
+};
+
+
+class ThreeLevelCountReadBuffers // Three count-file decoders, each paired with its own read context.
+{
+public:
+    typedef search::bitcompression::PostingListCountFileEncodeContext EC;
+    typedef search::bitcompression::PostingListCountFileDecodeContext DC;
+    DC &_ssd; // decoders are held by reference and owned by the caller
+    DC &_spd;
+    DC &_pd;
+    ComprFileReadContext _rcssd; // read context for _ssd
+    ComprFileReadContext _rcspd; // read context for _spd
+    ComprFileReadContext _rcpd; // read context for _pd
+
+    uint32_t _ssHeaderLen; // header lengths in bytes; copied from the writer or 0
+    uint32_t _spHeaderLen;
+    uint32_t _pHeaderLen;
+
+    uint64_t _ssFileBitSize; // file bit sizes; copied from the writer or 0
+    uint64_t _spFileBitSize;
+    uint64_t _pFileBitSize;
+
+    // Unit test usage constructor. Takes header lengths, bit sizes and buffers from wb.
+    ThreeLevelCountReadBuffers(DC &ssd,
+                               DC &spd,
+                               DC &pd,
+                               ThreeLevelCountWriteBuffers &wb);
+
+    // Normal usage constructor. Only attaches the read contexts to the decoders.
+    ThreeLevelCountReadBuffers(DC &ssd,
+                               DC &spd,
+                               DC &pd);
+};
+
+
+} // namespace diskindex
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp
new file mode 100644
index 00000000000..cfd06a86d0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.cpp
@@ -0,0 +1,9 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "document_weight_attribute_helper.h"
+
+namespace search {
+namespace test {
+
+} // namespace search::test
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
new file mode 100644
index 00000000000..cf63d881d93
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
@@ -0,0 +1,55 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include +#include +#include +#include + +namespace search { +namespace test { + +class DocumentWeightAttributeHelper +{ +private: + AttributeVector::SP _attr; + IntegerAttribute *_int_attr; + const IDocumentWeightAttribute *_dwa; + + AttributeVector::SP make_attr() { + attribute::Config cfg(attribute::BasicType::INT64, attribute::CollectionType::WSET); + cfg.setFastSearch(true); + return AttributeFactory::createAttribute("my_attribute", cfg); + } + +public: + DocumentWeightAttributeHelper() : _attr(make_attr()), + _int_attr(dynamic_cast(_attr.get())), + _dwa(_attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(_int_attr != nullptr); + ASSERT_TRUE(_dwa != nullptr); + } + + void add_docs(size_t limit) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + _attr->addDoc(docid); + } + _attr->commit(); + ASSERT_EQUAL((limit - 1), docid); + } + + void set_doc(uint32_t docid, int64_t key, int32_t weight) { + _int_attr->clearDoc(docid); + _int_attr->append(docid, key, weight); + _int_attr->commit(); + } + + const IDocumentWeightAttribute &dwa() const { return *_dwa; } +}; + +} // namespace search::test +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/.gitignore b/searchlib/src/vespa/searchlib/test/fakedata/.gitignore new file mode 100644 index 00000000000..5dae353d999 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/.gitignore @@ -0,0 +1,2 @@ +.depend +Makefile diff --git a/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt new file mode 100644 index 00000000000..b01ad63e02f --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_fakedata OBJECT + SOURCES + fakeword.cpp + fakewordset.cpp + fakeposting.cpp + fakefilterocc.cpp + fakeegcompr64filterocc.cpp + fakememtreeocc.cpp + fakezcfilterocc.cpp + fakezcbfilterocc.cpp + fpfactory.cpp + bitencode64.cpp + bitdecode64.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp new file mode 100644 index 00000000000..cda9314366c --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.cpp @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".bitdecode64"); +#include "bitencode64.h" +#include "bitdecode64.h" + + +namespace search +{ + +namespace fakedata +{ + +template class BitDecode64; + +template class BitDecode64; + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h new file mode 100644 index 00000000000..05dfdaf0fb1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/bitdecode64.h @@ -0,0 +1,91 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include "bitencode64.h"
+#include
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template
+class BitDecode64 : public bitcompression::DecodeContext64 // Decode context that remembers its start so it can seek and report offsets.
+{
+private:
+    const uint64_t *_comprBase; // first 64-bit word of the buffer, as passed to the constructor
+    int _bitOffsetBase; // bit offset passed to the constructor; seek() and getOffset() are relative to it
+    typedef bitcompression::DecodeContext64 ParentClass;
+
+public:
+    using ParentClass::_val;
+    using ParentClass::_valI;
+    using ParentClass::_preRead;
+    using ParentClass::_cacheInt;
+    typedef typename bitcompression::DecodeContext64::EC EC;
+
+    BitDecode64(const uint64_t *compr, // Forwards compr and bitOffset to the parent and records both as the base position.
+                int bitOffset)
+        : bitcompression::DecodeContext64(compr, bitOffset),
+          _comprBase(compr),
+          _bitOffsetBase(bitOffset)
+    {
+    }
+
+    typedef bitcompression::DecodeContext64 DC;
+
+    void
+    seek(uint64_t offset) // Repositions the decoder at bit offset, counted from the base position.
+    {
+        offset += _bitOffsetBase;
+        const uint64_t *compr = _comprBase + (offset / 64); // word containing the target bit
+        int bitOffset = offset & 63; // bit position inside that word
+        _valI = compr + 1;
+        _val = 0;
+        _cacheInt = EC::bswap(*compr);
+        _preRead = 64 - bitOffset;
+        uint32_t length = 64; // not used by name below; presumably read by UC64_READBITS - confirm against the macro
+        UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+    }
+
+    uint64_t
+    getOffset(void) const // Current position in bits relative to the base position, from this object's _valI and _preRead.
+    {
+        return 64 * (_valI - _comprBase - 1) - this->_preRead -
+            _bitOffsetBase;
+    }
+
+    uint64_t
+    getOffset(const uint64_t *valI, int preRead) const // Same formula as getOffset(void), for caller-held decoder state.
+    {
+        return 64 * (valI - _comprBase - 1) - preRead - _bitOffsetBase;
+    }
+
+    const uint64_t *
+    getComprBase(void) const // Buffer start given at construction.
+    {
+        return _comprBase;
+    }
+
+    int
+    getBitOffsetBase(void) const // Bit offset given at construction.
+    {
+        return _bitOffsetBase;
+    }
+};
+
+
+extern template class BitDecode64;
+
+extern template class BitDecode64;
+
+typedef BitDecode64 BitDecode64BE;
+
+typedef BitDecode64 BitDecode64LE;
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp
new file mode 100644
index 00000000000..9ceea95e01d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.cpp
@@ -0,0 +1,37 @@
+// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".bitencode64");
+#include "bitencode64.h"
+
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template
+BitEncode64::BitEncode64(void) // Binds _cbuf to this encoder, allocates its buffer and calls afterWrite() on it.
+    : bitcompression::EncodeContext64(),
+      _cbuf(*this)
+{
+    _cbuf.allocComprBuf(64, 1);
+    this->afterWrite(_cbuf, 0, 0);
+}
+
+
+template
+BitEncode64::~BitEncode64(void)
+{
+}
+
+template class BitEncode64;
+
+template class BitEncode64;
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h
new file mode 100644
index 00000000000..893410aa86b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/bitencode64.h
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+
+template
+class BitEncode64 : public bitcompression::EncodeContext64 // Encode context that carries its own write context.
+{
+    search::ComprFileWriteContext _cbuf; // write context bound to this encoder in the constructor
+
+public:
+    BitEncode64(void);
+
+    ~BitEncode64(void);
+
+    typedef bitcompression::EncodeContext64 EC;
+
+    void
+    writeComprBuffer(void) // Unconditional hand-off to _cbuf; passes true where the conditional variant passes false.
+    {
+        _cbuf.writeComprBuffer(true);
+    }
+
+    void
+    writeComprBufferIfNeeded(void) // Hands off to _cbuf only once the write cursor _valI has reached _valE.
+    {
+        if (this->_valI >= this->_valE)
+            _cbuf.writeComprBuffer(false);
+    }
+
+    std::pair
+    grabComprBuffer(void *&comprBufMalloc) // Forwards to _cbuf.grabComprBuffer() and returns its pair with the first member cast.
+    {
+        std::pair tres = _cbuf.grabComprBuffer(comprBufMalloc);
+        return std::make_pair(static_cast(tres.first),
+                              tres.second);
+    }
+};
+
+extern template class BitEncode64;
+
+extern template class BitEncode64;
+
+typedef BitEncode64 BitEncode64BE;
+
+typedef BitEncode64 BitEncode64LE;
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp new
file mode 100644 index 00000000000..dd1190f0945 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.cpp @@ -0,0 +1,1521 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fakeegcompr64filterocc"); +#include "fakeegcompr64filterocc.h" +#include +#include +#include +#include +#include "fpfactory.h" + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; + +#include "bitencode64.h" +#include "bitdecode64.h" + +namespace search +{ + +namespace fakedata +{ + +#define DEBUG_EGCOMPR64FILTEROCC_PRINTF 0 +#define DEBUG_EGCOMPR64FILTEROCC_ASSERT 1 + +static FPFactoryInit +init(std::make_pair("EGCompr64FilterOcc", + makeFPFactory >)); + +#define K_VALUE_FILTEROCC_RESIDUE 8 + +#define K_VALUE_FILTEROCC_FIRST_DOCID 22 + +#define K_VALUE_FILTEROCC_DELTA_DOCID 7 + +#define K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID 13 + +#define K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS 10 + +#define K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID 15 + +#define K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS 12 + +#define K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS 10 + +#define K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID 18 + +#define K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS 15 + +#define K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS 13 + +#define K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS 10 + +#define K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID 21 + +#define K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS 18 + +#define K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS 16 + +#define K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS 13 + +#define K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS 10 + +#define L1SKIPSTRIDE 16 +#define L2SKIPSTRIDE 8 +#define L3SKIPSTRIDE 8 +#define L4SKIPSTRIDE 8 + +FakeEGCompr64FilterOcc::FakeEGCompr64FilterOcc(const FakeWord &fw) + : FakePosting(fw.getName() + ".egc64filterocc"), + _compressed(std::make_pair(static_cast(NULL), 0)), + 
_l1SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l2SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l3SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l4SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _compressedMalloc(NULL), + _l1SkipCompressedMalloc(NULL), + _l2SkipCompressedMalloc(NULL), + _l3SkipCompressedMalloc(NULL), + _l4SkipCompressedMalloc(NULL), + _docIdLimit(0), + _hitDocs(0), + _lastDocId(0u), + _bitSize(0), + _l1SkipBitSize(0), + _l2SkipBitSize(0), + _l3SkipBitSize(0), + _l4SkipBitSize(0), + _bigEndian(true) +{ + setup(fw); +} + + +FakeEGCompr64FilterOcc::FakeEGCompr64FilterOcc(const FakeWord &fw, + bool bigEndian, + const char *nameSuffix) + : FakePosting(fw.getName() + nameSuffix), + _compressed(std::make_pair(static_cast(NULL), 0)), + _l1SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l2SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l3SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _l4SkipCompressed(std::make_pair(static_cast(NULL), 0)), + _compressedMalloc(NULL), + _l1SkipCompressedMalloc(NULL), + _l2SkipCompressedMalloc(NULL), + _l3SkipCompressedMalloc(NULL), + _l4SkipCompressedMalloc(NULL), + _docIdLimit(0), + _hitDocs(0), + _lastDocId(0u), + _bitSize(0), + _l1SkipBitSize(0), + _l2SkipBitSize(0), + _l3SkipBitSize(0), + _l4SkipBitSize(0), + _bigEndian(bigEndian) +{ + setup(fw); +} + + +void +FakeEGCompr64FilterOcc::setup(const FakeWord &fw) +{ + if (_bigEndian) + setupT(fw); + else + setupT(fw); +} + + +template +void +FakeEGCompr64FilterOcc:: +setupT(const FakeWord &fw) +{ + BitEncode64 bits; + BitEncode64 l1SkipBits; + BitEncode64 l2SkipBits; + BitEncode64 l3SkipBits; + BitEncode64 l4SkipBits; + uint32_t lastDocId = 0u; + uint32_t lastL1SkipDocId = 0u; + uint64_t lastL1SkipDocIdPos = 0; + uint32_t l1SkipCnt = 0; + uint32_t lastL2SkipDocId = 0u; + uint64_t lastL2SkipDocIdPos = 0; + uint64_t lastL2SkipL1SkipPos = 0; + unsigned int l2SkipCnt = 0; + uint32_t lastL3SkipDocId = 0u; + 
uint64_t lastL3SkipDocIdPos = 0; + uint64_t lastL3SkipL1SkipPos = 0; + uint64_t lastL3SkipL2SkipPos = 0; + unsigned int l3SkipCnt = 0; + uint32_t lastL4SkipDocId = 0u; + uint64_t lastL4SkipDocIdPos = 0; + uint64_t lastL4SkipL1SkipPos = 0; + uint64_t lastL4SkipL2SkipPos = 0; + uint64_t lastL4SkipL3SkipPos = 0; + unsigned int l4SkipCnt = 0; + + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + typedef FW::DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + DWPFL::const_iterator p(fw._wordPosFeatures.begin()); + DWPFL::const_iterator pe(fw._wordPosFeatures.end()); + + if (d != de) { + // Prefix support needs counts embedded in posting list + // if selector bits are dropped. + bits.encodeExpGolomb(fw._postings.size(), + K_VALUE_FILTEROCC_RESIDUE); + bits.writeComprBufferIfNeeded(); + lastL1SkipDocIdPos = bits.getWriteOffset(); + lastL2SkipDocIdPos = bits.getWriteOffset(); + lastL3SkipDocIdPos = bits.getWriteOffset(); + lastL4SkipDocIdPos = bits.getWriteOffset(); + } + while (d != de) { + if (l1SkipCnt >= L1SKIPSTRIDE) { + uint32_t docIdDelta = lastDocId - lastL1SkipDocId; + assert(static_cast(docIdDelta) > 0); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + uint64_t prevL1SkipPos = l1SkipBits.getWriteOffset(); +#endif + l1SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID); + uint64_t lastDocIdPos = bits.getWriteOffset(); + uint32_t docIdPosDelta = lastDocIdPos - lastL1SkipDocIdPos; + l1SkipBits.encodeExpGolomb(docIdPosDelta - 1, + K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS); + l1SkipBits.writeComprBufferIfNeeded(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L1Encode docId=%d (+%u), docIdPos=%d (+%u), " + "L1SkipPos=%d -> %d\n", + lastDocId, + docIdDelta, + (int) lastDocIdPos, + docIdPosDelta, + (int) prevL1SkipPos, + (int) l1SkipBits.getWriteOffset()); +#endif + lastL1SkipDocId = lastDocId; + lastL1SkipDocIdPos = lastDocIdPos; + l1SkipCnt = 0; + ++l2SkipCnt; + if 
(l2SkipCnt >= L2SKIPSTRIDE) { + docIdDelta = lastDocId - lastL2SkipDocId; + docIdPosDelta = lastDocIdPos - lastL2SkipDocIdPos; + uint64_t lastL1SkipPos = l1SkipBits.getWriteOffset(); + uint32_t l1SkipPosDelta = lastL1SkipPos - lastL2SkipL1SkipPos; + l2SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID); + l2SkipBits.encodeExpGolomb(docIdPosDelta - 1, + K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS); + l2SkipBits.encodeExpGolomb(l1SkipPosDelta - 1, + K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS); + l2SkipBits.writeComprBufferIfNeeded(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L2Encode docId=%d (+%u), docIdPos=%d (+%u), " + "L1SkipPos=%d (+%u)\n", + lastDocId, + docIdDelta, + (int) lastDocIdPos, + docIdPosDelta, + (int) lastL1SkipPos, + l1SkipPosDelta); +#endif + lastL2SkipDocId = lastDocId; + lastL2SkipDocIdPos = lastDocIdPos; + lastL2SkipL1SkipPos = lastL1SkipPos; + l2SkipCnt = 0; + ++l3SkipCnt; + if (l3SkipCnt >= L3SKIPSTRIDE) { + docIdDelta = lastDocId - lastL3SkipDocId; + docIdPosDelta = lastDocIdPos - lastL3SkipDocIdPos; + l1SkipPosDelta = lastL1SkipPos - lastL3SkipL1SkipPos; + uint64_t lastL2SkipPos = l2SkipBits.getWriteOffset(); + uint32_t l2SkipPosDelta = lastL2SkipPos - + lastL3SkipL2SkipPos; + l3SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID); + l3SkipBits.encodeExpGolomb(docIdPosDelta - 1, + K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS); + l3SkipBits.writeComprBufferIfNeeded(); + l3SkipBits.encodeExpGolomb(l1SkipPosDelta - 1, + K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS); + l3SkipBits.encodeExpGolomb(l2SkipPosDelta - 1, + K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS); + l3SkipBits.writeComprBufferIfNeeded(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L3Encode docId=%d (+%u), docIdPos=%d (+%u), " + "L1SkipPos=%d (+%u) L2SkipPos=%d (+%u)\n", + lastDocId, + docIdDelta, + (int) lastDocIdPos, + docIdPosDelta, + (int) lastL1SkipPos, + l1SkipPosDelta, + (int) lastL2SkipPos, + l2SkipPosDelta); +#endif + 
lastL3SkipDocId = lastDocId; + lastL3SkipDocIdPos = lastDocIdPos; + lastL3SkipL1SkipPos = lastL1SkipPos; + lastL3SkipL2SkipPos = lastL2SkipPos; + l3SkipCnt = 0; + ++l4SkipCnt; + if (l4SkipCnt >= L4SKIPSTRIDE) { + docIdDelta = lastDocId - lastL4SkipDocId; + docIdPosDelta = lastDocIdPos - lastL4SkipDocIdPos; + l1SkipPosDelta = lastL1SkipPos - lastL4SkipL1SkipPos; + l2SkipPosDelta = lastL2SkipPos - lastL4SkipL2SkipPos; + uint64_t lastL3SkipPos = l3SkipBits.getWriteOffset(); + uint32_t l3SkipPosDelta = lastL3SkipPos - + lastL4SkipL3SkipPos; + l4SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID); + l4SkipBits.encodeExpGolomb(docIdPosDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS); + l4SkipBits.writeComprBufferIfNeeded(); + l4SkipBits.encodeExpGolomb(l1SkipPosDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS); + l4SkipBits.encodeExpGolomb(l2SkipPosDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS); + l4SkipBits.encodeExpGolomb(l3SkipPosDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS); + l4SkipBits.writeComprBufferIfNeeded(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L4Encode docId=%d (+%u), docIdPos=%d (+%u), " + "L1SkipPos=%d (+%u) L2SkipPos=%d (+%u)" + "L3SkipPos=%d (+%u)\n", + lastDocId, + docIdDelta, + (int) lastDocIdPos, + docIdPosDelta, + (int) lastL1SkipPos, + l1SkipPosDelta, + (int) lastL2SkipPos, + l2SkipPosDelta, + (int) lastL3SkipPos, + l3SkipPosDelta); +#endif + lastL4SkipDocId = lastDocId; + lastL4SkipDocIdPos = lastDocIdPos; + lastL4SkipL1SkipPos = lastL1SkipPos; + lastL4SkipL2SkipPos = lastL2SkipPos; + lastL4SkipL3SkipPos = lastL3SkipPos; + l4SkipCnt = 0; + } + } + } + } + if (lastDocId == 0u) { + bits.encodeExpGolomb(d->_docId - 1, + K_VALUE_FILTEROCC_FIRST_DOCID); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("Encode docId=%d\n", d->_docId); +#endif + } else { + uint32_t docIdDelta = d->_docId - lastDocId; + bits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_DELTA_DOCID); +#if 
DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("Encode docId=%d (+%u)\n", + d->_docId, + docIdDelta); +#endif + } + bits.writeComprBufferIfNeeded(); + lastDocId = d->_docId; + ++l1SkipCnt; + ++d; + } + // Extra partial entries for skip tables to simplify iterator during search + uint32_t docIdDelta = lastDocId - lastL1SkipDocId; + assert(static_cast(docIdDelta) > 0); + l1SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID); + docIdDelta = lastDocId - lastL2SkipDocId; + assert(static_cast(docIdDelta) > 0); + l2SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID); + docIdDelta = lastDocId - lastL3SkipDocId; + assert(static_cast(docIdDelta) > 0); + l3SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID); + docIdDelta = lastDocId - lastL4SkipDocId; + assert(static_cast(docIdDelta) > 0); + l4SkipBits.encodeExpGolomb(docIdDelta - 1, + K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID); + _hitDocs = fw._postings.size(); + _bitSize = bits.getWriteOffset(); + _l1SkipBitSize = l1SkipBits.getWriteOffset(); + _l2SkipBitSize = l2SkipBits.getWriteOffset(); + _l3SkipBitSize = l3SkipBits.getWriteOffset(); + _l4SkipBitSize = l4SkipBits.getWriteOffset(); + bits.writeComprBufferIfNeeded(); + bits.writeBits(static_cast(-1), 64); + bits.writeBits(static_cast(-1), 64); + bits.writeComprBufferIfNeeded(); + bits.writeBits(static_cast(-1), 64); + bits.writeBits(static_cast(-1), 64); + bits.flush(); + bits.writeComprBuffer(); + l1SkipBits.writeComprBufferIfNeeded(); + l1SkipBits.writeBits(static_cast(-1), 64); + l1SkipBits.writeBits(static_cast(-1), 64); + l1SkipBits.writeComprBufferIfNeeded(); + l1SkipBits.writeBits(static_cast(-1), 64); + l1SkipBits.writeBits(static_cast(-1), 64); + l1SkipBits.flush(); + l1SkipBits.writeComprBuffer(); + l2SkipBits.writeComprBufferIfNeeded(); + l2SkipBits.writeBits(static_cast(-1), 64); + l2SkipBits.writeBits(static_cast(-1), 64); + l2SkipBits.writeComprBufferIfNeeded(); + 
l2SkipBits.writeBits(static_cast(-1), 64); + l2SkipBits.writeBits(static_cast(-1), 64); + l2SkipBits.flush(); + l2SkipBits.writeComprBuffer(); + l3SkipBits.writeComprBufferIfNeeded(); + l3SkipBits.writeBits(static_cast(-1), 64); + l3SkipBits.writeBits(static_cast(-1), 64); + l3SkipBits.writeComprBufferIfNeeded(); + l3SkipBits.writeBits(static_cast(-1), 64); + l3SkipBits.writeBits(static_cast(-1), 64); + l3SkipBits.flush(); + l3SkipBits.writeComprBuffer(); + l4SkipBits.writeComprBufferIfNeeded(); + l4SkipBits.writeBits(static_cast(-1), 64); + l4SkipBits.writeBits(static_cast(-1), 64); + l4SkipBits.writeComprBufferIfNeeded(); + l4SkipBits.writeBits(static_cast(-1), 64); + l4SkipBits.writeBits(static_cast(-1), 64); + l4SkipBits.flush(); + l4SkipBits.writeComprBuffer(); + _compressed = bits.grabComprBuffer(_compressedMalloc); + _l1SkipCompressed = l1SkipBits.grabComprBuffer(_l1SkipCompressedMalloc); + _l2SkipCompressed = l2SkipBits.grabComprBuffer(_l2SkipCompressedMalloc); + _l3SkipCompressed = l3SkipBits.grabComprBuffer(_l3SkipCompressedMalloc); + _l4SkipCompressed = l4SkipBits.grabComprBuffer(_l4SkipCompressedMalloc); + _docIdLimit = fw._docIdLimit; + _lastDocId = lastDocId; +} + + +FakeEGCompr64FilterOcc::~FakeEGCompr64FilterOcc(void) +{ + free(_compressedMalloc); + free(_l1SkipCompressedMalloc); + free(_l2SkipCompressedMalloc); + free(_l3SkipCompressedMalloc); + free(_l4SkipCompressedMalloc); +} + + +void +FakeEGCompr64FilterOcc::forceLink(void) +{ +} + + +size_t +FakeEGCompr64FilterOcc::bitSize(void) const +{ + return _bitSize; +} + + +bool +FakeEGCompr64FilterOcc::hasWordPositions(void) const +{ + return false; +} + + +size_t +FakeEGCompr64FilterOcc::skipBitSize(void) const +{ + return _l1SkipBitSize + _l2SkipBitSize + _l3SkipBitSize + _l4SkipBitSize; +} + + +size_t +FakeEGCompr64FilterOcc::l1SkipBitSize(void) const +{ + return _l1SkipBitSize; +} + + +size_t +FakeEGCompr64FilterOcc::l2SkipBitSize(void) const +{ + return _l2SkipBitSize; +} + + +size_t 
+FakeEGCompr64FilterOcc::l3SkipBitSize(void) const +{ + return _l3SkipBitSize; +} + + +size_t +FakeEGCompr64FilterOcc::l4SkipBitSize(void) const +{ + return _l4SkipBitSize; +} + + +int +FakeEGCompr64FilterOcc::lowLevelSinglePostingScan(void) const +{ + return 0; +} + + +int +FakeEGCompr64FilterOcc::lowLevelSinglePostingScanUnpack(void) const +{ + return 0; +} + + +int +FakeEGCompr64FilterOcc:: +lowLevelAndPairPostingScan(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +int +FakeEGCompr64FilterOcc:: +lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + +#define UC64_FILTEROCC_READ_RESIDUE(val, valI, preRead, cacheInt, \ + residue, EC) \ + do { \ + UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, \ + K_VALUE_FILTEROCC_RESIDUE, EC); \ + residue = val64; \ + } while (0) + + +#define UC64_FILTEROCC_READ_FIRST_DOC(val, valI, preRead, cacheInt, \ + docId, EC) \ + do { \ + UC64_DECODEEXPGOLOMB(val, valI, preRead, cacheInt, \ + K_VALUE_FILTEROCC_FIRST_DOCID, EC); \ + docId = val64 + 1; \ + } while (0) + + +#define UC64_FILTEROCC_READ_NEXT_DOC(val, valI, preRead, cacheInt, \ + docId, EC) \ + do { \ + UC64_DECODEEXPGOLOMB_SMALL(val, valI, preRead, cacheInt, \ + K_VALUE_FILTEROCC_DELTA_DOCID, EC); \ + docId += val64 + 1; \ + } while (0) + + +#define UC64_FILTEROCC_READ_NEXT_DOC_NS(prefix, EC) \ + do { \ + UC64_FILTEROCC_READ_NEXT_DOC(prefix ## Val, prefix ## Compr, \ + prefix ## PreRead, \ + prefix ## CacheInt, \ + prefix ## DocId, EC); \ + } while (0) + + +#define UC64_FILTEROCC_DECODECONTEXT \ + uint64_t val64; \ + unsigned int length; + + +class BitDecode64BEDocIds : public BitDecode64BE +{ +public: + BitDecode64BEDocIds(const uint64_t *compr, + int bitOffset) + : BitDecode64BE(compr, bitOffset) + { + } + + uint32_t + getDocIdDelta(void) + { + uint32_t ret; + unsigned int length; + const bool bigEndian = true; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt, + K_VALUE_FILTEROCC_DELTA_DOCID, 
EC, + ret = 1 +); + return ret; + } + + uint32_t + getL1SkipDocIdDelta(void) + { + uint32_t ret; + unsigned int length; + const bool bigEndian = true; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC, + ret = 1 +); + return ret; + } + + uint32_t + getL2SkipDocIdDelta(void) + { + uint32_t ret; + unsigned int length; + const bool bigEndian = true; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC, + ret = 1 +); + return ret; + } + + uint32_t + getL3SkipDocIdDelta(void) + { + uint32_t ret; + unsigned int length; + UC64BE_DECODEEXPGOLOMB_SMALL_APPLY(_val, _valI, _preRead, _cacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC, + ret = 1 +); + return ret; + } +}; + +template +class FakeFilterOccEGCompressed64ArrayIterator + : public queryeval::RankedSearchIteratorBase +{ +private: + + FakeFilterOccEGCompressed64ArrayIterator(const FakeFilterOccEGCompressed64ArrayIterator &other); + + FakeFilterOccEGCompressed64ArrayIterator& + operator=(const FakeFilterOccEGCompressed64ArrayIterator &other); + + typedef BitEncode64 EC; + typedef BitDecode64 DC; + +public: + DC _docIdBits; + uint32_t _residue; + uint32_t _lastDocId; + + FakeFilterOccEGCompressed64ArrayIterator(const uint64_t *compressedOccurrences, + int compressedBitOffset, + uint32_t residue, + uint32_t lastDocId, + const search::fef::TermFieldMatchDataArray &matchData); + + ~FakeFilterOccEGCompressed64ArrayIterator(void); + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +template +FakeFilterOccEGCompressed64ArrayIterator:: +FakeFilterOccEGCompressed64ArrayIterator(const uint64_t *compressedOccurrences, + int compressedBitOffset, + uint32_t residue, + uint32_t lastDocId, + const search::fef::TermFieldMatchDataArray &matchData) + : 
queryeval::RankedSearchIteratorBase(matchData), + _docIdBits(compressedOccurrences, compressedBitOffset), + _residue(residue), + _lastDocId(lastDocId) +{ + clearUnpacked(); +} + +template +void +FakeFilterOccEGCompressed64ArrayIterator:: +initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + UC64_FILTEROCC_DECODECONTEXT; + uint32_t docId = 0; + uint32_t myResidue = 0; + UC64_FILTEROCC_READ_RESIDUE(_docIdBits._val, + _docIdBits._valI, + _docIdBits._preRead, + _docIdBits._cacheInt, myResidue, EC); + assert(myResidue == _residue); + (void) myResidue; + if (_residue > 0) { + UC64_FILTEROCC_READ_FIRST_DOC(_docIdBits._val, + _docIdBits._valI, + _docIdBits._preRead, + _docIdBits._cacheInt, docId, EC); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("DecodeInit docId=%d\n", + docId); +#endif + setDocId(docId); + } else { + setAtEnd(); + } +} + + +template +FakeFilterOccEGCompressed64ArrayIterator:: +~FakeFilterOccEGCompressed64ArrayIterator(void) +{ +} + + +template +void +FakeFilterOccEGCompressed64ArrayIterator::doSeek(uint32_t docId) +{ + unsigned int length; + uint32_t oDocId = getDocId(); + UC64_DECODECONTEXT_CONSTRUCTOR(o, this->_docIdBits._); + + if (getUnpacked()) + clearUnpacked(); + while (__builtin_expect(oDocId < docId, true)) { + if (__builtin_expect(--_residue == 0, false)) + goto atbreak; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(oVal, oCompr, + oPreRead, oCacheInt, + K_VALUE_FILTEROCC_DELTA_DOCID, EC, + oDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + UC64_DECODECONTEXT_STORE(o, this->_docIdBits._); + setDocId(oDocId); + return; + atbreak: + UC64_DECODECONTEXT_STORE(o, this->_docIdBits._); + setAtEnd(); // Mark end of data + return; +} + + +template +void +FakeFilterOccEGCompressed64ArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + 
setUnpacked(); +} + + +search::queryeval::SearchIterator * +FakeEGCompr64FilterOcc:: +createIterator(const fef::TermFieldMatchDataArray &matchData) const +{ + const uint64_t *arr = _compressed.first; + if (_bigEndian) + return new FakeFilterOccEGCompressed64ArrayIterator(arr, + 0, + _hitDocs, + _lastDocId, + matchData); + else + return new FakeFilterOccEGCompressed64ArrayIterator(arr, + 0, + _hitDocs, + _lastDocId, + matchData); +} + + +class FakeEGCompr64LEFilterOcc : public FakeEGCompr64FilterOcc +{ +public: + FakeEGCompr64LEFilterOcc(const FakeWord &fw); + + ~FakeEGCompr64LEFilterOcc(void); +}; + + +FakeEGCompr64LEFilterOcc::FakeEGCompr64LEFilterOcc(const FakeWord &fw) + : FakeEGCompr64FilterOcc(fw, false, ".egc64lefilterocc") +{ +} + + +FakeEGCompr64LEFilterOcc::~FakeEGCompr64LEFilterOcc(void) +{ +} + + +static FPFactoryInit +initLE(std::make_pair("EGCompr64LEFilterOcc", + makeFPFactory >)); + + +template +class FakeEGCompr64SkipFilterOcc : public FakeEGCompr64FilterOcc +{ +public: + FakeEGCompr64SkipFilterOcc(const FakeWord &fw); + + ~FakeEGCompr64SkipFilterOcc(void); + + virtual search::queryeval::SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + + +static FPFactoryInit +initNoSkip(std::make_pair("EGCompr64NoSkipFilterOcc", + makeFPFactory > >)); + + +static FPFactoryInit +initSkip(std::make_pair("EGCompr64SkipFilterOcc", + makeFPFactory > >)); + + +template<> +FakeEGCompr64SkipFilterOcc::FakeEGCompr64SkipFilterOcc(const FakeWord &fw) + : FakeEGCompr64FilterOcc(fw, true, ".egc64skipfilterocc") +{ +} + + +template<> +FakeEGCompr64SkipFilterOcc::FakeEGCompr64SkipFilterOcc(const FakeWord &fw) + : FakeEGCompr64FilterOcc(fw, true, ".egc64noskipfilterocc") +{ +} + + +template +FakeEGCompr64SkipFilterOcc::~FakeEGCompr64SkipFilterOcc(void) +{ +} + + +template +class FakeFilterOccEGCompressed64SkipArrayIterator + : public queryeval::RankedSearchIteratorBase +{ +private: + + FakeFilterOccEGCompressed64SkipArrayIterator(const 
FakeFilterOccEGCompressed64SkipArrayIterator &other); + + FakeFilterOccEGCompressed64SkipArrayIterator& + operator=(const FakeFilterOccEGCompressed64SkipArrayIterator &other); + + typedef bitcompression::EncodeContext64BE EC; + +public: + BitDecode64BEDocIds _docIdBits; + uint32_t _lastDocId; + uint32_t _l1SkipDocId; + uint32_t _l2SkipDocId; + uint32_t _l3SkipDocId; + uint32_t _l4SkipDocId; + uint64_t _l1SkipDocIdBitsOffset; + uint64_t _l2SkipDocIdBitsOffset; + uint64_t _l2SkipL1SkipBitsOffset; + uint64_t _l3SkipDocIdBitsOffset; + uint64_t _l3SkipL1SkipBitsOffset; + uint64_t _l3SkipL2SkipBitsOffset; + uint64_t _l4SkipDocIdBitsOffset; + uint64_t _l4SkipL1SkipBitsOffset; + uint64_t _l4SkipL2SkipBitsOffset; + uint64_t _l4SkipL3SkipBitsOffset; + BitDecode64BEDocIds _l1SkipBits; + BitDecode64BEDocIds _l2SkipBits; + BitDecode64BEDocIds _l3SkipBits; + BitDecode64BE _l4SkipBits; + std::string _name; + + FakeFilterOccEGCompressed64SkipArrayIterator(const uint64_t *compressedOccurrences, + int compressedBitOffset, + uint32_t lastDocId, + const uint64_t *compressedL1SkipOccurrences, + int compressedL1SkipBitOffset, + const uint64_t *compressedL2SkipOccurrences, + int compressedL2SkipBitOffset, + const uint64_t *compressedL3SkipOccurrences, + int compressedL3SkipBitOffset, + const uint64_t *compressedL4SkipOccurrences, + int compressedL4SkipBitOffset, + const std::string &name, + const fef::TermFieldMatchDataArray &matchData); + + ~FakeFilterOccEGCompressed64SkipArrayIterator(void); + + + void doL4SkipSeek(uint32_t docid); + void doL3SkipSeek(uint32_t docid); + void doL2SkipSeek(uint32_t docid); + void doL1SkipSeek(uint32_t docId); + + void doUnpack(uint32_t docId); + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +template +FakeFilterOccEGCompressed64SkipArrayIterator:: +FakeFilterOccEGCompressed64SkipArrayIterator(const uint64_t *compressedOccurrences, + int 
compressedBitOffset, + uint32_t lastDocId, + const uint64_t *compressedL1SkipOccurrences, + int compressedL1SkipBitOffset, + const uint64_t *compressedL2SkipOccurrences, + int compressedL2SkipBitOffset, + const uint64_t *compressedL3SkipOccurrences, + int compressedL3SkipBitOffset, + const uint64_t *compressedL4SkipOccurrences, + int compressedL4SkipBitOffset, + const std::string &name, + const fef::TermFieldMatchDataArray &matchData) + : queryeval::RankedSearchIteratorBase(matchData), + _docIdBits(compressedOccurrences, compressedBitOffset), + _lastDocId(lastDocId), + _l1SkipDocId(0), + _l2SkipDocId(0), + _l3SkipDocId(0), + _l4SkipDocId(0), + _l1SkipDocIdBitsOffset(0), + _l2SkipDocIdBitsOffset(0), + _l2SkipL1SkipBitsOffset(0), + _l3SkipDocIdBitsOffset(0), + _l3SkipL1SkipBitsOffset(0), + _l3SkipL2SkipBitsOffset(0), + _l4SkipDocIdBitsOffset(0), + _l4SkipL1SkipBitsOffset(0), + _l4SkipL2SkipBitsOffset(0), + _l4SkipL3SkipBitsOffset(0), + _l1SkipBits(compressedL1SkipOccurrences, compressedL1SkipBitOffset), + _l2SkipBits(compressedL2SkipOccurrences, compressedL2SkipBitOffset), + _l3SkipBits(compressedL3SkipOccurrences, compressedL3SkipBitOffset), + _l4SkipBits(compressedL4SkipOccurrences, compressedL4SkipBitOffset), + _name(name) +{ + clearUnpacked(); +} + +template +void +FakeFilterOccEGCompressed64SkipArrayIterator:: +initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + + const bool bigEndian = true; + UC64_FILTEROCC_DECODECONTEXT; + assert(_docIdBits.getOffset() == 0); + uint32_t docId = 0; + if (_lastDocId > 0) { + UC64_FILTEROCC_READ_FIRST_DOC(_docIdBits._val, + _docIdBits._valI, + _docIdBits._preRead, + _docIdBits._cacheInt, docId, EC); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("DecodeInit docId=%d\n", + docId); +#endif + UC64_DECODECONTEXT_CONSTRUCTOR(s, _l1SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC, + _l1SkipDocId = 1 +); + 
UC64_DECODECONTEXT_STORE(s, _l1SkipBits._); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L1DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n", + _l1SkipDocId, + (int) _l1SkipDocIdBitsOffset, + (int) _l1SkipBits.getOffset()); +#endif + UC64_DECODECONTEXT_LOAD(s, _l2SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC, + _l2SkipDocId = 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L2DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n", + _l2SkipDocId, + (int) _l2SkipDocIdBitsOffset, + (int) _l2SkipL1SkipBitsOffset); +#endif + UC64_DECODECONTEXT_STORE(s, _l2SkipBits._); + UC64_DECODECONTEXT_LOAD(s, _l3SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC, + _l3SkipDocId = 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L3DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n", + _l3SkipDocId, + (int) _l3SkipDocIdBitsOffset, + (int) _l3SkipL1SkipBitsOffset); +#endif + UC64_DECODECONTEXT_STORE(s, _l3SkipBits._); + UC64_DECODECONTEXT_LOAD(s, _l4SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC, + _l4SkipDocId = 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L4DecodeInit docId=%d, docIdPos=%d, L1SkipPos=%d\n", + _l4SkipDocId, + (int) _l4SkipDocIdBitsOffset, + (int) _l4SkipL1SkipBitsOffset); +#endif + UC64_DECODECONTEXT_STORE(s, _l4SkipBits._); + setDocId(docId); + } else { + setAtEnd(); + _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId; + } +} + + +template +FakeFilterOccEGCompressed64SkipArrayIterator:: +~FakeFilterOccEGCompressed64SkipArrayIterator(void) +{ +} + + +template<> +void +FakeFilterOccEGCompressed64SkipArrayIterator:: +doL4SkipSeek(uint32_t docId) +{ + unsigned int length; + uint32_t lastL4SkipDocId; + const bool bigEndian = true; + + if (__builtin_expect(docId > _lastDocId, false)) { + _l1SkipDocId = 
_l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId; + setAtEnd(); + return; + } + + UC64_DECODECONTEXT_CONSTRUCTOR(s, _l4SkipBits._); + do { + lastL4SkipDocId = _l4SkipDocId; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS, EC, + _l4SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS, EC, + _l4SkipL1SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS, EC, + _l4SkipL2SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS, EC, + _l4SkipL3SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC, + _l4SkipDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L4Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n", + lastL4SkipDocId, + (int) _l4SkipDocIdBitsOffset, + (int) _l4SkipL1SkipBitsOffset, + _l4SkipDocId); +#endif + } while (docId > _l4SkipDocId); + UC64_DECODECONTEXT_STORE(s, _l4SkipBits._); + _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId; + _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset = _l3SkipDocIdBitsOffset = + _l4SkipDocIdBitsOffset; + _l2SkipL1SkipBitsOffset = _l3SkipL1SkipBitsOffset =_l4SkipL1SkipBitsOffset; + _l3SkipL2SkipBitsOffset =_l4SkipL2SkipBitsOffset; + _docIdBits.seek(_l4SkipDocIdBitsOffset); + _l1SkipBits.seek(_l4SkipL1SkipBitsOffset); + _l2SkipBits.seek(_l4SkipL2SkipBitsOffset); + _l3SkipBits.seek(_l4SkipL3SkipBitsOffset); + lastL4SkipDocId += _docIdBits.getDocIdDelta(); + _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta(); + _l2SkipDocId += _l2SkipBits.getL2SkipDocIdDelta(); + _l3SkipDocId += _l3SkipBits.getL3SkipDocIdDelta(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L4Seek, docId %d docIdPos %d 
L1SkipPos %d, nextDocId %d\n", + lastL4SkipDocId, + (int) _l4SkipDocIdBitsOffset, + (int) _l4SkipL1SkipBitsOffset, + _l4SkipDocId); +#endif + setDocId(lastL4SkipDocId); +} + + +template<> +void +FakeFilterOccEGCompressed64SkipArrayIterator:: +doL3SkipSeek(uint32_t docId) +{ + unsigned int length; + uint32_t lastL3SkipDocId; + const bool bigEndian = true; + + if (__builtin_expect(docId > _l4SkipDocId, false)) { + doL4SkipSeek(docId); + if (docId <= _l3SkipDocId) + return; + } + + UC64_DECODECONTEXT_CONSTRUCTOR(s, _l3SkipBits._); + do { + lastL3SkipDocId = _l3SkipDocId; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS, EC, + _l3SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS, EC, + _l3SkipL1SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS, EC, + _l3SkipL2SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC, + _l3SkipDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L3Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n", + lastL3SkipDocId, + (int) _l3SkipDocIdBitsOffset, + (int) _l3SkipL1SkipBitsOffset, + _l3SkipDocId); +#endif + } while (docId > _l3SkipDocId); + UC64_DECODECONTEXT_STORE(s, _l3SkipBits._); + _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId; + _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset = _l3SkipDocIdBitsOffset; + _l2SkipL1SkipBitsOffset = _l3SkipL1SkipBitsOffset; + _docIdBits.seek(_l3SkipDocIdBitsOffset); + _l1SkipBits.seek(_l3SkipL1SkipBitsOffset); + _l2SkipBits.seek(_l3SkipL2SkipBitsOffset); + lastL3SkipDocId += _docIdBits.getDocIdDelta(); + _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta(); + _l2SkipDocId += _l2SkipBits.getL2SkipDocIdDelta(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L3Seek, docId %d 
docIdPos %d L1SkipPos %d, nextDocId %d\n", + lastL3SkipDocId, + (int) _l3SkipDocIdBitsOffset, + (int) _l3SkipL1SkipBitsOffset, + _l3SkipDocId); +#endif + setDocId(lastL3SkipDocId); +} + + +template<> +void +FakeFilterOccEGCompressed64SkipArrayIterator:: +doL2SkipSeek(uint32_t docId) +{ + unsigned int length; + uint32_t lastL2SkipDocId; + const bool bigEndian = true; + + if (__builtin_expect(docId > _l3SkipDocId, false)) { + doL3SkipSeek(docId); + if (docId <= _l2SkipDocId) + return; + } + + UC64_DECODECONTEXT_CONSTRUCTOR(s, _l2SkipBits._); + do { + lastL2SkipDocId = _l2SkipDocId; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS, EC, + _l2SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS, EC, + _l2SkipL1SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC, + _l2SkipDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L2Decode docId=%d, docIdPos=%d, l1SkipPos=%d, nextDocId %d\n", + lastL2SkipDocId, + (int) _l2SkipDocIdBitsOffset, + (int) _l2SkipL1SkipBitsOffset, + _l2SkipDocId); +#endif + } while (docId > _l2SkipDocId); + UC64_DECODECONTEXT_STORE(s, _l2SkipBits._); + _l1SkipDocId = lastL2SkipDocId; + _l1SkipDocIdBitsOffset = _l2SkipDocIdBitsOffset; + _docIdBits.seek(_l2SkipDocIdBitsOffset); + _l1SkipBits.seek(_l2SkipL1SkipBitsOffset); + lastL2SkipDocId += _docIdBits.getDocIdDelta(); + _l1SkipDocId += _l1SkipBits.getL1SkipDocIdDelta(); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n", + lastL2SkipDocId, + (int) _l2SkipDocIdBitsOffset, + (int) _l2SkipL1SkipBitsOffset, + _l2SkipDocId); +#endif + setDocId(lastL2SkipDocId); +} + + +template<> +void +FakeFilterOccEGCompressed64SkipArrayIterator::doL1SkipSeek(uint32_t docId) +{ + (void) docId; +} + + +template<> +void 
+FakeFilterOccEGCompressed64SkipArrayIterator::doL1SkipSeek(uint32_t docId) +{ + unsigned int length; + uint32_t lastL1SkipDocId; + const bool bigEndian = true; + + if (__builtin_expect(docId > _l2SkipDocId, false)) { + doL2SkipSeek(docId); + if (docId <= _l1SkipDocId) + return; + } + UC64_DECODECONTEXT_CONSTRUCTOR(s, _l1SkipBits._); + do { + lastL1SkipDocId = _l1SkipDocId; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS, EC, + _l1SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(sVal, sCompr, sPreRead, sCacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC, + _l1SkipDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L1Decode docId=%d docIdPos=%d, L1SkipPos=%d, nextDocId %d\n", + lastL1SkipDocId, + (int) _l1SkipDocIdBitsOffset, + (int) _l1SkipBits.getOffset(sCompr, sPreRead), + _l1SkipDocId); +#endif + } while (docId > _l1SkipDocId); + UC64_DECODECONTEXT_STORE(s, _l1SkipBits._); + _docIdBits.seek(_l1SkipDocIdBitsOffset); + lastL1SkipDocId += _docIdBits.getDocIdDelta(); + setDocId(lastL1SkipDocId); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n", + lastL1SkipDocId, + (int) _l1SkipDocIdBitsOffset, + _l1SkipDocId); +#endif +} + + +template +void +FakeFilterOccEGCompressed64SkipArrayIterator::doSeek(uint32_t docId) +{ + if (getUnpacked()) + clearUnpacked(); + if (doSkip && docId > _l1SkipDocId) { + doL1SkipSeek(docId); + } + unsigned int length; + uint32_t oDocId = getDocId(); + const bool bigEndian = true; + if (doSkip) { +#if DEBUG_EGCOMPR64FILTEROCC_ASSERT + assert(oDocId <= _l1SkipDocId); + assert(docId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(docId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(docId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); + assert(docId <= _l4SkipDocId); +#endif + } + UC64_DECODECONTEXT_CONSTRUCTOR(o, this->_docIdBits._); + while (__builtin_expect(oDocId < docId, true)) 
{ + if (!doSkip) { + if (__builtin_expect(oDocId >= _lastDocId, false)) { +#if DEBUG_ZCFILTEROCC_ASSERT + assert(_l1SkipDocId == _lastDocId); + assert(_l2SkipDocId == _lastDocId); + assert(_l3SkipDocId == _lastDocId); + assert(_l4SkipDocId == _lastDocId); +#endif + oDocId = _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = _l4SkipDocId = search::endDocId; + break; + } + } + if (doSkip) { +#if DEBUG_EGCOMPR64FILTEROCC_ASSERT + assert(oDocId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); +#endif + } else if (__builtin_expect(oDocId >= _l1SkipDocId, false)) { + // Validate L1 Skip information + assert(oDocId == _l1SkipDocId); + uint64_t docIdBitsOffset = _docIdBits.getOffset(oCompr, oPreRead); + UC64_DECODECONTEXT_CONSTRUCTOR(s1, _l1SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s1Val, s1Compr, s1PreRead, + s1CacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_BITPOS, EC, + _l1SkipDocIdBitsOffset += 1 +); + assert(docIdBitsOffset == _l1SkipDocIdBitsOffset); // compare, not assign: '=' overwrote the measured offset + if (__builtin_expect(oDocId >= _l2SkipDocId, false)) { + // Validate L2 Skip information + assert(oDocId == _l2SkipDocId); + uint64_t l1SkipBitsOffset = + _l1SkipBits.getOffset(s1Compr, s1PreRead); + UC64_DECODECONTEXT_CONSTRUCTOR(s2, _l2SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead, + s2CacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_BITPOS, EC, + _l2SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead, + s2CacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_L1SKIPBITPOS, EC, + _l2SkipL1SkipBitsOffset += 1 +); + assert(docIdBitsOffset == _l2SkipDocIdBitsOffset); + assert(l1SkipBitsOffset == _l2SkipL1SkipBitsOffset); + if (__builtin_expect(oDocId >= _l3SkipDocId, false)) { + // Validate L3 Skip information + assert(oDocId == _l3SkipDocId); + uint64_t l2SkipBitsOffset = + _l2SkipBits.getOffset(s2Compr, s2PreRead); + UC64_DECODECONTEXT_CONSTRUCTOR(s3, _l3SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, 
s3Compr, + s3PreRead, + s3CacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_BITPOS, EC, + _l3SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr, + s3PreRead, + s3CacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_L1SKIPBITPOS, EC, + _l3SkipL1SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr, + s3PreRead, + s3CacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_L2SKIPBITPOS, EC, + _l3SkipL2SkipBitsOffset += 1 +); + assert(docIdBitsOffset == _l3SkipDocIdBitsOffset); + assert(l1SkipBitsOffset == _l3SkipL1SkipBitsOffset); + assert(l2SkipBitsOffset == _l3SkipL2SkipBitsOffset); + if (__builtin_expect(oDocId >= _l4SkipDocId, false)) { + // Validate L4 Skip information + assert(oDocId == _l4SkipDocId); + uint64_t l3SkipBitsOffset = + _l3SkipBits.getOffset(s3Compr, s3PreRead); + UC64_DECODECONTEXT_CONSTRUCTOR(s4, _l4SkipBits._); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr, + s4PreRead, + s4CacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_BITPOS, EC, + _l4SkipDocIdBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr, + s4PreRead, + s4CacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L1SKIPBITPOS, EC, + _l4SkipL1SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr, + s4PreRead, + s4CacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L2SKIPBITPOS, EC, + _l4SkipL2SkipBitsOffset += 1 +); + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr, + s4PreRead, + s4CacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_L3SKIPBITPOS, EC, + _l4SkipL3SkipBitsOffset += 1 +); + assert(docIdBitsOffset == _l4SkipDocIdBitsOffset); + (void) docIdBitsOffset; + assert(l1SkipBitsOffset == _l4SkipL1SkipBitsOffset); + (void) l1SkipBitsOffset; + assert(l2SkipBitsOffset == _l4SkipL2SkipBitsOffset); + (void) l2SkipBitsOffset; + assert(l3SkipBitsOffset == _l4SkipL3SkipBitsOffset); + (void) l3SkipBitsOffset; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s4Val, s4Compr, + s4PreRead, + s4CacheInt, + K_VALUE_FILTEROCC_L4SKIPDELTA_DOCID, EC, + _l4SkipDocId += 1 +); + 
UC64_DECODECONTEXT_STORE(s4, _l4SkipBits._); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L4DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n", + _l4SkipDocId, + (int) _l4SkipDocIdBitsOffset, + (int) _l4SkipL1SkipBitsOffset); +#endif + } + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s3Val, s3Compr, + s3PreRead, + s3CacheInt, + K_VALUE_FILTEROCC_L3SKIPDELTA_DOCID, EC, + _l3SkipDocId += 1 +); + UC64_DECODECONTEXT_STORE(s3, _l3SkipBits._); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L3DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n", + _l3SkipDocId, + (int) _l3SkipDocIdBitsOffset, + (int) _l3SkipL1SkipBitsOffset); +#endif + } + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s2Val, s2Compr, s2PreRead, + s2CacheInt, + K_VALUE_FILTEROCC_L2SKIPDELTA_DOCID, EC, + _l2SkipDocId += 1 +); + UC64_DECODECONTEXT_STORE(s2, _l2SkipBits._); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L2DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n", + _l2SkipDocId, + (int) _l2SkipDocIdBitsOffset, + (int) _l2SkipL1SkipBitsOffset); +#endif + } + UC64_DECODEEXPGOLOMB_SMALL_APPLY(s1Val, s1Compr, s1PreRead, + s1CacheInt, + K_VALUE_FILTEROCC_L1SKIPDELTA_DOCID, EC, + _l1SkipDocId += 1 +); + UC64_DECODECONTEXT_STORE(s1, _l1SkipBits._); + assert(docIdBitsOffset == _l1SkipDocIdBitsOffset); + BitDecode64BE + checkDocIdBits(_docIdBits.getComprBase(), + _docIdBits.getBitOffsetBase()); + checkDocIdBits.seek(_l1SkipDocIdBitsOffset); + if (checkDocIdBits._valI != oCompr || + checkDocIdBits._val != oVal || + checkDocIdBits._cacheInt != oCacheInt || + checkDocIdBits._preRead != oPreRead) { + printf("seek problem: check " + "(%p,%d) " + "%p,%" PRIu64 ",%" PRIu64 ",%u != " + "(%p,%d) " + "%p,%" PRIu64 ",%" PRIu64 ",%u for " + "offset %" PRIu64 "\n", + checkDocIdBits.getComprBase(), + checkDocIdBits.getBitOffsetBase(), + checkDocIdBits._valI, + checkDocIdBits._val, + checkDocIdBits._cacheInt, + checkDocIdBits._preRead, + _docIdBits.getComprBase(), + _docIdBits.getBitOffsetBase(), + oCompr, + oVal, + oCacheInt, + oPreRead, + _l1SkipDocIdBitsOffset); 
+ abort(); + } +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("L1DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n", + _l1SkipDocId, + (int) _l2SkipDocIdBitsOffset, + (int) _l2SkipL1SkipBitsOffset); +#endif + } + UC64_DECODEEXPGOLOMB_SMALL_APPLY(oVal, oCompr, oPreRead, oCacheInt, + K_VALUE_FILTEROCC_DELTA_DOCID, EC, + oDocId += 1 +); +#if DEBUG_EGCOMPR64FILTEROCC_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + UC64_DECODECONTEXT_STORE(o, this->_docIdBits._); + setDocId(oDocId); + return; +} + + +template +void +FakeFilterOccEGCompressed64SkipArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + setUnpacked(); +} + + +template +search::queryeval::SearchIterator * +FakeEGCompr64SkipFilterOcc:: +createIterator(const fef::TermFieldMatchDataArray &matchData) const +{ + unsigned int length; + uint64_t val64; + const uint64_t *arr = _compressed.first; + const bool bigEndian = true; + BitDecode64BE docIdBits(arr, 0); + assert(docIdBits.getCompr() == arr); + assert(docIdBits.getBitOffset() == 0); + assert(docIdBits.getOffset() == 0); + + typedef bitcompression::EncodeContext64BE EC; + + uint32_t myResidue = 0; + UC64_FILTEROCC_READ_RESIDUE(docIdBits._val, + docIdBits._valI, + docIdBits._preRead, + docIdBits._cacheInt, myResidue, EC); + assert(myResidue == _hitDocs); + (void) myResidue; + + const uint64_t *l1SkipArr = _l1SkipCompressed.first; + const uint64_t *l2SkipArr = _l2SkipCompressed.first; + const uint64_t *l3SkipArr = _l3SkipCompressed.first; + const uint64_t *l4SkipArr = _l4SkipCompressed.first; + return new FakeFilterOccEGCompressed64SkipArrayIterator(docIdBits.getCompr(), + docIdBits.getBitOffset(), + _lastDocId, + l1SkipArr, 0, + l2SkipArr, 0, + l3SkipArr, 0, + l4SkipArr, 0, + getName(), + matchData); +} + + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h 
b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h new file mode 100644 index 00000000000..333f029cd08 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "fakeword.h" +#include "fakeposting.h" + +namespace search +{ + +namespace fakedata +{ + +/* + * Old compressed posocc format. + */ +class FakeEGCompr64FilterOcc : public FakePosting +{ +protected: + std::pair _compressed; + std::pair _l1SkipCompressed; + std::pair _l2SkipCompressed; + std::pair _l3SkipCompressed; + std::pair _l4SkipCompressed; + void *_compressedMalloc; + void *_l1SkipCompressedMalloc; + void *_l2SkipCompressedMalloc; + void *_l3SkipCompressedMalloc; + void *_l4SkipCompressedMalloc; + unsigned int _docIdLimit; + unsigned int _hitDocs; + uint32_t _lastDocId; + size_t _bitSize; + size_t _l1SkipBitSize; + size_t _l2SkipBitSize; + size_t _l3SkipBitSize; + size_t _l4SkipBitSize; + bool _bigEndian; + +private: + void + setup(const FakeWord &fw); + + template + void + setupT(const FakeWord &fw); + +public: + FakeEGCompr64FilterOcc(const FakeWord &fw); + + FakeEGCompr64FilterOcc(const FakeWord &fw, + bool bigEndian, + const char *nameSuffix); + + ~FakeEGCompr64FilterOcc(void); + + static void + forceLink(void); + + /* + * Size of posting list, in bits. + */ + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + /* + * Size of posting skip list, in bits. + */ + size_t + skipBitSize(void) const; + + size_t + l1SkipBitSize(void) const; + + size_t + l2SkipBitSize(void) const; + + size_t + l3SkipBitSize(void) const; + + size_t + l4SkipBitSize(void) const; + + /* + * Single posting list performance, without feature unpack. + */ + virtual int + lowLevelSinglePostingScan(void) const; + + /* + * Single posting list performance, with feature unpack. 
+ */ + virtual int + lowLevelSinglePostingScanUnpack(void) const; + + /* + * Two posting lists performance (same format) without feature unpack. + */ + virtual int + lowLevelAndPairPostingScan(const FakePosting &rhs) const; + + /* + * Two posting lists performance (same format) with feature unpack. + */ + virtual int + lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const; + + + /* + * Iterator factory, for current query evaluation framework. + */ + virtual search::queryeval::SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + +} // namespace fakedata + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp new file mode 100644 index 00000000000..749803cc6ed --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.cpp @@ -0,0 +1,206 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fakefilterocc"); +#include +#include "fakefilterocc.h" +#include "fpfactory.h" + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; + +namespace search +{ + +namespace fakedata +{ + +static FPFactoryInit +init(std::make_pair("FilterOcc", + makeFPFactory >)); + +FakeFilterOcc::FakeFilterOcc(const FakeWord &fw) + : FakePosting(fw.getName() + ".filterocc"), + _uncompressed(), + _docIdLimit(0), + _hitDocs(0) +{ + std::vector fake; + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + + while (d != de) { + fake.push_back(d->_docId); + ++d; + } + std::swap(_uncompressed, fake); + _docIdLimit = fw._docIdLimit; + _hitDocs = fw._postings.size(); +} + + +FakeFilterOcc::~FakeFilterOcc(void) +{ +} + + +void +FakeFilterOcc::forceLink(void) +{ +} + + +size_t +FakeFilterOcc::bitSize(void) const +{ + return 32 * _uncompressed.size(); +} + + +bool +FakeFilterOcc::hasWordPositions(void) const +{ + return false; +} + + +int +FakeFilterOcc::lowLevelSinglePostingScan(void) const +{ + return 0; +} + + +int +FakeFilterOcc::lowLevelSinglePostingScanUnpack(void) const +{ + return 0; +} + + +int +FakeFilterOcc:: +lowLevelAndPairPostingScan(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +int +FakeFilterOcc:: +lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +class FakeFilterOccArrayIterator: public queryeval::RankedSearchIteratorBase +{ +private: + FakeFilterOccArrayIterator(const FakeFilterOccArrayIterator &other); + + FakeFilterOccArrayIterator& operator=(const FakeFilterOccArrayIterator &); + +public: + const uint32_t *_arr; + const uint32_t *_arrEnd; + + FakeFilterOccArrayIterator(const uint32_t *arr, + const uint32_t *arrEnd, + const fef::TermFieldMatchDataArray &matchData); + + ~FakeFilterOccArrayIterator(void); + + void doUnpack(uint32_t 
docId) override; + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +void +FakeFilterOccArrayIterator::doSeek(uint32_t docId) +{ + const uint32_t *oarr = _arr; + const uint32_t *oarrEnd = _arrEnd; + + if (getUnpacked()) + clearUnpacked(); + if (oarr >= oarrEnd) + goto doneuncompressed; + for (;;) { + if ((int) *oarr >= (int) docId) + goto found; + if (++oarr >= oarrEnd) + goto doneuncompressed; + } + found: + _arr = oarr; + setDocId(*oarr); + return; // Still data + doneuncompressed: + _arr = oarr; + setAtEnd(); // Mark end of data + return; // Ran off end +} + + +FakeFilterOccArrayIterator:: +FakeFilterOccArrayIterator(const uint32_t *arr, + const uint32_t *arrEnd, + const fef::TermFieldMatchDataArray &matchData) + : queryeval::RankedSearchIteratorBase(matchData), + _arr(arr), + _arrEnd(arrEnd) +{ + clearUnpacked(); +} + +void +FakeFilterOccArrayIterator::initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + if (_arr < _arrEnd) { + setDocId(*_arr); + } else { + setAtEnd(); + } +} + + +FakeFilterOccArrayIterator::~FakeFilterOccArrayIterator(void) +{ +} + + +void +FakeFilterOccArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + setUnpacked(); +} + + +search::queryeval::SearchIterator * +FakeFilterOcc:: +createIterator(const fef::TermFieldMatchDataArray &matchData) const +{ + return new FakeFilterOccArrayIterator(_uncompressed.data(), // data()/size(): no end() dereference, valid when empty + _uncompressed.data() + _uncompressed.size(), + matchData); +} + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h new file mode 100644 index 00000000000..b0d18b94eac --- /dev/null +++ 
b/searchlib/src/vespa/searchlib/test/fakedata/fakefilterocc.h @@ -0,0 +1,74 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "fakeword.h" +#include "fakeposting.h" + +namespace search +{ + +namespace fakedata +{ + +/* + * Old posocc format. + */ +class FakeFilterOcc : public FakePosting +{ +private: + std::vector _uncompressed; + unsigned int _docIdLimit; + unsigned int _hitDocs; +public: + FakeFilterOcc(const FakeWord &fakeword); + + ~FakeFilterOcc(void); + + static void + forceLink(void); + + /* + * Size of posting list, in bits. + */ + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + /* + * Single posting list performance, without feature unpack. + */ + virtual int + lowLevelSinglePostingScan(void) const; + + /* + * Single posting list performance, with feature unpack. + */ + virtual int + lowLevelSinglePostingScanUnpack(void) const; + + /* + * Two posting lists performance (same format) without feature unpack. + */ + virtual int + lowLevelAndPairPostingScan(const FakePosting &rhs) const; + + /* + * Two posting lists performance (same format) with feature unpack. + */ + virtual int + lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const; + + + /* + * Iterator factory, for current query evaluation framework. + */ + virtual search::queryeval::SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + +} // namespace fakedata + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp new file mode 100644 index 00000000000..01bd2551989 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.cpp @@ -0,0 +1,430 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fakememtreeocc"); +#include +#include "fakememtreeocc.h" +#include +#include +#include +#include +#include +#include +#include "fpfactory.h" +#include + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; + +namespace search +{ + +namespace fakedata +{ + +static FPFactoryInit +init(std::make_pair("MemTreeOcc", + makeFPFactory)); + +static FPFactoryInit +init2(std::make_pair("MemTreeOcc2", + makeFPFactory)); + +FakeMemTreeOcc::FakeMemTreeOcc(const FakeWord &fw, + NodeAllocator &allocator, + Tree &tree, + uint64_t featureBitSize, + const FakeMemTreeOccMgr &mgr) + : FakePosting(fw.getName() + ".memtreeocc"), + _allocator(allocator), + _tree(tree), + _fieldsParams(fw.getFieldsParams()), + _packedIndex(fw.getPackedIndex()), + _featureBitSize(featureBitSize), + _mgr(mgr), + _docIdLimit(0), + _hitDocs(0) +{ + _docIdLimit = fw._docIdLimit; + _hitDocs = fw._postings.size(); +} + + +FakeMemTreeOcc::FakeMemTreeOcc(const FakeWord &fw, + NodeAllocator &allocator, + Tree &tree, + uint64_t featureBitSize, + const FakeMemTreeOccMgr &mgr, + const char *suffix) + : FakePosting(fw.getName() + suffix), + _allocator(allocator), + _tree(tree), + _fieldsParams(fw.getFieldsParams()), + _packedIndex(fw.getPackedIndex()), + _featureBitSize(featureBitSize), + _mgr(mgr), + _docIdLimit(0), + _hitDocs(0) +{ + _docIdLimit = fw._docIdLimit; + _hitDocs = fw._postings.size(); +} + + +FakeMemTreeOcc::~FakeMemTreeOcc(void) +{ +} + + +void +FakeMemTreeOcc::forceLink(void) +{ +} + + +size_t +FakeMemTreeOcc::bitSize(void) const +{ + return _tree.bitSize(_allocator) + _featureBitSize; +} + + +bool +FakeMemTreeOcc::hasWordPositions(void) const +{ + return true; +} + + +int +FakeMemTreeOcc::lowLevelSinglePostingScan(void) const +{ + return 0; +} + + +int +FakeMemTreeOcc::lowLevelSinglePostingScanUnpack(void) const +{ + return 0; +} + + +int +FakeMemTreeOcc:: +lowLevelAndPairPostingScan(const FakePosting &rhs) const +{ + (void) rhs; + 
return 0; +} + + +int +FakeMemTreeOcc:: +lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +search::queryeval::SearchIterator * +FakeMemTreeOcc:: +createIterator(const fef::TermFieldMatchDataArray &matchData) const +{ + return new search::memoryindex::PostingIterator(_tree.begin(_allocator), + _mgr._featureStore, + _packedIndex, + matchData); +} + + +FakeMemTreeOccMgr::FakeMemTreeOccMgr(const Schema &schema) + : _generationHandler(), + _allocator(), + _fw2WordIdx(), + _postingIdxs(), + _fakeWords(), + _featureSizes(), + _featureStore(schema) +{ +} + + +FakeMemTreeOccMgr::~FakeMemTreeOccMgr(void) +{ + std::vector >::iterator + it(_postingIdxs.begin()); + std::vector >::iterator + ite(_postingIdxs.end()); + + for (; it != ite; ++it) { + (*it)->clear(); + } + sync(); +} + + +void +FakeMemTreeOccMgr::freeze(void) +{ + _allocator.freeze(); +} + + +void +FakeMemTreeOccMgr::transferHoldLists(void) +{ + _allocator.transferHoldLists(_generationHandler.getCurrentGeneration()); +} + +void +FakeMemTreeOccMgr::incGeneration(void) +{ + _generationHandler.incGeneration(); +} + + +void +FakeMemTreeOccMgr::trimHoldLists(void) +{ + _allocator.trimHoldLists(_generationHandler.getFirstUsedGeneration()); +} + + +void +FakeMemTreeOccMgr::sync(void) +{ + freeze(); + transferHoldLists(); + incGeneration(); + trimHoldLists(); +} + + +void +FakeMemTreeOccMgr::add(uint32_t wordIdx, index::DocIdAndFeatures &features) +{ + typedef FeatureStore::RefType RefType; + + const FakeWord *fw = _fakeWords[wordIdx]; + + std::pair r = + _featureStore.addFeatures(fw->getPackedIndex(), features); + + _featureSizes[wordIdx] += RefType::align((r.second + 7) / 8) * 8; + + _unflushed.push_back(PendingOp(wordIdx, features._docId, r.first)); + + if (_unflushed.size() >= 10000) + flush(); +} + + +void +FakeMemTreeOccMgr::remove(uint32_t wordIdx, uint32_t docId) +{ + _unflushed.push_back(PendingOp(wordIdx, docId)); + + if (_unflushed.size() >= 10000) + flush(); +} + 
+// NOTE(review): declarations such as "std::vector::iterator", "std::map::const_iterator" and the bare "#include" lines below have lost their <...> text in this copy of the patch.
+void
+FakeMemTreeOccMgr::sortUnflushed(void)
+{
+    typedef std::vector::iterator I;
+    uint32_t seq = 0;
+    for (I i(_unflushed.begin()), ie(_unflushed.end()); i != ie; ++i) {
+        i->setSeq(++seq);   // Number the operations in arrival order.
+    }
+    std::sort(_unflushed.begin(), _unflushed.end());   // PendingOp orders by word, doc id, then sequence.
+}
+
+// Applies all pending operations to the posting trees in sorted order, then clears the queue and syncs.
+void
+FakeMemTreeOccMgr::flush(void)
+{
+    typedef FeatureStore::RefType RefType;
+    typedef std::vector::iterator I;
+
+    if (_unflushed.empty())
+        return;
+
+    uint32_t lastWord = std::numeric_limits::max();
+    sortUnflushed();
+    for (I i(_unflushed.begin()), ie(_unflushed.end()); i != ie; ++i) {
+        uint32_t wordIdx = i->getWordIdx();
+        uint32_t docId = i->getDocId();
+        PostingIdx &pidx(*_postingIdxs[wordIdx].get());
+        Tree &tree = pidx._tree;
+        Tree::Iterator &itr = pidx._iterator;
+        const FakeWord *fw = _fakeWords[wordIdx];
+        if (wordIdx != lastWord)
+            itr.lower_bound(docId);    // First operation for this word: position from scratch.
+        else if (itr.valid() && itr.getKey() < docId) {
+            itr.linearSeek(docId);     // Same word as before: move forward from the current position.
+        }
+        lastWord = wordIdx;
+        if (i->getRemove()) {
+            if (itr.valid() && itr.getKey() == docId) {    // Only remove a doc id that is present.
+                uint64_t bits = _featureStore.bitSize(fw->getPackedIndex(),
+                                                      itr.getData());
+                _featureSizes[wordIdx] -= RefType::align((bits + 7) / 8) * 8;
+                tree.remove(itr);
+            }
+        } else {
+            if (!itr.valid() || docId < itr.getKey()) {    // Only insert a doc id that is absent.
+                tree.insert(itr, docId, i->getFeatureRef().ref());
+            }
+        }
+    }
+    _unflushed.clear();
+    sync();
+}
+
+void
+FakeMemTreeOccMgr::compactTrees(void)
+{
+    // compact full trees by calling incremental compaction methods in a loop
+
+    std::vector toHold = _allocator.startCompact();
+    for (uint32_t wordIdx = 0; wordIdx < _postingIdxs.size(); ++wordIdx) {
+        PostingIdx &pidx(*_postingIdxs[wordIdx].get());
+        Tree &tree = pidx._tree;
+        Tree::Iterator &itr = pidx._iterator;
+        itr.begin();
+        tree.setRoot(itr.moveFirstLeafNode(tree.getRoot()), _allocator);
+        while (itr.valid()) {
+            // LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey()));
+            itr.moveNextLeafNode();
+        }
+    }
+    _allocator.finishCompact(toHold);
+    sync();
+}
+
+void
+FakeMemTreeOccMgr::finalize(void)
+{
+    flush();    // Apply whatever is still queued.
+}
+
+
+FakeMemTreeOccFactory::FakeMemTreeOccFactory(const Schema &schema)
+    : _mgr(schema)
+{
+}
+
+
+FakeMemTreeOccFactory::~FakeMemTreeOccFactory(void)
+{
+}
+
+// Builds the posting for a word previously passed to setup(); aborts when the word is unknown.
+FakePosting::SP
+FakeMemTreeOccFactory::make(const FakeWord &fw)
+{
+    std::map::const_iterator
+        i(_mgr._fw2WordIdx.find(&fw));
+
+    if (i == _mgr._fw2WordIdx.end())
+        abort();
+
+    uint32_t wordIdx = i->second;
+
+    assert(_mgr._postingIdxs.size() > wordIdx);
+
+    return FakePosting::SP(new FakeMemTreeOcc(fw, _mgr._allocator,
+                                   _mgr._postingIdxs[wordIdx]->_tree,
+                                   _mgr._featureSizes[wordIdx],
+                                   _mgr));
+}
+
+// Registers every word with the manager, then feeds all their randomized operations into it.
+void
+FakeMemTreeOccFactory::setup(const std::vector &fws)
+{
+    typedef FakeMemTreeOccMgr::PostingIdx PostingIdx;
+    std::vector r;
+    uint32_t wordIdx = 0;
+    std::vector::const_iterator fwi(fws.begin());
+    std::vector::const_iterator fwe(fws.end());
+    while (fwi != fwe) {
+        _mgr._fakeWords.push_back(*fwi);
+        _mgr._featureSizes.push_back(0);
+        _mgr._fw2WordIdx[*fwi] = wordIdx;
+        _mgr._postingIdxs.push_back(
+                std::shared_ptr
+                (new PostingIdx(_mgr._allocator)));
+        r.push_back(FakeWord::RandomizedReader());
+        r.back().setup(*fwi, wordIdx);
+        ++fwi;
+        ++wordIdx;
+    }
+
+    PostingPriorityQueue heap;
+    std::vector::iterator i(r.begin());
+    std::vector::iterator ie(r.end());
+    while (i != ie) {
+        i->read();
+        if (i->isValid())
+            heap.initialAdd(&*i);
+#if 0
+        heap.merge(_mgr, 4);
+#endif
+        ++i;
+    }
+    heap.merge(_mgr, 4);    // The manager is the writer that receives the merged operations.
+    assert(heap.empty());
+    _mgr.finalize();
+}
+
+
+FakeMemTreeOcc2Factory::FakeMemTreeOcc2Factory(const Schema &schema)
+    : FakeMemTreeOccFactory(schema)
+{
+}
+
+
+FakeMemTreeOcc2Factory::~FakeMemTreeOcc2Factory(void)
+{
+}
+
+// Same lookup as FakeMemTreeOccFactory::make(), but the posting name gets the suffix ".memtreeocc2".
+FakePosting::SP
+FakeMemTreeOcc2Factory::make(const FakeWord &fw)
+{
+    std::map::const_iterator
+        i(_mgr._fw2WordIdx.find(&fw));
+
+    if (i == _mgr._fw2WordIdx.end())
+        abort();
+
+    uint32_t wordIdx = i->second;
+
+    assert(_mgr._postingIdxs.size() > wordIdx);
+
+    return FakePosting::SP(new FakeMemTreeOcc(fw, _mgr._allocator,
+                                   _mgr._postingIdxs[wordIdx]->_tree,
+                                   _mgr._featureSizes[wordIdx],
+                                   _mgr,
+                                   ".memtreeocc2"));
+}
+
+// Base class setup followed by a compaction of all trees.
+void
+FakeMemTreeOcc2Factory::setup(const std::vector &fws)
+{
+    FakeMemTreeOccFactory::setup(fws);
+    LOG(info, "start compacting trees");
+    _mgr.compactTrees();
+    LOG(info, "done compacting trees");
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h
new file mode 100644
index 00000000000..111f3b6ba54
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakememtreeocc.h
@@ -0,0 +1,287 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "fakeword.h"
+#include "fakeposting.h"
+#include "fpfactory.h"
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+// Holds the allocator, feature store and per-word posting trees, and applies queued add/remove operations to them.
+class FakeMemTreeOccMgr : public FakeWord::RandomizedWriter
+{
+public:
+    typedef memoryindex::Dictionary::PostingList Tree;
+    typedef Tree::NodeAllocatorType NodeAllocator;
+    typedef memoryindex::FeatureStore FeatureStore;
+    typedef btree::EntryRef EntryRef;
+    typedef index::Schema Schema;
+    typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+    vespalib::GenerationHandler _generationHandler;
+    NodeAllocator _allocator;
+
+    std::map _fw2WordIdx;    // Fake word -> word index, filled by the factory's setup().
+    class PostingIdx         // One posting tree plus an iterator into it.
+    {
+    public:
+        Tree _tree;
+        Tree::Iterator _iterator;
+
+        PostingIdx(NodeAllocator &allocator)
+            : _tree(),
+              _iterator(_tree.getRoot(), allocator)
+        {
+        }
+
+        void
+        clear(void)
+        {
+            _tree.clear(_iterator.getAllocator());
+            _iterator = _tree.begin(_iterator.getAllocator());    // Re-seat the iterator on the now empty tree.
+        }
+    };
+
+    class PendingOp          // A queued insert or removal of one (word, doc id).
+    {
+        uint32_t _wordIdx;
+        uint32_t _docId;
+        EntryRef _features;  // Left default-constructed for removals.
+        bool _removal;
+        uint32_t _seq;       // Position in the queue, set by sortUnflushed().
+
+    public:
+        PendingOp(uint32_t wordIdx, uint32_t docId)    // Removal.
+            : _wordIdx(wordIdx),
+              _docId(docId),
+              _features(),
+              _removal(true),
+              _seq(0)
+        {
+        }
+
+        PendingOp(uint32_t wordIdx, uint32_t docId, EntryRef features)    // Insert.
+            : _wordIdx(wordIdx),
+              _docId(docId),
+              _features(features),
+              _removal(false),
+              _seq(0)
+        {
+        }
+
+        void
+        setSeq(uint32_t seq)
+        {
+            _seq = seq;
+        }
+
+        uint32_t
+        getWordIdx(void) const
+        {
+            return _wordIdx;
+        }
+
+        uint32_t
+        getDocId(void) const
+        {
+            return _docId;
+        }
+
+        EntryRef
+        getFeatureRef(void) const
+        {
+            return _features;
+        }
+
+        bool
+        getRemove(void) const
+        {
+            return _removal;
+        }
+
+        bool
+        operator<(const PendingOp &rhs) const    // Order: word index, then doc id, then sequence number.
+        {
+            if (_wordIdx != rhs._wordIdx)
+                return _wordIdx < rhs._wordIdx;
+            if (_docId != rhs._docId)
+                return _docId < rhs._docId;
+            return _seq < rhs._seq;
+        }
+    };
+
+    std::vector > _postingIdxs;      // Indexed by word index.
+    std::vector _fakeWords;          // Indexed by word index.
+    std::vector _featureSizes;       // Indexed by word index; adjusted by add() and flush().
+    std::vector _unflushed;          // Operations queued by add()/remove() until flush().
+
+    FeatureStore _featureStore;
+
+    FakeMemTreeOccMgr(const Schema &schema);
+
+    virtual
+    ~FakeMemTreeOccMgr(void);
+
+    void
+    freeze(void);
+
+    void
+    transferHoldLists(void);
+
+    void
+    incGeneration(void);
+
+    void
+    trimHoldLists(void);
+
+    void
+    sync(void);
+
+    virtual void
+    add(uint32_t wordIdx, index::DocIdAndFeatures &features);
+
+    virtual void
+    remove(uint32_t wordIdx, uint32_t docId);
+
+    void
+    sortUnflushed(void);
+
+    void
+    flush(void);
+
+    void
+    compactTrees(void);
+
+    void
+    finalize(void);
+};
+
+// Factory that owns a manager and creates FakeMemTreeOcc postings from it.
+class FakeMemTreeOccFactory : public FPFactory
+{
+public:
+    typedef FakeMemTreeOccMgr::Tree Tree;
+    typedef FakeMemTreeOccMgr::NodeAllocator NodeAllocator;
+    typedef index::Schema Schema;
+
+    FakeMemTreeOccMgr _mgr;
+
+    FakeMemTreeOccFactory(const Schema &schema);
+
+    virtual
+    ~FakeMemTreeOccFactory(void);
+
+    virtual FakePosting::SP
+    make(const FakeWord &fw);
+
+    virtual void
+    setup(const std::vector &fws);
+};
+
+class FakeMemTreeOcc2Factory : public FakeMemTreeOccFactory    // Variant whose setup() also compacts the trees.
+{
+public:
+    FakeMemTreeOcc2Factory(const Schema &schema);
+
+    virtual
+    ~FakeMemTreeOcc2Factory(void);
+
+    virtual FakePosting::SP
+    make(const FakeWord &fw);
+
+    virtual void
+    setup(const std::vector &fws);
+};
+
+
+/*
+ * Updateable memory tree format.
+ */
+class FakeMemTreeOcc : public FakePosting
+{
+public:
+    typedef FakeMemTreeOccMgr::Tree Tree;
+    typedef FakeMemTreeOccMgr::NodeAllocator NodeAllocator;
+    typedef FakeMemTreeOccMgr::PosOccFieldsParams PosOccFieldsParams;
+
+
+private:
+    NodeAllocator &_allocator;
+    Tree &_tree;
+    const PosOccFieldsParams &_fieldsParams;
+    uint32_t _packedIndex;
+    uint64_t _featureBitSize;
+    const FakeMemTreeOccMgr &_mgr;
+    unsigned int _docIdLimit;
+    unsigned int _hitDocs;
+public:
+    FakeMemTreeOcc(const FakeWord &fakeword,
+                   NodeAllocator &allocator,
+                   Tree &tree,
+                   uint64_t featureBitSize,
+                   const FakeMemTreeOccMgr &mgr);
+
+    FakeMemTreeOcc(const FakeWord &fakeword,
+                   NodeAllocator &allocator,
+                   Tree &tree,
+                   uint64_t featureBitSize,
+                   const FakeMemTreeOccMgr &mgr,
+                   const char *suffix);
+
+    ~FakeMemTreeOcc(void);
+
+    static void
+    forceLink(void);
+
+    /*
+     * Size of posting list, in bits.
+     */
+    size_t
+    bitSize(void) const;
+
+    virtual bool
+    hasWordPositions(void) const;
+
+    /*
+     * Single posting list performance, without feature unpack.
+     */
+    virtual int
+    lowLevelSinglePostingScan(void) const;
+
+    /*
+     * Single posting list performance, with feature unpack.
+     */
+    virtual int
+    lowLevelSinglePostingScanUnpack(void) const;
+
+    /*
+     * Two posting lists performance (same format) without feature unpack.
+     */
+    virtual int
+    lowLevelAndPairPostingScan(const FakePosting &rhs) const;
+
+    /*
+     * Two posting lists performance (same format) with feature unpack.
+     */
+    virtual int
+    lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const;
+
+
+    /*
+     * Iterator factory, for current query evaluation framework.
+     */
+    virtual search::queryeval::SearchIterator *
+    createIterator(const fef::TermFieldMatchDataArray &matchData) const;
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
new file mode 100644
index 00000000000..8d0915d4966
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".fakeposting");
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+FakePosting::FakePosting(const std::string &name)
+    : _name(name)
+{
+}
+
+
+FakePosting::~FakePosting(void)
+{
+}
+
+// Sum of the four skip level sizes below.
+size_t
+FakePosting::skipBitSize(void) const
+{
+    return l1SkipBitSize() + l2SkipBitSize() + l3SkipBitSize() +
+        l4SkipBitSize();
+}
+
+size_t
+FakePosting::l1SkipBitSize(void) const
+{
+    return 0;    // Base class default; virtual, so a derived format can override it.
+}
+
+
+size_t
+FakePosting::l2SkipBitSize(void) const
+{
+    return 0;    // Base class default.
+}
+
+
+size_t
+FakePosting::l3SkipBitSize(void) const
+{
+    return 0;    // Base class default.
+}
+
+
+size_t
+FakePosting::l4SkipBitSize(void) const
+{
+    return 0;    // Base class default.
+}
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
new file mode 100644
index 00000000000..946d1e05379
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
@@ -0,0 +1,105 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+#include
+#include
+
+using search::fef::TermFieldMatchData;
+using search::fef::TermFieldMatchDataPosition;
+
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+
+/*
+ * Base class for faked posting list formats.
+ */
+class FakePosting
+{
+private:    // Copy operations are declared private; no definition appears in fakeposting.cpp.
+    FakePosting(const FakePosting &);
+
+    FakePosting &
+    operator=(const FakePosting &);
+
+    std::string _name;
+public:
+    typedef std::shared_ptr SP;
+
+    FakePosting(const std::string &name);
+
+    virtual ~FakePosting(void);
+
+    /*
+     * Size of posting list, in bits.
+     */
+    virtual size_t
+    bitSize(void) const = 0;
+
+    virtual size_t
+    skipBitSize(void) const;
+
+    virtual size_t
+    l1SkipBitSize(void) const;
+
+    virtual size_t
+    l2SkipBitSize(void) const;
+
+    virtual size_t
+    l3SkipBitSize(void) const;
+
+    virtual size_t
+    l4SkipBitSize(void) const;
+
+    virtual bool
+    hasWordPositions(void) const = 0;
+
+    /*
+     * Single posting list performance, without feature unpack.
+     */
+    virtual int
+    lowLevelSinglePostingScan(void) const = 0;
+
+    /*
+     * Single posting list performance, with feature unpack.
+     */
+    virtual int
+    lowLevelSinglePostingScanUnpack(void) const = 0;
+
+    /*
+     * Two posting lists performance (same format) without feature unpack.
+     */
+    virtual int
+    lowLevelAndPairPostingScan(const FakePosting &rhs) const = 0;
+
+    /*
+     * Two posting lists performance (same format) with feature unpack.
+     */
+    virtual int
+    lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const = 0;
+
+
+    /*
+     * Iterator factory, for current query evaluation framework.
+     */
+    virtual search::queryeval::SearchIterator *
+    createIterator(const fef::TermFieldMatchDataArray &matchData) const = 0;
+
+    const std::string &getName(void) const
+    {
+        return _name;
+    }
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
new file mode 100644
index 00000000000..5ad3140b5b2
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
@@ -0,0 +1,796 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fakeword"); +#include "fakeword.h" + +#include +#include +#include +#include +#include + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; +using search::index::WordDocElementFeatures; +using search::index::WordDocElementWordPosFeatures; +using search::index::PostingListFileSeqWrite; +using search::index::DocIdAndFeatures; +using search::index::DocIdAndPosOccFeatures; +using search::index::PostingListCounts; +using search::index::PostingListFileSeqRead; +using search::diskindex::FieldReader; +using search::diskindex::FieldWriter; + +namespace search +{ + +namespace fakedata +{ + + +static void +fillbitset(search::BitVector *bitvector, + unsigned int size, + search::Rand48 &rnd) +{ + unsigned int range; + unsigned int idx; + unsigned int j; + + range = bitvector->size(); + assert(range > 0); + --range; + bitvector->invalidateCachedCount(); + + assert(size <= range); + if (size > range / 2) { + if (range > 0) + bitvector->setInterval(1, range); + + for (j = range; j > size; --j) { + do { + idx = (rnd.lrand48() % range) + 1u; + } while (!bitvector->testBit(idx)); + bitvector->clearBit(idx); + } + } else { + // bitvector->reset(); + bitvector->invalidateCachedCount(); + for (j = bitvector->countTrueBits(); j < size; j++) { + do { + idx = (rnd.lrand48() % range) + 1u; + } while (bitvector->testBit(idx)); + bitvector->setBit(idx); + } + } +} + + +static void +fillcorrelatedbitset(search::BitVector &bitvector, + unsigned int size, + const FakeWord &otherword, + search::Rand48 &rnd) +{ + const FakeWord::DocWordFeatureList &opostings = otherword._postings; + + unsigned int range = opostings.size(); + search::BitVector::UP corrmap(search::BitVector::create(range + 1)); + + if (size > range) + size = range; + fillbitset(corrmap.get(), size, rnd); + + unsigned int idx = corrmap->getNextTrueBit(1u); + while (idx < range) { + unsigned int docId = opostings[idx - 1]._docId; + bitvector.setBit(docId); + 
++idx; + if (idx > range) + break; + idx = corrmap->getNextTrueBit(idx); + } +} + + +FakeWord::DocWordPosFeature::DocWordPosFeature(void) + : _elementId(0), + _wordPos(0), + _elementWeight(1), + _elementLen(0) +{ +} + + +FakeWord::DocWordPosFeature::~DocWordPosFeature(void) +{ +} + + +FakeWord::DocWordCollapsedFeature::DocWordCollapsedFeature(void) +{ +} + + +FakeWord::DocWordCollapsedFeature::~DocWordCollapsedFeature(void) +{ +} + + +FakeWord::DocWordFeature::DocWordFeature(void) + : _docId(0), + _collapsedDocWordFeatures(), + _positions(0), + _accPositions(0) +{ +} + +FakeWord::DocWordFeature::~DocWordFeature(void) +{ +} + +FakeWord::FakeWord(uint32_t docIdLimit, + const std::vector & docIds, + const std::string &name, + const PosOccFieldsParams &fieldsParams, + uint32_t packedIndex) + : _postings(), + _wordPosFeatures(), + _extraPostings(), + _extraWordPosFeatures(), + _docIdLimit(docIdLimit), + _name(name), + _fieldsParams(fieldsParams), + _packedIndex(packedIndex) +{ + search::BitVector::UP bitmap(search::BitVector::create(docIdLimit)); + for (uint32_t docId : docIds) { + bitmap->setBit(docId); + } + search::Rand48 rnd; + fakeup(*bitmap, rnd, _postings, _wordPosFeatures); +} + +FakeWord::FakeWord(uint32_t docIdLimit, + uint32_t wordDocs, + uint32_t tempWordDocs, + const std::string &name, + search::Rand48 &rnd, + const PosOccFieldsParams &fieldsParams, + uint32_t packedIndex) + : _postings(), + _wordPosFeatures(), + _extraPostings(), + _extraWordPosFeatures(), + _docIdLimit(docIdLimit), + _name(name), + _fieldsParams(fieldsParams), + _packedIndex(packedIndex) +{ + search::BitVector::UP bitmap(search::BitVector::create(docIdLimit)); + + fillbitset(bitmap.get(), wordDocs, rnd); + + fakeup(*bitmap, rnd, _postings, _wordPosFeatures); + fakeupTemps(rnd, docIdLimit, tempWordDocs); + setupRandomizer(rnd); +} + + +FakeWord::FakeWord(uint32_t docIdLimit, + uint32_t wordDocs, + uint32_t tempWordDocs, + const std::string &name, + const FakeWord &otherWord, + size_t 
overlapDocs, + search::Rand48 &rnd, + const PosOccFieldsParams &fieldsParams, + uint32_t packedIndex) + : _postings(), + _wordPosFeatures(), + _docIdLimit(docIdLimit), + _name(name), + _fieldsParams(fieldsParams), + _packedIndex(packedIndex) +{ + search::BitVector::UP bitmap(search::BitVector::create(docIdLimit)); + + if (wordDocs * 2 < docIdLimit && + overlapDocs > 0) + fillcorrelatedbitset(*bitmap, overlapDocs, otherWord, rnd); + fillbitset(bitmap.get(), wordDocs, rnd); + + fakeup(*bitmap, rnd, _postings, _wordPosFeatures); + fakeupTemps(rnd, docIdLimit, tempWordDocs); + setupRandomizer(rnd); +} + + +FakeWord::~FakeWord(void) +{ +} + + +void +FakeWord::fakeup(search::BitVector &bitmap, + search::Rand48 &rnd, + DocWordFeatureList &postings, + DocWordPosFeatureList &wordPosFeatures) +{ + DocWordPosFeatureList wpf; + unsigned int idx; + uint32_t numFields = _fieldsParams.getNumFields(); + assert(numFields == 1u); + (void) numFields; + uint32_t docIdLimit = bitmap.size(); + idx = bitmap.getNextTrueBit(1u); + while (idx < docIdLimit) { + DocWordFeature dwf; + unsigned int positions; + + dwf._docId = idx; + positions = ((rnd.lrand48() % 10) == 0) ? 
2 : 1; + dwf._positions = positions; + wpf.clear(); + for (unsigned int j = 0; j < positions; ++j) { + DocWordPosFeature dwpf; + dwpf._wordPos = rnd.lrand48() % 8192; + dwpf._elementId = 0; + if (_fieldsParams.getFieldParams()[0]._hasElements) + dwpf._elementId = rnd.lrand48() % 4; + wpf.push_back(dwpf); + } + if (positions > 1) { + /* Sort wordpos list and "avoid" duplicate positions */ + std::sort(wpf.begin(), wpf.end()); + } + do { + DocWordPosFeatureList::iterator ie(wpf.end()); + DocWordPosFeatureList::iterator i(wpf.begin()); + while (i != ie) { + uint32_t lastwordpos = i->_wordPos; + DocWordPosFeatureList::iterator pi(i); + ++i; + while (i != ie && + pi->_elementId == i->_elementId) { + if (i->_wordPos <= lastwordpos) + i->_wordPos = lastwordpos + 1; + lastwordpos = i->_wordPos; + ++i; + } + uint32_t elementLen = (rnd.lrand48() % 8192) + 1 + lastwordpos; + int32_t elementWeight = 1; + if (_fieldsParams.getFieldParams()[0]. + _hasElementWeights) { + uint32_t uWeight = rnd.lrand48() % 2001; + if ((uWeight & 1) != 0) + elementWeight = - (uWeight >> 1) - 1; + else + elementWeight = (uWeight >> 1); + assert(elementWeight <= 1000); + assert(elementWeight >= -1000); + } + while (pi != i) { + pi->_elementLen = elementLen; + pi->_elementWeight = elementWeight; + ++pi; + } + } + } while (0); + dwf._accPositions = wordPosFeatures.size(); + assert(dwf._positions == wpf.size()); + postings.push_back(dwf); + DocWordPosFeatureList::iterator ie(wpf.end()); + DocWordPosFeatureList::iterator i(wpf.begin()); + while (i != ie) { + wordPosFeatures.push_back(*i); + ++i; + } + ++idx; + if (idx >= docIdLimit) + break; + idx = bitmap.getNextTrueBit(idx); + } +} + + +void +FakeWord::fakeupTemps(search::Rand48 &rnd, + uint32_t docIdLimit, + uint32_t tempWordDocs) +{ + uint32_t maxTempWordDocs = docIdLimit / 2; + tempWordDocs = std::min(tempWordDocs, maxTempWordDocs); + if (tempWordDocs > 0) { + search::BitVector::UP bitmap(search::BitVector::create(docIdLimit)); + 
fillbitset(bitmap.get(), tempWordDocs, rnd); + fakeup(*bitmap, rnd, _extraPostings, _extraWordPosFeatures); + } +} + +void +FakeWord::setupRandomizer(search::Rand48 &rnd) +{ + typedef DocWordFeatureList DWFL; + Randomizer randomAdd; + Randomizer randomRem; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + int32_t ref = 0; + + while (d != de) { + do { + randomAdd._random = rnd.lrand48(); + } while (randomAdd._random < 10000); + randomAdd._ref = ref; + assert(!randomAdd.isExtra()); + assert(!randomAdd.isRemove()); + _randomizer.push_back(randomAdd); + ++d; + ++ref; + } + + DWFL::const_iterator ed(_extraPostings.begin()); + DWFL::const_iterator ede(_extraPostings.end()); + + int32_t eref = -1; + uint32_t tref = 0; + ref = 0; + int32_t refmax = _randomizer.size(); + while (ed != ede) { + while (ref < refmax && _postings[ref]._docId < ed->_docId) + ++ref; + if (ref < refmax && _postings[ref]._docId == ed->_docId) { + randomAdd._random = rnd.lrand48() % (_randomizer[ref]._random - 1); + randomRem._random = _randomizer[ref]._random - 1; + } else { + do { + randomAdd._random = rnd.lrand48(); + randomRem._random = rnd.lrand48(); + } while (randomAdd._random >= randomRem._random); + } + randomAdd._ref = eref; + randomRem._ref = eref - 1; + assert(randomAdd.isExtra()); + assert(!randomAdd.isRemove()); + assert(randomAdd.extraIdx() == tref); + assert(randomRem.isExtra()); + assert(randomRem.isRemove()); + assert(randomRem.extraIdx() == tref); + _randomizer.push_back(randomAdd); + _randomizer.push_back(randomRem); + ++ed; + eref -= 2; + ++tref; + } + std::sort(_randomizer.begin(), _randomizer.end()); +} + + +void +FakeWord::addDocIdBias(uint32_t docIdBias) +{ + typedef DocWordFeatureList DWFL; + DWFL::iterator d(_postings.begin()); + DWFL::iterator de(_postings.end()); + for (; d != de; ++d) { + d->_docId += docIdBias; + } + d = _extraPostings.begin(); + de = _extraPostings.end(); + for (; d != de; ++d) { + d->_docId += docIdBias; + } 
+ _docIdLimit += docIdBias; +} + + +bool +FakeWord::validate(search::queryeval::SearchIterator *iterator, + const fef::TermFieldMatchDataArray &matchData, + uint32_t stride, + bool verbose) const +{ + iterator->initFullRange(); + uint32_t docId = 0; + + typedef DocWordFeatureList DWFL; + typedef DocWordPosFeatureList DWPFL; + typedef TermFieldMatchData::PositionsIterator TMDPI; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + DWPFL::const_iterator p(_wordPosFeatures.begin()); + DWPFL::const_iterator pe(_wordPosFeatures.end()); + + if (verbose) + printf("Start validate word '%s'\n", _name.c_str()); + int strideResidue = stride; + while (d != de) { + if (strideResidue > 1) { + --strideResidue; + unsigned int positions = d->_positions; + while (positions > 0) { + ++p; + --positions; + } + } else { + strideResidue = stride; + docId = d->_docId; + bool seekRes = iterator->seek(docId); + assert(seekRes); + (void) seekRes; + assert(d != de); + unsigned int positions = d->_positions; + iterator->unpack(docId); + for (size_t lfi = 0; lfi < matchData.size(); ++lfi) { + if (matchData[lfi]->getDocId() != docId) + continue; + TMDPI mdpe = matchData[lfi]->end(); + TMDPI mdp = matchData[lfi]->begin(); + while (mdp != mdpe) { + assert(p != pe); + assert(positions > 0); + assert(p->_wordPos == mdp->getPosition()); + assert(p->_elementId == mdp->getElementId()); + assert(p->_elementWeight == mdp->getElementWeight()); + assert(p->_elementLen == mdp->getElementLen()); + ++p; + ++mdp; + --positions; + } + } + assert(positions == 0); + } + ++d; + } + assert(p == pe); + assert(d == de); + if (verbose) + printf("word '%s' validated successfully with unpack\n", + _name.c_str()); + return true; +} + + +bool +FakeWord::validate(search::queryeval::SearchIterator *iterator, + const fef::TermFieldMatchDataArray &matchData, + bool verbose) const +{ + iterator->initFullRange(); + uint32_t docId = 1; + + typedef DocWordFeatureList DWFL; + typedef 
DocWordPosFeatureList DWPFL; + typedef TermFieldMatchData::PositionsIterator TMDPI; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + DWPFL::const_iterator p(_wordPosFeatures.begin()); + DWPFL::const_iterator pe(_wordPosFeatures.end()); + + if (verbose) + printf("Start validate word '%s'\n", _name.c_str()); + for (;;) { + if (iterator->seek(docId)) { + assert(d != de); + assert(d->_docId == docId); + iterator->unpack(docId); + unsigned int positions = d->_positions; + for (size_t lfi = 0; lfi < matchData.size(); ++lfi) { + if (matchData[lfi]->getDocId() != docId) + continue; + TMDPI mdpe = matchData[lfi]->end(); + TMDPI mdp = matchData[lfi]->begin(); + while (mdp != mdpe) { + assert(p != pe); + assert(positions > 0); + assert(p->_wordPos == mdp->getPosition()); + assert(p->_elementId == mdp->getElementId()); + assert(p->_elementWeight == mdp->getElementWeight()); + assert(p->_elementLen == mdp->getElementLen()); + ++p; + ++mdp; + --positions; + } + } + assert(positions == 0); + ++d; + ++docId; + } else { + if (iterator->getDocId() > docId) + docId = iterator->getDocId(); + else + ++docId; + } + if (docId >= _docIdLimit) + break; + } + assert(p == pe); + assert(d == de); + if (verbose) + printf("word '%s' validated successfully with unpack\n", + _name.c_str()); + return true; +} + + +bool +FakeWord::validate(search::queryeval::SearchIterator *iterator, bool verbose) const +{ + iterator->initFullRange(); + uint32_t docId = 1; + + typedef DocWordFeatureList DWFL; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + + if (verbose) + printf("Start validate word '%s'\n", _name.c_str()); + for (;;) { + if (iterator->seek(docId)) { + assert(d != de); + assert(d->_docId == docId); + ++d; + ++docId; + } else { + if (iterator->getDocId() > docId) + docId = iterator->getDocId(); + else + ++docId; + } + if (docId >= _docIdLimit) + break; + } + assert(d == de); + if (verbose) + printf("word '%s' 
validated successfully without unpack\n", + _name.c_str()); + return true; +} + + +bool +FakeWord::validate(std::shared_ptr &fieldReader, + uint32_t wordNum, + const fef::TermFieldMatchDataArray &matchData, + bool verbose, + uint32_t &checkPointCheck, + uint32_t checkPointInterval, + CheckPointCallback *const checkPointCallback) const +{ + uint32_t docId = 0; + uint32_t numDocs; + uint32_t residue; + uint32_t presidue; + bool unpres; + + typedef DocWordFeatureList DWFL; + typedef DocWordPosFeatureList DWPFL; + typedef TermFieldMatchData::PositionsIterator TMDPI; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + DWPFL::const_iterator p(_wordPosFeatures.begin()); + DWPFL::const_iterator pe(_wordPosFeatures.end()); + + if (verbose) + printf("Start validate word '%s'\n", _name.c_str()); +#ifdef notyet + // Validate word number +#else + (void) wordNum; +#endif + numDocs = _postings.size(); + for (residue = numDocs; residue > 0; --residue) { + assert(fieldReader->_wordNum == wordNum); + DocIdAndFeatures &features(fieldReader->_docIdAndFeatures); + docId = features._docId; + assert(d != de); + assert(d->_docId == docId); + if (matchData.valid()) { +#ifdef notyet + unpres = features.unpack(matchData); + assert(unpres); +#else + (void) unpres; + + typedef WordDocElementFeatures Elements; + typedef WordDocElementWordPosFeatures Positions; + + std::vector::const_iterator element = + features._elements.begin(); + std::vector::const_iterator position = + features._wordPositions.begin(); + + TermFieldMatchData *tfmd = matchData[0]; + LOG_ASSERT(tfmd != 0); + tfmd->reset(features._docId); + + uint32_t elementResidue = features._elements.size(); + while (elementResidue != 0) { + uint32_t positionResidue = element->getNumOccs(); + while (positionResidue != 0) { + uint32_t wordPos = position->getWordPos(); + TermFieldMatchDataPosition pos(element->getElementId(), + wordPos, + element->getWeight(), + element->getElementLen()); + 
tfmd->appendPosition(pos); + ++position; + --positionResidue; + } + ++element; + --elementResidue; + } +#endif + unsigned int positions = d->_positions; + presidue = positions; + for (size_t lfi = 0; lfi < matchData.size(); ++lfi) { + if (matchData[lfi]->getDocId() != docId) + continue; + TMDPI mdpe = matchData[lfi]->end(); + TMDPI mdp = matchData[lfi]->begin(); + while (mdp != mdpe) { + assert(p != pe); + assert(presidue > 0); + assert(p->_wordPos == mdp->getPosition()); + assert(p->_elementId == mdp->getElementId()); + assert(p->_elementWeight == mdp->getElementWeight()); + assert(p->_elementLen == mdp->getElementLen()); + ++p; + ++mdp; + --presidue; + } + } + assert(presidue == 0); + ++d; + } + if (++checkPointCheck >= checkPointInterval) { + checkPointCheck = 0; + if (checkPointCallback != NULL) + checkPointCallback->checkPoint(); + } + fieldReader->read(); + } + if (matchData.valid()) { + assert(p == pe); + assert(d == de); + } + if (verbose) + printf("word '%s' validated successfully %s unpack\n", + _name.c_str(), + matchData.valid() ? 
"with" : "without"); + return true; +} + + +void +FakeWord::validate(const std::vector &docIds) const +{ + typedef DocWordFeatureList DWFL; + typedef std::vector DL; + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + DL::const_iterator di(docIds.begin()); + DL::const_iterator die(docIds.end()); + + while (d != de) { + assert(di != die); + assert(d->_docId == *di); + ++d; + ++di; + } + assert(di == die); +} + + +void +FakeWord::validate(const search::BitVector &bv) const +{ + typedef DocWordFeatureList DWFL; + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + uint32_t bitHits = bv.countTrueBits(); + assert(bitHits == _postings.size()); + (void) bitHits; + uint32_t bi = bv.getNextTrueBit(1u); + while (d != de) { + assert(d->_docId == bi); + ++d; + bi = bv.getNextTrueBit(bi + 1); + } + assert(bi >= bv.size()); +} + + +bool +FakeWord::dump(std::shared_ptr &fieldWriter, + bool verbose, + uint32_t &checkPointCheck, + uint32_t checkPointInterval, + CheckPointCallback *checkPointCallback) const +{ + uint32_t numDocs; + uint32_t residue; + DocIdAndPosOccFeatures features; + + typedef DocWordFeatureList DWFL; + typedef DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(_postings.begin()); + DWFL::const_iterator de(_postings.end()); + DWPFL::const_iterator p(_wordPosFeatures.begin()); + DWPFL::const_iterator pe(_wordPosFeatures.end()); + + if (verbose) + printf("Start dumping word '%s'\n", _name.c_str()); + numDocs = _postings.size(); + for (residue = numDocs; residue > 0; --residue) { + assert(d != de); + setupFeatures(*d, &*p, features); + p += d->_positions; + fieldWriter->add(features); + ++d; + if (++checkPointCheck >= checkPointInterval) { + checkPointCheck = 0; + if (checkPointCallback != NULL) + checkPointCallback->checkPoint(); + } + } + assert(p == pe); + assert(d == de); + if (verbose) + printf("word '%s' dumped successfully\n", + _name.c_str()); + return true; +} + + 
+// Construct an unattached reader: no word, empty range, not valid.
+FakeWord::RandomizedReader::RandomizedReader(void)
+    : _r(),
+      _fw(NULL),
+      _wordIdx(0u),
+      _valid(false),
+      _ri(),
+      _re()
+{
+}
+
+
+// Load the next randomizer entry into _r, or mark the reader as not
+// valid when the range is exhausted.
+void
+FakeWord::RandomizedReader::read(void)
+{
+    if (_ri != _re) {
+        _r = *_ri;
+        ++_ri;
+    } else
+        _valid = false;
+}
+
+
+// Attach the reader to the randomizer entries of fw.  The reader is
+// valid iff that list is non-empty; no entry is loaded until read().
+void
+FakeWord::RandomizedReader::setup(const FakeWord *fw,
+                                  uint32_t wordIdx)
+{
+    _fw = fw;
+    _wordIdx = wordIdx;
+    _ri = fw->_randomizer.begin();
+    _re = fw->_randomizer.end();
+    _valid = _ri != _re;
+}
+
+
+FakeWord::RandomizedWriter::~RandomizedWriter(void)
+{
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
new file mode 100644
index 00000000000..8814bd9cf7e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
@@ -0,0 +1,355 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+
+
+/*
+ * Callback interface.  FakeWord::validate() and FakeWord::dump() call
+ * checkPoint() each time their per-document counter reaches the
+ * caller supplied interval.
+ */
+class CheckPointCallback
+{
+public:
+    CheckPointCallback(void)
+    {
+    }
+
+    virtual
+    ~CheckPointCallback(void)
+    {
+    }
+
+    virtual void
+    checkPoint(void) = 0;
+};
+
+/*
+ * General representation of a faked word, containing all features used
+ * by any of the candidate posting list formats.
+ */
+class FakeWord
+{
+public:
+    typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+
+    /*
+     * One word occurrence: element id, word position within the
+     * element, element weight and element length.  Ordered by
+     * (element id, word position).
+     */
+    class DocWordPosFeature
+    {
+    public:
+        uint32_t _elementId;
+        uint32_t _wordPos;
+        int32_t _elementWeight;
+        uint32_t _elementLen;
+
+        inline bool
+        operator<(const DocWordPosFeature &rhs) const
+        {
+            if (_elementId != rhs._elementId)
+                return _elementId < rhs._elementId;
+            return _wordPos < rhs._wordPos;
+        }
+
+        DocWordPosFeature(void);
+        ~DocWordPosFeature(void);
+    };
+
+    typedef std::vector DocWordPosFeatureList;
+
+    // Per-document collapsed features; declares no data members here.
+    class DocWordCollapsedFeature
+    {
+    public:
+        DocWordCollapsedFeature(void);
+        ~DocWordCollapsedFeature(void);
+    };
+
+    /*
+     * One posting: the document id and the number of positions it has.
+     * _accPositions is used by getDocWordPosFeature() as the index of
+     * the posting's first entry in the position feature list.
+     */
+    class DocWordFeature
+    {
+    public:
+        uint32_t _docId;
+        DocWordCollapsedFeature _collapsedDocWordFeatures;
+        uint32_t _positions;
+        uint32_t _accPositions;	// accumulated positions for previous words
+
+        DocWordFeature(void);
+        ~DocWordFeature(void);
+    };
+
+    typedef std::vector DocWordFeatureList;
+
+    /*
+     * Sort key plus posting reference.  Ordered by (_random, _ref).
+     *
+     * _ref >= 0 is an index into _postings.  _ref < 0 refers to
+     * _extraPostings: the index is (~_ref) >> 1 and an even _ref marks
+     * the entry as a remove.
+     */
+    class Randomizer
+    {
+    public:
+        uint32_t _random;
+        int32_t _ref;
+
+        Randomizer(void)
+            : _random(0),
+              _ref(0)
+        {
+        }
+
+        bool
+        operator<(const Randomizer &rhs) const
+        {
+            if (_random != rhs._random)
+                return _random < rhs._random;
+            return _ref < rhs._ref;
+        }
+
+        bool
+        operator==(const Randomizer &rhs) const
+        {
+            return _random == rhs._random && _ref == rhs._ref;
+        }
+
+        // True when the entry refers to _extraPostings.
+        bool
+        isExtra(void) const
+        {
+            return _ref < 0;
+        }
+
+        // True when the entry is an extra posting to be removed.
+        bool
+        isRemove(void) const
+        {
+            return isExtra() && (_ref & 1) == 0;
+        }
+
+        // Index into _extraPostings; only meaningful when isExtra().
+        uint32_t
+        extraIdx(void) const
+        {
+            return (~_ref) >> 1;
+        }
+
+    };
+
+    /*
+     * Sink for the operations emitted by RandomizedReader::write().
+     */
+    class RandomizedWriter
+    {
+    public:
+        virtual
+        ~RandomizedWriter(void);
+
+        virtual void
+        add(uint32_t wordIdx, index::DocIdAndFeatures &features) = 0;
+
+        virtual void
+        remove(uint32_t wordIdx, uint32_t docId) = 0;
+    };
+
+    /*
+     * Cursor over the randomizer entries of one FakeWord.  Readers are
+     * ordered by their current entry, then by word index.
+     */
+    class RandomizedReader
+    {
+        Randomizer _r;          // current entry, loaded by read()
+        const FakeWord *_fw;
+        uint32_t _wordIdx;
+        bool _valid;
+        std::vector::const_iterator _ri;
+        std::vector::const_iterator _re;
+        index::DocIdAndPosOccFeatures _features;
+    public:
+        RandomizedReader(void);
+
+        void
+        read(void);
+
+        // Emit the current entry to writer: remove(wordIdx, docId) for
+        // a remove entry, otherwise add(wordIdx, features).
+        void
+        write(RandomizedWriter &writer)
+        {
+            const FakeWord::DocWordFeature &d = _fw->getDocWordFeature(_r);
+            if (_r.isRemove()) {
+                writer.remove(_wordIdx, d._docId);
+            } else {
+                const DocWordPosFeature *p = _fw->getDocWordPosFeature(_r, d);
+                FakeWord::setupFeatures(d, p, _features);
+                writer.add(_wordIdx, _features);
+            }
+        }
+
+        bool
+        isValid(void) const
+        {
+            return _valid;
+        }
+
+        bool operator<(const RandomizedReader &rhs) const
+        {
+            if (_r < rhs._r)
+                return true;
+            if (!(_r == rhs._r))
+                return false;
+            return _wordIdx < rhs._wordIdx;
+        }
+
+        void
+        setup(const FakeWord *fw,
+              uint32_t wordIdx);
+    };
+
+    DocWordFeatureList _postings;
+    DocWordPosFeatureList _wordPosFeatures;
+    DocWordFeatureList _extraPostings;
+    DocWordPosFeatureList _extraWordPosFeatures;
+    std::vector _randomizer;
+    uint32_t _docIdLimit;	// Documents in index
+    std::string _name;
+    const PosOccFieldsParams &_fieldsParams;
+    uint32_t _packedIndex;
+
+    void
+    fakeup(search::BitVector &bitmap,
+           search::Rand48 &rnd,
+           DocWordFeatureList &postings,
+           DocWordPosFeatureList &wordPosFeatures);
+
+    void
+    fakeupTemps(search::Rand48 &rnd,
+                uint32_t docIdLimit,
+                uint32_t tempWordDocs);
+
+    void
+    setupRandomizer(search::Rand48 &rnd);
+
+    // Resolve a randomizer entry to the posting it refers to, in
+    // _extraPostings or _postings.
+    const DocWordFeature &
+    getDocWordFeature(const Randomizer &r) const
+    {
+        if (r.isExtra()) {
+            assert(r.extraIdx() < _extraPostings.size());
+            return _extraPostings[r.extraIdx()];
+        }
+        assert(static_cast(r._ref) < _postings.size());
+        return _postings[r._ref];
+    }
+
+    // Pointer to the first position feature of posting d, taken from
+    // the position list matching r (extra or regular).
+    const
+    DocWordPosFeature *
+    getDocWordPosFeature(const Randomizer &r, const DocWordFeature &d) const
+    {
+        if (r.isExtra()) {
+            assert(d._accPositions + d._positions <=
+                   _extraWordPosFeatures.size());
+            return &_extraWordPosFeatures[d._accPositions];
+        }
+        assert(d._accPositions + d._positions <=
+               _wordPosFeatures.size());
+        return &_wordPosFeatures[d._accPositions];
+    }
+
+    // Fill features for posting d: clear it for d._docId, then add
+    // d._positions occurrences read consecutively from p.
+    static void
+    setupFeatures(const DocWordFeature &d,
+                  const DocWordPosFeature *p,
+                  index::DocIdAndPosOccFeatures &features)
+    {
+        unsigned int positions = d._positions;
+        features.clear(d._docId);
+        for (unsigned int t = 0; t < positions; ++t) {
+            features.addNextOcc(p->_elementId, p->_wordPos,
+                                p->_elementWeight, p->_elementLen);
+            ++p;
+        }
+    }
+
+public:
+
+    FakeWord(uint32_t docIdLimit,
+             const std::vector & docIds,
+             const std::string &name,
+             const PosOccFieldsParams &fieldsParams,
+             uint32_t packedIndex);
+
+    FakeWord(uint32_t docIdLimit,
+             uint32_t wordDocs,
+             uint32_t tempWordDocs,
+             const std::string &name,
+             search::Rand48 &rnd,
+             const PosOccFieldsParams &fieldsParams,
+             uint32_t packedIndex);
+
+    FakeWord(uint32_t docIdLimit,
+             uint32_t wordDocs,
+             uint32_t tempWordDocs,
+             const std::string &name,
+             const FakeWord &otherWord,
+             size_t overlapDocs,
+             search::Rand48 &rnd,
+             const PosOccFieldsParams &fieldsParams,
+             uint32_t packedIndex);
+
+    ~FakeWord(void);
+
+    // The validate() overloads compare this word's expected postings
+    // with an iterator, a field reader, a docId list or a bit vector.
+    bool
+    validate(search::queryeval::SearchIterator *iterator,
+             const fef::TermFieldMatchDataArray &matchData,
+             uint32_t stride,
+             bool verbose) const;
+
+    bool
+    validate(search::queryeval::SearchIterator *iterator,
+             const fef::TermFieldMatchDataArray &matchData,
+             bool verbose) const;
+
+    bool
+    validate(search::queryeval::SearchIterator *iterator,
+             bool verbose) const;
+
+    bool
+    validate(std::shared_ptr &fieldReader,
+             uint32_t wordNum,
+             const fef::TermFieldMatchDataArray &matchData,
+             bool verbose,
+             uint32_t &checkPointCheck,
+             uint32_t checkPointInterval,
+             CheckPointCallback *const checkPointCallback) const;
+
+    void
+    validate(const std::vector &docIds) const;
+
+    void
+    validate(const BitVector &bv) const;
+
+    bool
+    dump(std::shared_ptr &fieldWriter,
+         bool verbose,
+         uint32_t &checkPointCheck,
+         uint32_t checkPointInterval,
+         CheckPointCallback *checkPointCallback) const;
+
+    const std::string &getName(void) const
+    {
+        return _name;
+    }
+
+    uint32_t
+    getDocIdLimit(void) const
+    {
+        return _docIdLimit;
+    }
+
+    const PosOccFieldsParams &
+    getFieldsParams(void) const
+    {
+        return _fieldsParams;
+    }
+
+    uint32_t
+    getPackedIndex(void) const
+    {
+        return _packedIndex;
+    }
+
+    void
+    addDocIdBias(uint32_t docIdBias);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
new file mode 100644
index 00000000000..4ecf04bb59c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.cpp
@@ -0,0 +1,161 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include
+LOG_SETUP(".fakewordset");
+#include "fakewordset.h"
+#include "fakeword.h"
+#include
+
+namespace search
+{
+
+namespace fakedata
+{
+
+using index::PostingListParams;
+using index::SchemaUtil;
+
+// Delete every word held by v, then empty the vector.
+static void
+clearFakeWordVector(std::vector &v)
+{
+    for (unsigned int i = 0; i < v.size(); ++i)
+        delete v[i];
+    v.clear();
+}
+
+
+// Apply docIdBias to every word held by v.
+static void
+applyDocIdBiasToVector(std::vector &v, uint32_t docIdBias)
+{
+    for (unsigned int i = 0; i < v.size(); ++i)
+        v[i]->addDocIdBias(docIdBias);
+}
+
+
+// Word set with one word list per word class and a schema set up
+// without elements and element weights.
+FakeWordSet::FakeWordSet(void)
+    : _words(NUM_WORDCLASSES),
+      _schema(),
+      _fieldsParams()
+{
+    setupParams(false, false);
+}
+
+
+// As the default constructor, but with caller selected schema options.
+FakeWordSet::FakeWordSet(bool hasElements,
+                         bool hasElementWeights)
+    : _words(NUM_WORDCLASSES),
+      _schema(),
+      _fieldsParams()
+{
+    setupParams(hasElements, hasElementWeights);
+}
+
+
+FakeWordSet::~FakeWordSet(void)
+{
+    dropWords();
+}
+
+
+/*
+ * Rebuild the schema with a single string index field "field0" and
+ * derive the per field parameters from it.
+ *
+ * The collection type is SINGLE without elements, ARRAY with elements
+ * and WEIGHTEDSET with elements and element weights.  Element weights
+ * without elements is rejected by assert().
+ */
+void
+FakeWordSet::setupParams(bool hasElements,
+                         bool hasElementWeights)
+{
+    _schema.clear();
+
+    assert(hasElements || !hasElementWeights);
+    Schema::CollectionType collectionType(Schema::SINGLE);
+    if (hasElements) {
+        if (hasElementWeights)
+            collectionType = Schema::WEIGHTEDSET;
+        else
+            collectionType = Schema::ARRAY;
+    }
+    Schema::IndexField indexField("field0",
+                                  Schema::STRING,
+                                  collectionType);
+    indexField.setAvgElemLen(512u);
+    _schema.addIndexField(indexField);
+    _fieldsParams.resize(_schema.getNumIndexFields());
+    SchemaUtil::IndexIterator it(_schema);
+    for(; it.isValid(); ++it) {
+        _fieldsParams[it.getIndex()].
+            setSchemaParams(_schema, it.getIndex());
+    }
+}
+
+
+/*
+ * Create numWordsPerWordClass words in each word class, named
+ * "common<i>", "medium<i>" and "rare<i>" with i counting from 1.
+ *
+ * Common words are created with commonDocFreq and commonDocFreq / 2 as
+ * the two count arguments, medium words with 1000 and 500, rare words
+ * with 10 and 5.  All words use the last entry of _fieldsParams.  The
+ * elapsed time is logged.
+ */
+void
+FakeWordSet::setupWords(search::Rand48 &rnd,
+                        unsigned int numDocs,
+                        unsigned int commonDocFreq,
+                        unsigned int numWordsPerWordClass)
+{
+    std::string common = "common";
+    std::string medium = "medium";
+    std::string rare = "rare";
+    FakeWord *fw;
+    FastOS_Time tv;
+    double before;
+    double after;
+
+    LOG(info, "enter setupWords");
+    tv.SetNow();
+    before = tv.Secs();
+    uint32_t packedIndex = _fieldsParams.size() - 1;
+    for (unsigned int i = 0; i < numWordsPerWordClass; ++i) {
+        std::ostringstream vi;
+
+        vi << (i + 1);
+        fw = new FakeWord(numDocs, commonDocFreq, commonDocFreq / 2,
+                          common + vi.str(), rnd,
+                          _fieldsParams[packedIndex],
+                          packedIndex);
+        _words[COMMON_WORD].push_back(fw);
+        fw = new FakeWord(numDocs, 1000, 500,
+                          medium + vi.str(), rnd,
+                          _fieldsParams[packedIndex],
+                          packedIndex);
+        _words[MEDIUM_WORD].push_back(fw);
+        fw = new FakeWord(numDocs, 10, 5,
+                          rare + vi.str(), rnd,
+                          _fieldsParams[packedIndex],
+                          packedIndex);
+        _words[RARE_WORD].push_back(fw);
+    }
+    tv.SetNow();
+    after = tv.Secs();
+    LOG(info, "leave setupWords, elapsed %10.6f s", after - before);
+}
+
+
+// Delete all words in all word classes.
+void
+FakeWordSet::dropWords(void)
+{
+    for (unsigned int i = 0; i < _words.size(); ++i)
+        clearFakeWordVector(_words[i]);
+}
+
+
+// Total number of words over all word classes.
+int
+FakeWordSet::getNumWords(void)
+{
+    int ret = 0;
+    for (unsigned int i = 0; i < _words.size(); ++i)
+        ret += _words[i].size();
+    return ret;
+}
+
+
+// Apply docIdBias to all words in all word classes.
+void
+FakeWordSet::addDocIdBias(uint32_t docIdBias)
+{
+    for (unsigned int i = 0; i < _words.size(); ++i)
+        applyDocIdBiasToVector(_words[i], docIdBias);
+}
+
+
+} // namespace fakedata
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
new file mode 100644
index 00000000000..51e87ffd817
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakewordset.h
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+#include
+#include
+
+namespace search
+{
+class Rand48;
+}
+
+namespace search
+{
+
+namespace fakedata
+{
+
+class FakeWord;
+
+/*
+ * Set of fake words grouped by word class (common, medium, rare),
+ * together with the schema and field parameters they are built for.
+ * The set owns its words; they are deleted by dropWords() and by the
+ * destructor.
+ */
+class FakeWordSet
+{
+public:
+    typedef bitcompression::PosOccFieldsParams PosOccFieldsParams;
+    typedef bitcompression::PosOccFieldParams PosOccFieldParams;
+    typedef index::Schema Schema;
+
+    // Word classes; also the indexes into _words.
+    enum {
+        COMMON_WORD,
+        MEDIUM_WORD,
+        RARE_WORD,
+        NUM_WORDCLASSES,
+    };
+    std::vector > _words;
+    Schema _schema;
+    std::vector _fieldsParams;
+
+    FakeWordSet(void);
+
+    FakeWordSet(bool hasElements,
+                bool hasElementWeights);
+
+    ~FakeWordSet(void);
+
+    void
+    setupParams(bool hasElements,
+                bool hasElementWeights);
+
+    void
+    setupWords(search::Rand48 &rnd,
+               unsigned int numDocs,
+               unsigned int commonDocFreq,
+               unsigned int numWordsPerWordClass);
+
+    void
+    dropWords(void);
+
+    int
+    getNumWords(void);
+
+    // Parameters of the last field, the one the words are built for.
+    const PosOccFieldsParams &
+    getFieldsParams(void) const
+    {
+        return _fieldsParams.back();
+    }
+
+    // Index of the last field.
+    uint32_t
+    getPackedIndex(void) const
+    {
+        return _fieldsParams.size() - 1;
+    }
+
+    const std::vector &
+    getAllFieldsParams(void) const
+    {
+        return _fieldsParams;
+    }
+
+    const Schema &
+    getSchema(void) const
+    {
+        return _schema;
+    }
+
+    void
+    addDocIdBias(uint32_t docIdBias);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp
new file mode 100644
index 00000000000..b1539e2ea2d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.cpp
@@ -0,0 +1,268 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".fakezcbfilterocc"); +#include "fakezcbfilterocc.h" +#include +#include +#include +#include "fpfactory.h" + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; + +namespace search +{ + +namespace fakedata +{ + +static FPFactoryInit +init(std::make_pair("ZcbFilterOcc", + makeFPFactory >)); + +static void +zcbEncode(std::vector &bytes, + uint32_t num) +{ + if (num < (1 << 7)) { + num <<= 1; + num += 1; + } else if (num < (1 << 14)) { + num <<= 2; + num += 2; + } else if (num < (1 << 21)) { + num <<= 3; + num += 4; + } else + num <<= 4; + + do { + bytes.push_back(num & 0xff); + num >>= 8; + } while (num != 0); +} + + +#define ZCBDECODE(valI, resop) \ +do { \ + if (__builtin_expect((valI[0] & 1) != 0, true)) { \ + resop (valI[0] >> 1); \ + valI += 1; \ + } else if (__builtin_expect((valI[0] & 2) != 0, true)) { \ + resop (((*(const uint32_t *) valI) >> 2) & ((1 << 14) - 1)); \ + valI += 2; \ + } else if (__builtin_expect((valI[0] & 4) != 0, true)) { \ + resop (((*(const uint32_t *) valI) >> 3) & ((1 << 21) - 1)); \ + valI += 3; \ + } else { \ + resop ((*(const uint32_t *) valI) >> 4); \ + valI += 4; \ + } \ +} while (0) + +FakeZcbFilterOcc::FakeZcbFilterOcc(const FakeWord &fw) + : FakePosting(fw.getName() + ".zcbfilterocc"), + _compressed(), + _docIdLimit(0), + _hitDocs(0), + _bitSize(0) +{ + std::vector bytes; + uint32_t lastDocId = 0u; + + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + typedef FW::DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + DWPFL::const_iterator p(fw._wordPosFeatures.begin()); + DWPFL::const_iterator pe(fw._wordPosFeatures.end()); + + while (d != de) { + if (lastDocId == 0u) { + zcbEncode(bytes, d->_docId - 1); + } else { + uint32_t docIdDelta = d->_docId - lastDocId; + zcbEncode(bytes, docIdDelta - 1); + } + lastDocId = d->_docId; + ++d; + } + // 3 padding bytes to ensure ZCBDECODE 
reads initialized memory. + bytes.push_back(0); + bytes.push_back(0); + bytes.push_back(0); + _hitDocs = fw._postings.size(); + std::swap(_compressed, bytes); + _docIdLimit = fw._docIdLimit; +} + + +FakeZcbFilterOcc::~FakeZcbFilterOcc(void) +{ +} + + +void +FakeZcbFilterOcc::forceLink(void) +{ +} + + +size_t +FakeZcbFilterOcc::bitSize(void) const +{ + // Do not count the 3 padding bytes here. + return 8 * (_compressed.size() - 3) ; +} + +bool +FakeZcbFilterOcc::hasWordPositions(void) const +{ + return false; +} + + +int +FakeZcbFilterOcc::lowLevelSinglePostingScan(void) const +{ + return 0; +} + + +int +FakeZcbFilterOcc::lowLevelSinglePostingScanUnpack(void) const +{ + return 0; +} + + +int +FakeZcbFilterOcc:: +lowLevelAndPairPostingScan(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +int +FakeZcbFilterOcc:: +lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +class FakeFilterOccZCBArrayIterator + : public queryeval::RankedSearchIteratorBase +{ +private: + + FakeFilterOccZCBArrayIterator(const FakeFilterOccZCBArrayIterator &other); + + FakeFilterOccZCBArrayIterator& + operator=(const FakeFilterOccZCBArrayIterator &other); + +public: + // Pointer to compressed data + const uint8_t *_valI; + unsigned int _residue; + + FakeFilterOccZCBArrayIterator(const uint8_t *compressedOccurrences, + unsigned int residue, + const fef::TermFieldMatchDataArray &matchData); + + ~FakeFilterOccZCBArrayIterator(void); + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +FakeFilterOccZCBArrayIterator:: +FakeFilterOccZCBArrayIterator(const uint8_t *compressedOccurrences, + unsigned int residue, + const fef::TermFieldMatchDataArray &matchData) + : queryeval::RankedSearchIteratorBase(matchData), + _valI(compressedOccurrences), + _residue(residue) +{ + clearUnpacked(); 
+} + +void +FakeFilterOccZCBArrayIterator::initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + uint32_t docId = 0; + if (_residue > 0) { + ZCBDECODE(_valI, docId = 1 +); + setDocId(docId); + } else { + setAtEnd(); + } +} + + +FakeFilterOccZCBArrayIterator:: +~FakeFilterOccZCBArrayIterator(void) +{ +} + + +void +FakeFilterOccZCBArrayIterator::doSeek(uint32_t docId) +{ + const uint8_t *oCompr = _valI; + uint32_t oDocId = getDocId(); + + if (getUnpacked()) + clearUnpacked(); + while (oDocId < docId) { + if (--_residue == 0) + goto atbreak; + ZCBDECODE(oCompr, oDocId += 1 +); + } + _valI = oCompr; + setDocId(oDocId); + return; + atbreak: + _valI = oCompr; + setAtEnd(); // Mark end of data + return; +} + + +void +FakeFilterOccZCBArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + setUnpacked(); +} + + +search::queryeval::SearchIterator * +FakeZcbFilterOcc:: +createIterator(const fef::TermFieldMatchDataArray &matchData) const +{ + const uint8_t *arr = &*_compressed.begin(); + return new FakeFilterOccZCBArrayIterator(arr, + _hitDocs, + matchData); +} + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h new file mode 100644 index 00000000000..c9d183af80e --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcbfilterocc.h @@ -0,0 +1,75 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "fakeword.h" +#include "fakeposting.h" + +namespace search +{ + +namespace fakedata +{ + +/* + * YST style compression of docid list. 
+ */ +class FakeZcbFilterOcc : public FakePosting +{ +private: + std::vector _compressed; + unsigned int _docIdLimit; + unsigned int _hitDocs; + size_t _bitSize; +public: + FakeZcbFilterOcc(const FakeWord &fw); + + ~FakeZcbFilterOcc(void); + + static void + forceLink(void); + + /* + * Size of posting list, in bits. + */ + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + /* + * Single posting list performance, without feature unpack. + */ + virtual int + lowLevelSinglePostingScan(void) const; + + /* + * Single posting list performance, with feature unpack. + */ + virtual int + lowLevelSinglePostingScanUnpack(void) const; + + /* + * Two posting lists performance (same format) without feature unpack. + */ + virtual int + lowLevelAndPairPostingScan(const FakePosting &rhs) const; + + /* + * Two posting lists performance (same format) with feature unpack. + */ + virtual int + lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const; + + + /* + * Iterator factory, for current query evaluation framework. + */ + virtual search::queryeval::SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + +} // namespace fakedata + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp new file mode 100644 index 00000000000..2fc379c8a71 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -0,0 +1,1823 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include +LOG_SETUP(".fakezcfilterocc"); +#include "fakezcfilterocc.h" +#include +#include +#include +#include +#include +#include "fpfactory.h" + + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::fef::TermFieldMatchDataPosition; +using search::queryeval::SearchIterator; +using search::index::PostingListParams; +using search::index::DocIdAndFeatures; +using search::index::DocIdAndPosOccFeatures; +using search::bitcompression::PosOccFieldParams; +using search::bitcompression::EGPosOccEncodeContext; +using search::bitcompression::EG2PosOccEncodeContext; +using search::bitcompression::FeatureEncodeContext; +using search::ComprFileWriteContext; +using namespace search::diskindex; + +namespace search +{ + +namespace fakedata +{ + + +#define L1SKIPSTRIDE 16 +#define L2SKIPSTRIDE 8 +#define L3SKIPSTRIDE 8 +#define L4SKIPSTRIDE 8 + +#define DEBUG_ZCFILTEROCC_PRINTF 0 +#define DEBUG_ZCFILTEROCC_ASSERT 0 + +static FPFactoryInit +init(std::make_pair("ZcFilterOcc", + makeFPFactory >)); + +static void +zcEncode(std::vector &bytes, + uint32_t num) +{ + for (;;) { + if (num < (1 << 7)) { + bytes.push_back(num); + break; + } + bytes.push_back((num & ((1 << 7) - 1)) | (1 << 7)); + num >>= 7; + } +} + +#define ZCDECODE(valI, resop) \ +do { \ + if (__builtin_expect(valI[0] < (1 << 7), true)) { \ + resop valI[0]; \ + valI += 1; \ + } else if (__builtin_expect(valI[1] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + (valI[1] << 7); \ + valI += 2; \ + } else if (__builtin_expect(valI[2] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 7) - 1)) << 7) + \ + (valI[2] << 14); \ + valI += 3; \ + } else if (__builtin_expect(valI[3] < (1 << 7), true)) { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 7) - 1)) << 7) + \ + ((valI[2] & ((1 << 7) - 1)) << 14) + \ + (valI[3] << 21); \ + valI += 4; \ + } else { \ + resop (valI[0] & ((1 << 7) - 1)) + \ + ((valI[1] & ((1 << 
7) - 1)) << 7) + \ + ((valI[2] & ((1 << 7) - 1)) << 14) + \ + ((valI[3] & ((1 << 7) - 1)) << 21) + \ + (valI[4] << 28); \ + valI += 5; \ + } \ +} while (0) + +FakeZcFilterOcc::FakeZcFilterOcc(const FakeWord &fw) + : FakePosting(fw.getName() + ".zcfilterocc"), + _docIdsSize(0), + _l1SkipSize(0), + _l2SkipSize(0), + _l3SkipSize(0), + _l4SkipSize(0), + _docIdLimit(0), + _hitDocs(0), + _lastDocId(0u), + _compressedBits(0), + _compressed(std::make_pair(static_cast(NULL), 0)), + _compressedMalloc(NULL), + _featuresSize(0), + _fieldsParams(fw.getFieldsParams()), + _bigEndian(true) +{ + setup(fw, false, true); +} + + +FakeZcFilterOcc::FakeZcFilterOcc(const FakeWord &fw, + bool bigEndian, + const char *nameSuffix) + : FakePosting(fw.getName() + nameSuffix), + _docIdsSize(0), + _l1SkipSize(0), + _l2SkipSize(0), + _l3SkipSize(0), + _l4SkipSize(0), + _docIdLimit(0), + _hitDocs(0), + _lastDocId(0u), + _compressedBits(0), + _compressed(std::make_pair(static_cast(NULL), 0)), + _featuresSize(0), + _fieldsParams(fw.getFieldsParams()), + _bigEndian(bigEndian) +{ + // subclass responsible for calling setup(fw, false/true); +} + + +void +FakeZcFilterOcc::setup(const FakeWord &fw, bool doFeatures, + bool dynamicK) +{ + if (_bigEndian) + setupT(fw, doFeatures, dynamicK); + else + setupT(fw, doFeatures, dynamicK); +} + + +template +void +FakeZcFilterOcc::setupT(const FakeWord &fw, bool doFeatures, + bool dynamicK) +{ + std::vector bytes; + std::vector l1SkipBytes; + std::vector l2SkipBytes; + std::vector l3SkipBytes; + std::vector l4SkipBytes; + uint32_t lastDocId = 0u; + uint32_t lastL1SkipDocId = 0u; + uint64_t lastL1SkipDocIdPos = 0; + uint64_t lastL1SkipFeaturePos = 0; + unsigned int l1SkipCnt = 0; + uint32_t lastL2SkipDocId = 0u; + uint64_t lastL2SkipDocIdPos = 0; + uint64_t lastL2SkipFeaturePos = 0; + uint64_t lastL2SkipL1SkipPos = 0; + unsigned int l2SkipCnt = 0; + uint32_t lastL3SkipDocId = 0u; + uint64_t lastL3SkipDocIdPos = 0; + uint64_t lastL3SkipFeaturePos = 0; + uint64_t 
lastL3SkipL1SkipPos = 0; + uint64_t lastL3SkipL2SkipPos = 0; + unsigned int l3SkipCnt = 0; + uint32_t lastL4SkipDocId = 0u; + uint64_t lastL4SkipDocIdPos = 0; + uint64_t lastL4SkipFeaturePos = 0; + uint64_t lastL4SkipL1SkipPos = 0; + uint64_t lastL4SkipL2SkipPos = 0; + uint64_t lastL4SkipL3SkipPos = 0; + unsigned int l4SkipCnt = 0; + uint64_t featurePos = 0; + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + typedef FW::DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + DWPFL::const_iterator p(fw._wordPosFeatures.begin()); + DWPFL::const_iterator pe(fw._wordPosFeatures.end()); + DocIdAndPosOccFeatures features; + EGPosOccEncodeContext f1(&_fieldsParams); + EG2PosOccEncodeContext f0(&_fieldsParams); + FeatureEncodeContext &f = (dynamicK ? + static_cast &>(f1) : + static_cast &>(f0)); + search::ComprFileWriteContext fctx(f); + f.setWriteContext(&fctx); + fctx.allocComprBuf(64, 1); + f.afterWrite(fctx, 0, 0); + + while (d != de) { + if (l1SkipCnt >= L1SKIPSTRIDE) { + uint32_t docIdDelta = lastDocId - lastL1SkipDocId; + assert(static_cast(docIdDelta) > 0); + zcEncode(l1SkipBytes, docIdDelta - 1); + uint64_t lastDocIdPos = bytes.size(); + uint32_t docIdPosDelta = lastDocIdPos - lastL1SkipDocIdPos; + zcEncode(l1SkipBytes, docIdPosDelta - 1); + if (doFeatures) { + featurePos = f.getWriteOffset(); + zcEncode(l1SkipBytes, featurePos - lastL1SkipFeaturePos - 1); + lastL1SkipFeaturePos = featurePos; + } +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L1Encode docId=%d (+%d), docIdPos=%d (+%u)\n", + lastDocId, docIdDelta, + (int) lastDocIdPos, docIdPosDelta); +#endif + lastL1SkipDocId = lastDocId; + lastL1SkipDocIdPos = lastDocIdPos; + l1SkipCnt = 0; + ++l2SkipCnt; + if (l2SkipCnt >= L2SKIPSTRIDE) { + docIdDelta = lastDocId - lastL2SkipDocId; + docIdPosDelta = lastDocIdPos - lastL2SkipDocIdPos; + uint64_t lastL1SkipPos = l1SkipBytes.size(); + uint32_t l1SkipPosDelta = lastL1SkipPos - 
lastL2SkipL1SkipPos; + zcEncode(l2SkipBytes, docIdDelta - 1); + zcEncode(l2SkipBytes, docIdPosDelta - 1); + if (doFeatures) { + zcEncode(l2SkipBytes, + featurePos - lastL2SkipFeaturePos - 1); + lastL2SkipFeaturePos = featurePos; + } + zcEncode(l2SkipBytes, l1SkipPosDelta - 1); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L2Encode docId=%d (+%d), docIdPos=%d (+%u)," + " l1SkipPos=%d (+%u)\n", + lastDocId, docIdDelta, + (int) lastDocIdPos, docIdPosDelta, + (int) lastL1SkipPos, l1SkipPosDelta); +#endif + lastL2SkipDocId = lastDocId; + lastL2SkipDocIdPos = lastDocIdPos; + lastL2SkipL1SkipPos = lastL1SkipPos; + l2SkipCnt = 0; + ++l3SkipCnt; + if (l3SkipCnt >= L3SKIPSTRIDE) { + docIdDelta = lastDocId - lastL3SkipDocId; + docIdPosDelta = lastDocIdPos - lastL3SkipDocIdPos; + l1SkipPosDelta = lastL1SkipPos - lastL3SkipL1SkipPos; + uint64_t lastL2SkipPos = l2SkipBytes.size(); + uint32_t l2SkipPosDelta = lastL2SkipPos - + lastL3SkipL2SkipPos; + zcEncode(l3SkipBytes, docIdDelta - 1); + zcEncode(l3SkipBytes, docIdPosDelta - 1); + if (doFeatures) { + zcEncode(l3SkipBytes, + featurePos - lastL3SkipFeaturePos - 1); + lastL3SkipFeaturePos = featurePos; + } + zcEncode(l3SkipBytes, l1SkipPosDelta - 1); + zcEncode(l3SkipBytes, l2SkipPosDelta - 1); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L3Encode docId=%d (+%d), docIdPos=%d (+%u)," + " l1SkipPos=%d (+%u) l2SkipPos %d (+%u)\n", + lastDocId, docIdDelta, + (int) lastDocIdPos, docIdPosDelta, + (int) lastL1SkipPos, l1SkipPosDelta, + (int) lastL2SkipPos, l2SkipPosDelta); +#endif + lastL3SkipDocId = lastDocId; + lastL3SkipDocIdPos = lastDocIdPos; + lastL3SkipL1SkipPos = lastL1SkipPos; + lastL3SkipL2SkipPos = lastL2SkipPos; + l3SkipCnt = 0; + ++l4SkipCnt; + if (l4SkipCnt >= L4SKIPSTRIDE) { + docIdDelta = lastDocId - lastL4SkipDocId; + docIdPosDelta = lastDocIdPos - lastL4SkipDocIdPos; + l1SkipPosDelta = lastL1SkipPos - lastL4SkipL1SkipPos; + l2SkipPosDelta = lastL2SkipPos - lastL4SkipL2SkipPos; + uint64_t lastL3SkipPos = l3SkipBytes.size(); + 
uint32_t l3SkipPosDelta = lastL3SkipPos - + lastL4SkipL3SkipPos; + zcEncode(l4SkipBytes, docIdDelta - 1); + zcEncode(l4SkipBytes, docIdPosDelta - 1); + if (doFeatures) { + zcEncode(l4SkipBytes, + featurePos - lastL4SkipFeaturePos - 1); + lastL4SkipFeaturePos = featurePos; + } + zcEncode(l4SkipBytes, l1SkipPosDelta - 1); + zcEncode(l4SkipBytes, l2SkipPosDelta - 1); + zcEncode(l4SkipBytes, l3SkipPosDelta - 1); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L4Encode docId=%d (+%d), docIdPos=%d (+%u)," + " l1SkipPos=%d (+%u) l2SkipPos %d (+%u)" + " l3SkipPos=%d (+%u)\n", + lastDocId, docIdDelta, + (int) lastDocIdPos, docIdPosDelta, + (int) lastL1SkipPos, l1SkipPosDelta, + (int) lastL2SkipPos, l2SkipPosDelta, + (int) lastL3SkipPos, l3SkipPosDelta); +#endif + lastL4SkipDocId = lastDocId; + lastL4SkipDocIdPos = lastDocIdPos; + lastL4SkipL1SkipPos = lastL1SkipPos; + lastL4SkipL2SkipPos = lastL2SkipPos; + lastL4SkipL3SkipPos = lastL3SkipPos; + l4SkipCnt = 0; + } + } + } + } + if (lastDocId == 0u) { + zcEncode(bytes, d->_docId - 1); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Encode docId=%d\n", + d->_docId); +#endif + } else { + uint32_t docIdDelta = d->_docId - lastDocId; + zcEncode(bytes, docIdDelta - 1); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Encode docId=%d (+%d)\n", + d->_docId, docIdDelta); +#endif + } + if (doFeatures) { + fw.setupFeatures(*d, &*p, features); + p += d->_positions; + f.writeFeatures(features); + } + lastDocId = d->_docId; + ++l1SkipCnt; + ++d; + } + if (doFeatures) { + assert(p == pe); + _featuresSize = f.getWriteOffset(); + // First pad to 64 bits. + uint32_t pad = (64 - f.getWriteOffset()) & 63; + while (pad > 0) { + uint32_t now = std::min(32u, pad); + f.writeBits(0, now); + f.writeComprBufferIfNeeded(); + pad -= now; + } + + // Then write 128 more bits. 
This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + for (unsigned int i = 0; i < 4; i++) { + f.writeBits(0, 32); + f.writeComprBufferIfNeeded(); + } + f.writeComprBufferIfNeeded(); + f.flush(); + f.writeComprBuffer(); + } else { + _featuresSize = 0; + } + // Extra partial entries for skip tables to simplify iterator during search + if (l1SkipBytes.size() > 0) { + uint32_t docIdDelta = lastDocId - lastL1SkipDocId; + assert(static_cast(docIdDelta) > 0); + zcEncode(l1SkipBytes, docIdDelta - 1); + } + if (l2SkipBytes.size() > 0) { + uint32_t docIdDelta = lastDocId - lastL2SkipDocId; + assert(static_cast(docIdDelta) > 0); + zcEncode(l2SkipBytes, docIdDelta - 1); + } + if (l3SkipBytes.size() > 0) { + uint32_t docIdDelta = lastDocId - lastL3SkipDocId; + assert(static_cast(docIdDelta) > 0); + zcEncode(l3SkipBytes, docIdDelta - 1); + } + if (l4SkipBytes.size() > 0) { + uint32_t docIdDelta = lastDocId - lastL4SkipDocId; + assert(static_cast(docIdDelta) > 0); + zcEncode(l4SkipBytes, docIdDelta - 1); + } + _hitDocs = fw._postings.size(); + _docIdLimit = fw._docIdLimit; + _lastDocId = lastDocId; + FeatureEncodeContext e; + ComprFileWriteContext ectx(e); + e.setWriteContext(&ectx); + ectx.allocComprBuf(64, 1); + e.afterWrite(ectx, 0, 0); + + // Encode word header + e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + _docIdsSize = bytes.size() * 8; + _l1SkipSize = l1SkipBytes.size(); + _l2SkipSize = _l3SkipSize = _l4SkipSize = 0; + if (_l1SkipSize != 0) + _l2SkipSize = l2SkipBytes.size(); + if (_l2SkipSize != 0) + _l3SkipSize = l3SkipBytes.size(); + if (_l3SkipSize != 0) + _l4SkipSize = l4SkipBytes.size(); + + e.encodeExpGolomb(bytes.size() - 1, K_VALUE_ZCPOSTING_DOCIDSSIZE); + e.encodeExpGolomb(_l1SkipSize, K_VALUE_ZCPOSTING_L1SKIPSIZE); + e.writeComprBufferIfNeeded(); + if (_l1SkipSize != 0) { + e.encodeExpGolomb(_l2SkipSize, K_VALUE_ZCPOSTING_L2SKIPSIZE); + if (_l2SkipSize != 0) { + e.writeComprBufferIfNeeded(); + 
e.encodeExpGolomb(_l3SkipSize, K_VALUE_ZCPOSTING_L3SKIPSIZE); + if (_l3SkipSize != 0) { + e.encodeExpGolomb(_l4SkipSize, K_VALUE_ZCPOSTING_L4SKIPSIZE); + } + } + } + e.writeComprBufferIfNeeded(); + if (doFeatures) { + e.encodeExpGolomb(_featuresSize, K_VALUE_ZCPOSTING_FEATURESSIZE); + } + uint32_t docIdK = e.calcDocIdK(_hitDocs, _docIdLimit); + if (dynamicK) + e.encodeExpGolomb(_docIdLimit - 1 - _lastDocId, docIdK); + else + e.encodeExpGolomb(_docIdLimit - 1 - _lastDocId, + K_VALUE_ZCPOSTING_LASTDOCID); + uint64_t bytePad = (- e.getWriteOffset()) & 7; + if (bytePad > 0) + e.writeBits(0, bytePad); + size_t docIdSize = bytes.size(); + if (docIdSize > 0) { + uint8_t *docIdBytes = &bytes[0]; + uint32_t docIdBytesOffset = + reinterpret_cast(docIdBytes) & 7; + e.writeBits(reinterpret_cast(docIdBytes - + docIdBytesOffset), + docIdBytesOffset * 8, + docIdSize * 8); + } + if (_l1SkipSize > 0) { + uint8_t *l1Bytes = &l1SkipBytes[0]; + uint32_t l1BytesOffset = reinterpret_cast(l1Bytes) & 7; + e.writeBits(reinterpret_cast(l1Bytes - + l1BytesOffset), + l1BytesOffset * 8, + _l1SkipSize * 8); + if (_l2SkipSize > 0) { + uint8_t *l2Bytes = &l2SkipBytes[0]; + uint32_t l2BytesOffset = + reinterpret_cast(l2Bytes) & 7; + e.writeBits(reinterpret_cast(l2Bytes - + l2BytesOffset), + l2BytesOffset * 8, + _l2SkipSize * 8); + if (_l3SkipSize > 0) { + uint8_t *l3Bytes = &l3SkipBytes[0]; + uint32_t l3BytesOffset = + reinterpret_cast(l3Bytes) & 7; + e.writeBits(reinterpret_cast(l3Bytes - + l3BytesOffset), + l3BytesOffset * 8, + _l3SkipSize * 8); + if (_l4SkipSize > 0) { + uint8_t *l4Bytes = &l4SkipBytes[0]; + uint32_t l4BytesOffset = + reinterpret_cast(l4Bytes) & 7; + e.writeBits(reinterpret_cast(l4Bytes - + l4BytesOffset), + l4BytesOffset * 8, + _l4SkipSize * 8); + } + } + } + } + if (doFeatures) { + e.writeBits(static_cast(fctx._comprBuf), + 0, + _featuresSize); + } + _compressedBits = e.getWriteOffset(); + // First pad to 64 bits. 
+ uint32_t pad = (64 - e.getWriteOffset()) & 63; + while (pad > 0) { + uint32_t now = std::min(32u, pad); + e.writeBits(0, now); + e.writeComprBufferIfNeeded(); + pad -= now; + } + + // Then write 128 more bits. This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + for (unsigned int i = 0; i < 4; i++) { + e.writeBits(0, 32); + e.writeComprBufferIfNeeded(); + } + e.writeComprBufferIfNeeded(); + e.flush(); + e.writeComprBuffer(); + + std::pair ectxData = ectx.grabComprBuffer(_compressedMalloc); + _compressed = std::make_pair(static_cast(ectxData.first), + ectxData.second); +} + + +FakeZcFilterOcc::~FakeZcFilterOcc(void) +{ + free(_compressedMalloc); +} + + +void +FakeZcFilterOcc::forceLink(void) +{ +} + + +size_t +FakeZcFilterOcc::bitSize(void) const +{ + return _compressedBits - + (_l1SkipSize + _l2SkipSize + _l3SkipSize + _l4SkipSize) * 8; +} + + +bool +FakeZcFilterOcc::hasWordPositions(void) const +{ + return false; +} + + +size_t +FakeZcFilterOcc::skipBitSize(void) const +{ + return (_l1SkipSize + _l2SkipSize + _l3SkipSize + _l4SkipSize) * 8; +} + + +size_t +FakeZcFilterOcc::l1SkipBitSize(void) const +{ + return _l1SkipSize * 8; +} + + +size_t +FakeZcFilterOcc::l2SkipBitSize(void) const +{ + return _l2SkipSize * 8; +} + + +size_t +FakeZcFilterOcc::l3SkipBitSize(void) const +{ + return _l3SkipSize * 8; +} + + +size_t +FakeZcFilterOcc::l4SkipBitSize(void) const +{ + return _l4SkipSize * 8; +} + + +int +FakeZcFilterOcc::lowLevelSinglePostingScan(void) const +{ + return 0; +} + + +int +FakeZcFilterOcc::lowLevelSinglePostingScanUnpack(void) const +{ + return 0; +} + + +int +FakeZcFilterOcc:: +lowLevelAndPairPostingScan(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +int +FakeZcFilterOcc:: +lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const +{ + (void) rhs; + return 0; +} + + +class FakeFilterOccZCArrayIterator + : public queryeval::RankedSearchIteratorBase +{ +private: + 
FakeFilterOccZCArrayIterator(const FakeFilterOccZCArrayIterator &other); + + FakeFilterOccZCArrayIterator& + operator=(const FakeFilterOccZCArrayIterator &other); + +public: + // Pointer to compressed data + const uint8_t *_valI; + unsigned int _residue; + uint32_t _lastDocId; + + typedef search::bitcompression::FeatureDecodeContextBE DecodeContext; + typedef search::bitcompression::FeatureEncodeContextBE EncodeContext; + DecodeContext _decodeContext; + uint32_t _docIdLimit; + + FakeFilterOccZCArrayIterator(const uint64_t *compressed, + int bitOffset, + uint32_t docIdLimit, + const fef::TermFieldMatchDataArray &matchData); + + ~FakeFilterOccZCArrayIterator(void); + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +FakeFilterOccZCArrayIterator:: +FakeFilterOccZCArrayIterator(const uint64_t *compressed, + int bitOffset, + uint32_t docIdLimit, + const fef::TermFieldMatchDataArray &matchData) + : queryeval::RankedSearchIteratorBase(matchData), + _valI(NULL), + _residue(0), + _lastDocId(0), + _decodeContext(compressed, bitOffset), + _docIdLimit(docIdLimit) +{ + clearUnpacked(); +} + +void +FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + DecodeContext &d = _decodeContext; + typedef EncodeContext EC; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + uint32_t numDocs = static_cast(val64) + 1; + + uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit); + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); + uint32_t docIdsSize = val64 + 1; + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC); + uint32_t l1SkipSize = val64; + uint32_t l2SkipSize = 0; + if (l1SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, 
K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); + l2SkipSize = val64; + } + uint32_t l3SkipSize = 0; + if (l2SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); + l3SkipSize = val64; + } + uint32_t l4SkipSize = 0; + if (l3SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); + l4SkipSize = val64; + } + // Feature size would be here + UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC); + _lastDocId = _docIdLimit - 1 - val64; + UC64_DECODECONTEXT_STORE(o, d._); + uint64_t bytePad = oPreRead & 7; + if (bytePad > 0) { + length = bytePad; + oVal <<= length; + UC64BE_READBITS_NS(o, EC); + } + UC64_DECODECONTEXT_STORE(o, d._); + assert((d.getBitOffset() & 7) == 0); + const uint8_t *bcompr = d.getByteCompr(); + _valI = bcompr; + bcompr += docIdsSize; + bcompr += l1SkipSize; + bcompr += l2SkipSize; + bcompr += l3SkipSize; + bcompr += l4SkipSize; + d.setByteCompr(bcompr); + uint32_t oDocId; + ZCDECODE(_valI, oDocId = 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("DecodeInit docId=%d\n", + oDocId); +#endif + setDocId(oDocId); + _residue = numDocs; +} + + +FakeFilterOccZCArrayIterator:: +~FakeFilterOccZCArrayIterator(void) +{ +} + + +void +FakeFilterOccZCArrayIterator::doSeek(uint32_t docId) +{ + const uint8_t *oCompr = _valI; + uint32_t oDocId = getDocId(); + + if (getUnpacked()) + clearUnpacked(); + while (oDocId < docId) { + if (--_residue == 0) // no more doc ids left to decode + goto atbreak; + ZCDECODE(oCompr, oDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + _valI = oCompr; + setDocId(oDocId); + return; + atbreak: + _valI = oCompr; + setAtEnd(); // Mark end of data + return; +} + + +void +FakeFilterOccZCArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + setUnpacked(); +} + + +SearchIterator * +FakeZcFilterOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new
FakeFilterOccZCArrayIterator(_compressed.first, 0, _docIdLimit, matchData); +} + +template +class FakeZcSkipFilterOcc : public FakeZcFilterOcc +{ +public: + FakeZcSkipFilterOcc(const FakeWord &fw); + + ~FakeZcSkipFilterOcc(void); + + virtual SearchIterator * + createIterator(const TermFieldMatchDataArray &matchData) const; +}; + +static FPFactoryInit +initNoSkip(std::make_pair("ZcNoSkipFilterOcc", + makeFPFactory > >)); + + +static FPFactoryInit +initSkip(std::make_pair("ZcSkipFilterOcc", + makeFPFactory > >)); + +template<> +FakeZcSkipFilterOcc::FakeZcSkipFilterOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, true, ".zcnoskipfilterocc") +{ + setup(fw, false, true); +} + + +template<> +FakeZcSkipFilterOcc::FakeZcSkipFilterOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, true, ".zcskipfilterocc") +{ + setup(fw, false, true); +} + + +template +FakeZcSkipFilterOcc::~FakeZcSkipFilterOcc(void) +{ +} + + +template +class FakeFilterOccZCSkipArrayIterator + : public queryeval::RankedSearchIteratorBase +{ +private: + + FakeFilterOccZCSkipArrayIterator(const FakeFilterOccZCSkipArrayIterator &other); + + FakeFilterOccZCSkipArrayIterator& + operator=(const FakeFilterOccZCSkipArrayIterator &other); + +public: + // Pointer to compressed data + const uint8_t *_valI; + uint32_t _lastDocId; + uint32_t _l1SkipDocId; + uint32_t _l2SkipDocId; + uint32_t _l3SkipDocId; + uint32_t _l4SkipDocId; + const uint8_t *_l1SkipDocIdPos; + const uint8_t *_l1SkipValI; + const uint8_t *_valIBase; + const uint8_t *_l1SkipValIBase; + const uint8_t *_l2SkipDocIdPos; + const uint8_t *_l2SkipValI; + const uint8_t *_l2SkipL1SkipPos; + const uint8_t *_l2SkipValIBase; + const uint8_t *_l3SkipDocIdPos; + const uint8_t *_l3SkipValI; + const uint8_t *_l3SkipL1SkipPos; + const uint8_t *_l3SkipL2SkipPos; + const uint8_t *_l3SkipValIBase; + const uint8_t *_l4SkipDocIdPos; + const uint8_t *_l4SkipValI; + const uint8_t *_l4SkipL1SkipPos; + const uint8_t *_l4SkipL2SkipPos; + const uint8_t *_l4SkipL3SkipPos; + + 
typedef search::bitcompression::FeatureDecodeContextBE DecodeContext; + typedef search::bitcompression::FeatureEncodeContextBE EncodeContext; + DecodeContext _decodeContext; + uint32_t _docIdLimit; + + FakeFilterOccZCSkipArrayIterator(const uint64_t *compressed, + int bitOffset, + uint32_t docIdLimit, + const TermFieldMatchDataArray &matchData); + + ~FakeFilterOccZCSkipArrayIterator(void); + + void doL4SkipSeek(uint32_t docId); + void doL3SkipSeek(uint32_t docId); + void doL2SkipSeek(uint32_t docId); + void doL1SkipSeek(uint32_t docId); + + void doUnpack(uint32_t docId) override; + void doSeek(uint32_t docId) override; + void initRange(uint32_t begin, uint32_t end) override; + Trinary is_strict() const override { return Trinary::True; } +}; + + +template +FakeFilterOccZCSkipArrayIterator:: +FakeFilterOccZCSkipArrayIterator(const uint64_t *compressed, + int bitOffset, + uint32_t docIdLimit, + const fef::TermFieldMatchDataArray &matchData) + : queryeval::RankedSearchIteratorBase(matchData), + _valI(NULL), + _lastDocId(0), + _l1SkipDocId(0), + _l2SkipDocId(0), + _l3SkipDocId(0), + _l4SkipDocId(0), + _l1SkipDocIdPos(NULL), + _l1SkipValI(NULL), + _valIBase(NULL), + _l1SkipValIBase(NULL), + _l2SkipDocIdPos(NULL), + _l2SkipValI(NULL), + _l2SkipL1SkipPos(NULL), + _l2SkipValIBase(NULL), + _l3SkipDocIdPos(NULL), + _l3SkipValI(NULL), + _l3SkipL1SkipPos(NULL), + _l3SkipL2SkipPos(NULL), + _l3SkipValIBase(NULL), + _l4SkipDocIdPos(NULL), + _l4SkipValI(NULL), + _l4SkipL1SkipPos(NULL), + _l4SkipL2SkipPos(NULL), + _l4SkipL3SkipPos(NULL), + _decodeContext(compressed, bitOffset), + _docIdLimit(docIdLimit) +{ +} + +template +void +FakeFilterOccZCSkipArrayIterator:: +initRange(uint32_t begin, uint32_t end) +{ + queryeval::RankedSearchIteratorBase::initRange(begin, end); + DecodeContext &d = _decodeContext; + typedef EncodeContext EC; + UC64_DECODECONTEXT_CONSTRUCTOR(o, d._); + uint32_t length; + uint64_t val64; + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); + uint32_t 
numDocs = static_cast(val64) + 1; + + uint32_t docIdK = EC::calcDocIdK(numDocs, _docIdLimit); + + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); + uint32_t docIdsSize = val64 + 1; + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC); + uint32_t l1SkipSize = val64; + uint32_t l2SkipSize = 0; + if (l1SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); + l2SkipSize = val64; + } + uint32_t l3SkipSize = 0; + if (l2SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); + l3SkipSize = val64; + } + uint32_t l4SkipSize = 0; + if (l3SkipSize != 0) { + UC64BE_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); + l4SkipSize = val64; + } + // Feature size would be here + UC64BE_DECODEEXPGOLOMB_NS(o, docIdK, EC); + _lastDocId = _docIdLimit - 1 - val64; + UC64_DECODECONTEXT_STORE(o, d._); + uint64_t bytePad = oPreRead & 7; + if (bytePad > 0) { + length = bytePad; + oVal <<= length; + UC64BE_READBITS_NS(o, EC); + } + UC64_DECODECONTEXT_STORE(o, d._); + assert((d.getBitOffset() & 7) == 0); + const uint8_t *bcompr = d.getByteCompr(); + _valIBase = _valI = bcompr; + _l1SkipDocIdPos = _l2SkipDocIdPos = bcompr; + _l3SkipDocIdPos = _l4SkipDocIdPos = bcompr; + bcompr += docIdsSize; + if (l1SkipSize != 0) { + _l1SkipValIBase = _l1SkipValI = bcompr; + _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr; + bcompr += l1SkipSize; + } else { + _l1SkipValIBase = _l1SkipValI = NULL; + _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL; + } + if (l2SkipSize != 0) { + _l2SkipValIBase = _l2SkipValI = bcompr; + _l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr; + bcompr += l2SkipSize; + } else { + _l2SkipValIBase = _l2SkipValI = NULL; + _l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL; + } + if (l3SkipSize != 0) { + _l3SkipValIBase = _l3SkipValI = bcompr; + _l4SkipL3SkipPos = bcompr; + bcompr += l3SkipSize; + } else { + _l3SkipValIBase = _l3SkipValI = NULL; + _l4SkipL3SkipPos = NULL; + } + if 
(l4SkipSize != 0) { + _l4SkipValI = bcompr; + bcompr += l4SkipSize; + } else { + _l4SkipValI = NULL; + } + d.setByteCompr(bcompr); + uint32_t oDocId; + ZCDECODE(_valI, oDocId = 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("DecodeInit docId=%d\n", + oDocId); +#endif + setDocId(oDocId); + if (_l1SkipValI != NULL) { + ZCDECODE(_l1SkipValI, _l1SkipDocId = 1 +); + } else + _l1SkipDocId = _lastDocId; +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L1DecodeInit docId=%d\n", + _l1SkipDocId); +#endif + if (_l2SkipValI != NULL) { + ZCDECODE(_l2SkipValI, _l2SkipDocId = 1 +); + } else + _l2SkipDocId = _lastDocId; +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L2DecodeInit docId=%d\n", + _l2SkipDocId); +#endif + if (_l3SkipValI != NULL) { + ZCDECODE(_l3SkipValI, _l3SkipDocId = 1 +); + } else + _l3SkipDocId = _lastDocId; +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L3DecodeInit docId=%d\n", + _l3SkipDocId); +#endif + if (_l4SkipValI != NULL) { + ZCDECODE(_l4SkipValI, _l4SkipDocId = 1 +); + } else + _l4SkipDocId = _lastDocId; +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L4DecodeInit docId=%d\n", + _l4SkipDocId); +#endif + clearUnpacked(); +} + + +template +FakeFilterOccZCSkipArrayIterator:: +~FakeFilterOccZCSkipArrayIterator(void) +{ +} + + +template <> +void +FakeFilterOccZCSkipArrayIterator::doL4SkipSeek(uint32_t docId) +{ + uint32_t lastL4SkipDocId; + + if (__builtin_expect(docId > _lastDocId, false)) { + _l4SkipDocId = _l3SkipDocId = _l2SkipDocId = _l1SkipDocId = search::endDocId; + setAtEnd(); + return; + } + do { + lastL4SkipDocId = _l4SkipDocId; + ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 + ); + ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 + ); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L4Decode docId %d, docIdPos %d," + " l1SkipPos %d, l2SkipPos %d, l3SkipPos %d, nextDocId %d\n", + lastL4SkipDocId, + (int) (_l4SkipDocIdPos - _valIBase), + (int) 
(_l4SkipL1SkipPos - _l1SkipValIBase), + (int) (_l4SkipL2SkipPos - _l2SkipValIBase), + (int) (_l4SkipL3SkipPos - _l3SkipValIBase), + _l4SkipDocId); +#endif + } while (docId > _l4SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos = + _l4SkipDocIdPos; + _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId; + _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos; + _l2SkipValI = _l3SkipL2SkipPos = _l4SkipL2SkipPos; + _l3SkipValI = _l4SkipL3SkipPos; + ZCDECODE(_valI, lastL4SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L4Seek, docId %d docIdPos %d" + " L1SkipPos %d L2SkipPos %d L3SkipPos %d, nextDocId %d\n", + lastL4SkipDocId, + (int) (_l4SkipDocIdPos - _valIBase), + (int) (_l4SkipL1SkipPos - _l1SkipValIBase), + (int) (_l4SkipL2SkipPos - _l2SkipValIBase), + (int) (_l4SkipL3SkipPos - _l3SkipValIBase), + _l4SkipDocId); +#endif + setDocId(lastL4SkipDocId); +} + + +template <> +void +FakeFilterOccZCSkipArrayIterator::doL3SkipSeek(uint32_t docId) +{ + uint32_t lastL3SkipDocId; + + if (__builtin_expect(docId > _l4SkipDocId, false)) { + doL4SkipSeek(docId); + if (docId <= _l3SkipDocId) + return; + } + do { + lastL3SkipDocId = _l3SkipDocId; + ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 + ); + ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 + ); + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 + ); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L3Decode docId %d, docIdPos %d," + " l1SkipPos %d, l2SkipPos %d, nextDocId %d\n", + lastL3SkipDocId, + (int) (_l3SkipDocIdPos - _valIBase), + (int) (_l3SkipL1SkipPos - _l1SkipValIBase), + (int) (_l3SkipL2SkipPos - _l2SkipValIBase), + _l3SkipDocId); +#endif + } while (docId > _l3SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos; + _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId; + _l1SkipValI = 
_l2SkipL1SkipPos = _l3SkipL1SkipPos; + _l2SkipValI = _l3SkipL2SkipPos; + ZCDECODE(_valI, lastL3SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L3Seek, docId %d docIdPos %d" + " L1SkipPos %d L2SkipPos %d, nextDocId %d\n", + lastL3SkipDocId, + (int) (_l3SkipDocIdPos - _valIBase), + (int) (_l3SkipL1SkipPos - _l1SkipValIBase), + (int) (_l3SkipL2SkipPos - _l2SkipValIBase), + _l3SkipDocId); +#endif + setDocId(lastL3SkipDocId); +} + + +template <> +void +FakeFilterOccZCSkipArrayIterator::doL2SkipSeek(uint32_t docId) +{ + uint32_t lastL2SkipDocId; + + if (__builtin_expect(docId > _l3SkipDocId, false)) { + doL3SkipSeek(docId); + if (docId <= _l2SkipDocId) + return; + } + do { + lastL2SkipDocId = _l2SkipDocId; + ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 + ); + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 + ); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L2Decode docId %d, docIdPos %d, l1SkipPos %d, nextDocId %d\n", + lastL2SkipDocId, + (int) (_l2SkipDocIdPos - _valIBase), + (int) (_l2SkipL1SkipPos - _l1SkipValIBase), + _l2SkipDocId); +#endif + } while (docId > _l2SkipDocId); + _valI = _l1SkipDocIdPos = _l2SkipDocIdPos; + _l1SkipDocId = lastL2SkipDocId; + _l1SkipValI = _l2SkipL1SkipPos; + ZCDECODE(_valI, lastL2SkipDocId += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n", + lastL2SkipDocId, + (int) (_l2SkipDocIdPos - _valIBase), + (int) (_l2SkipL1SkipPos - _l1SkipValIBase), + _l2SkipDocId); +#endif + setDocId(lastL2SkipDocId); +} + + +template <> +void +FakeFilterOccZCSkipArrayIterator::doL1SkipSeek(uint32_t docId) +{ + (void) docId; +} + + +template <> +void +FakeFilterOccZCSkipArrayIterator::doL1SkipSeek(uint32_t docId) +{ + uint32_t lastL1SkipDocId; + if (__builtin_expect(docId > _l2SkipDocId, false)) { + doL2SkipSeek(docId); + if 
(docId <= _l1SkipDocId) + return; + } + do { + lastL1SkipDocId = _l1SkipDocId; + ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +); + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L1Decode docId %d, docIdPos %d, L1SkipPos %d, nextDocId %d\n", + lastL1SkipDocId, + (int) (_l1SkipDocIdPos - _valIBase), + (int) (_l1SkipValI - _l1SkipValIBase), + _l1SkipDocId); +#endif + } while (docId > _l1SkipDocId); + _valI = _l1SkipDocIdPos; + ZCDECODE(_valI, lastL1SkipDocId += 1 +); + setDocId(lastL1SkipDocId); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n", + lastL1SkipDocId, + (int) (_l1SkipDocIdPos - _valIBase), + _l1SkipDocId); +#endif +} + + +template +void +FakeFilterOccZCSkipArrayIterator::doSeek(uint32_t docId) +{ + if (getUnpacked()) + clearUnpacked(); + if (doSkip && docId > _l1SkipDocId) { + doL1SkipSeek(docId); + } + uint32_t oDocId = getDocId(); + if (doSkip) { +#if DEBUG_ZCFILTEROCC_ASSERT + assert(oDocId <= _l1SkipDocId); + assert(docId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(docId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(docId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); + assert(docId <= _l4SkipDocId); +#endif + } + const uint8_t *oCompr = _valI; + while (__builtin_expect(oDocId < docId, true)) { + if (!doSkip) { + if (__builtin_expect(oDocId >= _lastDocId, false)) { +#if DEBUG_ZCFILTEROCC_ASSERT + assert(_l1SkipDocId == _lastDocId); + assert(_l2SkipDocId == _lastDocId); + assert(_l3SkipDocId == _lastDocId); + assert(_l4SkipDocId == _lastDocId); +#endif + oDocId = _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = + _l4SkipDocId = search::endDocId; + break; + } + } + if (doSkip) { +#if DEBUG_ZCFILTEROCC_ASSERT + assert(oDocId <= _l1SkipDocId); + assert(oDocId <= _l2SkipDocId); + assert(oDocId <= _l3SkipDocId); + assert(oDocId <= _l4SkipDocId); +#endif + } else if (__builtin_expect(oDocId >= _l1SkipDocId, false)) { + // Validate L1 Skip information 
+ assert(oDocId == _l1SkipDocId); + ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +); + assert(oCompr == _l1SkipDocIdPos); + if (__builtin_expect(oDocId >= _l2SkipDocId, false)) { + // Validate L2 Skip information (compare only, never modify oCompr) + assert(oDocId == _l2SkipDocId); + ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +); + ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 +); + assert(oCompr == _l2SkipDocIdPos); + assert(_l1SkipValI == _l2SkipL1SkipPos); + if (__builtin_expect(oDocId >= _l3SkipDocId, false)) { + // Validate L3 Skip information (compare only, never modify oCompr) + assert(oDocId == _l3SkipDocId); + ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 +); + ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 +); + assert(oCompr == _l3SkipDocIdPos); + assert(_l1SkipValI == _l3SkipL1SkipPos); + assert(_l2SkipValI == _l3SkipL2SkipPos); + if (__builtin_expect(oDocId >= _l4SkipDocId, false)) { + // Validate L4 Skip information (compare only, never modify oCompr) + assert(oDocId == _l4SkipDocId); + ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 +); + ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 +); + assert(oCompr == _l4SkipDocIdPos); + assert(_l1SkipValI == _l4SkipL1SkipPos); + assert(_l2SkipValI == _l4SkipL2SkipPos); + assert(_l3SkipValI == _l4SkipL3SkipPos); + ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 +); + assert(_l4SkipDocId <= _lastDocId); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L4DecodeV docId=%d docIdPos=%d" + " L1SkipPos=%d L2SkipPos %d L3SkipPos %d\n", + _l4SkipDocId, + (int) (_l4SkipDocIdPos - _valIBase), + (int) (_l4SkipL1SkipPos - _l1SkipValIBase), + (int) (_l4SkipL2SkipPos - _l2SkipValIBase), + (int) (_l4SkipL3SkipPos - _l3SkipValIBase)); +#endif + } + ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +); + assert(_l3SkipDocId <= _lastDocId); + assert(_l3SkipDocId <= _l4SkipDocId); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L3DecodeV docId=%d docIdPos=%d" + " L1SkipPos=%d L2SkipPos %d\n", + _l3SkipDocId, + (int) (_l3SkipDocIdPos -
_valIBase), + (int) (_l3SkipL1SkipPos - _l1SkipValIBase), + (int) (_l3SkipL2SkipPos - _l2SkipValIBase)); +#endif + } + ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +); + assert(_l2SkipDocId <= _lastDocId); + assert(_l2SkipDocId <= _l4SkipDocId); + assert(_l2SkipDocId <= _l3SkipDocId); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L2DecodeV docId=%d docIdPos=%d L1SkipPos=%d\n", + _l2SkipDocId, + (int) (_l2SkipDocIdPos - _valIBase), + (int) (_l2SkipL1SkipPos - _l1SkipValIBase)); +#endif + } + ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +); + assert(_l1SkipDocId <= _lastDocId); + assert(_l1SkipDocId <= _l4SkipDocId); + assert(_l1SkipDocId <= _l3SkipDocId); + assert(_l1SkipDocId <= _l2SkipDocId); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("L1DecodeV docId=%d, docIdPos=%d\n", + _l1SkipDocId, + (int) (_l1SkipDocIdPos - _valIBase)); +#endif + } + ZCDECODE(oCompr, oDocId += 1 +); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Decode docId=%d\n", + oDocId); +#endif + } + _valI = oCompr; + setDocId(oDocId); + return; +} + + +template +void +FakeFilterOccZCSkipArrayIterator::doUnpack(uint32_t docId) +{ + if (_matchData.size() != 1 || getUnpacked()) { + return; + } + assert(docId == getDocId()); + _matchData[0]->reset(docId); + setUnpacked(); +} + + +template +SearchIterator * +FakeZcSkipFilterOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new FakeFilterOccZCSkipArrayIterator(_compressed.first, + 0, + _docIdLimit, + matchData); +} + + +template +class FakeEGCompr64PosOcc : public FakeZcFilterOcc +{ +public: + FakeEGCompr64PosOcc(const FakeWord &fw); + + ~FakeEGCompr64PosOcc(void); + + void + setup(const FakeWord &fw); + + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + virtual SearchIterator * + createIterator(const TermFieldMatchDataArray &matchData) const; +}; + + +template +FakeEGCompr64PosOcc::FakeEGCompr64PosOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, bigEndian, + bigEndian ? 
".zcposoccbe" : ".zcposoccle") +{ + setup(fw); +} + + +template +FakeEGCompr64PosOcc::~FakeEGCompr64PosOcc(void) +{ +} + + +template +void +FakeEGCompr64PosOcc::setup(const FakeWord &fw) +{ + uint32_t lastDocId = 0u; + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + typedef FW::DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + DWPFL::const_iterator p(fw._wordPosFeatures.begin()); + DWPFL::const_iterator pe(fw._wordPosFeatures.end()); + DocIdAndPosOccFeatures features; + EGPosOccEncodeContext e(&_fieldsParams); + ComprFileWriteContext ectx(e); + e.setWriteContext(&ectx); + ectx.allocComprBuf(64, 1); + e.afterWrite(ectx, 0, 0); + + _hitDocs = fw._postings.size(); + _docIdLimit = fw._docIdLimit; + if (_hitDocs > 0) + _lastDocId = fw._postings.back()._docId; + else + _lastDocId = 0u; + e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + + uint32_t docIdK = e.calcDocIdK(_hitDocs, _docIdLimit); + + while (d != de) { + e.encodeExpGolomb(d->_docId - lastDocId - 1, docIdK); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Encode docId=%d (+%u + 1)\n", + d->_docId, d->_docId - lastDocId - 1); +#endif + fw.setupFeatures(*d, &*p, features); + p += d->_positions; + e.writeFeatures(features); + lastDocId = d->_docId; + ++d; + } + assert(p == pe); + + _compressedBits = e.getWriteOffset(); + + // First pad to 64 bits. + uint32_t pad = (64 - e.getWriteOffset()) & 63; + while (pad > 0) { + uint32_t now = std::min(32u, pad); + e.writeBits(0, now); + e.writeComprBufferIfNeeded(); + pad -= now; + } + + // Then write 128 more bits. 
This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + for (unsigned int i = 0; i < 4; i++) { + e.writeBits(0, 32); + e.writeComprBufferIfNeeded(); + } + e.writeComprBufferIfNeeded(); + e.flush(); + e.writeComprBuffer(); + + std::pair ectxData = + ectx.grabComprBuffer(_compressedMalloc); + _compressed = std::make_pair(static_cast(ectxData.first), + ectxData.second); +} + + +template +size_t +FakeEGCompr64PosOcc::bitSize(void) const +{ + return _compressedBits; +} + + +template +bool +FakeEGCompr64PosOcc::hasWordPositions(void) const +{ + return true; +} + + +template +SearchIterator * +FakeEGCompr64PosOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new ZcRareWordPosOccIterator(Position(_compressed.first, 0), + _compressedBits, _docIdLimit, &_fieldsParams, matchData); +} + + +template +class FakeEG2Compr64PosOcc : public FakeZcFilterOcc +{ +public: + FakeEG2Compr64PosOcc(const FakeWord &fw); + + ~FakeEG2Compr64PosOcc(void); + + void + setup(const FakeWord &fw); + + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + virtual SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + + +template +FakeEG2Compr64PosOcc::FakeEG2Compr64PosOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, bigEndian, + bigEndian ? 
".zc2posoccbe" : ".zc2posoccle") +{ + setup(fw); +} + + +template +FakeEG2Compr64PosOcc::~FakeEG2Compr64PosOcc(void) +{ +} + + +template +void +FakeEG2Compr64PosOcc::setup(const FakeWord &fw) +{ + uint32_t lastDocId = 0u; + + typedef FakeWord FW; + typedef FW::DocWordFeatureList DWFL; + typedef FW::DocWordPosFeatureList DWPFL; + + DWFL::const_iterator d(fw._postings.begin()); + DWFL::const_iterator de(fw._postings.end()); + DWPFL::const_iterator p(fw._wordPosFeatures.begin()); + DWPFL::const_iterator pe(fw._wordPosFeatures.end()); + DocIdAndPosOccFeatures features; + EG2PosOccEncodeContext e(&_fieldsParams); + ComprFileWriteContext ectx(e); + e.setWriteContext(&ectx); + ectx.allocComprBuf(64, 1); + e.afterWrite(ectx, 0, 0); + + _hitDocs = fw._postings.size(); + _docIdLimit = fw._docIdLimit; + if (_hitDocs > 0) + _lastDocId = fw._postings.back()._docId; + else + _lastDocId = 0u; + e.encodeExpGolomb(_hitDocs - 1, K_VALUE_ZCPOSTING_NUMDOCS); + + while (d != de) { + e.encodeExpGolomb(d->_docId - lastDocId - 1, + K_VALUE_ZCPOSTING_DELTA_DOCID); +#if DEBUG_ZCFILTEROCC_PRINTF + printf("Encode docId=%d (+%u + 1)\n", + d->_docId, d->_docId - lastDocId - 1); +#endif + fw.setupFeatures(*d, &*p, features); + p += d->_positions; + e.writeFeatures(features); + lastDocId = d->_docId; + ++d; + } + assert(p == pe); + + _compressedBits = e.getWriteOffset(); + + // First pad to 64 bits. + uint32_t pad = (64 - e.getWriteOffset()) & 63; + while (pad > 0) { + uint32_t now = std::min(32u, pad); + e.writeBits(0, now); + e.writeComprBufferIfNeeded(); + pad -= now; + } + + // Then write 128 more bits. 
This allows for 64-bit decoding + // with a readbits that always leaves a nonzero preRead + for (unsigned int i = 0; i < 4; i++) { + e.writeBits(0, 32); + e.writeComprBufferIfNeeded(); + } + e.writeComprBufferIfNeeded(); + e.flush(); + e.writeComprBuffer(); + + std::pair ectxData = + ectx.grabComprBuffer(_compressedMalloc); + _compressed = std::make_pair(static_cast(ectxData.first), + ectxData.second); +} + + +template +size_t +FakeEG2Compr64PosOcc::bitSize(void) const +{ + return _compressedBits; +} + + +template +bool +FakeEG2Compr64PosOcc::hasWordPositions(void) const +{ + return true; +} + + +template +SearchIterator * +FakeEG2Compr64PosOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new Zc4RareWordPosOccIterator(Position(_compressed.first, 0), + _compressedBits, _docIdLimit, &_fieldsParams, matchData); +} + + +template +class FakeZcSkipPosOcc : public FakeZcFilterOcc +{ + search::index::PostingListCounts _counts; +public: + FakeZcSkipPosOcc(const FakeWord &fw); + + ~FakeZcSkipPosOcc(void); + + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + virtual SearchIterator * + createIterator(const TermFieldMatchDataArray &matchData) const; +}; + + +template +FakeZcSkipPosOcc::FakeZcSkipPosOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, bigEndian, + bigEndian ? 
".zcskipposoccbe" : ".zcskipposoccle") +{ + setup(fw, true, true); + _counts._bitLength = _compressedBits; +} + + +template +FakeZcSkipPosOcc::~FakeZcSkipPosOcc(void) +{ +} + + +template +size_t +FakeZcSkipPosOcc::bitSize(void) const +{ + return _compressedBits - + _l1SkipSize - _l2SkipSize - _l3SkipSize - _l4SkipSize; +} + + +template +bool +FakeZcSkipPosOcc::hasWordPositions(void) const +{ + return true; +} + + +template +SearchIterator * +FakeZcSkipPosOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new ZcPosOccIterator(Position(_compressed.first, 0), _compressedBits, _docIdLimit, + static_cast(-1), + _counts, + &_fieldsParams, + matchData); +} + + +template +class FakeZc2SkipPosOcc : public FakeZcFilterOcc +{ + search::index::PostingListCounts _counts; +public: + FakeZc2SkipPosOcc(const FakeWord &fw); + + ~FakeZc2SkipPosOcc(void); + + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + virtual SearchIterator * + createIterator(const TermFieldMatchDataArray &matchData) const; +}; + + +template +FakeZc2SkipPosOcc::FakeZc2SkipPosOcc(const FakeWord &fw) + : FakeZcFilterOcc(fw, bigEndian, + bigEndian ? 
".zc2skipposoccbe" : ".zc2skipposoccle") +{ + setup(fw, true, false); + _counts._bitLength = _compressedBits; +} + + +template +FakeZc2SkipPosOcc::~FakeZc2SkipPosOcc(void) +{ +} + + +template +size_t +FakeZc2SkipPosOcc::bitSize(void) const +{ + return _compressedBits - + _l1SkipSize - _l2SkipSize - _l3SkipSize - _l4SkipSize; +} + + +template +bool +FakeZc2SkipPosOcc::hasWordPositions(void) const +{ + return true; +} + + +template +SearchIterator * +FakeZc2SkipPosOcc:: +createIterator(const TermFieldMatchDataArray &matchData) const +{ + return new Zc4PosOccIterator(Position(_compressed.first, 0), _compressedBits, _docIdLimit, + static_cast(-1), _counts, &_fieldsParams, matchData); +} + + +static FPFactoryInit +initPosbe(std::make_pair("EGCompr64PosOccBE", + makeFPFactory > >)); + +static FPFactoryInit +initPosle(std::make_pair("EGCompr64PosOccLE", + makeFPFactory > >)); + + +static FPFactoryInit +initPos0be(std::make_pair("EG2Compr64PosOccBE", + makeFPFactory > >)); + + +static FPFactoryInit +initPos0le(std::make_pair("EG2Compr64PosOccLE", + makeFPFactory > >)); + + +static FPFactoryInit +initSkipPosbe(std::make_pair("ZcSkipPosOccBE", + makeFPFactory > >)); + + +static FPFactoryInit +initSkipPosle(std::make_pair("ZcSkipPosOccLE", + makeFPFactory > >)); + + +static FPFactoryInit +initSkipPos0be(std::make_pair("Zc2SkipPosOccBE", + makeFPFactory > >)); + + +static FPFactoryInit +initSkipPos0le(std::make_pair("Zc2SkipPosOccLE", + makeFPFactory > >)); + + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h new file mode 100644 index 00000000000..0e1bcba7680 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + + +#include "fakeword.h" +#include "fakeposting.h" +#include +#include + +namespace search +{ + +namespace fakedata +{ + +/* + * YST style compression of docid list. + */ +class FakeZcFilterOcc : public FakePosting +{ +protected: + size_t _docIdsSize; + size_t _l1SkipSize; + size_t _l2SkipSize; + size_t _l3SkipSize; + size_t _l4SkipSize; + unsigned int _docIdLimit; + unsigned int _hitDocs; + uint32_t _lastDocId; + + uint64_t _compressedBits; + std::pair _compressed; + void *_compressedMalloc; + uint64_t _featuresSize; + const search::bitcompression::PosOccFieldsParams &_fieldsParams; + bool _bigEndian; +protected: + void + setup(const FakeWord &fw, bool doFeatures, bool dynamicK); + + template + void + setupT(const FakeWord &fw, bool doFeatures, bool dynamicK); + +public: + FakeZcFilterOcc(const FakeWord &fw); + + FakeZcFilterOcc(const FakeWord &fw, + bool bigEndian, + const char *nameSuffix); + + ~FakeZcFilterOcc(void); + + static void + forceLink(void); + + /* + * Size of posting list, in bits. + */ + size_t + bitSize(void) const; + + virtual bool + hasWordPositions(void) const; + + /* + * Size of posting skip list, in bits. + */ + size_t + skipBitSize(void) const; + + size_t + l1SkipBitSize(void) const; + + size_t + l2SkipBitSize(void) const; + + size_t + l3SkipBitSize(void) const; + + size_t + l4SkipBitSize(void) const; + + /* + * Single posting list performance, without feature unpack. + */ + virtual int + lowLevelSinglePostingScan(void) const; + + /* + * Single posting list performance, with feature unpack. + */ + virtual int + lowLevelSinglePostingScanUnpack(void) const; + + /* + * Two posting lists performance (same format) without feature unpack. + */ + virtual int + lowLevelAndPairPostingScan(const FakePosting &rhs) const; + + /* + * Two posting lists performance (same format) with feature unpack. 
+ */ + virtual int + lowLevelAndPairPostingScanUnpack(const FakePosting &rhs) const; + + + /* + * Iterator factory, for current query evaluation framework. + */ + virtual search::queryeval::SearchIterator * + createIterator(const fef::TermFieldMatchDataArray &matchData) const; +}; + +} // namespace fakedata + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp new file mode 100644 index 00000000000..94ccd4cd891 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.cpp @@ -0,0 +1,120 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +LOG_SETUP(".fpfactory"); +#include "fakeegcompr64filterocc.h" +#include "fakefilterocc.h" +#include "fakezcbfilterocc.h" +#include "fakezcfilterocc.h" +#include "fakememtreeocc.h" +#include "fpfactory.h" +#include "fakewordset.h" + +namespace search +{ + +namespace fakedata +{ + +using index::Schema; + +FPFactory::~FPFactory(void) +{ +} + +void +FPFactory::setup(const FakeWordSet &fws) +{ + std::vector v; + + for (uint32_t wc = 0; wc < fws._words.size(); ++wc) { + std::vector::const_iterator fwi(fws._words[wc].begin()); + std::vector::const_iterator fwe(fws._words[wc].end()); + while (fwi != fwe) { + v.push_back(*fwi); + ++fwi; + } + } + setup(v); +} + + +void +FPFactory::setup(const std::vector &fws) +{ + (void) fws; +} + + +typedef std::map +FPFactoryMap; + +static FPFactoryMap *fpFactoryMap = NULL; + +/* + * Posting list factory glue. 
+ */ + +FPFactory * +getFPFactory(const std::string &name, const Schema &schema) +{ + if (fpFactoryMap == NULL) + return NULL; + + FPFactoryMap::const_iterator i(fpFactoryMap->find(name)); + + if (i != fpFactoryMap->end()) + return i->second(schema); + else + return NULL; +} + + +std::vector +getPostingTypes(void) +{ + std::vector res; + + if (fpFactoryMap != NULL) + for (FPFactoryMap::const_iterator i(fpFactoryMap->begin()); + i != fpFactoryMap->end(); + ++i) + res.push_back(i->first); + return res; +} + + +FPFactoryInit::FPFactoryInit(const FPFactoryMapEntry &fpFactoryMapEntry) + : _key(fpFactoryMapEntry.first) +{ + if (fpFactoryMap == NULL) + fpFactoryMap = new FPFactoryMap; + fpFactoryMap->insert(fpFactoryMapEntry); +} + +FPFactoryInit::~FPFactoryInit(void) +{ + assert(fpFactoryMap != NULL); + size_t eraseRes = fpFactoryMap->erase(_key); + assert(eraseRes == 1); + (void) eraseRes; + if (fpFactoryMap->empty()) { + delete fpFactoryMap; + fpFactoryMap = NULL; + } +} + +void +FPFactoryInit::forceLink(void) +{ + FakeEGCompr64FilterOcc::forceLink(); + FakeFilterOcc::forceLink(); + FakeZcbFilterOcc::forceLink(); + FakeZcFilterOcc::forceLink(); + FakeMemTreeOcc::forceLink(); +}; + +} // namespace fakedata + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h new file mode 100644 index 00000000000..fe09e653e26 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/fakedata/fpfactory.h @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+#include
+#include "fakeposting.h"
+
+namespace search
+{
+
+namespace fakedata
+{
+
+class FakeWord;
+class FakeWordSet;
+
+/*
+ * Abstract factory that builds a fake posting list (FakePosting) for a
+ * fake word.  Concrete factories are registered by name through
+ * FPFactoryInit and looked up with getFPFactory().
+ */
+class FPFactory
+{
+public:
+    virtual
+    ~FPFactory(void);
+
+    /*
+     * Build the posting list for a single word.
+     */
+    virtual FakePosting::SP
+    make(const FakeWord &fw) = 0;
+
+    /*
+     * Preparation hook.  The base implementation (fpfactory.cpp) flattens
+     * the word set into one vector and forwards it to the overload below.
+     */
+    virtual void
+    setup(const FakeWordSet &fws);
+
+    /*
+     * Preparation hook taking the flattened word list.  The base
+     * implementation ignores its argument.
+     */
+    virtual void
+    setup(const std::vector &fws);
+};
+
+/*
+ * Factory whose make() returns "new P(fw)" wrapped in a FakePosting::SP.
+ * The schema passed to the constructor is not used.
+ */
+template
+class FPFactoryT : public FPFactory
+{
+public:
+    FPFactoryT(const index::Schema &schema)
+        : FPFactory()
+    {
+        (void) schema;
+    }
+
+    virtual FakePosting::SP
+    make(const FakeWord &fw)
+    {
+        return FakePosting::SP(new P(fw));
+    }
+};
+
+/* Signature of a function that creates a factory for a given schema. */
+typedef FPFactory *(FPFactoryMaker)(const index::Schema &schema);
+
+/* Registry entry: "first" is the lookup key, "second" is called with the schema. */
+typedef std::pair
+FPFactoryMapEntry;
+
+/*
+ * Maker usable as the "second" member of a registry entry; returns a
+ * heap-allocated F constructed from the schema.
+ * NOTE(review): the caller presumably owns the returned pointer - confirm.
+ */
+template
+static FPFactory *
+makeFPFactory(const index::Schema &schema)
+{
+    return new F(schema);
+}
+
+/*
+ * Look up a registered factory maker by name and invoke it.  Returns NULL
+ * when nothing is registered or the name is unknown.
+ */
+FPFactory *
+getFPFactory(const std::string &name, const index::Schema &schema);
+
+/*
+ * Names of all currently registered posting list formats.
+ */
+std::vector
+getPostingTypes(void);
+
+/*
+ * Registration helper: the constructor inserts the entry into the global
+ * factory map (creating the map on first use) and the destructor erases
+ * it again, deleting the map once it becomes empty.
+ */
+class FPFactoryInit
+{
+    std::string _key;
+public:
+    FPFactoryInit(const FPFactoryMapEntry &fpFactoryMapEntry);
+
+    ~FPFactoryInit(void);
+
+    /* Calls forceLink() on the fake posting list implementations. */
+    static void
+    forceLink(void);
+};
+
+} // namespace fakedata
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/test/initrange.cpp b/searchlib/src/vespa/searchlib/test/initrange.cpp
new file mode 100644
index 00000000000..30508915d3e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/initrange.cpp
@@ -0,0 +1,185 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "initrange.h" +#include +#include +#include + +namespace search { +namespace test { + +using namespace search::queryeval; +using std::make_unique; + +class DocIdIterator : public SearchIterator +{ +public: + DocIdIterator(const InitRangeVerifier::DocIds & docIds, bool strict) : + _strict(strict), + _currIndex(0), + _docIds(docIds) + { } + + void initRange(uint32_t beginId, uint32_t endId) override { + SearchIterator::initRange(beginId, endId); + _currIndex = 0; + if (_strict) { + doSeek(beginId); + } + } + + void doSeek(uint32_t docId) override { + while ((_currIndex < _docIds.size()) && (_docIds[_currIndex] < docId)) { + _currIndex++; + } + if ((_currIndex < _docIds.size()) && (_docIds[_currIndex] < getEndId())) { + if (_docIds[_currIndex] == docId || _strict) { + setDocId(_docIds[_currIndex]); + } + } else { + setAtEnd(); + } + } + + void doUnpack(uint32_t docid) { (void) docid; } + + vespalib::Trinary is_strict() const override { + return _strict ? vespalib::Trinary::True : vespalib::Trinary::False; + } + +private: + const bool _strict; + uint32_t _currIndex; + const InitRangeVerifier::DocIds _docIds; +}; + +InitRangeVerifier::InitRangeVerifier() : + _trueTfmd(), + _docIds() +{ + // (0),1 and 10,11 and 20,21 .... 200,201 etc are hits + // 0 is of course invalid. 
+ for (size_t i(0); (i*10+1) < getDocIdLimit(); i++) { + if (i > 0) { + _docIds.push_back(i * 10); + } + _docIds.push_back(i*10 + 1); + } +} + +InitRangeVerifier::DocIds +InitRangeVerifier::invert(const DocIds & docIds, uint32_t docIdlimit) +{ + DocIds inverted; + inverted.reserve(docIdlimit); + for (size_t i(1), next(0); i < docIdlimit; i++) { + if (next < docIds.size()) { + if (i >= docIds[next]) { + if (i == docIds[next++]) { + continue; + } + } + } + inverted.push_back(i); + } + return inverted; +} + +SearchIterator::UP +InitRangeVerifier::createIterator(const DocIds &docIds, bool strict) const +{ + return make_unique(docIds, strict); +} + +SearchIterator::UP +InitRangeVerifier::createEmptyIterator() const +{ + return make_unique(); +} + +SearchIterator::UP +InitRangeVerifier::createFullIterator() const +{ + return make_unique(_trueTfmd); +} + +void +InitRangeVerifier::verify(SearchIterator * iterator) const +{ + SearchIterator::UP up(iterator); + verify(*up); +} + +void +InitRangeVerifier::verify(SearchIterator & iterator) const +{ + ASSERT_TRUE(iterator.is_strict() != vespalib::Trinary::Undefined); + if (iterator.is_strict() == vespalib::Trinary::True) { + verify(iterator, true); + } + verify(iterator, false); +} + +void +InitRangeVerifier::verify(SearchIterator & iterator, bool strict) const +{ + verify(iterator, Ranges({{1, 202}}), strict); + verify(iterator, Ranges({{1, 202}}), strict); + for (uint32_t rangeWidth : { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 100, 202 }) { + Ranges ranges; + for (uint32_t sum(1); sum < getDocIdLimit(); sum += rangeWidth) { + ranges.emplace_back(sum, std::min(sum+rangeWidth, getDocIdLimit())); + } + verify(iterator, ranges, strict); + std::reverse(ranges.begin(), ranges.end()); + verify(iterator, ranges, strict); + } +} + +void +InitRangeVerifier::verify(SearchIterator & iterator, const Ranges & ranges, bool strict) const +{ + DocIds result = search(iterator, ranges, strict); + ASSERT_EQUAL(_docIds.size(), 
result.size()); + for (size_t i(0); i < _docIds.size(); i++) { + EXPECT_EQUAL(_docIds[i], result[i]); + } +} + +InitRangeVerifier::DocIds +InitRangeVerifier::search(SearchIterator & it, const Ranges & ranges, bool strict) +{ + DocIds result; + for (Range range: ranges) { + DocIds part = strict ? searchStrict(it, range) : searchRelaxed(it, range); + result.insert(result.end(), part.begin(), part.end()); + } + std::sort(result.begin(), result.end()); + return result; +} + +InitRangeVerifier::DocIds +InitRangeVerifier::searchRelaxed(SearchIterator & it, Range range) +{ + DocIds result; + it.initRange(range.first, range.second); + for (uint32_t docid = range.first; docid < range.second; ++docid) { + if (it.seek(docid)) { + result.emplace_back(docid); + } + } + return result; +} + +InitRangeVerifier::DocIds +InitRangeVerifier::searchStrict(SearchIterator & it, Range range) +{ + DocIds result; + it.initRange(range.first, range.second); + for (uint32_t docId = it.seekFirst(range.first); docId < range.second; docId = it.seekNext(docId + 1)) { + result.push_back(docId); + } + return result; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/test/initrange.h b/searchlib/src/vespa/searchlib/test/initrange.h new file mode 100644 index 00000000000..eb04977d605 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/initrange.h @@ -0,0 +1,38 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once
+
+#include
+#include
+
+namespace search {
+namespace test {
+
+/**
+ * Test helper that checks that a search iterator returns the expected
+ * hits when initRange() is called with various sub-ranges of the
+ * document id space.  The expected hits are built by the constructor:
+ * 1, then 10,11, 20,21, ... up to 200,201.
+ */
+class InitRangeVerifier {
+public:
+    typedef queryeval::SearchIterator SearchIterator;
+    typedef std::vector DocIds;
+    typedef std::pair Range;
+    typedef std::vector Ranges;
+
+    /// Returns the ids in [1, docIdlimit) that are not matched in docIds.
+    static DocIds invert(const DocIds & docIds, uint32_t docIdlimit);
+    /// Creates a simple reference iterator over the given document ids.
+    SearchIterator::UP createIterator(const DocIds &docIds, bool strict) const;
+    SearchIterator::UP createEmptyIterator() const;
+    SearchIterator::UP createFullIterator() const;
+    InitRangeVerifier();
+    /// The hits an iterator under test must produce.
+    const DocIds & getExpectedDocIds() const { return _docIds; }
+    /// Upper bound (exclusive) of the document id space used by the verifier.
+    uint32_t getDocIdLimit() const { return 207; }
+    /// Verifies the iterator; strict traversal is only exercised when the
+    /// iterator reports itself as strict.
+    void verify(SearchIterator & iterator) const;
+    /// Convenience that takes ownership of the pointer.
+    void verify(SearchIterator * iterator) const;
+private:
+    void verify(SearchIterator & iterator, bool strict) const;
+    void verify(SearchIterator & iterator, const Ranges & ranges, bool strict) const;
+    static DocIds search(SearchIterator & iterator, const Ranges & ranges, bool strict);
+    static DocIds searchRelaxed(SearchIterator & search, Range range);
+    static DocIds searchStrict(SearchIterator & search, Range range);
+    // Passed to the iterator built by createFullIterator(); mutable because
+    // that method is const.
+    mutable search::fef::TermFieldMatchData _trueTfmd;
+    DocIds _docIds;
+};
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt
new file mode 100644
index 00000000000..75453abc693
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/memoryindex/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_searchlib_test_memoryindex INTERFACE + SOURCES + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h b/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h new file mode 100644 index 00000000000..5f2ec0c033a --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/memoryindex/ordereddocumentinserter.h @@ -0,0 +1,119 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +namespace memoryindex +{ + +namespace test +{ + +class OrderedDocumentInserter : public IOrderedDocumentInserter +{ + std::stringstream _ss; + bool _first; + bool _verbose; + uint32_t _fieldId; + + void + addComma() + { + if (!_first) { + _ss << ","; + } else { + _first = false; + } + } +public: + OrderedDocumentInserter() + : _ss(), + _first(true), + _verbose(false), + _fieldId(0) + { + } + + virtual void + setNextWord(const vespalib::stringref word) override + { + addComma(); + _ss << "w=" << word; + } + + void + setFieldId(uint32_t fieldId) + { + _fieldId = fieldId; + } + + virtual void + add(uint32_t docId, + const index::DocIdAndFeatures &features) override + { + (void) features; + addComma(); + _ss << "a=" << docId; + if (_verbose) { + _ss << "("; + auto wpi = features._wordPositions.begin(); + bool firstElement = true; + for (auto &el : features._elements) { + if (!firstElement) { + _ss << ","; + } + firstElement = false; + _ss << "e=" << el.getElementId() << ",w=" << + el.getWeight() << ",l=" << + el.getElementLen() << "["; + bool firstWordPos = true; + for (uint32_t i = 0; i < el.getNumOccs(); ++i) { + if (!firstWordPos) { + _ss << ","; + } + firstWordPos = false; + _ss << wpi->getWordPos(); + } + _ss << "]"; + } + _ss << ")"; + } + } + + virtual void + remove(uint32_t docId) override + { + addComma(); + _ss << "r=" << docId; + } + + virtual void flush() override { } + virtual void 
rewind() override { + addComma(); + _ss << "f=" << _fieldId; + } + + std::string + toStr(void) const + { + return _ss.str(); + } + + void + reset() + { + _ss.str(""); + _first = true; + _verbose = false; + } + + void setVerbose() { _verbose = true; } +}; + +} // namespace test +} // namespace memoryindex +} // namespace search diff --git a/searchlib/src/vespa/searchlib/test/statefile.cpp b/searchlib/src/vespa/searchlib/test/statefile.cpp new file mode 100644 index 00000000000..005145c5cb3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/statefile.cpp @@ -0,0 +1,48 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include "statefile.h" + +namespace search +{ + +namespace test +{ + +namespace statefile +{ + +vespalib::string +readState(StateFile &sf) +{ + std::vector buf; + sf.readState(buf); + return vespalib::string(buf.begin(), buf.end()); +} + + +std::vector +readHistory(const char *name) +{ + std::vector res; + std::ifstream is(name); + std::string line; + while (!is.eof()) { + std::getline(is, line); + if (is.eof() && line.empty()) { + break; + } + res.push_back(line + "\n"); + } + return res; +} + + +} + +} + +} diff --git a/searchlib/src/vespa/searchlib/test/statefile.h b/searchlib/src/vespa/searchlib/test/statefile.h new file mode 100644 index 00000000000..a5aee2ff2a3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/statefile.h @@ -0,0 +1,22 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include + +namespace search +{ + +class StateFile; + +namespace test +{ + +namespace statefile +{ + +vespalib::string readState(StateFile &sf); +std::vector readHistory(const char *name); + +} + +} + +} diff --git a/searchlib/src/vespa/searchlib/test/statestring.cpp b/searchlib/src/vespa/searchlib/test/statestring.cpp new file mode 100644 index 00000000000..e1c8df43c4b --- /dev/null +++ b/searchlib/src/vespa/searchlib/test/statestring.cpp @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include "statestring.h" + +namespace search +{ + +namespace test +{ + +namespace statestring +{ + +bool +testStartPos(vespalib::string &s, size_t pos) +{ + return (pos < s.size() && (pos == 0 || s[pos - 1] == ' ')); +} + + +size_t +findStartPos(vespalib::string &s, const vespalib::string &key) +{ + size_t pos = 0; + while (pos < s.size()) { + pos = s.find(key, pos); + if (testStartPos(s, pos)) { + break; + } + ++pos; + } + return pos; +} + + +size_t +scanBreakPos(vespalib::string &s, size_t pos) +{ + while (pos < s.size() && s[pos] != ' ' && s[pos] != '\n') { + ++pos; + } + return pos; +} + + +void +normalizeTimestamp(vespalib::string &s) +{ + size_t pos = findStartPos(s, "ts="); + if (pos < s.size()) { + size_t npos = scanBreakPos(s, pos + 3); + s.replace(pos, npos - pos, "ts=0.0"); + return; + } +} + + +void +normalizeAddr(vespalib::string &s, void *addr) +{ + size_t pos = findStartPos(s, "addr="); + if (pos < s.size()) { + size_t npos = scanBreakPos(s, pos + 5); + std::ostringstream os; + os << "addr=0x"; + os.width(16); + os.fill('0'); + os << std::hex << reinterpret_cast(addr); + s.replace(pos, npos - pos, os.str()); + return; + } +} + + +void +normalizeTimestamps(std::vector &sv) +{ + for (auto &s : sv) { + normalizeTimestamp(s); + } +} + + +void +normalizeAddrs(std::vector &sv, void *addr) +{ + for (auto &s : sv) { + normalizeAddr(s, addr); + } +} + + +} + +} + +} diff 
--git a/searchlib/src/vespa/searchlib/test/statestring.h b/searchlib/src/vespa/searchlib/test/statestring.h
new file mode 100644
index 00000000000..ad5d70186dc
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/statestring.h
@@ -0,0 +1,24 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+
+namespace search
+{
+
+namespace test
+{
+
+namespace statestring
+{
+
+// Replaces the first "ts=<value>" field of s with "ts=0.0".
+void normalizeTimestamp(vespalib::string &s);
+// Replaces the first "addr=<value>" field of s with "addr=0x" followed by
+// addr as 16 zero padded hexadecimal digits.
+void normalizeAddr(vespalib::string &s, void *addr);
+// Applies normalizeTimestamp() to every element.
+void normalizeTimestamps(std::vector &sv);
+// Applies normalizeAddr() to every element.
+void normalizeAddrs(std::vector &sv, void *addr);
+
+}
+
+}
+
+}
+
+
diff --git a/searchlib/src/vespa/searchlib/transactionlog/.gitignore b/searchlib/src/vespa/searchlib/transactionlog/.gitignore
new file mode 100644
index 00000000000..ee8938b6bf4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/.gitignore
@@ -0,0 +1,6 @@
+*.So
+*.exe
+*.ilk
+*.pdb
+.depend*
+Makefile
diff --git a/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt b/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt
new file mode 100644
index 00000000000..6b01f773124
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(searchlib_translog OBJECT
+    SOURCES
+    common.cpp
+    domain.cpp
+    domainpart.cpp
+    nosyncproxy.cpp
+    session.cpp
+    trans_log_server_explorer.cpp
+    translogclient.cpp
+    translogserver.cpp
+    translogserverapp.cpp
+    DEPENDS
+)
diff --git a/searchlib/src/vespa/searchlib/transactionlog/OWNERS b/searchlib/src/vespa/searchlib/transactionlog/OWNERS
new file mode 100644
index 00000000000..1037590124e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/OWNERS
@@ -0,0 +1 @@
+balder
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.cpp b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
new file mode 100644
index 00000000000..6ff2aee8ee7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.cpp
@@ -0,0 +1,107 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include "common.h"
+
+namespace search
+{
+
+namespace transactionlog
+{
+
+using vespalib::nbostream;
+
+/// Makes sure that dir exists as a directory.
+/// Returns 0 when it already is a directory or was created, -2 when the
+/// path exists but is not a directory, and -3 when creating it failed.
+int makeDirectory(const char * dir)
+{
+    int retval(-1);
+
+    FastOS_StatInfo st;
+    if ( FastOS_File::Stat(dir, &st) ) {
+        retval = st._isDirectory ? 0 : -2;
+    } else {
+        retval = FastOS_File::MakeDirectory(dir) ? 0 : -3;
+    }
+
+    return retval;
+}
+
+/// Ordering used by the comparison operators: two ranges compare equal (0)
+/// when one contains the other, otherwise the result is the difference
+/// between their start serial numbers.
+int64_t SerialNumRange::cmp(const SerialNumRange & b) const
+{
+    int64_t diff(0);
+    if ( ! (contains(b) || b.contains(*this)) ) {
+        diff = _from - b._from;
+    }
+    return diff;
+}
+
+/// Builds a packet from sz bytes of serialized entries.  The entries are
+/// deserialized one by one to establish the entry count and the range of
+/// serial numbers held by the packet.
+Packet::Packet(const void * buf, size_t sz) :
+     _count(0),
+     _range(),
+     _limit(sz),
+     _buf(static_cast(buf), sz, true)
+{
+    nbostream os(_buf.c_str(), sz, true);
+    while ( os.size() > 0 ) {
+        Entry e;
+        e.deserialize(os);
+        // NOTE(review): "to() == 0" serves as the "no entry seen yet" marker,
+        // which presumably relies on serial number 0 never being stored.
+        if (_range.to() == 0) {
+            _range.from(e.serial());
+        }
+        _range.to(e.serial());
+        _count++;
+    }
+}
+
+/// Appends the entries of packet when its range starts after the end of
+/// this packet's range.  Returns false, changing nothing, otherwise.
+bool Packet::merge(const Packet & packet)
+{
+    bool retval(_range.to() < packet._range.from());
+    if (retval) {
+        _count += packet._count;
+        _range.to(packet._range.to());
+        _buf.write(packet.getHandle().c_str(), packet.getHandle().size());
+    }
+    return retval;
+}
+
+/// Reads serial number, type and payload length from the stream, points
+/// _data at the payload at the current read position and skips past it.
+/// The entry is flagged invalid while reading and valid once done.
+nbostream & Packet::Entry::deserialize(nbostream & os)
+{
+    _valid = false;
+    int32_t len(0);
+    os >> _unique >> _type >> len;
+    // NOTE(review): len is signed and is not range checked in this function.
+    _data = vespalib::ConstBufferRef(os.peek(), len);
+    os.adjustReadPos(len);
+    _valid = true;
+    return os;
+}
+
+/// Writes serial number, type, payload length and payload to the stream.
+nbostream & Packet::Entry::serialize(nbostream & os) const
+{
+    os << _unique << _type << static_cast(_data.size());
+    os.write(_data.c_str(), _data.size());
+    return os;
+}
+
+Packet::Entry::Entry(SerialNum u, Type t, const vespalib::ConstBufferRef & d) :
+    _unique(u),
+    _type(t),
+    _valid(true),
+    _data(d)
+{
+}
+
+
+/// Appends an entry when the buffer is still below the size limit and the
+/// entry's serial number is greater than the current end of the range.
+/// Returns false, changing nothing, otherwise.
+bool Packet::add(const Packet::Entry & e)
+{
+    bool retval((_buf.size() < _limit) && (_range.to() < e.serial()));
+    if (retval) {
+        if (_buf.empty()) {
+            _range.from(e.serial());
+        }
+        e.serialize(_buf);
+        _count++;
+        _range.to(e.serial());
+    }
+    return retval;
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/transactionlog/common.h b/searchlib/src/vespa/searchlib/transactionlog/common.h
new file mode 100644
index 00000000000..ae6f27f39a1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/transactionlog/common.h
@@ -0,0 +1,100 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once + +#include +#include +#include + +namespace search { +namespace transactionlog { + +/// This represents a type of the entry. Fx update,remove +typedef uint32_t Type; +/// A channel represents one data stream. + +class RPC +{ +public: +enum Result { OK, FULL, ERROR }; +}; + +class SerialNumRange +{ +public: + SerialNumRange() : _from(0), _to(0) { } + SerialNumRange(SerialNum f) : _from(f), _to(f ? f-1 : f) { } + SerialNumRange(SerialNum f, SerialNum t) : _from(f), _to(t) { } + bool operator == (const SerialNumRange & b) const { return cmp(b) == 0; } + bool operator < (const SerialNumRange & b) const { return cmp(b) < 0; } + bool operator > (const SerialNumRange & b) const { return cmp(b) > 0; } + bool operator <= (const SerialNumRange & b) const { return cmp(b) <= 0; } + bool operator >= (const SerialNumRange & b) const { return cmp(b) >= 0; } + SerialNum from() const { return _from; } + SerialNum to() const { return _to; } + void from(SerialNum v) { _from = v; } + void to(SerialNum v) { _to = v; } + + bool contains(SerialNum s) const { + return (_from <= s) && (s <= _to); + } + + bool contains(const SerialNumRange & b) const { + return (_from <= b._from) && (b._to <= _to); + } +private: + int64_t cmp(const SerialNumRange & b) const; + SerialNum _from; + SerialNum _to; +}; + +class Packet +{ +public: + class Entry + { + public: + Entry() : _unique(0), _type(0), _valid(false), _data() { } + Entry(SerialNum u, Type t, const vespalib::ConstBufferRef & d); + SerialNum serial() const { return _unique; } + Type type() const { return _type; } + bool valid() const { return _valid; } + size_t serializedSize() const { return sizeof(SerialNum) + sizeof(Type) + sizeof(uint32_t) + _data.size(); } + const vespalib::ConstBufferRef & data() const { return _data; } + vespalib::nbostream & deserialize(vespalib::nbostream & is); + vespalib::nbostream & serialize(vespalib::nbostream & os) const; + private: + SerialNum _unique; + Type _type; + bool _valid; + 
vespalib::ConstBufferRef _data; + }; +public: + Packet(size_t m=0xf000) : _count(0), _range(), _limit(m), _buf(m) { } + Packet(const void * buf, size_t sz); + bool add(const Entry & data); + void close() { } + void clear() { _buf.clear(); _count = 0; _range.from(0); _range.to(0); } + const SerialNumRange & range() const { return _range; } + const vespalib::nbostream & getHandle() const { return _buf; } + size_t size() const { return _count; } + bool empty() const { return _count == 0; } + size_t sizeBytes() const { return _buf.size(); } + bool merge(const Packet & packet); +private: + size_t _count; + SerialNumRange _range; + size_t _limit; + vespalib::nbostream _buf; +}; + +int makeDirectory(const char * dir); + +class Writer { +public: + virtual ~Writer() { } + virtual void commit(const vespalib::string & domainName, const Packet & packet) = 0; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.cpp b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp new file mode 100644 index 00000000000..be1de99efef --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/domain.cpp @@ -0,0 +1,405 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".transactionlog.domain"); + +using vespalib::string; +using vespalib::make_string; +using vespalib::make_vespa_string; +using vespalib::LockGuard; +using vespalib::makeTask; +using vespalib::makeClosure; +using vespalib::Monitor; +using vespalib::MonitorGuard; +using search::common::FileHeaderContext; +using std::runtime_error; + +namespace search +{ + +namespace transactionlog +{ + +Domain::Domain(const string &domainName, + const string & baseDir, + vespalib::ThreadStackExecutor & executor, + uint64_t domainPartSize, + bool useFsync, + DomainPart::Crc defaultCrcType, + const FileHeaderContext &fileHeaderContext) : + _defaultCrcType(defaultCrcType), + _executor(executor), + _count(0), + _sessionId(1), + _useFsync(useFsync), + _syncMonitor(), + _pendingSync(false), + _name(domainName), + _domainPartSize(domainPartSize), + _parts(), + _lock(), + _sessionLock(), + _sessions(), + _baseDir(baseDir), + _fileHeaderContext(fileHeaderContext), + _markedDeleted(false) +{ + int retval(0); + if ((retval = makeDirectory(_baseDir.c_str())) != 0) { + throw runtime_error(make_string("Failed creating basedirectory %s r(%d), e(%d)", _baseDir.c_str(), retval, errno)); + } + if ((retval = makeDirectory(dir().c_str())) != 0) { + throw runtime_error(make_string("Failed creating domaindir %s r(%d), e(%d)", dir().c_str(), retval, errno)); + } + SerialNumList partIdVector = scanDir(); + const int64_t lastPart = partIdVector.empty() ? 
0 : partIdVector.back(); + for (const int64_t partId : partIdVector) { + if ( partId != -1) { + _executor.execute(makeTask(makeClosure(this, &Domain::addPart, partId, partId == lastPart))); + } + } + _executor.sync(); + if (_parts.empty() || _parts.crbegin()->second->isClosed()) { + _parts[lastPart].reset(new DomainPart(_name, dir(), lastPart, _useFsync, _defaultCrcType, _fileHeaderContext, false)); + } +} + +void Domain::addPart(int64_t partId, bool isLastPart) { + DomainPart::SP dp(new DomainPart(_name, dir(), partId, _useFsync, _defaultCrcType, _fileHeaderContext, isLastPart)); + if (dp->size() == 0) { + // Only last domain part is allowed to be truncated down to + // empty size. + assert(isLastPart); + dp->erase(dp->range().to() + 1); + } else { + { + LockGuard guard(_lock); + _count += dp->size(); + _parts[partId] = dp; + } + if (! isLastPart) { + dp->close(); + } + } +} + +class Sync : public vespalib::Executor::Task +{ +public: + Sync(Monitor &syncMonitor, const DomainPart::SP &dp, bool &pendingSync) : + _syncMonitor(syncMonitor), + _dp(dp), + _pendingSync(pendingSync) + { } +private: + void run() override { + _dp->sync(); + MonitorGuard guard(_syncMonitor); + _pendingSync = false; + guard.broadcast(); + } + + Monitor & _syncMonitor; + DomainPart::SP _dp; + bool & _pendingSync; +}; + +Domain::~Domain() { } + +DomainInfo +Domain::getDomainInfo() const +{ + LockGuard guard(_lock); + DomainInfo info(SerialNumRange(begin(), end()), count(), byteSize()); + for (const auto &entry: _parts) { + const DomainPart &part = *entry.second; + info.parts.emplace_back(PartInfo(part.range(), part.size(), + part.byteSize(), part.fileName())); + } + return info; +} + +SerialNum Domain::begin() const +{ + SerialNum s(0); + if ( ! _parts.empty() ) { + s = _parts.begin()->second->range().from(); + } + return s; +} + +SerialNum Domain::end() const +{ + SerialNum s(0); + if ( ! 
_parts.empty() ) { + s = _parts.rbegin()->second->range().to(); + } + return s; +} + +size_t Domain::byteSize() const +{ + size_t size = 0; + for (const auto &entry : _parts) { + const DomainPart &part = *entry.second; + size += part.byteSize(); + } + return size; +} + +SerialNum +Domain::getSynced(void) const +{ + SerialNum s(0); + LockGuard guard(_lock); + if (_parts.empty()) { + return s; + } + DomainPartList::const_iterator it(_parts.end()); + --it; + s = it->second->getSynced(); + if (s == 0 && it != _parts.begin()) { + --it; + s = it->second->getSynced(); + } + return s; +} + + +void +Domain::triggerSyncNow(void) +{ + MonitorGuard guard(_syncMonitor); + if (!_pendingSync) { + _pendingSync = true; + DomainPart::SP dp(_parts.rbegin()->second); + _executor.execute(Sync::UP(new Sync(_syncMonitor, dp, _pendingSync))); + } +} + +DomainPart::SP Domain::findPart(SerialNum s) +{ + LockGuard guard(_lock); + DomainPartList::iterator it(_parts.upper_bound(s)); + if (!_parts.empty() && it != _parts.begin()) { + DomainPartList::iterator prev(it); + --prev; + if (prev->second->range().to() > s) { + return prev->second; + } + } + if (it != _parts.end()) { + return it->second; + } + return DomainPart::SP(); +} + +uint64_t Domain::size() const +{ + LockGuard guard(_lock); + return size(guard); +} + +uint64_t Domain::size(const LockGuard & guard) const +{ + (void) guard; + uint64_t sz(0); + for (const auto & part : _parts) { + sz += part.second->size(); + } + return sz; +} + +SerialNum Domain::findOldestActiveVisit() const +{ + SerialNum oldestActive(std::numeric_limits::max()); + LockGuard guard(_sessionLock); + for (const auto & pair : _sessions) { + Session * session(pair.second.get()); + if (!session->inSync()) { + oldestActive = std::min(oldestActive, session->range().from()); + } + } + return oldestActive; +} + +void Domain::cleanSessions() +{ + if ( _sessions.empty()) { + return; + } + LockGuard guard(_sessionLock); + for (SessionList::iterator it(_sessions.begin()), 
mt(_sessions.end()); it != mt; ) { + Session * session(it->second.get()); + if ((!session->continous() && session->inSync())) { + _sessions.erase(it++); + } else if (session->finished()) { + _sessions.erase(it++); + } else { + it++; + } + } +} + +void Domain::commit(const Packet & packet) +{ + DomainPart::SP dp(_parts.rbegin()->second); + vespalib::nbostream is(packet.getHandle().c_str(), packet.getHandle().size(), true); + Packet::Entry entry; + entry.deserialize(is); + if (dp->byteSize() > _domainPartSize) { + triggerSyncNow(); + { + MonitorGuard guard(_syncMonitor); + while (_pendingSync) { + guard.wait(); + } + } + dp->close(); + dp.reset(new DomainPart(_name, dir(), entry.serial(), _useFsync, _defaultCrcType, _fileHeaderContext, false)); + { + LockGuard guard(_lock); + _parts[entry.serial()] = dp; + } + dp = _parts.rbegin()->second; + } + size_t oldSz(dp->size()); + dp->commit(entry.serial(), packet); + cleanSessions(); + // If commit fails no updates should be sent to subscribers either. + // Is is better to keep a consistent behaviour. 
+ _count += dp->size() - oldSz; + + LockGuard guard(_sessionLock); + for (auto & it : _sessions) { + const Session::SP & session(it.second); + if (session->continous()) { + if (session->ok()) { + Session::enQ(session, entry.serial(), packet); + } + } + } +} + +bool Domain::erase(const SerialNum & to) +{ + bool retval(true); + /// Do not erase the last element + for (DomainPartList::iterator it(_parts.begin()); (_parts.size() > 1) && (it->second.get()->range().to() < to); it = _parts.begin()) { + DomainPart::SP dp(it->second); + { + LockGuard guard(_lock); + _parts.erase(it); + } + retval = retval && dp->erase(to); + } + if (_parts.begin()->second->range().to() >= to) { + _parts.begin()->second->erase(to); + } + return retval; +} + +int Domain::visit(const Domain::SP & domain, const SerialNum & from, const SerialNum & to, FRT_Supervisor & supervisor, FNET_Connection *conn) +{ + assert(this == domain.get()); + cleanSessions(); + SerialNumRange range(from, to); + Session * session = new Session(_sessionId++, range, domain, supervisor, conn); + LockGuard guard(_sessionLock); + _sessions[session->id()] = Session::SP(session); + return session->id(); +} + +int Domain::startSession(int sessionId) +{ + int retval(-1); + LockGuard guard(_sessionLock); + SessionList::iterator found = _sessions.find(sessionId); + if (found != _sessions.end()) { + if ( execute(Session::createTask(found->second)).get() == nullptr ) { + retval = 0; + } else { + _sessions.erase(sessionId); + } + } + return retval; +} + +int Domain::closeSession(int sessionId) +{ + int retval(-1); + { + LockGuard guard(_sessionLock); + SessionList::iterator found = _sessions.find(sessionId); + if (found != _sessions.end()) { + retval = 1; + _executor.sync(); + } + } + if (retval == 1) { + FastOS_Thread::Sleep(10); + LockGuard guard(_sessionLock); + SessionList::iterator found = _sessions.find(sessionId); + if (found != _sessions.end()) { + _sessions.erase(sessionId); + retval = 0; + } else { + retval = 0; + } + } 
+ return retval; +} + +int Domain::subscribe(const Domain::SP & domain, const SerialNum & from, FRT_Supervisor & supervisor, FNET_Connection *conn) +{ + assert(this == domain.get()); + cleanSessions(); + SerialNumRange range(from, end()); + Session * session = new Session(_sessionId++, range, domain, supervisor, conn, true); + LockGuard guard(_sessionLock); + _sessions[session->id()] = Session::SP(session); + return session->id(); +} + + +Domain::SerialNumList +Domain::scanDir(void) +{ + SerialNumList res; + + FastOS_DirectoryScan dirScan(dir().c_str()); + + const char *wantPrefix = _name.c_str(); + size_t wantPrefixLen = strlen(wantPrefix); + + while (dirScan.ReadNext()) { + const char *ename = dirScan.GetName(); + if (strcmp(ename, ".") == 0 || + strcmp(ename, "..") == 0) + continue; + if (strncmp(ename, wantPrefix, wantPrefixLen) != 0) + continue; + if (ename[wantPrefixLen] != '-') + continue; + const char *p = ename + wantPrefixLen + 1; + uint64_t num = strtoull(p, NULL, 10); + string checkName = make_string("%s-%016" PRIu64, _name.c_str(), num); + if (strcmp(checkName.c_str(), ename) != 0) + continue; + res.push_back(static_cast(num)); + } + std::sort(res.begin(), res.end()); + return res; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/domain.h b/searchlib/src/vespa/searchlib/transactionlog/domain.h new file mode 100644 index 00000000000..6309d7113f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/domain.h @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include + +namespace search { +namespace transactionlog { + +struct PartInfo { + SerialNumRange range; + size_t count; + size_t byteSize; + vespalib::string file; + PartInfo(SerialNumRange range_in, size_t count_in, + size_t byteSize_in, + vespalib::stringref file_in) + : range(range_in), count(count_in), byteSize(byteSize_in), + file(file_in) {} +}; + +struct DomainInfo { + SerialNumRange range; + size_t count; + size_t byteSize; + std::vector parts; + DomainInfo(SerialNumRange range_in, size_t count_in, size_t byteSize_in) + : range(range_in), count(count_in), byteSize(byteSize_in), parts() {} + DomainInfo() + : range(), count(0), byteSize(0), parts() {} +}; + +typedef std::map DomainStats; + +class Domain +{ +public: + typedef std::shared_ptr SP; + Domain(const vespalib::string &name, + const vespalib::string &baseDir, + vespalib::ThreadStackExecutor & executor, + uint64_t domainPartSize, + bool useFsync, + DomainPart::Crc defaultCrcType, + const common::FileHeaderContext &fileHeaderContext); + + virtual ~Domain(); + + DomainInfo getDomainInfo() const; + + const vespalib::string & name() const { return _name; } + bool erase(const SerialNum & to); + + void commit(const Packet & packet); + int + visit(const Domain::SP & self, + const SerialNum & from, + const SerialNum & to, + FRT_Supervisor & supervisor, + FNET_Connection *conn); + + int subscribe(const Domain::SP & self, const SerialNum & from, FRT_Supervisor & supervisor, FNET_Connection *conn); + + SerialNum begin() const; + SerialNum end() const; + SerialNum getSynced(void) const; + void triggerSyncNow(void); + bool getMarkedDeleted(void) const { return _markedDeleted; } + void markDeleted(void) { _markedDeleted = true; } + + uint64_t count() const { return _count; } + size_t byteSize() const; + size_t getNumSessions() const { return _sessions.size(); } + + int startSession(int sessionId); + int closeSession(int sessionId); + + SerialNum findOldestActiveVisit() const; + 
DomainPart::SP findPart(SerialNum s); + + static vespalib::string + getDir(const vespalib::string & base, const vespalib::string & domain) { + return base + "/" + domain; + } + vespalib::Executor::Task::UP execute(vespalib::Executor::Task::UP task) { + return _executor.execute(std::move(task)); + } + uint64_t size() const; +private: + uint64_t size(const vespalib::LockGuard & guard) const; + void cleanSessions(); + vespalib::string dir() const { return getDir(_baseDir, _name); } + void addPart(int64_t partId, bool isLastPart); + + typedef std::vector SerialNumList; + + SerialNumList scanDir(void); + + typedef std::map SessionList; + typedef std::map DomainPartList; + typedef vespalib::ThreadStackExecutor Executor; + + DomainPart::Crc _defaultCrcType; + Executor & _executor; + uint64_t _count; + int _sessionId; + const bool _useFsync; + vespalib::Monitor _syncMonitor; + bool _pendingSync; + vespalib::string _name; + uint64_t _domainPartSize; + DomainPartList _parts; + vespalib::Lock _lock; + vespalib::Lock _sessionLock; + SessionList _sessions; + vespalib::string _baseDir; + const common::FileHeaderContext &_fileHeaderContext; + bool _markedDeleted; + bool _urgentSync; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp new file mode 100644 index 00000000000..274a3495e73 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.cpp @@ -0,0 +1,681 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".transactionlog.domainpart"); + +using vespalib::make_string; +using vespalib::FileHeader; +using vespalib::string; +using vespalib::getLastErrorString; +using vespalib::IllegalHeaderException; +using vespalib::LockGuard; +using vespalib::nbostream; +using search::common::FileHeaderContext; +using std::runtime_error; + +namespace search +{ + +namespace transactionlog +{ + + +namespace +{ + +void +handleSync(FastOS_FileInterface &file) __attribute__ ((noinline)); + +string +handleWriteError(const char *text, + FastOS_FileInterface &file, + int64_t lastKnownGoodPos, + const Packet::Entry &entry, + int bufLen) __attribute__ ((noinline)); + +bool +handleReadError(const char *text, + FastOS_FileInterface &file, + ssize_t len, + ssize_t rlen, + int64_t lastKnownGoodPos, + bool allowTruncate) __attribute__ ((noinline)); + +bool +addPacket(Packet &packet, + const Packet::Entry &e) __attribute__ ((noinline)); + +bool +tailOfFileIsZero(FastOS_FileInterface &file, int64_t lastKnownGoodPos) __attribute__ ((noinline)); + +bool +addPacket(Packet &packet, const Packet::Entry &e) +{ + LOG(spam, "Adding serial #%" PRIu64 ", of type %d and size %zd into packet of size %zu and %zu bytes", + e.serial(), e.type(), e.data().size(), packet.size(), packet.sizeBytes()); + return ! packet.add(e); +} + +void +handleSync(FastOS_FileInterface &file) +{ + if ( file.IsOpened() && ! file.Sync() ) { + int osError = errno; + throw runtime_error(make_string("Failed to synchronize file '%s' of size %" PRId64 " due to '%s'. 
" + "Does not know how to handle this so throwing an exception.", + file.GetFileName(), file.GetSize(), FastOS_File::getErrorString(osError).c_str())); + } +} + +string +handleWriteError(const char *text, + FastOS_FileInterface &file, + int64_t lastKnownGoodPos, + const Packet::Entry &entry, + int bufLen) +{ + string last(FastOS_File::getLastErrorString()); + string e(make_string("%s. File '%s' at position %" PRId64 " for entry %" PRIu64 " of length %u. " + "OS says '%s'. Rewind to last known good position %" PRId64 ".", + text, file.GetFileName(), file.GetPosition(), entry.serial(), bufLen, + last.c_str(), lastKnownGoodPos)); + LOG(error, "%s", e.c_str()); + if ( ! file.SetPosition(lastKnownGoodPos) ) { + last = FastOS_File::getLastErrorString(); + throw runtime_error(make_string("Failed setting position %" PRId64 " of file '%s' of size %" PRId64 ": OS says '%s'", + lastKnownGoodPos, file.GetFileName(), file.GetSize(), last.c_str())); + } + handleSync(file); + return e; +} + +string +getError(FastOS_FileInterface & f) +{ + return make_string("File '%s' of size %ld has last error of '%s'.", + f.GetFileName(), f.GetSize(), FastOS_File::getLastErrorString().c_str()); +} + +bool +tailOfFileIsZero(FastOS_FileInterface &file, int64_t lastKnownGoodPos) +{ + ssize_t rest(file.GetSize() - lastKnownGoodPos); + if (rest < 0 || rest > 0x100000) { + return false; + } + std::vector buf(rest, 0); + file.ReadBuf(&buf[0], buf.size(), lastKnownGoodPos); + for (char c : buf) { + if (c != 0) { + return false; + } + } + return true; +} + +bool +handleReadError(const char *text, + FastOS_FileInterface &file, + ssize_t len, + ssize_t rlen, + int64_t lastKnownGoodPos, + bool allowTruncate) +{ + bool retval(true); + if (rlen != -1) { + string e; + if (len == rlen) { + e = make_string("Error in data read of size %zd bytes at pos %" PRId64 " trying to read %s. ", + len, file.GetPosition() - rlen, text); + } else { + e = make_string("Short Read. 
Got only %zd of %zd bytes at pos %" PRId64 " trying to read %s. ", + rlen, len, file.GetPosition() - rlen, text); + } + e += getError(file); + if (!allowTruncate) { + LOG(error, "%s", e.c_str()); + throw runtime_error(e); + } + // Short read. Log error, Truncate, continue. + e += make_string(" Truncate to %" PRId64 " and continue", lastKnownGoodPos); + LOG(error, "%s", e.c_str()); + FastOS_File truncateFile(file.GetFileName()); + file.Close(); + if ( truncateFile.OpenWriteOnlyExisting()) { + if (truncateFile.SetSize(lastKnownGoodPos)) { + if (truncateFile.Close()) { + if (file.OpenReadOnly()) { + if (file.SetPosition(lastKnownGoodPos)) { + retval = false; + } else { + throw runtime_error(make_string("Failed setting position %" PRId64 ". %s", lastKnownGoodPos, getError(file).c_str())); + } + } else { + throw runtime_error(make_string("Failed reopening file after truncate: %s", getError(file).c_str())); + } + } else { + throw runtime_error(make_string("Failed closing truncated file: %s", getError(truncateFile).c_str())); + } + } else { + throw runtime_error(make_string("Failed truncating to %" PRId64 ": %s", lastKnownGoodPos, getError(truncateFile).c_str())); + } + } else { + throw runtime_error(make_string("Failed opening for truncating: %s", getError(file).c_str())); + } + } else { + // Some kind of IO error throw exception. + string errString = FastOS_File::getLastErrorString(); + throw runtime_error(make_string("IO error when reading %zd bytes at pos %" PRId64 "trying to read %s." + " Last known good position is %" PRId64 ": %s", + len, file.GetPosition(), text, lastKnownGoodPos, getError(file).c_str())); + } + return retval; +} + +} + +int64_t +DomainPart::buildPacketMapping(bool allowTruncate) +{ + Fast_BufferedFile transLog; + transLog.EnableDirectIO(); + if ( ! 
transLog.OpenReadOnly(_transLog.GetFileName())) { + throw runtime_error(make_string("Failed opening '%s' for buffered readinf with direct io.", transLog.GetFileName())); + } + int64_t fSize(transLog.GetSize()); + int64_t currPos(0); + try { + FileHeader header; + _headerLen = header.readFile(transLog); + transLog.SetPosition(_headerLen); + currPos = _headerLen; + } catch (const IllegalHeaderException &e) { + transLog.SetPosition(0); + try { + FileHeader::FileReader fr(transLog); + uint32_t header2Len = FileHeader::readSize(fr); + if (header2Len <= fSize) + e.throwSelf(); // header not truncated + } catch (const IllegalHeaderException &e2) { + } + if (fSize > 0) { + // Truncate file (dropping header) if cannot even read + // header length, or if header has been truncated. + handleReadError("file header", transLog, 0, FileHeader::getMinSize(), 0, allowTruncate); + } + } + while ((currPos < fSize)) { + Packet packet; + SerialNum firstSerial(0); + SerialNum lastSerial(0); + int64_t firstPos(currPos); + bool full(false); + vespalib::DefaultAlloc buf; + for(size_t i(0); !full && (currPos < fSize); i++) { + Packet::Entry e; + if (read(transLog, e, buf, allowTruncate)) { + if (e.valid()) { + if (i == 0) { + firstSerial = e.serial(); + if (currPos == _headerLen) { + _range.from(firstSerial); + } + } + try { + full = addPacket(packet, e); + if ( ! 
full ) { + lastSerial = e.serial(); + currPos = transLog.GetPosition(); + _sz++; + } else { + transLog.SetPosition(currPos); + } + } catch (const std::exception & ex) { + throw runtime_error(make_string("%s : Failed creating packet for list %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")", + ex.what(), transLog.GetFileName(), fSize, currPos, transLog.GetPosition())); + } + } else { + throw runtime_error(make_string("Invalid entry reading file %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")", + transLog.GetFileName(), fSize, currPos, transLog.GetPosition())); + } + } else { + if (transLog.GetSize() != fSize) { + fSize = transLog.GetSize(); + } else { + throw runtime_error(make_string("Failed reading file %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")", + transLog.GetFileName(), fSize, currPos, transLog.GetPosition())); + } + } + } + packet.close(); + if (!packet.empty()) { + _packets[firstSerial] = packet; + _range.to(lastSerial); + { + LockGuard guard(_lock); + _skipList.push_back(SkipInfo(firstSerial, firstPos)); + } + } + } + transLog.Close(); + return currPos; +} + +DomainPart::DomainPart(const string & name, + const string & baseDir, + SerialNum s, + bool useFsync, + Crc defaultCrc, + const FileHeaderContext &fileHeaderContext, + bool allowTruncate) : + _defaultCrc(defaultCrc), + _useFsync(useFsync), + _lock(), + _fileLock(), + _range(s), + _sz(0), + _byteSize(0), + _packets(), + _fileName(make_string("%s/%s-%016" PRIu64, baseDir.c_str(), name.c_str(), s)), + _transLog(_fileName.c_str()), + _skipList(), + _headerLen(0), + _writeLock(), + _writtenSerial(0), + _syncedSerial(0) +{ + if (_transLog.OpenReadOnly()) { + int64_t currPos = buildPacketMapping(allowTruncate); + if ( ! _transLog.Close() ) { + throw runtime_error(make_string("Failed closing file '%s' after reading.", _transLog.GetFileName())); + } + if ( ! 
_transLog.OpenWriteOnlyExisting() ) { + string e(make_string("Failed opening existing file '%s' for writing: %s", _transLog.GetFileName(), getLastErrorString().c_str())); + LOG(error, "%s", e.c_str()); + throw runtime_error(e); + } + if (currPos == 0) { + // Previous header was truncated. Write new one. + writeHeader(fileHeaderContext); + currPos = _headerLen; + } + _byteSize = currPos; + } else { + if ( ! _transLog.OpenWriteOnly()) { + string e(make_string("Failed opening new file '%s' for writing: '%s'", _transLog.GetFileName(), getLastErrorString().c_str())); + + LOG(error, "%s", e.c_str()); + throw runtime_error(e); + } + writeHeader(fileHeaderContext); + _byteSize = _headerLen; + } + if ( ! _transLog.SetPosition(_transLog.GetSize()) ) { + throw runtime_error(make_string("Failed moving write pointer to the end of the file %s(%" PRIu64 ").", + _transLog.GetFileName(), _transLog.GetSize())); + } + handleSync(_transLog); + _writtenSerial = _range.to(); + _syncedSerial = _writtenSerial; +} + +DomainPart::~DomainPart() +{ + close(); +} + +void +DomainPart::writeHeader(const FileHeaderContext &fileHeaderContext) +{ + typedef vespalib::GenericHeader::Tag Tag; + FileHeader header; + assert(_transLog.IsOpened()); + assert(_transLog.IsWriteMode()); + assert(_transLog.GetPosition() == 0); + fileHeaderContext.addTags(header, _transLog.GetFileName()); + header.putTag(Tag("desc", "Transaction log domain part file")); + _headerLen = header.writeFile(_transLog); +} + +bool +DomainPart::close() +{ + bool retval(false); + { + LockGuard guard(_fileLock); + /* + * Sync old domainpart before starting writing new, to avoid + * hole. XXX: Feed latency spike due to lack of delayed open + * for new domainpart. + */ + handleSync(_transLog); + _transLog.dropFromCache(); + retval = _transLog.Close(); + LockGuard wguard(_writeLock); + _syncedSerial = _writtenSerial; + } + if ( ! 
retval ) { + throw runtime_error(make_string("Failed closing file '%s' of size %" PRId64 ".", + _transLog.GetFileName(), _transLog.GetSize())); + } + { + LockGuard guard(_lock); + _packets.clear(); + } + return retval; +} + +bool +DomainPart::openAndFind(FastOS_FileInterface &file, const SerialNum &from) +{ + bool retval(file.OpenReadOnly(_transLog.GetFileName())); + if (retval) { + int64_t pos(_headerLen); + LockGuard guard(_lock); + for(SkipList::const_iterator it(_skipList.begin()), mt(_skipList.end()); + (it < mt) && (it->id() <= from); + it++) + { + pos = it->filePos(); + } + retval = file.SetPosition(pos); + } + return retval; +} + +bool +DomainPart::erase(SerialNum to) +{ + bool retval(true); + if (to > _range.to()) { + close(); + _transLog.Delete(); + } else { + _range.from(std::max(to, _range.from())); + } + return retval; +} + +void +DomainPart::commit(SerialNum firstSerial, const Packet &packet) +{ + int64_t firstPos(_transLog.GetPosition()); + nbostream h(packet.getHandle().c_str(), packet.getHandle().size(), true); + if (_range.from() == 0) { + _range.from(firstSerial); + } + for (size_t i(0); h.size() > 0; i++) { + //LOG(spam, + //"Pos(%d) Len(%d), Lim(%d), Remaining(%d)", + //h.getPos(), h.getLength(), h.getLimit(), h.getRemaining()); + Packet::Entry entry; + entry.deserialize(h); + if (_range.to() < entry.serial()) { + write(_transLog, entry); + _sz++; + _range.to(entry.serial()); + } else { + throw runtime_error(make_string("Incomming serial number(%ld) must be bigger than the last one (%ld).", + entry.serial(), _range.to())); + } + } + if (_useFsync) { + sync(); + } + + bool merged(false); + LockGuard guard(_lock); + if ( ! _packets.empty() ) { + Packet & lastPacket = _packets.rbegin()->second; + if (lastPacket.sizeBytes() < 0xf000) { + if ( ! 
(merged = lastPacket.merge(packet)) ) { + LOG(error, "Failed merging packet [%" PRIu64 ", %" PRIu64 "] with [%" PRIu64 ", %" PRIu64 "]", + lastPacket.range().from(), lastPacket.range().to(), + packet.range().from(), packet.range().to()); + } + } + } + if (! merged ) { + _packets[firstSerial] = packet; + _skipList.push_back(SkipInfo(firstSerial, firstPos)); + } +} + +void DomainPart::sync() +{ + SerialNum syncSerial(0); + { + LockGuard guard(_writeLock); + syncSerial = _writtenSerial; + } + LockGuard guard(_fileLock); + handleSync(_transLog); + LockGuard wguard(_writeLock); + if (_syncedSerial < syncSerial) { + _syncedSerial = syncSerial; + } +} + +bool +DomainPart::visit(SerialNumRange &r, Packet &packet) +{ + bool retval(false); + LockGuard guard(_lock); + LOG(debug, "Visit r(%" PRIu64 ", %" PRIu64 "] Checking %" PRIu64 " packets", + r.from(), r.to(), uint64_t(_packets.size())); + if ( ! isClosed() ) { + PacketList::const_iterator start(_packets.lower_bound(r.from() + 1)); + PacketList::const_iterator end(_packets.upper_bound(r.to())); + if (start != _packets.end()) { + if ( ! start->second.range().contains(r.from() + 1) && + (start != _packets.begin())) { + PacketList::const_iterator prev(start); + prev--; + if (prev->second.range().contains(r.from() + 1)) { + start--; + } + } + } else { + if (!_packets.empty()) + start--; + } + if ( start != _packets.end() && start->first <= r.to()) { + PacketList::const_iterator next(start); + next++; + if ((r.from() < start->first) && + ((next != end) || ((next != _packets.end()) && ((r.to() + 1) == next->first)))) + { + packet = start->second; + LOG(debug, "Visit whole packet[%" PRIu64 ", %" PRIu64 "]", packet.range().from(), packet.range().to()); + if (next != _packets.end()) { + r.from(next->first - 1); + retval = true; + } else { + /// This is the very last package. Can safely finish. 
+ } + } else { + const nbostream & tmp = start->second.getHandle(); + nbostream h(tmp.c_str(), tmp.size(), true); + LOG(debug, "Visit partial[%" PRIu64 ", %" PRIu64 "] (%zd, %zd, %zd)", + start->second.range().from(), start->second.range().to(), h.rp(), h.size(), h.capacity()); + Packet newPacket(h.size()); + for (; (h.size() > 0) && (r.from() < r.to()); ) { + Packet::Entry e; + e.deserialize(h); + if (r.from() < e.serial()) { + if (e.serial() <= r.to()) { + LOG(spam, "Adding serial #%" PRIu64 ", of type %d and size %zd into packet of size %zu and %zu bytes", + e.serial(), e.type(), e.data().size(), newPacket.size(), newPacket.sizeBytes()); + if (newPacket.add(e)) { + r.from(e.serial()); + } else { + throw runtime_error("Could not add entry to packet. Here is some mumbo jumbo. Fix."); + } + } else { + // Force breakout on visiting empty interval. + r.from(r.to()); + } + } + } + newPacket.close(); + packet = newPacket; + retval = next != _packets.end(); + } + } else { + packet.close(); + } + } else { + /// File has been closed must continue from file. + retval = true; + } + return retval; +} + + +bool +DomainPart::visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet) +{ + bool retval(true); + if ( ! file.IsOpened() ) { + retval = openAndFind(file, r.from() + 1); + } + if (retval) { + Packet newPacket; + vespalib::DefaultAlloc buf; + for (bool full(false);!full && retval && (r.from() < r.to());) { + Packet::Entry e; + int64_t fPos = file.GetPosition(); + retval = read(file, e, buf, false); + if (retval && + e.valid() && + (r.from() < e.serial()) && + (e.serial() <= r.to())) { + try { + full = addPacket(newPacket, e); + } catch (const std::exception & ex) { + throw runtime_error(make_string("%s : Failed creating packet for visit %s(%" PRIu64 ") at pos(%" PRIu64 ", %" PRIu64 ")", + ex.what(), file.GetFileName(), file.GetSize(), fPos, file.GetPosition())); + } + if ( !full ) { + r.from(e.serial()); + } else { + if ( ! 
file.SetPosition(fPos) ) { + throw runtime_error(make_string("Failed setting read position for file '%s' of size %" PRId64 " from %" PRId64 " to %" PRId64 ".", + file.GetFileName(), file.GetSize(), file.GetPosition(), fPos)); + } + } + } + } + newPacket.close(); + packet = newPacket; + } + + return retval; +} + +void +DomainPart::write(FastOS_FileInterface &file, const Packet::Entry &entry) +{ + int64_t lastKnownGoodPos(file.GetPosition()); + int32_t crc(0); + uint32_t len(entry.serializedSize() + sizeof(crc)); + nbostream os; + os << static_cast(_defaultCrc); + os << len; + size_t start(os.size()); + entry.serialize(os); + size_t end(os.size()); + crc = calcCrc(_defaultCrc, os.c_str()+start, end - start); + os << crc; + size_t osSize = os.size(); + assert(osSize == len + sizeof(len) + sizeof(uint8_t)); + + LockGuard guard(_writeLock); + if ( ! file.CheckedWrite(os.c_str(), osSize) ) { + throw runtime_error(handleWriteError("Failed writing the entry.", file, lastKnownGoodPos, entry, end - start)); + } + _writtenSerial = entry.serial(); + _byteSize.store(lastKnownGoodPos + osSize, std::memory_order_release); +} + +bool +DomainPart::read(FastOS_FileInterface &file, + Packet::Entry &entry, + vespalib::DefaultAlloc & buf, + bool allowTruncate) +{ + bool retval(true); + char tmp[5]; + int64_t lastKnownGoodPos(file.GetPosition()); + size_t rlen = file.Read(tmp, sizeof(tmp)); + nbostream his(tmp, sizeof(tmp)); + uint8_t version(-1); + uint32_t len(0); + his >> version >> len; + if ((retval = (rlen == sizeof(tmp)))) { + if ( ! (retval = (version == ccitt_crc32) || version == xxh64)) { + vespalib::string msg(make_string("Version mismatch. 
Expected 'ccitt_crc32=1' or 'xxh64=2'," + " got %d from '%s' at position %ld", + version, file.GetFileName(), lastKnownGoodPos)); + if ((version == 0) && (len == 0) && tailOfFileIsZero(file, lastKnownGoodPos)) { + LOG(warning, "%s", msg.c_str()); + return handleReadError("packet version", file, sizeof(tmp), rlen, lastKnownGoodPos, allowTruncate); + } else { + throw runtime_error(msg); + } + } + if (len > buf.size()) { + vespalib::DefaultAlloc(len).swap(buf); + } + rlen = file.Read(buf.get(), len); + retval = rlen == len; + if (!retval) { + retval = handleReadError("packet blob", file, len, rlen, lastKnownGoodPos, allowTruncate); + } else { + nbostream is(buf.get(), len, true); + entry.deserialize(is); + int32_t crc(0); + is >> crc; + int32_t crcVerify(calcCrc(static_cast(version), buf.get(), len - sizeof(crc))); + if (crc != crcVerify) { + throw runtime_error(make_string("Got bad crc for packet from '%s' (len pos=%" PRId64 ", len=%d) : crcVerify = %d, expected %d", + file.GetFileName(), file.GetPosition() - len - sizeof(len), + static_cast(len), static_cast(crcVerify), static_cast(crc))); + } + } + } else { + if (rlen == 0) { + // Eof + } else { + retval = handleReadError("packet length", file, sizeof(len), rlen, lastKnownGoodPos, allowTruncate); + } + } + return retval; +} + +int32_t DomainPart::calcCrc(Crc version, const void * buf, size_t sz) +{ + if (version == xxh64) { + return static_cast(XXH64(buf, sz, 0ll)); + } else if (version == ccitt_crc32) { + vespalib::crc_32_type calculator; + calculator.process_bytes(buf, sz); + return calculator.checksum(); + } else { + assert(false); + } +} + +} +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/domainpart.h b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h new file mode 100644 index 00000000000..04041a2cba0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/domainpart.h @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#pragma once + +#include "common.h" +#include +#include +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + + +namespace transactionlog +{ + +/** + * One part of a transaction log domain: the entries in the serial number range + * returned by range(), kept in the file named by fileName(). + */ +class DomainPart { +private: + DomainPart(const DomainPart &); + DomainPart& operator=(const DomainPart &); + +public: + /// Checksum algorithm identifier; write() stores it as the first byte of every entry. + enum Crc { + ccitt_crc32=1, + xxh64=2 + }; + typedef std::shared_ptr SP; + DomainPart(const vespalib::string &name, + const vespalib::string &baseDir, + SerialNum s, + bool useFsync, + Crc defaultCrc, + const common::FileHeaderContext &FileHeaderContext, + bool allowTruncate); + + ~DomainPart(); + + const vespalib::string &fileName() const { return _fileName; } + void commit(SerialNum firstSerial, const Packet &packet); + bool erase(SerialNum to); + bool visit(SerialNumRange &r, Packet &packet); + bool visit(FastOS_FileInterface &file, SerialNumRange &r, Packet &packet); + bool close(); + void sync(); + SerialNumRange range() const { return _range; } + + /// Returns the last synced serial number, read under _writeLock. + SerialNum getSynced(void) const { + vespalib::LockGuard guard(_writeLock); + return _syncedSerial; + } + + size_t size() const { return _sz; } + size_t byteSize() const { + return _byteSize.load(std::memory_order_acquire); + } + bool isClosed() const { return !
_transLog.IsOpened(); } +private: + bool openAndFind(FastOS_FileInterface &file, const SerialNum &from); + int64_t buildPacketMapping(bool allowTruncate); + + static bool + read(FastOS_FileInterface &file, + Packet::Entry &entry, + vespalib::DefaultAlloc &buf, + bool allowTruncate); + + void write(FastOS_FileInterface &file, const Packet::Entry &entry); + static int32_t calcCrc(Crc crc, const void * buf, size_t len); + void writeHeader(const common::FileHeaderContext &fileHeaderContext); + + /// Pairs a serial number with a file position; ordered by serial number only. + class SkipInfo + { + public: + SkipInfo(SerialNum s, uint64_t p) : + _id(s), + _pos(p) + { + } + + bool operator ==(const SkipInfo &b) const { return cmp(b) == 0; } + bool operator <(const SkipInfo &b) const { return cmp(b) < 0; } + bool operator >(const SkipInfo &b) const { return cmp(b) > 0; } + bool operator <=(const SkipInfo &b) const { return cmp(b) <= 0; } + bool operator >=(const SkipInfo &b) const { return cmp(b) >= 0; } + int64_t filePos() const { return _pos; } + SerialNum id() const { return _id; } + private: + // Explicit three-way compare; converting the difference of the ids to int64_t gives the wrong sign when they are more than 2^63 apart. + int64_t cmp(const SkipInfo & b) const { return (_id < b._id) ? -1 : ((_id > b._id) ? 1 : 0); } + SerialNum _id; + uint64_t _pos; + }; + typedef std::vector SkipList; + typedef std::map PacketList; + const Crc _defaultCrc; + const bool _useFsync; + vespalib::Lock _lock; + vespalib::Lock _fileLock; + SerialNumRange _range; + size_t _sz; + std::atomic _byteSize; + PacketList _packets; + vespalib::string _fileName; + FastOS_File _transLog; + SkipList _skipList; + uint32_t _headerLen; + vespalib::Lock _writeLock; + // Protected by _writeLock + SerialNum _writtenSerial; + SerialNum _syncedSerial; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp new file mode 100644 index 00000000000..2b8e2935752 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.cpp @@ -0,0 +1,28 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root. +#include +#include "nosyncproxy.h" + +namespace search +{ +namespace transactionlog +{ + +NoSyncProxy::NoSyncProxy(void) +{ +} + + +NoSyncProxy::~NoSyncProxy(void) +{ +} + + +// Deliberately does nothing; syncTo is only referenced to mark it as used. +void +NoSyncProxy::sync(SerialNum syncTo) +{ + (void) syncTo; +} + +} + +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h new file mode 100644 index 00000000000..0c8faba2979 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/nosyncproxy.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "syncproxy.h" + +namespace search +{ +namespace transactionlog +{ + +/** + * SyncProxy implementation whose sync() is a no-op. + */ +class NoSyncProxy : public SyncProxy +{ +public: + NoSyncProxy(void); + + virtual + ~NoSyncProxy(void); + + virtual void + sync(SerialNum syncTo); +}; + +} + +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/session.cpp b/searchlib/src/vespa/searchlib/transactionlog/session.cpp new file mode 100644 index 00000000000..bbb786b25c3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/session.cpp @@ -0,0 +1,275 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include +#include +#include + +LOG_SETUP(".transactionlog.session"); + +using vespalib::LockGuard; + +namespace search { +namespace transactionlog { + +namespace { + const double NEVER(-1.0); +} + +// Creates the task that drives 'session': a SubscribeTask when the session is continous(), otherwise a VisitTask. +vespalib::Executor::Task::UP +Session::createTask(const Session::SP & session) +{ + if (session->continous()) { + return Task::UP(new SubscribeTask(session)); + } else { + return Task::UP(new VisitTask(session)); + } +} + +void +Session::SubscribeTask::run() +{ + _session->subscribe(); +} + +void +Session::VisitTask::run() +{ + _session->visitOnly(); +} + +void +Session::SendTask::run() +{ + _session->sendPending(); +} + +bool +Session::inSync() const +{ + return _inSync; +} + +// Sends the packets found in the domain parts overlapping _range, moving _range.from() past each part once it has nothing more to offer. +void +Session::visit() +{ + LOG(debug, "[%d] : Visiting %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to()); + for (DomainPart::SP dpSafe = _domain->findPart(_range.from()); dpSafe.get() && (_range.from() < _range.to()) && (dpSafe.get()->range().from() <= _range.to()); dpSafe = _domain->findPart(_range.from())) { + // Must use findPart and iterate until no candidate parts found. + DomainPart * dp(dpSafe.get()); + LOG(debug, "[%d] : Visiting the interval %" PRIu64 " - %" PRIu64 " in domain part [%" PRIu64 ", %" PRIu64 "]", _id, _range.from(), _range.to(), dp->range().from(), dp->range().to()); + Fast_BufferedFile file; + file.EnableDirectIO(); + for(bool more(true); ok() && more && (_range.from() < _range.to()); ) { + LOG(debug, "[%d] : Visiting the interval %" PRIu64 " - %" PRIu64 " in subpart", _id, _range.from(), _range.to()); + Packet packet; + if (dp->isClosed()) { + more = dp->visit(file, _range, packet); + } else { + more = dp->visit(_range, packet); + } + if (packet.getHandle().size() > 0) { + LOG(debug, "[%d] : Sending the interval %" PRIu64 " - %" PRIu64 ".
Packet : [%" PRIu64 ", %" PRIu64 "]", _id, _range.from(), _range.to(), packet.range().from(), packet.range().to()); + send(packet); + } + } + // Nothing more in this DomainPart, force switch to next one. + if (_range.from() < dp->range().to()) { + _range.from(std::min(dp->range().to(), _range.to())); + } + } + + LOG(debug, "[%d] : Done visiting, starting subscribe %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to()); +} + +void +Session::visitOnly() +{ + visit(); + sendDone(); + finalize(); +} + +// Queues a copy of 'packet' on 'session'; once the session is in sync a SendTask is scheduled to flush the queue. +void +Session::enQ(const SP & session, SerialNum serial, const Packet & packet) +{ + LockGuard guard(session->_lock); + session->_packetQ.push_back(QPacket(serial,packet)); + if (session->_inSync) { + session->_domain->execute(Task::UP(new SendTask(session))); + } +} + +void +Session::subscribe() +{ + visit(); + sendPending(); + sendSync(); +} + +// Drains _packetQ, sending each packet outside the lock; stops early when the session is no longer ok(). +void +Session::sendPending() +{ + for (;;) { + QPacket packet; + { + LockGuard guard(_lock); + if (_packetQ.empty() || !ok()) + break; + packet = std::move(_packetQ.front()); + _packetQ.pop_front(); + } + sendPacket(packet._serial, *packet._packet); + } +} + +// Sends 'packet' only when 'serial' is beyond _range.from(); otherwise the packet is skipped. +void +Session::sendPacket(SerialNum serial, const Packet & packet) +{ + if (_range.from() < serial) { + send(packet); + } else { + LOG(debug, "[%d] : Skipping %" PRIu64 ". Last sent is %" PRIu64, _id, serial, _range.from()); + } +} + +void +Session::finalize() +{ + if (!ok()) { + LOG(error, "[%d] : Error in %s(%" PRIu64 " - %" PRIu64 "), stopping since I have no idea on what to do.", _id, (continous() ?
"subscriber" : "visitor"), _range.from(), _range.to()); + } + LOG(debug, "[%d] : Stopped %" PRIu64 " - %" PRIu64, _id, _range.from(), _range.to()); + _finished = true; +} + +// Invokes 'req' synchronously without a timeout. Returns the first return value on success, +// otherwise the negated FRT error code; any error other than a timeout marks the session as not ok. +int32_t +Session::rpc(FRT_RPCRequest * req) +{ + int32_t retval(-7); + LOG(debug, "rpc %s starting.", req->GetMethodName()); + FRT_Supervisor::InvokeSync(_supervisor.GetTransport(), _connection, req, NEVER); + if (req->GetErrorCode() == FRTE_NO_ERROR) { + retval = (req->GetReturn()->GetValue(0)._intval32); + LOG(debug, "rpc %s = %d\n", req->GetMethodName(), retval); + } else if (req->GetErrorCode() == FRTE_RPC_TIMEOUT) { + LOG(warning, "rpc %s timed out. Will allow to continue: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage()); + retval = -req->GetErrorCode(); + } else { + if (req->GetErrorCode() != FRTE_RPC_CONNECTION) { + LOG(warning, "rpc %s: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage()); + } + retval = -req->GetErrorCode(); + _ok = false; + } + return retval; +} + +// Completion callback for rpcAsync(): records whether the request succeeded and releases the request. +void +Session::RequestDone(FRT_RPCRequest * req) +{ + _ok = (req->GetErrorCode() == FRTE_NO_ERROR); + if (req->GetErrorCode() != FRTE_NO_ERROR) { + LOG(warning, "rpcAsync failed %s: error(%d): %s\n", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage()); + } else { + int32_t retval = req->GetReturn()->GetValue(0)._intval32; + if (retval != RPC::OK) { + LOG(error, "Return value != OK in RequestDone for method '%s'", req->GetMethodName()); + } + } + req->SubRef(); +} + +// Starts 'req' asynchronously with this session as the waiter. Returns 0 when the session is still ok(), -7 otherwise. +int32_t +Session::rpcAsync(FRT_RPCRequest * req) +{ + int32_t retval(-7); + LOG(debug, "rpcAsync %s starting.", req->GetMethodName()); + FRT_Supervisor::InvokeAsync(_supervisor.GetTransport(), _connection, req, NEVER, this); + if (ok()) { + LOG(debug, "rpcAsync %s OK", req->GetMethodName()); + retval = 0; + } else { + LOG(warning, "rpcAsync %s FAILED", req->GetMethodName()); + } + return retval; +} + +Session::Session(int sId, const SerialNumRange & r, const Domain::SP & d, +
FRT_Supervisor & supervisor, FNET_Connection *conn, bool subscriber) : + _supervisor(supervisor), + _connection(conn), + _domain(d), + _range(r), + _id(sId), + _subscriber(subscriber), + _inSync(false), + _ok(true), + _finished(false), + _packetQ() +{ + _connection->AddRef(); +} + +Session::~Session() +{ + _connection->SubRef(); +} + +// Wraps 'packet' in a visitCallback request and sends it synchronously. +bool +Session::send(const Packet & packet) +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("visitCallback"); + req->GetParams()->AddString(_domain->name().c_str()); + req->GetParams()->AddInt32(id()); + req->GetParams()->AddData(packet.getHandle().c_str(), packet.getHandle().size()); + return send(req, true); +} + +// Sends 'req' either synchronously (wait == true, the request is released here) or asynchronously +// (released in RequestDone()). Returns true when the result equals RPC::OK. +bool +Session::send(FRT_RPCRequest * req, bool wait) +{ + int32_t retval(-1); + if (wait) { + retval = rpc(req); + // rpc() reports transport failures as the negated FRT error code, so a lost connection shows up as -FRTE_RPC_CONNECTION. + if ( ! ((retval == RPC::OK) || (retval == -FRTE_RPC_CONNECTION)) ) { + LOG(error, "Return value != OK(%d) in send for method '%s'.", retval, req->GetMethodName()); + } + req->SubRef(); + } else { + retval = rpcAsync(req); + } + return (retval == RPC::OK); +} + +// Sends syncCallback to the client and then flags the session as in sync. +bool +Session::sendSync() +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("syncCallback"); + req->GetParams()->AddString(_domain->name().c_str()); + req->GetParams()->AddInt32(id()); + bool retval(send(req, true)); + LockGuard guard(_lock); + _inSync = true; + return retval; +} + +// Sends eofCallback to the client. NOTE(review): this also sets _inSync, exactly like sendSync() - confirm that is intended. +bool +Session::sendDone() +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("eofCallback"); + req->GetParams()->AddString(_domain->name().c_str()); + req->GetParams()->AddInt32(id()); + bool retval(send(req, true)); + LockGuard guard(_lock); + _inSync = true; + return retval; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/session.h b/searchlib/src/vespa/searchlib/transactionlog/session.h new file mode 100644 index 00000000000..69d22e69fc1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/session.h @@ -0,0 +1,94 @@ +// Copyright 2016 Yahoo Inc.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "common.h" +#include +#include +#include +#include + +namespace search { +namespace transactionlog { + +class Domain; +typedef std::shared_ptr DomainSP; + +/** + * A visitor or subscriber session on a domain, bound to one client connection. + * The connection is reference counted for the lifetime of the session. + */ +class Session : public FRT_IRequestWait, + public vespalib::noncopyable +{ +private: + typedef vespalib::Executor::Task Task; + +public: + typedef std::shared_ptr SP; + Session(int sId, const SerialNumRange & r, const DomainSP & d, FRT_Supervisor & supervisor, FNET_Connection *conn, bool subscriber=false); + virtual ~Session(); + const SerialNumRange & range() const { return _range; } + int id() const { return _id; } + bool inSync() const; + /// True for a subscriber session, false for a plain visitor. + bool continous() const { return _subscriber; } + bool ok() const { return _ok; } + /// True once finalize() has run or the connection is no longer in the connected state. + bool finished() const { return _finished || (_connection->GetState() != FNET_Connection::FNET_CONNECTED);} + static void enQ(const SP & session, SerialNum serial, const Packet & packet); + static Task::UP createTask(const Session::SP & session); +private: + /// A queued packet together with its serial number; owns a copy of the packet. + struct QPacket { + QPacket() : _serial(0), _packet() {} + QPacket(SerialNum s, const Packet & p) + : _serial(s), + _packet(new Packet(p)) + { + } + SerialNum _serial; + std::unique_ptr _packet; + }; + class VisitTask : public Task { + public: + VisitTask(const Session::SP & session) : _session(session) { } + private: + virtual void run(); + Session::SP _session; + }; + class SubscribeTask : public Task { + public: + SubscribeTask(const Session::SP & session) : _session(session) { } + private: + virtual void run(); + Session::SP _session; + }; + class SendTask : public Task { + public: + SendTask(const Session::SP & session) : _session(session) { } + virtual void run(); + private: + Session::SP _session; + }; + bool send(FRT_RPCRequest * req, bool wait); + virtual void RequestDone(FRT_RPCRequest *req); + bool send(const Packet & packet); + void sendPacket(SerialNum serial, const Packet & packet); + bool sendDone(); + bool
sendSync(); + void sendPending(); + void visit(); + void visitOnly(); + void subscribe(); + void finalize(); + int32_t rpc(FRT_RPCRequest * req); + int32_t rpcAsync(FRT_RPCRequest * req); + FRT_Supervisor & _supervisor; + FNET_Connection * _connection; + DomainSP _domain; + SerialNumRange _range; + int _id; + bool _subscriber; + bool _inSync; + bool _ok; + bool _finished; + std::deque _packetQ; + vespalib::Lock _lock; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h b/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h new file mode 100644 index 00000000000..baf533518e7 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/syncproxy.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ +namespace transactionlog +{ + +/** + * Interface with a single operation: sync up to a given serial number. + */ +class SyncProxy +{ +public: + virtual + ~SyncProxy(void) + { + } + + virtual void + sync(SerialNum syncTo) = 0; +}; + +} + +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp new file mode 100644 index 00000000000..49c16940be5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.cpp @@ -0,0 +1,71 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "trans_log_server_explorer.h" +#include "domain.h" +#include + +using vespalib::slime::Inserter; +using vespalib::slime::Cursor; + +namespace search { +namespace transactionlog { + +namespace { + +// Exposes the state of a single domain: its serial range, entry count and byte size, +// plus one object per part when the full state is requested. +struct DomainExplorer : vespalib::StateExplorer { + Domain::SP domain; + DomainExplorer(Domain::SP domain_in) : domain(std::move(domain_in)) {} + virtual void get_state(const Inserter &inserter, bool full) const override { + Cursor &state = inserter.insertObject(); + DomainInfo info = domain->getDomainInfo(); + state.setLong("from", info.range.from()); + state.setLong("to", info.range.to()); + state.setLong("count", info.count); + state.setLong("byteSize", info.byteSize); + if (full) { + Cursor &array = state.setArray("parts"); + for (const PartInfo &part_in: info.parts) { + Cursor &part = array.addObject(); + part.setLong("from", part_in.range.from()); + part.setLong("to", part_in.range.to()); + part.setLong("count", part_in.count); + part.setLong("byteSize", part_in.byteSize); + part.setString("file", part_in.file); + { + FastOS_StatInfo stat_info; + // Only report a modification time when the stat succeeded; on failure stat_info may hold no valid data. + if (FastOS_File::Stat(part_in.file.c_str(), &stat_info)) { + part.setString("lastModified", fastos::TimeStamp::asString(stat_info._modifiedTime)); + } + } + } + } + } +}; + +} // anonymous namespace + +// The server itself has no state of its own to report; it only inserts an empty object. +void +TransLogServerExplorer::get_state(const Inserter &inserter, bool full) const +{ + (void) full; + inserter.insertObject(); +} + +// The children are the domains known to the server. +std::vector +TransLogServerExplorer::get_children_names() const +{ + return _server->getDomainNames(); +} + +// Returns an explorer for the named domain, or an empty pointer when the server has no such domain. +std::unique_ptr +TransLogServerExplorer::get_child(vespalib::stringref name) const +{ + Domain::SP domain = _server->findDomain(name); + if (!domain) { + return std::unique_ptr(nullptr); + } + return std::unique_ptr(new DomainExplorer(std::move(domain))); +} + +} // namespace search::transactionlog +} // namespace search diff --git a/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h
b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h new file mode 100644 index 00000000000..8d3f7080385 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/trans_log_server_explorer.h @@ -0,0 +1,27 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "translogserver.h" +#include + +namespace search { +namespace transactionlog { + +/** + * Class used to explore the state of a transaction log server. + * Holds a shared reference to the server it explores. + */ +class TransLogServerExplorer : public vespalib::StateExplorer +{ +private: + TransLogServer::SP _server; + +public: + TransLogServerExplorer(TransLogServer::SP server) : _server(std::move(server)) {} + virtual void get_state(const vespalib::slime::Inserter &inserter, bool full) const override; + virtual std::vector get_children_names() const override; + virtual std::unique_ptr get_child(vespalib::stringref name) const override; +}; + +} // namespace search::transactionlog +} // namespace search diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp new file mode 100644 index 00000000000..47a2897fba6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogclient.cpp @@ -0,0 +1,402 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include + +LOG_SETUP(".translogclient"); + +namespace search { +namespace transactionlog { + +namespace { + const double NEVER(-1.0); +} + +using vespalib::LockGuard; + +TransLogClient::TransLogClient(const vespalib::string & rpcTarget) : + _rpcTarget(rpcTarget), + _sessions(), + _supervisor(), + _target(NULL) +{ + reconnect(); + exportRPC(_supervisor); + _supervisor.Start(); +} + +TransLogClient::~TransLogClient() +{ + disconnect(); + _supervisor.ShutDown(true); +} + +// Drops the current target, if any, and asks the supervisor for a new one. Returns isConnected(). +bool TransLogClient::reconnect() +{ + disconnect(); + _target = _supervisor.Get2WayTarget(_rpcTarget.c_str()); + return isConnected(); +} + +// Releases the reference to the target. Safe to call repeatedly. +void TransLogClient::disconnect() +{ + if (_target) { + _target->SubRef(); + // Forget the released target so a later disconnect(), isConnected() or rpc() never touches it again. + _target = NULL; + } +} + +bool TransLogClient::create(const vespalib::string & domain) +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("createDomain"); + req->GetParams()->AddString(domain.c_str()); + int32_t retval(rpc(req)); + req->SubRef(); + return (retval == 0); +} + +bool TransLogClient::remove(const vespalib::string & domain) +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("deleteDomain"); + req->GetParams()->AddString(domain.c_str()); + int32_t retval(rpc(req)); + req->SubRef(); + return (retval == 0); +} + +// Returns a session for 'domain', or an empty pointer when the openDomain request did not return 0. +TransLogClient::Session::UP TransLogClient::open(const vespalib::string & domain) +{ + Session::UP session; + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("openDomain"); + req->GetParams()->AddString(domain.c_str()); + int32_t retval(rpc(req)); + if (retval == 0) { + session.reset(new Session(domain, *this)); + } + req->SubRef(); + return session; +} + +TransLogClient::Subscriber::UP TransLogClient::createSubscriber(const vespalib::string & domain, TransLogClient::Session::Callback & callBack) +{ + return TransLogClient::Subscriber::UP(new Subscriber(domain, *this, callBack)); +} + +TransLogClient::Visitor::UP TransLogClient::createVisitor(const vespalib::string & domain,
TransLogClient::Session::Callback & callBack) +{ + return TransLogClient::Visitor::UP(new Visitor(domain, *this, callBack)); +} + +// Appends the newline separated domain names returned by the server to 'dir'. +bool TransLogClient::listDomains(std::vector & dir) +{ + FRT_RPCRequest *req = _supervisor.AllocRPCRequest(); + req->SetMethodName("listDomains"); + int32_t retval(rpc(req)); + if (retval == 0) { + char * s = req->GetReturn()->GetValue(1)._string._str; + for (const char * d(strsep(&s, "\n")); d && (*d != '\0'); d = strsep(&s, "\n")) { + dir.push_back(d); + } + } + req->SubRef(); + return (retval == 0); +} + +// Invokes 'req' synchronously on the target. Returns the first return value on success, +// -7 when the request failed and -6 when there is no target. +int32_t TransLogClient::rpc(FRT_RPCRequest * req) +{ + int32_t retval(-7); + if (_target) { + _target->InvokeSync(req, NEVER); + if (req->GetErrorCode() == FRTE_NO_ERROR) { + retval = (req->GetReturn()->GetValue(0)._intval32); + LOG(debug, "rpc %s = %d", req->GetMethodName(), retval); + } else { + LOG(warning, "%s: error(%d): %s", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage()); + } + } else { + retval = -6; + } + return retval; +} + +// Looks up a registered session. Takes no lock itself; callers guard the call with _lock. +TransLogClient::Session * TransLogClient::findSession(const vespalib::string & domainName, int sessionId) +{ + SessionKey key(domainName, sessionId); + SessionMap::iterator found(_sessions.find(key)); + Session * session((found != _sessions.end()) ? found->second : NULL); + return session; +} + +void TransLogClient::exportRPC(FRT_Supervisor & supervisor) +{ + FRT_ReflectionBuilder rb( & supervisor); + + //-- Visit Callbacks ----------------------------------------------------------- + rb.DefineMethod("visitCallback", "six", "i", false, FRT_METHOD(TransLogClient::visitCallbackRPC), this); + rb.MethodDesc("Will return data asked from a subscriber/visitor."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("session", "Session handle."); + rb.ParamDesc("packet", "The data packet."); + rb.ReturnDesc("result", "A resultcode(int) of the operation.
Non zero number indicates error."); + + //-- Sync Callback ------------------------------------------------------------- + rb.DefineMethod("syncCallback", "si", "i", false, FRT_METHOD(TransLogClient::syncCallbackRPC), this); + rb.MethodDesc("Will tell you that now you are uptodate on the subscribtion."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("session", "Session handle."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Non zero number indicates error."); + + //-- Eof Callback -------------------------------------------------------------- + rb.DefineMethod("eofCallback", "si", "i", false, FRT_METHOD(TransLogClient::eofCallbackRPC), this); + rb.MethodDesc("Will tell you that you are done with the visitor."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("session", "Session handle."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Non zero number indicates error."); +} + +// Hands the received packet to the addressed session; answers -1 when the session is unknown. +void TransLogClient::visitCallbackRPC(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + int32_t sessionId(params[1]._intval32); + LOG(spam, "visitCallback(%s, %d)(%d)", domainName, sessionId, params[2]._data._len); + Session * session(NULL); + { + // _sessions is modified under _lock by Session::init()/clear(); only the lookup is guarded so the callback itself runs unlocked. + LockGuard guard(_lock); + session = findSession(domainName, sessionId); + } + if (session != NULL) { + Packet packet(params[2]._data._buf, params[2]._data._len); + retval = session->visit(packet); + } + ret.AddInt32(retval); + LOG(debug, "visitCallback(%s, %d)=%d done", domainName, sessionId, retval); +} + +// Notifies the addressed session that it is in sync; answers -1 when the session is unknown. +void TransLogClient::syncCallbackRPC(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + int32_t sessionId(params[1]._intval32); + LOG(debug, "syncCallback(%s, %d)", domainName, sessionId); + LockGuard guard(_lock); + Session *
session(findSession(domainName, sessionId)); + if (session != NULL) { + session->inSync(); + retval = 0; + } + ret.AddInt32(retval); + LOG(debug, "syncCallback(%s, %d)=%d done", domainName, sessionId, retval); +} + +// Notifies the addressed session that the visit is complete; answers -1 when the session is unknown. +void TransLogClient::eofCallbackRPC(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + int32_t sessionId(params[1]._intval32); + LOG(debug, "eofCallback(%s, %d)", domainName, sessionId); + Session * session(NULL); + { + // Same guarded lookup as in visitCallbackRPC(). + LockGuard guard(_lock); + session = findSession(domainName, sessionId); + } + if (session != NULL) { + session->eof(); + retval = 0; + } + ret.AddInt32(retval); + LOG(debug, "eofCallback(%s, %d)=%d done", domainName, sessionId, retval); +} + + +TransLogClient::Session::Session(const vespalib::string & domain, TransLogClient & tlc) : + _tlc(tlc), + _domain(domain), + _sessionId(0) +{ +} + +TransLogClient::Session::~Session() +{ + close(); + clear(); +} + +// Commits 'buf' to the domain. An empty buffer is a no-op that returns true. +// Throws std::runtime_error when the server rejects the commit or the request itself fails. +bool TransLogClient::Session::commit(const vespalib::ConstBufferRef & buf) +{ + bool retval(true); + if (buf.size() != 0) { + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainCommit"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddData(buf.c_str(), buf.size()); + int retcode = _tlc.rpc(req); + retval = (retcode == 0); + if (retval) { + req->SubRef(); + } else { + vespalib::string msg; + // The server message is return value 1; it is absent when the request failed before the server answered. + if ((req->GetReturn() != 0) && (req->GetReturn()->GetNumValues() > 1)) { + msg = req->GetReturn()->GetValue(1)._string._str; + } else { + msg = vespalib::make_string("Clientside error %s: error(%d): %s", req->GetMethodName(), req->GetErrorCode(), req->GetErrorMessage()); + } + req->SubRef(); + throw std::runtime_error(vespalib::make_string("commit failed with code %d.
server says: %s", retcode, msg.c_str())); + } + } + return retval; +} + +// Fetches the first and last serial number and the entry count of the domain. The outputs are only written on success. +bool TransLogClient::Session::status(SerialNum & b, SerialNum & e, size_t & count) +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainStatus"); + req->GetParams()->AddString(_domain.c_str()); + int32_t retval(_tlc.rpc(req)); + if (retval == 0) { + b = req->GetReturn()->GetValue(1)._intval64; + e = req->GetReturn()->GetValue(2)._intval64; + count = req->GetReturn()->GetValue(3)._intval64; + } + req->SubRef(); + return (retval == 0); +} + +bool TransLogClient::Session::erase(const SerialNum & to) +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainPrune"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddInt64(to); + int32_t retval(_tlc.rpc(req)); + req->SubRef(); + if (retval == 1) { + LOG(warning, "Prune to %" PRIu64 " denied since there were active visitors in that area", to); + } + return (retval == 0); +} + + +bool +TransLogClient::Session::sync(const SerialNum &syncTo, SerialNum &syncedTo) +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainSync"); + FRT_Values & params = *req->GetParams(); + params.AddString(_domain.c_str()); + params.AddInt64(syncTo); + int32_t retval(_tlc.rpc(req)); + if (retval == 0) { + syncedTo = req->GetReturn()->GetValue(1)._intval64; + } + req->SubRef(); + return (retval == 0); +} + + +// Unregisters this session from the client (under the client lock) and resets the session id. +void TransLogClient::Session::clear() +{ + if (_sessionId > 0) { + LockGuard guard(_tlc._lock); + _tlc._sessions.erase(SessionKey(_domain, _sessionId)); + } + _sessionId = 0; +} + +// Orders keys by domain name first and session id second. +int TransLogClient::SessionKey::cmp(const TransLogClient::SessionKey & b) const +{ + int diff(strcmp(_domain.c_str(), b._domain.c_str())); + if (diff == 0) { + diff = _sessionId - b._sessionId; + } + return diff; +} + +TransLogClient::Subscriber::Subscriber(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack) : + Session(domain, tlc), +
_callback(callBack) +{ +} + +TransLogClient::Subscriber::~Subscriber() +{ +} + +TransLogClient::Visitor::Visitor(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack) : + Subscriber(domain, tlc, callBack) +{ +} + +// Sends 'req' (released here); a positive result is taken as the new session id, which is registered +// with the client before the session is started with run(). +bool TransLogClient::Session::init(FRT_RPCRequest *req) +{ + int32_t retval(_tlc.rpc(req)); + req->SubRef(); + if (retval > 0) { + clear(); + _sessionId = retval; + SessionKey key(_domain, _sessionId); + { + LockGuard guard(_tlc._lock); + _tlc._sessions[key] = this; + } + retval = run(); + } + return (retval > 0); +} + +bool TransLogClient::Visitor::visit(const SerialNum & from, const SerialNum & to) +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainVisit"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddInt64(from); + req->GetParams()->AddInt64(to); + return init(req); +} + +bool TransLogClient::Subscriber::subscribe(const SerialNum & from) +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainSubscribe"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddInt64(from); + return init(req); +} + +bool TransLogClient::Session::run() +{ + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainSessionRun"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddInt32(_sessionId); + int32_t retval(_tlc.rpc(req)); + req->SubRef(); + return (retval == 0); +} + +// Asks the server to close the session, retrying every 10 ms for as long as the server answers 1. +bool TransLogClient::Session::close() +{ + int retval(0); + if (_sessionId > 0) { + do { + FRT_RPCRequest *req = _tlc._supervisor.AllocRPCRequest(); + req->SetMethodName("domainSessionClose"); + req->GetParams()->AddString(_domain.c_str()); + req->GetParams()->AddInt32(_sessionId); + if ( (retval = _tlc.rpc(req)) > 0) { + FastOS_Thread::Sleep(10); + } + req->SubRef(); + } while ( retval == 1 ); + } + return (retval == 0); +} + +TransLogClient::Visitor::~Visitor() +{ +} + +} +} diff --git
a/searchlib/src/vespa/searchlib/transactionlog/translogclient.h b/searchlib/src/vespa/searchlib/transactionlog/translogclient.h new file mode 100644 index 00000000000..702a7cd260f --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogclient.h @@ -0,0 +1,140 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "common.h" +#include +#include +#include +#include +#include +#include + +namespace search { +namespace transactionlog { + +/** + * Client for a transaction log server reached through the given RPC target. + * It also exports the callback methods the server invokes on visitors and subscribers. + */ +class TransLogClient : private FRT_Invokable +{ +private: + TransLogClient(const TransLogClient &); + TransLogClient& operator=(const TransLogClient &); + +public: + class Session + { + public: + /// Receiver of the packets and notifications delivered to a Subscriber or Visitor. + class Callback { + public: + virtual ~Callback() { } + virtual RPC::Result receive(const Packet & packet) = 0; + virtual void inSync() { } + virtual void eof() { } + }; + public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + Session(const vespalib::string & domain, TransLogClient & tlc); + virtual ~Session(); + /// You can commit data of any registered type to any channel.
+ bool commit(const vespalib::ConstBufferRef & packet); + /// Will erase all entries prior to 'to'. + bool erase(const SerialNum & to); + bool status(SerialNum & b, SerialNum & e, size_t & count); + + bool sync(const SerialNum &syncTo, SerialNum &syncedTo); + + virtual RPC::Result visit(const Packet & ) { return RPC::OK; } + virtual void inSync() { } + virtual void eof() { } + bool close(); + void clear(); + const vespalib::string & getDomain() const { return _domain; } + const TransLogClient & getTLC() const { return _tlc; } + protected: + bool init(FRT_RPCRequest * req); + bool run(); + TransLogClient & _tlc; + vespalib::string _domain; + int _sessionId; + }; + /// Here you connect to the incoming data, getting everything from the serial number passed to subscribe(). + class Subscriber : public Session + { + public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + Subscriber(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack); + bool subscribe(const SerialNum & from); + virtual ~Subscriber(); + virtual RPC::Result visit(const Packet & packet) { return _callback.receive(packet); } + virtual void inSync() { _callback.inSync(); } + virtual void eof() { _callback.eof(); } + private: + Callback & _callback; + }; + /// Here you read the incoming data, getting everything in the range passed to visit(). + class Visitor : public Subscriber + { + public: + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + Visitor(const vespalib::string & domain, TransLogClient & tlc, Callback & callBack); + bool visit(const SerialNum & from, const SerialNum & to); + virtual ~Visitor(); + }; +public: + typedef std::unique_ptr UP; + + TransLogClient(const vespalib::string & rpctarget); + virtual ~TransLogClient(); + + /// Here you create a new domain + bool create(const vespalib::string & domain); + /// Here you remove a domain + bool remove(const vespalib::string & domain); + /// Here you open an existing domain + Session::UP open(const vespalib::string & domain); + /// Here you can get a list of available
domains. + bool listDomains(std::vector & dir); + /// Here you get a subscriber + Subscriber::UP createSubscriber(const vespalib::string & domain, Session::Callback & callBack); + /// Here you get a visitor + Visitor::UP createVisitor(const vespalib::string & domain, Session::Callback & callBack); + + bool isConnected() const { return (_target != NULL) && _target->IsValid(); } + void disconnect(); + bool reconnect(); + const vespalib::string &getRPCTarget() const { return _rpcTarget; } +private: + void exportRPC(FRT_Supervisor & supervisor); + void visitCallbackRPC(FRT_RPCRequest *req); + void syncCallbackRPC(FRT_RPCRequest *req); + void eofCallbackRPC(FRT_RPCRequest *req); + int32_t rpc(FRT_RPCRequest * req); + Session * findSession(const vespalib::string & domain, int sessionId); + + /// Map key identifying a session by domain name and session id. + class SessionKey + { + public: + SessionKey(const vespalib::string & domain, int sessionId) : _domain(domain), _sessionId(sessionId) { } + bool operator < (const SessionKey & b) const { return cmp(b) < 0; } + private: + int cmp(const SessionKey & b) const; + vespalib::string _domain; + int _sessionId; + }; + + typedef std::map< SessionKey, Session * > SessionMap; + + vespalib::string _rpcTarget; + SessionMap _sessions; + //Brute force lock for subscriptions. For multithread safety. + vespalib::Lock _lock; + FRT_Supervisor _supervisor; + FRT_Target * _target; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp new file mode 100644 index 00000000000..79b7413c1b4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.cpp @@ -0,0 +1,672 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include +#include +#include +#include +#include +#include +#include + +LOG_SETUP(".transactionlog.server"); + +using vespalib::make_string; +using vespalib::stringref; +using vespalib::make_vespa_string; +using vespalib::IllegalArgumentException; +using search::common::FileHeaderContext; + +namespace search +{ + +namespace transactionlog +{ + +namespace +{ + +class SyncHandler : public FNET_Task +{ + FRT_RPCRequest & _req; + Domain::SP _domain; + TransLogServer::Session::SP _session; + SerialNum _syncTo; + +public: + SyncHandler(FRT_Supervisor *supervisor, + FRT_RPCRequest *req,const Domain::SP &domain, + const TransLogServer::Session::SP &session, + SerialNum syncTo); + + ~SyncHandler(void); + void PerformTask(void) override; +}; + + +SyncHandler::SyncHandler(FRT_Supervisor *supervisor, + FRT_RPCRequest *req, + const Domain::SP &domain, + const TransLogServer::Session::SP &session, + SerialNum syncTo) + : FNET_Task(supervisor->GetScheduler()), + _req(*req), + _domain(domain), + _session(session), + _syncTo(syncTo) +{ +} + + +SyncHandler::~SyncHandler(void) +{ +} + + +void +SyncHandler::PerformTask(void) +{ + SerialNum synced(_domain->getSynced()); + if (_session->getDown() || + _domain->getMarkedDeleted() || + synced >= _syncTo) { + FRT_Values &rvals = *_req.GetReturn(); + rvals.AddInt32(0); + rvals.AddInt64(synced); + _req.Return(); + delete this; + } else { + _domain->triggerSyncNow(); + Schedule(0.05); // Retry in 0.05 seconds + } +} + +} + + + +TransLogServer::TransLogServer(const vespalib::string &name, + int listenPort, + const vespalib::string &baseDir, + const FileHeaderContext &fileHeaderContext, + uint64_t domainPartSize, + bool useFsync, + size_t maxThreads, + DomainPart::Crc defaultCrcType) + : FRT_Invokable(), + _name(name), + _baseDir(baseDir), + _domainPartSize(domainPartSize), + _useFsync(useFsync), + _defaultCrcType(defaultCrcType), + _executor(maxThreads, 128*1024), + _threadPool(8192, 1), + _supervisor(), + _domains(), + _reqQ(), + 
_fileHeaderContext(fileHeaderContext) +{ + int retval(0); + if ((retval = makeDirectory(_baseDir.c_str())) == 0) { + if ((retval = makeDirectory(dir().c_str())) == 0) { + std::ifstream domainDir(domainList().c_str()); + while (domainDir.good() && !domainDir.eof()) { + vespalib::string domainName; + domainDir >> domainName; + if ( ! domainName.empty()) { + try { + Domain::SP domain(new Domain(domainName, + dir(), + _executor, + _domainPartSize, + _useFsync, + _defaultCrcType, + _fileHeaderContext)); + _domains[domain->name()] = domain; + } catch (const std::exception & e) { + LOG(warning, "Failed creating %s domain on startup. Exception = %s", domainName.c_str(), e.what()); + } + } + } + exportRPC(_supervisor); + char listenSpec[32]; + sprintf(listenSpec, "tcp/%d", listenPort); + bool listenOk(false); + for (int i(600); !listenOk && i; i--) { + if (_supervisor.Listen(listenSpec)) { + _supervisor.Start(); + listenOk = true; + } else { + LOG(warning, "Failed listening at port %s trying for %d seconds more.", listenSpec, i); + FastOS_Thread::Sleep(1000); + } + } + if ( ! listenOk ) { + throw std::runtime_error(make_string("Failed listening at port %s. Giving up. Requires manual intervention.", listenSpec)); + } + } else { + throw std::runtime_error(make_string("Failed creating tls dir %s r(%d), e(%d). Requires manual intervention.", dir().c_str(), retval, errno)); + } + } else { + throw std::runtime_error(make_string("Failed creating tls base dir %s r(%d), e(%d). 
Requires manual intervention.", _baseDir.c_str(), retval, errno)); + } + start(_threadPool); +} + +TransLogServer::~TransLogServer() +{ + stop(); + join(); + _supervisor.ShutDown(true); +} + +bool TransLogServer::onStop() +{ + LOG(info, "Stopping TLS"); + _reqQ.push(NULL); + return true; +} + +void TransLogServer::run() +{ + FRT_RPCRequest *req(NULL); + bool hasPacket(false); + logMetric(); + do { + for (req = NULL; (hasPacket = _reqQ.pop(req, 60000)) && (req != NULL); req = NULL) { + bool immediate = true; + if (strcmp(req->GetMethodName(), "domainSessionClose") == 0) { + domainSessionClose(req); + } else if (strcmp(req->GetMethodName(), "domainSubscribe") == 0) { + domainSubscribe(req); + } else if (strcmp(req->GetMethodName(), "domainVisit") == 0) { + domainVisit(req); + } else if (strcmp(req->GetMethodName(), "createDomain") == 0) { + createDomain(req); + } else if (strcmp(req->GetMethodName(), "deleteDomain") == 0) { + deleteDomain(req); + } else if (strcmp(req->GetMethodName(), "openDomain") == 0) { + openDomain(req); + } else if (strcmp(req->GetMethodName(), "listDomains") == 0) { + listDomains(req); + } else if (strcmp(req->GetMethodName(), "domainStatus") == 0) { + domainStatus(req); + } else if (strcmp(req->GetMethodName(), "domainCommit") == 0) { + domainCommit(req); + } else if (strcmp(req->GetMethodName(), "domainPrune") == 0) { + domainPrune(req); + } else if (strcmp(req->GetMethodName(), "domainSessionRun") == 0) { + domainSessionRun(req); + } else if (strcmp(req->GetMethodName(), "domainSync") == 0) { + immediate = false; + domainSync(req); + } else { + LOG(warning, "Received unknown RPC command %s", req->GetMethodName()); + } + if (immediate) { + req->Return(); + } + } + logMetric(); + } while (running() && !(hasPacket && (req == NULL))); + LOG(info, "TLS Stopped"); +} + +void TransLogServer::logMetric() const +{ + Guard domainGuard(_lock); + for (DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) { + 
vespalib::string prefix("translogserver." + it->first + ".serialnum."); + EV_COUNT((prefix + "last").c_str(), it->second->end()); + EV_COUNT((prefix + "first").c_str(), it->second->begin()); + EV_VALUE((prefix + "numused").c_str(), it->second->size()); + EV_COUNT((prefix + "count").c_str(), it->second->count()); + } +} + +DomainStats +TransLogServer::getDomainStats() const +{ + DomainStats retval; + Guard domainGuard(_lock); + for (const auto &elem : _domains) { + retval[elem.first] = elem.second->getDomainInfo(); + } + return retval; +} + +std::vector +TransLogServer::getDomainNames() +{ + std::vector names; + Guard guard(_lock); + for(const auto &domain: _domains) { + names.push_back(domain.first); + } + return names; +} + +Domain::SP +TransLogServer::findDomain(const stringref &domainName) +{ + Guard domainGuard(_lock); + Domain::SP domain; + DomainList::iterator found(_domains.find(domainName)); + if (found != _domains.end()) { + domain = found->second; + } + return domain; +} + +void TransLogServer::exportRPC(FRT_Supervisor & supervisor) +{ + _supervisor.SetSessionInitHook(FRT_METHOD(TransLogServer::initSession), this); + _supervisor.SetSessionFiniHook(FRT_METHOD(TransLogServer::finiSession), this); + _supervisor.SetSessionDownHook(FRT_METHOD(TransLogServer::downSession), this); + FRT_ReflectionBuilder rb( & supervisor); + + //-- Create Domain ----------------------------------------------------------- + rb.DefineMethod("createDomain", "s", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Create a new domain."); + rb.ParamDesc("name", "The name of the domain."); + rb.ReturnDesc("handle", "A handle(int) to the domain. 
Negative number indicates error."); + + //-- Delete Domain ----------------------------------------------------------- + rb.DefineMethod("deleteDomain", "s", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Create a new domain."); + rb.ParamDesc("name", "The name of the domain."); + rb.ReturnDesc("retval", "0 on success. Negative number indicates error."); + rb.ReturnDesc("errormsg", "Message describing the error, if any."); + + //-- Open Domain ----------------------------------------------------------- + rb.DefineMethod("openDomain", "s", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Open an existing domain."); + rb.ParamDesc("name", "The name of the domain."); + rb.ReturnDesc("handle", "A handle(int) to the domain. Negative number indicates error."); + + //-- List Domains ----------------------------------------------------------- + rb.DefineMethod("listDomains", "", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Will return a list of all the domains."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error."); + rb.ReturnDesc("domains", "List of all the domains in a newline separated string"); + + //-- Domain Status ----------------------------------------------------------- + rb.DefineMethod("domainStatus", "s", "illl", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("This will return key status information about the domain."); + rb.ParamDesc("name", "The name of the domain."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. 
Negative number indicates error."); + rb.ReturnDesc("begin", "The id of the first element in the log."); + rb.ReturnDesc("end", "The id of the last element in the log."); + rb.ReturnDesc("size", "Number of elements in the log."); + + //-- Domain Commit ----------------------------------------------------------- + rb.DefineMethod("domainCommit", "sx", "is", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Will commit the data to the log."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("packet", "The data to commit to the domain."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error."); + rb.ReturnDesc("message", "A textual description of the result code."); + + //-- Domain Prune ----------------------------------------------------------- + rb.DefineMethod("domainPrune", "sl", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Will erase all operations prior to the serial number."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("to", "Will erase all up and including."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error."); + + //-- Domain Subscribe ----------------------------------------------------------- + rb.DefineMethod("domainSubscribe", "sl", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("This will create a subscription. It will live till the connection is closed."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("from", "Will return all entries following(not including) ."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. 
Positive number is the sessionid"); + + //-- Domain Visit ----------------------------------------------------------- + rb.DefineMethod("domainVisit", "sll", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("This will create a visitor that return all operations in the range."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("from", "Will return all entries following(not including) ."); + rb.ParamDesc("to", "Will return all entries including ."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. Positive number is the sessionid"); + + //-- Domain Session Run ----------------------------------------------------------- + rb.DefineMethod("domainSessionRun", "si", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("This will start the session thread."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("sessionid", "The session identifier."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error."); + + //-- Domain Session Close ----------------------------------------------------------- + rb.DefineMethod("domainSessionClose", "si", "i", true, FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("This will close the session."); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("sessionid", "The session identifier."); + rb.ReturnDesc("result", "A resultcode(int) of the operation. Negative number indicates error. 1 means busy -> retry. 0 is OK."); + + //-- Domain Sync -- + rb.DefineMethod("domainSync", "sl", "il", true, + FRT_METHOD(TransLogServer::relayToThreadRPC), this); + rb.MethodDesc("Sync domain to given entry"); + rb.ParamDesc("name", "The name of the domain."); + rb.ParamDesc("syncto", "Entry to sync to"); + rb.ReturnDesc("result", + "A resultcode(int) of the operation. 
" + "Negative number indicates error."); + rb.ReturnDesc("syncedto", "Entry synced to"); +} + +void TransLogServer::createDomain(FRT_RPCRequest *req) +{ + uint32_t retval(0); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + + const char * domainName = params[0]._string._str; + LOG(debug, "createDomain(%s)", domainName); + + Guard createDeleteGuard(_fileLock); + Domain::SP domain(findDomain(domainName)); + if ( !domain ) { + try { + domain.reset(new Domain(domainName, + dir(), + _executor, + _domainPartSize, + _useFsync, + _defaultCrcType, + _fileHeaderContext)); + { + Guard domainGuard(_lock); + _domains[domain->name()] = domain; + } + std::ofstream domainDir(domainList().c_str(), std::ios::app); + domainDir << domain->name() << std::endl; + } catch (const std::exception & e) { + LOG(warning, "Failed creating %s domain. Exception = %s", domainName, e.what()); + retval = uint32_t(-1); + } + } + + ret.AddInt32(retval); +} + +void TransLogServer::deleteDomain(FRT_RPCRequest *req) +{ + uint32_t retval(0); + vespalib::string msg("ok"); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + + const char * domainName = params[0]._string._str; + LOG(debug, "deleteDomain(%s)", domainName); + + Guard createDeleteGuard(_fileLock); + Domain::SP domain(findDomain(domainName)); + if ( !domain || (domain->getNumSessions() == 0)) { + try { + if (domain) { + domain->markDeleted(); + Guard domainGuard(_lock); + _domains.erase(domainName); + } + vespalib::rmdir(Domain::getDir(dir(), domainName).c_str(), true); + std::ofstream domainDir(domainList().c_str(), std::ios::trunc); + Guard domainGuard(_lock); + for (DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) { + domainDir << it->first << std::endl; + } + } catch (const std::exception & e) { + msg = make_vespa_string("Failed deleting %s domain. 
Exception = %s", domainName, e.what()); + retval = -1; + LOG(warning, "%s", msg.c_str()); + } + } else { + retval = -2; + msg = vespalib::make_vespa_string("Domain '%s' is open. Can not delete open domains.", domainName); + LOG(warning, "%s", msg.c_str()); + } + ret.AddInt32(retval); + ret.AddString(msg.c_str()); +} + +void TransLogServer::openDomain(FRT_RPCRequest *req) +{ + uint32_t retval(0); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + + const char * domainName = params[0]._string._str; + LOG(debug, "openDomain(%s)", domainName); + + Domain::SP domain(findDomain(domainName)); + if ( !domain ) { + retval = uint32_t(-1); + } + + ret.AddInt32(retval); +} + +void TransLogServer::listDomains(FRT_RPCRequest *req) +{ + FRT_Values & ret = *req->GetReturn(); + LOG(debug, "listDomains()"); + + vespalib::string domains; + Guard domainGuard(_lock); + for(DomainList::const_iterator it(_domains.begin()), mt(_domains.end()); it != mt; it++) { + domains += it->second->name(); + domains += "\n"; + } + ret.AddInt32(0); + ret.AddString(domains.c_str()); +} + +void TransLogServer::domainStatus(FRT_RPCRequest *req) +{ + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + LOG(debug, "domainStatus(%s)", domainName); + Domain::SP domain(findDomain(domainName)); + if (domain) { + ret.AddInt32(0); + ret.AddInt64(domain->begin()); + ret.AddInt64(domain->end()); + ret.AddInt64(domain->size()); + } else { + ret.AddInt32(uint32_t(-1)); + ret.AddInt64(0); + ret.AddInt64(0); + ret.AddInt64(0); + } +} + +void TransLogServer::commit(const vespalib::string & domainName, const Packet & packet) +{ + Domain::SP domain(findDomain(domainName)); + if (domain) { + domain->commit(packet); + } else { + throw IllegalArgumentException("Could not find domain " + domainName); + } +} + +void TransLogServer::domainCommit(FRT_RPCRequest *req) +{ + FRT_Values & params = *req->GetParams(); + 
FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + LOG(debug, "domainCommit(%s)(%d)", domainName, params[1]._data._len); + Domain::SP domain(findDomain(domainName)); + if (domain) { + Packet packet(params[1]._data._buf, params[1]._data._len); + try { + domain->commit(packet); + ret.AddInt32(0); + ret.AddString("ok"); + } catch (const std::exception & e) { + ret.AddInt32(-2); + ret.AddString(make_string("Exception during commit on %s : %s", domainName, e.what()).c_str()); + } + } else { + ret.AddInt32(-1); + ret.AddString(make_string("Could not find domain %s", domainName).c_str()); + } +} + +void TransLogServer::domainSubscribe(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + LOG(debug, "domainSubscribe(%s)", domainName); + Domain::SP domain(findDomain(domainName)); + if (domain) { + SerialNum from(params[1]._intval64); + LOG(debug, "domainSubscribe(%s, %" PRIu64 ")", domainName, from); + retval = domain->subscribe(domain, from, _supervisor, req->GetConnection()); + } + ret.AddInt32(retval); +} + +void TransLogServer::domainVisit(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + LOG(debug, "domainVisit(%s)", domainName); + Domain::SP domain(findDomain(domainName)); + if (domain) { + SerialNum from(params[1]._intval64); + SerialNum to(params[2]._intval64); + LOG(debug, "domainVisit(%s, %" PRIu64 ", %" PRIu64 ")", domainName, from, to); + retval = domain->visit(domain, from, to, _supervisor, req->GetConnection()); + } + ret.AddInt32(retval); +} + +void TransLogServer::domainSessionRun(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * 
domainName = params[0]._string._str; + int sessionId(params[1]._intval32); + LOG(debug, "domainSessionRun(%s, %d)", domainName, sessionId); + Domain::SP domain(findDomain(domainName)); + if (domain) { + LOG(debug, "Valid domain domainSessionRun(%s, %d)", domainName, sessionId); + retval = domain->startSession(sessionId); + } + ret.AddInt32(retval); +} + +void TransLogServer::relayToThreadRPC(FRT_RPCRequest *req) +{ + req->Detach(); + _reqQ.push(req); +} + +void TransLogServer::domainSessionClose(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + int sessionId(params[1]._intval32); + LOG(debug, "domainSessionClose(%s, %d)", domainName, sessionId); + Domain::SP domain(findDomain(domainName)); + if (domain) { + LOG(debug, "Valid domain domainSessionClose(%s, %d)", domainName, sessionId); + retval = domain->closeSession(sessionId); + } + LOG(debug, "domainSessionClose(%s, %d) = %d", domainName, sessionId, retval); + ret.AddInt32(retval); +} + +void TransLogServer::domainPrune(FRT_RPCRequest *req) +{ + uint32_t retval(uint32_t(-1)); + FRT_Values & params = *req->GetParams(); + FRT_Values & ret = *req->GetReturn(); + const char * domainName = params[0]._string._str; + LOG(debug, "domainPrune(%s)", domainName); + Domain::SP domain(findDomain(domainName)); + if (domain) { + SerialNum to(params[1]._intval64); + SerialNum oldestActive = domain->findOldestActiveVisit(); + if (oldestActive < to) { + retval = 1; + } else if (domain->erase(to)) { + retval = 0; + } + } + ret.AddInt32(retval); +} + + +const TransLogServer::Session::SP & +TransLogServer::getSession(FRT_RPCRequest *req) +{ + FNET_Connection *conn = req->GetConnection(); + void *vctx = conn->GetContext()._value.VOIDP; + Session::SP *sessionspp = static_cast(vctx); + return *sessionspp; +} + + +void +TransLogServer::initSession(FRT_RPCRequest *req) +{ + 
req->GetConnection()->SetContext(new Session::SP(new Session())); +} + +// Session-fini hook: clears the connection context and deletes the Session::SP that initSession() allocated. +void +TransLogServer::finiSession(FRT_RPCRequest *req) +{ + FNET_Connection *conn = req->GetConnection(); + void *vctx = conn->GetContext()._value.VOIDP; + conn->GetContextPT()->_value.VOIDP = NULL; + Session::SP *sessionspp = static_cast(vctx); + delete sessionspp; +} + +// Session-down hook: flags the connection's session as down; SyncHandler::PerformTask() checks this flag. +void +TransLogServer::downSession(FRT_RPCRequest *req) +{ + getSession(req)->setDown(); +} + +// Handles the domainSync RPC; run() does not return this request itself, so an unknown domain is answered right here and otherwise a SyncHandler task answers later and deletes itself. NOTE(review): an unknown domain yields result 0 and syncedto 0, whereas other handlers report -1 for a missing domain - confirm this is intended. +void +TransLogServer::domainSync(FRT_RPCRequest *req) +{ + FRT_Values & params = *req->GetParams(); + const char * domainName = params[0]._string._str; + SerialNum syncTo(params[1]._intval64); + LOG(debug, "domainSync(%s, %" PRIu64 ")", domainName, syncTo); + Domain::SP domain(findDomain(domainName)); + Session::SP session(getSession(req)); + + if (domain.get() == nullptr) { + FRT_Values &rvals = *req->GetReturn(); + rvals.AddInt32(0); + rvals.AddInt64(0); + req->Return(); + return; + } + + SyncHandler *syncHandler = new SyncHandler(&_supervisor, + req, + domain, + session, + syncTo); + + syncHandler->ScheduleNow(); +} + +} +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserver.h b/searchlib/src/vespa/searchlib/transactionlog/translogserver.h new file mode 100644 index 00000000000..98a24393814 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogserver.h @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace transactionlog +{ + +class TransLogServerExplorer; + +class TransLogServer : public document::Runnable, private FRT_Invokable, public Writer +{ +public: + friend class TransLogServerExplorer; + typedef std::unique_ptr UP; + typedef std::shared_ptr SP; + + TransLogServer(const vespalib::string &name, + int listenPort, + const vespalib::string &baseDir, + const common::FileHeaderContext &fileHeaderContext, + uint64_t domainPartSize=0x10000000, + bool useFsync=false, + size_t maxThreads=4, + DomainPart::Crc defaultCrc=DomainPart::xxh64); + virtual ~TransLogServer(); + uint64_t getDomainPartSize() const { return _domainPartSize; } + uint64_t setDomainPartSize(); + DomainStats getDomainStats() const; + + virtual void commit(const vespalib::string & domainName, const Packet & packet); + + + class Session + { + bool _down; + public: + typedef std::shared_ptr SP; + + Session(void) : _down(false) { } + bool getDown(void) const { return _down; } + void setDown(void) { _down = true; } + }; + +private: + virtual bool onStop(); + virtual void run(); + void exportRPC(FRT_Supervisor & supervisor); + void relayToThreadRPC(FRT_RPCRequest *req); + + void createDomain(FRT_RPCRequest *req); + void deleteDomain(FRT_RPCRequest *req); + void openDomain(FRT_RPCRequest *req); + void listDomains(FRT_RPCRequest *req); + + void domainStatus(FRT_RPCRequest *req); + void domainCommit(FRT_RPCRequest *req); + void domainSessionRun(FRT_RPCRequest *req); + void domainPrune(FRT_RPCRequest *req); + void domainVisit(FRT_RPCRequest *req); + void domainSubscribe(FRT_RPCRequest *req); + void domainSessionClose(FRT_RPCRequest *req); + void domainSync(FRT_RPCRequest *req); + + void initSession(FRT_RPCRequest *req); + void finiSession(FRT_RPCRequest *req); + void downSession(FRT_RPCRequest *req); + + void logMetric() const; + std::vector getDomainNames(); + 
Domain::SP findDomain(const vespalib::stringref &name); + vespalib::string dir() const { return _baseDir + "/" + _name; } + vespalib::string domainList() const { return dir() + "/" + _name + ".domains"; } + + static const Session::SP & getSession(FRT_RPCRequest *req); + + typedef std::map DomainList; + + vespalib::string _name; + vespalib::string _baseDir; + const uint64_t _domainPartSize; + const bool _useFsync; + const DomainPart::Crc _defaultCrcType; + vespalib::ThreadStackExecutor _executor; + FastOS_ThreadPool _threadPool; + FRT_Supervisor _supervisor; + DomainList _domains; + mutable std::mutex _lock; // Protects _domains + std::mutex _fileLock; // Protects the creating and deleting domains including file system operations. + document::Queue _reqQ; + const common::FileHeaderContext &_fileHeaderContext; + using Guard = std::lock_guard; +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp new file mode 100644 index 00000000000..33918e373d1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.cpp @@ -0,0 +1,68 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include +#include +#include + +LOG_SETUP(".translogserverapp"); + +using search::common::FileHeaderContext; + +namespace search +{ + +namespace transactionlog +{ + +TransLogServerApp::TransLogServerApp(const config::ConfigUri & tlsConfigUri, + const FileHeaderContext & fileHeaderContext) + : _tls(), + _tlsConfig(), + _tlsConfigFetcher(tlsConfigUri.getContext()), + _fileHeaderContext(fileHeaderContext) +{ + _tlsConfigFetcher.subscribe(tlsConfigUri.getConfigId(), this); + _tlsConfigFetcher.start(); +} + +namespace { + +DomainPart::Crc getCrc(searchlib::TranslogserverConfig::Crcmethod crcType) +{ + switch (crcType) { + case searchlib::TranslogserverConfig::ccitt_crc32: + return DomainPart::ccitt_crc32; + case searchlib::TranslogserverConfig::xxh64: + return DomainPart::xxh64; + } + assert(false); +} + +} + +void TransLogServerApp::start() +{ + std::shared_ptr c = _tlsConfig.get(); + _tls.reset(new TransLogServer(c->servername, + c->listenport, + c->basedir, + _fileHeaderContext, + c->filesizemax, + c->usefsync, + c->maxthreads, + getCrc(c->crcmethod))); +} + +TransLogServerApp::~TransLogServerApp() +{ + _tlsConfigFetcher.close(); +} + +void TransLogServerApp::configure(std::unique_ptr cfg) +{ + LOG(config, "configure Transaction Log Server %s at port %d", cfg->servername.c_str(), cfg->listenport); + _tlsConfig.set(cfg.release()); + _tlsConfig.latch(); +} + +} +} diff --git a/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h new file mode 100644 index 00000000000..5478fee61ed --- /dev/null +++ b/searchlib/src/vespa/searchlib/transactionlog/translogserverapp.h @@ -0,0 +1,46 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#pragma once + +#include +#include +#include +#include + +namespace search +{ + +namespace common +{ + +class FileHeaderContext; + +} + +namespace transactionlog +{ + +class TransLogServerApp : public config::IFetcherCallback +{ +private: + TransLogServer::SP _tls; + vespalib::PtrHolder _tlsConfig; + config::ConfigFetcher _tlsConfigFetcher; + const common::FileHeaderContext & _fileHeaderContext; + + void configure(std::unique_ptr cfg); + +public: + typedef std::unique_ptr UP; + + TransLogServerApp(const config::ConfigUri & tlsConfigUri, + const common::FileHeaderContext &fileHeaderContext); + ~TransLogServerApp(); + + TransLogServer::SP getTransLogServer() const { return _tls; } + + void start(); +}; + +} // namespace transactionlog +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/util/.gitignore b/searchlib/src/vespa/searchlib/util/.gitignore new file mode 100644 index 00000000000..ee8938b6bf4 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/.gitignore @@ -0,0 +1,6 @@ +*.So +*.exe +*.ilk +*.pdb +.depend* +Makefile diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt new file mode 100644 index 00000000000..a7d9d2290a3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_library(searchlib_util OBJECT + SOURCES + bufferwriter.cpp + comprbuffer.cpp + comprfile.cpp + dirtraverse.cpp + drainingbufferwriter.cpp + filealign.cpp + fileheadertk.cpp + filekit.cpp + filesizecalculator.cpp + fileutil.cpp + foldedstringcompare.cpp + ioerrorhandler.cpp + logutil.cpp + rawbuf.cpp + sigbushandler.cpp + slime_output_raw_buf_adapter.cpp + statebuf.cpp + statefile.cpp + stringenum.cpp + url.cpp + DEPENDS +) diff --git a/searchlib/src/vespa/searchlib/util/bufferwriter.cpp b/searchlib/src/vespa/searchlib/util/bufferwriter.cpp new file mode 100644 index 00000000000..21e1d16a079 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/bufferwriter.cpp @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "bufferwriter.h" + +namespace search +{ + +BufferWriter::BufferWriter() + : _cur(nullptr), + _end(nullptr), + _start(nullptr) +{ +} + + +BufferWriter::~BufferWriter() +{ +} + + +void +BufferWriter::writeSlow(const void *src, size_t len) +{ + size_t residue = len; + const char *csrc = static_cast(src); + for (;;) { + size_t maxLen = freeLen(); + if (residue <= maxLen) { + writeFast(csrc, residue); + break; + } + if (maxLen != 0) { + writeFast(csrc, maxLen); + csrc += maxLen; + residue -= maxLen; + } + flush(); + } +} + + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/bufferwriter.h b/searchlib/src/vespa/searchlib/util/bufferwriter.h new file mode 100644 index 00000000000..b4183f779ed --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/bufferwriter.h @@ -0,0 +1,55 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +namespace search +{ + +/** + * Abstract class to write to a buffer with an abstract backing store + * and abstract backing buffer. 
Each time backing buffer is full, + * flush() is called to resize it or drain it to the backing store. + */ +class BufferWriter +{ + char *_cur; // next write position; advanced by writeFast() + char *_end; // one past the last byte of the buffer (set by setup()) + char *_start; // first byte of the buffer (set by setup()) +protected: + void rewind() { _cur = _start; } + // Installs [start, start + len) as the buffer and rewinds the write position. + void setup(void *start, size_t len) { + _start = static_cast(start); + _end = _start + len; + rewind(); + } + // Bytes still free (freeLen) and bytes already written (usedLen) in the current buffer. + size_t freeLen() const { return _end - _cur; } + size_t usedLen() const { return _cur - _start; } + // Copies without any bounds check; write() verifies len <= freeLen() before calling it. + void writeFast(const void *src, size_t len) + { + __builtin_memcpy(_cur, src, len); + _cur += len; + } + // Out-of-line path used by write() when the data does not fit in the free space. + void writeSlow(const void *src, size_t len); + +public: + BufferWriter(); + + virtual ~BufferWriter(); + // Implemented by subclasses; per the class comment it resizes the buffer or drains it to the backing store. + virtual void flush() = 0; + // Appends len bytes from src, taking the inline fast path when they fit in the free space. + void write(const void *src, size_t len) + { + if (__builtin_expect(len <= freeLen(), true)) { + writeFast(src, len); + return; + } + writeSlow(src, len); + } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/comprbuffer.cpp b/searchlib/src/vespa/searchlib/util/comprbuffer.cpp new file mode 100644 index 00000000000..b76cfc2674d --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/comprbuffer.cpp @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +#include +#include + +LOG_SETUP(".comprbuffer"); + +namespace search +{ + +using vespalib::nbostream; + +/* + * Construct a buffer descriptor for units of unitSize bytes; no memory is + * allocated here. _padBefore gets a defined value because it is otherwise + * only assigned by allocComprBuf(size_t, ...) and checkPointRead(), while + * checkPointWrite() reads it. + */ +ComprBuffer::ComprBuffer(uint32_t unitSize) + : _comprBuf(NULL), + _comprBufSize(0), + _unitSize(unitSize), + _padBefore(false), + _comprBufMalloc(NULL) +{ +} + + +ComprBuffer::~ComprBuffer(void) +{ + dropComprBuf(); +} + + +void +ComprBuffer::dropComprBuf(void) +{ + free(_comprBufMalloc); + _comprBuf = NULL; + _comprBufMalloc = NULL; +} + + +void +ComprBuffer::allocComprBuf(size_t comprBufSize, + size_t preferredFileAlignment, + FastOS_FileInterface *file, + bool padBefore) +{ + comprBufSize = _aligner.setupAlign(comprBufSize, + _unitSize, + file, + preferredFileAlignment); + _comprBufSize = comprBufSize; + _padBefore = padBefore; + allocComprBuf(); +} + +void +ComprBuffer::allocComprBuf(void) +{ + dropComprBuf(); + /* + * Add padding after normal buffer, to allow buffer to be completely + * full before normal flushes for encoding. Any spillover into padding + * area should be copied to start of buffer after write. This allows + * for better alignment of write operations since buffer writes can then + * normally write full buffers. + * + * For read, the padding after normal buffer gives some slack for the + * decoder prefetch at end of file. + */ + size_t paddingAfter = minimumPadding() * _unitSize; + size_t paddingBefore = 0; + if (_padBefore) { + /* + * Add padding before normal buffer, to allow last data at end of + * buffer to be copied to the padding area before the normal buffer + * prior to a full buffer read. This allows for better alignment of + * read operations since buffer reads can then normally read full + * buffers. 
+ */ + paddingBefore = paddingAfter + 2 * _unitSize; + size_t memalign = FastOS_File::getMaxDirectIOMemAlign(); + if (paddingBefore < memalign) + paddingBefore = memalign; + } + size_t fullpadding = paddingAfter + paddingBefore; + size_t allocLen = _comprBufSize * _unitSize + fullpadding; + void *alignedBuf = FastOS_File::allocateGenericDirectIOBuffer(allocLen, + _comprBufMalloc); + memset(alignedBuf, 0, allocLen); + /* + * Set pointer to the start of normal buffer, which should be properly + * aligned in memory for direct IO. + */ + _comprBuf = reinterpret_cast + (static_cast(alignedBuf) + paddingBefore); +} + + +/* + * Replace the buffer with a malloc()ed one of twice the unit capacity (at + * least 16 units) plus minimumPadding() units of slack, preserving the + * current contents including overflowUnits units of spillover past the + * normal end of the old buffer. + */ +void +ComprBuffer::expandComprBuf(uint32_t overflowUnits) +{ + size_t newSize = static_cast(_comprBufSize) * 2; + assert(static_cast(newSize) == newSize); + if (newSize < 16) + newSize = 16; + size_t paddingAfter = minimumPadding() * _unitSize; + assert(overflowUnits <= minimumPadding()); + void *newBuf = malloc(newSize * _unitSize + paddingAfter); + assert(newBuf != NULL); /* allocation failure is fatal; never memcpy() into NULL */ + size_t oldLen = (static_cast(_comprBufSize) + overflowUnits) * + _unitSize; + if (oldLen > 0) + memcpy(newBuf, _comprBuf, oldLen); + free(_comprBufMalloc); + _comprBuf = _comprBufMalloc = newBuf; + _comprBufSize = newSize; +} + + +void +ComprBuffer::referenceComprBuf(const ComprBuffer &rhs) +{ + _comprBuf = rhs._comprBuf; + _comprBufSize = rhs._comprBufSize; +} + + +void +ComprBuffer::checkPointWrite(nbostream &out) +{ + _aligner.checkPointWrite(out); + out << _comprBufSize << _unitSize << _padBefore; +} + + +void +ComprBuffer::checkPointRead(nbostream &in) +{ + _aligner.checkPointRead(in); + uint32_t unitSize; + in >> _comprBufSize >> unitSize >> _padBefore; + assert(unitSize == _unitSize); + + allocComprBuf(); +} + + +} diff --git a/searchlib/src/vespa/searchlib/util/comprbuffer.h b/searchlib/src/vespa/searchlib/util/comprbuffer.h new file mode 100644 index 00000000000..43a67bcfb97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/comprbuffer.h @@ -0,0 +1,98 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1999-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include + +namespace search { + +class ComprBuffer +{ +private: + ComprBuffer(const ComprBuffer &); + + ComprBuffer & + operator=(const ComprBuffer &); + + void + allocComprBuf(void); +public: + void *_comprBuf; + size_t _comprBufSize; + uint32_t _unitSize; // Size of unit in bytes, doubles up as alignment + bool _padBefore; + void *_comprBufMalloc; + FileAlign _aligner; + + ComprBuffer(uint32_t unitSize); + + virtual + ~ComprBuffer(void); + + void + dropComprBuf(void); + + void + allocComprBuf(size_t comprBufSize, + size_t preferredFileAlignment, + FastOS_FileInterface *const file, + bool padbefore); + + static size_t + minimumPadding(void) + { + return 8; + } + + uint32_t + getUnitBitSize(void) const + { + return _unitSize * 8; + } + + bool + getPadBefore(void) const + { + return _padBefore; + } + + bool + getCheckPointResumed(void) const + { + return _aligner.getCheckPointResumed(); + } + + /* + * When encoding to memory instead of file, the compressed buffer must + * be able to grow. + */ + void + expandComprBuf(uint32_t overflowUnits); + + /* + * For unit testing only. Reference data owned by rhs, only works as + * long as rhs is live and unchanged. + */ + void + referenceComprBuf(const ComprBuffer &rhs); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. 
+ * + */ + void + checkPointRead(vespalib::nbostream &in); +}; + +} + diff --git a/searchlib/src/vespa/searchlib/util/comprfile.cpp b/searchlib/src/vespa/searchlib/util/comprfile.cpp new file mode 100644 index 00000000000..719b423861c --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/comprfile.cpp @@ -0,0 +1,650 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include +#include +#include +#include + +LOG_SETUP(".comprbuffer"); + +namespace search +{ + +using vespalib::nbostream; + +void +ComprFileReadBase::ReadComprBuffer(uint64_t stopOffset, + bool readAll, + ComprFileDecodeContext &decodeContext, + int &bitOffset, + FastOS_FileInterface &file, + uint64_t &fileReadByteOffset, + uint64_t fileSize, + ComprBuffer &cbuf) +{ + assert(cbuf._comprBuf != NULL); + + bool isretryread = false; + + retry: + if (decodeContext.lastChunk()) + return; // Already reached end of file. + int remainingUnits = decodeContext.remainingUnits(); + + // There's a good amount of data here already. + if (remainingUnits > + static_cast(ComprBuffer::minimumPadding())) //FIX! Tune + return; + + // Assert that file read offset is aligned on unit boundary + assert((static_cast(fileReadByteOffset) & + (cbuf._unitSize - 1)) == 0); + // Get direct IO file alignment + size_t fileDirectIOAlign = cbuf._aligner.getDirectIOFileAlign(); + // calculate number of pad units before requested start + int padBeforeUnits = static_cast + (static_cast(fileReadByteOffset) & + (fileDirectIOAlign - 1)) / cbuf._unitSize; + // No padding before if at end of file. + if (fileReadByteOffset >= fileSize) + padBeforeUnits = 0; + // Continuation reads starts at aligned boundary. 
+ assert(remainingUnits == 0 || padBeforeUnits == 0); + + if (readAll) + stopOffset = fileSize << 3; + else if (!isretryread) { + stopOffset += 8 * cbuf.getUnitBitSize(); // XXX: Magic integer + // Realign stop offset to direct IO alignment boundary + uint64_t fileDirectIOBitAlign = + static_cast(fileDirectIOAlign) << 3; + if ((stopOffset & (fileDirectIOBitAlign - 1)) != 0) + stopOffset += fileDirectIOBitAlign - + (stopOffset & (fileDirectIOBitAlign - 1)); + } + + bool isMore = true; + if (stopOffset >= (fileSize << 3)) { + stopOffset = fileSize << 3; + isMore = false; + } + + int64_t readBits = static_cast(stopOffset) - + (static_cast(fileReadByteOffset) << 3) + + padBeforeUnits * cbuf.getUnitBitSize(); + int64_t bufferBits = cbuf._comprBufSize * cbuf.getUnitBitSize(); + if (readBits > 0 && (bufferBits < readBits)) + { + isMore = true; + readBits = bufferBits; + } + + int extraRemainingUnits = 0; + if (bitOffset == -1) { + // Ensure that compressed data for current position is still available + // in buffer form. 
+ extraRemainingUnits = 2; + } + // Move remaining integers to padding area before start of buffer + if (remainingUnits + extraRemainingUnits > 0) + memmove(static_cast(cbuf._comprBuf) - + (remainingUnits + extraRemainingUnits) * cbuf._unitSize, + static_cast(decodeContext.getUnitPtr()) - + extraRemainingUnits * cbuf._unitSize, + (remainingUnits + extraRemainingUnits) * cbuf._unitSize); + + // Adjust file position to direct IO boundary if needed before read + if (padBeforeUnits != 0) { + fileReadByteOffset -= padBeforeUnits * cbuf._unitSize; + file.SetPosition(fileReadByteOffset); + } + int readUnits0 = 0; + if (readBits > 0) + readUnits0 = static_cast((readBits + cbuf.getUnitBitSize() - 1) / + cbuf.getUnitBitSize()); + + // Try to align end of read to an alignment boundary + int readUnits = cbuf._aligner.adjustElements(fileReadByteOffset / + cbuf._unitSize, readUnits0); + if (readUnits < readUnits0) + isMore = true; + + if (readUnits > 0) { + int64_t padBytes = fileReadByteOffset + + static_cast(readUnits) * cbuf._unitSize - + fileSize; + if (!isMore && padBytes > 0) { + // Pad reading of file written with smaller unit size with + // NUL bytes. 
+ file.ReadBuf(cbuf._comprBuf, readUnits * cbuf._unitSize - + padBytes); + memset(static_cast(cbuf._comprBuf) + + readUnits * cbuf._unitSize - padBytes, + 0, + padBytes); + } else + file.ReadBuf(cbuf._comprBuf, readUnits * cbuf._unitSize); + } + // If at end of file then add units of zero bits as padding + if (!isMore) + memset(static_cast(cbuf._comprBuf) + + readUnits * cbuf._unitSize, + 0, + cbuf._unitSize * ComprBuffer::minimumPadding()); + + assert(remainingUnits + readUnits >= 0); + decodeContext.afterRead(static_cast(cbuf._comprBuf) + + (padBeforeUnits - remainingUnits) * + static_cast(cbuf._unitSize), + (remainingUnits + readUnits - padBeforeUnits), + fileReadByteOffset + + readUnits * cbuf._unitSize, + isMore); + fileReadByteOffset += readUnits * cbuf._unitSize; + if (!isretryread && + decodeContext.endOfChunk() && + isMore) { + isretryread = true; + goto retry; // Alignment caused too short read + } + + if (bitOffset != -1) { + decodeContext.setupBits(bitOffset); + bitOffset = -1; + } + +} + + +void +ComprFileReadBase::SetPosition(uint64_t newPosition, + uint64_t stopOffset, + bool readAll, + ComprFileDecodeContext &decodeContext, + int &bitOffset, + FastOS_FileInterface &file, + uint64_t &fileReadByteOffset, + uint64_t fileSize, + ComprBuffer &cbuf) +{ + int64_t pos; + uint64_t oldPosition; + + oldPosition = decodeContext.getBitPos(bitOffset, fileReadByteOffset); + assert(oldPosition == decodeContext.getBitPosV()); + if (newPosition == oldPosition) + return; + if (newPosition > oldPosition && newPosition <= (fileReadByteOffset << 3)) { + size_t skip = newPosition - oldPosition; + if (skip < 2 * cbuf.getUnitBitSize()) { + // Cached bits might still be needed, just read and ignore bits + if (decodeContext.endOfChunk()) + ReadComprBuffer(stopOffset, + readAll, + decodeContext, + bitOffset, + file, + fileReadByteOffset, + fileSize, + cbuf); + decodeContext.skipBits(skip); + assert(decodeContext.getBitPos(bitOffset, + fileReadByteOffset) == newPosition); + 
assert(decodeContext.getBitPosV() == newPosition); + return; + } + // Cached bits not needed, skip to new position in buffer + size_t left = (fileReadByteOffset << 3) - newPosition; + decodeContext.adjUnitPtr((left + cbuf.getUnitBitSize() - 1) / + cbuf.getUnitBitSize()); + bitOffset = static_cast + (static_cast(newPosition) & + (cbuf.getUnitBitSize() - 1)); + // We might now be at end of chunk, read more if needed in order + // for setupBits() to be safe. + if (decodeContext.endOfChunk()) + ReadComprBuffer(stopOffset, + readAll, + decodeContext, + bitOffset, + file, + fileReadByteOffset, + fileSize, + cbuf); + // Only call SetupBits() if ReadComprBuffer() didn't do it. + if (bitOffset != -1) { + decodeContext.setupBits(bitOffset); + bitOffset = -1; + } + assert(decodeContext.getBitPos(bitOffset, + fileReadByteOffset) == newPosition); + assert(decodeContext.getBitPosV() == newPosition); + return; + } + pos = newPosition / cbuf.getUnitBitSize(); + pos *= cbuf._unitSize; + fileReadByteOffset = pos; + bitOffset = static_cast(static_cast(newPosition) & + (cbuf.getUnitBitSize() - 1)); + + assert(pos <= static_cast(fileSize)); + + file.SetPosition(pos); + assert(pos == file.GetPosition()); + + decodeContext.emptyBuffer(newPosition); + assert(decodeContext.getBitPos(bitOffset, + fileReadByteOffset) == newPosition); + assert(decodeContext.getBitPosV() == newPosition); +} + + +void +ComprFileWriteBase:: +WriteComprBuffer(ComprFileEncodeContext &encodeContext, + ComprBuffer &cbuf, + FastOS_FileInterface &file, + uint64_t &fileWriteByteOffset, + bool flushSlack) +{ + assert(cbuf._comprBuf != NULL); + + int chunkUsedUnits = encodeContext.getUsedUnits(cbuf._comprBuf); + + if (chunkUsedUnits == 0) + return; + int chunkSizeNormalMax = encodeContext.getNormalMaxUnits(cbuf._comprBuf); + int chunksize = chunkUsedUnits; + /* + * Normally, only flush the normal buffer and copy the slack + * after the buffer to the start of buffer. 
+ */ + if (!flushSlack && chunksize > chunkSizeNormalMax) + chunksize = chunkSizeNormalMax; + assert(static_cast(chunksize) <= cbuf._comprBufSize || + (flushSlack && + static_cast(chunksize) <= cbuf._comprBufSize + + ComprBuffer::minimumPadding())); + file.WriteBuf(cbuf._comprBuf, cbuf._unitSize * chunksize); + + int remainingUnits = chunkUsedUnits - chunksize; + assert(remainingUnits == 0 || + (!flushSlack && + static_cast(remainingUnits) <= + ComprBuffer::minimumPadding())); + // Copy any slack after buffer to the start of the buffer + if (remainingUnits > 0) + memmove(cbuf._comprBuf, + static_cast(cbuf._comprBuf) + + chunksize * cbuf._unitSize, + cbuf._unitSize * remainingUnits); + + fileWriteByteOffset += chunksize * cbuf._unitSize; + encodeContext.afterWrite(cbuf, + remainingUnits, + fileWriteByteOffset); +} + + +ComprFileReadContext:: +ComprFileReadContext(ComprFileDecodeContext &decodeContext) + : ComprBuffer(decodeContext.getUnitByteSize()), + _decodeContext(&decodeContext), + _fileSize(0), + _fileReadByteOffset(0), + _bitOffset(0), + _stopOffset(0), + _readAll(true), + _checkPointOffsetValid(false), + _file(NULL), + _checkPointOffset(0) +{ +} + + +ComprFileReadContext:: +ComprFileReadContext(uint32_t unitSize) + : ComprBuffer(unitSize), + _decodeContext(NULL), + _fileSize(0), + _fileReadByteOffset(0), + _bitOffset(0), + _stopOffset(0), + _readAll(true), + _checkPointOffsetValid(false), + _file(NULL), + _checkPointOffset(0) +{ +} + + +ComprFileReadContext::~ComprFileReadContext(void) +{ +} + + +void +ComprFileReadContext::readComprBuffer(uint64_t stopOffset, bool readAll) +{ + search::ComprFileReadBase::ReadComprBuffer(stopOffset, + readAll, + *_decodeContext, + _bitOffset, + *_file, + _fileReadByteOffset, + _fileSize, + *this); +} + + +void +ComprFileReadContext::readComprBuffer(void) +{ + search::ComprFileReadBase::ReadComprBuffer(_stopOffset, + _readAll, + *_decodeContext, + _bitOffset, + *_file, + _fileReadByteOffset, + _fileSize, + *this); +} + + +void 
+ComprFileReadContext::setPosition(uint64_t newPosition, + uint64_t stopOffset, + bool readAll) +{ + setStopOffset(stopOffset, readAll); + search::ComprFileReadBase::SetPosition(newPosition, + stopOffset, + readAll, + *_decodeContext, + _bitOffset, + *_file, + _fileReadByteOffset, + _fileSize, + *this); +} + + +void +ComprFileReadContext::setPosition(uint64_t newPosition) +{ + search::ComprFileReadBase::SetPosition(newPosition, + _stopOffset, + _readAll, + *_decodeContext, + _bitOffset, + *_file, + _fileReadByteOffset, + _fileSize, + *this); +} + + +void +ComprFileReadContext::allocComprBuf(unsigned int comprBufSize, + size_t preferredFileAlignment) +{ + ComprBuffer::allocComprBuf(comprBufSize, preferredFileAlignment, + _file, true); +} + + +void +ComprFileReadContext::referenceWriteContext(const ComprFileWriteContext &rhs) +{ + ComprFileEncodeContext *e = rhs.getEncodeContext(); + ComprFileDecodeContext *d = getDecodeContext(); + + assert(e != NULL); + int usedUnits = e->getUsedUnits(rhs._comprBuf); + assert(usedUnits >= 0); + + referenceComprBuf(rhs); + setBufferEndFilePos(static_cast(usedUnits) * _unitSize); + setFileSize(static_cast(usedUnits) * _unitSize); + if (d != NULL) { + d->afterRead(_comprBuf, + usedUnits, + static_cast(usedUnits) * _unitSize, + false); + d->setupBits(0); + setBitOffset(-1); + assert(d->getBitPosV() == 0); + } +} + + +void +ComprFileReadContext::copyWriteContext(const ComprFileWriteContext &rhs) +{ + ComprFileEncodeContext *e = rhs.getEncodeContext(); + ComprFileDecodeContext *d = getDecodeContext(); + + assert(e != NULL); + int usedUnits = e->getUsedUnits(rhs._comprBuf); + assert(usedUnits >= 0); + + dropComprBuf(); + allocComprBuf(usedUnits, 32768); + assert(_comprBufSize >= static_cast(usedUnits)); + memcpy(_comprBuf, rhs._comprBuf, + static_cast(usedUnits) * _unitSize); + setBufferEndFilePos(static_cast(usedUnits) * _unitSize); + setFileSize(static_cast(usedUnits) * _unitSize); + if (d != NULL) { + d->afterRead(_comprBuf, + 
usedUnits, + static_cast(usedUnits) * _unitSize, + false); + d->setupBits(0); + setBitOffset(-1); + assert(d->getBitPosV() == 0); + } +} + + +void +ComprFileReadContext::referenceReadContext(const ComprFileReadContext &rhs) +{ + ComprFileDecodeContext *d = getDecodeContext(); + + int usedUnits = rhs.getBufferEndFilePos() / _unitSize; + assert(usedUnits >= 0); + assert(static_cast(usedUnits) * _unitSize == + rhs.getBufferEndFilePos()); + + referenceComprBuf(rhs); + setBufferEndFilePos(static_cast(usedUnits) * _unitSize); + setFileSize(static_cast(usedUnits) * _unitSize); + if (d != NULL) { + d->afterRead(_comprBuf, + usedUnits, + static_cast(usedUnits) * _unitSize, + false); + d->setupBits(0); + setBitOffset(-1); + assert(d->getBitPosV() == 0); + } +} + + +void +ComprFileReadContext::copyReadContext(const ComprFileReadContext &rhs) +{ + ComprFileDecodeContext *d = getDecodeContext(); + + int usedUnits = rhs.getBufferEndFilePos() / _unitSize; + assert(usedUnits >= 0); + assert(static_cast(usedUnits) * _unitSize == + rhs.getBufferEndFilePos()); + + dropComprBuf(); + allocComprBuf(usedUnits, 32768); + assert(_comprBufSize >= static_cast(usedUnits)); + memcpy(_comprBuf, rhs._comprBuf, + static_cast(usedUnits) * _unitSize); + setBufferEndFilePos(static_cast(usedUnits) * _unitSize); + setFileSize(static_cast(usedUnits) * _unitSize); + if (d != NULL) { + d->afterRead(_comprBuf, + usedUnits, + static_cast(usedUnits) * _unitSize, + false); + d->setupBits(0); + setBitOffset(-1); + assert(d->getBitPosV() == 0); + } +} + + +void +ComprFileReadContext::checkPointWrite(nbostream &out) +{ + ComprBuffer::checkPointWrite(out); + ComprFileDecodeContext &d = *_decodeContext; + d.checkPointWrite(out); + uint64_t bitOffset = d.getBitPosV(); + out << bitOffset; +} + + +void +ComprFileReadContext::checkPointRead(nbostream &in) +{ + ComprBuffer::checkPointRead(in); + ComprFileDecodeContext &d = *_decodeContext; + d.checkPointRead(in); + in >> _checkPointOffset; // Cannot seek until file is 
opened + _checkPointOffsetValid = true; +} + +ComprFileWriteContext:: +ComprFileWriteContext(ComprFileEncodeContext &encodeContext) + : ComprBuffer(encodeContext.getUnitByteSize()), + _encodeContext(&encodeContext), + _file(NULL), + _fileWriteByteOffset(0) +{ +} + + +ComprFileWriteContext:: +ComprFileWriteContext(uint32_t unitSize) + : ComprBuffer(unitSize), + _encodeContext(NULL), + _file(NULL), + _fileWriteByteOffset(0) +{ +} + + +ComprFileWriteContext::~ComprFileWriteContext(void) +{ +} + + +void +ComprFileWriteContext::writeComprBuffer(bool flushSlack) +{ + if (_file != NULL) { + search::ComprFileWriteBase::WriteComprBuffer(*_encodeContext, + *this, + *_file, + _fileWriteByteOffset, + flushSlack); + return; + } + + int chunkUsedUnits = _encodeContext->getUsedUnits(_comprBuf); + int chunkSizeNormalMax = _encodeContext->getNormalMaxUnits(_comprBuf); + + if (chunkUsedUnits >= chunkSizeNormalMax) { + int overflowUnits = chunkUsedUnits - chunkSizeNormalMax; + expandComprBuf(overflowUnits); + } + + _encodeContext->afterWrite(*this, + chunkUsedUnits, + 0); +} + + +std::pair +ComprFileWriteContext::grabComprBuffer(void *&comprBufMalloc) +{ + assert(_file == NULL); + std::pair res = + std::make_pair(_comprBuf, _encodeContext->getUsedUnits(_comprBuf)); + comprBufMalloc = _comprBufMalloc; + _comprBuf = _comprBufMalloc = NULL; + _comprBufSize = 0; + return res; +} + + +void +ComprFileWriteContext::allocComprBuf(unsigned int comprBufSize, + size_t preferredFileAlignment) +{ + ComprBuffer::allocComprBuf(comprBufSize, preferredFileAlignment, + _file, false); +} + + +void +ComprFileWriteContext::allocComprBuf(void) +{ + allocComprBuf(32768, 32768); +} + + +void +ComprFileWriteContext::checkPointWrite(nbostream &out) +{ + ComprBuffer::checkPointWrite(out); + ComprFileEncodeContext &e = *_encodeContext; + uint64_t bufferStartFilePos = getBufferStartFilePos(); + uint64_t usedSize = e.getUsedUnits(_comprBuf) * + e.getUnitByteSize(); + out << bufferStartFilePos << usedSize; + 
e.checkPointWrite(out); + if (usedSize != 0) { + out.write(_comprBuf, usedSize); + } + uint64_t bitOffset = e.getBitPosV(); + out << bitOffset; +} + + +void +ComprFileWriteContext::checkPointRead(nbostream &in) +{ + ComprBuffer::checkPointRead(in); + ComprFileEncodeContext &e = *_encodeContext; + uint64_t bufferStartFilePos = 0; + uint64_t usedSize = 0; + in >> bufferStartFilePos >> usedSize; + e.checkPointRead(in); + if (usedSize != 0) { + assert((usedSize % e.getUnitByteSize()) == 0); + assert(_comprBufSize >= usedSize / e.getUnitByteSize()); + in.read(_comprBuf, usedSize); + } + setBufferStartFilePos(bufferStartFilePos); + e.afterWrite(*this, usedSize / e.getUnitByteSize(), bufferStartFilePos); + uint64_t bitOffset = 0; + in >> bitOffset; + uint64_t writeOffset = e.getBitPosV(); + assert(bitOffset == writeOffset); + (void) writeOffset; +} + + +} diff --git a/searchlib/src/vespa/searchlib/util/comprfile.h b/searchlib/src/vespa/searchlib/util/comprfile.h new file mode 100644 index 00000000000..8c05884a0f1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/comprfile.h @@ -0,0 +1,456 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1999-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include +#include +#include + +namespace vespalib +{ + +class nbostream; + +} + + +namespace search { + +class ComprFileWriteContext; + +class ComprFileDecodeContext +{ +public: + virtual + ~ComprFileDecodeContext(void) + { + } + + /** + * + * Check if the chunk referenced by the decode context was the + * last chunk in the file (e.g. _valE > _realValE) + */ + virtual bool + lastChunk(void) const = 0; + + /** + * Check if we're at the end of the current chunk (e.g. _valI >= _valE) + */ + virtual bool + endOfChunk(void) const = 0; + + /** + * Get remaining units in buffer (e.g. 
_realValE - _valI) + */ + + virtual int32_t + remainingUnits(void) const = 0; + + /** + * Get unit ptr (e.g. _valI) from decode context. + */ + virtual const void * + getUnitPtr(void) const = 0; + + /** + * Setup unit buffer in decode context after read. + */ + virtual void + afterRead(const void *start, + size_t bufferUnits, + uint64_t bufferEndFilePos, + bool isMore) = 0; + + /** + * Setup for bitwise reading. + */ + virtual void + setupBits(int bitOffset) = 0; + + virtual uint64_t + getBitPos(int bitOffset, + uint64_t bufferEndFilePos) const = 0; + + virtual uint64_t + getBitPosV(void) const = 0; + + virtual + void skipBits(int bits) = 0; + + virtual void + adjUnitPtr(int newRemainingUnits) = 0; + + virtual void + emptyBuffer(uint64_t newBitPosition) = 0; + + /** + * Get size of each unit (typically 4 or 8) + */ + virtual uint32_t + getUnitByteSize(void) const = 0; + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Caller must + * save position. + */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + * Caller must restore position. 
+ */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; +}; + +class ComprFileReadBase +{ +public: + static void ReadComprBuffer(uint64_t stopOffset, + bool readAll, + ComprFileDecodeContext &decodeContext, + int &bitOffset, + FastOS_FileInterface &file, + uint64_t &fileReadByteOffset, + uint64_t fileSize, + ComprBuffer &cbuf); + static void SetPosition(uint64_t newPosition, + uint64_t stopOffset, + bool readAll, + ComprFileDecodeContext &decodeContext, + int &bitOffset, + FastOS_FileInterface &file, + uint64_t &fileReadByteOffset, + uint64_t fileSize, + ComprBuffer &cbuf); + +protected: + virtual ~ComprFileReadBase(void) { } +}; + + +class ComprFileReadContext : public ComprBuffer +{ +private: + ComprFileDecodeContext *_decodeContext; + uint64_t _fileSize; + uint64_t _fileReadByteOffset; + int _bitOffset; + uint64_t _stopOffset; + bool _readAll; + bool _checkPointOffsetValid; // Set only if checkpoint has been read + FastOS_FileInterface *_file; + uint64_t _checkPointOffset; // bit offset saved by checkPointRead + +public: + ComprFileReadContext(ComprFileDecodeContext &decodeContext); + + ComprFileReadContext(uint32_t unitSize); + + ~ComprFileReadContext(void); + + void + readComprBuffer(uint64_t stopOffset, bool readAll); + + void + readComprBuffer(void); + + void + setPosition(uint64_t newPosition, + uint64_t stopOffset, + bool readAll); + + void + setPosition(uint64_t newPosition); + + void + allocComprBuf(unsigned int comprBufSize, + size_t preferredFileAlignment); + + void + setDecodeContext(ComprFileDecodeContext *decodeContext) + { + _decodeContext = decodeContext; + } + + ComprFileDecodeContext * + getDecodeContext(void) const + { + return _decodeContext; + } + + void + setFile(FastOS_FileInterface *file) + { + _file = file; + } + + FastOS_FileInterface * + getFile(void) const + { + return _file; + } + + /** + * Get file offset for end of compressed buffer. 
+ */ + uint64_t + getBufferEndFilePos(void) const + { + return _fileReadByteOffset; + } + + /** + * Set file offset for end of compressed buffer. + */ + void + setBufferEndFilePos(uint64_t bufferEndFilePos) + { + _fileReadByteOffset = bufferEndFilePos; + } + + void + setBitOffset(int bitOffset) + { + _bitOffset = bitOffset; + } + + void + setFileSize(uint64_t fileSize) + { + _fileSize = fileSize; + } + + /* + * Set stop offset for sequential read. + */ + void + setStopOffset(uint64_t stopOffset, bool readAll) + { + _stopOffset = stopOffset; + _readAll = readAll; + } + + /* + * For unit testing only. Reference data owned by rhs, only works as + * long as rhs is live and unchanged. + */ + void + referenceReadContext(const ComprFileReadContext &rhs); + + /* + * For unit testing only. Copy data owned by rhs. + */ + void + copyReadContext(const ComprFileReadContext &rhs); + + /* + * For unit testing only. Reference data owned by rhs, only works as + * long as rhs is live and unchanged. + */ + void + referenceWriteContext(const ComprFileWriteContext &rhs); + + /* + * For unit testing only. Copy data owned by rhs. + */ + void + copyWriteContext(const ComprFileWriteContext &rhs); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + void + checkPointRead(vespalib::nbostream &in); + + /** + * True once checkPointRead() has stored a checkpoint offset. + */ + bool + getCheckPointOffsetValid(void) const + { + return _checkPointOffsetValid; + } + + /** + * Get the bit offset saved by checkPointRead(). + */ + uint64_t + getCheckPointOffset(void) const + { + return _checkPointOffset; + } +}; + + +class ComprFileEncodeContext +{ +public: + virtual + ~ComprFileEncodeContext(void) + { + } + + /** + * Get number of used units (e.g. _valI - start) + */ + virtual int + getUsedUnits(void *start) = 0; + + /** + * Get normal full buffer size (e.g. 
_valE - start) + */ + virtual int + getNormalMaxUnits(void *start) = 0; + + /** + * Adjust buffer after write (e.g. _valI, _fileWriteBias) + */ + virtual void + afterWrite(ComprBuffer &cbuf, + uint32_t remainingUnits, + uint64_t bufferStartFilePos) = 0; + + + /** + * Adjust buffer size to align end of buffer. + */ + virtual void + adjustBufSize(ComprBuffer &cbuf) = 0; + + /** + * Get size of each unit (typically 4 or 8) + */ + virtual uint32_t + getUnitByteSize(void) const = 0; + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. Caller must + * save position, although partial unit is saved. + */ + virtual void + checkPointWrite(vespalib::nbostream &out) = 0; + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + * Caller must restore position, although partial unit is restored. + */ + virtual void + checkPointRead(vespalib::nbostream &in) = 0; + + virtual uint64_t + getBitPosV(void) const = 0; +}; + +class ComprFileWriteBase +{ +public: + static void WriteComprBuffer(ComprFileEncodeContext &encodeContext, + ComprBuffer &cbuf, + FastOS_FileInterface &file, + uint64_t &fileWriteByteOffset, + bool flushSlack); + +protected: + virtual ~ComprFileWriteBase(void) { } +}; + + +class ComprFileWriteContext : public ComprBuffer +{ +private: + ComprFileEncodeContext *_encodeContext; + FastOS_FileInterface *_file; + uint64_t _fileWriteByteOffset; // XXX: Migrating from encode context + +public: + ComprFileWriteContext(ComprFileEncodeContext &encodeContext); + + ComprFileWriteContext(uint32_t unitSize); + + ~ComprFileWriteContext(void); + + void + writeComprBuffer(bool flushSlack); + + void + allocComprBuf(unsigned int comprBufSize, + size_t preferredFileAlignment); + + void + allocComprBuf(void); + + void + setEncodeContext(ComprFileEncodeContext *encodeContext) + { + _encodeContext = encodeContext; + } + + ComprFileEncodeContext * + getEncodeContext(void) const + { + return 
_encodeContext; + } + + void + setFile(FastOS_FileInterface *file) + { + _file = file; + } + + FastOS_FileInterface * + getFile(void) const + { + return _file; + } + + /** + * Get file offset for start of compressed buffer. + */ + uint64_t + getBufferStartFilePos(void) const + { + return _fileWriteByteOffset; + } + + /** + * Set file offset for start of compressed buffer. + */ + void + setBufferStartFilePos(uint64_t bufferStartFilePos) + { + _fileWriteByteOffset = bufferStartFilePos; + } + + /** + * Grab compressed buffer from write context. This is only legal when + * no file is attached. Returns the buffer start paired with the number + * of used units; the malloc()ed block is handed over in comprBufMalloc + * and the write context is left without a buffer. + */ + std::pair + grabComprBuffer(void *&comprBufMalloc); + + /** + * Checkpoint write. Used at semi-regular intervals during indexing + * to allow for continued indexing after an interrupt. + */ + void + checkPointWrite(vespalib::nbostream &out); + + /** + * Checkpoint read. Used when resuming indexing after an interrupt. + */ + void + checkPointRead(vespalib::nbostream &in); +}; + + +} + diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.cpp b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp new file mode 100644 index 00000000000..a6c716a13cd --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/dirtraverse.cpp @@ -0,0 +1,289 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include +#include "dirtraverse.h" + +namespace search +{ + +extern "C" { +static int cmpname(const void *av, const void *bv) +{ + const DirectoryTraverse::Name *const a = + *(const DirectoryTraverse::Name *const *) av; + const DirectoryTraverse::Name *const b = + *(const DirectoryTraverse::Name *const *) bv; + return strcmp(a->_name, b->_name); +} +} + + +/* + * Sort the singly linked list starting at head by strcmp() order of _name. + * The list must hold exactly count entries (asserted). The entries are + * relinked through _next and the new head is returned; an empty list + * (head == NULL, count <= 0) is returned unchanged. + */ +DirectoryTraverse::Name * +DirectoryTraverse::Name::sort(Name *head, + int count) +{ + Name *nl; + Name **names; + int i; + + if (count <= 0) { + assert(head == NULL); + return head; /* empty list: names[0] below would be out of bounds */ + } + + names = new Name *[count]; + i = 0; + for(nl = head; nl != NULL; nl = nl->_next) + names[i++] = nl; + assert(i == count); + qsort(names, count, sizeof(Name *), cmpname); + for (i = 0; i < count; i++) { + if (i + 1 < count) + names[i]->_next = names[i + 1]; + else + names[i]->_next = NULL; + } + head = names[0]; + delete [] names; + return head; +} + + +void +DirectoryTraverse::QueueDir(const char *name) +{ + Name *n = new Name(name); + if (_dirTail == NULL) + _dirHead = n; + else + _dirTail->_next = n; + _dirTail = n; +} + + +void +DirectoryTraverse::PushDir(const char *name) +{ + Name *n = new Name(name); + n->_next = _pdirHead; + _pdirHead = n; +} + + +void +DirectoryTraverse::PushRemoveDir(const char *name) +{ + Name *n = new Name(name); + n->_next = _rdirHead; + _rdirHead = n; +} + + +void +DirectoryTraverse::PushPushedDirs(void) +{ + Name *n; + while (_pdirHead != NULL) { + n = _pdirHead; + _pdirHead = n->_next; + n->_next = _dirHead; + _dirHead = n; + if (_dirTail == NULL) + _dirTail = n; + } +} + + +DirectoryTraverse::Name * +DirectoryTraverse::UnQueueDir(void) +{ + Name *n; + PushPushedDirs(); + if (_dirHead == NULL) + return NULL; + n = _dirHead; + _dirHead = n->_next; + n->_next = NULL; + if (_dirHead == NULL) + _dirTail = NULL; + return n; +} + +DirectoryTraverse::Name * +DirectoryTraverse::UnQueueName(void) +{ + Name *n; + if (_nameHead == NULL) + return NULL; + n = 
_nameHead; + _nameHead = n->_next; + n->_next = NULL; + _nameCount--; + return n; +} + + +void +DirectoryTraverse::ScanSingleDir(void) +{ + assert(_nameHead == NULL); + assert(_nameCount == 0); + delete _curDir; + free(_fullDirName); + _fullDirName = NULL; + _curDir = UnQueueDir(); + if (_curDir == NULL) + return; + _fullDirName = (char *) malloc(strlen(_baseDir) + 1 + + strlen(_curDir->_name) + 1); + strcpy(_fullDirName, _baseDir); + if (_curDir->_name[0] != '\0') { + strcat(_fullDirName, "/"); + strcat(_fullDirName, _curDir->_name); + } + FastOS_DirectoryScan *dirscan = new FastOS_DirectoryScan(_fullDirName); + while (dirscan->ReadNext()) { + const char *name = dirscan->GetName(); + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + Name *nl = new Name(name); + nl->_next = _nameHead; + _nameHead = nl; + _nameCount++; + } + if (_nameCount > 1) + _nameHead = _nameHead->sort(_nameHead, _nameCount); + delete dirscan; +} + + +bool +DirectoryTraverse::NextName(void) +{ + delete _curName; + _curName = NULL; + while (_nameHead == NULL && (_dirHead != NULL || _pdirHead != NULL)) + ScanSingleDir(); + if (_nameHead == NULL) + return false; + _curName = UnQueueName(); + free(_fullName); + _fullName = (char *) malloc(strlen(_fullDirName) + 1 + + strlen(_curName->_name) + 1); + strcpy(_fullName, _fullDirName); + _relName = _fullName + strlen(_baseDir) + 1; + strcat(_fullName, "/"); + strcat(_fullName, _curName->_name); + return true; +} + + +bool +DirectoryTraverse::NextRemoveDir(void) +{ + Name *curName; + + delete _curName; + _curName = NULL; + if (_rdirHead == NULL) + return false; + curName = _rdirHead; + _rdirHead = curName->_next; + free(_fullName); + _fullName = (char *) malloc(strlen(_baseDir) + 1 + + strlen(curName->_name) + 1); + strcpy(_fullName, _baseDir); + _relName = _fullName + strlen(_baseDir) + 1; + strcat(_fullName, "/"); + strcat(_fullName, curName->_name); + delete curName; + return true; +} + + +bool +DirectoryTraverse::RemoveTree(void) 
+{ + FastOS_StatInfo statInfo; + + while (NextName()) { + const char *relname = GetRelName(); + const char *fullname = GetFullName(); + if (FastOS_File::Stat(fullname, &statInfo)) { + if (statInfo._isDirectory) { + PushDir(relname); + PushRemoveDir(relname); + } else { + FastOS_File::Delete(fullname); + } + } + } + while (NextRemoveDir()) { + const char *fullname = GetFullName(); + FastOS_File::RemoveDirectory(fullname); + } + FastOS_File::RemoveDirectory(_baseDir); + return true; +} + +uint64_t +DirectoryTraverse::GetTreeSize() +{ + FastOS_StatInfo statInfo; + uint64_t size = 0; + const uint64_t blockSize = 4096; + + while (NextName()) { + const char *relname = GetRelName(); + const char *fullname = GetFullName(); + if (FastOS_File::Stat(fullname, &statInfo)) { + uint64_t adjSize = ((statInfo._size + blockSize - 1) / blockSize) * blockSize; + size += adjSize; + if (statInfo._isDirectory) { + PushDir(relname); + } + } + } + return size; +} + +DirectoryTraverse::DirectoryTraverse(const char *baseDir) + : _baseDir(NULL), + _nameHead(NULL), + _nameCount(0), + _dirHead(NULL), + _dirTail(NULL), + _pdirHead(NULL), + _rdirHead(NULL), + _curDir(NULL), + _curName(NULL), + _fullDirName(NULL), + _fullName(NULL), + _relName(NULL) +{ + _baseDir = strdup(baseDir); + QueueDir(""); + ScanSingleDir(); +} + + +DirectoryTraverse::~DirectoryTraverse(void) +{ + free(_fullDirName); + free(_fullName); + free(_baseDir); + delete _curDir; + delete _curName; + PushPushedDirs(); + while (_dirHead != NULL) + delete UnQueueDir(); + while (_nameHead != NULL) + delete UnQueueName(); + while (_rdirHead != NULL) { + Name *n; + n = _rdirHead; + _rdirHead = n->_next; + n->_next = NULL; + delete n; + } +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/dirtraverse.h b/searchlib/src/vespa/searchlib/util/dirtraverse.h new file mode 100644 index 00000000000..550da2fa7d1 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/dirtraverse.h @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo 
Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+namespace search
+{
+
+/**
+ * Walks a directory tree rooted at a base directory, handing out one
+ * entry at a time. Also offers whole-tree removal and size summation.
+ * Not copyable.
+ */
+class DirectoryTraverse
+{
+private:
+    DirectoryTraverse(const DirectoryTraverse &);
+    DirectoryTraverse& operator=(const DirectoryTraverse &);
+
+public:
+    /**
+     * Singly linked list node owning a strdup()'ed copy of a name.
+     */
+    class Name
+    {
+    private:
+        Name(const Name &);
+        Name& operator=(const Name &);
+
+    public:
+        char *_name;   // owned copy of the name, released with free()
+        Name *_next;   // next node, or NULL at the end of the list
+        explicit Name(const char *name)
+            : _name(NULL),
+              _next(NULL)
+        {
+            _name = strdup(name);
+        }
+        ~Name(void) { free(_name); }
+        // Sorts a list of count nodes by name and returns the new head.
+        static Name *sort(Name *head, int count);
+    };
+private:
+    char *_baseDir;      // root of the traversal (owned copy)
+    Name *_nameHead;     // entries of the current directory not yet returned
+    int _nameCount;      // number of nodes in the _nameHead list
+    Name *_dirHead;      // queue of directories still to scan
+    Name *_dirTail;      // last node of the directory queue
+    Name *_pdirHead;     // stack filled by PushDir()
+    Name *_rdirHead;     // stack filled by PushRemoveDir()
+    Name *_curDir;       // directory currently being listed
+    Name *_curName;      // entry most recently returned by NextName()
+    char *_fullDirName;  // base directory joined with the current directory
+    char *_fullName;     // full path of the current entry
+    char *_relName;      // points into _fullName, past the base directory
+public:
+    const char *GetFullName(void) const { return _fullName; }
+    const char *GetRelName(void) const { return _relName; }
+    void QueueDir(const char *name);
+    void PushDir(const char *name);
+    void PushRemoveDir(const char *name);
+    void PushPushedDirs(void);
+    Name *UnQueueDir(void);
+    Name *UnQueueName(void);
+    void ScanSingleDir(void);
+    bool NextName(void);
+    bool NextRemoveDir(void);
+    bool RemoveTree(void);
+    uint64_t GetTreeSize(); // Returns size of directory in bytes
+    explicit DirectoryTraverse(const char *baseDir);
+    ~DirectoryTraverse(void);
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp
new file mode 100644
index 00000000000..9c1150917a7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.cpp
@@ -0,0 +1,41 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include "drainingbufferwriter.h" + +namespace search +{ + +DrainingBufferWriter::DrainingBufferWriter() + : BufferWriter(), + _buf(), + _bytesWritten(0), + _incompleteBuffers(0) +{ + _buf.resize(BUFFER_SIZE); + setup(&_buf[0], _buf.size()); +} + + +DrainingBufferWriter::~DrainingBufferWriter() +{ +} + + +void +DrainingBufferWriter::flush() { + // measure overhead above this flush method + assert(_incompleteBuffers == 0); // all previous buffers must have been full + size_t nowLen = usedLen(); + if (nowLen != _buf.size()) { + // buffer is not full, only allowed for last buffer + ++_incompleteBuffers; + } + if (nowLen == 0) { + return; // empty buffer + } + _bytesWritten += nowLen; + rewind(); +} + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h new file mode 100644 index 00000000000..2c471ce2148 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/drainingbufferwriter.h @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "bufferwriter.h" +#include + +namespace search +{ + +/** + * Class to write to a "drain" buffer, used to measure performance of + * BufferWriter and measure number of bytes written. 
+ */ +class DrainingBufferWriter : public BufferWriter +{ + std::vector _buf; + size_t _bytesWritten; + uint32_t _incompleteBuffers; +public: + static constexpr size_t BUFFER_SIZE = 262144; + + DrainingBufferWriter(); + + virtual ~DrainingBufferWriter(); + + virtual void flush() override; + + size_t getBytesWritten() const { return _bytesWritten; } +}; + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/filealign.cpp b/searchlib/src/vespa/searchlib/util/filealign.cpp new file mode 100644 index 00000000000..e3a7b85f0f0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/filealign.cpp @@ -0,0 +1,145 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include +#include + +namespace search +{ + +using vespalib::nbostream; + +namespace { + +size_t +gcd(size_t a, size_t b) +{ + size_t remainder; + + for (;;) { + remainder = a % b; + if (remainder == 0) + return b; + a = b; + b = remainder; + } +} + + +size_t +getMinBlocking(size_t elementsize, size_t alignment) +{ + return alignment / gcd(alignment, elementsize); +} + +} + + +FileAlign::FileAlign(void) + : _directIOFileAlign(1), + _preferredFileAlign(1), + _minDirectIOSize(1), + _minAlignedSize(1), + _elemSize(1), + _directIOMemAlign(1), + _directio(false), + _checkPointResumed(false) +{ +} + + +FileAlign::~FileAlign(void) +{ +} + + +size_t +FileAlign::adjustSize(int64_t offset, size_t size) +{ + if (_directio && (offset & (_directIOFileAlign - 1)) != 0) { + // Align end of IO to direct IO boundary + assert(offset % _elemSize == 0); + size_t maxSize = _minDirectIOSize - (offset % _minDirectIOSize); + if (size > maxSize) + size = maxSize; + } else if ((offset & (_preferredFileAlign - 1)) != 0) { + // Align end of IO to preferred boundary + assert(offset % _elemSize == 0); + size_t tailLen = (offset + size) % _minAlignedSize; + if (tailLen < size) + size -= tailLen; + } + assert(size % _elemSize == 0); + return size; +} 
+
+
+/**
+ * Element-based wrapper around adjustSize(): converts offset and length
+ * to bytes, adjusts, and converts the result back to elements.
+ */
+size_t
+FileAlign::adjustElements(int64_t eoffset, size_t esize)
+{
+    return adjustSize(eoffset * _elemSize, esize * _elemSize) / _elemSize;
+}
+
+
+/**
+ * Derive the alignment settings from the file's direct IO restrictions
+ * (if any) and the preferred alignment, and round the suggested element
+ * count up to a whole number of aligned blocks.
+ *
+ * @return adjusted number of elements in buffer
+ */
+size_t
+FileAlign::setupAlign(size_t elements,
+                      size_t elemSize,
+                      FastOS_FileInterface *file,
+                      size_t preferredFileAlignment)
+{
+    // Only read below when GetDirectIORestrictions() reported direct IO.
+    size_t memoryAlignment;
+    size_t transferGranularity;
+    size_t transferMaximum;
+
+    if (file != NULL) {
+        _directio =
+            file->GetDirectIORestrictions(memoryAlignment,
+                                          transferGranularity,
+                                          transferMaximum);
+    } else
+        _directio = false;
+    if (_directio) {
+        _directIOFileAlign = transferGranularity;
+        _directIOMemAlign = memoryAlignment;
+        // The preferred alignment is never weaker than the direct IO one.
+        if (preferredFileAlignment < _directIOFileAlign)
+            preferredFileAlignment = _directIOFileAlign;
+    } else {
+        _directIOFileAlign = 1;
+        _directIOMemAlign = 1;
+    }
+    // Enforce a floor of 4096 bytes on the preferred alignment.
+    if (preferredFileAlignment < 4096)
+        preferredFileAlignment = 4096;
+    _preferredFileAlign = preferredFileAlignment;
+
+    size_t minDirectIOElements = getMinBlocking(elemSize, _directIOFileAlign);
+    size_t minAlignedElements = getMinBlocking(elemSize, _preferredFileAlign);
+
+    // Round the element count up to a multiple of minAlignedElements.
+    if (elements % minAlignedElements != 0)
+        elements += minAlignedElements - (elements % minAlignedElements);
+    _minDirectIOSize = minDirectIOElements * elemSize;
+    _minAlignedSize = minAlignedElements * elemSize;
+    _elemSize = elemSize;
+    return elements;
+}
+
+
+/**
+ * Serialize the alignment settings; _checkPointResumed is not written.
+ */
+void
+FileAlign::checkPointWrite(nbostream &out)
+{
+    out << _directIOFileAlign << _preferredFileAlign <<
+        _minDirectIOSize << _minAlignedSize << _elemSize <<
+        _directIOMemAlign << _directio;
+}
+
+
+/**
+ * Restore the settings in the order checkPointWrite() stored them and
+ * mark this object as resumed from a checkpoint.
+ */
+void
+FileAlign::checkPointRead(nbostream &in)
+{
+    in >> _directIOFileAlign >> _preferredFileAlign >>
+        _minDirectIOSize >> _minAlignedSize >> _elemSize >>
+        _directIOMemAlign >> _directio;
+    _checkPointResumed = true;
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/filealign.h b/searchlib/src/vespa/searchlib/util/filealign.h
new file mode 100644
index 00000000000..2bad98c9dd3
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filealign.h
@@
-0,0 +1,138 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace vespalib
+{
+
+class nbostream;
+
+}
+
+namespace search
+{
+
+/**
+ * Holds file and memory alignment settings for IO and adjusts IO sizes
+ * so that transfers end on alignment boundaries.
+ */
+class FileAlign
+{
+private:
+    size_t _directIOFileAlign;   // file offset alignment for direct IO
+    size_t _preferredFileAlign;  // preferred file offset alignment
+    size_t _minDirectIOSize;     // bytes in the smallest direct IO block of whole elements
+    size_t _minAlignedSize;      // bytes in the smallest preferred-aligned block of whole elements
+    size_t _elemSize;            // size of one element in bytes
+    size_t _directIOMemAlign;    // memory alignment for direct IO
+    bool _directio;              // true when direct IO restrictions apply
+    bool _checkPointResumed;     // set by checkPointRead()
+
+
+public:
+    FileAlign(void);
+
+    ~FileAlign(void);
+
+    /**
+     * Adjust number of bytes for IO (read or write), reducing
+     * number of bytes if it helps making end of IO matching
+     * an alignment boundary.
+     *
+     * @param offset position of start of IO, measured in bytes
+     * @param size number of bytes for IO
+     *
+     * @return adjusted number of bytes for IO
+     */
+    size_t
+    adjustSize(int64_t offset, size_t size);
+
+    /**
+     * Adjust number of elements for IO (read or write), reducing
+     * number of elements if it helps making end of IO matching
+     * an alignment boundary.
+     *
+     * @param eoffset position of start of IO, measured in elements
+     * @param esize number of elements for IO
+     *
+     * @return adjusted number of elements for IO
+     */
+    size_t
+    adjustElements(int64_t eoffset, size_t esize);
+
+    /**
+     * Setup alignment
+     *
+     * @param elements suggested number of elements in buffer
+     * @param elemSize size of each element
+     * @param file File interface for IO
+     * @param preferredFileAlignment preferred alignment for IO
+     *
+     * @return adjusted number of elements in buffer
+     */
+    size_t
+    setupAlign(size_t elements,
+               size_t elemSize,
+               FastOS_FileInterface *file,
+               size_t preferredFileAlignment);
+
+    bool
+    getDirectIO(void) const
+    {
+        return _directio;
+    }
+
+    bool
+    getCheckPointResumed(void) const
+    {
+        return _checkPointResumed;
+    }
+
+    size_t
+    getDirectIOFileAlign(void) const
+    {
+        return _directIOFileAlign;
+    }
+
+    size_t
+    getDirectIOMemAlign(void) const
+    {
+        return _directIOMemAlign;
+    }
+
+    size_t
+    getMinDirectIOSize(void) const
+    {
+        return _minDirectIOSize;
+    }
+
+    size_t
+    getMinAlignedSize(void) const
+    {
+        return _minAlignedSize;
+    }
+
+    size_t
+    getPreferredFileAlign(void) const
+    {
+        return _preferredFileAlign;
+    }
+
+    size_t
+    getElemSize(void) const
+    {
+        return _elemSize;
+    }
+
+    /**
+     * Checkpoint write. Used at semi-regular intervals during indexing
+     * to allow for continued indexing after an interrupt.
+     */
+    void
+    checkPointWrite(vespalib::nbostream &out);
+
+    /**
+     * Checkpoint read. Used when resuming indexing after an interrupt.
+     */
+    void
+    checkPointRead(vespalib::nbostream &in);
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/fileheadertk.cpp b/searchlib/src/vespa/searchlib/util/fileheadertk.cpp
new file mode 100644
index 00000000000..d1ae39d15c7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/fileheadertk.cpp
@@ -0,0 +1,23 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include +#include "fileheadertk.h" + +using namespace search; +using vespalib::GenericHeader; + +void +FileHeaderTk::addVersionTags(vespalib::GenericHeader &header) +{ +#ifdef V_TAG + header.putTag(GenericHeader::Tag("version-tag", V_TAG));; + header.putTag(GenericHeader::Tag("version-date", V_TAG_DATE));; + header.putTag(GenericHeader::Tag("version-pkg", V_TAG_PKG));; + header.putTag(GenericHeader::Tag("version-arch", V_TAG_ARCH));; + header.putTag(GenericHeader::Tag("version-system", V_TAG_SYSTEM)); + header.putTag(GenericHeader::Tag("version-system-rev", V_TAG_SYSTEM_REV)); + header.putTag(GenericHeader::Tag("version-builder", V_TAG_BUILDER)); + header.putTag(GenericHeader::Tag("version-component", V_TAG_COMPONENT)); +#else + (void)header; +#endif +} diff --git a/searchlib/src/vespa/searchlib/util/fileheadertk.h b/searchlib/src/vespa/searchlib/util/fileheadertk.h new file mode 100644 index 00000000000..8b88df997eb --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/fileheadertk.h @@ -0,0 +1,23 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search { + +/** + * This class offers convenience methods to add tags to a GenericHeader. + */ +class FileHeaderTk { +public: + /** + * Adds all available version tags to the given header. These tags are set by the build environment and + * describe things such as build time, build tag, builder, etc. + * + * @param header The header to add tags to. + */ + static void addVersionTags(vespalib::GenericHeader &header); +}; + +} + diff --git a/searchlib/src/vespa/searchlib/util/filekit.cpp b/searchlib/src/vespa/searchlib/util/filekit.cpp new file mode 100644 index 00000000000..df509f2ea96 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/filekit.cpp @@ -0,0 +1,108 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#include
+#include
+#include
+#include
+#include
+#include
+LOG_SETUP(".filekit");
+
+namespace search
+{
+
+using vespalib::getLastErrorString;
+
+/**
+ * Create (or truncate) an empty stamp file.
+ *
+ * @return false if the file cannot be checked, already exists with
+ *         content, or cannot be opened for writing; true otherwise
+ */
+bool
+FileKit::createStamp(const vespalib::stringref &name)
+{
+    FastOS_File stamp;
+    FastOS_StatInfo statInfo;
+    bool statres;
+
+    statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+    // A missing file is fine; any other stat failure is an error.
+    if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+        LOG(error, "FATAL: Could not check stamp file %s: %s",
+            name.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    // Refuse to truncate a file that has content.
+    if (statres && statInfo._size > 0) {
+        LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
+        return false;
+    }
+
+    if (!stamp.OpenWriteOnlyTruncate(name.c_str())) {
+        LOG(error, "FATAL: Could not create stamp file %s: %s",
+            name.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    return true;
+}
+
+
+/**
+ * Check whether the stamp file exists.
+ *
+ * @return true if stat succeeds; false if the file is missing or the
+ *         check itself failed (the latter is logged)
+ */
+bool
+FileKit::hasStamp(const vespalib::stringref &name)
+{
+    FastOS_StatInfo statInfo;
+    bool statres;
+
+    statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+    if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+        LOG(error, "FATAL: Could not check stamp file %s: %s",
+            name.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    return statres;
+}
+
+
+/**
+ * Remove an empty stamp file.
+ *
+ * @return false if the file cannot be checked, has content, or cannot
+ *         be deleted for a reason other than not existing; true otherwise
+ */
+bool
+FileKit::removeStamp(const vespalib::stringref &name)
+{
+    FastOS_StatInfo statInfo;
+    bool deleteres;
+    bool statres;
+
+    statres = FastOS_File::Stat(name.c_str(), &statInfo);
+
+    if (!statres && (statInfo._error != FastOS_StatInfo::FileNotFound)) {
+        LOG(error, "FATAL: Could not check stamp file %s: %s",
+            name.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    if (statres && statInfo._size > 0) {
+        LOG(error, "FATAL: Stamp file not empty: %s", name.c_str());
+        return false;
+    }
+
+    // Retry the delete for as long as it fails with EINTR.
+    do {
+        deleteres = FastOS_File::Delete(name.c_str());
+        //FIX! errno
+    } while (!deleteres && errno == EINTR);
+
+    // A file that is already gone counts as removed.
+    if (!deleteres &&
+        FastOS_File::GetLastError() != FastOS_File::ERR_ENOENT) {
+        LOG(error, "FATAL: Could not remove stamp file %s: %s",
+            name.c_str(), getLastErrorString().c_str());
+        return false;
+    }
+    return true;
+}
+
+
+/**
+ * Modification time of the file, or a default-constructed TimeStamp
+ * when the file cannot be stat'ed.
+ */
+fastos::TimeStamp
+FileKit::getModificationTime(const vespalib::stringref &name)
+{
+    FastOS_StatInfo statInfo;
+    if (FastOS_File::Stat(name.c_str(), &statInfo)) {
+        return fastos::TimeStamp(statInfo._modifiedTimeNS);
+    }
+    return fastos::TimeStamp();
+}
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/filekit.h b/searchlib/src/vespa/searchlib/util/filekit.h
new file mode 100644
index 00000000000..fb2332529d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filekit.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include
+
+namespace search {
+
+/**
+ * Static helpers for empty "stamp" files and file modification times.
+ */
+class FileKit
+{
+private:
+    // NOTE(review): no definition or use is visible in filekit.cpp - confirm whether still needed.
+    static bool _syncFiles;
+public:
+    static bool
+    createStamp(const vespalib::stringref &name);
+
+    static bool
+    hasStamp(const vespalib::stringref &name);
+
+    static bool
+    removeStamp(const vespalib::stringref &name);
+
+    /**
+     * Returns the modification time of the given file/directory,
+     * or time stamp 0 if stating of file/directory fails.
+     **/
+    static fastos::TimeStamp
+    getModificationTime(const vespalib::stringref &name);
+
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp b/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp
new file mode 100644
index 00000000000..4ae53c03430
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/filesizecalculator.cpp
@@ -0,0 +1,59 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+ +#include +#include +LOG_SETUP(".searchlib.util.filesizecalculator"); + +#include "filesizecalculator.h" +#include + +namespace search +{ + +namespace { + +const vespalib::string fileBitSizeTag = "fileBitSize"; + +bool byteAligned(uint64_t bitSize) +{ + return ((bitSize % 8) == 0); +} + +} + +bool +FileSizeCalculator::extractFileSize(const vespalib::GenericHeader &header, + size_t headerLen, + vespalib::string fileName, size_t &fileSize) +{ + if (!header.hasTag(fileBitSizeTag)) { + return true; + } + uint64_t fileBitSize = header.getTag(fileBitSizeTag).asInteger(); + uint64_t fileByteSize = fileBitSize / 8; + if (!byteAligned(fileBitSize)) { + LOG(error, + "Bad header file size tag for %s, fileBitSize=%" PRIu64 + " which is not a multiple of 8", + fileName.c_str(), fileBitSize); + return false; + } + if (fileByteSize < headerLen) { + LOG(error, + "Bad header file size tag for %s, fileBitSize=%" PRIu64 + " but header is %" PRIu64 "bits", + fileName.c_str(), fileBitSize, headerLen * 8); + return false; + } + if (fileByteSize > fileSize) { + LOG(error, + "Bad header file size tag for %s, fileBitSize=%" PRIu64 + " but whole file size is %" PRIu64 "bits", + fileName.c_str(), fileBitSize, fileSize * 8); + return false; + } + fileSize = fileByteSize; + return true; +} + +} diff --git a/searchlib/src/vespa/searchlib/util/filesizecalculator.h b/searchlib/src/vespa/searchlib/util/filesizecalculator.h new file mode 100644 index 00000000000..233f1fe56f3 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/filesizecalculator.h @@ -0,0 +1,26 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace vespalib { class GenericHeader; } + +namespace search +{ + +/* + * Class to calculate logical file size of a file based on header tags + * and physical file size. Logical file size can be smaller than + * physical file size due to padding for directio alignment + * constraints. 
+ */ +class FileSizeCalculator +{ +public: + static bool + extractFileSize(const vespalib::GenericHeader &header, size_t headerLen, + vespalib::string fileName, size_t &fileSize); +}; + +} diff --git a/searchlib/src/vespa/searchlib/util/fileutil.cpp b/searchlib/src/vespa/searchlib/util/fileutil.cpp new file mode 100644 index 00000000000..d3407ba030b --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/fileutil.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include "filesizecalculator.h" +#include +#include +LOG_SETUP(".searchlib.util.fileutil"); + +using vespalib::make_string; +using vespalib::IllegalStateException; +using vespalib::GenericHeader; +using vespalib::FileDescriptor; +using vespalib::getLastErrorString; + +namespace search +{ + + +FileUtil::LoadedMmap::LoadedMmap(const vespalib::string &fileName) + : LoadedBuffer(NULL, 0), + _mapBuffer(NULL), + _mapSize(0) +{ + FileDescriptor fd(open(fileName.c_str(), O_RDONLY, 0664)); + if (fd.valid()) { + struct stat stbuf; + int res = fstat(fd.fd(), &stbuf); + if (res == 0) { + size_t sz = stbuf.st_size; + if (sz) { + void *tmpBuffer = mmap(NULL, sz, + PROT_READ, MAP_PRIVATE, + fd.fd(), 0); + if (tmpBuffer != MAP_FAILED) { + _mapSize = sz; + _mapBuffer = tmpBuffer; + uint32_t hl = GenericHeader::getMinSize(); + bool badHeader = true; + if (sz >= hl) { + GenericHeader::MMapReader rd(static_cast + (tmpBuffer), sz); + _header = std::make_unique(); + size_t headerLen = _header->read(rd); + if ((headerLen <= _mapSize) && + FileSizeCalculator::extractFileSize(*_header, + headerLen, + fileName, + sz)) { + _size = sz - headerLen; + _buffer = static_cast + (_mapBuffer) + headerLen; + badHeader = false; + } + } + if (badHeader) { + throw IllegalStateException( + make_string("bad file header: %s", + fileName.c_str())); + } + } else { + throw IllegalStateException( + 
make_string("Failed mmaping '%s'" + " of size %" PRIu64 " errno(%d)", + fileName.c_str(), + static_cast(sz), + errno)); + } + } + } else { + throw IllegalStateException( + make_string("Failed fstat '%s' of fd %d with result = %d", + fileName.c_str(), fd.fd(), res)); + } + } else { + throw IllegalStateException( + make_string("Failed opening '%s' for reading errno(%d)", + fileName.c_str(), errno)); + } +} + + + +FileUtil::LoadedMmap::~LoadedMmap() +{ + madvise(_mapBuffer, _mapSize, MADV_DONTNEED); + munmap(_mapBuffer, _mapSize); +} + + +std::unique_ptr +FileUtil::openFile(const vespalib::string &fileName) +{ + std::unique_ptr file(new Fast_BufferedFile()); + file->EnableDirectIO(); + if (!file->OpenReadOnly(fileName.c_str())) { + LOG(error, "could not open %s: %s", + file->GetFileName(), getLastErrorString().c_str()); + file->Close(); + throw IllegalStateException( + make_string("Failed opening '%s' for direct IO reading.", + file->GetFileName())); + } + return file; +} + + +FileUtil::LoadedBuffer::UP +FileUtil::loadFile(const vespalib::string &fileName) +{ + LoadedBuffer::UP data(new LoadedMmap(fileName)); + FastOS_File file(fileName.c_str()); + if (!file.OpenReadOnly()) { + LOG(error, "could not open %s: %s", + file.GetFileName(), getLastErrorString().c_str()); + } + file.Close(); + return data; +} + + +void FileReaderBase::handleError(ssize_t numRead, size_t wanted) +{ + if (numRead == 0) { + throw std::runtime_error(vespalib::make_string("Trying to read past EOF of file %s", _file.GetFileName())); + } else { + throw std::runtime_error(vespalib::make_string("Partial read(%zd of %zu) of file %s", numRead, wanted, _file.GetFileName())); + } +} + +void FileWriterBase::handleError(ssize_t numRead, size_t wanted) +{ + if (numRead == 0) { + throw std::runtime_error(vespalib::make_string("Failed writing anything to file %s", _file.GetFileName())); + } else { + throw std::runtime_error(vespalib::make_string("Partial read(%zd of %zu) of file %s", numRead, wanted, 
_file.GetFileName())); + } +} + +SequentialFileArray::SequentialFileArray(const vespalib::string & fname) : + _backingFile(), + _name(fname) +{ + _backingFile.EnableDirectIO(); +} + +void SequentialFileArray::rewind() +{ + assert(_backingFile.SetPosition(0)); +} + +void SequentialFileArray::close() +{ + _backingFile.Close(); +} + +void SequentialFileArray::erase() +{ + close(); + FastOS_File::Delete(_backingFile.GetFileName()); +} + +void SequentialFileArray::openReadOnly() +{ + _backingFile.ReadOpen(_name.c_str()); +} + +void SequentialFileArray::openWriteOnly() +{ + _backingFile.OpenWriteOnlyTruncate(_name.c_str()); +} + +} diff --git a/searchlib/src/vespa/searchlib/util/fileutil.h b/searchlib/src/vespa/searchlib/util/fileutil.h new file mode 100644 index 00000000000..f809da8da5c --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/fileutil.h @@ -0,0 +1,389 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include +#include +#include +#include +#include + +using vespalib::GenericHeader; + +namespace search { + +/** + * Util class with static functions for handling attribute data files. + **/ +class FileUtil +{ +public: + /** + * Buffer class with content loaded from file. 
+     **/
+    class LoadedBuffer
+    {
+    private:
+        // Not copyable.
+        LoadedBuffer(const LoadedBuffer & rhs);
+
+        LoadedBuffer &
+        operator =(const LoadedBuffer & rhs);
+    protected:
+        void * _buffer;   // start of the payload
+        size_t _size;     // payload size in bytes
+        std::unique_ptr _header;  // null until a subclass sets it
+    public:
+        typedef std::unique_ptr UP;
+
+        LoadedBuffer(void * buf, size_t sz)
+            : _buffer(buf),
+              _size(sz),
+              _header(nullptr)
+        {
+        }
+
+        virtual
+        ~LoadedBuffer()
+        {
+        }
+
+        const void *
+        buffer() const
+        {
+            return _buffer;
+        }
+
+        const char *
+        c_str() const
+        {
+            return static_cast(_buffer);
+        }
+
+        size_t
+        size() const
+        {
+            return _size;
+        }
+
+        bool
+        empty() const
+        {
+            return _size == 0;
+        }
+
+        // Number of whole elements of elemSize bytes in the payload.
+        size_t
+        size(size_t elemSize) const
+        {
+            return _size/elemSize;
+        }
+
+        // Dereferences _header; only valid once a header has been set.
+        const GenericHeader &
+        getHeader() const
+        {
+            return *_header;
+        }
+    };
+
+    /**
+     * Buffer class with content mmapped from file.
+     **/
+    class LoadedMmap : public LoadedBuffer
+    {
+        void * _mapBuffer;  // start of the mapping (header included)
+        size_t _mapSize;    // length of the mapping in bytes
+    public:
+        LoadedMmap(const vespalib::string &fileName);
+
+        virtual
+        ~LoadedMmap();
+    };
+
+    /**
+     * Opens and returns the file with the given name for reading.
+     * Enables direct IO on the file.
+     **/
+    static std::unique_ptr
+    openFile(const vespalib::string &fileName);
+
+    /**
+     * Loads and returns the file with the given name.
+     * Mmaps the file into the returned buffer.
+     **/
+    static LoadedBuffer::UP
+    loadFile(const vespalib::string &fileName);
+};
+
+/**
+ * Reads from a file and reports any short read through handleError().
+ */
+class FileReaderBase
+{
+public:
+    FileReaderBase(FastOS_FileInterface & file) : _file(file) { }
+    // Reads exactly sz bytes; anything else goes to handleError().
+    ssize_t read(void *buf, size_t sz) {
+        ssize_t numRead = _file.Read(buf, sz);
+        if (numRead != ssize_t(sz)) {
+            handleError(numRead, sz);
+        }
+        return numRead;
+    }
+private:
+    void handleError(ssize_t numRead, size_t wanted);
+    FastOS_FileInterface & _file;
+};
+
+/**
+ * Writes to a file and reports any short write through handleError().
+ */
+class FileWriterBase
+{
+public:
+    FileWriterBase(FastOS_FileInterface & file) : _file(file) { }
+    // Writes exactly sz bytes; anything else goes to handleError().
+    ssize_t write(const void *buf, size_t sz) {
+        ssize_t numWritten = _file.Write2(buf, sz);
+        if (numWritten != ssize_t(sz)) {
+            handleError(numWritten, sz);
+        }
+        return numWritten;
+    }
+protected:
+    void handleError(ssize_t numWritten, size_t wanted);
+private:
+    FastOS_FileInterface & _file;
+};
+
+/**
+ * Typed reader that returns values exactly as stored in the file.
+ */
+template
+class FileReader : public FileReaderBase
+{
+public:
+    FileReader(FastOS_FileInterface & file) : FileReaderBase(file) { }
+    // Reads sizeof(T) raw bytes into a T, without byte order conversion.
+    T readHostOrder() {
+        T result;
+        read(&result, sizeof(result));
+        return result;
+    }
+};
+
+/**
+ * Common base for file backed sequential arrays; owns the backing
+ * file, which is closed on destruction.
+ */
+class SequentialFileArray
+{
+public:
+    SequentialFileArray(const vespalib::string & fname);
+    virtual ~SequentialFileArray() { close(); }
+    const vespalib::string & getName() const { return _name; }
+    void rewind();
+    void close();
+    void erase();
+protected:
+    void openReadOnly();
+    void openWriteOnly();
+    mutable Fast_BufferedFile _backingFile;
+    vespalib::string _name;
+};
+
+/**
+ * Read-only sequential view of a file as an array of T.
+ */
+template
+class SequentialFileArrayRead : public SequentialFileArray
+{
+public:
+    SequentialFileArrayRead(const vespalib::string & fname);
+    T getNext() const { return _fileReader.readHostOrder(); }
+    // True while at least one whole T remains in the file.
+    bool hasNext() const { return _backingFile.BytesLeft() >= sizeof(T); }
+    // Number of whole elements in the file.
+    size_t size() const { return _backingFile.GetSize()/sizeof(T); }
+private:
+    mutable FileReader _fileReader;
+};
+
+/**
+ * Write-only sequential array of T appended to a file.
+ */
+template
+class SequentialFileArrayWrite : public SequentialFileArray
+{
+public:
+    SequentialFileArrayWrite(const vespalib::string & fname);
+    void
push_back(const T & v) { _count++; _fileWriter.write(&v, sizeof(v)); } + size_t size() const { return _count; } + bool empty() const { return _count == 0; } +private: + size_t _count; + FileWriterBase _fileWriter; +}; + +template +SequentialFileArrayRead::SequentialFileArrayRead(const vespalib::string & fname) : + SequentialFileArray(fname), + _fileReader(_backingFile) +{ + openReadOnly(); +} + +template +SequentialFileArrayWrite::SequentialFileArrayWrite(const vespalib::string & fname) : + SequentialFileArray(fname), + _count(0), + _fileWriter(_backingFile) +{ + openWriteOnly(); +} + +template +class MergeSorter +{ +public: + MergeSorter(const vespalib::string & name, size_t chunkSize); + void push_back(const T & v); + void commit() { sortChunk(); merge(); } + const vespalib::string & getName() const { return _name; } + void rewind() { } +private: + vespalib::string genName(size_t n); + void merge(); + void sortChunk(); + + std::vector _chunk; + size_t _chunkCount; + vespalib::string _name; +}; + +template +MergeSorter::MergeSorter(const vespalib::string & name, size_t chunkSize) : + _chunk(), + _chunkCount(0), + _name(name + ".sorted") +{ + _chunk.reserve(chunkSize); +} + +template +void MergeSorter::push_back(const T & v) +{ + if (_chunk.size() < _chunk.capacity()) { + _chunk.push_back(v); + if (_chunk.size() == _chunk.capacity()) { + sortChunk(); + } + } +} + +template +vespalib::string MergeSorter::genName(size_t n) +{ + char tmp[32]; + sprintf(tmp, ".%zd", n); + vespalib::string fname(_name); + fname += tmp; + return fname; +} + +template +void MergeSorter::merge() +{ + S sorter; + std::vector< SequentialFileArrayRead *> fileParts; + size_t count(0); + for(size_t i(0); i < _chunkCount; i++) { + std::unique_ptr< SequentialFileArrayRead > part(new SequentialFileArrayRead(genName(i))); + size_t sz = part->size(); + if (sz > 0) { + fileParts.push_back(part.release()); + } else { + part->erase(); + } + count += sz; + } + + std::vector cachedValue; + for(size_t 
i(0), m(fileParts.size()); i < m; i++) { + cachedValue.push_back(fileParts[i]->getNext()); + } + SequentialFileArrayWrite merged(_name); + for(size_t j(0); j < count; j++) { + size_t firstIndex(0); + for(size_t i(1), m(cachedValue.size()); i < m; i++) { + if (sorter.cmp(cachedValue[i], cachedValue[firstIndex])) { + firstIndex = i; + } + } + merged.push_back(cachedValue[firstIndex]); + if ( ! fileParts[firstIndex]->hasNext() ) { + fileParts[firstIndex]->erase(); + delete fileParts[firstIndex]; + fileParts.erase(fileParts.begin()+firstIndex); + cachedValue.erase(cachedValue.begin()+firstIndex); + } else { + cachedValue[firstIndex] = fileParts[firstIndex]->getNext(); + } + } +} + +template +void MergeSorter::sortChunk() +{ + S sorter; + sorter.sort(&_chunk[0], _chunk.size()); + FastOS_File chunkFile(genName(_chunkCount).c_str()); + chunkFile.EnableDirectIO(); + if (chunkFile.OpenWriteOnlyTruncate()) { + chunkFile.CheckedWrite(&_chunk[0], _chunk.size()*sizeof(_chunk[0])); + } + chunkFile.Close(); + _chunkCount++; + _chunk.clear(); +} + +template +class SequentialReadModifyWriteInterface +{ +public: + typedef T Type; + virtual ~SequentialReadModifyWriteInterface() { } + virtual const T & read() = 0; + virtual void write(const T & v) = 0; + virtual bool next() = 0; + virtual bool empty() const { return size() == 0; } + virtual size_t size() const = 0; + virtual void rewind() = 0; +}; + +template +class SequentialReadModifyWriteVector : public SequentialReadModifyWriteInterface, public vespalib::Array +{ +private: + typedef vespalib::Array Vector; +public: + SequentialReadModifyWriteVector() : Vector(), _rp(0), _wp(0) { } + SequentialReadModifyWriteVector(size_t sz) : Vector(sz), _rp(0), _wp(0) { } + virtual const T & read() { return (*this)[_rp]; } + virtual void write(const T & v) { (*this)[_wp++] = v; } + virtual bool next() { _rp++; return _rp < Vector::size(); } + virtual bool empty() const { return Vector::empty(); } + virtual size_t size() const { return 
Vector::size(); } + virtual void rewind() { _rp = 0; _wp = 0; } +private: + size_t _rp; + size_t _wp; +}; + +template +class SequentialReaderWriter : public SequentialReadModifyWriteInterface +{ +public: + SequentialReaderWriter(R & reader, W & writer) : + _reader(reader), + _writer(writer) + { + next(); + } + virtual const T & read() { return _lastRead; } + virtual void write(const T & v) { _writer.push_back(v); } + virtual bool next() { + bool hasMore(_reader.hasNext()); + if (hasMore) { + _lastRead = _reader.getNext(); + } + return hasMore; + } + virtual size_t size() const { return _reader.size(); } + virtual void rewind() { + _reader.rewind(); + next(); + _writer.rewind(); + } +private: + T _lastRead; + R & _reader; + W & _writer; +}; + +} + diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp new file mode 100644 index 00000000000..ac63d1a7a64 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp @@ -0,0 +1,82 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "foldedstringcompare.h" +#include +#include + +using vespalib::LowerCase; + +namespace search { + +size_t +FoldedStringCompare:: +size(const char *key) const +{ + return vespalib::Utf8ReaderForZTS::countChars(key); +} + +int +FoldedStringCompare:: +compareFolded(const char *key, const char *okey) const +{ + vespalib::Utf8ReaderForZTS kreader(key); + vespalib::Utf8ReaderForZTS oreader(okey); + + for (;;) { + uint32_t kval = LowerCase::convert(kreader.getChar()); + uint32_t oval = LowerCase::convert(oreader.getChar()); + + if (kval != oval) { + if (kval < oval) { + return -1; + } else { + return 1; + } + } + if (kval == 0) { + return 0; + } + } +} + + +int +FoldedStringCompare:: +compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen) const +{ + vespalib::Utf8ReaderForZTS kreader(key); + vespalib::Utf8ReaderForZTS oreader(okey); + + for (size_t j = 0; j < prefixLen; ++j ) { + uint32_t kval = LowerCase::convert(kreader.getChar()); + uint32_t oval = LowerCase::convert(oreader.getChar()); + + if (kval != oval) { + if (kval < oval) { + return -1; + } else { + return 1; + } + } + if (kval == 0) return 0; + } + // reached end of prefix + return 0; +} + + +int +FoldedStringCompare:: +compare(const char *key, const char *okey) const +{ + int res; + + res = compareFolded(key, okey); + if (res != 0) + return res; + return strcmp(key, okey); +} + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h new file mode 100644 index 00000000000..1904e0ca940 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#pragma once
+
+#include
+
+
+namespace search {
+/**
+ * Compares NUL terminated utf8 strings after case folding.
+ * The class declares no data members; all member functions are const.
+ */
+class FoldedStringCompare
+{
+public:
+    FoldedStringCompare(void) {}
+
+    /**
+     * count number of UCS-4 characters in utf8 string
+     *
+     * @param key NUL terminated utf8 string
+     * @return integer number of symbols in utf8 string before NUL
+     */
+    size_t size(const char *key) const;
+
+    /**
+     * Compare utf8 key with utf8 other key after folding both
+     *
+     * @param key NUL terminated utf8 string
+     * @param okey NUL terminated utf8 string
+     * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
+     **/
+    int compareFolded(const char *key, const char *okey) const;
+
+    /**
+     * Compare utf8 key with utf8 other key after folding both.
+     *
+     * @param key NUL terminated utf8 string
+     * @param okey NUL terminated utf8 string
+     * @param prefixLen max number of symbols to compare before
+     *                  considering keys identical.
+     *
+     * @return integer -1 if key < okey, 0 if key == okey (within the
+     *         first prefixLen symbols), 1 if key > okey
+     */
+    int compareFoldedPrefix(const char *key,
+                            const char *okey,
+                            size_t prefixLen) const;
+
+    /**
+     * Compare utf8 key with utf8 other key after folding both, if
+     * they seem equal then fall back to comparing without folding.
+     *
+     * @param key NUL terminated utf8 string
+     * @param okey NUL terminated utf8 string
+     * @return integer < 0 if key < okey, 0 if key == okey, > 0 if
+     *         key > okey. The unfolded fallback is a strcmp() result, so
+     *         the value is not limited to -1 and 1.
+     */
+    int compare(const char *key, const char *okey) const;
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/inline.h b/searchlib/src/vespa/searchlib/util/inline.h
new file mode 100644
index 00000000000..85e2e096406
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/inline.h
@@ -0,0 +1,5 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
diff --git a/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp b/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp
new file mode 100644
index 00000000000..a7f548a7c67
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/ioerrorhandler.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "ioerrorhandler.h"
+#include "statebuf.h"
+#include "statefile.h"
+#include
+
+namespace search
+{
+
+// The handler object that forward() dispatches to; nullptr when none is trapped.
+IOErrorHandler *IOErrorHandler::_instance = nullptr;
+
+namespace
+{
+// Number of forward() calls currently in progress; the destructor waits for 0.
+std::atomic nesting;
+
+}
+// Make this object the target of forward() and register forward() with FastOS.
+void
+IOErrorHandler::trap(void)
+{
+    _instance = this;
+    FastOS_File::SetFailedHandler(forward);
+    _trapped = true;
+}
+
+// Stop dispatching to this object. The FastOS hook itself stays registered
+// (the reset is compiled out by "#ifdef notyet").
+void
+IOErrorHandler::untrap(void)
+{
+#ifdef notyet
+    FastOS_File::SetFailedHandler(nullptr);
+#endif
+    _trapped = false;
+    _instance = nullptr; // NOTE(review): plain pointer also read by forward(); no synchronization visible here - confirm
+}
+
+// Static callback registered with FastOS; dispatches to the trapped instance,
+// if any, while keeping 'nesting' raised for the duration of the call.
+void
+IOErrorHandler::forward(const char *op, const char *file,
+                        int error, int64_t offset, size_t len, ssize_t rlen)
+{
+    nesting++;
+    IOErrorHandler *instance = _instance;
+    if (instance) {
+        instance->handle(op, file, error, offset, len, rlen);
+    }
+    nesting--;
+}
+
+// Format a "down" state record describing the failed operation, append it to
+// the state file (when one was given) and mark this handler as fired.
+void
+IOErrorHandler::handle(const char *op, const char *file,
+                       int error, int64_t offset, size_t len, ssize_t rlen)
+{
+    std::vector buf(4096);
+    StateBuf sb(&buf[0], buf.size());
+    sb.appendKey("state") << "down";
+    sb.appendTimestamp();
+    sb.appendKey("operation") << op;
+    sb.appendKey("file") << file;
+    sb.appendKey("error") << error;
+    sb.appendKey("offset") << offset;
+    sb.appendKey("len") << len;
+    sb.appendKey("rlen") << rlen;
+    sb << '\n';
+    if (_stateFile != nullptr) {
+        _stateFile->addState(sb.base(), sb.size(), false);
+    }
+    _fired = true;
+    sleep(3); // NOTE(review): fixed 3 second pause; the reason is not visible in this file
+}
+
+// Remember the (optional) state file and start trapping immediately.
+IOErrorHandler::IOErrorHandler(StateFile *stateFile)
+    : _stateFile(stateFile),
+      _trapped(false),
+      _fired(false)
+{
+    trap();
+}
+
+
+IOErrorHandler::~IOErrorHandler()
+{
+    untrap();
+    // Drain callbacks: wait until no forward() call is still running
+    while (nesting != 0) {
+        sleep(1);
+    }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/ioerrorhandler.h b/searchlib/src/vespa/searchlib/util/ioerrorhandler.h
new file mode 100644
index 00000000000..acdc0d12282
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/ioerrorhandler.h
@@ -0,0 +1,61 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search
+{
+
+class StateFile;
+
+/*
+ * Class used to handle io error callbacks from fastos.
+ *
+ * Constructing an object registers it as the single active handler
+ * (see _instance); destroying it unregisters it again. A reported
+ * error is written to the state file passed to the constructor and
+ * can be detected afterwards with fired().
+ */
+class IOErrorHandler
+{
+    static IOErrorHandler *_instance;
+    StateFile *_stateFile;
+    bool _trapped;
+    bool _fired;
+
+    using FailedHandler = void (*)(const char *op,
+                                   const char *file,
+                                   int error,
+                                   int64_t offset,
+                                   size_t len,
+                                   ssize_t rlen);
+    void
+    trap();
+
+    void
+    untrap();
+
+    static void
+    forward(const char *op,
+            const char *file,
+            int error,
+            int64_t offset,
+            size_t len,
+            ssize_t rlen);
+
+    void
+    handle(const char *op,
+           const char *file,
+           int error,
+           int64_t offset,
+           size_t len,
+           ssize_t rlen);
+
+public:
+    IOErrorHandler(StateFile *stateFile);
+
+    ~IOErrorHandler();
+
+    bool
+    fired() const
+    {
+        return _fired;
+    }
+};
+
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/logutil.cpp b/searchlib/src/vespa/searchlib/util/logutil.cpp
new file mode 100644
index 00000000000..f8dd7120aac
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/logutil.cpp
@@ -0,0 +1,54 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include
+#include
+LOG_SETUP(".searchlib.util.logutil");
+
+#include "logutil.h"
+#include
+#include
+
+using vespalib::JSONStringer;
+
+namespace search {
+namespace util {
+// Split 'path' on '/', drop empty elements and join the last 'numElems'
+// elements with '/'. A leading '/' is kept only when every element is used.
+vespalib::string
+LogUtil::extractLastElements(const vespalib::string & path, size_t numElems)
+{
+    std::vector elems;
+    for (size_t pos = 0; pos < path.size(); ) {
+        size_t fpos = path.find('/', pos);
+        if (fpos == vespalib::string::npos) {
+            fpos = path.size();
+        }
+        size_t len = fpos - pos;
+        if (len > 0) {
+            elems.push_back(path.substr(pos, len));
+        }
+        pos = fpos + 1;
+    }
+    vespalib::string retval;
+    if (numElems >= elems.size() && path[0] == '/') { // NOTE(review): path[0] is read even when path is empty - confirm vespalib::string allows this
+        retval.append("/");
+    }
+    size_t num = std::min(numElems, elems.size());
+    size_t pos = elems.size() - num;
+    for (size_t i = 0; i < num; ++i) {
+        if (i != 0) retval.append("/");
+        retval.append(elems[pos + i]);
+    }
+    return retval;
+}
+// Append {"dir": <last numElems path elements>, "size": <tree size>} to 'jstr'.
+void
+LogUtil::logDir(JSONStringer & jstr, const vespalib::string & path, size_t numElems)
+{
+    jstr.beginObject();
+    jstr.appendKey("dir").appendString(LogUtil::extractLastElements(path, numElems));
+    search::DirectoryTraverse dirt(path.c_str());
+    jstr.appendKey("size").appendInt64(dirt.GetTreeSize());
+    jstr.endObject();
+}
+
+} // namespace util
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/logutil.h b/searchlib/src/vespa/searchlib/util/logutil.h
new file mode 100644
index 00000000000..dc984277c16
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/logutil.h
@@ -0,0 +1,29 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include
+
+namespace search {
+namespace util {
+
+class LogUtil {
+public:
+    /**
+     * Extract the last num elements from the given path and
+     * return a new path with these elements.
+     *
+     * @param path     '/' separated path; empty elements are ignored.
+     * @param numElems number of trailing elements to keep.
+     * @return the kept elements joined with '/'.
+     **/
+    static vespalib::string extractLastElements(const vespalib::string & path, size_t numElems);
+
+    /**
+     * Log the given directory (with size) to the given json stringer.
+     *
+     * @param jstr the json stringer to log into.
+     * @param path the path of the directory to log.
+     * @param numElems the last number of elements from the path to log.
+     **/
+    static void logDir(vespalib::JSONStringer & jstr, const vespalib::string & path, size_t numElems);
+};
+
+} // namespace util
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/memorytub.h b/searchlib/src/vespa/searchlib/util/memorytub.h
new file mode 100644
index 00000000000..40068e8e674
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/memorytub.h
@@ -0,0 +1,94 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#if defined(MEMTUB_CLASS) || defined(MEMTUB_REFCLASS) || defined(MEMTUB_CHUNK) || defined(MEMTUB_LIMIT)
+#error "Memory tub 'template' parameters collide with other defines..."
+#endif
+
+#include
+
+#include
+
+namespace search {
+namespace util {
+/**
+ * Interface of a memory tub: an allocator handing out memory through
+ * TubAlloc(), with optional reference counting through AddRef()/SubRef().
+ */
+class IMemTub
+{
+public:
+    /**
+     * Destructor. No cleanup needed for base class.
+     */
+    virtual ~IMemTub(void) { }
+
+    virtual void *TubAlloc(size_t size) = 0;
+    virtual void AddRef() = 0;
+    virtual void SubRef() = 0;
+    static uint32_t Align(uint32_t size) // rounds size up to a multiple of sizeof(char *)
+    {
+        return ((size + (sizeof(char *) - 1))
+                & ~(sizeof(char *) - 1));
+    }
+};
+
+}
+}
+// Placement forms: "new (tub) T" takes its memory from the given tub.
+inline void *
+operator new(size_t size, search::util::IMemTub *tub)
+{
+    return tub->TubAlloc(size);
+}
+
+inline void *
+operator new[](size_t size, search::util::IMemTub *tub)
+{
+    return tub->TubAlloc(size);
+}
+// Each group below defines the four MEMTUB_* parameters and then includes the
+// pseudo-template (presumably memorytub_impl.h; the include name is missing
+// here) to generate one tub class and its ref counted variant.
+#define MEMTUB_CLASS MicroMemoryTub
+#define MEMTUB_REFCLASS MicroMemoryTubRefCnt
+#define MEMTUB_CHUNK (8192 - 256)
+#define MEMTUB_LIMIT 2048
+#include
+
+#define MEMTUB_CLASS TinyMemoryTub
+#define MEMTUB_REFCLASS TinyMemoryTubRefCnt
+#define MEMTUB_CHUNK (16384 - 256)
+#define MEMTUB_LIMIT 4096
+#include
+
+#define MEMTUB_CLASS SmallMemoryTub
+#define MEMTUB_REFCLASS SmallMemoryTubRefCnt
+#define MEMTUB_CHUNK (32768 - 256)
+#define MEMTUB_LIMIT 8192
+#include
+
+#define MEMTUB_CLASS MediumMemoryTub
+#define MEMTUB_REFCLASS MediumMemoryTubRefCnt
+#define MEMTUB_CHUNK (65536 - 256)
+#define MEMTUB_LIMIT 16384
+#include
+
+#define MEMTUB_CLASS LargeMemoryTub
+#define MEMTUB_REFCLASS LargeMemoryTubRefCnt
+#define MEMTUB_CHUNK (131072 - 256)
+#define MEMTUB_LIMIT 32768
+#include
+
+#define MEMTUB_CLASS HugeMemoryTub
+#define MEMTUB_REFCLASS HugeMemoryTubRefCnt
+#define MEMTUB_CHUNK (262144 - 256)
+#define MEMTUB_LIMIT 65536
+#include
+
+namespace search {
+namespace util {
+
+class DocSumMemoryPool : public SmallMemoryTub {};
+
+}
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/memorytub_impl.h b/searchlib/src/vespa/searchlib/util/memorytub_impl.h
new file mode 100644
index 00000000000..802a34cf976
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/memorytub_impl.h
@@ -0,0 +1,202 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#if !defined(MEMTUB_CLASS) || !defined(MEMTUB_REFCLASS) || !defined(MEMTUB_CHUNK) || !defined(MEMTUB_LIMIT) +#error "Missing 'template' parameter(s)..." +#endif + + +namespace search { +namespace util { + +/** + * These classes are used to speed up allocation and deallocation of + * memory. The poor mans template HACK is in honor of AIX. The denial + * of array alloc operations is in honor of Microsoft (VC++). + **/ +class MEMTUB_CLASS : public IMemTub +{ +private: + MEMTUB_CLASS(const MEMTUB_CLASS &); + MEMTUB_CLASS& operator=(const MEMTUB_CLASS &); + +public: + + struct AllocInfo { + private: + AllocInfo(const AllocInfo &); + AllocInfo &operator=(const AllocInfo &); + + public: + AllocInfo *_next; + void *_data; + uint32_t _size; + + AllocInfo(AllocInfo *next, void *data, uint32_t size) + : _next(next), _data(data), _size(size) {} + }; + + struct Chunk { + private: + Chunk(const Chunk &); + Chunk &operator=(const Chunk &); + public: + uint32_t _used; + Chunk *_next; + char _data[MEMTUB_CHUNK]; + + void *Alloc(size_t size) + { + size_t alignedsize = Align(size); + if (_used + alignedsize <= sizeof(_data)) { + void *ret = &_data[_used]; + _used += alignedsize; + return ret; + } + return NULL; + } + Chunk(uint32_t used, + Chunk *next) + : _used(used), + _next(next) + { + } + }; + +private: + + Chunk _fixedChunk; + Chunk *_chunkHead; + AllocInfo *_allocHead; + + void *SlowAlloc(size_t size) { + Chunk *chunk = static_cast(malloc(sizeof(Chunk))); + assert(chunk != NULL); + chunk->_used = 0; + chunk->_next = _chunkHead; + _chunkHead = chunk; + return _chunkHead->Alloc(size); + } + void *SmallAlloc(size_t size) { + void *tmp = _chunkHead->Alloc(size); + return (tmp != NULL) ? 
tmp : SlowAlloc(size); + } + void *BigAlloc(size_t size) { + void *ret = malloc(size); + assert(ret != NULL); + _allocHead = new (SmallAlloc(sizeof(AllocInfo))) AllocInfo(_allocHead, ret, size); + return ret; + } + +public: + MEMTUB_CLASS() + : _fixedChunk(0, NULL), + _chunkHead(&_fixedChunk), + _allocHead(NULL) + { + assert(MEMTUB_CHUNK >= MEMTUB_LIMIT * 2); + assert(MEMTUB_LIMIT >= sizeof(AllocInfo)); + } + + uint32_t GetChunkSize() const { return MEMTUB_CHUNK; } + uint32_t GetAllocLimit() const { return MEMTUB_LIMIT; } + + inline bool InTub(const void *pt) const { + const char *p = static_cast(pt); + + for (Chunk *chunk = _chunkHead; chunk != NULL; chunk = chunk->_next) + if (p >= chunk->_data && + p < chunk->_data + chunk->_used) + return true; + + for (AllocInfo *info = _allocHead; info != NULL; info = info->_next) + if (p >= static_cast(info->_data) && + p < static_cast(info->_data) + info->_size) + return true; + + return false; + } + + void *Alloc(size_t size) { + return (size > MEMTUB_LIMIT) ? 
BigAlloc(size) : SmallAlloc(size); + } + + void Reset() + { + for (AllocInfo *info = _allocHead; + info != NULL; info = info->_next) { + free(info->_data); + } + _allocHead = NULL; + while (_chunkHead != &_fixedChunk) { + Chunk *tmp = _chunkHead; + _chunkHead = tmp->_next; + free(tmp); + } + _fixedChunk._used = 0; + } + + virtual ~MEMTUB_CLASS() + { + Reset(); + } + + // IMemTub implementation + virtual void *TubAlloc(size_t size) { + return Alloc(size); + } + virtual void AddRef() {} + virtual void SubRef() {} +}; + + +class MEMTUB_REFCLASS : public MEMTUB_CLASS +{ +private: + FastOS_Mutex _lock; + int _refcnt; + +public: + MEMTUB_REFCLASS() : _lock(), _refcnt(1) {} + virtual ~MEMTUB_REFCLASS() { assert(_refcnt == 0); } + virtual void AddRef() + { + _lock.Lock(); + _refcnt++; + _lock.Unlock(); + } + virtual void SubRef() + { + _lock.Lock(); + assert(_refcnt > 0); + if (--_refcnt > 0) { + _lock.Unlock(); + return; + } + _lock.Unlock(); + delete this; + } +}; + +} +} + +inline void * +operator new(size_t size, search::util::MEMTUB_CLASS *tub) +{ + return tub->Alloc(size); +} + + +inline void * +operator new[](size_t size, search::util::MEMTUB_CLASS *tub) +{ + return tub->Alloc(size); +} + + +#undef MEMTUB_CLASS +#undef MEMTUB_REFCLASS +#undef MEMTUB_CHUNK +#undef MEMTUB_LIMIT diff --git a/searchlib/src/vespa/searchlib/util/memoryusage.h b/searchlib/src/vespa/searchlib/util/memoryusage.h new file mode 100644 index 00000000000..cc331f73d2a --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/memoryusage.h @@ -0,0 +1,123 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +namespace search { + +class MemoryUsage { +private: + size_t _allocatedBytes; + size_t _usedBytes; + size_t _deadBytes; + size_t _allocatedBytesOnHold; + +public: + MemoryUsage() + : _allocatedBytes(0), + _usedBytes(0), + _deadBytes(0), + _allocatedBytesOnHold(0) + { + } + + MemoryUsage(size_t allocated, + size_t used, + size_t dead, + size_t onHold) + : _allocatedBytes(allocated), + _usedBytes(used), + _deadBytes(dead), + _allocatedBytesOnHold(onHold) + { + } + + size_t + allocatedBytes(void) const + { + return _allocatedBytes; + } + + size_t + usedBytes(void) const + { + return _usedBytes; + } + + size_t + deadBytes(void) const + { + return _deadBytes; + } + + size_t + allocatedBytesOnHold(void) const + { + return _allocatedBytesOnHold; + } + + void + incAllocatedBytes(size_t inc) + { + _allocatedBytes += inc; + } + + void + decAllocatedBytes(size_t dec) + { + _allocatedBytes -= dec; + } + + void + incUsedBytes(size_t inc) + { + _usedBytes += inc; + } + + void + incDeadBytes(size_t inc) + { + _deadBytes += inc; + } + + void + incAllocatedBytesOnHold(size_t inc) + { + _allocatedBytesOnHold += inc; + } + + void + setAllocatedBytes(size_t alloc) + { + _allocatedBytes = alloc; + } + + void + setUsedBytes(size_t used) + { + _usedBytes = used; + } + + void + setDeadBytes(size_t dead) + { + _deadBytes = dead; + } + + void + setAllocatedBytesOnHold(size_t onHold) + { + _allocatedBytesOnHold = onHold; + } + + void + merge(const MemoryUsage & rhs) + { + _allocatedBytes += rhs._allocatedBytes; + _usedBytes += rhs._usedBytes; + _deadBytes += rhs._deadBytes; + _allocatedBytesOnHold += rhs._allocatedBytesOnHold; + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h b/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h new file mode 100644 index 00000000000..42519e10b9f --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/postingpriorityqueue.h @@ -0,0 +1,258 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+
+namespace search
+{
+
+/*
+ * Provide priority queue semantics for a set of posting inputs.
+ *
+ * An input (IN) must offer operator<, isValid(), read() and
+ * write(OUT &); the queue stores pointers to the inputs and does not
+ * own them.
+ */
+template
+class PostingPriorityQueue
+{
+public:
+    class Ref
+    {
+        IN *_ref;
+    public:
+        Ref(IN *ref)
+            : _ref(ref)
+        {
+        }
+
+        bool
+        operator<(const Ref &rhs) const
+        {
+            return *_ref < *rhs._ref;
+        }
+
+        IN *
+        get(void) const
+        {
+            return _ref;
+        }
+    };
+
+    typedef std::vector Vector;
+    Vector _vec;
+
+    PostingPriorityQueue(void)
+        : _vec()
+    {
+    }
+
+    bool
+    empty(void) const
+    {
+        return _vec.empty();
+    }
+
+    void
+    clear(void)
+    {
+        _vec.clear();
+    }
+
+    void
+    initialAdd(IN *it)
+    {
+        _vec.push_back(Ref(it));
+    }
+
+    /*
+     * Sort vector after a set of initial add operations, so lowest()
+     * and adjust() can be used.
+     */
+    void
+    sort(void)
+    {
+        std::sort(_vec.begin(), _vec.end());
+    }
+
+    /*
+     * Return lowest value. Assumes vector is sorted.
+     */
+    IN *
+    lowest(void) const
+    {
+        return _vec.front().get();
+    }
+
+    /*
+     * The vector might no longer be sorted since the first element has changed
+     * value. Perform adjustments to make vector sorted again.
+     */
+    void
+    adjust(void);
+
+
+    template
+    void
+    mergeHeap(OUT &out) __attribute__((noinline));
+
+    template
+    static void
+    mergeOne(OUT &out, IN &in) __attribute__((noinline));
+
+    template
+    static void
+    mergeTwo(OUT &out, IN &in1, IN &in2) __attribute__((noinline));
+
+    template
+    static void
+    mergeSmall(OUT &out,
+               typename Vector::iterator ib,
+               typename Vector::iterator ie)
+        __attribute__((noinline));
+
+    template
+    void
+    merge(OUT &out, uint32_t heapLimit) __attribute__((noinline));
+};
+
+// Restore sort order after the first input was advanced: drop it when it is
+// no longer valid, otherwise move it to its new position.
+template
+void
+PostingPriorityQueue::adjust(void)
+{
+    typedef typename Vector::iterator VIT;
+    if (!_vec.front().get()->isValid()) {
+        _vec.erase(_vec.begin());	// Iterator no longer valid
+        return;
+    }
+    if (_vec.size() == 1)	// Only one iterator left
+        return;
+    // Perform binary search to find first element higher than changed value
+    VIT gt = std::upper_bound(_vec.begin() + 1, _vec.end(), _vec.front());
+    VIT to = _vec.begin();
+    VIT from = to;
+    ++from;
+    Ref changed = *to;	// Remember changed value
+    while (from != gt) {	// Shift elements to make space for changed value
+        *to = *from;
+        ++from;
+        ++to;
+    }
+    *to = changed;	// Save changed value at right location
+}
+
+// Drain all inputs through the sorted vector: write the lowest input, advance
+// it and re-sort, until no input is left.
+template
+template
+void
+PostingPriorityQueue::mergeHeap(OUT &out)
+{
+    while (!empty()) {
+        IN *low = lowest();
+        low->write(out);
+        low->read();
+        adjust();
+    }
+}
+
+// Copy a single input to 'out' until it becomes invalid.
+template
+template
+void
+PostingPriorityQueue::mergeOne(OUT &out, IN &in)
+{
+    while (in.isValid()) {
+        in.write(out);
+        in.read();
+    }
+}
+// Merge two inputs; returns as soon as the input just advanced becomes invalid.
+template
+template
+void
+PostingPriorityQueue::mergeTwo(OUT &out, IN &in1, IN &in2)
+{
+    for (;;) {
+        IN &low = in2 < in1 ? in2 : in1;
+        low.write(out);
+        low.read();
+        if (!low.isValid())
+            break;
+    }
+}
+
+// Merge the inputs in [ib, ie) using a linear scan for the lowest one;
+// returns as soon as the input just advanced becomes invalid.
+template
+template
+void
+PostingPriorityQueue::mergeSmall(OUT &out,
+                                       typename Vector::iterator ib,
+                                       typename Vector::iterator ie)
+{
+    for (;;) {
+        typename Vector::iterator i = ib;
+        IN *low = i->get();
+        for (++i; i != ie; ++i)
+            if (*i->get() < *low)
+                low = i->get();
+        low->write(out);
+        low->read();
+        if (!low->isValid())
+            break;
+    }
+}
+
+// Merge all inputs to 'out'. With at least 'heapLimit' inputs the sorted
+// vector (mergeHeap) is used; otherwise mergeSmall/mergeTwo/mergeOne run in
+// turn, removing the one exhausted input after each round.
+// All inputs must be valid on entry (asserted).
+template
+template
+void
+PostingPriorityQueue::merge(OUT &out, uint32_t heapLimit)
+{
+    if (_vec.empty())
+        return;
+    for (typename Vector::iterator i = _vec.begin(), ie = _vec.end(); i != ie;
+         ++i) {
+        assert(i->get()->isValid());
+    }
+    if (_vec.size() >= heapLimit) {
+        sort();
+        void (PostingPriorityQueue::*mergeHeapFunc)(OUT &out) =
+            &PostingPriorityQueue::mergeHeap;
+        (this->*mergeHeapFunc)(out);
+        return;
+    }
+    for (;;) {
+        if (_vec.size() == 1) {
+            void (*mergeOneFunc)(OUT &out, IN &in) =
+                &PostingPriorityQueue::mergeOne;
+            (*mergeOneFunc)(out, *_vec.front().get());
+            _vec.clear();
+            return;
+        }
+        if (_vec.size() == 2) {
+            void (*mergeTwoFunc)(OUT &out, IN &in1, IN &in2) =
+                &PostingPriorityQueue::mergeTwo;
+            (*mergeTwoFunc)(out, *_vec[0].get(), *_vec[1].get());
+        } else {
+            void (*mergeSmallFunc)(OUT &out,
+                                   typename Vector::iterator ib,
+                                   typename Vector::iterator ie) =
+                &PostingPriorityQueue::mergeSmall;
+            (*mergeSmallFunc)(out, _vec.begin(), _vec.end());
+        }
+        for (typename Vector::iterator i = _vec.begin(), ie = _vec.end();
+             i != ie; ++i) {
+            if (!i->get()->isValid()) {
+                _vec.erase(i);	// the merge helpers stop after exactly one input ran out
+                break;
+            }
+        }
+        for (typename Vector::iterator i = _vec.begin(), ie = _vec.end();
+             i != ie; ++i) {
+            assert(i->get()->isValid());
+        }
+        assert(!_vec.empty());
+    }
+}
+
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/rand48.h b/searchlib/src/vespa/searchlib/util/rand48.h
new file mode 100644
index 00000000000..91fcf1b03e9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/rand48.h
@@ -0,0 +1,44 @@
+//
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+#pragma once
+
+#include
+
+namespace search {
+
+/*
+ * Simple random generator based on lrand48() spec.
+ *
+ * Keeps a 48 bit state that is advanced with the linear congruential
+ * step state = (0x5DEECE66D * state + 0xb) mod 2^48. The default
+ * constructor seeds with the fixed value 0x1234abcd, so the sequence
+ * is the same on every run unless srand48() is called.
+ */
+class Rand48
+{
+private:
+    uint64_t _state;
+public:
+    void
+    srand48(long seed)
+    {
+        // low 32 bits of the seed become the high state bits, 0x330e the low 16
+        _state = ((static_cast(seed & 0xffffffffu)) << 16) + 0x330e;
+    }
+
+    Rand48(void)
+        : _state(0)
+    {
+        srand48(0x1234abcd);
+    };
+    void iterate(void) {
+        _state = (UINT64_C(0x5DEECE66D) * _state + 0xb) &
+                 UINT64_C(0xFFFFFFFFFFFF);
+    }
+    /*
+     * Return value from 0 to 2^31 - 1
+     */
+    long lrand48(void) {
+        iterate();
+        return static_cast(_state >> 17);
+    }
+};
+
+} // namespace search
+
diff --git a/searchlib/src/vespa/searchlib/util/randomgenerator.h b/searchlib/src/vespa/searchlib/util/randomgenerator.h
new file mode 100644
index 00000000000..84bab6f03c9
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/randomgenerator.h
@@ -0,0 +1,63 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+#include
+#include
+// Convenience helpers (bounded integers, random strings, filled vectors) on
+// top of Rand48.
+namespace search {
+class RandomGenerator
+{
+private:
+    Rand48 _rnd;
+
+public:
+    RandomGenerator() : _rnd() {}
+
+    RandomGenerator(long seed) : _rnd() {
+        _rnd.srand48(seed);
+    }
+
+    void srand(long seed) {
+        _rnd.srand48(seed);
+    }
+    // Value in [min, max]. The underlying generator yields 31 bits, so for
+    // ranges wider than 2^31 not every value can be produced.
+    uint32_t rand(uint32_t min, uint32_t max) {
+        assert(min <= max);
+        uint32_t divider = max - min + 1; // wraps to 0 for the full uint32_t range
+        return (divider == 0 ? _rnd.lrand48() : min + _rnd.lrand48() % divider);
+    }
+    // String of lowercase letters 'a'..'z' with a length in [minLen, maxLen].
+    vespalib::string getRandomString(uint32_t minLen, uint32_t maxLen) {
+        uint32_t len = rand(minLen, maxLen);
+        vespalib::string retval;
+        for (uint32_t i = 0; i < len; ++i) {
+            char c = static_cast(rand('a', 'z'));
+            retval.push_back(c);
+        }
+        return retval;
+    }
+    // Replace the content of 'vec' with 'numStrings' random strings.
+    void fillRandomStrings(std::vector & vec, uint32_t numStrings,
+                           uint32_t minLen, uint32_t maxLen) {
+        vec.clear();
+        vec.reserve(numStrings);
+        for (uint32_t i = 0; i < numStrings; ++i) {
+            vec.push_back(getRandomString(minLen, maxLen));
+        }
+    }
+    // Replace the content of 'vec' with 'numValues' values taken from lrand48().
+    template
+    void fillRandomIntegers(std::vector & vec, uint32_t numValues) {
+        vec.clear();
+        vec.reserve(numValues);
+        for (uint32_t i = 0; i < numValues; ++i) {
+            vec.push_back(static_cast(_rnd.lrand48()));
+        }
+    }
+};
+
+} // search
+
diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.cpp b/searchlib/src/vespa/searchlib/util/rawbuf.cpp
new file mode 100644
index 00000000000..1a19792604f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/rawbuf.cpp
@@ -0,0 +1,360 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 1998-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+
+
+#include
+#include
+#include
+
+namespace search {
+
+static inline size_t smin(size_t a, size_t b) { return (a < b) ?
a : b; }
+// Create a buffer of 'size' bytes allocated with malloc (none when size is 0).
+RawBuf::RawBuf(size_t size)
+    : _bufStart(NULL),
+      _bufEnd(NULL),
+      _bufFillPos(NULL),
+      _bufDrainPos(NULL),
+      _initialBufStart(NULL),
+      _initialSize(size)
+{
+    if (size > 0) {
+        _bufStart = static_cast(malloc(size)); // NOTE(review): result is not checked here, unlike in Reuse()
+    }
+    _bufEnd = _bufStart + size;
+    _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+// Create a buffer on top of caller supplied memory. That memory is never
+// freed by this class (see the _initialBufStart comparisons below).
+RawBuf::RawBuf(char *start, size_t size)
+    : _bufStart(NULL),
+      _bufEnd(NULL),
+      _bufFillPos(NULL),
+      _bufDrainPos(NULL),
+      _initialBufStart(start),
+      _initialSize(size)
+{
+    _bufStart = start;
+    _bufEnd = _bufStart + size;
+    _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+// Free the buffer unless it is the caller supplied one.
+RawBuf::~RawBuf(void)
+{
+    if (_bufStart != _initialBufStart)
+        free(_bufStart);
+}
+
+
+/**
+ * Allocate a new buffer at least as large as the parameter value,
+ * move any content to the new and delete the old buffer.
+ *
+ * The new size is the old size doubled until it is at least the old
+ * size plus 'needlen'. Unread content ends up at the start of the new
+ * buffer.
+ */
+void
+RawBuf::expandBuf(size_t needlen)
+{
+    size_t size = (_bufEnd - _bufStart) * 2;
+    if (size < 1)
+        size = 2;
+    needlen += _bufEnd - _bufStart;
+    while (size < needlen)
+        size *= 2;
+    char*  nbuf = static_cast(malloc(size)); // NOTE(review): result is not checked before use
+    if (_bufFillPos != _bufDrainPos)
+        memcpy(nbuf, _bufDrainPos, _bufFillPos - _bufDrainPos);
+    _bufFillPos = _bufFillPos - _bufDrainPos + nbuf;
+    _bufDrainPos = nbuf;
+    if (_bufStart != _initialBufStart)
+        free(_bufStart);
+    _bufStart = nbuf;
+    _bufEnd = _bufStart + size;
+}
+
+
+/**
+ * Put 'data' of 'len'gth into the buffer.  If insufficient room,
+ * make the buffer larger.
+ */
+void
+RawBuf::append(const void *data, size_t len)
+{
+    ensureSize(len);
+    memcpy(_bufFillPos, data, len);
+    _bufFillPos += len;
+}
+// Append a single byte.
+void
+RawBuf::append(uint8_t byte)
+{
+    ensureSize(1);
+    *_bufFillPos++ = byte;
+}
+// Append 'n' in the vespalib compressed integer encoding for positive numbers.
+void
+RawBuf::appendCompressedPositiveNumber(uint64_t n)
+{
+    size_t len(vespalib::compress::Integer::compressedPositiveLength(n));
+    ensureSize(len);
+    _bufFillPos += vespalib::compress::Integer::compressPositive(n, _bufFillPos);
+}
+// Append 'n' in the vespalib compressed integer encoding for signed numbers.
+void
+RawBuf::appendCompressedNumber(int64_t n)
+{
+    size_t len(vespalib::compress::Integer::compressedLength(n));
+    ensureSize(len);
+    _bufFillPos += vespalib::compress::Integer::compress(n, _bufFillPos);
+}
+
+
+/**
+ * Has the entire contents of the buffer been used up, i.e. freed?
+ */
+bool
+RawBuf::IsEmpty(void)
+{
+    return _bufFillPos == _bufDrainPos;
+}
+
+
+/**
+ * Free 'len' bytes from the start of the contents.  (These
+ * have presumably been written or read.)
+ */
+void
+RawBuf::Drain(size_t len)
+{
+    _bufDrainPos += len;
+    if (_bufDrainPos == _bufFillPos)
+        reset();
+}
+
+
+/**
+ * Compact any free space from the beginning of the buffer, by
+ * copying the contents to the start of the buffer.
+ * If the resulting buffer doesn't have room for 'len' more
+ * bytes of contents, make it large enough.
+ */
+void
+RawBuf::preAlloc(size_t len)
+{
+    size_t curfree = _bufEnd - _bufFillPos;
+    if (curfree >= len)
+        return;
+    if (_bufEnd - _bufStart < len + _bufFillPos - _bufDrainPos) {
+        expandBuf(len);
+        assert(_bufEnd - _bufStart >= len + _bufFillPos - _bufDrainPos);
+        curfree = _bufEnd - _bufFillPos;
+        if (curfree >= len)
+            return;
+    }
+    memmove(_bufStart, _bufDrainPos, _bufFillPos - _bufDrainPos);
+    _bufFillPos -= (_bufDrainPos - _bufStart);
+    _bufDrainPos = _bufStart;
+    assert(static_cast(_bufEnd -_bufFillPos) >= len);
+}
+
+// Move unread content to the start of the buffer.
+void
+RawBuf::Compact(void)
+{
+    if (_bufDrainPos == _bufStart)
+        return;
+    if (_bufFillPos != _bufDrainPos)
+        memmove(_bufStart, _bufDrainPos, _bufFillPos - _bufDrainPos);
+    _bufFillPos -= (_bufDrainPos - _bufStart);
+    _bufDrainPos = _bufStart;
+}
+
+// Discard all content. A buffer that has grown beyond four times its initial
+// size is released and replaced by one of the initial size.
+void
+RawBuf::Reuse(void)
+{
+    if (static_cast(_bufEnd - _bufStart) > _initialSize * 4) {
+        free(_bufStart);
+        if (_initialSize > 0) {
+            if (_initialBufStart != NULL)
+                _bufStart = _initialBufStart;
+            else
+                _bufStart = static_cast(malloc(_initialSize));
+            assert(_bufStart != NULL);
+        } else
+            _bufStart = NULL;
+        _bufEnd = _bufStart + _initialSize;
+    }
+    _bufDrainPos = _bufFillPos = _bufStart;
+}
+
+// Append the NUL terminated string 'src' (without the NUL), growing as needed.
+void
+RawBuf::operator+=(const char *src)
+{
+    while (*src) {
+        char *cachedBufFillPos = _bufFillPos;
+        const char *cachedBufEnd = _bufEnd;
+        while (cachedBufFillPos < cachedBufEnd && *src)
+            *cachedBufFillPos++ = *src++;
+        _bufFillPos = cachedBufFillPos;
+        if (_bufFillPos >= _bufEnd)
+            expandBuf(1);
+    }
+}
+
+// Append the unread content of 'buffer'.
+void
+RawBuf::operator+=(const RawBuf& buffer)
+{
+    size_t nbytes = buffer.GetUsedLen();
+    if (nbytes == 0)
+        return;
+
+    while (GetFreeLen() < nbytes)
+        expandBuf(nbytes);
+    memcpy(_bufFillPos, buffer._bufDrainPos, nbytes);
+    _bufFillPos += nbytes;
+}
+
+
+bool
+RawBuf::operator==(const RawBuf &buffer)
+{
+    size_t nbytes = buffer.GetUsedLen();
+    if (nbytes != GetUsedLen())
+        return false;
+
+    const char *p, *t;
+    for (p=_bufDrainPos, t=buffer._bufDrainPos; p<_bufFillPos; p++, t++) {
+
if (*p != *t) + return false; + } + + return true; +} + +/** + * Append the value of param 'num' to the buffer, as a decimal + * number right adjusted in a field of width 'fieldw', remaining + * space filled with 'fill' characters. + */ +void +RawBuf::addNum(size_t num, size_t fieldw, char fill) +{ + char buf1[20]; + char *p = buf1; + do { + *p++ = '0' + (num % 10); + num /= 10; + } while (num != 0); + size_t plen = p - buf1; + size_t wantlen = fieldw; + if (plen > wantlen) + wantlen = plen; + if (_bufFillPos + wantlen >= _bufEnd) + expandBuf(wantlen); + char *cachedBufFillPos = _bufFillPos; + while (plen < wantlen) { + *cachedBufFillPos++ = fill; + wantlen--; + } + while (p > buf1) { + *cachedBufFillPos++ = *--p; + } + _bufFillPos = cachedBufFillPos; +} + + +void +RawBuf::addNum32(int32_t num, size_t fieldw, char fill) +{ + char buf1[11]; + uint32_t unum = num >= 0 ? num : -num; + char *p = buf1; + do { + *p++ = '0' + (unum % 10); + unum /= 10; + } while (unum != 0); + if (num < 0) + *p++ = '-'; + size_t plen = p - buf1; + size_t wantlen = fieldw; + if (plen > wantlen) + wantlen = plen; + if (_bufFillPos + wantlen >= _bufEnd) + expandBuf(wantlen); + char *cachedBufFillPos = _bufFillPos; + while (plen < wantlen) { + *cachedBufFillPos++ = fill; + wantlen--; + } + while (p > buf1) { + *cachedBufFillPos++ = *--p; + } + _bufFillPos = cachedBufFillPos; +} + + + +void +RawBuf::addNum64(int64_t num, size_t fieldw, char fill) +{ + char buf1[21]; + uint64_t unum = num >= 0 ? 
num : -num; + char *p = buf1; + do { + *p++ = '0' + (unum % 10); + unum /= 10; + } while (unum != 0); + if (num < 0) + *p++ = '-'; + size_t plen = p - buf1; + size_t wantlen = fieldw; + if (plen > wantlen) + wantlen = plen; + if (_bufFillPos + wantlen >= _bufEnd) + expandBuf(wantlen); + char *cachedBufFillPos = _bufFillPos; + while (plen < wantlen) { + *cachedBufFillPos++ = fill; + wantlen--; + } + while (p > buf1) { + *cachedBufFillPos++ = *--p; + } + _bufFillPos = cachedBufFillPos; +} + + +void +RawBuf::addHitRank(HitRank num) +{ + char buf1[100]; + snprintf(buf1, sizeof(buf1), "%g", static_cast(num)); + append(buf1, strlen(buf1)); +} + + +void +RawBuf::addSignedHitRank(SignedHitRank num) +{ + char buf1[100]; + snprintf(buf1, sizeof(buf1), "%g", static_cast(num)); + append(buf1, strlen(buf1)); +} + +/** + * Read from the indicated file into the buffer, no more that the + * given number of bytes and no more than will fit in the buffer. + */ +size_t +RawBuf::readFile(FastOS_File &file, size_t maxlen) +{ + size_t got = file.Read(_bufFillPos, smin((_bufEnd - _bufFillPos), maxlen)); + if (got > 0) + _bufFillPos += got; + return got; +} + +} diff --git a/searchlib/src/vespa/searchlib/util/rawbuf.h b/searchlib/src/vespa/searchlib/util/rawbuf.h new file mode 100644 index 00000000000..39e791a070f --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/rawbuf.h @@ -0,0 +1,163 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include + +#include + +namespace search { +/** + * A buffer with an input point and an output point. The space + * is dynamically allocated by the constructor, and can be extended + * when needed. Buffer contents may be moved around when there is + * insufficient room. 
+ */
+
+class FASTOS_LOADABLE_EXPORT RawBuf
+{
+private:
+    // Declared but not defined here: copying is not supported.
+    RawBuf(const RawBuf &);
+    RawBuf& operator=(const RawBuf &);
+
+    char* _bufStart; // ref. to start of buffer (don't move this!)
+    char* _bufEnd; // ref. to byte after last in buffer (don't move this!)
+    char* _bufFillPos; // ref. to byte where next should be put in
+    char* _bufDrainPos; // ref. to next byte to take out of buffer
+    char* _initialBufStart; // caller provided initial buffer, or NULL
+    size_t _initialSize; // size given to the constructor
+public:
+
+    RawBuf(char *start, size_t size);// Initially use provided buffer
+    RawBuf(size_t size); // malloc-s given size, assigns to _bufStart
+    ~RawBuf(void); // Frees _bufStart unless it is the initial buffer.
+
+    void operator+=(const char *src);
+    void operator+=(const RawBuf& buffer);
+    bool operator==(const RawBuf &buffer);
+    void addNum(size_t num, size_t fieldw, char fill);
+    void addNum32(int32_t num, size_t fieldw, char fill);
+    void addNum64(int64_t num, size_t fieldw, char fill);
+
+    void addHitRank(HitRank num);
+    void addSignedHitRank(SignedHitRank num);
+
+    void append(const void *data, size_t len);
+    void append(uint8_t byte);
+    void appendLong(uint64_t n);
+    void appendCompressedPositiveNumber(uint64_t n);
+    void appendCompressedNumber(int64_t n);
+    bool IsEmpty(void); // Return whether all written.
+    void expandBuf(size_t needlen);
+    // Bytes available after the fill position.
+    size_t GetFreeLen(void) const { return _bufEnd - _bufFillPos; }
+    // Bytes already drained, i.e. between buffer start and drain position.
+    size_t GetDrainLen(void) const { return _bufDrainPos - _bufStart; }
+    const char *GetDrainPos(void) const { return _bufDrainPos; }
+    const char *GetFillPos(void) const { return _bufFillPos; }
+    char * GetWritableFillPos(void) const { return _bufFillPos; }
+    // Makes room for 'len' bytes first; this may move the contents.
+    char * GetWritableFillPos(size_t len) { preAlloc(len); return _bufFillPos; }
+    char * GetWritableDrainPos(size_t offset) { return _bufDrainPos + offset; }
+    // Keep only the first 'offset' undrained bytes.
+    void truncate(size_t offset) { _bufFillPos = _bufDrainPos + offset; }
+    void preAlloc(size_t len); // Ensure room for 'len' more bytes.
+    size_t readFile(FastOS_File &file, size_t maxlen);
+    // Forget all contents; the allocation is kept.
+    void reset(void) { _bufDrainPos = _bufFillPos = _bufStart; }
+    void Compact(void);
+    void Reuse(void);
+    size_t GetUsedAndDrainLen(void) const { return _bufFillPos - _bufStart; }
+    // Bytes written but not yet drained.
+    size_t GetUsedLen(void) const { return _bufFillPos - _bufDrainPos; }
+    void Drain(size_t len); // Adjust drain pos.
+    // Advance the fill position after data was written directly; no bounds check.
+    void Fill(size_t len) { _bufFillPos += len; }
+
+    // Grow the buffer unless 'size' bytes are free after the fill position.
+    void ensureSize(size_t size) {
+        if (static_cast(_bufEnd - _bufFillPos) < size) {
+            expandBuf(size);
+            assert(static_cast(_bufEnd - _bufFillPos) >= size);
+        }
+    }
+
+    /**
+     * Convert from network (big-endian) byte order at 'src' to unsigned integers.
+     */
+    static uint16_t InetTo16(const unsigned char *src) {
+        return (static_cast(*src) << 8) + *(src + 1);
+    };
+    static uint16_t InetTo16(const char* src) {
+        return InetTo16(reinterpret_cast(src));
+    };
+    static uint32_t InetTo32(const unsigned char* src) {
+        return (((((static_cast(*src) << 8) + *(src + 1)) << 8) + *(src + 2)) << 8) + *(src + 3);
+    };
+    static uint32_t InetTo32(const char* src) {
+        return InetTo32(reinterpret_cast(src));
+    };
+
+    /**
+     * Convert unsigned integer 'src' to network (big-endian) byte order, at
+     * 'dst' or _bufFillPos. The static variants return a pointer to the byte
+     * after those written; the Put variants advance _bufFillPos.
+     */
+    static unsigned char* ToInet(uint16_t src, unsigned char* dst) {
+        *(dst + 1) = static_cast(src); // The least significant 8 bits
+        src >>= 8; // of 'src' are stored.
+        *dst = static_cast(src);
+        return dst + 2;
+    };
+    void Put16ToInet(uint16_t src) {
+        ensureSize(2);
+        _bufFillPos = reinterpret_cast
+            (ToInet(src,
+                    reinterpret_cast(_bufFillPos)));
+    };
+    static unsigned char* ToInet(uint32_t src, unsigned char* dst) {
+        *(dst + 3) = src; // The least significant 8 bits
+        src >>= 8; // of 'src' are stored.
+        *(dst + 2) = src;
+        src >>= 8;
+        *(dst + 1) = src;
+        src >>= 8;
+        *dst = src;
+        return dst + 4;
+    };
+    void PutToInet(uint32_t src) {
+        ensureSize(4);
+        _bufFillPos = reinterpret_cast
+            (ToInet(src,
+                    reinterpret_cast(_bufFillPos)));
+    };
+
+    // 64 bit variant: high 32 bits first, then the low 32 bits.
+    static unsigned char* ToInet(uint64_t src, unsigned char* dst) {
+        ToInet(static_cast(src >> 32), dst);
+        ToInet(static_cast(src & 0xffffffffull), dst + 4);
+        return dst + 8;
+    };
+    void Put64ToInet(uint64_t src) {
+        ensureSize(8);
+        _bufFillPos = reinterpret_cast
+            (ToInet(src,
+                    reinterpret_cast(_bufFillPos)));
+    };
+
+    /**
+     * Check that char-s are loaded to and stored from the 8 least
+     * significant bits of a 32 bit value, and that shift works the usual
+     * way. (It is placed in this class to keep it out of view.)
+     * Aborts when the store check fails.
+     */
+    static void CheckHardware(void) {
+        uint32_t i = 0xe2345678;
+        unsigned char b = 67, // 'C'
+            c = 65, // 'A'
+            d = 66; // 'B'
+        unsigned char* p = &c;
+        assert(sizeof(uint32_t) == 4 &&
+               sizeof(long int) == 8 &&
+               static_cast(*p) << 16 == 4259840);
+        *p = i >> 16; // stores 0x34 (52), the low byte of 0xe234
+        if ( !(b == 67 && c == 52 && d == 66)) {
+            abort();
+        }
+    };
+};
+
+}
+
diff --git a/searchlib/src/vespa/searchlib/util/runnable.h b/searchlib/src/vespa/searchlib/util/runnable.h
new file mode 100644
index 00000000000..437a2333231
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/runnable.h
@@ -0,0 +1,42 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include
+#include
+
+namespace search {
+
+/**
+ * Base class for a task run in a FastOS thread. Subclasses implement
+ * doRun(); join() blocks until Run() has finished.
+ */
+class Runnable : public FastOS_Runnable
+{
+protected:
+    vespalib::Monitor _cond;
+    bool _done;     // set by stop(); not read in this class
+    bool _stopped;  // set when doRun() has returned
+
+public:
+    Runnable() :
+        _cond(), _done(false), _stopped(false)
+    { }
+    // Thread entry point: runs doRun(), then wakes up any join() callers.
+    void Run(FastOS_ThreadInterface *, void *) {
+        doRun();
+
+        vespalib::MonitorGuard guard(_cond);
+        _stopped = true;
+        guard.broadcast();
+    }
+    virtual void doRun() = 0;
+    // Only sets the flag under the monitor; no waiter is signalled here.
+    void stop() {
+        vespalib::MonitorGuard guard(_cond);
+        _done = true;
+    }
+    // Wait until Run() has marked the task as stopped.
+    void join() {
+        vespalib::MonitorGuard guard(_cond);
+        while (!_stopped) {
+            guard.wait();
+        }
+    }
+};
+
+} // search
+
+
diff --git a/searchlib/src/vespa/searchlib/util/searchable_stats.h b/searchlib/src/vespa/searchlib/util/searchable_stats.h
new file mode 100644
index 00000000000..a7d5764de7c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/searchable_stats.h
@@ -0,0 +1,44 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+namespace search {
+
+/**
+ * Simple statistics for a single Searchable component. Used for
+ * internal aggregation before inserting numbers into the metrics
+ * framework.
+ **/
+class SearchableStats
+{
+private:
+    size_t _memoryUsage;
+    size_t _docsInMemory;
+    size_t _sizeOnDisk;
+
+public:
+    SearchableStats() : _memoryUsage(0), _docsInMemory(0), _sizeOnDisk(0) {}
+    // Each one-argument overload sets the value and returns *this for chaining;
+    // the matching no-argument overload reads it back.
+    SearchableStats &memoryUsage(size_t value) {
+        _memoryUsage = value;
+        return *this;
+    }
+    size_t memoryUsage() const { return _memoryUsage; }
+    SearchableStats &docsInMemory(size_t value) {
+        _docsInMemory = value;
+        return *this;
+    }
+    size_t docsInMemory() const { return _docsInMemory; }
+    SearchableStats &sizeOnDisk(size_t value) {
+        _sizeOnDisk = value;
+        return *this;
+    }
+    size_t sizeOnDisk() const { return _sizeOnDisk; }
+    // Add all three counters of 'rhs' to this object.
+    SearchableStats &add(const SearchableStats &rhs) {
+        _memoryUsage += rhs._memoryUsage;
+        _docsInMemory += rhs._docsInMemory;
+        _sizeOnDisk += rhs._sizeOnDisk;
+        return *this;
+    }
+};
+
+} // namespace search
+
+
diff --git a/searchlib/src/vespa/searchlib/util/sigbushandler.cpp b/searchlib/src/vespa/searchlib/util/sigbushandler.cpp
new file mode 100644
index 00000000000..7670388572a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/sigbushandler.cpp
@@ -0,0 +1,168 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "statefile.h"
+#include "statebuf.h"
+#include "sigbushandler.h"
+#include
+#include
+#include
+#include
+
+namespace search
+{
+
+
+SigBusHandler *SigBusHandler::_instance = nullptr;
+
+
+namespace
+{
+
+// 0 when no handler invocation holds the guard, 1 while one does.
+std::atomic sigBusNesting;
+
+/*
+ * Non-blocking guard: the constructor tries once to flip sigBusNesting
+ * from 0 to 1 and the destructor resets it only if that succeeded.
+ */
+class TryLockGuard
+{
+    bool _gotLock;
+public:
+    TryLockGuard() noexcept
+        : _gotLock(false)
+    {
+        int expzero = 0;
+        _gotLock = sigBusNesting.compare_exchange_strong(expzero, 1);
+    }
+
+    ~TryLockGuard() noexcept
+    {
+        if (_gotLock) {
+            sigBusNesting = 0;
+        }
+    }
+
+    bool
+    gotLock() const noexcept
+    {
+        return _gotLock;
+    }
+};
+
+
+/*
+ * Write string to standard error using only async signal safe methods.
+ */ +void +mystderr(const char *msg) noexcept +{ + const char *p = msg; + while (*p != '\0') { + ++p; + } + write(STDERR_FILENO, msg, static_cast(p - msg)); +} + +} + +void +SigBusHandler::trap(void) +{ + struct sigaction sa; + _instance = this; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = SigBusHandler::forward; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaddset(&sa.sa_mask, SIGBUS); + sigaction(SIGBUS, &sa, nullptr); + _trapped = true; +} + + +void +SigBusHandler::untrap(void) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + sigaction(SIGBUS, &sa, nullptr); + _trapped = false; + _instance = nullptr; +} + + +void +SigBusHandler::forward(int sig, siginfo_t *si, void *ucv) +{ + _instance->handle(sig, si, ucv); +} + + +void +SigBusHandler::handle(int sig, siginfo_t *si, void *ucv) +{ + (void) sig; + (void) ucv; + + StateBuf sb(_buf, sizeof(_buf)); + bool raced = false; + do { + // Protect against multiple threads. + TryLockGuard guard; + if (!guard.gotLock()) { + raced = true; + break; + } + sb.appendKey("state") << "down"; + sb.appendTimestamp(); + sb.appendKey("operation") << "sigbus"; + sb.appendKey("errno") << static_cast(si->si_errno); + sb.appendKey("code") << static_cast(si->si_code); + if (si->si_code != 0) { + sb.appendAddr(si->si_addr); + } + sb << '\n'; + // TODO: Report backing store file, for quick diagnostics. + if (_stateFile != nullptr) { + _stateFile->addState(sb.base(), sb.size(), true); + } + _fired = true; + } while (0); + if (raced) { + mystderr("SIGBUS handler call race, ignoring signal\n"); + sleep(5); + return; + } + untrap(); // Further bus errors will trigger core dump + + if (_unwind != nullptr) { + // Unit test is using siglongjmp based unwinding + sigjmp_buf *unwind = _unwind; + _unwind = nullptr; + siglongjmp(*unwind, 1); + } else { + // Normal case, sleep 3 seconds (i.e. 
allow main thread to detect + // issue and notify cluster controller) before returning and + // likely core dumping. + sleep(3); + } +} + + +SigBusHandler::SigBusHandler(StateFile *stateFile) + : _stateFile(stateFile), + _unwind(nullptr), + _trapped(false), + _fired(false) +{ + trap(); +} + + +SigBusHandler::~SigBusHandler() +{ + untrap(); +} + + +} diff --git a/searchlib/src/vespa/searchlib/util/sigbushandler.h b/searchlib/src/vespa/searchlib/util/sigbushandler.h new file mode 100644 index 00000000000..49c7879b10a --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/sigbushandler.h @@ -0,0 +1,60 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +class StateFile; + +/* + * Class used to handle SIGBUS signals, which are generated on IO errors + * on backing file for a memory map. + */ +class SigBusHandler +{ + static SigBusHandler *_instance; + StateFile *_stateFile; + sigjmp_buf *_unwind; + bool _trapped; + bool _fired; + char _buf[2048]; + + void + trap(); + + void + untrap(); + + static void + forward(int sig, siginfo_t *si, void *ucv); + + void + handle(int sig, siginfo_t *si, void *ucv); + +public: + SigBusHandler(StateFile *stateFile); + + ~SigBusHandler(); + + bool + fired() const + { + return _fired; + } + + /* + * Setup siglongjmp based unwinding, used by unit tests. + */ + void + setUnwind(sigjmp_buf *unwind) + { + _unwind = unwind; + } +}; + + +} + diff --git a/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp new file mode 100644 index 00000000000..c86303e5a97 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.cpp @@ -0,0 +1,8 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include +#include "slime_output_raw_buf_adapter.h" + +namespace search { + +} // namespace search diff --git a/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h new file mode 100644 index 00000000000..9ab88d07c28 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/slime_output_raw_buf_adapter.h @@ -0,0 +1,24 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include "rawbuf.h" + +namespace search { + +class SlimeOutputRawBufAdapter : public ::vespalib::slime::Output +{ +private: + RawBuf &_buf; + +public: + SlimeOutputRawBufAdapter(RawBuf &buf) : _buf(buf) {} + virtual char *exchange(char *, size_t commit, size_t reserve) { + _buf.Fill(commit); + return _buf.GetWritableFillPos(reserve); + } +}; + +} // namespace search + diff --git a/searchlib/src/vespa/searchlib/util/sort.h b/searchlib/src/vespa/searchlib/util/sort.h new file mode 100644 index 00000000000..70324731ca5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/sort.h @@ -0,0 +1,143 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 1998-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + + +#pragma once + +#include +#include + +namespace search { +/* Various sorting-related functions */ + +template +inline always_inline__ T * +median3(T *a, T *b, T *c, Compare *compobj) +{ + return Compare::Compare(compobj, *a, *b) < 0 ? + (Compare::Compare(compobj, *b, *c) < 0 ? b : Compare::Compare(compobj, *a, *c) < 0 ? c : a) : + (Compare::Compare(compobj, *b, *c) > 0 ? b : Compare::Compare(compobj, *a, *c) > 0 ? 
c : a); +} + + +template +void +insertion_sort(T a[], unsigned int n, Compare *compobj) +{ + unsigned int i, j; + T _swap; + + for (i=1; i +void +qsort(T *a, unsigned int n, Compare *compobj) +{ + for (;;) { + if (n < InsertSortLevel) { + insertion_sort(a, n, compobj); + return; + } + T *middle = a + (n/2); + T *left = a; + T *right = a + n - 1; + if (n > Median9Level) { + size_t s = n/8; + left = median3 + (left, left + s, left + 2*s, compobj); + middle = median3 + (middle - s, middle, middle+s, compobj); + right = median3 + (right - 2*s, right - s, right, compobj); + } + middle = median3(left, middle, right, compobj); + T *pa, *pb, *pc, *pd; + pa = pb = a; + pc = pd = a + n - 1; + T swap; + T pivot = *middle; + int r; + for (;;) { + while (pb <= pc && (r = Compare::Compare(compobj, *pb, pivot)) <= 0) { + if (r == 0) { + swap = *pa; + *pa = *pb; + *pb = swap; + pa++; + } + pb++; + } + while (pb <= pc && (r = Compare::Compare(compobj, *pc, pivot)) >= 0) { + if (r == 0) { + swap = *pc; + *pc = *pd; + *pd = swap; + pd--; + } + pc--; + } + if (pb > pc) + break; + swap = *pb; + *pb = *pc; + *pc = swap; + pb++; + pc--; + } + right = a + n; + int s = std::min(pa - a, pb - pa); + T *swapa = a; + T *swapb = pb-s; + T *swapaend = a + s; + while (swapa < swapaend) { + T tmp = *swapa; + *swapa++ = *swapb; + *swapb++ = tmp; + } + s = std::min(pd - pc, right - pd - 1); + swapa = pb; + swapb = right - s; + swapaend = pb + s; + while (swapa < swapaend) { + T tmp = *swapa; + *swapa++ = *swapb; + *swapb++ = tmp; + } + // Recurse on the smaller partition. 
+ if (pb - pa < pd - pc) { + if ((s = pb - pa) > 1) + qsort + (a, s, compobj); + if ((s = pd - pc) > 1) { + a = right - s; + n = s; + continue; + } + } else { + if ((s = pd - pc) > 1) + qsort + (right - s, s, compobj); + if ((s = pb - pa) > 1) { + n = s; + continue; + } + } + break; + } +} + +} + diff --git a/searchlib/src/vespa/searchlib/util/statebuf.cpp b/searchlib/src/vespa/searchlib/util/statebuf.cpp new file mode 100644 index 00000000000..69021f3b5f5 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/statebuf.cpp @@ -0,0 +1,215 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include +#include "statebuf.h" + +static const char *hexx = "0123456789abcdef"; + +namespace search +{ + +void +StateBuf::overflow() noexcept +{ + abort(); +} + + + +StateBuf::StateBuf(void *buf, size_t bufLen) noexcept + : _start(static_cast(buf)), + _cur(static_cast(buf)), + _end(static_cast(buf) + bufLen) +{ +} + + +StateBuf & +StateBuf::operator<<(const char *s) noexcept +{ + for (const char *p = s; *p != '\0'; ++p) { + *this << *p; + } + return *this; +} + + +StateBuf & +StateBuf::appendQuoted(const char *s) noexcept +{ + *this << '"'; + for (const char *p = s; *p != '\0'; ++p) { + switch (*p) { + case '\\': + *this << '\\' << '\\'; + break; + case '\n': + *this << '\\' << 'n'; + break; + case '"': + *this << '\\' << '"'; + break; + default: + *this << *p; + } + } + *this << '"'; + return *this; +} + + +StateBuf & +StateBuf::appendKey(const char *s) noexcept +{ + if (_cur != _start) { + *this << ' '; + } + *this << s << '='; + return *this; +} + + +StateBuf & +StateBuf::operator<<(unsigned long val) noexcept +{ + char buf[22]; + char *p = buf; + for (; val != 0; ++p) { + *p = '0' + (val % 10); + val /= 10; + } + if (p == buf) { + *this << '0'; + } + while (p != buf) { + --p; + *this << *p; + } + return *this; +} + + +StateBuf & +StateBuf::operator<<(long val) noexcept +{ + if (val < 0) { + *this << '-' << 
static_cast(- val); + } else { + *this << static_cast(val); + } + return *this; +} + + +StateBuf & +StateBuf::operator<<(unsigned int val) noexcept +{ + *this << static_cast(val); + return *this; +} + + +StateBuf & +StateBuf::operator<<(int val) noexcept +{ + *this << static_cast(val); + return *this; +} + + +StateBuf & +StateBuf::appendDecFraction(unsigned long val, unsigned int width) noexcept +{ + char buf[22]; + if (width > sizeof(buf)) { + abort(); + } + char *p = buf; + char *pe = buf + width; + for (; p != pe; ++p) { + *p = '0' + (val % 10); + val /= 10; + } + while (p != buf) { + --p; + *this << *p; + } + return *this; +} + +StateBuf & +StateBuf::appendHex(unsigned long val) noexcept +{ + *this << "0x"; + for (int shft = 64; shft != 0;) { + shft -= 4; + *this << hexx[(val >> shft) & 15]; + } + return *this; +} + + +StateBuf & +StateBuf::operator<<(const struct timespec &ts) noexcept +{ + (*this << static_cast(ts.tv_sec) << '.'). + appendDecFraction(static_cast(ts.tv_nsec), 9); + return *this; +} + + +StateBuf & +StateBuf::appendTimestamp(const struct timespec &ts) noexcept +{ + appendKey("ts") << ts; + return *this; +} + + +StateBuf & +StateBuf::appendTimestamp() noexcept +{ + struct timespec ts; + /* + * clock_gettime() is supposed to be async signal safe. + * gettimeofday() is not documented to be async signal safe. 
+ */ + int gtres = clock_gettime(CLOCK_REALTIME, &ts); + if (gtres != 0) { + abort(); + } + appendTimestamp(ts); + return *this; +} + + +StateBuf & +StateBuf::appendAddr(void *addr) noexcept +{ + appendKey("addr"); + appendHex(reinterpret_cast(addr)); + return *this; +} + + +size_t +StateBuf::size() const noexcept +{ + return _cur - _start; +}; + + +const char * +StateBuf::base() const noexcept +{ + return _start; +} + + +std::string +StateBuf::str() const +{ + return std::string(_start, _cur); +} + +} diff --git a/searchlib/src/vespa/searchlib/util/statebuf.h b/searchlib/src/vespa/searchlib/util/statebuf.h new file mode 100644 index 00000000000..0e2df4f8d7b --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/statebuf.h @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include + +namespace search +{ + +/** + * Class used to serialize application state in a mostly safe manner. + * + * Only async signal safe methods can be called, except for unit test + * helper methods (str). 
+ *
+ * Output goes into a caller supplied fixed size area; appending past
+ * its end calls overflow(), which aborts.
+ */
+class StateBuf
+{
+    char *_start; // first byte of the caller supplied area
+    char *_cur;   // next byte to write
+    char *_end;   // one past the last byte of the area
+
+    void
+    overflow() noexcept __attribute__((__noinline__, __noreturn__));
+
+public:
+    StateBuf(void *buf, size_t bufLen) noexcept;
+
+    inline StateBuf &
+    operator<<(char c) noexcept __attribute__((__always_inline__))
+    {
+        if (__builtin_expect(_cur != _end, true)) {
+            *_cur++ = c;
+            return *this;
+        }
+        overflow();
+    }
+
+
+    StateBuf &
+    operator<<(const char *s) noexcept;
+
+    StateBuf &
+    appendQuoted(const char *s) noexcept;
+
+    StateBuf &
+    appendKey(const char *s) noexcept;
+
+    StateBuf &
+    operator<<(const struct timespec &ts) noexcept;
+
+    StateBuf &
+    appendTimestamp(const struct timespec &ts) noexcept;
+
+    StateBuf &
+    appendTimestamp() noexcept;
+
+    StateBuf &
+    appendAddr(void *addr) noexcept;
+
+    StateBuf &
+    operator<<(unsigned long val) noexcept;
+
+    StateBuf &
+    operator<<(long val) noexcept;
+
+    StateBuf &
+    operator<<(unsigned int val) noexcept;
+
+    StateBuf &
+    operator<<(int val) noexcept;
+
+    StateBuf &
+    appendDecFraction(unsigned long val, unsigned int width) noexcept;
+
+    StateBuf &
+    appendHex(unsigned long val) noexcept;
+
+    size_t
+    size() const noexcept;
+
+    const char *
+    base() const noexcept;
+
+    /*
+     * Unit test helper methods.
+     */
+    std::string
+    str() const;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/util/statefile.cpp b/searchlib/src/vespa/searchlib/util/statefile.cpp
new file mode 100644
index 00000000000..d093a036f19
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/statefile.cpp
@@ -0,0 +1,460 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include
+#include "statefile.h"
+#include
+#include
+
+using Mutex = std::mutex;
+using Guard = std::lock_guard;
+
+namespace search
+{
+
+namespace
+{
+
+Mutex stateMutex;
+
+/*
+ * Assumes that std::atomic implementation is lock free, which it is
+ * for gcc 4.9.2. Usage is not async signal safe unless the
+ * implementation is lock free.
+ */
+std::atomic nestingCount;
+
+/*
+ * The my*() wrappers below call the corresponding system call and, on
+ * failure, print a message naming the file to stderr and abort().
+ */
+int
+myopen(const char *name) noexcept
+{
+    int fd = open(name, O_CREAT | O_CLOEXEC | O_SYNC | O_RDWR, 0644);
+    if (fd < 0) {
+        std::error_code ec(errno, std::system_category());
+        fprintf(stderr,
+                "Could not open %s: %s\n", name, ec.message().c_str());
+        abort();
+    }
+    return fd;
+}
+
+
+void
+myfstat(const char *name, int fd, struct stat &stbuf) noexcept
+{
+    int fsres = fstat(fd, &stbuf);
+    if (fsres != 0) {
+        std::error_code ec(errno, std::system_category());
+        fprintf(stderr, "Could not fstat %s: %s\n", name, ec.message().c_str());
+        abort();
+    }
+}
+
+
+/* A short read is treated as fatal, just like a read error. */
+void
+mypread(const char *name, int fd, void *buf, size_t bufLen, int64_t offset) noexcept
+{
+    ssize_t rres = pread(fd, buf, bufLen, offset);
+    if (static_cast(rres) != bufLen) {
+        if (rres >= 0) {
+            fprintf(stderr,
+                    "Could not read %zu bytes from %s offset %" PRId64
+                    ": short read (%zd)\n",
+                    bufLen, name, offset, rres);
+        } else {
+            std::error_code ec(errno, std::system_category());
+            fprintf(stderr,
+                    "Could not read %zu bytes from %s offset %" PRId64 ": %s\n",
+                    bufLen, name, offset, ec.message().c_str());
+        }
+        abort();
+    }
+}
+
+
+/* A short write is treated as fatal, just like a write error. */
+void
+mypwrite(const char *name, int fd, const void *buf, size_t bufLen,
+         int64_t offset) noexcept
+{
+    ssize_t wres = pwrite(fd, buf, bufLen, offset);
+    if (static_cast(wres) != bufLen) {
+        if (wres >= 0) {
+            fprintf(stderr,"Could not write %zu bytes to %s offset %" PRId64
+                    ": short write (%zd)\n",
+                    bufLen, name, offset, wres);
+        } else {
+            std::error_code ec(errno, std::system_category());
+            fprintf(stderr,
+                    "Could not write %zu bytes to %s offset %" PRId64 ": %s\n",
+                    bufLen, name, offset, ec.message().c_str());
+        }
+        abort();
+    }
+}
+
+
+void
+myclose(const char *name, int fd) noexcept
+{
+    int closeres = close(fd);
+    if (closeres != 0) {
+        std::error_code ec(errno, std::system_category());
+        fprintf(stderr, "Could not close %s: %s\n",
+                name, ec.message().c_str());
+        abort();
+    }
+}
+
+
+void
+myfsync(const char *name, int fd) noexcept
+{
+    int fsyncres = fsync(fd);
+    if (fsyncres != 0) {
+        std::error_code ec(errno, std::system_category());
+        fprintf(stderr, "Could not fsync %s: %s\n",
+                name, ec.message().c_str());
+        abort();
+    }
+}
+
+
+/* A missing file (ENOENT) is not an error. */
+void
+myunlink(const char *name) noexcept
+{
+    int unlinkres = unlink(name);
+    if (unlinkres != 0 && errno != ENOENT) {
+        std::error_code ec(errno, std::system_category());
+        fprintf(stderr, "Could not unlink %s: %s\n",
+                name, ec.message().c_str());
+        abort();
+    }
+}
+
+
+/*
+ * Write string to standard error using only async signal safe methods.
+ */
+void
+mystderr(const char *msg) noexcept
+{
+    const char *p = msg;
+    while (*p != '\0') {
+        ++p;
+    }
+    write(STDERR_FILENO, msg, static_cast(p - msg));
+}
+
+
+/*
+ * Get async signal safe spinlock. Sleeps one second between attempts
+ * rather than busy waiting.
+ */
+void
+getLock() noexcept
+{
+    int expzero = 0;
+    while (!nestingCount.compare_exchange_weak(expzero, 1)) {
+        expzero = 0; // a failed exchange stored the observed value here
+        sleep(1);
+    }
+}
+
+
+/*
+ * Release async signal safe spinlock
+ */
+void
+releaseLock() noexcept
+{
+    nestingCount = 0;
+}
+
+/* Scope guard: getLock() on construction, releaseLock() on destruction. */
+class SpinGuard
+{
+public:
+    SpinGuard() noexcept
+    {
+        getLock();
+    }
+
+    ~SpinGuard() noexcept
+    {
+        releaseLock();
+    }
+};
+
+}
+
+
+/*
+ * Keeps strdup()-ed copies of the state file name and of the history
+ * file name (name + ".history"), then pads the state file and repairs
+ * the history.
+ */
+StateFile::StateFile(const std::string &name)
+    : _name(nullptr),
+      _historyName(nullptr),
+      _gen(0)
+{
+    _name = strdup(name.c_str());
+    std::string historyName = name + ".history";
+    _historyName = strdup(historyName.c_str());
+    zeroPad();
+    fixupHistory();
+}
+
+
+StateFile::~StateFile()
+{
+    free(_name);
+    free(_historyName);
+}
+
+
+/* Remove the state file and its history file; missing files are fine. */
+void
+StateFile::erase(const std::string &name)
+{
+    std::string historyName = name + ".history";
+    myunlink(name.c_str());
+    myunlink(historyName.c_str());
+}
+
+
+/* Read the whole state file, padding included, into 'buf'. */
+void
+StateFile::readRawState(std::vector &buf)
+{
+    struct stat stbuf;
+    Guard guard(stateMutex); // Serialize states
+    SpinGuard spinGuard;
+    int fd = myopen(_name);
+    myfstat(_name, fd, stbuf);
+    buf.resize(stbuf.st_size);
+    mypread(_name,
fd, &buf[0], buf.size(), 0); + myclose(_name, fd); +} + + +void +StateFile::trimState(std::vector &buf) +{ + auto newBufEnd = buf.cbegin(); + auto bufEnd = buf.cend(); + for (auto p = buf.cbegin(); p != bufEnd; ++p) { + if (*p == '\n') { // End of state string + newBufEnd = p + 1; + break; // stop scanning after first state + } + if (*p == '\0') { // padding encountered, stop scanning for end + break; + } + } + size_t newStateSize = newBufEnd - buf.cbegin(); + buf.resize(newStateSize); +} + + +void +StateFile::readState(std::vector &buf) +{ + readRawState(buf); + trimState(buf); +} + + +void +StateFile::trimHistory(std::vector &history, const char *name, int hfd, + std::vector &lastHistoryState) +{ + auto historyEnd = history.cend(); + auto prevHistoryEnd = history.cbegin(); + auto newHistoryEnd = history.cbegin(); + for (auto p = history.cbegin(); p != historyEnd; ++p) { + if (*p == '\n') { // End of state string + prevHistoryEnd = newHistoryEnd; + newHistoryEnd = p + 1; + } + if (*p == '\0') { // corruption, stop scanning for end + break; + } + } + std::vector historyEntry(prevHistoryEnd, newHistoryEnd); + size_t newHistSize = newHistoryEnd - history.cbegin(); + if (newHistSize != history.size()) { + int ftruncres = ftruncate(hfd, newHistSize); + if (ftruncres != 0) { + std::error_code ec(errno, std::system_category()); + fprintf(stderr, "Could not truncate %s: %s\n", + name, ec.message().c_str()); + abort(); + } + history.resize(newHistSize); + } + historyEntry.swap(lastHistoryState); +} + +/* + * Fixup history after failed append, e.g. trucated write caused partial + * last state. 
+ */ +void +StateFile::fixupHistory() +{ + struct stat sthbuf; + int hfd = myopen(_historyName); + myfstat(_historyName, hfd, sthbuf); + std::vector history(sthbuf.st_size); + mypread(_historyName, hfd, &history[0], history.size(), 0); + std::vector lastHistory; + trimHistory(history, _historyName, hfd, lastHistory); + std::vector buf; + readState(buf); + if (!buf.empty() && buf != lastHistory) { + mypwrite(_historyName, hfd, &buf[0], buf.size(), history.size()); + myfsync(_historyName, hfd); + } + myclose(_historyName, hfd); + if (buf.empty() && !lastHistory.empty()) { + // Restore state in main state file from last state in history. + int fd = myopen(_name); + mypwrite(_name, fd, &lastHistory[0], lastHistory.size(), 0); + myfsync(_name, fd); + myclose(_name, fd); + } +} + + +void +StateFile::zeroPad() +{ + struct stat stbuf; + int minSize = 4096; + int fd = myopen(_name); + myfstat(_name, fd, stbuf); + std::vector buf(minSize); + if (stbuf.st_size < minSize) { + int padSize = minSize - stbuf.st_size; + mypwrite(_name, fd, &buf[0], padSize, stbuf.st_size); + myfsync(_name, fd); + } + myclose(_name, fd); +} + + +void +StateFile::checkState(const char *buf, size_t bufLen) noexcept +{ + const char *pe = buf + bufLen; + for (const char *p = buf; p < pe; ++p) { + if (*p == '\n') { + if (p != buf + bufLen - 1) { + mystderr("statefile state corrupted: early newline\n"); + abort(); + } + return; + } + if (*p == '\0') { + mystderr("statefile state corrupted: nul byte found\n"); + abort(); + } + } + mystderr("statefile state corrupted: missing newline at end\n"); + abort(); +} + + +void +StateFile::internalAddSignalState(const char *buf, size_t bufLen, + const char *name, + int appendFlag, + const char *openerr, + const char *writeerr, + const char *fsyncerr, + const char *closeerr) noexcept +{ + // Write to main state file, overwriting previous state + int fd = open(name, O_CREAT | O_CLOEXEC | O_SYNC | O_RDWR | appendFlag, + 0644); + if (fd < 0) { + mystderr(openerr); + 
abort(); + } + ssize_t wres = write(fd, buf, bufLen); + if (static_cast(wres) != bufLen) { + mystderr(writeerr); + abort(); + } + int fsyncres = fsync(fd); + if (fsyncres != 0) { + mystderr(fsyncerr); + abort(); + } + int closeres = close(fd); + if (closeres != 0) { + mystderr(closeerr); + abort(); + } +} + +/* + * Write state string to file. State string contains one newline, at the end. + * + * Async signal safe functions used: + * open(), write(), fsync(), close() + * + * Is in signal handler, thus cannot throw exception. + */ +void +StateFile::addSignalState(const char *buf, size_t bufLen) noexcept +{ + checkState(buf, bufLen); + SpinGuard spinGuard; + // Write to main state file, overwriting previous state + internalAddSignalState(buf, bufLen, _name, 0, + "Could not open statefile for read/write\n", + "Error writing to statefile\n", + "Error syncing statefile\n", + "Error closing statefile\n"); + // Write to state file history, appending + internalAddSignalState(buf, bufLen, _historyName, O_APPEND, + "Could not open statefile history for read/write\n", + "Error writing to statefile history\n", + "Error syncing statefile history\n", + "Error closing statefile history\n"); + ++_gen; +} + +/* + * Write state string to file. State string contains one newline, at the end. 
+ */ +void +StateFile::addState(const char *buf, size_t bufLen, bool signal) +{ + if (signal) { + // In signal context, degraded error reporting on state file failures + addSignalState(buf, bufLen); + return; + } + checkState(buf, bufLen); + Guard guard(stateMutex); // Serialize states + SpinGuard spinGuard; + { + // Write to main state file, overwriting previous state + int fd = myopen(_name); + mypwrite(_name, fd, buf, bufLen, 0); + myfsync(_name, fd); + myclose(_name, fd); + } + { + // Write to state file history, appending + int hfd = myopen(_historyName); + struct stat sthbuf; + myfstat(_historyName, hfd, sthbuf); + mypwrite(_historyName, hfd, buf, bufLen, sthbuf.st_size); + myfsync(_historyName, hfd); + myclose(_historyName, hfd); + } + ++_gen; +} + + +int +StateFile::getGen() const +{ + return _gen; +} + + +} diff --git a/searchlib/src/vespa/searchlib/util/statefile.h b/searchlib/src/vespa/searchlib/util/statefile.h new file mode 100644 index 00000000000..fddfc5bb8fc --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/statefile.h @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace search +{ + +/* + * Class used to store application state in a mostly safe manner. + * + * It maintaines two files, one file with zero-padding at end to store + * last state, and another file with history of states. + * + * State files can not be shared between processes, file locking is not + * async signal safe. + * + * Standalone implementation (doesn't use fastos or vespalib) to + * ensure that we don't trigger callback hooks in fastos. + * + */ +class StateFile +{ + char *_name; + char *_historyName; + std::atomic _gen; + + /* + * Zero pad file, to ensure that a later write won't run out of space. + */ + void + zeroPad(); + + /* + * Read state file to buffer in raw form, including padding. 
+ */ + void + readRawState(std::vector &buf); + + /* + * Trim padding and everything after state (i.e. stop at first newline). + */ + static void + trimState(std::vector &buf); + + /* + * Trim partial state from end of history. + */ + static void + trimHistory(std::vector &history, const char *historyName, int hfd, + std::vector &lastHistoryState); + + /* + * Fixup history: trim partial state from end and append current state + * in state file to history if different from last state in history. + * If main state file doesn't have a state but history has a state then + * restore main state from history. + */ + void + fixupHistory(); + + /* + * Check that state doesn't contain nul bytes or early newline and + * that it is terminated by a newline at end. + */ + void + checkState(const char *buf, size_t bufLen) noexcept; + + void + internalAddSignalState(const char *buf, size_t bufLen, + const char *name, + int appendFlag, + const char *openerr, + const char *writeerr, + const char *fsyncerr, + const char *closeerr) noexcept; + + void + addSignalState(const char *buf, size_t bufLen) noexcept; +public: + StateFile(const std::string &name); + + ~StateFile(); + + void + addState(const char *buf, size_t bufLen, bool signal); + + static void + erase(const std::string &name); + + /* + * Read state file to buffer and trim it down to a state. + */ + void + readState(std::vector &buf); + + /* + * Get current state generation (bumped whenever new state is written). + */ + int + getGen(void) const; +}; + +} diff --git a/searchlib/src/vespa/searchlib/util/stringenum.cpp b/searchlib/src/vespa/searchlib/util/stringenum.cpp new file mode 100644 index 00000000000..c89312e4276 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/stringenum.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + + +#include +#include +LOG_SETUP(".seachlib.util.stringenum"); +#include +#include + +namespace search { +namespace util { + +static inline char * +StripString(char *str) +{ + char *first = NULL; // first non-space char + char *last = NULL; // last non-space char + + if (str == NULL) + return NULL; + + for (; *str != '\0' && isspace(*str); str++); + first = str; + + for (; *str != '\0'; str++) + if (!isspace(*str)) + last = str; + + if (last != NULL) + *(last + 1) = '\0'; + + return first; +} + +StringEnum::~StringEnum() +{ +} + +void +StringEnum::CreateReverseMapping() const +{ + _reverseMap.resize(_numEntries); + + for (Map::const_iterator it = _mapping.begin(); + it != _mapping.end(); + it++) + { + assert(it->second >= 0); + assert(it->second < (int)_numEntries); + _reverseMap[it->second] = it->first.c_str(); + } +} + + +bool +StringEnum::Save(const char *filename) +{ + char str[1024]; + + Fast_BufferedFile file; + file.WriteOpen(filename); + if (!file.IsOpened()) + return false; + + file.SetSize(0); + sprintf(str, "%d\n", _numEntries); + file.WriteString(str); + + for (uint32_t i = 0; i < _numEntries; i++) { + file.WriteString(Lookup(i)); + file.WriteString("\n"); + } + + file.Close(); + return true; +} + + +bool +StringEnum::Load(const char *filename) +{ + char line[1024]; + char *pt; + uint32_t entries; // from first line of file + uint32_t lineNumber; // current line in file + uint32_t entryCnt; // # entries obtained from file + + Clear(); + + Fast_BufferedFile file; + if (!file.OpenReadOnly(filename)) + return false; + + lineNumber = 0; + entryCnt = 0; + + pt = StripString(file.ReadLine(line, sizeof(line))); + if (pt == NULL || *pt == '\0') + return false; + lineNumber++; + + entries = atoi(pt); + + while (!file.Eof()) { + pt = StripString(file.ReadLine(line, sizeof(line))); + if (pt == NULL) // end of input ? 
+ break; + lineNumber++; + if (*pt == '\0') // empty line ? + continue; + + uint32_t tmp = _numEntries; + if (static_cast(Add(pt)) != tmp) { + LOG(error, "(%s:%d) duplicate enum entry: %s", filename, lineNumber, pt); + } + entryCnt++; + } + + file.Close(); + if (entries != _numEntries + || entries != entryCnt) { + Clear(); + return false; + } + return true; +} + +} +} diff --git a/searchlib/src/vespa/searchlib/util/stringenum.h b/searchlib/src/vespa/searchlib/util/stringenum.h new file mode 100644 index 00000000000..86a84261b10 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/stringenum.h @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include +#include + +namespace search { +namespace util { + +/** + * An object of this class represents an enumeration of a set of + * strings. This is useful for mapping a set of strings into a + * continuous range of integers. + **/ +class StringEnum +{ +private: + StringEnum(const StringEnum &); + StringEnum& operator=(const StringEnum &); + typedef vespalib::hash_map Map; + + uint32_t _numEntries; + Map _mapping; + mutable std::vector _reverseMap; + + /** + * Create a reverse mapping that enables the user to map integers + * into strings. This method is called by the Lookup(int) method. + **/ + void CreateReverseMapping() const; + +public: + + /** + * Create an empty string enumeration. + **/ + StringEnum() + : _numEntries(0), + _mapping(), + _reverseMap() + { + } + + /** + * Destructor. + **/ + ~StringEnum(); + + + /** + * Discard all entries held by this object. + **/ + void Clear() + { + _reverseMap.clear(); + _mapping.clear(); + _numEntries = 0; + } + + + /** + * Add a string to this enumeration. Equal strings will get the same + * enumerated value. Different string will get different enumerated + * values. 
The set of values returned from multiple invocations of + * this method will always be a contiuous range beginning at 0. + * + * @return the enumerated value for the given string. + * @param str string you want to add. + **/ + int Add(const char *str) + { + Map::const_iterator found(_mapping.find(str)); + if (found != _mapping.end()) { + return found->second; + } else { + int value = _numEntries++; + _mapping[str] = value; + return value; + } + } + + + /** + * Obtain the enumerated value for the given string. + * + * @return enumerated value or -1 if not present. + * @param str the string to look up. + **/ + int Lookup(const char *str) const + { + Map::const_iterator found(_mapping.find(str)); + return (found != _mapping.end()) ? found->second : -1; + } + + + /** + * Obtain the string for the given enumerated value. + * + * @return string or NULL if out of range. + * @param value the enumerated value to look up. + **/ + const char *Lookup(uint32_t value) const + { + if (value >= _numEntries) + return NULL; + + if (_numEntries > _reverseMap.size()) + CreateReverseMapping(); + + return _reverseMap[value]; + } + + + /** + * Obtain the number of entries currently present in this + * enumeration. + * + * @return current number of entries. + **/ + uint32_t GetNumEntries() const { return _numEntries; } + + + /** + * Save the enumeration currently held by this object to file. + * + * @return success(true)/fail(false). + * @param filename name of save file. + **/ + bool Save(const char *filename); + + + /** + * Load an enumeration from file. The loaded enumeration will + * replace the one currently held by this object. + * + * @return success(true)/fail(false). + * @param filename name of file to load. 
+ **/ + bool Load(const char *filename); +}; + +} +} + diff --git a/searchlib/src/vespa/searchlib/util/url.cpp b/searchlib/src/vespa/searchlib/util/url.cpp new file mode 100644 index 00000000000..d60ed222305 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/url.cpp @@ -0,0 +1,555 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2000-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +/* + * Note for bugs / fixes: + * Please update fastserver4/test/regress/url/testurl.cpp + * with test for new url's when bugs are discovered/fixed. + */ + +#include +#include +#include + +LOG_SETUP(".searchlib.util.url"); + +namespace search { +namespace util { +bool +URL::IsAlphaChar(unsigned char c) // According to RFC2396 +{ + return (c>='A' && c<='Z') || (c>='a' && c<='z'); +} + +bool +URL::IsDigitChar(unsigned char c) // According to RFC2396 +{ + return (c>='0' && c<='9'); +} + +bool +URL::IsMarkChar(unsigned char c) // According to RFC2396 +{ + return (c=='-' || c=='_' || c=='.' || c=='!' || c=='~' || + c=='*' || c=='\'' || c=='(' || c==')'); +} + +bool +URL::IsUnreservedChar(unsigned char c) // According to RFC2396 +{ + return (IsAlphaChar(c) || + IsDigitChar(c) || + IsMarkChar(c)); +} + +bool +URL::IsEscapedChar(unsigned char c) // According to RFC2396 +{ + // Cheat! Shoud be ('%' hex hex) + return (c=='%'); +} + +bool +URL::IsReservedChar(unsigned char c) // According to RFC2396 +{ + return (c==';' || c=='/' || c=='?' 
|| c==':' || c=='@' ||
+            c=='&' || c=='=' || c=='+' || c=='$' || c==',');
+}
+
+// Unreserved, escape start, or one of ":@&=+$,".
+bool
+URL::IsPChar(unsigned char c) // According to RFC2068
+{
+    return (IsUnreservedChar(c) ||
+            IsEscapedChar(c) ||
+            (c==':' || c=='@' || c=='&' || c=='=' || c=='+' ||
+             c=='$' || c==','));
+}
+// Unreserved, escape start, or reserved.
+bool
+URL::IsUricChar(unsigned char c) // According to RFC2068
+{
+    return (IsUnreservedChar(c) ||
+            IsEscapedChar(c) ||
+            IsReservedChar(c));
+}
+
+
+// The predicates below define which characters each URL part may hold;
+// they are handed to ParseURLPart() by SetURL().
+
+bool
+URL::IsSchemeChar(unsigned char c) // According to RFC2068
+{
+    return (IsAlphaChar(c) ||
+            IsDigitChar(c) ||
+            c=='+' || c=='-' || c=='.');
+}
+
+bool
+URL::IsHostChar(unsigned char c) // According to RFC2068
+{
+    return (IsAlphaChar(c) ||
+            IsDigitChar(c) ||
+            c=='.' || c=='+' || c=='-');
+}
+
+bool
+URL::IsPortChar(unsigned char c) // According to RFC2068
+{
+    return IsDigitChar(c);
+}
+
+bool
+URL::IsPathChar(unsigned char c) // According to RFC2068
+{
+    return (IsPChar(c) ||
+            c=='/' || c==';');
+}
+
+bool
+URL::IsFileNameChar(unsigned char c) // According to RFC2068
+{
+    return IsPChar(c);
+}
+
+bool
+URL::IsParamChar(unsigned char c) // According to RFC2068
+{
+    return IsPChar(c) || c=='/';
+}
+
+bool
+URL::IsParamsChar(unsigned char c) // According to RFC2068
+{
+    return IsParamChar(c) || c==';';
+}
+
+bool
+URL::IsQueryChar(unsigned char c) // According to RFC2068
+{
+    return IsUricChar(c);
+}
+
+bool
+URL::IsFragmentChar(unsigned char c) // According to RFC2068
+{
+    return IsUricChar(c);
+}
+
+bool
+URL::IsTokenChar(unsigned char c) // According to FAST URL tokenization
+{
+    return (IsAlphaChar(c) ||
+            IsDigitChar(c) ||
+            c == '_' || c == '-');
+}
+
+/*
+ * Copy the leading run of characters accepted by IsPartChar from src to
+ * dest, nul terminated and limited to destsize - 1 characters.
+ *
+ * When the run is empty dest is left untouched.
+ *
+ * Returns a pointer to the first character of src that was not consumed.
+ */
+unsigned char *
+URL::ParseURLPart(unsigned char *src,
+                  unsigned char *dest,
+                  unsigned int destsize,
+                  bool (*IsPartChar)(unsigned char c))
+{
+    unsigned char *p = src;
+    unsigned int len = 0;
+
+    // Leave room for the terminator.  Written as len + 1 < destsize so
+    // that a destsize of 0 cannot wrap around.
+    while (IsPartChar(*p) && len + 1 < destsize) {
+        p++;
+        len++;
+    }
+    if (len > 0) {
+        strncpy(reinterpret_cast<char *>(dest),
+                reinterpret_cast<const char *>(src), len);
+        dest[len] = '\0';
+    }
+
+    return p;
+}
+
+
+URL::URL(const unsigned char *url,
size_t len) :
+    _maintld(_emptystring),
+    _tld(reinterpret_cast<const unsigned char *>("")),
+    _domain(reinterpret_cast<const unsigned char *>("")),
+    _tldregion(reinterpret_cast<const unsigned char *>("")),
+    _pathDepth(0),
+    _startScheme(&_token[sizeof(_token) - 1]),
+    _startHost(&_token[sizeof(_token) - 1]),
+    _startDomain(&_token[sizeof(_token) - 1]),
+    _startMainTld(&_token[sizeof(_token)-1]),
+    _startPort(&_token[sizeof(_token)-1]),
+    _startPath(&_token[sizeof(_token)-1]),
+    _startFileName(&_token[sizeof(_token) - 1]),
+    _startExtension(&_token[sizeof(_token) - 1]),
+    _startParams(&_token[sizeof(_token) - 1]),
+    _startQuery(&_token[sizeof(_token) - 1]),
+    _startFragment(&_token[sizeof(_token) - 1]),
+    _startAddress(&_token[sizeof(_token) - 1]),
+    _tokenPos(_url),
+    _gotCompleteURL(false)
+{
+    Reset();
+    if (url != NULL)
+        SetURL(url, len);
+}
+
+
+/*
+ * Return the object to its empty state: all part buffers become empty
+ * strings, every _start* pointer points at the last byte of _token, and
+ * the token position is rewound to the start of _url.
+ */
+void
+URL::Reset(void)
+{
+    _gotCompleteURL = false;
+
+    _emptystring[0] = '\0';
+
+    _url[0] = '\0';
+    _scheme[0] = '\0';
+    _host[0] = '\0';
+    _siteowner[0] = '\0';
+    _port[0] = '\0';
+    _path[0] = '\0';
+    _filename[0] = '\0';
+    _extension[0] = '\0';
+    _params[0] = '\0';
+    _query[0] = '\0';
+    _fragment[0] = '\0';
+    _address[0] = '\0';
+    _maintld = _emptystring; // Hack needed to please langid.
+    _tld = (const unsigned char *) "";
+    _domain = (const unsigned char *) "";
+    _tldregion = (const unsigned char *) "";
+    _pathDepth = 0;
+
+    _token[0] = '\0';
+
+    _startScheme = &_token[sizeof(_token)-1];
+    _startHost = &_token[sizeof(_token)-1];
+    _startDomain = &_token[sizeof(_token)-1];
+    _startMainTld = &_token[sizeof(_token)-1];
+    _startPort = &_token[sizeof(_token)-1];
+    _startPath = &_token[sizeof(_token)-1];
+    _startFileName = &_token[sizeof(_token)-1];
+    _startExtension = &_token[sizeof(_token)-1];
+    _startParams = &_token[sizeof(_token)-1];
+    _startQuery = &_token[sizeof(_token)-1];
+    _startFragment = &_token[sizeof(_token)-1];
+    _startAddress = &_token[sizeof(_token)-1];
+
+    _tokenPos = _url;
+}
+
+/*
+ * Copy at most MAX_URL_LEN characters of url into _url and split the copy
+ * into scheme, host, domain, siteowner, main tld, port, path, filename,
+ * extension, params, query, fragment and address.
+ *
+ * A length of 0 means "not given"; the copy then stops at the terminator
+ * of url or at MAX_URL_LEN characters.  A length above MAX_URL_LEN is
+ * logged and clamped.  url is dereferenced without a null check.
+ *
+ * Host and port are extracted only when the scheme starts with "http"
+ * and is followed by "//", or when the URL starts with "www.".  Path and
+ * later parts are extracted only when there is no scheme or the scheme
+ * starts with "http"; otherwise everything after the scheme goes into
+ * the address.
+ */
+void
+URL::SetURL(const unsigned char *url, size_t length)
+{
+    int len = 0;
+    unsigned char
+        *p, *ptmp, *siteowner = 0, *filename = 0, *extension = 0;
+
+    Reset();
+    if (length > MAX_URL_LEN) {
+        LOG(warning,
+            "Max link size overflow: len=%lu, max=%d",
+            static_cast<unsigned long>(length), MAX_URL_LEN);
+        length = MAX_URL_LEN;
+    }
+    if (length == 0)
+        length = MAX_URL_LEN;
+
+    strncpy(reinterpret_cast<char *>(_url),
+            reinterpret_cast<const char *>(url), length);
+    _url[length] = '\0'; // strncpy does not terminate a full-length copy
+
+    p = _url;
+
+    // Look for ':' as the first non-scheme-char character. If so => scheme
+    for (p = _url, len = 0; *p != '\0' && IsSchemeChar(*p); p++, len++)
+        ;
+
+    if (*p++ == ':') {
+        strncpy(reinterpret_cast<char *>(_scheme),
+                reinterpret_cast<const char *>(_url), len);
+        _scheme[len] = '\0';
+        _startScheme = _url;
+    } else
+        p = _url;
+
+    // get host name
+    if ((strncasecmp(reinterpret_cast<const char *>(_scheme), "http", 4) == 0 &&
+         p[0] == '/' && p[1] == '/') ||
+        strncasecmp(reinterpret_cast<const char *>(_url), "www.", 4) == 0) {
+        if (p[0] == '/' && p[1] == '/')
+            p += 2;
+        _startHost = p;
+        p = ParseURLPart(p, _host, sizeof(_host), IsHostChar);
+
+        // Locate siteowner. eg. 'www.sony.com' => 'sony'
+        if (_host[0] != '\0') {
+            unsigned char *pso;
+
+            int solen = 0;
+
+            // First check entries from config.
+            siteowner = pso = _host;
+
+            // Leaves siteowner just after the last '.' in the host.
+            for (solen = 0; *pso != '\0'; pso++, solen++) {
+                if (*pso == '.') {
+                    siteowner = pso + 1;
+                    solen = -1;
+                }
+            }
+            _domain = siteowner;
+            _startDomain = _startHost + (siteowner - _host);
+            _startMainTld = _startDomain;
+
+            // Locate main-tld info.
+            ptmp = reinterpret_cast<unsigned char *>
+                   (strrchr(reinterpret_cast<char *>(_host), '.'));
+            if (ptmp != NULL) {
+                _maintld = &ptmp[1];
+                _startMainTld = _startHost + (_maintld - _host);
+                if (*_tld == '\0') {
+                    _tld = _maintld;
+                }
+            }
+
+            // If siteowner is not found in config entries use second latest word in host.
+            if (_siteowner[0] == '\0') {
+                pso = reinterpret_cast<unsigned char *>
+                      (strrchr(reinterpret_cast<char *>(_host), '.'));
+                if (pso != NULL && pso > _host) {
+                    // Walk backwards from the last '.' to the previous
+                    // '.' or to the start of the host.
+                    pso--;
+                    solen = 0;
+                    while (pso > _host && *pso != '.') {
+                        solen++;
+                        pso--;
+                    }
+                    if (*pso != '.')
+                        solen++;
+                    else
+                        pso++;
+                    if (solen > 0) {
+                        strncpy(reinterpret_cast<char *>(_siteowner),
+                                reinterpret_cast<const char *>(pso), solen);
+                        _siteowner[solen] = '\0';
+                        _startDomain = _startHost + (pso - _host);
+                        _domain = pso;
+                    }
+                }
+            }
+        }
+
+        // Parse port number
+        if (*p == ':') {
+            p++;
+            _startPort = p;
+            p = ParseURLPart(p, _port, sizeof(_port), IsDigitChar);
+        }
+    }
+
+    if (_scheme[0] == '\0' ||
+        strncasecmp(reinterpret_cast<const char *>(_scheme), "http", 4) == 0) {
+        // Handle http url.
+
+        // Parse path, filename, extension.
+        _startPath = p;
+        p = ParseURLPart(p, _path, sizeof(_path), IsPathChar);
+
+        // The filename starts after the last '/' that precedes any ';'.
+        // The path depth counts segments that begin with a filename char.
+        filename = _path;
+        if (IsFileNameChar(*filename))
+            _pathDepth++;
+        for (ptmp = _path ; *ptmp != '\0' && *ptmp != ';' ; ptmp++)
+            if (*ptmp == '/') {
+                filename = ptmp + 1;
+                if (IsFileNameChar(*filename))
+                    _pathDepth++;
+            }
+        _startFileName = _startPath + (filename - _path);
+        ParseURLPart(filename, _filename, sizeof(_filename), IsFileNameChar);
+
+        extension = reinterpret_cast<unsigned char *>
+                    (strrchr(reinterpret_cast<char *>(_filename), '.'));
+        if (extension != NULL) {
+            extension++;
+            // _extension is as large as _filename, so this copy fits.
+            strcpy(reinterpret_cast<char *>(_extension),
+                   reinterpret_cast<const char *>(extension));
+            _startExtension = _startFileName + (extension - _filename);
+        }
+
+        // Parse params part.
+        if ((ptmp = reinterpret_cast<unsigned char *>
+             (strchr(reinterpret_cast<char *>(_path), ';'))) != NULL) {
+            ptmp++;
+            _startParams = _startPath + (ptmp - _path);
+            ParseURLPart(ptmp, _params, sizeof(_params), IsParamsChar);
+        }
+
+        // Parse query part.
+        if (*p == '?') {
+            p++;
+            _startQuery = p;
+            p = ParseURLPart(p, _query, sizeof(_query), IsQueryChar);
+        }
+
+        // Parse fragment part
+        if (*p == '#') {
+            p++;
+            _startFragment = p;
+            p = ParseURLPart(p, _fragment, sizeof(_fragment), IsFragmentChar);
+        }
+
+        // stuff the rest into address
+        _startAddress = p;
+        strncpy(reinterpret_cast<char *>(_address),
+                reinterpret_cast<const char *>(p), sizeof(_address) - 1);
+        _address[sizeof(_address) - 1] = '\0';
+    } else {
+        _startAddress = p;
+        strncpy(reinterpret_cast<char *>(_address),
+                reinterpret_cast<const char *>(p), sizeof(_address) - 1);
+        _address[sizeof(_address) - 1] = '\0';
+    }
+}
+
+/*
+ * True when the URL has a scheme, a host and a path that starts with '/'.
+ */
+bool
+URL::IsBaseURL(void) const
+{
+    return (_scheme[0] != '\0' &&
+            _host[0] != '\0' &&
+            _path[0] == '/');
+}
+
+/*
+ * Return the next token of the URL, i.e. the next run of token characters
+ * (see IsTokenChar), and report in ctx which part of the URL it came from.
+ */
+const unsigned char *
+URL::GetToken(URL_CONTEXT &ctx)
+{
+    int i = 0;
+
+    // Skip everything that is not a token character
+    while (!IsTokenChar(*_tokenPos) && *_tokenPos != '\0')
+        _tokenPos++;
+
+    while (IsTokenChar(*_tokenPos))
+        _token[i++] = *_tokenPos++;
+    _token[i] = '\0';
+
+    ctx = URL_SCHEME;
+    if (_tokenPos > _startHost)
+        ctx = URL_HOST;
+    if
(_tokenPos > _startDomain)
+        ctx = URL_DOMAIN; // each later test that holds overrides ctx
+    if (_tokenPos > _startMainTld)
+        ctx = URL_MAINTLD;
+    if (_tokenPos > _startPort)
+        ctx = URL_PORT;
+    if (_tokenPos > _startPath)
+        ctx = URL_PATH;
+    if (_tokenPos > _startFileName)
+        ctx = URL_FILENAME;
+    if (_tokenPos > _startExtension)
+        ctx = URL_EXTENSION;
+    if (_tokenPos > _startParams)
+        ctx = URL_PARAMS;
+    if (_tokenPos > _startQuery)
+        ctx = URL_QUERY;
+    if (_tokenPos > _startFragment)
+        ctx = URL_FRAGMENT;
+    if (_tokenPos > _startAddress)
+        ctx = URL_ADDRESS;
+    // NULL tells the caller that no token was collected.
+    if (_token[0] != '\0')
+        return _token;
+    else
+        return NULL;
+}
+// Return the upper-case name of ctx, or "UNKNOWN" when no case matches.
+const char *
+URL::ContextName(URL_CONTEXT ctx)
+{
+    switch (ctx) {
+    case URL_SCHEME:
+        return "SCHEME";
+    case URL_HOST:
+        return "HOST";
+    case URL_DOMAIN:
+        return "DOMAIN";
+    case URL_MAINTLD:
+        return "MAINTLD";
+    case URL_PORT:
+        return "PORT";
+    case URL_PATH:
+        return "PATH";
+    case URL_FILENAME:
+        return "FILENAME";
+    case URL_EXTENSION:
+        return "EXTENSION";
+    case URL_PARAMS:
+        return "PARAMS";
+    case URL_QUERY:
+        return "QUERY";
+    case URL_FRAGMENT:
+        return "FRAGMENT";
+    case URL_ADDRESS:
+        return "ADDRESS";
+    }
+
+    return "UNKNOWN";
+}
+/*
+ * Debug aid: print the URL, every non-empty part, all _start* pointers
+ * and finally every token with its context to stdout.  The tokens are
+ * fetched with GetToken(), so the token position is consumed.
+ */
+void
+URL::Dump(void)
+{
+    printf("URL: '%s'\n", _url);
+
+    if (_scheme[0] != '\0')
+        printf(" scheme: '%s'\n", _scheme);
+    if (_host[0] != '\0')
+        printf(" host: '%s'\n", _host);
+    if (_domain[0] != '\0')
+        printf(" domain: '%s'\n", _domain);
+    if (_siteowner[0] != '\0')
+        printf(" siteowner: '%s'\n", _siteowner);
+    if (_maintld[0] != '\0')
+        printf(" maintld: '%s'\n", _maintld);
+    if (_tld[0] != '\0')
+        printf(" tld: '%s'\n", _tld);
+    if (_tldregion[0] != '\0')
+        printf(" tldregion: '%s'\n", _tldregion);
+    if (_port[0] != '\0')
+        printf(" port: '%s'\n", _port);
+    if (_path[0] != '\0')
+        printf(" path: '%s'\n", _path);
+    if (_pathDepth != 0)
+        printf(" pathdepth: '%d'\n", _pathDepth);
+    if (_filename[0] != '\0')
+        printf(" filename: '%s'\n", _filename);
+    if (_extension[0] != '\0')
+        printf(" extension: '%s'\n", _extension);
+    if (_params[0] != '\0')
+        printf(" params: '%s'\n", _params);
+    if (_query[0] != '\0')
+        printf(" query: '%s'\n", _query);
+    if (_fragment[0] != '\0')
+        printf(" fragment: '%s'\n", _fragment);
+    if (_address[0] != '\0')
+        printf(" address: '%s'\n", _address);
+
+    printf("_startScheme: '%s'\n", _startScheme);
+    printf("_startHost: '%s'\n", _startHost);
+    printf("_startDomain: '%s'\n", _startDomain);
+    printf("_startMainTld: '%s'\n", _startMainTld);
+    printf("_startPort: '%s'\n", _startPort);
+    printf("_startPath: '%s'\n", _startPath);
+    printf("_startFileName: '%s'\n", _startFileName);
+    printf("_startExtension: '%s'\n", _startExtension);
+    printf("_startParams: '%s'\n", _startParams);
+    printf("_startQuery: '%s'\n", _startQuery);
+    printf("_startFragment: '%s'\n", _startFragment);
+    printf("_startAddress: '%s'\n", _startAddress);
+
+    const unsigned char *token;
+    URL_CONTEXT ctx;
+    while ((token = GetToken(ctx)) != NULL) {
+        printf("TOKEN: %s '%s'\n", ContextName(ctx), token);
+    }
+}
+
+}
+}
diff --git a/searchlib/src/vespa/searchlib/util/url.h b/searchlib/src/vespa/searchlib/util/url.h
new file mode 100644
index 00000000000..f700f0f79f1
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/util/url.h
@@ -0,0 +1,277 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA
+// Copyright (C) 2003 Overture Services Norway AS
+#pragma once
+
+#ifndef MAX_URL_LEN
+#define MAX_URL_LEN 4096
+#endif
+
+/**
+ * Class that parses URLs and splits them into
+ * a number of subelements. Detects different types
+ * of "URLs", such as http:, https:, ftp:, mailto:,
+ * file:, etc. Only http: and https: URLs are
+ * processed into smaller subelements. For http: and
+ * https: URLs, the parser tries to locate the name
+ * of the owner of the domain ('siteowner') by
+ * extracting the last word before the TLD from the
+ * domain part of the URL.
A list of TLD's may be + * loaded to improve the siteowner extraction algorithm. + * The class handles relative as well as absolute URL's. + * + * Note that memory consumption is quite high for this version, + * roughly 40kB / instance. + */ + +namespace search { +namespace util { + +class URL +{ +private: + URL(const URL &); /* declared private: code outside the class cannot copy-construct a URL */ + URL& operator=(const URL &); /* declared private: code outside the class cannot assign a URL */ + +public: + enum URL_CONTEXT { /* URL part that a token belongs to; GetToken() reports one of these through its ctx argument */ + URL_SCHEME, + URL_HOST, + URL_DOMAIN, + URL_MAINTLD, + URL_PORT, + URL_PATH, + URL_FILENAME, + URL_EXTENSION, + URL_PARAMS, + URL_QUERY, + URL_FRAGMENT, + URL_ADDRESS + }; + +protected: + /** + * Storage for the URL and its parsed parts. Each array below is embedded + * in the object and is MAX_URL_LEN+1 bytes long. Counting _token there + * are 13 such arrays, i.e. 13 * 4097 = 53261 bytes per instance when + * MAX_URL_LEN has its default value of 4096, which is more than the + * figure quoted in the class comment. + */ + unsigned char _url[MAX_URL_LEN+1]; + unsigned char _scheme[MAX_URL_LEN+1]; + unsigned char _host[MAX_URL_LEN+1]; + unsigned char _siteowner[MAX_URL_LEN+1]; + unsigned char _port[MAX_URL_LEN+1]; + unsigned char _path[MAX_URL_LEN+1]; + unsigned char _filename[MAX_URL_LEN+1]; + unsigned char _extension[MAX_URL_LEN+1]; + unsigned char _params[MAX_URL_LEN+1]; + unsigned char _query[MAX_URL_LEN+1]; + unsigned char _fragment[MAX_URL_LEN+1]; + unsigned char _address[MAX_URL_LEN+1]; + unsigned char *_maintld; /* returned by GetMainTLD() and GetMainTLD_NoConst() */ + const unsigned char *_tld; /* returned by GetTLD() */ + const unsigned char *_domain; /* returned by GetDomain() */ + const unsigned char *_tldregion; /* returned by GetTLDRegion() */ + unsigned char _emptystring[1]; /* NOTE(review): presumably the empty string used for parts that were not found - confirm in url.cpp */ + int _pathDepth; /* returned by GetPathDepth() */ + unsigned char _token[MAX_URL_LEN+1]; /* NOTE(review): presumably holds the token handed out by GetToken() - confirm in url.cpp */ + /** + * One start pointer per URL_CONTEXT value, followed by _tokenPos. + * NOTE(review): presumably these drive the token iteration done by + * GetToken() - confirm in url.cpp. + */ + unsigned char *_startScheme; + unsigned char *_startHost; + unsigned char *_startDomain; + unsigned char *_startMainTld; + unsigned char *_startPort; + unsigned char *_startPath; + unsigned char *_startFileName; + unsigned char *_startExtension; + unsigned char *_startParams; + unsigned char *_startQuery; + unsigned char *_startFragment; + unsigned char *_startAddress; + unsigned char *_tokenPos; + /** NOTE(review): presumably records whether the parsed URL was absolute (see IsBaseURL()) - confirm in url.cpp. */ + bool _gotCompleteURL; + /** NOTE(review): presumably resets the parsed state; the definition is not in this header - confirm in url.cpp. */ + void Reset(void); + /** + * Helper for parsing a single part of a URL. + * NOTE(review): only the declaration is visible here. Judging by the + * signature it reads from url, writes into buf (capacity bufsize) and + * uses IsPartChar to decide which characters belong to the part; this + * and the meaning of the returned pointer must be confirmed in url.cpp. + */ + static inline unsigned char *ParseURLPart(unsigned char *url, + unsigned char *buf, + unsigned int bufsize, + bool (*IsPartChar)(unsigned char c)); + /* + * Character-class predicates. They are declared static inline, but this + * header contains no definitions for them. + * NOTE(review): several names (mark, unreserved, escaped, reserved, uric, + * pchar) match grammar productions of RFC 2396 - presumably intentional; + * confirm against the definitions. + */ +public: + static inline bool IsAlphaChar(unsigned char c); + static inline bool IsDigitChar(unsigned char c); +
static inline bool IsMarkChar(unsigned char c); + static inline bool IsUnreservedChar(unsigned char c); + static inline bool IsEscapedChar(unsigned char c); + static inline bool IsReservedChar(unsigned char c); + static inline bool IsUricChar(unsigned char c); + static inline bool IsPChar(unsigned char c); + + static inline bool IsSchemeChar(unsigned char c); + static inline bool IsHostChar(unsigned char c); + static inline bool IsPortChar(unsigned char c); + static inline bool IsPathChar(unsigned char c); + static inline bool IsFileNameChar(unsigned char c); + static inline bool IsParamsChar(unsigned char c); + static inline bool IsParamChar(unsigned char c); + static inline bool IsQueryChar(unsigned char c); + static inline bool IsFragmentChar(unsigned char c); + + static inline bool IsTokenChar(unsigned char c); + + /** + * Default constructor. Optionally, the URL to be parsed may be given + * as a parameter. + * + * @param url The URL to parse. + * @param length The length of url. + */ + URL(const unsigned char *url=0, size_t length=0); + + /** + * Set a new URL to be parsed and split into subelements. + * + * @param url The URL to parse. + * @param length The length of url. + */ + void SetURL(const unsigned char *url, size_t length=0); + + /** + * Check if the current URL is a base (absolute) URL. + * + * @return true if this is an absolute URL, false otherwise. + */ + bool IsBaseURL(void) const; + + /** + * Get a pointer to the current URL. + * @return Pointer to string containing the URL, "" if none set. + */ + const unsigned char *GetURL() const {return _url;} + + /** + * Get the scheme part of the current URL (e.g. "http", "mailto", etc). + * @return Pointer to string containing the scheme, "" if none found. + */ + const unsigned char *GetScheme() const {return _scheme;} + + /** + * Get the host part of the current URL. + * @return Pointer to string containing the host name, "" if none found.
+ */ + const unsigned char *GetHost() const {return _host;} + + /** + * Get the domain part of the current URL. + * @return Pointer to string containing the domain name, "" if none found. + */ + const unsigned char *GetDomain() const {return _domain;} + + /** + * Get the siteowner part of the current URL. + * @return Pointer to string containing the siteowner, "" if none found. + */ + const unsigned char *GetSiteOwner() const {return _siteowner;} + + /** + * Get the main TLD of the current URL, e.g. 'no', 'com', etc. + * @return Pointer to string containing the tld name, "" if none found. + */ + const unsigned char *GetMainTLD() const {return _maintld;} + unsigned char *GetMainTLD_NoConst() const {return _maintld;} /* same pointer as GetMainTLD(), returned as a non-const pointer */ + + /** + * Similar to GetMainTLD, but includes tld's taken from the tldlist file; + * may return strings like 'co.uk.'. + * @return Pointer to string containing the tld name, "" if none found. + */ + const unsigned char *GetTLD() const {return _tld;} + + /** + * Get the region correlated to the document tld. I.e. 'europe' for '.no'. + * @return Pointer to string containing the region name, "" if none found. + */ + const unsigned char *GetTLDRegion() const {return _tldregion;} + + /** + * Get the port part of the current URL. + * @return Pointer to string containing the port, "" if none found. + */ + const unsigned char *GetPort() const {return _port;} + + /** + * Get the path part of the current URL. + * @return Pointer to string containing the path, "" if none found. + */ + const unsigned char *GetPath() const {return _path;} + + /** + * Get the path depth recorded for the current URL. + * @return The value of the internal path depth counter (an int member), converted to unsigned int. + */ + unsigned int GetPathDepth() const {return _pathDepth;} + + /** + * Get the filename part of the current URL. + * @return Pointer to string containing the filename, "" if none found.
+ */ + const unsigned char *GetFilename() const {return _filename;} + + /** + * Get the filename extension of the current URL. + * @return Pointer to string containing the extension, "" if none found. + */ + const unsigned char *GetExtension() const {return _extension;} + + /** + * Get the params information part of the current URL. This is the part + * of the URL located between the filename and the query parts of the URL. + * @return Pointer to string containing the params part, "" if none found. + */ + const unsigned char *GetParams() const {return _params;} + + /** + * Get the query information part of the current URL. This is the part + * of the URL located between the path and the fragment parts of the URL. + * @return Pointer to string containing the query part, "" if none found. + */ + const unsigned char *GetQuery() const {return _query;} + + /** + * Get the fragment part of the current URL. This is + * treated as everything after any '#' character in the URL. + * @return Pointer to string containing the fragment, "" if none found. + */ + const unsigned char *GetFragment() const {return _fragment;} + + /** + * Get the address part of the current URL. In the current version, + * this is everything after the type field if it is different from + * http: and https:. + * @return Pointer to string containing the address, "" if none found. + */ + const unsigned char *GetAddress() const {return _address;} + + /** + * Get tokens with corresponding context information from the current url. + * The first call to this function will return the first token in the url. + * This function may be called repeatedly until the value NULL is returned. + * @param ctx Receives the context (URL part) of the returned token. + * @return Pointer to string containing the token, NULL when all tokens have + * been returned. + */ + const unsigned char *GetToken(URL_CONTEXT &ctx); + + /** + * Get a pointer to a string that contains the name of a given context. + * @param ctx The context to get the name of. + * @return Pointer to string containing the name of the given context.
+ */ + const char *ContextName(URL_CONTEXT ctx); + + /** + * Dump the contents of the URL and subelements to stdout. Only + * elements that contain information are shown. + * NOTE(review): the function body that ends just before this header in + * the patch (presumably this Dump) also prints the _start* pointers and + * every token returned by GetToken() - confirm in url.cpp. + */ + void Dump(void); +}; + +} +} + + -- cgit v1.2.3